642 files changed, 70600 insertions, 15939 deletions
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index 91ada52f776b..a7533780dddc 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -508,7 +508,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_get_stats 		= el3_get_stats,
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_tx_timeout 	= el3_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1041,67 +1040,68 @@ el3_link_ok(struct net_device *dev)
 }
 
 static int
-el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
+el3_netdev_get_ecmd(struct net_device *dev, struct ethtool_link_ksettings *cmd)
 {
 	u16 tmp;
 	int ioaddr = dev->base_addr;
+	u32 supported;
 
 	EL3WINDOW(0);
 	/* obtain current transceiver via WN4_MEDIA? */
 	tmp = inw(ioaddr + WN0_ADDR_CONF);
-	ecmd->transceiver = XCVR_INTERNAL;
 	switch (tmp >> 14) {
 	case 0:
-		ecmd->port = PORT_TP;
+		cmd->base.port = PORT_TP;
 		break;
 	case 1:
-		ecmd->port = PORT_AUI;
-		ecmd->transceiver = XCVR_EXTERNAL;
+		cmd->base.port = PORT_AUI;
 		break;
 	case 3:
-		ecmd->port = PORT_BNC;
+		cmd->base.port = PORT_BNC;
 	default:
 		break;
 	}
 
-	ecmd->duplex = DUPLEX_HALF;
-	ecmd->supported = 0;
+	cmd->base.duplex = DUPLEX_HALF;
+	supported = 0;
 	tmp = inw(ioaddr + WN0_CONF_CTRL);
 	if (tmp & (1<<13))
-		ecmd->supported |= SUPPORTED_AUI;
+		supported |= SUPPORTED_AUI;
 	if (tmp & (1<<12))
-		ecmd->supported |= SUPPORTED_BNC;
+		supported |= SUPPORTED_BNC;
 	if (tmp & (1<<9)) {
-		ecmd->supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half |
+		supported |= SUPPORTED_TP | SUPPORTED_10baseT_Half |
 				SUPPORTED_10baseT_Full;	/* hmm... */
 		EL3WINDOW(4);
 		tmp = inw(ioaddr + WN4_NETDIAG);
 		if (tmp & FD_ENABLE)
-			ecmd->duplex = DUPLEX_FULL;
+			cmd->base.duplex = DUPLEX_FULL;
 	}
 
-	ethtool_cmd_speed_set(ecmd, SPEED_10);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	cmd->base.speed = SPEED_10;
 	EL3WINDOW(1);
 	return 0;
 }
 
 static int
-el3_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
+el3_netdev_set_ecmd(struct net_device *dev,
+		    const struct ethtool_link_ksettings *cmd)
 {
 	u16 tmp;
 	int ioaddr = dev->base_addr;
 
-	if (ecmd->speed != SPEED_10)
+	if (cmd->base.speed != SPEED_10)
 		return -EINVAL;
-	if ((ecmd->duplex != DUPLEX_HALF) && (ecmd->duplex != DUPLEX_FULL))
-		return -EINVAL;
-	if ((ecmd->transceiver != XCVR_INTERNAL) && (ecmd->transceiver != XCVR_EXTERNAL))
+	if ((cmd->base.duplex != DUPLEX_HALF) &&
+	    (cmd->base.duplex != DUPLEX_FULL))
 		return -EINVAL;
 
 	/* change XCVR type */
 	EL3WINDOW(0);
 	tmp = inw(ioaddr + WN0_ADDR_CONF);
-	switch (ecmd->port) {
+	switch (cmd->base.port) {
 	case PORT_TP:
 		tmp &= ~(3<<14);
 		dev->if_port = 0;
@@ -1131,7 +1131,7 @@ el3_netdev_set_ecmd(struct net_device *dev, struct ethtool_cmd *ecmd)
 
 	EL3WINDOW(4);
 	tmp = inw(ioaddr + WN4_NETDIAG);
-	if (ecmd->duplex == DUPLEX_FULL)
+	if (cmd->base.duplex == DUPLEX_FULL)
 		tmp |= FD_ENABLE;
 	else
 		tmp &= ~FD_ENABLE;
@@ -1147,24 +1147,26 @@ static void el3_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int el3_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int el3_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct el3_private *lp = netdev_priv(dev);
 	int ret;
 
 	spin_lock_irq(&lp->lock);
-	ret = el3_netdev_get_ecmd(dev, ecmd);
+	ret = el3_netdev_get_ecmd(dev, cmd);
 	spin_unlock_irq(&lp->lock);
 	return ret;
 }
 
-static int el3_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int el3_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct el3_private *lp = netdev_priv(dev);
 	int ret;
 
 	spin_lock_irq(&lp->lock);
-	ret = el3_netdev_set_ecmd(dev, ecmd);
+	ret = el3_netdev_set_ecmd(dev, cmd);
 	spin_unlock_irq(&lp->lock);
 	return ret;
 }
@@ -1192,11 +1194,11 @@ static void el3_set_msglevel(struct net_device *dev, u32 v)
 
 static const struct ethtool_ops ethtool_ops = {
 	.get_drvinfo = el3_get_drvinfo,
-	.get_settings = el3_get_settings,
-	.set_settings = el3_set_settings,
 	.get_link = el3_get_link,
 	.get_msglevel = el3_get_msglevel,
 	.set_msglevel = el3_set_msglevel,
+	.get_link_ksettings = el3_get_link_ksettings,
+	.set_link_ksettings = el3_set_link_ksettings,
 };
 
 static void
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index b26e038b4a0e..b9f4c463e516 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -570,7 +570,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_tx_timeout		= corkscrew_timeout,
 	.ndo_get_stats		= corkscrew_get_stats,
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/3com/3c574_cs.c b/drivers/net/ethernet/3com/3c574_cs.c
index b88afd759307..9359a37fedc0 100644
--- a/drivers/net/ethernet/3com/3c574_cs.c
+++ b/drivers/net/ethernet/3com/3c574_cs.c
@@ -254,7 +254,6 @@ static const struct net_device_ops el3_netdev_ops = {
 	.ndo_get_stats		= el3_get_stats,
 	.ndo_do_ioctl		= el3_ioctl,
 	.ndo_set_rx_mode	= set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/3com/3c589_cs.c b/drivers/net/ethernet/3com/3c589_cs.c
index 71396e4b87e3..e28254a00599 100644
--- a/drivers/net/ethernet/3com/3c589_cs.c
+++ b/drivers/net/ethernet/3com/3c589_cs.c
@@ -188,7 +188,6 @@ static const struct net_device_ops el3_netdev_ops = {
 	.ndo_set_config		= el3_config,
 	.ndo_get_stats		= el3_get_stats,
 	.ndo_set_rx_mode	= set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index 9133e7926da5..b3560a364e53 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1062,7 +1062,6 @@ static const struct net_device_ops boomrang_netdev_ops = {
 	.ndo_do_ioctl 		= vortex_ioctl,
 #endif
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -1080,7 +1079,6 @@ static const struct net_device_ops vortex_netdev_ops = {
 	.ndo_do_ioctl 		= vortex_ioctl,
 #endif
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2909,18 +2907,20 @@ static int vortex_nway_reset(struct net_device *dev)
 	return mii_nway_restart(&vp->mii);
 }
 
-static int vortex_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int vortex_get_link_ksettings(struct net_device *dev,
+				     struct ethtool_link_ksettings *cmd)
 {
 	struct vortex_private *vp = netdev_priv(dev);
 
-	return mii_ethtool_gset(&vp->mii, cmd);
+	return mii_ethtool_get_link_ksettings(&vp->mii, cmd);
 }
 
-static int vortex_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int vortex_set_link_ksettings(struct net_device *dev,
+				     const struct ethtool_link_ksettings *cmd)
 {
 	struct vortex_private *vp = netdev_priv(dev);
 
-	return mii_ethtool_sset(&vp->mii, cmd);
+	return mii_ethtool_set_link_ksettings(&vp->mii, cmd);
 }
 
 static u32 vortex_get_msglevel(struct net_device *dev)
@@ -3033,13 +3033,13 @@ static const struct ethtool_ops vortex_ethtool_ops = {
 	.set_msglevel           = vortex_set_msglevel,
 	.get_ethtool_stats      = vortex_get_ethtool_stats,
 	.get_sset_count		= vortex_get_sset_count,
-	.get_settings           = vortex_get_settings,
-	.set_settings           = vortex_set_settings,
 	.get_link               = ethtool_op_get_link,
 	.nway_reset             = vortex_nway_reset,
 	.get_wol                = vortex_get_wol,
 	.set_wol                = vortex_set_wol,
 	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_link_ksettings     = vortex_get_link_ksettings,
+	.set_link_ksettings     = vortex_set_link_ksettings,
 };
 
 #ifdef CONFIG_PCI
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index 8f8418d2ac4a..a0cacbe846ba 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -996,28 +996,30 @@ typhoon_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 }
 
 static int
-typhoon_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+typhoon_get_link_ksettings(struct net_device *dev,
+			   struct ethtool_link_ksettings *cmd)
 {
 	struct typhoon *tp = netdev_priv(dev);
+	u32 supported, advertising = 0;
 
-	cmd->supported = SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+	supported = SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
 				SUPPORTED_Autoneg;
 
 	switch (tp->xcvr_select) {
 	case TYPHOON_XCVR_10HALF:
-		cmd->advertising = ADVERTISED_10baseT_Half;
+		advertising = ADVERTISED_10baseT_Half;
 		break;
 	case TYPHOON_XCVR_10FULL:
-		cmd->advertising = ADVERTISED_10baseT_Full;
+		advertising = ADVERTISED_10baseT_Full;
 		break;
 	case TYPHOON_XCVR_100HALF:
-		cmd->advertising = ADVERTISED_100baseT_Half;
+		advertising = ADVERTISED_100baseT_Half;
 		break;
 	case TYPHOON_XCVR_100FULL:
-		cmd->advertising = ADVERTISED_100baseT_Full;
+		advertising = ADVERTISED_100baseT_Full;
 		break;
 	case TYPHOON_XCVR_AUTONEG:
-		cmd->advertising = ADVERTISED_10baseT_Half |
+		advertising = ADVERTISED_10baseT_Half |
 					    ADVERTISED_10baseT_Full |
 					    ADVERTISED_100baseT_Half |
 					    ADVERTISED_100baseT_Full |
@@ -1026,54 +1028,57 @@ typhoon_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	}
 
 	if(tp->capabilities & TYPHOON_FIBER) {
-		cmd->supported |= SUPPORTED_FIBRE;
-		cmd->advertising |= ADVERTISED_FIBRE;
-		cmd->port = PORT_FIBRE;
+		supported |= SUPPORTED_FIBRE;
+		advertising |= ADVERTISED_FIBRE;
+		cmd->base.port = PORT_FIBRE;
 	} else {
-		cmd->supported |= SUPPORTED_10baseT_Half |
+		supported |= SUPPORTED_10baseT_Half |
 		    			SUPPORTED_10baseT_Full |
 					SUPPORTED_TP;
-		cmd->advertising |= ADVERTISED_TP;
-		cmd->port = PORT_TP;
+		advertising |= ADVERTISED_TP;
+		cmd->base.port = PORT_TP;
 	}
 
 	/* need to get stats to make these link speed/duplex valid */
 	typhoon_do_get_stats(tp);
-	ethtool_cmd_speed_set(cmd, tp->speed);
-	cmd->duplex = tp->duplex;
-	cmd->phy_address = 0;
-	cmd->transceiver = XCVR_INTERNAL;
+	cmd->base.speed = tp->speed;
+	cmd->base.duplex = tp->duplex;
+	cmd->base.phy_address = 0;
 	if(tp->xcvr_select == TYPHOON_XCVR_AUTONEG)
-		cmd->autoneg = AUTONEG_ENABLE;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 	else
-		cmd->autoneg = AUTONEG_DISABLE;
-	cmd->maxtxpkt = 1;
-	cmd->maxrxpkt = 1;
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
 
 static int
-typhoon_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+typhoon_set_link_ksettings(struct net_device *dev,
+			   const struct ethtool_link_ksettings *cmd)
 {
 	struct typhoon *tp = netdev_priv(dev);
-	u32 speed = ethtool_cmd_speed(cmd);
+	u32 speed = cmd->base.speed;
 	struct cmd_desc xp_cmd;
 	__le16 xcvr;
 	int err;
 
 	err = -EINVAL;
-	if (cmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		xcvr = TYPHOON_XCVR_AUTONEG;
 	} else {
-		if (cmd->duplex == DUPLEX_HALF) {
+		if (cmd->base.duplex == DUPLEX_HALF) {
 			if (speed == SPEED_10)
 				xcvr = TYPHOON_XCVR_10HALF;
 			else if (speed == SPEED_100)
 				xcvr = TYPHOON_XCVR_100HALF;
 			else
 				goto out;
-		} else if (cmd->duplex == DUPLEX_FULL) {
+		} else if (cmd->base.duplex == DUPLEX_FULL) {
 			if (speed == SPEED_10)
 				xcvr = TYPHOON_XCVR_10FULL;
 			else if (speed == SPEED_100)
@@ -1091,12 +1096,12 @@ typhoon_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		goto out;
 
 	tp->xcvr_select = xcvr;
-	if(cmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		tp->speed = 0xff;	/* invalid */
 		tp->duplex = 0xff;	/* invalid */
 	} else {
 		tp->speed = speed;
-		tp->duplex = cmd->duplex;
+		tp->duplex = cmd->base.duplex;
 	}
 
 out:
@@ -1145,13 +1150,13 @@ typhoon_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ering)
 }
 
 static const struct ethtool_ops typhoon_ethtool_ops = {
-	.get_settings		= typhoon_get_settings,
-	.set_settings		= typhoon_set_settings,
 	.get_drvinfo		= typhoon_get_drvinfo,
 	.get_wol		= typhoon_get_wol,
 	.set_wol		= typhoon_set_wol,
 	.get_link		= ethtool_op_get_link,
 	.get_ringparam		= typhoon_get_ringparam,
+	.get_link_ksettings	= typhoon_get_link_ksettings,
+	.set_link_ksettings	= typhoon_set_link_ksettings,
 };
 
 static int
@@ -2255,7 +2260,6 @@ static const struct net_device_ops typhoon_netdev_ops = {
 	.ndo_get_stats		= typhoon_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int
diff --git a/drivers/net/ethernet/8390/8390.c b/drivers/net/ethernet/8390/8390.c
index 5db1f55abef4..a43544af257b 100644
--- a/drivers/net/ethernet/8390/8390.c
+++ b/drivers/net/ethernet/8390/8390.c
@@ -64,7 +64,6 @@ const struct net_device_ops ei_netdev_ops = {
 	.ndo_set_rx_mode	= ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/8390p.c b/drivers/net/ethernet/8390/8390p.c
index e8fc2e87e840..46d2257c4430 100644
--- a/drivers/net/ethernet/8390/8390p.c
+++ b/drivers/net/ethernet/8390/8390p.c
@@ -69,7 +69,6 @@ const struct net_device_ops eip_netdev_ops = {
 	.ndo_set_rx_mode	= eip_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= eip_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/ax88796.c b/drivers/net/ethernet/8390/ax88796.c
index 39ca9350d1b2..b0a3b85fc6f8 100644
--- a/drivers/net/ethernet/8390/ax88796.c
+++ b/drivers/net/ethernet/8390/ax88796.c
@@ -536,7 +536,6 @@ static const struct net_device_ops ax_netdev_ops = {
 	.ndo_set_rx_mode	= ax_ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ax_ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/axnet_cs.c b/drivers/net/ethernet/8390/axnet_cs.c
index 4ea717d68c95..1d84a0544ace 100644
--- a/drivers/net/ethernet/8390/axnet_cs.c
+++ b/drivers/net/ethernet/8390/axnet_cs.c
@@ -134,7 +134,6 @@ static const struct net_device_ops axnet_netdev_ops = {
 	.ndo_tx_timeout		= axnet_tx_timeout,
 	.ndo_get_stats		= get_stats,
 	.ndo_set_rx_mode	= set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/8390/etherh.c b/drivers/net/ethernet/8390/etherh.c
index d686b9cac29f..11cbf22ad201 100644
--- a/drivers/net/ethernet/8390/etherh.c
+++ b/drivers/net/ethernet/8390/etherh.c
@@ -654,7 +654,6 @@ static const struct net_device_ops etherh_netdev_ops = {
 	.ndo_set_rx_mode	= __ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= __ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/hydra.c b/drivers/net/ethernet/8390/hydra.c
index 0fe19d609c2e..8ae249195301 100644
--- a/drivers/net/ethernet/8390/hydra.c
+++ b/drivers/net/ethernet/8390/hydra.c
@@ -105,7 +105,6 @@ static const struct net_device_ops hydra_netdev_ops = {
 	.ndo_set_rx_mode	= __ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= __ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/mac8390.c b/drivers/net/ethernet/8390/mac8390.c
index b9283901136e..9497f18eaba0 100644
--- a/drivers/net/ethernet/8390/mac8390.c
+++ b/drivers/net/ethernet/8390/mac8390.c
@@ -483,7 +483,6 @@ static const struct net_device_ops mac8390_netdev_ops = {
 	.ndo_set_rx_mode	= __ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= __ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/mcf8390.c b/drivers/net/ethernet/8390/mcf8390.c
index e1c055574a11..4bb967bc879e 100644
--- a/drivers/net/ethernet/8390/mcf8390.c
+++ b/drivers/net/ethernet/8390/mcf8390.c
@@ -308,7 +308,6 @@ static const struct net_device_ops mcf8390_netdev_ops = {
 	.ndo_set_rx_mode	= __ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= __ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c
index 57e97910c728..07355302443d 100644
--- a/drivers/net/ethernet/8390/ne2k-pci.c
+++ b/drivers/net/ethernet/8390/ne2k-pci.c
@@ -209,7 +209,6 @@ static const struct net_device_ops ne2k_netdev_ops = {
 	.ndo_set_rx_mode	= ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c
index 2f79d29f17f2..63079a6e20d9 100644
--- a/drivers/net/ethernet/8390/pcnet_cs.c
+++ b/drivers/net/ethernet/8390/pcnet_cs.c
@@ -227,7 +227,6 @@ static const struct net_device_ops pcnet_netdev_ops = {
 	.ndo_do_ioctl 		= ei_ioctl,
 	.ndo_set_rx_mode	= ei_set_multicast_list,
 	.ndo_tx_timeout 	= ei_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/8390/smc-ultra.c b/drivers/net/ethernet/8390/smc-ultra.c
index 139385dcdaa7..364b6514f65f 100644
--- a/drivers/net/ethernet/8390/smc-ultra.c
+++ b/drivers/net/ethernet/8390/smc-ultra.c
@@ -195,7 +195,6 @@ static const struct net_device_ops ultra_netdev_ops = {
 	.ndo_set_rx_mode	= ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller 	= ultra_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/wd.c b/drivers/net/ethernet/8390/wd.c
index dd7d816bde52..ad019cbc698f 100644
--- a/drivers/net/ethernet/8390/wd.c
+++ b/drivers/net/ethernet/8390/wd.c
@@ -156,7 +156,6 @@ static const struct net_device_ops wd_netdev_ops = {
 	.ndo_set_rx_mode	= ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller 	= ei_poll,
 #endif
diff --git a/drivers/net/ethernet/8390/zorro8390.c b/drivers/net/ethernet/8390/zorro8390.c
index 8308728fad05..6d93956b293b 100644
--- a/drivers/net/ethernet/8390/zorro8390.c
+++ b/drivers/net/ethernet/8390/zorro8390.c
@@ -284,7 +284,6 @@ static const struct net_device_ops zorro8390_netdev_ops = {
 	.ndo_set_rx_mode	= __ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= __ei_poll,
 #endif
diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig
index 8cc7467b6c1f..6e16e441f85e 100644
--- a/drivers/net/ethernet/Kconfig
+++ b/drivers/net/ethernet/Kconfig
@@ -21,6 +21,7 @@ source "drivers/net/ethernet/3com/Kconfig"
 source "drivers/net/ethernet/adaptec/Kconfig"
 source "drivers/net/ethernet/aeroflex/Kconfig"
 source "drivers/net/ethernet/agere/Kconfig"
+source "drivers/net/ethernet/alacritech/Kconfig"
 source "drivers/net/ethernet/allwinner/Kconfig"
 source "drivers/net/ethernet/alteon/Kconfig"
 source "drivers/net/ethernet/altera/Kconfig"
@@ -165,6 +166,7 @@ source "drivers/net/ethernet/seeq/Kconfig"
 source "drivers/net/ethernet/silan/Kconfig"
 source "drivers/net/ethernet/sis/Kconfig"
 source "drivers/net/ethernet/sfc/Kconfig"
+source "drivers/net/ethernet/sfc/falcon/Kconfig"
 source "drivers/net/ethernet/sgi/Kconfig"
 source "drivers/net/ethernet/smsc/Kconfig"
 source "drivers/net/ethernet/stmicro/Kconfig"
diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile
index a09423df83f2..24330f4885a9 100644
--- a/drivers/net/ethernet/Makefile
+++ b/drivers/net/ethernet/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_NET_VENDOR_8390) += 8390/
 obj-$(CONFIG_NET_VENDOR_ADAPTEC) += adaptec/
 obj-$(CONFIG_GRETH) += aeroflex/
 obj-$(CONFIG_NET_VENDOR_AGERE) += agere/
+obj-$(CONFIG_NET_VENDOR_ALACRITECH) += alacritech/
 obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/
 obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/
 obj-$(CONFIG_ALTERA_TSE) += altera/
@@ -75,6 +76,7 @@ obj-$(CONFIG_NET_VENDOR_SEEQ) += seeq/
 obj-$(CONFIG_NET_VENDOR_SILAN) += silan/
 obj-$(CONFIG_NET_VENDOR_SIS) += sis/
 obj-$(CONFIG_SFC) += sfc/
+obj-$(CONFIG_SFC_FALCON) += sfc/falcon/
 obj-$(CONFIG_NET_VENDOR_SGI) += sgi/
 obj-$(CONFIG_NET_VENDOR_SMSC) += smsc/
 obj-$(CONFIG_NET_VENDOR_STMICRO) += stmicro/
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 8af2c88d5b33..3aaad33cdbc6 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -634,7 +634,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_get_stats		= get_stats,
 	.ndo_set_rx_mode	= set_rx_mode,
 	.ndo_do_ioctl		= netdev_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef VLAN_SUPPORT
@@ -1817,21 +1816,23 @@ static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 	strlcpy(info->bus_info, pci_name(np->pci_dev), sizeof(info->bus_info));
 }
 
-static int get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int get_link_ksettings(struct net_device *dev,
+			      struct ethtool_link_ksettings *cmd)
 {
 	struct netdev_private *np = netdev_priv(dev);
 	spin_lock_irq(&np->lock);
-	mii_ethtool_gset(&np->mii_if, ecmd);
+	mii_ethtool_get_link_ksettings(&np->mii_if, cmd);
 	spin_unlock_irq(&np->lock);
 	return 0;
 }
 
-static int set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int set_link_ksettings(struct net_device *dev,
+			      const struct ethtool_link_ksettings *cmd)
 {
 	struct netdev_private *np = netdev_priv(dev);
 	int res;
 	spin_lock_irq(&np->lock);
-	res = mii_ethtool_sset(&np->mii_if, ecmd);
+	res = mii_ethtool_set_link_ksettings(&np->mii_if, cmd);
 	spin_unlock_irq(&np->lock);
 	check_duplex(dev);
 	return res;
@@ -1862,12 +1863,12 @@ static void set_msglevel(struct net_device *dev, u32 val)
 static const struct ethtool_ops ethtool_ops = {
 	.begin = check_if_running,
 	.get_drvinfo = get_drvinfo,
-	.get_settings = get_settings,
-	.set_settings = set_settings,
 	.nway_reset = nway_reset,
 	.get_link = get_link,
 	.get_msglevel = get_msglevel,
 	.set_msglevel = set_msglevel,
+	.get_link_ksettings = get_link_ksettings,
+	.set_link_ksettings = set_link_ksettings,
 };
 
 static int netdev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
diff --git a/drivers/net/ethernet/adi/Kconfig b/drivers/net/ethernet/adi/Kconfig
index 6b94ba610399..98cc8f535021 100644
--- a/drivers/net/ethernet/adi/Kconfig
+++ b/drivers/net/ethernet/adi/Kconfig
@@ -58,7 +58,7 @@ config BFIN_RX_DESC_NUM
 config BFIN_MAC_USE_HWSTAMP
 	bool "Use IEEE 1588 hwstamp"
 	depends on BFIN_MAC && BF518
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	default y
 	---help---
 	  To support the IEEE 1588 Precision Time Protocol (PTP), select y here
diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c
index 00f9ee3fc3e5..88164529b52a 100644
--- a/drivers/net/ethernet/adi/bfin_mac.c
+++ b/drivers/net/ethernet/adi/bfin_mac.c
@@ -1571,7 +1571,6 @@ static const struct net_device_ops bfin_mac_netdev_ops = {
 	.ndo_set_rx_mode	= bfin_mac_set_multicast_list,
 	.ndo_do_ioctl           = bfin_mac_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= bfin_mac_poll_controller,
 #endif
diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c
index f8df8248035e..93def92f9997 100644
--- a/drivers/net/ethernet/aeroflex/greth.c
+++ b/drivers/net/ethernet/aeroflex/greth.c
@@ -1290,15 +1290,6 @@ static int greth_mdio_probe(struct net_device *dev)
 	return 0;
 }
 
-static inline int phy_aneg_done(struct phy_device *phydev)
-{
-	int retval;
-
-	retval = phy_read(phydev, MII_BMSR);
-
-	return (retval < 0) ? retval : (retval & BMSR_ANEGCOMPLETE);
-}
-
 static int greth_mdio_init(struct greth_private *greth)
 {
 	int ret;
diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 906683851c7d..831bab352f8e 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -176,6 +176,8 @@ MODULE_DESCRIPTION("10/100/1000 Base-T Ethernet Driver for the ET1310 by Agere S
 #define NUM_FBRS		2
 
 #define MAX_PACKETS_HANDLED	256
+#define ET131X_MIN_MTU		64
+#define ET131X_MAX_MTU		9216
 
 #define ALCATEL_MULTICAST_PKT	0x01000000
 #define ALCATEL_BROADCAST_PKT	0x02000000
@@ -3869,9 +3871,6 @@ static int et131x_change_mtu(struct net_device *netdev, int new_mtu)
 	int result = 0;
 	struct et131x_adapter *adapter = netdev_priv(netdev);
 
-	if (new_mtu < 64 || new_mtu > 9216)
-		return -EINVAL;
-
 	et131x_disable_txrx(netdev);
 
 	netdev->mtu = new_mtu;
@@ -3958,6 +3957,8 @@ static int et131x_pci_setup(struct pci_dev *pdev,
 
 	netdev->watchdog_timeo = ET131X_TX_TIMEOUT;
 	netdev->netdev_ops     = &et131x_netdev_ops;
+	netdev->min_mtu        = ET131X_MIN_MTU;
+	netdev->max_mtu        = ET131X_MAX_MTU;
 
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	netdev->ethtool_ops = &et131x_ethtool_ops;
diff --git a/drivers/net/ethernet/alacritech/Kconfig b/drivers/net/ethernet/alacritech/Kconfig
new file mode 100644
index 000000000000..09496e18cdc5
--- /dev/null
+++ b/drivers/net/ethernet/alacritech/Kconfig
@@ -0,0 +1,28 @@
+config NET_VENDOR_ALACRITECH
+	bool "Alacritech devices"
+	default y
+	---help---
+	  If you have a network (Ethernet) card belonging to this class, say Y.
+
+	  Note that the answer to this question doesn't directly affect the
+	  kernel: saying N will just cause the configurator to skip all the
+	  questions about Alacritech devices. If you say Y, you will be asked
+	  for your specific device in the following questions.
+
+if NET_VENDOR_ALACRITECH
+
+config SLICOSS
+	tristate "Alacritech Slicoss support"
+	depends on PCI
+	select CRC32
+	---help---
+	  This driver supports Gigabit Ethernet adapters based on the
+	  Session Layer Interface (SLIC) technology by Alacritech.
+
+	  Supported are Mojave (1 port) and Oasis (1, 2 and 4 port) cards,
+	  both copper and fiber.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called slicoss. This is recommended.
+
+endif # NET_VENDOR_ALACRITECH
diff --git a/drivers/net/ethernet/alacritech/Makefile b/drivers/net/ethernet/alacritech/Makefile
new file mode 100644
index 000000000000..8790e9ed8496
--- /dev/null
+++ b/drivers/net/ethernet/alacritech/Makefile
@@ -0,0 +1,4 @@
+#
+# Makefile for the Alacritech Slicoss driver
+#
+obj-$(CONFIG_SLICOSS) += slicoss.o
diff --git a/drivers/net/ethernet/alacritech/slic.h b/drivers/net/ethernet/alacritech/slic.h
new file mode 100644
index 000000000000..08931b4afc96
--- /dev/null
+++ b/drivers/net/ethernet/alacritech/slic.h
@@ -0,0 +1,575 @@
+
+#ifndef _SLIC_H
+#define _SLIC_H
+
+#include <linux/types.h>
+#include <linux/netdevice.h>
+#include <linux/spinlock_types.h>
+#include <linux/dma-mapping.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/u64_stats_sync.h>
+
+#define SLIC_VGBSTAT_XPERR		0x40000000
+#define SLIC_VGBSTAT_XERRSHFT		25
+#define SLIC_VGBSTAT_XCSERR		0x23
+#define SLIC_VGBSTAT_XUFLOW		0x22
+#define SLIC_VGBSTAT_XHLEN		0x20
+#define SLIC_VGBSTAT_NETERR		0x01000000
+#define SLIC_VGBSTAT_NERRSHFT		16
+#define SLIC_VGBSTAT_NERRMSK		0x1ff
+#define SLIC_VGBSTAT_NCSERR		0x103
+#define SLIC_VGBSTAT_NUFLOW		0x102
+#define SLIC_VGBSTAT_NHLEN		0x100
+#define SLIC_VGBSTAT_LNKERR		0x00000080
+#define SLIC_VGBSTAT_LERRMSK		0xff
+#define SLIC_VGBSTAT_LDEARLY		0x86
+#define SLIC_VGBSTAT_LBOFLO		0x85
+#define SLIC_VGBSTAT_LCODERR		0x84
+#define SLIC_VGBSTAT_LDBLNBL		0x83
+#define SLIC_VGBSTAT_LCRCERR		0x82
+#define SLIC_VGBSTAT_LOFLO		0x81
+#define SLIC_VGBSTAT_LUFLO		0x80
+
+#define SLIC_IRHDDR_FLEN_MSK		0x0000ffff
+#define SLIC_IRHDDR_SVALID		0x80000000
+#define SLIC_IRHDDR_ERR			0x10000000
+
+#define SLIC_VRHSTAT_802OE		0x80000000
+#define SLIC_VRHSTAT_TPOFLO		0x10000000
+#define SLIC_VRHSTATB_802UE		0x80000000
+#define SLIC_VRHSTATB_RCVE		0x40000000
+#define SLIC_VRHSTATB_BUFF		0x20000000
+#define SLIC_VRHSTATB_CARRE		0x08000000
+#define SLIC_VRHSTATB_LONGE		0x02000000
+#define SLIC_VRHSTATB_PREA		0x01000000
+#define SLIC_VRHSTATB_CRC		0x00800000
+#define SLIC_VRHSTATB_DRBL		0x00400000
+#define SLIC_VRHSTATB_CODE		0x00200000
+#define SLIC_VRHSTATB_TPCSUM		0x00100000
+#define SLIC_VRHSTATB_TPHLEN		0x00080000
+#define SLIC_VRHSTATB_IPCSUM		0x00040000
+#define SLIC_VRHSTATB_IPLERR		0x00020000
+#define SLIC_VRHSTATB_IPHERR		0x00010000
+
+#define SLIC_CMD_XMT_REQ		0x01
+#define SLIC_CMD_TYPE_DUMB		3
+
+#define SLIC_RESET_MAGIC		0xDEAD
+#define SLIC_ICR_INT_OFF		0
+#define SLIC_ICR_INT_ON			1
+#define SLIC_ICR_INT_MASK		2
+
+#define SLIC_ISR_ERR			0x80000000
+#define SLIC_ISR_RCV			0x40000000
+#define SLIC_ISR_CMD			0x20000000
+#define SLIC_ISR_IO			0x60000000
+#define SLIC_ISR_UPC			0x10000000
+#define SLIC_ISR_LEVENT			0x08000000
+#define SLIC_ISR_RMISS			0x02000000
+#define SLIC_ISR_UPCERR			0x01000000
+#define SLIC_ISR_XDROP			0x00800000
+#define SLIC_ISR_UPCBSY			0x00020000
+
+#define SLIC_ISR_PING_MASK		0x00700000
+#define SLIC_ISR_UPCERR_MASK		(SLIC_ISR_UPCERR | SLIC_ISR_UPCBSY)
+#define SLIC_ISR_UPC_MASK		(SLIC_ISR_UPC | SLIC_ISR_UPCERR_MASK)
+#define SLIC_WCS_START			0x80000000
+#define SLIC_WCS_COMPARE		0x40000000
+#define SLIC_RCVWCS_BEGIN		0x40000000
+#define SLIC_RCVWCS_FINISH		0x80000000
+
+#define SLIC_MIICR_REG_16		0x00100000
+#define SLIC_MRV_REG16_XOVERON		0x0068
+
+#define SLIC_GIG_LINKUP			0x0001
+#define SLIC_GIG_FULLDUPLEX		0x0002
+#define SLIC_GIG_SPEED_MASK		0x000C
+#define SLIC_GIG_SPEED_1000		0x0008
+#define SLIC_GIG_SPEED_100		0x0004
+#define SLIC_GIG_SPEED_10		0x0000
+
+#define SLIC_GMCR_RESET			0x80000000
+#define SLIC_GMCR_GBIT			0x20000000
+#define SLIC_GMCR_FULLD			0x10000000
+#define SLIC_GMCR_GAPBB_SHIFT		14
+#define SLIC_GMCR_GAPR1_SHIFT		7
+#define SLIC_GMCR_GAPR2_SHIFT		0
+#define SLIC_GMCR_GAPBB_1000		0x60
+#define SLIC_GMCR_GAPR1_1000		0x2C
+#define SLIC_GMCR_GAPR2_1000		0x40
+#define SLIC_GMCR_GAPBB_100		0x70
+#define SLIC_GMCR_GAPR1_100		0x2C
+#define SLIC_GMCR_GAPR2_100		0x40
+
+#define SLIC_XCR_RESET			0x80000000
+#define SLIC_XCR_XMTEN			0x40000000
+#define SLIC_XCR_PAUSEEN		0x20000000
+#define SLIC_XCR_LOADRNG		0x10000000
+
+#define SLIC_GXCR_RESET			0x80000000
+#define SLIC_GXCR_XMTEN			0x40000000
+#define SLIC_GXCR_PAUSEEN		0x20000000
+
+#define SLIC_GRCR_RESET			0x80000000
+#define SLIC_GRCR_RCVEN			0x40000000
+#define SLIC_GRCR_RCVALL		0x20000000
+#define SLIC_GRCR_RCVBAD		0x10000000
+#define SLIC_GRCR_CTLEN			0x08000000
+#define SLIC_GRCR_ADDRAEN		0x02000000
+#define SLIC_GRCR_HASHSIZE_SHIFT	17
+#define SLIC_GRCR_HASHSIZE		14
+
+/* Reset Register */
+#define SLIC_REG_RESET			0x0000
+/* Interrupt Control Register */
+#define SLIC_REG_ICR			0x0008
+/* Interrupt status pointer */
+#define SLIC_REG_ISP			0x0010
+/* Interrupt status */
+#define SLIC_REG_ISR			0x0018
+/* Header buffer address reg
+ * 31-8 - phy addr of set of contiguous hdr buffers
+ *  7-0 - number of buffers passed
+ * Buffers are 256 bytes long on 256-byte boundaries.
+ */
+#define SLIC_REG_HBAR			0x0020
+/* Data buffer handle & address reg
+ * 4 sets of registers; Buffers are 2K bytes long 2 per 4K page.
+ */
+#define SLIC_REG_DBAR			0x0028
+/* Xmt Cmd buf addr regs.
+ * 1 per XMT interface
+ * 31-5 - phy addr of host command buffer
+ *  4-0 - length of cmd in multiples of 32 bytes
+ * Buffers are 32 bytes up to 512 bytes long
+ */
+#define SLIC_REG_CBAR			0x0030
+/* Write control store */
+#define	SLIC_REG_WCS			0x0034
+/* Response buffer address reg.
+ * 31-8 - phy addr of set of contiguous response buffers
+ * 7-0 - number of buffers passed
+ * Buffers are 32 bytes long on 32-byte boundaries.
+ */
+#define	SLIC_REG_RBAR			0x0038
+/* Read statistics (UPR) */
+#define	SLIC_REG_RSTAT			0x0040
+/* Read link status */
+#define	SLIC_REG_LSTAT			0x0048
+/* Write Mac Config */
+#define	SLIC_REG_WMCFG			0x0050
+/* Write phy register */
+#define SLIC_REG_WPHY			0x0058
+/* Rcv Cmd buf addr reg */
+#define	SLIC_REG_RCBAR			0x0060
+/* Read SLIC Config */
+#define SLIC_REG_RCONFIG		0x0068
+/* Interrupt aggregation time */
+#define SLIC_REG_INTAGG			0x0070
+/* Write XMIT config reg */
+#define	SLIC_REG_WXCFG			0x0078
+/* Write RCV config reg */
+#define	SLIC_REG_WRCFG			0x0080
+/* Write rcv addr a low */
+#define	SLIC_REG_WRADDRAL		0x0088
+/* Write rcv addr a high */
+#define	SLIC_REG_WRADDRAH		0x0090
+/* Write rcv addr b low */
+#define	SLIC_REG_WRADDRBL		0x0098
+/* Write rcv addr b high */
+#define	SLIC_REG_WRADDRBH		0x00a0
+/* Low bits of mcast mask */
+#define	SLIC_REG_MCASTLOW		0x00a8
+/* High bits of mcast mask */
+#define	SLIC_REG_MCASTHIGH		0x00b0
+/* Ping the card */
+#define SLIC_REG_PING			0x00b8
+/* Dump command */
+#define SLIC_REG_DUMP_CMD		0x00c0
+/* Dump data pointer */
+#define SLIC_REG_DUMP_DATA		0x00c8
+/* Read card's pci_status register */
+#define	SLIC_REG_PCISTATUS		0x00d0
+/* Write hostid field */
+#define SLIC_REG_WRHOSTID		0x00d8
+/* Put card in a low power state */
+#define SLIC_REG_LOW_POWER		0x00e0
+/* Force slic into quiescent state before soft reset */
+#define SLIC_REG_QUIESCE		0x00e8
+/* Reset interface queues */
+#define SLIC_REG_RESET_IFACE		0x00f0
+/* Register is only written when it has changed.
+ * Bits 63-32 for host i/f addrs.
+ */
+#define SLIC_REG_ADDR_UPPER		0x00f8
+/* 64 bit Header buffer address reg */
+#define SLIC_REG_HBAR64			0x0100
+/* 64 bit Data buffer handle & address reg */
+#define SLIC_REG_DBAR64			0x0108
+/* 64 bit Xmt Cmd buf addr regs. */
+#define SLIC_REG_CBAR64			0x0110
+/* 64 bit Response buffer address reg. */
+#define SLIC_REG_RBAR64			0x0118
+/* 64 bit Rcv Cmd buf addr reg */
+#define	SLIC_REG_RCBAR64		0x0120
+/* Read statistics (64 bit UPR) */
+#define	SLIC_REG_RSTAT64		0x0128
+/* Download Gigabit RCV sequencer ucode */
+#define SLIC_REG_RCV_WCS		0x0130
+/* Write VlanId field */
+#define SLIC_REG_WRVLANID		0x0138
+/* Read Transformer info */
+#define SLIC_REG_READ_XF_INFO		0x0140
+/* Write Transformer info */
+#define SLIC_REG_WRITE_XF_INFO		0x0148
+/* Write card ticks per second */
+#define SLIC_REG_TICKS_PER_SEC		0x0170
+#define SLIC_REG_HOSTID			0x1554
+
+#define PCI_VENDOR_ID_ALACRITECH		0x139A
+#define PCI_DEVICE_ID_ALACRITECH_MOJAVE		0x0005
+#define PCI_SUBDEVICE_ID_ALACRITECH_1000X1	0x0005
+#define PCI_SUBDEVICE_ID_ALACRITECH_1000X1_2	0x0006
+#define PCI_SUBDEVICE_ID_ALACRITECH_1000X1F	0x0007
+#define PCI_SUBDEVICE_ID_ALACRITECH_CICADA	0x0008
+#define PCI_SUBDEVICE_ID_ALACRITECH_SES1001T	0x2006
+#define PCI_SUBDEVICE_ID_ALACRITECH_SES1001F	0x2007
+#define PCI_DEVICE_ID_ALACRITECH_OASIS		0x0007
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2002XT	0x000B
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2002XF	0x000C
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2001XT	0x000D
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2001XF	0x000E
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2104EF	0x000F
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2104ET	0x0010
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2102EF	0x0011
+#define PCI_SUBDEVICE_ID_ALACRITECH_SEN2102ET	0x0012
+
+/* Note: the number of descriptors must be a power of two */
+#define SLIC_NUM_RX_LES			256
+#define SLIC_RX_BUFF_SIZE		2048
+#define SLIC_RX_BUFF_ALIGN		256
+#define SLIC_RX_BUFF_HDR_SIZE		34
+#define SLIC_MAX_REQ_RX_DESCS		1
+
+#define SLIC_NUM_TX_DESCS		256
+#define SLIC_TX_DESC_ALIGN		32
+#define SLIC_MIN_TX_WAKEUP_DESCS	10
+#define SLIC_MAX_REQ_TX_DESCS		1
+#define SLIC_MAX_TX_COMPLETIONS		100
+
+#define SLIC_NUM_STAT_DESCS		128
+#define SLIC_STATS_DESC_ALIGN		256
+
+#define SLIC_NUM_STAT_DESC_ARRAYS	4
+#define SLIC_INVALID_STAT_DESC_IDX	0xffffffff
+
+#define SLIC_NAPI_WEIGHT		64
+
+#define SLIC_UPR_LSTAT			0
+#define SLIC_UPR_CONFIG			1
+
+#define SLIC_EEPROM_SIZE		128
+#define SLIC_EEPROM_MAGIC		0xa5a5
+
+#define SLIC_FIRMWARE_MOJAVE		"slicoss/gbdownload.sys"
+#define SLIC_FIRMWARE_OASIS		"slicoss/oasisdownload.sys"
+#define SLIC_RCV_FIRMWARE_MOJAVE	"slicoss/gbrcvucode.sys"
+#define SLIC_RCV_FIRMWARE_OASIS		"slicoss/oasisrcvucode.sys"
+#define SLIC_FIRMWARE_MIN_SIZE		64
+#define SLIC_FIRMWARE_MAX_SECTIONS	3
+
+#define SLIC_MODEL_MOJAVE		0
+#define SLIC_MODEL_OASIS		1
+
+#define SLIC_INC_STATS_COUNTER(st, counter)	\
+do {						\
+	u64_stats_update_begin(&(st)->syncp);	\
+	(st)->counter++;			\
+	u64_stats_update_end(&(st)->syncp);	\
+} while (0)
+
+#define SLIC_GET_STATS_COUNTER(newst, st, counter)			\
+do {									\
+	unsigned int start;	/* seqcount snapshot for the retry loop */ \
+	do {								\
+		start = u64_stats_fetch_begin_irq(&(st)->syncp);	\
+		(newst) = (st)->counter;				\
+	} while (u64_stats_fetch_retry_irq(&(st)->syncp, start));	\
+} while (0)
+
+struct slic_upr {
+	dma_addr_t paddr;	/* low 32 bits go to the RCONFIG/LSTAT reg */
+	unsigned int type;	/* SLIC_UPR_LSTAT or SLIC_UPR_CONFIG */
+	struct list_head list;	/* entry in slic_upr_list.list */
+};
+
+struct slic_upr_list {
+	bool pending;		/* set on queueing, cleared once list is empty */
+	struct list_head list;	/* queued struct slic_upr entries */
+	/* upr list lock */
+	spinlock_t lock;
+};
+
+/* SLIC EEPROM structure for Mojave */
+struct slic_mojave_eeprom {
+	__le16 id;		/* 00 EEPROM/FLASH Magic code 'A5A5'*/
+	__le16 eeprom_code_size;/* 01 Size of EEPROM Codes (bytes * 4)*/
+	__le16 flash_size;	/* 02 Flash size */
+	__le16 eeprom_size;	/* 03 EEPROM Size */
+	__le16 vendor_id;	/* 04 Vendor ID */
+	__le16 dev_id;		/* 05 Device ID */
+	u8 rev_id;		/* 06 Revision ID */
+	u8 class_code[3];	/* 07 Class Code */
+	u8 irqpin_dbg;		/* 08 Debug Interrupt pin */
+	u8 irqpin;		/*    Network Interrupt Pin */
+	u8 min_grant;		/* 09 Minimum grant */
+	u8 max_lat;		/*    Maximum Latency */
+	__le16 pci_stat;	/* 10 PCI Status */
+	__le16 sub_vendor_id;	/* 11 Subsystem Vendor Id */
+	__le16 sub_id;		/* 12 Subsystem ID */
+	__le16 dev_id_dbg;	/* 13 Debug Device Id */
+	__le16 ramrom;		/* 14 Dram/Rom function */
+	__le16 dram_size2pci;	/* 15 DRAM size to PCI (bytes * 64K) */
+	__le16 rom_size2pci;	/* 16 ROM extension size to PCI (bytes * 4k) */
+	u8 pad[2];		/* 17 Padding */
+	u8 freetime;		/* 18 FreeTime setting */
+	u8 ifctrl;		/* 10-bit interface control (Mojave only) */
+	__le16 dram_size;	/* 19 DRAM size (bytes * 64k) */
+	u8 mac[ETH_ALEN];	/* 20 MAC addresses */
+	u8 mac2[ETH_ALEN];
+	u8 pad2[6];
+	u16 dev_id2;		/* Device ID for 2nd PCI function */
+	u8 irqpin2;		/* Interrupt pin for 2nd PCI function */
+	u8 class_code2[3];	/* Class Code for 2nd PCI function */
+	u16 cfg_byte6;		/* Config Byte 6 */
+	u16 pme_cap;		/* Power Management capabilities */
+	u16 nwclk_ctrl;		/* NetworkClockControls */
+	u8 fru_format;		/* Alacritech FRU format type */
+	u8 fru_assembly[6];	/* Alacritech FRU information */
+	u8 fru_rev[2];
+	u8 fru_serial[14];
+	u8 fru_pad[3];
+	u8 oem_fru[28];		/* optional OEM FRU format type */
+	u8 pad3[4];		/* Pad to 128 bytes - includes 2 cksum bytes
+				 * (if OEM FRU info exists) and two unusable
+				 * bytes at the end
+				 */
+};
+
+/* SLIC EEPROM structure for Oasis */
+struct slic_oasis_eeprom {
+	__le16 id;		/* 00 EEPROM/FLASH Magic code 'A5A5' */
+	__le16 eeprom_code_size;/* 01 Size of EEPROM Codes (bytes * 4)*/
+	__le16 spidev0_cfg;	/* 02 Flash Config for SPI device 0 */
+	__le16 spidev1_cfg;	/* 03 Flash Config for SPI device 1 */
+	__le16 vendor_id;	/* 04 Vendor ID */
+	__le16 dev_id;		/* 05 Device ID (function 0) */
+	u8 rev_id;		/* 06 Revision ID */
+	u8 class_code0[3];	/* 07 Class Code for PCI function 0 */
+	u8 irqpin1;		/* 08 Interrupt pin for PCI function 1*/
+	u8 class_code1[3];	/* 09 Class Code for PCI function 1 */
+	u8 irqpin2;		/* 10 Interrupt pin for PCI function 2*/
+	u8 irqpin0;		/*    Interrupt pin for PCI function 0*/
+	u8 min_grant;		/* 11 Minimum grant */
+	u8 max_lat;		/*    Maximum Latency */
+	__le16 sub_vendor_id;	/* 12 Subsystem Vendor Id */
+	__le16 sub_id;		/* 13 Subsystem ID */
+	__le16 flash_size;	/* 14 Flash size (bytes / 4K) */
+	__le16 dram_size2pci;	/* 15 DRAM size to PCI (bytes / 64K) */
+	__le16 rom_size2pci;	/* 16 Flash (ROM extension) size to PCI
+				 *   (bytes / 4K)
+				 */
+	__le16 dev_id1;		/* 17 Device Id (function 1) */
+	__le16 dev_id2;		/* 18 Device Id (function 2) */
+	__le16 dev_stat_cfg;	/* 19 Device Status Config Bytes 6-7 */
+	__le16 pme_cap;		/* 20 Power Management capabilities */
+	u8 msi_cap;		/* 21 MSI capabilities */
+	u8 clock_div;		/*    Clock divider */
+	__le16 pci_stat_lo;	/* 22 PCI Status bits 15:0 */
+	__le16 pci_stat_hi;	/* 23 PCI Status bits 31:16 */
+	__le16 dram_cfg_lo;	/* 24 DRAM Configuration bits 15:0 */
+	__le16 dram_cfg_hi;	/* 25 DRAM Configuration bits 31:16 */
+	__le16 dram_size;	/* 26 DRAM size (bytes / 64K) */
+	__le16 gpio_tbi_ctrl;	/* 27 GPIO/TBI controls for functions 1/0 */
+	__le16 eeprom_size;	/* 28 EEPROM Size */
+	u8 mac[ETH_ALEN];	/* 29 MAC addresses (2 ports) */
+	u8 mac2[ETH_ALEN];
+	u8 fru_format;		/* 35 Alacritech FRU format type */
+	u8 fru_assembly[6];	/* Alacritech FRU information */
+	u8 fru_rev[2];
+	u8 fru_serial[14];
+	u8 fru_pad[3];
+	u8 oem_fru[28];		/* optional OEM FRU information */
+	u8 pad[4];		/* Pad to 128 bytes - includes 2 checksum bytes
+				 * (if OEM FRU info exists) and two unusable
+				 * bytes at the end
+				 */
+};
+
+struct slic_stats {
+	u64 rx_packets;
+	u64 rx_bytes;
+	u64 rx_mcasts;
+	u64 rx_errors;
+	u64 tx_packets;
+	u64 tx_bytes;
+	/* HW STATS */
+	u64 rx_buff_miss;
+	u64 tx_dropped;
+	u64 irq_errs;
+	/* transport layer */
+	u64 rx_tpcsum;
+	u64 rx_tpoflow;
+	u64 rx_tphlen;
+	/* ip layer */
+	u64 rx_ipcsum;
+	u64 rx_iplen;
+	u64 rx_iphlen;
+	/* link layer */
+	u64 rx_early;
+	u64 rx_buffoflow;
+	u64 rx_lcode;
+	u64 rx_drbl;
+	u64 rx_crc;
+	u64 rx_oflow802;
+	u64 rx_uflow802;
+	/* oasis only */
+	u64 tx_carrier;
+	struct u64_stats_sync syncp;
+};
+
+struct slic_shmem_data {
+	__le32 isr;
+	__le32 link;
+};
+
+struct slic_shmem {
+	dma_addr_t isr_paddr;
+	dma_addr_t link_paddr;
+	struct slic_shmem_data *shmem_data;
+};
+
+struct slic_rx_info_oasis {
+	__le32 frame_status;
+	__le32 frame_status_b;
+	__le32 time_stamp;
+	__le32 checksum;
+};
+
+struct slic_rx_info_mojave {
+	__le32 frame_status;
+	__le16 byte_cnt;
+	__le16 tp_chksum;
+	__le16 ctx_hash;
+	__le16 mac_hash;
+	__le16 buff_lnk;
+};
+
+struct slic_stat_desc {
+	__le32 hnd;
+	__u8 pad[8];
+	__le32 status;
+	__u8 pad2[16];
+};
+
+struct slic_stat_queue {
+	struct slic_stat_desc *descs[SLIC_NUM_STAT_DESC_ARRAYS];
+	dma_addr_t paddr[SLIC_NUM_STAT_DESC_ARRAYS];
+	unsigned int addr_offset[SLIC_NUM_STAT_DESC_ARRAYS];
+	unsigned int active_array;
+	unsigned int len;
+	unsigned int done_idx;
+	size_t mem_size;
+};
+
+struct slic_tx_desc {
+	__le32 hnd;
+	__le32 rsvd;
+	u8 cmd;
+	u8 flags;
+	__le16 rsvd2;
+	__le32 totlen;
+	__le32 paddrl;
+	__le32 paddrh;
+	__le32 len;
+	__le32 type;
+};
+
+struct slic_tx_buffer {
+	struct sk_buff *skb;
+	DEFINE_DMA_UNMAP_ADDR(map_addr);
+	DEFINE_DMA_UNMAP_LEN(map_len);
+	struct slic_tx_desc *desc;
+	dma_addr_t desc_paddr;
+};
+
+struct slic_tx_queue {
+	struct dma_pool *dma_pool;
+	struct slic_tx_buffer *txbuffs;
+	unsigned int len;
+	unsigned int put_idx;
+	unsigned int done_idx;
+};
+
+struct slic_rx_desc {
+	u8 pad[16];
+	__le32 buffer;
+	__le32 length;
+	__le32 status;
+};
+
+struct slic_rx_buffer {
+	struct sk_buff *skb;
+	DEFINE_DMA_UNMAP_ADDR(map_addr);
+	DEFINE_DMA_UNMAP_LEN(map_len);
+	unsigned int addr_offset;
+};
+
+struct slic_rx_queue {
+	struct slic_rx_buffer *rxbuffs;
+	unsigned int len;
+	unsigned int done_idx;
+	unsigned int put_idx;
+};
+
+struct slic_device {
+	struct pci_dev *pdev;
+	struct net_device *netdev;
+	void __iomem *regs;	/* MMIO base used by slic_read/slic_write */
+	/* upper address setting lock */
+	spinlock_t upper_lock;
+	struct slic_shmem shmem;
+	struct napi_struct napi;
+	struct slic_rx_queue rxq;
+	struct slic_tx_queue txq;
+	struct slic_stat_queue stq;
+	struct slic_stats stats;
+	struct slic_upr_list upr_list;
+	/* link configuration lock */
+	spinlock_t link_lock;
+	bool promisc;		/* mirrors IFF_PROMISC of netdev->flags */
+	int speed;		/* SPEED_*; SPEED_UNKNOWN = carrier off */
+	unsigned int duplex;	/* DUPLEX_* value */
+	bool is_fiber;
+	unsigned char model;	/* SLIC_MODEL_MOJAVE or SLIC_MODEL_OASIS */
+};
+
+static inline u32 slic_read(struct slic_device *sdev, unsigned int reg)
+{
+	return ioread32(sdev->regs + reg);
+}
+
+static inline void slic_write(struct slic_device *sdev, unsigned int reg,
+			      u32 val)
+{
+	iowrite32(val, sdev->regs + reg);
+}
+
+static inline void slic_flush_write(struct slic_device *sdev)
+{
+	(void)ioread32(sdev->regs + SLIC_REG_HOSTID);
+}
+
+#endif /* _SLIC_H */
diff --git a/drivers/net/ethernet/alacritech/slicoss.c b/drivers/net/ethernet/alacritech/slicoss.c
new file mode 100644
index 000000000000..b9fbd0107008
--- /dev/null
+++ b/drivers/net/ethernet/alacritech/slicoss.c
@@ -0,0 +1,1871 @@
+/*
+ * Driver for Gigabit Ethernet adapters based on the Session Layer
+ * Interface (SLIC) technology by Alacritech. The driver does not
+ * support the hardware acceleration features provided by these cards.
+ *
+ * Copyright (C) 2016 Lino Sanfilippo <LinoSanfilippo@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/crc32.h>
+#include <linux/dma-mapping.h>
+#include <linux/ethtool.h>
+#include <linux/mii.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/list.h>
+#include <linux/u64_stats_sync.h>
+
+#include "slic.h"
+
+#define DRV_NAME			"slicoss"
+#define DRV_VERSION			"1.0"
+
+static const struct pci_device_id slic_id_tbl[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_ALACRITECH,
+		     PCI_DEVICE_ID_ALACRITECH_MOJAVE) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_ALACRITECH,
+		     PCI_DEVICE_ID_ALACRITECH_OASIS) },
+	{ 0 }
+};
+
+static const char slic_stats_strings[][ETH_GSTRING_LEN] = {
+	"rx_packets",
+	"rx_bytes",
+	"rx_multicasts",
+	"rx_errors",
+	"rx_buff_miss",
+	"rx_tp_csum",
+	"rx_tp_oflow",
+	"rx_tp_hlen",
+	"rx_ip_csum",
+	"rx_ip_len",
+	"rx_ip_hdr_len",
+	"rx_early",
+	"rx_buff_oflow",
+	"rx_lcode",
+	"rx_drbl",
+	"rx_crc",
+	"rx_oflow_802",
+	"rx_uflow_802",
+	"tx_packets",
+	"tx_bytes",
+	"tx_carrier",
+	"tx_dropped",
+	"irq_errs",
+};
+
+static inline int slic_next_queue_idx(unsigned int idx, unsigned int qlen)
+{
+	return (qlen - 1) & (idx + 1);	/* wraps when qlen is a power of two */
+}
+
+static inline int slic_get_free_queue_descs(unsigned int put_idx,
+					    unsigned int done_idx,
+					    unsigned int qlen)
+{
+	if (put_idx < done_idx)
+		return done_idx - put_idx - 1;
+	return qlen - 1 - (put_idx - done_idx);
+}
+
+static unsigned int slic_next_compl_idx(struct slic_device *sdev)
+{
+	struct slic_stat_queue *stq = &sdev->stq;
+	unsigned int active = stq->active_array;
+	struct slic_stat_desc *descs;
+	struct slic_stat_desc *stat;
+	unsigned int idx;
+
+	descs = stq->descs[active];
+	stat = &descs[stq->done_idx];
+
+	if (!stat->status)
+		return SLIC_INVALID_STAT_DESC_IDX;
+
+	idx = (le32_to_cpu(stat->hnd) & 0xffff) - 1;
+	/* reset desc */
+	stat->hnd = 0;
+	stat->status = 0;
+
+	stq->done_idx = slic_next_queue_idx(stq->done_idx, stq->len);
+	/* check for wraparound */
+	if (!stq->done_idx) {
+		dma_addr_t paddr = stq->paddr[active];
+
+		slic_write(sdev, SLIC_REG_RBAR, lower_32_bits(paddr) |
+						stq->len);
+		/* make sure new status descriptors are immediately available */
+		slic_flush_write(sdev);
+		active++;
+		active &= (SLIC_NUM_STAT_DESC_ARRAYS - 1);
+		stq->active_array = active;
+	}
+	return idx;
+}
+
+static unsigned int slic_get_free_tx_descs(struct slic_tx_queue *txq)
+{
+	/* ensure tail idx is updated */
+	smp_mb();
+	return slic_get_free_queue_descs(txq->put_idx, txq->done_idx, txq->len);
+}
+
+static unsigned int slic_get_free_rx_descs(struct slic_rx_queue *rxq)
+{
+	return slic_get_free_queue_descs(rxq->put_idx, rxq->done_idx, rxq->len);
+}
+
+static void slic_clear_upr_list(struct slic_upr_list *upr_list)
+{
+	struct slic_upr *upr;
+	struct slic_upr *tmp;
+
+	spin_lock_bh(&upr_list->lock);
+	list_for_each_entry_safe(upr, tmp, &upr_list->list, list) {
+		list_del(&upr->list);
+		kfree(upr);
+	}
+	upr_list->pending = false;
+	spin_unlock_bh(&upr_list->lock);
+}
+
+static void slic_start_upr(struct slic_device *sdev, struct slic_upr *upr)
+{
+	u32 reg = SLIC_REG_LSTAT;	/* any type other than CONFIG */
+
+	if (upr->type == SLIC_UPR_CONFIG)
+		reg = SLIC_REG_RCONFIG;
+	slic_write(sdev, reg, lower_32_bits(upr->paddr));
+	slic_flush_write(sdev);		/* read back after the write */
+}
+
+static void slic_queue_upr(struct slic_device *sdev, struct slic_upr *upr)
+{
+	struct slic_upr_list *upr_list = &sdev->upr_list;
+	bool pending;
+
+	spin_lock_bh(&upr_list->lock);
+	pending = upr_list->pending;
+	INIT_LIST_HEAD(&upr->list);
+	list_add_tail(&upr->list, &upr_list->list);
+	upr_list->pending = true;
+	spin_unlock_bh(&upr_list->lock);
+
+	if (!pending)
+		slic_start_upr(sdev, upr);
+}
+
+static struct slic_upr *slic_dequeue_upr(struct slic_device *sdev)
+{
+	struct slic_upr_list *upr_list = &sdev->upr_list;
+	struct slic_upr *next_upr = NULL;
+	struct slic_upr *upr = NULL;
+
+	spin_lock_bh(&upr_list->lock);
+	if (!list_empty(&upr_list->list)) {
+		upr = list_first_entry(&upr_list->list, struct slic_upr, list);
+		list_del(&upr->list);
+
+		if (list_empty(&upr_list->list))
+			upr_list->pending = false;
+		else
+			next_upr = list_first_entry(&upr_list->list,
+						    struct slic_upr, list);
+	}
+	spin_unlock_bh(&upr_list->lock);
+	/* trigger processing of the next upr in list */
+	if (next_upr)
+		slic_start_upr(sdev, next_upr);
+
+	return upr;
+}
+
+static int slic_new_upr(struct slic_device *sdev, unsigned int type,
+			dma_addr_t paddr)
+{
+	struct slic_upr *upr;
+
+	upr = kmalloc(sizeof(*upr), GFP_ATOMIC);
+	if (!upr)
+		return -ENOMEM;
+	upr->type = type;
+	upr->paddr = paddr;
+
+	slic_queue_upr(sdev, upr);
+
+	return 0;
+}
+
+static void slic_set_mcast_bit(u64 *mcmask, unsigned char const *addr)
+{
+	u8 hash;
+
+	/* Derive the hash from the CRC of the MAC address. The CRC is shifted
+	 * right by 23 and truncated to eight bits by the u8 assignment; the
+	 * bits not needed for the index are masked off below.
+	 */
+	hash = ether_crc(ETH_ALEN, addr) >> 23;
+	/* we only have space on the SLIC for 64 entries */
+	hash &= 0x3F;
+	/* mark the entry of this address in the caller's mask */
+	*mcmask |= (u64)1 << hash;
+}
+
+/* must be called with link_lock held */
+static void slic_configure_rcv(struct slic_device *sdev)
+{
+	u32 val;
+
+	val = SLIC_GRCR_RESET | SLIC_GRCR_ADDRAEN | SLIC_GRCR_RCVEN |
+	      SLIC_GRCR_HASHSIZE << SLIC_GRCR_HASHSIZE_SHIFT | SLIC_GRCR_RCVBAD;
+
+	if (sdev->duplex == DUPLEX_FULL)
+		val |= SLIC_GRCR_CTLEN;
+
+	if (sdev->promisc)
+		val |= SLIC_GRCR_RCVALL;
+
+	slic_write(sdev, SLIC_REG_WRCFG, val);
+}
+
+/* must be called with link_lock held */
+static void slic_configure_xmt(struct slic_device *sdev)
+{
+	u32 val;
+
+	val = SLIC_GXCR_RESET | SLIC_GXCR_XMTEN;
+
+	if (sdev->duplex == DUPLEX_FULL)
+		val |= SLIC_GXCR_PAUSEEN;
+
+	slic_write(sdev, SLIC_REG_WXCFG, val);
+}
+
+/* must be called with link_lock held */
+static void slic_configure_mac(struct slic_device *sdev)
+{
+	u32 val;
+
+	if (sdev->speed == SPEED_1000) {
+		val = SLIC_GMCR_GAPBB_1000 << SLIC_GMCR_GAPBB_SHIFT |
+		      SLIC_GMCR_GAPR1_1000 << SLIC_GMCR_GAPR1_SHIFT |
+		      SLIC_GMCR_GAPR2_1000 << SLIC_GMCR_GAPR2_SHIFT |
+		      SLIC_GMCR_GBIT; /* enable GMII */
+	} else {
+		val = SLIC_GMCR_GAPBB_100 << SLIC_GMCR_GAPBB_SHIFT |
+		      SLIC_GMCR_GAPR1_100 << SLIC_GMCR_GAPR1_SHIFT |
+		      SLIC_GMCR_GAPR2_100 << SLIC_GMCR_GAPR2_SHIFT;
+	}
+
+	if (sdev->duplex == DUPLEX_FULL)
+		val |= SLIC_GMCR_FULLD;
+
+	slic_write(sdev, SLIC_REG_WMCFG, val);
+}
+
+static void slic_configure_link_locked(struct slic_device *sdev, int speed,
+				       unsigned int duplex)
+{
+	struct net_device *dev = sdev->netdev;
+
+	if (sdev->speed == speed && sdev->duplex == duplex)
+		return;
+
+	sdev->speed = speed;
+	sdev->duplex = duplex;
+
+	if (sdev->speed == SPEED_UNKNOWN) {
+		if (netif_carrier_ok(dev))
+			netif_carrier_off(dev);
+	} else {
+		/* (re)configure link settings */
+		slic_configure_mac(sdev);
+		slic_configure_xmt(sdev);
+		slic_configure_rcv(sdev);
+		slic_flush_write(sdev);
+
+		if (!netif_carrier_ok(dev))
+			netif_carrier_on(dev);
+	}
+}
+
+static void slic_configure_link(struct slic_device *sdev, int speed,
+				unsigned int duplex)
+{
+	spin_lock_bh(&sdev->link_lock);
+	slic_configure_link_locked(sdev, speed, duplex);
+	spin_unlock_bh(&sdev->link_lock);
+}
+
+static void slic_set_rx_mode(struct net_device *dev)
+{
+	struct slic_device *sdev = netdev_priv(dev);
+	struct netdev_hw_addr *hwaddr;
+	bool set_promisc;
+	u64 mcmask;
+
+	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
+		/* Turn on all multicast addresses. We have to do this for
+		 * promiscuous mode as well as ALLMCAST mode (it saves the
+		 * microcode from having to keep state about the MAC
+		 * configuration).
+		 */
+		mcmask = ~(u64)0;
+	} else  {
+		mcmask = 0;
+
+		netdev_for_each_mc_addr(hwaddr, dev) {
+			slic_set_mcast_bit(&mcmask, hwaddr->addr);
+		}
+	}
+
+	slic_write(sdev, SLIC_REG_MCASTLOW, lower_32_bits(mcmask));
+	slic_write(sdev, SLIC_REG_MCASTHIGH, upper_32_bits(mcmask));
+
+	set_promisc = !!(dev->flags & IFF_PROMISC);
+
+	spin_lock_bh(&sdev->link_lock);
+	if (sdev->promisc != set_promisc) {
+		sdev->promisc = set_promisc;
+		slic_configure_rcv(sdev);
+		/* make sure writes to receiver can't leak out of the lock */
+		mmiowb();
+	}
+	spin_unlock_bh(&sdev->link_lock);
+}
+
+/* Reclaim completed tx buffers, update tx stats and wake a stopped queue. */
+static void slic_xmit_complete(struct slic_device *sdev)
+{
+	struct slic_tx_queue *txq = &sdev->txq;
+	struct net_device *dev = sdev->netdev;
+	struct slic_tx_buffer *buff;
+	unsigned int frames = 0;
+	unsigned int bytes = 0;
+	unsigned int idx;
+
+	/* Process at most SLIC_MAX_TX_COMPLETIONS frames per call so that
+	 * completions arriving meanwhile cannot keep the loop running forever.
+	 */
+	do {
+		idx = slic_next_compl_idx(sdev);
+		if (idx == SLIC_INVALID_STAT_DESC_IDX)
+			break;
+
+		txq->done_idx = idx;
+		buff = &txq->txbuffs[idx];
+
+		if (unlikely(!buff->skb)) {
+			netdev_warn(dev, "no skb found for desc idx %u\n", idx);
+			continue;
+		}
+		dma_unmap_single(&sdev->pdev->dev,
+				 dma_unmap_addr(buff, map_addr),
+				 dma_unmap_len(buff, map_len), DMA_TO_DEVICE);
+
+		bytes += buff->skb->len;
+		frames++;
+
+		dev_kfree_skb_any(buff->skb);
+		buff->skb = NULL;
+	} while (frames < SLIC_MAX_TX_COMPLETIONS);
+	/* make sure xmit sees the new value for done_idx */
+	smp_wmb();
+
+	u64_stats_update_begin(&sdev->stats.syncp);
+	sdev->stats.tx_bytes += bytes;
+	sdev->stats.tx_packets += frames;
+	u64_stats_update_end(&sdev->stats.syncp);
+
+	netif_tx_lock(dev);
+	if (netif_queue_stopped(dev) &&
+	    (slic_get_free_tx_descs(txq) >= SLIC_MIN_TX_WAKEUP_DESCS))
+		netif_wake_queue(dev);
+	netif_tx_unlock(dev);
+}
+
+static void slic_refill_rx_queue(struct slic_device *sdev, gfp_t gfp)
+{
+	const unsigned int ALIGN_MASK = SLIC_RX_BUFF_ALIGN - 1;
+	unsigned int maplen = SLIC_RX_BUFF_SIZE;
+	struct slic_rx_queue *rxq = &sdev->rxq;
+	struct net_device *dev = sdev->netdev;
+	struct slic_rx_buffer *buff;
+	struct slic_rx_desc *desc;
+	unsigned int misalign;
+	unsigned int offset;
+	struct sk_buff *skb;
+	dma_addr_t paddr;
+
+	while (slic_get_free_rx_descs(rxq) > SLIC_MAX_REQ_RX_DESCS) {
+		skb = alloc_skb(maplen + ALIGN_MASK, gfp);
+		if (!skb)
+			break;
+
+		paddr = dma_map_single(&sdev->pdev->dev, skb->data, maplen,
+				       DMA_FROM_DEVICE);
+		if (dma_mapping_error(&sdev->pdev->dev, paddr)) {
+			netdev_err(dev, "mapping rx packet failed\n");
+			/* drop skb */
+			dev_kfree_skb_any(skb);
+			break;
+		}
+		/* ensure head buffer descriptors are 256 byte aligned */
+		offset = 0;
+		misalign = paddr & ALIGN_MASK;
+		if (misalign) {
+			offset = SLIC_RX_BUFF_ALIGN - misalign;
+			skb_reserve(skb, offset);
+		}
+		/* the HW expects dma chunks for descriptor + frame data */
+		desc = (struct slic_rx_desc *)skb->data;
+		/* temporarily sync descriptor for CPU to clear status */
+		dma_sync_single_for_cpu(&sdev->pdev->dev, paddr,
+					offset + sizeof(*desc),
+					DMA_FROM_DEVICE);
+		desc->status = 0;
+		/* return it to HW again */
+		dma_sync_single_for_device(&sdev->pdev->dev, paddr,
+					   offset + sizeof(*desc),
+					   DMA_FROM_DEVICE);
+
+		buff = &rxq->rxbuffs[rxq->put_idx];
+		buff->skb = skb;
+		dma_unmap_addr_set(buff, map_addr, paddr);
+		dma_unmap_len_set(buff, map_len, maplen);
+		buff->addr_offset = offset;
+		/* complete write to descriptor before it is handed to HW */
+		wmb();
+		/* head buffer descriptors are placed immediately before skb */
+		slic_write(sdev, SLIC_REG_HBAR, lower_32_bits(paddr) + offset);
+		rxq->put_idx = slic_next_queue_idx(rxq->put_idx, rxq->len);
+	}
+}
+
+/* Account a received frame that the card marked as erroneous.
+ *
+ * @sdev: adapter the frame was received on
+ * @skb:  buffer whose data starts with the model specific rx info header
+ *
+ * The frame status in the rx info header is decoded according to the adapter
+ * model and the matching error counters in sdev->stats are incremented.
+ * rx_errors is incremented once per call, no matter which cause was found.
+ * The skb is neither modified nor freed here.
+ */
+static void slic_handle_frame_error(struct slic_device *sdev,
+				    struct sk_buff *skb)
+{
+	struct slic_stats *stats = &sdev->stats;
+
+	if (sdev->model == SLIC_MODEL_OASIS) {
+		/* Oasis: every cause is tested as a separate bit in one of two
+		 * status words, so a single frame may bump several counters.
+		 */
+		struct slic_rx_info_oasis *info;
+		u32 status_b;
+		u32 status;
+
+		info = (struct slic_rx_info_oasis *)skb->data;
+		status = le32_to_cpu(info->frame_status);
+		status_b = le32_to_cpu(info->frame_status_b);
+		/* transport layer */
+		if (status_b & SLIC_VRHSTATB_TPCSUM)
+			SLIC_INC_STATS_COUNTER(stats, rx_tpcsum);
+		if (status & SLIC_VRHSTAT_TPOFLO)
+			SLIC_INC_STATS_COUNTER(stats, rx_tpoflow);
+		if (status_b & SLIC_VRHSTATB_TPHLEN)
+			SLIC_INC_STATS_COUNTER(stats, rx_tphlen);
+		/* ip layer */
+		if (status_b & SLIC_VRHSTATB_IPCSUM)
+			SLIC_INC_STATS_COUNTER(stats, rx_ipcsum);
+		if (status_b & SLIC_VRHSTATB_IPLERR)
+			SLIC_INC_STATS_COUNTER(stats, rx_iplen);
+		if (status_b & SLIC_VRHSTATB_IPHERR)
+			SLIC_INC_STATS_COUNTER(stats, rx_iphlen);
+		/* link layer */
+		if (status_b & SLIC_VRHSTATB_RCVE)
+			SLIC_INC_STATS_COUNTER(stats, rx_early);
+		if (status_b & SLIC_VRHSTATB_BUFF)
+			SLIC_INC_STATS_COUNTER(stats, rx_buffoflow);
+		if (status_b & SLIC_VRHSTATB_CODE)
+			SLIC_INC_STATS_COUNTER(stats, rx_lcode);
+		if (status_b & SLIC_VRHSTATB_DRBL)
+			SLIC_INC_STATS_COUNTER(stats, rx_drbl);
+		if (status_b & SLIC_VRHSTATB_CRC)
+			SLIC_INC_STATS_COUNTER(stats, rx_crc);
+		if (status & SLIC_VRHSTAT_802OE)
+			SLIC_INC_STATS_COUNTER(stats, rx_oflow802);
+		if (status_b & SLIC_VRHSTATB_802UE)
+			SLIC_INC_STATS_COUNTER(stats, rx_uflow802);
+		/* a carrier error seen on receive is counted as tx_carrier */
+		if (status_b & SLIC_VRHSTATB_CARRE)
+			SLIC_INC_STATS_COUNTER(stats, tx_carrier);
+	} else { /* mojave */
+		/* Mojave: one status word; per layer a flag bit says whether
+		 * an error occurred and an extracted code is compared for
+		 * equality against the known causes.
+		 */
+		struct slic_rx_info_mojave *info;
+		u32 status;
+
+		info = (struct slic_rx_info_mojave *)skb->data;
+		status = le32_to_cpu(info->frame_status);
+		/* transport layer */
+		if (status & SLIC_VGBSTAT_XPERR) {
+			/* shifted only, no mask is applied to this code */
+			u32 xerr = status >> SLIC_VGBSTAT_XERRSHFT;
+
+			if (xerr == SLIC_VGBSTAT_XCSERR)
+				SLIC_INC_STATS_COUNTER(stats, rx_tpcsum);
+			if (xerr == SLIC_VGBSTAT_XUFLOW)
+				SLIC_INC_STATS_COUNTER(stats, rx_tpoflow);
+			if (xerr == SLIC_VGBSTAT_XHLEN)
+				SLIC_INC_STATS_COUNTER(stats, rx_tphlen);
+		}
+		/* ip layer */
+		if (status & SLIC_VGBSTAT_NETERR) {
+			u32 nerr = status >> SLIC_VGBSTAT_NERRSHFT &
+				   SLIC_VGBSTAT_NERRMSK;
+
+			if (nerr == SLIC_VGBSTAT_NCSERR)
+				SLIC_INC_STATS_COUNTER(stats, rx_ipcsum);
+			if (nerr == SLIC_VGBSTAT_NUFLOW)
+				SLIC_INC_STATS_COUNTER(stats, rx_iplen);
+			if (nerr == SLIC_VGBSTAT_NHLEN)
+				SLIC_INC_STATS_COUNTER(stats, rx_iphlen);
+		}
+		/* link layer */
+		if (status & SLIC_VGBSTAT_LNKERR) {
+			u32 lerr = status & SLIC_VGBSTAT_LERRMSK;
+
+			if (lerr == SLIC_VGBSTAT_LDEARLY)
+				SLIC_INC_STATS_COUNTER(stats, rx_early);
+			if (lerr == SLIC_VGBSTAT_LBOFLO)
+				SLIC_INC_STATS_COUNTER(stats, rx_buffoflow);
+			if (lerr == SLIC_VGBSTAT_LCODERR)
+				SLIC_INC_STATS_COUNTER(stats, rx_lcode);
+			if (lerr == SLIC_VGBSTAT_LDBLNBL)
+				SLIC_INC_STATS_COUNTER(stats, rx_drbl);
+			if (lerr == SLIC_VGBSTAT_LCRCERR)
+				SLIC_INC_STATS_COUNTER(stats, rx_crc);
+			if (lerr == SLIC_VGBSTAT_LOFLO)
+				SLIC_INC_STATS_COUNTER(stats, rx_oflow802);
+			if (lerr == SLIC_VGBSTAT_LUFLO)
+				SLIC_INC_STATS_COUNTER(stats, rx_uflow802);
+		}
+	}
+	/* counted for every erroneous frame, even if no cause matched above */
+	SLIC_INC_STATS_COUNTER(stats, rx_errors);
+}
+
+/* Process completed receive buffers and hand good frames to the stack.
+ *
+ * @sdev: adapter whose rx queue is processed
+ * @todo: maximum number of rx buffers to process in this call
+ * @done: not written by this function
+ *
+ * The rx queue is walked from done_idx towards put_idx until @todo buffers
+ * were handled, an empty slot is found or a buffer is reached that the card
+ * has not marked as valid yet. Erroneous frames are accounted and dropped,
+ * all others are passed to napi_gro_receive(). Afterwards the byte and packet
+ * counters are updated and the rx queue is refilled.
+ *
+ * NOTE(review): *done is never updated here although the caller passes it in
+ * to learn about the amount of work done - confirm that this is intended.
+ */
+static void slic_handle_receive(struct slic_device *sdev, unsigned int todo,
+				unsigned int *done)
+{
+	struct slic_rx_queue *rxq = &sdev->rxq;
+	struct net_device *dev = sdev->netdev;
+	struct slic_rx_buffer *buff;
+	struct slic_rx_desc *desc;
+	unsigned int frames = 0;
+	unsigned int bytes = 0;
+	struct sk_buff *skb;
+	u32 status;
+	u32 len;
+
+	while (todo && (rxq->done_idx != rxq->put_idx)) {
+		buff = &rxq->rxbuffs[rxq->done_idx];
+
+		skb = buff->skb;
+		if (!skb)
+			break;
+
+		/* the rx descriptor is located at the start of the skb data */
+		desc = (struct slic_rx_desc *)skb->data;
+
+		/* only sync the leading part of the mapping (alignment offset
+		 * plus descriptor) to be able to inspect the status word
+		 */
+		dma_sync_single_for_cpu(&sdev->pdev->dev,
+					dma_unmap_addr(buff, map_addr),
+					buff->addr_offset + sizeof(*desc),
+					DMA_FROM_DEVICE);
+
+		status = le32_to_cpu(desc->status);
+		if (!(status & SLIC_IRHDDR_SVALID)) {
+			/* not marked valid yet: give the buffer back to the
+			 * device and stop processing
+			 */
+			dma_sync_single_for_device(&sdev->pdev->dev,
+						   dma_unmap_addr(buff,
+								  map_addr),
+						   buff->addr_offset +
+						   sizeof(*desc),
+						   DMA_FROM_DEVICE);
+			break;
+		}
+
+		/* the slot no longer owns the skb from here on */
+		buff->skb = NULL;
+
+		dma_unmap_single(&sdev->pdev->dev,
+				 dma_unmap_addr(buff, map_addr),
+				 dma_unmap_len(buff, map_len),
+				 DMA_FROM_DEVICE);
+
+		/* skip rx descriptor that is placed before the frame data */
+		skb_reserve(skb, SLIC_RX_BUFF_HDR_SIZE);
+
+		if (unlikely(status & SLIC_IRHDDR_ERR)) {
+			slic_handle_frame_error(sdev, skb);
+			dev_kfree_skb_any(skb);
+		} else {
+			struct ethhdr *eh = (struct ethhdr *)skb->data;
+
+			if (is_multicast_ether_addr(eh->h_dest))
+				SLIC_INC_STATS_COUNTER(&sdev->stats, rx_mcasts);
+
+			/* desc still points into the skb head, which stays
+			 * accessible after the unmap above
+			 */
+			len = le32_to_cpu(desc->length) & SLIC_IRHDDR_FLEN_MSK;
+			skb_put(skb, len);
+			skb->protocol = eth_type_trans(skb, dev);
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+			napi_gro_receive(&sdev->napi, skb);
+
+			bytes += len;
+			frames++;
+		}
+		/* erroneous frames consume a queue slot and budget as well */
+		rxq->done_idx = slic_next_queue_idx(rxq->done_idx, rxq->len);
+		todo--;
+	}
+
+	/* update both counters within a single u64_stats section */
+	u64_stats_update_begin(&sdev->stats.syncp);
+	sdev->stats.rx_bytes += bytes;
+	sdev->stats.rx_packets += frames;
+	u64_stats_update_end(&sdev->stats.syncp);
+
+	/* replace the consumed buffers; GFP_ATOMIC is used for allocations */
+	slic_refill_rx_queue(sdev, GFP_ATOMIC);
+}
+
+/* Evaluate the link word found in shared memory and pass the resulting speed
+ * and duplex setting on to slic_configure_link(). Both values are reported as
+ * unknown while the link is down.
+ */
+static void slic_handle_link_irq(struct slic_device *sdev)
+{
+	struct slic_shmem_data *sm_data = sdev->shmem.shmem_data;
+	u32 link = le32_to_cpu(sm_data->link);
+	unsigned int duplex = DUPLEX_UNKNOWN;
+	int speed = SPEED_UNKNOWN;
+
+	if (link & SLIC_GIG_LINKUP) {
+		/* 1000 Mb wins over 100 Mb; 10 Mb is the fallback */
+		if (link & SLIC_GIG_SPEED_1000)
+			speed = SPEED_1000;
+		else if (link & SLIC_GIG_SPEED_100)
+			speed = SPEED_100;
+		else
+			speed = SPEED_10;
+
+		if (link & SLIC_GIG_FULLDUPLEX)
+			duplex = DUPLEX_FULL;
+		else
+			duplex = DUPLEX_HALF;
+	}
+
+	slic_configure_link(sdev, speed, duplex);
+}
+
+/* Complete the UPR that caused the current interrupt.
+ *
+ * The request is taken off the pending list. A link status request that
+ * failed is queued again, a successful one has its result evaluated by
+ * slic_handle_link_irq(). Unless it was queued again the request is freed.
+ */
+static void slic_handle_upr_irq(struct slic_device *sdev, u32 irqs)
+{
+	/* the upr that caused this irq is always the first entry in list */
+	struct slic_upr *upr = slic_dequeue_upr(sdev);
+
+	if (!upr) {
+		netdev_warn(sdev->netdev, "no upr found on list\n");
+		return;
+	}
+
+	if (upr->type != SLIC_UPR_LSTAT)
+		goto free_upr;
+
+	if (unlikely(irqs & SLIC_ISR_UPCERR_MASK)) {
+		/* try again, so the upr must not be freed */
+		slic_queue_upr(sdev, upr);
+		return;
+	}
+	slic_handle_link_irq(sdev);
+
+free_upr:
+	kfree(upr);
+}
+
+/* Issue a new SLIC_UPR_LSTAT request through slic_new_upr(), handing over
+ * the DMA address of the link word in shared memory (link_paddr).
+ *
+ * Returns whatever slic_new_upr() returns.
+ */
+static int slic_handle_link_change(struct slic_device *sdev)
+{
+	return slic_new_upr(sdev, SLIC_UPR_LSTAT, sdev->shmem.link_paddr);
+}
+
+/* Account an error interrupt.
+ *
+ * Missed rx buffers and dropped transmits have counters of their own; an
+ * error interrupt that signals neither of them is counted in irq_errs.
+ */
+static void slic_handle_err_irq(struct slic_device *sdev, u32 isr)
+{
+	const u32 known_causes = SLIC_ISR_RMISS | SLIC_ISR_XDROP;
+
+	if (!(isr & known_causes)) {
+		SLIC_INC_STATS_COUNTER(&sdev->stats, irq_errs);
+		return;
+	}
+
+	/* both causes may be signalled at the same time */
+	if (isr & SLIC_ISR_RMISS)
+		SLIC_INC_STATS_COUNTER(&sdev->stats, rx_buff_miss);
+	if (isr & SLIC_ISR_XDROP)
+		SLIC_INC_STATS_COUNTER(&sdev->stats, tx_dropped);
+}
+
+/* Dispatch the causes recorded in an interrupt status word.
+ *
+ * @sdev: adapter the status word belongs to
+ * @isr:  interrupt status bits in CPU byte order
+ * @todo: upper bound for the number of rx buffers to process
+ * @done: handed through to slic_handle_receive()
+ *
+ * Every cause bit is checked on its own, so several handlers may run for a
+ * single status word.
+ */
+static void slic_handle_irq(struct slic_device *sdev, u32 isr,
+			    unsigned int todo, unsigned int *done)
+{
+	if (isr & SLIC_ISR_ERR)
+		slic_handle_err_irq(sdev, isr);
+
+	/* link event: request the new link state; the return value of the
+	 * request is ignored here
+	 */
+	if (isr & SLIC_ISR_LEVENT)
+		slic_handle_link_change(sdev);
+
+	/* a previously issued UPR has completed */
+	if (isr & SLIC_ISR_UPC_MASK)
+		slic_handle_upr_irq(sdev, isr);
+
+	if (isr & SLIC_ISR_RCV)
+		slic_handle_receive(sdev, todo, done);
+
+	if (isr & SLIC_ISR_CMD)
+		slic_xmit_complete(sdev);
+}
+
+/* NAPI poll callback.
+ *
+ * @napi: napi context embedded in the slic_device
+ * @todo: budget, i.e. the maximum amount of work allowed in this call
+ *
+ * Reads the interrupt status word from shared memory and lets
+ * slic_handle_irq() process it. If less work than the budget was reported,
+ * NAPI is completed, the status word is cleared and the card's interrupt is
+ * enabled again. Returns the amount of work done.
+ *
+ * NOTE(review): &done is an int * while slic_handle_irq() expects an
+ * unsigned int *, and slic_handle_receive() never stores to it, so done
+ * appears to stay 0 here - confirm that this is intended.
+ */
+static int slic_poll(struct napi_struct *napi, int todo)
+{
+	struct slic_device *sdev = container_of(napi, struct slic_device, napi);
+	struct slic_shmem *sm = &sdev->shmem;
+	struct slic_shmem_data *sm_data = sm->shmem_data;
+	u32 isr = le32_to_cpu(sm_data->isr);
+	int done = 0;
+
+	slic_handle_irq(sdev, isr, todo, &done);
+
+	if (done < todo) {
+		napi_complete_done(napi, done);
+		/* reenable irqs */
+		sm_data->isr = 0;
+		/* make sure sm_data->isr is cleared before irqs are reenabled */
+		wmb();
+		slic_write(sdev, SLIC_REG_ISR, 0);
+		slic_flush_write(sdev);
+	}
+
+	return done;
+}
+
+/* Interrupt handler.
+ *
+ * @irq:    interrupt number (unused)
+ * @dev_id: the slic_device that was passed to request_irq()
+ *
+ * Writes SLIC_ICR_INT_MASK to the ICR register and then checks the interrupt
+ * status word in shared memory. If it is zero the interrupt is treated as
+ * spurious: 0 is written to the ISR register and IRQ_NONE is returned.
+ * Otherwise NAPI is scheduled, the actual work is left to slic_poll() and
+ * IRQ_HANDLED is returned.
+ *
+ * NOTE(review): wmb() only orders stores; ordering the load of sm_data->isr
+ * after the ICR write presumably relies on slic_flush_write() - confirm.
+ */
+static irqreturn_t slic_irq(int irq, void *dev_id)
+{
+	struct slic_device *sdev = dev_id;
+	struct slic_shmem *sm = &sdev->shmem;
+	struct slic_shmem_data *sm_data = sm->shmem_data;
+
+	slic_write(sdev, SLIC_REG_ICR, SLIC_ICR_INT_MASK);
+	slic_flush_write(sdev);
+	/* make sure sm_data->isr is read after ICR_INT_MASK is set */
+	wmb();
+
+	if (!sm_data->isr) {
+		dma_rmb();
+		/* spurious interrupt */
+		slic_write(sdev, SLIC_REG_ISR, 0);
+		slic_flush_write(sdev);
+		return IRQ_NONE;
+	}
+
+	/* the irqoff variant is used since this runs in hard irq context */
+	napi_schedule_irqoff(&sdev->napi);
+
+	return IRQ_HANDLED;
+}
+
+/* Reset the card by writing SLIC_RESET_MAGIC to the reset register, then
+ * wait 1 ms (busy waiting) before returning.
+ */
+static void slic_card_reset(struct slic_device *sdev)
+{
+	u16 cmd;
+
+	slic_write(sdev, SLIC_REG_RESET, SLIC_RESET_MAGIC);
+	/* flush write by means of config space */
+	/* the value read into cmd is not used */
+	pci_read_config_word(sdev->pdev, PCI_COMMAND, &cmd);
+	mdelay(1);
+}
+
+/* Allocate the status descriptor arrays and announce them to the card.
+ *
+ * SLIC_NUM_STAT_DESC_ARRAYS arrays of SLIC_NUM_STAT_DESCS descriptors each
+ * are allocated from coherent DMA memory. Every allocation is enlarged by
+ * SLIC_STATS_DESC_ALIGN - 1 bytes so that the start can be moved to an
+ * aligned bus address; the applied shift is remembered in addr_offset[] and
+ * has to be undone before the memory is freed.
+ *
+ * Returns 0 on success or -ENOMEM, in which case all arrays allocated so far
+ * have been freed again.
+ */
+static int slic_init_stat_queue(struct slic_device *sdev)
+{
+	const unsigned int DESC_ALIGN_MASK = SLIC_STATS_DESC_ALIGN - 1;
+	struct slic_stat_queue *stq = &sdev->stq;
+	struct slic_stat_desc *descs;
+	unsigned int misalign;
+	unsigned int offset;
+	dma_addr_t paddr;
+	size_t size;
+	int err;
+	int i;
+
+	stq->len = SLIC_NUM_STAT_DESCS;
+	stq->active_array = 0;
+	stq->done_idx = 0;
+
+	size = stq->len * sizeof(*descs) + DESC_ALIGN_MASK;
+
+	for (i = 0; i < SLIC_NUM_STAT_DESC_ARRAYS; i++) {
+		descs = dma_zalloc_coherent(&sdev->pdev->dev, size, &paddr,
+					    GFP_KERNEL);
+		if (!descs) {
+			netdev_err(sdev->netdev,
+				   "failed to allocate status descriptors\n");
+			err = -ENOMEM;
+			goto free_descs;
+		}
+		/* ensure correct alignment */
+		offset = 0;
+		misalign = paddr & DESC_ALIGN_MASK;
+		if (misalign) {
+			offset = SLIC_STATS_DESC_ALIGN - misalign;
+			/* NOTE(review): descs is a struct pointer, so this
+			 * moves it by offset elements while paddr is moved by
+			 * offset bytes. The free paths undo it with the same
+			 * arithmetic - confirm that bytes were not intended.
+			 */
+			descs += offset;
+			paddr += offset;
+		}
+
+		/* the array length is passed in the low bits of the address */
+		slic_write(sdev, SLIC_REG_RBAR, lower_32_bits(paddr) |
+						stq->len);
+		stq->descs[i] = descs;
+		stq->paddr[i] = paddr;
+		stq->addr_offset[i] = offset;
+	}
+
+	stq->mem_size = size;
+
+	return 0;
+
+free_descs:
+	/* stq->mem_size has not been assigned yet on this path, so the local
+	 * allocation size must be used to free the arrays
+	 */
+	while (i--) {
+		dma_free_coherent(&sdev->pdev->dev, size,
+				  stq->descs[i] - stq->addr_offset[i],
+				  stq->paddr[i] - stq->addr_offset[i]);
+	}
+
+	return err;
+}
+
+/* Release all status descriptor arrays of the adapter's status queue. */
+static void slic_free_stat_queue(struct slic_device *sdev)
+{
+	struct slic_stat_queue *stq = &sdev->stq;
+	int idx = 0;
+
+	while (idx < SLIC_NUM_STAT_DESC_ARRAYS) {
+		/* subtract the stored offset again to free with the addresses
+		 * the arrays had before they were shifted
+		 */
+		dma_free_coherent(&sdev->pdev->dev, stq->mem_size,
+				  stq->descs[idx] - stq->addr_offset[idx],
+				  stq->paddr[idx] - stq->addr_offset[idx]);
+		idx++;
+	}
+}
+
+/* Set up the transmit queue.
+ *
+ * Allocates the array of tx buffers, creates a DMA pool for the tx
+ * descriptors and takes one pre-initialized descriptor per buffer from it.
+ *
+ * Returns 0 on success or -ENOMEM; on failure everything allocated here has
+ * been released again.
+ */
+static int slic_init_tx_queue(struct slic_device *sdev)
+{
+	struct slic_tx_queue *txq = &sdev->txq;
+	struct slic_tx_buffer *buff;
+	struct slic_tx_desc *desc;
+	unsigned int i;
+	int err;
+
+	txq->len = SLIC_NUM_TX_DESCS;
+	txq->put_idx = 0;
+	txq->done_idx = 0;
+
+	txq->txbuffs = kcalloc(txq->len, sizeof(*buff), GFP_KERNEL);
+	if (!txq->txbuffs)
+		return -ENOMEM;
+
+	txq->dma_pool = dma_pool_create("slic_pool", &sdev->pdev->dev,
+					sizeof(*desc), SLIC_TX_DESC_ALIGN,
+					4096);
+	if (!txq->dma_pool) {
+		err = -ENOMEM;
+		netdev_err(sdev->netdev, "failed to create dma pool\n");
+		goto free_buffs;
+	}
+
+	for (i = 0; i < txq->len; i++) {
+		buff = &txq->txbuffs[i];
+		desc = dma_pool_zalloc(txq->dma_pool, GFP_KERNEL,
+				       &buff->desc_paddr);
+		if (!desc) {
+			/* i is unsigned, so print it with %u */
+			netdev_err(sdev->netdev,
+				   "failed to alloc pool chunk (%u)\n", i);
+			err = -ENOMEM;
+			goto free_descs;
+		}
+
+		/* the handle is the buffer index plus one */
+		desc->hnd = cpu_to_le32((u32)(i + 1));
+		desc->cmd = SLIC_CMD_XMT_REQ;
+		desc->flags = 0;
+		desc->type = cpu_to_le32(SLIC_CMD_TYPE_DUMB);
+		buff->desc = desc;
+	}
+
+	return 0;
+
+free_descs:
+	/* give back only the descriptors that were obtained so far */
+	while (i--) {
+		buff = &txq->txbuffs[i];
+		dma_pool_free(txq->dma_pool, buff->desc, buff->desc_paddr);
+	}
+	dma_pool_destroy(txq->dma_pool);
+
+free_buffs:
+	kfree(txq->txbuffs);
+
+	return err;
+}
+
+/* Tear down the transmit queue: return all descriptors to the DMA pool,
+ * unmap and release skbs that are still attached to a tx buffer, then
+ * destroy the pool and free the buffer array.
+ */
+static void slic_free_tx_queue(struct slic_device *sdev)
+{
+	struct slic_tx_queue *txq = &sdev->txq;
+	struct slic_tx_buffer *buff = txq->txbuffs;
+	unsigned int n;
+
+	for (n = 0; n < txq->len; n++, buff++) {
+		dma_pool_free(txq->dma_pool, buff->desc, buff->desc_paddr);
+
+		/* only buffers with an skb attached hold a DMA mapping */
+		if (buff->skb) {
+			dma_unmap_single(&sdev->pdev->dev,
+					 dma_unmap_addr(buff, map_addr),
+					 dma_unmap_len(buff, map_len),
+					 DMA_TO_DEVICE);
+			consume_skb(buff->skb);
+		}
+	}
+	dma_pool_destroy(txq->dma_pool);
+
+	kfree(txq->txbuffs);
+}
+
+/* Set up the receive queue: reset the queue indices, allocate the zeroed
+ * array of rx buffers and fill the queue for the first time.
+ *
+ * Returns 0 on success or -ENOMEM if the buffer array cannot be allocated.
+ */
+static int slic_init_rx_queue(struct slic_device *sdev)
+{
+	struct slic_rx_queue *rxq = &sdev->rxq;
+	struct slic_rx_buffer *rxbuffs;
+
+	rxq->len = SLIC_NUM_RX_LES;
+	rxq->put_idx = 0;
+	rxq->done_idx = 0;
+
+	rxbuffs = kcalloc(rxq->len, sizeof(*rxbuffs), GFP_KERNEL);
+	if (!rxbuffs)
+		return -ENOMEM;
+	rxq->rxbuffs = rxbuffs;
+
+	/* process context, so the refill is allowed to sleep */
+	slic_refill_rx_queue(sdev, GFP_KERNEL);
+
+	return 0;
+}
+
+/* Tear down the receive queue: unmap and release every skb that is still
+ * attached to an rx buffer, then free the buffer array.
+ */
+static void slic_free_rx_queue(struct slic_device *sdev)
+{
+	struct slic_rx_queue *rxq = &sdev->rxq;
+	struct slic_rx_buffer *buff = rxq->rxbuffs;
+	unsigned int n;
+
+	for (n = 0; n < rxq->len; n++, buff++) {
+		/* empty slots own neither an skb nor a mapping */
+		if (buff->skb) {
+			dma_unmap_single(&sdev->pdev->dev,
+					 dma_unmap_addr(buff, map_addr),
+					 dma_unmap_len(buff, map_len),
+					 DMA_FROM_DEVICE);
+			consume_skb(buff->skb);
+		}
+	}
+	kfree(rxq->rxbuffs);
+}
+
+/* Program the PHY advertisement registers and (re)start autonegotiation.
+ *
+ * All PHY accesses are writes to SLIC_REG_WPHY; for the MII_* registers the
+ * register number is placed in the upper 16 bits of the value and the
+ * register content in the lower 16 bits. The sequence differs for fiber and
+ * copper interfaces and, on copper, depends on the PCI subsystem device id.
+ */
+static void slic_set_link_autoneg(struct slic_device *sdev)
+{
+	unsigned int subid = sdev->pdev->subsystem_device;
+	u32 val;
+
+	if (sdev->is_fiber) {
+		/* We've got a fiber gigabit interface, and register 4 is
+		 * different in fiber mode than in copper mode.
+		 */
+		/* advertise FD only @1000 Mb, including PAUSE frames */
+		val = MII_ADVERTISE << 16 | ADVERTISE_1000XFULL |
+		      ADVERTISE_1000XPAUSE | ADVERTISE_1000XPSE_ASYM;
+		/* write the advertisement to the PHY */
+		slic_write(sdev, SLIC_REG_WPHY, val);
+		/* reset phy, enable auto-neg  */
+		val = MII_BMCR << 16 | BMCR_RESET | BMCR_ANENABLE |
+		      BMCR_ANRESTART;
+		slic_write(sdev, SLIC_REG_WPHY, val);
+	} else {	/* copper gigabit */
+		/* We've got a copper gigabit interface, and register 4 is
+		 * different in copper mode than in fiber mode.
+		 */
+		/* advertise 10/100 Mb modes   */
+		val = MII_ADVERTISE << 16 | ADVERTISE_100FULL |
+		      ADVERTISE_100HALF | ADVERTISE_10FULL | ADVERTISE_10HALF;
+		/* enable PAUSE frames  */
+		val |= ADVERTISE_PAUSE_CAP | ADVERTISE_PAUSE_ASYM;
+		/* required by the Cicada PHY  */
+		val |= ADVERTISE_CSMA;
+		slic_write(sdev, SLIC_REG_WPHY, val);
+
+		/* advertise FD only @1000 Mb  */
+		val = MII_CTRL1000 << 16 | ADVERTISE_1000FULL;
+		slic_write(sdev, SLIC_REG_WPHY, val);
+
+		if (subid != PCI_SUBDEVICE_ID_ALACRITECH_CICADA) {
+			/* if a Marvell PHY enable auto crossover */
+			val = SLIC_MIICR_REG_16 | SLIC_MRV_REG16_XOVERON;
+			slic_write(sdev, SLIC_REG_WPHY, val);
+
+			/* reset phy, enable auto-neg  */
+			val = MII_BMCR << 16 | BMCR_RESET | BMCR_ANENABLE |
+			      BMCR_ANRESTART;
+			slic_write(sdev, SLIC_REG_WPHY, val);
+		} else {
+			/* enable and restart auto-neg (don't reset)  */
+			val = MII_BMCR << 16 | BMCR_ANENABLE | BMCR_ANRESTART;
+			slic_write(sdev, SLIC_REG_WPHY, val);
+		}
+	}
+}
+
+/* Write the net device's MAC address to the card.
+ *
+ * The last four address bytes are written to the WRADDRAL and WRADDRBL
+ * registers, the first two bytes to WRADDRAH and WRADDRBH, i.e. both address
+ * register sets A and B receive the same address.
+ */
+static void slic_set_mac_address(struct slic_device *sdev)
+{
+	u8 *addr = sdev->netdev->dev_addr;
+	u32 val;
+
+	/* Convert to u32 before shifting: a u8 is promoted to int, and moving
+	 * a set bit 7 into the sign bit by << 24 would be undefined.
+	 */
+	val = (u32)addr[2] << 24 | (u32)addr[3] << 16 | (u32)addr[4] << 8 |
+	      addr[5];
+
+	slic_write(sdev, SLIC_REG_WRADDRAL, val);
+	slic_write(sdev, SLIC_REG_WRADDRBL, val);
+
+	val = (u32)addr[0] << 8 | addr[1];
+
+	slic_write(sdev, SLIC_REG_WRADDRAH, val);
+	slic_write(sdev, SLIC_REG_WRADDRBH, val);
+	slic_flush_write(sdev);
+}
+
+/* Fetch a little endian 32 bit value from a firmware image.
+ *
+ * @fw:     firmware image to read from
+ * @offset: byte position to read at; advanced by 4 on return
+ *
+ * No bounds checking is done here, the callers have to make sure that the
+ * image is large enough. Returns the value in CPU byte order.
+ */
+static u32 slic_read_dword_from_firmware(const struct firmware *fw, int *offset)
+{
+	__le32 raw;
+
+	/* copy the bytes, the position is not necessarily 4 byte aligned */
+	memcpy(&raw, &fw->data[*offset], sizeof(raw));
+	*offset += 4;
+
+	return le32_to_cpu(raw);
+}
+
+MODULE_FIRMWARE(SLIC_RCV_FIRMWARE_MOJAVE);
+MODULE_FIRMWARE(SLIC_RCV_FIRMWARE_OASIS);
+
+/* Download the receive sequencer firmware to the card.
+ *
+ * The image matching the adapter model is requested from userspace. It
+ * starts with a 32 bit instruction count, followed by the instructions. Each
+ * instruction is stored as a little endian 32 bit low part and a single byte
+ * high part. For every instruction its address, the low part and the high
+ * part are written to SLIC_REG_RCV_WCS.
+ *
+ * Returns 0 on success, the error of request_firmware() or -EINVAL if the
+ * image is too small for its content.
+ */
+static int slic_load_rcvseq_firmware(struct slic_device *sdev)
+{
+	/* bytes per instruction in the image: 4 (low part) + 1 (high part) */
+	const unsigned int INSTR_SIZE = 5;
+	const struct firmware *fw;
+	const char *file;
+	u32 codelen;
+	int idx = 0;
+	u32 instr;
+	u32 addr;
+	int err;
+
+	file = (sdev->model == SLIC_MODEL_OASIS) ?  SLIC_RCV_FIRMWARE_OASIS :
+						    SLIC_RCV_FIRMWARE_MOJAVE;
+	err = request_firmware(&fw, file, &sdev->pdev->dev);
+	if (err) {
+		dev_err(&sdev->pdev->dev,
+			"failed to load receive sequencer firmware %s\n", file);
+		return err;
+	}
+	/* Do an initial sanity check concerning firmware size now. A further
+	 * check follows below.
+	 */
+	if (fw->size < SLIC_FIRMWARE_MIN_SIZE) {
+		dev_err(&sdev->pdev->dev,
+			"invalid firmware size %zu (min %u expected)\n",
+			fw->size, SLIC_FIRMWARE_MIN_SIZE);
+		err = -EINVAL;
+		goto release;
+	}
+
+	codelen = slic_read_dword_from_firmware(fw, &idx);
+
+	/* Do another sanity check against firmware size: the download loop
+	 * below consumes INSTR_SIZE bytes per instruction, so all of them
+	 * have to fit into what is left of the image. Dividing instead of
+	 * multiplying keeps a bogus count from overflowing the check.
+	 */
+	if (codelen > (fw->size - idx) / INSTR_SIZE) {
+		dev_err(&sdev->pdev->dev,
+			"invalid rcv-sequencer firmware size %zu\n", fw->size);
+		err = -EINVAL;
+		goto release;
+	}
+
+	/* download sequencer code to card */
+	slic_write(sdev, SLIC_REG_RCV_WCS, SLIC_RCVWCS_BEGIN);
+	for (addr = 0; addr < codelen; addr++) {
+		/* write out instruction address */
+		slic_write(sdev, SLIC_REG_RCV_WCS, addr);
+
+		instr = slic_read_dword_from_firmware(fw, &idx);
+		/* write out the instruction data low addr */
+		slic_write(sdev, SLIC_REG_RCV_WCS, instr);
+
+		/* The high part is a single byte and thus has no byte order;
+		 * running it through le32_to_cpu() would move it to the top
+		 * byte on big endian hosts.
+		 */
+		instr = fw->data[idx];
+		idx++;
+		/* write out the instruction data high addr */
+		slic_write(sdev, SLIC_REG_RCV_WCS, instr);
+	}
+	/* finish download */
+	slic_write(sdev, SLIC_REG_RCV_WCS, SLIC_RCVWCS_FINISH);
+	slic_flush_write(sdev);
+release:
+	release_firmware(fw);
+
+	return err;
+}
+
+MODULE_FIRMWARE(SLIC_FIRMWARE_MOJAVE);
+MODULE_FIRMWARE(SLIC_FIRMWARE_OASIS);
+
+/* Download the main firmware to the card and start it.
+ *
+ * The image matching the adapter model is requested from userspace. It
+ * consists of a 32 bit section count, one 32 bit size per section, one
+ * 32 bit start address per section and the section data. Each instruction
+ * is made up of two 32 bit words and is written to SLIC_REG_WCS together
+ * with its address. After all sections are written, sections with a start
+ * address of 0x8000 or above are written a second time with
+ * SLIC_WCS_COMPARE set in the address. Finally SLIC_WCS_START is written.
+ *
+ * Returns 0 on success, the error of request_firmware() or -EINVAL if the
+ * image header is implausible.
+ */
+static int slic_load_firmware(struct slic_device *sdev)
+{
+	u32 sectstart[SLIC_FIRMWARE_MAX_SECTIONS];
+	u32 sectsize[SLIC_FIRMWARE_MAX_SECTIONS];
+	const struct firmware *fw;
+	unsigned int datalen;
+	const char *file;
+	int code_start;
+	unsigned int i;
+	u32 numsects;
+	int idx = 0;
+	u32 sect;
+	u32 instr;
+	u32 addr;
+	u32 base;
+	int err;
+
+	file = (sdev->model == SLIC_MODEL_OASIS) ?  SLIC_FIRMWARE_OASIS :
+						    SLIC_FIRMWARE_MOJAVE;
+	err = request_firmware(&fw, file, &sdev->pdev->dev);
+	if (err) {
+		dev_err(&sdev->pdev->dev, "failed to load firmware %s\n", file);
+		return err;
+	}
+	/* Do an initial sanity check concerning firmware size now. A further
+	 * check follows below.
+	 */
+	if (fw->size < SLIC_FIRMWARE_MIN_SIZE) {
+		dev_err(&sdev->pdev->dev,
+			"invalid firmware size %zu (min is %u)\n", fw->size,
+			SLIC_FIRMWARE_MIN_SIZE);
+		err = -EINVAL;
+		goto release;
+	}
+
+	numsects = slic_read_dword_from_firmware(fw, &idx);
+	if (numsects == 0 || numsects > SLIC_FIRMWARE_MAX_SECTIONS) {
+		dev_err(&sdev->pdev->dev,
+			"invalid number of sections in firmware: %u\n",
+			numsects);
+		err = -EINVAL;
+		goto release;
+	}
+
+	/* header: section count plus a size and a start address per section */
+	datalen = numsects * 8 + 4;
+	for (i = 0; i < numsects; i++) {
+		sectsize[i] = slic_read_dword_from_firmware(fw, &idx);
+		/* saturate instead of wrapping around, otherwise bogus
+		 * section sizes could slip through the size check below
+		 */
+		if (sectsize[i] > UINT_MAX - datalen)
+			datalen = UINT_MAX;
+		else
+			datalen += sectsize[i];
+	}
+
+	/* do another sanity check against firmware size */
+	if (datalen > fw->size) {
+		dev_err(&sdev->pdev->dev,
+			"invalid firmware size %zu (expected >= %u)\n",
+			fw->size, datalen);
+		err = -EINVAL;
+		goto release;
+	}
+	/* get sections */
+	for (i = 0; i < numsects; i++)
+		sectstart[i] = slic_read_dword_from_firmware(fw, &idx);
+
+	code_start = idx;
+	/* instr always holds the word that is written next (read ahead) */
+	instr = slic_read_dword_from_firmware(fw, &idx);
+
+	for (sect = 0; sect < numsects; sect++) {
+		/* section sizes are in bytes, an instruction takes 8 bytes */
+		unsigned int ssize = sectsize[sect] >> 3;
+
+		base = sectstart[sect];
+
+		for (addr = 0; addr < ssize; addr++) {
+			/* write out instruction address */
+			slic_write(sdev, SLIC_REG_WCS, base + addr);
+			/* write out instruction to low addr */
+			slic_write(sdev, SLIC_REG_WCS, instr);
+			instr = slic_read_dword_from_firmware(fw, &idx);
+			/* write out instruction to high addr */
+			slic_write(sdev, SLIC_REG_WCS, instr);
+			instr = slic_read_dword_from_firmware(fw, &idx);
+		}
+	}
+
+	/* second pass over the code, this time in compare mode */
+	idx = code_start;
+
+	for (sect = 0; sect < numsects; sect++) {
+		unsigned int ssize = sectsize[sect] >> 3;
+
+		instr = slic_read_dword_from_firmware(fw, &idx);
+		base = sectstart[sect];
+		/* NOTE(review): for a skipped section idx only advances by
+		 * the one word read above and not by the section size -
+		 * confirm that this matches the image layout.
+		 */
+		if (base < 0x8000)
+			continue;
+
+		for (addr = 0; addr < ssize; addr++) {
+			/* write out instruction address */
+			slic_write(sdev, SLIC_REG_WCS,
+				   SLIC_WCS_COMPARE | (base + addr));
+			/* write out instruction to low addr */
+			slic_write(sdev, SLIC_REG_WCS, instr);
+			instr = slic_read_dword_from_firmware(fw, &idx);
+			/* write out instruction to high addr */
+			slic_write(sdev, SLIC_REG_WCS, instr);
+			instr = slic_read_dword_from_firmware(fw, &idx);
+		}
+	}
+	slic_flush_write(sdev);
+	mdelay(10);
+	/* everything OK, kick off the card */
+	slic_write(sdev, SLIC_REG_WCS, SLIC_WCS_START);
+	slic_flush_write(sdev);
+	/* wait long enough for ucode to init card and reach the mainloop */
+	mdelay(20);
+release:
+	release_firmware(fw);
+
+	return err;
+}
+
+/* Allocate the zeroed, DMA coherent memory area that is shared with the card
+ * and record its addresses in sdev->shmem.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+static int slic_init_shmem(struct slic_device *sdev)
+{
+	struct slic_shmem *sm = &sdev->shmem;
+	struct slic_shmem_data *data;
+	dma_addr_t base;
+
+	data = dma_zalloc_coherent(&sdev->pdev->dev, sizeof(*data), &base,
+				   GFP_KERNEL);
+	if (!data) {
+		dev_err(&sdev->pdev->dev, "failed to allocate shared memory\n");
+		return -ENOMEM;
+	}
+
+	/* isr_paddr is the bus address of the whole area, link_paddr the one
+	 * of the link member within it
+	 */
+	sm->isr_paddr = base;
+	sm->link_paddr = base + offsetof(struct slic_shmem_data, link);
+	sm->shmem_data = data;
+
+	return 0;
+}
+
+/* Free the memory area shared with the card; isr_paddr is the bus address
+ * the area was allocated with.
+ */
+static void slic_free_shmem(struct slic_device *sdev)
+{
+	struct slic_shmem *sm = &sdev->shmem;
+
+	dma_free_coherent(&sdev->pdev->dev, sizeof(*sm->shmem_data),
+			  sm->shmem_data, sm->isr_paddr);
+}
+
+/* Bring the interface into an operational state.
+ *
+ * Allocates the shared memory, downloads both firmware images, sets up the
+ * rx, tx and status queues, enables NAPI, programs the MAC address, starts
+ * link autonegotiation, installs the interrupt handler, enables interrupts
+ * and finally requests the initial link state.
+ *
+ * Returns 0 on success or a negative errno. On failure everything that was
+ * set up so far is released again and the card is reset. A failure to
+ * request the initial link state is only logged.
+ */
+static int slic_init_iface(struct slic_device *sdev)
+{
+	struct slic_shmem *sm = &sdev->shmem;
+	int err;
+
+	sdev->upr_list.pending = false;
+
+	err = slic_init_shmem(sdev);
+	if (err) {
+		netdev_err(sdev->netdev, "failed to init shared memory\n");
+		return err;
+	}
+
+	err = slic_load_firmware(sdev);
+	if (err) {
+		netdev_err(sdev->netdev, "failed to load firmware\n");
+		goto free_sm;
+	}
+
+	err = slic_load_rcvseq_firmware(sdev);
+	if (err) {
+		netdev_err(sdev->netdev,
+			   "failed to load firmware for receive sequencer\n");
+		goto free_sm;
+	}
+
+	/* keep interrupts off while the queues are being set up */
+	slic_write(sdev, SLIC_REG_ICR, SLIC_ICR_INT_OFF);
+	slic_flush_write(sdev);
+	mdelay(1);
+
+	/* err holds a negative errno below, so it is printed as signed */
+	err = slic_init_rx_queue(sdev);
+	if (err) {
+		netdev_err(sdev->netdev, "failed to init rx queue: %i\n", err);
+		goto free_sm;
+	}
+
+	err = slic_init_tx_queue(sdev);
+	if (err) {
+		netdev_err(sdev->netdev, "failed to init tx queue: %i\n", err);
+		goto free_rxq;
+	}
+
+	err = slic_init_stat_queue(sdev);
+	if (err) {
+		netdev_err(sdev->netdev, "failed to init status queue: %i\n",
+			   err);
+		goto free_txq;
+	}
+
+	/* tell the card where the interrupt status word is located */
+	slic_write(sdev, SLIC_REG_ISP, lower_32_bits(sm->isr_paddr));
+	napi_enable(&sdev->napi);
+	/* disable irq mitigation */
+	slic_write(sdev, SLIC_REG_INTAGG, 0);
+	slic_write(sdev, SLIC_REG_ISR, 0);
+	slic_flush_write(sdev);
+
+	slic_set_mac_address(sdev);
+
+	/* the link state is unknown until the card has reported it */
+	spin_lock_bh(&sdev->link_lock);
+	sdev->duplex = DUPLEX_UNKNOWN;
+	sdev->speed = SPEED_UNKNOWN;
+	spin_unlock_bh(&sdev->link_lock);
+
+	slic_set_link_autoneg(sdev);
+
+	err = request_irq(sdev->pdev->irq, slic_irq, IRQF_SHARED, DRV_NAME,
+			  sdev);
+	if (err) {
+		netdev_err(sdev->netdev, "failed to request irq: %i\n", err);
+		goto disable_napi;
+	}
+
+	slic_write(sdev, SLIC_REG_ICR, SLIC_ICR_INT_ON);
+	slic_flush_write(sdev);
+	/* request initial link status */
+	err = slic_handle_link_change(sdev);
+	if (err)
+		netdev_warn(sdev->netdev,
+			    "failed to set initial link state: %i\n", err);
+	return 0;
+
+disable_napi:
+	napi_disable(&sdev->napi);
+	slic_free_stat_queue(sdev);
+free_txq:
+	slic_free_tx_queue(sdev);
+free_rxq:
+	slic_free_rx_queue(sdev);
+free_sm:
+	slic_free_shmem(sdev);
+	slic_card_reset(sdev);
+
+	return err;
+}
+
+/* ndo_open callback: mark the carrier as off, initialize the interface and,
+ * if that worked, allow the stack to transmit.
+ *
+ * Returns 0 on success or the error of slic_init_iface().
+ */
+static int slic_open(struct net_device *dev)
+{
+	int err;
+
+	/* the carrier stays off until the link state is known */
+	netif_carrier_off(dev);
+
+	err = slic_init_iface(netdev_priv(dev));
+	if (!err) {
+		netif_start_queue(dev);
+		return 0;
+	}
+
+	netdev_err(dev, "failed to initialize interface: %i\n", err);
+
+	return err;
+}
+
+/* ndo_stop callback: shut the interface down.
+ *
+ * Stops the tx queue, disables NAPI and the card's interrupt, releases the
+ * irq, resets the transmit and receive configuration, powers down the PHY,
+ * drops all pending UPRs, frees the queues and the shared memory and
+ * finally resets the card. Always returns 0.
+ */
+static int slic_close(struct net_device *dev)
+{
+	struct slic_device *sdev = netdev_priv(dev);
+	u32 val;
+
+	netif_stop_queue(dev);
+
+	/* stop irq handling */
+	napi_disable(&sdev->napi);
+	slic_write(sdev, SLIC_REG_ICR, SLIC_ICR_INT_OFF);
+	slic_write(sdev, SLIC_REG_ISR, 0);
+	slic_flush_write(sdev);
+
+	free_irq(sdev->pdev->irq, sdev);
+	/* turn off RCV and XMT and power down PHY */
+	val = SLIC_GXCR_RESET | SLIC_GXCR_PAUSEEN;
+	slic_write(sdev, SLIC_REG_WXCFG, val);
+
+	val = SLIC_GRCR_RESET | SLIC_GRCR_CTLEN | SLIC_GRCR_ADDRAEN |
+	      SLIC_GRCR_HASHSIZE << SLIC_GRCR_HASHSIZE_SHIFT;
+	slic_write(sdev, SLIC_REG_WRCFG, val);
+
+	val = MII_BMCR << 16 | BMCR_PDOWN;
+	slic_write(sdev, SLIC_REG_WPHY, val);
+	slic_flush_write(sdev);
+
+	/* no UPR can complete anymore, so drop whatever is still queued */
+	slic_clear_upr_list(&sdev->upr_list);
+	slic_write(sdev, SLIC_REG_QUIESCE, 0);
+
+	/* release the resources in reverse order of their allocation */
+	slic_free_stat_queue(sdev);
+	slic_free_tx_queue(sdev);
+	slic_free_rx_queue(sdev);
+	slic_free_shmem(sdev);
+
+	slic_card_reset(sdev);
+	netif_carrier_off(dev);
+
+	return 0;
+}
+
+/* ndo_start_xmit callback: hand one frame to the card.
+ *
+ * The linear part of the skb (skb_headlen() bytes) is mapped for DMA and
+ * described by the tx descriptor at put_idx; the card is then notified by a
+ * write to SLIC_REG_CBAR. The tx queue is stopped once fewer than
+ * SLIC_MAX_REQ_TX_DESCS descriptors are left.
+ *
+ * Returns NETDEV_TX_BUSY if there are not enough free descriptors on entry,
+ * NETDEV_TX_OK otherwise. If the DMA mapping fails the skb is dropped and
+ * NETDEV_TX_OK is returned as well.
+ */
+static netdev_tx_t slic_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct slic_device *sdev = netdev_priv(dev);
+	struct slic_tx_queue *txq = &sdev->txq;
+	struct slic_tx_buffer *buff;
+	struct slic_tx_desc *desc;
+	dma_addr_t paddr;
+	u32 cbar_val;
+	u32 maplen;
+
+	/* not expected to happen: the queue is stopped at the end of this
+	 * function as soon as the descriptors run low
+	 */
+	if (unlikely(slic_get_free_tx_descs(txq) < SLIC_MAX_REQ_TX_DESCS)) {
+		netdev_err(dev, "BUG! not enough tx LEs left: %u\n",
+			   slic_get_free_tx_descs(txq));
+		return NETDEV_TX_BUSY;
+	}
+
+	maplen = skb_headlen(skb);
+	paddr = dma_map_single(&sdev->pdev->dev, skb->data, maplen,
+			       DMA_TO_DEVICE);
+	if (dma_mapping_error(&sdev->pdev->dev, paddr)) {
+		netdev_err(dev, "failed to map tx buffer\n");
+		goto drop_skb;
+	}
+
+	/* remember skb and mapping, both are needed to release the buffer */
+	buff = &txq->txbuffs[txq->put_idx];
+	buff->skb = skb;
+	dma_unmap_addr_set(buff, map_addr, paddr);
+	dma_unmap_len_set(buff, map_len, maplen);
+
+	desc = buff->desc;
+	desc->totlen = cpu_to_le32(maplen);
+	desc->paddrl = cpu_to_le32(lower_32_bits(paddr));
+	desc->paddrh = cpu_to_le32(upper_32_bits(paddr));
+	desc->len = cpu_to_le32(maplen);
+
+	txq->put_idx = slic_next_queue_idx(txq->put_idx, txq->len);
+
+	/* the descriptor's bus address with bit 0 set is what the card gets */
+	cbar_val = lower_32_bits(buff->desc_paddr) | 1;
+	/* complete writes to RAM and DMA before hardware is informed */
+	wmb();
+
+	slic_write(sdev, SLIC_REG_CBAR, cbar_val);
+
+	if (slic_get_free_tx_descs(txq) < SLIC_MAX_REQ_TX_DESCS)
+		netif_stop_queue(dev);
+	/* make sure writes to io-memory cant leak out of tx queue lock */
+	mmiowb();
+
+	return NETDEV_TX_OK;
+drop_skb:
+	/* the skb is consumed, so the stack must not retry it */
+	dev_kfree_skb_any(skb);
+
+	return NETDEV_TX_OK;
+}
+
+/* ndo_get_stats64 callback: copy the driver's counters into @lst.
+ *
+ * Each counter is read by means of SLIC_GET_STATS_COUNTER(). Members of @lst
+ * that are not listed below are left untouched. Returns @lst.
+ */
+static struct rtnl_link_stats64 *slic_get_stats(struct net_device *dev,
+						struct rtnl_link_stats64 *lst)
+{
+	struct slic_device *sdev = netdev_priv(dev);
+	struct slic_stats *stats = &sdev->stats;
+
+	SLIC_GET_STATS_COUNTER(lst->rx_packets, stats, rx_packets);
+	SLIC_GET_STATS_COUNTER(lst->tx_packets, stats, tx_packets);
+	SLIC_GET_STATS_COUNTER(lst->rx_bytes, stats, rx_bytes);
+	SLIC_GET_STATS_COUNTER(lst->tx_bytes, stats, tx_bytes);
+	SLIC_GET_STATS_COUNTER(lst->rx_errors, stats, rx_errors);
+	/* frames missed for lack of rx buffers are reported as rx_dropped */
+	SLIC_GET_STATS_COUNTER(lst->rx_dropped, stats, rx_buff_miss);
+	SLIC_GET_STATS_COUNTER(lst->tx_dropped, stats, tx_dropped);
+	SLIC_GET_STATS_COUNTER(lst->multicast, stats, rx_mcasts);
+	SLIC_GET_STATS_COUNTER(lst->rx_over_errors, stats, rx_buffoflow);
+	SLIC_GET_STATS_COUNTER(lst->rx_crc_errors, stats, rx_crc);
+	SLIC_GET_STATS_COUNTER(lst->rx_fifo_errors, stats, rx_oflow802);
+	SLIC_GET_STATS_COUNTER(lst->tx_carrier_errors, stats, tx_carrier);
+
+	return lst;
+}
+
+/* ethtool get_sset_count callback: only the statistics string set is
+ * supported, its size is the number of entries in slic_stats_strings. Any
+ * other set yields -EOPNOTSUPP.
+ */
+static int slic_get_sset_count(struct net_device *dev, int sset)
+{
+	if (sset == ETH_SS_STATS)
+		return ARRAY_SIZE(slic_stats_strings);
+
+	return -EOPNOTSUPP;
+}
+
+/* ethtool get_ethtool_stats callback: fill @data with the driver's counters.
+ *
+ * 23 values are written (data[0] to data[22]).
+ * NOTE(review): the order presumably has to match the names in
+ * slic_stats_strings - verify when adding or reordering counters.
+ */
+static void slic_get_ethtool_stats(struct net_device *dev,
+				   struct ethtool_stats *eth_stats, u64 *data)
+{
+	struct slic_device *sdev = netdev_priv(dev);
+	struct slic_stats *stats = &sdev->stats;
+
+	SLIC_GET_STATS_COUNTER(data[0], stats, rx_packets);
+	SLIC_GET_STATS_COUNTER(data[1], stats, rx_bytes);
+	SLIC_GET_STATS_COUNTER(data[2], stats, rx_mcasts);
+	SLIC_GET_STATS_COUNTER(data[3], stats, rx_errors);
+	SLIC_GET_STATS_COUNTER(data[4], stats, rx_buff_miss);
+	SLIC_GET_STATS_COUNTER(data[5], stats, rx_tpcsum);
+	SLIC_GET_STATS_COUNTER(data[6], stats, rx_tpoflow);
+	SLIC_GET_STATS_COUNTER(data[7], stats, rx_tphlen);
+	SLIC_GET_STATS_COUNTER(data[8], stats, rx_ipcsum);
+	SLIC_GET_STATS_COUNTER(data[9], stats, rx_iplen);
+	SLIC_GET_STATS_COUNTER(data[10], stats, rx_iphlen);
+	SLIC_GET_STATS_COUNTER(data[11], stats, rx_early);
+	SLIC_GET_STATS_COUNTER(data[12], stats, rx_buffoflow);
+	SLIC_GET_STATS_COUNTER(data[13], stats, rx_lcode);
+	SLIC_GET_STATS_COUNTER(data[14], stats, rx_drbl);
+	SLIC_GET_STATS_COUNTER(data[15], stats, rx_crc);
+	SLIC_GET_STATS_COUNTER(data[16], stats, rx_oflow802);
+	SLIC_GET_STATS_COUNTER(data[17], stats, rx_uflow802);
+	SLIC_GET_STATS_COUNTER(data[18], stats, tx_packets);
+	SLIC_GET_STATS_COUNTER(data[19], stats, tx_bytes);
+	SLIC_GET_STATS_COUNTER(data[20], stats, tx_carrier);
+	SLIC_GET_STATS_COUNTER(data[21], stats, tx_dropped);
+	SLIC_GET_STATS_COUNTER(data[22], stats, irq_errs);
+}
+
+/* ethtool get_strings callback: copy the names of the statistics counters
+ * to @data. Requests for any other string set are ignored.
+ */
+static void slic_get_strings(struct net_device *dev, u32 stringset, u8 *data)
+{
+	if (stringset != ETH_SS_STATS)
+		return;
+
+	memcpy(data, slic_stats_strings, sizeof(slic_stats_strings));
+}
+
+/* ethtool get_drvinfo callback: report driver name, driver version and the
+ * PCI bus address of the device. All other members of @info are left as
+ * they are.
+ */
+static void slic_get_drvinfo(struct net_device *dev,
+			     struct ethtool_drvinfo *info)
+{
+	struct slic_device *sdev = netdev_priv(dev);
+
+	/* strlcpy() truncates if needed and always terminates the string */
+	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
+	strlcpy(info->bus_info, pci_name(sdev->pdev), sizeof(info->bus_info));
+}
+
+/* ethtool operations: driver info, link state and the driver's statistics
+ * (names, values and their number)
+ */
+static const struct ethtool_ops slic_ethtool_ops = {
+	.get_drvinfo		= slic_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_strings		= slic_get_strings,
+	.get_ethtool_stats	= slic_get_ethtool_stats,
+	.get_sset_count		= slic_get_sset_count,
+};
+
+/* net device operations; MAC address handling, MTU changes and address
+ * validation are left to the generic ethernet helpers
+ */
+static const struct net_device_ops slic_netdev_ops = {
+	.ndo_open		= slic_open,
+	.ndo_stop		= slic_close,
+	.ndo_start_xmit		= slic_xmit,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_get_stats64	= slic_get_stats,
+	.ndo_set_rx_mode	= slic_set_rx_mode,
+	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+/* Compute the 16 bit checksum over @len bytes of EEPROM content.
+ *
+ * The data is summed up as little endian 16 bit words, an odd trailing byte
+ * is added as it is. Carries out of the lower 16 bits are folded back into
+ * the sum and the inverted result is returned.
+ */
+static u16 slic_eeprom_csum(unsigned char *eeprom, unsigned int len)
+{
+	unsigned int pos;
+	u32 sum = 0;
+
+	/* all complete 16 bit words */
+	for (pos = 0; pos + 1 < len; pos += 2) {
+		__le16 word;
+
+		memcpy(&word, eeprom + pos, sizeof(word));
+		sum += le16_to_cpu(word);
+	}
+	/* a single byte is left if the length is odd */
+	if (pos < len)
+		sum += eeprom[pos];
+	/* fold the carries until the sum fits into 16 bits */
+	while (sum >> 16)
+		sum = (sum & 0xFFFF) + (sum >> 16);
+
+	return ~sum;
+}
+
+/* Validate EEPROM content: the size has to be within the supported range,
+ * the first two bytes have to hold the magic value and the checksum stored
+ * in the last two bytes has to match the data that precedes it.
+ */
+static bool slic_eeprom_valid(unsigned char *eeprom, unsigned int size)
+{
+	const unsigned int MAX_SIZE = 128;
+	const unsigned int MIN_SIZE = 98;
+	unsigned int payload;
+	__le16 stored;
+	__le16 magic;
+
+	if (size > MAX_SIZE || size < MIN_SIZE)
+		return false;
+
+	memcpy(&magic, eeprom, sizeof(magic));
+	if (le16_to_cpu(magic) != SLIC_EEPROM_MAGIC)
+		return false;
+
+	/* the checksum bytes themselves are not part of the checksum */
+	payload = size - 2;
+	memcpy(&stored, eeprom + payload, sizeof(stored));
+
+	return slic_eeprom_csum(eeprom, payload) == le16_to_cpu(stored);
+}
+
+/* Read the EEPROM content and take the MAC address from it.
+ *
+ * A SLIC_UPR_CONFIG request makes the card transfer the EEPROM content into
+ * a temporary DMA buffer. Since interrupts are switched off, completion is
+ * detected by polling the interrupt status word in shared memory for up to
+ * MAX_LOOPS milliseconds. The content is validated and the MAC address that
+ * belongs to this PCI function is copied to the net device.
+ *
+ * The shared memory has to be set up by the caller.
+ *
+ * Returns 0 on success, -ENOMEM, -ETIMEDOUT, -EINVAL or the error of
+ * slic_new_upr().
+ */
+static int slic_read_eeprom(struct slic_device *sdev)
+{
+	unsigned int devfn = PCI_FUNC(sdev->pdev->devfn);
+	struct slic_shmem *sm = &sdev->shmem;
+	struct slic_shmem_data *sm_data = sm->shmem_data;
+	const unsigned int MAX_LOOPS = 5000;
+	unsigned int codesize;
+	unsigned char *eeprom;
+	struct slic_upr *upr;
+	unsigned int i = 0;
+	dma_addr_t paddr;
+	int err = 0;
+	u8 *mac[2];
+
+	eeprom = dma_zalloc_coherent(&sdev->pdev->dev, SLIC_EEPROM_SIZE,
+				     &paddr, GFP_KERNEL);
+	if (!eeprom)
+		return -ENOMEM;
+
+	slic_write(sdev, SLIC_REG_ICR, SLIC_ICR_INT_OFF);
+	/* setup ISP temporarily */
+	slic_write(sdev, SLIC_REG_ISP, lower_32_bits(sm->isr_paddr));
+
+	err = slic_new_upr(sdev, SLIC_UPR_CONFIG, paddr);
+	if (!err) {
+		/* wait for the card to signal completion of the request */
+		for (i = 0; i < MAX_LOOPS; i++) {
+			if (le32_to_cpu(sm_data->isr) & SLIC_ISR_UPC)
+				break;
+			mdelay(1);
+		}
+		if (i == MAX_LOOPS) {
+			dev_err(&sdev->pdev->dev,
+				"timed out while waiting for eeprom data\n");
+			err = -ETIMEDOUT;
+		}
+		/* the request is done (or given up), remove it from the list */
+		upr = slic_dequeue_upr(sdev);
+		kfree(upr);
+	}
+
+	slic_write(sdev, SLIC_REG_ISP, 0);
+	slic_write(sdev, SLIC_REG_ISR, 0);
+	slic_flush_write(sdev);
+
+	if (err)
+		goto free_eeprom;
+
+	/* the layout of the eeprom content depends on the adapter model */
+	if (sdev->model == SLIC_MODEL_OASIS) {
+		struct slic_oasis_eeprom *oee;
+
+		oee = (struct slic_oasis_eeprom *)eeprom;
+		mac[0] = oee->mac;
+		mac[1] = oee->mac2;
+		codesize = le16_to_cpu(oee->eeprom_code_size);
+	} else {
+		struct slic_mojave_eeprom *mee;
+
+		mee = (struct slic_mojave_eeprom *)eeprom;
+		mac[0] = mee->mac;
+		mac[1] = mee->mac2;
+		codesize = le16_to_cpu(mee->eeprom_code_size);
+	}
+
+	if (!slic_eeprom_valid(eeprom, codesize)) {
+		dev_err(&sdev->pdev->dev, "invalid checksum in eeprom\n");
+		err = -EINVAL;
+		goto free_eeprom;
+	}
+	/* A PCI function number can be as high as 7, but only two addresses
+	 * are available; do not index beyond the end of mac[].
+	 */
+	if (devfn >= ARRAY_SIZE(mac)) {
+		dev_err(&sdev->pdev->dev,
+			"no mac address for pci function %u\n", devfn);
+		err = -EINVAL;
+		goto free_eeprom;
+	}
+	/* set mac address */
+	ether_addr_copy(sdev->netdev->dev_addr, mac[devfn]);
+free_eeprom:
+	dma_free_coherent(&sdev->pdev->dev, SLIC_EEPROM_SIZE, eeprom, paddr);
+
+	return err;
+}
+
+/* Basic initialization of the adapter.
+ *
+ * Initializes the locks, the UPR list and the statistics sync point, resets
+ * the card, loads the firmware and reads the EEPROM, for which the shared
+ * memory is set up temporarily. On success the card is reset once more
+ * before the shared memory is freed again.
+ *
+ * Returns 0 on success or the error of the step that failed. The card is
+ * not reset again on the error paths.
+ */
+static int slic_init(struct slic_device *sdev)
+{
+	int err;
+
+	spin_lock_init(&sdev->upper_lock);
+	spin_lock_init(&sdev->link_lock);
+	INIT_LIST_HEAD(&sdev->upr_list.list);
+	spin_lock_init(&sdev->upr_list.lock);
+	u64_stats_init(&sdev->stats.syncp);
+
+	slic_card_reset(sdev);
+
+	err = slic_load_firmware(sdev);
+	if (err) {
+		dev_err(&sdev->pdev->dev, "failed to load firmware\n");
+		return err;
+	}
+
+	/* we need the shared memory to read EEPROM so set it up temporarily */
+	err = slic_init_shmem(sdev);
+	if (err) {
+		dev_err(&sdev->pdev->dev, "failed to init shared memory\n");
+		return err;
+	}
+
+	err = slic_read_eeprom(sdev);
+	if (err) {
+		dev_err(&sdev->pdev->dev, "failed to read eeprom\n");
+		goto free_sm;
+	}
+
+	/* reset the card before the shared memory is freed */
+	slic_card_reset(sdev);
+	slic_free_shmem(sdev);
+
+	return 0;
+free_sm:
+	slic_free_shmem(sdev);
+
+	return err;
+}
+
+static bool slic_is_fiber(unsigned short subdev)
+{
+	/* true only for the subsystem device ids listed below */
+	switch (subdev) {
+	case PCI_SUBDEVICE_ID_ALACRITECH_1000X1F:	/* Mojave */
+	case PCI_SUBDEVICE_ID_ALACRITECH_SES1001F:	/* Mojave */
+	case PCI_SUBDEVICE_ID_ALACRITECH_SEN2002XF:	/* Oasis */
+	case PCI_SUBDEVICE_ID_ALACRITECH_SEN2001XF:	/* Oasis */
+	case PCI_SUBDEVICE_ID_ALACRITECH_SEN2104EF:	/* Oasis */
+	case PCI_SUBDEVICE_ID_ALACRITECH_SEN2102EF:	/* Oasis */
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void slic_configure_pci(struct pci_dev *pdev)
+{
+	u16 orig_cmd;
+	u16 new_cmd;
+
+	/* set the PARITY and SERR bits of PCI_COMMAND unless both are set */
+	pci_read_config_word(pdev, PCI_COMMAND, &orig_cmd);
+	new_cmd = orig_cmd | PCI_COMMAND_PARITY | PCI_COMMAND_SERR;
+	if (new_cmd != orig_cmd)
+		pci_write_config_word(pdev, PCI_COMMAND, new_cmd);
+}
+
+/* Enable and map one adapter, read its EEPROM and register the net device */
+static int slic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	struct slic_device *sdev;
+	struct net_device *dev;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to enable PCI device\n");
+		return err;
+	}
+
+	pci_set_master(pdev);
+	pci_try_set_mwi(pdev);
+
+	slic_configure_pci(pdev);
+
+	/* one 32 bit mask for both streaming and coherent DMA */
+	err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+	if (err) {
+		dev_err(&pdev->dev, "failed to setup DMA\n");
+		goto disable;
+	}
+
+	err = pci_request_regions(pdev, DRV_NAME);
+	if (err) {
+		dev_err(&pdev->dev, "failed to obtain PCI regions\n");
+		goto disable;
+	}
+
+	dev = alloc_etherdev(sizeof(*sdev));
+	if (!dev) {
+		dev_err(&pdev->dev, "failed to alloc ethernet device\n");
+		err = -ENOMEM;
+		goto free_regions;
+	}
+
+	SET_NETDEV_DEV(dev, &pdev->dev);
+	pci_set_drvdata(pdev, dev);
+	dev->irq = pdev->irq;
+	dev->netdev_ops = &slic_netdev_ops;
+	dev->hw_features = NETIF_F_RXCSUM;
+	dev->features |= dev->hw_features;
+
+	dev->ethtool_ops = &slic_ethtool_ops;
+
+	sdev = netdev_priv(dev);
+	sdev->model = (pdev->device == PCI_DEVICE_ID_ALACRITECH_OASIS) ?
+		      SLIC_MODEL_OASIS : SLIC_MODEL_MOJAVE;
+	sdev->is_fiber = slic_is_fiber(pdev->subsystem_device);
+	sdev->pdev = pdev;
+	sdev->netdev = dev;
+	sdev->regs = ioremap_nocache(pci_resource_start(pdev, 0),
+				     pci_resource_len(pdev, 0));
+	if (!sdev->regs) {
+		dev_err(&pdev->dev, "failed to map registers\n");
+		err = -ENOMEM;
+		goto free_netdev;
+	}
+
+	err = slic_init(sdev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize driver\n");
+		goto unmap;
+	}
+
+	netif_napi_add(dev, &sdev->napi, slic_poll, SLIC_NAPI_WEIGHT);
+	netif_carrier_off(dev);
+
+	err = register_netdev(dev);
+	if (err) {
+		dev_err(&pdev->dev, "failed to register net device: %i\n", err);
+		goto unmap;
+	}
+
+	return 0;
+
+unmap:
+	iounmap(sdev->regs);
+free_netdev:
+	free_netdev(dev);
+free_regions:
+	pci_release_regions(pdev);
+disable:
+	pci_disable_device(pdev);
+
+	return err;
+}
+
+static void slic_remove(struct pci_dev *pdev)
+{
+	struct net_device *dev = pci_get_drvdata(pdev);
+	struct slic_device *sdev = netdev_priv(dev);
+
+	unregister_netdev(dev);	/* undo slic_probe() in reverse order */
+	iounmap(sdev->regs);	/* needs sdev, so before free_netdev() */
+	free_netdev(dev);
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+}
+
+static struct pci_driver slic_driver = {
+	.name = DRV_NAME,
+	.id_table = slic_id_tbl,
+	.probe = slic_probe,
+	.remove = slic_remove,
+};
+
+module_pci_driver(slic_driver);
+
+MODULE_DESCRIPTION("Alacritech non-accelerated SLIC driver");
+MODULE_AUTHOR("Lino Sanfilippo <LinoSanfilippo@gmx.de>");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 6ffdff68bfc4..c8f4d26fc9d4 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -37,6 +37,11 @@
 
 #define EMAC_MAX_FRAME_LEN	0x0600
 
+#define EMAC_DEFAULT_MSG_ENABLE 0x0000
+static int debug = -1;     /* -1 selects EMAC_DEFAULT_MSG_ENABLE */
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "debug message flags");
+
 /* Transmit timeout, default 5 seconds. */
 static int watchdog = 5000;
 module_param(watchdog, int, 0400);
@@ -225,11 +230,27 @@ static void emac_get_drvinfo(struct net_device *dev,
 	strlcpy(info->bus_info, dev_name(&dev->dev), sizeof(info->bus_info));
 }
 
+static u32 emac_get_msglevel(struct net_device *dev)
+{
+	const struct emac_board_info *board = netdev_priv(dev);
+
+	return board->msg_enable;	/* mask reported to ethtool */
+}
+
+static void emac_set_msglevel(struct net_device *dev, u32 value)
+{
+	struct emac_board_info *board = netdev_priv(dev);
+
+	board->msg_enable = value;	/* new mask, stored as given */
+}
+
 static const struct ethtool_ops emac_ethtool_ops = {
 	.get_drvinfo	= emac_get_drvinfo,
 	.get_link	= ethtool_op_get_link,
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
+	.get_msglevel	= emac_get_msglevel,
+	.set_msglevel	= emac_set_msglevel,
 };
 
 static unsigned int emac_setup(struct net_device *ndev)
@@ -571,8 +592,7 @@ static void emac_rx(struct net_device *dev)
 		/* A packet ready now  & Get status/length */
 		good_packet = true;
 
-		emac_inblk_32bit(db->membase + EMAC_RX_IO_DATA_REG,
-				&rxhdr, sizeof(rxhdr));
+		rxhdr = readl(db->membase + EMAC_RX_IO_DATA_REG);
 
 		if (netif_msg_rx_status(db))
 			dev_dbg(db->dev, "rxhdr: %x\n", *((int *)(&rxhdr)));
@@ -773,7 +793,6 @@ static const struct net_device_ops emac_netdev_ops = {
 	.ndo_tx_timeout		= emac_timeout,
 	.ndo_set_rx_mode	= emac_set_rx_mode,
 	.ndo_do_ioctl		= emac_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= emac_set_mac_address,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -805,6 +824,7 @@ static int emac_probe(struct platform_device *pdev)
 	db->dev = &pdev->dev;
 	db->ndev = ndev;
 	db->pdev = pdev;
+	db->msg_enable = netif_msg_init(debug, EMAC_DEFAULT_MSG_ENABLE);
 
 	spin_lock_init(&db->lock);
 
diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c
index b90a26b13fdf..16f0c70266bc 100644
--- a/drivers/net/ethernet/alteon/acenic.c
+++ b/drivers/net/ethernet/alteon/acenic.c
@@ -429,14 +429,16 @@ static const char version[] =
   "acenic.c: v0.92 08/05/2002  Jes Sorensen, linux-acenic@SunSITE.dk\n"
   "                            http://home.cern.ch/~jes/gige/acenic.html\n";
 
-static int ace_get_settings(struct net_device *, struct ethtool_cmd *);
-static int ace_set_settings(struct net_device *, struct ethtool_cmd *);
+static int ace_get_link_ksettings(struct net_device *,
+				  struct ethtool_link_ksettings *);
+static int ace_set_link_ksettings(struct net_device *,
+				  const struct ethtool_link_ksettings *);
 static void ace_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 
 static const struct ethtool_ops ace_ethtool_ops = {
-	.get_settings = ace_get_settings,
-	.set_settings = ace_set_settings,
 	.get_drvinfo = ace_get_drvinfo,
+	.get_link_ksettings = ace_get_link_ksettings,
+	.set_link_ksettings = ace_set_link_ksettings,
 };
 
 static void ace_watchdog(struct net_device *dev);
@@ -474,6 +476,8 @@ static int acenic_probe_one(struct pci_dev *pdev,
 	dev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 
 	dev->watchdog_timeo = 5*HZ;
+	dev->min_mtu = 0;
+	dev->max_mtu = ACE_JUMBO_MTU;
 
 	dev->netdev_ops = &ace_netdev_ops;
 	dev->ethtool_ops = &ace_ethtool_ops;
@@ -2548,9 +2552,6 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu)
 	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
 
-	if (new_mtu > ACE_JUMBO_MTU)
-		return -EINVAL;
-
 	writel(new_mtu + ETH_HLEN + 4, &regs->IfMtu);
 	dev->mtu = new_mtu;
 
@@ -2580,43 +2581,44 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int ace_get_link_ksettings(struct net_device *dev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
 	u32 link;
+	u32 supported;
+
+	memset(cmd, 0, sizeof(struct ethtool_link_ksettings));
 
-	memset(ecmd, 0, sizeof(struct ethtool_cmd));
-	ecmd->supported =
-		(SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
-		 SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
-		 SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full |
-		 SUPPORTED_Autoneg | SUPPORTED_FIBRE);
+	supported = (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full |
+		     SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full |
+		     SUPPORTED_1000baseT_Half | SUPPORTED_1000baseT_Full |
+		     SUPPORTED_Autoneg | SUPPORTED_FIBRE);
 
-	ecmd->port = PORT_FIBRE;
-	ecmd->transceiver = XCVR_INTERNAL;
+	cmd->base.port = PORT_FIBRE;
 
 	link = readl(&regs->GigLnkState);
-	if (link & LNK_1000MB)
-		ethtool_cmd_speed_set(ecmd, SPEED_1000);
-	else {
+	if (link & LNK_1000MB) {
+		cmd->base.speed = SPEED_1000;
+	} else {
 		link = readl(&regs->FastLnkState);
 		if (link & LNK_100MB)
-			ethtool_cmd_speed_set(ecmd, SPEED_100);
+			cmd->base.speed = SPEED_100;
 		else if (link & LNK_10MB)
-			ethtool_cmd_speed_set(ecmd, SPEED_10);
+			cmd->base.speed = SPEED_10;
 		else
-			ethtool_cmd_speed_set(ecmd, 0);
+			cmd->base.speed = 0;
 	}
 	if (link & LNK_FULL_DUPLEX)
-		ecmd->duplex = DUPLEX_FULL;
+		cmd->base.duplex = DUPLEX_FULL;
 	else
-		ecmd->duplex = DUPLEX_HALF;
+		cmd->base.duplex = DUPLEX_HALF;
 
 	if (link & LNK_NEGOTIATE)
-		ecmd->autoneg = AUTONEG_ENABLE;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 	else
-		ecmd->autoneg = AUTONEG_DISABLE;
+		cmd->base.autoneg = AUTONEG_DISABLE;
 
 #if 0
 	/*
@@ -2627,13 +2629,15 @@ static int ace_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 	ecmd->txcoal = readl(&regs->TuneTxCoalTicks);
 	ecmd->rxcoal = readl(&regs->TuneRxCoalTicks);
 #endif
-	ecmd->maxtxpkt = readl(&regs->TuneMaxTxDesc);
-	ecmd->maxrxpkt = readl(&regs->TuneMaxRxDesc);
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
 
 	return 0;
 }
 
-static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int ace_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct ace_private *ap = netdev_priv(dev);
 	struct ace_regs __iomem *regs = ap->regs;
@@ -2656,11 +2660,11 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 		LNK_RX_FLOW_CTL_Y | LNK_NEG_FCTL;
 	if (!ACE_IS_TIGON_I(ap))
 		link |= LNK_TX_FLOW_CTL_Y;
-	if (ecmd->autoneg == AUTONEG_ENABLE)
+	if (cmd->base.autoneg == AUTONEG_ENABLE)
 		link |= LNK_NEGOTIATE;
-	if (ethtool_cmd_speed(ecmd) != speed) {
+	if (cmd->base.speed != speed) {
 		link &= ~(LNK_1000MB | LNK_100MB | LNK_10MB);
-		switch (ethtool_cmd_speed(ecmd)) {
+		switch (cmd->base.speed) {
 		case SPEED_1000:
 			link |= LNK_1000MB;
 			break;
@@ -2673,7 +2677,7 @@ static int ace_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
 		}
 	}
 
-	if (ecmd->duplex == DUPLEX_FULL)
+	if (cmd->base.duplex == DUPLEX_FULL)
 		link |= LNK_FULL_DUPLEX;
 
 	if (link != ap->link) {
diff --git a/drivers/net/ethernet/altera/altera_tse.h b/drivers/net/ethernet/altera/altera_tse.h
index e0052003d16f..e2feee87180a 100644
--- a/drivers/net/ethernet/altera/altera_tse.h
+++ b/drivers/net/ethernet/altera/altera_tse.h
@@ -120,6 +120,17 @@
 #define MAC_CMDCFG_DISABLE_READ_TIMEOUT_GET(v)	GET_BIT_VALUE(v, 27)
 #define MAC_CMDCFG_CNT_RESET_GET(v)		GET_BIT_VALUE(v, 31)
 
+/* SGMII PCS register numbers, plus the PCS software reset timeout
+ */
+#define SGMII_PCS_SCRATCH	0x10
+#define SGMII_PCS_REV		0x11
+#define SGMII_PCS_LINK_TIMER_0	0x12
+#define SGMII_PCS_LINK_TIMER_1	0x13
+#define SGMII_PCS_IF_MODE	0x14
+#define SGMII_PCS_DIS_READ_TO	0x15
+#define SGMII_PCS_READ_TO	0x16
+#define SGMII_PCS_SW_RESET_TIMEOUT 100 /* usecs */
+
 /* MDIO registers within MAC register Space
  */
 struct altera_tse_mdio {
@@ -443,7 +454,6 @@ struct altera_tse_private {
 	/* RX/TX MAC FIFO configs */
 	u32 tx_fifo_depth;
 	u32 rx_fifo_depth;
-	u32 max_mtu;
 
 	/* Hash filter settings */
 	u32 hash_filter;
diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index bda31f308cc2..25864bff25ee 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -37,6 +37,7 @@
 #include <linux/io.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/mii.h>
 #include <linux/netdevice.h>
 #include <linux/of_device.h>
 #include <linux/of_mdio.h>
@@ -96,6 +97,27 @@ static inline u32 tse_tx_avail(struct altera_tse_private *priv)
 	return priv->tx_cons + priv->tx_ring_size - priv->tx_prod - 1;
 }
 
+/* PCS register accessors: register regnum occupies the low 16 bits of the
+ * 32-bit CSR word found regnum words past mdio_phy0
+ */
+static u16 sgmii_pcs_read(struct altera_tse_private *priv, int regnum)
+{
+	return (u16)csrrd32(priv->mac_dev, tse_csroffs(mdio_phy0) + regnum * 4);
+}
+
+static void sgmii_pcs_write(struct altera_tse_private *priv, int regnum,
+			    u16 value)
+{
+	csrwr32(value, priv->mac_dev, tse_csroffs(mdio_phy0) + regnum * 4);
+}
+
+/* Write value to the PCS scratch register; return 1 if it reads back intact */
+static int sgmii_pcs_scratch_test(struct altera_tse_private *priv, u16 value)
+{
+	sgmii_pcs_write(priv, SGMII_PCS_SCRATCH, value);
+	return value == sgmii_pcs_read(priv, SGMII_PCS_SCRATCH);
+}
+
 /* MDIO specific functions
  */
 static int altera_tse_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
@@ -400,12 +422,6 @@ static int tse_rx(struct altera_tse_private *priv, int limit)
 
 		skb_put(skb, pktlength);
 
-		/* make cache consistent with receive packet buffer */
-		dma_sync_single_for_cpu(priv->device,
-					priv->rx_ring[entry].dma_addr,
-					priv->rx_ring[entry].len,
-					DMA_FROM_DEVICE);
-
 		dma_unmap_single(priv->device, priv->rx_ring[entry].dma_addr,
 				 priv->rx_ring[entry].len, DMA_FROM_DEVICE);
 
@@ -469,7 +485,6 @@ static int tse_tx_complete(struct altera_tse_private *priv)
 
 	if (unlikely(netif_queue_stopped(priv->dev) &&
 		     tse_tx_avail(priv) > TSE_TX_THRESH(priv))) {
-		netif_tx_lock(priv->dev);
 		if (netif_queue_stopped(priv->dev) &&
 		    tse_tx_avail(priv) > TSE_TX_THRESH(priv)) {
 			if (netif_msg_tx_done(priv))
@@ -477,7 +492,6 @@ static int tse_tx_complete(struct altera_tse_private *priv)
 					   __func__);
 			netif_wake_queue(priv->dev);
 		}
-		netif_tx_unlock(priv->dev);
 	}
 
 	spin_unlock(&priv->tx_lock);
@@ -592,10 +606,6 @@ static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	buffer->dma_addr = dma_addr;
 	buffer->len = nopaged_len;
 
-	/* Push data out of the cache hierarchy into main memory */
-	dma_sync_single_for_device(priv->device, buffer->dma_addr,
-				   buffer->len, DMA_TO_DEVICE);
-
 	priv->dmaops->tx_buffer(priv, buffer);
 
 	skb_tx_timestamp(skb);
@@ -819,6 +829,8 @@ static int init_phy(struct net_device *dev)
 
 	if (!phydev) {
 		netdev_err(dev, "Could not find the PHY\n");
+		if (fixed_link)
+			of_phy_deregister_fixed_link(priv->device->of_node);
 		return -ENODEV;
 	}
 
@@ -994,20 +1006,11 @@ static void tse_set_mac(struct altera_tse_private *priv, bool enable)
  */
 static int tse_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct altera_tse_private *priv = netdev_priv(dev);
-	unsigned int max_mtu = priv->max_mtu;
-	unsigned int min_mtu = ETH_ZLEN + ETH_FCS_LEN;
-
 	if (netif_running(dev)) {
 		netdev_err(dev, "must be stopped to change its MTU\n");
 		return -EBUSY;
 	}
 
-	if ((new_mtu < min_mtu) || (new_mtu > max_mtu)) {
-		netdev_err(dev, "invalid MTU, max MTU is: %u\n", max_mtu);
-		return -EINVAL;
-	}
-
 	dev->mtu = new_mtu;
 	netdev_update_features(dev);
 
@@ -1092,6 +1095,66 @@ static void tse_set_rx_mode(struct net_device *dev)
 	spin_unlock(&priv->mac_cfg_lock);
 }
 
+/* Set up the SGMII PCS block; does nothing unless the MAC runs in SGMII mode.
+ * Returns 0 on success, -ENOMEM if the scratch register test fails and
+ * -ETIMEDOUT if the block does not leave reset.
+ */
+static int init_sgmii_pcs(struct net_device *dev)
+{
+	struct altera_tse_private *priv = netdev_priv(dev);
+	unsigned int bmcr;
+	int tries;
+
+	if (priv->phy_iface != PHY_INTERFACE_MODE_SGMII)
+		return 0;
+
+	/* The TSE SGMII PCS block looks a little like a PHY: it is mapped
+	 * into the zeroth MDIO space of the MAC and has ID registers like a
+	 * PHY would. Those are often configured to zeroes, so a PHY ID of
+	 * 0x00000000 is no surprise. Check that the block responds at all
+	 * by cycling four patterns through its scratch register.
+	 */
+	if (!sgmii_pcs_scratch_test(priv, 0x0000) ||
+	    !sgmii_pcs_scratch_test(priv, 0xffff) ||
+	    !sgmii_pcs_scratch_test(priv, 0xa5a5) ||
+	    !sgmii_pcs_scratch_test(priv, 0x5a5a)) {
+		netdev_err(dev, "SGMII PCS Scratch memory test failed.\n");
+		return -ENOMEM;
+	}
+
+	netdev_info(dev, "PCS PHY ID: 0x%04x%04x\n",
+		    sgmii_pcs_read(priv, MII_PHYSID1),
+		    sgmii_pcs_read(priv, MII_PHYSID2));
+
+	/* Starting on page 5-29 of the MegaCore Function User Guide
+	 * Set SGMII Link timer to 1.6ms
+	 */
+	sgmii_pcs_write(priv, SGMII_PCS_LINK_TIMER_0, 0x0D40);
+	sgmii_pcs_write(priv, SGMII_PCS_LINK_TIMER_1, 0x03);
+
+	/* Enable SGMII Interface and Enable SGMII Auto Negotiation */
+	sgmii_pcs_write(priv, SGMII_PCS_IF_MODE, 0x3);
+
+	/* Request 1000 Mbit/s full duplex with autonegotiation enabled */
+	bmcr = sgmii_pcs_read(priv, MII_BMCR);
+	bmcr |= BMCR_SPEED1000 | BMCR_FULLDPLX | BMCR_ANENABLE;
+	sgmii_pcs_write(priv, MII_BMCR, bmcr);
+
+	/* Reset the PCS block, then poll until it clears the reset bit */
+	sgmii_pcs_write(priv, MII_BMCR, bmcr | BMCR_RESET);
+	for (tries = SGMII_PCS_SW_RESET_TIMEOUT; tries > 0; tries--) {
+		if (!(sgmii_pcs_read(priv, MII_BMCR) & BMCR_RESET)) {
+			netdev_info(dev, "SGMII PCS block initialised OK\n");
+			return 0;
+		}
+		udelay(1);
+	}
+
+	/* The reset bit never cleared: report a timeout */
+	netdev_err(dev, "SGMII PCS block reset failed.\n");
+	return -ETIMEDOUT;
+}
+
 /* Open and initialize the interface
  */
 static int tse_open(struct net_device *dev)
@@ -1116,6 +1179,15 @@ static int tse_open(struct net_device *dev)
 		netdev_warn(dev, "TSE revision %x\n", priv->revision);
 
 	spin_lock(&priv->mac_cfg_lock);
+	/* no-op if MAC not operating in SGMII mode*/
+	ret = init_sgmii_pcs(dev);
+	if (ret) {
+		netdev_err(dev,
+			   "Cannot init the SGMII PCS (error: %d)\n", ret);
+		spin_unlock(&priv->mac_cfg_lock);
+		goto phy_error;
+	}
+
 	ret = reset_mac(priv);
 	/* Note that reset_mac will fail if the clocks are gated by the PHY
 	 * due to the PHY being put into isolation or power down mode.
@@ -1338,11 +1410,13 @@ static int altera_tse_probe(struct platform_device *pdev)
 		if (upper_32_bits(priv->rxdescmem_busaddr)) {
 			dev_dbg(priv->device,
 				"SGDMA bus addresses greater than 32-bits\n");
+			ret = -EINVAL;
 			goto err_free_netdev;
 		}
 		if (upper_32_bits(priv->txdescmem_busaddr)) {
 			dev_dbg(priv->device,
 				"SGDMA bus addresses greater than 32-bits\n");
+			ret = -EINVAL;
 			goto err_free_netdev;
 		}
 	} else if (priv->dmaops &&
@@ -1446,15 +1520,16 @@ static int altera_tse_probe(struct platform_device *pdev)
 		of_property_read_bool(pdev->dev.of_node,
 				      "altr,has-supplementary-unicast");
 
+	priv->dev->min_mtu = ETH_ZLEN + ETH_FCS_LEN;
 	/* Max MTU is 1500, ETH_DATA_LEN */
-	priv->max_mtu = ETH_DATA_LEN;
+	priv->dev->max_mtu = ETH_DATA_LEN;
 
 	/* Get the max mtu from the device tree. Note that the
 	 * "max-frame-size" parameter is actually max mtu. Definition
 	 * in the ePAPR v1.1 spec and usage differ, so go with usage.
 	 */
 	of_property_read_u32(pdev->dev.of_node, "max-frame-size",
-			     &priv->max_mtu);
+			     &priv->dev->max_mtu);
 
 	/* The DMA buffer size already accounts for an alignment bias
 	 * to avoid unaligned access exceptions for the NIOS processor,
@@ -1545,10 +1620,15 @@ err_free_netdev:
 static int altera_tse_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
+	struct altera_tse_private *priv = netdev_priv(ndev);
 
-	if (ndev->phydev)
+	if (ndev->phydev) {
 		phy_disconnect(ndev->phydev);
 
+		if (of_phy_is_fixed_link(priv->device->of_node))
+			of_phy_deregister_fixed_link(priv->device->of_node);
+	}
+
 	platform_set_drvdata(pdev, NULL);
 	altera_tse_mdio_destroy(ndev);
 	unregister_netdev(ndev);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index bfeaec5bd7b9..cc8b13ebfa75 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -103,13 +103,6 @@ static int ena_change_mtu(struct net_device *dev, int new_mtu)
 	struct ena_adapter *adapter = netdev_priv(dev);
 	int ret;
 
-	if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) {
-		netif_err(adapter, drv, dev,
-			  "Invalid MTU setting. new_mtu: %d\n", new_mtu);
-
-		return -EINVAL;
-	}
-
 	ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu);
 	if (!ret) {
 		netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu);
@@ -2755,6 +2748,8 @@ static void ena_set_conf_feat_params(struct ena_adapter *adapter,
 	ena_set_dev_offloads(feat, netdev);
 
 	adapter->max_mtu = feat->dev_attr.max_mtu;
+	netdev->max_mtu = adapter->max_mtu;
+	netdev->min_mtu = ENA_MIN_MTU;
 }
 
 static int ena_rss_init_default(struct ena_adapter *adapter)
@@ -3018,12 +3013,9 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	adapter->last_keep_alive_jiffies = jiffies;
 
-	init_timer(&adapter->timer_service);
-	adapter->timer_service.expires = round_jiffies(jiffies + HZ);
-	adapter->timer_service.function = ena_timer_service;
-	adapter->timer_service.data = (unsigned long)adapter;
-
-	add_timer(&adapter->timer_service);
+	setup_timer(&adapter->timer_service, ena_timer_service,
+		    (unsigned long)adapter);
+	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
 
 	dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n",
 		 DEVICE_NAME, (long)pci_resource_start(pdev, 0),
diff --git a/drivers/net/ethernet/amd/Kconfig b/drivers/net/ethernet/amd/Kconfig
index 0038709fd317..d5c15e8bb3de 100644
--- a/drivers/net/ethernet/amd/Kconfig
+++ b/drivers/net/ethernet/amd/Kconfig
@@ -173,11 +173,13 @@ config SUNLANCE
 
 config AMD_XGBE
 	tristate "AMD 10GbE Ethernet driver"
-	depends on ((OF_NET && OF_ADDRESS) || ACPI) && HAS_IOMEM && HAS_DMA
-	depends on ARM64 || COMPILE_TEST
+	depends on ((OF_NET && OF_ADDRESS) || ACPI || PCI) && HAS_IOMEM && HAS_DMA
+	depends on X86 || ARM64 || COMPILE_TEST
 	select BITREVERSE
 	select CRC32
-	select PTP_1588_CLOCK
+	select PHYLIB
+	select AMD_XGBE_HAVE_ECC if X86
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports the AMD 10GbE Ethernet device found on an
 	  AMD SoC.
@@ -195,4 +197,8 @@ config AMD_XGBE_DCB
 
 	  If unsure, say N.
 
+config AMD_XGBE_HAVE_ECC
+	bool
+	default n
+
 endif # NET_VENDOR_AMD
diff --git a/drivers/net/ethernet/amd/a2065.c b/drivers/net/ethernet/amd/a2065.c
index a83cd1c4ce1d..ee4b94e3cda9 100644
--- a/drivers/net/ethernet/amd/a2065.c
+++ b/drivers/net/ethernet/amd/a2065.c
@@ -665,7 +665,6 @@ static const struct net_device_ops lance_netdev_ops = {
 	.ndo_tx_timeout		= lance_tx_timeout,
 	.ndo_set_rx_mode	= lance_set_multicast,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
diff --git a/drivers/net/ethernet/amd/am79c961a.c b/drivers/net/ethernet/amd/am79c961a.c
index fcdf5dda448f..b11e910850f7 100644
--- a/drivers/net/ethernet/amd/am79c961a.c
+++ b/drivers/net/ethernet/amd/am79c961a.c
@@ -663,7 +663,6 @@ static const struct net_device_ops am79c961_netdev_ops = {
 	.ndo_set_rx_mode	= am79c961_setmulticastlist,
 	.ndo_tx_timeout		= am79c961_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= am79c961_poll_controller,
diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c
index f92cc97151ec..11cf1e3e0295 100644
--- a/drivers/net/ethernet/amd/amd8111e.c
+++ b/drivers/net/ethernet/amd/amd8111e.c
@@ -1421,21 +1421,23 @@ static void amd8111e_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 	amd8111e_read_regs(lp, buf);
 }
 
-static int amd8111e_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int amd8111e_get_link_ksettings(struct net_device *dev,
+				       struct ethtool_link_ksettings *cmd)
 {
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	spin_lock_irq(&lp->lock);
-	mii_ethtool_gset(&lp->mii_if, ecmd);
+	mii_ethtool_get_link_ksettings(&lp->mii_if, cmd);
 	spin_unlock_irq(&lp->lock);
 	return 0;
 }
 
-static int amd8111e_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd)
+static int amd8111e_set_link_ksettings(struct net_device *dev,
+				       const struct ethtool_link_ksettings *cmd)
 {
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	int res;
 	spin_lock_irq(&lp->lock);
-	res = mii_ethtool_sset(&lp->mii_if, ecmd);
+	res = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd);
 	spin_unlock_irq(&lp->lock);
 	return res;
 }
@@ -1482,12 +1484,12 @@ static const struct ethtool_ops ops = {
 	.get_drvinfo = amd8111e_get_drvinfo,
 	.get_regs_len = amd8111e_get_regs_len,
 	.get_regs = amd8111e_get_regs,
-	.get_settings = amd8111e_get_settings,
-	.set_settings = amd8111e_set_settings,
 	.nway_reset = amd8111e_nway_reset,
 	.get_link = amd8111e_get_link,
 	.get_wol = amd8111e_get_wol,
 	.set_wol = amd8111e_set_wol,
+	.get_link_ksettings = amd8111e_get_link_ksettings,
+	.set_link_ksettings = amd8111e_set_link_ksettings,
 };
 
 /* This function handles all the  ethtool ioctls. It gives driver info,
@@ -1556,9 +1558,6 @@ static int amd8111e_change_mtu(struct net_device *dev, int new_mtu)
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	int err;
 
-	if ((new_mtu < AMD8111E_MIN_MTU) || (new_mtu > AMD8111E_MAX_MTU))
-		return -EINVAL;
-
 	if (!netif_running(dev)) {
 		/* new_mtu will be used
 		 * when device starts netxt time
@@ -1874,6 +1873,8 @@ static int amd8111e_probe_one(struct pci_dev *pdev,
 	dev->ethtool_ops = &ops;
 	dev->irq =pdev->irq;
 	dev->watchdog_timeo = AMD8111E_TX_TIMEOUT;
+	dev->min_mtu = AMD8111E_MIN_MTU;
+	dev->max_mtu = AMD8111E_MAX_MTU;
 	netif_napi_add(dev, &lp->napi, amd8111e_rx_poll, 32);
 
 #if AMD8111E_VLAN_TAG_USED
diff --git a/drivers/net/ethernet/amd/ariadne.c b/drivers/net/ethernet/amd/ariadne.c
index 968b7bfac8fc..5fd7b15b0574 100644
--- a/drivers/net/ethernet/amd/ariadne.c
+++ b/drivers/net/ethernet/amd/ariadne.c
@@ -706,7 +706,6 @@ static const struct net_device_ops ariadne_netdev_ops = {
 	.ndo_get_stats		= ariadne_get_stats,
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index d2bc8e5dcd23..796c37a5bbde 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -460,7 +460,6 @@ static const struct net_device_ops lance_netdev_ops = {
 	.ndo_set_mac_address	= lance_set_mac_address,
 	.ndo_tx_timeout		= lance_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static unsigned long __init lance_probe1( struct net_device *dev,
@@ -1013,13 +1012,9 @@ static int lance_rx( struct net_device *dev )
 					u_char *data = PKTBUF_ADDR(head);
 
 					printk(KERN_DEBUG "%s: RX pkt type 0x%04x from %pM to %pM "
-						   "data %02x %02x %02x %02x %02x %02x %02x %02x "
-						   "len %d\n",
+						   "data %8ph len %d\n",
 						   dev->name, ((u_short *)data)[6],
-						   &data[6], data,
-						   data[15], data[16], data[17], data[18],
-						   data[19], data[20], data[21], data[22],
-						   pkt_len);
+						   &data[6], data, &data[15], pkt_len);
 				}
 
 				skb_reserve( skb, 2 );	/* 16 byte align */
diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c
index df664187cd82..a3c90fe5de00 100644
--- a/drivers/net/ethernet/amd/au1000_eth.c
+++ b/drivers/net/ethernet/amd/au1000_eth.c
@@ -1103,7 +1103,6 @@ static const struct net_device_ops au1000_netdev_ops = {
 	.ndo_tx_timeout		= au1000_tx_timeout,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int au1000_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/amd/declance.c b/drivers/net/ethernet/amd/declance.c
index b799c7ac899b..76e5fc7adff5 100644
--- a/drivers/net/ethernet/amd/declance.c
+++ b/drivers/net/ethernet/amd/declance.c
@@ -1013,7 +1013,6 @@ static const struct net_device_ops lance_netdev_ops = {
 	.ndo_start_xmit		= lance_start_xmit,
 	.ndo_tx_timeout		= lance_tx_timeout,
 	.ndo_set_rx_mode	= lance_set_multicast,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
diff --git a/drivers/net/ethernet/amd/hplance.c b/drivers/net/ethernet/amd/hplance.c
index 6c9de117ffc6..c3dbf1c8a269 100644
--- a/drivers/net/ethernet/amd/hplance.c
+++ b/drivers/net/ethernet/amd/hplance.c
@@ -72,7 +72,6 @@ static const struct net_device_ops hplance_netdev_ops = {
 	.ndo_stop		= hplance_close,
 	.ndo_start_xmit		= lance_start_xmit,
 	.ndo_set_rx_mode	= lance_set_multicast,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/amd/lance.c b/drivers/net/ethernet/amd/lance.c
index abb1ba228b26..61a641f23149 100644
--- a/drivers/net/ethernet/amd/lance.c
+++ b/drivers/net/ethernet/amd/lance.c
@@ -461,7 +461,6 @@ static const struct net_device_ops lance_netdev_ops = {
 	.ndo_get_stats		= lance_get_stats,
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_tx_timeout		= lance_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/amd/mvme147.c b/drivers/net/ethernet/amd/mvme147.c
index 0660ac5846bb..0a920448522f 100644
--- a/drivers/net/ethernet/amd/mvme147.c
+++ b/drivers/net/ethernet/amd/mvme147.c
@@ -62,7 +62,6 @@ static const struct net_device_ops lance_netdev_ops = {
 	.ndo_start_xmit		= lance_start_xmit,
 	.ndo_set_rx_mode	= lance_set_multicast,
 	.ndo_tx_timeout		= lance_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
diff --git a/drivers/net/ethernet/amd/ni65.c b/drivers/net/ethernet/amd/ni65.c
index cda53db75f17..5985bf220a8d 100644
--- a/drivers/net/ethernet/amd/ni65.c
+++ b/drivers/net/ethernet/amd/ni65.c
@@ -407,7 +407,6 @@ static const struct net_device_ops ni65_netdev_ops = {
 	.ndo_start_xmit		= ni65_send_packet,
 	.ndo_tx_timeout		= ni65_timeout,
 	.ndo_set_rx_mode	= set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/amd/nmclan_cs.c b/drivers/net/ethernet/amd/nmclan_cs.c
index 2807e181647b..113a3b3cc50c 100644
--- a/drivers/net/ethernet/amd/nmclan_cs.c
+++ b/drivers/net/ethernet/amd/nmclan_cs.c
@@ -427,7 +427,6 @@ static const struct net_device_ops mace_netdev_ops = {
 	.ndo_set_config		= mace_config,
 	.ndo_get_stats		= mace_get_stats,
 	.ndo_set_rx_mode	= set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/amd/pcnet32.c b/drivers/net/ethernet/amd/pcnet32.c
index c22bf52d3320..41e58cca8fee 100644
--- a/drivers/net/ethernet/amd/pcnet32.c
+++ b/drivers/net/ethernet/amd/pcnet32.c
@@ -677,7 +677,8 @@ static void pcnet32_poll_controller(struct net_device *dev)
 }
 #endif
 
-static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int pcnet32_get_link_ksettings(struct net_device *dev,
+				      struct ethtool_link_ksettings *cmd)
 {
 	struct pcnet32_private *lp = netdev_priv(dev);
 	unsigned long flags;
@@ -685,14 +686,15 @@ static int pcnet32_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 	if (lp->mii) {
 		spin_lock_irqsave(&lp->lock, flags);
-		mii_ethtool_gset(&lp->mii_if, cmd);
+		mii_ethtool_get_link_ksettings(&lp->mii_if, cmd);
 		spin_unlock_irqrestore(&lp->lock, flags);
 		r = 0;
 	}
 	return r;
 }
 
-static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int pcnet32_set_link_ksettings(struct net_device *dev,
+				      const struct ethtool_link_ksettings *cmd)
 {
 	struct pcnet32_private *lp = netdev_priv(dev);
 	unsigned long flags;
@@ -700,7 +702,7 @@ static int pcnet32_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 	if (lp->mii) {
 		spin_lock_irqsave(&lp->lock, flags);
-		r = mii_ethtool_sset(&lp->mii_if, cmd);
+		r = mii_ethtool_set_link_ksettings(&lp->mii_if, cmd);
 		spin_unlock_irqrestore(&lp->lock, flags);
 	}
 	return r;
@@ -1440,8 +1442,6 @@ static void pcnet32_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 }
 
 static const struct ethtool_ops pcnet32_ethtool_ops = {
-	.get_settings		= pcnet32_get_settings,
-	.set_settings		= pcnet32_set_settings,
 	.get_drvinfo		= pcnet32_get_drvinfo,
 	.get_msglevel		= pcnet32_get_msglevel,
 	.set_msglevel		= pcnet32_set_msglevel,
@@ -1455,6 +1455,8 @@ static const struct ethtool_ops pcnet32_ethtool_ops = {
 	.get_regs_len		= pcnet32_get_regs_len,
 	.get_regs		= pcnet32_get_regs,
 	.get_sset_count		= pcnet32_get_sset_count,
+	.get_link_ksettings	= pcnet32_get_link_ksettings,
+	.set_link_ksettings	= pcnet32_set_link_ksettings,
 };
 
 /* only probes for non-PCI devices, the rest are handled by
@@ -1527,7 +1529,6 @@ static const struct net_device_ops pcnet32_netdev_ops = {
 	.ndo_get_stats		= pcnet32_get_stats,
 	.ndo_set_rx_mode	= pcnet32_set_multicast_list,
 	.ndo_do_ioctl		= pcnet32_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/amd/sun3lance.c b/drivers/net/ethernet/amd/sun3lance.c
index 3d8c6b2cdea4..12bb4f1489fc 100644
--- a/drivers/net/ethernet/amd/sun3lance.c
+++ b/drivers/net/ethernet/amd/sun3lance.c
@@ -299,7 +299,6 @@ static const struct net_device_ops lance_netdev_ops = {
 	.ndo_start_xmit		= lance_start_xmit,
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_set_mac_address	= NULL,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/amd/sunlance.c b/drivers/net/ethernet/amd/sunlance.c
index 9b56b40259dc..291ca5187f12 100644
--- a/drivers/net/ethernet/amd/sunlance.c
+++ b/drivers/net/ethernet/amd/sunlance.c
@@ -1294,7 +1294,6 @@ static const struct net_device_ops sparc_lance_ops = {
 	.ndo_start_xmit		= lance_start_xmit,
 	.ndo_set_rx_mode	= lance_set_multicast,
 	.ndo_tx_timeout		= lance_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/amd/xgbe/Makefile b/drivers/net/ethernet/amd/xgbe/Makefile
index 171a7e68048d..0dea8f5da899 100644
--- a/drivers/net/ethernet/amd/xgbe/Makefile
+++ b/drivers/net/ethernet/amd/xgbe/Makefile
@@ -2,7 +2,10 @@ obj-$(CONFIG_AMD_XGBE) += amd-xgbe.o
 
 amd-xgbe-objs := xgbe-main.o xgbe-drv.o xgbe-dev.o \
 		 xgbe-desc.o xgbe-ethtool.o xgbe-mdio.o \
-		 xgbe-ptp.o
+		 xgbe-ptp.o \
+		 xgbe-i2c.o xgbe-phy-v1.o xgbe-phy-v2.o \
+		 xgbe-platform.o
 
+amd-xgbe-$(CONFIG_PCI) += xgbe-pci.o
 amd-xgbe-$(CONFIG_AMD_XGBE_DCB) += xgbe-dcb.o
 amd-xgbe-$(CONFIG_DEBUG_FS) += xgbe-debugfs.o
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-common.h b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
index bbef95973c27..5b7ba25e0065 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-common.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-common.h
@@ -159,6 +159,8 @@
 #define DMA_ISR_MACIS_WIDTH		1
 #define DMA_ISR_MTLIS_INDEX		16
 #define DMA_ISR_MTLIS_WIDTH		1
+#define DMA_MR_INTM_INDEX		12
+#define DMA_MR_INTM_WIDTH		2
 #define DMA_MR_SWR_INDEX		0
 #define DMA_MR_SWR_WIDTH		1
 #define DMA_SBMR_EAME_INDEX		11
@@ -309,6 +311,11 @@
 #define MAC_HWF0R			0x011c
 #define MAC_HWF1R			0x0120
 #define MAC_HWF2R			0x0124
+#define MAC_MDIOSCAR			0x0200
+#define MAC_MDIOSCCDR			0x0204
+#define MAC_MDIOISR			0x0214
+#define MAC_MDIOIER			0x0218
+#define MAC_MDIOCL22R			0x0220
 #define MAC_GPIOCR			0x0278
 #define MAC_GPIOSR			0x027c
 #define MAC_MACA0HR			0x0300
@@ -409,10 +416,34 @@
 #define MAC_ISR_MMCTXIS_WIDTH		1
 #define MAC_ISR_PMTIS_INDEX		4
 #define MAC_ISR_PMTIS_WIDTH		1
+#define MAC_ISR_SMI_INDEX		1
+#define MAC_ISR_SMI_WIDTH		1
 #define MAC_ISR_TSIS_INDEX		12
 #define MAC_ISR_TSIS_WIDTH		1
 #define MAC_MACA1HR_AE_INDEX		31
 #define MAC_MACA1HR_AE_WIDTH		1
+#define MAC_MDIOIER_SNGLCOMPIE_INDEX	12
+#define MAC_MDIOIER_SNGLCOMPIE_WIDTH	1
+#define MAC_MDIOISR_SNGLCOMPINT_INDEX	12
+#define MAC_MDIOISR_SNGLCOMPINT_WIDTH	1
+#define MAC_MDIOSCAR_DA_INDEX		21
+#define MAC_MDIOSCAR_DA_WIDTH		5
+#define MAC_MDIOSCAR_PA_INDEX		16
+#define MAC_MDIOSCAR_PA_WIDTH		5
+#define MAC_MDIOSCAR_RA_INDEX		0
+#define MAC_MDIOSCAR_RA_WIDTH		16
+#define MAC_MDIOSCAR_REG_INDEX		0
+#define MAC_MDIOSCAR_REG_WIDTH		21
+#define MAC_MDIOSCCDR_BUSY_INDEX	22
+#define MAC_MDIOSCCDR_BUSY_WIDTH	1
+#define MAC_MDIOSCCDR_CMD_INDEX		16
+#define MAC_MDIOSCCDR_CMD_WIDTH		2
+#define MAC_MDIOSCCDR_CR_INDEX		19
+#define MAC_MDIOSCCDR_CR_WIDTH		3
+#define MAC_MDIOSCCDR_DATA_INDEX	0
+#define MAC_MDIOSCCDR_DATA_WIDTH	16
+#define MAC_MDIOSCCDR_SADDR_INDEX	18
+#define MAC_MDIOSCCDR_SADDR_WIDTH	1
 #define MAC_PFR_HMC_INDEX		2
 #define MAC_PFR_HMC_WIDTH		1
 #define MAC_PFR_HPF_INDEX		10
@@ -790,6 +821,10 @@
 #define MTL_Q_RQOMR_RSF_WIDTH		1
 #define MTL_Q_RQOMR_RTC_INDEX		0
 #define MTL_Q_RQOMR_RTC_WIDTH		2
+#define MTL_Q_TQDR_TRCSTS_INDEX		1
+#define MTL_Q_TQDR_TRCSTS_WIDTH		2
+#define MTL_Q_TQDR_TXQSTS_INDEX		4
+#define MTL_Q_TQDR_TXQSTS_WIDTH		1
 #define MTL_Q_TQOMR_FTQ_INDEX		0
 #define MTL_Q_TQOMR_FTQ_WIDTH		1
 #define MTL_Q_TQOMR_Q2TCMAP_INDEX	8
@@ -852,14 +887,16 @@
 #define MTL_TSA_SP			0x00
 #define MTL_TSA_ETS			0x02
 
-/* PCS MMD select register offset
- *  The MMD select register is used for accessing PCS registers
- *  when the underlying APB3 interface is using indirect addressing.
- *  Indirect addressing requires accessing registers in two phases,
- *  an address phase and a data phase.  The address phases requires
- *  writing an address selection value to the MMD select regiesters.
- */
-#define PCS_MMD_SELECT			0xff
+/* PCS register offsets */
+#define PCS_V1_WINDOW_SELECT		0x03fc
+#define PCS_V2_WINDOW_DEF		0x9060
+#define PCS_V2_WINDOW_SELECT		0x9064
+
+/* PCS register entry bit positions and sizes */
+#define PCS_V2_WINDOW_DEF_OFFSET_INDEX	6
+#define PCS_V2_WINDOW_DEF_OFFSET_WIDTH	14
+#define PCS_V2_WINDOW_DEF_SIZE_INDEX	2
+#define PCS_V2_WINDOW_DEF_SIZE_WIDTH	4
 
 /* SerDes integration register offsets */
 #define SIR0_KR_RT_1			0x002c
@@ -903,6 +940,198 @@
 #define RXTX_REG129_RXDFE_CONFIG_INDEX	14
 #define RXTX_REG129_RXDFE_CONFIG_WIDTH	2
 
+/* MAC Control register offsets */
+#define XP_PROP_0			0x0000
+#define XP_PROP_1			0x0004
+#define XP_PROP_2			0x0008
+#define XP_PROP_3			0x000c
+#define XP_PROP_4			0x0010
+#define XP_PROP_5			0x0014
+#define XP_MAC_ADDR_LO			0x0020
+#define XP_MAC_ADDR_HI			0x0024
+#define XP_ECC_ISR			0x0030
+#define XP_ECC_IER			0x0034
+#define XP_ECC_CNT0			0x003c
+#define XP_ECC_CNT1			0x0040
+#define XP_DRIVER_INT_REQ		0x0060
+#define XP_DRIVER_INT_RO		0x0064
+#define XP_DRIVER_SCRATCH_0		0x0068
+#define XP_DRIVER_SCRATCH_1		0x006c
+#define XP_INT_EN			0x0078
+#define XP_I2C_MUTEX			0x0080
+#define XP_MDIO_MUTEX			0x0084
+
+/* MAC Control register entry bit positions and sizes */
+#define XP_DRIVER_INT_REQ_REQUEST_INDEX		0
+#define XP_DRIVER_INT_REQ_REQUEST_WIDTH		1
+#define XP_DRIVER_INT_RO_STATUS_INDEX		0
+#define XP_DRIVER_INT_RO_STATUS_WIDTH		1
+#define XP_DRIVER_SCRATCH_0_COMMAND_INDEX	0
+#define XP_DRIVER_SCRATCH_0_COMMAND_WIDTH	8
+#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_INDEX	8
+#define XP_DRIVER_SCRATCH_0_SUB_COMMAND_WIDTH	8
+#define XP_ECC_CNT0_RX_DED_INDEX		24
+#define XP_ECC_CNT0_RX_DED_WIDTH		8
+#define XP_ECC_CNT0_RX_SEC_INDEX		16
+#define XP_ECC_CNT0_RX_SEC_WIDTH		8
+#define XP_ECC_CNT0_TX_DED_INDEX		8
+#define XP_ECC_CNT0_TX_DED_WIDTH		8
+#define XP_ECC_CNT0_TX_SEC_INDEX		0
+#define XP_ECC_CNT0_TX_SEC_WIDTH		8
+#define XP_ECC_CNT1_DESC_DED_INDEX		8
+#define XP_ECC_CNT1_DESC_DED_WIDTH		8
+#define XP_ECC_CNT1_DESC_SEC_INDEX		0
+#define XP_ECC_CNT1_DESC_SEC_WIDTH		8
+#define XP_ECC_IER_DESC_DED_INDEX		0
+#define XP_ECC_IER_DESC_DED_WIDTH		1
+#define XP_ECC_IER_DESC_SEC_INDEX		1
+#define XP_ECC_IER_DESC_SEC_WIDTH		1
+#define XP_ECC_IER_RX_DED_INDEX			2
+#define XP_ECC_IER_RX_DED_WIDTH			1
+#define XP_ECC_IER_RX_SEC_INDEX			3
+#define XP_ECC_IER_RX_SEC_WIDTH			1
+#define XP_ECC_IER_TX_DED_INDEX			4
+#define XP_ECC_IER_TX_DED_WIDTH			1
+#define XP_ECC_IER_TX_SEC_INDEX			5
+#define XP_ECC_IER_TX_SEC_WIDTH			1
+#define XP_ECC_ISR_DESC_DED_INDEX		0
+#define XP_ECC_ISR_DESC_DED_WIDTH		1
+#define XP_ECC_ISR_DESC_SEC_INDEX		1
+#define XP_ECC_ISR_DESC_SEC_WIDTH		1
+#define XP_ECC_ISR_RX_DED_INDEX			2
+#define XP_ECC_ISR_RX_DED_WIDTH			1
+#define XP_ECC_ISR_RX_SEC_INDEX			3
+#define XP_ECC_ISR_RX_SEC_WIDTH			1
+#define XP_ECC_ISR_TX_DED_INDEX			4
+#define XP_ECC_ISR_TX_DED_WIDTH			1
+#define XP_ECC_ISR_TX_SEC_INDEX			5
+#define XP_ECC_ISR_TX_SEC_WIDTH			1
+#define XP_I2C_MUTEX_BUSY_INDEX			31
+#define XP_I2C_MUTEX_BUSY_WIDTH			1
+#define XP_I2C_MUTEX_ID_INDEX			29
+#define XP_I2C_MUTEX_ID_WIDTH			2
+#define XP_I2C_MUTEX_ACTIVE_INDEX		0
+#define XP_I2C_MUTEX_ACTIVE_WIDTH		1
+#define XP_MAC_ADDR_HI_VALID_INDEX		31
+#define XP_MAC_ADDR_HI_VALID_WIDTH		1
+#define XP_PROP_0_CONN_TYPE_INDEX		28
+#define XP_PROP_0_CONN_TYPE_WIDTH		3
+#define XP_PROP_0_MDIO_ADDR_INDEX		16
+#define XP_PROP_0_MDIO_ADDR_WIDTH		5
+#define XP_PROP_0_PORT_ID_INDEX			0
+#define XP_PROP_0_PORT_ID_WIDTH			8
+#define XP_PROP_0_PORT_MODE_INDEX		8
+#define XP_PROP_0_PORT_MODE_WIDTH		4
+#define XP_PROP_0_PORT_SPEEDS_INDEX		23
+#define XP_PROP_0_PORT_SPEEDS_WIDTH		4
+#define XP_PROP_1_MAX_RX_DMA_INDEX		24
+#define XP_PROP_1_MAX_RX_DMA_WIDTH		5
+#define XP_PROP_1_MAX_RX_QUEUES_INDEX		8
+#define XP_PROP_1_MAX_RX_QUEUES_WIDTH		5
+#define XP_PROP_1_MAX_TX_DMA_INDEX		16
+#define XP_PROP_1_MAX_TX_DMA_WIDTH		5
+#define XP_PROP_1_MAX_TX_QUEUES_INDEX		0
+#define XP_PROP_1_MAX_TX_QUEUES_WIDTH		5
+#define XP_PROP_2_RX_FIFO_SIZE_INDEX		16
+#define XP_PROP_2_RX_FIFO_SIZE_WIDTH		16
+#define XP_PROP_2_TX_FIFO_SIZE_INDEX		0
+#define XP_PROP_2_TX_FIFO_SIZE_WIDTH		16
+#define XP_PROP_3_GPIO_MASK_INDEX		28
+#define XP_PROP_3_GPIO_MASK_WIDTH		4
+#define XP_PROP_3_GPIO_MOD_ABS_INDEX		20
+#define XP_PROP_3_GPIO_MOD_ABS_WIDTH		4
+#define XP_PROP_3_GPIO_RATE_SELECT_INDEX	16
+#define XP_PROP_3_GPIO_RATE_SELECT_WIDTH	4
+#define XP_PROP_3_GPIO_RX_LOS_INDEX		24
+#define XP_PROP_3_GPIO_RX_LOS_WIDTH		4
+#define XP_PROP_3_GPIO_TX_FAULT_INDEX		12
+#define XP_PROP_3_GPIO_TX_FAULT_WIDTH		4
+#define XP_PROP_3_GPIO_ADDR_INDEX		8
+#define XP_PROP_3_GPIO_ADDR_WIDTH		3
+#define XP_PROP_3_MDIO_RESET_INDEX		0
+#define XP_PROP_3_MDIO_RESET_WIDTH		2
+#define XP_PROP_3_MDIO_RESET_I2C_ADDR_INDEX	8
+#define XP_PROP_3_MDIO_RESET_I2C_ADDR_WIDTH	3
+#define XP_PROP_3_MDIO_RESET_I2C_GPIO_INDEX	12
+#define XP_PROP_3_MDIO_RESET_I2C_GPIO_WIDTH	4
+#define XP_PROP_3_MDIO_RESET_INT_GPIO_INDEX	4
+#define XP_PROP_3_MDIO_RESET_INT_GPIO_WIDTH	2
+#define XP_PROP_4_MUX_ADDR_HI_INDEX		8
+#define XP_PROP_4_MUX_ADDR_HI_WIDTH		5
+#define XP_PROP_4_MUX_ADDR_LO_INDEX		0
+#define XP_PROP_4_MUX_ADDR_LO_WIDTH		3
+#define XP_PROP_4_MUX_CHAN_INDEX		4
+#define XP_PROP_4_MUX_CHAN_WIDTH		3
+#define XP_PROP_4_REDRV_ADDR_INDEX		16
+#define XP_PROP_4_REDRV_ADDR_WIDTH		7
+#define XP_PROP_4_REDRV_IF_INDEX		23
+#define XP_PROP_4_REDRV_IF_WIDTH		1
+#define XP_PROP_4_REDRV_LANE_INDEX		24
+#define XP_PROP_4_REDRV_LANE_WIDTH		3
+#define XP_PROP_4_REDRV_MODEL_INDEX		28
+#define XP_PROP_4_REDRV_MODEL_WIDTH		3
+#define XP_PROP_4_REDRV_PRESENT_INDEX		31
+#define XP_PROP_4_REDRV_PRESENT_WIDTH		1
+
+/* I2C Control register offsets */
+#define IC_CON					0x0000
+#define IC_TAR					0x0004
+#define IC_DATA_CMD				0x0010
+#define IC_INTR_STAT				0x002c
+#define IC_INTR_MASK				0x0030
+#define IC_RAW_INTR_STAT			0x0034
+#define IC_CLR_INTR				0x0040
+#define IC_CLR_TX_ABRT				0x0054
+#define IC_CLR_STOP_DET				0x0060
+#define IC_ENABLE				0x006c
+#define IC_TXFLR				0x0074
+#define IC_RXFLR				0x0078
+#define IC_TX_ABRT_SOURCE			0x0080
+#define IC_ENABLE_STATUS			0x009c
+#define IC_COMP_PARAM_1				0x00f4
+
+/* I2C Control register entry bit positions and sizes */
+#define IC_COMP_PARAM_1_MAX_SPEED_MODE_INDEX	2
+#define IC_COMP_PARAM_1_MAX_SPEED_MODE_WIDTH	2
+#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_INDEX	8
+#define IC_COMP_PARAM_1_RX_BUFFER_DEPTH_WIDTH	8
+#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_INDEX	16
+#define IC_COMP_PARAM_1_TX_BUFFER_DEPTH_WIDTH	8
+#define IC_CON_MASTER_MODE_INDEX		0
+#define IC_CON_MASTER_MODE_WIDTH		1
+#define IC_CON_RESTART_EN_INDEX			5
+#define IC_CON_RESTART_EN_WIDTH			1
+#define IC_CON_RX_FIFO_FULL_HOLD_INDEX		9
+#define IC_CON_RX_FIFO_FULL_HOLD_WIDTH		1
+#define IC_CON_SLAVE_DISABLE_INDEX		6
+#define IC_CON_SLAVE_DISABLE_WIDTH		1
+#define IC_CON_SPEED_INDEX			1
+#define IC_CON_SPEED_WIDTH			2
+#define IC_DATA_CMD_CMD_INDEX			8
+#define IC_DATA_CMD_CMD_WIDTH			1
+#define IC_DATA_CMD_STOP_INDEX			9
+#define IC_DATA_CMD_STOP_WIDTH			1
+#define IC_ENABLE_ABORT_INDEX			1
+#define IC_ENABLE_ABORT_WIDTH			1
+#define IC_ENABLE_EN_INDEX			0
+#define IC_ENABLE_EN_WIDTH			1
+#define IC_ENABLE_STATUS_EN_INDEX		0
+#define IC_ENABLE_STATUS_EN_WIDTH		1
+#define IC_INTR_MASK_TX_EMPTY_INDEX		4
+#define IC_INTR_MASK_TX_EMPTY_WIDTH		1
+#define IC_RAW_INTR_STAT_RX_FULL_INDEX		2
+#define IC_RAW_INTR_STAT_RX_FULL_WIDTH		1
+#define IC_RAW_INTR_STAT_STOP_DET_INDEX		9
+#define IC_RAW_INTR_STAT_STOP_DET_WIDTH		1
+#define IC_RAW_INTR_STAT_TX_ABRT_INDEX		6
+#define IC_RAW_INTR_STAT_TX_ABRT_WIDTH		1
+#define IC_RAW_INTR_STAT_TX_EMPTY_INDEX		4
+#define IC_RAW_INTR_STAT_TX_EMPTY_WIDTH		1
+
+/* I2C Control register values */
+#define IC_TX_ABRT_7B_ADDR_NOACK		0x0001
+#define IC_TX_ABRT_ARB_LOST			0x1000
+
 /* Descriptor/Packet entry bit positions and sizes */
 #define RX_PACKET_ERRORS_CRC_INDEX		2
 #define RX_PACKET_ERRORS_CRC_WIDTH		1
@@ -1027,6 +1256,10 @@
 #define MDIO_PMA_10GBR_FECCTRL		0x00ab
 #endif
 
+#ifndef MDIO_PCS_DIG_CTRL
+#define MDIO_PCS_DIG_CTRL		0x8000
+#endif
+
 #ifndef MDIO_AN_XNP
 #define MDIO_AN_XNP			0x0016
 #endif
@@ -1047,11 +1280,48 @@
 #define MDIO_AN_INT			0x8002
 #endif
 
+#ifndef MDIO_VEND2_AN_ADVERTISE
+#define MDIO_VEND2_AN_ADVERTISE		0x0004
+#endif
+
+#ifndef MDIO_VEND2_AN_LP_ABILITY
+#define MDIO_VEND2_AN_LP_ABILITY	0x0005
+#endif
+
+#ifndef MDIO_VEND2_AN_CTRL
+#define MDIO_VEND2_AN_CTRL		0x8001
+#endif
+
+#ifndef MDIO_VEND2_AN_STAT
+#define MDIO_VEND2_AN_STAT		0x8002
+#endif
+
 #ifndef MDIO_CTRL1_SPEED1G
 #define MDIO_CTRL1_SPEED1G		(MDIO_CTRL1_SPEED10G & ~BMCR_SPEED100)
 #endif
 
+#ifndef MDIO_VEND2_CTRL1_AN_ENABLE
+#define MDIO_VEND2_CTRL1_AN_ENABLE	BIT(12)
+#endif
+
+#ifndef MDIO_VEND2_CTRL1_AN_RESTART
+#define MDIO_VEND2_CTRL1_AN_RESTART	BIT(9)
+#endif
+
+#ifndef MDIO_VEND2_CTRL1_SS6
+#define MDIO_VEND2_CTRL1_SS6		BIT(6)
+#endif
+
+#ifndef MDIO_VEND2_CTRL1_SS13
+#define MDIO_VEND2_CTRL1_SS13		BIT(13)
+#endif
+
 /* MDIO mask values */
+#define XGBE_AN_CL73_INT_CMPLT		BIT(0)
+#define XGBE_AN_CL73_INC_LINK		BIT(1)
+#define XGBE_AN_CL73_PG_RCV		BIT(2)
+#define XGBE_AN_CL73_INT_MASK		0x07
+
 #define XGBE_XNP_MCF_NULL_MESSAGE	0x001
 #define XGBE_XNP_ACK_PROCESSED		BIT(12)
 #define XGBE_XNP_MP_FORMATTED		BIT(13)
@@ -1060,6 +1330,19 @@
 #define XGBE_KR_TRAINING_START		BIT(0)
 #define XGBE_KR_TRAINING_ENABLE		BIT(1)
 
+#define XGBE_PCS_CL37_BP		BIT(12)
+
+#define XGBE_AN_CL37_INT_CMPLT		BIT(0)
+#define XGBE_AN_CL37_INT_MASK		0x01
+
+#define XGBE_AN_CL37_HD_MASK		0x40
+#define XGBE_AN_CL37_FD_MASK		0x20
+
+#define XGBE_AN_CL37_PCS_MODE_MASK	0x06
+#define XGBE_AN_CL37_PCS_MODE_BASEX	0x00
+#define XGBE_AN_CL37_PCS_MODE_SGMII	0x04
+#define XGBE_AN_CL37_TX_CONFIG_MASK	0x08
+
 /* Bit setting and getting macros
  *  The get macro will extract the current bit field value from within
  *  the variable
@@ -1195,12 +1478,28 @@ do {									\
 /* Macros for building, reading or writing register values or bits
  * within the register values of XPCS registers.
  */
-#define XPCS_IOWRITE(_pdata, _off, _val)				\
+#define XPCS_GET_BITS(_var, _prefix, _field)				\
+	GET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH)
+
+#define XPCS_SET_BITS(_var, _prefix, _field, _val)			\
+	SET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH, (_val))
+
+#define XPCS32_IOWRITE(_pdata, _off, _val)				\
 	iowrite32(_val, (_pdata)->xpcs_regs + (_off))
 
-#define XPCS_IOREAD(_pdata, _off)					\
+#define XPCS32_IOREAD(_pdata, _off)					\
 	ioread32((_pdata)->xpcs_regs + (_off))
 
+#define XPCS16_IOWRITE(_pdata, _off, _val)				\
+	iowrite16(_val, (_pdata)->xpcs_regs + (_off))
+
+#define XPCS16_IOREAD(_pdata, _off)					\
+	ioread16((_pdata)->xpcs_regs + (_off))
+
 /* Macros for building, reading or writing register values or bits
  * within the register values of SerDes integration registers.
  */
@@ -1278,6 +1577,72 @@ do {									\
 } while (0)
 
 /* Macros for building, reading or writing register values or bits
+ * within the register values of MAC Control registers.
+ */
+#define XP_GET_BITS(_var, _prefix, _field)				\
+	GET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH)
+
+#define XP_SET_BITS(_var, _prefix, _field, _val)			\
+	SET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH, (_val))
+
+#define XP_IOREAD(_pdata, _reg)						\
+	ioread32((_pdata)->xprop_regs + (_reg))
+
+#define XP_IOREAD_BITS(_pdata, _reg, _field)				\
+	GET_BITS(XP_IOREAD((_pdata), (_reg)),				\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XP_IOWRITE(_pdata, _reg, _val)					\
+	iowrite32((_val), (_pdata)->xprop_regs + (_reg))
+
+#define XP_IOWRITE_BITS(_pdata, _reg, _field, _val)			\
+do {									\
+	u32 reg_val = XP_IOREAD((_pdata), (_reg));			\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XP_IOWRITE((_pdata), (_reg), reg_val);				\
+} while (0)
+
+/* Macros for building, reading or writing register values or bits
+ * within the register values of I2C Control registers.
+ */
+#define XI2C_GET_BITS(_var, _prefix, _field)				\
+	GET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH)
+
+#define XI2C_SET_BITS(_var, _prefix, _field, _val)			\
+	SET_BITS((_var),						\
+		 _prefix##_##_field##_INDEX,				\
+		 _prefix##_##_field##_WIDTH, (_val))
+
+#define XI2C_IOREAD(_pdata, _reg)					\
+	ioread32((_pdata)->xi2c_regs + (_reg))
+
+#define XI2C_IOREAD_BITS(_pdata, _reg, _field)				\
+	GET_BITS(XI2C_IOREAD((_pdata), (_reg)),				\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH)
+
+#define XI2C_IOWRITE(_pdata, _reg, _val)				\
+	iowrite32((_val), (_pdata)->xi2c_regs + (_reg))
+
+#define XI2C_IOWRITE_BITS(_pdata, _reg, _field, _val)			\
+do {									\
+	u32 reg_val = XI2C_IOREAD((_pdata), (_reg));			\
+	SET_BITS(reg_val,						\
+		 _reg##_##_field##_INDEX,				\
+		 _reg##_##_field##_WIDTH, (_val));			\
+	XI2C_IOWRITE((_pdata), (_reg), reg_val);			\
+} while (0)
+
+/* Macros for building, reading or writing register values or bits
  * using MDIO.  Different from above because of the use of standardized
  * Linux include values.  No shifting is performed with the bit
  * operations, everything works on mask values.
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
index 96f485ab612e..7546b660d6b5 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-debugfs.c
@@ -153,7 +153,7 @@ static ssize_t xgbe_common_write(const char __user *buffer, size_t count,
 	int ret;
 
 	if (*ppos != 0)
-		return 0;
+		return -EINVAL;
 
 	if (count >= sizeof(workarea))
 		return -ENOSPC;
@@ -316,6 +316,126 @@ static const struct file_operations xpcs_reg_value_fops = {
 	.write = xpcs_reg_value_write,
 };
 
+static ssize_t xprop_reg_addr_read(struct file *filp, char __user *buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+
+	return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xprop_reg);
+}
+
+static ssize_t xprop_reg_addr_write(struct file *filp,
+				    const char __user *buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+
+	return xgbe_common_write(buffer, count, ppos,
+				 &pdata->debugfs_xprop_reg);
+}
+
+/* debugfs read handler for xprop_register_value */
+static ssize_t xprop_reg_value_read(struct file *filp, char __user *buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	/* Offset comes from debugfs_xprop_reg (set through xprop_register) */
+	unsigned int regval = XP_IOREAD(pdata, pdata->debugfs_xprop_reg);
+
+	return xgbe_common_read(buffer, count, ppos, regval);
+}
+
+/* debugfs write handler for xprop_register_value */
+static ssize_t xprop_reg_value_write(struct file *filp,
+				     const char __user *buffer,
+				     size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int regval;
+	ssize_t ret;
+
+	ret = xgbe_common_write(buffer, count, ppos, &regval);
+	/* Leave the register untouched if xgbe_common_write() failed */
+	if (ret >= 0)
+		XP_IOWRITE(pdata, pdata->debugfs_xprop_reg, regval);
+
+	return ret;
+}
+
+static const struct file_operations xprop_reg_addr_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xprop_reg_addr_read,
+	.write = xprop_reg_addr_write,
+};
+
+static const struct file_operations xprop_reg_value_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xprop_reg_value_read,
+	.write = xprop_reg_value_write,
+};
+
+static ssize_t xi2c_reg_addr_read(struct file *filp, char __user *buffer,
+				  size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+
+	return xgbe_common_read(buffer, count, ppos, pdata->debugfs_xi2c_reg);
+}
+
+static ssize_t xi2c_reg_addr_write(struct file *filp,
+				   const char __user *buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+
+	return xgbe_common_write(buffer, count, ppos,
+				 &pdata->debugfs_xi2c_reg);
+}
+
+/* debugfs read handler for xi2c_register_value */
+static ssize_t xi2c_reg_value_read(struct file *filp, char __user *buffer,
+				   size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	/* Offset comes from debugfs_xi2c_reg (set through xi2c_register) */
+	unsigned int regval = XI2C_IOREAD(pdata, pdata->debugfs_xi2c_reg);
+
+	return xgbe_common_read(buffer, count, ppos, regval);
+}
+
+/* debugfs write handler for xi2c_register_value */
+static ssize_t xi2c_reg_value_write(struct file *filp,
+				    const char __user *buffer,
+				    size_t count, loff_t *ppos)
+{
+	struct xgbe_prv_data *pdata = filp->private_data;
+	unsigned int regval;
+	ssize_t ret;
+
+	ret = xgbe_common_write(buffer, count, ppos, &regval);
+	/* Leave the register untouched if xgbe_common_write() failed */
+	if (ret >= 0)
+		XI2C_IOWRITE(pdata, pdata->debugfs_xi2c_reg, regval);
+
+	return ret;
+}
+
+static const struct file_operations xi2c_reg_addr_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xi2c_reg_addr_read,
+	.write = xi2c_reg_addr_write,
+};
+
+static const struct file_operations xi2c_reg_value_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read =  xi2c_reg_value_read,
+	.write = xi2c_reg_value_write,
+};
+
 void xgbe_debugfs_init(struct xgbe_prv_data *pdata)
 {
 	struct dentry *pfile;
@@ -367,6 +487,38 @@ void xgbe_debugfs_init(struct xgbe_prv_data *pdata)
 	if (!pfile)
 		netdev_err(pdata->netdev, "debugfs_create_file failed\n");
 
+	if (pdata->xprop_regs) {
+		pfile = debugfs_create_file("xprop_register", 0600,
+					    pdata->xgbe_debugfs, pdata,
+					    &xprop_reg_addr_fops);
+		if (!pfile)
+			netdev_err(pdata->netdev,
+				   "debugfs_create_file failed\n");
+
+		pfile = debugfs_create_file("xprop_register_value", 0600,
+					    pdata->xgbe_debugfs, pdata,
+					    &xprop_reg_value_fops);
+		if (!pfile)
+			netdev_err(pdata->netdev,
+				   "debugfs_create_file failed\n");
+	}
+
+	if (pdata->xi2c_regs) {
+		pfile = debugfs_create_file("xi2c_register", 0600,
+					    pdata->xgbe_debugfs, pdata,
+					    &xi2c_reg_addr_fops);
+		if (!pfile)
+			netdev_err(pdata->netdev,
+				   "debugfs_create_file failed\n");
+
+		pfile = debugfs_create_file("xi2c_register_value", 0600,
+					    pdata->xgbe_debugfs, pdata,
+					    &xi2c_reg_value_fops);
+		if (!pfile)
+			netdev_err(pdata->netdev,
+				   "debugfs_create_file failed\n");
+	}
+
 	kfree(buf);
 }
 
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
index 1babcc11a248..aaf0350076a9 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-dev.c
@@ -123,6 +123,11 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
+static inline unsigned int xgbe_get_max_frame(struct xgbe_prv_data *pdata)
+{
+	return pdata->netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+}
+
 static unsigned int xgbe_usec_to_riwt(struct xgbe_prv_data *pdata,
 				      unsigned int usec)
 {
@@ -491,6 +496,27 @@ static void xgbe_config_rss(struct xgbe_prv_data *pdata)
 			   "error configuring RSS, RSS disabled\n");
 }
 
+/* True if a priority steered to @queue maps to a PFC-enabled traffic class */
+static bool xgbe_is_pfc_queue(struct xgbe_prv_data *pdata,
+			      unsigned int queue)
+{
+	unsigned int p;
+
+	for (p = 0; p < IEEE_8021QAZ_MAX_TCS; p++) {
+		unsigned int tc;
+
+		/* Priorities handled by other queues are irrelevant */
+		if (pdata->prio2q_map[p] == queue) {
+			/* ets and pfc are dereferenced unchecked here */
+			tc = pdata->ets->prio_tc[p];
+			if (pdata->pfc->pfc_en & (1 << tc))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 static int xgbe_disable_tx_flow_control(struct xgbe_prv_data *pdata)
 {
 	unsigned int max_q_count, q_count;
@@ -528,27 +554,14 @@ static int xgbe_enable_tx_flow_control(struct xgbe_prv_data *pdata)
 	for (i = 0; i < pdata->rx_q_count; i++) {
 		unsigned int ehfc = 0;
 
-		if (pfc && ets) {
-			unsigned int prio;
-
-			for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) {
-				unsigned int tc;
-
-				/* Does this queue handle the priority? */
-				if (pdata->prio2q_map[prio] != i)
-					continue;
-
-				/* Get the Traffic Class for this priority */
-				tc = ets->prio_tc[prio];
-
-				/* Check if flow control should be enabled */
-				if (pfc->pfc_en & (1 << tc)) {
+		if (pdata->rx_rfd[i]) {
+			/* Flow control thresholds are established */
+			if (pfc && ets) {
+				if (xgbe_is_pfc_queue(pdata, i))
 					ehfc = 1;
-					break;
-				}
+			} else {
+				ehfc = 1;
 			}
-		} else {
-			ehfc = 1;
 		}
 
 		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, EHFC, ehfc);
@@ -633,6 +646,11 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
 	unsigned int dma_ch_isr, dma_ch_ier;
 	unsigned int i;
 
+	/* Set the interrupt mode if supported */
+	if (pdata->channel_irq_mode)
+		XGMAC_IOWRITE_BITS(pdata, DMA_MR, INTM,
+				   pdata->channel_irq_mode);
+
 	channel = pdata->channel;
 	for (i = 0; i < pdata->channel_count; i++, channel++) {
 		/* Clear all the interrupts which are set */
@@ -654,19 +672,21 @@ static void xgbe_enable_dma_interrupts(struct xgbe_prv_data *pdata)
 		if (channel->tx_ring) {
 			/* Enable the following Tx interrupts
 			 *   TIE  - Transmit Interrupt Enable (unless using
-			 *          per channel interrupts)
+			 *          per channel interrupts in edge triggered
+			 *          mode)
 			 */
-			if (!pdata->per_channel_irq)
+			if (!pdata->per_channel_irq || pdata->channel_irq_mode)
 				XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, TIE, 1);
 		}
 		if (channel->rx_ring) {
 			/* Enable following Rx interrupts
 			 *   RBUE - Receive Buffer Unavailable Enable
 			 *   RIE  - Receive Interrupt Enable (unless using
-			 *          per channel interrupts)
+			 *          per channel interrupts in edge triggered
+			 *          mode)
 			 */
 			XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RBUE, 1);
-			if (!pdata->per_channel_irq)
+			if (!pdata->per_channel_irq || pdata->channel_irq_mode)
 				XGMAC_SET_BITS(dma_ch_ier, DMA_CH_IER, RIE, 1);
 		}
 
@@ -702,34 +722,90 @@ static void xgbe_enable_mac_interrupts(struct xgbe_prv_data *pdata)
 	/* Enable all counter interrupts */
 	XGMAC_IOWRITE_BITS(pdata, MMC_RIER, ALL_INTERRUPTS, 0xffffffff);
 	XGMAC_IOWRITE_BITS(pdata, MMC_TIER, ALL_INTERRUPTS, 0xffffffff);
+
+	/* Enable MDIO single command completion interrupt */
+	XGMAC_IOWRITE_BITS(pdata, MAC_MDIOIER, SNGLCOMPIE, 1);
 }
 
-static int xgbe_set_gmii_speed(struct xgbe_prv_data *pdata)
+static void xgbe_enable_ecc_interrupts(struct xgbe_prv_data *pdata)
 {
-	if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x3)
-		return 0;
+	unsigned int ecc_isr, ecc_ier = 0;
 
-	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x3);
+	if (!pdata->vdata->ecc_support)
+		return;
 
-	return 0;
+	/* Clear all the interrupts which are set */
+	ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR);
+	XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
+
+	/* Enable ECC interrupts */
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 1);
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 1);
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 1);
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 1);
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 1);
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 1);
+
+	XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier);
 }
 
-static int xgbe_set_gmii_2500_speed(struct xgbe_prv_data *pdata)
+static void xgbe_disable_ecc_ded(struct xgbe_prv_data *pdata)
 {
-	if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0x2)
-		return 0;
+	unsigned int ecc_ier;
 
-	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0x2);
+	ecc_ier = XP_IOREAD(pdata, XP_ECC_IER);
 
-	return 0;
+	/* Disable ECC DED interrupts */
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_DED, 0);
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_DED, 0);
+	XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_DED, 0);
+
+	XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier);
 }
 
-static int xgbe_set_xgmii_speed(struct xgbe_prv_data *pdata)
+static void xgbe_disable_ecc_sec(struct xgbe_prv_data *pdata,
+				 enum xgbe_ecc_sec sec)
 {
-	if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) == 0)
-		return 0;
+	unsigned int ecc_ier;
+
+	ecc_ier = XP_IOREAD(pdata, XP_ECC_IER);
+
+	/* Clear only the ECC SEC interrupt enable bit selected by @sec */
+	switch (sec) {
+	case XGBE_ECC_SEC_TX:
+		XP_SET_BITS(ecc_ier, XP_ECC_IER, TX_SEC, 0);
+		break;
+	case XGBE_ECC_SEC_RX:
+		XP_SET_BITS(ecc_ier, XP_ECC_IER, RX_SEC, 0);
+		break;
+	case XGBE_ECC_SEC_DESC:
+		XP_SET_BITS(ecc_ier, XP_ECC_IER, DESC_SEC, 0);
+		break;
+	}
 
-	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, 0);
+	XP_IOWRITE(pdata, XP_ECC_IER, ecc_ier);
+}
+
+static int xgbe_set_speed(struct xgbe_prv_data *pdata, int speed)
+{
+	unsigned int ss;
+
+	switch (speed) {
+	case SPEED_1000:
+		ss = 0x03;
+		break;
+	case SPEED_2500:
+		ss = 0x02;
+		break;
+	case SPEED_10000:
+		ss = 0x00;
+		break;
+	default:
+		return -EINVAL;
+	}
+	/* Rewrite MAC_TCR.SS only when it differs from the requested value */
+	if (XGMAC_IOREAD_BITS(pdata, MAC_TCR, SS) != ss)
+		XGMAC_IOWRITE_BITS(pdata, MAC_TCR, SS, ss);
 
 	return 0;
 }
@@ -1019,8 +1095,101 @@ static int xgbe_config_rx_mode(struct xgbe_prv_data *pdata)
 	return 0;
 }
 
-static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
-			      int mmd_reg)
+static int xgbe_clr_gpio(struct xgbe_prv_data *pdata, unsigned int gpio)
+{
+	unsigned int reg;
+
+	if (gpio > 15)
+		return -EINVAL;
+
+	reg = XGMAC_IOREAD(pdata, MAC_GPIOSR);
+	/* 1U: gpio 15 selects bit 31, which a signed int cannot hold */
+	reg &= ~(1U << (gpio + 16));
+	XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg);
+
+	return 0;
+}
+
+static int xgbe_set_gpio(struct xgbe_prv_data *pdata, unsigned int gpio)
+{
+	unsigned int reg;
+
+	if (gpio > 15)
+		return -EINVAL;
+
+	reg = XGMAC_IOREAD(pdata, MAC_GPIOSR);
+	/* 1U: gpio 15 selects bit 31, which a signed int cannot hold */
+	reg |= (1U << (gpio + 16));
+	XGMAC_IOWRITE(pdata, MAC_GPIOSR, reg);
+
+	return 0;
+}
+
+static int xgbe_read_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad,
+				 int mmd_reg)
+{
+	unsigned long flags;
+	unsigned int mmd_address, index, offset;
+	int mmd_data;
+
+	if (mmd_reg & MII_ADDR_C45)
+		mmd_address = mmd_reg & ~MII_ADDR_C45;
+	else
+		mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff);
+
+	/* The PCS registers are accessed using mmio and indirect addressing,
+	 * in two phases: an address phase and a data phase.
+	 *
+	 * The mmio interface is based on 16-bit offsets and values, so the
+	 * address is shifted left 1 bit and 16 bits of data are read. The
+	 * shifted address bits outside xpcs_window_mask are written to the
+	 * window select register; the bits inside the mask give the offset
+	 * from xpcs_window that is then read, all under xpcs_lock.
+	 */
+	mmd_address <<= 1;
+	index = mmd_address & ~pdata->xpcs_window_mask;
+	offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask);
+
+	spin_lock_irqsave(&pdata->xpcs_lock, flags);
+	XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index);
+	mmd_data = XPCS16_IOREAD(pdata, offset);
+	spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
+
+	return mmd_data;
+}
+
+static void xgbe_write_mmd_regs_v2(struct xgbe_prv_data *pdata, int prtad,
+				   int mmd_reg, int mmd_data)
+{
+	unsigned long flags;
+	unsigned int mmd_address, index, offset;
+
+	if (mmd_reg & MII_ADDR_C45)
+		mmd_address = mmd_reg & ~MII_ADDR_C45;
+	else
+		mmd_address = (pdata->mdio_mmd << 16) | (mmd_reg & 0xffff);
+
+	/* The PCS registers are accessed using mmio and indirect addressing,
+	 * in two phases: an address phase and a data phase.
+	 *
+	 * The mmio interface is based on 16-bit offsets and values, so the
+	 * address is shifted left 1 bit and 16 bits of data are written. The
+	 * shifted address bits outside xpcs_window_mask are written to the
+	 * window select register; the bits inside the mask give the offset
+	 * from xpcs_window that is then written, all under xpcs_lock.
+	 */
+	mmd_address <<= 1;
+	index = mmd_address & ~pdata->xpcs_window_mask;
+	offset = pdata->xpcs_window + (mmd_address & pdata->xpcs_window_mask);
+
+	spin_lock_irqsave(&pdata->xpcs_lock, flags);
+	XPCS32_IOWRITE(pdata, PCS_V2_WINDOW_SELECT, index);
+	XPCS16_IOWRITE(pdata, offset, mmd_data);
+	spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
+}
+
+static int xgbe_read_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad,
+				 int mmd_reg)
 {
 	unsigned long flags;
 	unsigned int mmd_address;
@@ -1041,15 +1210,15 @@ static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
 	 * offset 2 bits and reading 32 bits of data.
 	 */
 	spin_lock_irqsave(&pdata->xpcs_lock, flags);
-	XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8);
-	mmd_data = XPCS_IOREAD(pdata, (mmd_address & 0xff) << 2);
+	XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8);
+	mmd_data = XPCS32_IOREAD(pdata, (mmd_address & 0xff) << 2);
 	spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
 
 	return mmd_data;
 }
 
-static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
-				int mmd_reg, int mmd_data)
+static void xgbe_write_mmd_regs_v1(struct xgbe_prv_data *pdata, int prtad,
+				   int mmd_reg, int mmd_data)
 {
 	unsigned int mmd_address;
 	unsigned long flags;
@@ -1066,14 +1235,113 @@ static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
 	 *
 	 * The mmio interface is based on 32-bit offsets and values. All
 	 * register offsets must therefore be adjusted by left shifting the
-	 * offset 2 bits and reading 32 bits of data.
+	 * offset 2 bits and writing 32 bits of data.
 	 */
 	spin_lock_irqsave(&pdata->xpcs_lock, flags);
-	XPCS_IOWRITE(pdata, PCS_MMD_SELECT << 2, mmd_address >> 8);
-	XPCS_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data);
+	XPCS32_IOWRITE(pdata, PCS_V1_WINDOW_SELECT, mmd_address >> 8);
+	XPCS32_IOWRITE(pdata, (mmd_address & 0xff) << 2, mmd_data);
 	spin_unlock_irqrestore(&pdata->xpcs_lock, flags);
 }
 
+static int xgbe_read_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
+			      int mmd_reg)
+{
+	/* Only an explicit V1 access method takes the V1 path.
+	 * V2, and any other value, is read through
+	 * xgbe_read_mmd_regs_v2().
+	 */
+	if (pdata->vdata->xpcs_access == XGBE_XPCS_ACCESS_V1)
+		return xgbe_read_mmd_regs_v1(pdata, prtad, mmd_reg);
+
+	return xgbe_read_mmd_regs_v2(pdata, prtad, mmd_reg);
+}
+
+static void xgbe_write_mmd_regs(struct xgbe_prv_data *pdata, int prtad,
+				int mmd_reg, int mmd_data)
+{
+	/* Only an explicit V1 access method takes the V1 path; V2 and
+	 * any other value go through xgbe_write_mmd_regs_v2(). The
+	 * helpers are void, so they are called rather than returned.
+	 */
+	if (pdata->vdata->xpcs_access == XGBE_XPCS_ACCESS_V1)
+		xgbe_write_mmd_regs_v1(pdata, prtad, mmd_reg, mmd_data);
+	else
+		xgbe_write_mmd_regs_v2(pdata, prtad, mmd_reg, mmd_data);
+}
+
+static int xgbe_write_ext_mii_regs(struct xgbe_prv_data *pdata, int addr,
+				   int reg, u16 val)
+{
+	unsigned int mdio_sca, mdio_sccd;
+
+	reinit_completion(&pdata->mdio_complete);
+
+	mdio_sca = 0;
+	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
+	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+	XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
+
+	mdio_sccd = 0;
+	XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, DATA, val);
+	XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 1);
+	XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1);
+	XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd);
+
+	if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) {
+		netdev_err(pdata->netdev, "mdio write operation timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int xgbe_read_ext_mii_regs(struct xgbe_prv_data *pdata, int addr,
+				  int reg)
+{
+	unsigned int mdio_sca, mdio_sccd;
+
+	reinit_completion(&pdata->mdio_complete);
+
+	mdio_sca = 0;
+	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, REG, reg);
+	XGMAC_SET_BITS(mdio_sca, MAC_MDIOSCAR, DA, addr);
+	XGMAC_IOWRITE(pdata, MAC_MDIOSCAR, mdio_sca);
+
+	mdio_sccd = 0;
+	XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, CMD, 3);
+	XGMAC_SET_BITS(mdio_sccd, MAC_MDIOSCCDR, BUSY, 1);
+	XGMAC_IOWRITE(pdata, MAC_MDIOSCCDR, mdio_sccd);
+
+	if (!wait_for_completion_timeout(&pdata->mdio_complete, HZ)) {
+		netdev_err(pdata->netdev, "mdio read operation timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	return XGMAC_IOREAD_BITS(pdata, MAC_MDIOSCCDR, DATA);
+}
+
+static int xgbe_set_ext_mii_mode(struct xgbe_prv_data *pdata, unsigned int port,
+				 enum xgbe_mdio_mode mode)
+{
+	unsigned int reg_val = 0;
+
+	switch (mode) {
+	case XGBE_MDIO_MODE_CL22:
+		if (port > XGMAC_MAX_C22_PORT)
+			return -EINVAL;
+		reg_val |= (1 << port);	/* set only this port's bit */
+		break;
+	case XGBE_MDIO_MODE_CL45:
+		break;			/* reg_val stays 0 */
+	default:
+		return -EINVAL;
+	}
+	/* NOTE(review): full write, bits from earlier calls are dropped */
+	XGMAC_IOWRITE(pdata, MAC_MDIOCL22R, reg_val);
+
+	return 0;
+}
+
 static int xgbe_tx_complete(struct xgbe_ring_desc *rdesc)
 {
 	return !XGMAC_GET_BITS_LE(rdesc->desc3, TX_NORMAL_DESC3, OWN);
@@ -1264,14 +1532,21 @@ static u64 xgbe_get_tstamp_time(struct xgbe_prv_data *pdata)
 
 static u64 xgbe_get_tx_tstamp(struct xgbe_prv_data *pdata)
 {
-	unsigned int tx_snr;
+	unsigned int tx_snr, tx_ssr;
 	u64 nsec;
 
-	tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR);
+	if (pdata->vdata->tx_tstamp_workaround) {
+		tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR);
+		tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR);
+	} else {
+		tx_ssr = XGMAC_IOREAD(pdata, MAC_TXSSR);
+		tx_snr = XGMAC_IOREAD(pdata, MAC_TXSNR);
+	}
+
 	if (XGMAC_GET_BITS(tx_snr, MAC_TXSNR, TXTSSTSMIS))
 		return 0;
 
-	nsec = XGMAC_IOREAD(pdata, MAC_TXSSR);
+	nsec = tx_ssr;
 	nsec *= NSEC_PER_SEC;
 	nsec += tx_snr;
 
@@ -1327,106 +1602,6 @@ static int xgbe_config_tstamp(struct xgbe_prv_data *pdata,
 	return 0;
 }
 
-static void xgbe_config_tc(struct xgbe_prv_data *pdata)
-{
-	unsigned int offset, queue, prio;
-	u8 i;
-
-	netdev_reset_tc(pdata->netdev);
-	if (!pdata->num_tcs)
-		return;
-
-	netdev_set_num_tc(pdata->netdev, pdata->num_tcs);
-
-	for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) {
-		while ((queue < pdata->tx_q_count) &&
-		       (pdata->q2tc_map[queue] == i))
-			queue++;
-
-		netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n",
-			  i, offset, queue - 1);
-		netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset);
-		offset = queue;
-	}
-
-	if (!pdata->ets)
-		return;
-
-	for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
-		netdev_set_prio_tc_map(pdata->netdev, prio,
-				       pdata->ets->prio_tc[prio]);
-}
-
-static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata)
-{
-	struct ieee_ets *ets = pdata->ets;
-	unsigned int total_weight, min_weight, weight;
-	unsigned int mask, reg, reg_val;
-	unsigned int i, prio;
-
-	if (!ets)
-		return;
-
-	/* Set Tx to deficit weighted round robin scheduling algorithm (when
-	 * traffic class is using ETS algorithm)
-	 */
-	XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR);
-
-	/* Set Traffic Class algorithms */
-	total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt;
-	min_weight = total_weight / 100;
-	if (!min_weight)
-		min_weight = 1;
-
-	for (i = 0; i < pdata->hw_feat.tc_cnt; i++) {
-		/* Map the priorities to the traffic class */
-		mask = 0;
-		for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) {
-			if (ets->prio_tc[prio] == i)
-				mask |= (1 << prio);
-		}
-		mask &= 0xff;
-
-		netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n",
-			  i, mask);
-		reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG));
-		reg_val = XGMAC_IOREAD(pdata, reg);
-
-		reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3));
-		reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3));
-
-		XGMAC_IOWRITE(pdata, reg, reg_val);
-
-		/* Set the traffic class algorithm */
-		switch (ets->tc_tsa[i]) {
-		case IEEE_8021QAZ_TSA_STRICT:
-			netif_dbg(pdata, drv, pdata->netdev,
-				  "TC%u using SP\n", i);
-			XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
-					       MTL_TSA_SP);
-			break;
-		case IEEE_8021QAZ_TSA_ETS:
-			weight = total_weight * ets->tc_tx_bw[i] / 100;
-			weight = clamp(weight, min_weight, total_weight);
-
-			netif_dbg(pdata, drv, pdata->netdev,
-				  "TC%u using DWRR (weight %u)\n", i, weight);
-			XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
-					       MTL_TSA_ETS);
-			XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW,
-					       weight);
-			break;
-		}
-	}
-
-	xgbe_config_tc(pdata);
-}
-
-static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata)
-{
-	xgbe_config_flow_control(pdata);
-}
-
 static void xgbe_tx_start_xmit(struct xgbe_channel *channel,
 			       struct xgbe_ring *ring)
 {
@@ -1901,7 +2076,7 @@ static int xgbe_disable_int(struct xgbe_channel *channel,
 	return 0;
 }
 
-static int xgbe_exit(struct xgbe_prv_data *pdata)
+static int __xgbe_exit(struct xgbe_prv_data *pdata)
 {
 	unsigned int count = 2000;
 
@@ -1923,6 +2098,20 @@ static int xgbe_exit(struct xgbe_prv_data *pdata)
 	return 0;
 }
 
+static int xgbe_exit(struct xgbe_prv_data *pdata)
+{
+	int ret;
+
+	/* To guard against possible incorrectly generated interrupts,
+	 * issue the software reset twice.
+	 */
+	ret = __xgbe_exit(pdata);
+	if (ret)
+		return ret;
+
+	return __xgbe_exit(pdata);
+}
+
 static int xgbe_flush_tx_queues(struct xgbe_prv_data *pdata)
 {
 	unsigned int i, count;
@@ -2000,61 +2189,331 @@ static void xgbe_config_mtl_mode(struct xgbe_prv_data *pdata)
 	XGMAC_IOWRITE_BITS(pdata, MTL_OMR, RAA, MTL_RAA_SP);
 }
 
-static unsigned int xgbe_calculate_per_queue_fifo(unsigned int fifo_size,
-						  unsigned int queue_count)
+static void xgbe_queue_flow_control_threshold(struct xgbe_prv_data *pdata,
+					      unsigned int queue,
+					      unsigned int q_fifo_size)
+{
+	unsigned int frame_fifo_size;
+	unsigned int rfa, rfd;
+
+	frame_fifo_size = XGMAC_FLOW_CONTROL_ALIGN(xgbe_get_max_frame(pdata));
+
+	if (pdata->pfcq[queue] && (q_fifo_size > pdata->pfc_rfa)) {
+		/* PFC is active for this queue */
+		rfa = pdata->pfc_rfa;
+		rfd = rfa + frame_fifo_size;
+		if (rfd > XGMAC_FLOW_CONTROL_MAX)
+			rfd = XGMAC_FLOW_CONTROL_MAX;
+		if (rfa >= XGMAC_FLOW_CONTROL_MAX)
+			rfa = XGMAC_FLOW_CONTROL_MAX - XGMAC_FLOW_CONTROL_UNIT;
+	} else {
+		/* This path deals with just maximum frame sizes which are
+		 * limited to a jumbo frame of 9,000 (plus headers, etc.)
+		 * so we can never exceed the maximum allowable RFA/RFD
+		 * values.
+		 */
+		if (q_fifo_size <= 2048) {
+			/* rx_rfd to zero to signal no flow control */
+			pdata->rx_rfa[queue] = 0;
+			pdata->rx_rfd[queue] = 0;
+			return;
+		}
+
+		if (q_fifo_size <= 4096) {
+			/* Between 2048 and 4096 */
+			pdata->rx_rfa[queue] = 0;	/* Full - 1024 bytes */
+			pdata->rx_rfd[queue] = 1;	/* Full - 1536 bytes */
+			return;
+		}
+
+		if (q_fifo_size <= frame_fifo_size) {
+			/* Between 4096 and max-frame */
+			pdata->rx_rfa[queue] = 2;	/* Full - 2048 bytes */
+			pdata->rx_rfd[queue] = 5;	/* Full - 3584 bytes */
+			return;
+		}
+
+		if (q_fifo_size <= (frame_fifo_size * 3)) {
+			/* Between max-frame and 3 max-frames,
+			 * trigger if we get just over a frame of data and
+			 * resume when we have just under half a frame left.
+			 */
+			rfa = q_fifo_size - frame_fifo_size;
+			rfd = rfa + (frame_fifo_size / 2);
+		} else {
+			/* Above 3 max-frames - trigger when just over
+			 * 2 frames of space available
+			 */
+			rfa = frame_fifo_size * 2;
+			rfa += XGMAC_FLOW_CONTROL_UNIT;
+			rfd = rfa + frame_fifo_size;
+		}
+	}
+
+	pdata->rx_rfa[queue] = XGMAC_FLOW_CONTROL_VALUE(rfa);
+	pdata->rx_rfd[queue] = XGMAC_FLOW_CONTROL_VALUE(rfd);
+}
+
+static void xgbe_calculate_flow_control_threshold(struct xgbe_prv_data *pdata,
+						  unsigned int *fifo)
 {
 	unsigned int q_fifo_size;
-	unsigned int p_fifo;
+	unsigned int i;
 
-	/* Calculate the configured fifo size */
-	q_fifo_size = 1 << (fifo_size + 7);
+	for (i = 0; i < pdata->rx_q_count; i++) {
+		q_fifo_size = (fifo[i] + 1) * XGMAC_FIFO_UNIT;
 
+		xgbe_queue_flow_control_threshold(pdata, i, q_fifo_size);
+	}
+}
+
+static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata)
+{
+	unsigned int i;
+
+	for (i = 0; i < pdata->rx_q_count; i++) {
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA,
+				       pdata->rx_rfa[i]);
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD,
+				       pdata->rx_rfd[i]);
+	}
+}
+
+static unsigned int xgbe_get_tx_fifo_size(struct xgbe_prv_data *pdata)
+{
 	/* The configured value may not be the actual amount of fifo RAM */
-	q_fifo_size = min_t(unsigned int, XGBE_FIFO_MAX, q_fifo_size);
+	return min_t(unsigned int, pdata->tx_max_fifo_size,
+		     pdata->hw_feat.tx_fifo_size);
+}
 
-	q_fifo_size = q_fifo_size / queue_count;
+static unsigned int xgbe_get_rx_fifo_size(struct xgbe_prv_data *pdata)
+{
+	/* The configured value may not be the actual amount of fifo RAM */
+	return min_t(unsigned int, pdata->rx_max_fifo_size,
+		     pdata->hw_feat.rx_fifo_size);
+}
 
-	/* Each increment in the queue fifo size represents 256 bytes of
-	 * fifo, with 0 representing 256 bytes. Distribute the fifo equally
-	 * between the queues.
+static void xgbe_calculate_equal_fifo(unsigned int fifo_size,
+				      unsigned int queue_count,
+				      unsigned int *fifo)
+{
+	unsigned int q_fifo_size;
+	unsigned int p_fifo;
+	unsigned int i;
+
+	q_fifo_size = fifo_size / queue_count;
+
+	/* Calculate the fifo setting by dividing the queue's fifo size
+	 * by the fifo allocation increment (with 0 representing the
+	 * base allocation increment so decrement the result by 1).
 	 */
-	p_fifo = q_fifo_size / 256;
+	p_fifo = q_fifo_size / XGMAC_FIFO_UNIT;
 	if (p_fifo)
 		p_fifo--;
 
-	return p_fifo;
+	/* Distribute the fifo equally amongst the queues */
+	for (i = 0; i < queue_count; i++)
+		fifo[i] = p_fifo;
+}
+
+static unsigned int xgbe_set_nonprio_fifos(unsigned int fifo_size,
+					   unsigned int queue_count,
+					   unsigned int *fifo)
+{
+	unsigned int i;
+
+	BUILD_BUG_ON_NOT_POWER_OF_2(XGMAC_FIFO_MIN_ALLOC);
+
+	if (queue_count <= IEEE_8021QAZ_MAX_TCS)
+		return fifo_size;
+
+	/* Rx queues 9 and up are for specialized packets,
+	 * such as PTP or DCB control packets, etc. and
+	 * don't require a large fifo
+	 */
+	for (i = IEEE_8021QAZ_MAX_TCS; i < queue_count; i++) {
+		fifo[i] = (XGMAC_FIFO_MIN_ALLOC / XGMAC_FIFO_UNIT) - 1;
+		fifo_size -= XGMAC_FIFO_MIN_ALLOC;
+	}
+
+	return fifo_size;
+}
+
+static unsigned int xgbe_get_pfc_delay(struct xgbe_prv_data *pdata)
+{
+	unsigned int delay;
+
+	/* If a delay has been provided, use that */
+	if (pdata->pfc->delay)
+		return pdata->pfc->delay / 8;
+
+	/* Allow for two maximum size frames */
+	delay = xgbe_get_max_frame(pdata);
+	delay += XGMAC_ETH_PREAMBLE;
+	delay *= 2;
+
+	/* Allow for PFC frame */
+	delay += XGMAC_PFC_DATA_LEN;
+	delay += ETH_HLEN + ETH_FCS_LEN;
+	delay += XGMAC_ETH_PREAMBLE;
+
+	/* Allow for miscellaneous delays (LPI exit, cable, etc.) */
+	delay += XGMAC_PFC_DELAYS;
+
+	return delay;
+}
+
+static unsigned int xgbe_get_pfc_queues(struct xgbe_prv_data *pdata)
+{
+	unsigned int count, prio_queues;
+	unsigned int i;
+
+	if (!pdata->pfc->pfc_en)
+		return 0;
+
+	count = 0;
+	prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
+	for (i = 0; i < prio_queues; i++) {
+		if (!xgbe_is_pfc_queue(pdata, i))
+			continue;
+
+		pdata->pfcq[i] = 1;
+		count++;
+	}
+
+	return count;
+}
+
+static void xgbe_calculate_dcb_fifo(struct xgbe_prv_data *pdata,
+				    unsigned int fifo_size,
+				    unsigned int *fifo)
+{
+	unsigned int q_fifo_size, rem_fifo, addn_fifo;
+	unsigned int prio_queues;
+	unsigned int pfc_count;
+	unsigned int i;
+
+	q_fifo_size = XGMAC_FIFO_ALIGN(xgbe_get_max_frame(pdata));
+	prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
+	pfc_count = xgbe_get_pfc_queues(pdata);
+
+	if (!pfc_count || ((q_fifo_size * prio_queues) > fifo_size)) {
+		/* No traffic classes with PFC enabled or can't do lossless */
+		xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo);
+		return;
+	}
+
+	/* Calculate how much fifo we have to play with */
+	rem_fifo = fifo_size - (q_fifo_size * prio_queues);
+
+	/* Calculate how much more than base fifo PFC needs, which also
+	 * becomes the threshold activation point (RFA)
+	 */
+	pdata->pfc_rfa = xgbe_get_pfc_delay(pdata);
+	pdata->pfc_rfa = XGMAC_FLOW_CONTROL_ALIGN(pdata->pfc_rfa);
+
+	if (pdata->pfc_rfa > q_fifo_size) {
+		addn_fifo = pdata->pfc_rfa - q_fifo_size;
+		addn_fifo = XGMAC_FIFO_ALIGN(addn_fifo);
+	} else {
+		addn_fifo = 0;
+	}
+
+	/* Calculate DCB fifo settings:
+	 *   - distribute remaining fifo between the VLAN priority
+	 *     queues based on traffic class PFC enablement and overall
+	 *     priority (0 is lowest priority, so start at highest)
+	 */
+	i = prio_queues;
+	while (i > 0) {
+		i--;
+
+		fifo[i] = (q_fifo_size / XGMAC_FIFO_UNIT) - 1;
+
+		if (!pdata->pfcq[i] || !addn_fifo)
+			continue;
+
+		if (addn_fifo > rem_fifo) {
+			netdev_warn(pdata->netdev,
+				    "RXq%u cannot set needed fifo size\n", i);
+			if (!rem_fifo)
+				continue;
+
+			addn_fifo = rem_fifo;
+		}
+
+		fifo[i] += (addn_fifo / XGMAC_FIFO_UNIT);
+		rem_fifo -= addn_fifo;
+	}
+
+	if (rem_fifo) {
+		unsigned int inc_fifo = rem_fifo / prio_queues;
+
+		/* Distribute remaining fifo across queues */
+		for (i = 0; i < prio_queues; i++)
+			fifo[i] += (inc_fifo / XGMAC_FIFO_UNIT);
+	}
 }
 
 static void xgbe_config_tx_fifo_size(struct xgbe_prv_data *pdata)
 {
 	unsigned int fifo_size;
+	unsigned int fifo[XGBE_MAX_QUEUES];
 	unsigned int i;
 
-	fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.tx_fifo_size,
-						  pdata->tx_q_count);
+	fifo_size = xgbe_get_tx_fifo_size(pdata);
+
+	xgbe_calculate_equal_fifo(fifo_size, pdata->tx_q_count, fifo);
 
 	for (i = 0; i < pdata->tx_q_count; i++)
-		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo_size);
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_TQOMR, TQS, fifo[i]);
 
 	netif_info(pdata, drv, pdata->netdev,
 		   "%d Tx hardware queues, %d byte fifo per queue\n",
-		   pdata->tx_q_count, ((fifo_size + 1) * 256));
+		   pdata->tx_q_count, ((fifo[0] + 1) * XGMAC_FIFO_UNIT));
 }
 
 static void xgbe_config_rx_fifo_size(struct xgbe_prv_data *pdata)
 {
 	unsigned int fifo_size;
+	unsigned int fifo[XGBE_MAX_QUEUES];
+	unsigned int prio_queues;
 	unsigned int i;
 
-	fifo_size = xgbe_calculate_per_queue_fifo(pdata->hw_feat.rx_fifo_size,
-						  pdata->rx_q_count);
+	/* Clear any DCB related fifo/queue information */
+	memset(pdata->pfcq, 0, sizeof(pdata->pfcq));
+	pdata->pfc_rfa = 0;
+
+	fifo_size = xgbe_get_rx_fifo_size(pdata);
+	prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
+
+	/* Assign a minimum fifo to the non-VLAN priority queues */
+	fifo_size = xgbe_set_nonprio_fifos(fifo_size, pdata->rx_q_count, fifo);
+
+	if (pdata->pfc && pdata->ets)
+		xgbe_calculate_dcb_fifo(pdata, fifo_size, fifo);
+	else
+		xgbe_calculate_equal_fifo(fifo_size, prio_queues, fifo);
 
 	for (i = 0; i < pdata->rx_q_count; i++)
-		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo_size);
+		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQOMR, RQS, fifo[i]);
 
-	netif_info(pdata, drv, pdata->netdev,
-		   "%d Rx hardware queues, %d byte fifo per queue\n",
-		   pdata->rx_q_count, ((fifo_size + 1) * 256));
+	xgbe_calculate_flow_control_threshold(pdata, fifo);
+	xgbe_config_flow_control_threshold(pdata);
+
+	if (pdata->pfc && pdata->ets && pdata->pfc->pfc_en) {
+		netif_info(pdata, drv, pdata->netdev,
+			   "%u Rx hardware queues\n", pdata->rx_q_count);
+		for (i = 0; i < pdata->rx_q_count; i++)
+			netif_info(pdata, drv, pdata->netdev,
+				   "RxQ%u, %u byte fifo queue\n", i,
+				   ((fifo[i] + 1) * XGMAC_FIFO_UNIT));
+	} else {
+		netif_info(pdata, drv, pdata->netdev,
+			   "%u Rx hardware queues, %u byte fifo per queue\n",
+			   pdata->rx_q_count,
+			   ((fifo[0] + 1) * XGMAC_FIFO_UNIT));
+	}
 }
 
 static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata)
@@ -2090,8 +2549,7 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata)
 	}
 
 	/* Map the 8 VLAN priority values to available MTL Rx queues */
-	prio_queues = min_t(unsigned int, IEEE_8021QAZ_MAX_TCS,
-			    pdata->rx_q_count);
+	prio_queues = XGMAC_PRIO_QUEUES(pdata->rx_q_count);
 	ppq = IEEE_8021QAZ_MAX_TCS / prio_queues;
 	ppq_extra = IEEE_8021QAZ_MAX_TCS % prio_queues;
 
@@ -2139,16 +2597,120 @@ static void xgbe_config_queue_mapping(struct xgbe_prv_data *pdata)
 	}
 }
 
-static void xgbe_config_flow_control_threshold(struct xgbe_prv_data *pdata)
+static void xgbe_config_tc(struct xgbe_prv_data *pdata)
 {
-	unsigned int i;
+	unsigned int offset, queue, prio;
+	u8 i;
 
-	for (i = 0; i < pdata->rx_q_count; i++) {
-		/* Activate flow control when less than 4k left in fifo */
-		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFA, 2);
+	netdev_reset_tc(pdata->netdev);
+	if (!pdata->num_tcs)
+		return;
+
+	netdev_set_num_tc(pdata->netdev, pdata->num_tcs);
+
+	for (i = 0, queue = 0, offset = 0; i < pdata->num_tcs; i++) {
+		while ((queue < pdata->tx_q_count) &&
+		       (pdata->q2tc_map[queue] == i))
+			queue++;
 
-		/* De-activate flow control when more than 6k left in fifo */
-		XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_Q_RQFCR, RFD, 4);
+		netif_dbg(pdata, drv, pdata->netdev, "TC%u using TXq%u-%u\n",
+			  i, offset, queue - 1);
+		netdev_set_tc_queue(pdata->netdev, i, queue - offset, offset);
+		offset = queue;
+	}
+
+	if (!pdata->ets)
+		return;
+
+	for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++)
+		netdev_set_prio_tc_map(pdata->netdev, prio,
+				       pdata->ets->prio_tc[prio]);
+}
+
+static void xgbe_config_dcb_tc(struct xgbe_prv_data *pdata)
+{
+	struct ieee_ets *ets = pdata->ets;
+	unsigned int total_weight, min_weight, weight;
+	unsigned int mask, reg, reg_val;
+	unsigned int i, prio;
+
+	if (!ets)
+		return;
+
+	/* Set Tx to deficit weighted round robin scheduling algorithm (when
+	 * traffic class is using ETS algorithm)
+	 */
+	XGMAC_IOWRITE_BITS(pdata, MTL_OMR, ETSALG, MTL_ETSALG_DWRR);
+
+	/* Set Traffic Class algorithms */
+	total_weight = pdata->netdev->mtu * pdata->hw_feat.tc_cnt;
+	min_weight = total_weight / 100;
+	if (!min_weight)
+		min_weight = 1;
+
+	for (i = 0; i < pdata->hw_feat.tc_cnt; i++) {
+		/* Map the priorities to the traffic class */
+		mask = 0;
+		for (prio = 0; prio < IEEE_8021QAZ_MAX_TCS; prio++) {
+			if (ets->prio_tc[prio] == i)
+				mask |= (1 << prio);
+		}
+		mask &= 0xff;
+
+		netif_dbg(pdata, drv, pdata->netdev, "TC%u PRIO mask=%#x\n",
+			  i, mask);
+		reg = MTL_TCPM0R + (MTL_TCPM_INC * (i / MTL_TCPM_TC_PER_REG));
+		reg_val = XGMAC_IOREAD(pdata, reg);
+
+		reg_val &= ~(0xff << ((i % MTL_TCPM_TC_PER_REG) << 3));
+		reg_val |= (mask << ((i % MTL_TCPM_TC_PER_REG) << 3));
+
+		XGMAC_IOWRITE(pdata, reg, reg_val);
+
+		/* Set the traffic class algorithm */
+		switch (ets->tc_tsa[i]) {
+		case IEEE_8021QAZ_TSA_STRICT:
+			netif_dbg(pdata, drv, pdata->netdev,
+				  "TC%u using SP\n", i);
+			XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
+					       MTL_TSA_SP);
+			break;
+		case IEEE_8021QAZ_TSA_ETS:
+			weight = total_weight * ets->tc_tx_bw[i] / 100;
+			weight = clamp(weight, min_weight, total_weight);
+
+			netif_dbg(pdata, drv, pdata->netdev,
+				  "TC%u using DWRR (weight %u)\n", i, weight);
+			XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_ETSCR, TSA,
+					       MTL_TSA_ETS);
+			XGMAC_MTL_IOWRITE_BITS(pdata, i, MTL_TC_QWR, QW,
+					       weight);
+			break;
+		}
+	}
+
+	xgbe_config_tc(pdata);
+}
+
+static void xgbe_config_dcb_pfc(struct xgbe_prv_data *pdata)
+{
+	if (!test_bit(XGBE_DOWN, &pdata->dev_state)) {
+		/* Just stop the Tx queues while Rx fifo is changed */
+		netif_tx_stop_all_queues(pdata->netdev);
+
+		/* Suspend Rx so that fifo's can be adjusted */
+		pdata->hw_if.disable_rx(pdata);
+	}
+
+	xgbe_config_rx_fifo_size(pdata);
+	xgbe_config_flow_control(pdata);
+
+	if (!test_bit(XGBE_DOWN, &pdata->dev_state)) {
+		/* Resume Rx */
+		pdata->hw_if.enable_rx(pdata);
+
+		/* Resume Tx queues */
+		netif_tx_start_all_queues(pdata->netdev);
 	}
 }
 
@@ -2175,19 +2737,7 @@ static void xgbe_config_jumbo_enable(struct xgbe_prv_data *pdata)
 
 static void xgbe_config_mac_speed(struct xgbe_prv_data *pdata)
 {
-	switch (pdata->phy_speed) {
-	case SPEED_10000:
-		xgbe_set_xgmii_speed(pdata);
-		break;
-
-	case SPEED_2500:
-		xgbe_set_gmii_2500_speed(pdata);
-		break;
-
-	case SPEED_1000:
-		xgbe_set_gmii_speed(pdata);
-		break;
-	}
+	xgbe_set_speed(pdata, pdata->phy_speed);
 }
 
 static void xgbe_config_checksum_offload(struct xgbe_prv_data *pdata)
@@ -2223,17 +2773,33 @@ static u64 xgbe_mmc_read(struct xgbe_prv_data *pdata, unsigned int reg_lo)
 	bool read_hi;
 	u64 val;
 
-	switch (reg_lo) {
-	/* These registers are always 64 bit */
-	case MMC_TXOCTETCOUNT_GB_LO:
-	case MMC_TXOCTETCOUNT_G_LO:
-	case MMC_RXOCTETCOUNT_GB_LO:
-	case MMC_RXOCTETCOUNT_G_LO:
-		read_hi = true;
-		break;
+	if (pdata->vdata->mmc_64bit) {
+		switch (reg_lo) {
+		/* These registers are always 32 bit */
+		case MMC_RXRUNTERROR:
+		case MMC_RXJABBERERROR:
+		case MMC_RXUNDERSIZE_G:
+		case MMC_RXOVERSIZE_G:
+		case MMC_RXWATCHDOGERROR:
+			read_hi = false;
+			break;
 
-	default:
-		read_hi = false;
+		default:
+			read_hi = true;
+		}
+	} else {
+		switch (reg_lo) {
+		/* These registers are always 64 bit */
+		case MMC_TXOCTETCOUNT_GB_LO:
+		case MMC_TXOCTETCOUNT_G_LO:
+		case MMC_RXOCTETCOUNT_GB_LO:
+		case MMC_RXOCTETCOUNT_G_LO:
+			read_hi = true;
+			break;
+
+		default:
+			read_hi = false;
+		}
 	}
 
 	val = XGMAC_IOREAD(pdata, reg_lo);
@@ -2563,20 +3129,48 @@ static void xgbe_config_mmc(struct xgbe_prv_data *pdata)
 	XGMAC_IOWRITE_BITS(pdata, MMC_CR, CR, 1);
 }
 
+static void xgbe_txq_prepare_tx_stop(struct xgbe_prv_data *pdata,
+				     unsigned int queue)
+{
+	unsigned int tx_status;
+	unsigned long tx_timeout;
+
+	/* The Tx engine cannot be stopped if it is actively processing
+	 * packets. Wait for the Tx queue to empty the Tx fifo.  Don't
+	 * wait forever though...
+	 */
+	tx_timeout = jiffies + (XGBE_DMA_STOP_TIMEOUT * HZ);
+	while (time_before(jiffies, tx_timeout)) {
+		tx_status = XGMAC_MTL_IOREAD(pdata, queue, MTL_Q_TQDR);
+		if ((XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TRCSTS) != 1) &&
+		    (XGMAC_GET_BITS(tx_status, MTL_Q_TQDR, TXQSTS) == 0))
+			break;
+
+		usleep_range(500, 1000);
+	}
+
+	if (!time_before(jiffies, tx_timeout))
+		netdev_info(pdata->netdev,
+			    "timed out waiting for Tx queue %u to empty\n",
+			    queue);
+}
+
 static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
-				 struct xgbe_channel *channel)
+				 unsigned int queue)
 {
 	unsigned int tx_dsr, tx_pos, tx_qidx;
 	unsigned int tx_status;
 	unsigned long tx_timeout;
 
+	if (XGMAC_GET_BITS(pdata->hw_feat.version, MAC_VR, SNPSVER) > 0x20)
+		return xgbe_txq_prepare_tx_stop(pdata, queue);
+
 	/* Calculate the status register to read and the position within */
-	if (channel->queue_index < DMA_DSRX_FIRST_QUEUE) {
+	if (queue < DMA_DSRX_FIRST_QUEUE) {
 		tx_dsr = DMA_DSR0;
-		tx_pos = (channel->queue_index * DMA_DSR_Q_WIDTH) +
-			 DMA_DSR0_TPS_START;
+		tx_pos = (queue * DMA_DSR_Q_WIDTH) + DMA_DSR0_TPS_START;
 	} else {
-		tx_qidx = channel->queue_index - DMA_DSRX_FIRST_QUEUE;
+		tx_qidx = queue - DMA_DSRX_FIRST_QUEUE;
 
 		tx_dsr = DMA_DSR1 + ((tx_qidx / DMA_DSRX_QPR) * DMA_DSRX_INC);
 		tx_pos = ((tx_qidx % DMA_DSRX_QPR) * DMA_DSR_Q_WIDTH) +
@@ -2601,7 +3195,7 @@ static void xgbe_prepare_tx_stop(struct xgbe_prv_data *pdata,
 	if (!time_before(jiffies, tx_timeout))
 		netdev_info(pdata->netdev,
 			    "timed out waiting for Tx DMA channel %u to stop\n",
-			    channel->queue_index);
+			    queue);
 }
 
 static void xgbe_enable_tx(struct xgbe_prv_data *pdata)
@@ -2633,13 +3227,8 @@ static void xgbe_disable_tx(struct xgbe_prv_data *pdata)
 	unsigned int i;
 
 	/* Prepare for Tx DMA channel stop */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
-			break;
-
-		xgbe_prepare_tx_stop(pdata, channel);
-	}
+	for (i = 0; i < pdata->tx_q_count; i++)
+		xgbe_prepare_tx_stop(pdata, i);
 
 	/* Disable MAC Tx */
 	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
@@ -2763,13 +3352,8 @@ static void xgbe_powerdown_tx(struct xgbe_prv_data *pdata)
 	unsigned int i;
 
 	/* Prepare for Tx DMA channel stop */
-	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (!channel->tx_ring)
-			break;
-
-		xgbe_prepare_tx_stop(pdata, channel);
-	}
+	for (i = 0; i < pdata->tx_q_count; i++)
+		xgbe_prepare_tx_stop(pdata, i);
 
 	/* Disable MAC Tx */
 	XGMAC_IOWRITE_BITS(pdata, MAC_TCR, TE, 0);
@@ -2856,12 +3440,10 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
 	xgbe_config_rx_threshold(pdata, pdata->rx_threshold);
 	xgbe_config_tx_fifo_size(pdata);
 	xgbe_config_rx_fifo_size(pdata);
-	xgbe_config_flow_control_threshold(pdata);
 	/*TODO: Error Packet and undersized good Packet forwarding enable
 		(FEP and FUP)
 	 */
 	xgbe_config_dcb_tc(pdata);
-	xgbe_config_dcb_pfc(pdata);
 	xgbe_enable_mtl_interrupts(pdata);
 
 	/*
@@ -2877,6 +3459,11 @@ static int xgbe_init(struct xgbe_prv_data *pdata)
 	xgbe_config_mmc(pdata);
 	xgbe_enable_mac_interrupts(pdata);
 
+	/*
+	 * Initialize ECC related features
+	 */
+	xgbe_enable_ecc_interrupts(pdata);
+
 	DBGPR("<--xgbe_init\n");
 
 	return 0;
@@ -2903,9 +3490,14 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
 	hw_if->read_mmd_regs = xgbe_read_mmd_regs;
 	hw_if->write_mmd_regs = xgbe_write_mmd_regs;
 
-	hw_if->set_gmii_speed = xgbe_set_gmii_speed;
-	hw_if->set_gmii_2500_speed = xgbe_set_gmii_2500_speed;
-	hw_if->set_xgmii_speed = xgbe_set_xgmii_speed;
+	hw_if->set_speed = xgbe_set_speed;
+
+	hw_if->set_ext_mii_mode = xgbe_set_ext_mii_mode;
+	hw_if->read_ext_mii_regs = xgbe_read_ext_mii_regs;
+	hw_if->write_ext_mii_regs = xgbe_write_ext_mii_regs;
+
+	hw_if->set_gpio = xgbe_set_gpio;
+	hw_if->clr_gpio = xgbe_clr_gpio;
 
 	hw_if->enable_tx = xgbe_enable_tx;
 	hw_if->disable_tx = xgbe_disable_tx;
@@ -2984,5 +3576,9 @@ void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *hw_if)
 	hw_if->set_rss_hash_key = xgbe_set_rss_hash_key;
 	hw_if->set_rss_lookup_table = xgbe_set_rss_lookup_table;
 
+	/* For ECC */
+	hw_if->disable_ecc_ded = xgbe_disable_ecc_ded;
+	hw_if->disable_ecc_sec = xgbe_disable_ecc_sec;
+
 	DBGPR("<--xgbe_init_function_ptrs\n");
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
index 7f9216db026f..155190db682d 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c
@@ -114,7 +114,7 @@
  *     THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-#include <linux/platform_device.h>
+#include <linux/module.h>
 #include <linux/spinlock.h>
 #include <linux/tcp.h>
 #include <linux/if_vlan.h>
@@ -127,8 +127,35 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
+static unsigned int ecc_sec_info_threshold = 10;
+static unsigned int ecc_sec_warn_threshold = 10000;
+static unsigned int ecc_sec_period = 600;
+static unsigned int ecc_ded_threshold = 2;
+static unsigned int ecc_ded_period = 600;
+
+#ifdef CONFIG_AMD_XGBE_HAVE_ECC
+/* Only expose the ECC parameters if supported */
+module_param(ecc_sec_info_threshold, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_sec_info_threshold,
+		 " ECC corrected error informational threshold setting");
+
+module_param(ecc_sec_warn_threshold, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_sec_warn_threshold,
+		 " ECC corrected error warning threshold setting");
+
+module_param(ecc_sec_period, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_sec_period, " ECC corrected error period (in seconds)");
+
+module_param(ecc_ded_threshold, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_ded_threshold, " ECC detected error threshold setting");
+
+module_param(ecc_ded_period, uint, S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ecc_ded_period, " ECC detected error period (in seconds)");
+#endif
+
 static int xgbe_one_poll(struct napi_struct *, int);
 static int xgbe_all_poll(struct napi_struct *, int);
+static void xgbe_stop(struct xgbe_prv_data *);
 
 static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 {
@@ -160,18 +187,8 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 		channel->dma_regs = pdata->xgmac_regs + DMA_CH_BASE +
 				    (DMA_CH_INC * i);
 
-		if (pdata->per_channel_irq) {
-			/* Get the DMA interrupt (offset 1) */
-			ret = platform_get_irq(pdata->pdev, i + 1);
-			if (ret < 0) {
-				netdev_err(pdata->netdev,
-					   "platform_get_irq %u failed\n",
-					   i + 1);
-				goto err_irq;
-			}
-
-			channel->dma_irq = ret;
-		}
+		if (pdata->per_channel_irq)
+			channel->dma_irq = pdata->channel_irq[i];
 
 		if (i < pdata->tx_ring_count) {
 			spin_lock_init(&tx_ring->lock);
@@ -194,9 +211,6 @@ static int xgbe_alloc_channels(struct xgbe_prv_data *pdata)
 
 	return 0;
 
-err_irq:
-	kfree(rx_ring);
-
 err_rx_ring:
 	kfree(tx_ring);
 
@@ -257,11 +271,6 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu)
 {
 	unsigned int rx_buf_size;
 
-	if (mtu > XGMAC_JUMBO_PACKET_MTU) {
-		netdev_alert(netdev, "MTU exceeds maximum supported value\n");
-		return -EINVAL;
-	}
-
 	rx_buf_size = mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	rx_buf_size = clamp_val(rx_buf_size, XGBE_RX_MIN_BUF_SIZE, PAGE_SIZE);
 
@@ -271,48 +280,161 @@ static int xgbe_calc_rx_buf_size(struct net_device *netdev, unsigned int mtu)
 	return rx_buf_size;
 }
 
-static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
+static void xgbe_enable_rx_tx_int(struct xgbe_prv_data *pdata,
+				  struct xgbe_channel *channel)
 {
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
-	struct xgbe_channel *channel;
 	enum xgbe_int int_id;
+
+	if (channel->tx_ring && channel->rx_ring)
+		int_id = XGMAC_INT_DMA_CH_SR_TI_RI;
+	else if (channel->tx_ring)
+		int_id = XGMAC_INT_DMA_CH_SR_TI;
+	else if (channel->rx_ring)
+		int_id = XGMAC_INT_DMA_CH_SR_RI;
+	else
+		return;
+
+	hw_if->enable_int(channel, int_id);
+}
+
+static void xgbe_enable_rx_tx_ints(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_channel *channel;
 	unsigned int i;
 
 	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (channel->tx_ring && channel->rx_ring)
-			int_id = XGMAC_INT_DMA_CH_SR_TI_RI;
-		else if (channel->tx_ring)
-			int_id = XGMAC_INT_DMA_CH_SR_TI;
-		else if (channel->rx_ring)
-			int_id = XGMAC_INT_DMA_CH_SR_RI;
-		else
-			continue;
+	for (i = 0; i < pdata->channel_count; i++, channel++)
+		xgbe_enable_rx_tx_int(pdata, channel);
+}
 
-		hw_if->enable_int(channel, int_id);
-	}
+/* Disable the Tx and/or Rx DMA interrupt matching the rings of @channel */
+static void xgbe_disable_rx_tx_int(struct xgbe_prv_data *pdata,
+				   struct xgbe_channel *channel)
+{
+	enum xgbe_int which;
+
+	if (!channel->tx_ring && !channel->rx_ring)
+		return;
+
+	if (!channel->rx_ring)
+		which = XGMAC_INT_DMA_CH_SR_TI;
+	else if (!channel->tx_ring)
+		which = XGMAC_INT_DMA_CH_SR_RI;
+	else
+		which = XGMAC_INT_DMA_CH_SR_TI_RI;
+	pdata->hw_if.disable_int(channel, which);
 }
 
 static void xgbe_disable_rx_tx_ints(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_channel *channel;
-	enum xgbe_int int_id;
 	unsigned int i;
 
 	channel = pdata->channel;
-	for (i = 0; i < pdata->channel_count; i++, channel++) {
-		if (channel->tx_ring && channel->rx_ring)
-			int_id = XGMAC_INT_DMA_CH_SR_TI_RI;
-		else if (channel->tx_ring)
-			int_id = XGMAC_INT_DMA_CH_SR_TI;
-		else if (channel->rx_ring)
-			int_id = XGMAC_INT_DMA_CH_SR_RI;
-		else
-			continue;
+	for (i = 0; i < pdata->channel_count; i++, channel++)
+		xgbe_disable_rx_tx_int(pdata, channel);
+}
+
+static bool xgbe_ecc_sec(struct xgbe_prv_data *pdata, unsigned long *period,
+			 unsigned int *count, const char *area)
+{
+	if (time_before(jiffies, *period)) {
+		(*count)++;
+	} else {
+		*period = jiffies + (ecc_sec_period * HZ);
+		*count = 1;
+	}
 
-		hw_if->disable_int(channel, int_id);
+	if (*count > ecc_sec_info_threshold)
+		dev_warn_once(pdata->dev,
+			      "%s ECC corrected errors exceed informational threshold\n",
+			      area);
+
+	if (*count > ecc_sec_warn_threshold) {
+		dev_warn_once(pdata->dev,
+			      "%s ECC corrected errors exceed warning threshold\n",
+			      area);
+		return true;
 	}
+
+	return false;
+}
+
+/* Count one detected (DED) ECC error for @area in the current period.
+ * Returns true once the count is above ecc_ded_threshold.
+ */
+static bool xgbe_ecc_ded(struct xgbe_prv_data *pdata, unsigned long *period,
+			 unsigned int *count, const char *area)
+{
+	/* Period over (or never started): open a new one and restart the count */
+	if (!time_before(jiffies, *period)) {
+		*period = jiffies + (ecc_ded_period * HZ);
+		*count = 0;
+	}
+
+	if (++(*count) <= ecc_ded_threshold)
+		return false;
+
+	netdev_alert(pdata->netdev,
+		     "%s ECC detected errors exceed threshold\n", area);
+	return true;
+}
+
+static irqreturn_t xgbe_ecc_isr(int irq, void *data)
+{
+	struct xgbe_prv_data *pdata = data;
+	unsigned int ecc_isr;
+	bool stop = false;
+
+	/* ECC irq handler: keep only status bits enabled in XP_ECC_IER */
+	ecc_isr = XP_IOREAD(pdata, XP_ECC_ISR);
+	ecc_isr &= XP_IOREAD(pdata, XP_ECC_IER);
+	netif_dbg(pdata, intr, pdata->netdev, "ECC_ISR=%#010x\n", ecc_isr);
+
+	if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_DED)) {
+		stop |= xgbe_ecc_ded(pdata, &pdata->tx_ded_period,
+				     &pdata->tx_ded_count, "TX fifo");
+	}
+
+	if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_DED)) {
+		stop |= xgbe_ecc_ded(pdata, &pdata->rx_ded_period,
+				     &pdata->rx_ded_count, "RX fifo");
+	}
+
+	if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_DED)) {
+		stop |= xgbe_ecc_ded(pdata, &pdata->desc_ded_period,
+				     &pdata->desc_ded_count,
+				     "descriptor cache");
+	}
+
+	if (stop) {	/* some DED count went above ecc_ded_threshold */
+		pdata->hw_if.disable_ecc_ded(pdata);
+		schedule_work(&pdata->stopdev_work);
+		goto out;	/* the SEC checks below are skipped on this path */
+	}
+
+	if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, TX_SEC)) {
+		if (xgbe_ecc_sec(pdata, &pdata->tx_sec_period,
+				 &pdata->tx_sec_count, "TX fifo"))
+			pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_TX);
+	}
+
+	if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, RX_SEC))
+		if (xgbe_ecc_sec(pdata, &pdata->rx_sec_period,
+				 &pdata->rx_sec_count, "RX fifo"))
+			pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_RX);
+
+	if (XP_GET_BITS(ecc_isr, XP_ECC_ISR, DESC_SEC))
+		if (xgbe_ecc_sec(pdata, &pdata->desc_sec_period,
+				 &pdata->desc_sec_count, "descriptor cache"))
+			pdata->hw_if.disable_ecc_sec(pdata, XGBE_ECC_SEC_DESC);
+
+out:
+	/* Clear the ECC interrupts read above by writing the masked status back */
+	XP_IOWRITE(pdata, XP_ECC_ISR, ecc_isr);
+
+	return IRQ_HANDLED;
 }
 
 static irqreturn_t xgbe_isr(int irq, void *data)
@@ -321,7 +443,7 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 	struct xgbe_hw_if *hw_if = &pdata->hw_if;
 	struct xgbe_channel *channel;
 	unsigned int dma_isr, dma_ch_isr;
-	unsigned int mac_isr, mac_tssr;
+	unsigned int mac_isr, mac_tssr, mac_mdioisr;
 	unsigned int i;
 
 	/* The DMA interrupt status register also reports MAC and MTL
@@ -358,6 +480,13 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 				/* Turn on polling */
 				__napi_schedule_irqoff(&pdata->napi);
 			}
+		} else {
+			/* Don't clear Rx/Tx status if doing per channel DMA
+			 * interrupts, these will be cleared by the ISR for
+			 * per channel DMA interrupts.
+			 */
+			XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, TI, 0);
+			XGMAC_SET_BITS(dma_ch_isr, DMA_CH_SR, RI, 0);
 		}
 
 		if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, RBU))
@@ -367,13 +496,16 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 		if (XGMAC_GET_BITS(dma_ch_isr, DMA_CH_SR, FBE))
 			schedule_work(&pdata->restart_work);
 
-		/* Clear all interrupt signals */
+		/* Clear interrupt signals */
 		XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_ch_isr);
 	}
 
 	if (XGMAC_GET_BITS(dma_isr, DMA_ISR, MACIS)) {
 		mac_isr = XGMAC_IOREAD(pdata, MAC_ISR);
 
+		netif_dbg(pdata, intr, pdata->netdev, "MAC_ISR=%#010x\n",
+			  mac_isr);
+
 		if (XGMAC_GET_BITS(mac_isr, MAC_ISR, MMCTXIS))
 			hw_if->tx_mmc_int(pdata);
 
@@ -383,6 +515,9 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 		if (XGMAC_GET_BITS(mac_isr, MAC_ISR, TSIS)) {
 			mac_tssr = XGMAC_IOREAD(pdata, MAC_TSSR);
 
+			netif_dbg(pdata, intr, pdata->netdev,
+				  "MAC_TSSR=%#010x\n", mac_tssr);
+
 			if (XGMAC_GET_BITS(mac_tssr, MAC_TSSR, TXTSC)) {
 				/* Read Tx Timestamp to clear interrupt */
 				pdata->tx_tstamp =
@@ -391,8 +526,31 @@ static irqreturn_t xgbe_isr(int irq, void *data)
 					   &pdata->tx_tstamp_work);
 			}
 		}
+
+		if (XGMAC_GET_BITS(mac_isr, MAC_ISR, SMI)) {
+			mac_mdioisr = XGMAC_IOREAD(pdata, MAC_MDIOISR);
+
+			netif_dbg(pdata, intr, pdata->netdev,
+				  "MAC_MDIOISR=%#010x\n", mac_mdioisr);
+
+			if (XGMAC_GET_BITS(mac_mdioisr, MAC_MDIOISR,
+					   SNGLCOMPINT))
+				complete(&pdata->mdio_complete);
+		}
 	}
 
+	/* If there is not a separate AN irq, handle it here */
+	if (pdata->dev_irq == pdata->an_irq)
+		pdata->phy_if.an_isr(irq, pdata);
+
+	/* If there is not a separate ECC irq, handle it here */
+	if (pdata->vdata->ecc_support && (pdata->dev_irq == pdata->ecc_irq))
+		xgbe_ecc_isr(irq, pdata);
+
+	/* If there is not a separate I2C irq, handle it here */
+	if (pdata->vdata->i2c_support && (pdata->dev_irq == pdata->i2c_irq))
+		pdata->i2c_if.i2c_isr(irq, pdata);
+
 isr_done:
 	return IRQ_HANDLED;
 }
@@ -400,18 +558,29 @@ isr_done:
 static irqreturn_t xgbe_dma_isr(int irq, void *data)
 {
 	struct xgbe_channel *channel = data;
+	struct xgbe_prv_data *pdata = channel->pdata;
+	unsigned int dma_status;
 
 	/* Per channel DMA interrupts are enabled, so we use the per
 	 * channel napi structure and not the private data napi structure
 	 */
 	if (napi_schedule_prep(&channel->napi)) {
 		/* Disable Tx and Rx interrupts */
-		disable_irq_nosync(channel->dma_irq);
+		if (pdata->channel_irq_mode)
+			xgbe_disable_rx_tx_int(pdata, channel);
+		else
+			disable_irq_nosync(channel->dma_irq);
 
 		/* Turn on polling */
 		__napi_schedule_irqoff(&channel->napi);
 	}
 
+	/* Clear Tx/Rx signals */
+	dma_status = 0;
+	XGMAC_SET_BITS(dma_status, DMA_CH_SR, TI, 1);
+	XGMAC_SET_BITS(dma_status, DMA_CH_SR, RI, 1);
+	XGMAC_DMA_IOWRITE(channel, DMA_CH_SR, dma_status);
+
 	return IRQ_HANDLED;
 }
 
@@ -428,7 +597,10 @@ static void xgbe_tx_timer(unsigned long data)
 	if (napi_schedule_prep(napi)) {
 		/* Disable Tx and Rx interrupts */
 		if (pdata->per_channel_irq)
-			disable_irq_nosync(channel->dma_irq);
+			if (pdata->channel_irq_mode)
+				xgbe_disable_rx_tx_int(pdata, channel);
+			else
+				disable_irq_nosync(channel->dma_irq);
 		else
 			xgbe_disable_rx_tx_ints(pdata);
 
@@ -595,6 +767,10 @@ void xgbe_get_all_hw_features(struct xgbe_prv_data *pdata)
 	hw_feat->tx_ch_cnt++;
 	hw_feat->tc_cnt++;
 
+	/* Translate the fifo sizes into actual numbers */
+	hw_feat->rx_fifo_size = 1 << (hw_feat->rx_fifo_size + 7);
+	hw_feat->tx_fifo_size = 1 << (hw_feat->tx_fifo_size + 7);
+
 	DBGPR("<--xgbe_get_all_hw_features\n");
 }
 
@@ -657,6 +833,16 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
 		return ret;
 	}
 
+	if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq)) {
+		ret = devm_request_irq(pdata->dev, pdata->ecc_irq, xgbe_ecc_isr,
+				       0, pdata->ecc_name, pdata);
+		if (ret) {
+			netdev_alert(netdev, "error requesting ecc irq %d\n",
+				     pdata->ecc_irq);
+			goto err_dev_irq;
+		}
+	}
+
 	if (!pdata->per_channel_irq)
 		return 0;
 
@@ -673,17 +859,21 @@ static int xgbe_request_irqs(struct xgbe_prv_data *pdata)
 		if (ret) {
 			netdev_alert(netdev, "error requesting irq %d\n",
 				     channel->dma_irq);
-			goto err_irq;
+			goto err_dma_irq;
 		}
 	}
 
 	return 0;
 
-err_irq:
+err_dma_irq:
 	/* Using an unsigned int, 'i' will go to UINT_MAX and exit */
 	for (i--, channel--; i < pdata->channel_count; i--, channel--)
 		devm_free_irq(pdata->dev, channel->dma_irq, channel);
 
+	if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+		devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+
+err_dev_irq:
 	devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
 
 	return ret;
@@ -696,6 +886,9 @@ static void xgbe_free_irqs(struct xgbe_prv_data *pdata)
 
 	devm_free_irq(pdata->dev, pdata->dev_irq, pdata);
 
+	if (pdata->vdata->ecc_support && (pdata->dev_irq != pdata->ecc_irq))
+		devm_free_irq(pdata->dev, pdata->ecc_irq, pdata);
+
 	if (!pdata->per_channel_irq)
 		return;
 
@@ -783,7 +976,7 @@ static void xgbe_free_rx_data(struct xgbe_prv_data *pdata)
 	DBGPR("<--xgbe_free_rx_data\n");
 }
 
-static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
 {
 	pdata->phy_link = -1;
 	pdata->phy_speed = SPEED_UNKNOWN;
@@ -879,16 +1072,16 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
 
 	hw_if->init(pdata);
 
-	ret = phy_if->phy_start(pdata);
-	if (ret)
-		goto err_phy;
-
 	xgbe_napi_enable(pdata, 1);
 
 	ret = xgbe_request_irqs(pdata);
 	if (ret)
 		goto err_napi;
 
+	ret = phy_if->phy_start(pdata);
+	if (ret)
+		goto err_irqs;
+
 	hw_if->enable_tx(pdata);
 	hw_if->enable_rx(pdata);
 
@@ -897,16 +1090,18 @@ static int xgbe_start(struct xgbe_prv_data *pdata)
 	xgbe_start_timers(pdata);
 	queue_work(pdata->dev_workqueue, &pdata->service_work);
 
+	clear_bit(XGBE_STOPPED, &pdata->dev_state);
+
 	DBGPR("<--xgbe_start\n");
 
 	return 0;
 
+err_irqs:
+	xgbe_free_irqs(pdata);
+
 err_napi:
 	xgbe_napi_disable(pdata, 1);
 
-	phy_if->phy_stop(pdata);
-
-err_phy:
 	hw_if->exit(pdata);
 
 	return ret;
@@ -923,6 +1118,9 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 
 	DBGPR("-->xgbe_stop\n");
 
+	if (test_bit(XGBE_STOPPED, &pdata->dev_state))
+		return;
+
 	netif_tx_stop_all_queues(netdev);
 
 	xgbe_stop_timers(pdata);
@@ -948,9 +1146,29 @@ static void xgbe_stop(struct xgbe_prv_data *pdata)
 		netdev_tx_reset_queue(txq);
 	}
 
+	set_bit(XGBE_STOPPED, &pdata->dev_state);
+
 	DBGPR("<--xgbe_stop\n");
 }
 
+static void xgbe_stopdev(struct work_struct *work)
+{
+	struct xgbe_prv_data *pdata = container_of(work,
+						   struct xgbe_prv_data,
+						   stopdev_work);
+
+	rtnl_lock();
+	/* stopdev_work handler: stop the device, free Tx/Rx data under RTNL */
+	xgbe_stop(pdata);
+
+	xgbe_free_tx_data(pdata);
+	xgbe_free_rx_data(pdata);
+
+	rtnl_unlock();
+
+	netdev_alert(pdata->netdev, "device stopped\n");
+}
+
 static void xgbe_restart_dev(struct xgbe_prv_data *pdata)
 {
 	DBGPR("-->xgbe_restart_dev\n");
@@ -1297,8 +1515,8 @@ static int xgbe_open(struct net_device *netdev)
 
 	DBGPR("-->xgbe_open\n");
 
-	/* Initialize the phy */
-	ret = xgbe_phy_init(pdata);
+	/* Reset the phy settings */
+	ret = xgbe_phy_reset(pdata);
 	if (ret)
 		return ret;
 
@@ -1333,6 +1551,7 @@ static int xgbe_open(struct net_device *netdev)
 
 	INIT_WORK(&pdata->service_work, xgbe_service);
 	INIT_WORK(&pdata->restart_work, xgbe_restart);
+	INIT_WORK(&pdata->stopdev_work, xgbe_stopdev);
 	INIT_WORK(&pdata->tx_tstamp_work, xgbe_tx_tstamp);
 	xgbe_init_timers(pdata);
 
@@ -2041,6 +2260,7 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
 {
 	struct xgbe_channel *channel = container_of(napi, struct xgbe_channel,
 						    napi);
+	struct xgbe_prv_data *pdata = channel->pdata;
 	int processed = 0;
 
 	DBGPR("-->xgbe_one_poll: budget=%d\n", budget);
@@ -2057,7 +2277,10 @@ static int xgbe_one_poll(struct napi_struct *napi, int budget)
 		napi_complete_done(napi, processed);
 
 		/* Enable Tx and Rx interrupts */
-		enable_irq(channel->dma_irq);
+		if (pdata->channel_irq_mode)
+			xgbe_enable_rx_tx_int(pdata, channel);
+		else
+			enable_irq(channel->dma_irq);
 	}
 
 	DBGPR("<--xgbe_one_poll: received = %d\n", processed);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
index 4007b429c80c..920566a3a599 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c
@@ -272,97 +272,86 @@ static int xgbe_set_pauseparam(struct net_device *netdev,
 	return ret;
 }
 
-static int xgbe_get_settings(struct net_device *netdev,
-			     struct ethtool_cmd *cmd)
+static int xgbe_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
 
-	cmd->phy_address = pdata->phy.address;
+	cmd->base.phy_address = pdata->phy.address;
 
-	cmd->supported = pdata->phy.supported;
-	cmd->advertising = pdata->phy.advertising;
-	cmd->lp_advertising = pdata->phy.lp_advertising;
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						pdata->phy.supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						pdata->phy.advertising);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.lp_advertising,
+						pdata->phy.lp_advertising);
 
-	cmd->autoneg = pdata->phy.autoneg;
-	ethtool_cmd_speed_set(cmd, pdata->phy.speed);
-	cmd->duplex = pdata->phy.duplex;
+	cmd->base.autoneg = pdata->phy.autoneg;
+	cmd->base.speed = pdata->phy.speed;
+	cmd->base.duplex = pdata->phy.duplex;
 
-	cmd->port = PORT_NONE;
-	cmd->transceiver = XCVR_INTERNAL;
+	cmd->base.port = PORT_NONE;
 
 	return 0;
 }
 
-static int xgbe_set_settings(struct net_device *netdev,
-			     struct ethtool_cmd *cmd)
+static int xgbe_set_link_ksettings(struct net_device *netdev,
+				   const struct ethtool_link_ksettings *cmd)
 {
 	struct xgbe_prv_data *pdata = netdev_priv(netdev);
+	u32 advertising;
 	u32 speed;
 	int ret;
 
-	speed = ethtool_cmd_speed(cmd);
+	speed = cmd->base.speed;
 
-	if (cmd->phy_address != pdata->phy.address) {
+	if (cmd->base.phy_address != pdata->phy.address) {
 		netdev_err(netdev, "invalid phy address %hhu\n",
-			   cmd->phy_address);
+			   cmd->base.phy_address);
 		return -EINVAL;
 	}
 
-	if ((cmd->autoneg != AUTONEG_ENABLE) &&
-	    (cmd->autoneg != AUTONEG_DISABLE)) {
+	if ((cmd->base.autoneg != AUTONEG_ENABLE) &&
+	    (cmd->base.autoneg != AUTONEG_DISABLE)) {
 		netdev_err(netdev, "unsupported autoneg %hhu\n",
-			   cmd->autoneg);
+			   cmd->base.autoneg);
 		return -EINVAL;
 	}
 
-	if (cmd->autoneg == AUTONEG_DISABLE) {
-		switch (speed) {
-		case SPEED_10000:
-			break;
-		case SPEED_2500:
-			if (pdata->speed_set != XGBE_SPEEDSET_2500_10000) {
-				netdev_err(netdev, "unsupported speed %u\n",
-					   speed);
-				return -EINVAL;
-			}
-			break;
-		case SPEED_1000:
-			if (pdata->speed_set != XGBE_SPEEDSET_1000_10000) {
-				netdev_err(netdev, "unsupported speed %u\n",
-					   speed);
-				return -EINVAL;
-			}
-			break;
-		default:
+	if (cmd->base.autoneg == AUTONEG_DISABLE) {
+		if (!pdata->phy_if.phy_valid_speed(pdata, speed)) {
 			netdev_err(netdev, "unsupported speed %u\n", speed);
 			return -EINVAL;
 		}
 
-		if (cmd->duplex != DUPLEX_FULL) {
+		if (cmd->base.duplex != DUPLEX_FULL) {
 			netdev_err(netdev, "unsupported duplex %hhu\n",
-				   cmd->duplex);
+				   cmd->base.duplex);
 			return -EINVAL;
 		}
 	}
 
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
 	netif_dbg(pdata, link, netdev,
 		  "requested advertisement %#x, phy supported %#x\n",
-		  cmd->advertising, pdata->phy.supported);
+		  advertising, pdata->phy.supported);
 
-	cmd->advertising &= pdata->phy.supported;
-	if ((cmd->autoneg == AUTONEG_ENABLE) && !cmd->advertising) {
+	advertising &= pdata->phy.supported;
+	if ((cmd->base.autoneg == AUTONEG_ENABLE) && !advertising) {
 		netdev_err(netdev,
 			   "unsupported requested advertisement\n");
 		return -EINVAL;
 	}
 
 	ret = 0;
-	pdata->phy.autoneg = cmd->autoneg;
+	pdata->phy.autoneg = cmd->base.autoneg;
 	pdata->phy.speed = speed;
-	pdata->phy.duplex = cmd->duplex;
-	pdata->phy.advertising = cmd->advertising;
+	pdata->phy.duplex = cmd->base.duplex;
+	pdata->phy.advertising = advertising;
 
-	if (cmd->autoneg == AUTONEG_ENABLE)
+	if (cmd->base.autoneg == AUTONEG_ENABLE)
 		pdata->phy.advertising |= ADVERTISED_Autoneg;
 	else
 		pdata->phy.advertising &= ~ADVERTISED_Autoneg;
@@ -602,8 +591,6 @@ static int xgbe_get_ts_info(struct net_device *netdev,
 }
 
 static const struct ethtool_ops xgbe_ethtool_ops = {
-	.get_settings = xgbe_get_settings,
-	.set_settings = xgbe_set_settings,
 	.get_drvinfo = xgbe_get_drvinfo,
 	.get_msglevel = xgbe_get_msglevel,
 	.set_msglevel = xgbe_set_msglevel,
@@ -621,6 +608,8 @@ static const struct ethtool_ops xgbe_ethtool_ops = {
 	.get_rxfh = xgbe_get_rxfh,
 	.set_rxfh = xgbe_set_rxfh,
 	.get_ts_info = xgbe_get_ts_info,
+	.get_link_ksettings = xgbe_get_link_ksettings,
+	.set_link_ksettings = xgbe_set_link_ksettings,
 };
 
 const struct ethtool_ops *xgbe_get_ethtool_ops(void)
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
new file mode 100644
index 000000000000..0c7088a426e9
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-i2c.c
@@ -0,0 +1,492 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/delay.h>
+#include <linux/completion.h>
+#include <linux/mutex.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+#define XGBE_ABORT_COUNT	500
+#define XGBE_DISABLE_COUNT	1000
+
+#define XGBE_STD_SPEED		1
+
+#define XGBE_INTR_RX_FULL	BIT(IC_RAW_INTR_STAT_RX_FULL_INDEX)
+#define XGBE_INTR_TX_EMPTY	BIT(IC_RAW_INTR_STAT_TX_EMPTY_INDEX)
+#define XGBE_INTR_TX_ABRT	BIT(IC_RAW_INTR_STAT_TX_ABRT_INDEX)
+#define XGBE_INTR_STOP_DET	BIT(IC_RAW_INTR_STAT_STOP_DET_INDEX)
+#define XGBE_DEFAULT_INT_MASK	(XGBE_INTR_RX_FULL  |	\
+				 XGBE_INTR_TX_EMPTY |	\
+				 XGBE_INTR_TX_ABRT  |	\
+				 XGBE_INTR_STOP_DET)
+
+#define XGBE_I2C_READ		BIT(8)
+#define XGBE_I2C_STOP		BIT(9)
+
+/* Request an I2C abort, then poll until the ABORT bit reads back as zero */
+static int xgbe_i2c_abort(struct xgbe_prv_data *pdata)
+{
+	unsigned int tries;
+
+	/* The abort request is only recognized while the controller is enabled */
+	XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, 1);
+
+	/* Raise the abort request */
+	XI2C_IOWRITE_BITS(pdata, IC_ENABLE, ABORT, 1);
+
+	for (tries = XGBE_ABORT_COUNT; tries; tries--) {
+		if (!XI2C_IOREAD_BITS(pdata, IC_ENABLE, ABORT))
+			return 0;
+
+		usleep_range(500, 600);
+	}
+	return -EBUSY;
+}
+
+/* Write IC_ENABLE.EN until IC_ENABLE_STATUS.EN matches; -EBUSY on timeout */
+static int xgbe_i2c_set_enable(struct xgbe_prv_data *pdata, bool enable)
+{
+	unsigned int want = enable ? 1 : 0;
+	unsigned int tries;
+
+	for (tries = XGBE_DISABLE_COUNT; tries; tries--) {
+		XI2C_IOWRITE_BITS(pdata, IC_ENABLE, EN, want);
+		if (XI2C_IOREAD_BITS(pdata, IC_ENABLE_STATUS, EN) == want)
+			return 0;
+
+		usleep_range(100, 110);
+	}
+	return -EBUSY;
+}
+
+/* Disable the I2C controller; if that fails, abort and try once more */
+static int xgbe_i2c_disable(struct xgbe_prv_data *pdata)
+{
+	int ret;	/* signed: holds 0 or a negative errno from the helpers */
+
+	ret = xgbe_i2c_set_enable(pdata, false);
+	if (ret) {
+		/* Disable failed, try an abort */
+		ret = xgbe_i2c_abort(pdata);
+		if (ret)
+			return ret;
+
+		/* Abort succeeded, try to disable again */
+		ret = xgbe_i2c_set_enable(pdata, false);
+	}
+	return ret;
+}
+
+static int xgbe_i2c_enable(struct xgbe_prv_data *pdata)
+{
+	return xgbe_i2c_set_enable(pdata, true);	/* 0 or -EBUSY */
+}
+
+static void xgbe_i2c_clear_all_interrupts(struct xgbe_prv_data *pdata)
+{
+	XI2C_IOREAD(pdata, IC_CLR_INTR);	/* value unused (read-to-clear?) */
+}
+
+static void xgbe_i2c_disable_interrupts(struct xgbe_prv_data *pdata)
+{
+	XI2C_IOWRITE(pdata, IC_INTR_MASK, 0);	/* zero the whole mask register */
+}
+
+static void xgbe_i2c_enable_interrupts(struct xgbe_prv_data *pdata)
+{
+	/* Mask value: RX_FULL | TX_EMPTY | TX_ABRT | STOP_DET */
+	XI2C_IOWRITE(pdata, IC_INTR_MASK, XGBE_DEFAULT_INT_MASK);
+}
+
+static void xgbe_i2c_write(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+	unsigned int tx_slots;
+	unsigned int cmd;
+	/* Queue the remainder of the current operation into the Tx fifo */
+	/* Configured to never receive Rx overflows, so fill up Tx fifo */
+	tx_slots = pdata->i2c.tx_fifo_size - XI2C_IOREAD(pdata, IC_TXFLR);
+	while (tx_slots && state->tx_len) {
+		if (state->op->cmd == XGBE_I2C_CMD_READ)
+			cmd = XGBE_I2C_READ;	/* BIT(8), no data byte */
+		else
+			cmd = *state->tx_buf++;	/* next byte to send */
+		/* The final entry of the operation also carries STOP */
+		if (state->tx_len == 1)
+			XI2C_SET_BITS(cmd, IC_DATA_CMD, STOP, 1);
+
+		XI2C_IOWRITE(pdata, IC_DATA_CMD, cmd);
+
+		tx_slots--;
+		state->tx_len--;
+	}
+
+	/* No more Tx operations, so ignore TX_EMPTY and return */
+	if (!state->tx_len)
+		XI2C_IOWRITE_BITS(pdata, IC_INTR_MASK, TX_EMPTY, 0);
+}
+
+/* Copy bytes waiting in the Rx FIFO into the operation's buffer;
+ * does nothing unless the current operation is a read.
+ */
+static void xgbe_i2c_read(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+	unsigned int avail;
+
+	if (state->op->cmd != XGBE_I2C_CMD_READ)
+		return;
+
+	/* Stop at whichever runs out first: FIFO entries or rx_len */
+	avail = XI2C_IOREAD(pdata, IC_RXFLR);
+	for (; avail && state->rx_len; avail--, state->rx_len--)
+		*state->rx_buf++ = XI2C_IOREAD(pdata, IC_DATA_CMD);
+}
+
+static void xgbe_i2c_clear_isr_interrupts(struct xgbe_prv_data *pdata,
+					  unsigned int isr)
+{
+	/* Save the abort source for later, then read IC_CLR_TX_ABRT */
+	if (isr & XGBE_INTR_TX_ABRT) {
+		pdata->i2c.op_state.tx_abort_source =
+			XI2C_IOREAD(pdata, IC_TX_ABRT_SOURCE);
+		XI2C_IOREAD(pdata, IC_CLR_TX_ABRT);
+	}
+
+	if (isr & XGBE_INTR_STOP_DET)
+		XI2C_IOREAD(pdata, IC_CLR_STOP_DET);
+}
+
+/* I2C interrupt handler: handles TX_ABRT/STOP_DET, moves data between
+ * the FIFOs and the current operation's buffers, and signals
+ * i2c_complete on an error or a STOP condition.
+ */
+static irqreturn_t xgbe_i2c_isr(int irq, void *data)
+{
+	struct xgbe_prv_data *pdata = data;
+	struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+	unsigned int status;
+
+	status = XI2C_IOREAD(pdata, IC_RAW_INTR_STAT);
+	netif_dbg(pdata, intr, pdata->netdev,
+		  "I2C interrupt received: status=%#010x\n", status);
+
+	xgbe_i2c_clear_isr_interrupts(pdata, status);
+
+	if (status & XGBE_INTR_TX_ABRT) {
+		/* Aborted: log it, mask the interrupts, fail with -EIO */
+		netif_dbg(pdata, link, pdata->netdev,
+			  "I2C TX_ABRT received (%#010x) for target %#04x\n",
+			  state->tx_abort_source, state->op->target);
+
+		xgbe_i2c_disable_interrupts(pdata);
+		state->ret = -EIO;
+	} else {
+		/* Check for data in the Rx fifo, then fill up the Tx fifo */
+		xgbe_i2c_read(pdata);
+		xgbe_i2c_write(pdata);
+	}
+
+	/* Complete on an error or STOP condition */
+	if (state->ret || XI2C_GET_BITS(status, IC_RAW_INTR_STAT, STOP_DET))
+		complete(&pdata->i2c_complete);
+
+	return IRQ_HANDLED;
+}
+
+static void xgbe_i2c_set_mode(struct xgbe_prv_data *pdata)
+{
+	unsigned int con = XI2C_IOREAD(pdata, IC_CON);
+
+	/* Update fields in the value read from IC_CON, then write it back */
+	XI2C_SET_BITS(con, IC_CON, MASTER_MODE, 1);
+	XI2C_SET_BITS(con, IC_CON, SLAVE_DISABLE, 1);
+	XI2C_SET_BITS(con, IC_CON, RESTART_EN, 1);
+	XI2C_SET_BITS(con, IC_CON, SPEED, XGBE_STD_SPEED);
+	XI2C_SET_BITS(con, IC_CON, RX_FIFO_FULL_HOLD, 1);
+	XI2C_IOWRITE(pdata, IC_CON, con);
+}
+
+static void xgbe_i2c_get_features(struct xgbe_prv_data *pdata)
+{
+	unsigned int param = XI2C_IOREAD(pdata, IC_COMP_PARAM_1);
+	struct xgbe_i2c *feat = &pdata->i2c;
+
+	/* Cache three fields of IC_COMP_PARAM_1 in pdata->i2c */
+	feat->max_speed_mode = XI2C_GET_BITS(param, IC_COMP_PARAM_1,
+					     MAX_SPEED_MODE);
+	feat->rx_fifo_size = XI2C_GET_BITS(param, IC_COMP_PARAM_1,
+					   RX_BUFFER_DEPTH);
+	feat->tx_fifo_size = XI2C_GET_BITS(param, IC_COMP_PARAM_1,
+					   TX_BUFFER_DEPTH);
+
+	if (netif_msg_probe(pdata))
+		dev_dbg(pdata->dev, "I2C features: %s=%u, %s=%u, %s=%u\n",
+			"MAX_SPEED_MODE", feat->max_speed_mode,
+			"RX_BUFFER_DEPTH", feat->rx_fifo_size,
+			"TX_BUFFER_DEPTH", feat->tx_fifo_size);
+}
+
+static void xgbe_i2c_set_target(struct xgbe_prv_data *pdata, unsigned int addr)
+{
+	XI2C_IOWRITE(pdata, IC_TAR, addr);	/* whole register := addr */
+}
+
+static irqreturn_t xgbe_i2c_combined_isr(int irq, struct xgbe_prv_data *pdata)
+{
+	/* Hand off to the real handler only if a raw status bit is set */
+	if (XI2C_IOREAD(pdata, IC_RAW_INTR_STAT))
+		return xgbe_i2c_isr(irq, pdata);
+	return IRQ_HANDLED;
+}
+
+/* Run one I2C operation under i2c_mutex: set the target, let the ISR move
+ * the data and wait up to HZ jiffies; returns 0 or a negative errno.
+ */
+static int xgbe_i2c_xfer(struct xgbe_prv_data *pdata, struct xgbe_i2c_op *op)
+{
+	struct xgbe_i2c_op_state *state = &pdata->i2c.op_state;
+	int ret;
+
+	mutex_lock(&pdata->i2c_mutex);
+	reinit_completion(&pdata->i2c_complete);
+
+	ret = xgbe_i2c_disable(pdata);
+	if (ret) {
+		netdev_err(pdata->netdev, "failed to disable i2c master\n");
+		goto out_unlock;
+	}
+
+	xgbe_i2c_set_target(pdata, op->target);
+
+	/* Tx and Rx progress both start from the caller's buffer and length */
+	memset(state, 0, sizeof(*state));
+	state->op = op;
+	state->tx_buf = op->buf;
+	state->rx_buf = op->buf;
+	state->tx_len = op->len;
+	state->rx_len = op->len;
+
+	xgbe_i2c_clear_all_interrupts(pdata);
+	ret = xgbe_i2c_enable(pdata);
+	if (ret) {
+		netdev_err(pdata->netdev, "failed to enable i2c master\n");
+		goto out_unlock;
+	}
+
+	/* Enabling the interrupts will cause the TX FIFO empty interrupt to
+	 * fire and begin to process the command via the ISR.
+	 */
+	xgbe_i2c_enable_interrupts(pdata);
+
+	if (wait_for_completion_timeout(&pdata->i2c_complete, HZ)) {
+		/* Completed: refine a failure using the recorded abort source */
+		ret = state->ret;
+		if (ret && (state->tx_abort_source & IC_TX_ABRT_7B_ADDR_NOACK))
+			ret = -ENOTCONN;
+		else if (ret && (state->tx_abort_source & IC_TX_ABRT_ARB_LOST))
+			ret = -EAGAIN;
+	} else {
+		netdev_err(pdata->netdev, "i2c operation timed out\n");
+		ret = -ETIMEDOUT;
+	}
+
+	xgbe_i2c_disable_interrupts(pdata);
+	xgbe_i2c_disable(pdata);
+
+out_unlock:
+	mutex_unlock(&pdata->i2c_mutex);
+
+	return ret;
+}
+
+static void xgbe_i2c_stop(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_i2c *i2c = &pdata->i2c;
+
+	if (!i2c->started)
+		return;		/* not started, nothing to undo */
+
+	netif_dbg(pdata, link, pdata->netdev, "stopping I2C\n");
+	i2c->started = 0;
+	/* Quiesce the controller, then free the I2C irq if it is separate */
+	xgbe_i2c_disable_interrupts(pdata);
+	xgbe_i2c_disable(pdata);
+	xgbe_i2c_clear_all_interrupts(pdata);
+	if (pdata->dev_irq != pdata->i2c_irq)
+		devm_free_irq(pdata->dev, pdata->i2c_irq, pdata);
+}
+
+static int xgbe_i2c_start(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_i2c *i2c = &pdata->i2c;
+	int err = 0;
+
+	if (i2c->started)
+		return 0;
+
+	netif_dbg(pdata, link, pdata->netdev, "starting I2C\n");
+
+	/* Only an I2C irq distinct from the device irq is requested here */
+	if (pdata->dev_irq != pdata->i2c_irq)
+		err = devm_request_irq(pdata->dev, pdata->i2c_irq,
+				       xgbe_i2c_isr, 0, pdata->i2c_name,
+				       pdata);
+	if (err) {
+		netdev_err(pdata->netdev, "i2c irq request failed\n");
+		return err;
+	}
+
+	i2c->started = 1;
+
+	return 0;
+}
+
+static int xgbe_i2c_init(struct xgbe_prv_data *pdata)
+{
+	int err;
+
+	/* Start with the interrupt mask cleared and the controller disabled */
+	xgbe_i2c_disable_interrupts(pdata);
+	err = xgbe_i2c_disable(pdata);
+	if (err) {
+		dev_err(pdata->dev, "failed to disable i2c master\n");
+		return err;
+	}
+
+	/* Record the hardware parameters, then program IC_CON */
+	xgbe_i2c_get_features(pdata);
+	xgbe_i2c_set_mode(pdata);
+
+	xgbe_i2c_clear_all_interrupts(pdata);
+
+	return 0;
+}
+
+void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *i2c_if)
+{
+	/* Setup and start/stop entry points */
+	i2c_if->i2c_init = xgbe_i2c_init;
+	i2c_if->i2c_start = xgbe_i2c_start;
+	i2c_if->i2c_stop = xgbe_i2c_stop;
+
+	/* Transfer entry point and the combined ISR wrapper */
+	i2c_if->i2c_xfer = xgbe_i2c_xfer;
+	i2c_if->i2c_isr = xgbe_i2c_combined_isr;
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
index 9de078819aa6..17ac8f9a51a0 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c
@@ -116,19 +116,10 @@
 
 #include <linux/module.h>
 #include <linux/device.h>
-#include <linux/platform_device.h>
 #include <linux/spinlock.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_net.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-#include <linux/clk.h>
-#include <linux/property.h>
-#include <linux/acpi.h>
-#include <linux/mdio.h>
 
 #include "xgbe.h"
 #include "xgbe-common.h"
@@ -145,42 +136,6 @@ MODULE_PARM_DESC(debug, " Network interface message level setting");
 static const u32 default_msg_level = (NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
 				      NETIF_MSG_IFUP);
 
-static const u32 xgbe_serdes_blwc[] = {
-	XGBE_SPEED_1000_BLWC,
-	XGBE_SPEED_2500_BLWC,
-	XGBE_SPEED_10000_BLWC,
-};
-
-static const u32 xgbe_serdes_cdr_rate[] = {
-	XGBE_SPEED_1000_CDR,
-	XGBE_SPEED_2500_CDR,
-	XGBE_SPEED_10000_CDR,
-};
-
-static const u32 xgbe_serdes_pq_skew[] = {
-	XGBE_SPEED_1000_PQ,
-	XGBE_SPEED_2500_PQ,
-	XGBE_SPEED_10000_PQ,
-};
-
-static const u32 xgbe_serdes_tx_amp[] = {
-	XGBE_SPEED_1000_TXAMP,
-	XGBE_SPEED_2500_TXAMP,
-	XGBE_SPEED_10000_TXAMP,
-};
-
-static const u32 xgbe_serdes_dfe_tap_cfg[] = {
-	XGBE_SPEED_1000_DFE_TAP_CONFIG,
-	XGBE_SPEED_2500_DFE_TAP_CONFIG,
-	XGBE_SPEED_10000_DFE_TAP_CONFIG,
-};
-
-static const u32 xgbe_serdes_dfe_tap_ena[] = {
-	XGBE_SPEED_1000_DFE_TAP_ENABLE,
-	XGBE_SPEED_2500_DFE_TAP_ENABLE,
-	XGBE_SPEED_10000_DFE_TAP_ENABLE,
-};
-
 static void xgbe_default_config(struct xgbe_prv_data *pdata)
 {
 	DBGPR("-->xgbe_default_config\n");
@@ -206,455 +161,124 @@ static void xgbe_init_all_fptrs(struct xgbe_prv_data *pdata)
 {
 	xgbe_init_function_ptrs_dev(&pdata->hw_if);
 	xgbe_init_function_ptrs_phy(&pdata->phy_if);
+	xgbe_init_function_ptrs_i2c(&pdata->i2c_if);
 	xgbe_init_function_ptrs_desc(&pdata->desc_if);
-}
-
-#ifdef CONFIG_ACPI
-static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
-{
-	struct device *dev = pdata->dev;
-	u32 property;
-	int ret;
-
-	/* Obtain the system clock setting */
-	ret = device_property_read_u32(dev, XGBE_ACPI_DMA_FREQ, &property);
-	if (ret) {
-		dev_err(dev, "unable to obtain %s property\n",
-			XGBE_ACPI_DMA_FREQ);
-		return ret;
-	}
-	pdata->sysclk_rate = property;
-
-	/* Obtain the PTP clock setting */
-	ret = device_property_read_u32(dev, XGBE_ACPI_PTP_FREQ, &property);
-	if (ret) {
-		dev_err(dev, "unable to obtain %s property\n",
-			XGBE_ACPI_PTP_FREQ);
-		return ret;
-	}
-	pdata->ptpclk_rate = property;
 
-	return 0;
+	pdata->vdata->init_function_ptrs_phy_impl(&pdata->phy_if);
 }
-#else   /* CONFIG_ACPI */
-static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
-{
-	return -EINVAL;
-}
-#endif  /* CONFIG_ACPI */
 
-#ifdef CONFIG_OF
-static int xgbe_of_support(struct xgbe_prv_data *pdata)
-{
-	struct device *dev = pdata->dev;
-
-	/* Obtain the system clock setting */
-	pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK);
-	if (IS_ERR(pdata->sysclk)) {
-		dev_err(dev, "dma devm_clk_get failed\n");
-		return PTR_ERR(pdata->sysclk);
-	}
-	pdata->sysclk_rate = clk_get_rate(pdata->sysclk);
-
-	/* Obtain the PTP clock setting */
-	pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK);
-	if (IS_ERR(pdata->ptpclk)) {
-		dev_err(dev, "ptp devm_clk_get failed\n");
-		return PTR_ERR(pdata->ptpclk);
-	}
-	pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk);
-
-	return 0;
-}
-
-static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
-{
-	struct device *dev = pdata->dev;
-	struct device_node *phy_node;
-	struct platform_device *phy_pdev;
-
-	phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0);
-	if (phy_node) {
-		/* Old style device tree:
-		 *   The XGBE and PHY resources are separate
-		 */
-		phy_pdev = of_find_device_by_node(phy_node);
-		of_node_put(phy_node);
-	} else {
-		/* New style device tree:
-		 *   The XGBE and PHY resources are grouped together with
-		 *   the PHY resources listed last
-		 */
-		get_device(dev);
-		phy_pdev = pdata->pdev;
-	}
-
-	return phy_pdev;
-}
-#else   /* CONFIG_OF */
-static int xgbe_of_support(struct xgbe_prv_data *pdata)
-{
-	return -EINVAL;
-}
-
-static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
-{
-	return NULL;
-}
-#endif  /* CONFIG_OF */
-
-static unsigned int xgbe_resource_count(struct platform_device *pdev,
-					unsigned int type)
-{
-	unsigned int count;
-	int i;
-
-	for (i = 0, count = 0; i < pdev->num_resources; i++) {
-		struct resource *res = &pdev->resource[i];
-
-		if (type == resource_type(res))
-			count++;
-	}
-
-	return count;
-}
-
-static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata)
-{
-	struct platform_device *phy_pdev;
-
-	if (pdata->use_acpi) {
-		get_device(pdata->dev);
-		phy_pdev = pdata->pdev;
-	} else {
-		phy_pdev = xgbe_of_get_phy_pdev(pdata);
-	}
-
-	return phy_pdev;
-}
-
-static int xgbe_probe(struct platform_device *pdev)
+struct xgbe_prv_data *xgbe_alloc_pdata(struct device *dev)
 {
 	struct xgbe_prv_data *pdata;
 	struct net_device *netdev;
-	struct device *dev = &pdev->dev, *phy_dev;
-	struct platform_device *phy_pdev;
-	struct resource *res;
-	const char *phy_mode;
-	unsigned int i, phy_memnum, phy_irqnum;
-	enum dev_dma_attr attr;
-	int ret;
-
-	DBGPR("--> xgbe_probe\n");
 
 	netdev = alloc_etherdev_mq(sizeof(struct xgbe_prv_data),
 				   XGBE_MAX_DMA_CHANNELS);
 	if (!netdev) {
-		dev_err(dev, "alloc_etherdev failed\n");
-		ret = -ENOMEM;
-		goto err_alloc;
+		dev_err(dev, "alloc_etherdev_mq failed\n");
+		return ERR_PTR(-ENOMEM);
 	}
 	SET_NETDEV_DEV(netdev, dev);
 	pdata = netdev_priv(netdev);
 	pdata->netdev = netdev;
-	pdata->pdev = pdev;
-	pdata->adev = ACPI_COMPANION(dev);
 	pdata->dev = dev;
-	platform_set_drvdata(pdev, netdev);
 
 	spin_lock_init(&pdata->lock);
 	spin_lock_init(&pdata->xpcs_lock);
 	mutex_init(&pdata->rss_mutex);
 	spin_lock_init(&pdata->tstamp_lock);
+	mutex_init(&pdata->i2c_mutex);
+	init_completion(&pdata->i2c_complete);
+	init_completion(&pdata->mdio_complete);
 
 	pdata->msg_enable = netif_msg_init(debug, default_msg_level);
 
 	set_bit(XGBE_DOWN, &pdata->dev_state);
+	set_bit(XGBE_STOPPED, &pdata->dev_state);
 
-	/* Check if we should use ACPI or DT */
-	pdata->use_acpi = dev->of_node ? 0 : 1;
-
-	phy_pdev = xgbe_get_phy_pdev(pdata);
-	if (!phy_pdev) {
-		dev_err(dev, "unable to obtain phy device\n");
-		ret = -EINVAL;
-		goto err_phydev;
-	}
-	phy_dev = &phy_pdev->dev;
-
-	if (pdev == phy_pdev) {
-		/* New style device tree or ACPI:
-		 *   The XGBE and PHY resources are grouped together with
-		 *   the PHY resources listed last
-		 */
-		phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3;
-		phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1;
-	} else {
-		/* Old style device tree:
-		 *   The XGBE and PHY resources are separate
-		 */
-		phy_memnum = 0;
-		phy_irqnum = 0;
-	}
-
-	/* Set and validate the number of descriptors for a ring */
-	BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT);
-	pdata->tx_desc_count = XGBE_TX_DESC_CNT;
-	if (pdata->tx_desc_count & (pdata->tx_desc_count - 1)) {
-		dev_err(dev, "tx descriptor count (%d) is not valid\n",
-			pdata->tx_desc_count);
-		ret = -EINVAL;
-		goto err_io;
-	}
-	BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT);
-	pdata->rx_desc_count = XGBE_RX_DESC_CNT;
-	if (pdata->rx_desc_count & (pdata->rx_desc_count - 1)) {
-		dev_err(dev, "rx descriptor count (%d) is not valid\n",
-			pdata->rx_desc_count);
-		ret = -EINVAL;
-		goto err_io;
-	}
-
-	/* Obtain the mmio areas for the device */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	pdata->xgmac_regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(pdata->xgmac_regs)) {
-		dev_err(dev, "xgmac ioremap failed\n");
-		ret = PTR_ERR(pdata->xgmac_regs);
-		goto err_io;
-	}
-	if (netif_msg_probe(pdata))
-		dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-	pdata->xpcs_regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(pdata->xpcs_regs)) {
-		dev_err(dev, "xpcs ioremap failed\n");
-		ret = PTR_ERR(pdata->xpcs_regs);
-		goto err_io;
-	}
-	if (netif_msg_probe(pdata))
-		dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
-
-	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-	pdata->rxtx_regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(pdata->rxtx_regs)) {
-		dev_err(dev, "rxtx ioremap failed\n");
-		ret = PTR_ERR(pdata->rxtx_regs);
-		goto err_io;
-	}
-	if (netif_msg_probe(pdata))
-		dev_dbg(dev, "rxtx_regs  = %p\n", pdata->rxtx_regs);
-
-	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-	pdata->sir0_regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(pdata->sir0_regs)) {
-		dev_err(dev, "sir0 ioremap failed\n");
-		ret = PTR_ERR(pdata->sir0_regs);
-		goto err_io;
-	}
-	if (netif_msg_probe(pdata))
-		dev_dbg(dev, "sir0_regs  = %p\n", pdata->sir0_regs);
-
-	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
-	pdata->sir1_regs = devm_ioremap_resource(dev, res);
-	if (IS_ERR(pdata->sir1_regs)) {
-		dev_err(dev, "sir1 ioremap failed\n");
-		ret = PTR_ERR(pdata->sir1_regs);
-		goto err_io;
-	}
-	if (netif_msg_probe(pdata))
-		dev_dbg(dev, "sir1_regs  = %p\n", pdata->sir1_regs);
-
-	/* Retrieve the MAC address */
-	ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY,
-					    pdata->mac_addr,
-					    sizeof(pdata->mac_addr));
-	if (ret || !is_valid_ether_addr(pdata->mac_addr)) {
-		dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY);
-		if (!ret)
-			ret = -EINVAL;
-		goto err_io;
-	}
-
-	/* Retrieve the PHY mode - it must be "xgmii" */
-	ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY,
-					  &phy_mode);
-	if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) {
-		dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY);
-		if (!ret)
-			ret = -EINVAL;
-		goto err_io;
-	}
-	pdata->phy_mode = PHY_INTERFACE_MODE_XGMII;
-
-	/* Check for per channel interrupt support */
-	if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY))
-		pdata->per_channel_irq = 1;
+	return pdata;
+}
 
-	/* Retrieve the PHY speedset */
-	ret = device_property_read_u32(phy_dev, XGBE_SPEEDSET_PROPERTY,
-				       &pdata->speed_set);
-	if (ret) {
-		dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY);
-		goto err_io;
-	}
+void xgbe_free_pdata(struct xgbe_prv_data *pdata)
+{
+	struct net_device *netdev = pdata->netdev;
 
-	switch (pdata->speed_set) {
-	case XGBE_SPEEDSET_1000_10000:
-	case XGBE_SPEEDSET_2500_10000:
-		break;
-	default:
-		dev_err(dev, "invalid %s property\n", XGBE_SPEEDSET_PROPERTY);
-		ret = -EINVAL;
-		goto err_io;
-	}
+	free_netdev(netdev);
+}
 
-	/* Retrieve the PHY configuration properties */
-	if (device_property_present(phy_dev, XGBE_BLWC_PROPERTY)) {
-		ret = device_property_read_u32_array(phy_dev,
-						     XGBE_BLWC_PROPERTY,
-						     pdata->serdes_blwc,
-						     XGBE_SPEEDS);
-		if (ret) {
-			dev_err(dev, "invalid %s property\n",
-				XGBE_BLWC_PROPERTY);
-			goto err_io;
-		}
-	} else {
-		memcpy(pdata->serdes_blwc, xgbe_serdes_blwc,
-		       sizeof(pdata->serdes_blwc));
-	}
+void xgbe_set_counts(struct xgbe_prv_data *pdata)
+{
+	/* Set all the function pointers */
+	xgbe_init_all_fptrs(pdata);
 
-	if (device_property_present(phy_dev, XGBE_CDR_RATE_PROPERTY)) {
-		ret = device_property_read_u32_array(phy_dev,
-						     XGBE_CDR_RATE_PROPERTY,
-						     pdata->serdes_cdr_rate,
-						     XGBE_SPEEDS);
-		if (ret) {
-			dev_err(dev, "invalid %s property\n",
-				XGBE_CDR_RATE_PROPERTY);
-			goto err_io;
-		}
-	} else {
-		memcpy(pdata->serdes_cdr_rate, xgbe_serdes_cdr_rate,
-		       sizeof(pdata->serdes_cdr_rate));
-	}
+	/* Populate the hardware features */
+	xgbe_get_all_hw_features(pdata);
 
-	if (device_property_present(phy_dev, XGBE_PQ_SKEW_PROPERTY)) {
-		ret = device_property_read_u32_array(phy_dev,
-						     XGBE_PQ_SKEW_PROPERTY,
-						     pdata->serdes_pq_skew,
-						     XGBE_SPEEDS);
-		if (ret) {
-			dev_err(dev, "invalid %s property\n",
-				XGBE_PQ_SKEW_PROPERTY);
-			goto err_io;
-		}
-	} else {
-		memcpy(pdata->serdes_pq_skew, xgbe_serdes_pq_skew,
-		       sizeof(pdata->serdes_pq_skew));
-	}
+	/* Set default max values if not provided */
+	if (!pdata->tx_max_channel_count)
+		pdata->tx_max_channel_count = pdata->hw_feat.tx_ch_cnt;
+	if (!pdata->rx_max_channel_count)
+		pdata->rx_max_channel_count = pdata->hw_feat.rx_ch_cnt;
 
-	if (device_property_present(phy_dev, XGBE_TX_AMP_PROPERTY)) {
-		ret = device_property_read_u32_array(phy_dev,
-						     XGBE_TX_AMP_PROPERTY,
-						     pdata->serdes_tx_amp,
-						     XGBE_SPEEDS);
-		if (ret) {
-			dev_err(dev, "invalid %s property\n",
-				XGBE_TX_AMP_PROPERTY);
-			goto err_io;
-		}
-	} else {
-		memcpy(pdata->serdes_tx_amp, xgbe_serdes_tx_amp,
-		       sizeof(pdata->serdes_tx_amp));
-	}
+	if (!pdata->tx_max_q_count)
+		pdata->tx_max_q_count = pdata->hw_feat.tx_q_cnt;
+	if (!pdata->rx_max_q_count)
+		pdata->rx_max_q_count = pdata->hw_feat.rx_q_cnt;
 
-	if (device_property_present(phy_dev, XGBE_DFE_CFG_PROPERTY)) {
-		ret = device_property_read_u32_array(phy_dev,
-						     XGBE_DFE_CFG_PROPERTY,
-						     pdata->serdes_dfe_tap_cfg,
-						     XGBE_SPEEDS);
-		if (ret) {
-			dev_err(dev, "invalid %s property\n",
-				XGBE_DFE_CFG_PROPERTY);
-			goto err_io;
-		}
-	} else {
-		memcpy(pdata->serdes_dfe_tap_cfg, xgbe_serdes_dfe_tap_cfg,
-		       sizeof(pdata->serdes_dfe_tap_cfg));
-	}
+	/* Calculate the number of Tx and Rx rings to be created
+	 *  -Tx (DMA) Channels map 1-to-1 to Tx Queues so set
+	 *   the number of Tx queues to the number of Tx channels
+	 *   enabled
+	 *  -Rx (DMA) Channels do not map 1-to-1 so use the actual
+	 *   number of Rx queues or maximum allowed
+	 */
+	pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(),
+				     pdata->hw_feat.tx_ch_cnt);
+	pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count,
+				     pdata->tx_max_channel_count);
+	pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count,
+				     pdata->tx_max_q_count);
 
-	if (device_property_present(phy_dev, XGBE_DFE_ENA_PROPERTY)) {
-		ret = device_property_read_u32_array(phy_dev,
-						     XGBE_DFE_ENA_PROPERTY,
-						     pdata->serdes_dfe_tap_ena,
-						     XGBE_SPEEDS);
-		if (ret) {
-			dev_err(dev, "invalid %s property\n",
-				XGBE_DFE_ENA_PROPERTY);
-			goto err_io;
-		}
-	} else {
-		memcpy(pdata->serdes_dfe_tap_ena, xgbe_serdes_dfe_tap_ena,
-		       sizeof(pdata->serdes_dfe_tap_ena));
-	}
+	pdata->tx_q_count = pdata->tx_ring_count;
 
-	/* Obtain device settings unique to ACPI/OF */
-	if (pdata->use_acpi)
-		ret = xgbe_acpi_support(pdata);
-	else
-		ret = xgbe_of_support(pdata);
-	if (ret)
-		goto err_io;
+	pdata->rx_ring_count = min_t(unsigned int, num_online_cpus(),
+				     pdata->hw_feat.rx_ch_cnt);
+	pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count,
+				     pdata->rx_max_channel_count);
 
-	/* Set the DMA coherency values */
-	attr = device_get_dma_attr(dev);
-	if (attr == DEV_DMA_NOT_SUPPORTED) {
-		dev_err(dev, "DMA is not supported");
-		goto err_io;
-	}
-	pdata->coherent = (attr == DEV_DMA_COHERENT);
-	if (pdata->coherent) {
-		pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
-		pdata->arcache = XGBE_DMA_OS_ARCACHE;
-		pdata->awcache = XGBE_DMA_OS_AWCACHE;
-	} else {
-		pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN;
-		pdata->arcache = XGBE_DMA_SYS_ARCACHE;
-		pdata->awcache = XGBE_DMA_SYS_AWCACHE;
-	}
+	pdata->rx_q_count = min_t(unsigned int, pdata->hw_feat.rx_q_cnt,
+				  pdata->rx_max_q_count);
 
-	/* Get the device interrupt */
-	ret = platform_get_irq(pdev, 0);
-	if (ret < 0) {
-		dev_err(dev, "platform_get_irq 0 failed\n");
-		goto err_io;
+	if (netif_msg_probe(pdata)) {
+		dev_dbg(pdata->dev, "TX/RX DMA channel count = %u/%u\n",
+			pdata->tx_ring_count, pdata->rx_ring_count);
+		dev_dbg(pdata->dev, "TX/RX hardware queue count = %u/%u\n",
+			pdata->tx_q_count, pdata->rx_q_count);
 	}
-	pdata->dev_irq = ret;
+}
 
-	/* Get the auto-negotiation interrupt */
-	ret = platform_get_irq(phy_pdev, phy_irqnum++);
-	if (ret < 0) {
-		dev_err(dev, "platform_get_irq phy 0 failed\n");
-		goto err_io;
-	}
-	pdata->an_irq = ret;
+int xgbe_config_netdev(struct xgbe_prv_data *pdata)
+{
+	struct net_device *netdev = pdata->netdev;
+	struct device *dev = pdata->dev;
+	unsigned int i;
+	int ret;
 
 	netdev->irq = pdata->dev_irq;
 	netdev->base_addr = (unsigned long)pdata->xgmac_regs;
 	memcpy(netdev->dev_addr, pdata->mac_addr, netdev->addr_len);
 
-	/* Set all the function pointers */
-	xgbe_init_all_fptrs(pdata);
+	/* Initialize ECC timestamps */
+	pdata->tx_sec_period = jiffies;
+	pdata->tx_ded_period = jiffies;
+	pdata->rx_sec_period = jiffies;
+	pdata->rx_ded_period = jiffies;
+	pdata->desc_sec_period = jiffies;
+	pdata->desc_ded_period = jiffies;
 
 	/* Issue software reset to device */
 	pdata->hw_if.exit(pdata);
 
-	/* Populate the hardware features */
-	xgbe_get_all_hw_features(pdata);
-
 	/* Set default configuration data */
 	xgbe_default_config(pdata);
 
@@ -663,33 +287,46 @@ static int xgbe_probe(struct platform_device *pdev)
 					DMA_BIT_MASK(pdata->hw_feat.dma_width));
 	if (ret) {
 		dev_err(dev, "dma_set_mask_and_coherent failed\n");
-		goto err_io;
+		return ret;
 	}
 
-	/* Calculate the number of Tx and Rx rings to be created
-	 *  -Tx (DMA) Channels map 1-to-1 to Tx Queues so set
-	 *   the number of Tx queues to the number of Tx channels
-	 *   enabled
-	 *  -Rx (DMA) Channels do not map 1-to-1 so use the actual
-	 *   number of Rx queues
-	 */
-	pdata->tx_ring_count = min_t(unsigned int, num_online_cpus(),
-				     pdata->hw_feat.tx_ch_cnt);
-	pdata->tx_q_count = pdata->tx_ring_count;
+	/* Set default max values if not provided */
+	if (!pdata->tx_max_fifo_size)
+		pdata->tx_max_fifo_size = pdata->hw_feat.tx_fifo_size;
+	if (!pdata->rx_max_fifo_size)
+		pdata->rx_max_fifo_size = pdata->hw_feat.rx_fifo_size;
+
+	/* Set and validate the number of descriptors for a ring */
+	BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_TX_DESC_CNT);
+	pdata->tx_desc_count = XGBE_TX_DESC_CNT;
+
+	BUILD_BUG_ON_NOT_POWER_OF_2(XGBE_RX_DESC_CNT);
+	pdata->rx_desc_count = XGBE_RX_DESC_CNT;
+
+	/* Adjust the number of queues based on interrupts assigned */
+	if (pdata->channel_irq_count) {
+		pdata->tx_ring_count = min_t(unsigned int, pdata->tx_ring_count,
+					     pdata->channel_irq_count);
+		pdata->rx_ring_count = min_t(unsigned int, pdata->rx_ring_count,
+					     pdata->channel_irq_count);
+
+		if (netif_msg_probe(pdata))
+			dev_dbg(pdata->dev,
+				"adjusted TX/RX DMA channel count = %u/%u\n",
+				pdata->tx_ring_count, pdata->rx_ring_count);
+	}
+
+	/* Set the number of queues */
 	ret = netif_set_real_num_tx_queues(netdev, pdata->tx_ring_count);
 	if (ret) {
 		dev_err(dev, "error setting real tx queue count\n");
-		goto err_io;
+		return ret;
 	}
 
-	pdata->rx_ring_count = min_t(unsigned int,
-				     netif_get_num_default_rss_queues(),
-				     pdata->hw_feat.rx_ch_cnt);
-	pdata->rx_q_count = pdata->hw_feat.rx_q_cnt;
 	ret = netif_set_real_num_rx_queues(netdev, pdata->rx_ring_count);
 	if (ret) {
 		dev_err(dev, "error setting real rx queue count\n");
-		goto err_io;
+		return ret;
 	}
 
 	/* Initialize RSS hash key and lookup table */
@@ -704,7 +341,9 @@ static int xgbe_probe(struct platform_device *pdev)
 	XGMAC_SET_BITS(pdata->rss_options, MAC_RSSCR, UDP4TE, 1);
 
 	/* Call MDIO/PHY initialization routine */
-	pdata->phy_if.phy_init(pdata);
+	ret = pdata->phy_if.phy_init(pdata);
+	if (ret)
+		return ret;
 
 	/* Set device operations */
 	netdev->netdev_ops = xgbe_get_netdev_ops();
@@ -738,6 +377,8 @@ static int xgbe_probe(struct platform_device *pdev)
 	pdata->netdev_features = netdev->features;
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->min_mtu = 0;
+	netdev->max_mtu = XGMAC_JUMBO_PACKET_MTU;
 
 	/* Use default watchdog timeout */
 	netdev->watchdog_timeo = 0;
@@ -749,13 +390,21 @@ static int xgbe_probe(struct platform_device *pdev)
 	ret = register_netdev(netdev);
 	if (ret) {
 		dev_err(dev, "net device registration failed\n");
-		goto err_io;
+		return ret;
 	}
 
 	/* Create the PHY/ANEG name based on netdev name */
 	snprintf(pdata->an_name, sizeof(pdata->an_name) - 1, "%s-pcs",
 		 netdev_name(netdev));
 
+	/* Create the ECC name based on netdev name */
+	snprintf(pdata->ecc_name, sizeof(pdata->ecc_name) - 1, "%s-ecc",
+		 netdev_name(netdev));
+
+	/* Create the I2C name based on netdev name */
+	snprintf(pdata->i2c_name, sizeof(pdata->i2c_name) - 1, "%s-i2c",
+		 netdev_name(netdev));
+
 	/* Create workqueues */
 	pdata->dev_workqueue =
 		create_singlethread_workqueue(netdev_name(netdev));
@@ -773,15 +422,15 @@ static int xgbe_probe(struct platform_device *pdev)
 		goto err_wq;
 	}
 
-	xgbe_ptp_register(pdata);
+	if (IS_REACHABLE(CONFIG_PTP_1588_CLOCK))
+		xgbe_ptp_register(pdata);
 
 	xgbe_debugfs_init(pdata);
 
-	platform_device_put(phy_pdev);
-
-	netdev_notice(netdev, "net device enabled\n");
-
-	DBGPR("<-- xgbe_probe\n");
+	netif_dbg(pdata, drv, pdata->netdev, "%u Tx software queues\n",
+		  pdata->tx_ring_count);
+	netif_dbg(pdata, drv, pdata->netdev, "%u Rx software queues\n",
+		  pdata->rx_ring_count);
 
 	return 0;
 
@@ -791,28 +440,19 @@ err_wq:
 err_netdev:
 	unregister_netdev(netdev);
 
-err_io:
-	platform_device_put(phy_pdev);
-
-err_phydev:
-	free_netdev(netdev);
-
-err_alloc:
-	dev_notice(dev, "net device not enabled\n");
-
 	return ret;
 }
 
-static int xgbe_remove(struct platform_device *pdev)
+void xgbe_deconfig_netdev(struct xgbe_prv_data *pdata)
 {
-	struct net_device *netdev = platform_get_drvdata(pdev);
-	struct xgbe_prv_data *pdata = netdev_priv(netdev);
-
-	DBGPR("-->xgbe_remove\n");
+	struct net_device *netdev = pdata->netdev;
 
 	xgbe_debugfs_exit(pdata);
 
-	xgbe_ptp_unregister(pdata);
+	if (IS_REACHABLE(CONFIG_PTP_1588_CLOCK))
+		xgbe_ptp_unregister(pdata);
+
+	pdata->phy_if.phy_exit(pdata);
 
 	flush_workqueue(pdata->an_workqueue);
 	destroy_workqueue(pdata->an_workqueue);
@@ -821,94 +461,29 @@ static int xgbe_remove(struct platform_device *pdev)
 	destroy_workqueue(pdata->dev_workqueue);
 
 	unregister_netdev(netdev);
-
-	free_netdev(netdev);
-
-	DBGPR("<--xgbe_remove\n");
-
-	return 0;
 }
 
-#ifdef CONFIG_PM
-static int xgbe_suspend(struct device *dev)
+static int __init xgbe_mod_init(void)
 {
-	struct net_device *netdev = dev_get_drvdata(dev);
-	struct xgbe_prv_data *pdata = netdev_priv(netdev);
-	int ret = 0;
-
-	DBGPR("-->xgbe_suspend\n");
-
-	if (netif_running(netdev))
-		ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+	int ret;
 
-	pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-	pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+	ret = xgbe_platform_init();
+	if (ret)
+		return ret;
 
-	DBGPR("<--xgbe_suspend\n");
+	ret = xgbe_pci_init();
+	if (ret)	/* PCI init failed: undo xgbe_platform_init() */
+		xgbe_platform_exit();
 
-	return ret;
+	return ret;
 }
 
-static int xgbe_resume(struct device *dev)
+static void __exit xgbe_mod_exit(void)
 {
-	struct net_device *netdev = dev_get_drvdata(dev);
-	struct xgbe_prv_data *pdata = netdev_priv(netdev);
-	int ret = 0;
-
-	DBGPR("-->xgbe_resume\n");
-
-	pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
-
-	if (netif_running(netdev)) {
-		ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
-
-		/* Schedule a restart in case the link or phy state changed
-		 * while we were powered down.
-		 */
-		schedule_work(&pdata->restart_work);
-	}
-
-	DBGPR("<--xgbe_resume\n");
+	xgbe_pci_exit();
 
-	return ret;
+	xgbe_platform_exit();
 }
-#endif /* CONFIG_PM */
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id xgbe_acpi_match[] = {
-	{ "AMDI8001", 0 },
-	{},
-};
-
-MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match);
-#endif
-
-#ifdef CONFIG_OF
-static const struct of_device_id xgbe_of_match[] = {
-	{ .compatible = "amd,xgbe-seattle-v1a", },
-	{},
-};
-
-MODULE_DEVICE_TABLE(of, xgbe_of_match);
-#endif
-
-static SIMPLE_DEV_PM_OPS(xgbe_pm_ops, xgbe_suspend, xgbe_resume);
-
-static struct platform_driver xgbe_driver = {
-	.driver = {
-		.name = "amd-xgbe",
-#ifdef CONFIG_ACPI
-		.acpi_match_table = xgbe_acpi_match,
-#endif
-#ifdef CONFIG_OF
-		.of_match_table = xgbe_of_match,
-#endif
-		.pm = &xgbe_pm_ops,
-	},
-	.probe = xgbe_probe,
-	.remove = xgbe_remove,
-};
 
-module_platform_driver(xgbe_driver);
+module_init(xgbe_mod_init);
+module_exit(xgbe_mod_exit);
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
index 84c5d296d13e..4c5b90eea4af 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-mdio.c
@@ -125,303 +125,284 @@
 #include "xgbe.h"
 #include "xgbe-common.h"
 
-static void xgbe_an_enable_kr_training(struct xgbe_prv_data *pdata)
+static void xgbe_an37_clear_interrupts(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+	int reg;
 
-	reg |= XGBE_KR_TRAINING_ENABLE;
-	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+	reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT);
+	reg &= ~XGBE_AN_CL37_INT_MASK;	/* clear only the CL37 int bits */
+	XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg);
 }
 
-static void xgbe_an_disable_kr_training(struct xgbe_prv_data *pdata)
+static void xgbe_an37_disable_interrupts(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
+	int reg;
 
-	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
+	reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL);
+	reg &= ~XGBE_AN_CL37_INT_MASK;	/* CL37 AN interrupts off */
+	XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
 
-	reg &= ~XGBE_KR_TRAINING_ENABLE;
-	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL);
+	reg &= ~XGBE_PCS_CL37_BP;	/* enable path sets this bit first */
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg);
 }
 
-static void xgbe_pcs_power_cycle(struct xgbe_prv_data *pdata)
+static void xgbe_an37_enable_interrupts(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	int reg;
 
-	reg |= MDIO_CTRL1_LPOWER;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL);
+	reg |= XGBE_PCS_CL37_BP;	/* disable path clears this bit last */
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_PCS_DIG_CTRL, reg);
 
-	usleep_range(75, 100);
-
-	reg &= ~MDIO_CTRL1_LPOWER;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+	reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL);
+	reg |= XGBE_AN_CL37_INT_MASK;	/* CL37 AN interrupts on */
+	XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
 }
 
-static void xgbe_serdes_start_ratechange(struct xgbe_prv_data *pdata)
+static void xgbe_an73_clear_interrupts(struct xgbe_prv_data *pdata)
 {
-	/* Assert Rx and Tx ratechange */
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1);
+	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);	/* all bits */
 }
 
-static void xgbe_serdes_complete_ratechange(struct xgbe_prv_data *pdata)
+static void xgbe_an73_disable_interrupts(struct xgbe_prv_data *pdata)
 {
-	unsigned int wait;
-	u16 status;
-
-	/* Release Rx and Tx ratechange */
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0);
+	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);	/* all off */
+}
 
-	/* Wait for Rx and Tx ready */
-	wait = XGBE_RATECHANGE_COUNT;
-	while (wait--) {
-		usleep_range(50, 75);
+static void xgbe_an73_enable_interrupts(struct xgbe_prv_data *pdata)
+{	/* enable exactly the XGBE_AN_CL73_INT_MASK sources */
+	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_CL73_INT_MASK);
+}
 
-		status = XSIR0_IOREAD(pdata, SIR0_STATUS);
-		if (XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) &&
-		    XSIR_GET_BITS(status, SIR0_STATUS, TX_READY))
-			goto rx_reset;
+static void xgbe_an_enable_interrupts(struct xgbe_prv_data *pdata)
+{	/* dispatch on the AN clause recorded in pdata->an_mode */
+	switch (pdata->an_mode) {
+	case XGBE_AN_MODE_CL73:
+	case XGBE_AN_MODE_CL73_REDRV:
+		xgbe_an73_enable_interrupts(pdata);
+		break;
+	case XGBE_AN_MODE_CL37:
+	case XGBE_AN_MODE_CL37_SGMII:
+		xgbe_an37_enable_interrupts(pdata);
+		break;
+	default:	/* any other mode: nothing is enabled */
+		break;
 	}
+}
 
-	netif_dbg(pdata, link, pdata->netdev, "SerDes rx/tx not ready (%#hx)\n",
-		  status);
-
-rx_reset:
-	/* Perform Rx reset for the DFE changes */
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1);
+static void xgbe_an_clear_interrupts_all(struct xgbe_prv_data *pdata)
+{	/* clears both clauses, without consulting pdata->an_mode */
+	xgbe_an73_clear_interrupts(pdata);
+	xgbe_an37_clear_interrupts(pdata);
 }
 
-static void xgbe_xgmii_mode(struct xgbe_prv_data *pdata)
+static void xgbe_an73_enable_kr_training(struct xgbe_prv_data *pdata)
 {
 	unsigned int reg;
 
-	/* Enable KR training */
-	xgbe_an_enable_kr_training(pdata);
-
-	/* Set MAC to 10G speed */
-	pdata->hw_if.set_xgmii_speed(pdata);
-
-	/* Set PCS to KR/10G speed */
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-	reg &= ~MDIO_PCS_CTRL2_TYPE;
-	reg |= MDIO_PCS_CTRL2_10GBR;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
 
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-	reg &= ~MDIO_CTRL1_SPEEDSEL;
-	reg |= MDIO_CTRL1_SPEED10G;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+	reg |= XGBE_KR_TRAINING_ENABLE;	/* other bits are written back as read */
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+}
 
-	xgbe_pcs_power_cycle(pdata);
+static void xgbe_an73_disable_kr_training(struct xgbe_prv_data *pdata)
+{
+	unsigned int reg;
 
-	/* Set SerDes to 10G speed */
-	xgbe_serdes_start_ratechange(pdata);
+	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
 
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL);
+	reg &= ~XGBE_KR_TRAINING_ENABLE;	/* other bits written back as read */
+	XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL, reg);
+}
 
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
-			   pdata->serdes_cdr_rate[XGBE_SPEED_10000]);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
-			   pdata->serdes_tx_amp[XGBE_SPEED_10000]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
-			   pdata->serdes_blwc[XGBE_SPEED_10000]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
-			   pdata->serdes_pq_skew[XGBE_SPEED_10000]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
-			   pdata->serdes_dfe_tap_cfg[XGBE_SPEED_10000]);
-	XRXTX_IOWRITE(pdata, RXTX_REG22,
-		      pdata->serdes_dfe_tap_ena[XGBE_SPEED_10000]);
+static void xgbe_kr_mode(struct xgbe_prv_data *pdata)
+{
+	/* Enable KR training; the other mode helpers here disable it */
+	xgbe_an73_enable_kr_training(pdata);
 
-	xgbe_serdes_complete_ratechange(pdata);
+	/* Set MAC to 10G speed */
+	pdata->hw_if.set_speed(pdata, SPEED_10000);
 
-	netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n");
+	/* Call PHY implementation support to complete rate change */
+	pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KR);
 }
 
-static void xgbe_gmii_2500_mode(struct xgbe_prv_data *pdata)
+static void xgbe_kx_2500_mode(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
-
 	/* Disable KR training */
-	xgbe_an_disable_kr_training(pdata);
+	xgbe_an73_disable_kr_training(pdata);
 
 	/* Set MAC to 2.5G speed */
-	pdata->hw_if.set_gmii_2500_speed(pdata);
+	pdata->hw_if.set_speed(pdata, SPEED_2500);
 
-	/* Set PCS to KX/1G speed */
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-	reg &= ~MDIO_PCS_CTRL2_TYPE;
-	reg |= MDIO_PCS_CTRL2_10GBX;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+	/* Let the PHY implementation complete the change to KX 2.5G */
+	pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_2500);
+}
 
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-	reg &= ~MDIO_CTRL1_SPEEDSEL;
-	reg |= MDIO_CTRL1_SPEED1G;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+static void xgbe_kx_1000_mode(struct xgbe_prv_data *pdata)
+{	/* KX 1G: KR training off, MAC at 1G, then PHY set_mode */
+	/* Disable KR training */
+	xgbe_an73_disable_kr_training(pdata);
 
-	xgbe_pcs_power_cycle(pdata);
+	/* Set MAC to 1G speed */
+	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-	/* Set SerDes to 2.5G speed */
-	xgbe_serdes_start_ratechange(pdata);
+	/* Call PHY implementation support to complete rate change */
+	pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_KX_1000);
+}
 
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL);
+static void xgbe_sfi_mode(struct xgbe_prv_data *pdata)
+{	/* SFI: 10G MAC speed with KR training off, unless redirected */
+	/* If a KR re-driver is present, change to KR mode instead */
+	if (pdata->kr_redrv)
+		return xgbe_kr_mode(pdata);	/* nothing below runs */
 
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
-			   pdata->serdes_cdr_rate[XGBE_SPEED_2500]);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
-			   pdata->serdes_tx_amp[XGBE_SPEED_2500]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
-			   pdata->serdes_blwc[XGBE_SPEED_2500]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
-			   pdata->serdes_pq_skew[XGBE_SPEED_2500]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
-			   pdata->serdes_dfe_tap_cfg[XGBE_SPEED_2500]);
-	XRXTX_IOWRITE(pdata, RXTX_REG22,
-		      pdata->serdes_dfe_tap_ena[XGBE_SPEED_2500]);
+	/* Disable KR training */
+	xgbe_an73_disable_kr_training(pdata);
 
-	xgbe_serdes_complete_ratechange(pdata);
+	/* Set MAC to 10G speed */
+	pdata->hw_if.set_speed(pdata, SPEED_10000);
 
-	netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n");
+	/* Call PHY implementation support to complete rate change */
+	pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SFI);
 }
 
-static void xgbe_gmii_mode(struct xgbe_prv_data *pdata)
+static void xgbe_x_mode(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
-
 	/* Disable KR training */
-	xgbe_an_disable_kr_training(pdata);
+	xgbe_an73_disable_kr_training(pdata);
 
 	/* Set MAC to 1G speed */
-	pdata->hw_if.set_gmii_speed(pdata);
+	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-	/* Set PCS to KX/1G speed */
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-	reg &= ~MDIO_PCS_CTRL2_TYPE;
-	reg |= MDIO_PCS_CTRL2_10GBX;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-	reg &= ~MDIO_CTRL1_SPEEDSEL;
-	reg |= MDIO_CTRL1_SPEED1G;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+	/* Let the PHY implementation complete the change to X mode */
+	pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_X);
+}
 
-	xgbe_pcs_power_cycle(pdata);
+static void xgbe_sgmii_1000_mode(struct xgbe_prv_data *pdata)
+{	/* SGMII 1G: KR training off, MAC at 1G, then PHY set_mode */
+	/* Disable KR training */
+	xgbe_an73_disable_kr_training(pdata);
 
-	/* Set SerDes to 1G speed */
-	xgbe_serdes_start_ratechange(pdata);
+	/* Set MAC to 1G speed */
+	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL);
+	/* Call PHY implementation support to complete rate change */
+	pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_1000);
+}
 
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
-			   pdata->serdes_cdr_rate[XGBE_SPEED_1000]);
-	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
-			   pdata->serdes_tx_amp[XGBE_SPEED_1000]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
-			   pdata->serdes_blwc[XGBE_SPEED_1000]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
-			   pdata->serdes_pq_skew[XGBE_SPEED_1000]);
-	XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
-			   pdata->serdes_dfe_tap_cfg[XGBE_SPEED_1000]);
-	XRXTX_IOWRITE(pdata, RXTX_REG22,
-		      pdata->serdes_dfe_tap_ena[XGBE_SPEED_1000]);
+static void xgbe_sgmii_100_mode(struct xgbe_prv_data *pdata)
+{
+	/* Disable KR training */
+	xgbe_an73_disable_kr_training(pdata);
 
-	xgbe_serdes_complete_ratechange(pdata);
+	/* Set MAC to 1G speed; NOTE(review): not SPEED_100 - confirm */
+	pdata->hw_if.set_speed(pdata, SPEED_1000);
 
-	netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n");
+	/* Call PHY implementation support to complete rate change */
+	pdata->phy_if.phy_impl.set_mode(pdata, XGBE_MODE_SGMII_100);
 }
 
-static void xgbe_cur_mode(struct xgbe_prv_data *pdata,
-			  enum xgbe_mode *mode)
+static enum xgbe_mode xgbe_cur_mode(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
-	if ((reg & MDIO_PCS_CTRL2_TYPE) == MDIO_PCS_CTRL2_10GBR)
-		*mode = XGBE_MODE_KR;
-	else
-		*mode = XGBE_MODE_KX;
+	return pdata->phy_if.phy_impl.cur_mode(pdata);	/* PHY hook */
 }
 
 static bool xgbe_in_kr_mode(struct xgbe_prv_data *pdata)
 {
-	enum xgbe_mode mode;
-
-	xgbe_cur_mode(pdata, &mode);
+	return (xgbe_cur_mode(pdata) == XGBE_MODE_KR);	/* exact match */
+}
 
-	return (mode == XGBE_MODE_KR);
+static void xgbe_change_mode(struct xgbe_prv_data *pdata,
+			     enum xgbe_mode mode)
+{	/* applies mode without first comparing to the current mode */
+	switch (mode) {
+	case XGBE_MODE_KX_1000:
+		xgbe_kx_1000_mode(pdata);
+		break;
+	case XGBE_MODE_KX_2500:
+		xgbe_kx_2500_mode(pdata);
+		break;
+	case XGBE_MODE_KR:
+		xgbe_kr_mode(pdata);
+		break;
+	case XGBE_MODE_SGMII_100:
+		xgbe_sgmii_100_mode(pdata);
+		break;
+	case XGBE_MODE_SGMII_1000:
+		xgbe_sgmii_1000_mode(pdata);
+		break;
+	case XGBE_MODE_X:
+		xgbe_x_mode(pdata);
+		break;
+	case XGBE_MODE_SFI:
+		xgbe_sfi_mode(pdata);
+		break;
+	case XGBE_MODE_UNKNOWN:	/* silently ignored */
+		break;
+	default:	/* any other value: debug message only */
+		netif_dbg(pdata, link, pdata->netdev,
+			  "invalid operation mode requested (%u)\n", mode);
+	}
 }
 
 static void xgbe_switch_mode(struct xgbe_prv_data *pdata)
 {
-	/* If we are in KR switch to KX, and vice-versa */
-	if (xgbe_in_kr_mode(pdata)) {
-		if (pdata->speed_set == XGBE_SPEEDSET_1000_10000)
-			xgbe_gmii_mode(pdata);
-		else
-			xgbe_gmii_2500_mode(pdata);
-	} else {
-		xgbe_xgmii_mode(pdata);
-	}
+	xgbe_change_mode(pdata, pdata->phy_if.phy_impl.switch_mode(pdata));
 }
 
 static void xgbe_set_mode(struct xgbe_prv_data *pdata,
 			  enum xgbe_mode mode)
 {
-	enum xgbe_mode cur_mode;
+	if (mode == xgbe_cur_mode(pdata))
+		return;	/* already in the requested mode */
 
-	xgbe_cur_mode(pdata, &cur_mode);
-	if (mode != cur_mode)
-		xgbe_switch_mode(pdata);
+	xgbe_change_mode(pdata, mode);
 }
 
-static bool xgbe_use_xgmii_mode(struct xgbe_prv_data *pdata)
+static bool xgbe_use_mode(struct xgbe_prv_data *pdata,
+			  enum xgbe_mode mode)
 {
-	if (pdata->phy.autoneg == AUTONEG_ENABLE) {
-		if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full)
-			return true;
-	} else {
-		if (pdata->phy.speed == SPEED_10000)
-			return true;
-	}
+	return pdata->phy_if.phy_impl.use_mode(pdata, mode);	/* PHY hook */
+}
+
+static void xgbe_an37_set(struct xgbe_prv_data *pdata, bool enable,
+			  bool restart)
+{	/* Write the CL37 AN enable/restart bits in VEND2 CTRL1 */
+	unsigned int reg;
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_CTRL1);
+	reg &= ~MDIO_VEND2_CTRL1_AN_ENABLE;	/* start from AN disabled */
 
-	return false;
+	if (enable)
+		reg |= MDIO_VEND2_CTRL1_AN_ENABLE;
+
+	if (restart)
+		reg |= MDIO_VEND2_CTRL1_AN_RESTART;	/* only ever set here */
+
+	XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_CTRL1, reg);
 }
 
-static bool xgbe_use_gmii_2500_mode(struct xgbe_prv_data *pdata)
+static void xgbe_an37_restart(struct xgbe_prv_data *pdata)
 {
-	if (pdata->phy.autoneg == AUTONEG_ENABLE) {
-		if (pdata->phy.advertising & ADVERTISED_2500baseX_Full)
-			return true;
-	} else {
-		if (pdata->phy.speed == SPEED_2500)
-			return true;
-	}
+	xgbe_an37_enable_interrupts(pdata);	/* on before AN restarts */
+	xgbe_an37_set(pdata, true, true);
 
-	return false;
+	netif_dbg(pdata, link, pdata->netdev, "CL37 AN enabled/restarted\n");
 }
 
-static bool xgbe_use_gmii_mode(struct xgbe_prv_data *pdata)
+static void xgbe_an37_disable(struct xgbe_prv_data *pdata)
 {
-	if (pdata->phy.autoneg == AUTONEG_ENABLE) {
-		if (pdata->phy.advertising & ADVERTISED_1000baseKX_Full)
-			return true;
-	} else {
-		if (pdata->phy.speed == SPEED_1000)
-			return true;
-	}
+	xgbe_an37_set(pdata, false, false);
+	xgbe_an37_disable_interrupts(pdata);	/* off after AN is disabled */
 
-	return false;
+	netif_dbg(pdata, link, pdata->netdev, "CL37 AN disabled\n");
 }
 
-static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart)
+static void xgbe_an73_set(struct xgbe_prv_data *pdata, bool enable,
+			  bool restart)
 {
 	unsigned int reg;
 
@@ -437,22 +418,62 @@ static void xgbe_set_an(struct xgbe_prv_data *pdata, bool enable, bool restart)
 	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_CTRL1, reg);
 }
 
-static void xgbe_restart_an(struct xgbe_prv_data *pdata)
+static void xgbe_an73_restart(struct xgbe_prv_data *pdata)
+{
+	xgbe_an73_enable_interrupts(pdata);	/* on before AN restarts */
+	xgbe_an73_set(pdata, true, true);
+
+	netif_dbg(pdata, link, pdata->netdev, "CL73 AN enabled/restarted\n");
+}
+
+static void xgbe_an73_disable(struct xgbe_prv_data *pdata)
 {
-	xgbe_set_an(pdata, true, true);
+	xgbe_an73_set(pdata, false, false);
+	xgbe_an73_disable_interrupts(pdata);	/* off after AN is disabled */
 
-	netif_dbg(pdata, link, pdata->netdev, "AN enabled/restarted\n");
+	netif_dbg(pdata, link, pdata->netdev, "CL73 AN disabled\n");
+}
+
+static void xgbe_an_restart(struct xgbe_prv_data *pdata)
+{	/* restart AN for the clause recorded in pdata->an_mode */
+	switch (pdata->an_mode) {
+	case XGBE_AN_MODE_CL73:
+	case XGBE_AN_MODE_CL73_REDRV:
+		xgbe_an73_restart(pdata);
+		break;
+	case XGBE_AN_MODE_CL37:
+	case XGBE_AN_MODE_CL37_SGMII:
+		xgbe_an37_restart(pdata);
+		break;
+	default:	/* any other mode: nothing is restarted */
+		break;
+	}
 }
 
-static void xgbe_disable_an(struct xgbe_prv_data *pdata)
+static void xgbe_an_disable(struct xgbe_prv_data *pdata)
 {
-	xgbe_set_an(pdata, false, false);
+	switch (pdata->an_mode) {	/* only the active clause is touched */
+	case XGBE_AN_MODE_CL73:
+	case XGBE_AN_MODE_CL73_REDRV:
+		xgbe_an73_disable(pdata);
+		break;
+	case XGBE_AN_MODE_CL37:
+	case XGBE_AN_MODE_CL37_SGMII:
+		xgbe_an37_disable(pdata);
+		break;
+	default:	/* any other mode: nothing is disabled */
+		break;
+	}
+}
 
-	netif_dbg(pdata, link, pdata->netdev, "AN disabled\n");
+static void xgbe_an_disable_all(struct xgbe_prv_data *pdata)
+{	/* disables both clauses, without consulting pdata->an_mode */
+	xgbe_an73_disable(pdata);
+	xgbe_an37_disable(pdata);
 }
 
-static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata,
-					enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_tx_training(struct xgbe_prv_data *pdata,
+					  enum xgbe_rx *state)
 {
 	unsigned int ad_reg, lp_reg, reg;
 
@@ -476,13 +497,15 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata,
 	/* Start KR training */
 	reg = XMDIO_READ(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL);
 	if (reg & XGBE_KR_TRAINING_ENABLE) {
-		XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1);
+		if (pdata->phy_if.phy_impl.kr_training_pre)
+			pdata->phy_if.phy_impl.kr_training_pre(pdata);
 
 		reg |= XGBE_KR_TRAINING_START;
 		XMDIO_WRITE(pdata, MDIO_MMD_PMAPMD, MDIO_PMA_10GBR_PMD_CTRL,
 			    reg);
 
-		XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0);
+		if (pdata->phy_if.phy_impl.kr_training_post)
+			pdata->phy_if.phy_impl.kr_training_post(pdata);
 
 		netif_dbg(pdata, link, pdata->netdev,
 			  "KR training initiated\n");
@@ -491,8 +514,8 @@ static enum xgbe_an xgbe_an_tx_training(struct xgbe_prv_data *pdata,
 	return XGBE_AN_PAGE_RECEIVED;
 }
 
-static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata,
-				   enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_tx_xnp(struct xgbe_prv_data *pdata,
+				     enum xgbe_rx *state)
 {
 	u16 msg;
 
@@ -508,8 +531,8 @@ static enum xgbe_an xgbe_an_tx_xnp(struct xgbe_prv_data *pdata,
 	return XGBE_AN_PAGE_RECEIVED;
 }
 
-static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata,
-				   enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_rx_bpa(struct xgbe_prv_data *pdata,
+				     enum xgbe_rx *state)
 {
 	unsigned int link_support;
 	unsigned int reg, ad_reg, lp_reg;
@@ -528,12 +551,12 @@ static enum xgbe_an xgbe_an_rx_bpa(struct xgbe_prv_data *pdata,
 
 	return ((ad_reg & XGBE_XNP_NP_EXCHANGE) ||
 		(lp_reg & XGBE_XNP_NP_EXCHANGE))
-	       ? xgbe_an_tx_xnp(pdata, state)
-	       : xgbe_an_tx_training(pdata, state);
+	       ? xgbe_an73_tx_xnp(pdata, state)
+	       : xgbe_an73_tx_training(pdata, state);
 }
 
-static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata,
-				   enum xgbe_rx *state)
+static enum xgbe_an xgbe_an73_rx_xnp(struct xgbe_prv_data *pdata,
+				     enum xgbe_rx *state)
 {
 	unsigned int ad_reg, lp_reg;
 
@@ -543,11 +566,11 @@ static enum xgbe_an xgbe_an_rx_xnp(struct xgbe_prv_data *pdata,
 
 	return ((ad_reg & XGBE_XNP_NP_EXCHANGE) ||
 		(lp_reg & XGBE_XNP_NP_EXCHANGE))
-	       ? xgbe_an_tx_xnp(pdata, state)
-	       : xgbe_an_tx_training(pdata, state);
+	       ? xgbe_an73_tx_xnp(pdata, state)
+	       : xgbe_an73_tx_training(pdata, state);
 }
 
-static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata)
+static enum xgbe_an xgbe_an73_page_received(struct xgbe_prv_data *pdata)
 {
 	enum xgbe_rx *state;
 	unsigned long an_timeout;
@@ -566,20 +589,20 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata)
 			pdata->an_start = jiffies;
 
 			netif_dbg(pdata, link, pdata->netdev,
-				  "AN timed out, resetting state\n");
+				  "CL73 AN timed out, resetting state\n");
 		}
 	}
 
 	state = xgbe_in_kr_mode(pdata) ? &pdata->kr_state
-					   : &pdata->kx_state;
+				       : &pdata->kx_state;
 
 	switch (*state) {
 	case XGBE_RX_BPA:
-		ret = xgbe_an_rx_bpa(pdata, state);
+		ret = xgbe_an73_rx_bpa(pdata, state);
 		break;
 
 	case XGBE_RX_XNP:
-		ret = xgbe_an_rx_xnp(pdata, state);
+		ret = xgbe_an73_rx_xnp(pdata, state);
 		break;
 
 	default:
@@ -589,7 +612,7 @@ static enum xgbe_an xgbe_an_page_received(struct xgbe_prv_data *pdata)
 	return ret;
 }
 
-static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata)
+static enum xgbe_an xgbe_an73_incompat_link(struct xgbe_prv_data *pdata)
 {
 	/* Be sure we aren't looping trying to negotiate */
 	if (xgbe_in_kr_mode(pdata)) {
@@ -611,23 +634,43 @@ static enum xgbe_an xgbe_an_incompat_link(struct xgbe_prv_data *pdata)
 			return XGBE_AN_NO_LINK;
 	}
 
-	xgbe_disable_an(pdata);
+	xgbe_an73_disable(pdata);
 
 	xgbe_switch_mode(pdata);
 
-	xgbe_restart_an(pdata);
+	xgbe_an73_restart(pdata);
 
 	return XGBE_AN_INCOMPAT_LINK;
 }
 
-static irqreturn_t xgbe_an_isr(int irq, void *data)
+static void xgbe_an37_isr(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+	unsigned int reg;
 
-	netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n");
+	/* Disable AN interrupts */
+	xgbe_an37_disable_interrupts(pdata);
+
+	/* Save the interrupt(s) that fired; remaining bits are kept as status */
+	reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT);
+	pdata->an_int = reg & XGBE_AN_CL37_INT_MASK;
+	pdata->an_status = reg & ~XGBE_AN_CL37_INT_MASK;
 
+	if (pdata->an_int) {
+		/* Clear the interrupt(s) that fired and process them */
+		reg &= ~XGBE_AN_CL37_INT_MASK;
+		XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_STAT, reg);
+
+		queue_work(pdata->an_workqueue, &pdata->an_irq_work);
+	} else {
+		/* Nothing pending: re-enable AN interrupts right away */
+		xgbe_an37_enable_interrupts(pdata);
+	}
+}
+
+static void xgbe_an73_isr(struct xgbe_prv_data *pdata)
+{
 	/* Disable AN interrupts */
-	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+	xgbe_an73_disable_interrupts(pdata);
 
 	/* Save the interrupt(s) that fired */
 	pdata->an_int = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_INT);
@@ -639,13 +682,37 @@ static irqreturn_t xgbe_an_isr(int irq, void *data)
 		queue_work(pdata->an_workqueue, &pdata->an_irq_work);
 	} else {
 		/* Enable AN interrupts */
-		XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK,
-			    XGBE_AN_INT_MASK);
+		xgbe_an73_enable_interrupts(pdata);
+	}
+}
+
+static irqreturn_t xgbe_an_isr(int irq, void *data)
+{
+	struct xgbe_prv_data *pdata = (struct xgbe_prv_data *)data;
+
+	netif_dbg(pdata, intr, pdata->netdev, "AN interrupt received\n");
+
+	switch (pdata->an_mode) {
+	case XGBE_AN_MODE_CL73:
+	case XGBE_AN_MODE_CL73_REDRV:
+		xgbe_an73_isr(pdata);
+		break;
+	case XGBE_AN_MODE_CL37:
+	case XGBE_AN_MODE_CL37_SGMII:
+		xgbe_an37_isr(pdata);
+		break;
+	default:	/* not serviced, yet IRQ_HANDLED is still returned */
+		break;
 	}
 
 	return IRQ_HANDLED;
 }
 
+static irqreturn_t xgbe_an_combined_isr(int irq, struct xgbe_prv_data *pdata)
+{	/* takes pdata directly and forwards to xgbe_an_isr() */
+	return xgbe_an_isr(irq, pdata);
+}
+
 static void xgbe_an_irq_work(struct work_struct *work)
 {
 	struct xgbe_prv_data *pdata = container_of(work,
@@ -679,36 +746,87 @@ static const char *xgbe_state_as_string(enum xgbe_an state)
 	}
 }
 
-static void xgbe_an_state_machine(struct work_struct *work)
+static void xgbe_an37_state_machine(struct xgbe_prv_data *pdata)
 {
-	struct xgbe_prv_data *pdata = container_of(work,
-						   struct xgbe_prv_data,
-						   an_work);
 	enum xgbe_an cur_state = pdata->an_state;
 
-	mutex_lock(&pdata->an_mutex);
+	if (!pdata->an_int)
+		return;	/* NOTE(review): AN interrupts are not re-enabled here */
+
+	if (pdata->an_int & XGBE_AN_CL37_INT_CMPLT) {
+		pdata->an_state = XGBE_AN_COMPLETE;
+		pdata->an_int &= ~XGBE_AN_CL37_INT_CMPLT;
+
+		/* If SGMII is enabled, check the link status */
+		if ((pdata->an_mode == XGBE_AN_MODE_CL37_SGMII) &&
+		    !(pdata->an_status & XGBE_SGMII_AN_LINK_STATUS))
+			pdata->an_state = XGBE_AN_NO_LINK;
+	}
+
+	netif_dbg(pdata, link, pdata->netdev, "CL37 AN %s\n",
+		  xgbe_state_as_string(pdata->an_state));
+
+	cur_state = pdata->an_state;	/* kept for the error message below */
+
+	switch (pdata->an_state) {
+	case XGBE_AN_READY:
+		break;
+
+	case XGBE_AN_COMPLETE:
+		netif_dbg(pdata, link, pdata->netdev,
+			  "Auto negotiation successful\n");
+		break;
+
+	case XGBE_AN_NO_LINK:
+		break;
+
+	default:	/* every other state is turned into an error */
+		pdata->an_state = XGBE_AN_ERROR;
+	}
+
+	if (pdata->an_state == XGBE_AN_ERROR) {
+		netdev_err(pdata->netdev,
+			   "error during auto-negotiation, state=%u\n",
+			   cur_state);
+
+		pdata->an_int = 0;
+		xgbe_an37_clear_interrupts(pdata);
+	}
+
+	if (pdata->an_state >= XGBE_AN_COMPLETE) {	/* enum-order compare */
+		pdata->an_result = pdata->an_state;
+		pdata->an_state = XGBE_AN_READY;
+
+		netif_dbg(pdata, link, pdata->netdev, "CL37 AN result: %s\n",
+			  xgbe_state_as_string(pdata->an_result));
+	}
+
+	xgbe_an37_enable_interrupts(pdata);	/* on again before returning */
+}
+
+static void xgbe_an73_state_machine(struct xgbe_prv_data *pdata)
+{
+	enum xgbe_an cur_state = pdata->an_state;
 
 	if (!pdata->an_int)
-		goto out;
+		return;
 
 next_int:
-	if (pdata->an_int & XGBE_AN_PG_RCV) {
+	if (pdata->an_int & XGBE_AN_CL73_PG_RCV) {
 		pdata->an_state = XGBE_AN_PAGE_RECEIVED;
-		pdata->an_int &= ~XGBE_AN_PG_RCV;
-	} else if (pdata->an_int & XGBE_AN_INC_LINK) {
+		pdata->an_int &= ~XGBE_AN_CL73_PG_RCV;
+	} else if (pdata->an_int & XGBE_AN_CL73_INC_LINK) {
 		pdata->an_state = XGBE_AN_INCOMPAT_LINK;
-		pdata->an_int &= ~XGBE_AN_INC_LINK;
-	} else if (pdata->an_int & XGBE_AN_INT_CMPLT) {
+		pdata->an_int &= ~XGBE_AN_CL73_INC_LINK;
+	} else if (pdata->an_int & XGBE_AN_CL73_INT_CMPLT) {
 		pdata->an_state = XGBE_AN_COMPLETE;
-		pdata->an_int &= ~XGBE_AN_INT_CMPLT;
+		pdata->an_int &= ~XGBE_AN_CL73_INT_CMPLT;
 	} else {
 		pdata->an_state = XGBE_AN_ERROR;
 	}
 
-	pdata->an_result = pdata->an_state;
-
 again:
-	netif_dbg(pdata, link, pdata->netdev, "AN %s\n",
+	netif_dbg(pdata, link, pdata->netdev, "CL73 AN %s\n",
 		  xgbe_state_as_string(pdata->an_state));
 
 	cur_state = pdata->an_state;
@@ -719,14 +837,14 @@ again:
 		break;
 
 	case XGBE_AN_PAGE_RECEIVED:
-		pdata->an_state = xgbe_an_page_received(pdata);
+		pdata->an_state = xgbe_an73_page_received(pdata);
 		pdata->an_supported++;
 		break;
 
 	case XGBE_AN_INCOMPAT_LINK:
 		pdata->an_supported = 0;
 		pdata->parallel_detect = 0;
-		pdata->an_state = xgbe_an_incompat_link(pdata);
+		pdata->an_state = xgbe_an73_incompat_link(pdata);
 		break;
 
 	case XGBE_AN_COMPLETE:
@@ -745,14 +863,14 @@ again:
 
 	if (pdata->an_state == XGBE_AN_NO_LINK) {
 		pdata->an_int = 0;
-		XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+		xgbe_an73_clear_interrupts(pdata);
 	} else if (pdata->an_state == XGBE_AN_ERROR) {
 		netdev_err(pdata->netdev,
 			   "error during auto-negotiation, state=%u\n",
 			   cur_state);
 
 		pdata->an_int = 0;
-		XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+		xgbe_an73_clear_interrupts(pdata);
 	}
 
 	if (pdata->an_state >= XGBE_AN_COMPLETE) {
@@ -762,7 +880,7 @@ again:
 		pdata->kx_state = XGBE_RX_BPA;
 		pdata->an_start = 0;
 
-		netif_dbg(pdata, link, pdata->netdev, "AN result: %s\n",
+		netif_dbg(pdata, link, pdata->netdev, "CL73 AN result: %s\n",
 			  xgbe_state_as_string(pdata->an_result));
 	}
 
@@ -772,20 +890,88 @@ again:
 	if (pdata->an_int)
 		goto next_int;
 
-out:
-	/* Enable AN interrupts on the way out */
-	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, XGBE_AN_INT_MASK);
+	xgbe_an73_enable_interrupts(pdata);
+}
+
+static void xgbe_an_state_machine(struct work_struct *work)
+{
+	struct xgbe_prv_data *pdata = container_of(work,
+						   struct xgbe_prv_data,
+						   an_work);
+
+	mutex_lock(&pdata->an_mutex);	/* held for the whole dispatch */
+
+	switch (pdata->an_mode) {
+	case XGBE_AN_MODE_CL73:
+	case XGBE_AN_MODE_CL73_REDRV:
+		xgbe_an73_state_machine(pdata);
+		break;
+	case XGBE_AN_MODE_CL37:
+	case XGBE_AN_MODE_CL37_SGMII:
+		xgbe_an37_state_machine(pdata);
+		break;
+	default:	/* any other mode: no state machine is run */
+		break;
+	}
 
 	mutex_unlock(&pdata->an_mutex);
 }
 
-static void xgbe_an_init(struct xgbe_prv_data *pdata)
+static void xgbe_an37_init(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg;
+	unsigned int advertising, reg;
+
+	advertising = pdata->phy_if.phy_impl.an_advertising(pdata);
+
+	/* Set up Advertisement register */
+	reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE);
+	if (advertising & ADVERTISED_Pause)
+		reg |= 0x100;	/* bit 8 follows ADVERTISED_Pause */
+	else
+		reg &= ~0x100;
+
+	if (advertising & ADVERTISED_Asym_Pause)
+		reg |= 0x80;	/* bit 7 follows ADVERTISED_Asym_Pause */
+	else
+		reg &= ~0x80;
+
+	/* Full duplex, but not half */
+	reg |= XGBE_AN_CL37_FD_MASK;
+	reg &= ~XGBE_AN_CL37_HD_MASK;
+
+	XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE, reg);
+
+	/* Set up the Control register */
+	reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL);
+	reg &= ~XGBE_AN_CL37_TX_CONFIG_MASK;
+	reg &= ~XGBE_AN_CL37_PCS_MODE_MASK;
+
+	switch (pdata->an_mode) {
+	case XGBE_AN_MODE_CL37:
+		reg |= XGBE_AN_CL37_PCS_MODE_BASEX;
+		break;
+	case XGBE_AN_MODE_CL37_SGMII:
+		reg |= XGBE_AN_CL37_PCS_MODE_SGMII;
+		break;
+	default:	/* PCS mode field stays cleared */
+		break;
+	}
+
+	XMDIO_WRITE(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_CTRL, reg);
+
+	netif_dbg(pdata, link, pdata->netdev, "CL37 AN (%s) initialized\n",
+		  (pdata->an_mode == XGBE_AN_MODE_CL37) ? "BaseX" : "SGMII");
+}
+
+static void xgbe_an73_init(struct xgbe_prv_data *pdata)
+{
+	unsigned int advertising, reg;
+
+	advertising = pdata->phy_if.phy_impl.an_advertising(pdata);
 
 	/* Set up Advertisement register 3 first */
 	reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
-	if (pdata->phy.advertising & ADVERTISED_10000baseR_FEC)
+	if (advertising & ADVERTISED_10000baseR_FEC)
 		reg |= 0xc000;
 	else
 		reg &= ~0xc000;
@@ -794,13 +980,13 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata)
 
 	/* Set up Advertisement register 2 next */
 	reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
-	if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full)
+	if (advertising & ADVERTISED_10000baseKR_Full)
 		reg |= 0x80;
 	else
 		reg &= ~0x80;
 
-	if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) ||
-	    (pdata->phy.advertising & ADVERTISED_2500baseX_Full))
+	if ((advertising & ADVERTISED_1000baseKX_Full) ||
+	    (advertising & ADVERTISED_2500baseX_Full))
 		reg |= 0x20;
 	else
 		reg &= ~0x20;
@@ -809,12 +995,12 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata)
 
 	/* Set up Advertisement register 1 last */
 	reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
-	if (pdata->phy.advertising & ADVERTISED_Pause)
+	if (advertising & ADVERTISED_Pause)
 		reg |= 0x400;
 	else
 		reg &= ~0x400;
 
-	if (pdata->phy.advertising & ADVERTISED_Asym_Pause)
+	if (advertising & ADVERTISED_Asym_Pause)
 		reg |= 0x800;
 	else
 		reg &= ~0x800;
@@ -824,7 +1010,25 @@ static void xgbe_an_init(struct xgbe_prv_data *pdata)
 
 	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg);
 
-	netif_dbg(pdata, link, pdata->netdev, "AN initialized\n");
+	netif_dbg(pdata, link, pdata->netdev, "CL73 AN initialized\n");
+}
+
+static void xgbe_an_init(struct xgbe_prv_data *pdata)
+{
+	/* Record the PHY's AN mode, then set up its advertisement registers */
+	pdata->an_mode = pdata->phy_if.phy_impl.an_mode(pdata);
+	switch (pdata->an_mode) {
+	case XGBE_AN_MODE_CL73:
+	case XGBE_AN_MODE_CL73_REDRV:
+		xgbe_an73_init(pdata);
+		break;
+	case XGBE_AN_MODE_CL37:
+	case XGBE_AN_MODE_CL37_SGMII:
+		xgbe_an37_init(pdata);
+		break;
+	default:	/* any other mode: no registers are written */
+		break;
+	}
 }
 
 static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata)
@@ -842,6 +1046,8 @@ static const char *xgbe_phy_fc_string(struct xgbe_prv_data *pdata)
 static const char *xgbe_phy_speed_string(int speed)
 {
 	switch (speed) {
+	case SPEED_100:
+		return "100Mbps";
 	case SPEED_1000:
 		return "1Gbps";
 	case SPEED_2500:
@@ -907,24 +1113,32 @@ static void xgbe_phy_adjust_link(struct xgbe_prv_data *pdata)
 		xgbe_phy_print_status(pdata);
 }
 
+static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
+{
+	return pdata->phy_if.phy_impl.valid_speed(pdata, speed);
+}
+
 static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata)
 {
+	enum xgbe_mode mode;
+
 	netif_dbg(pdata, link, pdata->netdev, "fixed PHY configuration\n");
 
 	/* Disable auto-negotiation */
-	xgbe_disable_an(pdata);
-
-	/* Validate/Set specified speed */
-	switch (pdata->phy.speed) {
-	case SPEED_10000:
-		xgbe_set_mode(pdata, XGBE_MODE_KR);
+	xgbe_an_disable(pdata);
+
+	/* Set specified mode for specified speed */
+	mode = pdata->phy_if.phy_impl.get_mode(pdata, pdata->phy.speed);
+	switch (mode) {
+	case XGBE_MODE_KX_1000:
+	case XGBE_MODE_KX_2500:
+	case XGBE_MODE_KR:
+	case XGBE_MODE_SGMII_100:
+	case XGBE_MODE_SGMII_1000:
+	case XGBE_MODE_X:
+	case XGBE_MODE_SFI:
 		break;
-
-	case SPEED_2500:
-	case SPEED_1000:
-		xgbe_set_mode(pdata, XGBE_MODE_KX);
-		break;
-
+	case XGBE_MODE_UNKNOWN:
 	default:
 		return -EINVAL;
 	}
@@ -933,38 +1147,60 @@ static int xgbe_phy_config_fixed(struct xgbe_prv_data *pdata)
 	if (pdata->phy.duplex != DUPLEX_FULL)
 		return -EINVAL;
 
+	xgbe_set_mode(pdata, mode);
+
 	return 0;
 }
 
 static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
 {
+	int ret;
+
 	set_bit(XGBE_LINK_INIT, &pdata->dev_state);
 	pdata->link_check = jiffies;
 
-	if (pdata->phy.autoneg != AUTONEG_ENABLE)
-		return xgbe_phy_config_fixed(pdata);
+	ret = pdata->phy_if.phy_impl.an_config(pdata);
+	if (ret)
+		return ret;
+
+	if (pdata->phy.autoneg != AUTONEG_ENABLE) {
+		ret = xgbe_phy_config_fixed(pdata);
+		if (ret || !pdata->kr_redrv)
+			return ret;
 
-	netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n");
+		netif_dbg(pdata, link, pdata->netdev, "AN redriver support\n");
+	} else {
+		netif_dbg(pdata, link, pdata->netdev, "AN PHY configuration\n");
+	}
 
 	/* Disable auto-negotiation interrupt */
 	disable_irq(pdata->an_irq);
 
 	/* Start auto-negotiation in a supported mode */
-	if (pdata->phy.advertising & ADVERTISED_10000baseKR_Full) {
+	if (xgbe_use_mode(pdata, XGBE_MODE_KR)) {
 		xgbe_set_mode(pdata, XGBE_MODE_KR);
-	} else if ((pdata->phy.advertising & ADVERTISED_1000baseKX_Full) ||
-		   (pdata->phy.advertising & ADVERTISED_2500baseX_Full)) {
-		xgbe_set_mode(pdata, XGBE_MODE_KX);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) {
+		xgbe_set_mode(pdata, XGBE_MODE_KX_2500);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) {
+		xgbe_set_mode(pdata, XGBE_MODE_KX_1000);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) {
+		xgbe_set_mode(pdata, XGBE_MODE_SFI);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_X)) {
+		xgbe_set_mode(pdata, XGBE_MODE_X);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) {
+		xgbe_set_mode(pdata, XGBE_MODE_SGMII_1000);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) {
+		xgbe_set_mode(pdata, XGBE_MODE_SGMII_100);
 	} else {
 		enable_irq(pdata->an_irq);
 		return -EINVAL;
 	}
 
 	/* Disable and stop any in progress auto-negotiation */
-	xgbe_disable_an(pdata);
+	xgbe_an_disable_all(pdata);
 
 	/* Clear any auto-negotitation interrupts */
-	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+	xgbe_an_clear_interrupts_all(pdata);
 
 	pdata->an_result = XGBE_AN_READY;
 	pdata->an_state = XGBE_AN_READY;
@@ -974,11 +1210,8 @@ static int __xgbe_phy_config_aneg(struct xgbe_prv_data *pdata)
 	/* Re-enable auto-negotiation interrupt */
 	enable_irq(pdata->an_irq);
 
-	/* Set up advertisement registers based on current settings */
 	xgbe_an_init(pdata);
-
-	/* Enable and start auto-negotiation */
-	xgbe_restart_an(pdata);
+	xgbe_an_restart(pdata);
 
 	return 0;
 }
@@ -1016,108 +1249,52 @@ static void xgbe_check_link_timeout(struct xgbe_prv_data *pdata)
 	}
 }
 
-static void xgbe_phy_status_force(struct xgbe_prv_data *pdata)
+static enum xgbe_mode xgbe_phy_status_aneg(struct xgbe_prv_data *pdata)
 {
-	if (xgbe_in_kr_mode(pdata)) {
-		pdata->phy.speed = SPEED_10000;
-	} else {
-		switch (pdata->speed_set) {
-		case XGBE_SPEEDSET_1000_10000:
-			pdata->phy.speed = SPEED_1000;
-			break;
-
-		case XGBE_SPEEDSET_2500_10000:
-			pdata->phy.speed = SPEED_2500;
-			break;
-		}
-	}
-	pdata->phy.duplex = DUPLEX_FULL;
+	return pdata->phy_if.phy_impl.an_outcome(pdata);
 }
 
-static void xgbe_phy_status_aneg(struct xgbe_prv_data *pdata)
+static void xgbe_phy_status_result(struct xgbe_prv_data *pdata)
 {
-	unsigned int ad_reg, lp_reg;
+	enum xgbe_mode mode;
 
 	pdata->phy.lp_advertising = 0;
 
 	if ((pdata->phy.autoneg != AUTONEG_ENABLE) || pdata->parallel_detect)
-		return xgbe_phy_status_force(pdata);
-
-	pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
-	pdata->phy.lp_advertising |= ADVERTISED_Backplane;
-
-	/* Compare Advertisement and Link Partner register 1 */
-	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
-	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
-	if (lp_reg & 0x400)
-		pdata->phy.lp_advertising |= ADVERTISED_Pause;
-	if (lp_reg & 0x800)
-		pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
-
-	if (pdata->phy.pause_autoneg) {
-		/* Set flow control based on auto-negotiation result */
-		pdata->phy.tx_pause = 0;
-		pdata->phy.rx_pause = 0;
-
-		if (ad_reg & lp_reg & 0x400) {
-			pdata->phy.tx_pause = 1;
-			pdata->phy.rx_pause = 1;
-		} else if (ad_reg & lp_reg & 0x800) {
-			if (ad_reg & 0x400)
-				pdata->phy.rx_pause = 1;
-			else if (lp_reg & 0x400)
-				pdata->phy.tx_pause = 1;
-		}
-	}
-
-	/* Compare Advertisement and Link Partner register 2 */
-	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
-	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
-	if (lp_reg & 0x80)
-		pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
-	if (lp_reg & 0x20) {
-		switch (pdata->speed_set) {
-		case XGBE_SPEEDSET_1000_10000:
-			pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
-			break;
-		case XGBE_SPEEDSET_2500_10000:
-			pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full;
-			break;
-		}
-	}
+		mode = xgbe_cur_mode(pdata);
+	else
+		mode = xgbe_phy_status_aneg(pdata);
 
-	ad_reg &= lp_reg;
-	if (ad_reg & 0x80) {
+	switch (mode) {
+	case XGBE_MODE_SGMII_100:
+		pdata->phy.speed = SPEED_100;
+		break;
+	case XGBE_MODE_X:
+	case XGBE_MODE_KX_1000:
+	case XGBE_MODE_SGMII_1000:
+		pdata->phy.speed = SPEED_1000;
+		break;
+	case XGBE_MODE_KX_2500:
+		pdata->phy.speed = SPEED_2500;
+		break;
+	case XGBE_MODE_KR:
+	case XGBE_MODE_SFI:
 		pdata->phy.speed = SPEED_10000;
-		xgbe_set_mode(pdata, XGBE_MODE_KR);
-	} else if (ad_reg & 0x20) {
-		switch (pdata->speed_set) {
-		case XGBE_SPEEDSET_1000_10000:
-			pdata->phy.speed = SPEED_1000;
-			break;
-
-		case XGBE_SPEEDSET_2500_10000:
-			pdata->phy.speed = SPEED_2500;
-			break;
-		}
-
-		xgbe_set_mode(pdata, XGBE_MODE_KX);
-	} else {
+		break;
+	case XGBE_MODE_UNKNOWN:
+	default:
 		pdata->phy.speed = SPEED_UNKNOWN;
 	}
 
-	/* Compare Advertisement and Link Partner register 3 */
-	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
-	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
-	if (lp_reg & 0xc000)
-		pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
-
 	pdata->phy.duplex = DUPLEX_FULL;
+
+	xgbe_set_mode(pdata, mode);
 }
 
 static void xgbe_phy_status(struct xgbe_prv_data *pdata)
 {
-	unsigned int reg, link_aneg;
+	unsigned int link_aneg;
+	int an_restart;
 
 	if (test_bit(XGBE_LINK_ERR, &pdata->dev_state)) {
 		netif_carrier_off(pdata->netdev);
@@ -1128,12 +1305,12 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
 
 	link_aneg = (pdata->phy.autoneg == AUTONEG_ENABLE);
 
-	/* Get the link status. Link status is latched low, so read
-	 * once to clear and then read again to get current state
-	 */
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
-	pdata->phy.link = (reg & MDIO_STAT1_LSTATUS) ? 1 : 0;
+	pdata->phy.link = pdata->phy_if.phy_impl.link_status(pdata,
+							     &an_restart);
+	if (an_restart) {
+		xgbe_phy_config_aneg(pdata);
+		return;
+	}
 
 	if (pdata->phy.link) {
 		if (link_aneg && !xgbe_phy_aneg_done(pdata)) {
@@ -1141,7 +1318,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
 			return;
 		}
 
-		xgbe_phy_status_aneg(pdata);
+		xgbe_phy_status_result(pdata);
 
 		if (test_bit(XGBE_LINK_INIT, &pdata->dev_state))
 			clear_bit(XGBE_LINK_INIT, &pdata->dev_state);
@@ -1155,7 +1332,7 @@ static void xgbe_phy_status(struct xgbe_prv_data *pdata)
 				return;
 		}
 
-		xgbe_phy_status_aneg(pdata);
+		xgbe_phy_status_result(pdata);
 
 		netif_carrier_off(pdata->netdev);
 	}
@@ -1168,13 +1345,19 @@ static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
 {
 	netif_dbg(pdata, link, pdata->netdev, "stopping PHY\n");
 
+	if (!pdata->phy_started)
+		return;
+
+	/* Indicate the PHY is down */
+	pdata->phy_started = 0;
+
 	/* Disable auto-negotiation */
-	xgbe_disable_an(pdata);
+	xgbe_an_disable_all(pdata);
 
-	/* Disable auto-negotiation interrupts */
-	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0);
+	if (pdata->dev_irq != pdata->an_irq)
+		devm_free_irq(pdata->dev, pdata->an_irq, pdata);
 
-	devm_free_irq(pdata->dev, pdata->an_irq, pdata);
+	pdata->phy_if.phy_impl.stop(pdata);
 
 	pdata->phy.link = 0;
 	netif_carrier_off(pdata->netdev);
@@ -1189,64 +1372,74 @@ static int xgbe_phy_start(struct xgbe_prv_data *pdata)
 
 	netif_dbg(pdata, link, pdata->netdev, "starting PHY\n");
 
-	ret = devm_request_irq(pdata->dev, pdata->an_irq,
-			       xgbe_an_isr, 0, pdata->an_name,
-			       pdata);
-	if (ret) {
-		netdev_err(netdev, "phy irq request failed\n");
+	ret = pdata->phy_if.phy_impl.start(pdata);
+	if (ret)
 		return ret;
+
+	/* If we have a separate AN irq, enable it */
+	if (pdata->dev_irq != pdata->an_irq) {
+		ret = devm_request_irq(pdata->dev, pdata->an_irq,
+				       xgbe_an_isr, 0, pdata->an_name,
+				       pdata);
+		if (ret) {
+			netdev_err(netdev, "phy irq request failed\n");
+			goto err_stop;
+		}
 	}
 
 	/* Set initial mode - call the mode setting routines
 	 * directly to insure we are properly configured
 	 */
-	if (xgbe_use_xgmii_mode(pdata)) {
-		xgbe_xgmii_mode(pdata);
-	} else if (xgbe_use_gmii_mode(pdata)) {
-		xgbe_gmii_mode(pdata);
-	} else if (xgbe_use_gmii_2500_mode(pdata)) {
-		xgbe_gmii_2500_mode(pdata);
+	if (xgbe_use_mode(pdata, XGBE_MODE_KR)) {
+		xgbe_kr_mode(pdata);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_2500)) {
+		xgbe_kx_2500_mode(pdata);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_KX_1000)) {
+		xgbe_kx_1000_mode(pdata);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_SFI)) {
+		xgbe_sfi_mode(pdata);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_X)) {
+		xgbe_x_mode(pdata);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_1000)) {
+		xgbe_sgmii_1000_mode(pdata);
+	} else if (xgbe_use_mode(pdata, XGBE_MODE_SGMII_100)) {
+		xgbe_sgmii_100_mode(pdata);
 	} else {
 		ret = -EINVAL;
 		goto err_irq;
 	}
 
-	/* Set up advertisement registers based on current settings */
-	xgbe_an_init(pdata);
+	/* Indicate the PHY is up and running */
+	pdata->phy_started = 1;
 
-	/* Enable auto-negotiation interrupts */
-	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INTMASK, 0x07);
+	xgbe_an_init(pdata);
+	xgbe_an_enable_interrupts(pdata);
 
 	return xgbe_phy_config_aneg(pdata);
 
 err_irq:
-	devm_free_irq(pdata->dev, pdata->an_irq, pdata);
+	if (pdata->dev_irq != pdata->an_irq)
+		devm_free_irq(pdata->dev, pdata->an_irq, pdata);
+
+err_stop:
+	pdata->phy_if.phy_impl.stop(pdata);
 
 	return ret;
 }
 
 static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
 {
-	unsigned int count, reg;
-
-	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-	reg |= MDIO_CTRL1_RESET;
-	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
-
-	count = 50;
-	do {
-		msleep(20);
-		reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
-	} while ((reg & MDIO_CTRL1_RESET) && --count);
+	int ret;
 
-	if (reg & MDIO_CTRL1_RESET)
-		return -ETIMEDOUT;
+	ret = pdata->phy_if.phy_impl.reset(pdata);
+	if (ret)
+		return ret;
 
 	/* Disable auto-negotiation for now */
-	xgbe_disable_an(pdata);
+	xgbe_an_disable_all(pdata);
 
 	/* Clear auto-negotiation interrupts */
-	XMDIO_WRITE(pdata, MDIO_MMD_AN, MDIO_AN_INT, 0);
+	xgbe_an_clear_interrupts_all(pdata);
 
 	return 0;
 }
@@ -1257,74 +1450,96 @@ static void xgbe_dump_phy_registers(struct xgbe_prv_data *pdata)
 
 	dev_dbg(dev, "\n************* PHY Reg dump **********************\n");
 
-	dev_dbg(dev, "PCS Control Reg (%#04x) = %#04x\n", MDIO_CTRL1,
+	dev_dbg(dev, "PCS Control Reg (%#06x) = %#06x\n", MDIO_CTRL1,
 		XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1));
-	dev_dbg(dev, "PCS Status Reg (%#04x) = %#04x\n", MDIO_STAT1,
+	dev_dbg(dev, "PCS Status Reg (%#06x) = %#06x\n", MDIO_STAT1,
 		XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1));
-	dev_dbg(dev, "Phy Id (PHYS ID 1 %#04x)= %#04x\n", MDIO_DEVID1,
+	dev_dbg(dev, "Phy Id (PHYS ID 1 %#06x)= %#06x\n", MDIO_DEVID1,
 		XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID1));
-	dev_dbg(dev, "Phy Id (PHYS ID 2 %#04x)= %#04x\n", MDIO_DEVID2,
+	dev_dbg(dev, "Phy Id (PHYS ID 2 %#06x)= %#06x\n", MDIO_DEVID2,
 		XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVID2));
-	dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS1,
+	dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS1,
 		XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS1));
-	dev_dbg(dev, "Devices in Package (%#04x)= %#04x\n", MDIO_DEVS2,
+	dev_dbg(dev, "Devices in Package (%#06x)= %#06x\n", MDIO_DEVS2,
 		XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_DEVS2));
 
-	dev_dbg(dev, "Auto-Neg Control Reg (%#04x) = %#04x\n", MDIO_CTRL1,
+	dev_dbg(dev, "Auto-Neg Control Reg (%#06x) = %#06x\n", MDIO_CTRL1,
 		XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_CTRL1));
-	dev_dbg(dev, "Auto-Neg Status Reg (%#04x) = %#04x\n", MDIO_STAT1,
+	dev_dbg(dev, "Auto-Neg Status Reg (%#06x) = %#06x\n", MDIO_STAT1,
 		XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_STAT1));
-	dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#04x) = %#04x\n",
+	dev_dbg(dev, "Auto-Neg Ad Reg 1 (%#06x) = %#06x\n",
 		MDIO_AN_ADVERTISE,
 		XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE));
-	dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#04x) = %#04x\n",
+	dev_dbg(dev, "Auto-Neg Ad Reg 2 (%#06x) = %#06x\n",
 		MDIO_AN_ADVERTISE + 1,
 		XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1));
-	dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#04x) = %#04x\n",
+	dev_dbg(dev, "Auto-Neg Ad Reg 3 (%#06x) = %#06x\n",
 		MDIO_AN_ADVERTISE + 2,
 		XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2));
-	dev_dbg(dev, "Auto-Neg Completion Reg (%#04x) = %#04x\n",
+	dev_dbg(dev, "Auto-Neg Completion Reg (%#06x) = %#06x\n",
 		MDIO_AN_COMP_STAT,
 		XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_COMP_STAT));
 
 	dev_dbg(dev, "\n*************************************************\n");
 }
 
-static void xgbe_phy_init(struct xgbe_prv_data *pdata)
+/* Map the advertising mask to the fastest full-duplex speed it contains */
+static int xgbe_phy_best_advertised_speed(struct xgbe_prv_data *pdata)
 {
+	unsigned int adv = pdata->phy.advertising;
+
+	/* Tested from fastest to slowest so the first hit is the best speed */
+	if (adv & (ADVERTISED_10000baseKR_Full | ADVERTISED_10000baseT_Full))
+		return SPEED_10000;
+	if (adv & ADVERTISED_2500baseX_Full)
+		return SPEED_2500;
+	if (adv & (ADVERTISED_1000baseKX_Full | ADVERTISED_1000baseT_Full))
+		return SPEED_1000;
+	if (adv & ADVERTISED_100baseT_Full)
+		return SPEED_100;
+
+	return SPEED_UNKNOWN;
+}
+
+static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
+{
+	xgbe_phy_stop(pdata);
+	/* With the PHY stopped, invoke the PHY implementation's exit hook */
+	pdata->phy_if.phy_impl.exit(pdata);
+}
+
+static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+{
+	int ret;
+
 	mutex_init(&pdata->an_mutex);
 	INIT_WORK(&pdata->an_irq_work, xgbe_an_irq_work);
 	INIT_WORK(&pdata->an_work, xgbe_an_state_machine);
 	pdata->mdio_mmd = MDIO_MMD_PCS;
 
-	/* Initialize supported features */
-	pdata->phy.supported = SUPPORTED_Autoneg;
-	pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
-	pdata->phy.supported |= SUPPORTED_Backplane;
-	pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
-	switch (pdata->speed_set) {
-	case XGBE_SPEEDSET_1000_10000:
-		pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
-		break;
-	case XGBE_SPEEDSET_2500_10000:
-		pdata->phy.supported |= SUPPORTED_2500baseX_Full;
-		break;
-	}
-
+	/* Check for FEC support */
 	pdata->fec_ability = XMDIO_READ(pdata, MDIO_MMD_PMAPMD,
 					MDIO_PMA_10GBR_FECABLE);
 	pdata->fec_ability &= (MDIO_PMA_10GBR_FECABLE_ABLE |
 			       MDIO_PMA_10GBR_FECABLE_ERRABLE);
-	if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
-		pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
 
+	/* Setup the phy (including supported features) */
+	ret = pdata->phy_if.phy_impl.init(pdata);
+	if (ret)
+		return ret;
 	pdata->phy.advertising = pdata->phy.supported;
 
 	pdata->phy.address = 0;
 
-	pdata->phy.autoneg = AUTONEG_ENABLE;
-	pdata->phy.speed = SPEED_UNKNOWN;
-	pdata->phy.duplex = DUPLEX_UNKNOWN;
+	if (pdata->phy.advertising & ADVERTISED_Autoneg) {
+		pdata->phy.autoneg = AUTONEG_ENABLE;
+		pdata->phy.speed = SPEED_UNKNOWN;
+		pdata->phy.duplex = DUPLEX_UNKNOWN;
+	} else {
+		pdata->phy.autoneg = AUTONEG_DISABLE;
+		pdata->phy.speed = xgbe_phy_best_advertised_speed(pdata);
+		pdata->phy.duplex = DUPLEX_FULL;
+	}
 
 	pdata->phy.link = 0;
 
@@ -1346,11 +1561,14 @@ static void xgbe_phy_init(struct xgbe_prv_data *pdata)
 
 	if (netif_msg_drv(pdata))
 		xgbe_dump_phy_registers(pdata);
+
+	return 0;
 }
 
 void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if)
 {
 	phy_if->phy_init        = xgbe_phy_init;
+	phy_if->phy_exit        = xgbe_phy_exit;
 
 	phy_if->phy_reset       = xgbe_phy_reset;
 	phy_if->phy_start       = xgbe_phy_start;
@@ -1358,4 +1576,8 @@ void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *phy_if)
 
 	phy_if->phy_status      = xgbe_phy_status;
 	phy_if->phy_config_aneg = xgbe_phy_config_aneg;
+
+	phy_if->phy_valid_speed = xgbe_phy_valid_speed;
+
+	phy_if->an_isr          = xgbe_an_combined_isr;
 }
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
new file mode 100644
index 000000000000..e76b7f65b805
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c
@@ -0,0 +1,529 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+#include <linux/log2.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+/* Enable MSI: request XGBE_MSIX_BASE_COUNT + max(Rx, Tx rings) vectors,
+ * rounded up to a power of two, else fall back to a single MSI vector
+ */
+static int xgbe_config_msi(struct xgbe_prv_data *pdata)
+{
+	struct pci_dev *pcidev = pdata->pcidev;
+	unsigned int nvec, vec, chan;
+	int ret;
+
+	nvec = XGBE_MSIX_BASE_COUNT +
+	       max(pdata->rx_ring_count, pdata->tx_ring_count);
+	nvec = roundup_pow_of_two(nvec);
+
+	ret = pci_enable_msi_exact(pcidev, nvec);
+	if (ret < 0) {
+		dev_info(pdata->dev, "MSI request for %u interrupts failed\n",
+			 nvec);
+		ret = pci_enable_msi(pcidev);
+		if (ret < 0) {
+			dev_info(pdata->dev, "MSI enablement failed\n");
+			return ret;
+		}
+		nvec = 1;
+	}
+
+	pdata->irq_count = nvec;
+	pdata->dev_irq = pcidev->irq;
+
+	if (nvec > 1) {
+		pdata->ecc_irq = pcidev->irq + 1;
+		pdata->i2c_irq = pcidev->irq + 2;
+		pdata->an_irq = pcidev->irq + 3;
+		/* Vectors past the base set are handed to the DMA channels */
+		chan = 0;
+		for (vec = XGBE_MSIX_BASE_COUNT;
+		     (vec < nvec) && (chan < XGBE_MAX_DMA_CHANNELS); vec++)
+			pdata->channel_irq[chan++] = pcidev->irq + vec;
+		pdata->channel_irq_count = chan;
+
+		pdata->per_channel_irq = 1;
+		pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL;
+	} else {
+		/* Single vector: every interrupt source shares it */
+		pdata->ecc_irq = pcidev->irq;
+		pdata->i2c_irq = pcidev->irq;
+		pdata->an_irq = pcidev->irq;
+	}
+
+	if (netif_msg_probe(pdata))
+		dev_dbg(pdata->dev, "MSI interrupts enabled\n");
+
+	return 0;
+}
+
+/* Enable MSI-X and record which vector serves each interrupt source */
+static int xgbe_config_msix(struct xgbe_prv_data *pdata)
+{
+	struct msix_entry *entries;
+	unsigned int nvec, idx, chan;
+	int ret;
+
+	nvec = XGBE_MSIX_BASE_COUNT +
+	       max(pdata->rx_ring_count, pdata->tx_ring_count);
+
+	entries = devm_kcalloc(pdata->dev, nvec, sizeof(*entries), GFP_KERNEL);
+	pdata->msix_entries = entries;
+	if (!entries)
+		return -ENOMEM;
+
+	for (idx = 0; idx < nvec; idx++)
+		entries[idx].entry = idx;
+
+	ret = pci_enable_msix_range(pdata->pcidev, entries,
+				    XGBE_MSIX_MIN_COUNT, nvec);
+	if (ret < 0) {
+		dev_info(pdata->dev, "MSI-X enablement failed\n");
+		devm_kfree(pdata->dev, entries);
+		pdata->msix_entries = NULL;
+		return ret;
+	}
+
+	/* On success ret is the number of vectors that were enabled */
+	pdata->irq_count = ret;
+	pdata->dev_irq = entries[0].vector;
+	pdata->ecc_irq = entries[1].vector;
+	pdata->i2c_irq = entries[2].vector;
+	pdata->an_irq = entries[3].vector;
+
+	chan = 0;
+	for (idx = XGBE_MSIX_BASE_COUNT; idx < ret; idx++)
+		pdata->channel_irq[chan++] = entries[idx].vector;
+	pdata->channel_irq_count = chan;
+
+	pdata->per_channel_irq = 1;
+	pdata->channel_irq_mode = XGBE_IRQ_MODE_LEVEL;
+
+	if (netif_msg_probe(pdata))
+		dev_dbg(pdata->dev, "MSI-X interrupts enabled\n");
+
+	return 0;
+}
+
+/* Choose the interrupt scheme: MSI-X, else MSI, else one shared irq */
+static int xgbe_config_irqs(struct xgbe_prv_data *pdata)
+{
+	unsigned int chan;
+	int ret;
+
+	ret = xgbe_config_msix(pdata);
+	if (ret)
+		ret = xgbe_config_msi(pdata);
+
+	if (ret) {
+		/* Neither MSI-X nor MSI: everything shares the device irq */
+		pdata->irq_count = 1;
+		pdata->irq_shared = 1;
+
+		pdata->dev_irq = pdata->pcidev->irq;
+		pdata->ecc_irq = pdata->pcidev->irq;
+		pdata->i2c_irq = pdata->pcidev->irq;
+		pdata->an_irq = pdata->pcidev->irq;
+	}
+
+	if (!netif_msg_probe(pdata))
+		return 0;
+
+	/* Report the resulting irq assignments */
+	dev_dbg(pdata->dev, " dev irq=%d\n", pdata->dev_irq);
+	dev_dbg(pdata->dev, " ecc irq=%d\n", pdata->ecc_irq);
+	dev_dbg(pdata->dev, " i2c irq=%d\n", pdata->i2c_irq);
+	dev_dbg(pdata->dev, "  an irq=%d\n", pdata->an_irq);
+	for (chan = 0; chan < pdata->channel_irq_count; chan++)
+		dev_dbg(pdata->dev, " dma%u irq=%d\n",
+			chan, pdata->channel_irq[chan]);
+
+	return 0;
+}
+
+/* PCI probe: map the register BARs, read the MAC address and hardware
+ * limits, configure interrupts and the net device; 0 or negative errno
+ */
+static int xgbe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct xgbe_prv_data *pdata;
+	struct device *dev = &pdev->dev;
+	void __iomem * const *iomap_table;
+	unsigned int ma_lo, ma_hi;
+	unsigned int reg;
+	int bar_mask;
+	int ret;
+
+	pdata = xgbe_alloc_pdata(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto err_alloc;
+	}
+
+	pdata->pcidev = pdev;
+	pci_set_drvdata(pdev, pdata);
+
+	/* Get the version data */
+	pdata->vdata = (struct xgbe_version_data *)id->driver_data;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		dev_err(dev, "pcim_enable_device failed\n");
+		goto err_pci_enable;
+	}
+
+	/* Obtain the mmio areas for the device */
+	bar_mask = pci_select_bars(pdev, IORESOURCE_MEM);
+	ret = pcim_iomap_regions(pdev, bar_mask, XGBE_DRV_NAME);
+	if (ret) {
+		dev_err(dev, "pcim_iomap_regions failed\n");
+		goto err_pci_enable;
+	}
+
+	iomap_table = pcim_iomap_table(pdev);
+	if (!iomap_table) {
+		dev_err(dev, "pcim_iomap_table failed\n");
+		ret = -ENOMEM;
+		goto err_pci_enable;
+	}
+
+	pdata->xgmac_regs = iomap_table[XGBE_XGMAC_BAR];
+	if (!pdata->xgmac_regs) {
+		dev_err(dev, "xgmac ioremap failed\n");
+		ret = -ENOMEM;
+		goto err_pci_enable;
+	}
+	pdata->xprop_regs = pdata->xgmac_regs + XGBE_MAC_PROP_OFFSET;
+	pdata->xi2c_regs = pdata->xgmac_regs + XGBE_I2C_CTRL_OFFSET;
+	if (netif_msg_probe(pdata)) {
+		dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs);
+		dev_dbg(dev, "xprop_regs = %p\n", pdata->xprop_regs);
+		dev_dbg(dev, "xi2c_regs  = %p\n", pdata->xi2c_regs);
+	}
+
+	pdata->xpcs_regs = iomap_table[XGBE_XPCS_BAR];
+	if (!pdata->xpcs_regs) {
+		dev_err(dev, "xpcs ioremap failed\n");
+		ret = -ENOMEM;
+		goto err_pci_enable;
+	}
+	if (netif_msg_probe(pdata))
+		dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
+
+	/* Configure the PCS indirect addressing support */
+	reg = XPCS32_IOREAD(pdata, PCS_V2_WINDOW_DEF);
+	pdata->xpcs_window = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, OFFSET);
+	pdata->xpcs_window <<= 6;
+	pdata->xpcs_window_size = XPCS_GET_BITS(reg, PCS_V2_WINDOW_DEF, SIZE);
+	pdata->xpcs_window_size = 1 << (pdata->xpcs_window_size + 7);
+	pdata->xpcs_window_mask = pdata->xpcs_window_size - 1;
+	if (netif_msg_probe(pdata)) {
+		dev_dbg(dev, "xpcs window      = %#010x\n",
+			pdata->xpcs_window);
+		dev_dbg(dev, "xpcs window size = %#010x\n",
+			pdata->xpcs_window_size);
+		dev_dbg(dev, "xpcs window mask = %#010x\n",
+			pdata->xpcs_window_mask);
+	}
+
+	pci_set_master(pdev);
+
+	/* Enable all interrupts in the hardware */
+	XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff);
+
+	/* Retrieve the MAC address */
+	ma_lo = XP_IOREAD(pdata, XP_MAC_ADDR_LO);
+	ma_hi = XP_IOREAD(pdata, XP_MAC_ADDR_HI);
+	pdata->mac_addr[0] = ma_lo & 0xff;
+	pdata->mac_addr[1] = (ma_lo >> 8) & 0xff;
+	pdata->mac_addr[2] = (ma_lo >> 16) & 0xff;
+	pdata->mac_addr[3] = (ma_lo >> 24) & 0xff;
+	pdata->mac_addr[4] = ma_hi & 0xff;
+	pdata->mac_addr[5] = (ma_hi >> 8) & 0xff;
+	if (!XP_GET_BITS(ma_hi, XP_MAC_ADDR_HI, VALID) ||
+	    !is_valid_ether_addr(pdata->mac_addr)) {
+		dev_err(dev, "invalid mac address\n");
+		ret = -EINVAL;
+		goto err_pci_enable;
+	}
+
+	/* Clock settings */
+	pdata->sysclk_rate = XGBE_V2_DMA_CLOCK_FREQ;
+	pdata->ptpclk_rate = XGBE_V2_PTP_CLOCK_FREQ;
+
+	/* Set the DMA coherency values */
+	pdata->coherent = 1;
+	pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
+	pdata->arcache = XGBE_DMA_OS_ARCACHE;
+	pdata->awcache = XGBE_DMA_OS_AWCACHE;
+
+	/* Set the maximum channels and queues */
+	reg = XP_IOREAD(pdata, XP_PROP_1);
+	pdata->tx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_DMA);
+	pdata->rx_max_channel_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_DMA);
+	pdata->tx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_TX_QUEUES);
+	pdata->rx_max_q_count = XP_GET_BITS(reg, XP_PROP_1, MAX_RX_QUEUES);
+	if (netif_msg_probe(pdata)) {
+		dev_dbg(dev, "max tx/rx channel count = %u/%u\n",
+			pdata->tx_max_channel_count,
+			pdata->rx_max_channel_count);
+		dev_dbg(dev, "max tx/rx hw queue count = %u/%u\n",
+			pdata->tx_max_q_count, pdata->rx_max_q_count);
+	}
+
+	/* Set the hardware channel and queue counts */
+	xgbe_set_counts(pdata);
+
+	/* Set the maximum fifo amounts */
+	reg = XP_IOREAD(pdata, XP_PROP_2);
+	pdata->tx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, TX_FIFO_SIZE);
+	pdata->tx_max_fifo_size *= 16384;
+	pdata->tx_max_fifo_size = min(pdata->tx_max_fifo_size,
+				      pdata->vdata->tx_max_fifo_size);
+	pdata->rx_max_fifo_size = XP_GET_BITS(reg, XP_PROP_2, RX_FIFO_SIZE);
+	pdata->rx_max_fifo_size *= 16384;
+	pdata->rx_max_fifo_size = min(pdata->rx_max_fifo_size,
+				      pdata->vdata->rx_max_fifo_size);
+	if (netif_msg_probe(pdata))
+		dev_dbg(dev, "max tx/rx max fifo size = %u/%u\n",
+			pdata->tx_max_fifo_size, pdata->rx_max_fifo_size);
+
+	/* Configure interrupt support */
+	ret = xgbe_config_irqs(pdata);
+	if (ret)
+		goto err_pci_enable;
+
+	/* Configure the netdev resource */
+	ret = xgbe_config_netdev(pdata);
+	if (ret)
+		goto err_pci_enable;
+
+	netdev_notice(pdata->netdev, "net device enabled\n");
+	return 0;
+
+err_pci_enable:
+	xgbe_free_pdata(pdata);
+err_alloc:
+	dev_notice(dev, "net device not enabled\n");
+	return ret;
+}
+
+static void xgbe_pci_remove(struct pci_dev *pdev)
+{
+	struct xgbe_prv_data *priv = pci_get_drvdata(pdev);
+
+	/* Deconfigure the netdev first, then release the private data */
+	xgbe_deconfig_netdev(priv);
+	xgbe_free_pdata(priv);
+}
+
+#ifdef CONFIG_PM
+static int xgbe_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct xgbe_prv_data *pdata = pci_get_drvdata(pdev);
+	struct net_device *netdev = pdata->netdev;
+	int ret = 0;
+
+	if (netif_running(netdev))
+		ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+	/* Set the PCS low-power bit; lpm_ctrl keeps the value written */
+	pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+	return ret;
+}
+
+static int xgbe_pci_resume(struct pci_dev *pdev)
+{
+	struct xgbe_prv_data *pdata = pci_get_drvdata(pdev);
+	struct net_device *ndev = pdata->netdev;
+	int rc;
+
+	/* Clear the low-power bit in lpm_ctrl and write it to the PCS */
+	pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+	if (!netif_running(ndev))
+		return 0;
+
+	rc = xgbe_powerup(ndev, XGMAC_DRIVER_CONTEXT);
+	/* The link or phy state may have changed while we were powered
+	 * down, so a restart is scheduled whatever the powerup result.
+	 */
+	schedule_work(&pdata->restart_work);
+	return rc;
+}
+#endif /* CONFIG_PM */
+
+static const struct xgbe_version_data xgbe_v2a = {
+	.init_function_ptrs_phy_impl	= xgbe_init_function_ptrs_phy_v2,
+	.xpcs_access			= XGBE_XPCS_ACCESS_V2,
+	.mmc_64bit			= 1,
+	.tx_max_fifo_size		= 229376,	/* cap: 14 * 16384 */
+	.rx_max_fifo_size		= 229376,	/* cap: 14 * 16384 */
+	.tx_tstamp_workaround		= 1,
+	.ecc_support			= 1,
+	.i2c_support			= 1,
+};
+
+static const struct xgbe_version_data xgbe_v2b = {
+	.init_function_ptrs_phy_impl	= xgbe_init_function_ptrs_phy_v2,
+	.xpcs_access			= XGBE_XPCS_ACCESS_V2,
+	.mmc_64bit			= 1,
+	.tx_max_fifo_size		= 65536,	/* cap: 4 * 16384 */
+	.rx_max_fifo_size		= 65536,	/* cap: 4 * 16384 */
+	.tx_tstamp_workaround		= 1,
+	.ecc_support			= 1,
+	.i2c_support			= 1,
+};
+
+static const struct pci_device_id xgbe_pci_table[] = {
+	{ PCI_VDEVICE(AMD, 0x1458),
+	  .driver_data = (kernel_ulong_t)&xgbe_v2a },
+	{ PCI_VDEVICE(AMD, 0x1459),
+	  .driver_data = (kernel_ulong_t)&xgbe_v2b },
+	/* Last entry must be zero (table terminator) */
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, xgbe_pci_table);
+
+static struct pci_driver xgbe_driver = {
+	.name		= XGBE_DRV_NAME,
+	.id_table	= xgbe_pci_table,
+	.probe		= xgbe_pci_probe,
+	.remove		= xgbe_pci_remove,
+#ifdef CONFIG_PM
+	.suspend	= xgbe_pci_suspend,
+	.resume		= xgbe_pci_resume,
+#endif /* CONFIG_PM */
+};
+
+int xgbe_pci_init(void)
+{
+	return pci_register_driver(&xgbe_driver); /* result returned as-is */
+}
+
+void xgbe_pci_exit(void)
+{
+	pci_unregister_driver(&xgbe_driver); /* pairs with xgbe_pci_init() */
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
new file mode 100644
index 000000000000..c75edcac5e0a
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v1.c
@@ -0,0 +1,845 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/kmod.h>
+#include <linux/device.h>
+#include <linux/property.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+#define XGBE_BLWC_PROPERTY		"amd,serdes-blwc"
+#define XGBE_CDR_RATE_PROPERTY		"amd,serdes-cdr-rate"
+#define XGBE_PQ_SKEW_PROPERTY		"amd,serdes-pq-skew"
+#define XGBE_TX_AMP_PROPERTY		"amd,serdes-tx-amp"
+#define XGBE_DFE_CFG_PROPERTY		"amd,serdes-dfe-tap-config"
+#define XGBE_DFE_ENA_PROPERTY		"amd,serdes-dfe-tap-enable"
+
+/* Default SerDes settings; tunable ones seed the xgbe_phy_* tables below */
+#define XGBE_SPEED_1000_BLWC		1
+#define XGBE_SPEED_1000_CDR		0x2
+#define XGBE_SPEED_1000_PLL		0x0
+#define XGBE_SPEED_1000_PQ		0xa
+#define XGBE_SPEED_1000_RATE		0x3
+#define XGBE_SPEED_1000_TXAMP		0xf
+#define XGBE_SPEED_1000_WORD		0x1
+#define XGBE_SPEED_1000_DFE_TAP_CONFIG	0x3
+#define XGBE_SPEED_1000_DFE_TAP_ENABLE	0x0
+
+#define XGBE_SPEED_2500_BLWC		1
+#define XGBE_SPEED_2500_CDR		0x2
+#define XGBE_SPEED_2500_PLL		0x0
+#define XGBE_SPEED_2500_PQ		0xa
+#define XGBE_SPEED_2500_RATE		0x1
+#define XGBE_SPEED_2500_TXAMP		0xf
+#define XGBE_SPEED_2500_WORD		0x1
+#define XGBE_SPEED_2500_DFE_TAP_CONFIG	0x3
+#define XGBE_SPEED_2500_DFE_TAP_ENABLE	0x0
+
+#define XGBE_SPEED_10000_BLWC		0
+#define XGBE_SPEED_10000_CDR		0x7
+#define XGBE_SPEED_10000_PLL		0x1
+#define XGBE_SPEED_10000_PQ		0x12
+#define XGBE_SPEED_10000_RATE		0x0
+#define XGBE_SPEED_10000_TXAMP		0xa
+#define XGBE_SPEED_10000_WORD		0x7
+#define XGBE_SPEED_10000_DFE_TAP_CONFIG	0x1
+#define XGBE_SPEED_10000_DFE_TAP_ENABLE	0x7f
+
+/* Rate-change complete wait/retry count (status polls, 50-75us apart) */
+#define XGBE_RATECHANGE_COUNT		500
+
+static const u32 xgbe_phy_blwc[] = { /* XGBE_BLWC_PROPERTY defaults */
+	XGBE_SPEED_1000_BLWC,
+	XGBE_SPEED_2500_BLWC,
+	XGBE_SPEED_10000_BLWC,
+};
+
+static const u32 xgbe_phy_cdr_rate[] = { /* XGBE_CDR_RATE_PROPERTY defaults */
+	XGBE_SPEED_1000_CDR,
+	XGBE_SPEED_2500_CDR,
+	XGBE_SPEED_10000_CDR,
+};
+
+static const u32 xgbe_phy_pq_skew[] = { /* XGBE_PQ_SKEW_PROPERTY defaults */
+	XGBE_SPEED_1000_PQ,
+	XGBE_SPEED_2500_PQ,
+	XGBE_SPEED_10000_PQ,
+};
+
+static const u32 xgbe_phy_tx_amp[] = { /* XGBE_TX_AMP_PROPERTY defaults */
+	XGBE_SPEED_1000_TXAMP,
+	XGBE_SPEED_2500_TXAMP,
+	XGBE_SPEED_10000_TXAMP,
+};
+
+static const u32 xgbe_phy_dfe_tap_cfg[] = { /* XGBE_DFE_CFG_PROPERTY defaults */
+	XGBE_SPEED_1000_DFE_TAP_CONFIG,
+	XGBE_SPEED_2500_DFE_TAP_CONFIG,
+	XGBE_SPEED_10000_DFE_TAP_CONFIG,
+};
+
+static const u32 xgbe_phy_dfe_tap_ena[] = { /* XGBE_DFE_ENA_PROPERTY defaults */
+	XGBE_SPEED_1000_DFE_TAP_ENABLE,
+	XGBE_SPEED_2500_DFE_TAP_ENABLE,
+	XGBE_SPEED_10000_DFE_TAP_ENABLE,
+};
+
+struct xgbe_phy_data {
+	/* XGBE_SPEEDSET_1000_10000 or XGBE_SPEEDSET_2500_10000 */
+	unsigned int speed_set;
+
+	/* SerDes UEFI configurable settings.
+	 *   Switching between modes/speeds requires new values for some
+	 *   SerDes settings.  The values can be supplied as device
+	 *   properties in array format (1GbE, 2.5GbE, 10GbE order); the
+	 *   arrays are indexed with the XGBE_SPEED_* constants.
+	 */
+	u32 blwc[XGBE_SPEEDS];
+	u32 cdr_rate[XGBE_SPEEDS];
+	u32 pq_skew[XGBE_SPEEDS];
+	u32 tx_amp[XGBE_SPEEDS];
+	u32 dfe_tap_cfg[XGBE_SPEEDS];
+	u32 dfe_tap_ena[XGBE_SPEEDS];
+};
+
+static void xgbe_phy_kr_training_pre(struct xgbe_prv_data *pdata)
+{
+	XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 1);
+}
+
+static void xgbe_phy_kr_training_post(struct xgbe_prv_data *pdata)
+{
+	XSIR0_IOWRITE_BITS(pdata, SIR0_KR_RT_1, RESET, 0);
+}
+
+static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mode mode;
+	unsigned int ad_reg, lp_reg;
+
+	pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+	pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+
+	/* Compare Advertisement and Link Partner register 1 */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
+	if (lp_reg & 0x400)
+		pdata->phy.lp_advertising |= ADVERTISED_Pause;
+	if (lp_reg & 0x800)
+		pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+
+	if (pdata->phy.pause_autoneg) {
+		/* Set flow control based on auto-negotiation result */
+		pdata->phy.tx_pause = 0;
+		pdata->phy.rx_pause = 0;
+
+		if (ad_reg & lp_reg & 0x400) {
+			pdata->phy.tx_pause = 1;
+			pdata->phy.rx_pause = 1;
+		} else if (ad_reg & lp_reg & 0x800) {
+			if (ad_reg & 0x400)
+				pdata->phy.rx_pause = 1;
+			else if (lp_reg & 0x400)
+				pdata->phy.tx_pause = 1;
+		}
+	}
+
+	/* Compare Advertisement and Link Partner register 2 */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+	if (lp_reg & 0x80)
+		pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+	if (lp_reg & 0x20) {
+		if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+			pdata->phy.lp_advertising |= ADVERTISED_2500baseX_Full;
+		else
+			pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+	}
+	/* Pick the mode from the abilities common to both ends, KR first */
+	ad_reg &= lp_reg;
+	if (ad_reg & 0x80) {
+		mode = XGBE_MODE_KR;
+	} else if (ad_reg & 0x20) {
+		if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+			mode = XGBE_MODE_KX_2500;
+		else
+			mode = XGBE_MODE_KX_1000;
+	} else {
+		mode = XGBE_MODE_UNKNOWN;
+	}
+
+	/* Register 3: only the link partner FEC bits are examined */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
+	if (lp_reg & 0xc000)
+		pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+
+	return mode;
+}
+
+static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata)
+{
+	return pdata->phy.advertising;	/* returned without adjustment */
+}
+
+static int xgbe_phy_an_config(struct xgbe_prv_data *pdata)
+{
+	/* Nothing uniquely required for auto-negotiation configuration */
+	return 0;
+}
+
+static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata)
+{
+	return XGBE_AN_MODE_CL73;	/* fixed answer; pdata is not used */
+}
+
+static void xgbe_phy_pcs_power_cycle(struct xgbe_prv_data *pdata)
+{
+	unsigned int reg;
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	/* Set the low-power bit in PCS control 1 ... */
+	reg |= MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+	usleep_range(75, 100);
+	/* ... and clear it again after the delay, reusing the value read */
+	reg &= ~MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+}
+
+static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata)
+{
+	/* Assert Rx/Tx ratechange (see xgbe_phy_complete_ratechange()) */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 1);
+}
+
+static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
+{
+	unsigned int tries = XGBE_RATECHANGE_COUNT;
+	int ready = 0;
+	u16 status;
+
+	/* Release Rx and Tx ratechange */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, RATECHANGE, 0);
+
+	/* Poll the status register until both Rx and Tx report ready */
+	while (!ready && tries--) {
+		usleep_range(50, 75);
+
+		status = XSIR0_IOREAD(pdata, SIR0_STATUS);
+		ready = XSIR_GET_BITS(status, SIR0_STATUS, RX_READY) &&
+			XSIR_GET_BITS(status, SIR0_STATUS, TX_READY);
+	}
+
+	if (!ready) {
+		netif_dbg(pdata, link, pdata->netdev,
+			  "SerDes rx/tx not ready (%#hx)\n", status);
+	}
+
+	/* Write RESETB_RXD 0 then 1 to reset Rx for the DFE changes */
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 0);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG6, RESETB_RXD, 1);
+}
+
+static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int reg;
+
+	/* Set PCS to KR/10G speed */
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+	reg &= ~MDIO_PCS_CTRL2_TYPE;
+	reg |= MDIO_PCS_CTRL2_10GBR;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	reg &= ~MDIO_CTRL1_SPEEDSEL;
+	reg |= MDIO_CTRL1_SPEED10G;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+	xgbe_phy_pcs_power_cycle(pdata);
+
+	/* Set SerDes to 10G speed */
+	xgbe_phy_start_ratechange(pdata);
+	/* Fixed data rate, word mode and PLL selection for this speed */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_10000_RATE);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_10000_WORD);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_10000_PLL);
+	/* Tunable values: property overrides or defaults set up at init */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
+			   phy_data->cdr_rate[XGBE_SPEED_10000]);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
+			   phy_data->tx_amp[XGBE_SPEED_10000]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
+			   phy_data->blwc[XGBE_SPEED_10000]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
+			   phy_data->pq_skew[XGBE_SPEED_10000]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
+			   phy_data->dfe_tap_cfg[XGBE_SPEED_10000]);
+	XRXTX_IOWRITE(pdata, RXTX_REG22,
+		      phy_data->dfe_tap_ena[XGBE_SPEED_10000]);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n");
+}
+
+static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int reg;
+
+	/* Set PCS to KX/1G speed (same PCS setup as xgbe_phy_kx_1000_mode) */
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+	reg &= ~MDIO_PCS_CTRL2_TYPE;
+	reg |= MDIO_PCS_CTRL2_10GBX;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	reg &= ~MDIO_CTRL1_SPEEDSEL;
+	reg |= MDIO_CTRL1_SPEED1G;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+	xgbe_phy_pcs_power_cycle(pdata);
+
+	/* Set SerDes to 2.5G speed */
+	xgbe_phy_start_ratechange(pdata);
+	/* Fixed data rate, word mode and PLL selection for this speed */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_2500_RATE);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_2500_WORD);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_2500_PLL);
+	/* Tunable values: property overrides or defaults set up at init */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
+			   phy_data->cdr_rate[XGBE_SPEED_2500]);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
+			   phy_data->tx_amp[XGBE_SPEED_2500]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
+			   phy_data->blwc[XGBE_SPEED_2500]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
+			   phy_data->pq_skew[XGBE_SPEED_2500]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
+			   phy_data->dfe_tap_cfg[XGBE_SPEED_2500]);
+	XRXTX_IOWRITE(pdata, RXTX_REG22,
+		      phy_data->dfe_tap_ena[XGBE_SPEED_2500]);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n");
+}
+
+static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int reg;
+
+	/* Set PCS to KX/1G speed */
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+	reg &= ~MDIO_PCS_CTRL2_TYPE;
+	reg |= MDIO_PCS_CTRL2_10GBX;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL2, reg);
+
+	reg = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	reg &= ~MDIO_CTRL1_SPEEDSEL;
+	reg |= MDIO_CTRL1_SPEED1G;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, reg);
+
+	xgbe_phy_pcs_power_cycle(pdata);
+
+	/* Set SerDes to 1G speed */
+	xgbe_phy_start_ratechange(pdata);
+	/* Fixed data rate, word mode and PLL selection for this speed */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, DATARATE, XGBE_SPEED_1000_RATE);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, WORDMODE, XGBE_SPEED_1000_WORD);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, PLLSEL, XGBE_SPEED_1000_PLL);
+	/* Tunable values: property overrides or defaults set up at init */
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, CDR_RATE,
+			   phy_data->cdr_rate[XGBE_SPEED_1000]);
+	XSIR1_IOWRITE_BITS(pdata, SIR1_SPEED, TXAMP,
+			   phy_data->tx_amp[XGBE_SPEED_1000]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG20, BLWC_ENA,
+			   phy_data->blwc[XGBE_SPEED_1000]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG114, PQ_REG,
+			   phy_data->pq_skew[XGBE_SPEED_1000]);
+	XRXTX_IOWRITE_BITS(pdata, RXTX_REG129, RXDFE_CONFIG,
+			   phy_data->dfe_tap_cfg[XGBE_SPEED_1000]);
+	XRXTX_IOWRITE(pdata, RXTX_REG22,
+		      phy_data->dfe_tap_ena[XGBE_SPEED_1000]);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n");
+}
+
+static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int pcs_type;
+
+	/* The type field of PCS control register 2 tells KR (10GBR type)
+	 * apart from the KX modes.
+	 */
+	pcs_type = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL2);
+	pcs_type &= MDIO_PCS_CTRL2_TYPE;
+
+	if (pcs_type == MDIO_PCS_CTRL2_10GBR)
+		return XGBE_MODE_KR;
+
+	/* Any other type is KX; the speed set picks 2.5GbE or 1GbE */
+	if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+		return XGBE_MODE_KX_2500;
+
+	return XGBE_MODE_KX_1000;
+}
+
+static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	/* Report the mode to switch to: KR when the current mode is not
+	 * KR, otherwise the KX mode that belongs to the configured speed
+	 * set.  The mode is only returned; this function does not program
+	 * it.
+	 */
+	if (xgbe_phy_cur_mode(pdata) != XGBE_MODE_KR)
+		return XGBE_MODE_KR;
+
+	if (phy_data->speed_set == XGBE_SPEEDSET_2500_10000)
+		return XGBE_MODE_KX_2500;
+
+	return XGBE_MODE_KX_1000;
+}
+
+static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata,
+					int speed)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int set = phy_data->speed_set;
+
+	/* 10GbE is always KR; 1GbE and 2.5GbE need the matching speed set */
+	if (speed == SPEED_10000)
+		return XGBE_MODE_KR;
+
+	if (speed == SPEED_1000 && set == XGBE_SPEEDSET_1000_10000)
+		return XGBE_MODE_KX_1000;
+
+	if (speed == SPEED_2500 && set == XGBE_SPEEDSET_2500_10000)
+		return XGBE_MODE_KX_2500;
+
+	return XGBE_MODE_UNKNOWN;
+}
+
+static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+	/* Call the routine that programs the requested mode.  Modes other
+	 * than the three handled below are ignored.
+	 */
+	if (mode == XGBE_MODE_KX_1000) {
+		xgbe_phy_kx_1000_mode(pdata);
+		return;
+	}
+	if (mode == XGBE_MODE_KX_2500) {
+		xgbe_phy_kx_2500_mode(pdata);
+		return;
+	}
+	if (mode == XGBE_MODE_KR)
+		xgbe_phy_kr_mode(pdata);
+}
+
+static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
+				enum xgbe_mode mode, u32 advert)
+{
+	enum xgbe_mode fixed_mode;
+
+	/* Decide whether @mode may be used.  With auto-negotiation
+	 * enabled it is usable when @advert is among the advertised
+	 * abilities; otherwise it is usable when the configured speed
+	 * maps to @mode.
+	 */
+	if (pdata->phy.autoneg == AUTONEG_ENABLE)
+		return (pdata->phy.advertising & advert) != 0;
+
+	fixed_mode = xgbe_phy_get_mode(pdata, pdata->phy.speed);
+	return fixed_mode == mode;
+}
+
+static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+	u32 advert;
+
+	/* Map the mode to its advertising bit; other modes are unusable */
+	if (mode == XGBE_MODE_KX_1000)
+		advert = ADVERTISED_1000baseKX_Full;
+	else if (mode == XGBE_MODE_KX_2500)
+		advert = ADVERTISED_2500baseX_Full;
+	else if (mode == XGBE_MODE_KR)
+		advert = ADVERTISED_10000baseKR_Full;
+	else
+		return false;
+
+	return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	/* 10GbE is always valid; 1GbE and 2.5GbE are each valid only
+	 * when the PHY was configured with the matching speed set.
+	 * Every other speed is rejected.
+	 */
+	switch (speed) {
+	case SPEED_1000:
+		return phy_data->speed_set == XGBE_SPEEDSET_1000_10000;
+	case SPEED_2500:
+		return phy_data->speed_set == XGBE_SPEEDSET_2500_10000;
+	case SPEED_10000:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
+{
+	unsigned int stat1;
+
+	/* This implementation always reports 0 through @an_restart */
+	*an_restart = 0;
+
+	/* The link status bit is latched low: the first read clears the
+	 * latch and the second read supplies the current state.
+	 */
+	stat1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+	stat1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+	return !!(stat1 & MDIO_STAT1_LSTATUS);
+}
+
+static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+{
+	/* Nothing uniquely required for stop; intentionally empty */
+}
+
+static int xgbe_phy_start(struct xgbe_prv_data *pdata)
+{
+	/* Nothing uniquely required for start; always returns 0 */
+	return 0;
+}
+
+static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
+{
+	unsigned int ctrl1, tries;
+
+	/* Request a software reset of the PCS */
+	ctrl1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	ctrl1 |= MDIO_CTRL1_RESET;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, ctrl1);
+
+	/* Poll every 20ms, at most 50 times, for the reset bit to clear */
+	for (tries = 50; tries; tries--) {
+		msleep(20);
+
+		ctrl1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+		if (!(ctrl1 & MDIO_CTRL1_RESET))
+			return 0;
+	}
+
+	return -ETIMEDOUT;
+}
+
+static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
+{
+	/* Nothing uniquely required for exit; intentionally empty */
+}
+
+static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data;
+	int ret;
+
+	phy_data = devm_kzalloc(pdata->dev, sizeof(*phy_data), GFP_KERNEL);
+	if (!phy_data)
+		return -ENOMEM;
+
+	/* Retrieve the PHY speedset */
+	ret = device_property_read_u32(pdata->phy_dev, XGBE_SPEEDSET_PROPERTY,
+				       &phy_data->speed_set);
+	if (ret) {
+		dev_err(pdata->dev, "invalid %s property\n",
+			XGBE_SPEEDSET_PROPERTY);
+		return ret;
+	}
+
+	switch (phy_data->speed_set) {
+	case XGBE_SPEEDSET_1000_10000:
+	case XGBE_SPEEDSET_2500_10000:
+		break;
+	default:
+		dev_err(pdata->dev, "invalid %s property\n",
+			XGBE_SPEEDSET_PROPERTY);
+		return -EINVAL;
+	}
+
+	/* Retrieve the PHY configuration properties */
+	if (device_property_present(pdata->phy_dev, XGBE_BLWC_PROPERTY)) {
+		ret = device_property_read_u32_array(pdata->phy_dev,
+						     XGBE_BLWC_PROPERTY,
+						     phy_data->blwc,
+						     XGBE_SPEEDS);
+		if (ret) {
+			dev_err(pdata->dev, "invalid %s property\n",
+				XGBE_BLWC_PROPERTY);
+			return ret;
+		}
+	} else {
+		memcpy(phy_data->blwc, xgbe_phy_blwc,
+		       sizeof(phy_data->blwc));
+	}
+	/* CDR rate: property if present, otherwise the built-in defaults */
+	if (device_property_present(pdata->phy_dev, XGBE_CDR_RATE_PROPERTY)) {
+		ret = device_property_read_u32_array(pdata->phy_dev,
+						     XGBE_CDR_RATE_PROPERTY,
+						     phy_data->cdr_rate,
+						     XGBE_SPEEDS);
+		if (ret) {
+			dev_err(pdata->dev, "invalid %s property\n",
+				XGBE_CDR_RATE_PROPERTY);
+			return ret;
+		}
+	} else {
+		memcpy(phy_data->cdr_rate, xgbe_phy_cdr_rate,
+		       sizeof(phy_data->cdr_rate));
+	}
+	/* PQ skew: property if present, otherwise the built-in defaults */
+	if (device_property_present(pdata->phy_dev, XGBE_PQ_SKEW_PROPERTY)) {
+		ret = device_property_read_u32_array(pdata->phy_dev,
+						     XGBE_PQ_SKEW_PROPERTY,
+						     phy_data->pq_skew,
+						     XGBE_SPEEDS);
+		if (ret) {
+			dev_err(pdata->dev, "invalid %s property\n",
+				XGBE_PQ_SKEW_PROPERTY);
+			return ret;
+		}
+	} else {
+		memcpy(phy_data->pq_skew, xgbe_phy_pq_skew,
+		       sizeof(phy_data->pq_skew));
+	}
+	/* Tx amplitude: property if present, otherwise the built-in defaults */
+	if (device_property_present(pdata->phy_dev, XGBE_TX_AMP_PROPERTY)) {
+		ret = device_property_read_u32_array(pdata->phy_dev,
+						     XGBE_TX_AMP_PROPERTY,
+						     phy_data->tx_amp,
+						     XGBE_SPEEDS);
+		if (ret) {
+			dev_err(pdata->dev, "invalid %s property\n",
+				XGBE_TX_AMP_PROPERTY);
+			return ret;
+		}
+	} else {
+		memcpy(phy_data->tx_amp, xgbe_phy_tx_amp,
+		       sizeof(phy_data->tx_amp));
+	}
+	/* DFE tap config: property if present, otherwise built-in defaults */
+	if (device_property_present(pdata->phy_dev, XGBE_DFE_CFG_PROPERTY)) {
+		ret = device_property_read_u32_array(pdata->phy_dev,
+						     XGBE_DFE_CFG_PROPERTY,
+						     phy_data->dfe_tap_cfg,
+						     XGBE_SPEEDS);
+		if (ret) {
+			dev_err(pdata->dev, "invalid %s property\n",
+				XGBE_DFE_CFG_PROPERTY);
+			return ret;
+		}
+	} else {
+		memcpy(phy_data->dfe_tap_cfg, xgbe_phy_dfe_tap_cfg,
+		       sizeof(phy_data->dfe_tap_cfg));
+	}
+	/* DFE tap enable: property if present, otherwise built-in defaults */
+	if (device_property_present(pdata->phy_dev, XGBE_DFE_ENA_PROPERTY)) {
+		ret = device_property_read_u32_array(pdata->phy_dev,
+						     XGBE_DFE_ENA_PROPERTY,
+						     phy_data->dfe_tap_ena,
+						     XGBE_SPEEDS);
+		if (ret) {
+			dev_err(pdata->dev, "invalid %s property\n",
+				XGBE_DFE_ENA_PROPERTY);
+			return ret;
+		}
+	} else {
+		memcpy(phy_data->dfe_tap_ena, xgbe_phy_dfe_tap_ena,
+		       sizeof(phy_data->dfe_tap_ena));
+	}
+
+	/* Initialize supported features */
+	pdata->phy.supported = SUPPORTED_Autoneg;
+	pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+	pdata->phy.supported |= SUPPORTED_Backplane;
+	pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
+	switch (phy_data->speed_set) {
+	case XGBE_SPEEDSET_1000_10000:
+		pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
+		break;
+	case XGBE_SPEEDSET_2500_10000:
+		pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+		break;
+	}
+
+	if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+		pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
+
+	pdata->phy_data = phy_data;
+
+	return 0;
+}
+
+void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *phy_if)
+{
+	struct xgbe_phy_impl_if *phy_impl = &phy_if->phy_impl;
+
+	phy_impl->init			= xgbe_phy_init;
+	phy_impl->exit			= xgbe_phy_exit;
+	/* Reset, start and stop */
+	phy_impl->reset			= xgbe_phy_reset;
+	phy_impl->start			= xgbe_phy_start;
+	phy_impl->stop			= xgbe_phy_stop;
+	/* Link state */
+	phy_impl->link_status		= xgbe_phy_link_status;
+	/* Speed validation */
+	phy_impl->valid_speed		= xgbe_phy_valid_speed;
+	/* Mode selection */
+	phy_impl->use_mode		= xgbe_phy_use_mode;
+	phy_impl->set_mode		= xgbe_phy_set_mode;
+	phy_impl->get_mode		= xgbe_phy_get_mode;
+	phy_impl->switch_mode		= xgbe_phy_switch_mode;
+	phy_impl->cur_mode		= xgbe_phy_cur_mode;
+	/* Auto-negotiation */
+	phy_impl->an_mode		= xgbe_phy_an_mode;
+
+	phy_impl->an_config		= xgbe_phy_an_config;
+
+	phy_impl->an_advertising	= xgbe_phy_an_advertising;
+
+	phy_impl->an_outcome		= xgbe_phy_an_outcome;
+	/* KR training hooks */
+	phy_impl->kr_training_pre	= xgbe_phy_kr_training_pre;
+	phy_impl->kr_training_post	= xgbe_phy_kr_training_post;
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
new file mode 100644
index 000000000000..9d8c953083b4
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-phy-v2.c
@@ -0,0 +1,3084 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kmod.h>
+#include <linux/mdio.h>
+#include <linux/phy.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+#define XGBE_PHY_PORT_SPEED_100		BIT(0)
+#define XGBE_PHY_PORT_SPEED_1000	BIT(1)
+#define XGBE_PHY_PORT_SPEED_2500	BIT(2)
+#define XGBE_PHY_PORT_SPEED_10000	BIT(3)
+
+#define XGBE_MUTEX_RELEASE		0x80000000
+
+#define XGBE_SFP_DIRECT			7
+
+/* I2C target addresses */
+#define XGBE_SFP_SERIAL_ID_ADDRESS	0x50
+#define XGBE_SFP_DIAG_INFO_ADDRESS	0x51
+#define XGBE_SFP_PHY_ADDRESS		0x56
+#define XGBE_GPIO_ADDRESS_PCA9555	0x20
+
+/* SFP sideband signal indicators */
+#define XGBE_GPIO_NO_TX_FAULT		BIT(0)
+#define XGBE_GPIO_NO_RATE_SELECT	BIT(1)
+#define XGBE_GPIO_NO_MOD_ABSENT		BIT(2)
+#define XGBE_GPIO_NO_RX_LOS		BIT(3)
+
+/* Rate-change complete wait/retry count */
+#define XGBE_RATECHANGE_COUNT		500
+
+enum xgbe_port_mode {
+	XGBE_PORT_MODE_RSVD = 0,
+	XGBE_PORT_MODE_BACKPLANE,
+	XGBE_PORT_MODE_BACKPLANE_2500,
+	XGBE_PORT_MODE_1000BASE_T,
+	XGBE_PORT_MODE_1000BASE_X,
+	XGBE_PORT_MODE_NBASE_T,
+	XGBE_PORT_MODE_10GBASE_T,
+	XGBE_PORT_MODE_10GBASE_R,
+	XGBE_PORT_MODE_SFP,
+	XGBE_PORT_MODE_MAX,
+};
+
+enum xgbe_conn_type {
+	XGBE_CONN_TYPE_NONE = 0,
+	XGBE_CONN_TYPE_SFP,
+	XGBE_CONN_TYPE_MDIO,
+	XGBE_CONN_TYPE_RSVD1,
+	XGBE_CONN_TYPE_BACKPLANE,
+	XGBE_CONN_TYPE_MAX,
+};
+
+/* SFP/SFP+ related definitions */
+enum xgbe_sfp_comm {
+	XGBE_SFP_COMM_DIRECT = 0,
+	XGBE_SFP_COMM_PCA9545,
+};
+
+enum xgbe_sfp_cable {
+	XGBE_SFP_CABLE_UNKNOWN = 0,
+	XGBE_SFP_CABLE_ACTIVE,
+	XGBE_SFP_CABLE_PASSIVE,
+};
+
+enum xgbe_sfp_base {
+	XGBE_SFP_BASE_UNKNOWN = 0,
+	XGBE_SFP_BASE_1000_T,
+	XGBE_SFP_BASE_1000_SX,
+	XGBE_SFP_BASE_1000_LX,
+	XGBE_SFP_BASE_1000_CX,
+	XGBE_SFP_BASE_10000_SR,
+	XGBE_SFP_BASE_10000_LR,
+	XGBE_SFP_BASE_10000_LRM,
+	XGBE_SFP_BASE_10000_ER,
+	XGBE_SFP_BASE_10000_CR,
+};
+
+enum xgbe_sfp_speed {
+	XGBE_SFP_SPEED_UNKNOWN = 0,
+	XGBE_SFP_SPEED_100_1000,
+	XGBE_SFP_SPEED_1000,
+	XGBE_SFP_SPEED_10000,
+};
+
+/* SFP Serial ID Base ID values relative to an offset of 0 */
+#define XGBE_SFP_BASE_ID			0
+#define XGBE_SFP_ID_SFP				0x03
+
+#define XGBE_SFP_BASE_EXT_ID			1
+#define XGBE_SFP_EXT_ID_SFP			0x04
+
+#define XGBE_SFP_BASE_10GBE_CC			3
+#define XGBE_SFP_BASE_10GBE_CC_SR		BIT(4)
+#define XGBE_SFP_BASE_10GBE_CC_LR		BIT(5)
+#define XGBE_SFP_BASE_10GBE_CC_LRM		BIT(6)
+#define XGBE_SFP_BASE_10GBE_CC_ER		BIT(7)
+
+#define XGBE_SFP_BASE_1GBE_CC			6
+#define XGBE_SFP_BASE_1GBE_CC_SX		BIT(0)
+#define XGBE_SFP_BASE_1GBE_CC_LX		BIT(1)
+#define XGBE_SFP_BASE_1GBE_CC_CX		BIT(2)
+#define XGBE_SFP_BASE_1GBE_CC_T			BIT(3)
+
+#define XGBE_SFP_BASE_CABLE			8
+#define XGBE_SFP_BASE_CABLE_PASSIVE		BIT(2)
+#define XGBE_SFP_BASE_CABLE_ACTIVE		BIT(3)
+
+#define XGBE_SFP_BASE_BR			12
+#define XGBE_SFP_BASE_BR_1GBE_MIN		0x0a
+#define XGBE_SFP_BASE_BR_1GBE_MAX		0x0d
+#define XGBE_SFP_BASE_BR_10GBE_MIN		0x64
+#define XGBE_SFP_BASE_BR_10GBE_MAX		0x68
+
+#define XGBE_SFP_BASE_CU_CABLE_LEN		18
+
+#define XGBE_SFP_BASE_VENDOR_NAME		20
+#define XGBE_SFP_BASE_VENDOR_NAME_LEN		16
+#define XGBE_SFP_BASE_VENDOR_PN			40
+#define XGBE_SFP_BASE_VENDOR_PN_LEN		16
+#define XGBE_SFP_BASE_VENDOR_REV		56
+#define XGBE_SFP_BASE_VENDOR_REV_LEN		4
+
+#define XGBE_SFP_BASE_CC			63
+
+/* SFP Serial ID Extended ID values relative to an offset of 64 */
+#define XGBE_SFP_BASE_VENDOR_SN			4
+#define XGBE_SFP_BASE_VENDOR_SN_LEN		16
+
+#define XGBE_SFP_EXTD_DIAG			28
+#define XGBE_SFP_EXTD_DIAG_ADDR_CHANGE		BIT(2)
+
+#define XGBE_SFP_EXTD_SFF_8472			30
+
+#define XGBE_SFP_EXTD_CC			31
+
+struct xgbe_sfp_eeprom {
+	u8 base[64];	/* Serial ID base fields (offsets 0-63) */
+	u8 extd[32];	/* Serial ID extended fields (offset 64 on) */
+	u8 vendor[32];
+};
+
+#define XGBE_BEL_FUSE_VENDOR	"BEL-FUSE        "
+#define XGBE_BEL_FUSE_PARTNO	"1GBT-SFP06      "
+
+struct xgbe_sfp_ascii {
+	union {	/* overlapping views; each is its field length plus one */
+		char vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN + 1];
+		char partno[XGBE_SFP_BASE_VENDOR_PN_LEN + 1];
+		char rev[XGBE_SFP_BASE_VENDOR_REV_LEN + 1];
+		char serno[XGBE_SFP_BASE_VENDOR_SN_LEN + 1];
+	} u;
+};
+
+/* MDIO PHY reset types */
+enum xgbe_mdio_reset {
+	XGBE_MDIO_RESET_NONE = 0,
+	XGBE_MDIO_RESET_I2C_GPIO,
+	XGBE_MDIO_RESET_INT_GPIO,
+	XGBE_MDIO_RESET_MAX,
+};
+
+/* Re-driver related definitions */
+enum xgbe_phy_redrv_if {
+	XGBE_PHY_REDRV_IF_MDIO = 0,
+	XGBE_PHY_REDRV_IF_I2C,
+	XGBE_PHY_REDRV_IF_MAX,
+};
+
+enum xgbe_phy_redrv_model {
+	XGBE_PHY_REDRV_MODEL_4223 = 0,
+	XGBE_PHY_REDRV_MODEL_4227,
+	XGBE_PHY_REDRV_MODEL_MAX,
+};
+
+enum xgbe_phy_redrv_mode {
+	XGBE_PHY_REDRV_MODE_CX = 5,
+	XGBE_PHY_REDRV_MODE_SR = 9,
+};
+
+#define XGBE_PHY_REDRV_MODE_REG	0x12b0
+
+/* PHY related configuration information */
+struct xgbe_phy_data {
+	enum xgbe_port_mode port_mode;
+
+	unsigned int port_id;		/* ID written into the HW mutexes */
+
+	unsigned int port_speeds;	/* XGBE_PHY_PORT_SPEED_* bit mask */
+
+	enum xgbe_conn_type conn_type;
+
+	enum xgbe_mode cur_mode;
+	enum xgbe_mode start_mode;
+
+	unsigned int rrc_count;
+
+	unsigned int mdio_addr;
+
+	unsigned int comm_owned;	/* 1 while the comm bus is held */
+
+	/* SFP Support */
+	enum xgbe_sfp_comm sfp_comm;
+	unsigned int sfp_mux_address;	/* I2C target of the SFP mux */
+	unsigned int sfp_mux_channel;	/* bit number selected in the mux */
+
+	unsigned int sfp_gpio_address;
+	unsigned int sfp_gpio_mask;
+	unsigned int sfp_gpio_rx_los;
+	unsigned int sfp_gpio_tx_fault;
+	unsigned int sfp_gpio_mod_absent;
+	unsigned int sfp_gpio_rate_select;
+
+	unsigned int sfp_rx_los;
+	unsigned int sfp_tx_fault;
+	unsigned int sfp_mod_absent;
+	unsigned int sfp_diags;
+	unsigned int sfp_changed;	/* EEPROM differs from cached copy */
+	unsigned int sfp_phy_avail;	/* SFP PHY answered an I2C MII read */
+	unsigned int sfp_cable_len;	/* from EEPROM, passive cables only */
+	enum xgbe_sfp_base sfp_base;
+	enum xgbe_sfp_cable sfp_cable;
+	enum xgbe_sfp_speed sfp_speed;
+	struct xgbe_sfp_eeprom sfp_eeprom;	/* last validated contents */
+
+	/* External PHY support */
+	enum xgbe_mdio_mode phydev_mode;
+	struct mii_bus *mii;
+	struct phy_device *phydev;	/* attached PHY, NULL if none */
+	enum xgbe_mdio_reset mdio_reset;
+	unsigned int mdio_reset_addr;
+	unsigned int mdio_reset_gpio;
+
+	/* Re-driver support */
+	unsigned int redrv;
+	unsigned int redrv_if;
+	unsigned int redrv_addr;	/* I2C target for re-driver writes */
+	unsigned int redrv_lane;
+	unsigned int redrv_model;
+};
+
+/* I2C, MDIO and GPIO lines are muxed, so only one device at a time */
+static DEFINE_MUTEX(xgbe_phy_comm_lock);
+
+static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata);
+
+/* Run one I2C operation via i2c_if; WARNs and fails with -EIO if not owner */
+static int xgbe_phy_i2c_xfer(struct xgbe_prv_data *pdata,
+			     struct xgbe_i2c_op *i2c_op)
+{
+	struct xgbe_phy_data *phy = pdata->phy_data;
+
+	if (!WARN_ON(!phy->comm_owned))
+		return pdata->i2c_if.i2c_xfer(pdata, i2c_op);
+
+	return -EIO;
+}
+
+/* Write a 16-bit value to a re-driver register over I2C, then read back one
+ * status byte that must be 0xff; each transfer is retried once on -EAGAIN.
+ */
+static int xgbe_phy_redrv_write(struct xgbe_prv_data *pdata, unsigned int reg,
+				unsigned int val)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	struct xgbe_i2c_op i2c_op;
+	u8 redrv_data[5], csum = 0;
+	unsigned int i, retry;
+	int ret;
+
+	/* High byte of register contains read/write indicator */
+	redrv_data[0] = ((reg >> 8) & 0xff) << 1;
+	redrv_data[1] = reg & 0xff;
+	/* Store big-endian per byte; redrv_data has no 16-bit alignment */
+	redrv_data[2] = (val >> 8) & 0xff;
+	redrv_data[3] = val & 0xff;
+
+	/* Calculate 1 byte checksum */
+	for (i = 0; i < 4; i++) {
+		csum += redrv_data[i];
+		if (redrv_data[i] > csum)
+			csum++;
+	}
+	redrv_data[4] = ~csum;
+
+	retry = 1;
+again1:
+	i2c_op.cmd = XGBE_I2C_CMD_WRITE;
+	i2c_op.target = phy_data->redrv_addr;
+	i2c_op.len = sizeof(redrv_data);
+	i2c_op.buf = redrv_data;
+	ret = xgbe_phy_i2c_xfer(pdata, &i2c_op);
+	if (ret) {
+		if ((ret == -EAGAIN) && retry--)
+			goto again1;
+		return ret;
+	}
+
+	retry = 1;
+again2:
+	i2c_op.cmd = XGBE_I2C_CMD_READ;
+	i2c_op.target = phy_data->redrv_addr;
+	i2c_op.len = 1;
+	i2c_op.buf = redrv_data;
+	ret = xgbe_phy_i2c_xfer(pdata, &i2c_op);
+	if (ret) {
+		if ((ret == -EAGAIN) && retry--)
+			goto again2;
+		return ret;
+	}
+
+	if (redrv_data[0] != 0xff) {
+		netif_dbg(pdata, drv, pdata->netdev,
+			  "Redriver write checksum error\n");
+		ret = -EIO;
+	}
+
+	return ret;
+}
+
+/* Write val_len bytes from val to an I2C target, retrying once on -EAGAIN */
+static int xgbe_phy_i2c_write(struct xgbe_prv_data *pdata, unsigned int target,
+			      void *val, unsigned int val_len)
+{
+	struct xgbe_i2c_op op;
+	int attempts_left = 2;
+	int rc;
+
+	do {
+		/* Write the specified register */
+		op.cmd = XGBE_I2C_CMD_WRITE;
+		op.target = target;
+		op.len = val_len;
+		op.buf = val;
+		rc = xgbe_phy_i2c_xfer(pdata, &op);
+	} while ((rc == -EAGAIN) && --attempts_left);
+
+	return rc;
+}
+
+/* Read val_len bytes from an I2C target after first writing the reg_len-byte
+ * register selector; each of the two transfers is retried once on -EAGAIN.
+ */
+static int xgbe_phy_i2c_read(struct xgbe_prv_data *pdata, unsigned int target,
+			     void *reg, unsigned int reg_len,
+			     void *val, unsigned int val_len)
+{
+	struct xgbe_i2c_op op;
+	int attempts_left, rc;
+
+	attempts_left = 2;
+	do {
+		/* Set the specified register to read */
+		op.cmd = XGBE_I2C_CMD_WRITE;
+		op.target = target;
+		op.len = reg_len;
+		op.buf = reg;
+		rc = xgbe_phy_i2c_xfer(pdata, &op);
+	} while ((rc == -EAGAIN) && --attempts_left);
+
+	if (rc)
+		return rc;
+
+	attempts_left = 2;
+	do {
+		/* Read the specified register */
+		op.cmd = XGBE_I2C_CMD_READ;
+		op.target = target;
+		op.len = val_len;
+		op.buf = val;
+		rc = xgbe_phy_i2c_xfer(pdata, &op);
+	} while ((rc == -EAGAIN) && --attempts_left);
+
+	return rc;
+}
+
+/* Deselect all SFP mux channels; no-op for XGBE_SFP_COMM_DIRECT */
+static int xgbe_phy_sfp_put_mux(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	struct xgbe_i2c_op op;
+	u8 no_channel = 0;
+
+	if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT)
+		return 0;
+
+	/* Select no mux channels */
+	op.cmd = XGBE_I2C_CMD_WRITE;
+	op.target = phy_data->sfp_mux_address;
+	op.len = sizeof(no_channel);
+	op.buf = &no_channel;
+
+	return xgbe_phy_i2c_xfer(pdata, &op);
+}
+
+/* Select this port's SFP mux channel; no-op for XGBE_SFP_COMM_DIRECT */
+static int xgbe_phy_sfp_get_mux(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	struct xgbe_i2c_op op;
+	u8 channel_bit;
+
+	if (phy_data->sfp_comm == XGBE_SFP_COMM_DIRECT)
+		return 0;
+
+	/* Select desired mux channel */
+	channel_bit = 1 << phy_data->sfp_mux_channel;
+	op.cmd = XGBE_I2C_CMD_WRITE;
+	op.target = phy_data->sfp_mux_address;
+	op.len = sizeof(channel_bit);
+	op.buf = &channel_bit;
+	return xgbe_phy_i2c_xfer(pdata, &op);
+}
+
+/* Drop bus ownership: clear comm_owned, then release the software mutex */
+static void xgbe_phy_put_comm_ownership(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *priv = pdata->phy_data;
+
+	priv->comm_owned = 0;
+	mutex_unlock(&xgbe_phy_comm_lock);
+}
+
+/* Take the software mutex and both hardware mutexes guarding the shared
+ * I2C and MDIO/GPIO busses; gives up with -ETIMEDOUT after five seconds.
+ */
+static int xgbe_phy_get_comm_ownership(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned long deadline;
+	unsigned int mutex_id = 0;
+
+	if (phy_data->comm_owned)
+		return 0;
+
+	/* The I2C and MDIO/GPIO bus is multiplexed between multiple devices,
+	 * the driver needs to take the software mutex and then the hardware
+	 * mutexes before being able to use the busses.
+	 */
+	mutex_lock(&xgbe_phy_comm_lock);
+
+	/* Clear the mutexes */
+	XP_IOWRITE(pdata, XP_I2C_MUTEX, XGBE_MUTEX_RELEASE);
+	XP_IOWRITE(pdata, XP_MDIO_MUTEX, XGBE_MUTEX_RELEASE);
+
+	/* Mutex formats are the same for I2C and MDIO/GPIO */
+	XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ID, phy_data->port_id);
+	XP_SET_BITS(mutex_id, XP_I2C_MUTEX, ACTIVE, 1);
+
+	deadline = jiffies + (5 * HZ);
+	while (time_before(jiffies, deadline)) {
+		/* Must be all zeroes in order to obtain the mutex */
+		if (!XP_IOREAD(pdata, XP_I2C_MUTEX) &&
+		    !XP_IOREAD(pdata, XP_MDIO_MUTEX)) {
+			/* Obtain the mutex */
+			XP_IOWRITE(pdata, XP_I2C_MUTEX, mutex_id);
+			XP_IOWRITE(pdata, XP_MDIO_MUTEX, mutex_id);
+
+			phy_data->comm_owned = 1;
+			return 0;
+		}
+
+		usleep_range(100, 200);
+	}
+
+	mutex_unlock(&xgbe_phy_comm_lock);
+	netdev_err(pdata->netdev, "unable to obtain hardware mutexes\n");
+
+	return -ETIMEDOUT;
+}
+
+/* Write a PHY register over MDIO; -ENOTSUPP unless the register's clause
+ * (MII_ADDR_C45 flag set or clear) matches phydev_mode.
+ */
+static int xgbe_phy_mdio_mii_write(struct xgbe_prv_data *pdata, int addr,
+				   int reg, u16 val)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mdio_mode wanted = XGBE_MDIO_MODE_CL22;
+
+	if (reg & MII_ADDR_C45)
+		wanted = XGBE_MDIO_MODE_CL45;
+	if (phy_data->phydev_mode != wanted)
+		return -ENOTSUPP;
+	return pdata->hw_if.write_ext_mii_regs(pdata, addr, reg, val);
+}
+
+/* Write a 16-bit MII register of the SFP PHY (XGBE_SFP_PHY_ADDRESS) via I2C */
+static int xgbe_phy_i2c_mii_write(struct xgbe_prv_data *pdata, int reg, u16 val)
+{
+	u8 mii_data[3];
+	int ret;
+
+	ret = xgbe_phy_sfp_get_mux(pdata);
+	if (ret)
+		return ret;
+
+	/* Big-endian value by byte; mii_data[1] need not be 16-bit aligned */
+	mii_data[0] = reg & 0xff;
+	mii_data[1] = val >> 8;
+	mii_data[2] = val & 0xff;
+
+	ret = xgbe_phy_i2c_write(pdata, XGBE_SFP_PHY_ADDRESS,
+				 mii_data, sizeof(mii_data));
+
+	xgbe_phy_sfp_put_mux(pdata);
+	return ret;
+}
+
+/* Write a PHY register: take bus ownership, dispatch on conn_type, release */
+static int xgbe_phy_mii_write(struct mii_bus *mii, int addr, int reg, u16 val)
+{
+	struct xgbe_prv_data *pdata = mii->priv;
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	int ret = xgbe_phy_get_comm_ownership(pdata);
+
+	if (ret)
+		return ret;
+
+	/* conn_type is an enum, not a bit mask, so compare for equality */
+	if (phy_data->conn_type == XGBE_CONN_TYPE_SFP)
+		ret = xgbe_phy_i2c_mii_write(pdata, reg, val);
+	else if (phy_data->conn_type == XGBE_CONN_TYPE_MDIO)
+		ret = xgbe_phy_mdio_mii_write(pdata, addr, reg, val);
+	else
+		ret = -ENOTSUPP;
+
+	xgbe_phy_put_comm_ownership(pdata);
+	return ret;
+}
+
+/* Read a PHY register over MDIO; -ENOTSUPP unless the register's clause
+ * (MII_ADDR_C45 flag set or clear) matches phydev_mode.
+ */
+static int xgbe_phy_mdio_mii_read(struct xgbe_prv_data *pdata, int addr,
+				  int reg)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mdio_mode wanted = XGBE_MDIO_MODE_CL22;
+
+	if (reg & MII_ADDR_C45)
+		wanted = XGBE_MDIO_MODE_CL45;
+	if (phy_data->phydev_mode != wanted)
+		return -ENOTSUPP;
+	return pdata->hw_if.read_ext_mii_regs(pdata, addr, reg);
+}
+
+/* Read a 16-bit MII register of the SFP PHY over I2C; returns the value
+ * converted from big-endian, or the error from the mux/I2C helpers.
+ */
+static int xgbe_phy_i2c_mii_read(struct xgbe_prv_data *pdata, int reg)
+{
+	__be16 raw;
+	u8 reg_addr = reg;
+	int rc;
+
+	rc = xgbe_phy_sfp_get_mux(pdata);
+	if (rc)
+		return rc;
+
+	rc = xgbe_phy_i2c_read(pdata, XGBE_SFP_PHY_ADDRESS,
+			       &reg_addr, sizeof(reg_addr), &raw, sizeof(raw));
+	if (rc == 0)
+		rc = be16_to_cpu(raw);
+
+	xgbe_phy_sfp_put_mux(pdata);
+	return rc;
+}
+
+/* Read a PHY register: take bus ownership, dispatch on conn_type, release */
+static int xgbe_phy_mii_read(struct mii_bus *mii, int addr, int reg)
+{
+	struct xgbe_prv_data *pdata = mii->priv;
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	int ret = xgbe_phy_get_comm_ownership(pdata);
+
+	if (ret)
+		return ret;
+
+	/* conn_type is an enum, not a bit mask, so compare for equality */
+	if (phy_data->conn_type == XGBE_CONN_TYPE_SFP)
+		ret = xgbe_phy_i2c_mii_read(pdata, reg);
+	else if (phy_data->conn_type == XGBE_CONN_TYPE_MDIO)
+		ret = xgbe_phy_mdio_mii_read(pdata, addr, reg);
+	else
+		ret = -ENOTSUPP;
+
+	xgbe_phy_put_comm_ownership(pdata);
+	return ret;
+}
+
+/* Derive pdata->phy speed, duplex, autoneg and advertising from the SFP
+ * type (sfp_base), the SFP speed class and the port's enabled speeds.
+ */
+static void xgbe_phy_sfp_phy_settings(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int speeds = phy_data->port_speeds;
+
+	/* No module present: restart from everything the port supports */
+	if (phy_data->sfp_mod_absent) {
+		pdata->phy.speed = SPEED_UNKNOWN;
+		pdata->phy.duplex = DUPLEX_UNKNOWN;
+		pdata->phy.autoneg = AUTONEG_ENABLE;
+		pdata->phy.advertising = pdata->phy.supported;
+	}
+
+	/* Clear every advertising bit that is decided below */
+	pdata->phy.advertising &= ~(ADVERTISED_Autoneg |
+				    ADVERTISED_TP |
+				    ADVERTISED_FIBRE |
+				    ADVERTISED_100baseT_Full |
+				    ADVERTISED_1000baseT_Full |
+				    ADVERTISED_10000baseT_Full |
+				    ADVERTISED_10000baseR_FEC);
+
+	/* 1G module types autonegotiate; everything else is fixed 10G */
+	switch (phy_data->sfp_base) {
+	case XGBE_SFP_BASE_1000_T:
+	case XGBE_SFP_BASE_1000_SX:
+	case XGBE_SFP_BASE_1000_LX:
+	case XGBE_SFP_BASE_1000_CX:
+		pdata->phy.speed = SPEED_UNKNOWN;
+		pdata->phy.duplex = DUPLEX_UNKNOWN;
+		pdata->phy.autoneg = AUTONEG_ENABLE;
+		pdata->phy.advertising |= ADVERTISED_Autoneg;
+		break;
+	default:
+		pdata->phy.speed = SPEED_10000;
+		pdata->phy.duplex = DUPLEX_FULL;
+		pdata->phy.autoneg = AUTONEG_DISABLE;
+		break;
+	}
+
+	/* 1000_T, 1000_CX and 10000_CR advertise TP, all others FIBRE */
+	if ((phy_data->sfp_base == XGBE_SFP_BASE_1000_T) ||
+	    (phy_data->sfp_base == XGBE_SFP_BASE_1000_CX) ||
+	    (phy_data->sfp_base == XGBE_SFP_BASE_10000_CR))
+		pdata->phy.advertising |= ADVERTISED_TP;
+	else
+		pdata->phy.advertising |= ADVERTISED_FIBRE;
+
+	/* Advertise only speeds that the port itself has enabled */
+	switch (phy_data->sfp_speed) {
+	case XGBE_SFP_SPEED_100_1000:
+		if (speeds & XGBE_PHY_PORT_SPEED_100)
+			pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+		/* fall through - the 1000 check is shared with the next case */
+	case XGBE_SFP_SPEED_1000:
+		if (speeds & XGBE_PHY_PORT_SPEED_1000)
+			pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+		break;
+	case XGBE_SFP_SPEED_10000:
+		if (speeds & XGBE_PHY_PORT_SPEED_10000)
+			pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+		break;
+	default:
+		/* Choose the fastest supported speed */
+		if (speeds & XGBE_PHY_PORT_SPEED_10000)
+			pdata->phy.advertising |= ADVERTISED_10000baseT_Full;
+		else if (speeds & XGBE_PHY_PORT_SPEED_1000)
+			pdata->phy.advertising |= ADVERTISED_1000baseT_Full;
+		else if (speeds & XGBE_PHY_PORT_SPEED_100)
+			pdata->phy.advertising |= ADVERTISED_100baseT_Full;
+		break;
+	}
+}
+
+/* Report whether the EEPROM bit-rate byte (base[XGBE_SFP_BASE_BR]) lies in
+ * the range defined for sfp_speed; only the 1000 and 10000 classes have one.
+ */
+static bool xgbe_phy_sfp_bit_rate(struct xgbe_sfp_eeprom *sfp_eeprom,
+				  enum xgbe_sfp_speed sfp_speed)
+{
+	u8 lo, hi;
+	u8 rate;
+
+	if (sfp_speed == XGBE_SFP_SPEED_1000) {
+		lo = XGBE_SFP_BASE_BR_1GBE_MIN;
+		hi = XGBE_SFP_BASE_BR_1GBE_MAX;
+	} else if (sfp_speed == XGBE_SFP_SPEED_10000) {
+		lo = XGBE_SFP_BASE_BR_10GBE_MIN;
+		hi = XGBE_SFP_BASE_BR_10GBE_MAX;
+	} else {
+		return false;
+	}
+
+	rate = sfp_eeprom->base[XGBE_SFP_BASE_BR];
+
+	return (rate >= lo) && (rate <= hi);
+}
+
+/* Detach, unregister and free the attached external PHY, if there is one */
+static void xgbe_phy_free_phy_device(struct xgbe_prv_data *pdata)
+{
+	if (!pdata->phy_data->phydev)
+		return;
+
+	phy_detach(pdata->phy_data->phydev);
+	phy_device_remove(pdata->phy_data->phydev);
+	phy_device_free(pdata->phy_data->phydev);
+	pdata->phy_data->phydev = NULL;
+}
+
+/* Apply the Finisar PHY setup when the attached PHY's id matches
+ * 0x01ff0cc0 (low nibble ignored); returns true if the quirk was applied.
+ */
+static bool xgbe_phy_finisar_phy_quirks(struct xgbe_prv_data *pdata)
+{
+	static const u16 init_seq[][2] = {	/* { register, value } */
+		/* Enable Base-T AN */
+		{ 0x16, 0x0001 }, { 0x00, 0x9140 }, { 0x16, 0x0000 },
+		/* Enable SGMII at 100Base-T/1000Base-T Full Duplex */
+		{ 0x1b, 0x9084 }, { 0x09, 0x0e00 }, { 0x00, 0x8140 },
+		{ 0x04, 0x0d01 }, { 0x00, 0x9140 },
+	};
+	struct phy_device *phydev = pdata->phy_data->phydev;
+	unsigned int i;
+
+	if ((phydev->phy_id & 0xfffffff0) != 0x01ff0cc0)
+		return false;
+
+	for (i = 0; i < ARRAY_SIZE(init_seq); i++)
+		phy_write(phydev, init_seq[i][0], init_seq[i][1]);
+
+	phydev->supported = PHY_GBIT_FEATURES;
+	phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+	phydev->advertising = phydev->supported;
+
+	netif_dbg(pdata, drv, pdata->netdev, "Finisar PHY quirk in place\n");
+
+	return true;
+}
+
+/* Apply external PHY quirks; the Finisar check is the only one so far */
+static void xgbe_phy_external_phy_quirks(struct xgbe_prv_data *pdata)
+{
+	xgbe_phy_finisar_phy_quirks(pdata);
+}
+
+/* Create, register and attach the external PHY when one is configured and
+ * none is attached yet; returns 0 on success or when no PHY is needed.
+ */
+static int xgbe_phy_find_phy_device(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	struct phy_device *phydev;
+	bool is_c45;
+	int ret;
+
+	/* Nothing to do if a PHY is already attached or none is used */
+	if (phy_data->phydev ||
+	    (phy_data->phydev_mode == XGBE_MDIO_MODE_NONE))
+		return 0;
+
+	/* For SFP, only use an external PHY if available */
+	if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) &&
+	    !phy_data->sfp_phy_avail)
+		return 0;
+
+	/* Create and connect to the PHY device */
+	is_c45 = (phy_data->phydev_mode == XGBE_MDIO_MODE_CL45);
+	phydev = get_phy_device(phy_data->mii, phy_data->mdio_addr, is_c45);
+	if (IS_ERR(phydev)) {
+		netdev_err(pdata->netdev, "get_phy_device failed\n");
+		return -ENODEV;
+	}
+	netif_dbg(pdata, drv, pdata->netdev, "external PHY id is %#010x\n",
+		  phydev->phy_id);
+
+	/*TODO: If c45, add request_module based on one of the MMD ids? */
+
+	ret = phy_device_register(phydev);
+	if (ret) {
+		netdev_err(pdata->netdev, "phy_device_register failed\n");
+		phy_device_free(phydev);
+		return ret;
+	}
+
+	ret = phy_attach_direct(pdata->netdev, phydev, phydev->dev_flags,
+				PHY_INTERFACE_MODE_SGMII);
+	if (ret) {
+		netdev_err(pdata->netdev, "phy_attach_direct failed\n");
+		phy_device_remove(phydev);
+		phy_device_free(phydev);
+		return ret;
+	}
+	phy_data->phydev = phydev;
+
+	/* Quirks may rewrite phydev->advertising, so mask afterwards */
+	xgbe_phy_external_phy_quirks(pdata);
+	phydev->advertising &= pdata->phy.advertising;
+	phy_start_aneg(phydev);
+	return 0;
+}
+
+/* After an SFP change, probe whether an XGBE_SFP_BASE_1000_T module exposes
+ * a PHY by reading MII_BMCR over I2C; the result goes to sfp_phy_avail.
+ */
+static void xgbe_phy_sfp_external_phy(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	if (!phy_data->sfp_changed)
+		return;
+
+	phy_data->sfp_phy_avail = 0;
+
+	if (phy_data->sfp_base != XGBE_SFP_BASE_1000_T)
+		return;
+
+	/* Check access to the PHY by reading CTRL1 */
+	if (xgbe_phy_i2c_mii_read(pdata, MII_BMCR) < 0)
+		return;
+
+	phy_data->sfp_phy_avail = 1;
+}
+
+/* Bel-Fuse quirk: when the EEPROM vendor name and part number both match,
+ * force the SFP base, cable and speed fields; returns true if it matched.
+ */
+static bool xgbe_phy_belfuse_parse_quirks(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	u8 *base = phy_data->sfp_eeprom.base;
+
+	if (memcmp(&base[XGBE_SFP_BASE_VENDOR_NAME],
+		   XGBE_BEL_FUSE_VENDOR, XGBE_SFP_BASE_VENDOR_NAME_LEN) ||
+	    memcmp(&base[XGBE_SFP_BASE_VENDOR_PN],
+		   XGBE_BEL_FUSE_PARTNO, XGBE_SFP_BASE_VENDOR_PN_LEN))
+		return false;
+
+	phy_data->sfp_base = XGBE_SFP_BASE_1000_SX;
+	phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE;
+	phy_data->sfp_speed = XGBE_SFP_SPEED_1000;
+	if (phy_data->sfp_changed)
+		netif_dbg(pdata, drv, pdata->netdev,
+			  "Bel-Fuse SFP quirk in place\n");
+	return true;
+}
+
+/* Run the SFP EEPROM parse quirks; returns true when a quirk matched the
+ * module. The Bel-Fuse quirk is the only one so far.
+ */
+static bool xgbe_phy_sfp_parse_quirks(struct xgbe_prv_data *pdata)
+{
+	return xgbe_phy_belfuse_parse_quirks(pdata);
+}
+
+/* Classify the SFP (cable, module type, speed class) from the cached EEPROM */
+static void xgbe_phy_sfp_parse_eeprom(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	struct xgbe_sfp_eeprom *sfp_eeprom = &phy_data->sfp_eeprom;
+	u8 *id = sfp_eeprom->base;
+	u8 cc_10g, cc_1g;
+
+	if ((id[XGBE_SFP_BASE_ID] != XGBE_SFP_ID_SFP) ||
+	    (id[XGBE_SFP_BASE_EXT_ID] != XGBE_SFP_EXT_ID_SFP))
+		return;
+
+	if (xgbe_phy_sfp_parse_quirks(pdata))
+		return;
+
+	/* Assume ACTIVE cable unless told it is PASSIVE */
+	if (!(id[XGBE_SFP_BASE_CABLE] & XGBE_SFP_BASE_CABLE_PASSIVE)) {
+		phy_data->sfp_cable = XGBE_SFP_CABLE_ACTIVE;
+	} else {
+		phy_data->sfp_cable = XGBE_SFP_CABLE_PASSIVE;
+		phy_data->sfp_cable_len = id[XGBE_SFP_BASE_CU_CABLE_LEN];
+	}
+
+	/* Determine the type of SFP */
+	cc_10g = id[XGBE_SFP_BASE_10GBE_CC];
+	cc_1g = id[XGBE_SFP_BASE_1GBE_CC];
+	if (cc_10g & XGBE_SFP_BASE_10GBE_CC_SR)
+		phy_data->sfp_base = XGBE_SFP_BASE_10000_SR;
+	else if (cc_10g & XGBE_SFP_BASE_10GBE_CC_LR)
+		phy_data->sfp_base = XGBE_SFP_BASE_10000_LR;
+	else if (cc_10g & XGBE_SFP_BASE_10GBE_CC_LRM)
+		phy_data->sfp_base = XGBE_SFP_BASE_10000_LRM;
+	else if (cc_10g & XGBE_SFP_BASE_10GBE_CC_ER)
+		phy_data->sfp_base = XGBE_SFP_BASE_10000_ER;
+	else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_SX)
+		phy_data->sfp_base = XGBE_SFP_BASE_1000_SX;
+	else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_LX)
+		phy_data->sfp_base = XGBE_SFP_BASE_1000_LX;
+	else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_CX)
+		phy_data->sfp_base = XGBE_SFP_BASE_1000_CX;
+	else if (cc_1g & XGBE_SFP_BASE_1GBE_CC_T)
+		phy_data->sfp_base = XGBE_SFP_BASE_1000_T;
+	else if ((phy_data->sfp_cable == XGBE_SFP_CABLE_PASSIVE) &&
+		 xgbe_phy_sfp_bit_rate(sfp_eeprom, XGBE_SFP_SPEED_10000))
+		phy_data->sfp_base = XGBE_SFP_BASE_10000_CR;
+
+	switch (phy_data->sfp_base) {
+	case XGBE_SFP_BASE_1000_T:
+		phy_data->sfp_speed = XGBE_SFP_SPEED_100_1000;
+		break;
+	case XGBE_SFP_BASE_1000_SX:
+	case XGBE_SFP_BASE_1000_LX:
+	case XGBE_SFP_BASE_1000_CX:
+		phy_data->sfp_speed = XGBE_SFP_SPEED_1000;
+		break;
+	case XGBE_SFP_BASE_10000_SR:
+	case XGBE_SFP_BASE_10000_LR:
+	case XGBE_SFP_BASE_10000_LRM:
+	case XGBE_SFP_BASE_10000_ER:
+	case XGBE_SFP_BASE_10000_CR:
+		phy_data->sfp_speed = XGBE_SFP_SPEED_10000;
+		break;
+	default:
+		break;
+	}
+}
+
+/* Debug-log the vendor, part number, revision and serial number strings */
+static void xgbe_phy_sfp_eeprom_info(struct xgbe_prv_data *pdata,
+				     struct xgbe_sfp_eeprom *sfp_eeprom)
+{
+	struct xgbe_sfp_ascii text;
+
+	netif_dbg(pdata, drv, pdata->netdev, "SFP detected:\n");
+	memcpy(text.u.vendor, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_NAME],
+	       XGBE_SFP_BASE_VENDOR_NAME_LEN);
+	text.u.vendor[XGBE_SFP_BASE_VENDOR_NAME_LEN] = '\0';
+	netif_dbg(pdata, drv, pdata->netdev, "  vendor:         %s\n",
+		  text.u.vendor);
+
+	memcpy(text.u.partno, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_PN],
+	       XGBE_SFP_BASE_VENDOR_PN_LEN);
+	text.u.partno[XGBE_SFP_BASE_VENDOR_PN_LEN] = '\0';
+	netif_dbg(pdata, drv, pdata->netdev, "  part number:    %s\n",
+		  text.u.partno);
+
+	memcpy(text.u.rev, &sfp_eeprom->base[XGBE_SFP_BASE_VENDOR_REV],
+	       XGBE_SFP_BASE_VENDOR_REV_LEN);
+	text.u.rev[XGBE_SFP_BASE_VENDOR_REV_LEN] = '\0';
+	netif_dbg(pdata, drv, pdata->netdev, "  revision level: %s\n",
+		  text.u.rev);
+
+	memcpy(text.u.serno, &sfp_eeprom->extd[XGBE_SFP_BASE_VENDOR_SN],
+	       XGBE_SFP_BASE_VENDOR_SN_LEN);
+	text.u.serno[XGBE_SFP_BASE_VENDOR_SN_LEN] = '\0';
+	netif_dbg(pdata, drv, pdata->netdev, "  serial number:  %s\n",
+		  text.u.serno);
+}
+
+/* True if the low 8 bits of the sum of buf[0..len-1] equal cc_in */
+static bool xgbe_phy_sfp_verify_eeprom(u8 cc_in, u8 *buf, unsigned int len)
+{
+	unsigned int i;
+	u8 sum = 0;
+
+	for (i = 0; i < len; i++)
+		sum += buf[i];
+	return sum == cc_in;
+}
+
+/* Read and validate the SFP serial ID EEPROM, and record in phy_data
+ * whether its contents differ from the previously cached copy.
+ *
+ * The SFP mux is acquired first and is released again on every path
+ * taken after it was acquired. Returns 0 on success, the error from the
+ * mux/I2C helpers, or -EINVAL when either check code does not match.
+ */
+static int xgbe_phy_sfp_read_eeprom(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	struct xgbe_sfp_eeprom sfp_eeprom;
+	u8 eeprom_addr;
+	int ret;
+
+	ret = xgbe_phy_sfp_get_mux(pdata);
+	if (ret) {
+		netdev_err(pdata->netdev, "I2C error setting SFP MUX\n");
+		return ret;
+	}
+
+	/* Read the SFP serial ID eeprom */
+	eeprom_addr = 0;
+	ret = xgbe_phy_i2c_read(pdata, XGBE_SFP_SERIAL_ID_ADDRESS,
+				&eeprom_addr, sizeof(eeprom_addr),
+				&sfp_eeprom, sizeof(sfp_eeprom));
+	if (ret) {
+		netdev_err(pdata->netdev, "I2C error reading SFP EEPROM\n");
+		goto put;
+	}
+
+	/* Validate the contents read: all but one byte of each area is
+	 * summed (presumably excluding the trailing check code byte itself;
+	 * confirm against XGBE_SFP_BASE_CC / XGBE_SFP_EXTD_CC).
+	 */
+	if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.base[XGBE_SFP_BASE_CC],
+					sfp_eeprom.base,
+					sizeof(sfp_eeprom.base) - 1)) {
+		ret = -EINVAL;
+		goto put;
+	}
+
+	if (!xgbe_phy_sfp_verify_eeprom(sfp_eeprom.extd[XGBE_SFP_EXTD_CC],
+					sfp_eeprom.extd,
+					sizeof(sfp_eeprom.extd) - 1)) {
+		ret = -EINVAL;
+		goto put;
+	}
+
+	/* Check for an added or changed SFP */
+	if (memcmp(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom))) {
+		phy_data->sfp_changed = 1;
+
+		if (netif_msg_drv(pdata))
+			xgbe_phy_sfp_eeprom_info(pdata, &sfp_eeprom);
+
+		/* Cache the new contents for the next comparison */
+		memcpy(&phy_data->sfp_eeprom, &sfp_eeprom, sizeof(sfp_eeprom));
+
+		/* Diagnostics are flagged only when the SFF-8472 byte is
+		 * non-zero and the address change bit is clear.
+		 */
+		if (sfp_eeprom.extd[XGBE_SFP_EXTD_SFF_8472]) {
+			u8 diag_type = sfp_eeprom.extd[XGBE_SFP_EXTD_DIAG];
+
+			if (!(diag_type & XGBE_SFP_EXTD_DIAG_ADDR_CHANGE))
+				phy_data->sfp_diags = 1;
+		}
+
+		/* The module changed, so release any existing PHY device */
+		xgbe_phy_free_phy_device(pdata);
+	} else {
+		phy_data->sfp_changed = 0;
+	}
+
+put:
+	xgbe_phy_sfp_put_mux(pdata);
+
+	return ret;
+}
+
+/* Sample the SFP GPIO input ports over I2C and update the module-absent,
+ * rx-los and tx-fault flags in phy_data.
+ *
+ * The flags are only ever moved in one direction here (absent is only
+ * cleared, rx_los and tx_fault are only set), so the caller is expected
+ * to have put them in their default state first. On an I2C error nothing
+ * is updated.
+ */
+static void xgbe_phy_sfp_signals(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int gpio_input;
+	u8 gpio_reg, gpio_ports[2];
+	int ret;
+
+	/* Read the input port registers */
+	gpio_reg = 0;
+	ret = xgbe_phy_i2c_read(pdata, phy_data->sfp_gpio_address,
+				&gpio_reg, sizeof(gpio_reg),
+				gpio_ports, sizeof(gpio_ports));
+	if (ret) {
+		netdev_err(pdata->netdev, "I2C error reading SFP GPIOs\n");
+		return;
+	}
+
+	/* Combine both port bytes, second byte in the upper 8 bits */
+	gpio_input = (gpio_ports[1] << 8) | gpio_ports[0];
+
+	if (phy_data->sfp_gpio_mask & XGBE_GPIO_NO_MOD_ABSENT) {
+		/* No GPIO, just assume the module is present for now */
+		phy_data->sfp_mod_absent = 0;
+	} else {
+		/* A clear bit on the mod-absent line means a module is there */
+		if (!(gpio_input & (1 << phy_data->sfp_gpio_mod_absent)))
+			phy_data->sfp_mod_absent = 0;
+	}
+
+	/* RX LOS and TX fault are only checked when a GPIO exists for them */
+	if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_RX_LOS) &&
+	    (gpio_input & (1 << phy_data->sfp_gpio_rx_los)))
+		phy_data->sfp_rx_los = 1;
+
+	if (!(phy_data->sfp_gpio_mask & XGBE_GPIO_NO_TX_FAULT) &&
+	    (gpio_input & (1 << phy_data->sfp_gpio_tx_fault)))
+		phy_data->sfp_tx_fault = 1;
+}
+
+/* Put the driver state into "no SFP module present": release the PHY
+ * device, mark the module absent and forget the cached EEPROM contents.
+ */
+static void xgbe_phy_sfp_mod_absent(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy = pdata->phy_data;
+
+	xgbe_phy_free_phy_device(pdata);
+
+	memset(&phy->sfp_eeprom, 0, sizeof(phy->sfp_eeprom));
+	phy->sfp_phy_avail = 0;
+	phy->sfp_mod_absent = 1;
+}
+
+/* Return the SFP signal flags and parsed module information to their
+ * defaults: no faults, module absent, everything else unknown.
+ */
+static void xgbe_phy_sfp_reset(struct xgbe_phy_data *phy_data)
+{
+	/* Signals */
+	phy_data->sfp_mod_absent = 1;
+	phy_data->sfp_tx_fault = 0;
+	phy_data->sfp_rx_los = 0;
+
+	/* Module information */
+	phy_data->sfp_diags = 0;
+	phy_data->sfp_speed = XGBE_SFP_SPEED_UNKNOWN;
+	phy_data->sfp_cable = XGBE_SFP_CABLE_UNKNOWN;
+	phy_data->sfp_base = XGBE_SFP_BASE_UNKNOWN;
+}
+
+/* Refresh all SFP state.
+ *
+ * The cached signals and module info are reset first. Then, while
+ * holding communication ownership, the GPIO signals are read and, if a
+ * module is present, its EEPROM is read and parsed and the external PHY
+ * is handled. The PHY settings are updated on every path that obtained
+ * ownership. If ownership cannot be obtained, the state is left reset.
+ */
+static void xgbe_phy_sfp_detect(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	int ret;
+
+	/* Reset the SFP signals and info */
+	xgbe_phy_sfp_reset(phy_data);
+
+	ret = xgbe_phy_get_comm_ownership(pdata);
+	if (ret)
+		return;
+
+	/* Read the SFP signals and check for module presence */
+	xgbe_phy_sfp_signals(pdata);
+	if (phy_data->sfp_mod_absent) {
+		xgbe_phy_sfp_mod_absent(pdata);
+		goto put;
+	}
+
+	ret = xgbe_phy_sfp_read_eeprom(pdata);
+	if (ret) {
+		/* Treat any error as if there isn't an SFP plugged in */
+		xgbe_phy_sfp_reset(phy_data);
+		xgbe_phy_sfp_mod_absent(pdata);
+		goto put;
+	}
+
+	xgbe_phy_sfp_parse_eeprom(pdata);
+
+	xgbe_phy_sfp_external_phy(pdata);
+
+put:
+	/* Runs for both the module-present and module-absent outcomes */
+	xgbe_phy_sfp_phy_settings(pdata);
+
+	xgbe_phy_put_comm_ownership(pdata);
+}
+
+/* Resolve tx/rx pause from the external PHY's local advertisement and
+ * the pause abilities it reports for the link partner.
+ *
+ * Both pause settings are cleared up front, so they stay disabled when
+ * no external PHY is attached.
+ */
+static void xgbe_phy_phydev_flowctrl(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	u16 local_adv = 0;
+	u16 partner_adv = 0;
+	u8 resolved;
+
+	pdata->phy.rx_pause = 0;
+	pdata->phy.tx_pause = 0;
+
+	if (!phy_data->phydev)
+		return;
+
+	/* What this end advertises */
+	if (phy_data->phydev->advertising & ADVERTISED_Asym_Pause)
+		local_adv |= ADVERTISE_PAUSE_ASYM;
+	if (phy_data->phydev->advertising & ADVERTISED_Pause)
+		local_adv |= ADVERTISE_PAUSE_CAP;
+
+	/* What the link partner reported */
+	if (phy_data->phydev->asym_pause) {
+		partner_adv |= LPA_PAUSE_ASYM;
+		pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+	}
+	if (phy_data->phydev->pause) {
+		partner_adv |= LPA_PAUSE_CAP;
+		pdata->phy.lp_advertising |= ADVERTISED_Pause;
+	}
+
+	resolved = mii_resolve_flowctrl_fdx(local_adv, partner_adv);
+	if (resolved & FLOW_CTRL_RX)
+		pdata->phy.rx_pause = 1;
+	if (resolved & FLOW_CTRL_TX)
+		pdata->phy.tx_pause = 1;
+}
+
+/* Determine the operating mode from the SGMII auto-negotiation status
+ * held in pdata->an_status.
+ *
+ * The link partner advertising mask is extended with the reported
+ * speed/duplex. Only full duplex yields a usable mode; half duplex and
+ * unrecognised speeds return XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_an37_sgmii_outcome(struct xgbe_prv_data *pdata)
+{
+	enum xgbe_mode mode;
+
+	pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+	pdata->phy.lp_advertising |= ADVERTISED_TP;
+
+	/* Use external PHY to determine flow control */
+	if (pdata->phy.pause_autoneg)
+		xgbe_phy_phydev_flowctrl(pdata);
+
+	switch (pdata->an_status & XGBE_SGMII_AN_LINK_SPEED) {
+	case XGBE_SGMII_AN_LINK_SPEED_100:
+		if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) {
+			pdata->phy.lp_advertising |= ADVERTISED_100baseT_Full;
+			mode = XGBE_MODE_SGMII_100;
+		} else {
+			/* Half-duplex not supported */
+			pdata->phy.lp_advertising |= ADVERTISED_100baseT_Half;
+			mode = XGBE_MODE_UNKNOWN;
+		}
+		break;
+	case XGBE_SGMII_AN_LINK_SPEED_1000:
+		if (pdata->an_status & XGBE_SGMII_AN_LINK_DUPLEX) {
+			pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full;
+			mode = XGBE_MODE_SGMII_1000;
+		} else {
+			/* Half-duplex not supported */
+			pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half;
+			mode = XGBE_MODE_UNKNOWN;
+		}
+		break;
+	default:
+		mode = XGBE_MODE_UNKNOWN;
+	}
+
+	return mode;
+}
+
+/* Determine the operating mode and flow control after clause 37
+ * auto-negotiation by comparing the local advertisement register with
+ * the link partner ability register.
+ *
+ * Bits used below: 0x100 pause, 0x80 asymmetric pause, 0x40 1000 Mbps
+ * half duplex, 0x20 1000 Mbps full duplex. Returns XGBE_MODE_X when both
+ * ends advertise full duplex, otherwise XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_an37_outcome(struct xgbe_prv_data *pdata)
+{
+	enum xgbe_mode mode;
+	unsigned int ad_reg, lp_reg;
+
+	pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+	pdata->phy.lp_advertising |= ADVERTISED_FIBRE;
+
+	/* Compare Advertisement and Link Partner register */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_ADVERTISE);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_VEND2, MDIO_VEND2_AN_LP_ABILITY);
+	if (lp_reg & 0x100)
+		pdata->phy.lp_advertising |= ADVERTISED_Pause;
+	if (lp_reg & 0x80)
+		pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+
+	if (pdata->phy.pause_autoneg) {
+		/* Set flow control based on auto-negotiation result */
+		pdata->phy.tx_pause = 0;
+		pdata->phy.rx_pause = 0;
+
+		/* Both ends pause capable: pause in both directions.
+		 * Otherwise, if both are asymmetric capable, the end that
+		 * is also pause capable decides the single direction.
+		 */
+		if (ad_reg & lp_reg & 0x100) {
+			pdata->phy.tx_pause = 1;
+			pdata->phy.rx_pause = 1;
+		} else if (ad_reg & lp_reg & 0x80) {
+			if (ad_reg & 0x100)
+				pdata->phy.rx_pause = 1;
+			else if (lp_reg & 0x100)
+				pdata->phy.tx_pause = 1;
+		}
+	}
+
+	if (lp_reg & 0x40)
+		pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Half;
+	if (lp_reg & 0x20)
+		pdata->phy.lp_advertising |= ADVERTISED_1000baseT_Full;
+
+	/* Half duplex is not supported */
+	ad_reg &= lp_reg;
+	mode = (ad_reg & 0x20) ? XGBE_MODE_X : XGBE_MODE_UNKNOWN;
+
+	return mode;
+}
+
+/* Determine the operating mode after clause 73 auto-negotiation when a
+ * re-driver is in use.
+ *
+ * Flow control comes from the external PHY. The common 10G (0x80) or
+ * 1G (0x20) ability from advertisement/link partner register 2 is then
+ * mapped to a mode according to the port mode and, for SFP ports, the
+ * module type. Returns XGBE_MODE_UNKNOWN when there is no common
+ * ability.
+ */
+static enum xgbe_mode xgbe_phy_an73_redrv_outcome(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mode mode;
+	unsigned int ad_reg, lp_reg;
+
+	pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+	pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+
+	/* Use external PHY to determine flow control */
+	if (pdata->phy.pause_autoneg)
+		xgbe_phy_phydev_flowctrl(pdata);
+
+	/* Compare Advertisement and Link Partner register 2 */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+	if (lp_reg & 0x80)
+		pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+	if (lp_reg & 0x20)
+		pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+
+	/* Keep only the abilities both ends have; 10G wins over 1G */
+	ad_reg &= lp_reg;
+	if (ad_reg & 0x80) {
+		switch (phy_data->port_mode) {
+		case XGBE_PORT_MODE_BACKPLANE:
+			mode = XGBE_MODE_KR;
+			break;
+		default:
+			mode = XGBE_MODE_SFI;
+			break;
+		}
+	} else if (ad_reg & 0x20) {
+		switch (phy_data->port_mode) {
+		case XGBE_PORT_MODE_BACKPLANE:
+			mode = XGBE_MODE_KX_1000;
+			break;
+		case XGBE_PORT_MODE_1000BASE_X:
+			mode = XGBE_MODE_X;
+			break;
+		case XGBE_PORT_MODE_SFP:
+			switch (phy_data->sfp_base) {
+			case XGBE_SFP_BASE_1000_T:
+				/* SGMII speed follows the external PHY */
+				if (phy_data->phydev &&
+				    (phy_data->phydev->speed == SPEED_100))
+					mode = XGBE_MODE_SGMII_100;
+				else
+					mode = XGBE_MODE_SGMII_1000;
+				break;
+			case XGBE_SFP_BASE_1000_SX:
+			case XGBE_SFP_BASE_1000_LX:
+			case XGBE_SFP_BASE_1000_CX:
+			default:
+				mode = XGBE_MODE_X;
+				break;
+			}
+			break;
+		default:
+			/* SGMII speed follows the external PHY */
+			if (phy_data->phydev &&
+			    (phy_data->phydev->speed == SPEED_100))
+				mode = XGBE_MODE_SGMII_100;
+			else
+				mode = XGBE_MODE_SGMII_1000;
+			break;
+		}
+	} else {
+		mode = XGBE_MODE_UNKNOWN;
+	}
+
+	/* Compare Advertisement and Link Partner register 3 */
+	/* NOTE(review): only lp_reg is consulted below; the ad_reg value
+	 * read here is not used afterwards.
+	 */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
+	if (lp_reg & 0xc000)
+		pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+
+	return mode;
+}
+
+/* Determine the operating mode and flow control after clause 73
+ * auto-negotiation.
+ *
+ * Register 1 carries pause (0x400) and asymmetric pause (0x800),
+ * register 2 carries the 10G KR (0x80) and 1G KX (0x20) abilities, and
+ * register 3 carries the link partner FEC bits (0xc000). Returns KR if
+ * both ends support it, else KX, else XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_an73_outcome(struct xgbe_prv_data *pdata)
+{
+	enum xgbe_mode mode;
+	unsigned int ad_reg, lp_reg;
+
+	pdata->phy.lp_advertising |= ADVERTISED_Autoneg;
+	pdata->phy.lp_advertising |= ADVERTISED_Backplane;
+
+	/* Compare Advertisement and Link Partner register 1 */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA);
+	if (lp_reg & 0x400)
+		pdata->phy.lp_advertising |= ADVERTISED_Pause;
+	if (lp_reg & 0x800)
+		pdata->phy.lp_advertising |= ADVERTISED_Asym_Pause;
+
+	if (pdata->phy.pause_autoneg) {
+		/* Set flow control based on auto-negotiation result */
+		pdata->phy.tx_pause = 0;
+		pdata->phy.rx_pause = 0;
+
+		/* Both ends pause capable: pause in both directions.
+		 * Otherwise, if both are asymmetric capable, the end that
+		 * is also pause capable decides the single direction.
+		 */
+		if (ad_reg & lp_reg & 0x400) {
+			pdata->phy.tx_pause = 1;
+			pdata->phy.rx_pause = 1;
+		} else if (ad_reg & lp_reg & 0x800) {
+			if (ad_reg & 0x400)
+				pdata->phy.rx_pause = 1;
+			else if (lp_reg & 0x400)
+				pdata->phy.tx_pause = 1;
+		}
+	}
+
+	/* Compare Advertisement and Link Partner register 2 */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 1);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 1);
+	if (lp_reg & 0x80)
+		pdata->phy.lp_advertising |= ADVERTISED_10000baseKR_Full;
+	if (lp_reg & 0x20)
+		pdata->phy.lp_advertising |= ADVERTISED_1000baseKX_Full;
+
+	/* Keep only the abilities both ends have; 10G wins over 1G */
+	ad_reg &= lp_reg;
+	if (ad_reg & 0x80)
+		mode = XGBE_MODE_KR;
+	else if (ad_reg & 0x20)
+		mode = XGBE_MODE_KX_1000;
+	else
+		mode = XGBE_MODE_UNKNOWN;
+
+	/* Compare Advertisement and Link Partner register 3 */
+	/* NOTE(review): only lp_reg is consulted below; the ad_reg value
+	 * read here is not used afterwards.
+	 */
+	ad_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_ADVERTISE + 2);
+	lp_reg = XMDIO_READ(pdata, MDIO_MMD_AN, MDIO_AN_LPA + 2);
+	if (lp_reg & 0xc000)
+		pdata->phy.lp_advertising |= ADVERTISED_10000baseR_FEC;
+
+	return mode;
+}
+
+/* Dispatch to the outcome handler matching the auto-negotiation mode in
+ * use; modes without a handler yield XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_an_outcome(struct xgbe_prv_data *pdata)
+{
+	enum xgbe_mode result = XGBE_MODE_UNKNOWN;
+
+	switch (pdata->an_mode) {
+	case XGBE_AN_MODE_CL73:
+		result = xgbe_phy_an73_outcome(pdata);
+		break;
+	case XGBE_AN_MODE_CL73_REDRV:
+		result = xgbe_phy_an73_redrv_outcome(pdata);
+		break;
+	case XGBE_AN_MODE_CL37:
+		result = xgbe_phy_an37_outcome(pdata);
+		break;
+	case XGBE_AN_MODE_CL37_SGMII:
+		result = xgbe_phy_an37_sgmii_outcome(pdata);
+		break;
+	default:
+		break;
+	}
+
+	return result;
+}
+
+/* Return the advertising mask to use for auto-negotiation.
+ *
+ * Without a re-driver this is the current mask unchanged. With a KR
+ * re-driver exactly one of the KX/KR speed bits is advertised, picked
+ * from the port mode (and the SFP module type or external PHY speed
+ * where relevant).
+ */
+static unsigned int xgbe_phy_an_advertising(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int advertising = pdata->phy.advertising;
+	unsigned int speed_bit;
+
+	/* Without a re-driver, just return current advertising */
+	if (!phy_data->redrv)
+		return advertising;
+
+	/* Work out the single speed to advertise */
+	switch (phy_data->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+	case XGBE_PORT_MODE_1000BASE_T:
+	case XGBE_PORT_MODE_1000BASE_X:
+	case XGBE_PORT_MODE_NBASE_T:
+		speed_bit = ADVERTISED_1000baseKX_Full;
+		break;
+	case XGBE_PORT_MODE_10GBASE_T:
+		if (phy_data->phydev &&
+		    (phy_data->phydev->speed == SPEED_10000))
+			speed_bit = ADVERTISED_10000baseKR_Full;
+		else
+			speed_bit = ADVERTISED_1000baseKX_Full;
+		break;
+	case XGBE_PORT_MODE_SFP:
+		switch (phy_data->sfp_base) {
+		case XGBE_SFP_BASE_1000_T:
+		case XGBE_SFP_BASE_1000_SX:
+		case XGBE_SFP_BASE_1000_LX:
+		case XGBE_SFP_BASE_1000_CX:
+			speed_bit = ADVERTISED_1000baseKX_Full;
+			break;
+		default:
+			speed_bit = ADVERTISED_10000baseKR_Full;
+			break;
+		}
+		break;
+	case XGBE_PORT_MODE_BACKPLANE:
+	case XGBE_PORT_MODE_10GBASE_R:
+	default:
+		speed_bit = ADVERTISED_10000baseKR_Full;
+		break;
+	}
+
+	/* Replace both speed bits with the one selected above */
+	advertising &= ~(ADVERTISED_1000baseKX_Full |
+			 ADVERTISED_10000baseKR_Full);
+	advertising |= speed_bit;
+
+	return advertising;
+}
+
+/* Push the driver's autoneg/advertising (and, with autoneg disabled,
+ * speed/duplex) settings to the external PHY and start negotiation.
+ *
+ * Returns the PHY lookup error if any, 0 when there is no external PHY,
+ * otherwise the result of phy_start_aneg().
+ */
+static int xgbe_phy_an_config(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	int err;
+
+	err = xgbe_phy_find_phy_device(pdata);
+	if (err)
+		return err;
+
+	/* Nothing to configure without an external PHY */
+	if (!phy_data->phydev)
+		return 0;
+
+	phy_data->phydev->autoneg = pdata->phy.autoneg;
+	phy_data->phydev->advertising = pdata->phy.advertising &
+					phy_data->phydev->supported;
+
+	/* Forced mode: hand over the fixed speed and duplex as well */
+	if (pdata->phy.autoneg != AUTONEG_ENABLE) {
+		phy_data->phydev->duplex = pdata->phy.duplex;
+		phy_data->phydev->speed = pdata->phy.speed;
+	}
+
+	return phy_start_aneg(phy_data->phydev);
+}
+
+/* Map the detected SFP module type to an auto-negotiation mode:
+ * 1000BASE-T uses CL37 SGMII, 1000BASE-SX/LX/CX use CL37, and any other
+ * module type uses no auto-negotiation.
+ */
+static enum xgbe_an_mode xgbe_phy_an_sfp_mode(struct xgbe_phy_data *phy_data)
+{
+	enum xgbe_an_mode an_mode = XGBE_AN_MODE_NONE;
+
+	switch (phy_data->sfp_base) {
+	case XGBE_SFP_BASE_1000_T:
+		an_mode = XGBE_AN_MODE_CL37_SGMII;
+		break;
+	case XGBE_SFP_BASE_1000_SX:
+	case XGBE_SFP_BASE_1000_LX:
+	case XGBE_SFP_BASE_1000_CX:
+		an_mode = XGBE_AN_MODE_CL37;
+		break;
+	default:
+		break;
+	}
+
+	return an_mode;
+}
+
+/* Select the auto-negotiation mode for the port.
+ *
+ * A re-driver always forces CL73_REDRV; otherwise the mode is derived
+ * from the port mode, with SFP ports deferring to the module type.
+ */
+static enum xgbe_an_mode xgbe_phy_an_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	/* A KR re-driver will always require CL73 AN */
+	if (phy_data->redrv)
+		return XGBE_AN_MODE_CL73_REDRV;
+
+	switch (phy_data->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE:
+	case XGBE_PORT_MODE_10GBASE_T:
+		return XGBE_AN_MODE_CL73;
+	case XGBE_PORT_MODE_1000BASE_T:
+	case XGBE_PORT_MODE_NBASE_T:
+		return XGBE_AN_MODE_CL37_SGMII;
+	case XGBE_PORT_MODE_1000BASE_X:
+		return XGBE_AN_MODE_CL37;
+	case XGBE_PORT_MODE_SFP:
+		return xgbe_phy_an_sfp_mode(phy_data);
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+	case XGBE_PORT_MODE_10GBASE_R:
+	default:
+		/* No auto-negotiation for these port modes */
+		return XGBE_AN_MODE_NONE;
+	}
+}
+
+/* Write the re-driver mode for this port's lane using the external MII
+ * register write hook. Returns that hook's result.
+ */
+static int xgbe_phy_set_redrv_mode_mdio(struct xgbe_prv_data *pdata,
+					enum xgbe_phy_redrv_mode mode)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	/* Each lane's mode register sits 0x1000 above the previous one */
+	u16 reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000);
+	u16 val = (u16)mode;
+
+	return pdata->hw_if.write_ext_mii_regs(pdata, phy_data->redrv_addr,
+					       reg, val);
+}
+
+/* Write the re-driver mode for this port's lane through
+ * xgbe_phy_redrv_write(). Returns that helper's result.
+ */
+static int xgbe_phy_set_redrv_mode_i2c(struct xgbe_prv_data *pdata,
+				       enum xgbe_phy_redrv_mode mode)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int reg;
+
+	/* Each lane's mode register sits 0x1000 above the previous one */
+	reg = XGBE_PHY_REDRV_MODE_REG + (phy_data->redrv_lane * 0x1000);
+
+	return xgbe_phy_redrv_write(pdata, reg, mode);
+}
+
+/* Program the re-driver (if one is present) for the current media type.
+ *
+ * CX mode is the default; SR mode is selected for SFP ports whose module
+ * is neither 1000BASE-CX nor 10GBASE-CR. The write is done while holding
+ * communication ownership, over I2C when redrv_if is set and over MDIO
+ * otherwise. This function returns void, so a failed write can only be
+ * logged, not propagated.
+ */
+static void xgbe_phy_set_redrv_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_phy_redrv_mode mode;
+	int ret;
+
+	if (!phy_data->redrv)
+		return;
+
+	mode = XGBE_PHY_REDRV_MODE_CX;
+	if ((phy_data->port_mode == XGBE_PORT_MODE_SFP) &&
+	    (phy_data->sfp_base != XGBE_SFP_BASE_1000_CX) &&
+	    (phy_data->sfp_base != XGBE_SFP_BASE_10000_CR))
+		mode = XGBE_PHY_REDRV_MODE_SR;
+
+	ret = xgbe_phy_get_comm_ownership(pdata);
+	if (ret)
+		return;
+
+	if (phy_data->redrv_if)
+		ret = xgbe_phy_set_redrv_mode_i2c(pdata, mode);
+	else
+		ret = xgbe_phy_set_redrv_mode_mdio(pdata, mode);
+
+	/* Surface write failures rather than silently ignoring them */
+	if (ret)
+		netdev_err(pdata->netdev,
+			   "error setting re-driver mode (%d)\n", ret);
+
+	xgbe_phy_put_comm_ownership(pdata);
+}
+
+/* Check the mailbox status bit before a new command is issued and emit
+ * a debug message if it is still set. Nothing is waited on or changed.
+ */
+static void xgbe_phy_start_ratechange(struct xgbe_prv_data *pdata)
+{
+	/* Log if a previous command did not complete */
+	if (XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS)) {
+		netif_dbg(pdata, link, pdata->netdev,
+			  "firmware mailbox not ready for command\n");
+	}
+}
+
+/* Poll the mailbox status bit until it clears, sleeping 1-2 ms between
+ * polls, for at most XGBE_RATECHANGE_COUNT polls. A debug message is
+ * logged if the bit never clears; no error is returned.
+ */
+static void xgbe_phy_complete_ratechange(struct xgbe_prv_data *pdata)
+{
+	unsigned int polls;
+
+	/* Wait for command to complete */
+	for (polls = 0; polls < XGBE_RATECHANGE_COUNT; polls++) {
+		if (!XP_IOREAD_BITS(pdata, XP_DRIVER_INT_RO, STATUS))
+			return;
+
+		usleep_range(1000, 2000);
+	}
+
+	netif_dbg(pdata, link, pdata->netdev,
+		  "firmware mailbox command did not complete\n");
+}
+
+/* Ask firmware to perform a receiver reset cycle (command 5,
+ * sub-command 0) and wait for the command to be processed.
+ *
+ * The completion wait does not report a timeout, so the final debug
+ * message is logged either way.
+ */
+static void xgbe_phy_rrc(struct xgbe_prv_data *pdata)
+{
+	unsigned int s0;
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* Receiver Reset Cycle */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 5);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	netif_dbg(pdata, link, pdata->netdev, "receiver reset complete\n");
+}
+
+/* Ask firmware to power the PHY off by issuing an all-zero command
+ * word, then record the current mode as unknown.
+ */
+static void xgbe_phy_power_off(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, 0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_UNKNOWN;
+
+	netif_dbg(pdata, link, pdata->netdev, "phy powered off\n");
+}
+
+/* Switch to 10G SFI mode.
+ *
+ * The re-driver mode is updated first. Firmware command 3 is then
+ * issued with a sub-command of 0 for anything but a passive cable, or
+ * 1/2/3 for a passive cable according to sfp_cable_len (<= 1, <= 3,
+ * longer). cur_mode is recorded after the completion wait.
+ */
+static void xgbe_phy_sfi_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int s0;
+
+	xgbe_phy_set_redrv_mode(pdata);
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* 10G/SFI */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 3);
+	if (phy_data->sfp_cable != XGBE_SFP_CABLE_PASSIVE) {
+		XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+	} else {
+		/* Passive cable: sub-command depends on the cable length */
+		if (phy_data->sfp_cable_len <= 1)
+			XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
+		else if (phy_data->sfp_cable_len <= 3)
+			XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
+		else
+			XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+	}
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_SFI;
+
+	netif_dbg(pdata, link, pdata->netdev, "10GbE SFI mode set\n");
+}
+
+/* Switch to 1G X mode: update the re-driver, issue firmware command 1
+ * with sub-command 3, wait for it and record cur_mode.
+ */
+static void xgbe_phy_x_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int s0;
+
+	xgbe_phy_set_redrv_mode(pdata);
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* 1G/X */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_X;
+
+	netif_dbg(pdata, link, pdata->netdev, "1GbE X mode set\n");
+}
+
+/* Switch to 1G SGMII mode: update the re-driver, issue firmware command
+ * 1 with sub-command 2, wait for it and record cur_mode.
+ */
+static void xgbe_phy_sgmii_1000_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int s0;
+
+	xgbe_phy_set_redrv_mode(pdata);
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* 1G/SGMII */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 2);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_SGMII_1000;
+
+	netif_dbg(pdata, link, pdata->netdev, "1GbE SGMII mode set\n");
+}
+
+/* Switch to 100M SGMII mode: update the re-driver, issue firmware
+ * command 1 with sub-command 1, wait for it and record cur_mode.
+ */
+static void xgbe_phy_sgmii_100_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int s0;
+
+	xgbe_phy_set_redrv_mode(pdata);
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* 100M/SGMII */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 1);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_SGMII_100;
+
+	netif_dbg(pdata, link, pdata->netdev, "100MbE SGMII mode set\n");
+}
+
+/* Switch to 10G KR mode: update the re-driver, issue firmware command 4
+ * with sub-command 0, wait for it and record cur_mode.
+ */
+static void xgbe_phy_kr_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int s0;
+
+	xgbe_phy_set_redrv_mode(pdata);
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* 10G/KR */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 4);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_KR;
+
+	netif_dbg(pdata, link, pdata->netdev, "10GbE KR mode set\n");
+}
+
+/* Switch to 2.5G KX mode: update the re-driver, issue firmware command
+ * 2 with sub-command 0, wait for it and record cur_mode.
+ */
+static void xgbe_phy_kx_2500_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int s0;
+
+	xgbe_phy_set_redrv_mode(pdata);
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* 2.5G/KX */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 2);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 0);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_KX_2500;
+
+	netif_dbg(pdata, link, pdata->netdev, "2.5GbE KX mode set\n");
+}
+
+/* Switch to 1G KX mode: update the re-driver, issue firmware command 1
+ * with sub-command 3, wait for it and record cur_mode.
+ */
+static void xgbe_phy_kx_1000_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int s0;
+
+	xgbe_phy_set_redrv_mode(pdata);
+
+	xgbe_phy_start_ratechange(pdata);
+
+	/* 1G/KX */
+	s0 = 0;
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, COMMAND, 1);
+	XP_SET_BITS(s0, XP_DRIVER_SCRATCH_0, SUB_COMMAND, 3);
+
+	/* Call FW to make the change */
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_0, s0);
+	XP_IOWRITE(pdata, XP_DRIVER_SCRATCH_1, 0);
+	XP_IOWRITE_BITS(pdata, XP_DRIVER_INT_REQ, REQUEST, 1);
+
+	xgbe_phy_complete_ratechange(pdata);
+
+	phy_data->cur_mode = XGBE_MODE_KX_1000;
+
+	netif_dbg(pdata, link, pdata->netdev, "1GbE KX mode set\n");
+}
+
+/* Return the mode most recently recorded by the mode-set helpers */
+static enum xgbe_mode xgbe_phy_cur_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy = pdata->phy_data;
+
+	return phy->cur_mode;
+}
+
+/* Pick the mode to switch to on a BASE-T port.
+ *
+ * Only 10GBASE-T ports switch: from either SGMII mode to KR, and from
+ * anything else to SGMII 1000. Other ports keep their current mode.
+ */
+static enum xgbe_mode xgbe_phy_switch_baset_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mode cur = xgbe_phy_cur_mode(pdata);
+
+	/* No switching if not 10GBase-T */
+	if (phy_data->port_mode != XGBE_PORT_MODE_10GBASE_T)
+		return cur;
+
+	if (cur == XGBE_MODE_SGMII_100 || cur == XGBE_MODE_SGMII_1000)
+		return XGBE_MODE_KR;
+
+	return XGBE_MODE_SGMII_1000;
+}
+
+/* A 2.5G backplane port has a single mode, so "switching" always yields
+ * XGBE_MODE_KX_2500; @pdata is not consulted.
+ */
+static enum xgbe_mode xgbe_phy_switch_bp_2500_mode(struct xgbe_prv_data *pdata)
+{
+	return XGBE_MODE_KX_2500;
+}
+
+/* Pick the mode to switch to on a backplane port: KX 1000 moves to KR,
+ * and every other current mode (KR included) moves to KX 1000.
+ */
+static enum xgbe_mode xgbe_phy_switch_bp_mode(struct xgbe_prv_data *pdata)
+{
+	/* If we are in KR switch to KX, and vice-versa */
+	if (xgbe_phy_cur_mode(pdata) == XGBE_MODE_KX_1000)
+		return XGBE_MODE_KR;
+
+	return XGBE_MODE_KX_1000;
+}
+
+/* Return the mode to switch to, based on the port mode. Ports that do
+ * not switch return their current mode; unrecognised port modes return
+ * XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_switch_mode(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mode next;
+
+	switch (phy_data->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE:
+		next = xgbe_phy_switch_bp_mode(pdata);
+		break;
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+		next = xgbe_phy_switch_bp_2500_mode(pdata);
+		break;
+	case XGBE_PORT_MODE_1000BASE_T:
+	case XGBE_PORT_MODE_NBASE_T:
+	case XGBE_PORT_MODE_10GBASE_T:
+		next = xgbe_phy_switch_baset_mode(pdata);
+		break;
+	case XGBE_PORT_MODE_1000BASE_X:
+	case XGBE_PORT_MODE_10GBASE_R:
+	case XGBE_PORT_MODE_SFP:
+		/* No switching, so just return current mode */
+		next = xgbe_phy_cur_mode(pdata);
+		break;
+	default:
+		next = XGBE_MODE_UNKNOWN;
+		break;
+	}
+
+	return next;
+}
+
+/* Map a link speed to a mode for BASE-X/BASE-R ports: 1000 -> X,
+ * 10000 -> KR, anything else -> unknown. @phy_data is not consulted.
+ */
+static enum xgbe_mode xgbe_phy_get_basex_mode(struct xgbe_phy_data *phy_data,
+					      int speed)
+{
+	if (speed == SPEED_1000)
+		return XGBE_MODE_X;
+
+	if (speed == SPEED_10000)
+		return XGBE_MODE_KR;
+
+	return XGBE_MODE_UNKNOWN;
+}
+
+/* Map a link speed to a mode for BASE-T ports: 100 -> SGMII 100,
+ * 1000 -> SGMII 1000, 10000 -> KR, anything else -> unknown.
+ * @phy_data is not consulted.
+ */
+static enum xgbe_mode xgbe_phy_get_baset_mode(struct xgbe_phy_data *phy_data,
+					      int speed)
+{
+	if (speed == SPEED_100)
+		return XGBE_MODE_SGMII_100;
+
+	if (speed == SPEED_1000)
+		return XGBE_MODE_SGMII_1000;
+
+	if (speed == SPEED_10000)
+		return XGBE_MODE_KR;
+
+	return XGBE_MODE_UNKNOWN;
+}
+
+/* Map a link speed to a mode for SFP ports.
+ *
+ * 100 -> SGMII 100; 1000 -> SGMII 1000 for a 1000BASE-T module, X
+ * otherwise; 10000 and SPEED_UNKNOWN -> SFI; anything else -> unknown.
+ */
+static enum xgbe_mode xgbe_phy_get_sfp_mode(struct xgbe_phy_data *phy_data,
+					    int speed)
+{
+	if (speed == SPEED_100)
+		return XGBE_MODE_SGMII_100;
+
+	if (speed == SPEED_1000)
+		return (phy_data->sfp_base == XGBE_SFP_BASE_1000_T) ?
+			XGBE_MODE_SGMII_1000 : XGBE_MODE_X;
+
+	if (speed == SPEED_10000 || speed == SPEED_UNKNOWN)
+		return XGBE_MODE_SFI;
+
+	return XGBE_MODE_UNKNOWN;
+}
+
+/* Map a link speed to a mode for 2.5G backplane ports: only 2500 is
+ * recognised.
+ */
+static enum xgbe_mode xgbe_phy_get_bp_2500_mode(int speed)
+{
+	return (speed == SPEED_2500) ? XGBE_MODE_KX_2500 : XGBE_MODE_UNKNOWN;
+}
+
+/* Map a link speed to a mode for backplane ports: 1000 -> KX 1000,
+ * 10000 -> KR, anything else -> unknown.
+ */
+static enum xgbe_mode xgbe_phy_get_bp_mode(int speed)
+{
+	if (speed == SPEED_1000)
+		return XGBE_MODE_KX_1000;
+
+	if (speed == SPEED_10000)
+		return XGBE_MODE_KR;
+
+	return XGBE_MODE_UNKNOWN;
+}
+
+/* Translate a link speed into the mode used for it on this port, by
+ * delegating to the helper for the port mode. Unrecognised port modes
+ * yield XGBE_MODE_UNKNOWN.
+ */
+static enum xgbe_mode xgbe_phy_get_mode(struct xgbe_prv_data *pdata,
+					int speed)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mode result;
+
+	switch (phy_data->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE:
+		result = xgbe_phy_get_bp_mode(speed);
+		break;
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+		result = xgbe_phy_get_bp_2500_mode(speed);
+		break;
+	case XGBE_PORT_MODE_1000BASE_T:
+	case XGBE_PORT_MODE_NBASE_T:
+	case XGBE_PORT_MODE_10GBASE_T:
+		result = xgbe_phy_get_baset_mode(phy_data, speed);
+		break;
+	case XGBE_PORT_MODE_1000BASE_X:
+	case XGBE_PORT_MODE_10GBASE_R:
+		result = xgbe_phy_get_basex_mode(phy_data, speed);
+		break;
+	case XGBE_PORT_MODE_SFP:
+		result = xgbe_phy_get_sfp_mode(phy_data, speed);
+		break;
+	default:
+		result = XGBE_MODE_UNKNOWN;
+		break;
+	}
+
+	return result;
+}
+
+/* Put the PHY into @mode by calling the matching mode-set helper.
+ * Modes without a helper are silently ignored.
+ */
+static void xgbe_phy_set_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+	if (mode == XGBE_MODE_KX_1000)
+		xgbe_phy_kx_1000_mode(pdata);
+	else if (mode == XGBE_MODE_KX_2500)
+		xgbe_phy_kx_2500_mode(pdata);
+	else if (mode == XGBE_MODE_KR)
+		xgbe_phy_kr_mode(pdata);
+	else if (mode == XGBE_MODE_SGMII_100)
+		xgbe_phy_sgmii_100_mode(pdata);
+	else if (mode == XGBE_MODE_SGMII_1000)
+		xgbe_phy_sgmii_1000_mode(pdata);
+	else if (mode == XGBE_MODE_X)
+		xgbe_phy_x_mode(pdata);
+	else if (mode == XGBE_MODE_SFI)
+		xgbe_phy_sfi_mode(pdata);
+}
+
+/* Decide whether @mode is currently usable.
+ *
+ * With auto-negotiation enabled this is true when any bit of @advert is
+ * being advertised; otherwise it is true when the configured fixed
+ * speed maps to @mode.
+ */
+static bool xgbe_phy_check_mode(struct xgbe_prv_data *pdata,
+				enum xgbe_mode mode, u32 advert)
+{
+	if (pdata->phy.autoneg == AUTONEG_ENABLE)
+		return (pdata->phy.advertising & advert) != 0;
+
+	return xgbe_phy_get_mode(pdata, pdata->phy.speed) == mode;
+}
+
+/* Report whether @mode may be used on a BASE-X/BASE-R port. Only X and
+ * KR are candidates; each is checked against its advertising bit.
+ */
+static bool xgbe_phy_use_basex_mode(struct xgbe_prv_data *pdata,
+				    enum xgbe_mode mode)
+{
+	u32 advert;
+
+	switch (mode) {
+	case XGBE_MODE_X:
+		advert = ADVERTISED_1000baseT_Full;
+		break;
+	case XGBE_MODE_KR:
+		advert = ADVERTISED_10000baseT_Full;
+		break;
+	default:
+		return false;
+	}
+
+	return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on a BASE-T port. SGMII 100,
+ * SGMII 1000 and KR are candidates; each is checked against its
+ * advertising bit.
+ */
+static bool xgbe_phy_use_baset_mode(struct xgbe_prv_data *pdata,
+				    enum xgbe_mode mode)
+{
+	u32 advert;
+
+	switch (mode) {
+	case XGBE_MODE_SGMII_100:
+		advert = ADVERTISED_100baseT_Full;
+		break;
+	case XGBE_MODE_SGMII_1000:
+		advert = ADVERTISED_1000baseT_Full;
+		break;
+	case XGBE_MODE_KR:
+		advert = ADVERTISED_10000baseT_Full;
+		break;
+	default:
+		return false;
+	}
+
+	return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on an SFP port.
+ *
+ * X is rejected for a 1000BASE-T module and the SGMII modes are
+ * rejected for anything but a 1000BASE-T module; SFI has no module
+ * restriction. Accepted candidates are then checked against their
+ * advertising bit.
+ */
+static bool xgbe_phy_use_sfp_mode(struct xgbe_prv_data *pdata,
+				  enum xgbe_mode mode)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	bool is_1000t = (phy_data->sfp_base == XGBE_SFP_BASE_1000_T);
+	u32 advert;
+
+	switch (mode) {
+	case XGBE_MODE_X:
+		if (is_1000t)
+			return false;
+		advert = ADVERTISED_1000baseT_Full;
+		break;
+	case XGBE_MODE_SGMII_100:
+		if (!is_1000t)
+			return false;
+		advert = ADVERTISED_100baseT_Full;
+		break;
+	case XGBE_MODE_SGMII_1000:
+		if (!is_1000t)
+			return false;
+		advert = ADVERTISED_1000baseT_Full;
+		break;
+	case XGBE_MODE_SFI:
+		advert = ADVERTISED_10000baseT_Full;
+		break;
+	default:
+		return false;
+	}
+
+	return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on a 2.5G backplane port; KX 2500 is
+ * the only candidate.
+ */
+static bool xgbe_phy_use_bp_2500_mode(struct xgbe_prv_data *pdata,
+				      enum xgbe_mode mode)
+{
+	if (mode != XGBE_MODE_KX_2500)
+		return false;
+
+	return xgbe_phy_check_mode(pdata, mode, ADVERTISED_2500baseX_Full);
+}
+
+/* Report whether @mode may be used on a backplane port. KX 1000 and KR
+ * are candidates; each is checked against its advertising bit.
+ */
+static bool xgbe_phy_use_bp_mode(struct xgbe_prv_data *pdata,
+				 enum xgbe_mode mode)
+{
+	u32 advert;
+
+	switch (mode) {
+	case XGBE_MODE_KX_1000:
+		advert = ADVERTISED_1000baseKX_Full;
+		break;
+	case XGBE_MODE_KR:
+		advert = ADVERTISED_10000baseKR_Full;
+		break;
+	default:
+		return false;
+	}
+
+	return xgbe_phy_check_mode(pdata, mode, advert);
+}
+
+/* Report whether @mode may be used on this port, by delegating to the
+ * helper for the port mode. Unrecognised port modes return false.
+ */
+static bool xgbe_phy_use_mode(struct xgbe_prv_data *pdata, enum xgbe_mode mode)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	bool usable;
+
+	switch (phy_data->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE:
+		usable = xgbe_phy_use_bp_mode(pdata, mode);
+		break;
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+		usable = xgbe_phy_use_bp_2500_mode(pdata, mode);
+		break;
+	case XGBE_PORT_MODE_1000BASE_T:
+	case XGBE_PORT_MODE_NBASE_T:
+	case XGBE_PORT_MODE_10GBASE_T:
+		usable = xgbe_phy_use_baset_mode(pdata, mode);
+		break;
+	case XGBE_PORT_MODE_1000BASE_X:
+	case XGBE_PORT_MODE_10GBASE_R:
+		usable = xgbe_phy_use_basex_mode(pdata, mode);
+		break;
+	case XGBE_PORT_MODE_SFP:
+		usable = xgbe_phy_use_sfp_mode(pdata, mode);
+		break;
+	default:
+		usable = false;
+		break;
+	}
+
+	return usable;
+}
+
+/* Report whether @speed is valid on a BASE-X/BASE-R port: 1000 only on
+ * a 1000BASE-X port and 10000 only on a 10GBASE-R port.
+ */
+static bool xgbe_phy_valid_speed_basex_mode(struct xgbe_phy_data *phy_data,
+					    int speed)
+{
+	if (speed == SPEED_1000)
+		return phy_data->port_mode == XGBE_PORT_MODE_1000BASE_X;
+
+	if (speed == SPEED_10000)
+		return phy_data->port_mode == XGBE_PORT_MODE_10GBASE_R;
+
+	return false;
+}
+
+/* Report whether @speed is valid on a BASE-T port: 100 and 1000 always,
+ * 10000 only on a 10GBASE-T port.
+ */
+static bool xgbe_phy_valid_speed_baset_mode(struct xgbe_phy_data *phy_data,
+					    int speed)
+{
+	if (speed == SPEED_100 || speed == SPEED_1000)
+		return true;
+
+	if (speed == SPEED_10000)
+		return phy_data->port_mode == XGBE_PORT_MODE_10GBASE_T;
+
+	return false;
+}
+
+/* Report whether @speed is valid for the detected SFP module, based on
+ * the speed class recorded in phy_data->sfp_speed.
+ */
+static bool xgbe_phy_valid_speed_sfp_mode(struct xgbe_phy_data *phy_data,
+					  int speed)
+{
+	if (speed == SPEED_100)
+		return phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000;
+
+	if (speed == SPEED_1000)
+		return (phy_data->sfp_speed == XGBE_SFP_SPEED_100_1000) ||
+		       (phy_data->sfp_speed == XGBE_SFP_SPEED_1000);
+
+	if (speed == SPEED_10000)
+		return phy_data->sfp_speed == XGBE_SFP_SPEED_10000;
+
+	return false;
+}
+
+/* A 2.5G backplane port accepts SPEED_2500 and nothing else. */
+static bool xgbe_phy_valid_speed_bp_2500_mode(int speed)
+{
+	return speed == SPEED_2500;
+}
+
+/* A backplane port accepts SPEED_1000 and SPEED_10000 only. */
+static bool xgbe_phy_valid_speed_bp_mode(int speed)
+{
+	return (speed == SPEED_1000) || (speed == SPEED_10000);
+}
+
+/* Report whether @speed is valid for this port.
+ *
+ * The check is delegated to the helper matching the configured port
+ * mode; an unrecognised port mode accepts no speed at all.
+ */
+static bool xgbe_phy_valid_speed(struct xgbe_prv_data *pdata, int speed)
+{
+	struct xgbe_phy_data *phy = pdata->phy_data;
+	bool valid = false;
+
+	switch (phy->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE:
+		valid = xgbe_phy_valid_speed_bp_mode(speed);
+		break;
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+		valid = xgbe_phy_valid_speed_bp_2500_mode(speed);
+		break;
+	case XGBE_PORT_MODE_1000BASE_T:
+	case XGBE_PORT_MODE_NBASE_T:
+	case XGBE_PORT_MODE_10GBASE_T:
+		valid = xgbe_phy_valid_speed_baset_mode(phy, speed);
+		break;
+	case XGBE_PORT_MODE_1000BASE_X:
+	case XGBE_PORT_MODE_10GBASE_R:
+		valid = xgbe_phy_valid_speed_basex_mode(phy, speed);
+		break;
+	case XGBE_PORT_MODE_SFP:
+		valid = xgbe_phy_valid_speed_sfp_mode(phy, speed);
+		break;
+	default:
+		break;
+	}
+
+	return valid;
+}
+
+/* Determine the current link state.
+ *
+ * Returns 1 when the PCS reports link up and 0 otherwise.  @an_restart
+ * is cleared on entry and set to 1 only when the SFP detection pass
+ * leaves sfp_changed set.
+ */
+static int xgbe_phy_link_status(struct xgbe_prv_data *pdata, int *an_restart)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int stat1;
+
+	*an_restart = 0;
+
+	if (phy_data->port_mode == XGBE_PORT_MODE_SFP) {
+		/* Refresh the SFP signal state before looking at it */
+		xgbe_phy_sfp_detect(pdata);
+
+		if (phy_data->sfp_changed) {
+			*an_restart = 1;
+			return 0;
+		}
+
+		/* No module or no receive signal means no link */
+		if (phy_data->sfp_mod_absent || phy_data->sfp_rx_los)
+			return 0;
+	}
+
+	if (phy_data->phydev) {
+		/* The external PHY must be readable, must have finished
+		 * auto-negotiation when that is enabled, and must itself
+		 * report link
+		 */
+		if (phy_read_status(phy_data->phydev) < 0)
+			return 0;
+
+		if ((pdata->phy.autoneg == AUTONEG_ENABLE) &&
+		    !phy_aneg_done(phy_data->phydev))
+			return 0;
+
+		if (!phy_data->phydev->link)
+			return 0;
+	}
+
+	/* Link status is latched low: the first read clears the latch,
+	 * the second read returns the current state
+	 */
+	stat1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+	stat1 = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_STAT1);
+	if (stat1 & MDIO_STAT1_LSTATUS)
+		return 1;
+
+	/* No link: run a receiver reset cycle on every second pass
+	 * through here, counting the passes in rrc_count
+	 */
+	if (phy_data->rrc_count) {
+		phy_data->rrc_count = 0;
+		xgbe_phy_rrc(pdata);
+	} else {
+		phy_data->rrc_count++;
+	}
+
+	return 0;
+}
+
+/* Load the SFP GPIO settings from the XP_PROP_3 register.
+ *
+ * The GPIO device address is XGBE_GPIO_ADDRESS_PCA9555 plus the
+ * GPIO_ADDR field; the mask and the per-signal values are taken from the
+ * remaining GPIO_* fields.  The results are logged when probe messages
+ * are enabled.
+ */
+static void xgbe_phy_sfp_gpio_setup(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int prop3 = XP_IOREAD(pdata, XP_PROP_3);
+
+	phy_data->sfp_gpio_address = XGBE_GPIO_ADDRESS_PCA9555 +
+				     XP_GET_BITS(prop3, XP_PROP_3, GPIO_ADDR);
+	phy_data->sfp_gpio_mask = XP_GET_BITS(prop3, XP_PROP_3, GPIO_MASK);
+
+	phy_data->sfp_gpio_rx_los =
+		XP_GET_BITS(prop3, XP_PROP_3, GPIO_RX_LOS);
+	phy_data->sfp_gpio_tx_fault =
+		XP_GET_BITS(prop3, XP_PROP_3, GPIO_TX_FAULT);
+	phy_data->sfp_gpio_mod_absent =
+		XP_GET_BITS(prop3, XP_PROP_3, GPIO_MOD_ABS);
+	phy_data->sfp_gpio_rate_select =
+		XP_GET_BITS(prop3, XP_PROP_3, GPIO_RATE_SELECT);
+
+	if (!netif_msg_probe(pdata))
+		return;
+
+	dev_dbg(pdata->dev, "SFP: gpio_address=%#x\n",
+		phy_data->sfp_gpio_address);
+	dev_dbg(pdata->dev, "SFP: gpio_mask=%#x\n",
+		phy_data->sfp_gpio_mask);
+	dev_dbg(pdata->dev, "SFP: gpio_rx_los=%u\n",
+		phy_data->sfp_gpio_rx_los);
+	dev_dbg(pdata->dev, "SFP: gpio_tx_fault=%u\n",
+		phy_data->sfp_gpio_tx_fault);
+	dev_dbg(pdata->dev, "SFP: gpio_mod_absent=%u\n",
+		phy_data->sfp_gpio_mod_absent);
+	dev_dbg(pdata->dev, "SFP: gpio_rate_select=%u\n",
+		phy_data->sfp_gpio_rate_select);
+}
+
+/* Load the SFP communication (mux) settings from XP_PROP_4.
+ *
+ * When the MUX_ADDR_LO field equals XGBE_SFP_DIRECT nothing is changed.
+ * Otherwise the PCA9545 communication type is selected and the mux
+ * address (high bits shifted left by two plus the low bits) and mux
+ * channel are recorded, and logged when probe messages are enabled.
+ */
+static void xgbe_phy_sfp_comm_setup(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int prop4, addr_hi, addr_lo;
+
+	prop4 = XP_IOREAD(pdata, XP_PROP_4);
+
+	addr_lo = XP_GET_BITS(prop4, XP_PROP_4, MUX_ADDR_LO);
+	if (addr_lo == XGBE_SFP_DIRECT)
+		return;
+
+	addr_hi = XP_GET_BITS(prop4, XP_PROP_4, MUX_ADDR_HI);
+
+	phy_data->sfp_comm = XGBE_SFP_COMM_PCA9545;
+	phy_data->sfp_mux_address = (addr_hi << 2) + addr_lo;
+	phy_data->sfp_mux_channel = XP_GET_BITS(prop4, XP_PROP_4, MUX_CHAN);
+
+	if (!netif_msg_probe(pdata))
+		return;
+
+	dev_dbg(pdata->dev, "SFP: mux_address=%#x\n",
+		phy_data->sfp_mux_address);
+	dev_dbg(pdata->dev, "SFP: mux_channel=%u\n",
+		phy_data->sfp_mux_channel);
+}
+
+/* Load all SFP related settings: first the communication (mux) settings,
+ * then the GPIO settings.
+ */
+static void xgbe_phy_sfp_setup(struct xgbe_prv_data *pdata)
+{
+	xgbe_phy_sfp_comm_setup(pdata);
+	xgbe_phy_sfp_gpio_setup(pdata);
+}
+
+/* Pulse the MDIO reset line through the hw_if GPIO callbacks.
+ *
+ * The GPIO given by mdio_reset_gpio is set and then cleared.  Returns 0
+ * on success or the error reported by the set_gpio()/clr_gpio()
+ * callback; the pin is not cleared when setting it failed.
+ */
+static int xgbe_phy_int_mdio_reset(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	int ret;	/* signed: carries the callbacks' error codes */
+
+	ret = pdata->hw_if.set_gpio(pdata, phy_data->mdio_reset_gpio);
+	if (ret)
+		return ret;
+
+	return pdata->hw_if.clr_gpio(pdata, phy_data->mdio_reset_gpio);
+}
+
+/* Pulse the MDIO reset line through the I2C GPIO device at
+ * mdio_reset_addr.
+ *
+ * Both output port registers are read, the bit for mdio_reset_gpio is
+ * set and written back, then cleared and written back again.  Returns 0
+ * on success or the first I2C error encountered.
+ */
+static int xgbe_phy_i2c_mdio_reset(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	u8 gpio_reg, gpio_ports[2], gpio_data[3];
+	u8 *port, pin_mask;
+	int ret;
+
+	/* Read the output port registers */
+	gpio_reg = 2;
+	ret = xgbe_phy_i2c_read(pdata, phy_data->mdio_reset_addr,
+				&gpio_reg, sizeof(gpio_reg),
+				gpio_ports, sizeof(gpio_ports));
+	if (ret)
+		return ret;
+
+	/* Write buffer: register number followed by both port values */
+	gpio_data[0] = 2;
+	gpio_data[1] = gpio_ports[0];
+	gpio_data[2] = gpio_ports[1];
+
+	/* Pins 0-7 live in the first port byte, the rest in the second */
+	port = (phy_data->mdio_reset_gpio < 8) ? &gpio_data[1] : &gpio_data[2];
+	pin_mask = 1 << (phy_data->mdio_reset_gpio % 8);
+
+	/* Set the GPIO pin and write the output port registers */
+	*port |= pin_mask;
+	ret = xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr,
+				 gpio_data, sizeof(gpio_data));
+	if (ret)
+		return ret;
+
+	/* Clear the GPIO pin and write the output port registers */
+	*port &= ~pin_mask;
+
+	return xgbe_phy_i2c_write(pdata, phy_data->mdio_reset_addr,
+				  gpio_data, sizeof(gpio_data));
+}
+
+/* Reset the MDIO-attached device using the configured reset method.
+ *
+ * Nothing is done (0 is returned) unless the connection type is MDIO.
+ * Otherwise communication ownership is taken, the I2C-GPIO or
+ * internal-GPIO reset is run when one of them is configured, and
+ * ownership is released again.  Returns 0 or the error from taking
+ * ownership or from the reset helper.
+ */
+static int xgbe_phy_mdio_reset(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	int ret;
+
+	if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO)
+		return 0;
+
+	ret = xgbe_phy_get_comm_ownership(pdata);
+	if (ret)
+		return ret;
+
+	switch (phy_data->mdio_reset) {
+	case XGBE_MDIO_RESET_I2C_GPIO:
+		ret = xgbe_phy_i2c_mdio_reset(pdata);
+		break;
+	case XGBE_MDIO_RESET_INT_GPIO:
+		ret = xgbe_phy_int_mdio_reset(pdata);
+		break;
+	default:
+		/* No reset method: ret is still 0 from above */
+		break;
+	}
+
+	xgbe_phy_put_comm_ownership(pdata);
+
+	return ret;
+}
+
+/* Check the re-driver settings for consistency.
+ *
+ * Returns false when no re-driver is present or the settings are
+ * acceptable.  Returns true when the interface value is out of range,
+ * the model is not a known one, or the lane number exceeds the limit
+ * for the model (3 for the 4223, 1 for the 4227).
+ */
+static bool xgbe_phy_redrv_error(struct xgbe_phy_data *phy_data)
+{
+	if (!phy_data->redrv)
+		return false;
+
+	if (phy_data->redrv_if >= XGBE_PHY_REDRV_IF_MAX)
+		return true;
+
+	switch (phy_data->redrv_model) {
+	case XGBE_PHY_REDRV_MODEL_4223:
+		return phy_data->redrv_lane > 3;
+	case XGBE_PHY_REDRV_MODEL_4227:
+		return phy_data->redrv_lane > 1;
+	default:
+		return true;
+	}
+}
+
+/* Read and validate the MDIO reset settings from XP_PROP_3.
+ *
+ * Only relevant for an MDIO connection; any other connection type
+ * returns 0 without touching anything.  For the I2C-GPIO method the
+ * GPIO device address and pin are recorded, for the internal-GPIO method
+ * only the pin.  Returns -EINVAL for an unsupported reset method.
+ */
+static int xgbe_phy_mdio_reset_setup(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int prop3;
+
+	if (phy_data->conn_type != XGBE_CONN_TYPE_MDIO)
+		return 0;
+
+	prop3 = XP_IOREAD(pdata, XP_PROP_3);
+	phy_data->mdio_reset = XP_GET_BITS(prop3, XP_PROP_3, MDIO_RESET);
+
+	switch (phy_data->mdio_reset) {
+	case XGBE_MDIO_RESET_NONE:
+		break;
+	case XGBE_MDIO_RESET_I2C_GPIO:
+		phy_data->mdio_reset_addr = XGBE_GPIO_ADDRESS_PCA9555 +
+					    XP_GET_BITS(prop3, XP_PROP_3,
+							MDIO_RESET_I2C_ADDR);
+		phy_data->mdio_reset_gpio = XP_GET_BITS(prop3, XP_PROP_3,
+							MDIO_RESET_I2C_GPIO);
+		break;
+	case XGBE_MDIO_RESET_INT_GPIO:
+		phy_data->mdio_reset_gpio = XP_GET_BITS(prop3, XP_PROP_3,
+							MDIO_RESET_INT_GPIO);
+		break;
+	default:
+		dev_err(pdata->dev, "unsupported MDIO reset (%#x)\n",
+			phy_data->mdio_reset);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/* Check that the port speeds fit the port mode.
+ *
+ * Each port mode has a set of speeds it can make use of.  The settings
+ * match (false is returned) when at least one of those speeds is present
+ * in port_speeds.  An unrecognised port mode always mismatches.
+ */
+static bool xgbe_phy_port_mode_mismatch(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	unsigned int usable = 0;
+
+	switch (phy_data->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE:
+		usable = XGBE_PHY_PORT_SPEED_1000 |
+			 XGBE_PHY_PORT_SPEED_10000;
+		break;
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+		usable = XGBE_PHY_PORT_SPEED_2500;
+		break;
+	case XGBE_PORT_MODE_1000BASE_T:
+		usable = XGBE_PHY_PORT_SPEED_100 |
+			 XGBE_PHY_PORT_SPEED_1000;
+		break;
+	case XGBE_PORT_MODE_1000BASE_X:
+		usable = XGBE_PHY_PORT_SPEED_1000;
+		break;
+	case XGBE_PORT_MODE_NBASE_T:
+		usable = XGBE_PHY_PORT_SPEED_100 |
+			 XGBE_PHY_PORT_SPEED_1000 |
+			 XGBE_PHY_PORT_SPEED_2500;
+		break;
+	case XGBE_PORT_MODE_10GBASE_T:
+	case XGBE_PORT_MODE_SFP:
+		usable = XGBE_PHY_PORT_SPEED_100 |
+			 XGBE_PHY_PORT_SPEED_1000 |
+			 XGBE_PHY_PORT_SPEED_10000;
+		break;
+	case XGBE_PORT_MODE_10GBASE_R:
+		usable = XGBE_PHY_PORT_SPEED_10000;
+		break;
+	default:
+		/* usable stays empty, which always mismatches */
+		break;
+	}
+
+	return !(phy_data->port_speeds & usable);
+}
+
+/* Check that the connection type fits the port mode.
+ *
+ * Backplane modes require a backplane connection, the Base-T/-X/-R
+ * modes an MDIO connection and SFP mode an SFP connection.  Returns true
+ * on a mismatch and for any unrecognised port mode.
+ */
+static bool xgbe_phy_conn_type_mismatch(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+
+	switch (phy_data->port_mode) {
+	case XGBE_PORT_MODE_BACKPLANE:
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+		return phy_data->conn_type != XGBE_CONN_TYPE_BACKPLANE;
+	case XGBE_PORT_MODE_1000BASE_T:
+	case XGBE_PORT_MODE_1000BASE_X:
+	case XGBE_PORT_MODE_NBASE_T:
+	case XGBE_PORT_MODE_10GBASE_T:
+	case XGBE_PORT_MODE_10GBASE_R:
+		return phy_data->conn_type != XGBE_CONN_TYPE_MDIO;
+	case XGBE_PORT_MODE_SFP:
+		return phy_data->conn_type != XGBE_CONN_TYPE_SFP;
+	default:
+		return true;
+	}
+}
+
+/* A port counts as enabled when both the PORT_SPEEDS and the CONN_TYPE
+ * fields of XP_PROP_0 are non-zero.
+ */
+static bool xgbe_phy_port_enabled(struct xgbe_prv_data *pdata)
+{
+	unsigned int prop0 = XP_IOREAD(pdata, XP_PROP_0);
+
+	return XP_GET_BITS(prop0, XP_PROP_0, PORT_SPEEDS) &&
+	       XP_GET_BITS(prop0, XP_PROP_0, CONN_TYPE);
+}
+
+/* Stop the PHY: release an external PHY device, reset the SFP state,
+ * power the PHY off and finally stop the I2C controller.
+ */
+static void xgbe_phy_stop(struct xgbe_prv_data *pdata)
+{
+	/* Release the external PHY, if there is one */
+	xgbe_phy_free_phy_device(pdata);
+
+	/* Return the SFP bookkeeping to its reset state */
+	xgbe_phy_sfp_reset(pdata->phy_data);
+	xgbe_phy_sfp_mod_absent(pdata);
+
+	/* Power down the PHY before the I2C controller goes away */
+	xgbe_phy_power_off(pdata);
+	pdata->i2c_if.i2c_stop(pdata);
+}
+
+/* Start the PHY.
+ *
+ * Starts the I2C controller, selects the start mode, runs SFP detection
+ * on an SFP port and then looks for an external PHY device.  Returns 0
+ * on success.  If the I2C controller cannot be started its error is
+ * returned; if the external PHY lookup fails the I2C controller is
+ * stopped again and the lookup error is returned.
+ */
+static int xgbe_phy_start(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	int ret;
+
+	ret = pdata->i2c_if.i2c_start(pdata);
+	if (ret)
+		return ret;
+
+	/* Start in highest supported mode */
+	xgbe_phy_set_mode(pdata, phy_data->start_mode);
+
+	/* SFP detection is only possible once the I2C controller runs */
+	if (phy_data->port_mode == XGBE_PORT_MODE_SFP)
+		xgbe_phy_sfp_detect(pdata);
+
+	/* If we have an external PHY, start it; undo the I2C start when
+	 * that fails
+	 */
+	ret = xgbe_phy_find_phy_device(pdata);
+	if (ret)
+		pdata->i2c_if.i2c_stop(pdata);
+
+	return ret;
+}
+
+/* Reset the PHY.
+ *
+ * The PHY is power cycled by powering it off and re-selecting the mode
+ * that was current beforehand.  When an external PHY device exists it is
+ * reset over MDIO and re-initialised with phy_init_hw().  Returns 0 on
+ * success or the error from the MDIO reset or phy_init_hw().
+ */
+static int xgbe_phy_reset(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data = pdata->phy_data;
+	enum xgbe_mode saved_mode = phy_data->cur_mode;
+	int ret;
+
+	/* Power cycle: the mode is saved before powering off */
+	xgbe_phy_power_off(pdata);
+	xgbe_phy_set_mode(pdata, saved_mode);
+
+	if (phy_data->phydev) {
+		/* Reset the external PHY */
+		ret = xgbe_phy_mdio_reset(pdata);
+		if (ret)
+			return ret;
+
+		return phy_init_hw(phy_data->phydev);
+	}
+
+	return 0;
+}
+
+/* Tear down what xgbe_phy_init() registered: unregister the MDIO bus
+ * used for driving external PHYs.
+ */
+static void xgbe_phy_exit(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *priv = pdata->phy_data;
+
+	mdiobus_unregister(priv->mii);
+}
+
+/* One-time PHY initialisation for this port.
+ *
+ * Reads the port properties from XP_PROP_0 and the re-driver properties
+ * from XP_PROP_4, validates them, fills in pdata->phy.supported and the
+ * start mode for the configured port mode, calls
+ * hw_if.set_ext_mii_mode() for the external PHY address and (when a
+ * re-driver is present with redrv_if == 0) for the re-driver address,
+ * and registers the MDIO bus.
+ *
+ * Returns 0 on success, -ENODEV when the port is not enabled, -ENOMEM on
+ * allocation failure, -EINVAL on inconsistent settings, or the error
+ * reported by i2c_init(), xgbe_phy_mdio_reset_setup() or
+ * mdiobus_register().
+ *
+ * NOTE(review): the error returns after i2c_init() do not undo it here -
+ * confirm whether the caller is expected to clean up.
+ */
+static int xgbe_phy_init(struct xgbe_prv_data *pdata)
+{
+	struct xgbe_phy_data *phy_data;
+	struct mii_bus *mii;
+	unsigned int reg;
+	int ret;
+
+	/* Check if enabled */
+	if (!xgbe_phy_port_enabled(pdata)) {
+		dev_info(pdata->dev, "device is not enabled\n");
+		return -ENODEV;
+	}
+
+	/* Initialize the I2C controller */
+	ret = pdata->i2c_if.i2c_init(pdata);
+	if (ret)
+		return ret;
+
+	/* Device-managed allocation, zero-initialised */
+	phy_data = devm_kzalloc(pdata->dev, sizeof(*phy_data), GFP_KERNEL);
+	if (!phy_data)
+		return -ENOMEM;
+	pdata->phy_data = phy_data;
+
+	/* Port properties */
+	reg = XP_IOREAD(pdata, XP_PROP_0);
+	phy_data->port_mode = XP_GET_BITS(reg, XP_PROP_0, PORT_MODE);
+	phy_data->port_id = XP_GET_BITS(reg, XP_PROP_0, PORT_ID);
+	phy_data->port_speeds = XP_GET_BITS(reg, XP_PROP_0, PORT_SPEEDS);
+	phy_data->conn_type = XP_GET_BITS(reg, XP_PROP_0, CONN_TYPE);
+	phy_data->mdio_addr = XP_GET_BITS(reg, XP_PROP_0, MDIO_ADDR);
+	if (netif_msg_probe(pdata)) {
+		dev_dbg(pdata->dev, "port mode=%u\n", phy_data->port_mode);
+		dev_dbg(pdata->dev, "port id=%u\n", phy_data->port_id);
+		dev_dbg(pdata->dev, "port speeds=%#x\n", phy_data->port_speeds);
+		dev_dbg(pdata->dev, "conn type=%u\n", phy_data->conn_type);
+		dev_dbg(pdata->dev, "mdio addr=%u\n", phy_data->mdio_addr);
+	}
+
+	/* Re-driver properties */
+	reg = XP_IOREAD(pdata, XP_PROP_4);
+	phy_data->redrv = XP_GET_BITS(reg, XP_PROP_4, REDRV_PRESENT);
+	phy_data->redrv_if = XP_GET_BITS(reg, XP_PROP_4, REDRV_IF);
+	phy_data->redrv_addr = XP_GET_BITS(reg, XP_PROP_4, REDRV_ADDR);
+	phy_data->redrv_lane = XP_GET_BITS(reg, XP_PROP_4, REDRV_LANE);
+	phy_data->redrv_model = XP_GET_BITS(reg, XP_PROP_4, REDRV_MODEL);
+	if (phy_data->redrv && netif_msg_probe(pdata)) {
+		dev_dbg(pdata->dev, "redrv present\n");
+		dev_dbg(pdata->dev, "redrv i/f=%u\n", phy_data->redrv_if);
+		dev_dbg(pdata->dev, "redrv addr=%#x\n", phy_data->redrv_addr);
+		dev_dbg(pdata->dev, "redrv lane=%u\n", phy_data->redrv_lane);
+		dev_dbg(pdata->dev, "redrv model=%u\n", phy_data->redrv_model);
+	}
+
+	/* Validate the connection requested */
+	if (xgbe_phy_conn_type_mismatch(pdata)) {
+		dev_err(pdata->dev, "phy mode/connection mismatch (%#x/%#x)\n",
+			phy_data->port_mode, phy_data->conn_type);
+		return -EINVAL;
+	}
+
+	/* Validate the mode requested */
+	if (xgbe_phy_port_mode_mismatch(pdata)) {
+		dev_err(pdata->dev, "phy mode/speed mismatch (%#x/%#x)\n",
+			phy_data->port_mode, phy_data->port_speeds);
+		return -EINVAL;
+	}
+
+	/* Check for and validate MDIO reset support */
+	ret = xgbe_phy_mdio_reset_setup(pdata);
+	if (ret)
+		return ret;
+
+	/* Validate the re-driver information */
+	if (xgbe_phy_redrv_error(phy_data)) {
+		dev_err(pdata->dev, "phy re-driver settings error\n");
+		return -EINVAL;
+	}
+	pdata->kr_redrv = phy_data->redrv;
+
+	/* Indicate current mode is unknown */
+	phy_data->cur_mode = XGBE_MODE_UNKNOWN;
+
+	/* Initialize supported features */
+	pdata->phy.supported = 0;
+
+	/* Within each case the speed checks run from lowest to highest, so
+	 * start_mode ends up as the mode of the highest available speed
+	 */
+	switch (phy_data->port_mode) {
+	/* Backplane support */
+	case XGBE_PORT_MODE_BACKPLANE:
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_Backplane;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+			pdata->phy.supported |= SUPPORTED_1000baseKX_Full;
+			phy_data->start_mode = XGBE_MODE_KX_1000;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
+			pdata->phy.supported |= SUPPORTED_10000baseKR_Full;
+			if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+				pdata->phy.supported |=
+					SUPPORTED_10000baseR_FEC;
+			phy_data->start_mode = XGBE_MODE_KR;
+		}
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
+		break;
+	case XGBE_PORT_MODE_BACKPLANE_2500:
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_Backplane;
+		pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+		phy_data->start_mode = XGBE_MODE_KX_2500;
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
+		break;
+
+	/* MDIO 1GBase-T support */
+	case XGBE_PORT_MODE_1000BASE_T:
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_TP;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+			pdata->phy.supported |= SUPPORTED_100baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_100;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_1000;
+		}
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
+		break;
+
+	/* MDIO Base-X support */
+	case XGBE_PORT_MODE_1000BASE_X:
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_FIBRE;
+		pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+		phy_data->start_mode = XGBE_MODE_X;
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
+		break;
+
+	/* MDIO NBase-T support */
+	case XGBE_PORT_MODE_NBASE_T:
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_TP;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+			pdata->phy.supported |= SUPPORTED_100baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_100;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_1000;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_2500) {
+			pdata->phy.supported |= SUPPORTED_2500baseX_Full;
+			phy_data->start_mode = XGBE_MODE_KX_2500;
+		}
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_CL45;
+		break;
+
+	/* 10GBase-T support */
+	case XGBE_PORT_MODE_10GBASE_T:
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_TP;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+			pdata->phy.supported |= SUPPORTED_100baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_100;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_1000;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
+			pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+			phy_data->start_mode = XGBE_MODE_KR;
+		}
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
+		break;
+
+	/* 10GBase-R support */
+	case XGBE_PORT_MODE_10GBASE_R:
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_TP;
+		pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+		if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+			pdata->phy.supported |= SUPPORTED_10000baseR_FEC;
+		phy_data->start_mode = XGBE_MODE_SFI;
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_NONE;
+		break;
+
+	/* SFP support */
+	case XGBE_PORT_MODE_SFP:
+		pdata->phy.supported |= SUPPORTED_Autoneg;
+		pdata->phy.supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+		pdata->phy.supported |= SUPPORTED_TP;
+		pdata->phy.supported |= SUPPORTED_FIBRE;
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_100) {
+			pdata->phy.supported |= SUPPORTED_100baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_100;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_1000) {
+			pdata->phy.supported |= SUPPORTED_1000baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SGMII_1000;
+		}
+		if (phy_data->port_speeds & XGBE_PHY_PORT_SPEED_10000) {
+			pdata->phy.supported |= SUPPORTED_10000baseT_Full;
+			phy_data->start_mode = XGBE_MODE_SFI;
+			if (pdata->fec_ability & MDIO_PMA_10GBR_FECABLE_ABLE)
+				pdata->phy.supported |=
+					SUPPORTED_10000baseR_FEC;
+		}
+
+		phy_data->phydev_mode = XGBE_MDIO_MODE_CL22;
+
+		xgbe_phy_sfp_setup(pdata);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (netif_msg_probe(pdata))
+		dev_dbg(pdata->dev, "phy supported=%#x\n",
+			pdata->phy.supported);
+
+	/* conn_type is an enumerated value (it is compared with == and !=
+	 * everywhere else), so test for equality rather than a shared bit
+	 */
+	if ((phy_data->conn_type == XGBE_CONN_TYPE_MDIO) &&
+	    (phy_data->phydev_mode != XGBE_MDIO_MODE_NONE)) {
+		ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->mdio_addr,
+						    phy_data->phydev_mode);
+		if (ret) {
+			dev_err(pdata->dev,
+				"mdio port/clause not compatible (%u/%u)\n",
+				phy_data->mdio_addr, phy_data->phydev_mode);
+			return -EINVAL;
+		}
+	}
+
+	/* A re-driver with redrv_if == 0 has its address set up for
+	 * clause 22 access as well
+	 */
+	if (phy_data->redrv && !phy_data->redrv_if) {
+		ret = pdata->hw_if.set_ext_mii_mode(pdata, phy_data->redrv_addr,
+						    XGBE_MDIO_MODE_CL22);
+		if (ret) {
+			dev_err(pdata->dev,
+				"redriver mdio port not compatible (%u)\n",
+				phy_data->redrv_addr);
+			return -EINVAL;
+		}
+	}
+
+	/* Register for driving external PHYs */
+	mii = devm_mdiobus_alloc(pdata->dev);
+	if (!mii) {
+		dev_err(pdata->dev, "mdiobus_alloc failed\n");
+		return -ENOMEM;
+	}
+
+	mii->priv = pdata;
+	mii->name = "amd-xgbe-mii";
+	mii->read = xgbe_phy_mii_read;
+	mii->write = xgbe_phy_mii_write;
+	mii->parent = pdata->dev;
+	mii->phy_mask = ~0;
+	snprintf(mii->id, sizeof(mii->id), "%s", dev_name(pdata->dev));
+	ret = mdiobus_register(mii);
+	if (ret) {
+		dev_err(pdata->dev, "mdiobus_register failed\n");
+		return ret;
+	}
+	phy_data->mii = mii;
+
+	return 0;
+}
+
+/* Fill in the PHY implementation callbacks of @phy_if with the
+ * xgbe_phy_* handlers.
+ */
+void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *phy_if)
+{
+	struct xgbe_phy_impl_if *impl = &phy_if->phy_impl;
+
+	/* Setup and teardown */
+	impl->init = xgbe_phy_init;
+	impl->exit = xgbe_phy_exit;
+
+	/* Run-time control */
+	impl->reset = xgbe_phy_reset;
+	impl->start = xgbe_phy_start;
+	impl->stop = xgbe_phy_stop;
+
+	/* Link and speed queries */
+	impl->link_status = xgbe_phy_link_status;
+	impl->valid_speed = xgbe_phy_valid_speed;
+
+	/* Mode handling */
+	impl->use_mode = xgbe_phy_use_mode;
+	impl->set_mode = xgbe_phy_set_mode;
+	impl->get_mode = xgbe_phy_get_mode;
+	impl->switch_mode = xgbe_phy_switch_mode;
+	impl->cur_mode = xgbe_phy_cur_mode;
+
+	/* Auto-negotiation */
+	impl->an_mode = xgbe_phy_an_mode;
+	impl->an_config = xgbe_phy_an_config;
+	impl->an_advertising = xgbe_phy_an_advertising;
+	impl->an_outcome = xgbe_phy_an_outcome;
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-platform.c b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
new file mode 100644
index 000000000000..84d4c51cab8c
--- /dev/null
+++ b/drivers/net/ethernet/amd/xgbe/xgbe-platform.c
@@ -0,0 +1,642 @@
+/*
+ * AMD 10Gb Ethernet driver
+ *
+ * This file is available to you under your choice of the following two
+ * licenses:
+ *
+ * License 1: GPLv2
+ *
+ * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
+ *
+ * This file is free software; you may copy, redistribute and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * License 2: Modified BSD
+ *
+ * Copyright (c) 2014-2016 Advanced Micro Devices, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Advanced Micro Devices, Inc. nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * This file incorporates work covered by the following copyright and
+ * permission notice:
+ *     The Synopsys DWC ETHER XGMAC Software Driver and documentation
+ *     (hereinafter "Software") is an unsupported proprietary work of Synopsys,
+ *     Inc. unless otherwise expressly agreed to in writing between Synopsys
+ *     and you.
+ *
+ *     The Software IS NOT an item of Licensed Software or Licensed Product
+ *     under any End User Software License Agreement or Agreement for Licensed
+ *     Product with Synopsys or any supplement thereto.  Permission is hereby
+ *     granted, free of charge, to any person obtaining a copy of this software
+ *     annotated with this license and the Software, to deal in the Software
+ *     without restriction, including without limitation the rights to use,
+ *     copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+ *     of the Software, and to permit persons to whom the Software is furnished
+ *     to do so, subject to the following conditions:
+ *
+ *     The above copyright notice and this permission notice shall be included
+ *     in all copies or substantial portions of the Software.
+ *
+ *     THIS SOFTWARE IS BEING DISTRIBUTED BY SYNOPSYS SOLELY ON AN "AS IS"
+ *     BASIS AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ *     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+ *     PARTICULAR PURPOSE ARE HEREBY DISCLAIMED. IN NO EVENT SHALL SYNOPSYS
+ *     BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ *     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ *     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ *     THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/spinlock.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/of_net.h>
+#include <linux/of_address.h>
+#include <linux/of_platform.h>
+#include <linux/of_device.h>
+#include <linux/clk.h>
+#include <linux/property.h>
+#include <linux/acpi.h>
+#include <linux/mdio.h>
+
+#include "xgbe.h"
+#include "xgbe-common.h"
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgbe_acpi_match[];
+
+/* Look the device up in the ACPI match table and return the version
+ * data stored in the matching entry's driver_data, or NULL when the
+ * device does not match.
+ */
+static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata)
+{
+	const struct acpi_device_id *match =
+		acpi_match_device(xgbe_acpi_match, pdata->dev);
+
+	if (!match)
+		return NULL;
+
+	return (struct xgbe_version_data *)match->driver_data;
+}
+
+/* Obtain the clock rates from the ACPI device properties.
+ *
+ * Reads XGBE_ACPI_DMA_FREQ into pdata->sysclk_rate and then
+ * XGBE_ACPI_PTP_FREQ into pdata->ptpclk_rate.  Returns 0 on success; on
+ * failure the name of the missing property is logged and the error from
+ * device_property_read_u32() is returned.
+ */
+static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
+{
+	struct device *dev = pdata->dev;
+	const char *prop;
+	u32 freq;
+	int ret;
+
+	/* Obtain the system clock setting */
+	prop = XGBE_ACPI_DMA_FREQ;
+	ret = device_property_read_u32(dev, prop, &freq);
+	if (ret)
+		goto err_prop;
+	pdata->sysclk_rate = freq;
+
+	/* Obtain the PTP clock setting */
+	prop = XGBE_ACPI_PTP_FREQ;
+	ret = device_property_read_u32(dev, prop, &freq);
+	if (ret)
+		goto err_prop;
+	pdata->ptpclk_rate = freq;
+
+	return 0;
+
+err_prop:
+	dev_err(dev, "unable to obtain %s property\n", prop);
+
+	return ret;
+}
+#else   /* CONFIG_ACPI */
+/* Stub for builds without CONFIG_ACPI: no version data can be obtained
+ * through ACPI, so NULL is always returned.
+ */
+static struct xgbe_version_data *xgbe_acpi_vdata(struct xgbe_prv_data *pdata)
+{
+	return NULL;
+}
+
+/* Stub for builds without CONFIG_ACPI: ACPI based setup is not
+ * available, so -EINVAL is always returned.
+ */
+static int xgbe_acpi_support(struct xgbe_prv_data *pdata)
+{
+	return -EINVAL;
+}
+#endif  /* CONFIG_ACPI */
+
+#ifdef CONFIG_OF
+static const struct of_device_id xgbe_of_match[];
+
+/* Look the device up in the device tree match table and return the
+ * version data stored in the matching entry's data pointer, or NULL
+ * when the device does not match.
+ */
+static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata)
+{
+	const struct of_device_id *match =
+		of_match_device(xgbe_of_match, pdata->dev);
+
+	if (!match)
+		return NULL;
+
+	return (struct xgbe_version_data *)match->data;
+}
+
+/* Obtain the clocks named XGBE_DMA_CLOCK and XGBE_PTP_CLOCK and record
+ * their rates.
+ *
+ * The clock handles are stored in pdata->sysclk and pdata->ptpclk (also
+ * when the lookup failed) and the rates in pdata->sysclk_rate and
+ * pdata->ptpclk_rate.  Returns 0 on success or the error encoded in the
+ * pointer returned by devm_clk_get().
+ */
+static int xgbe_of_support(struct xgbe_prv_data *pdata)
+{
+	struct device *dev = pdata->dev;
+	int ret;
+
+	/* Obtain the system clock setting */
+	pdata->sysclk = devm_clk_get(dev, XGBE_DMA_CLOCK);
+	ret = PTR_ERR_OR_ZERO(pdata->sysclk);
+	if (ret) {
+		dev_err(dev, "dma devm_clk_get failed\n");
+		return ret;
+	}
+	pdata->sysclk_rate = clk_get_rate(pdata->sysclk);
+
+	/* Obtain the PTP clock setting */
+	pdata->ptpclk = devm_clk_get(dev, XGBE_PTP_CLOCK);
+	ret = PTR_ERR_OR_ZERO(pdata->ptpclk);
+	if (ret) {
+		dev_err(dev, "ptp devm_clk_get failed\n");
+		return ret;
+	}
+	pdata->ptpclk_rate = clk_get_rate(pdata->ptpclk);
+
+	return 0;
+}
+
+static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
+{
+	struct device *dev = pdata->dev;
+	struct device_node *phy_node;
+	struct platform_device *phy_pdev;
+
+	phy_node = of_parse_phandle(dev->of_node, "phy-handle", 0);
+	if (phy_node) {
+		/* Old style device tree:
+		 *   The XGBE and PHY resources are separate
+		 */
+		phy_pdev = of_find_device_by_node(phy_node);
+		of_node_put(phy_node);
+	} else {
+		/* New style device tree:
+		 *   The XGBE and PHY resources are grouped together with
+		 *   the PHY resources listed last
+		 */
+		get_device(dev);	/* paired with platform_device_put() */
+		phy_pdev = pdata->platdev;
+	}
+
+	return phy_pdev;
+}
+#else   /* CONFIG_OF */
+static struct xgbe_version_data *xgbe_of_vdata(struct xgbe_prv_data *pdata)
+{
+	return NULL;
+}
+
+static int xgbe_of_support(struct xgbe_prv_data *pdata)
+{
+	return -EINVAL;
+}
+
+static struct platform_device *xgbe_of_get_phy_pdev(struct xgbe_prv_data *pdata)
+{
+	return NULL;
+}
+#endif  /* CONFIG_OF */
+
+static unsigned int xgbe_resource_count(struct platform_device *pdev,
+					unsigned int type)
+{
+	unsigned int count;	/* resources of @pdev matching @type */
+	unsigned int i;		/* unsigned to match pdev->num_resources */
+
+	for (i = 0, count = 0; i < pdev->num_resources; i++) {
+		struct resource *res = &pdev->resource[i];
+
+		if (type == resource_type(res))
+			count++;
+	}
+
+	return count;
+}
+
+static struct platform_device *xgbe_get_phy_pdev(struct xgbe_prv_data *pdata)
+{
+	struct platform_device *phy_pdev;
+
+	if (pdata->use_acpi) {	/* ACPI: PHY shares our platform device */
+		get_device(pdata->dev);	/* paired with platform_device_put() */
+		phy_pdev = pdata->platdev;
+	} else {
+		phy_pdev = xgbe_of_get_phy_pdev(pdata);
+	}
+
+	return phy_pdev;
+}
+
+static struct xgbe_version_data *xgbe_get_vdata(struct xgbe_prv_data *pdata)
+{
+	return pdata->use_acpi ? xgbe_acpi_vdata(pdata)
+			       : xgbe_of_vdata(pdata);
+}
+
+static int xgbe_platform_probe(struct platform_device *pdev)
+{
+	struct xgbe_prv_data *pdata;
+	struct device *dev = &pdev->dev;
+	struct platform_device *phy_pdev;
+	struct resource *res;
+	const char *phy_mode;
+	unsigned int phy_memnum, phy_irqnum;
+	unsigned int dma_irqnum, dma_irqend;
+	enum dev_dma_attr attr;
+	int ret;
+
+	pdata = xgbe_alloc_pdata(dev);
+	if (IS_ERR(pdata)) {
+		ret = PTR_ERR(pdata);
+		goto err_alloc;
+	}
+
+	pdata->platdev = pdev;
+	pdata->adev = ACPI_COMPANION(dev);
+	platform_set_drvdata(pdev, pdata);
+
+	/* Check if we should use ACPI or DT */
+	pdata->use_acpi = dev->of_node ? 0 : 1;
+
+	/* Get the version data */
+	pdata->vdata = xgbe_get_vdata(pdata);
+
+	phy_pdev = xgbe_get_phy_pdev(pdata);
+	if (!phy_pdev) {
+		dev_err(dev, "unable to obtain phy device\n");
+		ret = -EINVAL;
+		goto err_phydev;
+	}
+	pdata->phy_platdev = phy_pdev;
+	pdata->phy_dev = &phy_pdev->dev;
+
+	if (pdev == phy_pdev) {
+		/* New style device tree or ACPI:
+		 *   The XGBE and PHY resources are grouped together with
+		 *   the PHY resources listed last
+		 */
+		phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3;
+		phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1;
+		dma_irqnum = 1;
+		dma_irqend = phy_irqnum;
+	} else {
+		/* Old style device tree:
+		 *   The XGBE and PHY resources are separate
+		 */
+		phy_memnum = 0;
+		phy_irqnum = 0;
+		dma_irqnum = 1;
+		dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ);
+	}
+
+	/* Obtain the mmio areas for the device */
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	pdata->xgmac_regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->xgmac_regs)) {
+		dev_err(dev, "xgmac ioremap failed\n");
+		ret = PTR_ERR(pdata->xgmac_regs);
+		goto err_io;
+	}
+	if (netif_msg_probe(pdata))
+		dev_dbg(dev, "xgmac_regs = %p\n", pdata->xgmac_regs);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	pdata->xpcs_regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->xpcs_regs)) {
+		dev_err(dev, "xpcs ioremap failed\n");
+		ret = PTR_ERR(pdata->xpcs_regs);
+		goto err_io;
+	}
+	if (netif_msg_probe(pdata))
+		dev_dbg(dev, "xpcs_regs  = %p\n", pdata->xpcs_regs);
+
+	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
+	pdata->rxtx_regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->rxtx_regs)) {
+		dev_err(dev, "rxtx ioremap failed\n");
+		ret = PTR_ERR(pdata->rxtx_regs);
+		goto err_io;
+	}
+	if (netif_msg_probe(pdata))
+		dev_dbg(dev, "rxtx_regs  = %p\n", pdata->rxtx_regs);
+
+	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
+	pdata->sir0_regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->sir0_regs)) {
+		dev_err(dev, "sir0 ioremap failed\n");
+		ret = PTR_ERR(pdata->sir0_regs);
+		goto err_io;
+	}
+	if (netif_msg_probe(pdata))
+		dev_dbg(dev, "sir0_regs  = %p\n", pdata->sir0_regs);
+
+	res = platform_get_resource(phy_pdev, IORESOURCE_MEM, phy_memnum++);
+	pdata->sir1_regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(pdata->sir1_regs)) {
+		dev_err(dev, "sir1 ioremap failed\n");
+		ret = PTR_ERR(pdata->sir1_regs);
+		goto err_io;
+	}
+	if (netif_msg_probe(pdata))
+		dev_dbg(dev, "sir1_regs  = %p\n", pdata->sir1_regs);
+
+	/* Retrieve the MAC address */
+	ret = device_property_read_u8_array(dev, XGBE_MAC_ADDR_PROPERTY,
+					    pdata->mac_addr,
+					    sizeof(pdata->mac_addr));
+	if (ret || !is_valid_ether_addr(pdata->mac_addr)) {
+		dev_err(dev, "invalid %s property\n", XGBE_MAC_ADDR_PROPERTY);
+		if (!ret)
+			ret = -EINVAL;
+		goto err_io;
+	}
+
+	/* Retrieve the PHY mode - it must be "xgmii" */
+	ret = device_property_read_string(dev, XGBE_PHY_MODE_PROPERTY,
+					  &phy_mode);
+	if (ret || strcmp(phy_mode, phy_modes(PHY_INTERFACE_MODE_XGMII))) {
+		dev_err(dev, "invalid %s property\n", XGBE_PHY_MODE_PROPERTY);
+		if (!ret)
+			ret = -EINVAL;
+		goto err_io;
+	}
+	pdata->phy_mode = PHY_INTERFACE_MODE_XGMII;
+
+	/* Check for per channel interrupt support */
+	if (device_property_present(dev, XGBE_DMA_IRQS_PROPERTY)) {
+		pdata->per_channel_irq = 1;
+		pdata->channel_irq_mode = XGBE_IRQ_MODE_EDGE;
+	}
+
+	/* Obtain device settings unique to ACPI/OF */
+	if (pdata->use_acpi)
+		ret = xgbe_acpi_support(pdata);
+	else
+		ret = xgbe_of_support(pdata);
+	if (ret)
+		goto err_io;
+
+	/* Set the DMA coherency values */
+	attr = device_get_dma_attr(dev);
+	if (attr == DEV_DMA_NOT_SUPPORTED) {
+		dev_err(dev, "DMA is not supported\n");
+		ret = -ENODEV;
+		goto err_io;
+	}
+	pdata->coherent = (attr == DEV_DMA_COHERENT);
+	if (pdata->coherent) {
+		pdata->axdomain = XGBE_DMA_OS_AXDOMAIN;
+		pdata->arcache = XGBE_DMA_OS_ARCACHE;
+		pdata->awcache = XGBE_DMA_OS_AWCACHE;
+	} else {
+		pdata->axdomain = XGBE_DMA_SYS_AXDOMAIN;
+		pdata->arcache = XGBE_DMA_SYS_ARCACHE;
+		pdata->awcache = XGBE_DMA_SYS_AWCACHE;
+	}
+
+	/* Set the maximum fifo amounts */
+	pdata->tx_max_fifo_size = pdata->vdata->tx_max_fifo_size;
+	pdata->rx_max_fifo_size = pdata->vdata->rx_max_fifo_size;
+
+	/* Set the hardware channel and queue counts */
+	xgbe_set_counts(pdata);
+
+	/* Always have XGMAC and XPCS (auto-negotiation) interrupts */
+	pdata->irq_count = 2;
+
+	/* Get the device interrupt */
+	ret = platform_get_irq(pdev, 0);
+	if (ret < 0) {
+		dev_err(dev, "platform_get_irq 0 failed\n");
+		goto err_io;
+	}
+	pdata->dev_irq = ret;
+
+	/* Get the per channel DMA interrupts (indices dma_irqnum..dma_irqend-1) */
+	if (pdata->per_channel_irq) {
+		unsigned int i, max = ARRAY_SIZE(pdata->channel_irq);
+
+		for (i = 0; (i < max) && (dma_irqnum < dma_irqend); i++) {
+			ret = platform_get_irq(pdata->platdev, dma_irqnum++);
+			if (ret < 0) {
+				dev_err(dev,
+					"platform_get_irq %u failed\n",
+					dma_irqnum - 1);
+				goto err_io;
+			}
+
+			pdata->channel_irq[i] = ret;
+		}
+		/* NOTE(review): max is used even if fewer IRQs were read */
+		pdata->channel_irq_count = max;
+
+		pdata->irq_count += max;
+	}
+
+	/* Get the auto-negotiation interrupt */
+	ret = platform_get_irq(phy_pdev, phy_irqnum++);
+	if (ret < 0) {
+		dev_err(dev, "platform_get_irq phy 0 failed\n");
+		goto err_io;
+	}
+	pdata->an_irq = ret;
+
+	/* Configure the netdev resource */
+	ret = xgbe_config_netdev(pdata);
+	if (ret)
+		goto err_io;
+
+	netdev_notice(pdata->netdev, "net device enabled\n");
+
+	return 0;
+
+err_io:
+	platform_device_put(phy_pdev);	/* drop xgbe_get_phy_pdev() ref */
+
+err_phydev:
+	xgbe_free_pdata(pdata);
+
+err_alloc:
+	dev_notice(dev, "net device not enabled\n");
+
+	return ret;
+}
+
+static int xgbe_platform_remove(struct platform_device *pdev)
+{
+	struct xgbe_prv_data *pdata = platform_get_drvdata(pdev);
+
+	xgbe_deconfig_netdev(pdata);
+
+	platform_device_put(pdata->phy_platdev);	/* ref from probe */
+
+	xgbe_free_pdata(pdata);
+
+	return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int xgbe_platform_suspend(struct device *dev)
+{
+	struct xgbe_prv_data *pdata = dev_get_drvdata(dev);
+	struct net_device *netdev = pdata->netdev;
+	int ret = 0;
+
+	DBGPR("-->xgbe_suspend\n");
+
+	if (netif_running(netdev))
+		ret = xgbe_powerdown(netdev, XGMAC_DRIVER_CONTEXT);
+	/* Set the PCS low power bit; lpm_ctrl is reused on resume */
+	pdata->lpm_ctrl = XMDIO_READ(pdata, MDIO_MMD_PCS, MDIO_CTRL1);
+	pdata->lpm_ctrl |= MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+	DBGPR("<--xgbe_suspend\n");
+
+	return ret;
+}
+
+static int xgbe_platform_resume(struct device *dev)
+{
+	struct xgbe_prv_data *pdata = dev_get_drvdata(dev);
+	struct net_device *netdev = pdata->netdev;
+	int ret = 0;
+
+	DBGPR("-->xgbe_resume\n");
+	/* Clear the PCS low power bit in the CTRL1 value kept in lpm_ctrl */
+	pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER;
+	XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl);
+
+	if (netif_running(netdev)) {
+		ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT);
+
+		/* Schedule a restart in case the link or phy state changed
+		 * while we were powered down.
+		 */
+		schedule_work(&pdata->restart_work);
+	}
+
+	DBGPR("<--xgbe_resume\n");
+
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static const struct xgbe_version_data xgbe_v1 = {
+	.init_function_ptrs_phy_impl	= xgbe_init_function_ptrs_phy_v1,
+	.xpcs_access			= XGBE_XPCS_ACCESS_V1,
+	.tx_max_fifo_size		= 81920,
+	.rx_max_fifo_size		= 81920,
+	.tx_tstamp_workaround		= 1,
+};
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id xgbe_acpi_match[] = {
+	{ .id = "AMDI8001",
+	  .driver_data = (kernel_ulong_t)&xgbe_v1 },
+	{},
+};
+
+MODULE_DEVICE_TABLE(acpi, xgbe_acpi_match);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id xgbe_of_match[] = {
+	{ .compatible = "amd,xgbe-seattle-v1a",
+	  .data = &xgbe_v1 },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, xgbe_of_match);
+#endif
+
+static SIMPLE_DEV_PM_OPS(xgbe_platform_pm_ops,
+			 xgbe_platform_suspend, xgbe_platform_resume);
+
+static struct platform_driver xgbe_driver = {
+	.driver = {
+		.name = XGBE_DRV_NAME,
+#ifdef CONFIG_ACPI
+		.acpi_match_table = xgbe_acpi_match,
+#endif
+#ifdef CONFIG_OF
+		.of_match_table = xgbe_of_match,
+#endif
+		.pm = &xgbe_platform_pm_ops,
+	},
+	.probe = xgbe_platform_probe,
+	.remove = xgbe_platform_remove,
+};
+
+int xgbe_platform_init(void)
+{
+	return platform_driver_register(&xgbe_driver);
+}
+
+void xgbe_platform_exit(void)
+{
+	platform_driver_unregister(&xgbe_driver);
+}
diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h
index 5dd17dcea2f8..f52a9bd05bac 100644
--- a/drivers/net/ethernet/amd/xgbe/xgbe.h
+++ b/drivers/net/ethernet/amd/xgbe/xgbe.h
@@ -127,9 +127,10 @@
 #include <linux/timecounter.h>
 #include <linux/net_tstamp.h>
 #include <net/dcbnl.h>
+#include <linux/completion.h>
 
 #define XGBE_DRV_NAME		"amd-xgbe"
-#define XGBE_DRV_VERSION	"1.0.2"
+#define XGBE_DRV_VERSION	"1.0.3"
 #define XGBE_DRV_DESC		"AMD 10 Gigabit Ethernet Driver"
 
 /* Descriptor related defines */
@@ -158,7 +159,8 @@
 
 #define XGBE_MAX_DMA_CHANNELS	16
 #define XGBE_MAX_QUEUES		16
-#define XGBE_DMA_STOP_TIMEOUT	5
+#define XGBE_PRIORITY_QUEUES	8
+#define XGBE_DMA_STOP_TIMEOUT	1
 
 /* DMA cache settings - Outer sharable, write-back, write-allocate */
 #define XGBE_DMA_OS_AXDOMAIN	0x2
@@ -170,6 +172,10 @@
 #define XGBE_DMA_SYS_ARCACHE	0x0
 #define XGBE_DMA_SYS_AWCACHE	0x0
 
+/* DMA channel interrupt modes */
+#define XGBE_IRQ_MODE_EDGE	0
+#define XGBE_IRQ_MODE_LEVEL	1
+
 #define XGBE_DMA_INTERRUPT_MASK	0x31c7
 
 #define XGMAC_MIN_PACKET	60
@@ -177,18 +183,19 @@
 #define XGMAC_MAX_STD_PACKET	1518
 #define XGMAC_JUMBO_PACKET_MTU	9000
 #define XGMAC_MAX_JUMBO_PACKET	9018
+#define XGMAC_ETH_PREAMBLE	(12 + 8)	/* Inter-frame gap + preamble */
+
+#define XGMAC_PFC_DATA_LEN	46
+#define XGMAC_PFC_DELAYS	14000
+
+#define XGMAC_PRIO_QUEUES(_cnt)					\
+	min_t(unsigned int, IEEE_8021QAZ_MAX_TCS, (_cnt))
 
 /* Common property names */
 #define XGBE_MAC_ADDR_PROPERTY	"mac-address"
 #define XGBE_PHY_MODE_PROPERTY	"phy-mode"
 #define XGBE_DMA_IRQS_PROPERTY	"amd,per-channel-interrupt"
 #define XGBE_SPEEDSET_PROPERTY	"amd,speed-set"
-#define XGBE_BLWC_PROPERTY	"amd,serdes-blwc"
-#define XGBE_CDR_RATE_PROPERTY	"amd,serdes-cdr-rate"
-#define XGBE_PQ_SKEW_PROPERTY	"amd,serdes-pq-skew"
-#define XGBE_TX_AMP_PROPERTY	"amd,serdes-tx-amp"
-#define XGBE_DFE_CFG_PROPERTY	"amd,serdes-dfe-tap-config"
-#define XGBE_DFE_ENA_PROPERTY	"amd,serdes-dfe-tap-enable"
 
 /* Device-tree clock names */
 #define XGBE_DMA_CLOCK		"dma_clk"
@@ -198,6 +205,20 @@
 #define XGBE_ACPI_DMA_FREQ	"amd,dma-freq"
 #define XGBE_ACPI_PTP_FREQ	"amd,ptp-freq"
 
+/* PCI BAR mapping */
+#define XGBE_XGMAC_BAR		0
+#define XGBE_XPCS_BAR		1
+#define XGBE_MAC_PROP_OFFSET	0x1d000
+#define XGBE_I2C_CTRL_OFFSET	0x1e000
+
+/* PCI MSIx support */
+#define XGBE_MSIX_BASE_COUNT	4
+#define XGBE_MSIX_MIN_COUNT	(XGBE_MSIX_BASE_COUNT + 1)
+
+/* PCI clock frequencies */
+#define XGBE_V2_DMA_CLOCK_FREQ	500000000	/* 500 MHz */
+#define XGBE_V2_PTP_CLOCK_FREQ	125000000	/* 125 MHz */
+
 /* Timestamp support - values based on 50MHz PTP clock
  *   50MHz => 20 nsec
  */
@@ -208,7 +229,12 @@
 #define XGMAC_DRIVER_CONTEXT	1
 #define XGMAC_IOCTL_CONTEXT	2
 
-#define XGBE_FIFO_MAX		81920
+#define XGMAC_FIFO_MIN_ALLOC	2048
+#define XGMAC_FIFO_UNIT		256
+#define XGMAC_FIFO_ALIGN(_x)				\
+	(((_x) + XGMAC_FIFO_UNIT - 1) & ~(XGMAC_FIFO_UNIT - 1))
+#define XGMAC_FIFO_FC_OFF	2048
+#define XGMAC_FIFO_FC_MIN	4096
 
 #define XGBE_TC_MIN_QUANTUM	10
 
@@ -233,6 +259,14 @@
 /* Flow control queue count */
 #define XGMAC_MAX_FLOW_CONTROL_QUEUES	8
 
+/* Flow control threshold units */
+#define XGMAC_FLOW_CONTROL_UNIT		512
+#define XGMAC_FLOW_CONTROL_ALIGN(_x)				\
+	(((_x) + XGMAC_FLOW_CONTROL_UNIT - 1) & ~(XGMAC_FLOW_CONTROL_UNIT - 1))
+#define XGMAC_FLOW_CONTROL_VALUE(_x)				\
+	(((_x) < 1024) ? 0 : ((_x) / XGMAC_FLOW_CONTROL_UNIT) - 2)
+#define XGMAC_FLOW_CONTROL_MAX		33280
+
 /* Maximum MAC address hash table size (256 bits = 8 bytes) */
 #define XGBE_MAC_HASH_TABLE_SIZE	8
 
@@ -244,46 +278,19 @@
 
 /* Auto-negotiation */
 #define XGBE_AN_MS_TIMEOUT		500
-#define XGBE_LINK_TIMEOUT		10
-
-#define XGBE_AN_INT_CMPLT		0x01
-#define XGBE_AN_INC_LINK		0x02
-#define XGBE_AN_PG_RCV			0x04
-#define XGBE_AN_INT_MASK		0x07
-
-/* Rate-change complete wait/retry count */
-#define XGBE_RATECHANGE_COUNT		500
-
-/* Default SerDes settings */
-#define XGBE_SPEED_10000_BLWC		0
-#define XGBE_SPEED_10000_CDR		0x7
-#define XGBE_SPEED_10000_PLL		0x1
-#define XGBE_SPEED_10000_PQ		0x12
-#define XGBE_SPEED_10000_RATE		0x0
-#define XGBE_SPEED_10000_TXAMP		0xa
-#define XGBE_SPEED_10000_WORD		0x7
-#define XGBE_SPEED_10000_DFE_TAP_CONFIG	0x1
-#define XGBE_SPEED_10000_DFE_TAP_ENABLE	0x7f
-
-#define XGBE_SPEED_2500_BLWC		1
-#define XGBE_SPEED_2500_CDR		0x2
-#define XGBE_SPEED_2500_PLL		0x0
-#define XGBE_SPEED_2500_PQ		0xa
-#define XGBE_SPEED_2500_RATE		0x1
-#define XGBE_SPEED_2500_TXAMP		0xf
-#define XGBE_SPEED_2500_WORD		0x1
-#define XGBE_SPEED_2500_DFE_TAP_CONFIG	0x3
-#define XGBE_SPEED_2500_DFE_TAP_ENABLE	0x0
-
-#define XGBE_SPEED_1000_BLWC		1
-#define XGBE_SPEED_1000_CDR		0x2
-#define XGBE_SPEED_1000_PLL		0x0
-#define XGBE_SPEED_1000_PQ		0xa
-#define XGBE_SPEED_1000_RATE		0x3
-#define XGBE_SPEED_1000_TXAMP		0xf
-#define XGBE_SPEED_1000_WORD		0x1
-#define XGBE_SPEED_1000_DFE_TAP_CONFIG	0x3
-#define XGBE_SPEED_1000_DFE_TAP_ENABLE	0x0
+#define XGBE_LINK_TIMEOUT		5
+
+#define XGBE_SGMII_AN_LINK_STATUS	BIT(1)
+#define XGBE_SGMII_AN_LINK_SPEED	(BIT(2) | BIT(3))
+#define XGBE_SGMII_AN_LINK_SPEED_100	0x04
+#define XGBE_SGMII_AN_LINK_SPEED_1000	0x08
+#define XGBE_SGMII_AN_LINK_DUPLEX	BIT(4)
+
+/* ECC correctable error notification window (seconds) */
+#define XGBE_ECC_LIMIT			60
+
+/* MDIO port types */
+#define XGMAC_MAX_C22_PORT		3
 
 struct xgbe_prv_data;
 
@@ -461,6 +468,7 @@ enum xgbe_state {
 	XGBE_DOWN,
 	XGBE_LINK_INIT,
 	XGBE_LINK_ERR,
+	XGBE_STOPPED,
 };
 
 enum xgbe_int {
@@ -480,6 +488,12 @@ enum xgbe_int_state {
 	XGMAC_INT_STATE_RESTORE,
 };
 
+enum xgbe_ecc_sec {
+	XGBE_ECC_SEC_TX,
+	XGBE_ECC_SEC_RX,
+	XGBE_ECC_SEC_DESC,
+};
+
 enum xgbe_speed {
 	XGBE_SPEED_1000 = 0,
 	XGBE_SPEED_2500,
@@ -487,6 +501,19 @@ enum xgbe_speed {
 	XGBE_SPEEDS,
 };
 
+enum xgbe_xpcs_access {
+	XGBE_XPCS_ACCESS_V1 = 0,
+	XGBE_XPCS_ACCESS_V2,
+};
+
+enum xgbe_an_mode {
+	XGBE_AN_MODE_CL73 = 0,
+	XGBE_AN_MODE_CL73_REDRV,
+	XGBE_AN_MODE_CL37,
+	XGBE_AN_MODE_CL37_SGMII,
+	XGBE_AN_MODE_NONE,
+};
+
 enum xgbe_an {
 	XGBE_AN_READY = 0,
 	XGBE_AN_PAGE_RECEIVED,
@@ -504,8 +531,14 @@ enum xgbe_rx {
 };
 
 enum xgbe_mode {
-	XGBE_MODE_KR = 0,
-	XGBE_MODE_KX,
+	XGBE_MODE_KX_1000 = 0,
+	XGBE_MODE_KX_2500,
+	XGBE_MODE_KR,
+	XGBE_MODE_X,
+	XGBE_MODE_SGMII_100,
+	XGBE_MODE_SGMII_1000,
+	XGBE_MODE_SFI,
+	XGBE_MODE_UNKNOWN,
 };
 
 enum xgbe_speedset {
@@ -513,6 +546,12 @@ enum xgbe_speedset {
 	XGBE_SPEEDSET_2500_10000,
 };
 
+enum xgbe_mdio_mode {
+	XGBE_MDIO_MODE_NONE = 0,
+	XGBE_MDIO_MODE_CL22,
+	XGBE_MDIO_MODE_CL45,
+};
+
 struct xgbe_phy {
 	u32 supported;
 	u32 advertising;
@@ -531,6 +570,43 @@ struct xgbe_phy {
 	int rx_pause;
 };
 
+enum xgbe_i2c_cmd {
+	XGBE_I2C_CMD_READ = 0,
+	XGBE_I2C_CMD_WRITE,
+};
+
+struct xgbe_i2c_op {
+	enum xgbe_i2c_cmd cmd;
+
+	unsigned int target;
+
+	void *buf;
+	unsigned int len;
+};
+
+struct xgbe_i2c_op_state {
+	struct xgbe_i2c_op *op;
+
+	unsigned int tx_len;
+	unsigned char *tx_buf;
+
+	unsigned int rx_len;
+	unsigned char *rx_buf;
+
+	unsigned int tx_abort_source;
+
+	int ret;
+};
+
+struct xgbe_i2c {
+	unsigned int started;
+	unsigned int max_speed_mode;
+	unsigned int rx_fifo_size;
+	unsigned int tx_fifo_size;
+
+	struct xgbe_i2c_op_state op_state;
+};
+
 struct xgbe_mmc_stats {
 	/* Tx Stats */
 	u64 txoctetcount_gb;
@@ -601,9 +677,15 @@ struct xgbe_hw_if {
 
 	int (*read_mmd_regs)(struct xgbe_prv_data *, int, int);
 	void (*write_mmd_regs)(struct xgbe_prv_data *, int, int, int);
-	int (*set_gmii_speed)(struct xgbe_prv_data *);
-	int (*set_gmii_2500_speed)(struct xgbe_prv_data *);
-	int (*set_xgmii_speed)(struct xgbe_prv_data *);
+	int (*set_speed)(struct xgbe_prv_data *, int);
+
+	int (*set_ext_mii_mode)(struct xgbe_prv_data *, unsigned int,
+				enum xgbe_mdio_mode);
+	int (*read_ext_mii_regs)(struct xgbe_prv_data *, int, int);
+	int (*write_ext_mii_regs)(struct xgbe_prv_data *, int, int, u16);
+
+	int (*set_gpio)(struct xgbe_prv_data *, unsigned int);
+	int (*clr_gpio)(struct xgbe_prv_data *, unsigned int);
 
 	void (*enable_tx)(struct xgbe_prv_data *);
 	void (*disable_tx)(struct xgbe_prv_data *);
@@ -682,11 +764,65 @@ struct xgbe_hw_if {
 	int (*disable_rss)(struct xgbe_prv_data *);
 	int (*set_rss_hash_key)(struct xgbe_prv_data *, const u8 *);
 	int (*set_rss_lookup_table)(struct xgbe_prv_data *, const u32 *);
+
+	/* For ECC */
+	void (*disable_ecc_ded)(struct xgbe_prv_data *);
+	void (*disable_ecc_sec)(struct xgbe_prv_data *, enum xgbe_ecc_sec);
+};
+
+/* This structure represents implementation specific routines for an
+ * implementation of a PHY. All routines are required unless noted below.
+ *   Optional routines:
+ *     kr_training_pre, kr_training_post
+ */
+struct xgbe_phy_impl_if {
+	/* Perform Setup/teardown actions */
+	int (*init)(struct xgbe_prv_data *);
+	void (*exit)(struct xgbe_prv_data *);
+
+	/* Perform start/stop specific actions */
+	int (*reset)(struct xgbe_prv_data *);
+	int (*start)(struct xgbe_prv_data *);
+	void (*stop)(struct xgbe_prv_data *);
+
+	/* Return the link status */
+	int (*link_status)(struct xgbe_prv_data *, int *);
+
+	/* Indicate if a particular speed is valid */
+	bool (*valid_speed)(struct xgbe_prv_data *, int);
+
+	/* Check if the specified mode can/should be used */
+	bool (*use_mode)(struct xgbe_prv_data *, enum xgbe_mode);
+	/* Switch the PHY into various modes */
+	void (*set_mode)(struct xgbe_prv_data *, enum xgbe_mode);
+	/* Retrieve mode needed for a specific speed */
+	enum xgbe_mode (*get_mode)(struct xgbe_prv_data *, int);
+	/* Retrieve new/next mode when trying to auto-negotiate */
+	enum xgbe_mode (*switch_mode)(struct xgbe_prv_data *);
+	/* Retrieve current mode */
+	enum xgbe_mode (*cur_mode)(struct xgbe_prv_data *);
+
+	/* Retrieve current auto-negotiation mode */
+	enum xgbe_an_mode (*an_mode)(struct xgbe_prv_data *);
+
+	/* Configure auto-negotiation settings */
+	int (*an_config)(struct xgbe_prv_data *);
+
+	/* Set/override auto-negotiation advertisement settings */
+	unsigned int (*an_advertising)(struct xgbe_prv_data *);
+
+	/* Process results of auto-negotiation */
+	enum xgbe_mode (*an_outcome)(struct xgbe_prv_data *);
+
+	/* Pre/Post KR training enablement support */
+	void (*kr_training_pre)(struct xgbe_prv_data *);
+	void (*kr_training_post)(struct xgbe_prv_data *);
 };
 
 struct xgbe_phy_if {
-	/* For initial PHY setup */
-	void (*phy_init)(struct xgbe_prv_data *);
+	/* For PHY setup/teardown */
+	int (*phy_init)(struct xgbe_prv_data *);
+	void (*phy_exit)(struct xgbe_prv_data *);
 
 	/* For PHY support when setting device up/down */
 	int (*phy_reset)(struct xgbe_prv_data *);
@@ -696,6 +832,30 @@ struct xgbe_phy_if {
 	/* For PHY support while device is up */
 	void (*phy_status)(struct xgbe_prv_data *);
 	int (*phy_config_aneg)(struct xgbe_prv_data *);
+
+	/* For PHY settings validation */
+	bool (*phy_valid_speed)(struct xgbe_prv_data *, int);
+
+	/* For single interrupt support */
+	irqreturn_t (*an_isr)(int, struct xgbe_prv_data *);
+
+	/* PHY implementation specific services */
+	struct xgbe_phy_impl_if phy_impl;
+};
+
+struct xgbe_i2c_if {
+	/* For initial I2C setup */
+	int (*i2c_init)(struct xgbe_prv_data *);
+
+	/* For I2C support when setting device up/down */
+	int (*i2c_start)(struct xgbe_prv_data *);
+	void (*i2c_stop)(struct xgbe_prv_data *);
+
+	/* For performing I2C operations */
+	int (*i2c_xfer)(struct xgbe_prv_data *, struct xgbe_i2c_op *);
+
+	/* For single interrupt support */
+	irqreturn_t (*i2c_isr)(int, struct xgbe_prv_data *);
 };
 
 struct xgbe_desc_if {
@@ -755,11 +915,28 @@ struct xgbe_hw_features {
 	unsigned int aux_snap_num;	/* Number of Aux snapshot inputs */
 };
 
+struct xgbe_version_data {
+	void (*init_function_ptrs_phy_impl)(struct xgbe_phy_if *);
+	enum xgbe_xpcs_access xpcs_access;
+	unsigned int mmc_64bit;
+	unsigned int tx_max_fifo_size;
+	unsigned int rx_max_fifo_size;
+	unsigned int tx_tstamp_workaround;
+	unsigned int ecc_support;
+	unsigned int i2c_support;
+};
+
 struct xgbe_prv_data {
 	struct net_device *netdev;
-	struct platform_device *pdev;
+	struct pci_dev *pcidev;
+	struct platform_device *platdev;
 	struct acpi_device *adev;
 	struct device *dev;
+	struct platform_device *phy_platdev;
+	struct device *phy_dev;
+
+	/* Version related data */
+	struct xgbe_version_data *vdata;
 
 	/* ACPI or DT flag */
 	unsigned int use_acpi;
@@ -770,12 +947,17 @@ struct xgbe_prv_data {
 	void __iomem *rxtx_regs;	/* SerDes Rx/Tx CSRs */
 	void __iomem *sir0_regs;	/* SerDes integration registers (1/2) */
 	void __iomem *sir1_regs;	/* SerDes integration registers (2/2) */
+	void __iomem *xprop_regs;	/* XGBE property registers */
+	void __iomem *xi2c_regs;	/* XGBE I2C CSRs */
 
 	/* Overall device lock */
 	spinlock_t lock;
 
 	/* XPCS indirect addressing lock */
 	spinlock_t xpcs_lock;
+	unsigned int xpcs_window;
+	unsigned int xpcs_window_size;
+	unsigned int xpcs_window_mask;
 
 	/* RSS addressing mutex */
 	struct mutex rss_mutex;
@@ -783,12 +965,39 @@ struct xgbe_prv_data {
 	/* Flags representing xgbe_state */
 	unsigned long dev_state;
 
+	/* ECC support */
+	unsigned long tx_sec_period;
+	unsigned long tx_ded_period;
+	unsigned long rx_sec_period;
+	unsigned long rx_ded_period;
+	unsigned long desc_sec_period;
+	unsigned long desc_ded_period;
+
+	unsigned int tx_sec_count;
+	unsigned int tx_ded_count;
+	unsigned int rx_sec_count;
+	unsigned int rx_ded_count;
+	unsigned int desc_ded_count;
+	unsigned int desc_sec_count;
+
+	struct msix_entry *msix_entries;
 	int dev_irq;
+	int ecc_irq;
+	int i2c_irq;
+	int channel_irq[XGBE_MAX_DMA_CHANNELS];
+
 	unsigned int per_channel_irq;
+	unsigned int irq_shared;
+	unsigned int irq_count;
+	unsigned int channel_irq_count;
+	unsigned int channel_irq_mode;
+
+	char ecc_name[IFNAMSIZ + 32];
 
 	struct xgbe_hw_if hw_if;
 	struct xgbe_phy_if phy_if;
 	struct xgbe_desc_if desc_if;
+	struct xgbe_i2c_if i2c_if;
 
 	/* AXI DMA settings */
 	unsigned int coherent;
@@ -803,12 +1012,16 @@ struct xgbe_prv_data {
 
 	/* Rings for Tx/Rx on a DMA channel */
 	struct xgbe_channel *channel;
+	unsigned int tx_max_channel_count;
+	unsigned int rx_max_channel_count;
 	unsigned int channel_count;
 	unsigned int tx_ring_count;
 	unsigned int tx_desc_count;
 	unsigned int rx_ring_count;
 	unsigned int rx_desc_count;
 
+	unsigned int tx_max_q_count;
+	unsigned int rx_max_q_count;
 	unsigned int tx_q_count;
 	unsigned int rx_q_count;
 
@@ -820,11 +1033,13 @@ struct xgbe_prv_data {
 	unsigned int tx_threshold;
 	unsigned int tx_pbl;
 	unsigned int tx_osp_mode;
+	unsigned int tx_max_fifo_size;
 
 	/* Rx settings */
 	unsigned int rx_sf_mode;
 	unsigned int rx_threshold;
 	unsigned int rx_pbl;
+	unsigned int rx_max_fifo_size;
 
 	/* Tx coalescing settings */
 	unsigned int tx_usecs;
@@ -842,6 +1057,8 @@ struct xgbe_prv_data {
 	unsigned int pause_autoneg;
 	unsigned int tx_pause;
 	unsigned int rx_pause;
+	unsigned int rx_rfa[XGBE_MAX_QUEUES];
+	unsigned int rx_rfd[XGBE_MAX_QUEUES];
 
 	/* Receive Side Scaling settings */
 	u8 rss_key[XGBE_RSS_HASH_KEY_SIZE];
@@ -881,13 +1098,16 @@ struct xgbe_prv_data {
 	struct ieee_pfc *pfc;
 	unsigned int q2tc_map[XGBE_MAX_QUEUES];
 	unsigned int prio2q_map[IEEE_8021QAZ_MAX_TCS];
+	unsigned int pfcq[XGBE_MAX_QUEUES];
+	unsigned int pfc_rfa;
 	u8 num_tcs;
 
 	/* Hardware features of the device */
 	struct xgbe_hw_features hw_feat;
 
-	/* Device restart work structure */
+	/* Device work structures */
 	struct work_struct restart_work;
+	struct work_struct stopdev_work;
 
 	/* Keeps track of power mode */
 	unsigned int power_down;
@@ -901,9 +1121,14 @@ struct xgbe_prv_data {
 	int phy_speed;
 
 	/* MDIO/PHY related settings */
+	unsigned int phy_started;
+	void *phy_data;
 	struct xgbe_phy phy;
 	int mdio_mmd;
 	unsigned long link_check;
+	struct completion mdio_complete;
+
+	unsigned int kr_redrv;
 
 	char an_name[IFNAMSIZ + 32];
 	struct workqueue_struct *an_workqueue;
@@ -911,23 +1136,9 @@ struct xgbe_prv_data {
 	int an_irq;
 	struct work_struct an_irq_work;
 
-	unsigned int speed_set;
-
-	/* SerDes UEFI configurable settings.
-	 *   Switching between modes/speeds requires new values for some
-	 *   SerDes settings.  The values can be supplied as device
-	 *   properties in array format.  The first array entry is for
-	 *   1GbE, second for 2.5GbE and third for 10GbE
-	 */
-	u32 serdes_blwc[XGBE_SPEEDS];
-	u32 serdes_cdr_rate[XGBE_SPEEDS];
-	u32 serdes_pq_skew[XGBE_SPEEDS];
-	u32 serdes_tx_amp[XGBE_SPEEDS];
-	u32 serdes_dfe_tap_cfg[XGBE_SPEEDS];
-	u32 serdes_dfe_tap_ena[XGBE_SPEEDS];
-
 	/* Auto-negotiation state machine support */
 	unsigned int an_int;
+	unsigned int an_status;
 	struct mutex an_mutex;
 	enum xgbe_an an_result;
 	enum xgbe_an an_state;
@@ -938,6 +1149,13 @@ struct xgbe_prv_data {
 	unsigned int parallel_detect;
 	unsigned int fec_ability;
 	unsigned long an_start;
+	enum xgbe_an_mode an_mode;
+
+	/* I2C support */
+	struct xgbe_i2c i2c;
+	struct mutex i2c_mutex;
+	struct completion i2c_complete;
+	char i2c_name[IFNAMSIZ + 32];
 
 	unsigned int lpm_ctrl;		/* CTRL1 for resume */
 
@@ -948,14 +1166,36 @@ struct xgbe_prv_data {
 
 	unsigned int debugfs_xpcs_mmd;
 	unsigned int debugfs_xpcs_reg;
+
+	unsigned int debugfs_xprop_reg;
+
+	unsigned int debugfs_xi2c_reg;
 #endif
 };
 
 /* Function prototypes*/
+struct xgbe_prv_data *xgbe_alloc_pdata(struct device *);
+void xgbe_free_pdata(struct xgbe_prv_data *);
+void xgbe_set_counts(struct xgbe_prv_data *);
+int xgbe_config_netdev(struct xgbe_prv_data *);
+void xgbe_deconfig_netdev(struct xgbe_prv_data *);
+
+int xgbe_platform_init(void);
+void xgbe_platform_exit(void);
+#ifdef CONFIG_PCI
+int xgbe_pci_init(void);
+void xgbe_pci_exit(void);
+#else
+static inline int xgbe_pci_init(void) { return 0; }
+static inline void xgbe_pci_exit(void) { }
+#endif
 
 void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *);
 void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *);
+void xgbe_init_function_ptrs_phy_v1(struct xgbe_phy_if *);
+void xgbe_init_function_ptrs_phy_v2(struct xgbe_phy_if *);
 void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *);
+void xgbe_init_function_ptrs_i2c(struct xgbe_i2c_if *);
 const struct net_device_ops *xgbe_get_netdev_ops(void);
 const struct ethtool_ops *xgbe_get_ethtool_ops(void);
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
index 23d72af83d82..e1a51d8892fc 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c
@@ -52,6 +52,7 @@ static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata,
 {
 	buf[0] = SET_VAL(CLE_DROP, dbptr->drop);
 	buf[4] = SET_VAL(CLE_FPSEL, dbptr->fpsel) |
+		 SET_VAL(CLE_NFPSEL, dbptr->nxtfpsel) |
 		 SET_VAL(CLE_DSTQIDL, dbptr->dstqid);
 
 	buf[5] = SET_VAL(CLE_DSTQIDH, (u32)dbptr->dstqid >> CLE_DSTQIDL_LEN) |
@@ -78,10 +79,10 @@ static void xgene_cle_kn_to_hw(struct xgene_cle_ptree_kn *kn, u32 *buf)
 	}
 }
 
-static void xgene_cle_dn_to_hw(struct xgene_cle_ptree_ewdn *dn,
+static void xgene_cle_dn_to_hw(const struct xgene_cle_ptree_ewdn *dn,
 			       u32 *buf, u32 jb)
 {
-	struct xgene_cle_ptree_branch *br;
+	const struct xgene_cle_ptree_branch *br;
 	u32 i, j = 0;
 	u32 npp;
 
@@ -204,17 +205,385 @@ static int xgene_cle_setup_dbptr(struct xgene_enet_pdata *pdata,
 	return 0;
 }
 
+static const struct xgene_cle_ptree_ewdn xgene_init_ptree_dn[] = {
+	{
+		/* PKT_TYPE_NODE */
+		.node_type = EWDN,
+		.last_node = 0,
+		.hdr_len_store = 1,
+		.hdr_extn = NO_BYTE,
+		.byte_store = NO_BYTE,
+		.search_byte_store = NO_BYTE,
+		.result_pointer = DB_RES_DROP,
+		.num_branches = 2,
+		.branch = {
+			{
+				/* IPV4 */
+				.valid = 1,
+				.next_packet_pointer = 22,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = PKT_PROT_NODE,
+				.next_branch = 0,
+				.data = 0x8,
+				.mask = 0x0
+			},
+			{
+				.valid = 0,
+				.next_packet_pointer = 262,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = LAST_NODE,
+				.next_branch = 0,
+				.data = 0x0,
+				.mask = 0xffff
+			}
+		},
+	},
+	{
+		/* PKT_PROT_NODE */
+		.node_type = EWDN,
+		.last_node = 0,
+		.hdr_len_store = 1,
+		.hdr_extn = NO_BYTE,
+		.byte_store = NO_BYTE,
+		.search_byte_store = NO_BYTE,
+		.result_pointer = DB_RES_DROP,
+		.num_branches = 3,
+		.branch = {
+			{
+				/* TCP */
+				.valid = 1,
+				.next_packet_pointer = 26,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_TCP_NODE,
+				.next_branch = 0,
+				.data = 0x0600,
+				.mask = 0x00ff
+			},
+			{
+				/* UDP */
+				.valid = 1,
+				.next_packet_pointer = 26,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_UDP_NODE,
+				.next_branch = 0,
+				.data = 0x1100,
+				.mask = 0x00ff
+			},
+			{
+				.valid = 0,
+				.next_packet_pointer = 26,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_OTHERS_NODE,
+				.next_branch = 0,
+				.data = 0x0,
+				.mask = 0xffff
+			}
+		}
+	},
+	{
+		/* RSS_IPV4_TCP_NODE */
+		.node_type = EWDN,
+		.last_node = 0,
+		.hdr_len_store = 1,
+		.hdr_extn = NO_BYTE,
+		.byte_store = NO_BYTE,
+		.search_byte_store = BOTH_BYTES,
+		.result_pointer = DB_RES_DROP,
+		.num_branches = 6,
+		.branch = {
+			{
+				/* SRC IPV4 B01 */
+				.valid = 0,
+				.next_packet_pointer = 28,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_TCP_NODE,
+				.next_branch = 1,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* SRC IPV4 B23 */
+				.valid = 0,
+				.next_packet_pointer = 30,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_TCP_NODE,
+				.next_branch = 2,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* DST IPV4 B01 */
+				.valid = 0,
+				.next_packet_pointer = 32,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_TCP_NODE,
+				.next_branch = 3,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* DST IPV4 B23 */
+				.valid = 0,
+				.next_packet_pointer = 34,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_TCP_NODE,
+				.next_branch = 4,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* TCP SRC Port */
+				.valid = 0,
+				.next_packet_pointer = 36,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_TCP_NODE,
+				.next_branch = 5,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* TCP DST Port */
+				.valid = 0,
+				.next_packet_pointer = 256,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = LAST_NODE,
+				.next_branch = 0,
+				.data = 0x0,
+				.mask = 0xffff
+			}
+		}
+	},
+	{
+		/* RSS_IPV4_UDP_NODE */
+		.node_type = EWDN,
+		.last_node = 0,
+		.hdr_len_store = 1,
+		.hdr_extn = NO_BYTE,
+		.byte_store = NO_BYTE,
+		.search_byte_store = BOTH_BYTES,
+		.result_pointer = DB_RES_DROP,
+		.num_branches = 6,
+		.branch = {
+			{
+				/* SRC IPV4 B01 */
+				.valid = 0,
+				.next_packet_pointer = 28,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_UDP_NODE,
+				.next_branch = 1,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* SRC IPV4 B23 */
+				.valid = 0,
+				.next_packet_pointer = 30,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_UDP_NODE,
+				.next_branch = 2,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* DST IPV4 B01 */
+				.valid = 0,
+				.next_packet_pointer = 32,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_UDP_NODE,
+				.next_branch = 3,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* DST IPV4 B23 */
+				.valid = 0,
+				.next_packet_pointer = 34,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_UDP_NODE,
+				.next_branch = 4,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* UDP SRC Port */
+				.valid = 0,
+				.next_packet_pointer = 36,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_UDP_NODE,
+				.next_branch = 5,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* UDP DST Port */
+				.valid = 0,
+				.next_packet_pointer = 258,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = LAST_NODE,
+				.next_branch = 0,
+				.data = 0x0,
+				.mask = 0xffff
+			}
+		}
+	},
+	{
+		/* RSS_IPV4_OTHERS_NODE */
+		.node_type = EWDN,
+		.last_node = 0,
+		.hdr_len_store = 1,
+		.hdr_extn = NO_BYTE,
+		.byte_store = NO_BYTE,
+		.search_byte_store = BOTH_BYTES,
+		.result_pointer = DB_RES_DROP,
+		.num_branches = 6,
+		.branch = {
+			{
+				/* SRC IPV4 B01 */
+				.valid = 0,
+				.next_packet_pointer = 28,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_OTHERS_NODE,
+				.next_branch = 1,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* SRC IPV4 B23 */
+				.valid = 0,
+				.next_packet_pointer = 30,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_OTHERS_NODE,
+				.next_branch = 2,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* DST IPV4 B01 */
+				.valid = 0,
+				.next_packet_pointer = 32,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_OTHERS_NODE,
+				.next_branch = 3,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* DST IPV4 B23 */
+				.valid = 0,
+				.next_packet_pointer = 34,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_OTHERS_NODE,
+				.next_branch = 4,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* SRC Port offset (as in TCP/UDP) */
+				.valid = 0,
+				.next_packet_pointer = 36,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = RSS_IPV4_OTHERS_NODE,
+				.next_branch = 5,
+				.data = 0x0,
+				.mask = 0xffff
+			},
+			{
+				/* DST Port offset (as in TCP/UDP) */
+				.valid = 0,
+				.next_packet_pointer = 260,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = LAST_NODE,
+				.next_branch = 0,
+				.data = 0x0,
+				.mask = 0xffff
+			}
+		}
+	},
+
+	{
+		/* LAST NODE */
+		.node_type = EWDN,
+		.last_node = 1,
+		.hdr_len_store = 1,
+		.hdr_extn = NO_BYTE,
+		.byte_store = NO_BYTE,
+		.search_byte_store = NO_BYTE,
+		.result_pointer = DB_RES_DROP,
+		.num_branches = 1,
+		.branch = {
+			{
+				.valid = 0,
+				.next_packet_pointer = 0,
+				.jump_bw = JMP_FW,
+				.jump_rel = JMP_ABS,
+				.operation = EQT,
+				.next_node = MAX_NODES,
+				.next_branch = 0,
+				.data = 0,
+				.mask = 0xffff
+			}
+		}
+	}
+};
+
 static int xgene_cle_setup_node(struct xgene_enet_pdata *pdata,
 				struct xgene_enet_cle *cle)
 {
 	struct xgene_cle_ptree *ptree = &cle->ptree;
-	struct xgene_cle_ptree_ewdn *dn = ptree->dn;
+	const struct xgene_cle_ptree_ewdn *dn = xgene_init_ptree_dn;
+	int num_dn = ARRAY_SIZE(xgene_init_ptree_dn);
 	struct xgene_cle_ptree_kn *kn = ptree->kn;
 	u32 buf[CLE_DRAM_REGS];
 	int i, j, ret;
 
 	memset(buf, 0, sizeof(buf));
-	for (i = 0; i < ptree->num_dn; i++) {
+	for (i = 0; i < num_dn; i++) {
 		xgene_cle_dn_to_hw(&dn[i], buf, cle->jump_bytes);
 		ret = xgene_cle_dram_wr(cle, buf, 17, i + ptree->start_node,
 					PTREE_RAM, CLE_CMD_WR);
@@ -224,8 +593,8 @@ static int xgene_cle_setup_node(struct xgene_enet_pdata *pdata,
 
 	/* continue node index for key node */
 	memset(buf, 0, sizeof(buf));
-	for (j = i; j < (ptree->num_kn + ptree->num_dn); j++) {
-		xgene_cle_kn_to_hw(&kn[j - ptree->num_dn], buf);
+	for (j = i; j < (ptree->num_kn + num_dn); j++) {
+		xgene_cle_kn_to_hw(&kn[j - num_dn], buf);
 		ret = xgene_cle_dram_wr(cle, buf, 17, j + ptree->start_node,
 					PTREE_RAM, CLE_CMD_WR);
 		if (ret)
@@ -346,11 +715,15 @@ static int xgene_cle_set_rss_idt(struct xgene_enet_pdata *pdata)
 	for (i = 0; i < XGENE_CLE_IDT_ENTRIES; i++) {
 		idx = i % pdata->rxq_cnt;
 		pool_id = pdata->rx_ring[idx]->buf_pool->id;
-		fpsel = xgene_enet_ring_bufnum(pool_id) - 0x20;
+		fpsel = xgene_enet_get_fpsel(pool_id);
 		dstqid = xgene_enet_dst_ring_num(pdata->rx_ring[idx]);
 		nfpsel = 0;
-		idt_reg = 0;
+		if (pdata->rx_ring[idx]->page_pool) {
+			pool_id = pdata->rx_ring[idx]->page_pool->id;
+			nfpsel = xgene_enet_get_fpsel(pool_id);
+		}
 
+		idt_reg = 0;
 		xgene_cle_idt_to_hw(pdata, dstqid, fpsel, nfpsel, &idt_reg);
 		ret = xgene_cle_dram_wr(&pdata->cle, &idt_reg, 1, i,
 					RSS_IDT, CLE_CMD_WR);
@@ -400,320 +773,41 @@ static int xgene_cle_setup_rss(struct xgene_enet_pdata *pdata)
 static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata)
 {
 	struct xgene_enet_cle *enet_cle = &pdata->cle;
+	u32 def_qid, def_fpsel, def_nxtfpsel, pool_id;
 	struct xgene_cle_dbptr dbptr[DB_MAX_PTRS];
-	struct xgene_cle_ptree_branch *br;
-	u32 def_qid, def_fpsel, pool_id;
 	struct xgene_cle_ptree *ptree;
 	struct xgene_cle_ptree_kn kn;
 	int ret;
-	struct xgene_cle_ptree_ewdn ptree_dn[] = {
-		{
-			/* PKT_TYPE_NODE */
-			.node_type = EWDN,
-			.last_node = 0,
-			.hdr_len_store = 1,
-			.hdr_extn = NO_BYTE,
-			.byte_store = NO_BYTE,
-			.search_byte_store = NO_BYTE,
-			.result_pointer = DB_RES_DROP,
-			.num_branches = 2,
-			.branch = {
-				{
-					/* IPV4 */
-					.valid = 1,
-					.next_packet_pointer = 22,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = PKT_PROT_NODE,
-					.next_branch = 0,
-					.data = 0x8,
-					.mask = 0x0
-				},
-				{
-					.valid = 0,
-					.next_packet_pointer = 262,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = LAST_NODE,
-					.next_branch = 0,
-					.data = 0x0,
-					.mask = 0xffff
-				}
-			},
-		},
-		{
-			/* PKT_PROT_NODE */
-			.node_type = EWDN,
-			.last_node = 0,
-			.hdr_len_store = 1,
-			.hdr_extn = NO_BYTE,
-			.byte_store = NO_BYTE,
-			.search_byte_store = NO_BYTE,
-			.result_pointer = DB_RES_DROP,
-			.num_branches = 3,
-			.branch = {
-				{
-					/* TCP */
-					.valid = 1,
-					.next_packet_pointer = 26,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_TCP_NODE,
-					.next_branch = 0,
-					.data = 0x0600,
-					.mask = 0x00ff
-				},
-				{
-					/* UDP */
-					.valid = 1,
-					.next_packet_pointer = 26,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_UDP_NODE,
-					.next_branch = 0,
-					.data = 0x1100,
-					.mask = 0x00ff
-				},
-				{
-					.valid = 0,
-					.next_packet_pointer = 260,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = LAST_NODE,
-					.next_branch = 0,
-					.data = 0x0,
-					.mask = 0xffff
-				}
-			}
-		},
-		{
-			/* RSS_IPV4_TCP_NODE */
-			.node_type = EWDN,
-			.last_node = 0,
-			.hdr_len_store = 1,
-			.hdr_extn = NO_BYTE,
-			.byte_store = NO_BYTE,
-			.search_byte_store = BOTH_BYTES,
-			.result_pointer = DB_RES_DROP,
-			.num_branches = 6,
-			.branch = {
-				{
-					/* SRC IPV4 B01 */
-					.valid = 0,
-					.next_packet_pointer = 28,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_TCP_NODE,
-					.next_branch = 1,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* SRC IPV4 B23 */
-					.valid = 0,
-					.next_packet_pointer = 30,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_TCP_NODE,
-					.next_branch = 2,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* DST IPV4 B01 */
-					.valid = 0,
-					.next_packet_pointer = 32,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_TCP_NODE,
-					.next_branch = 3,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* DST IPV4 B23 */
-					.valid = 0,
-					.next_packet_pointer = 34,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_TCP_NODE,
-					.next_branch = 4,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* TCP SRC Port */
-					.valid = 0,
-					.next_packet_pointer = 36,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_TCP_NODE,
-					.next_branch = 5,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* TCP DST Port */
-					.valid = 0,
-					.next_packet_pointer = 256,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = LAST_NODE,
-					.next_branch = 0,
-					.data = 0x0,
-					.mask = 0xffff
-				}
-			}
-		},
-		{
-			/* RSS_IPV4_UDP_NODE */
-			.node_type = EWDN,
-			.last_node = 0,
-			.hdr_len_store = 1,
-			.hdr_extn = NO_BYTE,
-			.byte_store = NO_BYTE,
-			.search_byte_store = BOTH_BYTES,
-			.result_pointer = DB_RES_DROP,
-			.num_branches = 6,
-			.branch = {
-				{
-					/* SRC IPV4 B01 */
-					.valid = 0,
-					.next_packet_pointer = 28,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_UDP_NODE,
-					.next_branch = 1,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* SRC IPV4 B23 */
-					.valid = 0,
-					.next_packet_pointer = 30,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_UDP_NODE,
-					.next_branch = 2,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* DST IPV4 B01 */
-					.valid = 0,
-					.next_packet_pointer = 32,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_UDP_NODE,
-					.next_branch = 3,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* DST IPV4 B23 */
-					.valid = 0,
-					.next_packet_pointer = 34,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_UDP_NODE,
-					.next_branch = 4,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* TCP SRC Port */
-					.valid = 0,
-					.next_packet_pointer = 36,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = RSS_IPV4_UDP_NODE,
-					.next_branch = 5,
-					.data = 0x0,
-					.mask = 0xffff
-				},
-				{
-					/* TCP DST Port */
-					.valid = 0,
-					.next_packet_pointer = 258,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = LAST_NODE,
-					.next_branch = 0,
-					.data = 0x0,
-					.mask = 0xffff
-				}
-			}
-		},
-		{
-			/* LAST NODE */
-			.node_type = EWDN,
-			.last_node = 1,
-			.hdr_len_store = 1,
-			.hdr_extn = NO_BYTE,
-			.byte_store = NO_BYTE,
-			.search_byte_store = NO_BYTE,
-			.result_pointer = DB_RES_DROP,
-			.num_branches = 1,
-			.branch = {
-				{
-					.valid = 0,
-					.next_packet_pointer = 0,
-					.jump_bw = JMP_FW,
-					.jump_rel = JMP_ABS,
-					.operation = EQT,
-					.next_node = MAX_NODES,
-					.next_branch = 0,
-					.data = 0,
-					.mask = 0xffff
-				}
-			}
-		}
-	};
+
+	if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII)
+		return -EINVAL;
 
 	ptree = &enet_cle->ptree;
 	ptree->start_pkt = 12; /* Ethertype */
-	if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) {
-		ret = xgene_cle_setup_rss(pdata);
-		if (ret) {
-			netdev_err(pdata->ndev, "RSS initialization failed\n");
-			return ret;
-		}
-	} else {
-		br = &ptree_dn[PKT_PROT_NODE].branch[0];
-		br->valid = 0;
-		br->next_packet_pointer = 260;
-		br->next_node = LAST_NODE;
-		br->data = 0x0000;
-		br->mask = 0xffff;
+
+	ret = xgene_cle_setup_rss(pdata);
+	if (ret) {
+		netdev_err(pdata->ndev, "RSS initialization failed\n");
+		return ret;
 	}
 
 	def_qid = xgene_enet_dst_ring_num(pdata->rx_ring[0]);
 	pool_id = pdata->rx_ring[0]->buf_pool->id;
-	def_fpsel = xgene_enet_ring_bufnum(pool_id) - 0x20;
+	def_fpsel = xgene_enet_get_fpsel(pool_id);
+	def_nxtfpsel = 0;
+	if (pdata->rx_ring[0]->page_pool) {
+		pool_id = pdata->rx_ring[0]->page_pool->id;
+		def_nxtfpsel = xgene_enet_get_fpsel(pool_id);
+	}
 
 	memset(dbptr, 0, sizeof(struct xgene_cle_dbptr) * DB_MAX_PTRS);
 	dbptr[DB_RES_ACCEPT].fpsel =  def_fpsel;
+	dbptr[DB_RES_ACCEPT].nxtfpsel = def_nxtfpsel;
 	dbptr[DB_RES_ACCEPT].dstqid = def_qid;
 	dbptr[DB_RES_ACCEPT].cle_priority = 1;
 
 	dbptr[DB_RES_DEF].fpsel = def_fpsel;
+	dbptr[DB_RES_DEF].nxtfpsel = def_nxtfpsel;
 	dbptr[DB_RES_DEF].dstqid = def_qid;
 	dbptr[DB_RES_DEF].cle_priority = 7;
 	xgene_cle_setup_def_dbptr(pdata, enet_cle, &dbptr[DB_RES_DEF],
@@ -727,10 +821,8 @@ static int xgene_enet_cle_init(struct xgene_enet_pdata *pdata)
 	kn.key[0].priority = 0;
 	kn.key[0].result_pointer = DB_RES_ACCEPT;
 
-	ptree->dn = ptree_dn;
 	ptree->kn = &kn;
 	ptree->dbptr = dbptr;
-	ptree->num_dn = MAX_NODES;
 	ptree->num_kn = 1;
 	ptree->num_dbptr = DB_MAX_PTRS;
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
index 9ac9f8e145ec..18fe8d56082c 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h
@@ -91,6 +91,8 @@
 #define CLE_DSTQIDH_LEN		5
 #define CLE_FPSEL_POS		21
 #define CLE_FPSEL_LEN		4
+#define CLE_NFPSEL_POS		17
+#define CLE_NFPSEL_LEN		4
 #define CLE_PRIORITY_POS	5
 #define CLE_PRIORITY_LEN	3
 
@@ -104,6 +106,7 @@ enum xgene_cle_ptree_nodes {
 	PKT_PROT_NODE,
 	RSS_IPV4_TCP_NODE,
 	RSS_IPV4_UDP_NODE,
+	RSS_IPV4_OTHERS_NODE,
 	LAST_NODE,
 	MAX_NODES
 };
@@ -275,10 +278,8 @@ struct xgene_cle_dbptr {
 };
 
 struct xgene_cle_ptree {
-	struct xgene_cle_ptree_ewdn *dn;
 	struct xgene_cle_ptree_kn *kn;
 	struct xgene_cle_dbptr *dbptr;
-	u32 num_dn;
 	u32 num_kn;
 	u32 num_dbptr;
 	u32 start_node;
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
index d372d4235c81..28fdedc30b74 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c
@@ -163,6 +163,74 @@ static void xgene_get_ethtool_stats(struct net_device *ndev,
 		*data++ = *(u64 *)(pdata + gstrings_stats[i].offset);
 }
 
+static void xgene_get_pauseparam(struct net_device *ndev,
+				 struct ethtool_pauseparam *pp)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+
+	pp->autoneg = pdata->pause_autoneg;
+	pp->tx_pause = pdata->tx_pause;
+	pp->rx_pause = pdata->rx_pause;
+}
+
+static int xgene_set_pauseparam(struct net_device *ndev,
+				struct ethtool_pauseparam *pp)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct phy_device *phydev = ndev->phydev;
+	u32 oldadv, newadv;
+
+	/* Not RGMII/SGMII: autoneg is rejected, the MAC is set directly */
+	if (pdata->phy_mode != PHY_INTERFACE_MODE_RGMII &&
+	    pdata->phy_mode != PHY_INTERFACE_MODE_SGMII) {
+		if (pp->autoneg)
+			return -EINVAL;
+
+		pdata->tx_pause = pp->tx_pause;
+		pdata->rx_pause = pp->rx_pause;
+		pdata->mac_ops->flowctl_tx(pdata, pdata->tx_pause);
+		pdata->mac_ops->flowctl_rx(pdata, pdata->rx_pause);
+		return 0;
+	}
+
+	if (!phydev)
+		return -EINVAL;
+
+	/* Pause must be supported; rx != tx additionally needs Asym_Pause */
+	if (!(phydev->supported & SUPPORTED_Pause))
+		return -EINVAL;
+	if (pp->rx_pause != pp->tx_pause &&
+	    !(phydev->supported & SUPPORTED_Asym_Pause))
+		return -EINVAL;
+
+	pdata->pause_autoneg = pp->autoneg;
+	pdata->tx_pause = pp->tx_pause;
+	pdata->rx_pause = pp->rx_pause;
+
+	/* Recompute the two pause bits of the PHY advertisement */
+	oldadv = phydev->advertising;
+	newadv = oldadv & ~(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+	if (pp->rx_pause)
+		newadv |= ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+	if (pp->tx_pause)
+		newadv ^= ADVERTISED_Asym_Pause;
+
+	/* Advertisement unchanged: nothing further is done */
+	if (newadv == oldadv)
+		return 0;
+
+	phydev->advertising = newadv;
+	if (phydev->autoneg)
+		return phy_start_aneg(phydev);
+
+	if (!pp->autoneg) {
+		pdata->mac_ops->flowctl_tx(pdata, pdata->tx_pause);
+		pdata->mac_ops->flowctl_rx(pdata, pdata->rx_pause);
+	}
+
+	return 0;
+}
+
 static const struct ethtool_ops xgene_ethtool_ops = {
 	.get_drvinfo = xgene_get_drvinfo,
 	.get_link = ethtool_op_get_link,
@@ -171,6 +239,8 @@ static const struct ethtool_ops xgene_ethtool_ops = {
 	.get_ethtool_stats = xgene_get_ethtool_stats,
 	.get_link_ksettings = xgene_get_link_ksettings,
 	.set_link_ksettings = xgene_set_link_ksettings,
+	.get_pauseparam = xgene_get_pauseparam,
+	.set_pauseparam = xgene_set_pauseparam
 };
 
 void xgene_enet_set_ethtool_ops(struct net_device *ndev)
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
index 5390ae89136c..06e681697c17 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c
@@ -504,6 +504,56 @@ static void xgene_gmac_set_speed(struct xgene_enet_pdata *pdata)
 	xgene_enet_wr_mcx_csr(pdata, ICM_CONFIG2_REG_0_ADDR, icm2);
 }
 
+static void xgene_enet_set_frame_size(struct xgene_enet_pdata *pdata, int size)
+{
+	xgene_enet_wr_mcx_mac(pdata, MAX_FRAME_LEN_ADDR, size);
+}
+
+static void xgene_gmac_enable_tx_pause(struct xgene_enet_pdata *pdata,
+				       bool enable)
+{
+	const u32 pause_bits = MULTI_DPF_AUTOCTRL | PAUSE_XON_EN;
+	u32 ecm_cfg;
+
+	/* Read-modify-write CSR_ECM_CFG_0: set or clear both bits together */
+	xgene_enet_rd_mcx_csr(pdata, CSR_ECM_CFG_0_ADDR, &ecm_cfg);
+	ecm_cfg &= ~pause_bits;
+	if (enable)
+		ecm_cfg |= pause_bits;
+
+	xgene_enet_wr_mcx_csr(pdata, CSR_ECM_CFG_0_ADDR, ecm_cfg);
+}
+
+static void xgene_gmac_flowctl_tx(struct xgene_enet_pdata *pdata, bool enable)
+{
+	u32 mac_cfg1;
+
+	/* Update TX_FLOW_EN in MAC_CONFIG_1 with a read-modify-write */
+	xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &mac_cfg1);
+	mac_cfg1 &= ~TX_FLOW_EN;
+	if (enable)
+		mac_cfg1 |= TX_FLOW_EN;
+
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, mac_cfg1);
+
+	/* Then pass the same setting to the enable_tx_pause MAC op */
+	pdata->mac_ops->enable_tx_pause(pdata, enable);
+}
+
+static void xgene_gmac_flowctl_rx(struct xgene_enet_pdata *pdata, bool enable)
+{
+	u32 mac_cfg1;
+
+	/* Update RX_FLOW_EN in MAC_CONFIG_1 with a read-modify-write */
+	xgene_enet_rd_mcx_mac(pdata, MAC_CONFIG_1_ADDR, &mac_cfg1);
+
+	mac_cfg1 &= ~RX_FLOW_EN;
+	if (enable)
+		mac_cfg1 |= RX_FLOW_EN;
+
+	xgene_enet_wr_mcx_mac(pdata, MAC_CONFIG_1_ADDR, mac_cfg1);
+}
+
 static void xgene_gmac_init(struct xgene_enet_pdata *pdata)
 {
 	u32 value;
@@ -527,6 +577,17 @@ static void xgene_gmac_init(struct xgene_enet_pdata *pdata)
 	/* Rtype should be copied from FP */
 	xgene_enet_wr_csr(pdata, RSIF_RAM_DBG_REG0_ADDR, 0);
 
+	/* Configure HW pause frame generation */
+	xgene_enet_rd_mcx_csr(pdata, CSR_MULTI_DPF0_ADDR, &value);
+	value = (DEF_QUANTA << 16) | (value & 0xFFFF);
+	xgene_enet_wr_mcx_csr(pdata, CSR_MULTI_DPF0_ADDR, value);
+
+	xgene_enet_wr_csr(pdata, RXBUF_PAUSE_THRESH, DEF_PAUSE_THRES);
+	xgene_enet_wr_csr(pdata, RXBUF_PAUSE_OFF_THRESH, DEF_PAUSE_OFF_THRES);
+
+	xgene_gmac_flowctl_tx(pdata, pdata->tx_pause);
+	xgene_gmac_flowctl_rx(pdata, pdata->rx_pause);
+
 	/* Rx-Tx traffic resume */
 	xgene_enet_wr_csr(pdata, CFG_LINK_AGGR_RESUME_0_ADDR, TX_PORT0);
 
@@ -550,12 +611,14 @@ static void xgene_enet_config_ring_if_assoc(struct xgene_enet_pdata *pdata)
 }
 
 static void xgene_enet_cle_bypass(struct xgene_enet_pdata *pdata,
-				  u32 dst_ring_num, u16 bufpool_id)
+				  u32 dst_ring_num, u16 bufpool_id,
+				  u16 nxtbufpool_id)
 {
 	u32 cb;
-	u32 fpsel;
+	u32 fpsel, nxtfpsel;
 
-	fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20;
+	fpsel = xgene_enet_get_fpsel(bufpool_id);
+	nxtfpsel = xgene_enet_get_fpsel(nxtbufpool_id);
 
 	xgene_enet_rd_csr(pdata, CLE_BYPASS_REG0_0_ADDR, &cb);
 	cb |= CFG_CLE_BYPASS_EN0;
@@ -565,6 +628,7 @@ static void xgene_enet_cle_bypass(struct xgene_enet_pdata *pdata,
 	xgene_enet_rd_csr(pdata, CLE_BYPASS_REG1_0_ADDR, &cb);
 	CFG_CLE_DSTQID0_SET(&cb, dst_ring_num);
 	CFG_CLE_FPSEL0_SET(&cb, fpsel);
+	CFG_CLE_NXTFPSEL0_SET(&cb, nxtfpsel);
 	xgene_enet_wr_csr(pdata, CLE_BYPASS_REG1_0_ADDR, cb);
 }
 
@@ -652,16 +716,14 @@ static int xgene_enet_reset(struct xgene_enet_pdata *pdata)
 static void xgene_enet_clear(struct xgene_enet_pdata *pdata,
 			     struct xgene_enet_desc_ring *ring)
 {
-	u32 addr, val, data;
-
-	val = xgene_enet_ring_bufnum(ring->id);
+	u32 addr, data;
 
 	if (xgene_enet_is_bufpool(ring->id)) {
 		addr = ENET_CFGSSQMIFPRESET_ADDR;
-		data = BIT(val - 0x20);
+		data = BIT(xgene_enet_get_fpsel(ring->id));
 	} else {
 		addr = ENET_CFGSSQMIWQRESET_ADDR;
-		data = BIT(val);
+		data = BIT(xgene_enet_ring_bufnum(ring->id));
 	}
 
 	xgene_enet_wr_ring_if(pdata, addr, data);
@@ -671,24 +733,24 @@ static void xgene_gport_shutdown(struct xgene_enet_pdata *pdata)
 {
 	struct device *dev = &pdata->pdev->dev;
 	struct xgene_enet_desc_ring *ring;
-	u32 pb, val;
+	u32 pb;
 	int i;
 
 	pb = 0;
 	for (i = 0; i < pdata->rxq_cnt; i++) {
 		ring = pdata->rx_ring[i]->buf_pool;
+		pb |= BIT(xgene_enet_get_fpsel(ring->id));
+		ring = pdata->rx_ring[i]->page_pool;
+		if (ring)
+			pb |= BIT(xgene_enet_get_fpsel(ring->id));
 
-		val = xgene_enet_ring_bufnum(ring->id);
-		pb |= BIT(val - 0x20);
 	}
 	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIFPRESET_ADDR, pb);
 
 	pb = 0;
 	for (i = 0; i < pdata->txq_cnt; i++) {
 		ring = pdata->tx_ring[i];
-
-		val = xgene_enet_ring_bufnum(ring->id);
-		pb |= BIT(val);
+		pb |= BIT(xgene_enet_ring_bufnum(ring->id));
 	}
 	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIWQRESET_ADDR, pb);
 
@@ -698,6 +760,48 @@ static void xgene_gport_shutdown(struct xgene_enet_pdata *pdata)
 	}
 }
 
+/* Resolve pause against the remote advertisement and update the MAC ops */
+static u32 xgene_enet_flowctrl_cfg(struct net_device *ndev)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct phy_device *phydev = ndev->phydev;
+	u32 want_tx, want_rx;
+	u16 local_adv, peer_adv;
+	u8 resolved;
+	u8 cap = 0;
+
+	/* Nothing to do when phydev->duplex is 0 or pause autoneg is off */
+	if (!phydev->duplex || !pdata->pause_autoneg)
+		return 0;
+
+	/* Local advertisement, built from the currently stored settings */
+	cap |= pdata->tx_pause ? FLOW_CTRL_TX : 0;
+	cap |= pdata->rx_pause ? FLOW_CTRL_RX : 0;
+	local_adv = mii_advertise_flowctrl(cap);
+
+	/* Remote advertisement, rebuilt from the phydev pause flags */
+	peer_adv = phydev->pause ? LPA_PAUSE_CAP : 0;
+	if (phydev->asym_pause)
+		peer_adv |= LPA_PAUSE_ASYM;
+
+	resolved = mii_resolve_flowctrl_fdx(local_adv, peer_adv);
+	want_tx = !!(resolved & FLOW_CTRL_TX);
+	want_rx = !!(resolved & FLOW_CTRL_RX);
+
+	/* Call a MAC op only for a direction whose stored state changes */
+	if (want_tx != pdata->tx_pause) {
+		pdata->tx_pause = want_tx;
+		pdata->mac_ops->flowctl_tx(pdata, pdata->tx_pause);
+	}
+
+	if (want_rx != pdata->rx_pause) {
+		pdata->rx_pause = want_rx;
+		pdata->mac_ops->flowctl_rx(pdata, pdata->rx_pause);
+	}
+
+	return 0;
+}
+
 static void xgene_enet_adjust_link(struct net_device *ndev)
 {
 	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
@@ -712,6 +816,8 @@ static void xgene_enet_adjust_link(struct net_device *ndev)
 			mac_ops->tx_enable(pdata);
 			phy_print_status(phydev);
 		}
+
+		xgene_enet_flowctrl_cfg(ndev);
 	} else {
 		mac_ops->rx_disable(pdata);
 		mac_ops->tx_disable(pdata);
@@ -785,6 +891,8 @@ int xgene_enet_phy_connect(struct net_device *ndev)
 	phy_dev->supported &= ~SUPPORTED_10baseT_Half &
 			      ~SUPPORTED_100baseT_Half &
 			      ~SUPPORTED_1000baseT_Half;
+	phy_dev->supported |= SUPPORTED_Pause |
+			      SUPPORTED_Asym_Pause;
 	phy_dev->advertising = phy_dev->supported;
 
 	return 0;
@@ -902,6 +1010,10 @@ const struct xgene_mac_ops xgene_gmac_ops = {
 	.tx_disable = xgene_gmac_tx_disable,
 	.set_speed = xgene_gmac_set_speed,
 	.set_mac_addr = xgene_gmac_set_mac_addr,
+	.set_framesize = xgene_enet_set_frame_size,
+	.enable_tx_pause = xgene_gmac_enable_tx_pause,
+	.flowctl_tx     = xgene_gmac_flowctl_tx,
+	.flowctl_rx     = xgene_gmac_flowctl_rx,
 };
 
 const struct xgene_port_ops xgene_gport_ops = {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
index 06e598c8bc16..5f83037bb96b 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h
@@ -165,10 +165,23 @@ enum xgene_enet_rm {
 #define CFG_CLE_IP_PROTOCOL0_SET(dst, val)	xgene_set_bits(dst, val, 16, 2)
 #define CFG_CLE_DSTQID0_SET(dst, val)		xgene_set_bits(dst, val, 0, 12)
 #define CFG_CLE_FPSEL0_SET(dst, val)		xgene_set_bits(dst, val, 16, 4)
+#define CFG_CLE_NXTFPSEL0_SET(dst, val)		xgene_set_bits(dst, val, 20, 4)
 #define CFG_MACMODE_SET(dst, val)		xgene_set_bits(dst, val, 18, 2)
 #define CFG_WAITASYNCRD_SET(dst, val)		xgene_set_bits(dst, val, 0, 16)
-#define CFG_CLE_DSTQID0(val)		(val & GENMASK(11, 0))
-#define CFG_CLE_FPSEL0(val)		((val << 16) & GENMASK(19, 16))
+#define CFG_CLE_DSTQID0(val)		((val) & GENMASK(11, 0))
+#define CFG_CLE_FPSEL0(val)		(((val) << 16) & GENMASK(19, 16))
+#define CSR_ECM_CFG_0_ADDR		0x0220
+#define CSR_ECM_CFG_1_ADDR		0x0224
+#define CSR_MULTI_DPF0_ADDR		0x0230
+#define RXBUF_PAUSE_THRESH		0x0534
+#define RXBUF_PAUSE_OFF_THRESH		0x0540
+#define DEF_PAUSE_THRES			0x7d
+#define DEF_PAUSE_OFF_THRES		0x6d
+#define DEF_QUANTA			0x8000
+#define NORM_PAUSE_OPCODE		0x0001
+#define PAUSE_XON_EN			BIT(30)
+#define MULTI_DPF_AUTOCTRL		BIT(28)
+#define CFG_CLE_NXTFPSEL0(val)		(((val) << 20) & GENMASK(23, 20))
 #define ICM_CONFIG0_REG_0_ADDR		0x0400
 #define ICM_CONFIG2_REG_0_ADDR		0x0410
 #define RX_DV_GATE_REG_0_ADDR		0x05fc
@@ -196,6 +209,8 @@ enum xgene_enet_rm {
 #define SOFT_RESET1			BIT(31)
 #define TX_EN				BIT(0)
 #define RX_EN				BIT(2)
+#define TX_FLOW_EN			BIT(4)
+#define RX_FLOW_EN			BIT(5)
 #define ENET_LHD_MODE			BIT(25)
 #define ENET_GHD_MODE			BIT(26)
 #define FULL_DUPLEX2			BIT(0)
@@ -346,6 +361,14 @@ static inline bool xgene_enet_is_bufpool(u16 id)
 	return ((id & RING_BUFNUM_MASK) >= 0x20) ? true : false;
 }
 
+static inline u8 xgene_enet_get_fpsel(u16 id)
+{
+	/* Ring ids that are not buffer pools get selector 0 */
+	if (!xgene_enet_is_bufpool(id))
+		return 0;
+	return xgene_enet_ring_bufnum(id) - RING_BUFNUM_BUFPOOL;
+}
+
 static inline u16 xgene_enet_get_numslots(u16 id, u32 size)
 {
 	bool is_bufpool = xgene_enet_is_bufpool(id);
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
index 8158d4698734..523b8eff6d7b 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c
@@ -37,6 +37,9 @@ static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool)
 	struct xgene_enet_raw_desc16 *raw_desc;
 	int i;
 
+	if (!buf_pool)
+		return;
+
 	for (i = 0; i < buf_pool->slots; i++) {
 		raw_desc = &buf_pool->raw_desc16[i];
 
@@ -47,6 +50,86 @@ static void xgene_enet_init_bufpool(struct xgene_enet_desc_ring *buf_pool)
 	}
 }
 
+/* Decode the BUFDATALEN field of descriptor word @bufdatalen into bytes */
+static u16 xgene_enet_get_data_len(u64 bufdatalen)
+{
+	u16 hw_len = GET_VAL(BUFDATALEN, bufdatalen);
+	u16 len, full;
+
+	if (unlikely(hw_len == 0x7800))
+		return 0;
+
+	if (!(hw_len & BIT(14))) {
+		len = hw_len & GENMASK(13, 0);
+		full = SIZE_16K;
+	} else {
+		len = hw_len & GENMASK(11, 0);
+		full = (hw_len & GENMASK(13, 12)) ? SIZE_2K : SIZE_4K;
+	}
+	/* An all-zero length field maps to the full buffer size */
+	return len ? len : full;
+}
+
+/* BUFDATALEN encoding for a buffer of @size: BIT(14) for SIZE_4K, else 0 */
+static u16 xgene_enet_set_data_len(u32 size)
+{
+	if (size == SIZE_4K)
+		return BIT(14);
+
+	return 0;
+}
+
+/* Fill @nbuf descriptors of @buf_pool with DMA-mapped pages; 0 or -ENOMEM */
+static int xgene_enet_refill_pagepool(struct xgene_enet_desc_ring *buf_pool,
+				      u32 nbuf)
+{
+	u16 hw_len = xgene_enet_set_data_len(PAGE_SIZE);
+	struct xgene_enet_raw_desc16 *raw_desc;
+	struct xgene_enet_pdata *pdata;
+	struct net_device *ndev;
+	dma_addr_t dma_addr;
+	struct device *dev;
+	struct page *page;
+	u32 slots, tail;
+	int i;
+
+	if (unlikely(!buf_pool))
+		return 0;
+
+	ndev = buf_pool->ndev;
+	pdata = netdev_priv(ndev);
+	dev = ndev_to_dev(ndev);
+	slots = buf_pool->slots - 1;
+	tail = buf_pool->tail;
+
+	for (i = 0; i < nbuf; i++) {
+		raw_desc = &buf_pool->raw_desc16[tail];
+		page = dev_alloc_page();
+		if (unlikely(!page))
+			return -ENOMEM;
+
+		dma_addr = dma_map_page(dev, page, 0,
+					PAGE_SIZE, DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(dev, dma_addr))) {
+			put_page(page);
+			return -ENOMEM;
+		}
+
+		raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) |
+					   SET_VAL(BUFDATALEN, hw_len) |
+					   SET_BIT(COHERENT));
+		/* Needed by xgene_enet_delete_pagepool() to unmap the page */
+		buf_pool->frag_dma_addr[tail] = dma_addr;
+		buf_pool->frag_page[tail] = page;
+		tail = (tail + 1) & slots;
+	}
+
+	pdata->ring_ops->wr_cmd(buf_pool, nbuf);
+	buf_pool->tail = tail;
+
+	return 0;
+}
+
 static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool,
 				     u32 nbuf)
 {
@@ -64,8 +147,9 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool,
 	ndev = buf_pool->ndev;
 	dev = ndev_to_dev(buf_pool->ndev);
 	pdata = netdev_priv(ndev);
+
 	bufdatalen = BUF_LEN_CODE_2K | (SKB_BUFFER_SIZE & GENMASK(11, 0));
-	len = XGENE_ENET_MAX_MTU;
+	len = XGENE_ENET_STD_MTU;
 
 	for (i = 0; i < nbuf; i++) {
 		raw_desc = &buf_pool->raw_desc16[tail];
@@ -122,6 +206,25 @@ static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool)
 	}
 }
 
+/* Unmap and release every page still recorded in @buf_pool->frag_page[] */
+static void xgene_enet_delete_pagepool(struct xgene_enet_desc_ring *buf_pool)
+{
+	struct device *dev = ndev_to_dev(buf_pool->ndev);
+	struct page *page;
+	int i;
+
+	for (i = 0; i < buf_pool->slots; i++) {
+		page = buf_pool->frag_page[i];
+		if (!page)
+			continue;
+
+		/* Still held by hardware: unmap and drop our reference */
+		dma_unmap_page(dev, buf_pool->frag_dma_addr[i], PAGE_SIZE,
+			       DMA_FROM_DEVICE);
+		put_page(page);
+	}
+}
+
 static irqreturn_t xgene_enet_rx_irq(const int irq, void *data)
 {
 	struct xgene_enet_desc_ring *rx_ring = data;
@@ -216,11 +319,11 @@ static int xgene_enet_setup_mss(struct net_device *ndev, u32 mss)
 		}
 	}
 
-	spin_unlock(&pdata->mss_lock);
-
 	/* No slots with ref_count = 0 available, return busy */
 	if (!mss_index_found)
-		return -EBUSY;
+		mss_index = -EBUSY;
+
+	spin_unlock(&pdata->mss_lock);
 
 	return mss_index;
 }
@@ -515,23 +618,67 @@ static void xgene_enet_skip_csum(struct sk_buff *skb)
 	}
 }
 
+static void xgene_enet_free_pagepool(struct xgene_enet_desc_ring *buf_pool,
+				     struct xgene_enet_raw_desc *raw_desc,
+				     struct xgene_enet_raw_desc *exp_desc)
+{
+	__le64 *desc = (void *)exp_desc;
+	dma_addr_t dma_addr;
+	struct device *dev;
+	struct page *page;
+	u16 slots, head;
+	u32 frag_size;
+	int i;
+
+	if (!buf_pool || !raw_desc || !exp_desc ||
+	    (!GET_VAL(NV, le64_to_cpu(raw_desc->m0))))
+		return;
+
+	dev = ndev_to_dev(buf_pool->ndev);
+	slots = buf_pool->slots - 1;
+	head = buf_pool->head;
+
+	for (i = 0; i < 4; i++) {
+		frag_size = xgene_enet_get_data_len(le64_to_cpu(desc[i ^ 1]));
+		if (!frag_size)
+			break;
+
+		dma_addr = GET_VAL(DATAADDR, le64_to_cpu(desc[i ^ 1]));
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+
+		page = buf_pool->frag_page[head];
+		put_page(page);
+
+		buf_pool->frag_page[head] = NULL;
+		head = (head + 1) & slots;
+	}
+	buf_pool->head = head;
+}
+
 static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
-			       struct xgene_enet_raw_desc *raw_desc)
+			       struct xgene_enet_raw_desc *raw_desc,
+			       struct xgene_enet_raw_desc *exp_desc)
 {
+	struct xgene_enet_desc_ring *buf_pool, *page_pool;
+	u32 datalen, frag_size, skb_index;
 	struct net_device *ndev;
-	struct device *dev;
-	struct xgene_enet_desc_ring *buf_pool;
-	u32 datalen, skb_index;
+	dma_addr_t dma_addr;
 	struct sk_buff *skb;
+	struct device *dev;
+	struct page *page;
+	u16 slots, head;
+	int i, ret = 0;
+	__le64 *desc;
 	u8 status;
-	int ret = 0;
+	bool nv;
 
 	ndev = rx_ring->ndev;
 	dev = ndev_to_dev(rx_ring->ndev);
 	buf_pool = rx_ring->buf_pool;
+	page_pool = rx_ring->page_pool;
 
 	dma_unmap_single(dev, GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1)),
-			 XGENE_ENET_MAX_MTU, DMA_FROM_DEVICE);
+			 XGENE_ENET_STD_MTU, DMA_FROM_DEVICE);
 	skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0));
 	skb = buf_pool->rx_skb[skb_index];
 	buf_pool->rx_skb[skb_index] = NULL;
@@ -541,6 +688,7 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
 		  GET_VAL(LERR, le64_to_cpu(raw_desc->m0));
 	if (unlikely(status > 2)) {
 		dev_kfree_skb_any(skb);
+		xgene_enet_free_pagepool(page_pool, raw_desc, exp_desc);
 		xgene_enet_parse_error(rx_ring, netdev_priv(rx_ring->ndev),
 				       status);
 		ret = -EIO;
@@ -548,11 +696,44 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
 	}
 
 	/* strip off CRC as HW isn't doing this */
-	datalen = GET_VAL(BUFDATALEN, le64_to_cpu(raw_desc->m1));
-	datalen = (datalen & DATALEN_MASK) - 4;
-	prefetch(skb->data - NET_IP_ALIGN);
+	datalen = xgene_enet_get_data_len(le64_to_cpu(raw_desc->m1));
+
+	nv = GET_VAL(NV, le64_to_cpu(raw_desc->m0));
+	if (!nv)
+		datalen -= 4;
+
 	skb_put(skb, datalen);
+	prefetch(skb->data - NET_IP_ALIGN);
+
+	if (!nv)
+		goto skip_jumbo;
 
+	slots = page_pool->slots - 1;
+	head = page_pool->head;
+	desc = (void *)exp_desc;
+
+	for (i = 0; i < 4; i++) {
+		frag_size = xgene_enet_get_data_len(le64_to_cpu(desc[i ^ 1]));
+		if (!frag_size)
+			break;
+
+		dma_addr = GET_VAL(DATAADDR, le64_to_cpu(desc[i ^ 1]));
+		dma_unmap_page(dev, dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+
+		page = page_pool->frag_page[head];
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, 0,
+				frag_size, PAGE_SIZE);
+
+		datalen += frag_size;
+
+		page_pool->frag_page[head] = NULL;
+		head = (head + 1) & slots;
+	}
+
+	page_pool->head = head;
+	rx_ring->npagepool -= skb_shinfo(skb)->nr_frags;
+
+skip_jumbo:
 	skb_checksum_none_assert(skb);
 	skb->protocol = eth_type_trans(skb, ndev);
 	if (likely((ndev->features & NETIF_F_IP_CSUM) &&
@@ -563,7 +744,15 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring,
 	rx_ring->rx_packets++;
 	rx_ring->rx_bytes += datalen;
 	napi_gro_receive(&rx_ring->napi, skb);
+
 out:
+	if (rx_ring->npagepool <= 0) {
+		ret = xgene_enet_refill_pagepool(page_pool, NUM_NXTBUFPOOL);
+		rx_ring->npagepool = NUM_NXTBUFPOOL;
+		if (ret)
+			return ret;
+	}
+
 	if (--rx_ring->nbufpool == 0) {
 		ret = xgene_enet_refill_bufpool(buf_pool, NUM_BUFPOOL);
 		rx_ring->nbufpool = NUM_BUFPOOL;
@@ -611,7 +800,7 @@ static int xgene_enet_process_ring(struct xgene_enet_desc_ring *ring,
 			desc_count++;
 		}
 		if (is_rx_desc(raw_desc)) {
-			ret = xgene_enet_rx_frame(ring, raw_desc);
+			ret = xgene_enet_rx_frame(ring, raw_desc, exp_desc);
 		} else {
 			ret = xgene_enet_tx_completion(ring, raw_desc);
 			is_completion = true;
@@ -854,7 +1043,7 @@ static void xgene_enet_delete_ring(struct xgene_enet_desc_ring *ring)
 
 static void xgene_enet_delete_desc_rings(struct xgene_enet_pdata *pdata)
 {
-	struct xgene_enet_desc_ring *buf_pool;
+	struct xgene_enet_desc_ring *buf_pool, *page_pool;
 	struct xgene_enet_desc_ring *ring;
 	int i;
 
@@ -867,18 +1056,28 @@ static void xgene_enet_delete_desc_rings(struct xgene_enet_pdata *pdata)
 				xgene_enet_delete_ring(ring->cp_ring);
 			pdata->tx_ring[i] = NULL;
 		}
+
 	}
 
 	for (i = 0; i < pdata->rxq_cnt; i++) {
 		ring = pdata->rx_ring[i];
 		if (ring) {
+			page_pool = ring->page_pool;
+			if (page_pool) {
+				xgene_enet_delete_pagepool(page_pool);
+				xgene_enet_delete_ring(page_pool);
+				pdata->port_ops->clear(pdata, page_pool);
+			}
+
 			buf_pool = ring->buf_pool;
 			xgene_enet_delete_bufpool(buf_pool);
 			xgene_enet_delete_ring(buf_pool);
 			pdata->port_ops->clear(pdata, buf_pool);
+
 			xgene_enet_delete_ring(ring);
 			pdata->rx_ring[i] = NULL;
 		}
+
 	}
 }
 
@@ -931,8 +1130,10 @@ static void xgene_enet_free_desc_ring(struct xgene_enet_desc_ring *ring)
 
 static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata)
 {
+	struct xgene_enet_desc_ring *page_pool;
 	struct device *dev = &pdata->pdev->dev;
 	struct xgene_enet_desc_ring *ring;
+	void *p;
 	int i;
 
 	for (i = 0; i < pdata->txq_cnt; i++) {
@@ -940,10 +1141,13 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata)
 		if (ring) {
 			if (ring->cp_ring && ring->cp_ring->cp_skb)
 				devm_kfree(dev, ring->cp_ring->cp_skb);
+
 			if (ring->cp_ring && pdata->cq_cnt)
 				xgene_enet_free_desc_ring(ring->cp_ring);
+
 			xgene_enet_free_desc_ring(ring);
 		}
+
 	}
 
 	for (i = 0; i < pdata->rxq_cnt; i++) {
@@ -952,8 +1156,21 @@ static void xgene_enet_free_desc_rings(struct xgene_enet_pdata *pdata)
 			if (ring->buf_pool) {
 				if (ring->buf_pool->rx_skb)
 					devm_kfree(dev, ring->buf_pool->rx_skb);
+
 				xgene_enet_free_desc_ring(ring->buf_pool);
 			}
+
+			page_pool = ring->page_pool;
+			if (page_pool) {
+				p = page_pool->frag_page;
+				if (p)
+					devm_kfree(dev, p);
+
+				p = page_pool->frag_dma_addr;
+				if (p)
+					devm_kfree(dev, p);
+			}
+
 			xgene_enet_free_desc_ring(ring);
 		}
 	}
@@ -1071,19 +1288,20 @@ static u8 xgene_start_cpu_bufnum(struct xgene_enet_pdata *pdata)
 
 static int xgene_enet_create_desc_rings(struct net_device *ndev)
 {
-	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
-	struct device *dev = ndev_to_dev(ndev);
 	struct xgene_enet_desc_ring *rx_ring, *tx_ring, *cp_ring;
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	struct xgene_enet_desc_ring *page_pool = NULL;
 	struct xgene_enet_desc_ring *buf_pool = NULL;
-	enum xgene_ring_owner owner;
-	dma_addr_t dma_exp_bufs;
-	u8 cpu_bufnum;
+	struct device *dev = ndev_to_dev(ndev);
 	u8 eth_bufnum = pdata->eth_bufnum;
 	u8 bp_bufnum = pdata->bp_bufnum;
 	u16 ring_num = pdata->ring_num;
+	enum xgene_ring_owner owner;
+	dma_addr_t dma_exp_bufs;
+	u16 ring_id, slots;
 	__le64 *exp_bufs;
-	u16 ring_id;
 	int i, ret, size;
+	u8 cpu_bufnum;
 
 	cpu_bufnum = xgene_start_cpu_bufnum(pdata);
 
@@ -1103,7 +1321,7 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
 		owner = xgene_derive_ring_owner(pdata);
 		ring_id = xgene_enet_get_ring_id(owner, bp_bufnum++);
 		buf_pool = xgene_enet_create_desc_ring(ndev, ring_num++,
-						       RING_CFGSIZE_2KB,
+						       RING_CFGSIZE_16KB,
 						       ring_id);
 		if (!buf_pool) {
 			ret = -ENOMEM;
@@ -1111,7 +1329,7 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
 		}
 
 		rx_ring->nbufpool = NUM_BUFPOOL;
-		rx_ring->buf_pool = buf_pool;
+		rx_ring->npagepool = NUM_NXTBUFPOOL;
 		rx_ring->irq = pdata->irqs[i];
 		buf_pool->rx_skb = devm_kcalloc(dev, buf_pool->slots,
 						sizeof(struct sk_buff *),
@@ -1124,6 +1342,42 @@ static int xgene_enet_create_desc_rings(struct net_device *ndev)
 		buf_pool->dst_ring_num = xgene_enet_dst_ring_num(buf_pool);
 		rx_ring->buf_pool = buf_pool;
 		pdata->rx_ring[i] = rx_ring;
+
+		if ((pdata->enet_id == XGENE_ENET1 &&  pdata->rxq_cnt > 4) ||
+		    (pdata->enet_id == XGENE_ENET2 &&  pdata->rxq_cnt > 16)) {
+			break;
+		}
+
+		/* allocate next buffer pool for jumbo packets */
+		owner = xgene_derive_ring_owner(pdata);
+		ring_id = xgene_enet_get_ring_id(owner, bp_bufnum++);
+		page_pool = xgene_enet_create_desc_ring(ndev, ring_num++,
+							RING_CFGSIZE_16KB,
+							ring_id);
+		if (!page_pool) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		slots = page_pool->slots;
+		page_pool->frag_page = devm_kcalloc(dev, slots,
+						    sizeof(struct page *),
+						    GFP_KERNEL);
+		if (!page_pool->frag_page) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		page_pool->frag_dma_addr = devm_kcalloc(dev, slots,
+							sizeof(dma_addr_t),
+							GFP_KERNEL);
+		if (!page_pool->frag_dma_addr) {
+			ret = -ENOMEM;
+			goto err;
+		}
+
+		page_pool->dst_ring_num = xgene_enet_dst_ring_num(page_pool);
+		rx_ring->page_pool = page_pool;
 	}
 
 	for (i = 0; i < pdata->txq_cnt; i++) {
@@ -1247,13 +1501,31 @@ static int xgene_enet_set_mac_address(struct net_device *ndev, void *addr)
 	return ret;
 }
 
+/* ndo_change_mtu: reopen a running device with the new frame size */
+static int xgene_change_mtu(struct net_device *ndev, int new_mtu)
+{
+	struct xgene_enet_pdata *pdata = netdev_priv(ndev);
+	int frame_size;
+
+	/* NOTE(review): new_mtu is not recorded when the device is down */
+	if (!netif_running(ndev))
+		return 0;
+
+	frame_size = (new_mtu > ETH_DATA_LEN) ? (new_mtu + 18) : 0x600;
+	xgene_enet_close(ndev);
+	ndev->mtu = new_mtu;
+	pdata->mac_ops->set_framesize(pdata, frame_size);
+
+	return xgene_enet_open(ndev);	/* surface a failed restart */
+}
+
 static const struct net_device_ops xgene_ndev_ops = {
 	.ndo_open = xgene_enet_open,
 	.ndo_stop = xgene_enet_close,
 	.ndo_start_xmit = xgene_enet_start_xmit,
 	.ndo_tx_timeout = xgene_enet_timeout,
 	.ndo_get_stats64 = xgene_enet_get_stats64,
-	.ndo_change_mtu = eth_change_mtu,
+	.ndo_change_mtu = xgene_change_mtu,
 	.ndo_set_mac_address = xgene_enet_set_mac_address,
 };
 
@@ -1382,9 +1654,13 @@ static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata)
 {
 	struct device *dev = &pdata->pdev->dev;
 
-	if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII)
+	pdata->sfp_gpio_en = false;
+	if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII ||
+	    (!device_property_present(dev, "sfp-gpios") &&
+	     !device_property_present(dev, "rxlos-gpios")))
 		return;
 
+	pdata->sfp_gpio_en = true;
 	pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN);
 	if (IS_ERR(pdata->sfp_rdy))
 		pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN);
@@ -1515,10 +1791,12 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata)
 static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata)
 {
 	struct xgene_enet_cle *enet_cle = &pdata->cle;
+	struct xgene_enet_desc_ring *page_pool;
 	struct net_device *ndev = pdata->ndev;
 	struct xgene_enet_desc_ring *buf_pool;
-	u16 dst_ring_num;
+	u16 dst_ring_num, ring_id;
 	int i, ret;
+	u32 count;
 
 	ret = pdata->port_ops->reset(pdata);
 	if (ret)
@@ -1534,9 +1812,18 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata)
 	for (i = 0; i < pdata->rxq_cnt; i++) {
 		buf_pool = pdata->rx_ring[i]->buf_pool;
 		xgene_enet_init_bufpool(buf_pool);
-		ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt);
+		page_pool = pdata->rx_ring[i]->page_pool;
+		xgene_enet_init_bufpool(page_pool);
+
+		count = pdata->rx_buff_cnt;
+		ret = xgene_enet_refill_bufpool(buf_pool, count);
 		if (ret)
 			goto err;
+
+		ret = xgene_enet_refill_pagepool(page_pool, count);
+		if (ret)
+			goto err;
+
 	}
 
 	dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]);
@@ -1555,10 +1842,17 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata)
 			netdev_err(ndev, "Preclass Tree init error\n");
 			goto err;
 		}
+
 	} else {
-		pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id);
+		dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]);
+		buf_pool = pdata->rx_ring[0]->buf_pool;
+		page_pool = pdata->rx_ring[0]->page_pool;
+		ring_id = (page_pool) ? page_pool->id : 0;
+		pdata->port_ops->cle_bypass(pdata, dst_ring_num,
+					    buf_pool->id, ring_id);
 	}
 
+	ndev->max_mtu = XGENE_ENET_MAX_MTU;
 	pdata->phy_speed = SPEED_UNKNOWN;
 	pdata->mac_ops->init(pdata);
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
index 0cda58f5a840..52571741da9f 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h
@@ -41,11 +41,14 @@
 #include "../../../phy/mdio-xgene.h"
 
 #define XGENE_DRV_VERSION	"v1.0"
-#define XGENE_ENET_MAX_MTU	1536
-#define SKB_BUFFER_SIZE		(XGENE_ENET_MAX_MTU - NET_IP_ALIGN)
+#define XGENE_ENET_STD_MTU	1536
+#define XGENE_ENET_MAX_MTU	9600
+#define SKB_BUFFER_SIZE		(XGENE_ENET_STD_MTU - NET_IP_ALIGN)
+
 #define BUFLEN_16K	(16 * 1024)
-#define NUM_PKT_BUF	64
+#define NUM_PKT_BUF	1024
 #define NUM_BUFPOOL	32
+#define NUM_NXTBUFPOOL	8
 #define MAX_EXP_BUFFS	256
 #define NUM_MSS_REG	4
 #define XGENE_MIN_ENET_FRAME_SIZE	60
@@ -88,6 +91,12 @@ enum xgene_enet_id {
 	XGENE_ENET2
 };
 
+enum xgene_enet_buf_len {
+	SIZE_2K = 2048,
+	SIZE_4K = 4096,
+	SIZE_16K = 16384
+};
+
 /* software context of a descriptor ring */
 struct xgene_enet_desc_ring {
 	struct net_device *ndev;
@@ -107,14 +116,18 @@ struct xgene_enet_desc_ring {
 	dma_addr_t irq_mbox_dma;
 	void *irq_mbox_addr;
 	u16 dst_ring_num;
-	u8 nbufpool;
+	u16 nbufpool;
+	int npagepool;
 	u8 index;
+	u32 flags;
 	struct sk_buff *(*rx_skb);
 	struct sk_buff *(*cp_skb);
 	dma_addr_t *frag_dma_addr;
+	struct page *(*frag_page);
 	enum xgene_enet_ring_cfgsize cfgsize;
 	struct xgene_enet_desc_ring *cp_ring;
 	struct xgene_enet_desc_ring *buf_pool;
+	struct xgene_enet_desc_ring *page_pool;
 	struct napi_struct napi;
 	union {
 		void *desc_addr;
@@ -143,8 +156,12 @@ struct xgene_mac_ops {
 	void (*rx_disable)(struct xgene_enet_pdata *pdata);
 	void (*set_speed)(struct xgene_enet_pdata *pdata);
 	void (*set_mac_addr)(struct xgene_enet_pdata *pdata);
+	void (*set_framesize)(struct xgene_enet_pdata *pdata, int framesize);
 	void (*set_mss)(struct xgene_enet_pdata *pdata, u16 mss, u8 index);
 	void (*link_state)(struct work_struct *work);
+	void (*enable_tx_pause)(struct xgene_enet_pdata *pdata, bool enable);
+	void (*flowctl_rx)(struct xgene_enet_pdata *pdata, bool enable);
+	void (*flowctl_tx)(struct xgene_enet_pdata *pdata, bool enable);
 };
 
 struct xgene_port_ops {
@@ -152,7 +169,7 @@ struct xgene_port_ops {
 	void (*clear)(struct xgene_enet_pdata *pdata,
 		      struct xgene_enet_desc_ring *ring);
 	void (*cle_bypass)(struct xgene_enet_pdata *pdata,
-			   u32 dst_ring_num, u16 bufpool_id);
+			   u32 dst_ring_num, u16 bufpool_id, u16 nxtbufpool_id);
 	void (*shutdown)(struct xgene_enet_pdata *pdata);
 };
 
@@ -219,6 +236,10 @@ struct xgene_enet_pdata {
 	u8 rx_delay;
 	bool mdio_driver;
 	struct gpio_desc *sfp_rdy;
+	bool sfp_gpio_en;
+	u32 pause_autoneg;
+	bool tx_pause;
+	bool rx_pause;
 };
 
 struct xgene_indirect_ctl {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
index af51dd5844ce..4ff40559f970 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ring2.c
@@ -119,6 +119,7 @@ static void xgene_enet_set_ring_id(struct xgene_enet_desc_ring *ring)
 
 	ring_id_buf = (ring->num << 9) & GENMASK(18, 9);
 	ring_id_buf |= PREFETCH_BUF_EN;
+
 	if (is_bufpool)
 		ring_id_buf |= IS_BUFFER_POOL;
 
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
index d12e9cbae820..a8e063bdee3b 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_sgmac.c
@@ -343,6 +343,11 @@ static void xgene_sgmac_set_speed(struct xgene_enet_pdata *p)
 	xgene_enet_wr_mcx_csr(p, icm2_addr, icm2);
 }
 
+static void xgene_sgmac_set_frame_size(struct xgene_enet_pdata *pdata, int size)
+{
+	xgene_enet_wr_mac(pdata, MAX_FRAME_LEN_ADDR, size);
+}
+
 static void xgene_sgmii_enable_autoneg(struct xgene_enet_pdata *p)
 {
 	u32 data, loop = 10;
@@ -360,11 +365,39 @@ static void xgene_sgmii_enable_autoneg(struct xgene_enet_pdata *p)
 		netdev_err(p->ndev, "Auto-negotiation failed\n");
 }
 
+static void xgene_sgmac_rxtx(struct xgene_enet_pdata *p, u32 bits, bool set)
+{
+	u32 data;
+
+	data = xgene_enet_rd_mac(p, MAC_CONFIG_1_ADDR);
+
+	if (set)
+		data |= bits;
+	else
+		data &= ~bits;
+
+	xgene_enet_wr_mac(p, MAC_CONFIG_1_ADDR, data);
+}
+
+static void xgene_sgmac_flowctl_tx(struct xgene_enet_pdata *p, bool enable)
+{
+	xgene_sgmac_rxtx(p, TX_FLOW_EN, enable);
+
+	p->mac_ops->enable_tx_pause(p, enable);
+}
+
+static void xgene_sgmac_flowctl_rx(struct xgene_enet_pdata *pdata, bool enable)
+{
+	xgene_sgmac_rxtx(pdata, RX_FLOW_EN, enable);
+}
+
 static void xgene_sgmac_init(struct xgene_enet_pdata *p)
 {
+	u32 pause_thres_reg, pause_off_thres_reg;
 	u32 enet_spare_cfg_reg, rsif_config_reg;
 	u32 cfg_bypass_reg, rx_dv_gate_reg;
-	u32 data, offset;
+	u32 data, data1, data2, offset;
+	u32 multi_dpf_reg;
 
 	if (!(p->enet_id == XGENE_ENET2 && p->mdio_driver))
 		xgene_sgmac_reset(p);
@@ -400,24 +433,50 @@ static void xgene_sgmac_init(struct xgene_enet_pdata *p)
 	data |= CFG_RSIF_FPBUFF_TIMEOUT_EN;
 	xgene_enet_wr_csr(p, rsif_config_reg, data);
 
-	/* Bypass traffic gating */
-	xgene_enet_wr_csr(p, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x84);
-	xgene_enet_wr_csr(p, cfg_bypass_reg, RESUME_TX);
-	xgene_enet_wr_mcx_csr(p, rx_dv_gate_reg, RESUME_RX0);
-}
+	/* Configure HW pause frame generation */
+	multi_dpf_reg = (p->enet_id == XGENE_ENET1) ? CSR_MULTI_DPF0_ADDR :
+			 XG_MCX_MULTI_DPF0_ADDR;
+	data = xgene_enet_rd_mcx_csr(p, multi_dpf_reg);
+	data = (DEF_QUANTA << 16) | (data & 0xffff);
+	xgene_enet_wr_mcx_csr(p, multi_dpf_reg, data);
+
+	if (p->enet_id != XGENE_ENET1) {
+		data = xgene_enet_rd_mcx_csr(p, XG_MCX_MULTI_DPF1_ADDR);
+		data =  (NORM_PAUSE_OPCODE << 16) | (data & 0xFFFF);
+		xgene_enet_wr_mcx_csr(p, XG_MCX_MULTI_DPF1_ADDR, data);
+	}
 
-static void xgene_sgmac_rxtx(struct xgene_enet_pdata *p, u32 bits, bool set)
-{
-	u32 data;
+	pause_thres_reg = (p->enet_id == XGENE_ENET1) ? RXBUF_PAUSE_THRESH :
+			   XG_RXBUF_PAUSE_THRESH;
+	pause_off_thres_reg = (p->enet_id == XGENE_ENET1) ?
+			       RXBUF_PAUSE_OFF_THRESH : 0;
 
-	data = xgene_enet_rd_mac(p, MAC_CONFIG_1_ADDR);
+	if (p->enet_id == XGENE_ENET1) {
+		data1 = xgene_enet_rd_csr(p, pause_thres_reg);
+		data2 = xgene_enet_rd_csr(p, pause_off_thres_reg);
+
+		if (!(p->port_id % 2)) {
+			data1 = (data1 & 0xffff0000) | DEF_PAUSE_THRES;
+			data2 = (data2 & 0xffff0000) | DEF_PAUSE_OFF_THRES;
+		} else {
+			data1 = (data1 & 0xffff) | (DEF_PAUSE_THRES << 16);
+			data2 = (data2 & 0xffff) | (DEF_PAUSE_OFF_THRES << 16);
+		}
 
-	if (set)
-		data |= bits;
-	else
-		data &= ~bits;
+		xgene_enet_wr_csr(p, pause_thres_reg, data1);
+		xgene_enet_wr_csr(p, pause_off_thres_reg, data2);
+	} else {
+		data = (DEF_PAUSE_OFF_THRES << 16) | DEF_PAUSE_THRES;
+		xgene_enet_wr_csr(p, pause_thres_reg, data);
+	}
 
-	xgene_enet_wr_mac(p, MAC_CONFIG_1_ADDR, data);
+	xgene_sgmac_flowctl_tx(p, p->tx_pause);
+	xgene_sgmac_flowctl_rx(p, p->rx_pause);
+
+	/* Bypass traffic gating */
+	xgene_enet_wr_csr(p, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x84);
+	xgene_enet_wr_csr(p, cfg_bypass_reg, RESUME_TX);
+	xgene_enet_wr_mcx_csr(p, rx_dv_gate_reg, RESUME_RX0);
 }
 
 static void xgene_sgmac_rx_enable(struct xgene_enet_pdata *p)
@@ -484,11 +543,12 @@ static int xgene_enet_reset(struct xgene_enet_pdata *p)
 }
 
 static void xgene_enet_cle_bypass(struct xgene_enet_pdata *p,
-				  u32 dst_ring_num, u16 bufpool_id)
+				  u32 dst_ring_num, u16 bufpool_id,
+				  u16 nxtbufpool_id)
 {
-	u32 data, fpsel;
 	u32 cle_bypass_reg0, cle_bypass_reg1;
 	u32 offset = p->port_id * MAC_OFFSET;
+	u32 data, fpsel, nxtfpsel;
 
 	if (p->enet_id == XGENE_ENET1) {
 		cle_bypass_reg0 = CLE_BYPASS_REG0_0_ADDR;
@@ -501,24 +561,24 @@ static void xgene_enet_cle_bypass(struct xgene_enet_pdata *p,
 	data = CFG_CLE_BYPASS_EN0;
 	xgene_enet_wr_csr(p, cle_bypass_reg0 + offset, data);
 
-	fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20;
-	data = CFG_CLE_DSTQID0(dst_ring_num) | CFG_CLE_FPSEL0(fpsel);
+	fpsel = xgene_enet_get_fpsel(bufpool_id);
+	nxtfpsel = xgene_enet_get_fpsel(nxtbufpool_id);
+	data = CFG_CLE_DSTQID0(dst_ring_num) | CFG_CLE_FPSEL0(fpsel) |
+	       CFG_CLE_NXTFPSEL0(nxtfpsel);
 	xgene_enet_wr_csr(p, cle_bypass_reg1 + offset, data);
 }
 
 static void xgene_enet_clear(struct xgene_enet_pdata *pdata,
 			     struct xgene_enet_desc_ring *ring)
 {
-	u32 addr, val, data;
-
-	val = xgene_enet_ring_bufnum(ring->id);
+	u32 addr, data;
 
 	if (xgene_enet_is_bufpool(ring->id)) {
 		addr = ENET_CFGSSQMIFPRESET_ADDR;
-		data = BIT(val - 0x20);
+		data = BIT(xgene_enet_get_fpsel(ring->id));
 	} else {
 		addr = ENET_CFGSSQMIWQRESET_ADDR;
-		data = BIT(val);
+		data = BIT(xgene_enet_ring_bufnum(ring->id));
 	}
 
 	xgene_enet_wr_ring_if(pdata, addr, data);
@@ -528,24 +588,23 @@ static void xgene_enet_shutdown(struct xgene_enet_pdata *p)
 {
 	struct device *dev = &p->pdev->dev;
 	struct xgene_enet_desc_ring *ring;
-	u32 pb, val;
+	u32 pb;
 	int i;
 
 	pb = 0;
 	for (i = 0; i < p->rxq_cnt; i++) {
 		ring = p->rx_ring[i]->buf_pool;
-
-		val = xgene_enet_ring_bufnum(ring->id);
-		pb |= BIT(val - 0x20);
+		pb |= BIT(xgene_enet_get_fpsel(ring->id));
+		ring = p->rx_ring[i]->page_pool;
+		if (ring)
+			pb |= BIT(xgene_enet_get_fpsel(ring->id));
 	}
 	xgene_enet_wr_ring_if(p, ENET_CFGSSQMIFPRESET_ADDR, pb);
 
 	pb = 0;
 	for (i = 0; i < p->txq_cnt; i++) {
 		ring = p->tx_ring[i];
-
-		val = xgene_enet_ring_bufnum(ring->id);
-		pb |= BIT(val);
+		pb |= BIT(xgene_enet_ring_bufnum(ring->id));
 	}
 	xgene_enet_wr_ring_if(p, ENET_CFGSSQMIWQRESET_ADDR, pb);
 
@@ -586,6 +645,25 @@ static void xgene_enet_link_state(struct work_struct *work)
 	schedule_delayed_work(&p->link_work, poll_interval);
 }
 
+static void xgene_sgmac_enable_tx_pause(struct xgene_enet_pdata *p, bool enable)
+{
+	u32 data, ecm_cfg_addr;
+
+	if (p->enet_id == XGENE_ENET1) {
+		ecm_cfg_addr = (!(p->port_id % 2)) ? CSR_ECM_CFG_0_ADDR :
+				CSR_ECM_CFG_1_ADDR;
+	} else {
+		ecm_cfg_addr = XG_MCX_ECM_CFG_0_ADDR;
+	}
+
+	data = xgene_enet_rd_mcx_csr(p, ecm_cfg_addr);
+	if (enable)
+		data |= MULTI_DPF_AUTOCTRL | PAUSE_XON_EN;
+	else
+		data &= ~(MULTI_DPF_AUTOCTRL | PAUSE_XON_EN);
+	xgene_enet_wr_mcx_csr(p, ecm_cfg_addr, data);
+}
+
 const struct xgene_mac_ops xgene_sgmac_ops = {
 	.init		= xgene_sgmac_init,
 	.reset		= xgene_sgmac_reset,
@@ -595,7 +673,11 @@ const struct xgene_mac_ops xgene_sgmac_ops = {
 	.tx_disable	= xgene_sgmac_tx_disable,
 	.set_speed	= xgene_sgmac_set_speed,
 	.set_mac_addr	= xgene_sgmac_set_mac_addr,
-	.link_state	= xgene_enet_link_state
+	.set_framesize  = xgene_sgmac_set_frame_size,
+	.link_state	= xgene_enet_link_state,
+	.enable_tx_pause = xgene_sgmac_enable_tx_pause,
+	.flowctl_tx     = xgene_sgmac_flowctl_tx,
+	.flowctl_rx     = xgene_sgmac_flowctl_rx
 };
 
 const struct xgene_port_ops xgene_sgport_ops = {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
index 6475f383ba83..ece19e6d68e3 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c
@@ -101,6 +101,14 @@ static void xgene_enet_wr_pcs(struct xgene_enet_pdata *pdata,
 			   wr_addr);
 }
 
+static void xgene_enet_wr_axg_csr(struct xgene_enet_pdata *pdata,
+				  u32 offset, u32 val)
+{
+	void __iomem *addr = pdata->mcx_mac_csr_addr + offset;
+
+	iowrite32(val, addr);
+}
+
 static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata,
 			      u32 offset, u32 *val)
 {
@@ -174,6 +182,14 @@ static bool xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata,
 	return success;
 }
 
+static void xgene_enet_rd_axg_csr(struct xgene_enet_pdata *pdata,
+				  u32 offset, u32 *val)
+{
+	void __iomem *addr = pdata->mcx_mac_csr_addr + offset;
+
+	*val = ioread32(addr);
+}
+
 static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata)
 {
 	struct net_device *ndev = pdata->ndev;
@@ -250,6 +266,12 @@ static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata,
 	xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, data);
 }
 
+static void xgene_xgmac_set_frame_size(struct xgene_enet_pdata *pdata, int size)
+{
+	xgene_enet_wr_mac(pdata, HSTMAXFRAME_LENGTH_ADDR,
+			  ((((size + 2) >> 2) << 16) | size));
+}
+
 static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata)
 {
 	u32 data;
@@ -259,6 +281,51 @@ static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata)
 	return data;
 }
 
+static void xgene_xgmac_enable_tx_pause(struct xgene_enet_pdata *pdata,
+					bool enable)
+{
+	u32 data;
+
+	xgene_enet_rd_axg_csr(pdata, XGENET_CSR_ECM_CFG_0_ADDR, &data);
+
+	if (enable)
+		data |= MULTI_DPF_AUTOCTRL | PAUSE_XON_EN;
+	else
+		data &= ~(MULTI_DPF_AUTOCTRL | PAUSE_XON_EN);
+
+	xgene_enet_wr_axg_csr(pdata, XGENET_CSR_ECM_CFG_0_ADDR, data);
+}
+
+static void xgene_xgmac_flowctl_tx(struct xgene_enet_pdata *pdata, bool enable)
+{
+	u32 data;
+
+	xgene_enet_rd_mac(pdata, AXGMAC_CONFIG_1, &data);
+
+	if (enable)
+		data |= HSTTCTLEN;
+	else
+		data &= ~HSTTCTLEN;
+
+	xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data);
+
+	pdata->mac_ops->enable_tx_pause(pdata, enable);
+}
+
+static void xgene_xgmac_flowctl_rx(struct xgene_enet_pdata *pdata, bool enable)
+{
+	u32 data;
+
+	xgene_enet_rd_mac(pdata, AXGMAC_CONFIG_1, &data);
+
+	if (enable)
+		data |= HSTRCTLEN;
+	else
+		data &= ~HSTRCTLEN;
+
+	xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data);
+}
+
 static void xgene_xgmac_init(struct xgene_enet_pdata *pdata)
 {
 	u32 data;
@@ -282,6 +349,23 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata)
 	xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x82);
 	xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0);
 	xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX);
+
+	/* Configure HW pause frame generation */
+	xgene_enet_rd_axg_csr(pdata, XGENET_CSR_MULTI_DPF0_ADDR, &data);
+	data = (DEF_QUANTA << 16) | (data & 0xFFFF);
+	xgene_enet_wr_axg_csr(pdata, XGENET_CSR_MULTI_DPF0_ADDR, data);
+
+	if (pdata->enet_id != XGENE_ENET1) {
+		xgene_enet_rd_axg_csr(pdata, XGENET_CSR_MULTI_DPF1_ADDR, &data);
+		data = (NORM_PAUSE_OPCODE << 16) | (data & 0xFFFF);
+		xgene_enet_wr_axg_csr(pdata, XGENET_CSR_MULTI_DPF1_ADDR, data);
+	}
+
+	data = (XG_DEF_PAUSE_OFF_THRES << 16) | XG_DEF_PAUSE_THRES;
+	xgene_enet_wr_csr(pdata, XG_RXBUF_PAUSE_THRESH, data);
+
+	xgene_xgmac_flowctl_tx(pdata, pdata->tx_pause);
+	xgene_xgmac_flowctl_rx(pdata, pdata->rx_pause);
 }
 
 static void xgene_xgmac_rx_enable(struct xgene_enet_pdata *pdata)
@@ -350,44 +434,47 @@ static int xgene_enet_reset(struct xgene_enet_pdata *pdata)
 }
 
 static void xgene_enet_xgcle_bypass(struct xgene_enet_pdata *pdata,
-				    u32 dst_ring_num, u16 bufpool_id)
+				    u32 dst_ring_num, u16 bufpool_id,
+				    u16 nxtbufpool_id)
 {
-	u32 cb, fpsel;
+	u32 cb, fpsel, nxtfpsel;
 
 	xgene_enet_rd_csr(pdata, XCLE_BYPASS_REG0_ADDR, &cb);
 	cb |= CFG_CLE_BYPASS_EN0;
 	CFG_CLE_IP_PROTOCOL0_SET(&cb, 3);
 	xgene_enet_wr_csr(pdata, XCLE_BYPASS_REG0_ADDR, cb);
 
-	fpsel = xgene_enet_ring_bufnum(bufpool_id) - 0x20;
+	fpsel = xgene_enet_get_fpsel(bufpool_id);
+	nxtfpsel = xgene_enet_get_fpsel(nxtbufpool_id);
 	xgene_enet_rd_csr(pdata, XCLE_BYPASS_REG1_ADDR, &cb);
 	CFG_CLE_DSTQID0_SET(&cb, dst_ring_num);
 	CFG_CLE_FPSEL0_SET(&cb, fpsel);
+	CFG_CLE_NXTFPSEL0_SET(&cb, nxtfpsel);
 	xgene_enet_wr_csr(pdata, XCLE_BYPASS_REG1_ADDR, cb);
+	pr_debug("cle_bypass: fpsel: %u nxtfpsel: %u\n", fpsel, nxtfpsel);
 }
 
 static void xgene_enet_shutdown(struct xgene_enet_pdata *pdata)
 {
 	struct device *dev = &pdata->pdev->dev;
 	struct xgene_enet_desc_ring *ring;
-	u32 pb, val;
+	u32 pb;
 	int i;
 
 	pb = 0;
 	for (i = 0; i < pdata->rxq_cnt; i++) {
 		ring = pdata->rx_ring[i]->buf_pool;
-
-		val = xgene_enet_ring_bufnum(ring->id);
-		pb |= BIT(val - 0x20);
+		pb |= BIT(xgene_enet_get_fpsel(ring->id));
+		ring = pdata->rx_ring[i]->page_pool;
+		if (ring)
+			pb |= BIT(xgene_enet_get_fpsel(ring->id));
 	}
 	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIFPRESET_ADDR, pb);
 
 	pb = 0;
 	for (i = 0; i < pdata->txq_cnt; i++) {
 		ring = pdata->tx_ring[i];
-
-		val = xgene_enet_ring_bufnum(ring->id);
-		pb |= BIT(val);
+		pb |= BIT(xgene_enet_ring_bufnum(ring->id));
 	}
 	xgene_enet_wr_ring_if(pdata, ENET_CFGSSQMIWQRESET_ADDR, pb);
 
@@ -400,31 +487,44 @@ static void xgene_enet_shutdown(struct xgene_enet_pdata *pdata)
 static void xgene_enet_clear(struct xgene_enet_pdata *pdata,
 			     struct xgene_enet_desc_ring *ring)
 {
-	u32 addr, val, data;
-
-	val = xgene_enet_ring_bufnum(ring->id);
+	u32 addr, data;
 
 	if (xgene_enet_is_bufpool(ring->id)) {
 		addr = ENET_CFGSSQMIFPRESET_ADDR;
-		data = BIT(val - 0x20);
+		data = BIT(xgene_enet_get_fpsel(ring->id));
 	} else {
 		addr = ENET_CFGSSQMIWQRESET_ADDR;
-		data = BIT(val);
+		data = BIT(xgene_enet_ring_bufnum(ring->id));
 	}
 
 	xgene_enet_wr_ring_if(pdata, addr, data);
 }
 
+static int xgene_enet_gpio_lookup(struct xgene_enet_pdata *pdata)
+{
+	struct device *dev = &pdata->pdev->dev;
+
+	pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN);
+	if (IS_ERR(pdata->sfp_rdy))
+		pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN);
+
+	if (IS_ERR(pdata->sfp_rdy))
+		return -ENODEV;
+
+	return 0;
+}
+
 static void xgene_enet_link_state(struct work_struct *work)
 {
 	struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work),
 					 struct xgene_enet_pdata, link_work);
-	struct gpio_desc *sfp_rdy = pdata->sfp_rdy;
 	struct net_device *ndev = pdata->ndev;
 	u32 link_status, poll_interval;
 
 	link_status = xgene_enet_link_status(pdata);
-	if (link_status && !IS_ERR(sfp_rdy) && !gpiod_get_value(sfp_rdy))
+	if (pdata->sfp_gpio_en && link_status &&
+	    (!IS_ERR(pdata->sfp_rdy) || !xgene_enet_gpio_lookup(pdata)) &&
+	    !gpiod_get_value(pdata->sfp_rdy))
 		link_status = 0;
 
 	if (link_status) {
@@ -458,8 +558,12 @@ const struct xgene_mac_ops xgene_xgmac_ops = {
 	.rx_disable = xgene_xgmac_rx_disable,
 	.tx_disable = xgene_xgmac_tx_disable,
 	.set_mac_addr = xgene_xgmac_set_mac_addr,
+	.set_framesize = xgene_xgmac_set_frame_size,
 	.set_mss = xgene_xgmac_set_mss,
-	.link_state = xgene_enet_link_state
+	.link_state = xgene_enet_link_state,
+	.enable_tx_pause = xgene_xgmac_enable_tx_pause,
+	.flowctl_rx = xgene_xgmac_flowctl_rx,
+	.flowctl_tx = xgene_xgmac_flowctl_tx
 };
 
 const struct xgene_port_ops xgene_xgport_ops = {
diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
index 360ccbd95566..03b847ad8937 100644
--- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
+++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h
@@ -59,6 +59,11 @@
 #define HSTMAXFRAME_LENGTH_ADDR		0x0020
 
 #define XG_MCX_RX_DV_GATE_REG_0_ADDR	0x0004
+#define XG_MCX_ECM_CFG_0_ADDR		0x0074
+#define XG_MCX_MULTI_DPF0_ADDR		0x007c
+#define XG_MCX_MULTI_DPF1_ADDR		0x0080
+#define XG_DEF_PAUSE_THRES		0x390
+#define XG_DEF_PAUSE_OFF_THRES		0x2c0
 #define XG_RSIF_CONFIG_REG_ADDR		0x00a0
 #define XCLE_BYPASS_REG0_ADDR           0x0160
 #define XCLE_BYPASS_REG1_ADDR           0x0164
@@ -70,6 +75,10 @@
 #define XG_ENET_SPARE_CFG_REG_ADDR	0x040c
 #define XG_ENET_SPARE_CFG_REG_1_ADDR	0x0410
 #define XGENET_RX_DV_GATE_REG_0_ADDR	0x0804
+#define XGENET_CSR_ECM_CFG_0_ADDR	0x0880
+#define XGENET_CSR_MULTI_DPF0_ADDR	0x0888
+#define XGENET_CSR_MULTI_DPF1_ADDR	0x088c
+#define XG_RXBUF_PAUSE_THRESH		0x0020
 #define XG_MCX_ICM_CONFIG0_REG_0_ADDR	0x00e0
 #define XG_MCX_ICM_CONFIG2_REG_0_ADDR	0x00e8
 
diff --git a/drivers/net/ethernet/apple/bmac.c b/drivers/net/ethernet/apple/bmac.c
index a65d7a60f116..2b2d87089987 100644
--- a/drivers/net/ethernet/apple/bmac.c
+++ b/drivers/net/ethernet/apple/bmac.c
@@ -1237,7 +1237,6 @@ static const struct net_device_ops bmac_netdev_ops = {
 	.ndo_start_xmit		= bmac_output,
 	.ndo_set_rx_mode	= bmac_set_multicast,
 	.ndo_set_mac_address	= bmac_set_address,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/apple/mace.c b/drivers/net/ethernet/apple/mace.c
index e58a7c73766e..96dd5300e0e5 100644
--- a/drivers/net/ethernet/apple/mace.c
+++ b/drivers/net/ethernet/apple/mace.c
@@ -102,7 +102,6 @@ static const struct net_device_ops mace_netdev_ops = {
 	.ndo_start_xmit		= mace_xmit_start,
 	.ndo_set_rx_mode	= mace_set_multicast,
 	.ndo_set_mac_address	= mace_set_address,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/apple/macmace.c b/drivers/net/ethernet/apple/macmace.c
index 89914ca17a49..857df9c45f04 100644
--- a/drivers/net/ethernet/apple/macmace.c
+++ b/drivers/net/ethernet/apple/macmace.c
@@ -186,7 +186,6 @@ static const struct net_device_ops mace_netdev_ops = {
 	.ndo_tx_timeout		= mace_tx_timeout,
 	.ndo_set_rx_mode	= mace_set_multicast,
 	.ndo_set_mac_address	= mace_set_address,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/arc/Kconfig b/drivers/net/ethernet/arc/Kconfig
index 689045186064..e743ddf46343 100644
--- a/drivers/net/ethernet/arc/Kconfig
+++ b/drivers/net/ethernet/arc/Kconfig
@@ -17,13 +17,14 @@ if NET_VENDOR_ARC
 
 config ARC_EMAC_CORE
 	tristate
+	depends on ARC || ARCH_ROCKCHIP || COMPILE_TEST
 	select MII
 	select PHYLIB
 
 config ARC_EMAC
 	tristate "ARC EMAC support"
 	select ARC_EMAC_CORE
-	depends on OF_IRQ && OF_NET && HAS_DMA
+	depends on OF_IRQ && OF_NET && HAS_DMA && (ARC || COMPILE_TEST)
 	---help---
 	  On some legacy ARC (Synopsys) FPGA boards such as ARCAngel4/ML50x
 	  non-standard on-chip ethernet device ARC EMAC 10/100 is used.
@@ -32,7 +33,7 @@ config ARC_EMAC
 config EMAC_ROCKCHIP
 	tristate "Rockchip EMAC support"
 	select ARC_EMAC_CORE
-	depends on OF_IRQ && OF_NET && REGULATOR && HAS_DMA
+	depends on OF_IRQ && OF_NET && REGULATOR && HAS_DMA && (ARCH_ROCKCHIP || COMPILE_TEST)
 	---help---
 	  Support for Rockchip RK3036/RK3066/RK3188 EMAC ethernet controllers.
 	  This selects Rockchip SoC glue layer support for the
diff --git a/drivers/net/ethernet/arc/emac_main.c b/drivers/net/ethernet/arc/emac_main.c
index be865b4dada2..abc9f2a59054 100644
--- a/drivers/net/ethernet/arc/emac_main.c
+++ b/drivers/net/ethernet/arc/emac_main.c
@@ -636,7 +636,7 @@ static int arc_emac_tx(struct sk_buff *skb, struct net_device *ndev)
 	if (unlikely(dma_mapping_error(&ndev->dev, addr))) {
 		stats->tx_dropped++;
 		stats->tx_errors++;
-		dev_kfree_skb(skb);
+		dev_kfree_skb_any(skb);
 		return NETDEV_TX_OK;
 	}
 	dma_unmap_addr_set(&priv->tx_buff[*txbd_curr], addr, addr);
diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 6cac919272ea..d4a409139ea2 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -50,6 +50,10 @@ struct alx_buffer {
 };
 
 struct alx_rx_queue {
+	struct net_device *netdev;
+	struct device *dev;
+	struct alx_napi *np;
+
 	struct alx_rrd *rrd;
 	dma_addr_t rrd_dma;
 
@@ -58,16 +62,26 @@ struct alx_rx_queue {
 
 	struct alx_buffer *bufs;
 
+	u16 count;
 	u16 write_idx, read_idx;
 	u16 rrd_read_idx;
+	u16 queue_idx;
 };
 #define ALX_RX_ALLOC_THRESH	32
 
 struct alx_tx_queue {
+	struct net_device *netdev;
+	struct device *dev;
+
 	struct alx_txd *tpd;
 	dma_addr_t tpd_dma;
+
 	struct alx_buffer *bufs;
+
+	u16 count;
 	u16 write_idx, read_idx;
+	u16 queue_idx;
+	u16 p_reg, c_reg;
 };
 
 #define ALX_DEFAULT_TX_WORK 128
@@ -76,6 +90,18 @@ enum alx_device_quirks {
 	ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0),
 };
 
+struct alx_napi {
+	struct napi_struct	napi;
+	struct alx_priv		*alx;
+	struct alx_rx_queue	*rxq;
+	struct alx_tx_queue	*txq;
+	int			vec_idx;
+	u32			vec_mask;
+	char			irq_lbl[IFNAMSIZ + 8];
+};
+
+#define ALX_MAX_NAPIS 8
+
 #define ALX_FLAG_USING_MSIX	BIT(0)
 #define ALX_FLAG_USING_MSI	BIT(1)
 
@@ -87,7 +113,6 @@ struct alx_priv {
 	/* msi-x vectors */
 	int num_vec;
 	struct msix_entry *msix_entries;
-	char irq_lbl[IFNAMSIZ + 8];
 
 	/* all descriptor memory */
 	struct {
@@ -96,6 +121,11 @@ struct alx_priv {
 		unsigned int size;
 	} descmem;
 
+	struct alx_napi *qnapi[ALX_MAX_NAPIS];
+	int num_txq;
+	int num_rxq;
+	int num_napi;
+
 	/* protect int_mask updates */
 	spinlock_t irq_lock;
 	u32 int_mask;
@@ -104,10 +134,6 @@ struct alx_priv {
 	unsigned int rx_ringsz;
 	unsigned int rxbuf_size;
 
-	struct napi_struct napi;
-	struct alx_tx_queue txq;
-	struct alx_rx_queue rxq;
-
 	struct work_struct link_check_wk;
 	struct work_struct reset_wk;
 
diff --git a/drivers/net/ethernet/atheros/alx/ethtool.c b/drivers/net/ethernet/atheros/alx/ethtool.c
index 08e22df2a300..2f4eabf652e8 100644
--- a/drivers/net/ethernet/atheros/alx/ethtool.c
+++ b/drivers/net/ethernet/atheros/alx/ethtool.c
@@ -125,64 +125,75 @@ static u32 alx_get_supported_speeds(struct alx_hw *hw)
 	return supported;
 }
 
-static int alx_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static int alx_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *cmd)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
 	struct alx_hw *hw = &alx->hw;
+	u32 supported, advertising;
 
-	ecmd->supported = SUPPORTED_Autoneg |
+	supported = SUPPORTED_Autoneg |
 			  SUPPORTED_TP |
 			  SUPPORTED_Pause |
 			  SUPPORTED_Asym_Pause;
 	if (alx_hw_giga(hw))
-		ecmd->supported |= SUPPORTED_1000baseT_Full;
-	ecmd->supported |= alx_get_supported_speeds(hw);
+		supported |= SUPPORTED_1000baseT_Full;
+	supported |= alx_get_supported_speeds(hw);
 
-	ecmd->advertising = ADVERTISED_TP;
+	advertising = ADVERTISED_TP;
 	if (hw->adv_cfg & ADVERTISED_Autoneg)
-		ecmd->advertising |= hw->adv_cfg;
+		advertising |= hw->adv_cfg;
 
-	ecmd->port = PORT_TP;
-	ecmd->phy_address = 0;
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = 0;
 
 	if (hw->adv_cfg & ADVERTISED_Autoneg)
-		ecmd->autoneg = AUTONEG_ENABLE;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 	else
-		ecmd->autoneg = AUTONEG_DISABLE;
-	ecmd->transceiver = XCVR_INTERNAL;
+		cmd->base.autoneg = AUTONEG_DISABLE;
 
 	if (hw->flowctrl & ALX_FC_ANEG && hw->adv_cfg & ADVERTISED_Autoneg) {
 		if (hw->flowctrl & ALX_FC_RX) {
-			ecmd->advertising |= ADVERTISED_Pause;
+			advertising |= ADVERTISED_Pause;
 
 			if (!(hw->flowctrl & ALX_FC_TX))
-				ecmd->advertising |= ADVERTISED_Asym_Pause;
+				advertising |= ADVERTISED_Asym_Pause;
 		} else if (hw->flowctrl & ALX_FC_TX) {
-			ecmd->advertising |= ADVERTISED_Asym_Pause;
+			advertising |= ADVERTISED_Asym_Pause;
 		}
 	}
 
-	ethtool_cmd_speed_set(ecmd, hw->link_speed);
-	ecmd->duplex = hw->duplex;
+	cmd->base.speed = hw->link_speed;
+	cmd->base.duplex = hw->duplex;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
 
-static int alx_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static int alx_set_link_ksettings(struct net_device *netdev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
 	struct alx_hw *hw = &alx->hw;
 	u32 adv_cfg;
+	u32 advertising;
 
 	ASSERT_RTNL();
 
-	if (ecmd->autoneg == AUTONEG_ENABLE) {
-		if (ecmd->advertising & ~alx_get_supported_speeds(hw))
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
+		if (advertising & ~alx_get_supported_speeds(hw))
 			return -EINVAL;
-		adv_cfg = ecmd->advertising | ADVERTISED_Autoneg;
+		adv_cfg = advertising | ADVERTISED_Autoneg;
 	} else {
-		adv_cfg = alx_speed_to_ethadv(ethtool_cmd_speed(ecmd),
-					      ecmd->duplex);
+		adv_cfg = alx_speed_to_ethadv(cmd->base.speed,
+					      cmd->base.duplex);
 
 		if (!adv_cfg || adv_cfg == ADVERTISED_1000baseT_Full)
 			return -EINVAL;
@@ -300,8 +311,6 @@ static int alx_get_sset_count(struct net_device *netdev, int sset)
 }
 
 const struct ethtool_ops alx_ethtool_ops = {
-	.get_settings	= alx_get_settings,
-	.set_settings	= alx_set_settings,
 	.get_pauseparam	= alx_get_pauseparam,
 	.set_pauseparam	= alx_set_pauseparam,
 	.get_msglevel	= alx_get_msglevel,
@@ -310,4 +319,6 @@ const struct ethtool_ops alx_ethtool_ops = {
 	.get_strings	= alx_get_strings,
 	.get_sset_count	= alx_get_sset_count,
 	.get_ethtool_stats	= alx_get_ethtool_stats,
+	.get_link_ksettings	= alx_get_link_ksettings,
+	.set_link_ksettings	= alx_set_link_ksettings,
 };
diff --git a/drivers/net/ethernet/atheros/alx/hw.h b/drivers/net/ethernet/atheros/alx/hw.h
index 0191477ace51..e42d7e0947eb 100644
--- a/drivers/net/ethernet/atheros/alx/hw.h
+++ b/drivers/net/ethernet/atheros/alx/hw.h
@@ -351,7 +351,6 @@ struct alx_rrd {
 #define ALX_MAX_JUMBO_PKT_SIZE	(9*1024)
 #define ALX_MAX_TSO_PKT_SIZE	(7*1024)
 #define ALX_MAX_FRAME_SIZE	ALX_MAX_JUMBO_PKT_SIZE
-#define ALX_MIN_FRAME_SIZE	(ETH_ZLEN + ETH_FCS_LEN + VLAN_HLEN)
 
 #define ALX_MAX_RX_QUEUES	8
 #define ALX_MAX_TX_QUEUES	4
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index c0f84b73574d..c8f525574d68 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -51,16 +51,12 @@
 
 const char alx_drv_name[] = "alx";
 
-static bool msix = false;
-module_param(msix, bool, 0);
-MODULE_PARM_DESC(msix, "Enable msi-x interrupt support");
-
-static void alx_free_txbuf(struct alx_priv *alx, int entry)
+static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
-	struct alx_buffer *txb = &alx->txq.bufs[entry];
+	struct alx_buffer *txb = &txq->bufs[entry];
 
 	if (dma_unmap_len(txb, size)) {
-		dma_unmap_single(&alx->hw.pdev->dev,
+		dma_unmap_single(txq->dev,
 				 dma_unmap_addr(txb, dma),
 				 dma_unmap_len(txb, size),
 				 DMA_TO_DEVICE);
@@ -75,7 +71,7 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
 
 static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
+	struct alx_rx_queue *rxq = alx->qnapi[0]->rxq;
 	struct sk_buff *skb;
 	struct alx_buffer *cur_buf;
 	dma_addr_t dma;
@@ -143,24 +139,42 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	return count;
 }
 
-static inline int alx_tpd_avail(struct alx_priv *alx)
+static struct alx_tx_queue *alx_tx_queue_mapping(struct alx_priv *alx,
+						 struct sk_buff *skb)
 {
-	struct alx_tx_queue *txq = &alx->txq;
+	unsigned int r_idx = skb->queue_mapping;
+
+	if (r_idx >= alx->num_txq)
+		r_idx = r_idx % alx->num_txq;
 
+	return alx->qnapi[r_idx]->txq;
+}
+
+static struct netdev_queue *alx_get_tx_queue(const struct alx_tx_queue *txq)
+{
+	return netdev_get_tx_queue(txq->netdev, txq->queue_idx);
+}
+
+static inline int alx_tpd_avail(struct alx_tx_queue *txq)
+{
 	if (txq->write_idx >= txq->read_idx)
-		return alx->tx_ringsz + txq->read_idx - txq->write_idx - 1;
+		return txq->count + txq->read_idx - txq->write_idx - 1;
 	return txq->read_idx - txq->write_idx - 1;
 }
 
-static bool alx_clean_tx_irq(struct alx_priv *alx)
+static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
+	struct alx_priv *alx;
+	struct netdev_queue *tx_queue;
 	u16 hw_read_idx, sw_read_idx;
 	unsigned int total_bytes = 0, total_packets = 0;
 	int budget = ALX_DEFAULT_TX_WORK;
 
+	alx = netdev_priv(txq->netdev);
+	tx_queue = alx_get_tx_queue(txq);
+
 	sw_read_idx = txq->read_idx;
-	hw_read_idx = alx_read_mem16(&alx->hw, ALX_TPD_PRI0_CIDX);
+	hw_read_idx = alx_read_mem16(&alx->hw, txq->c_reg);
 
 	if (sw_read_idx != hw_read_idx) {
 		while (sw_read_idx != hw_read_idx && budget > 0) {
@@ -173,19 +187,19 @@ static bool alx_clean_tx_irq(struct alx_priv *alx)
 				budget--;
 			}
 
-			alx_free_txbuf(alx, sw_read_idx);
+			alx_free_txbuf(txq, sw_read_idx);
 
-			if (++sw_read_idx == alx->tx_ringsz)
+			if (++sw_read_idx == txq->count)
 				sw_read_idx = 0;
 		}
 		txq->read_idx = sw_read_idx;
 
-		netdev_completed_queue(alx->dev, total_packets, total_bytes);
+		netdev_tx_completed_queue(tx_queue, total_packets, total_bytes);
 	}
 
-	if (netif_queue_stopped(alx->dev) && netif_carrier_ok(alx->dev) &&
-	    alx_tpd_avail(alx) > alx->tx_ringsz/4)
-		netif_wake_queue(alx->dev);
+	if (netif_tx_queue_stopped(tx_queue) && netif_carrier_ok(alx->dev) &&
+	    alx_tpd_avail(txq) > txq->count / 4)
+		netif_tx_wake_queue(tx_queue);
 
 	return sw_read_idx == hw_read_idx;
 }
@@ -200,15 +214,17 @@ static void alx_schedule_reset(struct alx_priv *alx)
 	schedule_work(&alx->reset_wk);
 }
 
-static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
+static int alx_clean_rx_irq(struct alx_rx_queue *rxq, int budget)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
+	struct alx_priv *alx;
 	struct alx_rrd *rrd;
 	struct alx_buffer *rxb;
 	struct sk_buff *skb;
 	u16 length, rfd_cleaned = 0;
 	int work = 0;
 
+	alx = netdev_priv(rxq->netdev);
+
 	while (work < budget) {
 		rrd = &rxq->rrd[rxq->rrd_read_idx];
 		if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
@@ -224,7 +240,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 		}
 
 		rxb = &rxq->bufs[rxq->read_idx];
-		dma_unmap_single(&alx->hw.pdev->dev,
+		dma_unmap_single(rxq->dev,
 				 dma_unmap_addr(rxb, dma),
 				 dma_unmap_len(rxb, size),
 				 DMA_FROM_DEVICE);
@@ -242,7 +258,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 		length = ALX_GET_FIELD(le32_to_cpu(rrd->word3),
 				       RRD_PKTLEN) - ETH_FCS_LEN;
 		skb_put(skb, length);
-		skb->protocol = eth_type_trans(skb, alx->dev);
+		skb->protocol = eth_type_trans(skb, rxq->netdev);
 
 		skb_checksum_none_assert(skb);
 		if (alx->dev->features & NETIF_F_RXCSUM &&
@@ -259,13 +275,13 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 			}
 		}
 
-		napi_gro_receive(&alx->napi, skb);
+		napi_gro_receive(&rxq->np->napi, skb);
 		work++;
 
 next_pkt:
-		if (++rxq->read_idx == alx->rx_ringsz)
+		if (++rxq->read_idx == rxq->count)
 			rxq->read_idx = 0;
-		if (++rxq->rrd_read_idx == alx->rx_ringsz)
+		if (++rxq->rrd_read_idx == rxq->count)
 			rxq->rrd_read_idx = 0;
 
 		if (++rfd_cleaned > ALX_RX_ALLOC_THRESH)
@@ -280,23 +296,26 @@ next_pkt:
 
 static int alx_poll(struct napi_struct *napi, int budget)
 {
-	struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
+	struct alx_napi *np = container_of(napi, struct alx_napi, napi);
+	struct alx_priv *alx = np->alx;
 	struct alx_hw *hw = &alx->hw;
 	unsigned long flags;
-	bool tx_complete;
-	int work;
+	bool tx_complete = true;
+	int work = 0;
 
-	tx_complete = alx_clean_tx_irq(alx);
-	work = alx_clean_rx_irq(alx, budget);
+	if (np->txq)
+		tx_complete = alx_clean_tx_irq(np->txq);
+	if (np->rxq)
+		work = alx_clean_rx_irq(np->rxq, budget);
 
 	if (!tx_complete || work == budget)
 		return budget;
 
-	napi_complete(&alx->napi);
+	napi_complete(&np->napi);
 
 	/* enable interrupt */
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		alx_mask_msix(hw, 1, false);
+		alx_mask_msix(hw, np->vec_idx, false);
 	} else {
 		spin_lock_irqsave(&alx->irq_lock, flags);
 		alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
@@ -350,7 +369,7 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
 		goto out;
 
 	if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) {
-		napi_schedule(&alx->napi);
+		napi_schedule(&alx->qnapi[0]->napi);
 		/* mask rx/tx interrupt, enable them when napi complete */
 		alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 		alx_write_mem32(hw, ALX_IMR, alx->int_mask);
@@ -365,15 +384,15 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
 
 static irqreturn_t alx_intr_msix_ring(int irq, void *data)
 {
-	struct alx_priv *alx = data;
-	struct alx_hw *hw = &alx->hw;
+	struct alx_napi *np = data;
+	struct alx_hw *hw = &np->alx->hw;
 
 	/* mask interrupt to ACK chip */
-	alx_mask_msix(hw, 1, true);
+	alx_mask_msix(hw, np->vec_idx, true);
 	/* clear interrupt status */
-	alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
+	alx_write_mem32(hw, ALX_ISR, np->vec_mask);
 
-	napi_schedule(&alx->napi);
+	napi_schedule(&np->napi);
 
 	return IRQ_HANDLED;
 }
@@ -424,63 +443,79 @@ static irqreturn_t alx_intr_legacy(int irq, void *data)
 	return alx_intr_handle(alx, intr);
 }
 
+static const u16 txring_header_reg[] = {ALX_TPD_PRI0_ADDR_LO,
+					ALX_TPD_PRI1_ADDR_LO,
+					ALX_TPD_PRI2_ADDR_LO,
+					ALX_TPD_PRI3_ADDR_LO};
+
 static void alx_init_ring_ptrs(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
 	u32 addr_hi = ((u64)alx->descmem.dma) >> 32;
+	struct alx_napi *np;
+	int i;
+
+	for (i = 0; i < alx->num_napi; i++) {
+		np = alx->qnapi[i];
+		if (np->txq) {
+			np->txq->read_idx = 0;
+			np->txq->write_idx = 0;
+			alx_write_mem32(hw,
+					txring_header_reg[np->txq->queue_idx],
+					np->txq->tpd_dma);
+		}
+
+		if (np->rxq) {
+			np->rxq->read_idx = 0;
+			np->rxq->write_idx = 0;
+			np->rxq->rrd_read_idx = 0;
+			alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
+			alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
+		}
+	}
+
+	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
+	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
 
-	alx->rxq.read_idx = 0;
-	alx->rxq.write_idx = 0;
-	alx->rxq.rrd_read_idx = 0;
 	alx_write_mem32(hw, ALX_RX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_RRD_ADDR_LO, alx->rxq.rrd_dma);
 	alx_write_mem32(hw, ALX_RRD_RING_SZ, alx->rx_ringsz);
-	alx_write_mem32(hw, ALX_RFD_ADDR_LO, alx->rxq.rfd_dma);
 	alx_write_mem32(hw, ALX_RFD_RING_SZ, alx->rx_ringsz);
 	alx_write_mem32(hw, ALX_RFD_BUF_SZ, alx->rxbuf_size);
 
-	alx->txq.read_idx = 0;
-	alx->txq.write_idx = 0;
-	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, alx->txq.tpd_dma);
-	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
-
 	/* load these pointers into the chip */
 	alx_write_mem32(hw, ALX_SRAM9, ALX_SRAM_LOAD_PTR);
 }
 
-static void alx_free_txring_buf(struct alx_priv *alx)
+static void alx_free_txring_buf(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
 	int i;
 
 	if (!txq->bufs)
 		return;
 
-	for (i = 0; i < alx->tx_ringsz; i++)
-		alx_free_txbuf(alx, i);
+	for (i = 0; i < txq->count; i++)
+		alx_free_txbuf(txq, i);
 
-	memset(txq->bufs, 0, alx->tx_ringsz * sizeof(struct alx_buffer));
-	memset(txq->tpd, 0, alx->tx_ringsz * sizeof(struct alx_txd));
+	memset(txq->bufs, 0, txq->count * sizeof(struct alx_buffer));
+	memset(txq->tpd, 0, txq->count * sizeof(struct alx_txd));
 	txq->write_idx = 0;
 	txq->read_idx = 0;
 
-	netdev_reset_queue(alx->dev);
+	netdev_tx_reset_queue(alx_get_tx_queue(txq));
 }
 
-static void alx_free_rxring_buf(struct alx_priv *alx)
+static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
 	struct alx_buffer *cur_buf;
 	u16 i;
 
-	if (rxq == NULL)
+	if (!rxq->bufs)
 		return;
 
-	for (i = 0; i < alx->rx_ringsz; i++) {
+	for (i = 0; i < rxq->count; i++) {
 		cur_buf = rxq->bufs + i;
 		if (cur_buf->skb) {
-			dma_unmap_single(&alx->hw.pdev->dev,
+			dma_unmap_single(rxq->dev,
 					 dma_unmap_addr(cur_buf, dma),
 					 dma_unmap_len(cur_buf, size),
 					 DMA_FROM_DEVICE);
@@ -498,8 +533,14 @@ static void alx_free_rxring_buf(struct alx_priv *alx)
 
 static void alx_free_buffers(struct alx_priv *alx)
 {
-	alx_free_txring_buf(alx);
-	alx_free_rxring_buf(alx);
+	int i;
+
+	for (i = 0; i < alx->num_txq; i++)
+		if (alx->qnapi[i] && alx->qnapi[i]->txq)
+			alx_free_txring_buf(alx->qnapi[i]->txq);
+
+	if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+		alx_free_rxring_buf(alx->qnapi[0]->rxq);
 }
 
 static int alx_reinit_rings(struct alx_priv *alx)
@@ -573,19 +614,41 @@ static int alx_set_mac_address(struct net_device *netdev, void *data)
 	return 0;
 }
 
-static int alx_alloc_descriptors(struct alx_priv *alx)
+static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
+			     int offset)
 {
-	alx->txq.bufs = kcalloc(alx->tx_ringsz,
-				sizeof(struct alx_buffer),
-				GFP_KERNEL);
-	if (!alx->txq.bufs)
+	txq->bufs = kcalloc(txq->count, sizeof(struct alx_buffer), GFP_KERNEL);
+	if (!txq->bufs)
 		return -ENOMEM;
 
-	alx->rxq.bufs = kcalloc(alx->rx_ringsz,
-				sizeof(struct alx_buffer),
-				GFP_KERNEL);
-	if (!alx->rxq.bufs)
-		goto out_free;
+	txq->tpd = alx->descmem.virt + offset;
+	txq->tpd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_txd) * txq->count;
+
+	return offset;
+}
+
+static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
+			     int offset)
+{
+	rxq->bufs = kcalloc(rxq->count, sizeof(struct alx_buffer), GFP_KERNEL);
+	if (!rxq->bufs)
+		return -ENOMEM;
+
+	rxq->rrd = alx->descmem.virt + offset;
+	rxq->rrd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_rrd) * rxq->count;
+
+	rxq->rfd = alx->descmem.virt + offset;
+	rxq->rfd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_rfd) * rxq->count;
+
+	return offset;
+}
+
+static int alx_alloc_rings(struct alx_priv *alx)
+{
+	int i, offset = 0;
 
 	/* physical tx/rx ring descriptors
 	 *
@@ -593,7 +656,8 @@ static int alx_alloc_descriptors(struct alx_priv *alx)
 	 * 4G boundary (hardware has a single register for high 32 bits
 	 * of addresses only)
 	 */
-	alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz +
+	alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz *
+			    alx->num_txq +
 			    sizeof(struct alx_rrd) * alx->rx_ringsz +
 			    sizeof(struct alx_rfd) * alx->rx_ringsz;
 	alx->descmem.virt = dma_zalloc_coherent(&alx->hw.pdev->dev,
@@ -601,87 +665,178 @@ static int alx_alloc_descriptors(struct alx_priv *alx)
 						&alx->descmem.dma,
 						GFP_KERNEL);
 	if (!alx->descmem.virt)
-		goto out_free;
-
-	alx->txq.tpd = alx->descmem.virt;
-	alx->txq.tpd_dma = alx->descmem.dma;
+		return -ENOMEM;
 
-	/* alignment requirement for next block */
+	/* alignment requirements */
 	BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
+	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-	alx->rxq.rrd =
-		(void *)((u8 *)alx->descmem.virt +
-			 sizeof(struct alx_txd) * alx->tx_ringsz);
-	alx->rxq.rrd_dma = alx->descmem.dma +
-			   sizeof(struct alx_txd) * alx->tx_ringsz;
+	for (i = 0; i < alx->num_txq; i++) {
+		offset = alx_alloc_tx_ring(alx, alx->qnapi[i]->txq, offset);
+		if (offset < 0) {
+			netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
+			return -ENOMEM;
+		}
+	}
 
-	/* alignment requirement for next block */
-	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
+	offset = alx_alloc_rx_ring(alx, alx->qnapi[0]->rxq, offset);
+	if (offset < 0) {
+		netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
+		return -ENOMEM;
+	}
 
-	alx->rxq.rfd =
-		(void *)((u8 *)alx->descmem.virt +
-			 sizeof(struct alx_txd) * alx->tx_ringsz +
-			 sizeof(struct alx_rrd) * alx->rx_ringsz);
-	alx->rxq.rfd_dma = alx->descmem.dma +
-			   sizeof(struct alx_txd) * alx->tx_ringsz +
-			   sizeof(struct alx_rrd) * alx->rx_ringsz;
+	alx_reinit_rings(alx);
 
 	return 0;
-out_free:
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
-	return -ENOMEM;
 }
 
-static int alx_alloc_rings(struct alx_priv *alx)
+static void alx_free_rings(struct alx_priv *alx)
 {
-	int err;
+	int i;
 
-	err = alx_alloc_descriptors(alx);
-	if (err)
-		return err;
+	alx_free_buffers(alx);
 
-	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
+	for (i = 0; i < alx->num_txq; i++)
+		if (alx->qnapi[i] && alx->qnapi[i]->txq)
+			kfree(alx->qnapi[i]->txq->bufs);
 
-	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
+	if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+		kfree(alx->qnapi[0]->rxq->bufs);
 
-	alx_reinit_rings(alx);
-	return 0;
+	if (alx->descmem.virt) {	/* free only what was allocated */
+		dma_free_coherent(&alx->hw.pdev->dev, alx->descmem.size,
+				  alx->descmem.virt, alx->descmem.dma);
+		alx->descmem.virt = NULL;	/* no stale pointer on re-entry */
+	}
 }
 
-static void alx_free_rings(struct alx_priv *alx)
+static void alx_free_napis(struct alx_priv *alx)
 {
-	netif_napi_del(&alx->napi);
-	alx_free_buffers(alx);
+	struct alx_napi *np;
+	int i;
 
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
+	for (i = 0; i < alx->num_napi; i++) {
+		np = alx->qnapi[i];
+		if (!np)
+			continue;
 
-	dma_free_coherent(&alx->hw.pdev->dev,
-			  alx->descmem.size,
-			  alx->descmem.virt,
-			  alx->descmem.dma);
+		netif_napi_del(&np->napi);
+		kfree(np->txq);
+		kfree(np->rxq);
+		kfree(np);
+		alx->qnapi[i] = NULL;
+	}
 }
 
+static const u16 tx_pidx_reg[] = {ALX_TPD_PRI0_PIDX, ALX_TPD_PRI1_PIDX,
+				  ALX_TPD_PRI2_PIDX, ALX_TPD_PRI3_PIDX};
+static const u16 tx_cidx_reg[] = {ALX_TPD_PRI0_CIDX, ALX_TPD_PRI1_CIDX,
+				  ALX_TPD_PRI2_CIDX, ALX_TPD_PRI3_CIDX};
+static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
+				   ALX_ISR_TX_Q2, ALX_ISR_TX_Q3};
+static const u32 rx_vect_mask[] = {ALX_ISR_RX_Q0, ALX_ISR_RX_Q1,
+				   ALX_ISR_RX_Q2, ALX_ISR_RX_Q3,
+				   ALX_ISR_RX_Q4, ALX_ISR_RX_Q5,
+				   ALX_ISR_RX_Q6, ALX_ISR_RX_Q7};
+
+static int alx_alloc_napis(struct alx_priv *alx)
+{
+	struct alx_napi *np;
+	struct alx_rx_queue *rxq;
+	struct alx_tx_queue *txq;
+	int i;
+
+	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
+
+	/* allocate alx_napi structures */
+	for (i = 0; i < alx->num_napi; i++) {
+		np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
+		if (!np)
+			goto err_out;
+
+		np->alx = alx;
+		netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
+		alx->qnapi[i] = np;
+	}
+
+	/* allocate tx queues */
+	for (i = 0; i < alx->num_txq; i++) {
+		np = alx->qnapi[i];
+		txq = kzalloc(sizeof(*txq), GFP_KERNEL);
+		if (!txq)
+			goto err_out;
+
+		np->txq = txq;
+		txq->p_reg = tx_pidx_reg[i];
+		txq->c_reg = tx_cidx_reg[i];
+		txq->queue_idx = i;
+		txq->count = alx->tx_ringsz;
+		txq->netdev = alx->dev;
+		txq->dev = &alx->hw.pdev->dev;
+		np->vec_mask |= tx_vect_mask[i];
+		alx->int_mask |= tx_vect_mask[i];
+	}
+
+	/* allocate rx queues */
+	np = alx->qnapi[0];
+	rxq = kzalloc(sizeof(*rxq), GFP_KERNEL);
+	if (!rxq)
+		goto err_out;
+
+	np->rxq = rxq;
+	rxq->np = alx->qnapi[0];
+	rxq->queue_idx = 0;
+	rxq->count = alx->rx_ringsz;
+	rxq->netdev = alx->dev;
+	rxq->dev = &alx->hw.pdev->dev;
+	np->vec_mask |= rx_vect_mask[0];
+	alx->int_mask |= rx_vect_mask[0];
+
+	return 0;
+
+err_out:
+	netdev_err(alx->dev, "error allocating internal structures\n");
+	alx_free_napis(alx);
+	return -ENOMEM;
+}
+
+static const int txq_vec_mapping_shift[] = {
+	0, ALX_MSI_MAP_TBL1_TXQ0_SHIFT,
+	0, ALX_MSI_MAP_TBL1_TXQ1_SHIFT,
+	1, ALX_MSI_MAP_TBL2_TXQ2_SHIFT,
+	1, ALX_MSI_MAP_TBL2_TXQ3_SHIFT,
+};
+
 static void alx_config_vector_mapping(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
-	u32 tbl = 0;
+	u32 tbl[2] = {0, 0};
+	int i, vector, idx, shift;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT;
-		tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
+		/* tx mappings */
+		for (i = 0, vector = 1; i < alx->num_txq; i++, vector++) {
+			idx = txq_vec_mapping_shift[i * 2];
+			shift = txq_vec_mapping_shift[i * 2 + 1];
+			tbl[idx] |= vector << shift;
+		}
+
+		/* rx mapping */
+		tbl[0] |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
 	}
 
-	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl);
-	alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0);
+	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl[0]);
+	alx_write_mem32(hw, ALX_MSI_MAP_TBL2, tbl[1]);
 	alx_write_mem32(hw, ALX_MSI_ID_MAP, 0);
 }
 
 static bool alx_enable_msix(struct alx_priv *alx)
 {
-	int i, err, num_vec = 2;
+	int i, err, num_vec, num_txq, num_rxq;
+
+	num_txq = min_t(int, num_online_cpus(), ALX_MAX_TX_QUEUES);
+	num_rxq = 1;
+	num_vec = max_t(int, num_txq, num_rxq) + 1;
 
 	alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry),
 				    GFP_KERNEL);
@@ -701,6 +856,10 @@ static bool alx_enable_msix(struct alx_priv *alx)
 	}
 
 	alx->num_vec = num_vec;
+	alx->num_napi = num_vec - 1;
+	alx->num_txq = num_txq;
+	alx->num_rxq = num_rxq;
+
 	return true;
 }
 
@@ -714,14 +873,29 @@ static int alx_request_msix(struct alx_priv *alx)
 	if (err)
 		goto out_err;
 
-	vector++;
-	sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name);
-
-	err = request_irq(alx->msix_entries[vector].vector,
-			  alx_intr_msix_ring, 0, alx->irq_lbl, alx);
+	for (i = 0; i < alx->num_napi; i++) {
+		struct alx_napi *np = alx->qnapi[i];
+
+		vector++;
+
+		if (np->txq && np->rxq)
+			sprintf(np->irq_lbl, "%s-TxRx-%u", netdev->name,
+				np->txq->queue_idx);
+		else if (np->txq)
+			sprintf(np->irq_lbl, "%s-tx-%u", netdev->name,
+				np->txq->queue_idx);
+		else if (np->rxq)
+			sprintf(np->irq_lbl, "%s-rx-%u", netdev->name,
+				np->rxq->queue_idx);
+		else
+			sprintf(np->irq_lbl, "%s-unused", netdev->name);
+
+		np->vec_idx = vector;
+		err = request_irq(alx->msix_entries[vector].vector,
+				  alx_intr_msix_ring, 0, np->irq_lbl, np);
 		if (err)
 			goto out_free;
-
+	}
 	return 0;
 
 out_free:
@@ -729,7 +903,8 @@ out_free:
 
 	vector--;
 	for (i = 0; i < vector; i++)
-		free_irq(alx->msix_entries[free_vector++].vector, alx);
+		free_irq(alx->msix_entries[free_vector++].vector,
+			 alx->qnapi[i]);
 
 out_err:
 	return err;
@@ -744,6 +919,9 @@ static void alx_init_intr(struct alx_priv *alx, bool msix)
 
 	if (!(alx->flags & ALX_FLAG_USING_MSIX)) {
 		alx->num_vec = 1;
+		alx->num_napi = 1;
+		alx->num_txq = 1;
+		alx->num_rxq = 1;
 
 		if (!pci_enable_msi(alx->hw.pdev))
 			alx->flags |= ALX_FLAG_USING_MSI;
@@ -799,6 +977,35 @@ static void alx_irq_disable(struct alx_priv *alx)
 	}
 }
 
+/* Release the rings and NAPI contexts, redo interrupt setup without MSI-X
+ * and allocate NAPI contexts and rings again for the resulting mode.
+ *
+ * Returns 0 on success, otherwise the error from alx_alloc_napis() or
+ * alx_alloc_rings().
+ */
+static int alx_realloc_resources(struct alx_priv *alx)
+{
+	int err;
+
+	alx_free_rings(alx);
+	alx_free_napis(alx);
+	alx_disable_advanced_intr(alx);
+	/* The counts set up for MSI-X (num_vec, num_napi, num_txq) are
+	 * stale now; re-run the non-MSI-X setup before reallocating.
+	 */
+	alx_init_intr(alx, false);
+
+	err = alx_alloc_napis(alx);
+	if (err)
+		return err;
+
+	err = alx_alloc_rings(alx);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int alx_request_irq(struct alx_priv *alx)
 {
 	struct pci_dev *pdev = alx->hw.pdev;
@@ -815,8 +1012,9 @@ static int alx_request_irq(struct alx_priv *alx)
 			goto out;
 
 		/* msix request failed, realloc resources */
-		alx_disable_advanced_intr(alx);
-		alx_init_intr(alx, false);
+		err = alx_realloc_resources(alx);
+		if (err)
+			goto out;
 	}
 
 	if (alx->flags & ALX_FLAG_USING_MSI) {
@@ -845,12 +1043,13 @@ out:
 static void alx_free_irq(struct alx_priv *alx)
 {
 	struct pci_dev *pdev = alx->hw.pdev;
-	int i;
+	int i, vector = 0;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		/* we have only 2 vectors without multi queue support */
-		for (i = 0; i < 2; i++)
-			free_irq(alx->msix_entries[i].vector, alx);
+		free_irq(alx->msix_entries[vector++].vector, alx);
+		for (i = 0; i < alx->num_napi; i++)
+			free_irq(alx->msix_entries[vector++].vector,
+				 alx->qnapi[i]);
 	} else {
 		free_irq(pdev->irq, alx);
 	}
@@ -892,6 +1091,9 @@ static int alx_init_sw(struct alx_priv *alx)
 	hw->smb_timer = 400;
 	hw->mtu = alx->dev->mtu;
 	alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu);
+	/* MTU range: 34 - 9256 */
+	alx->dev->min_mtu = 34;
+	alx->dev->max_mtu = ALX_MAX_FRAME_LEN(ALX_MAX_FRAME_SIZE);
 	alx->tx_ringsz = 256;
 	alx->rx_ringsz = 512;
 	hw->imt = 200;
@@ -932,11 +1134,14 @@ static netdev_features_t alx_fix_features(struct net_device *netdev,
 
 static void alx_netif_stop(struct alx_priv *alx)
 {
+	int i;
+
 	netif_trans_update(alx->dev);
 	if (netif_carrier_ok(alx->dev)) {
 		netif_carrier_off(alx->dev);
 		netif_tx_disable(alx->dev);
-		napi_disable(&alx->napi);
+		for (i = 0; i < alx->num_napi; i++)
+			napi_disable(&alx->qnapi[i]->napi);
 	}
 }
 
@@ -994,13 +1199,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 	struct alx_priv *alx = netdev_priv(netdev);
 	int max_frame = ALX_MAX_FRAME_LEN(mtu);
 
-	if ((max_frame < ALX_MIN_FRAME_SIZE) ||
-	    (max_frame > ALX_MAX_FRAME_SIZE))
-		return -EINVAL;
-
-	if (netdev->mtu == mtu)
-		return 0;
-
 	netdev->mtu = mtu;
 	alx->hw.mtu = mtu;
 	alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE);
@@ -1012,8 +1210,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 
 static void alx_netif_start(struct alx_priv *alx)
 {
+	int i;
+
 	netif_tx_wake_all_queues(alx->dev);
-	napi_enable(&alx->napi);
+	for (i = 0; i < alx->num_napi; i++)
+		napi_enable(&alx->qnapi[i]->napi);
 	netif_carrier_on(alx->dev);
 }
 
@@ -1021,21 +1222,28 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 {
 	int err;
 
-	alx_init_intr(alx, msix);
+	alx_init_intr(alx, true);
 
 	if (!resume)
 		netif_carrier_off(alx->dev);
 
-	err = alx_alloc_rings(alx);
+	err = alx_alloc_napis(alx);
 	if (err)
 		goto out_disable_adv_intr;
 
+	err = alx_alloc_rings(alx);
+	if (err)
+		goto out_free_rings;
+
 	alx_configure(alx);
 
 	err = alx_request_irq(alx);
 	if (err)
 		goto out_free_rings;
 
+	netif_set_real_num_tx_queues(alx->dev, alx->num_txq);
+	netif_set_real_num_rx_queues(alx->dev, alx->num_rxq);
+
 	/* clear old interrupts */
 	alx_write_mem32(&alx->hw, ALX_ISR, ~(u32)ALX_ISR_DIS);
 
@@ -1049,6 +1257,7 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 
 out_free_rings:
 	alx_free_rings(alx);
+	alx_free_napis(alx);
 out_disable_adv_intr:
 	alx_disable_advanced_intr(alx);
 	return err;
@@ -1059,6 +1268,7 @@ static void __alx_stop(struct alx_priv *alx)
 	alx_halt(alx);
 	alx_free_irq(alx);
 	alx_free_rings(alx);
+	alx_free_napis(alx);
 }
 
 static const char *alx_speed_desc(struct alx_hw *hw)
@@ -1241,9 +1451,8 @@ static int alx_tso(struct sk_buff *skb, struct alx_txd *first)
 	return 1;
 }
 
-static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
+static int alx_map_tx_skb(struct alx_tx_queue *txq, struct sk_buff *skb)
 {
-	struct alx_tx_queue *txq = &alx->txq;
 	struct alx_txd *tpd, *first_tpd;
 	dma_addr_t dma;
 	int maplen, f, first_idx = txq->write_idx;
@@ -1252,7 +1461,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	tpd = first_tpd;
 
 	if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) {
-		if (++txq->write_idx == alx->tx_ringsz)
+		if (++txq->write_idx == txq->count)
 			txq->write_idx = 0;
 
 		tpd = &txq->tpd[txq->write_idx];
@@ -1262,9 +1471,9 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	}
 
 	maplen = skb_headlen(skb);
-	dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen,
+	dma = dma_map_single(txq->dev, skb->data, maplen,
 			     DMA_TO_DEVICE);
-	if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+	if (dma_mapping_error(txq->dev, dma))
 		goto err_dma;
 
 	dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
@@ -1278,16 +1487,16 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 
 		frag = &skb_shinfo(skb)->frags[f];
 
-		if (++txq->write_idx == alx->tx_ringsz)
+		if (++txq->write_idx == txq->count)
 			txq->write_idx = 0;
 		tpd = &txq->tpd[txq->write_idx];
 
 		tpd->word1 = first_tpd->word1;
 
 		maplen = skb_frag_size(frag);
-		dma = skb_frag_dma_map(&alx->hw.pdev->dev, frag, 0,
+		dma = skb_frag_dma_map(txq->dev, frag, 0,
 				       maplen, DMA_TO_DEVICE);
-		if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+		if (dma_mapping_error(txq->dev, dma))
 			goto err_dma;
 		dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
 		dma_unmap_addr_set(&txq->bufs[txq->write_idx], dma, dma);
@@ -1300,7 +1509,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	tpd->word1 |= cpu_to_le32(1 << TPD_EOP_SHIFT);
 	txq->bufs[txq->write_idx].skb = skb;
 
-	if (++txq->write_idx == alx->tx_ringsz)
+	if (++txq->write_idx == txq->count)
 		txq->write_idx = 0;
 
 	return 0;
@@ -1308,23 +1517,24 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 err_dma:
 	f = first_idx;
 	while (f != txq->write_idx) {
-		alx_free_txbuf(alx, f);
-		if (++f == alx->tx_ringsz)
+		alx_free_txbuf(txq, f);
+		if (++f == txq->count)
 			f = 0;
 	}
 	return -ENOMEM;
 }
 
-static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
-				  struct net_device *netdev)
+static netdev_tx_t alx_start_xmit_ring(struct sk_buff *skb,
+				       struct alx_tx_queue *txq)
 {
-	struct alx_priv *alx = netdev_priv(netdev);
-	struct alx_tx_queue *txq = &alx->txq;
+	struct alx_priv *alx;
 	struct alx_txd *first;
 	int tso;
 
-	if (alx_tpd_avail(alx) < alx_tpd_req(skb)) {
-		netif_stop_queue(alx->dev);
+	alx = netdev_priv(txq->netdev);
+
+	if (alx_tpd_avail(txq) < alx_tpd_req(skb)) {
+		netif_tx_stop_queue(alx_get_tx_queue(txq));
 		goto drop;
 	}
 
@@ -1337,17 +1547,17 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 	else if (!tso && alx_tx_csum(skb, first))
 		goto drop;
 
-	if (alx_map_tx_skb(alx, skb) < 0)
+	if (alx_map_tx_skb(txq, skb) < 0)
 		goto drop;
 
-	netdev_sent_queue(alx->dev, skb->len);
+	netdev_tx_sent_queue(alx_get_tx_queue(txq), skb->len);
 
 	/* flush updates before updating hardware */
 	wmb();
-	alx_write_mem16(&alx->hw, ALX_TPD_PRI0_PIDX, txq->write_idx);
+	alx_write_mem16(&alx->hw, txq->p_reg, txq->write_idx);
 
-	if (alx_tpd_avail(alx) < alx->tx_ringsz/8)
-		netif_stop_queue(alx->dev);
+	if (alx_tpd_avail(txq) < txq->count / 8)
+		netif_tx_stop_queue(alx_get_tx_queue(txq));
 
 	return NETDEV_TX_OK;
 
@@ -1356,6 +1566,16 @@ drop:
 	return NETDEV_TX_OK;
 }
 
+/* Look up the tx queue @skb maps to and pass both to alx_start_xmit_ring(). */
+static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct alx_priv *priv = netdev_priv(netdev);
+	struct alx_tx_queue *txq = alx_tx_queue_mapping(priv, skb);
+
+	return alx_start_xmit_ring(skb, txq);
+}
+
 static void alx_tx_timeout(struct net_device *dev)
 {
 	struct alx_priv *alx = netdev_priv(dev);
@@ -1413,10 +1630,12 @@ static int alx_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 static void alx_poll_controller(struct net_device *netdev)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
+	int i;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
 		alx_intr_msix_misc(0, alx);
-		alx_intr_msix_ring(0, alx);
+		for (i = 0; i < alx->num_txq; i++)
+			alx_intr_msix_ring(0, alx->qnapi[i]);
 	} else if (alx->flags & ALX_FLAG_USING_MSI)
 		alx_intr_msi(0, alx);
 	else
@@ -1533,7 +1752,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_pci_release;
 	}
 
-	netdev = alloc_etherdev(sizeof(*alx));
+	netdev = alloc_etherdev_mqs(sizeof(*alx),
+				    ALX_MAX_TX_QUEUES, 1);
 	if (!netdev) {
 		err = -ENOMEM;
 		goto out_pci_release;
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
index 872b7abb0196..cfe86a20c899 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_ethtool.c
@@ -26,46 +26,52 @@
 
 #include "atl1c.h"
 
-static int atl1c_get_settings(struct net_device *netdev,
-			      struct ethtool_cmd *ecmd)
+static int atl1c_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct atl1c_hw *hw = &adapter->hw;
+	u32 supported, advertising;
 
-	ecmd->supported = (SUPPORTED_10baseT_Half  |
+	supported = (SUPPORTED_10baseT_Half  |
 			   SUPPORTED_10baseT_Full  |
 			   SUPPORTED_100baseT_Half |
 			   SUPPORTED_100baseT_Full |
 			   SUPPORTED_Autoneg       |
 			   SUPPORTED_TP);
 	if (hw->link_cap_flags & ATL1C_LINK_CAP_1000M)
-		ecmd->supported |= SUPPORTED_1000baseT_Full;
+		supported |= SUPPORTED_1000baseT_Full;
 
-	ecmd->advertising = ADVERTISED_TP;
+	advertising = ADVERTISED_TP;
 
-	ecmd->advertising |= hw->autoneg_advertised;
+	advertising |= hw->autoneg_advertised;
 
-	ecmd->port = PORT_TP;
-	ecmd->phy_address = 0;
-	ecmd->transceiver = XCVR_INTERNAL;
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = 0;
 
 	if (adapter->link_speed != SPEED_0) {
-		ethtool_cmd_speed_set(ecmd, adapter->link_speed);
+		cmd->base.speed = adapter->link_speed;
 		if (adapter->link_duplex == FULL_DUPLEX)
-			ecmd->duplex = DUPLEX_FULL;
+			cmd->base.duplex = DUPLEX_FULL;
 		else
-			ecmd->duplex = DUPLEX_HALF;
+			cmd->base.duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-		ecmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
-	ecmd->autoneg = AUTONEG_ENABLE;
+	cmd->base.autoneg = AUTONEG_ENABLE;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
 	return 0;
 }
 
-static int atl1c_set_settings(struct net_device *netdev,
-			      struct ethtool_cmd *ecmd)
+static int atl1c_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *cmd)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct atl1c_hw *hw = &adapter->hw;
@@ -74,12 +80,12 @@ static int atl1c_set_settings(struct net_device *netdev,
 	while (test_and_set_bit(__AT_RESETTING, &adapter->flags))
 		msleep(1);
 
-	if (ecmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		autoneg_advertised = ADVERTISED_Autoneg;
 	} else {
-		u32 speed = ethtool_cmd_speed(ecmd);
+		u32 speed = cmd->base.speed;
 		if (speed == SPEED_1000) {
-			if (ecmd->duplex != DUPLEX_FULL) {
+			if (cmd->base.duplex != DUPLEX_FULL) {
 				if (netif_msg_link(adapter))
 					dev_warn(&adapter->pdev->dev,
 						"1000M half is invalid\n");
@@ -88,12 +94,12 @@ static int atl1c_set_settings(struct net_device *netdev,
 			}
 			autoneg_advertised = ADVERTISED_1000baseT_Full;
 		} else if (speed == SPEED_100) {
-			if (ecmd->duplex == DUPLEX_FULL)
+			if (cmd->base.duplex == DUPLEX_FULL)
 				autoneg_advertised = ADVERTISED_100baseT_Full;
 			else
 				autoneg_advertised = ADVERTISED_100baseT_Half;
 		} else {
-			if (ecmd->duplex == DUPLEX_FULL)
+			if (cmd->base.duplex == DUPLEX_FULL)
 				autoneg_advertised = ADVERTISED_10baseT_Full;
 			else
 				autoneg_advertised = ADVERTISED_10baseT_Half;
@@ -284,8 +290,6 @@ static int atl1c_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl1c_ethtool_ops = {
-	.get_settings           = atl1c_get_settings,
-	.set_settings           = atl1c_set_settings,
 	.get_drvinfo            = atl1c_get_drvinfo,
 	.get_regs_len           = atl1c_get_regs_len,
 	.get_regs               = atl1c_get_regs,
@@ -297,6 +301,8 @@ static const struct ethtool_ops atl1c_ethtool_ops = {
 	.get_link               = ethtool_op_get_link,
 	.get_eeprom_len         = atl1c_get_eeprom_len,
 	.get_eeprom             = atl1c_get_eeprom,
+	.get_link_ksettings     = atl1c_get_link_ksettings,
+	.set_link_ksettings     = atl1c_set_link_ksettings,
 };
 
 void atl1c_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index a3200ea6d765..773d3b7d8dd5 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -519,6 +519,26 @@ static int atl1c_set_features(struct net_device *netdev,
 	return 0;
 }
 
+/* Set netdev->max_mtu from the NIC type: the l1c/l1d/l1d_2 (GbE) devices
+ * accept jumbo packets, every other type is limited to the standard
+ * Ethernet payload size.
+ */
+static void atl1c_set_max_mtu(struct net_device *netdev)
+{
+	struct atl1c_adapter *adapter = netdev_priv(netdev);
+	struct atl1c_hw *hw = &adapter->hw;
+	bool jumbo = hw->nic_type == athr_l1c ||
+		     hw->nic_type == athr_l1d ||
+		     hw->nic_type == athr_l1d_2;
+
+	/* jumbo capable: max_mtu 6122; 10/100 devices: max_mtu 1500 */
+	if (jumbo)
+		netdev->max_mtu = MAX_JUMBO_FRAME_SIZE -
+				  (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+	else
+		netdev->max_mtu = ETH_DATA_LEN;
+}
+
 /**
  * atl1c_change_mtu - Change the Maximum Transfer Unit
  * @netdev: network interface device structure
@@ -529,22 +549,9 @@ static int atl1c_set_features(struct net_device *netdev,
 static int atl1c_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
-	struct atl1c_hw *hw = &adapter->hw;
-	int old_mtu   = netdev->mtu;
-	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
-
-	/* Fast Ethernet controller doesn't support jumbo packet */
-	if (((hw->nic_type == athr_l2c ||
-	      hw->nic_type == athr_l2c_b ||
-	      hw->nic_type == athr_l2c_b2) && new_mtu > ETH_DATA_LEN) ||
-	      max_frame < ETH_ZLEN + ETH_FCS_LEN ||
-	      max_frame > MAX_JUMBO_FRAME_SIZE) {
-		if (netif_msg_link(adapter))
-			dev_warn(&adapter->pdev->dev, "invalid MTU setting\n");
-		return -EINVAL;
-	}
+
 	/* set MTU */
-	if (old_mtu != new_mtu && netif_running(netdev)) {
+	if (netif_running(netdev)) {
 		while (test_and_set_bit(__AT_RESETTING, &adapter->flags))
 			msleep(1);
 		netdev->mtu = new_mtu;
@@ -2511,6 +2518,7 @@ static int atl1c_init_netdev(struct net_device *netdev, struct pci_dev *pdev)
 
 	netdev->netdev_ops = &atl1c_netdev_ops;
 	netdev->watchdog_timeo = AT_TX_WATCHDOG;
+	netdev->min_mtu = ETH_ZLEN - (ETH_HLEN + VLAN_HLEN);
 	atl1c_set_ethtool_ops(netdev);
 
 	/* TODO: add when ready */
@@ -2613,6 +2621,9 @@ static int atl1c_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_err(&pdev->dev, "net device private data init failed\n");
 		goto err_sw_init;
 	}
+	/* set max MTU */
+	atl1c_set_max_mtu(netdev);
+
 	atl1c_reset_pcie(&adapter->hw, ATL1C_PCIE_L0S_L1_DISABLE);
 
 	/* Init GPHY as early as possible due to power saving issue  */
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
index 8e3dbd4d9f79..cb489e7e8374 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_ethtool.c
@@ -26,73 +26,83 @@
 
 #include "atl1e.h"
 
-static int atl1e_get_settings(struct net_device *netdev,
-			      struct ethtool_cmd *ecmd)
+static int atl1e_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
 	struct atl1e_hw *hw = &adapter->hw;
+	u32 supported, advertising;
 
-	ecmd->supported = (SUPPORTED_10baseT_Half  |
+	supported = (SUPPORTED_10baseT_Half  |
 			   SUPPORTED_10baseT_Full  |
 			   SUPPORTED_100baseT_Half |
 			   SUPPORTED_100baseT_Full |
 			   SUPPORTED_Autoneg       |
 			   SUPPORTED_TP);
 	if (hw->nic_type == athr_l1e)
-		ecmd->supported |= SUPPORTED_1000baseT_Full;
+		supported |= SUPPORTED_1000baseT_Full;
 
-	ecmd->advertising = ADVERTISED_TP;
+	advertising = ADVERTISED_TP;
 
-	ecmd->advertising |= ADVERTISED_Autoneg;
-	ecmd->advertising |= hw->autoneg_advertised;
+	advertising |= ADVERTISED_Autoneg;
+	advertising |= hw->autoneg_advertised;
 
-	ecmd->port = PORT_TP;
-	ecmd->phy_address = 0;
-	ecmd->transceiver = XCVR_INTERNAL;
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = 0;
 
 	if (adapter->link_speed != SPEED_0) {
-		ethtool_cmd_speed_set(ecmd, adapter->link_speed);
+		cmd->base.speed = adapter->link_speed;
 		if (adapter->link_duplex == FULL_DUPLEX)
-			ecmd->duplex = DUPLEX_FULL;
+			cmd->base.duplex = DUPLEX_FULL;
 		else
-			ecmd->duplex = DUPLEX_HALF;
+			cmd->base.duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-		ecmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
-	ecmd->autoneg = AUTONEG_ENABLE;
+	cmd->base.autoneg = AUTONEG_ENABLE;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
 	return 0;
 }
 
-static int atl1e_set_settings(struct net_device *netdev,
-			      struct ethtool_cmd *ecmd)
+static int atl1e_set_link_ksettings(struct net_device *netdev,
+				    const struct ethtool_link_ksettings *cmd)
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
 	struct atl1e_hw *hw = &adapter->hw;
+	u32 advertising;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
 
 	while (test_and_set_bit(__AT_RESETTING, &adapter->flags))
 		msleep(1);
 
-	if (ecmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		u16 adv4, adv9;
 
-		if ((ecmd->advertising&ADVERTISE_1000_FULL)) {
+		if (advertising & ADVERTISE_1000_FULL) {
 			if (hw->nic_type == athr_l1e) {
 				hw->autoneg_advertised =
-					ecmd->advertising & AT_ADV_MASK;
+					advertising & AT_ADV_MASK;
 			} else {
 				clear_bit(__AT_RESETTING, &adapter->flags);
 				return -EINVAL;
 			}
-		} else if (ecmd->advertising&ADVERTISE_1000_HALF) {
+		} else if (advertising & ADVERTISE_1000_HALF) {
 			clear_bit(__AT_RESETTING, &adapter->flags);
 			return -EINVAL;
 		} else {
 			hw->autoneg_advertised =
-				ecmd->advertising & AT_ADV_MASK;
+				advertising & AT_ADV_MASK;
 		}
-		ecmd->advertising = hw->autoneg_advertised |
+		advertising = hw->autoneg_advertised |
 				    ADVERTISED_TP | ADVERTISED_Autoneg;
 
 		adv4 = hw->mii_autoneg_adv_reg & ~ADVERTISE_ALL;
@@ -367,8 +377,6 @@ static int atl1e_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl1e_ethtool_ops = {
-	.get_settings           = atl1e_get_settings,
-	.set_settings           = atl1e_set_settings,
 	.get_drvinfo            = atl1e_get_drvinfo,
 	.get_regs_len           = atl1e_get_regs_len,
 	.get_regs               = atl1e_get_regs,
@@ -380,6 +388,8 @@ static const struct ethtool_ops atl1e_ethtool_ops = {
 	.get_eeprom_len         = atl1e_get_eeprom_len,
 	.get_eeprom             = atl1e_get_eeprom,
 	.set_eeprom             = atl1e_set_eeprom,
+	.get_link_ksettings     = atl1e_get_link_ksettings,
+	.set_link_ksettings     = atl1e_set_link_ksettings,
 };
 
 void atl1e_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
index 974713b19ab6..e96091b652a7 100644
--- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
+++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c
@@ -439,16 +439,10 @@ static int atl1e_set_features(struct net_device *netdev,
 static int atl1e_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
-	int old_mtu   = netdev->mtu;
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 
-	if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
-			(max_frame > MAX_JUMBO_FRAME_SIZE)) {
-		netdev_warn(adapter->netdev, "invalid MTU setting\n");
-		return -EINVAL;
-	}
 	/* set MTU */
-	if (old_mtu != new_mtu && netif_running(netdev)) {
+	if (netif_running(netdev)) {
 		while (test_and_set_bit(__AT_RESETTING, &adapter->flags))
 			msleep(1);
 		netdev->mtu = new_mtu;
@@ -2272,6 +2266,10 @@ static int atl1e_init_netdev(struct net_device *netdev, struct pci_dev *pdev)
 	netdev->netdev_ops = &atl1e_netdev_ops;
 
 	netdev->watchdog_timeo = AT_TX_WATCHDOG;
+	/* MTU range: 42 - 8170 */
+	netdev->min_mtu = ETH_ZLEN - (ETH_HLEN + VLAN_HLEN);
+	netdev->max_mtu = MAX_JUMBO_FRAME_SIZE -
+			  (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
 	atl1e_set_ethtool_ops(netdev);
 
 	netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_TSO |
diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index 529bca718334..7dad8e4b9d2a 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -2701,23 +2701,15 @@ static void atl1_reset_dev_task(struct work_struct *work)
 static int atl1_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct atl1_adapter *adapter = netdev_priv(netdev);
-	int old_mtu = netdev->mtu;
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 
-	if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
-	    (max_frame > MAX_JUMBO_FRAME_SIZE)) {
-		if (netif_msg_link(adapter))
-			dev_warn(&adapter->pdev->dev, "invalid MTU setting\n");
-		return -EINVAL;
-	}
-
 	adapter->hw.max_frame_size = max_frame;
 	adapter->hw.tx_jumbo_task_th = (max_frame + 7) >> 3;
 	adapter->rx_buffer_len = (max_frame + 7) & ~7;
 	adapter->hw.rx_jumbo_th = adapter->rx_buffer_len / 8;
 
 	netdev->mtu = new_mtu;
-	if ((old_mtu != new_mtu) && netif_running(netdev)) {
+	if (netif_running(netdev)) {
 		atl1_down(adapter);
 		atl1_up(adapter);
 	}
@@ -3031,6 +3023,11 @@ static int atl1_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* is this valid? see atl1_setup_mac_ctrl() */
 	netdev->features |= NETIF_F_RXCSUM;
 
+	/* MTU range: 42 - 10218 */
+	netdev->min_mtu = ETH_ZLEN - (ETH_HLEN + VLAN_HLEN);
+	netdev->max_mtu = MAX_JUMBO_FRAME_SIZE -
+			  (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+
 	/*
 	 * patch for some L1 of old version,
 	 * the final version of L1 may not need these
@@ -3217,66 +3214,72 @@ static int atl1_get_sset_count(struct net_device *netdev, int sset)
 	}
 }
 
-static int atl1_get_settings(struct net_device *netdev,
-	struct ethtool_cmd *ecmd)
+static int atl1_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
 {
 	struct atl1_adapter *adapter = netdev_priv(netdev);
 	struct atl1_hw *hw = &adapter->hw;
+	u32 supported, advertising;
 
-	ecmd->supported = (SUPPORTED_10baseT_Half |
+	supported = (SUPPORTED_10baseT_Half |
 			   SUPPORTED_10baseT_Full |
 			   SUPPORTED_100baseT_Half |
 			   SUPPORTED_100baseT_Full |
 			   SUPPORTED_1000baseT_Full |
 			   SUPPORTED_Autoneg | SUPPORTED_TP);
-	ecmd->advertising = ADVERTISED_TP;
+	advertising = ADVERTISED_TP;
 	if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR ||
 	    hw->media_type == MEDIA_TYPE_1000M_FULL) {
-		ecmd->advertising |= ADVERTISED_Autoneg;
+		advertising |= ADVERTISED_Autoneg;
 		if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR) {
-			ecmd->advertising |= ADVERTISED_Autoneg;
-			ecmd->advertising |=
+			advertising |= ADVERTISED_Autoneg;
+			advertising |=
 			    (ADVERTISED_10baseT_Half |
 			     ADVERTISED_10baseT_Full |
 			     ADVERTISED_100baseT_Half |
 			     ADVERTISED_100baseT_Full |
 			     ADVERTISED_1000baseT_Full);
 		} else
-			ecmd->advertising |= (ADVERTISED_1000baseT_Full);
+			advertising |= (ADVERTISED_1000baseT_Full);
 	}
-	ecmd->port = PORT_TP;
-	ecmd->phy_address = 0;
-	ecmd->transceiver = XCVR_INTERNAL;
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = 0;
 
 	if (netif_carrier_ok(adapter->netdev)) {
 		u16 link_speed, link_duplex;
 		atl1_get_speed_and_duplex(hw, &link_speed, &link_duplex);
-		ethtool_cmd_speed_set(ecmd, link_speed);
+		cmd->base.speed = link_speed;
 		if (link_duplex == FULL_DUPLEX)
-			ecmd->duplex = DUPLEX_FULL;
+			cmd->base.duplex = DUPLEX_FULL;
 		else
-			ecmd->duplex = DUPLEX_HALF;
+			cmd->base.duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-		ecmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 	if (hw->media_type == MEDIA_TYPE_AUTO_SENSOR ||
 	    hw->media_type == MEDIA_TYPE_1000M_FULL)
-		ecmd->autoneg = AUTONEG_ENABLE;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 	else
-		ecmd->autoneg = AUTONEG_DISABLE;
+		cmd->base.autoneg = AUTONEG_DISABLE;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
 
-static int atl1_set_settings(struct net_device *netdev,
-	struct ethtool_cmd *ecmd)
+static int atl1_set_link_ksettings(struct net_device *netdev,
+				   const struct ethtool_link_ksettings *cmd)
 {
 	struct atl1_adapter *adapter = netdev_priv(netdev);
 	struct atl1_hw *hw = &adapter->hw;
 	u16 phy_data;
 	int ret_val = 0;
 	u16 old_media_type = hw->media_type;
+	u32 advertising;
 
 	if (netif_running(adapter->netdev)) {
 		if (netif_msg_link(adapter))
@@ -3285,12 +3288,12 @@ static int atl1_set_settings(struct net_device *netdev,
 		atl1_down(adapter);
 	}
 
-	if (ecmd->autoneg == AUTONEG_ENABLE)
+	if (cmd->base.autoneg == AUTONEG_ENABLE)
 		hw->media_type = MEDIA_TYPE_AUTO_SENSOR;
 	else {
-		u32 speed = ethtool_cmd_speed(ecmd);
+		u32 speed = cmd->base.speed;
 		if (speed == SPEED_1000) {
-			if (ecmd->duplex != DUPLEX_FULL) {
+			if (cmd->base.duplex != DUPLEX_FULL) {
 				if (netif_msg_link(adapter))
 					dev_warn(&adapter->pdev->dev,
 						"1000M half is invalid\n");
@@ -3299,12 +3302,12 @@ static int atl1_set_settings(struct net_device *netdev,
 			}
 			hw->media_type = MEDIA_TYPE_1000M_FULL;
 		} else if (speed == SPEED_100) {
-			if (ecmd->duplex == DUPLEX_FULL)
+			if (cmd->base.duplex == DUPLEX_FULL)
 				hw->media_type = MEDIA_TYPE_100M_FULL;
 			else
 				hw->media_type = MEDIA_TYPE_100M_HALF;
 		} else {
-			if (ecmd->duplex == DUPLEX_FULL)
+			if (cmd->base.duplex == DUPLEX_FULL)
 				hw->media_type = MEDIA_TYPE_10M_FULL;
 			else
 				hw->media_type = MEDIA_TYPE_10M_HALF;
@@ -3312,7 +3315,7 @@ static int atl1_set_settings(struct net_device *netdev,
 	}
 	switch (hw->media_type) {
 	case MEDIA_TYPE_AUTO_SENSOR:
-		ecmd->advertising =
+		advertising =
 		    ADVERTISED_10baseT_Half |
 		    ADVERTISED_10baseT_Full |
 		    ADVERTISED_100baseT_Half |
@@ -3321,12 +3324,12 @@ static int atl1_set_settings(struct net_device *netdev,
 		    ADVERTISED_Autoneg | ADVERTISED_TP;
 		break;
 	case MEDIA_TYPE_1000M_FULL:
-		ecmd->advertising =
+		advertising =
 		    ADVERTISED_1000baseT_Full |
 		    ADVERTISED_Autoneg | ADVERTISED_TP;
 		break;
 	default:
-		ecmd->advertising = 0;
+		advertising = 0;
 		break;
 	}
 	if (atl1_phy_setup_autoneg_adv(hw)) {
@@ -3666,8 +3669,6 @@ static int atl1_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl1_ethtool_ops = {
-	.get_settings		= atl1_get_settings,
-	.set_settings		= atl1_set_settings,
 	.get_drvinfo		= atl1_get_drvinfo,
 	.get_wol		= atl1_get_wol,
 	.set_wol		= atl1_set_wol,
@@ -3684,6 +3685,8 @@ static const struct ethtool_ops atl1_ethtool_ops = {
 	.nway_reset		= atl1_nway_reset,
 	.get_ethtool_stats	= atl1_get_ethtool_stats,
 	.get_sset_count		= atl1_get_sset_count,
+	.get_link_ksettings	= atl1_get_link_ksettings,
+	.set_link_ksettings	= atl1_set_link_ksettings,
 };
 
 module_pci_driver(atl1_driver);
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c
index 2ff465848b65..63f2deec2a52 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.c
+++ b/drivers/net/ethernet/atheros/atlx/atl2.c
@@ -253,7 +253,7 @@ static int atl2_configure(struct atl2_adapter *adapter)
 
 	/* set MTU */
 	ATL2_WRITE_REG(hw, REG_MTU, adapter->netdev->mtu +
-		ENET_HEADER_SIZE + VLAN_SIZE + ETHERNET_FCS_SIZE);
+		ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN);
 
 	/* 1590 */
 	ATL2_WRITE_REG(hw, REG_TX_CUT_THRESH, 0x177);
@@ -925,15 +925,11 @@ static int atl2_change_mtu(struct net_device *netdev, int new_mtu)
 	struct atl2_adapter *adapter = netdev_priv(netdev);
 	struct atl2_hw *hw = &adapter->hw;
 
-	if ((new_mtu < 40) || (new_mtu > (ETH_DATA_LEN + VLAN_SIZE)))
-		return -EINVAL;
-
 	/* set MTU */
-	if (hw->max_frame_size != new_mtu) {
-		netdev->mtu = new_mtu;
-		ATL2_WRITE_REG(hw, REG_MTU, new_mtu + ENET_HEADER_SIZE +
-			VLAN_SIZE + ETHERNET_FCS_SIZE);
-	}
+	netdev->mtu = new_mtu;
+	hw->max_frame_size = new_mtu;
+	ATL2_WRITE_REG(hw, REG_MTU, new_mtu + ETH_HLEN +
+		       VLAN_HLEN + ETH_FCS_LEN);
 
 	return 0;
 }
@@ -1398,6 +1394,8 @@ static int atl2_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->netdev_ops = &atl2_netdev_ops;
 	netdev->ethtool_ops = &atl2_ethtool_ops;
 	netdev->watchdog_timeo = 5 * HZ;
+	netdev->min_mtu = 40;
+	netdev->max_mtu = ETH_DATA_LEN + VLAN_HLEN;
 	strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
 
 	netdev->mem_start = mmio_start;
@@ -1739,81 +1737,87 @@ static void atl2_write_pci_cfg(struct atl2_hw *hw, u32 reg, u16 *value)
 	pci_write_config_word(adapter->pdev, reg, *value);
 }
 
-static int atl2_get_settings(struct net_device *netdev,
-	struct ethtool_cmd *ecmd)
+static int atl2_get_link_ksettings(struct net_device *netdev,
+				   struct ethtool_link_ksettings *cmd)
 {
 	struct atl2_adapter *adapter = netdev_priv(netdev);
 	struct atl2_hw *hw = &adapter->hw;
+	u32 supported, advertising;
 
-	ecmd->supported = (SUPPORTED_10baseT_Half |
+	supported = (SUPPORTED_10baseT_Half |
 		SUPPORTED_10baseT_Full |
 		SUPPORTED_100baseT_Half |
 		SUPPORTED_100baseT_Full |
 		SUPPORTED_Autoneg |
 		SUPPORTED_TP);
-	ecmd->advertising = ADVERTISED_TP;
+	advertising = ADVERTISED_TP;
 
-	ecmd->advertising |= ADVERTISED_Autoneg;
-	ecmd->advertising |= hw->autoneg_advertised;
+	advertising |= ADVERTISED_Autoneg;
+	advertising |= hw->autoneg_advertised;
 
-	ecmd->port = PORT_TP;
-	ecmd->phy_address = 0;
-	ecmd->transceiver = XCVR_INTERNAL;
+	cmd->base.port = PORT_TP;
+	cmd->base.phy_address = 0;
 
 	if (adapter->link_speed != SPEED_0) {
-		ethtool_cmd_speed_set(ecmd, adapter->link_speed);
+		cmd->base.speed = adapter->link_speed;
 		if (adapter->link_duplex == FULL_DUPLEX)
-			ecmd->duplex = DUPLEX_FULL;
+			cmd->base.duplex = DUPLEX_FULL;
 		else
-			ecmd->duplex = DUPLEX_HALF;
+			cmd->base.duplex = DUPLEX_HALF;
 	} else {
-		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-		ecmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
-	ecmd->autoneg = AUTONEG_ENABLE;
+	cmd->base.autoneg = AUTONEG_ENABLE;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
+
 	return 0;
 }
 
-static int atl2_set_settings(struct net_device *netdev,
-	struct ethtool_cmd *ecmd)
+static int atl2_set_link_ksettings(struct net_device *netdev,
+				   const struct ethtool_link_ksettings *cmd)
 {
 	struct atl2_adapter *adapter = netdev_priv(netdev);
 	struct atl2_hw *hw = &adapter->hw;
+	u32 advertising;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
 
 	while (test_and_set_bit(__ATL2_RESETTING, &adapter->flags))
 		msleep(1);
 
-	if (ecmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 #define MY_ADV_MASK	(ADVERTISE_10_HALF | \
 			 ADVERTISE_10_FULL | \
 			 ADVERTISE_100_HALF| \
 			 ADVERTISE_100_FULL)
 
-		if ((ecmd->advertising & MY_ADV_MASK) == MY_ADV_MASK) {
+		if ((advertising & MY_ADV_MASK) == MY_ADV_MASK) {
 			hw->MediaType = MEDIA_TYPE_AUTO_SENSOR;
 			hw->autoneg_advertised =  MY_ADV_MASK;
-		} else if ((ecmd->advertising & MY_ADV_MASK) ==
-				ADVERTISE_100_FULL) {
+		} else if ((advertising & MY_ADV_MASK) == ADVERTISE_100_FULL) {
 			hw->MediaType = MEDIA_TYPE_100M_FULL;
 			hw->autoneg_advertised = ADVERTISE_100_FULL;
-		} else if ((ecmd->advertising & MY_ADV_MASK) ==
-				ADVERTISE_100_HALF) {
+		} else if ((advertising & MY_ADV_MASK) == ADVERTISE_100_HALF) {
 			hw->MediaType = MEDIA_TYPE_100M_HALF;
 			hw->autoneg_advertised = ADVERTISE_100_HALF;
-		} else if ((ecmd->advertising & MY_ADV_MASK) ==
-				ADVERTISE_10_FULL) {
+		} else if ((advertising & MY_ADV_MASK) == ADVERTISE_10_FULL) {
 			hw->MediaType = MEDIA_TYPE_10M_FULL;
 			hw->autoneg_advertised = ADVERTISE_10_FULL;
-		}  else if ((ecmd->advertising & MY_ADV_MASK) ==
-				ADVERTISE_10_HALF) {
+		}  else if ((advertising & MY_ADV_MASK) == ADVERTISE_10_HALF) {
 			hw->MediaType = MEDIA_TYPE_10M_HALF;
 			hw->autoneg_advertised = ADVERTISE_10_HALF;
 		} else {
 			clear_bit(__ATL2_RESETTING, &adapter->flags);
 			return -EINVAL;
 		}
-		ecmd->advertising = hw->autoneg_advertised |
+		advertising = hw->autoneg_advertised |
 			ADVERTISED_TP | ADVERTISED_Autoneg;
 	} else {
 		clear_bit(__ATL2_RESETTING, &adapter->flags);
@@ -2082,8 +2086,6 @@ static int atl2_nway_reset(struct net_device *netdev)
 }
 
 static const struct ethtool_ops atl2_ethtool_ops = {
-	.get_settings		= atl2_get_settings,
-	.set_settings		= atl2_set_settings,
 	.get_drvinfo		= atl2_get_drvinfo,
 	.get_regs_len		= atl2_get_regs_len,
 	.get_regs		= atl2_get_regs,
@@ -2096,6 +2098,8 @@ static const struct ethtool_ops atl2_ethtool_ops = {
 	.get_eeprom_len		= atl2_get_eeprom_len,
 	.get_eeprom		= atl2_get_eeprom,
 	.set_eeprom		= atl2_set_eeprom,
+	.get_link_ksettings	= atl2_get_link_ksettings,
+	.set_link_ksettings	= atl2_set_link_ksettings,
 };
 
 #define LBYTESWAP(a)  ((((a) & 0x00ff00ff) << 8) | \
diff --git a/drivers/net/ethernet/atheros/atlx/atl2.h b/drivers/net/ethernet/atheros/atlx/atl2.h
index 2f27d4c4c3ad..c64a6bdfa7ae 100644
--- a/drivers/net/ethernet/atheros/atlx/atl2.h
+++ b/drivers/net/ethernet/atheros/atlx/atl2.h
@@ -228,12 +228,9 @@ static void atl2_force_ps(struct atl2_hw *hw);
 #define AUTONEG_ADVERTISE_SPEED_DEFAULT	0x000F	/* Everything */
 
 /* The size (in bytes) of a ethernet packet */
-#define ENET_HEADER_SIZE		14
 #define MAXIMUM_ETHERNET_FRAME_SIZE	1518	/* with FCS */
 #define MINIMUM_ETHERNET_FRAME_SIZE	64	/* with FCS */
-#define ETHERNET_FCS_SIZE		4
 #define MAX_JUMBO_FRAME_SIZE		0x2000
-#define VLAN_SIZE                                               4
 
 struct tx_pkt_header {
 	unsigned pkt_size:11;
diff --git a/drivers/net/ethernet/aurora/nb8800.c b/drivers/net/ethernet/aurora/nb8800.c
index 00c38bf151e6..5711fbbd6ae3 100644
--- a/drivers/net/ethernet/aurora/nb8800.c
+++ b/drivers/net/ethernet/aurora/nb8800.c
@@ -975,8 +975,10 @@ static int nb8800_open(struct net_device *dev)
 	phydev = of_phy_connect(dev, priv->phy_node,
 				nb8800_link_reconfigure, 0,
 				priv->phy_mode);
-	if (!phydev)
+	if (!phydev) {
+		err = -ENODEV;
 		goto err_free_irq;
+	}
 
 	nb8800_pause_adv(dev);
 
@@ -1032,20 +1034,9 @@ static const struct net_device_ops nb8800_netdev_ops = {
 	.ndo_set_mac_address	= nb8800_set_mac_address,
 	.ndo_set_rx_mode	= nb8800_set_rx_mode,
 	.ndo_do_ioctl		= nb8800_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
-static int nb8800_nway_reset(struct net_device *dev)
-{
-	struct phy_device *phydev = dev->phydev;
-
-	if (!phydev)
-		return -ENODEV;
-
-	return genphy_restart_aneg(phydev);
-}
-
 static void nb8800_get_pauseparam(struct net_device *dev,
 				  struct ethtool_pauseparam *pp)
 {
@@ -1164,7 +1155,7 @@ static void nb8800_get_ethtool_stats(struct net_device *dev,
 }
 
 static const struct ethtool_ops nb8800_ethtool_ops = {
-	.nway_reset		= nb8800_nway_reset,
+	.nway_reset		= phy_ethtool_nway_reset,
 	.get_link		= ethtool_op_get_link,
 	.get_pauseparam		= nb8800_get_pauseparam,
 	.set_pauseparam		= nb8800_set_pauseparam,
@@ -1466,12 +1457,12 @@ static int nb8800_probe(struct platform_device *pdev)
 
 	ret = nb8800_hw_init(dev);
 	if (ret)
-		goto err_free_bus;
+		goto err_deregister_fixed_link;
 
 	if (ops && ops->init) {
 		ret = ops->init(dev);
 		if (ret)
-			goto err_free_bus;
+			goto err_deregister_fixed_link;
 	}
 
 	dev->netdev_ops = &nb8800_netdev_ops;
@@ -1504,6 +1495,9 @@ static int nb8800_probe(struct platform_device *pdev)
 
 err_free_dma:
 	nb8800_dma_free(dev);
+err_deregister_fixed_link:
+	if (of_phy_is_fixed_link(pdev->dev.of_node))
+		of_phy_deregister_fixed_link(pdev->dev.of_node);
 err_free_bus:
 	of_node_put(priv->phy_node);
 	mdiobus_unregister(bus);
@@ -1521,6 +1515,8 @@ static int nb8800_remove(struct platform_device *pdev)
 	struct nb8800_priv *priv = netdev_priv(ndev);
 
 	unregister_netdev(ndev);
+	if (of_phy_is_fixed_link(pdev->dev.of_node))
+		of_phy_deregister_fixed_link(pdev->dev.of_node);
 	of_node_put(priv->phy_node);
 
 	mdiobus_unregister(priv->mii_bus);
diff --git a/drivers/net/ethernet/broadcom/Kconfig b/drivers/net/ethernet/broadcom/Kconfig
index bd8c80c0b71c..940fb24bba21 100644
--- a/drivers/net/ethernet/broadcom/Kconfig
+++ b/drivers/net/ethernet/broadcom/Kconfig
@@ -110,7 +110,7 @@ config TIGON3
 	depends on PCI
 	select PHYLIB
 	select HWMON
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Broadcom Tigon3 based gigabit Ethernet cards.
 
@@ -120,7 +120,7 @@ config TIGON3
 config BNX2X
 	tristate "Broadcom NetXtremeII 10Gb support"
 	depends on PCI
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select FW_LOADER
 	select ZLIB_INFLATE
 	select LIBCRC32C
@@ -203,4 +203,14 @@ config BNXT_SRIOV
 	  Virtualization support in the NetXtreme-C/E products. This
 	  allows for virtual function acceleration in virtual environments.
 
+config BNXT_DCB
+	bool "Data Center Bridging (DCB) Support"
+	default n
+	depends on BNXT && DCB
+	---help---
+	  Say Y here if you want to use Data Center Bridging (DCB) in the
+	  driver.
+
+	  If unsure, say N.
+
 endif # NET_VENDOR_BROADCOM
diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c
index 17aa33c5567d..1df3048a3cdb 100644
--- a/drivers/net/ethernet/broadcom/b44.c
+++ b/drivers/net/ethernet/broadcom/b44.c
@@ -59,8 +59,8 @@
 #define B44_TX_TIMEOUT			(5 * HZ)
 
 /* hardware minimum and maximum for a single frame's data payload */
-#define B44_MIN_MTU			60
-#define B44_MAX_MTU			1500
+#define B44_MIN_MTU			ETH_ZLEN
+#define B44_MAX_MTU			ETH_DATA_LEN
 
 #define B44_RX_RING_SIZE		512
 #define B44_DEF_RX_RING_PENDING		200
@@ -1064,9 +1064,6 @@ static int b44_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct b44 *bp = netdev_priv(dev);
 
-	if (new_mtu < B44_MIN_MTU || new_mtu > B44_MAX_MTU)
-		return -EINVAL;
-
 	if (!netif_running(dev)) {
 		/* We'll just catch it later when the
 		 * device is up'd.
@@ -2377,6 +2374,8 @@ static int b44_init_one(struct ssb_device *sdev,
 	dev->netdev_ops = &b44_netdev_ops;
 	netif_napi_add(dev, &bp->napi, b44_poll, 64);
 	dev->watchdog_timeo = B44_TX_TIMEOUT;
+	dev->min_mtu = B44_MIN_MTU;
+	dev->max_mtu = B44_MAX_MTU;
 	dev->irq = sdev->irq;
 	dev->ethtool_ops = &b44_ethtool_ops;
 
diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
index 537090952c45..3b14d5144228 100644
--- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c
+++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c
@@ -1434,11 +1434,8 @@ static int bcm_enet_nway_reset(struct net_device *dev)
 	struct bcm_enet_priv *priv;
 
 	priv = netdev_priv(dev);
-	if (priv->has_phy) {
-		if (!dev->phydev)
-			return -ENODEV;
-		return genphy_restart_aneg(dev->phydev);
-	}
+	if (priv->has_phy)
+		return phy_ethtool_nway_reset(dev);
 
 	return -EOPNOTSUPP;
 }
@@ -1623,20 +1620,19 @@ static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 }
 
 /*
- * calculate actual hardware mtu
+ * adjust mtu, can't be called while device is running
  */
-static int compute_hw_mtu(struct bcm_enet_priv *priv, int mtu)
+static int bcm_enet_change_mtu(struct net_device *dev, int new_mtu)
 {
-	int actual_mtu;
+	struct bcm_enet_priv *priv = netdev_priv(dev);
+	int actual_mtu = new_mtu;
 
-	actual_mtu = mtu;
+	if (netif_running(dev))
+		return -EBUSY;
 
 	/* add ethernet header + vlan tag size */
 	actual_mtu += VLAN_ETH_HLEN;
 
-	if (actual_mtu < 64 || actual_mtu > BCMENET_MAX_MTU)
-		return -EINVAL;
-
 	/*
 	 * setup maximum size before we get overflow mark in
 	 * descriptor, note that this will not prevent reception of
@@ -1651,22 +1647,7 @@ static int compute_hw_mtu(struct bcm_enet_priv *priv, int mtu)
 	 */
 	priv->rx_skb_size = ALIGN(actual_mtu + ETH_FCS_LEN,
 				  priv->dma_maxburst * 4);
-	return 0;
-}
-
-/*
- * adjust mtu, can't be called while device is running
- */
-static int bcm_enet_change_mtu(struct net_device *dev, int new_mtu)
-{
-	int ret;
-
-	if (netif_running(dev))
-		return -EBUSY;
 
-	ret = compute_hw_mtu(netdev_priv(dev), new_mtu);
-	if (ret)
-		return ret;
 	dev->mtu = new_mtu;
 	return 0;
 }
@@ -1756,7 +1737,7 @@ static int bcm_enet_probe(struct platform_device *pdev)
 	priv->enet_is_sw = false;
 	priv->dma_maxburst = BCMENET_DMA_MAXBURST;
 
-	ret = compute_hw_mtu(priv, dev->mtu);
+	ret = bcm_enet_change_mtu(dev, dev->mtu);
 	if (ret)
 		goto out;
 
@@ -1889,6 +1870,9 @@ static int bcm_enet_probe(struct platform_device *pdev)
 	netif_napi_add(dev, &priv->napi, bcm_enet_poll, 16);
 
 	dev->ethtool_ops = &bcm_enet_ethtool_ops;
+	/* MTU range: 46 - 2028 */
+	dev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	dev->max_mtu = BCMENET_MAX_MTU - VLAN_ETH_HLEN;
 	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	ret = register_netdev(dev);
@@ -2743,7 +2727,7 @@ static int bcm_enetsw_probe(struct platform_device *pdev)
 		priv->dma_chan_width = pd->dma_chan_width;
 	}
 
-	ret = compute_hw_mtu(priv, dev->mtu);
+	ret = bcm_enet_change_mtu(dev, dev->mtu);
 	if (ret)
 		goto out;
 
diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c
index c3354b9941d1..25d1eb4933d0 100644
--- a/drivers/net/ethernet/broadcom/bcmsysport.c
+++ b/drivers/net/ethernet/broadcom/bcmsysport.c
@@ -1755,13 +1755,13 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	if (priv->irq0 <= 0 || priv->irq1 <= 0) {
 		dev_err(&pdev->dev, "invalid interrupts\n");
 		ret = -EINVAL;
-		goto err;
+		goto err_free_netdev;
 	}
 
 	priv->base = devm_ioremap_resource(&pdev->dev, r);
 	if (IS_ERR(priv->base)) {
 		ret = PTR_ERR(priv->base);
-		goto err;
+		goto err_free_netdev;
 	}
 
 	priv->netdev = dev;
@@ -1779,7 +1779,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 		ret = of_phy_register_fixed_link(dn);
 		if (ret) {
 			dev_err(&pdev->dev, "failed to register fixed PHY\n");
-			goto err;
+			goto err_free_netdev;
 		}
 
 		priv->phy_dn = dn;
@@ -1821,7 +1821,7 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 	ret = register_netdev(dev);
 	if (ret) {
 		dev_err(&pdev->dev, "failed to register net_device\n");
-		goto err;
+		goto err_deregister_fixed_link;
 	}
 
 	priv->rev = topctrl_readl(priv, REV_CNTL) & REV_MASK;
@@ -1832,7 +1832,11 @@ static int bcm_sysport_probe(struct platform_device *pdev)
 		 priv->base, priv->irq0, priv->irq1, txq, rxq);
 
 	return 0;
-err:
+
+err_deregister_fixed_link:
+	if (of_phy_is_fixed_link(dn))
+		of_phy_deregister_fixed_link(dn);
+err_free_netdev:
 	free_netdev(dev);
 	return ret;
 }
@@ -1840,11 +1844,14 @@ err:
 static int bcm_sysport_remove(struct platform_device *pdev)
 {
 	struct net_device *dev = dev_get_drvdata(&pdev->dev);
+	struct device_node *dn = pdev->dev.of_node;
 
 	/* Not much to do, ndo_close has been called
 	 * and we use managed allocations
 	 */
 	unregister_netdev(dev);
+	if (of_phy_is_fixed_link(dn))
+		of_phy_deregister_fixed_link(dn);
 	free_netdev(dev);
 	dev_set_drvdata(&pdev->dev, NULL);
 
diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c
index c16ec3a51876..4a4ffc0c4c65 100644
--- a/drivers/net/ethernet/broadcom/bgmac-bcma.c
+++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c
@@ -80,6 +80,24 @@ static void bcma_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset, u32 mask,
 	bcma_maskset32(bgmac->bcma.cmn, offset, mask, set);
 }
 
+static int bcma_phy_connect(struct bgmac *bgmac)
+{
+	struct phy_device *phy_dev;
+	char bus_id[MII_BUS_ID_SIZE + 3];
+
+	/* Format the PHY id from bus id + address, then connect (MII mode) */
+	snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id,
+		 bgmac->phyaddr);
+	phy_dev = phy_connect(bgmac->net_dev, bus_id, bgmac_adjust_link,
+			      PHY_INTERFACE_MODE_MII);
+	if (IS_ERR(phy_dev)) {
+		dev_err(bgmac->dev, "PHY connection failed\n");
+		return PTR_ERR(phy_dev);	/* propagate phy_connect()'s errno */
+	}
+
+	return 0;
+}
+
 static const struct bcma_device_id bgmac_bcma_tbl[] = {
 	BCMA_CORE(BCMA_MANUF_BCM, BCMA_CORE_4706_MAC_GBIT,
 		  BCMA_ANY_REV, BCMA_ANY_CLASS),
@@ -275,6 +293,10 @@ static int bgmac_probe(struct bcma_device *core)
 	bgmac->cco_ctl_maskset = bcma_bgmac_cco_ctl_maskset;
 	bgmac->get_bus_clock = bcma_bgmac_get_bus_clock;
 	bgmac->cmn_maskset32 = bcma_bgmac_cmn_maskset32;
+	if (bgmac->mii_bus)
+		bgmac->phy_connect = bcma_phy_connect;
+	else
+		bgmac->phy_connect = bgmac_phy_connect_direct;
 
 	err = bgmac_enet_probe(bgmac);
 	if (err)
diff --git a/drivers/net/ethernet/broadcom/bgmac-platform.c b/drivers/net/ethernet/broadcom/bgmac-platform.c
index be52f270c2c1..6f736c19872f 100644
--- a/drivers/net/ethernet/broadcom/bgmac-platform.c
+++ b/drivers/net/ethernet/broadcom/bgmac-platform.c
@@ -14,11 +14,21 @@
 #define pr_fmt(fmt)		KBUILD_MODNAME ": " fmt
 
 #include <linux/bcma/bcma.h>
+#include <linux/brcmphy.h>
 #include <linux/etherdevice.h>
 #include <linux/of_address.h>
+#include <linux/of_mdio.h>
 #include <linux/of_net.h>
 #include "bgmac.h"
 
+#define NICPM_IOMUX_CTRL		0x00000008
+
+#define NICPM_IOMUX_CTRL_INIT_VAL	0x3196e000
+#define NICPM_IOMUX_CTRL_SPD_SHIFT	10
+#define NICPM_IOMUX_CTRL_SPD_10M	0
+#define NICPM_IOMUX_CTRL_SPD_100M	1
+#define NICPM_IOMUX_CTRL_SPD_1000M	2
+
 static u32 platform_bgmac_read(struct bgmac *bgmac, u16 offset)
 {
 	return readl(bgmac->plat.base + offset);
@@ -86,6 +96,54 @@ static void platform_bgmac_cmn_maskset32(struct bgmac *bgmac, u16 offset,
 	WARN_ON(1);
 }
 
+static void bgmac_nicpm_speed_set(struct net_device *net_dev)
+{
+	struct bgmac *bgmac = netdev_priv(net_dev);
+	u32 val;
+
+	if (!bgmac->plat.nicpm_base)	/* nothing to program without NICPM */
+		return;
+
+	val = NICPM_IOMUX_CTRL_INIT_VAL;	/* speed bits OR-ed in below */
+	switch (bgmac->net_dev->phydev->speed) {
+	default:	/* unknown speed: log it, then fall through to 1000M */
+		netdev_err(net_dev, "Unsupported speed. Defaulting to 1000Mb\n");
+	case SPEED_1000:
+		val |= NICPM_IOMUX_CTRL_SPD_1000M << NICPM_IOMUX_CTRL_SPD_SHIFT;
+		break;
+	case SPEED_100:
+		val |= NICPM_IOMUX_CTRL_SPD_100M << NICPM_IOMUX_CTRL_SPD_SHIFT;
+		break;
+	case SPEED_10:
+		val |= NICPM_IOMUX_CTRL_SPD_10M << NICPM_IOMUX_CTRL_SPD_SHIFT;
+		break;
+	}
+
+	writel(val, bgmac->plat.nicpm_base + NICPM_IOMUX_CTRL);
+
+	bgmac_adjust_link(bgmac->net_dev);	/* shared link handler */
+}
+
+static int platform_phy_connect(struct bgmac *bgmac)
+{
+	struct phy_device *phy_dev;
+
+	if (bgmac->plat.nicpm_base)	/* NICPM mapped: speed-aware handler */
+		phy_dev = of_phy_get_and_connect(bgmac->net_dev,
+						 bgmac->dev->of_node,
+						 bgmac_nicpm_speed_set);
+	else	/* no NICPM: plain link handler */
+		phy_dev = of_phy_get_and_connect(bgmac->net_dev,
+						 bgmac->dev->of_node,
+						 bgmac_adjust_link);
+	if (!phy_dev) {	/* NULL return is treated as failure */
+		dev_err(bgmac->dev, "PHY connection failed\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
 static int bgmac_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -102,7 +160,6 @@ static int bgmac_probe(struct platform_device *pdev)
 	/* Set the features of the 4707 family */
 	bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST;
 	bgmac->feature_flags |= BGMAC_FEAT_NO_RESET;
-	bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
 	bgmac->feature_flags |= BGMAC_FEAT_CMDCFG_SR_REV4;
 	bgmac->feature_flags |= BGMAC_FEAT_TX_MASK_SETUP;
 	bgmac->feature_flags |= BGMAC_FEAT_RX_MASK_SETUP;
@@ -142,6 +199,14 @@ static int bgmac_probe(struct platform_device *pdev)
 	if (IS_ERR(bgmac->plat.idm_base))
 		return PTR_ERR(bgmac->plat.idm_base);
 
+	regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "nicpm_base");
+	if (regs) {
+		bgmac->plat.nicpm_base = devm_ioremap_resource(&pdev->dev,
+							       regs);
+		if (IS_ERR(bgmac->plat.nicpm_base))
+			return PTR_ERR(bgmac->plat.nicpm_base);
+	}
+
 	bgmac->read = platform_bgmac_read;
 	bgmac->write = platform_bgmac_write;
 	bgmac->idm_read = platform_bgmac_idm_read;
@@ -151,6 +216,12 @@ static int bgmac_probe(struct platform_device *pdev)
 	bgmac->cco_ctl_maskset = platform_bgmac_cco_ctl_maskset;
 	bgmac->get_bus_clock = platform_bgmac_get_bus_clock;
 	bgmac->cmn_maskset32 = platform_bgmac_cmn_maskset32;
+	if (of_parse_phandle(np, "phy-handle", 0)) {
+		bgmac->phy_connect = platform_phy_connect;
+	} else {
+		bgmac->phy_connect = bgmac_phy_connect_direct;
+		bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500;
+	}
 
 	return bgmac_enet_probe(bgmac);
 }
@@ -167,6 +238,7 @@ static int bgmac_remove(struct platform_device *pdev)
 static const struct of_device_id bgmac_of_enet_match[] = {
 	{.compatible = "brcm,amac",},
 	{.compatible = "brcm,nsp-amac",},
+	{.compatible = "brcm,ns2-amac",},
 	{},
 };
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 49f4cafe5438..0e066dc6b8cc 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1085,6 +1085,9 @@ static void bgmac_enable(struct bgmac *bgmac)
 /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipinit */
 static void bgmac_chip_init(struct bgmac *bgmac)
 {
+	/* Clear any erroneously pending interrupts */
+	bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
+
 	/* 1 interrupt per received frame */
 	bgmac_write(bgmac, BGMAC_INT_RECV_LAZY, 1 << BGMAC_IRL_FC_SHIFT);
 
@@ -1391,7 +1394,7 @@ static const struct ethtool_ops bgmac_ethtool_ops = {
  * MII
  **************************************************/
 
-static void bgmac_adjust_link(struct net_device *net_dev)
+void bgmac_adjust_link(struct net_device *net_dev)
 {
 	struct bgmac *bgmac = netdev_priv(net_dev);
 	struct phy_device *phy_dev = net_dev->phydev;
@@ -1414,8 +1417,9 @@ static void bgmac_adjust_link(struct net_device *net_dev)
 		phy_print_status(phy_dev);
 	}
 }
+EXPORT_SYMBOL_GPL(bgmac_adjust_link);
 
-static int bgmac_phy_connect_direct(struct bgmac *bgmac)
+int bgmac_phy_connect_direct(struct bgmac *bgmac)
 {
 	struct fixed_phy_status fphy_status = {
 		.link = 1,
@@ -1440,24 +1444,7 @@ static int bgmac_phy_connect_direct(struct bgmac *bgmac)
 
 	return err;
 }
-
-static int bgmac_phy_connect(struct bgmac *bgmac)
-{
-	struct phy_device *phy_dev;
-	char bus_id[MII_BUS_ID_SIZE + 3];
-
-	/* Connect to the PHY */
-	snprintf(bus_id, sizeof(bus_id), PHY_ID_FMT, bgmac->mii_bus->id,
-		 bgmac->phyaddr);
-	phy_dev = phy_connect(bgmac->net_dev, bus_id, &bgmac_adjust_link,
-			      PHY_INTERFACE_MODE_MII);
-	if (IS_ERR(phy_dev)) {
-		dev_err(bgmac->dev, "PHY connection failed\n");
-		return PTR_ERR(phy_dev);
-	}
-
-	return 0;
-}
+EXPORT_SYMBOL_GPL(bgmac_phy_connect_direct);
 
 int bgmac_enet_probe(struct bgmac *info)
 {
@@ -1510,10 +1497,7 @@ int bgmac_enet_probe(struct bgmac *info)
 
 	netif_napi_add(net_dev, &bgmac->napi, bgmac_poll, BGMAC_WEIGHT);
 
-	if (!bgmac->mii_bus)
-		err = bgmac_phy_connect_direct(bgmac);
-	else
-		err = bgmac_phy_connect(bgmac);
+	err = bgmac_phy_connect(bgmac);
 	if (err) {
 		dev_err(bgmac->dev, "Cannot connect to phy\n");
 		goto err_dma_free;
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 80836b4c9f38..71f493f2451f 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -463,6 +463,7 @@ struct bgmac {
 		struct {
 			void *base;
 			void *idm_base;
+			void *nicpm_base;
 		} plat;
 		struct {
 			struct bcma_device *core;
@@ -513,10 +514,13 @@ struct bgmac {
 	u32 (*get_bus_clock)(struct bgmac *bgmac);
 	void (*cmn_maskset32)(struct bgmac *bgmac, u16 offset, u32 mask,
 			      u32 set);
+	int (*phy_connect)(struct bgmac *bgmac);
 };
 
 int bgmac_enet_probe(struct bgmac *info);
 void bgmac_enet_remove(struct bgmac *bgmac);
+void bgmac_adjust_link(struct net_device *net_dev);
+int bgmac_phy_connect_direct(struct bgmac *bgmac);
 
 struct mii_bus *bcma_mdio_mii_register(struct bcma_device *core, u8 phyaddr);
 void bcma_mdio_mii_unregister(struct mii_bus *mii_bus);
@@ -583,4 +587,9 @@ static inline void bgmac_set(struct bgmac *bgmac, u16 offset, u32 set)
 {
 	bgmac_maskset(bgmac, offset, ~0, set);
 }
+
+static inline int bgmac_phy_connect(struct bgmac *bgmac)
+{
+	return bgmac->phy_connect(bgmac);	/* bus-specific hook */
+}
 #endif /* _BGMAC_H */
diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c
index 1f7034d739b0..d5d1026be4b7 100644
--- a/drivers/net/ethernet/broadcom/bnx2.c
+++ b/drivers/net/ethernet/broadcom/bnx2.c
@@ -254,13 +254,10 @@ static inline u32 bnx2_tx_avail(struct bnx2 *bp, struct bnx2_tx_ring_info *txr)
 {
 	u32 diff;
 
-	/* Tell compiler to fetch tx_prod and tx_cons from memory. */
-	barrier();
-
 	/* The ring uses 256 indices for 255 entries, one of them
 	 * needs to be skipped.
 	 */
-	diff = txr->tx_prod - txr->tx_cons;
+	diff = READ_ONCE(txr->tx_prod) - READ_ONCE(txr->tx_cons);
 	if (unlikely(diff >= BNX2_TX_DESC_CNT)) {
 		diff &= 0xffff;
 		if (diff == BNX2_TX_DESC_CNT)
@@ -2304,7 +2301,7 @@ bnx2_init_5706s_phy(struct bnx2 *bp, int reset_phy)
 	if (BNX2_CHIP(bp) == BNX2_CHIP_5706)
 		BNX2_WR(bp, BNX2_MISC_GP_HW_CTL0, 0x300);
 
-	if (bp->dev->mtu > 1500) {
+	if (bp->dev->mtu > ETH_DATA_LEN) {
 		u32 val;
 
 		/* Set extended packet length bit */
@@ -2358,7 +2355,7 @@ bnx2_init_copper_phy(struct bnx2 *bp, int reset_phy)
 		bnx2_write_phy(bp, MII_BNX2_DSP_RW_PORT, val);
 	}
 
-	if (bp->dev->mtu > 1500) {
+	if (bp->dev->mtu > ETH_DATA_LEN) {
 		/* Set extended packet length bit */
 		bnx2_write_phy(bp, 0x18, 0x7);
 		bnx2_read_phy(bp, 0x18, &val);
@@ -2839,10 +2836,8 @@ bnx2_get_hw_tx_cons(struct bnx2_napi *bnapi)
 {
 	u16 cons;
 
-	/* Tell compiler that status block fields can change. */
-	barrier();
-	cons = *bnapi->hw_tx_cons_ptr;
-	barrier();
+	cons = READ_ONCE(*bnapi->hw_tx_cons_ptr);
+
 	if (unlikely((cons & BNX2_MAX_TX_DESC_CNT) == BNX2_MAX_TX_DESC_CNT))
 		cons++;
 	return cons;
@@ -3141,10 +3136,8 @@ bnx2_get_hw_rx_cons(struct bnx2_napi *bnapi)
 {
 	u16 cons;
 
-	/* Tell compiler that status block fields can change. */
-	barrier();
-	cons = *bnapi->hw_rx_cons_ptr;
-	barrier();
+	cons = READ_ONCE(*bnapi->hw_rx_cons_ptr);
+
 	if (unlikely((cons & BNX2_MAX_RX_DESC_CNT) == BNX2_MAX_RX_DESC_CNT))
 		cons++;
 	return cons;
@@ -5007,12 +5000,12 @@ bnx2_init_chip(struct bnx2 *bp)
 	/* Program the MTU.  Also include 4 bytes for CRC32. */
 	mtu = bp->dev->mtu;
 	val = mtu + ETH_HLEN + ETH_FCS_LEN;
-	if (val > (MAX_ETHERNET_PACKET_SIZE + 4))
+	if (val > (MAX_ETHERNET_PACKET_SIZE + ETH_HLEN + 4))
 		val |= BNX2_EMAC_RX_MTU_SIZE_JUMBO_ENA;
 	BNX2_WR(bp, BNX2_EMAC_RX_MTU_SIZE, val);
 
-	if (mtu < 1500)
-		mtu = 1500;
+	if (mtu < ETH_DATA_LEN)
+		mtu = ETH_DATA_LEN;
 
 	bnx2_reg_wr_ind(bp, BNX2_RBUF_CONFIG, BNX2_RBUF_CONFIG_VAL(mtu));
 	bnx2_reg_wr_ind(bp, BNX2_RBUF_CONFIG2, BNX2_RBUF_CONFIG2_VAL(mtu));
@@ -6904,12 +6897,14 @@ bnx2_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *net_stats)
 /* All ethtool functions called with rtnl_lock */
 
 static int
-bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+bnx2_get_link_ksettings(struct net_device *dev,
+			struct ethtool_link_ksettings *cmd)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 	int support_serdes = 0, support_copper = 0;
+	u32 supported, advertising;
 
-	cmd->supported = SUPPORTED_Autoneg;
+	supported = SUPPORTED_Autoneg;
 	if (bp->phy_flags & BNX2_PHY_FLAG_REMOTE_PHY_CAP) {
 		support_serdes = 1;
 		support_copper = 1;
@@ -6919,56 +6914,59 @@ bnx2_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		support_copper = 1;
 
 	if (support_serdes) {
-		cmd->supported |= SUPPORTED_1000baseT_Full |
+		supported |= SUPPORTED_1000baseT_Full |
 			SUPPORTED_FIBRE;
 		if (bp->phy_flags & BNX2_PHY_FLAG_2_5G_CAPABLE)
-			cmd->supported |= SUPPORTED_2500baseX_Full;
-
+			supported |= SUPPORTED_2500baseX_Full;
 	}
 	if (support_copper) {
-		cmd->supported |= SUPPORTED_10baseT_Half |
+		supported |= SUPPORTED_10baseT_Half |
 			SUPPORTED_10baseT_Full |
 			SUPPORTED_100baseT_Half |
 			SUPPORTED_100baseT_Full |
 			SUPPORTED_1000baseT_Full |
 			SUPPORTED_TP;
-
 	}
 
 	spin_lock_bh(&bp->phy_lock);
-	cmd->port = bp->phy_port;
-	cmd->advertising = bp->advertising;
+	cmd->base.port = bp->phy_port;
+	advertising = bp->advertising;
 
 	if (bp->autoneg & AUTONEG_SPEED) {
-		cmd->autoneg = AUTONEG_ENABLE;
+		cmd->base.autoneg = AUTONEG_ENABLE;
 	} else {
-		cmd->autoneg = AUTONEG_DISABLE;
+		cmd->base.autoneg = AUTONEG_DISABLE;
 	}
 
 	if (netif_carrier_ok(dev)) {
-		ethtool_cmd_speed_set(cmd, bp->line_speed);
-		cmd->duplex = bp->duplex;
+		cmd->base.speed = bp->line_speed;
+		cmd->base.duplex = bp->duplex;
 		if (!(bp->phy_flags & BNX2_PHY_FLAG_SERDES)) {
 			if (bp->phy_flags & BNX2_PHY_FLAG_MDIX)
-				cmd->eth_tp_mdix = ETH_TP_MDI_X;
+				cmd->base.eth_tp_mdix = ETH_TP_MDI_X;
 			else
-				cmd->eth_tp_mdix = ETH_TP_MDI;
+				cmd->base.eth_tp_mdix = ETH_TP_MDI;
 		}
 	}
 	else {
-		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
-		cmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 	spin_unlock_bh(&bp->phy_lock);
 
-	cmd->transceiver = XCVR_INTERNAL;
-	cmd->phy_address = bp->phy_addr;
+	cmd->base.phy_address = bp->phy_addr;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
 
 static int
-bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+bnx2_set_link_ksettings(struct net_device *dev,
+			const struct ethtool_link_ksettings *cmd)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 	u8 autoneg = bp->autoneg;
@@ -6979,24 +6977,26 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 	spin_lock_bh(&bp->phy_lock);
 
-	if (cmd->port != PORT_TP && cmd->port != PORT_FIBRE)
+	if (cmd->base.port != PORT_TP && cmd->base.port != PORT_FIBRE)
 		goto err_out_unlock;
 
-	if (cmd->port != bp->phy_port &&
+	if (cmd->base.port != bp->phy_port &&
 	    !(bp->phy_flags & BNX2_PHY_FLAG_REMOTE_PHY_CAP))
 		goto err_out_unlock;
 
 	/* If device is down, we can store the settings only if the user
 	 * is setting the currently active port.
 	 */
-	if (!netif_running(dev) && cmd->port != bp->phy_port)
+	if (!netif_running(dev) && cmd->base.port != bp->phy_port)
 		goto err_out_unlock;
 
-	if (cmd->autoneg == AUTONEG_ENABLE) {
+	if (cmd->base.autoneg == AUTONEG_ENABLE) {
 		autoneg |= AUTONEG_SPEED;
 
-		advertising = cmd->advertising;
-		if (cmd->port == PORT_TP) {
+		ethtool_convert_link_mode_to_legacy_u32(
+			&advertising, cmd->link_modes.advertising);
+
+		if (cmd->base.port == PORT_TP) {
 			advertising &= ETHTOOL_ALL_COPPER_SPEED;
 			if (!advertising)
 				advertising = ETHTOOL_ALL_COPPER_SPEED;
@@ -7008,11 +7008,12 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 		advertising |= ADVERTISED_Autoneg;
 	}
 	else {
-		u32 speed = ethtool_cmd_speed(cmd);
-		if (cmd->port == PORT_FIBRE) {
+		u32 speed = cmd->base.speed;
+
+		if (cmd->base.port == PORT_FIBRE) {
 			if ((speed != SPEED_1000 &&
 			     speed != SPEED_2500) ||
-			    (cmd->duplex != DUPLEX_FULL))
+			    (cmd->base.duplex != DUPLEX_FULL))
 				goto err_out_unlock;
 
 			if (speed == SPEED_2500 &&
@@ -7023,7 +7024,7 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 
 		autoneg &= ~AUTONEG_SPEED;
 		req_line_speed = speed;
-		req_duplex = cmd->duplex;
+		req_duplex = cmd->base.duplex;
 		advertising = 0;
 	}
 
@@ -7037,7 +7038,7 @@ bnx2_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
 	 * brought up.
 	 */
 	if (netif_running(dev))
-		err = bnx2_setup_phy(bp, cmd->port);
+		err = bnx2_setup_phy(bp, cmd->base.port);
 
 err_out_unlock:
 	spin_unlock_bh(&bp->phy_lock);
@@ -7822,8 +7823,6 @@ static int bnx2_set_channels(struct net_device *dev,
 }
 
 static const struct ethtool_ops bnx2_ethtool_ops = {
-	.get_settings		= bnx2_get_settings,
-	.set_settings		= bnx2_set_settings,
 	.get_drvinfo		= bnx2_get_drvinfo,
 	.get_regs_len		= bnx2_get_regs_len,
 	.get_regs		= bnx2_get_regs,
@@ -7847,6 +7846,8 @@ static const struct ethtool_ops bnx2_ethtool_ops = {
 	.get_sset_count		= bnx2_get_sset_count,
 	.get_channels		= bnx2_get_channels,
 	.set_channels		= bnx2_set_channels,
+	.get_link_ksettings	= bnx2_get_link_ksettings,
+	.set_link_ksettings	= bnx2_set_link_ksettings,
 };
 
 /* Called with rtnl_lock */
@@ -7923,10 +7924,6 @@ bnx2_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct bnx2 *bp = netdev_priv(dev);
 
-	if (((new_mtu + ETH_HLEN) > MAX_ETHERNET_JUMBO_PACKET_SIZE) ||
-		((new_mtu + ETH_HLEN) < MIN_ETHERNET_PACKET_SIZE))
-		return -EINVAL;
-
 	dev->mtu = new_mtu;
 	return bnx2_change_ring_size(bp, bp->rx_ring_size, bp->tx_ring_size,
 				     false);
@@ -8619,6 +8616,8 @@ bnx2_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
 	dev->features |= dev->hw_features;
 	dev->priv_flags |= IFF_UNICAST_FLT;
+	dev->min_mtu = MIN_ETHERNET_PACKET_SIZE;
+	dev->max_mtu = MAX_ETHERNET_JUMBO_PACKET_SIZE;
 
 	if (!(bp->flags & BNX2_FLAG_CAN_KEEP_VLAN))
 		dev->hw_features &= ~NETIF_F_HW_VLAN_CTAG_RX;
diff --git a/drivers/net/ethernet/broadcom/bnx2.h b/drivers/net/ethernet/broadcom/bnx2.h
index 380234d72b95..a09ec47461c9 100644
--- a/drivers/net/ethernet/broadcom/bnx2.h
+++ b/drivers/net/ethernet/broadcom/bnx2.h
@@ -6530,9 +6530,9 @@ struct l2_fhdr {
 #define MII_BNX2_AER_AER_AN_MMD			   0x3800
 #define MII_BNX2_BLK_ADDR_COMBO_IEEEB0		 0xffe0
 
-#define MIN_ETHERNET_PACKET_SIZE	60
-#define MAX_ETHERNET_PACKET_SIZE	1514
-#define MAX_ETHERNET_JUMBO_PACKET_SIZE	9014
+#define MIN_ETHERNET_PACKET_SIZE	(ETH_ZLEN - ETH_HLEN)
+#define MAX_ETHERNET_PACKET_SIZE	ETH_DATA_LEN
+#define MAX_ETHERNET_JUMBO_PACKET_SIZE	9000
 
 #define BNX2_RX_COPY_THRESH		128
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 7dd7490fdac1..0a23034bbe3f 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -1396,9 +1396,9 @@ struct bnx2x {
 	int			tx_ring_size;
 
 /* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */
-#define ETH_OVREHEAD		(ETH_HLEN + 8 + 8)
-#define ETH_MIN_PACKET_SIZE		60
-#define ETH_MAX_PACKET_SIZE		1500
+#define ETH_OVERHEAD		(ETH_HLEN + 8 + 8)
+#define ETH_MIN_PACKET_SIZE		(ETH_ZLEN - ETH_HLEN)
+#define ETH_MAX_PACKET_SIZE		ETH_DATA_LEN
 #define ETH_MAX_JUMBO_PACKET_SIZE	9600
 /* TCP with Timestamp Option (32) + IPv6 (40) */
 #define ETH_MAX_TPA_HEADER_SIZE		72
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
index 0a9108cd4c45..3e199d3e461e 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
@@ -724,7 +724,7 @@ static void bnx2x_gro_ipv6_csum(struct bnx2x *bp, struct sk_buff *skb)
 static void bnx2x_gro_csum(struct bnx2x *bp, struct sk_buff *skb,
 			    void (*gro_func)(struct bnx2x*, struct sk_buff*))
 {
-	skb_set_network_header(skb, 0);
+	skb_reset_network_header(skb);
 	gro_func(bp, skb);
 	tcp_gro_complete(skb);
 }
@@ -2023,7 +2023,7 @@ static void bnx2x_set_rx_buf_size(struct bnx2x *bp)
 			mtu = bp->dev->mtu;
 		fp->rx_buf_size = BNX2X_FW_RX_ALIGN_START +
 				  IP_HEADER_ALIGNMENT_PADDING +
-				  ETH_OVREHEAD +
+				  ETH_OVERHEAD +
 				  mtu +
 				  BNX2X_FW_RX_ALIGN_END;
 		/* Note : rx_buf_size doesn't take into account NET_SKB_PAD */
@@ -3248,13 +3248,14 @@ static int bnx2x_poll(struct napi_struct *napi, int budget)
 			rmb();
 
 			if (!(bnx2x_has_rx_work(fp) || bnx2x_has_tx_work(fp))) {
-				napi_complete(napi);
-				/* Re-enable interrupts */
-				DP(NETIF_MSG_RX_STATUS,
-				   "Update index to %d\n", fp->fp_hc_idx);
-				bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
-					     le16_to_cpu(fp->fp_hc_idx),
-					     IGU_INT_ENABLE, 1);
+				if (napi_complete_done(napi, rx_work_done)) {
+					/* Re-enable interrupts */
+					DP(NETIF_MSG_RX_STATUS,
+					   "Update index to %d\n", fp->fp_hc_idx);
+					bnx2x_ack_sb(bp, fp->igu_sb_id, USTORM_ID,
+						     le16_to_cpu(fp->fp_hc_idx),
+						     IGU_INT_ENABLE, 1);
+				}
 			} else {
 				rx_work_done = budget;
 			}
@@ -4855,12 +4856,6 @@ int bnx2x_change_mtu(struct net_device *dev, int new_mtu)
 		return -EAGAIN;
 	}
 
-	if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) ||
-	    ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE)) {
-		BNX2X_ERR("Can't support requested MTU size\n");
-		return -EINVAL;
-	}
-
 	/* This does not race with packet allocation
 	 * because the actual alloc size is
 	 * only updated as part of load
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
index 85a7800bfc12..5f19427c7b27 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_ethtool.c
@@ -1872,8 +1872,16 @@ static void bnx2x_get_ringparam(struct net_device *dev,
 
 	ering->rx_max_pending = MAX_RX_AVAIL;
 
+	/* If size isn't already set, we give an estimate of the number
+	 * of buffers we'll have. This neglects some possible conditions
+	 * [we can't know for certain at this point whether the number of
+	 * queues might shrink], but the number is correct for the likely
+	 * scenario.
+	 */
 	if (bp->rx_ring_size)
 		ering->rx_pending = bp->rx_ring_size;
+	else if (BNX2X_NUM_RX_QUEUES(bp))
+		ering->rx_pending = MAX_RX_AVAIL / BNX2X_NUM_RX_QUEUES(bp);
 	else
 		ering->rx_pending = MAX_RX_AVAIL;
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 1fb80100e5e7..05356efdbf93 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -34,12 +34,6 @@ typedef int (*read_sfp_module_eeprom_func_p)(struct bnx2x_phy *phy,
 					     u8 dev_addr, u16 addr, u8 byte_cnt,
 					     u8 *o_buf, u8);
 /********************************************************/
-#define ETH_HLEN			14
-/* L2 header size + 2*VLANs (8 bytes) + LLC SNAP (8 bytes) */
-#define ETH_OVREHEAD			(ETH_HLEN + 8 + 8)
-#define ETH_MIN_PACKET_SIZE		60
-#define ETH_MAX_PACKET_SIZE		1500
-#define ETH_MAX_JUMBO_PACKET_SIZE	9600
 #define MDIO_ACCESS_TIMEOUT		1000
 #define WC_LANE_MAX			4
 #define I2C_SWITCH_WIDTH		2
@@ -1917,7 +1911,7 @@ static int bnx2x_emac_enable(struct link_params *params,
 	/* Enable emac for jumbo packets */
 	EMAC_WR(bp, EMAC_REG_EMAC_RX_MTU_SIZE,
 		(EMAC_RX_MTU_SIZE_JUMBO_ENA |
-		 (ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD)));
+		 (ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD)));
 
 	/* Strip CRC */
 	REG_WR(bp, NIG_REG_NIG_INGRESS_EMAC0_NO_CRC + port*4, 0x1);
@@ -2314,19 +2308,19 @@ static int bnx2x_bmac1_enable(struct link_params *params,
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_BMAC_CONTROL, wb_data, 2);
 
 	/* Set rx mtu */
-	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
+	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_RX_MAX_SIZE, wb_data, 2);
 
 	bnx2x_update_pfc_bmac1(params, vars);
 
 	/* Set tx mtu */
-	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
+	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_TX_MAX_SIZE, wb_data, 2);
 
 	/* Set cnt max size */
-	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
+	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC_REGISTER_CNT_MAX_SIZE, wb_data, 2);
 
@@ -2384,18 +2378,18 @@ static int bnx2x_bmac2_enable(struct link_params *params,
 	udelay(30);
 
 	/* Set RX MTU */
-	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
+	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_RX_MAX_SIZE, wb_data, 2);
 	udelay(30);
 
 	/* Set TX MTU */
-	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD;
+	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_TX_MAX_SIZE, wb_data, 2);
 	udelay(30);
 	/* Set cnt max size */
-	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVREHEAD - 2;
+	wb_data[0] = ETH_MAX_JUMBO_PACKET_SIZE + ETH_OVERHEAD - 2;
 	wb_data[1] = 0;
 	REG_WR_DMAE(bp, bmac_addr + BIGMAC2_REGISTER_CNT_MAX_SIZE, wb_data, 2);
 	udelay(30);
@@ -2516,7 +2510,7 @@ static int bnx2x_pbf_update(struct link_params *params, u32 flow_ctrl,
 
 	} else {
 		u32 thresh = (ETH_MAX_JUMBO_PACKET_SIZE +
-			      ETH_OVREHEAD)/16;
+			      ETH_OVERHEAD)/16;
 		REG_WR(bp, PBF_REG_P0_PAUSE_ENABLE + port*4, 0);
 		/* Update threshold */
 		REG_WR(bp, PBF_REG_P0_ARB_THRSH + port*4, thresh);
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 0cee4c0283f9..688617ac8c29 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -10138,7 +10138,7 @@ static void __bnx2x_add_udp_port(struct bnx2x *bp, u16 port,
 {
 	struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type];
 
-	if (!netif_running(bp->dev) || !IS_PF(bp))
+	if (!netif_running(bp->dev) || !IS_PF(bp) || CHIP_IS_E1x(bp))
 		return;
 
 	if (udp_port->count && udp_port->dst_port == port) {
@@ -10163,7 +10163,7 @@ static void __bnx2x_del_udp_port(struct bnx2x *bp, u16 port,
 {
 	struct bnx2x_udp_tunnel *udp_port = &bp->udp_tunnel_ports[type];
 
-	if (!IS_PF(bp))
+	if (!IS_PF(bp) || CHIP_IS_E1x(bp))
 		return;
 
 	if (!udp_port->count || udp_port->dst_port != port) {
@@ -12080,8 +12080,7 @@ static int bnx2x_get_hwinfo(struct bnx2x *bp)
 					   mtu_size, mtu);
 
 					/* if valid: update device mtu */
-					if (((mtu_size + ETH_HLEN) >=
-					     ETH_MIN_PACKET_SIZE) &&
+					if ((mtu_size >= ETH_MIN_PACKET_SIZE) &&
 					    (mtu_size <=
 					     ETH_MAX_JUMBO_PACKET_SIZE))
 						bp->dev->mtu = mtu_size;
@@ -13315,6 +13314,10 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev,
 	dev->dcbnl_ops = &bnx2x_dcbnl_ops;
 #endif
 
+	/* MTU range, 46 - 9600 */
+	dev->min_mtu = ETH_MIN_PACKET_SIZE;
+	dev->max_mtu = ETH_MAX_JUMBO_PACKET_SIZE;
+
 	/* get_port_hwinfo() will set prtad and mmds properly */
 	bp->mdio.prtad = MDIO_PRTAD_NONE;
 	bp->mdio.mmds = 0;
@@ -13505,6 +13508,7 @@ static int bnx2x_init_firmware(struct bnx2x *bp)
 
 	/* Initialize the pointers to the init arrays */
 	/* Blob */
+	rc = -ENOMEM;
 	BNX2X_ALLOC_AND_SET(init_data, request_firmware_exit, be32_to_cpu_n);
 
 	/* Opcodes */
diff --git a/drivers/net/ethernet/broadcom/bnxt/Makefile b/drivers/net/ethernet/broadcom/bnxt/Makefile
index 97e78e217928..6082ed1b5ea0 100644
--- a/drivers/net/ethernet/broadcom/bnxt/Makefile
+++ b/drivers/net/ethernet/broadcom/bnxt/Makefile
@@ -1,3 +1,3 @@
 obj-$(CONFIG_BNXT) += bnxt_en.o
 
-bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o
+bnxt_en-y := bnxt.o bnxt_sriov.o bnxt_ethtool.o bnxt_dcb.o bnxt_ulp.o
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index ee1a803aa11a..9608cb49a11c 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -52,8 +52,10 @@
 
 #include "bnxt_hsi.h"
 #include "bnxt.h"
+#include "bnxt_ulp.h"
 #include "bnxt_sriov.h"
 #include "bnxt_ethtool.h"
+#include "bnxt_dcb.h"
 
 #define BNXT_TX_TIMEOUT		(5 * HZ)
 
@@ -186,11 +188,11 @@ static const u16 bnxt_vf_req_snif[] = {
 };
 
 static const u16 bnxt_async_events_arr[] = {
-	HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE,
-	HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD,
-	HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED,
-	HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE,
-	HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE,
+	ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE,
+	ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD,
+	ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED,
+	ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE,
+	ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE,
 };
 
 static bool bnxt_vf_pciid(enum board_idx idx)
@@ -1476,8 +1478,8 @@ next_rx_no_prod:
 }
 
 #define BNXT_GET_EVENT_PORT(data)	\
-	((data) &				\
-	 HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
+	((data) &			\
+	 ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK)
 
 static int bnxt_async_event_process(struct bnxt *bp,
 				    struct hwrm_async_event_cmpl *cmpl)
@@ -1486,7 +1488,7 @@ static int bnxt_async_event_process(struct bnxt *bp,
 
 	/* TODO CHIMP_FW: Define event id's for link change, error etc */
 	switch (event_id) {
-	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: {
+	case ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE: {
 		u32 data1 = le32_to_cpu(cmpl->event_data1);
 		struct bnxt_link_info *link_info = &bp->link_info;
 
@@ -1499,15 +1501,16 @@ static int bnxt_async_event_process(struct bnxt *bp,
 			netdev_warn(bp->dev, "Link speed %d no longer supported\n",
 				    speed);
 		}
+		set_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT, &bp->sp_event);
 		/* fall thru */
 	}
-	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE:
+	case ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE:
 		set_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event);
 		break;
-	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD:
+	case ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD:
 		set_bit(BNXT_HWRM_PF_UNLOAD_SP_EVENT, &bp->sp_event);
 		break;
-	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: {
+	case ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED: {
 		u32 data1 = le32_to_cpu(cmpl->event_data1);
 		u16 port_id = BNXT_GET_EVENT_PORT(data1);
 
@@ -1520,18 +1523,17 @@ static int bnxt_async_event_process(struct bnxt *bp,
 		set_bit(BNXT_HWRM_PORT_MODULE_SP_EVENT, &bp->sp_event);
 		break;
 	}
-	case HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE:
+	case ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE:
 		if (BNXT_PF(bp))
 			goto async_event_process_exit;
 		set_bit(BNXT_RESET_TASK_SILENT_SP_EVENT, &bp->sp_event);
 		break;
 	default:
-		netdev_err(bp->dev, "unhandled ASYNC event (id 0x%x)\n",
-			   event_id);
 		goto async_event_process_exit;
 	}
 	schedule_work(&bp->sp_task);
 async_event_process_exit:
+	bnxt_ulp_async_events(bp, cmpl);
 	return 0;
 }
 
@@ -3115,27 +3117,46 @@ int hwrm_send_message_silent(struct bnxt *bp, void *msg, u32 msg_len,
 	return rc;
 }
 
-static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
+int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap,
+				     int bmap_size)
 {
 	struct hwrm_func_drv_rgtr_input req = {0};
-	int i;
 	DECLARE_BITMAP(async_events_bmap, 256);
 	u32 *events = (u32 *)async_events_bmap;
+	int i;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_RGTR, -1, -1);
 
 	req.enables =
-		cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE |
-			    FUNC_DRV_RGTR_REQ_ENABLES_VER |
-			    FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
+		cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD);
 
 	memset(async_events_bmap, 0, sizeof(async_events_bmap));
 	for (i = 0; i < ARRAY_SIZE(bnxt_async_events_arr); i++)
 		__set_bit(bnxt_async_events_arr[i], async_events_bmap);
 
+	if (bmap && bmap_size) {
+		for (i = 0; i < bmap_size; i++) {
+			if (test_bit(i, bmap))
+				__set_bit(i, async_events_bmap);
+		}
+	}
+
 	for (i = 0; i < 8; i++)
 		req.async_event_fwd[i] |= cpu_to_le32(events[i]);
 
+	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+}
+
+static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
+{
+	struct hwrm_func_drv_rgtr_input req = {0};
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_DRV_RGTR, -1, -1);
+
+	req.enables =
+		cpu_to_le32(FUNC_DRV_RGTR_REQ_ENABLES_OS_TYPE |
+			    FUNC_DRV_RGTR_REQ_ENABLES_VER);
+
 	req.os_type = cpu_to_le16(FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX);
 	req.ver_maj = DRV_VER_MAJ;
 	req.ver_min = DRV_VER_MIN;
@@ -3144,6 +3165,7 @@ static int bnxt_hwrm_func_drv_rgtr(struct bnxt *bp)
 	if (BNXT_PF(bp)) {
 		DECLARE_BITMAP(vf_req_snif_bmap, 256);
 		u32 *data = (u32 *)vf_req_snif_bmap;
+		int i;
 
 		memset(vf_req_snif_bmap, 0, sizeof(vf_req_snif_bmap));
 		for (i = 0; i < ARRAY_SIZE(bnxt_vf_req_snif); i++)
@@ -3433,13 +3455,7 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss)
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1);
 	if (set_rss) {
-		vnic->hash_type = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 |
-				  VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 |
-				  VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
-				  VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
-
-		req.hash_type = cpu_to_le32(vnic->hash_type);
-
+		req.hash_type = cpu_to_le32(bp->rss_hash_cfg);
 		if (vnic->flags & BNXT_VNIC_RSS_FLAG) {
 			if (BNXT_CHIP_TYPE_NITRO_A0(bp))
 				max_rings = bp->rx_nr_rings - 1;
@@ -3531,7 +3547,7 @@ static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx)
 	return rc;
 }
 
-static int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
+int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
 {
 	unsigned int ring = 0, grp_idx;
 	struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id];
@@ -3579,6 +3595,9 @@ static int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
 #endif
 	if ((bp->flags & BNXT_FLAG_STRIP_VLAN) || def_vlan)
 		req.flags |= cpu_to_le32(VNIC_CFG_REQ_FLAGS_VLAN_STRIP_MODE);
+	if (!vnic_id && bnxt_ulp_registered(bp->edev, BNXT_ROCE_ULP))
+		req.flags |=
+			cpu_to_le32(VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE);
 
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
@@ -4120,7 +4139,7 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp)
 		bp->grp_info[i].fw_stats_ctx = cpr->hw_stats_ctx_id;
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
-	return 0;
+	return rc;
 }
 
 static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
@@ -4156,7 +4175,7 @@ func_qcfg_exit:
 	return rc;
 }
 
-int bnxt_hwrm_func_qcaps(struct bnxt *bp)
+static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 {
 	int rc = 0;
 	struct hwrm_func_qcaps_input req = {0};
@@ -4170,6 +4189,11 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp)
 	if (rc)
 		goto hwrm_func_qcaps_exit;
 
+	if (resp->flags & cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_ROCE_V1_SUPPORTED))
+		bp->flags |= BNXT_FLAG_ROCEV1_CAP;
+	if (resp->flags & cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED))
+		bp->flags |= BNXT_FLAG_ROCEV2_CAP;
+
 	bp->tx_push_thresh = 0;
 	if (resp->flags &
 	    cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED))
@@ -4266,12 +4290,16 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp)
 		goto qportcfg_exit;
 	}
 	bp->max_tc = resp->max_configurable_queues;
+	bp->max_lltc = resp->max_configurable_lossless_queues;
 	if (bp->max_tc > BNXT_MAX_QUEUE)
 		bp->max_tc = BNXT_MAX_QUEUE;
 
 	if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG)
 		bp->max_tc = 1;
 
+	if (bp->max_lltc > bp->max_tc)
+		bp->max_lltc = bp->max_tc;
+
 	qptr = &resp->queue_id0;
 	for (i = 0; i < bp->max_tc; i++) {
 		bp->q_info[i].queue_id = *qptr++;
@@ -4743,16 +4771,134 @@ static int bnxt_trim_rings(struct bnxt *bp, int *rx, int *tx, int max,
 	return 0;
 }
 
-static int bnxt_setup_msix(struct bnxt *bp)
+static void bnxt_setup_msix(struct bnxt *bp)
 {
-	struct msix_entry *msix_ent;
+	const int len = sizeof(bp->irq_tbl[0].name);
 	struct net_device *dev = bp->dev;
-	int i, total_vecs, rc = 0, min = 1;
+	int tcs, i;
+
+	tcs = netdev_get_num_tc(dev);
+	if (tcs > 1) {
+		bp->tx_nr_rings_per_tc = bp->tx_nr_rings / tcs;
+		if (bp->tx_nr_rings_per_tc == 0) {
+			netdev_reset_tc(dev);
+			bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
+		} else {
+			int off, count;
+
+			bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs;
+			for (i = 0; i < tcs; i++) {
+				count = bp->tx_nr_rings_per_tc;
+				off = i * count;
+				netdev_set_tc_queue(dev, i, count, off);
+			}
+		}
+	}
+
+	for (i = 0; i < bp->cp_nr_rings; i++) {
+		char *attr;
+
+		if (bp->flags & BNXT_FLAG_SHARED_RINGS)
+			attr = "TxRx";
+		else if (i < bp->rx_nr_rings)
+			attr = "rx";
+		else
+			attr = "tx";
+
+		snprintf(bp->irq_tbl[i].name, len, "%s-%s-%d", dev->name, attr,
+			 i);
+		bp->irq_tbl[i].handler = bnxt_msix;
+	}
+}
+
+static void bnxt_setup_inta(struct bnxt *bp)
+{
 	const int len = sizeof(bp->irq_tbl[0].name);
 
-	bp->flags &= ~BNXT_FLAG_USING_MSIX;
-	total_vecs = bp->cp_nr_rings;
+	if (netdev_get_num_tc(bp->dev))
+		netdev_reset_tc(bp->dev);
+
+	snprintf(bp->irq_tbl[0].name, len, "%s-%s-%d", bp->dev->name, "TxRx",
+		 0);
+	bp->irq_tbl[0].handler = bnxt_inta;
+}
 
+static int bnxt_setup_int_mode(struct bnxt *bp)
+{
+	int rc;
+
+	if (bp->flags & BNXT_FLAG_USING_MSIX)
+		bnxt_setup_msix(bp);
+	else
+		bnxt_setup_inta(bp);
+
+	rc = bnxt_set_real_num_queues(bp);
+	return rc;
+}
+
+unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+	if (BNXT_VF(bp))
+		return bp->vf.max_stat_ctxs;
+#endif
+	return bp->pf.max_stat_ctxs;
+}
+
+void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+	if (BNXT_VF(bp))
+		bp->vf.max_stat_ctxs = max;
+	else
+#endif
+		bp->pf.max_stat_ctxs = max;
+}
+
+unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+	if (BNXT_VF(bp))
+		return bp->vf.max_cp_rings;
+#endif
+	return bp->pf.max_cp_rings;
+}
+
+void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+	if (BNXT_VF(bp))
+		bp->vf.max_cp_rings = max;
+	else
+#endif
+		bp->pf.max_cp_rings = max;
+}
+
+static unsigned int bnxt_get_max_func_irqs(struct bnxt *bp)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+	if (BNXT_VF(bp))
+		return bp->vf.max_irqs;
+#endif
+	return bp->pf.max_irqs;
+}
+
+void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max_irqs)
+{
+#if defined(CONFIG_BNXT_SRIOV)
+	if (BNXT_VF(bp))
+		bp->vf.max_irqs = max_irqs;
+	else
+#endif
+		bp->pf.max_irqs = max_irqs;
+}
+
+static int bnxt_init_msix(struct bnxt *bp)
+{
+	int i, total_vecs, rc = 0, min = 1;
+	struct msix_entry *msix_ent;
+
+	total_vecs = bnxt_get_max_func_irqs(bp);
 	msix_ent = kcalloc(total_vecs, sizeof(struct msix_entry), GFP_KERNEL);
 	if (!msix_ent)
 		return -ENOMEM;
@@ -4773,8 +4919,10 @@ static int bnxt_setup_msix(struct bnxt *bp)
 
 	bp->irq_tbl = kcalloc(total_vecs, sizeof(struct bnxt_irq), GFP_KERNEL);
 	if (bp->irq_tbl) {
-		int tcs;
+		for (i = 0; i < total_vecs; i++)
+			bp->irq_tbl[i].vector = msix_ent[i].vector;
 
+		bp->total_irqs = total_vecs;
 		/* Trim rings based upon num of vectors allocated */
 		rc = bnxt_trim_rings(bp, &bp->rx_nr_rings, &bp->tx_nr_rings,
 				     total_vecs, min == 1);
@@ -4782,43 +4930,10 @@ static int bnxt_setup_msix(struct bnxt *bp)
 			goto msix_setup_exit;
 
 		bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
-		tcs = netdev_get_num_tc(dev);
-		if (tcs > 1) {
-			bp->tx_nr_rings_per_tc = bp->tx_nr_rings / tcs;
-			if (bp->tx_nr_rings_per_tc == 0) {
-				netdev_reset_tc(dev);
-				bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
-			} else {
-				int i, off, count;
-
-				bp->tx_nr_rings = bp->tx_nr_rings_per_tc * tcs;
-				for (i = 0; i < tcs; i++) {
-					count = bp->tx_nr_rings_per_tc;
-					off = i * count;
-					netdev_set_tc_queue(dev, i, count, off);
-				}
-			}
-		}
-		bp->cp_nr_rings = total_vecs;
-
-		for (i = 0; i < bp->cp_nr_rings; i++) {
-			char *attr;
+		bp->cp_nr_rings = (min == 1) ?
+				  max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) :
+				  bp->tx_nr_rings + bp->rx_nr_rings;
 
-			bp->irq_tbl[i].vector = msix_ent[i].vector;
-			if (bp->flags & BNXT_FLAG_SHARED_RINGS)
-				attr = "TxRx";
-			else if (i < bp->rx_nr_rings)
-				attr = "rx";
-			else
-				attr = "tx";
-
-			snprintf(bp->irq_tbl[i].name, len,
-				 "%s-%s-%d", dev->name, attr, i);
-			bp->irq_tbl[i].handler = bnxt_msix;
-		}
-		rc = bnxt_set_real_num_queues(bp);
-		if (rc)
-			goto msix_setup_exit;
 	} else {
 		rc = -ENOMEM;
 		goto msix_setup_exit;
@@ -4828,52 +4943,54 @@ static int bnxt_setup_msix(struct bnxt *bp)
 	return 0;
 
 msix_setup_exit:
-	netdev_err(bp->dev, "bnxt_setup_msix err: %x\n", rc);
+	netdev_err(bp->dev, "bnxt_init_msix err: %x\n", rc);
+	kfree(bp->irq_tbl);
+	bp->irq_tbl = NULL;
 	pci_disable_msix(bp->pdev);
 	kfree(msix_ent);
 	return rc;
 }
 
-static int bnxt_setup_inta(struct bnxt *bp)
+static int bnxt_init_inta(struct bnxt *bp)
 {
-	int rc;
-	const int len = sizeof(bp->irq_tbl[0].name);
-
-	if (netdev_get_num_tc(bp->dev))
-		netdev_reset_tc(bp->dev);
-
 	bp->irq_tbl = kcalloc(1, sizeof(struct bnxt_irq), GFP_KERNEL);
-	if (!bp->irq_tbl) {
-		rc = -ENOMEM;
-		return rc;
-	}
+	if (!bp->irq_tbl)
+		return -ENOMEM;
+
+	bp->total_irqs = 1;
 	bp->rx_nr_rings = 1;
 	bp->tx_nr_rings = 1;
 	bp->cp_nr_rings = 1;
 	bp->tx_nr_rings_per_tc = bp->tx_nr_rings;
 	bp->flags |= BNXT_FLAG_SHARED_RINGS;
 	bp->irq_tbl[0].vector = bp->pdev->irq;
-	snprintf(bp->irq_tbl[0].name, len,
-		 "%s-%s-%d", bp->dev->name, "TxRx", 0);
-	bp->irq_tbl[0].handler = bnxt_inta;
-	rc = bnxt_set_real_num_queues(bp);
-	return rc;
+	return 0;
 }
 
-static int bnxt_setup_int_mode(struct bnxt *bp)
+static int bnxt_init_int_mode(struct bnxt *bp)
 {
 	int rc = 0;
 
 	if (bp->flags & BNXT_FLAG_MSIX_CAP)
-		rc = bnxt_setup_msix(bp);
+		rc = bnxt_init_msix(bp);
 
 	if (!(bp->flags & BNXT_FLAG_USING_MSIX) && BNXT_PF(bp)) {
 		/* fallback to INTA */
-		rc = bnxt_setup_inta(bp);
+		rc = bnxt_init_inta(bp);
 	}
 	return rc;
 }
 
+static void bnxt_clear_int_mode(struct bnxt *bp)
+{
+	if (bp->flags & BNXT_FLAG_USING_MSIX)
+		pci_disable_msix(bp->pdev);
+
+	kfree(bp->irq_tbl);
+	bp->irq_tbl = NULL;
+	bp->flags &= ~BNXT_FLAG_USING_MSIX;
+}
+
 static void bnxt_free_irq(struct bnxt *bp)
 {
 	struct bnxt_irq *irq;
@@ -4892,10 +5009,6 @@ static void bnxt_free_irq(struct bnxt *bp)
 			free_irq(irq->vector, bp->bnapi[i]);
 		irq->requested = 0;
 	}
-	if (bp->flags & BNXT_FLAG_USING_MSIX)
-		pci_disable_msix(bp->pdev);
-	kfree(bp->irq_tbl);
-	bp->irq_tbl = NULL;
 }
 
 static int bnxt_request_irq(struct bnxt *bp)
@@ -4967,7 +5080,6 @@ static void bnxt_init_napi(struct bnxt *bp)
 			bnapi = bp->bnapi[cp_nr_rings];
 			netif_napi_add(bp->dev, &bnapi->napi,
 				       bnxt_poll_nitroa0, 64);
-			napi_hash_add(&bnapi->napi);
 		}
 	} else {
 		bnapi = bp->bnapi[0];
@@ -4999,7 +5111,7 @@ static void bnxt_enable_napi(struct bnxt *bp)
 	}
 }
 
-static void bnxt_tx_disable(struct bnxt *bp)
+void bnxt_tx_disable(struct bnxt *bp)
 {
 	int i;
 	struct bnxt_tx_ring_info *txr;
@@ -5017,7 +5129,7 @@ static void bnxt_tx_disable(struct bnxt *bp)
 	netif_carrier_off(bp->dev);
 }
 
-static void bnxt_tx_enable(struct bnxt *bp)
+void bnxt_tx_enable(struct bnxt *bp)
 {
 	int i;
 	struct bnxt_tx_ring_info *txr;
@@ -5109,6 +5221,7 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
 	struct hwrm_port_phy_qcfg_input req = {0};
 	struct hwrm_port_phy_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
 	u8 link_up = link_info->link_up;
+	u16 diff;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_QCFG, -1, -1);
 
@@ -5196,6 +5309,23 @@ static int bnxt_update_link(struct bnxt *bp, bool chng_link_state)
 		link_info->link_up = 0;
 	}
 	mutex_unlock(&bp->hwrm_cmd_lock);
+
+	diff = link_info->support_auto_speeds ^ link_info->advertising;
+	if ((link_info->support_auto_speeds | diff) !=
+	    link_info->support_auto_speeds) {
+		/* An advertised speed is no longer supported, so we need to
+		 * update the advertisement settings.  See bnxt_reset() for
+		 * comments about the rtnl_lock() sequence below.
+		 */
+		clear_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+		rtnl_lock();
+		link_info->advertising = link_info->support_auto_speeds;
+		if (test_bit(BNXT_STATE_OPEN, &bp->state) &&
+		    (link_info->autoneg & BNXT_AUTONEG_SPEED))
+			bnxt_hwrm_set_link_setting(bp, true, false);
+		set_bit(BNXT_STATE_IN_SP_TASK, &bp->state);
+		rtnl_unlock();
+	}
 	return 0;
 }
 
@@ -5360,7 +5490,7 @@ static int bnxt_hwrm_shutdown_link(struct bnxt *bp)
 		return 0;
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_PORT_PHY_CFG, -1, -1);
-	req.flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DOWN);
+	req.flags = cpu_to_le32(PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN);
 	return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
 }
 
@@ -5423,6 +5553,12 @@ static int bnxt_update_phy_setting(struct bnxt *bp)
 			update_link = true;
 	}
 
+	/* The last close may have shut down the link, so we need to call
+	 * PHY_CFG to bring it back up.
+	 */
+	if (!netif_carrier_ok(bp->dev))
+		update_link = true;
+
 	if (!bnxt_eee_config_ok(bp))
 		update_eee = true;
 
@@ -5543,22 +5679,7 @@ int bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init)
 static int bnxt_open(struct net_device *dev)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	int rc = 0;
 
-	if (!test_bit(BNXT_STATE_FN_RST_DONE, &bp->state)) {
-		rc = bnxt_hwrm_func_reset(bp);
-		if (rc) {
-			netdev_err(bp->dev, "hwrm chip reset failure rc: %x\n",
-				   rc);
-			rc = -EBUSY;
-			return rc;
-		}
-		/* Do func_reset during the 1st PF open only to prevent killing
-		 * the VFs when the PF is brought down and up.
-		 */
-		if (BNXT_PF(bp))
-			set_bit(BNXT_STATE_FN_RST_DONE, &bp->state);
-	}
 	return __bnxt_open_nic(bp, true, true);
 }
 
@@ -6116,6 +6237,10 @@ static void bnxt_sp_task(struct work_struct *work)
 	if (test_and_clear_bit(BNXT_RX_NTP_FLTR_SP_EVENT, &bp->sp_event))
 		bnxt_cfg_ntp_filters(bp);
 	if (test_and_clear_bit(BNXT_LINK_CHNG_SP_EVENT, &bp->sp_event)) {
+		if (test_and_clear_bit(BNXT_LINK_SPEED_CHNG_SP_EVENT,
+				       &bp->sp_event))
+			bnxt_hwrm_phy_qcaps(bp);
+
 		rc = bnxt_update_link(bp, true);
 		if (rc)
 			netdev_err(bp->dev, "SP task can't update link (rc: %x)\n",
@@ -6303,9 +6428,6 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct bnxt *bp = netdev_priv(dev);
 
-	if (new_mtu < 60 || new_mtu > 9500)
-		return -EINVAL;
-
 	if (netif_running(dev))
 		bnxt_close_nic(bp, false, false);
 
@@ -6318,17 +6440,10 @@ static int bnxt_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
-			 struct tc_to_netdev *ntc)
+int bnxt_setup_mq_tc(struct net_device *dev, u8 tc)
 {
 	struct bnxt *bp = netdev_priv(dev);
 	bool sh = false;
-	u8 tc;
-
-	if (ntc->type != TC_SETUP_MQPRIO)
-		return -EINVAL;
-
-	tc = ntc->tc;
 
 	if (tc > bp->max_tc) {
 		netdev_err(dev, "too many traffic classes requested: %d Max supported is %d\n",
@@ -6371,6 +6486,15 @@ static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
 	return 0;
 }
 
+static int bnxt_setup_tc(struct net_device *dev, u32 handle, __be16 proto,
+			 struct tc_to_netdev *ntc)
+{
+	if (ntc->type != TC_SETUP_MQPRIO)
+		return -EINVAL;
+
+	return bnxt_setup_mq_tc(dev, ntc->tc);
+}
+
 #ifdef CONFIG_RFS_ACCEL
 static bool bnxt_fltr_match(struct bnxt_ntuple_filter *f1,
 			    struct bnxt_ntuple_filter *f2)
@@ -6659,11 +6783,15 @@ static void bnxt_remove_one(struct pci_dev *pdev)
 	cancel_work_sync(&bp->sp_task);
 	bp->sp_event = 0;
 
+	bnxt_clear_int_mode(bp);
 	bnxt_hwrm_func_drv_unrgtr(bp);
 	bnxt_free_hwrm_resources(bp);
+	bnxt_dcb_free(bp);
 	pci_iounmap(pdev, bp->bar2);
 	pci_iounmap(pdev, bp->bar1);
 	pci_iounmap(pdev, bp->bar0);
+	kfree(bp->edev);
+	bp->edev = NULL;
 	free_netdev(dev);
 
 	pci_release_regions(pdev);
@@ -6772,6 +6900,39 @@ int bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, bool shared)
 	return bnxt_trim_rings(bp, max_rx, max_tx, cp, shared);
 }
 
+static int bnxt_get_dflt_rings(struct bnxt *bp, int *max_rx, int *max_tx,
+			       bool shared)
+{
+	int rc;
+
+	rc = bnxt_get_max_rings(bp, max_rx, max_tx, shared);
+	if (rc)
+		return rc;
+
+	if (bp->flags & BNXT_FLAG_ROCE_CAP) {
+		int max_cp, max_stat, max_irq;
+
+		/* Reserve minimum resources for RoCE */
+		max_cp = bnxt_get_max_func_cp_rings(bp);
+		max_stat = bnxt_get_max_func_stat_ctxs(bp);
+		max_irq = bnxt_get_max_func_irqs(bp);
+		if (max_cp <= BNXT_MIN_ROCE_CP_RINGS ||
+		    max_irq <= BNXT_MIN_ROCE_CP_RINGS ||
+		    max_stat <= BNXT_MIN_ROCE_STAT_CTXS)
+			return 0;
+
+		max_cp -= BNXT_MIN_ROCE_CP_RINGS;
+		max_irq -= BNXT_MIN_ROCE_CP_RINGS;
+		max_stat -= BNXT_MIN_ROCE_STAT_CTXS;
+		max_cp = min_t(int, max_cp, max_irq);
+		max_cp = min_t(int, max_cp, max_stat);
+		rc = bnxt_trim_rings(bp, max_rx, max_tx, max_cp, shared);
+		if (rc)
+			rc = 0;
+	}
+	return rc;
+}
+
 static int bnxt_set_dflt_rings(struct bnxt *bp)
 {
 	int dflt_rings, max_rx_rings, max_tx_rings, rc;
@@ -6780,7 +6941,7 @@ static int bnxt_set_dflt_rings(struct bnxt *bp)
 	if (sh)
 		bp->flags |= BNXT_FLAG_SHARED_RINGS;
 	dflt_rings = netif_get_num_default_rss_queues();
-	rc = bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, sh);
+	rc = bnxt_get_dflt_rings(bp, &max_rx_rings, &max_tx_rings, sh);
 	if (rc)
 		return rc;
 	bp->rx_nr_rings = min_t(int, dflt_rings, max_rx_rings);
@@ -6796,6 +6957,13 @@ static int bnxt_set_dflt_rings(struct bnxt *bp)
 	return rc;
 }
 
+void bnxt_restore_pf_fw_resources(struct bnxt *bp)
+{
+	ASSERT_RTNL();
+	bnxt_hwrm_func_qcaps(bp);
+	bnxt_subtract_ulp_resources(bp, BNXT_ROCE_ULP);
+}
+
 static void bnxt_parse_log_pcie_link(struct bnxt *bp)
 {
 	enum pcie_link_width width = PCIE_LNK_WIDTH_UNKNOWN;
@@ -6884,6 +7052,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->features |= dev->hw_features | NETIF_F_HIGHDMA;
 	dev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 60 - 9500 */
+	dev->min_mtu = ETH_ZLEN;
+	dev->max_mtu = 9500;
+
+	bnxt_dcb_init(bp);
+
 #ifdef CONFIG_BNXT_SRIOV
 	init_waitqueue_head(&bp->sriov_cfg_wait);
 #endif
@@ -6895,6 +7069,12 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto init_err;
 
+	rc = bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
+	if (rc)
+		goto init_err;
+
+	bp->ulp_probe = bnxt_ulp_probe;
+
 	/* Get the MAX capabilities for this function */
 	rc = bnxt_hwrm_func_qcaps(bp);
 	if (rc) {
@@ -6916,14 +7096,22 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	bnxt_set_tpa_flags(bp);
 	bnxt_set_ring_params(bp);
-	if (BNXT_PF(bp))
-		bp->pf.max_irqs = max_irqs;
-#if defined(CONFIG_BNXT_SRIOV)
-	else
-		bp->vf.max_irqs = max_irqs;
-#endif
+	bnxt_set_max_func_irqs(bp, max_irqs);
 	bnxt_set_dflt_rings(bp);
 
+	/* Default RSS hash cfg. */
+	bp->rss_hash_cfg = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 |
+			   VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 |
+			   VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 |
+			   VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
+	if (!BNXT_CHIP_NUM_57X0X(bp->chip_num) &&
+	    !BNXT_CHIP_TYPE_NITRO_A0(bp) &&
+	    bp->hwrm_spec_code >= 0x10501) {
+		bp->flags |= BNXT_FLAG_UDP_RSS_CAP;
+		bp->rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4 |
+				    VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+	}
+
 	if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) {
 		dev->hw_features |= NETIF_F_NTUPLE;
 		if (bnxt_rfs_capable(bp)) {
@@ -6939,10 +7127,18 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto init_err;
 
-	rc = register_netdev(dev);
+	rc = bnxt_hwrm_func_reset(bp);
+	if (rc)
+		goto init_err;
+
+	rc = bnxt_init_int_mode(bp);
 	if (rc)
 		goto init_err;
 
+	rc = register_netdev(dev);
+	if (rc)
+		goto init_err_clr_int;
+
 	netdev_info(dev, "%s found at mem %lx, node addr %pM\n",
 		    board_info[ent->driver_data].name,
 		    (long)pci_resource_start(pdev, 0), dev->dev_addr);
@@ -6951,6 +7147,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	return 0;
 
+init_err_clr_int:
+	bnxt_clear_int_mode(bp);
+
 init_err:
 	pci_iounmap(pdev, bp->bar0);
 	pci_release_regions(pdev);
@@ -6980,6 +7179,8 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
 	rtnl_lock();
 	netif_device_detach(netdev);
 
+	bnxt_ulp_stop(bp);
+
 	if (state == pci_channel_io_perm_failure) {
 		rtnl_unlock();
 		return PCI_ERS_RESULT_DISCONNECT;
@@ -6988,8 +7189,6 @@ static pci_ers_result_t bnxt_io_error_detected(struct pci_dev *pdev,
 	if (netif_running(netdev))
 		bnxt_close(netdev);
 
-	/* So that func_reset will be done during slot_reset */
-	clear_bit(BNXT_STATE_FN_RST_DONE, &bp->state);
 	pci_disable_device(pdev);
 	rtnl_unlock();
 
@@ -7023,11 +7222,14 @@ static pci_ers_result_t bnxt_io_slot_reset(struct pci_dev *pdev)
 	} else {
 		pci_set_master(pdev);
 
-		if (netif_running(netdev))
+		err = bnxt_hwrm_func_reset(bp);
+		if (!err && netif_running(netdev))
 			err = bnxt_open(netdev);
 
-		if (!err)
+		if (!err) {
 			result = PCI_ERS_RESULT_RECOVERED;
+			bnxt_ulp_start(bp);
+		}
 	}
 
 	if (result != PCI_ERS_RESULT_RECOVERED && netif_running(netdev))
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 51b164a0e844..16defe9ececc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -11,10 +11,10 @@
 #define BNXT_H
 
 #define DRV_MODULE_NAME		"bnxt_en"
-#define DRV_MODULE_VERSION	"1.5.0"
+#define DRV_MODULE_VERSION	"1.6.0"
 
 #define DRV_VER_MAJ	1
-#define DRV_VER_MIN	5
+#define DRV_VER_MIN	6
 #define DRV_VER_UPD	0
 
 struct tx_bd {
@@ -387,6 +387,9 @@ struct rx_tpa_end_cmp_ext {
 #define DB_KEY_TX_PUSH						(0x4 << 28)
 #define DB_LONG_TX_PUSH						(0x2 << 24)
 
+#define BNXT_MIN_ROCE_CP_RINGS	2
+#define BNXT_MIN_ROCE_STAT_CTXS	1
+
 #define INVALID_HW_RING_ID	((u16)-1)
 
 /* The hardware supports certain page sizes.  Use the supported page sizes
@@ -700,7 +703,6 @@ struct bnxt_vnic_info {
 	u8		*uc_list;
 
 	u16		*fw_grp_ids;
-	u16		hash_type;
 	dma_addr_t	rss_table_dma_addr;
 	__le16		*rss_table;
 	dma_addr_t	rss_hash_key_dma_addr;
@@ -952,7 +954,12 @@ struct bnxt {
 	#define BNXT_FLAG_RFS		0x100
 	#define BNXT_FLAG_SHARED_RINGS	0x200
 	#define BNXT_FLAG_PORT_STATS	0x400
+	#define BNXT_FLAG_UDP_RSS_CAP	0x800
 	#define BNXT_FLAG_EEE_CAP	0x1000
+	#define BNXT_FLAG_ROCEV1_CAP	0x8000
+	#define BNXT_FLAG_ROCEV2_CAP	0x10000
+	#define BNXT_FLAG_ROCE_CAP	(BNXT_FLAG_ROCEV1_CAP |	\
+					 BNXT_FLAG_ROCEV2_CAP)
 	#define BNXT_FLAG_CHIP_NITRO_A0	0x1000000
 
 	#define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA |		\
@@ -965,6 +972,9 @@ struct bnxt {
 #define BNXT_SINGLE_PF(bp)	(BNXT_PF(bp) && !BNXT_NPAR(bp))
 #define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0)
 
+	struct bnxt_en_dev	*edev;
+	struct bnxt_en_dev *	(*ulp_probe)(struct net_device *);
+
 	struct bnxt_napi	**bnapi;
 
 	struct bnxt_rx_ring_info	*rx_ring;
@@ -1007,8 +1017,10 @@ struct bnxt {
 	struct bnxt_ring_grp_info	*grp_info;
 	struct bnxt_vnic_info	*vnic_info;
 	int			nr_vnics;
+	u32			rss_hash_cfg;
 
 	u8			max_tc;
+	u8			max_lltc;	/* lossless TCs */
 	struct bnxt_queue_info	q_info[BNXT_MAX_QUEUE];
 
 	unsigned int		current_interval;
@@ -1019,11 +1031,18 @@ struct bnxt {
 	unsigned long		state;
 #define BNXT_STATE_OPEN		0
 #define BNXT_STATE_IN_SP_TASK	1
-#define BNXT_STATE_FN_RST_DONE	2
 
 	struct bnxt_irq	*irq_tbl;
+	int			total_irqs;
 	u8			mac_addr[ETH_ALEN];
 
+#ifdef CONFIG_BNXT_DCB
+	struct ieee_pfc		*ieee_pfc;
+	struct ieee_ets		*ieee_ets;
+	u8			dcbx_cap;
+	u8			default_pri;
+#endif /* CONFIG_BNXT_DCB */
+
 	u32			msg_enable;
 
 	u32			hwrm_spec_code;
@@ -1089,6 +1108,7 @@ struct bnxt {
 #define BNXT_RESET_TASK_SILENT_SP_EVENT	11
 #define BNXT_GENEVE_ADD_PORT_SP_EVENT	12
 #define BNXT_GENEVE_DEL_PORT_SP_EVENT	13
+#define BNXT_LINK_SPEED_CHNG_SP_EVENT	14
 
 	struct bnxt_pf_info	pf;
 #ifdef CONFIG_BNXT_SRIOV
@@ -1114,6 +1134,13 @@ struct bnxt {
 	u32			lpi_tmr_hi;
 };
 
+#define BNXT_RX_STATS_OFFSET(counter)			\
+	(offsetof(struct rx_port_stats, counter) / 8)
+
+#define BNXT_TX_STATS_OFFSET(counter)			\
+	((offsetof(struct tx_port_stats, counter) +	\
+	  sizeof(struct rx_port_stats) + 512) / 8)
+
 #ifdef CONFIG_NET_RX_BUSY_POLL
 static inline void bnxt_enable_poll(struct bnxt_napi *bnapi)
 {
@@ -1216,12 +1243,23 @@ void bnxt_hwrm_cmd_hdr_init(struct bnxt *, void *, u16, u16, u16);
 int _hwrm_send_message(struct bnxt *, void *, u32, int);
 int hwrm_send_message(struct bnxt *, void *, u32, int);
 int hwrm_send_message_silent(struct bnxt *, void *, u32, int);
+int bnxt_hwrm_func_rgtr_async_events(struct bnxt *bp, unsigned long *bmap,
+				     int bmap_size);
+int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id);
 int bnxt_hwrm_set_coal(struct bnxt *);
-int bnxt_hwrm_func_qcaps(struct bnxt *);
+unsigned int bnxt_get_max_func_stat_ctxs(struct bnxt *bp);
+void bnxt_set_max_func_stat_ctxs(struct bnxt *bp, unsigned int max);
+unsigned int bnxt_get_max_func_cp_rings(struct bnxt *bp);
+void bnxt_set_max_func_cp_rings(struct bnxt *bp, unsigned int max);
+void bnxt_set_max_func_irqs(struct bnxt *bp, unsigned int max);
+void bnxt_tx_disable(struct bnxt *bp);
+void bnxt_tx_enable(struct bnxt *bp);
 int bnxt_hwrm_set_pause(struct bnxt *);
 int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool);
 int bnxt_hwrm_fw_set_time(struct bnxt *);
 int bnxt_open_nic(struct bnxt *, bool, bool);
 int bnxt_close_nic(struct bnxt *, bool, bool);
+int bnxt_setup_mq_tc(struct net_device *dev, u8 tc);
 int bnxt_get_max_rings(struct bnxt *, int *, int *, bool);
+void bnxt_restore_pf_fw_resources(struct bnxt *bp);
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
new file mode 100644
index 000000000000..fdf2d8caf7bf
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.c
@@ -0,0 +1,502 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2014-2016 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/rtnetlink.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/etherdevice.h>
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_dcb.h"
+
+#ifdef CONFIG_BNXT_DCB
+static int bnxt_hwrm_queue_pri2cos_cfg(struct bnxt *bp, struct ieee_ets *ets)
+{
+	struct hwrm_queue_pri2cos_cfg_input req = {0};
+	int rc = 0, i;
+	u8 *pri2cos;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_CFG, -1, -1);
+	req.flags = cpu_to_le32(QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR |
+				QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN);
+
+	pri2cos = &req.pri0_cos_queue_id;
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		req.enables |= cpu_to_le32(
+			QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID << i);
+
+		pri2cos[i] = bp->q_info[ets->prio_tc[i]].queue_id;
+	}
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	return rc;
+}
+
+static int bnxt_hwrm_queue_pri2cos_qcfg(struct bnxt *bp, struct ieee_ets *ets)
+{
+	struct hwrm_queue_pri2cos_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_queue_pri2cos_qcfg_input req = {0};
+	int rc = 0;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PRI2COS_QCFG, -1, -1);
+	req.flags = cpu_to_le32(QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		u8 *pri2cos = &resp->pri0_cos_queue_id;
+		int i, j;
+
+		for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+			u8 queue_id = pri2cos[i];
+
+			for (j = 0; j < bp->max_tc; j++) {
+				if (bp->q_info[j].queue_id == queue_id) {
+					ets->prio_tc[i] = j;
+					break;
+				}
+			}
+		}
+	}
+	return rc;
+}
+
+static int bnxt_hwrm_queue_cos2bw_cfg(struct bnxt *bp, struct ieee_ets *ets,
+				      u8 max_tc)
+{
+	struct hwrm_queue_cos2bw_cfg_input req = {0};
+	struct bnxt_cos2bw_cfg cos2bw;
+	int rc = 0, i;
+	void *data;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_CFG, -1, -1);
+	data = &req.unused_0;
+	for (i = 0; i < max_tc; i++, data += sizeof(cos2bw) - 4) {
+		req.enables |= cpu_to_le32(
+			QUEUE_COS2BW_CFG_REQ_ENABLES_COS_QUEUE_ID0_VALID << i);
+
+		memset(&cos2bw, 0, sizeof(cos2bw));
+		cos2bw.queue_id = bp->q_info[i].queue_id;
+		if (ets->tc_tsa[i] == IEEE_8021QAZ_TSA_STRICT) {
+			cos2bw.tsa =
+				QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP;
+			cos2bw.pri_lvl = i;
+		} else {
+			cos2bw.tsa =
+				QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_ETS;
+			cos2bw.bw_weight = ets->tc_tx_bw[i];
+		}
+		memcpy(data, &cos2bw.queue_id, sizeof(cos2bw) - 4);
+		if (i == 0) {
+			req.queue_id0 = cos2bw.queue_id;
+			req.unused_0 = 0;
+		}
+	}
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	return rc;
+}
+
+static int bnxt_hwrm_queue_cos2bw_qcfg(struct bnxt *bp, struct ieee_ets *ets)
+{
+	struct hwrm_queue_cos2bw_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_queue_cos2bw_qcfg_input req = {0};
+	struct bnxt_cos2bw_cfg cos2bw;
+	void *data;
+	int rc, i;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_COS2BW_QCFG, -1, -1);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		return rc;
+
+	data = &resp->queue_id0 + offsetof(struct bnxt_cos2bw_cfg, queue_id);
+	for (i = 0; i < bp->max_tc; i++, data += sizeof(cos2bw) - 4) {
+		int j;
+
+		memcpy(&cos2bw.queue_id, data, sizeof(cos2bw) - 4);
+		if (i == 0)
+			cos2bw.queue_id = resp->queue_id0;
+
+		for (j = 0; j < bp->max_tc; j++) {
+			if (bp->q_info[j].queue_id != cos2bw.queue_id)
+				continue;
+			if (cos2bw.tsa ==
+			    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP) {
+				ets->tc_tsa[j] = IEEE_8021QAZ_TSA_STRICT;
+			} else {
+				ets->tc_tsa[j] = IEEE_8021QAZ_TSA_ETS;
+				ets->tc_tx_bw[j] = cos2bw.bw_weight;
+			}
+		}
+	}
+	return 0;
+}
+
+static int bnxt_hwrm_queue_cfg(struct bnxt *bp, unsigned int lltc_mask)
+{
+	struct hwrm_queue_cfg_input req = {0};
+	int i;
+
+	if (netif_running(bp->dev))
+		bnxt_tx_disable(bp);
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_CFG, -1, -1);
+	req.flags = cpu_to_le32(QUEUE_CFG_REQ_FLAGS_PATH_BIDIR);
+	req.enables = cpu_to_le32(QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE);
+
+	/* Configure lossless queues to lossy first */
+	req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
+	for (i = 0; i < bp->max_tc; i++) {
+		if (BNXT_LLQ(bp->q_info[i].queue_profile)) {
+			req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
+			hwrm_send_message(bp, &req, sizeof(req),
+					  HWRM_CMD_TIMEOUT);
+			bp->q_info[i].queue_profile =
+				QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY;
+		}
+	}
+
+	/* Now configure desired queues to lossless */
+	req.service_profile = QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
+	for (i = 0; i < bp->max_tc; i++) {
+		if (lltc_mask & (1 << i)) {
+			req.queue_id = cpu_to_le32(bp->q_info[i].queue_id);
+			hwrm_send_message(bp, &req, sizeof(req),
+					  HWRM_CMD_TIMEOUT);
+			bp->q_info[i].queue_profile =
+				QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS;
+		}
+	}
+	if (netif_running(bp->dev))
+		bnxt_tx_enable(bp);
+
+	return 0;
+}
+
+static int bnxt_hwrm_queue_pfc_cfg(struct bnxt *bp, struct ieee_pfc *pfc)
+{
+	struct hwrm_queue_pfcenable_cfg_input req = {0};
+	struct ieee_ets *my_ets = bp->ieee_ets;
+	unsigned int tc_mask = 0, pri_mask = 0;
+	u8 i, pri, lltc_count = 0;
+	bool need_q_recfg = false;
+	int rc;
+
+	if (!my_ets)
+		return -EINVAL;
+
+	for (i = 0; i < bp->max_tc; i++) {
+		for (pri = 0; pri < IEEE_8021QAZ_MAX_TCS; pri++) {
+			if ((pfc->pfc_en & (1 << pri)) &&
+			    (my_ets->prio_tc[pri] == i)) {
+				pri_mask |= 1 << pri;
+				tc_mask |= 1 << i;
+			}
+		}
+		if (tc_mask & (1 << i))
+			lltc_count++;
+	}
+	if (lltc_count > bp->max_lltc)
+		return -EINVAL;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_CFG, -1, -1);
+	req.flags = cpu_to_le32(pri_mask);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc)
+		return rc;
+
+	for (i = 0; i < bp->max_tc; i++) {
+		if (tc_mask & (1 << i)) {
+			if (!BNXT_LLQ(bp->q_info[i].queue_profile))
+				need_q_recfg = true;
+		}
+	}
+
+	if (need_q_recfg)
+		rc = bnxt_hwrm_queue_cfg(bp, tc_mask);
+
+	return rc;
+}
+
+/* Query the firmware's PFC enable mask and store it in pfc->pfc_en */
+static int bnxt_hwrm_queue_pfc_qcfg(struct bnxt *bp, struct ieee_pfc *pfc)
+{
+	struct hwrm_queue_pfcenable_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	struct hwrm_queue_pfcenable_qcfg_input req = {0};
+	int rc;
+
+	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_QUEUE_PFCENABLE_QCFG, -1, -1);
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (!rc) {
+		/* Only the low 8 bits of the 32-bit flags word are kept */
+		pfc->pfc_en = (u8)le32_to_cpu(resp->flags);
+	}
+
+	return rc;
+}
+
+static int bnxt_ets_validate(struct bnxt *bp, struct ieee_ets *ets, u8 *tc)
+{
+	int total_ets_bw = 0;	/* sum of tc_tx_bw[] over ETS-scheduled TCs */
+	u8 max_tc = 0;		/* highest TC index seen in prio_tc[] */
+	int i;
+
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++) {
+		if (ets->prio_tc[i] > bp->max_tc) {
+			netdev_err(bp->dev, "priority to TC mapping exceeds TC count %d\n",
+				   ets->prio_tc[i]);
+			return -EINVAL;
+		}
+		if (ets->prio_tc[i] > max_tc)
+			max_tc = ets->prio_tc[i];
+		/* NOTE(review): both checks use '>', so == bp->max_tc passes */
+		if ((ets->tc_tx_bw[i] || ets->tc_tsa[i]) && i > bp->max_tc)
+			return -EINVAL;
+
+		switch (ets->tc_tsa[i]) {
+		case IEEE_8021QAZ_TSA_STRICT:
+			break;
+		case IEEE_8021QAZ_TSA_ETS:
+			total_ets_bw += ets->tc_tx_bw[i];
+			break;
+		default:	/* any other TSA value is rejected */
+			return -ENOTSUPP;
+		}
+	}
+	if (total_ets_bw > 100)	/* ETS weights may not add up to more than 100 */
+		return -EINVAL;
+	/* Success: report the TC count implied by prio_tc[] (highest + 1) */
+	*tc = max_tc + 1;
+	return 0;
+}
+
+/* dcbnl ieee_getets: report ETS state, caching firmware values on first use */
+static int bnxt_dcbnl_ieee_getets(struct net_device *dev, struct ieee_ets *ets)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct ieee_ets *my_ets = bp->ieee_ets;
+
+	ets->ets_cap = bp->max_tc;
+	if (!my_ets) {
+		/* Host-managed DCBX, nothing set yet: only ets_cap is filled */
+		if (bp->dcbx_cap & DCB_CAP_DCBX_HOST)
+			return 0;
+
+		my_ets = kzalloc(sizeof(*my_ets), GFP_KERNEL);
+		if (!my_ets)
+			return 0;
+		if (bnxt_hwrm_queue_cos2bw_qcfg(bp, my_ets) ||
+		    bnxt_hwrm_queue_pri2cos_qcfg(bp, my_ets)) {
+			kfree(my_ets);	/* do not leak on a failed query */
+			return 0;
+		}
+		/* Keep the result so bnxt_dcb_free() can release it */
+		bp->ieee_ets = my_ets;
+	}
+
+	ets->cbs = my_ets->cbs;
+	memcpy(ets->tc_tx_bw, my_ets->tc_tx_bw, sizeof(ets->tc_tx_bw));
+	memcpy(ets->tc_rx_bw, my_ets->tc_rx_bw, sizeof(ets->tc_rx_bw));
+	memcpy(ets->tc_tsa, my_ets->tc_tsa, sizeof(ets->tc_tsa));
+	memcpy(ets->prio_tc, my_ets->prio_tc, sizeof(ets->prio_tc));
+	return 0;
+}
+
+static int bnxt_dcbnl_ieee_setets(struct net_device *dev, struct ieee_ets *ets)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct ieee_ets *my_ets = bp->ieee_ets;
+	u8 max_tc = 0;
+	int rc, i;
+
+	if (!(bp->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    !(bp->dcbx_cap & DCB_CAP_DCBX_HOST))
+		return -EINVAL;
+
+	rc = bnxt_ets_validate(bp, ets, &max_tc);
+	if (!rc) {
+		if (!my_ets) {
+			my_ets = kzalloc(sizeof(*my_ets), GFP_KERNEL);
+			if (!my_ets)
+				return -ENOMEM;
+			/* initialize PRI2TC mappings to invalid value */
+			for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++)
+				my_ets->prio_tc[i] = IEEE_8021QAZ_MAX_TCS;
+			bp->ieee_ets = my_ets;
+		}
+		rc = bnxt_setup_mq_tc(dev, max_tc);
+		if (rc)
+			return rc;
+		rc = bnxt_hwrm_queue_cos2bw_cfg(bp, ets, max_tc);
+		if (rc)
+			return rc;
+		rc = bnxt_hwrm_queue_pri2cos_cfg(bp, ets);
+		if (rc)
+			return rc;
+		memcpy(my_ets, ets, sizeof(*my_ets));
+	}
+	return rc;
+}
+
+static int bnxt_dcbnl_ieee_getpfc(struct net_device *dev, struct ieee_pfc *pfc)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	__le64 *stats = (__le64 *)bp->hw_rx_port_stats;
+	struct ieee_pfc *my_pfc = bp->ieee_pfc;
+	long rx_off, tx_off;
+	int i, rc;
+
+	pfc->pfc_cap = bp->max_lltc;
+
+	if (!my_pfc) {
+		if (bp->dcbx_cap & DCB_CAP_DCBX_HOST)
+			return 0;
+
+		my_pfc = kzalloc(sizeof(*my_pfc), GFP_KERNEL);
+		if (!my_pfc)
+			return 0;
+		bp->ieee_pfc = my_pfc;
+		rc = bnxt_hwrm_queue_pfc_qcfg(bp, my_pfc);
+		if (rc)
+			return 0;
+	}
+
+	pfc->pfc_en = my_pfc->pfc_en;
+	pfc->mbc = my_pfc->mbc;
+	pfc->delay = my_pfc->delay;
+
+	if (!stats)
+		return 0;
+
+	rx_off = BNXT_RX_STATS_OFFSET(rx_pfc_ena_frames_pri0);
+	tx_off = BNXT_TX_STATS_OFFSET(tx_pfc_ena_frames_pri0);
+	for (i = 0; i < IEEE_8021QAZ_MAX_TCS; i++, rx_off++, tx_off++) {
+		pfc->requests[i] = le64_to_cpu(*(stats + tx_off));
+		pfc->indications[i] = le64_to_cpu(*(stats + rx_off));
+	}
+
+	return 0;
+}
+
+static int bnxt_dcbnl_ieee_setpfc(struct net_device *dev, struct ieee_pfc *pfc)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	struct ieee_pfc *my_pfc = bp->ieee_pfc;
+	int rc;
+
+	if (!(bp->dcbx_cap & DCB_CAP_DCBX_VER_IEEE) ||
+	    !(bp->dcbx_cap & DCB_CAP_DCBX_HOST))
+		return -EINVAL;
+
+	if (!my_pfc) {
+		my_pfc = kzalloc(sizeof(*my_pfc), GFP_KERNEL);
+		if (!my_pfc)
+			return -ENOMEM;
+		bp->ieee_pfc = my_pfc;
+	}
+	rc = bnxt_hwrm_queue_pfc_cfg(bp, pfc);
+	if (!rc)
+		memcpy(my_pfc, pfc, sizeof(*my_pfc));
+
+	return rc;
+}
+
+static int bnxt_dcbnl_ieee_setapp(struct net_device *dev, struct dcb_app *app)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	u8 need = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_HOST;
+
+	/* APP entries are accepted only in host-managed IEEE DCBX mode */
+	if ((bp->dcbx_cap & need) != need)
+		return -EINVAL;
+
+	/* Hand the entry to the dcbnl core's dcb_ieee_setapp() */
+	return dcb_ieee_setapp(dev, app);
+}
+
+/* dcbnl ieee_delapp hook: remove an APP entry via dcb_ieee_delapp() */
+static int bnxt_dcbnl_ieee_delapp(struct net_device *dev, struct dcb_app *app)
+{
+	struct bnxt *bp = netdev_priv(dev);
+
+	/* Unlike setapp, only the IEEE version bit is required here */
+	if (bp->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)
+		return dcb_ieee_delapp(dev, app);
+
+	return -EINVAL;
+}
+
+static u8 bnxt_dcbnl_getdcbx(struct net_device *dev)
+{
+	struct bnxt *bp = netdev_priv(dev);
+
+	return bp->dcbx_cap;
+}
+
+/* dcbnl setdcbx hook: returns 0 when @mode is accepted, 1 when rejected */
+static u8 bnxt_dcbnl_setdcbx(struct net_device *dev, u8 mode)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	bool ieee_only = (mode & DCB_CAP_DCBX_VER_IEEE) &&
+			 !(mode & DCB_CAP_DCBX_VER_CEE);
+
+	if (!ieee_only)
+		return 1;
+
+	/* A VF is refused when it asks for host-managed DCBX */
+	if (BNXT_VF(bp) && (mode & DCB_CAP_DCBX_HOST))
+		return 1;
+
+	bp->dcbx_cap = mode;	/* storing an unchanged mode is harmless */
+	return 0;
+}
+
+static const struct dcbnl_rtnl_ops dcbnl_ops = {
+	.ieee_getets	= bnxt_dcbnl_ieee_getets,
+	.ieee_setets	= bnxt_dcbnl_ieee_setets,
+	.ieee_getpfc	= bnxt_dcbnl_ieee_getpfc,
+	.ieee_setpfc	= bnxt_dcbnl_ieee_setpfc,
+	.ieee_setapp	= bnxt_dcbnl_ieee_setapp,
+	.ieee_delapp	= bnxt_dcbnl_ieee_delapp,
+	.getdcbx	= bnxt_dcbnl_getdcbx,
+	.setdcbx	= bnxt_dcbnl_setdcbx,
+};
+
+/* Install the dcbnl ops when the HWRM spec code is at least 0x10501 */
+void bnxt_dcb_init(struct bnxt *bp)
+{
+	u8 owner;
+
+	if (bp->hwrm_spec_code < 0x10501)
+		return;
+	/* PF: host-managed DCBX; otherwise flagged as LLD-managed */
+	owner = BNXT_PF(bp) ? DCB_CAP_DCBX_HOST : DCB_CAP_DCBX_LLD_MANAGED;
+	bp->dcbx_cap = DCB_CAP_DCBX_VER_IEEE | owner;
+	bp->dev->dcbnl_ops = &dcbnl_ops;
+}
+
+void bnxt_dcb_free(struct bnxt *bp)
+{
+	kfree(bp->ieee_pfc);
+	kfree(bp->ieee_ets);
+	bp->ieee_pfc = NULL;
+	bp->ieee_ets = NULL;
+}
+
+#else
+
+void bnxt_dcb_init(struct bnxt *bp)
+{
+}
+
+void bnxt_dcb_free(struct bnxt *bp)
+{
+}
+
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
new file mode 100644
index 000000000000..35a0d28cf2fd
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_dcb.h
@@ -0,0 +1,41 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2014-2016 Broadcom Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_DCB_H
+#define BNXT_DCB_H
+
+#include <net/dcbnl.h>
+
+struct bnxt_dcb {
+	u8			max_tc;
+	struct ieee_pfc		*ieee_pfc;
+	struct ieee_ets		*ieee_ets;
+	u8			dcbx_cap;
+	u8			default_pri;
+};
+
+struct bnxt_cos2bw_cfg {
+	u8			pad[3];
+	u8			queue_id;
+	__le32			min_bw;
+	__le32			max_bw;
+	u8			tsa;
+	u8			pri_lvl;
+	u8			bw_weight;
+	u8			unused;
+};
+
+#define BNXT_LLQ(q_profile)	\
+	((q_profile) == QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS)
+
+#define HWRM_STRUCT_DATA_SUBTYPE_HOST_OPERATIONAL	0x0300
+
+void bnxt_dcb_init(struct bnxt *bp);
+void bnxt_dcb_free(struct bnxt *bp);
+#endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index a7e04ff4eaed..784aa77610bc 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -107,16 +107,9 @@ static int bnxt_set_coalesce(struct net_device *dev,
 
 #define BNXT_NUM_STATS	21
 
-#define BNXT_RX_STATS_OFFSET(counter)	\
-	(offsetof(struct rx_port_stats, counter) / 8)
-
 #define BNXT_RX_STATS_ENTRY(counter)	\
 	{ BNXT_RX_STATS_OFFSET(counter), __stringify(counter) }
 
-#define BNXT_TX_STATS_OFFSET(counter)			\
-	((offsetof(struct tx_port_stats, counter) +	\
-	  sizeof(struct rx_port_stats) + 512) / 8)
-
 #define BNXT_TX_STATS_ENTRY(counter)	\
 	{ BNXT_TX_STATS_OFFSET(counter), __stringify(counter) }
 
@@ -150,6 +143,14 @@ static const struct {
 	BNXT_RX_STATS_ENTRY(rx_tagged_frames),
 	BNXT_RX_STATS_ENTRY(rx_double_tagged_frames),
 	BNXT_RX_STATS_ENTRY(rx_good_frames),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri0),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri1),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri2),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri3),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri4),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri5),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri6),
+	BNXT_RX_STATS_ENTRY(rx_pfc_ena_frames_pri7),
 	BNXT_RX_STATS_ENTRY(rx_undrsz_frames),
 	BNXT_RX_STATS_ENTRY(rx_eee_lpi_events),
 	BNXT_RX_STATS_ENTRY(rx_eee_lpi_duration),
@@ -179,6 +180,14 @@ static const struct {
 	BNXT_TX_STATS_ENTRY(tx_fcs_err_frames),
 	BNXT_TX_STATS_ENTRY(tx_err),
 	BNXT_TX_STATS_ENTRY(tx_fifo_underruns),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri0),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri1),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri2),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri3),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri4),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri5),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri6),
+	BNXT_TX_STATS_ENTRY(tx_pfc_ena_frames_pri7),
 	BNXT_TX_STATS_ENTRY(tx_eee_lpi_events),
 	BNXT_TX_STATS_ENTRY(tx_eee_lpi_duration),
 	BNXT_TX_STATS_ENTRY(tx_total_collisions),
@@ -542,6 +551,146 @@ fltr_err:
 
 	return rc;
 }
+#endif
+
+static u64 get_ethtool_ipv4_rss(struct bnxt *bp)
+{
+	/* Plain IPv4 hashing enabled => report src/dst IP, else nothing */
+	return (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4) ?
+	       (RXH_IP_SRC | RXH_IP_DST) : 0;
+}
+
+static u64 get_ethtool_ipv6_rss(struct bnxt *bp)
+{
+	/* Plain IPv6 hashing enabled => report src/dst IP, else nothing */
+	return (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6) ?
+	       (RXH_IP_SRC | RXH_IP_DST) : 0;
+}
+
+static int bnxt_grxfh(struct bnxt *bp, struct ethtool_rxnfc *cmd)
+{
+	cmd->data = 0;
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+		if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4)
+			cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		cmd->data |= get_ethtool_ipv4_rss(bp);
+		break;
+	case UDP_V4_FLOW:
+		if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4)
+			cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* fall through */
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		cmd->data |= get_ethtool_ipv4_rss(bp);
+		break;
+
+	case TCP_V6_FLOW:
+		if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6)
+			cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		cmd->data |= get_ethtool_ipv6_rss(bp);
+		break;
+	case UDP_V6_FLOW:
+		if (bp->rss_hash_cfg & VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6)
+			cmd->data |= RXH_IP_SRC | RXH_IP_DST |
+				     RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		/* fall through */
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		cmd->data |= get_ethtool_ipv6_rss(bp);
+		break;
+	}
+	return 0;
+}
+
+#define RXH_4TUPLE (RXH_IP_SRC | RXH_IP_DST | RXH_L4_B_0_1 | RXH_L4_B_2_3)
+#define RXH_2TUPLE (RXH_IP_SRC | RXH_IP_DST)
+
+static int bnxt_srxfh(struct bnxt *bp, struct ethtool_rxnfc *cmd)
+{
+	u32 rss_hash_cfg = bp->rss_hash_cfg;
+	int tuple, rc = 0;
+
+	if (cmd->data == RXH_4TUPLE)
+		tuple = 4;
+	else if (cmd->data == RXH_2TUPLE)
+		tuple = 2;
+	else if (!cmd->data)
+		tuple = 0;
+	else
+		return -EINVAL;
+
+	if (cmd->flow_type == TCP_V4_FLOW) {
+		rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4;
+		if (tuple == 4)
+			rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4;
+	} else if (cmd->flow_type == UDP_V4_FLOW) {
+		if (tuple == 4 && !(bp->flags & BNXT_FLAG_UDP_RSS_CAP))
+			return -EINVAL;
+		rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4;
+		if (tuple == 4)
+			rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV4;
+	} else if (cmd->flow_type == TCP_V6_FLOW) {
+		rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
+		if (tuple == 4)
+			rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6;
+	} else if (cmd->flow_type == UDP_V6_FLOW) {
+		if (tuple == 4 && !(bp->flags & BNXT_FLAG_UDP_RSS_CAP))
+			return -EINVAL;
+		rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+		if (tuple == 4)
+			rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_UDP_IPV6;
+	} else if (tuple == 4) {
+		return -EINVAL;
+	}
+
+	switch (cmd->flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+	case IPV4_FLOW:
+		if (tuple == 2)
+			rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4;
+		else if (!tuple)
+			rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4;
+		break;
+
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case IPV6_FLOW:
+		if (tuple == 2)
+			rss_hash_cfg |= VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
+		else if (!tuple)
+			rss_hash_cfg &= ~VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6;
+		break;
+	}
+
+	if (bp->rss_hash_cfg == rss_hash_cfg)
+		return 0;
+
+	bp->rss_hash_cfg = rss_hash_cfg;
+	if (netif_running(bp->dev)) {
+		bnxt_close_nic(bp, false, false);
+		rc = bnxt_open_nic(bp, false, false);
+	}
+	return rc;
+}
 
 static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 			  u32 *rule_locs)
@@ -550,6 +699,7 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 	int rc = 0;
 
 	switch (cmd->cmd) {
+#ifdef CONFIG_RFS_ACCEL
 	case ETHTOOL_GRXRINGS:
 		cmd->data = bp->rx_nr_rings;
 		break;
@@ -566,6 +716,11 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 	case ETHTOOL_GRXCLSRULE:
 		rc = bnxt_grxclsrule(bp, cmd);
 		break;
+#endif
+
+	case ETHTOOL_GRXFH:
+		rc = bnxt_grxfh(bp, cmd);
+		break;
 
 	default:
 		rc = -EOPNOTSUPP;
@@ -574,7 +729,23 @@ static int bnxt_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 
 	return rc;
 }
-#endif
+
+/*
+ * ethtool set_rxnfc hook.
+ *
+ * ETHTOOL_SRXFH (RSS hash field selection) is the only sub-command handled
+ * here; every other value of cmd->cmd yields -EOPNOTSUPP.
+ */
+static int bnxt_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct bnxt *bp = netdev_priv(dev);
+
+	if (cmd->cmd != ETHTOOL_SRXFH)
+		return -EOPNOTSUPP;
+
+	/* bnxt_srxfh() does its own checking of flow type and tuple data */
+	return bnxt_srxfh(bp, cmd);
+}
 
 static u32 bnxt_get_rxfh_indir_size(struct net_device *dev)
 {
@@ -1885,9 +2056,8 @@ const struct ethtool_ops bnxt_ethtool_ops = {
 	.get_ringparam		= bnxt_get_ringparam,
 	.get_channels		= bnxt_get_channels,
 	.set_channels		= bnxt_set_channels,
-#ifdef CONFIG_RFS_ACCEL
 	.get_rxnfc		= bnxt_get_rxnfc,
-#endif
+	.set_rxnfc		= bnxt_set_rxnfc,
 	.get_rxfh_indir_size    = bnxt_get_rxfh_indir_size,
 	.get_rxfh_key_size      = bnxt_get_rxfh_key_size,
 	.get_rxfh               = bnxt_get_rxfh,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 04a96cc3498a..2ddfa51519a1 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -1,6 +1,7 @@
 /* Broadcom NetXtreme-C/E network driver.
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
+ * Copyright (c) 2016 Broadcom Limited
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -10,29 +11,22 @@
 #ifndef BNXT_HSI_H
 #define BNXT_HSI_H
 
-/* per-context HW statistics -- chip view */
-struct ctx_hw_stats  {
-	__le64 rx_ucast_pkts;
-	__le64 rx_mcast_pkts;
-	__le64 rx_bcast_pkts;
-	__le64 rx_discard_pkts;
-	__le64 rx_drop_pkts;
-	__le64 rx_ucast_bytes;
-	__le64 rx_mcast_bytes;
-	__le64 rx_bcast_bytes;
-	__le64 tx_ucast_pkts;
-	__le64 tx_mcast_pkts;
-	__le64 tx_bcast_pkts;
-	__le64 tx_discard_pkts;
-	__le64 tx_drop_pkts;
-	__le64 tx_ucast_bytes;
-	__le64 tx_mcast_bytes;
-	__le64 tx_bcast_bytes;
-	__le64 tpa_pkts;
-	__le64 tpa_bytes;
-	__le64 tpa_events;
-	__le64 tpa_aborts;
-};
+/* HSI and HWRM Specification 1.6.0 */
+#define HWRM_VERSION_MAJOR	1
+#define HWRM_VERSION_MINOR	6
+#define HWRM_VERSION_UPDATE	0
+
+#define HWRM_VERSION_STR	"1.6.0"
+/*
+ * Following is the signature for HWRM message field that indicates not
+ * applicable (All F's). Need to cast it the size of the field if needed.
+ */
+#define HWRM_NA_SIGNATURE	((__le32)(-1))
+#define HWRM_MAX_REQ_LEN    (128)  /* hwrm_func_buf_rgtr */
+#define HWRM_MAX_RESP_LEN    (176)  /* hwrm_func_qstats */
+#define HW_HASH_INDEX_SIZE      0x80    /* 7 bit indirection table index. */
+#define HW_HASH_KEY_SIZE	40
+#define HWRM_RESP_VALID_KEY      1 /* valid key for HWRM response */
 
 /* Statistics Ejection Buffer Completion Record (16 bytes) */
 struct eject_cmpl {
@@ -50,77 +44,77 @@ struct eject_cmpl {
 /* HWRM Completion Record (16 bytes) */
 struct hwrm_cmpl {
 	__le16 type;
-	#define HWRM_CMPL_TYPE_MASK				    0x3fUL
-	#define HWRM_CMPL_TYPE_SFT				    0
-	#define HWRM_CMPL_TYPE_HWRM_DONE			   0x20UL
+	#define CMPL_TYPE_MASK					    0x3fUL
+	#define CMPL_TYPE_SFT					    0
+	#define CMPL_TYPE_HWRM_DONE				   0x20UL
 	__le16 sequence_id;
 	__le32 unused_1;
 	__le32 v;
-	#define HWRM_CMPL_V					    0x1UL
+	#define CMPL_V						    0x1UL
 	__le32 unused_3;
 };
 
 /* HWRM Forwarded Request (16 bytes) */
 struct hwrm_fwd_req_cmpl {
 	__le16 req_len_type;
-	#define HWRM_FWD_REQ_CMPL_TYPE_MASK			    0x3fUL
-	#define HWRM_FWD_REQ_CMPL_TYPE_SFT			    0
-	#define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ		   0x22UL
-	#define HWRM_FWD_REQ_CMPL_REQ_LEN_MASK			    0xffc0UL
-	#define HWRM_FWD_REQ_CMPL_REQ_LEN_SFT			    6
+	#define FWD_REQ_CMPL_TYPE_MASK				    0x3fUL
+	#define FWD_REQ_CMPL_TYPE_SFT				    0
+	#define FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ			   0x22UL
+	#define FWD_REQ_CMPL_REQ_LEN_MASK			    0xffc0UL
+	#define FWD_REQ_CMPL_REQ_LEN_SFT			    6
 	__le16 source_id;
 	__le32 unused_0;
 	__le32 req_buf_addr_v[2];
-	#define HWRM_FWD_REQ_CMPL_V				    0x1UL
-	#define HWRM_FWD_REQ_CMPL_REQ_BUF_ADDR_MASK		    0xfffffffeUL
-	#define HWRM_FWD_REQ_CMPL_REQ_BUF_ADDR_SFT		    1
+	#define FWD_REQ_CMPL_V					    0x1UL
+	#define FWD_REQ_CMPL_REQ_BUF_ADDR_MASK			    0xfffffffeUL
+	#define FWD_REQ_CMPL_REQ_BUF_ADDR_SFT			    1
 };
 
 /* HWRM Forwarded Response (16 bytes) */
 struct hwrm_fwd_resp_cmpl {
 	__le16 type;
-	#define HWRM_FWD_RESP_CMPL_TYPE_MASK			    0x3fUL
-	#define HWRM_FWD_RESP_CMPL_TYPE_SFT			    0
-	#define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP		   0x24UL
+	#define FWD_RESP_CMPL_TYPE_MASK			    0x3fUL
+	#define FWD_RESP_CMPL_TYPE_SFT				    0
+	#define FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP		   0x24UL
 	__le16 source_id;
 	__le16 resp_len;
 	__le16 unused_1;
 	__le32 resp_buf_addr_v[2];
-	#define HWRM_FWD_RESP_CMPL_V				    0x1UL
-	#define HWRM_FWD_RESP_CMPL_RESP_BUF_ADDR_MASK		    0xfffffffeUL
-	#define HWRM_FWD_RESP_CMPL_RESP_BUF_ADDR_SFT		    1
+	#define FWD_RESP_CMPL_V				    0x1UL
+	#define FWD_RESP_CMPL_RESP_BUF_ADDR_MASK		    0xfffffffeUL
+	#define FWD_RESP_CMPL_RESP_BUF_ADDR_SFT		    1
 };
 
 /* HWRM Asynchronous Event Completion Record (16 bytes) */
 struct hwrm_async_event_cmpl {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_TYPE_MASK		    0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_TYPE_SFT			    0
-	#define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT       0x2eUL
+	#define ASYNC_EVENT_CMPL_TYPE_MASK			    0x3fUL
+	#define ASYNC_EVENT_CMPL_TYPE_SFT			    0
+	#define ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT		   0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE 0x0UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE  0x2UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE  0x3UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD   0x10UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD     0x11UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD     0x20UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD       0x21UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR		   0x30UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE      0x33UL
-	#define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR	   0xffUL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE      0x0UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE	   0x1UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE       0x2UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE       0x3UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED   0x4UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE   0x6UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE     0x7UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD	   0x10UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD	   0x11UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT     0x12UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD	   0x20UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD		   0x21UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR		   0x30UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE      0x31UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE	   0x33UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR		   0xffUL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_V			    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_OPAQUE_MASK		    0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_OPAQUE_SFT		    1
+	#define ASYNC_EVENT_CMPL_V				    0x1UL
+	#define ASYNC_EVENT_CMPL_OPAQUE_MASK			    0xfeUL
+	#define ASYNC_EVENT_CMPL_OPAQUE_SFT			    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
@@ -129,670 +123,391 @@ struct hwrm_async_event_cmpl {
 /* HWRM Asynchronous Event Completion Record for link status change (16 bytes) */
 struct hwrm_async_event_cmpl_link_status_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK 0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT  0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK      0x3fUL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT       0
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V	    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_OPAQUE_MASK    0xfeUL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_OPAQUE_SFT     1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE 0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_DOWN (0x0UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_UP (0x1UL << 0)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_LAST    HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_UP
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_MASK 0xeUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_SFT 1
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_ID_SFT 4
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE 0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_DOWN (0x0UL << 0)
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_UP (0x1UL << 0)
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_LAST    ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_LINK_CHANGE_UP
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_MASK 0xeUL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_SFT 1
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0UL
+	#define ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_DATA1_PORT_ID_SFT 4
 };
 
 /* HWRM Asynchronous Event Completion Record for link MTU change (16 bytes) */
 struct hwrm_async_event_cmpl_link_mtu_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_MASK    0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_SFT     0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_MASK	    0x3fUL
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_SFT	    0
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE 0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE 0x1UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_V	    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_OPAQUE_MASK  0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_OPAQUE_SFT   1
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_OPAQUE_MASK       0xfeUL
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_OPAQUE_SFT	    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_DATA1_NEW_MTU_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_DATA1_NEW_MTU_SFT 0
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_DATA1_NEW_MTU_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_DATA1_NEW_MTU_SFT 0
 };
 
 /* HWRM Asynchronous Event Completion Record for link speed change (16 bytes) */
 struct hwrm_async_event_cmpl_link_speed_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_MASK  0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_SFT   0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_MASK       0x3fUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_SFT	    0
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE 0x2UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE 0x2UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_V	    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_OPAQUE_MASK     0xfeUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_OPAQUE_SFT      1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_FORCE 0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_MASK 0xfffeUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_SFT 1
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100MB (0x1UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_1GB (0xaUL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_2GB (0x14UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_2_5GB (0x19UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10GB (0x64UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_20GB (0xc8UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_25GB (0xfaUL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_40GB (0x190UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_50GB (0x1f4UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB (0x3e8UL << 1)
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST    HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0000UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_SFT 16
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_FORCE 0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_MASK 0xfffeUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_SFT 1
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100MB (0x1UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_1GB (0xaUL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_2GB (0x14UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_2_5GB (0x19UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10GB (0x64UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_20GB (0xc8UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_25GB (0xfaUL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_40GB (0x190UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_50GB (0x1f4UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB (0x3e8UL << 1)
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST    ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0000UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_SFT 16
 };
 
 /* HWRM Asynchronous Event Completion Record for DCB Config change (16 bytes) */
 struct hwrm_async_event_cmpl_dcb_config_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_MASK  0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_SFT   0
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_MASK       0x3fUL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_SFT	    0
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL
 	__le32 event_data2;
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA2_ETS 0x1UL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA2_PFC 0x2UL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA2_APP 0x4UL
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V	    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_OPAQUE_MASK     0xfeUL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_OPAQUE_SFT      1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_PORT_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_PORT_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_MASK 0xff0000UL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_SFT 16
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_NONE (0xffUL << 16)
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_LAST    ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_ROCE_PRIORITY_NONE
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_MASK 0xff000000UL
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_SFT 24
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_NONE (0xffUL << 24)
+	#define ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_LAST    ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_DATA1_RECOMMEND_L2_PRIORITY_NONE
 };
 
 /* HWRM Asynchronous Event Completion Record for port connection not allowed (16 bytes) */
 struct hwrm_async_event_cmpl_port_conn_not_allowed {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK 0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK   0x3fUL
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT    0
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V      0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V	    0x1UL
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_OPAQUE_SFT  1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_MASK 0xff0000UL
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_SFT 16
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_NONE (0x0UL << 16)
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_DISABLETX (0x1UL << 16)
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_WARNINGMSG (0x2UL << 16)
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_PWRDOWN (0x3UL << 16)
-	#define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_LAST    HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_PWRDOWN
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_PORT_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_MASK 0xff0000UL
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_SFT 16
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_NONE (0x0UL << 16)
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_DISABLETX (0x1UL << 16)
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_WARNINGMSG (0x2UL << 16)
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_PWRDOWN (0x3UL << 16)
+	#define ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_LAST    ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_DATA1_ENFORCEMENT_POLICY_PWRDOWN
 };
 
 /* HWRM Asynchronous Event Completion Record for link speed config not allowed (16 bytes) */
 struct hwrm_async_event_cmpl_link_speed_cfg_not_allowed {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_MASK 0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_MASK 0x3fUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_SFT 0
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_V 0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_V      0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_OPAQUE_SFT 1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_DATA1_PORT_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_DATA1_PORT_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_DATA1_PORT_ID_SFT 0
 };
 
 /* HWRM Asynchronous Event Completion Record for link speed configuration change (16 bytes) */
 struct hwrm_async_event_cmpl_link_speed_cfg_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK 0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK   0x3fUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT    0
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V      0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V	    0x1UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_OPAQUE_SFT  1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_PORT_ID_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_SUPPORTED_LINK_SPEEDS_CHANGE 0x10000UL
-	#define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_ILLEGAL_LINK_SPEED_CFG 0x20000UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_PORT_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_SUPPORTED_LINK_SPEEDS_CHANGE 0x10000UL
+	#define ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_DATA1_ILLEGAL_LINK_SPEED_CFG 0x20000UL
 };
 
 /* HWRM Asynchronous Event Completion Record for Function Driver Unload (16 bytes) */
 struct hwrm_async_event_cmpl_func_drvr_unload {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_MASK   0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_SFT    0
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_MASK	    0x3fUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_SFT	    0
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_V	    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_OPAQUE_SFT  1
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_OPAQUE_MASK      0xfeUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_OPAQUE_SFT       1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_SFT 0
 };
 
 /* HWRM Asynchronous Event Completion Record for Function Driver load (16 bytes) */
 struct hwrm_async_event_cmpl_func_drvr_load {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_MASK     0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_SFT      0
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_MASK	    0x3fUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_SFT	    0
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	__le16 event_id;
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD 0x11UL
+	__le32 event_data2;
+	u8 opaque_v;
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_OPAQUE_MASK	    0xfeUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_OPAQUE_SFT	    1
+	u8 timestamp_lo;
+	__le16 timestamp_hi;
+	__le32 event_data1;
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_DATA1_FUNC_ID_SFT 0
+};
+
+/* HWRM Asynchronous Event Completion Record to indicate completion of FLR related processing (16 bytes) */
+struct hwrm_async_event_cmpl_func_flr_proc_cmplt {
+	__le16 type;
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_TYPE_MASK     0x3fUL
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_TYPE_SFT      0
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD 0x11UL
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_V		    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_OPAQUE_MASK   0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_OPAQUE_SFT    1
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_OPAQUE_MASK   0xfeUL
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_OPAQUE_SFT    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_DATA1_FUNC_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_FUNC_FLR_PROC_CMPLT_EVENT_DATA1_FUNC_ID_SFT 0
 };
 
 /* HWRM Asynchronous Event Completion Record for PF Driver Unload (16 bytes) */
 struct hwrm_async_event_cmpl_pf_drvr_unload {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK     0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT      0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK	    0x3fUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT	    0
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V		    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_OPAQUE_MASK   0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_OPAQUE_SFT    1
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_OPAQUE_MASK	    0xfeUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_OPAQUE_SFT	    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_PORT_MASK 0x70000UL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_PORT_SFT 16
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_FUNC_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_PORT_MASK 0x70000UL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_DATA1_PORT_SFT 16
 };
 
 /* HWRM Asynchronous Event Completion Record for PF Driver load (16 bytes) */
 struct hwrm_async_event_cmpl_pf_drvr_load {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_MASK       0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_SFT	    0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_MASK	    0x3fUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_SFT		    0
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD 0x21UL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD 0x21UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_V		    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_OPAQUE_MASK     0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_OPAQUE_SFT      1
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_OPAQUE_MASK	    0xfeUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_OPAQUE_SFT	    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_FUNC_ID_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_PORT_MASK 0x70000UL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_PORT_SFT 16
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_FUNC_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_FUNC_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_PORT_MASK 0x70000UL
+	#define ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_DATA1_PORT_SFT 16
 };
 
 /* HWRM Asynchronous Event Completion Record for VF FLR (16 bytes) */
 struct hwrm_async_event_cmpl_vf_flr {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_MASK		    0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_SFT		    0
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_VF_FLR_TYPE_MASK		    0x3fUL
+	#define ASYNC_EVENT_CMPL_VF_FLR_TYPE_SFT		    0
+	#define ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT     0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR      0x30UL
+	#define ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR	   0x30UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_V			    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_OPAQUE_MASK	    0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_OPAQUE_SFT	    1
+	#define ASYNC_EVENT_CMPL_VF_FLR_V			    0x1UL
+	#define ASYNC_EVENT_CMPL_VF_FLR_OPAQUE_MASK		    0xfeUL
+	#define ASYNC_EVENT_CMPL_VF_FLR_OPAQUE_SFT		    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_DATA1_VF_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_DATA1_VF_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_VF_FLR_EVENT_DATA1_VF_ID_MASK     0xffffUL
+	#define ASYNC_EVENT_CMPL_VF_FLR_EVENT_DATA1_VF_ID_SFT      0
 };
 
 /* HWRM Asynchronous Event Completion Record for VF MAC Addr change (16 bytes) */
 struct hwrm_async_event_cmpl_vf_mac_addr_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_MASK 0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_SFT  0
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_MASK      0x3fUL
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_SFT       0
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_V	    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_OPAQUE_MASK    0xfeUL
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_OPAQUE_SFT     1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_DATA1_VF_ID_MASK 0xffffUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_DATA1_VF_ID_SFT 0
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_DATA1_VF_ID_MASK 0xffffUL
+	#define ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_DATA1_VF_ID_SFT 0
 };
 
 /* HWRM Asynchronous Event Completion Record for PF-VF communication status change (16 bytes) */
 struct hwrm_async_event_cmpl_pf_vf_comm_status_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_MASK 0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_MASK 0x3fUL
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_SFT 0
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_V   0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_OPAQUE_MASK 0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_OPAQUE_SFT 1
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_V	    0x1UL
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_OPAQUE_MASK 0xfeUL
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_OPAQUE_SFT 1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_DATA1_COMM_ESTABLISHED 0x1UL
+	#define ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_DATA1_COMM_ESTABLISHED 0x1UL
 };
 
 /* HWRM Asynchronous Event Completion Record for VF configuration change (16 bytes) */
 struct hwrm_async_event_cmpl_vf_cfg_change {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK      0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT       0
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK	    0x3fUL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT	    0
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL
 	__le32 event_data2;
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V		    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_OPAQUE_MASK    0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_OPAQUE_SFT     1
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V		    0x1UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_OPAQUE_MASK	    0xfeUL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_OPAQUE_SFT	    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MTU_CHANGE 0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MRU_CHANGE 0x2UL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_MAC_ADDR_CHANGE 0x4UL
-	#define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_VLAN_CHANGE 0x8UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MTU_CHANGE 0x1UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_MRU_CHANGE 0x2UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_MAC_ADDR_CHANGE 0x4UL
+	#define ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_DATA1_DFLT_VLAN_CHANGE 0x8UL
 };
 
 /* HWRM Asynchronous Event Completion Record for HWRM Error (16 bytes) */
 struct hwrm_async_event_cmpl_hwrm_error {
 	__le16 type;
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK	    0x3fUL
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT	    0
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK		    0x3fUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT		    0
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL
 	__le16 event_id;
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR   0xffUL
 	__le32 event_data2;
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST    HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST    ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL
 	u8 opaque_v;
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_V		    0x1UL
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_MASK       0xfeUL
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_SFT	    1
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_V			    0x1UL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_MASK	    0xfeUL
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_OPAQUE_SFT		    1
 	u8 timestamp_lo;
 	__le16 timestamp_hi;
 	__le32 event_data1;
-	#define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL
-};
-
-/* HW Resource Manager Specification 1.5.1 */
-#define HWRM_VERSION_MAJOR	1
-#define HWRM_VERSION_MINOR	5
-#define HWRM_VERSION_UPDATE	1
-
-#define HWRM_VERSION_STR	"1.5.1"
-/*
- * Following is the signature for HWRM message field that indicates not
- * applicable (All F's). Need to cast it the size of the field if needed.
- */
-#define HWRM_NA_SIGNATURE	((__le32)(-1))
-#define HWRM_MAX_REQ_LEN    (128)  /* hwrm_func_buf_rgtr */
-#define HWRM_MAX_RESP_LEN    (176)  /* hwrm_func_qstats */
-#define HW_HASH_INDEX_SIZE      0x80    /* 7 bit indirection table index. */
-#define HW_HASH_KEY_SIZE	40
-#define HWRM_RESP_VALID_KEY      1 /* valid key for HWRM response */
-/* Input (16 bytes) */
-struct input {
-	__le16 req_type;
-	__le16 cmpl_ring;
-	__le16 seq_id;
-	__le16 target_id;
-	__le64 resp_addr;
-};
-
-/* Output (8 bytes) */
-struct output {
-	__le16 error_code;
-	__le16 req_type;
-	__le16 seq_id;
-	__le16 resp_len;
-};
-
-/* Command numbering (8 bytes) */
-struct cmd_nums {
-	__le16 req_type;
-	#define HWRM_VER_GET					   (0x0UL)
-	#define HWRM_FUNC_BUF_UNRGTR				   (0xeUL)
-	#define HWRM_FUNC_VF_CFG				   (0xfUL)
-	#define RESERVED1					   (0x10UL)
-	#define HWRM_FUNC_RESET				   (0x11UL)
-	#define HWRM_FUNC_GETFID				   (0x12UL)
-	#define HWRM_FUNC_VF_ALLOC				   (0x13UL)
-	#define HWRM_FUNC_VF_FREE				   (0x14UL)
-	#define HWRM_FUNC_QCAPS				   (0x15UL)
-	#define HWRM_FUNC_QCFG					   (0x16UL)
-	#define HWRM_FUNC_CFG					   (0x17UL)
-	#define HWRM_FUNC_QSTATS				   (0x18UL)
-	#define HWRM_FUNC_CLR_STATS				   (0x19UL)
-	#define HWRM_FUNC_DRV_UNRGTR				   (0x1aUL)
-	#define HWRM_FUNC_VF_RESC_FREE				   (0x1bUL)
-	#define HWRM_FUNC_VF_VNIC_IDS_QUERY			   (0x1cUL)
-	#define HWRM_FUNC_DRV_RGTR				   (0x1dUL)
-	#define HWRM_FUNC_DRV_QVER				   (0x1eUL)
-	#define HWRM_FUNC_BUF_RGTR				   (0x1fUL)
-	#define HWRM_PORT_PHY_CFG				   (0x20UL)
-	#define HWRM_PORT_MAC_CFG				   (0x21UL)
-	#define HWRM_PORT_TS_QUERY				   (0x22UL)
-	#define HWRM_PORT_QSTATS				   (0x23UL)
-	#define HWRM_PORT_LPBK_QSTATS				   (0x24UL)
-	#define HWRM_PORT_CLR_STATS				   (0x25UL)
-	#define HWRM_PORT_LPBK_CLR_STATS			   (0x26UL)
-	#define HWRM_PORT_PHY_QCFG				   (0x27UL)
-	#define HWRM_PORT_MAC_QCFG				   (0x28UL)
-	#define HWRM_PORT_BLINK_LED				   (0x29UL)
-	#define HWRM_PORT_PHY_QCAPS				   (0x2aUL)
-	#define HWRM_PORT_PHY_I2C_WRITE			   (0x2bUL)
-	#define HWRM_PORT_PHY_I2C_READ				   (0x2cUL)
-	#define HWRM_QUEUE_QPORTCFG				   (0x30UL)
-	#define HWRM_QUEUE_QCFG				   (0x31UL)
-	#define HWRM_QUEUE_CFG					   (0x32UL)
-	#define RESERVED2					   (0x33UL)
-	#define RESERVED3					   (0x34UL)
-	#define HWRM_QUEUE_PFCENABLE_QCFG			   (0x35UL)
-	#define HWRM_QUEUE_PFCENABLE_CFG			   (0x36UL)
-	#define HWRM_QUEUE_PRI2COS_QCFG			   (0x37UL)
-	#define HWRM_QUEUE_PRI2COS_CFG				   (0x38UL)
-	#define HWRM_QUEUE_COS2BW_QCFG				   (0x39UL)
-	#define HWRM_QUEUE_COS2BW_CFG				   (0x3aUL)
-	#define HWRM_VNIC_ALLOC				   (0x40UL)
-	#define HWRM_VNIC_FREE					   (0x41UL)
-	#define HWRM_VNIC_CFG					   (0x42UL)
-	#define HWRM_VNIC_QCFG					   (0x43UL)
-	#define HWRM_VNIC_TPA_CFG				   (0x44UL)
-	#define HWRM_VNIC_TPA_QCFG				   (0x45UL)
-	#define HWRM_VNIC_RSS_CFG				   (0x46UL)
-	#define HWRM_VNIC_RSS_QCFG				   (0x47UL)
-	#define HWRM_VNIC_PLCMODES_CFG				   (0x48UL)
-	#define HWRM_VNIC_PLCMODES_QCFG			   (0x49UL)
-	#define HWRM_VNIC_QCAPS				   (0x4aUL)
-	#define HWRM_RING_ALLOC				   (0x50UL)
-	#define HWRM_RING_FREE					   (0x51UL)
-	#define HWRM_RING_CMPL_RING_QAGGINT_PARAMS		   (0x52UL)
-	#define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS		   (0x53UL)
-	#define HWRM_RING_RESET				   (0x5eUL)
-	#define HWRM_RING_GRP_ALLOC				   (0x60UL)
-	#define HWRM_RING_GRP_FREE				   (0x61UL)
-	#define RESERVED5					   (0x64UL)
-	#define RESERVED6					   (0x65UL)
-	#define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC			   (0x70UL)
-	#define HWRM_VNIC_RSS_COS_LB_CTX_FREE			   (0x71UL)
-	#define HWRM_CFA_L2_FILTER_ALLOC			   (0x90UL)
-	#define HWRM_CFA_L2_FILTER_FREE			   (0x91UL)
-	#define HWRM_CFA_L2_FILTER_CFG				   (0x92UL)
-	#define HWRM_CFA_L2_SET_RX_MASK			   (0x93UL)
-	#define RESERVED4					   (0x94UL)
-	#define HWRM_CFA_TUNNEL_FILTER_ALLOC			   (0x95UL)
-	#define HWRM_CFA_TUNNEL_FILTER_FREE			   (0x96UL)
-	#define HWRM_CFA_ENCAP_RECORD_ALLOC			   (0x97UL)
-	#define HWRM_CFA_ENCAP_RECORD_FREE			   (0x98UL)
-	#define HWRM_CFA_NTUPLE_FILTER_ALLOC			   (0x99UL)
-	#define HWRM_CFA_NTUPLE_FILTER_FREE			   (0x9aUL)
-	#define HWRM_CFA_NTUPLE_FILTER_CFG			   (0x9bUL)
-	#define HWRM_CFA_EM_FLOW_ALLOC				   (0x9cUL)
-	#define HWRM_CFA_EM_FLOW_FREE				   (0x9dUL)
-	#define HWRM_CFA_EM_FLOW_CFG				   (0x9eUL)
-	#define HWRM_TUNNEL_DST_PORT_QUERY			   (0xa0UL)
-	#define HWRM_TUNNEL_DST_PORT_ALLOC			   (0xa1UL)
-	#define HWRM_TUNNEL_DST_PORT_FREE			   (0xa2UL)
-	#define HWRM_STAT_CTX_ALLOC				   (0xb0UL)
-	#define HWRM_STAT_CTX_FREE				   (0xb1UL)
-	#define HWRM_STAT_CTX_QUERY				   (0xb2UL)
-	#define HWRM_STAT_CTX_CLR_STATS			   (0xb3UL)
-	#define HWRM_FW_RESET					   (0xc0UL)
-	#define HWRM_FW_QSTATUS				   (0xc1UL)
-	#define HWRM_FW_SET_TIME				   (0xc8UL)
-	#define HWRM_FW_GET_TIME				   (0xc9UL)
-	#define HWRM_EXEC_FWD_RESP				   (0xd0UL)
-	#define HWRM_REJECT_FWD_RESP				   (0xd1UL)
-	#define HWRM_FWD_RESP					   (0xd2UL)
-	#define HWRM_FWD_ASYNC_EVENT_CMPL			   (0xd3UL)
-	#define HWRM_TEMP_MONITOR_QUERY			   (0xe0UL)
-	#define HWRM_WOL_FILTER_ALLOC				   (0xf0UL)
-	#define HWRM_WOL_FILTER_FREE				   (0xf1UL)
-	#define HWRM_WOL_FILTER_QCFG				   (0xf2UL)
-	#define HWRM_WOL_REASON_QCFG				   (0xf3UL)
-	#define HWRM_DBG_READ_DIRECT				   (0xff10UL)
-	#define HWRM_DBG_READ_INDIRECT				   (0xff11UL)
-	#define HWRM_DBG_WRITE_DIRECT				   (0xff12UL)
-	#define HWRM_DBG_WRITE_INDIRECT			   (0xff13UL)
-	#define HWRM_DBG_DUMP					   (0xff14UL)
-	#define HWRM_NVM_INSTALL_UPDATE			   (0xfff3UL)
-	#define HWRM_NVM_MODIFY				   (0xfff4UL)
-	#define HWRM_NVM_VERIFY_UPDATE				   (0xfff5UL)
-	#define HWRM_NVM_GET_DEV_INFO				   (0xfff6UL)
-	#define HWRM_NVM_ERASE_DIR_ENTRY			   (0xfff7UL)
-	#define HWRM_NVM_MOD_DIR_ENTRY				   (0xfff8UL)
-	#define HWRM_NVM_FIND_DIR_ENTRY			   (0xfff9UL)
-	#define HWRM_NVM_GET_DIR_ENTRIES			   (0xfffaUL)
-	#define HWRM_NVM_GET_DIR_INFO				   (0xfffbUL)
-	#define HWRM_NVM_RAW_DUMP				   (0xfffcUL)
-	#define HWRM_NVM_READ					   (0xfffdUL)
-	#define HWRM_NVM_WRITE					   (0xfffeUL)
-	#define HWRM_NVM_RAW_WRITE_BLK				   (0xffffUL)
-	__le16 unused_0[3];
-};
-
-/* Return Codes (8 bytes) */
-struct ret_codes {
-	__le16 error_code;
-	#define HWRM_ERR_CODE_SUCCESS				   (0x0UL)
-	#define HWRM_ERR_CODE_FAIL				   (0x1UL)
-	#define HWRM_ERR_CODE_INVALID_PARAMS			   (0x2UL)
-	#define HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED		   (0x3UL)
-	#define HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR		   (0x4UL)
-	#define HWRM_ERR_CODE_INVALID_FLAGS			   (0x5UL)
-	#define HWRM_ERR_CODE_INVALID_ENABLES			   (0x6UL)
-	#define HWRM_ERR_CODE_HWRM_ERROR			   (0xfUL)
-	#define HWRM_ERR_CODE_UNKNOWN_ERR			   (0xfffeUL)
-	#define HWRM_ERR_CODE_CMD_NOT_SUPPORTED		   (0xffffUL)
-	__le16 unused_0[3];
-};
-
-/* Output (16 bytes) */
-struct hwrm_err_output {
-	__le16 error_code;
-	__le16 req_type;
-	__le16 seq_id;
-	__le16 resp_len;
-	__le32 opaque_0;
-	__le16 opaque_1;
-	u8 cmd_err;
-	u8 valid;
-};
-
-/* Port Tx Statistics Formats (408 bytes) */
-struct tx_port_stats {
-	__le64 tx_64b_frames;
-	__le64 tx_65b_127b_frames;
-	__le64 tx_128b_255b_frames;
-	__le64 tx_256b_511b_frames;
-	__le64 tx_512b_1023b_frames;
-	__le64 tx_1024b_1518_frames;
-	__le64 tx_good_vlan_frames;
-	__le64 tx_1519b_2047_frames;
-	__le64 tx_2048b_4095b_frames;
-	__le64 tx_4096b_9216b_frames;
-	__le64 tx_9217b_16383b_frames;
-	__le64 tx_good_frames;
-	__le64 tx_total_frames;
-	__le64 tx_ucast_frames;
-	__le64 tx_mcast_frames;
-	__le64 tx_bcast_frames;
-	__le64 tx_pause_frames;
-	__le64 tx_pfc_frames;
-	__le64 tx_jabber_frames;
-	__le64 tx_fcs_err_frames;
-	__le64 tx_control_frames;
-	__le64 tx_oversz_frames;
-	__le64 tx_single_dfrl_frames;
-	__le64 tx_multi_dfrl_frames;
-	__le64 tx_single_coll_frames;
-	__le64 tx_multi_coll_frames;
-	__le64 tx_late_coll_frames;
-	__le64 tx_excessive_coll_frames;
-	__le64 tx_frag_frames;
-	__le64 tx_err;
-	__le64 tx_tagged_frames;
-	__le64 tx_dbl_tagged_frames;
-	__le64 tx_runt_frames;
-	__le64 tx_fifo_underruns;
-	__le64 tx_pfc_ena_frames_pri0;
-	__le64 tx_pfc_ena_frames_pri1;
-	__le64 tx_pfc_ena_frames_pri2;
-	__le64 tx_pfc_ena_frames_pri3;
-	__le64 tx_pfc_ena_frames_pri4;
-	__le64 tx_pfc_ena_frames_pri5;
-	__le64 tx_pfc_ena_frames_pri6;
-	__le64 tx_pfc_ena_frames_pri7;
-	__le64 tx_eee_lpi_events;
-	__le64 tx_eee_lpi_duration;
-	__le64 tx_llfc_logical_msgs;
-	__le64 tx_hcfc_msgs;
-	__le64 tx_total_collisions;
-	__le64 tx_bytes;
-	__le64 tx_xthol_frames;
-	__le64 tx_stat_discard;
-	__le64 tx_stat_error;
-};
-
-/* Port Rx Statistics Formats (528 bytes) */
-struct rx_port_stats {
-	__le64 rx_64b_frames;
-	__le64 rx_65b_127b_frames;
-	__le64 rx_128b_255b_frames;
-	__le64 rx_256b_511b_frames;
-	__le64 rx_512b_1023b_frames;
-	__le64 rx_1024b_1518_frames;
-	__le64 rx_good_vlan_frames;
-	__le64 rx_1519b_2047b_frames;
-	__le64 rx_2048b_4095b_frames;
-	__le64 rx_4096b_9216b_frames;
-	__le64 rx_9217b_16383b_frames;
-	__le64 rx_total_frames;
-	__le64 rx_ucast_frames;
-	__le64 rx_mcast_frames;
-	__le64 rx_bcast_frames;
-	__le64 rx_fcs_err_frames;
-	__le64 rx_ctrl_frames;
-	__le64 rx_pause_frames;
-	__le64 rx_pfc_frames;
-	__le64 rx_unsupported_opcode_frames;
-	__le64 rx_unsupported_da_pausepfc_frames;
-	__le64 rx_wrong_sa_frames;
-	__le64 rx_align_err_frames;
-	__le64 rx_oor_len_frames;
-	__le64 rx_code_err_frames;
-	__le64 rx_false_carrier_frames;
-	__le64 rx_ovrsz_frames;
-	__le64 rx_jbr_frames;
-	__le64 rx_mtu_err_frames;
-	__le64 rx_match_crc_frames;
-	__le64 rx_promiscuous_frames;
-	__le64 rx_tagged_frames;
-	__le64 rx_double_tagged_frames;
-	__le64 rx_trunc_frames;
-	__le64 rx_good_frames;
-	__le64 rx_pfc_xon2xoff_frames_pri0;
-	__le64 rx_pfc_xon2xoff_frames_pri1;
-	__le64 rx_pfc_xon2xoff_frames_pri2;
-	__le64 rx_pfc_xon2xoff_frames_pri3;
-	__le64 rx_pfc_xon2xoff_frames_pri4;
-	__le64 rx_pfc_xon2xoff_frames_pri5;
-	__le64 rx_pfc_xon2xoff_frames_pri6;
-	__le64 rx_pfc_xon2xoff_frames_pri7;
-	__le64 rx_pfc_ena_frames_pri0;
-	__le64 rx_pfc_ena_frames_pri1;
-	__le64 rx_pfc_ena_frames_pri2;
-	__le64 rx_pfc_ena_frames_pri3;
-	__le64 rx_pfc_ena_frames_pri4;
-	__le64 rx_pfc_ena_frames_pri5;
-	__le64 rx_pfc_ena_frames_pri6;
-	__le64 rx_pfc_ena_frames_pri7;
-	__le64 rx_sch_crc_err_frames;
-	__le64 rx_undrsz_frames;
-	__le64 rx_frag_frames;
-	__le64 rx_eee_lpi_events;
-	__le64 rx_eee_lpi_duration;
-	__le64 rx_llfc_physical_msgs;
-	__le64 rx_llfc_logical_msgs;
-	__le64 rx_llfc_msgs_with_crc_err;
-	__le64 rx_hcfc_msgs;
-	__le64 rx_hcfc_msgs_with_crc_err;
-	__le64 rx_bytes;
-	__le64 rx_runt_bytes;
-	__le64 rx_runt_frames;
-	__le64 rx_stat_discard;
-	__le64 rx_stat_err;
+	#define ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP  0x1UL
 };
 
 /* hwrm_ver_get */
@@ -1041,6 +756,7 @@ struct hwrm_func_qcaps_output {
 	#define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED	    0x100UL
 	#define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED      0x200UL
 	#define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED	    0x400UL
+	#define FUNC_QCAPS_RESP_FLAGS_STD_TX_RING_MODE_SUPPORTED   0x800UL
 	u8 mac_address[6];
 	__le16 max_rsscos_ctx;
 	__le16 max_cmpl_rings;
@@ -1090,6 +806,7 @@ struct hwrm_func_qcfg_output {
 	#define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED      0x1UL
 	#define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED	    0x2UL
 	#define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED	    0x4UL
+	#define FUNC_QCFG_RESP_FLAGS_STD_TX_RING_MODE_ENABLED      0x8UL
 	u8 mac_address[6];
 	__le16 pci_id;
 	__le16 alloc_rsscos_ctx;
@@ -1166,6 +883,7 @@ struct hwrm_func_cfg_input {
 	#define FUNC_CFG_REQ_FLAGS_DISABLE_STP			    0x40UL
 	#define FUNC_CFG_REQ_FLAGS_DISABLE_LLDP		    0x80UL
 	#define FUNC_CFG_REQ_FLAGS_DISABLE_PTPV2		    0x100UL
+	#define FUNC_CFG_REQ_FLAGS_STD_TX_RING_MODE		    0x200UL
 	__le32 enables;
 	#define FUNC_CFG_REQ_ENABLES_MTU			    0x1UL
 	#define FUNC_CFG_REQ_ENABLES_MRU			    0x2UL
@@ -1399,6 +1117,7 @@ struct hwrm_func_drv_rgtr_input {
 	#define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI			   0x68UL
 	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864		   0x73UL
 	#define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2		   0x74UL
+	#define FUNC_DRV_RGTR_REQ_OS_TYPE_UEFI			   0x8000UL
 	u8 ver_maj;
 	u8 ver_min;
 	u8 ver_upd;
@@ -1531,6 +1250,7 @@ struct hwrm_func_drv_qver_output {
 	#define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI		   0x68UL
 	#define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864		   0x73UL
 	#define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2		   0x74UL
+	#define FUNC_DRV_QVER_RESP_OS_TYPE_UEFI		   0x8000UL
 	u8 ver_maj;
 	u8 ver_min;
 	u8 ver_upd;
@@ -1549,7 +1269,7 @@ struct hwrm_port_phy_cfg_input {
 	__le64 resp_addr;
 	__le32 flags;
 	#define PORT_PHY_CFG_REQ_FLAGS_RESET_PHY		    0x1UL
-	#define PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DOWN		    0x2UL
+	#define PORT_PHY_CFG_REQ_FLAGS_DEPRECATED		    0x2UL
 	#define PORT_PHY_CFG_REQ_FLAGS_FORCE			    0x4UL
 	#define PORT_PHY_CFG_REQ_FLAGS_RESTART_AUTONEG		    0x8UL
 	#define PORT_PHY_CFG_REQ_FLAGS_EEE_ENABLE		    0x10UL
@@ -1562,6 +1282,7 @@ struct hwrm_port_phy_cfg_input {
 	#define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE74_DISABLE	    0x800UL
 	#define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_ENABLE	    0x1000UL
 	#define PORT_PHY_CFG_REQ_FLAGS_FEC_CLAUSE91_DISABLE	    0x2000UL
+	#define PORT_PHY_CFG_REQ_FLAGS_FORCE_LINK_DWN		    0x4000UL
 	__le32 enables;
 	#define PORT_PHY_CFG_REQ_ENABLES_AUTO_MODE		    0x1UL
 	#define PORT_PHY_CFG_REQ_ENABLES_AUTO_DUPLEX		    0x2UL
@@ -2091,31 +1812,6 @@ struct hwrm_port_lpbk_clr_stats_output {
 	u8 valid;
 };
 
-/* hwrm_port_blink_led */
-/* Input (24 bytes) */
-struct hwrm_port_blink_led_input {
-	__le16 req_type;
-	__le16 cmpl_ring;
-	__le16 seq_id;
-	__le16 target_id;
-	__le64 resp_addr;
-	__le32 num_blinks;
-	__le32 unused_0;
-};
-
-/* Output (16 bytes) */
-struct hwrm_port_blink_led_output {
-	__le16 error_code;
-	__le16 req_type;
-	__le16 seq_id;
-	__le16 resp_len;
-	__le32 unused_0;
-	u8 unused_1;
-	u8 unused_2;
-	u8 unused_3;
-	u8 valid;
-};
-
 /* hwrm_port_phy_qcaps */
 /* Input (24 bytes) */
 struct hwrm_port_phy_qcaps_input {
@@ -2337,6 +2033,39 @@ struct hwrm_queue_cfg_output {
 	u8 valid;
 };
 
+/* hwrm_queue_pfcenable_qcfg */
+/* Input (24 bytes) */
+struct hwrm_queue_pfcenable_qcfg_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le16 port_id;
+	__le16 unused_0[3];
+};
+
+/* Output (16 bytes) */
+struct hwrm_queue_pfcenable_qcfg_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 flags;
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI0_PFC_ENABLED   0x1UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI1_PFC_ENABLED   0x2UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI2_PFC_ENABLED   0x4UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI3_PFC_ENABLED   0x8UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI4_PFC_ENABLED   0x10UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI5_PFC_ENABLED   0x20UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI6_PFC_ENABLED   0x40UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI7_PFC_ENABLED   0x80UL
+	u8 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 valid;
+};
+
 /* hwrm_queue_pfcenable_cfg */
 /* Input (24 bytes) */
 struct hwrm_queue_pfcenable_cfg_input {
@@ -2371,6 +2100,48 @@ struct hwrm_queue_pfcenable_cfg_output {
 	u8 valid;
 };
 
+/* hwrm_queue_pri2cos_qcfg */
+/* Input (24 bytes) */
+struct hwrm_queue_pri2cos_qcfg_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le32 flags;
+	#define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH		    0x1UL
+	#define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_TX		   (0x0UL << 0)
+	#define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_RX		   (0x1UL << 0)
+	#define QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_LAST    QUEUE_PRI2COS_QCFG_REQ_FLAGS_PATH_RX
+	#define QUEUE_PRI2COS_QCFG_REQ_FLAGS_IVLAN		    0x2UL
+	u8 port_id;
+	u8 unused_0[3];
+};
+
+/* Output (24 bytes) */
+struct hwrm_queue_pri2cos_qcfg_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	u8 pri0_cos_queue_id;
+	u8 pri1_cos_queue_id;
+	u8 pri2_cos_queue_id;
+	u8 pri3_cos_queue_id;
+	u8 pri4_cos_queue_id;
+	u8 pri5_cos_queue_id;
+	u8 pri6_cos_queue_id;
+	u8 pri7_cos_queue_id;
+	u8 queue_cfg_info;
+	#define QUEUE_PRI2COS_QCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG    0x1UL
+	u8 unused_0;
+	__le16 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 unused_4;
+	u8 valid;
+};
+
 /* hwrm_queue_pri2cos_cfg */
 /* Input (40 bytes) */
 struct hwrm_queue_pri2cos_cfg_input {
@@ -2421,6 +2192,257 @@ struct hwrm_queue_pri2cos_cfg_output {
 	u8 valid;
 };
 
+/* hwrm_queue_cos2bw_qcfg: request layout; every multi-byte field is little-endian */
+/* Input (24 bytes): 16-byte common request header, a 16-bit port_id and 6 pad bytes */
+struct hwrm_queue_cos2bw_qcfg_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le16 port_id;
+	__le16 unused_0[3];	/* padding that brings the request to 24 bytes */
+};
+
+/* Output (112 bytes): 8-byte response header, then one min/max/tsa/pri/weight record per queue id 0-7 */
+struct hwrm_queue_cos2bw_qcfg_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	u8 queue_id0;
+	u8 unused_0;
+	__le16 unused_1;	/* with unused_0: pads so queue_id0_min_bw starts at byte offset 12 */
+	__le32 queue_id0_min_bw;	/* bits 0-27 value, bit 28 reserved, bits 29-31 unit */
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id0_max_bw;	/* same bit layout as queue_id0_min_bw */
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id0_tsa_assign;	/* 0 = SP, 1 = ETS, 0x2-0xff reserved */
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id0_pri_lvl;
+	u8 queue_id0_bw_weight;
+	u8 queue_id1;	/* byte offset 23, so queue_id1_min_bw lands on offset 24; ids 2-7 repeat this 12-byte pattern */
+	__le32 queue_id1_min_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id1_max_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id1_tsa_assign;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id1_pri_lvl;
+	u8 queue_id1_bw_weight;
+	u8 queue_id2;
+	__le32 queue_id2_min_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id2_max_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id2_tsa_assign;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id2_pri_lvl;
+	u8 queue_id2_bw_weight;
+	u8 queue_id3;
+	__le32 queue_id3_min_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id3_max_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id3_tsa_assign;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id3_pri_lvl;
+	u8 queue_id3_bw_weight;
+	u8 queue_id4;
+	__le32 queue_id4_min_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id4_max_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id4_tsa_assign;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id4_pri_lvl;
+	u8 queue_id4_bw_weight;
+	u8 queue_id5;
+	__le32 queue_id5_min_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id5_max_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id5_tsa_assign;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id5_pri_lvl;
+	u8 queue_id5_bw_weight;
+	u8 queue_id6;
+	__le32 queue_id6_min_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id6_max_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id6_tsa_assign;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id6_pri_lvl;
+	u8 queue_id6_bw_weight;
+	u8 queue_id7;
+	__le32 queue_id7_min_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID
+	__le32 queue_id7_max_bw;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_MASK 0xfffffffUL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_SFT 0
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_RSVD       0x10000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT 29
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29)
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST    QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID
+	u8 queue_id7_tsa_assign;
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_TSA_ASSIGN_SP    0x0UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_TSA_ASSIGN_ETS   0x1UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST 0x2UL
+	#define QUEUE_COS2BW_QCFG_RESP_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST 0xffUL
+	u8 queue_id7_pri_lvl;
+	u8 queue_id7_bw_weight;
+	u8 unused_2;	/* unused_2..unused_5 pad bytes 107-110 so that valid is byte 111 */
+	u8 unused_3;
+	u8 unused_4;
+	u8 unused_5;
+	u8 valid;
+};
+
 /* hwrm_queue_cos2bw_cfg */
 /* Input (128 bytes) */
 struct hwrm_queue_cos2bw_cfg_input {
@@ -3802,7 +3824,9 @@ struct hwrm_stat_ctx_alloc_input {
 	__le64 resp_addr;
 	__le64 stats_dma_addr;
 	__le32 update_period_ms;
-	__le32 unused_0;
+	u8 stat_ctx_flags;
+	#define STAT_CTX_ALLOC_REQ_STAT_CTX_FLAGS_ROCE		    0x1UL
+	u8 unused_0[3];
 };
 
 /* Output (16 bytes) */
@@ -4023,6 +4047,75 @@ struct hwrm_fw_set_time_output {
 	u8 valid;
 };
 
+/* hwrm_fw_set_structured_data: request layout; every multi-byte field is little-endian */
+/* Input (32 bytes): 16-byte common request header plus a 16-byte command body */
+struct hwrm_fw_set_structured_data_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 src_data_addr;	/* presumably the address of the data to send - TODO confirm */
+	__le16 data_len;
+	u8 hdr_cnt;
+	u8 unused_0;
+	__le16 port_id;
+	__le16 unused_1;	/* padding that brings the request to 32 bytes */
+};
+
+/* Output (16 bytes): 8-byte response header, 7 unused bytes, then valid as the last byte */
+struct hwrm_fw_set_structured_data_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 unused_0;
+	u8 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 valid;
+};
+
+/* hwrm_fw_get_structured_data: request layout; every multi-byte field is little-endian */
+/* Input (40 bytes): 16-byte common request header plus a 24-byte command body */
+struct hwrm_fw_get_structured_data_input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+	__le64 dest_data_addr;	/* presumably where the returned data is written - TODO confirm */
+	__le16 data_len;
+	__le16 structure_id;
+	__le16 subtype;	/* one of the SUBTYPE_* values below; ALL is 0xffff */
+	#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_ALL		   0xffffUL
+	#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_ADMIN 0x100UL
+	#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_PEER 0x101UL
+	#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NEAR_BRIDGE_OPERATIONAL 0x102UL
+	#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_ADMIN 0x200UL
+	#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_PEER  0x201UL
+	#define FW_GET_STRUCTURED_DATA_REQ_SUBTYPE_NON_TPMR_OPERATIONAL 0x202UL
+	u8 count;
+	u8 unused_0;
+	__le16 port_id;
+	__le16 unused_1[3];	/* padding that brings the request to 40 bytes */
+};
+
+/* Output (16 bytes): 8-byte response header, hdr_cnt, 6 unused bytes, then valid last */
+struct hwrm_fw_get_structured_data_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	u8 hdr_cnt;
+	u8 unused_0;
+	__le16 unused_1;
+	u8 unused_2;
+	u8 unused_3;
+	u8 unused_4;
+	u8 valid;
+};
+
 /* hwrm_exec_fwd_resp */
 /* Input (128 bytes) */
 struct hwrm_exec_fwd_resp_input {
@@ -4515,4 +4608,363 @@ struct hwrm_nvm_install_update_output {
 	u8 valid;
 };
 
+/* Hardware Resource Manager Specification: common request header (NOTE(review): very generic tag name) */
+/* Input (16 bytes): the same five fields open each hwrm_*_input structure added in this header */
+struct input {
+	__le16 req_type;
+	__le16 cmpl_ring;
+	__le16 seq_id;
+	__le16 target_id;
+	__le64 resp_addr;
+};
+
+/* Output (8 bytes): common response header; the same four fields open each hwrm_*_output above */
+struct output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+};
+
+/* Command numbering (8 bytes): HWRM_* opcode values for the 16-bit req_type field, plus padding */
+struct cmd_nums {
+	__le16 req_type;	/* one of the opcodes below; the largest, 0xffff, still fits in 16 bits */
+	#define HWRM_VER_GET					   (0x0UL)
+	#define HWRM_FUNC_BUF_UNRGTR				   (0xeUL)
+	#define HWRM_FUNC_VF_CFG				   (0xfUL)
+	#define RESERVED1					   (0x10UL)	/* NOTE(review): RESERVEDn names carry no HWRM_ prefix */
+	#define HWRM_FUNC_RESET				   (0x11UL)
+	#define HWRM_FUNC_GETFID				   (0x12UL)
+	#define HWRM_FUNC_VF_ALLOC				   (0x13UL)
+	#define HWRM_FUNC_VF_FREE				   (0x14UL)
+	#define HWRM_FUNC_QCAPS				   (0x15UL)
+	#define HWRM_FUNC_QCFG					   (0x16UL)
+	#define HWRM_FUNC_CFG					   (0x17UL)
+	#define HWRM_FUNC_QSTATS				   (0x18UL)
+	#define HWRM_FUNC_CLR_STATS				   (0x19UL)
+	#define HWRM_FUNC_DRV_UNRGTR				   (0x1aUL)
+	#define HWRM_FUNC_VF_RESC_FREE				   (0x1bUL)
+	#define HWRM_FUNC_VF_VNIC_IDS_QUERY			   (0x1cUL)
+	#define HWRM_FUNC_DRV_RGTR				   (0x1dUL)
+	#define HWRM_FUNC_DRV_QVER				   (0x1eUL)
+	#define HWRM_FUNC_BUF_RGTR				   (0x1fUL)
+	#define HWRM_PORT_PHY_CFG				   (0x20UL)
+	#define HWRM_PORT_MAC_CFG				   (0x21UL)
+	#define HWRM_PORT_TS_QUERY				   (0x22UL)
+	#define HWRM_PORT_QSTATS				   (0x23UL)
+	#define HWRM_PORT_LPBK_QSTATS				   (0x24UL)
+	#define HWRM_PORT_CLR_STATS				   (0x25UL)
+	#define HWRM_PORT_LPBK_CLR_STATS			   (0x26UL)
+	#define HWRM_PORT_PHY_QCFG				   (0x27UL)
+	#define HWRM_PORT_MAC_QCFG				   (0x28UL)
+	#define RESERVED7					   (0x29UL)
+	#define HWRM_PORT_PHY_QCAPS				   (0x2aUL)
+	#define HWRM_PORT_PHY_I2C_WRITE			   (0x2bUL)
+	#define HWRM_PORT_PHY_I2C_READ				   (0x2cUL)
+	#define HWRM_PORT_LED_CFG				   (0x2dUL)
+	#define HWRM_PORT_LED_QCFG				   (0x2eUL)
+	#define HWRM_PORT_LED_QCAPS				   (0x2fUL)
+	#define HWRM_QUEUE_QPORTCFG				   (0x30UL)
+	#define HWRM_QUEUE_QCFG				   (0x31UL)
+	#define HWRM_QUEUE_CFG					   (0x32UL)
+	#define RESERVED2					   (0x33UL)
+	#define RESERVED3					   (0x34UL)
+	#define HWRM_QUEUE_PFCENABLE_QCFG			   (0x35UL)
+	#define HWRM_QUEUE_PFCENABLE_CFG			   (0x36UL)
+	#define HWRM_QUEUE_PRI2COS_QCFG			   (0x37UL)
+	#define HWRM_QUEUE_PRI2COS_CFG				   (0x38UL)
+	#define HWRM_QUEUE_COS2BW_QCFG				   (0x39UL)
+	#define HWRM_QUEUE_COS2BW_CFG				   (0x3aUL)
+	#define HWRM_VNIC_ALLOC				   (0x40UL)
+	#define HWRM_VNIC_FREE					   (0x41UL)
+	#define HWRM_VNIC_CFG					   (0x42UL)
+	#define HWRM_VNIC_QCFG					   (0x43UL)
+	#define HWRM_VNIC_TPA_CFG				   (0x44UL)
+	#define HWRM_VNIC_TPA_QCFG				   (0x45UL)
+	#define HWRM_VNIC_RSS_CFG				   (0x46UL)
+	#define HWRM_VNIC_RSS_QCFG				   (0x47UL)
+	#define HWRM_VNIC_PLCMODES_CFG				   (0x48UL)
+	#define HWRM_VNIC_PLCMODES_QCFG			   (0x49UL)
+	#define HWRM_VNIC_QCAPS				   (0x4aUL)
+	#define HWRM_RING_ALLOC				   (0x50UL)
+	#define HWRM_RING_FREE					   (0x51UL)
+	#define HWRM_RING_CMPL_RING_QAGGINT_PARAMS		   (0x52UL)
+	#define HWRM_RING_CMPL_RING_CFG_AGGINT_PARAMS		   (0x53UL)
+	#define HWRM_RING_RESET				   (0x5eUL)
+	#define HWRM_RING_GRP_ALLOC				   (0x60UL)
+	#define HWRM_RING_GRP_FREE				   (0x61UL)
+	#define RESERVED5					   (0x64UL)
+	#define RESERVED6					   (0x65UL)
+	#define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC			   (0x70UL)
+	#define HWRM_VNIC_RSS_COS_LB_CTX_FREE			   (0x71UL)
+	#define HWRM_CFA_L2_FILTER_ALLOC			   (0x90UL)
+	#define HWRM_CFA_L2_FILTER_FREE			   (0x91UL)
+	#define HWRM_CFA_L2_FILTER_CFG				   (0x92UL)
+	#define HWRM_CFA_L2_SET_RX_MASK			   (0x93UL)
+	#define RESERVED4					   (0x94UL)
+	#define HWRM_CFA_TUNNEL_FILTER_ALLOC			   (0x95UL)
+	#define HWRM_CFA_TUNNEL_FILTER_FREE			   (0x96UL)
+	#define HWRM_CFA_ENCAP_RECORD_ALLOC			   (0x97UL)
+	#define HWRM_CFA_ENCAP_RECORD_FREE			   (0x98UL)
+	#define HWRM_CFA_NTUPLE_FILTER_ALLOC			   (0x99UL)
+	#define HWRM_CFA_NTUPLE_FILTER_FREE			   (0x9aUL)
+	#define HWRM_CFA_NTUPLE_FILTER_CFG			   (0x9bUL)
+	#define HWRM_CFA_EM_FLOW_ALLOC				   (0x9cUL)
+	#define HWRM_CFA_EM_FLOW_FREE				   (0x9dUL)
+	#define HWRM_CFA_EM_FLOW_CFG				   (0x9eUL)
+	#define HWRM_TUNNEL_DST_PORT_QUERY			   (0xa0UL)
+	#define HWRM_TUNNEL_DST_PORT_ALLOC			   (0xa1UL)
+	#define HWRM_TUNNEL_DST_PORT_FREE			   (0xa2UL)
+	#define HWRM_STAT_CTX_ALLOC				   (0xb0UL)
+	#define HWRM_STAT_CTX_FREE				   (0xb1UL)
+	#define HWRM_STAT_CTX_QUERY				   (0xb2UL)
+	#define HWRM_STAT_CTX_CLR_STATS			   (0xb3UL)
+	#define HWRM_FW_RESET					   (0xc0UL)
+	#define HWRM_FW_QSTATUS				   (0xc1UL)
+	#define HWRM_FW_SET_TIME				   (0xc8UL)
+	#define HWRM_FW_GET_TIME				   (0xc9UL)
+	#define HWRM_FW_SET_STRUCTURED_DATA			   (0xcaUL)
+	#define HWRM_FW_GET_STRUCTURED_DATA			   (0xcbUL)
+	#define HWRM_FW_IPC_MAILBOX				   (0xccUL)
+	#define HWRM_EXEC_FWD_RESP				   (0xd0UL)
+	#define HWRM_REJECT_FWD_RESP				   (0xd1UL)
+	#define HWRM_FWD_RESP					   (0xd2UL)
+	#define HWRM_FWD_ASYNC_EVENT_CMPL			   (0xd3UL)
+	#define HWRM_TEMP_MONITOR_QUERY			   (0xe0UL)
+	#define HWRM_WOL_FILTER_ALLOC				   (0xf0UL)
+	#define HWRM_WOL_FILTER_FREE				   (0xf1UL)
+	#define HWRM_WOL_FILTER_QCFG				   (0xf2UL)
+	#define HWRM_WOL_REASON_QCFG				   (0xf3UL)
+	#define HWRM_DBG_READ_DIRECT				   (0xff10UL)
+	#define HWRM_DBG_READ_INDIRECT				   (0xff11UL)
+	#define HWRM_DBG_WRITE_DIRECT				   (0xff12UL)
+	#define HWRM_DBG_WRITE_INDIRECT			   (0xff13UL)
+	#define HWRM_DBG_DUMP					   (0xff14UL)
+	#define HWRM_NVM_GET_VARIABLE				   (0xfff1UL)
+	#define HWRM_NVM_SET_VARIABLE				   (0xfff2UL)
+	#define HWRM_NVM_INSTALL_UPDATE			   (0xfff3UL)
+	#define HWRM_NVM_MODIFY				   (0xfff4UL)
+	#define HWRM_NVM_VERIFY_UPDATE				   (0xfff5UL)
+	#define HWRM_NVM_GET_DEV_INFO				   (0xfff6UL)
+	#define HWRM_NVM_ERASE_DIR_ENTRY			   (0xfff7UL)
+	#define HWRM_NVM_MOD_DIR_ENTRY				   (0xfff8UL)
+	#define HWRM_NVM_FIND_DIR_ENTRY			   (0xfff9UL)
+	#define HWRM_NVM_GET_DIR_ENTRIES			   (0xfffaUL)
+	#define HWRM_NVM_GET_DIR_INFO				   (0xfffbUL)
+	#define HWRM_NVM_RAW_DUMP				   (0xfffcUL)
+	#define HWRM_NVM_READ					   (0xfffdUL)
+	#define HWRM_NVM_WRITE					   (0xfffeUL)
+	#define HWRM_NVM_RAW_WRITE_BLK				   (0xffffUL)
+	__le16 unused_0[3];	/* padding that brings the struct to 8 bytes */
+};
+
+/* Return Codes (8 bytes): HWRM_ERR_CODE_* values for the 16-bit error_code field, plus padding */
+struct ret_codes {
+	__le16 error_code;	/* one of the codes below; 0x0 is SUCCESS */
+	#define HWRM_ERR_CODE_SUCCESS				   (0x0UL)
+	#define HWRM_ERR_CODE_FAIL				   (0x1UL)
+	#define HWRM_ERR_CODE_INVALID_PARAMS			   (0x2UL)
+	#define HWRM_ERR_CODE_RESOURCE_ACCESS_DENIED		   (0x3UL)
+	#define HWRM_ERR_CODE_RESOURCE_ALLOC_ERROR		   (0x4UL)
+	#define HWRM_ERR_CODE_INVALID_FLAGS			   (0x5UL)
+	#define HWRM_ERR_CODE_INVALID_ENABLES			   (0x6UL)
+	#define HWRM_ERR_CODE_HWRM_ERROR			   (0xfUL)
+	#define HWRM_ERR_CODE_UNKNOWN_ERR			   (0xfffeUL)
+	#define HWRM_ERR_CODE_CMD_NOT_SUPPORTED		   (0xffffUL)
+	__le16 unused_0[3];	/* padding that brings the struct to 8 bytes */
+};
+
+/* Output (16 bytes): error response - 8-byte response header, two opaque words, cmd_err, valid */
+struct hwrm_err_output {
+	__le16 error_code;
+	__le16 req_type;
+	__le16 seq_id;
+	__le16 resp_len;
+	__le32 opaque_0;
+	__le16 opaque_1;
+	u8 cmd_err;	/* meaning is not defined in this struct; presumably command-specific - TODO confirm */
+	u8 valid;
+};
+
+/* Port Tx Statistics Formats (408 bytes): 51 little-endian 64-bit counters, 8 bytes each */
+struct tx_port_stats {
+	__le64 tx_64b_frames;
+	__le64 tx_65b_127b_frames;
+	__le64 tx_128b_255b_frames;
+	__le64 tx_256b_511b_frames;
+	__le64 tx_512b_1023b_frames;
+	__le64 tx_1024b_1518_frames;	/* NOTE(review): no 'b' after 1518, unlike the neighbouring bucket names */
+	__le64 tx_good_vlan_frames;
+	__le64 tx_1519b_2047_frames;	/* NOTE(review): no 'b' after 2047; rx_port_stats spells it 2047b */
+	__le64 tx_2048b_4095b_frames;
+	__le64 tx_4096b_9216b_frames;
+	__le64 tx_9217b_16383b_frames;
+	__le64 tx_good_frames;
+	__le64 tx_total_frames;
+	__le64 tx_ucast_frames;
+	__le64 tx_mcast_frames;
+	__le64 tx_bcast_frames;
+	__le64 tx_pause_frames;
+	__le64 tx_pfc_frames;
+	__le64 tx_jabber_frames;
+	__le64 tx_fcs_err_frames;
+	__le64 tx_control_frames;
+	__le64 tx_oversz_frames;
+	__le64 tx_single_dfrl_frames;
+	__le64 tx_multi_dfrl_frames;
+	__le64 tx_single_coll_frames;
+	__le64 tx_multi_coll_frames;
+	__le64 tx_late_coll_frames;
+	__le64 tx_excessive_coll_frames;
+	__le64 tx_frag_frames;
+	__le64 tx_err;
+	__le64 tx_tagged_frames;
+	__le64 tx_dbl_tagged_frames;
+	__le64 tx_runt_frames;
+	__le64 tx_fifo_underruns;
+	__le64 tx_pfc_ena_frames_pri0;	/* one counter per priority 0-7 follows (per the field names) */
+	__le64 tx_pfc_ena_frames_pri1;
+	__le64 tx_pfc_ena_frames_pri2;
+	__le64 tx_pfc_ena_frames_pri3;
+	__le64 tx_pfc_ena_frames_pri4;
+	__le64 tx_pfc_ena_frames_pri5;
+	__le64 tx_pfc_ena_frames_pri6;
+	__le64 tx_pfc_ena_frames_pri7;
+	__le64 tx_eee_lpi_events;
+	__le64 tx_eee_lpi_duration;
+	__le64 tx_llfc_logical_msgs;
+	__le64 tx_hcfc_msgs;
+	__le64 tx_total_collisions;
+	__le64 tx_bytes;
+	__le64 tx_xthol_frames;
+	__le64 tx_stat_discard;
+	__le64 tx_stat_error;
+};
+
+/* Port Rx Statistics Formats (528 bytes): 66 little-endian 64-bit counters, 8 bytes each */
+struct rx_port_stats {
+	__le64 rx_64b_frames;
+	__le64 rx_65b_127b_frames;
+	__le64 rx_128b_255b_frames;
+	__le64 rx_256b_511b_frames;
+	__le64 rx_512b_1023b_frames;
+	__le64 rx_1024b_1518_frames;	/* NOTE(review): no 'b' after 1518, unlike the neighbouring bucket names */
+	__le64 rx_good_vlan_frames;
+	__le64 rx_1519b_2047b_frames;
+	__le64 rx_2048b_4095b_frames;
+	__le64 rx_4096b_9216b_frames;
+	__le64 rx_9217b_16383b_frames;
+	__le64 rx_total_frames;
+	__le64 rx_ucast_frames;
+	__le64 rx_mcast_frames;
+	__le64 rx_bcast_frames;
+	__le64 rx_fcs_err_frames;
+	__le64 rx_ctrl_frames;
+	__le64 rx_pause_frames;
+	__le64 rx_pfc_frames;
+	__le64 rx_unsupported_opcode_frames;
+	__le64 rx_unsupported_da_pausepfc_frames;
+	__le64 rx_wrong_sa_frames;
+	__le64 rx_align_err_frames;
+	__le64 rx_oor_len_frames;
+	__le64 rx_code_err_frames;
+	__le64 rx_false_carrier_frames;
+	__le64 rx_ovrsz_frames;
+	__le64 rx_jbr_frames;
+	__le64 rx_mtu_err_frames;
+	__le64 rx_match_crc_frames;
+	__le64 rx_promiscuous_frames;
+	__le64 rx_tagged_frames;
+	__le64 rx_double_tagged_frames;
+	__le64 rx_trunc_frames;
+	__le64 rx_good_frames;
+	__le64 rx_pfc_xon2xoff_frames_pri0;	/* one counter per priority 0-7 follows (per the field names) */
+	__le64 rx_pfc_xon2xoff_frames_pri1;
+	__le64 rx_pfc_xon2xoff_frames_pri2;
+	__le64 rx_pfc_xon2xoff_frames_pri3;
+	__le64 rx_pfc_xon2xoff_frames_pri4;
+	__le64 rx_pfc_xon2xoff_frames_pri5;
+	__le64 rx_pfc_xon2xoff_frames_pri6;
+	__le64 rx_pfc_xon2xoff_frames_pri7;
+	__le64 rx_pfc_ena_frames_pri0;	/* second per-priority group, again pri0-pri7 */
+	__le64 rx_pfc_ena_frames_pri1;
+	__le64 rx_pfc_ena_frames_pri2;
+	__le64 rx_pfc_ena_frames_pri3;
+	__le64 rx_pfc_ena_frames_pri4;
+	__le64 rx_pfc_ena_frames_pri5;
+	__le64 rx_pfc_ena_frames_pri6;
+	__le64 rx_pfc_ena_frames_pri7;
+	__le64 rx_sch_crc_err_frames;
+	__le64 rx_undrsz_frames;
+	__le64 rx_frag_frames;
+	__le64 rx_eee_lpi_events;
+	__le64 rx_eee_lpi_duration;
+	__le64 rx_llfc_physical_msgs;
+	__le64 rx_llfc_logical_msgs;
+	__le64 rx_llfc_msgs_with_crc_err;
+	__le64 rx_hcfc_msgs;
+	__le64 rx_hcfc_msgs_with_crc_err;
+	__le64 rx_bytes;
+	__le64 rx_runt_bytes;
+	__le64 rx_runt_frames;
+	__le64 rx_stat_discard;
+	__le64 rx_stat_err;
+};
+
+/* Periodic Statistics Context DMA to host (160 bytes): 20 little-endian 64-bit counters */
+struct ctx_hw_stats {
+	__le64 rx_ucast_pkts;	/* eight rx_* counters: packets first, then bytes */
+	__le64 rx_mcast_pkts;
+	__le64 rx_bcast_pkts;
+	__le64 rx_discard_pkts;
+	__le64 rx_drop_pkts;
+	__le64 rx_ucast_bytes;
+	__le64 rx_mcast_bytes;
+	__le64 rx_bcast_bytes;
+	__le64 tx_ucast_pkts;	/* eight tx_* counters in the same order as the rx_* group */
+	__le64 tx_mcast_pkts;
+	__le64 tx_bcast_pkts;
+	__le64 tx_discard_pkts;
+	__le64 tx_drop_pkts;
+	__le64 tx_ucast_bytes;
+	__le64 tx_mcast_bytes;
+	__le64 tx_bcast_bytes;
+	__le64 tpa_pkts;	/* four tpa_* counters close the block */
+	__le64 tpa_bytes;
+	__le64 tpa_events;
+	__le64 tpa_aborts;
+};
+
+/* Structure data header (16 bytes): id, length, version, count, subtype, next offset, 6 pad bytes */
+struct hwrm_struct_hdr {
+	__le16 struct_id;	/* one of the STRUCT_HDR_STRUCT_ID_* values below */
+	#define STRUCT_HDR_STRUCT_ID_LLDP_CFG			   0x41bUL
+	#define STRUCT_HDR_STRUCT_ID_DCBX_ETS_CFG		   0x41dUL
+	#define STRUCT_HDR_STRUCT_ID_DCBX_PFC_CFG		   0x41fUL
+	#define STRUCT_HDR_STRUCT_ID_DCBX_APP_CFG		   0x421UL
+	#define STRUCT_HDR_STRUCT_ID_DCBX_STATE_CFG		   0x422UL
+	#define STRUCT_HDR_STRUCT_ID_LLDP_GENERIC_CFG		   0x424UL
+	#define STRUCT_HDR_STRUCT_ID_LLDP_DEVICE_CFG		   0x426UL
+	__le16 len;
+	u8 version;
+	u8 count;
+	__le16 subtype;
+	__le16 next_offset;	/* 0 is named LAST; presumably marks the final header in a chain - TODO confirm */
+	#define STRUCT_HDR_NEXT_OFFSET_LAST			   0x0UL
+	__le16 unused_0[3];	/* padding that brings the header to 16 bytes */
+};
+
+/* DCBX Application configuration structure (8 bytes): protocol id, selector, priority, valid, pad */
+struct hwrm_struct_data_dcbx_app_cfg {
+	__le16 protocol_id;	/* presumably interpreted according to protocol_selector - TODO confirm */
+	u8 protocol_selector;	/* 1 = ETHER_TYPE, 2 = TCP_PORT, 3 = UDP_PORT, 4 = TCP_UDP_PORT */
+	#define STRUCT_DATA_DCBX_APP_CFG_PROTOCOL_SELECTOR_ETHER_TYPE 0x1UL
+	#define STRUCT_DATA_DCBX_APP_CFG_PROTOCOL_SELECTOR_TCP_PORT 0x2UL
+	#define STRUCT_DATA_DCBX_APP_CFG_PROTOCOL_SELECTOR_UDP_PORT 0x3UL
+	#define STRUCT_DATA_DCBX_APP_CFG_PROTOCOL_SELECTOR_TCP_UDP_PORT 0x4UL
+	u8 priority;
+	u8 valid;
+	u8 unused_0[3];	/* padding that brings the struct to 8 bytes */
+};
+
 #endif
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 60e2af8678bd..c69602508666 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -34,8 +34,7 @@ static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp,
 		/* broadcast this async event to all VFs */
 		req.encap_async_event_target_id = cpu_to_le16(0xffff);
 	async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl;
-	async_cmpl->type =
-		cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT);
+	async_cmpl->type = cpu_to_le16(ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT);
 	async_cmpl->event_id = cpu_to_le16(event_id);
 
 	mutex_lock(&bp->hwrm_cmd_lock);
@@ -288,7 +287,7 @@ int bnxt_set_vf_link_state(struct net_device *dev, int vf_id, int link)
 	}
 	if (vf->flags & (BNXT_VF_LINK_UP | BNXT_VF_LINK_FORCED))
 		rc = bnxt_hwrm_fwd_async_event_cmpl(bp, vf,
-			HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE);
+			ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE);
 	return rc;
 }
 
@@ -421,15 +420,7 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FUNC_CFG, -1, -1);
 
 	/* Remaining rings are distributed equally amongs VF's for now */
-	/* TODO: the following workaroud is needed to restrict total number
-	 * of vf_cp_rings not exceed number of HW ring groups. This WA should
-	 * be removed once new HWRM provides HW ring groups capability in
-	 * hwrm_func_qcap.
-	 */
-	vf_cp_rings = min_t(u16, pf->max_cp_rings, pf->max_stat_ctxs);
-	vf_cp_rings = (vf_cp_rings - bp->cp_nr_rings) / num_vfs;
-	/* TODO: restore this logic below once the WA above is removed */
-	/* vf_cp_rings = (pf->max_cp_rings - bp->cp_nr_rings) / num_vfs; */
+	vf_cp_rings = (pf->max_cp_rings - bp->cp_nr_rings) / num_vfs;
 	vf_stat_ctx = (pf->max_stat_ctxs - bp->num_stat_ctxs) / num_vfs;
 	if (bp->flags & BNXT_FLAG_AGG_RINGS)
 		vf_rx_rings = (pf->max_rx_rings - bp->rx_nr_rings * 2) /
@@ -578,8 +569,7 @@ void bnxt_sriov_disable(struct bnxt *bp)
 
 	if (pci_vfs_assigned(bp->pdev)) {
 		bnxt_hwrm_fwd_async_event_cmpl(
-			bp, NULL,
-			HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD);
+			bp, NULL, ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD);
 		netdev_warn(bp->dev, "Unable to free %d VFs because some are assigned to VMs.\n",
 			    num_vfs);
 	} else {
@@ -592,7 +582,9 @@ void bnxt_sriov_disable(struct bnxt *bp)
 
 	bp->pf.active_vfs = 0;
 	/* Reclaim all resources for the PF. */
-	bnxt_hwrm_func_qcaps(bp);
+	rtnl_lock();
+	bnxt_restore_pf_fw_resources(bp);
+	rtnl_unlock();
 }
 
 int bnxt_sriov_configure(struct pci_dev *pdev, int num_vfs)
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
new file mode 100644
index 000000000000..8b7464b76501
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -0,0 +1,346 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/bitops.h>
+#include <linux/irq.h>
+#include <asm/byteorder.h>
+#include <linux/bitmap.h>
+
+#include "bnxt_hsi.h"
+#include "bnxt.h"
+#include "bnxt_ulp.h"
+
+static int bnxt_register_dev(struct bnxt_en_dev *edev, int ulp_id,
+			     struct bnxt_ulp_ops *ulp_ops, void *handle)
+{	/* Install @ulp_ops/@handle in slot @ulp_id; returns 0 or -errno. */
+	struct net_device *dev = edev->net;
+	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_ulp *ulp;
+
+	ASSERT_RTNL();	/* caller must hold RTNL */
+	if (ulp_id < 0 || ulp_id >= BNXT_MAX_ULP)	/* index into ulp_tbl[] */
+		return -EINVAL;
+
+	ulp = &edev->ulp_tbl[ulp_id];
+	if (rcu_access_pointer(ulp->ulp_ops)) {
+		netdev_err(bp->dev, "ulp id %d already registered\n", ulp_id);
+		return -EBUSY;
+	}
+	if (ulp_id == BNXT_ROCE_ULP) {
+		unsigned int max_stat_ctxs;
+
+		max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp);
+		if (max_stat_ctxs <= BNXT_MIN_ROCE_STAT_CTXS ||
+		    bp->num_stat_ctxs == max_stat_ctxs)
+			return -ENOMEM;	/* nothing left to set aside */
+		bnxt_set_max_func_stat_ctxs(bp, max_stat_ctxs -
+					    BNXT_MIN_ROCE_STAT_CTXS);
+	}
+
+	atomic_set(&ulp->ref_count, 0);
+	ulp->handle = handle;	/* set before ulp_ops is published below */
+	rcu_assign_pointer(ulp->ulp_ops, ulp_ops);
+
+	if (ulp_id == BNXT_ROCE_ULP) {
+		if (test_bit(BNXT_STATE_OPEN, &bp->state))	/* only while open */
+			bnxt_hwrm_vnic_cfg(bp, 0);
+	}
+
+	return 0;
+}
+
+static int bnxt_unregister_dev(struct bnxt_en_dev *edev, int ulp_id)
+{	/* Remove the ULP in slot @ulp_id; returns 0 or -EINVAL. */
+	struct net_device *dev = edev->net;
+	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_ulp *ulp;
+	int i = 0;
+
+	ASSERT_RTNL();	/* caller must hold RTNL */
+	if (ulp_id < 0 || ulp_id >= BNXT_MAX_ULP)	/* index into ulp_tbl[] */
+		return -EINVAL;
+
+	ulp = &edev->ulp_tbl[ulp_id];
+	if (!rcu_access_pointer(ulp->ulp_ops)) {
+		netdev_err(bp->dev, "ulp id %d not registered\n", ulp_id);
+		return -EINVAL;
+	}
+	if (ulp_id == BNXT_ROCE_ULP) {	/* give back what registration reserved */
+		unsigned int max_stat_ctxs = bnxt_get_max_func_stat_ctxs(bp);
+
+		bnxt_set_max_func_stat_ctxs(bp, max_stat_ctxs +
+					    BNXT_MIN_ROCE_STAT_CTXS);
+	}
+	if (ulp->max_async_event_id)
+		bnxt_hwrm_func_rgtr_async_events(bp, NULL, 0);
+
+	RCU_INIT_POINTER(ulp->ulp_ops, NULL);
+	synchronize_rcu();	/* wait out RCU readers of ulp_ops */
+	ulp->max_async_event_id = 0;
+	ulp->async_events_bmap = NULL;
+	while (atomic_read(&ulp->ref_count) != 0 && i < 10) {
+		msleep(100);	/* poll at most 10 times, 100 ms apart */
+		i++;
+	}
+	return 0;
+}
+
+static int bnxt_req_msix_vecs(struct bnxt_en_dev *edev, int ulp_id,
+			      struct bnxt_msix_entry *ent, int num_msix)
+{	/* Fill @ent with up to @num_msix vectors; returns count or -errno. */
+	struct net_device *dev = edev->net;
+	struct bnxt *bp = netdev_priv(dev);
+	int max_idx, max_cp_rings;
+	int avail_msix, i, idx;
+
+	ASSERT_RTNL();	/* caller must hold RTNL */
+	if (ulp_id != BNXT_ROCE_ULP || num_msix < 0)
+		return -EINVAL;
+
+	if (!(bp->flags & BNXT_FLAG_USING_MSIX))
+		return -ENODEV;
+
+	max_cp_rings = bnxt_get_max_func_cp_rings(bp);
+	max_idx = min_t(int, bp->total_irqs, max_cp_rings);
+	avail_msix = max_idx - bp->cp_nr_rings;
+	if (avail_msix <= 0)	/* a negative count must not pass as success */
+		return -ENOMEM;
+	if (avail_msix > num_msix)
+		avail_msix = num_msix;
+
+	idx = max_idx - avail_msix;	/* hand out the top of the index range */
+	for (i = 0; i < avail_msix; i++) {
+		ent[i].vector = bp->irq_tbl[idx + i].vector;
+		ent[i].ring_idx = idx + i;
+		ent[i].db_offset = (idx + i) * 0x80;
+	}
+	bnxt_set_max_func_irqs(bp, max_idx - avail_msix);
+	bnxt_set_max_func_cp_rings(bp, max_cp_rings - avail_msix);
+	edev->ulp_tbl[ulp_id].msix_requested = avail_msix;
+	return avail_msix;
+}
+
+static int bnxt_free_msix_vecs(struct bnxt_en_dev *edev, int ulp_id)
+{	/* Undo bnxt_req_msix_vecs(); returns 0, or -EINVAL for non-RoCE ids. */
+	struct net_device *dev = edev->net;
+	struct bnxt *bp = netdev_priv(dev);
+	int max_cp_rings, msix_requested;
+
+	ASSERT_RTNL();	/* caller must hold RTNL */
+	if (ulp_id != BNXT_ROCE_ULP)
+		return -EINVAL;
+
+	max_cp_rings = bnxt_get_max_func_cp_rings(bp);
+	msix_requested = edev->ulp_tbl[ulp_id].msix_requested;	/* 0 if none held */
+	bnxt_set_max_func_cp_rings(bp, max_cp_rings + msix_requested);
+	edev->ulp_tbl[ulp_id].msix_requested = 0;
+	bnxt_set_max_func_irqs(bp, bp->total_irqs);	/* back to the full IRQ count */
+	return 0;
+}
+
+void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id)
+{	/* Take a registered ULP's reservations off the function maxima. */
+	ASSERT_RTNL();	/* caller must hold RTNL */
+	if (bnxt_ulp_registered(bp->edev, ulp_id)) {
+		struct bnxt_en_dev *edev = bp->edev;
+		unsigned int msix_req, max;
+
+		msix_req = edev->ulp_tbl[ulp_id].msix_requested;
+		max = bnxt_get_max_func_cp_rings(bp);
+		bnxt_set_max_func_cp_rings(bp, max - msix_req);
+		max = bnxt_get_max_func_stat_ctxs(bp);	/* same amount as at register */
+		bnxt_set_max_func_stat_ctxs(bp, max - BNXT_MIN_ROCE_STAT_CTXS);
+	}
+}
+
+static int bnxt_send_msg(struct bnxt_en_dev *edev, int ulp_id,
+			 struct bnxt_fw_msg *fw_msg)
+{	/* Send @fw_msg to firmware, copy back the reply; @ulp_id is unused. */
+	struct net_device *dev = edev->net;
+	struct bnxt *bp = netdev_priv(dev);
+	struct input *req;
+	int rc;
+
+	mutex_lock(&bp->hwrm_cmd_lock);	/* held until the reply is copied out */
+	req = fw_msg->msg;
+	req->resp_addr = cpu_to_le64(bp->hwrm_cmd_resp_dma_addr);
+	rc = _hwrm_send_message(bp, fw_msg->msg, fw_msg->msg_len,
+				fw_msg->timeout);
+	if (!rc) {
+		struct output *resp = bp->hwrm_cmd_resp_addr;
+		u32 len = le16_to_cpu(resp->resp_len);
+
+		if (fw_msg->resp_max_len < len)	/* NOTE(review): int vs u32 compare; negative max is not clamped */
+			len = fw_msg->resp_max_len;	/* truncate to caller's limit */
+
+		memcpy(fw_msg->resp, resp, len);
+	}
+	mutex_unlock(&bp->hwrm_cmd_lock);
+	return rc;	/* result of _hwrm_send_message() */
+}
+
+static void bnxt_ulp_get(struct bnxt_ulp *ulp)
+{	/* Take a reference on @ulp. */
+	atomic_inc(&ulp->ref_count);	/* bnxt_unregister_dev() polls this for zero */
+}
+
+static void bnxt_ulp_put(struct bnxt_ulp *ulp)
+{	/* Drop a reference taken with bnxt_ulp_get(). */
+	atomic_dec(&ulp->ref_count);
+}
+
+void bnxt_ulp_stop(struct bnxt *bp)
+{	/* Call ulp_stop() on every registered ULP that provides it. */
+	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_ulp_ops *ops;
+	int i;
+
+	if (!edev)	/* bnxt_ulp_probe() has not created the edev yet */
+		return;
+
+	for (i = 0; i < BNXT_MAX_ULP; i++) {
+		struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
+
+		ops = rtnl_dereference(ulp->ulp_ops);	/* RTNL must be held */
+		if (!ops || !ops->ulp_stop)
+			continue;
+		ops->ulp_stop(ulp->handle);
+	}
+}
+
+void bnxt_ulp_start(struct bnxt *bp)
+{	/* Call ulp_start() on every registered ULP that provides it. */
+	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_ulp_ops *ops;
+	int i;
+
+	if (!edev)	/* bnxt_ulp_probe() has not created the edev yet */
+		return;
+
+	for (i = 0; i < BNXT_MAX_ULP; i++) {
+		struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
+
+		ops = rtnl_dereference(ulp->ulp_ops);	/* RTNL must be held */
+		if (!ops || !ops->ulp_start)
+			continue;
+		ops->ulp_start(ulp->handle);
+	}
+}
+
+void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs)
+{	/* Pass @num_vfs to each registered ULP's ulp_sriov_config(). */
+	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_ulp_ops *ops;
+	int i;
+
+	if (!edev)	/* bnxt_ulp_probe() has not created the edev yet */
+		return;
+
+	for (i = 0; i < BNXT_MAX_ULP; i++) {
+		struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
+
+		rcu_read_lock();
+		ops = rcu_dereference(ulp->ulp_ops);
+		if (!ops || !ops->ulp_sriov_config) {
+			rcu_read_unlock();
+			continue;
+		}
+		bnxt_ulp_get(ulp);	/* reference held across the callback */
+		rcu_read_unlock();	/* callback runs outside the RCU section */
+		ops->ulp_sriov_config(ulp->handle, num_vfs);
+		bnxt_ulp_put(ulp);
+	}
+}
+
+void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl)
+{	/* Deliver @cmpl to each ULP whose bitmap has the event id set. */
+	u16 event_id = le16_to_cpu(cmpl->event_id);
+	struct bnxt_en_dev *edev = bp->edev;
+	struct bnxt_ulp_ops *ops;
+	int i;
+
+	if (!edev)	/* bnxt_ulp_probe() has not created the edev yet */
+		return;
+
+	rcu_read_lock();	/* notifier is invoked inside the RCU section */
+	for (i = 0; i < BNXT_MAX_ULP; i++) {
+		struct bnxt_ulp *ulp = &edev->ulp_tbl[i];
+
+		ops = rcu_dereference(ulp->ulp_ops);
+		if (!ops || !ops->ulp_async_notifier)
+			continue;
+		if (!ulp->async_events_bmap ||
+		    event_id > ulp->max_async_event_id)	/* id beyond the bitmap */
+			continue;
+
+		/* Read max_async_event_id first before testing the bitmap. */
+		smp_rmb();	/* pairs with smp_wmb() in bnxt_register_async_events() */
+		if (test_bit(event_id, ulp->async_events_bmap))
+			ops->ulp_async_notifier(ulp->handle, cmpl);
+	}
+	rcu_read_unlock();
+}
+
+static int bnxt_register_async_events(struct bnxt_en_dev *edev, int ulp_id,
+				      unsigned long *events_bmap, u16 max_id)
+{	/* Record the ULP's event bitmap and register it with firmware. */
+	struct net_device *dev = edev->net;
+	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_ulp *ulp;
+
+	if (ulp_id < 0 || ulp_id >= BNXT_MAX_ULP)	/* index into ulp_tbl[] */
+		return -EINVAL;
+
+	ulp = &edev->ulp_tbl[ulp_id];
+	ulp->async_events_bmap = events_bmap;	/* pointer is kept, not copied */
+	/* Make sure bnxt_ulp_async_events() sees this order */
+	smp_wmb();
+	ulp->max_async_event_id = max_id;
+	bnxt_hwrm_func_rgtr_async_events(bp, events_bmap, max_id + 1);
+	return 0;	/* result of the firmware call is not propagated */
+}
+
+static const struct bnxt_en_ops bnxt_en_ops_tbl = {	/* installed as edev->en_ops by bnxt_ulp_probe() */
+	.bnxt_register_device	= bnxt_register_dev,
+	.bnxt_unregister_device	= bnxt_unregister_dev,
+	.bnxt_request_msix	= bnxt_req_msix_vecs,
+	.bnxt_free_msix		= bnxt_free_msix_vecs,
+	.bnxt_send_fw_msg	= bnxt_send_msg,
+	.bnxt_register_fw_async_events	= bnxt_register_async_events,
+};
+
+struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev)
+{	/* Return bp->edev, allocating and filling it in on first call. */
+	struct bnxt *bp = netdev_priv(dev);
+	struct bnxt_en_dev *edev;
+
+	edev = bp->edev;
+	if (!edev) {
+		edev = kzalloc(sizeof(*edev), GFP_KERNEL);	/* ulp_tbl[] starts zeroed */
+		if (!edev)
+			return ERR_PTR(-ENOMEM);	/* callers must check with IS_ERR() */
+		edev->en_ops = &bnxt_en_ops_tbl;
+		if (bp->flags & BNXT_FLAG_ROCEV1_CAP)	/* mirror RoCE caps into edev */
+			edev->flags |= BNXT_EN_FLAG_ROCEV1_CAP;
+		if (bp->flags & BNXT_FLAG_ROCEV2_CAP)
+			edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP;
+		edev->net = dev;
+		edev->pdev = bp->pdev;
+		bp->edev = edev;	/* cached; later calls return the same edev */
+	}
+	return bp->edev;
+}
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
new file mode 100644
index 000000000000..74f816e46a33
--- /dev/null
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -0,0 +1,93 @@
+/* Broadcom NetXtreme-C/E network driver.
+ *
+ * Copyright (c) 2016 Broadcom Limited
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef BNXT_ULP_H
+#define BNXT_ULP_H
+
+#define BNXT_ROCE_ULP	0
+#define BNXT_OTHER_ULP	1
+#define BNXT_MAX_ULP	2
+
+#define BNXT_MIN_ROCE_CP_RINGS	2
+#define BNXT_MIN_ROCE_STAT_CTXS	1
+
+struct hwrm_async_event_cmpl;
+struct bnxt;
+
+struct bnxt_ulp_ops {	/* callbacks a ULP passes to bnxt_register_device() */
+	/* async_notifier() cannot sleep (in BH context) */
+	void (*ulp_async_notifier)(void *, struct hwrm_async_event_cmpl *);
+	void (*ulp_stop)(void *);	/* invoked by bnxt_ulp_stop() */
+	void (*ulp_start)(void *);	/* invoked by bnxt_ulp_start() */
+	void (*ulp_sriov_config)(void *, int);	/* args: handle, num_vfs */
+};
+
+struct bnxt_msix_entry {	/* one vector filled in by bnxt_request_msix() */
+	u32	vector;		/* copied from bp->irq_tbl[].vector */
+	u32	ring_idx;	/* index of that irq_tbl[] slot */
+	u32	db_offset;	/* ring_idx * 0x80 */
+};
+
+struct bnxt_fw_msg {	/* request/response pair for bnxt_send_fw_msg() */
+	void	*msg;		/* request; its resp_addr field is overwritten */
+	int	msg_len;	/* passed to _hwrm_send_message() with msg */
+	void	*resp;		/* destination the response is copied to */
+	int	resp_max_len;	/* copy is truncated to this length */
+	int	timeout;	/* passed through to _hwrm_send_message() */
+};
+
+struct bnxt_ulp {	/* state of one slot in bnxt_en_dev.ulp_tbl[] */
+	void		*handle;	/* passed back as first callback argument */
+	struct bnxt_ulp_ops __rcu *ulp_ops;	/* NULL while the slot is unregistered */
+	unsigned long	*async_events_bmap;	/* pointer given to bnxt_register_fw_async_events() */
+	u16		max_async_event_id;	/* event ids above this are not delivered */
+	u16		msix_requested;	/* vectors handed out by bnxt_request_msix() */
+	atomic_t	ref_count;	/* held around ulp_sriov_config() calls */
+};
+
+struct bnxt_en_dev {	/* per-device object returned by bnxt_ulp_probe() */
+	struct net_device *net;
+	struct pci_dev *pdev;
+	u32 flags;	/* BNXT_EN_FLAG_* bits below */
+	#define BNXT_EN_FLAG_ROCEV1_CAP		0x1
+	#define BNXT_EN_FLAG_ROCEV2_CAP		0x2
+	#define BNXT_EN_FLAG_ROCE_CAP		(BNXT_EN_FLAG_ROCEV1_CAP | \
+						 BNXT_EN_FLAG_ROCEV2_CAP)
+	const struct bnxt_en_ops	*en_ops;	/* entry points into bnxt_en */
+	struct bnxt_ulp			ulp_tbl[BNXT_MAX_ULP];	/* indexed by ULP id */
+};
+
+struct bnxt_en_ops {	/* services bnxt_en offers a ULP; the int argument is the ULP id */
+	int (*bnxt_register_device)(struct bnxt_en_dev *, int,
+				    struct bnxt_ulp_ops *, void *);
+	int (*bnxt_unregister_device)(struct bnxt_en_dev *, int);
+	int (*bnxt_request_msix)(struct bnxt_en_dev *, int,
+				 struct bnxt_msix_entry *, int);	/* returns vectors granted or -errno */
+	int (*bnxt_free_msix)(struct bnxt_en_dev *, int);
+	int (*bnxt_send_fw_msg)(struct bnxt_en_dev *, int,
+				struct bnxt_fw_msg *);
+	int (*bnxt_register_fw_async_events)(struct bnxt_en_dev *, int,
+					     unsigned long *, u16);	/* bitmap, highest event id */
+};
+
+static inline bool bnxt_ulp_registered(struct bnxt_en_dev *edev, int ulp_id)
+{	/* True if @edev exists and slot @ulp_id has ulp_ops installed. */
+	if (!edev || ulp_id < 0 || ulp_id >= BNXT_MAX_ULP)	/* no such slot */
+		return false;
+	return rcu_access_pointer(edev->ulp_tbl[ulp_id].ulp_ops) != NULL;
+}
+
+void bnxt_subtract_ulp_resources(struct bnxt *bp, int ulp_id);
+void bnxt_ulp_stop(struct bnxt *bp);
+void bnxt_ulp_start(struct bnxt *bp);
+void bnxt_ulp_sriov_cfg(struct bnxt *bp, int num_vfs);
+void bnxt_ulp_async_events(struct bnxt *bp, struct hwrm_async_event_cmpl *cmpl);
+struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev);
+
+#endif
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 4464bc5db934..f92896835d2a 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -971,13 +971,6 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	return phy_ethtool_set_eee(priv->phydev, e);
 }
 
-static int bcmgenet_nway_reset(struct net_device *dev)
-{
-	struct bcmgenet_priv *priv = netdev_priv(dev);
-
-	return genphy_restart_aneg(priv->phydev);
-}
-
 /* standard ethtool support functions. */
 static const struct ethtool_ops bcmgenet_ethtool_ops = {
 	.get_strings		= bcmgenet_get_strings,
@@ -991,7 +984,7 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = {
 	.set_wol		= bcmgenet_set_wol,
 	.get_eee		= bcmgenet_get_eee,
 	.set_eee		= bcmgenet_set_eee,
-	.nway_reset		= bcmgenet_nway_reset,
+	.nway_reset		= phy_ethtool_nway_reset,
 	.get_coalesce		= bcmgenet_get_coalesce,
 	.set_coalesce		= bcmgenet_set_coalesce,
 	.get_link_ksettings	= bcmgenet_get_link_ksettings,
@@ -1172,6 +1165,7 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
 					  struct bcmgenet_tx_ring *ring)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct device *kdev = &priv->pdev->dev;
 	struct enet_cb *tx_cb_ptr;
 	struct netdev_queue *txq;
 	unsigned int pkts_compl = 0;
@@ -1199,13 +1193,13 @@ static unsigned int __bcmgenet_tx_reclaim(struct net_device *dev,
 		if (tx_cb_ptr->skb) {
 			pkts_compl++;
 			bytes_compl += GENET_CB(tx_cb_ptr->skb)->bytes_sent;
-			dma_unmap_single(&dev->dev,
+			dma_unmap_single(kdev,
 					 dma_unmap_addr(tx_cb_ptr, dma_addr),
 					 dma_unmap_len(tx_cb_ptr, dma_len),
 					 DMA_TO_DEVICE);
 			bcmgenet_free_cb(tx_cb_ptr);
 		} else if (dma_unmap_addr(tx_cb_ptr, dma_addr)) {
-			dma_unmap_page(&dev->dev,
+			dma_unmap_page(kdev,
 				       dma_unmap_addr(tx_cb_ptr, dma_addr),
 				       dma_unmap_len(tx_cb_ptr, dma_len),
 				       DMA_TO_DEVICE);
@@ -1775,6 +1769,7 @@ static int bcmgenet_alloc_rx_buffers(struct bcmgenet_priv *priv,
 
 static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv)
 {
+	struct device *kdev = &priv->pdev->dev;
 	struct enet_cb *cb;
 	int i;
 
@@ -1782,7 +1777,7 @@ static void bcmgenet_free_rx_buffers(struct bcmgenet_priv *priv)
 		cb = &priv->rx_cbs[i];
 
 		if (dma_unmap_addr(cb, dma_addr)) {
-			dma_unmap_single(&priv->dev->dev,
+			dma_unmap_single(kdev,
 					 dma_unmap_addr(cb, dma_addr),
 					 priv->rx_buf_len, DMA_FROM_DEVICE);
 			dma_unmap_addr_set(cb, dma_addr, 0);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmmii.c b/drivers/net/ethernet/broadcom/genet/bcmmii.c
index 457c3bc8cfff..e87607621e62 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmmii.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmmii.c
@@ -542,8 +542,10 @@ static int bcmgenet_mii_of_init(struct bcmgenet_priv *priv)
 	/* Make sure we initialize MoCA PHYs with a link down */
 	if (phy_mode == PHY_INTERFACE_MODE_MOCA) {
 		phydev = of_phy_find_device(dn);
-		if (phydev)
+		if (phydev) {
 			phydev->link = 0;
+			put_device(&phydev->mdio.dev);
+		}
 	}
 
 	return 0;
@@ -625,6 +627,7 @@ static int bcmgenet_mii_bus_init(struct bcmgenet_priv *priv)
 int bcmgenet_mii_init(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct device_node *dn = priv->pdev->dev.of_node;
 	int ret;
 
 	ret = bcmgenet_mii_alloc(priv);
@@ -638,6 +641,8 @@ int bcmgenet_mii_init(struct net_device *dev)
 	return 0;
 
 out:
+	if (of_phy_is_fixed_link(dn))
+		of_phy_deregister_fixed_link(dn);
 	of_node_put(priv->phy_dn);
 	mdiobus_unregister(priv->mii_bus);
 	mdiobus_free(priv->mii_bus);
@@ -647,7 +652,10 @@ out:
 void bcmgenet_mii_exit(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct device_node *dn = priv->pdev->dev.of_node;
 
+	if (of_phy_is_fixed_link(dn))
+		of_phy_deregister_fixed_link(dn);
 	of_node_put(priv->phy_dn);
 	mdiobus_unregister(priv->mii_bus);
 	mdiobus_free(priv->mii_bus);
diff --git a/drivers/net/ethernet/broadcom/sb1250-mac.c b/drivers/net/ethernet/broadcom/sb1250-mac.c
index f1b81187a201..435a2e4739d1 100644
--- a/drivers/net/ethernet/broadcom/sb1250-mac.c
+++ b/drivers/net/ethernet/broadcom/sb1250-mac.c
@@ -2147,15 +2147,6 @@ static void sbmac_setmulti(struct sbmac_softc *sc)
 	}
 }
 
-static int sb1250_change_mtu(struct net_device *_dev, int new_mtu)
-{
-	if (new_mtu >  ENET_PACKET_SIZE)
-		return -EINVAL;
-	_dev->mtu = new_mtu;
-	pr_info("changing the mtu to %d\n", new_mtu);
-	return 0;
-}
-
 static const struct net_device_ops sbmac_netdev_ops = {
 	.ndo_open		= sbmac_open,
 	.ndo_stop		= sbmac_close,
@@ -2163,7 +2154,6 @@ static const struct net_device_ops sbmac_netdev_ops = {
 	.ndo_set_rx_mode	= sbmac_set_rx_mode,
 	.ndo_tx_timeout		= sbmac_tx_timeout,
 	.ndo_do_ioctl		= sbmac_mii_ioctl,
-	.ndo_change_mtu		= sb1250_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2229,6 +2219,8 @@ static int sbmac_init(struct platform_device *pldev, long long base)
 
 	dev->netdev_ops = &sbmac_netdev_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
+	dev->min_mtu = 0;
+	dev->max_mtu = ENET_PACKET_SIZE;
 
 	netif_napi_add(dev, &sc->napi, sbmac_poll, 16);
 
diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index a927a730da10..185e9e047aa9 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -124,7 +124,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits)
 #define TG3_TX_TIMEOUT			(5 * HZ)
 
 /* hardware minimum and maximum for a single frame's data payload */
-#define TG3_MIN_MTU			60
+#define TG3_MIN_MTU			ETH_ZLEN
 #define TG3_MAX_MTU(tp)	\
 	(tg3_flag(tp, JUMBO_CAPABLE) ? 9000 : 1500)
 
@@ -14199,9 +14199,6 @@ static int tg3_change_mtu(struct net_device *dev, int new_mtu)
 	int err;
 	bool reset_phy = false;
 
-	if (new_mtu < TG3_MIN_MTU || new_mtu > TG3_MAX_MTU(tp))
-		return -EINVAL;
-
 	if (!netif_running(dev)) {
 		/* We'll just catch it later when the
 		 * device is up'd.
@@ -17799,6 +17796,10 @@ static int tg3_init_one(struct pci_dev *pdev,
 	dev->hw_features |= features;
 	dev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 60 - 9000 or 1500, depending on hardware */
+	dev->min_mtu = TG3_MIN_MTU;
+	dev->max_mtu = TG3_MAX_MTU(tp);
+
 	if (tg3_chip_rev_id(tp) == CHIPREV_ID_5705_A1 &&
 	    !tg3_flag(tp, TSO_CAPABLE) &&
 	    !(tr32(TG3PCI_PCISTATE) & PCISTATE_BUS_SPEED_HIGH)) {
diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c
index f42f672b0e7e..112030828c4b 100644
--- a/drivers/net/ethernet/brocade/bna/bnad.c
+++ b/drivers/net/ethernet/brocade/bna/bnad.c
@@ -3296,9 +3296,6 @@ bnad_change_mtu(struct net_device *netdev, int new_mtu)
 	struct bnad *bnad = netdev_priv(netdev);
 	u32 rx_count = 0, frame, new_frame;
 
-	if (new_mtu + ETH_HLEN < ETH_ZLEN || new_mtu > BNAD_JUMBO_MTU)
-		return -EINVAL;
-
 	mutex_lock(&bnad->conf_mutex);
 
 	mtu = netdev->mtu;
@@ -3465,6 +3462,10 @@ bnad_netdev_init(struct bnad *bnad, bool using_dac)
 	netdev->mem_start = bnad->mmio_start;
 	netdev->mem_end = bnad->mmio_start + bnad->mmio_len - 1;
 
+	/* MTU range: 46 - 9000 */
+	netdev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	netdev->max_mtu = BNAD_JUMBO_MTU;
+
 	netdev->netdev_ops = &bnad_netdev_ops;
 	bnad_set_ethtool_ops(netdev);
 }
diff --git a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
index 31f61a744d66..286593922139 100644
--- a/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
+++ b/drivers/net/ethernet/brocade/bna/bnad_ethtool.c
@@ -240,40 +240,46 @@ static const char *bnad_net_stats_strings[] = {
 #define BNAD_ETHTOOL_STATS_NUM	ARRAY_SIZE(bnad_net_stats_strings)
 
 static int
-bnad_get_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
+bnad_get_link_ksettings(struct net_device *netdev,
+			struct ethtool_link_ksettings *cmd)
 {
-	cmd->supported = SUPPORTED_10000baseT_Full;
-	cmd->advertising = ADVERTISED_10000baseT_Full;
-	cmd->autoneg = AUTONEG_DISABLE;
-	cmd->supported |= SUPPORTED_FIBRE;
-	cmd->advertising |= ADVERTISED_FIBRE;
-	cmd->port = PORT_FIBRE;
-	cmd->phy_address = 0;
+	u32 supported, advertising;
+
+	supported = SUPPORTED_10000baseT_Full;
+	advertising = ADVERTISED_10000baseT_Full;
+	cmd->base.autoneg = AUTONEG_DISABLE;
+	supported |= SUPPORTED_FIBRE;
+	advertising |= ADVERTISED_FIBRE;
+	cmd->base.port = PORT_FIBRE;
+	cmd->base.phy_address = 0;
 
 	if (netif_carrier_ok(netdev)) {
-		ethtool_cmd_speed_set(cmd, SPEED_10000);
-		cmd->duplex = DUPLEX_FULL;
+		cmd->base.speed = SPEED_10000;
+		cmd->base.duplex = DUPLEX_FULL;
 	} else {
-		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
-		cmd->duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
+		cmd->base.duplex = DUPLEX_UNKNOWN;
 	}
-	cmd->transceiver = XCVR_EXTERNAL;
-	cmd->maxtxpkt = 0;
-	cmd->maxrxpkt = 0;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
 
 static int
-bnad_set_settings(struct net_device *netdev, struct ethtool_cmd *cmd)
+bnad_set_link_ksettings(struct net_device *netdev,
+			const struct ethtool_link_ksettings *cmd)
 {
 	/* 10G full duplex setting supported only */
-	if (cmd->autoneg == AUTONEG_ENABLE)
-		return -EOPNOTSUPP; else {
-		if ((ethtool_cmd_speed(cmd) == SPEED_10000)
-		    && (cmd->duplex == DUPLEX_FULL))
-			return 0;
-	}
+	if (cmd->base.autoneg == AUTONEG_ENABLE)
+		return -EOPNOTSUPP;
+
+	if ((cmd->base.speed == SPEED_10000) &&
+	    (cmd->base.duplex == DUPLEX_FULL))
+		return 0;
 
 	return -EOPNOTSUPP;
 }
@@ -1118,8 +1124,6 @@ out:
 }
 
 static const struct ethtool_ops bnad_ethtool_ops = {
-	.get_settings = bnad_get_settings,
-	.set_settings = bnad_set_settings,
 	.get_drvinfo = bnad_get_drvinfo,
 	.get_wol = bnad_get_wol,
 	.get_link = ethtool_op_get_link,
@@ -1137,6 +1141,8 @@ static const struct ethtool_ops bnad_ethtool_ops = {
 	.set_eeprom = bnad_set_eeprom,
 	.flash_device = bnad_flash_device,
 	.get_ts_info = ethtool_op_get_ts_info,
+	.get_link_ksettings = bnad_get_link_ksettings,
+	.set_link_ksettings = bnad_set_link_ksettings,
 };
 
 void
diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c
index 533653bd7aec..538544a7c642 100644
--- a/drivers/net/ethernet/cadence/macb.c
+++ b/drivers/net/ethernet/cadence/macb.c
@@ -32,19 +32,28 @@
 #include <linux/of_gpio.h>
 #include <linux/of_mdio.h>
 #include <linux/of_net.h>
-
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
 #include "macb.h"
 
 #define MACB_RX_BUFFER_SIZE	128
 #define RX_BUFFER_MULTIPLE	64  /* bytes */
-#define RX_RING_SIZE		512 /* must be power of 2 */
-#define RX_RING_BYTES		(sizeof(struct macb_dma_desc) * RX_RING_SIZE)
 
-#define TX_RING_SIZE		128 /* must be power of 2 */
-#define TX_RING_BYTES		(sizeof(struct macb_dma_desc) * TX_RING_SIZE)
+#define DEFAULT_RX_RING_SIZE	512 /* must be power of 2 */
+#define MIN_RX_RING_SIZE	64
+#define MAX_RX_RING_SIZE	8192
+#define RX_RING_BYTES(bp)	(sizeof(struct macb_dma_desc)	\
+				 * (bp)->rx_ring_size)
+
+#define DEFAULT_TX_RING_SIZE	512 /* must be power of 2 */
+#define MIN_TX_RING_SIZE	64
+#define MAX_TX_RING_SIZE	4096
+#define TX_RING_BYTES(bp)	(sizeof(struct macb_dma_desc)	\
+				 * (bp)->tx_ring_size)
 
 /* level of occupied TX descriptors under which we wake up TX process */
-#define MACB_TX_WAKEUP_THRESH	(3 * TX_RING_SIZE / 4)
+#define MACB_TX_WAKEUP_THRESH(bp)	(3 * (bp)->tx_ring_size / 4)
 
 #define MACB_RX_INT_FLAGS	(MACB_BIT(RCOMP) | MACB_BIT(RXUBR)	\
 				 | MACB_BIT(ISR_ROVR))
@@ -53,10 +62,13 @@
 					| MACB_BIT(TXERR))
 #define MACB_TX_INT_FLAGS	(MACB_TX_ERR_FLAGS | MACB_BIT(TCOMP))
 
-#define MACB_MAX_TX_LEN		((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1))
-#define GEM_MAX_TX_LEN		((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1))
+/* Max length of transmit frame must be a multiple of 8 bytes */
+#define MACB_TX_LEN_ALIGN	8
+#define MACB_MAX_TX_LEN		((unsigned int)((1 << MACB_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
+#define GEM_MAX_TX_LEN		((unsigned int)((1 << GEM_TX_FRMLEN_SIZE) - 1) & ~((unsigned int)(MACB_TX_LEN_ALIGN - 1)))
 
-#define GEM_MTU_MIN_SIZE	68
+#define GEM_MTU_MIN_SIZE	ETH_MIN_MTU
+#define MACB_NETIF_LSO		(NETIF_F_TSO | NETIF_F_UFO)
 
 #define MACB_WOL_HAS_MAGIC_PACKET	(0x1 << 0)
 #define MACB_WOL_ENABLED		(0x1 << 1)
@@ -67,45 +79,47 @@
 #define MACB_HALT_TIMEOUT	1230
 
 /* Ring buffer accessors */
-static unsigned int macb_tx_ring_wrap(unsigned int index)
+static unsigned int macb_tx_ring_wrap(struct macb *bp, unsigned int index)
 {
-	return index & (TX_RING_SIZE - 1);
+	return index & (bp->tx_ring_size - 1);
 }
 
 static struct macb_dma_desc *macb_tx_desc(struct macb_queue *queue,
 					  unsigned int index)
 {
-	return &queue->tx_ring[macb_tx_ring_wrap(index)];
+	return &queue->tx_ring[macb_tx_ring_wrap(queue->bp, index)];
 }
 
 static struct macb_tx_skb *macb_tx_skb(struct macb_queue *queue,
 				       unsigned int index)
 {
-	return &queue->tx_skb[macb_tx_ring_wrap(index)];
+	return &queue->tx_skb[macb_tx_ring_wrap(queue->bp, index)];
 }
 
 static dma_addr_t macb_tx_dma(struct macb_queue *queue, unsigned int index)
 {
 	dma_addr_t offset;
 
-	offset = macb_tx_ring_wrap(index) * sizeof(struct macb_dma_desc);
+	offset = macb_tx_ring_wrap(queue->bp, index) *
+		 sizeof(struct macb_dma_desc);
 
 	return queue->tx_ring_dma + offset;
 }
 
-static unsigned int macb_rx_ring_wrap(unsigned int index)
+static unsigned int macb_rx_ring_wrap(struct macb *bp, unsigned int index)
 {
-	return index & (RX_RING_SIZE - 1);
+	return index & (bp->rx_ring_size - 1);
 }
 
 static struct macb_dma_desc *macb_rx_desc(struct macb *bp, unsigned int index)
 {
-	return &bp->rx_ring[macb_rx_ring_wrap(index)];
+	return &bp->rx_ring[macb_rx_ring_wrap(bp, index)];
 }
 
 static void *macb_rx_buffer(struct macb *bp, unsigned int index)
 {
-	return bp->rx_buffers + bp->rx_buffer_size * macb_rx_ring_wrap(index);
+	return bp->rx_buffers + bp->rx_buffer_size *
+	       macb_rx_ring_wrap(bp, index);
 }
 
 /* I/O accessors */
@@ -608,7 +622,8 @@ static void macb_tx_error_task(struct work_struct *work)
 			 */
 			if (!(ctrl & MACB_BIT(TX_BUF_EXHAUSTED))) {
 				netdev_vdbg(bp->dev, "txerr skb %u (data %p) TX complete\n",
-					    macb_tx_ring_wrap(tail), skb->data);
+					    macb_tx_ring_wrap(bp, tail),
+					    skb->data);
 				bp->stats.tx_packets++;
 				bp->stats.tx_bytes += skb->len;
 			}
@@ -700,7 +715,8 @@ static void macb_tx_interrupt(struct macb_queue *queue)
 			/* First, update TX stats if needed */
 			if (skb) {
 				netdev_vdbg(bp->dev, "skb %u (data %p) TX complete\n",
-					    macb_tx_ring_wrap(tail), skb->data);
+					    macb_tx_ring_wrap(bp, tail),
+					    skb->data);
 				bp->stats.tx_packets++;
 				bp->stats.tx_bytes += skb->len;
 			}
@@ -720,7 +736,7 @@ static void macb_tx_interrupt(struct macb_queue *queue)
 	queue->tx_tail = tail;
 	if (__netif_subqueue_stopped(bp->dev, queue_index) &&
 	    CIRC_CNT(queue->tx_head, queue->tx_tail,
-		     TX_RING_SIZE) <= MACB_TX_WAKEUP_THRESH)
+		     bp->tx_ring_size) <= MACB_TX_WAKEUP_THRESH(bp))
 		netif_wake_subqueue(bp->dev, queue_index);
 }
 
@@ -731,8 +747,8 @@ static void gem_rx_refill(struct macb *bp)
 	dma_addr_t		paddr;
 
 	while (CIRC_SPACE(bp->rx_prepared_head, bp->rx_tail,
-			  RX_RING_SIZE) > 0) {
-		entry = macb_rx_ring_wrap(bp->rx_prepared_head);
+			  bp->rx_ring_size) > 0) {
+		entry = macb_rx_ring_wrap(bp, bp->rx_prepared_head);
 
 		/* Make hw descriptor updates visible to CPU */
 		rmb();
@@ -759,7 +775,7 @@ static void gem_rx_refill(struct macb *bp)
 
 			bp->rx_skbuff[entry] = skb;
 
-			if (entry == RX_RING_SIZE - 1)
+			if (entry == bp->rx_ring_size - 1)
 				paddr |= MACB_BIT(RX_WRAP);
 			macb_set_addr(&(bp->rx_ring[entry]), paddr);
 			bp->rx_ring[entry].ctrl = 0;
@@ -813,7 +829,7 @@ static int gem_rx(struct macb *bp, int budget)
 		dma_addr_t addr;
 		bool rxused;
 
-		entry = macb_rx_ring_wrap(bp->rx_tail);
+		entry = macb_rx_ring_wrap(bp, bp->rx_tail);
 		desc = &bp->rx_ring[entry];
 
 		/* Make hw descriptor updates visible to CPU */
@@ -895,8 +911,8 @@ static int macb_rx_frame(struct macb *bp, unsigned int first_frag,
 	len = desc->ctrl & bp->rx_frm_len_mask;
 
 	netdev_vdbg(bp->dev, "macb_rx_frame frags %u - %u (len %u)\n",
-		    macb_rx_ring_wrap(first_frag),
-		    macb_rx_ring_wrap(last_frag), len);
+		macb_rx_ring_wrap(bp, first_frag),
+		macb_rx_ring_wrap(bp, last_frag), len);
 
 	/* The ethernet header starts NET_IP_ALIGN bytes into the
 	 * first buffer. Since the header is 14 bytes, this makes the
@@ -969,12 +985,13 @@ static inline void macb_init_rx_ring(struct macb *bp)
 	int i;
 
 	addr = bp->rx_buffers_dma;
-	for (i = 0; i < RX_RING_SIZE; i++) {
+	for (i = 0; i < bp->rx_ring_size; i++) {
 		bp->rx_ring[i].addr = addr;
 		bp->rx_ring[i].ctrl = 0;
 		addr += bp->rx_buffer_size;
 	}
-	bp->rx_ring[RX_RING_SIZE - 1].addr |= MACB_BIT(RX_WRAP);
+	bp->rx_ring[bp->rx_ring_size - 1].addr |= MACB_BIT(RX_WRAP);
+	bp->rx_tail = 0;
 }
 
 static int macb_rx(struct macb *bp, int budget)
@@ -1156,6 +1173,7 @@ static irqreturn_t macb_interrupt(int irq, void *dev_id)
 		if (status & MACB_BIT(RXUBR)) {
 			ctrl = macb_readl(bp, NCR);
 			macb_writel(bp, NCR, ctrl & ~MACB_BIT(RE));
+			wmb();
 			macb_writel(bp, NCR, ctrl | MACB_BIT(RE));
 
 			if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE)
@@ -1212,7 +1230,8 @@ static void macb_poll_controller(struct net_device *dev)
 
 static unsigned int macb_tx_map(struct macb *bp,
 				struct macb_queue *queue,
-				struct sk_buff *skb)
+				struct sk_buff *skb,
+				unsigned int hdrlen)
 {
 	dma_addr_t mapping;
 	unsigned int len, entry, i, tx_head = queue->tx_head;
@@ -1220,15 +1239,28 @@ static unsigned int macb_tx_map(struct macb *bp,
 	struct macb_dma_desc *desc;
 	unsigned int offset, size, count = 0;
 	unsigned int f, nr_frags = skb_shinfo(skb)->nr_frags;
-	unsigned int eof = 1;
-	u32 ctrl;
+	unsigned int eof = 1, mss_mfs = 0;
+	u32 ctrl, lso_ctrl = 0, seq_ctrl = 0;
+
+	/* LSO */
+	if (skb_shinfo(skb)->gso_size != 0) {
+		if (ip_hdr(skb)->protocol == IPPROTO_UDP)
+			/* UDP - UFO */
+			lso_ctrl = MACB_LSO_UFO_ENABLE;
+		else
+			/* TCP - TSO */
+			lso_ctrl = MACB_LSO_TSO_ENABLE;
+	}
 
 	/* First, map non-paged data */
 	len = skb_headlen(skb);
+
+	/* first buffer length */
+	size = hdrlen;
+
 	offset = 0;
 	while (len) {
-		size = min(len, bp->max_tx_length);
-		entry = macb_tx_ring_wrap(tx_head);
+		entry = macb_tx_ring_wrap(bp, tx_head);
 		tx_skb = &queue->tx_skb[entry];
 
 		mapping = dma_map_single(&bp->pdev->dev,
@@ -1247,6 +1279,8 @@ static unsigned int macb_tx_map(struct macb *bp,
 		offset += size;
 		count++;
 		tx_head++;
+
+		size = min(len, bp->max_tx_length);
 	}
 
 	/* Then, map paged data from fragments */
@@ -1257,7 +1291,7 @@ static unsigned int macb_tx_map(struct macb *bp,
 		offset = 0;
 		while (len) {
 			size = min(len, bp->max_tx_length);
-			entry = macb_tx_ring_wrap(tx_head);
+			entry = macb_tx_ring_wrap(bp, tx_head);
 			tx_skb = &queue->tx_skb[entry];
 
 			mapping = skb_frag_dma_map(&bp->pdev->dev, frag,
@@ -1295,14 +1329,29 @@ static unsigned int macb_tx_map(struct macb *bp,
 	 * to set the end of TX queue
 	 */
 	i = tx_head;
-	entry = macb_tx_ring_wrap(i);
+	entry = macb_tx_ring_wrap(bp, i);
 	ctrl = MACB_BIT(TX_USED);
 	desc = &queue->tx_ring[entry];
 	desc->ctrl = ctrl;
 
+	if (lso_ctrl) {
+		if (lso_ctrl == MACB_LSO_UFO_ENABLE)
+			/* include header and FCS in value given to h/w */
+			mss_mfs = skb_shinfo(skb)->gso_size +
+					skb_transport_offset(skb) +
+					ETH_FCS_LEN;
+		else /* TSO */ {
+			mss_mfs = skb_shinfo(skb)->gso_size;
+			/* TCP Sequence Number Source Select
+			 * can be set only for TSO
+			 */
+			seq_ctrl = 0;
+		}
+	}
+
 	do {
 		i--;
-		entry = macb_tx_ring_wrap(i);
+		entry = macb_tx_ring_wrap(bp, i);
 		tx_skb = &queue->tx_skb[entry];
 		desc = &queue->tx_ring[entry];
 
@@ -1311,9 +1360,19 @@ static unsigned int macb_tx_map(struct macb *bp,
 			ctrl |= MACB_BIT(TX_LAST);
 			eof = 0;
 		}
-		if (unlikely(entry == (TX_RING_SIZE - 1)))
+		if (unlikely(entry == (bp->tx_ring_size - 1)))
 			ctrl |= MACB_BIT(TX_WRAP);
 
+		/* First descriptor is header descriptor */
+		if (i == queue->tx_head) {
+			ctrl |= MACB_BF(TX_LSO, lso_ctrl);
+			ctrl |= MACB_BF(TX_TCP_SEQ_SRC, seq_ctrl);
+		} else
+			/* Only set MSS/MFS on payload descriptors
+			 * (second or later descriptor)
+			 */
+			ctrl |= MACB_BF(MSS_MFS, mss_mfs);
+
 		/* Set TX buffer descriptor */
 		macb_set_addr(desc, tx_skb->mapping);
 		/* desc->addr must be visible to hardware before clearing
@@ -1339,6 +1398,43 @@ dma_error:
 	return 0;
 }
 
+/* ndo_features_check: clear MACB_NETIF_LSO for a non-linear skb whose payload
+ * buffers, other than the last, are not multiples of MACB_TX_LEN_ALIGN bytes.
+ */
+static netdev_features_t macb_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	unsigned int nr_frags, f;
+	unsigned int hdrlen;
+
+	/* there is only one buffer */
+	if (!skb_is_nonlinear(skb))
+		return features;
+
+	/* length of header */
+	hdrlen = skb_transport_offset(skb);
+	if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+		hdrlen += tcp_hdrlen(skb);
+
+	/* For LSO:
+	 * When software supplies two or more payload buffers all payload buffers
+	 * apart from the last must be a multiple of 8 bytes in size.
+	 */
+	if (!IS_ALIGNED(skb_headlen(skb) - hdrlen, MACB_TX_LEN_ALIGN))
+		return features & ~MACB_NETIF_LSO;
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	/* Skip the last fragment; f + 1 cannot wrap when nr_frags is 0 */
+	for (f = 0; f + 1 < nr_frags; f++) {
+		const skb_frag_t *frag = &skb_shinfo(skb)->frags[f];
+
+		if (!IS_ALIGNED(skb_frag_size(frag), MACB_TX_LEN_ALIGN))
+			return features & ~MACB_NETIF_LSO;
+	}
+	return features;
+}
+
 static inline int macb_clear_csum(struct sk_buff *skb)
 {
 	/* no change for packets without checksum offloading */
@@ -1363,7 +1459,28 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct macb *bp = netdev_priv(dev);
 	struct macb_queue *queue = &bp->queues[queue_index];
 	unsigned long flags;
-	unsigned int count, nr_frags, frag_size, f;
+	unsigned int desc_cnt, nr_frags, frag_size, f;
+	unsigned int hdrlen;
+	bool is_lso, is_udp = 0;
+
+	is_lso = (skb_shinfo(skb)->gso_size != 0);
+
+	if (is_lso) {
+		is_udp = !!(ip_hdr(skb)->protocol == IPPROTO_UDP);
+
+		/* length of headers */
+		if (is_udp)
+			/* only queue eth + ip headers separately for UDP */
+			hdrlen = skb_transport_offset(skb);
+		else
+			hdrlen = skb_transport_offset(skb) + tcp_hdrlen(skb);
+		if (skb_headlen(skb) < hdrlen) {
+			netdev_err(bp->dev, "Error - LSO headers fragmented!!!\n");
+			/* if this is required, would need to copy to single buffer */
+			return NETDEV_TX_BUSY;
+		}
+	} else
+		hdrlen = min(skb_headlen(skb), bp->max_tx_length);
 
 #if defined(DEBUG) && defined(VERBOSE_DEBUG)
 	netdev_vdbg(bp->dev,
@@ -1378,17 +1495,22 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * socket buffer: skb fragments of jumbo frames may need to be
 	 * split into many buffer descriptors.
 	 */
-	count = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length);
+	if (is_lso && (skb_headlen(skb) > hdrlen))
+		/* extra header descriptor if also payload in first buffer */
+		desc_cnt = DIV_ROUND_UP((skb_headlen(skb) - hdrlen), bp->max_tx_length) + 1;
+	else
+		desc_cnt = DIV_ROUND_UP(skb_headlen(skb), bp->max_tx_length);
 	nr_frags = skb_shinfo(skb)->nr_frags;
 	for (f = 0; f < nr_frags; f++) {
 		frag_size = skb_frag_size(&skb_shinfo(skb)->frags[f]);
-		count += DIV_ROUND_UP(frag_size, bp->max_tx_length);
+		desc_cnt += DIV_ROUND_UP(frag_size, bp->max_tx_length);
 	}
 
 	spin_lock_irqsave(&bp->lock, flags);
 
 	/* This is a hard error, log it. */
-	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, TX_RING_SIZE) < count) {
+	if (CIRC_SPACE(queue->tx_head, queue->tx_tail,
+		       bp->tx_ring_size) < desc_cnt) {
 		netif_stop_subqueue(dev, queue_index);
 		spin_unlock_irqrestore(&bp->lock, flags);
 		netdev_dbg(bp->dev, "tx_head = %u, tx_tail = %u\n",
@@ -1402,7 +1524,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	/* Map socket buffer for DMA transfer */
-	if (!macb_tx_map(bp, queue, skb)) {
+	if (!macb_tx_map(bp, queue, skb, hdrlen)) {
 		dev_kfree_skb_any(skb);
 		goto unlock;
 	}
@@ -1414,7 +1536,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	macb_writel(bp, NCR, macb_readl(bp, NCR) | MACB_BIT(TSTART));
 
-	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, TX_RING_SIZE) < 1)
+	if (CIRC_SPACE(queue->tx_head, queue->tx_tail, bp->tx_ring_size) < 1)
 		netif_stop_subqueue(dev, queue_index);
 
 unlock:
@@ -1453,7 +1575,7 @@ static void gem_free_rx_buffers(struct macb *bp)
 	if (!bp->rx_skbuff)
 		return;
 
-	for (i = 0; i < RX_RING_SIZE; i++) {
+	for (i = 0; i < bp->rx_ring_size; i++) {
 		skb = bp->rx_skbuff[i];
 
 		if (!skb)
@@ -1478,7 +1600,7 @@ static void macb_free_rx_buffers(struct macb *bp)
 {
 	if (bp->rx_buffers) {
 		dma_free_coherent(&bp->pdev->dev,
-				  RX_RING_SIZE * bp->rx_buffer_size,
+				  bp->rx_ring_size * bp->rx_buffer_size,
 				  bp->rx_buffers, bp->rx_buffers_dma);
 		bp->rx_buffers = NULL;
 	}
@@ -1491,7 +1613,7 @@ static void macb_free_consistent(struct macb *bp)
 
 	bp->macbgem_ops.mog_free_rx_buffers(bp);
 	if (bp->rx_ring) {
-		dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES,
+		dma_free_coherent(&bp->pdev->dev, RX_RING_BYTES(bp),
 				  bp->rx_ring, bp->rx_ring_dma);
 		bp->rx_ring = NULL;
 	}
@@ -1500,7 +1622,7 @@ static void macb_free_consistent(struct macb *bp)
 		kfree(queue->tx_skb);
 		queue->tx_skb = NULL;
 		if (queue->tx_ring) {
-			dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES,
+			dma_free_coherent(&bp->pdev->dev, TX_RING_BYTES(bp),
 					  queue->tx_ring, queue->tx_ring_dma);
 			queue->tx_ring = NULL;
 		}
@@ -1511,14 +1633,14 @@ static int gem_alloc_rx_buffers(struct macb *bp)
 {
 	int size;
 
-	size = RX_RING_SIZE * sizeof(struct sk_buff *);
+	size = bp->rx_ring_size * sizeof(struct sk_buff *);
 	bp->rx_skbuff = kzalloc(size, GFP_KERNEL);
 	if (!bp->rx_skbuff)
 		return -ENOMEM;
-
-	netdev_dbg(bp->dev,
-		   "Allocated %d RX struct sk_buff entries at %p\n",
-		   RX_RING_SIZE, bp->rx_skbuff);
+	else
+		netdev_dbg(bp->dev,
+			   "Allocated %d RX struct sk_buff entries at %p\n",
+			   bp->rx_ring_size, bp->rx_skbuff);
 	return 0;
 }
 
@@ -1526,7 +1648,7 @@ static int macb_alloc_rx_buffers(struct macb *bp)
 {
 	int size;
 
-	size = RX_RING_SIZE * bp->rx_buffer_size;
+	size = bp->rx_ring_size * bp->rx_buffer_size;
 	bp->rx_buffers = dma_alloc_coherent(&bp->pdev->dev, size,
 					    &bp->rx_buffers_dma, GFP_KERNEL);
 	if (!bp->rx_buffers)
@@ -1545,7 +1667,7 @@ static int macb_alloc_consistent(struct macb *bp)
 	int size;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		size = TX_RING_BYTES;
+		size = TX_RING_BYTES(bp);
 		queue->tx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
 						    &queue->tx_ring_dma,
 						    GFP_KERNEL);
@@ -1556,13 +1678,13 @@ static int macb_alloc_consistent(struct macb *bp)
 			   q, size, (unsigned long)queue->tx_ring_dma,
 			   queue->tx_ring);
 
-		size = TX_RING_SIZE * sizeof(struct macb_tx_skb);
+		size = bp->tx_ring_size * sizeof(struct macb_tx_skb);
 		queue->tx_skb = kmalloc(size, GFP_KERNEL);
 		if (!queue->tx_skb)
 			goto out_err;
 	}
 
-	size = RX_RING_BYTES;
+	size = RX_RING_BYTES(bp);
 	bp->rx_ring = dma_alloc_coherent(&bp->pdev->dev, size,
 					 &bp->rx_ring_dma, GFP_KERNEL);
 	if (!bp->rx_ring)
@@ -1588,11 +1710,11 @@ static void gem_init_rings(struct macb *bp)
 	int i;
 
 	for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) {
-		for (i = 0; i < TX_RING_SIZE; i++) {
-			macb_set_addr(&(queue->tx_ring[i]), 0);
+		for (i = 0; i < bp->tx_ring_size; i++) {
+			queue->tx_ring[i].addr = 0;
 			queue->tx_ring[i].ctrl = MACB_BIT(TX_USED);
 		}
-		queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
+		queue->tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP);
 		queue->tx_head = 0;
 		queue->tx_tail = 0;
 	}
@@ -1609,15 +1731,13 @@ static void macb_init_rings(struct macb *bp)
 
 	macb_init_rx_ring(bp);
 
-	for (i = 0; i < TX_RING_SIZE; i++) {
+	for (i = 0; i < bp->tx_ring_size; i++) {
 		bp->queues[0].tx_ring[i].addr = 0;
 		bp->queues[0].tx_ring[i].ctrl = MACB_BIT(TX_USED);
 	}
 	bp->queues[0].tx_head = 0;
 	bp->queues[0].tx_tail = 0;
-	bp->queues[0].tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP);
-
-	bp->rx_tail = 0;
+	bp->queues[0].tx_ring[bp->tx_ring_size - 1].ctrl |= MACB_BIT(TX_WRAP);
 }
 
 static void macb_reset_hw(struct macb *bp)
@@ -1986,19 +2106,9 @@ static int macb_close(struct net_device *dev)
 
 static int macb_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct macb *bp = netdev_priv(dev);
-	u32 max_mtu;
-
 	if (netif_running(dev))
 		return -EBUSY;
 
-	max_mtu = ETH_DATA_LEN;
-	if (bp->caps & MACB_CAPS_JUMBO)
-		max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN;
-
-	if ((new_mtu > max_mtu) || (new_mtu < GEM_MTU_MIN_SIZE))
-		return -EINVAL;
-
 	dev->mtu = new_mtu;
 
 	return 0;
@@ -2158,8 +2268,8 @@ static void macb_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 	regs->version = (macb_readl(bp, MID) & ((1 << MACB_REV_SIZE) - 1))
 			| MACB_GREGS_VERSION;
 
-	tail = macb_tx_ring_wrap(bp->queues[0].tx_tail);
-	head = macb_tx_ring_wrap(bp->queues[0].tx_head);
+	tail = macb_tx_ring_wrap(bp, bp->queues[0].tx_tail);
+	head = macb_tx_ring_wrap(bp, bp->queues[0].tx_head);
 
 	regs_buff[0]  = macb_readl(bp, NCR);
 	regs_buff[1]  = macb_or_gem_readl(bp, NCFGR);
@@ -2214,6 +2324,56 @@ static int macb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 	return 0;
 }
 
+/* ethtool get_ringparam: report the maximum and current RX/TX ring sizes. */
+static void macb_get_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring)
+{
+	struct macb *bp = netdev_priv(netdev);
+
+	ring->rx_pending = bp->rx_ring_size;
+	ring->rx_max_pending = MAX_RX_RING_SIZE;
+	ring->tx_pending = bp->tx_ring_size;
+	ring->tx_max_pending = MAX_TX_RING_SIZE;
+}
+
+/* ethtool set_ringparam: clamp the requested RX/TX ring sizes to the driver
+ * limits, round them up to a power of two and, if the interface is running,
+ * close and reopen it around the update. Returns 0, -EINVAL or macb_open()'s error.
+ */
+static int macb_set_ringparam(struct net_device *netdev,
+			      struct ethtool_ringparam *ring)
+{
+	struct macb *bp = netdev_priv(netdev);
+	u32 new_rx_size, new_tx_size;
+	unsigned int reset = 0;
+
+	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+		return -EINVAL;
+
+	new_rx_size = clamp_t(u32, ring->rx_pending,
+			      MIN_RX_RING_SIZE, MAX_RX_RING_SIZE);
+	new_rx_size = roundup_pow_of_two(new_rx_size);
+
+	new_tx_size = clamp_t(u32, ring->tx_pending,
+			      MIN_TX_RING_SIZE, MAX_TX_RING_SIZE);
+	new_tx_size = roundup_pow_of_two(new_tx_size);
+
+	/* nothing to do */
+	if (new_tx_size == bp->tx_ring_size && new_rx_size == bp->rx_ring_size)
+		return 0;
+
+	if (netif_running(bp->dev)) {
+		reset = 1;
+		macb_close(bp->dev);
+	}
+
+	bp->rx_ring_size = new_rx_size;
+	bp->tx_ring_size = new_tx_size;
+
+	/* Report a failed restart instead of claiming success */
+	return reset ? macb_open(bp->dev) : 0;
+}
+
 static const struct ethtool_ops macb_ethtool_ops = {
 	.get_regs_len		= macb_get_regs_len,
 	.get_regs		= macb_get_regs,
@@ -2223,6 +2383,8 @@ static const struct ethtool_ops macb_ethtool_ops = {
 	.set_wol		= macb_set_wol,
 	.get_link_ksettings     = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings     = phy_ethtool_set_link_ksettings,
+	.get_ringparam		= macb_get_ringparam,
+	.set_ringparam		= macb_set_ringparam,
 };
 
 static const struct ethtool_ops gem_ethtool_ops = {
@@ -2235,6 +2397,8 @@ static const struct ethtool_ops gem_ethtool_ops = {
 	.get_sset_count		= gem_get_sset_count,
 	.get_link_ksettings     = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings     = phy_ethtool_set_link_ksettings,
+	.get_ringparam		= macb_get_ringparam,
+	.set_ringparam		= macb_set_ringparam,
 };
 
 static int macb_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
@@ -2298,6 +2462,7 @@ static const struct net_device_ops macb_netdev_ops = {
 	.ndo_poll_controller	= macb_poll_controller,
 #endif
 	.ndo_set_features	= macb_set_features,
+	.ndo_features_check	= macb_features_check,
 };
 
 /* Configure peripheral capabilities according to device tree
@@ -2429,6 +2594,9 @@ static int macb_init(struct platform_device *pdev)
 	int err;
 	u32 val;
 
+	bp->tx_ring_size = DEFAULT_TX_RING_SIZE;
+	bp->rx_ring_size = DEFAULT_RX_RING_SIZE;
+
 	/* set the queue register mapping once for all: queue0 has a special
 	 * register mapping but we don't want to test the queue index then
 	 * compute the corresponding register offset at run time.
@@ -2501,6 +2669,11 @@ static int macb_init(struct platform_device *pdev)
 
 	/* Set features */
 	dev->hw_features = NETIF_F_SG;
+
+	/* Check LSO capability */
+	if (GEM_BFEXT(PBUF_LSO, gem_readl(bp, DCFG6)))
+		dev->hw_features |= MACB_NETIF_LSO;
+
 	/* Checksum offload is only available on gem with packet buffer */
 	if (macb_is_gem(bp) && !(bp->caps & MACB_CAPS_FIFO_MODE))
 		dev->hw_features |= NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
@@ -2770,6 +2943,7 @@ static irqreturn_t at91ether_interrupt(int irq, void *dev_id)
 	if (intstatus & MACB_BIT(RXUBR)) {
 		ctl = macb_readl(lp, NCR);
 		macb_writel(lp, NCR, ctl & ~MACB_BIT(RE));
+		wmb();
 		macb_writel(lp, NCR, ctl | MACB_BIT(RE));
 	}
 
@@ -2799,7 +2973,6 @@ static const struct net_device_ops at91ether_netdev_ops = {
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_do_ioctl		= macb_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= at91ether_poll_controller,
 #endif
@@ -3034,6 +3207,13 @@ static int macb_probe(struct platform_device *pdev)
 		goto err_out_free_netdev;
 	}
 
+	/* MTU range: 68 - 1500 or 10240 */
+	dev->min_mtu = GEM_MTU_MIN_SIZE;
+	if (bp->caps & MACB_CAPS_JUMBO)
+		dev->max_mtu = gem_readl(bp, JML) - ETH_HLEN - ETH_FCS_LEN;
+	else
+		dev->max_mtu = ETH_DATA_LEN;
+
 	mac = of_get_mac_address(np);
 	if (mac)
 		ether_addr_copy(bp->dev->dev_addr, mac);
diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h
index 8bed4b52fef5..d67adad67be1 100644
--- a/drivers/net/ethernet/cadence/macb.h
+++ b/drivers/net/ethernet/cadence/macb.h
@@ -382,6 +382,10 @@
 #define GEM_TX_PKT_BUFF_OFFSET			21
 #define GEM_TX_PKT_BUFF_SIZE			1
 
+/* Bitfields in DCFG6. */
+#define GEM_PBUF_LSO_OFFSET			27
+#define GEM_PBUF_LSO_SIZE			1
+
 /* Constants for CLK */
 #define MACB_CLK_DIV8				0
 #define MACB_CLK_DIV16				1
@@ -414,6 +418,10 @@
 #define MACB_CAPS_SG_DISABLED			0x40000000
 #define MACB_CAPS_MACB_IS_GEM			0x80000000
 
+/* LSO settings */
+#define MACB_LSO_UFO_ENABLE			0x01
+#define MACB_LSO_TSO_ENABLE			0x02
+
 /* Bit manipulation macros */
 #define MACB_BIT(name)					\
 	(1 << MACB_##name##_OFFSET)
@@ -545,6 +553,12 @@ struct macb_dma_desc {
 #define MACB_TX_LAST_SIZE			1
 #define MACB_TX_NOCRC_OFFSET			16
 #define MACB_TX_NOCRC_SIZE			1
+#define MACB_MSS_MFS_OFFSET			16
+#define MACB_MSS_MFS_SIZE			14
+#define MACB_TX_LSO_OFFSET			17
+#define MACB_TX_LSO_SIZE			2
+#define MACB_TX_TCP_SEQ_SRC_OFFSET		19
+#define MACB_TX_TCP_SEQ_SRC_SIZE		1
 #define MACB_TX_BUF_EXHAUSTED_OFFSET		27
 #define MACB_TX_BUF_EXHAUSTED_SIZE		1
 #define MACB_TX_UNDERRUN_OFFSET			28
@@ -811,6 +825,9 @@ struct macb {
 	void			*rx_buffers;
 	size_t			rx_buffer_size;
 
+	unsigned int		rx_ring_size;
+	unsigned int		tx_ring_size;
+
 	unsigned int		num_queues;
 	unsigned int		queue_mask;
 	struct macb_queue	queues[MACB_MAX_QUEUES];
diff --git a/drivers/net/ethernet/calxeda/xgmac.c b/drivers/net/ethernet/calxeda/xgmac.c
index 63efa0dc45ba..ce7de6f72512 100644
--- a/drivers/net/ethernet/calxeda/xgmac.c
+++ b/drivers/net/ethernet/calxeda/xgmac.c
@@ -394,7 +394,7 @@ struct xgmac_priv {
 };
 
 /* XGMAC Configuration Settings */
-#define MAX_MTU			9000
+#define XGMAC_MAX_MTU		9000
 #define PAUSE_TIME		0x400
 
 #define DMA_RX_RING_SZ		256
@@ -1360,20 +1360,6 @@ out:
  */
 static int xgmac_change_mtu(struct net_device *dev, int new_mtu)
 {
-	struct xgmac_priv *priv = netdev_priv(dev);
-	int old_mtu;
-
-	if ((new_mtu < 46) || (new_mtu > MAX_MTU)) {
-		netdev_err(priv->dev, "invalid MTU, max MTU is: %d\n", MAX_MTU);
-		return -EINVAL;
-	}
-
-	old_mtu = dev->mtu;
-
-	/* return early if the buffer sizes will not change */
-	if (old_mtu == new_mtu)
-		return 0;
-
 	/* Stop everything, get ready to change the MTU */
 	if (!netif_running(dev))
 		return 0;
@@ -1544,15 +1530,14 @@ static const struct net_device_ops xgmac_netdev_ops = {
 	.ndo_set_features = xgmac_set_features,
 };
 
-static int xgmac_ethtool_getsettings(struct net_device *dev,
-					  struct ethtool_cmd *cmd)
+static int xgmac_ethtool_get_link_ksettings(struct net_device *dev,
+					    struct ethtool_link_ksettings *cmd)
 {
-	cmd->autoneg = 0;
-	cmd->duplex = DUPLEX_FULL;
-	ethtool_cmd_speed_set(cmd, 10000);
-	cmd->supported = 0;
-	cmd->advertising = 0;
-	cmd->transceiver = XCVR_INTERNAL;
+	cmd->base.autoneg = 0;
+	cmd->base.duplex = DUPLEX_FULL;
+	cmd->base.speed = 10000;
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, 0);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, 0);
 	return 0;
 }
 
@@ -1695,7 +1680,6 @@ static int xgmac_set_wol(struct net_device *dev,
 }
 
 static const struct ethtool_ops xgmac_ethtool_ops = {
-	.get_settings = xgmac_ethtool_getsettings,
 	.get_link = ethtool_op_get_link,
 	.get_pauseparam = xgmac_get_pauseparam,
 	.set_pauseparam = xgmac_set_pauseparam,
@@ -1704,6 +1688,7 @@ static const struct ethtool_ops xgmac_ethtool_ops = {
 	.get_wol = xgmac_get_wol,
 	.set_wol = xgmac_set_wol,
 	.get_sset_count = xgmac_get_sset_count,
+	.get_link_ksettings = xgmac_ethtool_get_link_ksettings,
 };
 
 /**
@@ -1804,6 +1789,10 @@ static int xgmac_probe(struct platform_device *pdev)
 	ndev->features |= ndev->hw_features;
 	ndev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 46 - 9000 */
+	ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	ndev->max_mtu = XGMAC_MAX_MTU;
+
 	/* Get the MAC address */
 	xgmac_get_mac_addr(priv->base, ndev->dev_addr, 0);
 	if (!is_valid_ether_addr(ndev->dev_addr))
diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig
index 92f411c9f0df..bbc8bd16cb97 100644
--- a/drivers/net/ethernet/cavium/Kconfig
+++ b/drivers/net/ethernet/cavium/Kconfig
@@ -53,7 +53,7 @@ config	THUNDER_NIC_RGX
 config LIQUIDIO
 	tristate "Cavium LiquidIO support"
 	depends on 64BIT
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select FW_LOADER
 	select LIBCRC32C
 	---help---
@@ -74,4 +74,16 @@ config OCTEON_MGMT_ETHERNET
 	  port on Cavium Networks' Octeon CN57XX, CN56XX, CN55XX,
 	  CN54XX, CN52XX, and CN6XXX chips.
 
+config LIQUIDIO_VF
+	tristate "Cavium LiquidIO VF support"
+	depends on 64BIT && PCI_MSI
+	imply PTP_1588_CLOCK
+	---help---
+	  This driver supports Cavium LiquidIO Intelligent Server Adapter
+	  based on CN23XX chips.
+
+	  To compile this driver as a module, choose M here: The module
+	  will be called liquidio_vf. MSI-X interrupt support is required
+	  for this driver to work correctly.
+
 endif # NET_VENDOR_CAVIUM
diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile
index 5a27b2a44039..c4d411d1aa28 100644
--- a/drivers/net/ethernet/cavium/liquidio/Makefile
+++ b/drivers/net/ethernet/cavium/liquidio/Makefile
@@ -11,8 +11,32 @@ liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \
 			cn66xx_device.o    \
 			cn68xx_device.o    \
 			cn23xx_pf_device.o \
+			cn23xx_vf_device.o \
+			octeon_mailbox.o   \
 			octeon_mem_ops.o   \
 			octeon_droq.o      \
 			octeon_nic.o
 
 liquidio-objs := lio_main.o octeon_console.o $(liquidio-y)
+
+obj-$(CONFIG_LIQUIDIO_VF) += liquidio_vf.o
+
+ifeq ($(CONFIG_LIQUIDIO)$(CONFIG_LIQUIDIO_VF), yy)
+	liquidio_vf-objs := lio_vf_main.o
+else
+liquidio_vf-$(CONFIG_LIQUIDIO_VF) += lio_ethtool.o \
+			lio_core.o         \
+			request_manager.o  \
+			response_manager.o \
+			octeon_device.o    \
+			cn66xx_device.o    \
+			cn68xx_device.o    \
+			cn23xx_pf_device.o \
+			cn23xx_vf_device.o \
+			octeon_mailbox.o   \
+			octeon_mem_ops.o   \
+			octeon_droq.o      \
+			octeon_nic.o
+
+liquidio_vf-objs := lio_vf_main.o $(liquidio_vf-y)
+endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
index 380a64115a98..962dcbcef8b5 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c
@@ -1,28 +1,23 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
-#include <linux/netdevice.h>
 #include <linux/vmalloc.h>
+#include <linux/etherdevice.h>
 #include "liquidio_common.h"
 #include "octeon_droq.h"
 #include "octeon_iq.h"
@@ -30,6 +25,7 @@
 #include "octeon_device.h"
 #include "cn23xx_pf_device.h"
 #include "octeon_main.h"
+#include "octeon_mailbox.h"
 
 #define RESET_NOTDONE 0
 #define RESET_DONE 1
@@ -40,11 +36,6 @@
  */
 #define CN23XX_INPUT_JABBER 64600
 
-#define LIOLUT_RING_DISTRIBUTION 9
-const int liolut_num_vfs_to_rings_per_vf[LIOLUT_RING_DISTRIBUTION] = {
-	0, 8, 4, 2, 2, 2, 1, 1, 1
-};
-
 void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct)
 {
 	int i = 0;
@@ -309,9 +300,10 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us)
 
 static void cn23xx_setup_global_mac_regs(struct octeon_device *oct)
 {
-	u64 reg_val;
 	u16 mac_no = oct->pcie_port;
 	u16 pf_num = oct->pf_num;
+	u64 reg_val;
+	u64 temp;
 
 	/* programming SRN and TRS for each MAC(0..3)  */
 
@@ -333,6 +325,14 @@ static void cn23xx_setup_global_mac_regs(struct octeon_device *oct)
 	/* setting TRS <23:16> */
 	reg_val = reg_val |
 		  (oct->sriov_info.trs << CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS);
+	/* setting RPVF <39:32> */
+	temp = oct->sriov_info.rings_per_vf & 0xff;
+	reg_val |= (temp << CN23XX_PKT_MAC_CTL_RINFO_RPVF_BIT_POS);
+
+	/* setting NVFS <55:48> */
+	temp = oct->sriov_info.max_vfs & 0xff;
+	reg_val |= (temp << CN23XX_PKT_MAC_CTL_RINFO_NVFS_BIT_POS);
+
 	/* write these settings to MAC register */
 	octeon_write_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num),
 			   reg_val);
@@ -399,11 +399,12 @@ static int cn23xx_reset_io_queues(struct octeon_device *oct)
 
 static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct)
 {
+	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+	struct octeon_instr_queue *iq;
+	u64 intr_threshold, reg_val;
 	u32 q_no, ern, srn;
 	u64 pf_num;
-	u64 intr_threshold, reg_val;
-	struct octeon_instr_queue *iq;
-	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
+	u64 vf_num;
 
 	pf_num = oct->pf_num;
 
@@ -414,12 +415,22 @@ static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct)
 		return -1;
 
 	/** Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg
+	 * for all queues. Only PF can set these bits.
-	* bits 29:30 indicate the MAC num.
-	* bits 32:47 indicate the PVF num.
-	*/
+	 * for all queues.Only PF can set these bits.
+	 * bits 29:30 indicate the MAC num.
+	 * bits 32:47 indicate the PVF num.
+	 */
 	for (q_no = 0; q_no < ern; q_no++) {
 		reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS;
+
+		/* for VF assigned queues. */
+		if (q_no < oct->sriov_info.pf_srn) {
+			vf_num = q_no / oct->sriov_info.rings_per_vf;
+			vf_num += 1; /* VF1, VF2,........ */
+		} else {
+			vf_num = 0;
+		}
+
+		reg_val |= vf_num << CN23XX_PKT_INPUT_CTL_VF_NUM_POS;
 		reg_val |= pf_num << CN23XX_PKT_INPUT_CTL_PF_NUM_POS;
 
 		octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
@@ -530,8 +541,8 @@ static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct)
 	writeq(0x40, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_WMARK);
 
 	/** Disabling setting OQs in reset when ring has no dorebells
+	 * enabling this will cause head of line blocking
-	  */
+	 * enabling this will cause of head of line blocking
+	 */
 	/* Do it only for pass1.1. and pass1.2 */
 	if ((oct->rev_id == OCTEON_CN23XX_REV_1_0) ||
 	    (oct->rev_id == OCTEON_CN23XX_REV_1_1))
@@ -662,6 +673,118 @@ static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no)
 	}
 }
 
+/* Mailbox work handler: revisions below OCTEON_CN23XX_REV_1_1 poll every
+ * allocated VF mailbox and re-queue themselves; others process @work's mailbox.
+ */
+static void cn23xx_pf_mbox_thread(struct work_struct *work)
+{
+	struct cavium_wk *wk = (struct cavium_wk *)work;
+	struct octeon_mbox *mbox = (struct octeon_mbox *)wk->ctxptr;
+	struct octeon_device *oct = mbox->oct_dev;
+	struct octeon_mbox *vf_mbox;
+	u64 pending, sig;
+	u32 vf;
+
+	if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) {
+		octeon_mbox_process_message(mbox);
+		return;
+	}
+
+	/* read and clear by writing 1 */
+	pending = readq(mbox->mbox_int_reg);
+	writeq(pending, mbox->mbox_int_reg);
+
+	for (vf = 0; vf < oct->sriov_info.num_vfs_alloced; vf++) {
+		vf_mbox = oct->mbox[vf * oct->sriov_info.rings_per_vf];
+		sig = readq(vf_mbox->mbox_write_reg);
+		if (sig && sig != OCTEON_PFVFACK && octeon_mbox_read(vf_mbox))
+			octeon_mbox_process_message(vf_mbox);
+	}
+
+	schedule_delayed_work(&wk->work, msecs_to_jiffies(10));
+}
+
+/* Allocate one PF<->VF mailbox per possible VF, stored in oct->mbox[] at
+ * index i * rings_per_vf. Returns 0 on success, 1 if an allocation fails.
+ */
+static int cn23xx_setup_pf_mbox(struct octeon_device *oct)
+{
+	struct octeon_mbox *mbox = NULL;
+	u16 mac_no = oct->pcie_port;
+	u16 pf_num = oct->pf_num;
+	u32 q_no, i;
+
+	if (!oct->sriov_info.max_vfs)
+		return 0;
+
+	for (i = 0; i < oct->sriov_info.max_vfs; i++) {
+		q_no = i * oct->sriov_info.rings_per_vf;
+
+		mbox = vzalloc(sizeof(*mbox));
+		if (!mbox)
+			goto free_mbox;
+
+		spin_lock_init(&mbox->lock);
+		mbox->oct_dev = oct;
+		mbox->q_no = q_no;
+		mbox->state = OCTEON_MBOX_STATE_IDLE;
+
+		/* PF mbox interrupt reg */
+		mbox->mbox_int_reg = (u8 *)oct->mmio[0].hw_addr +
+				     CN23XX_SLI_MAC_PF_MBOX_INT(mac_no, pf_num);
+
+		/* PF writes into SIG0 reg */
+		mbox->mbox_write_reg = (u8 *)oct->mmio[0].hw_addr +
+				       CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q_no, 0);
+
+		/* PF reads from SIG1 reg */
+		mbox->mbox_read_reg = (u8 *)oct->mmio[0].hw_addr +
+				      CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q_no, 1);
+
+		/* Mail Box Thread creation */
+		INIT_DELAYED_WORK(&mbox->mbox_poll_wk.work,
+				  cn23xx_pf_mbox_thread);
+		mbox->mbox_poll_wk.ctxptr = (void *)mbox;
+
+		oct->mbox[q_no] = mbox;
+
+		writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+	}
+
+	if (oct->rev_id < OCTEON_CN23XX_REV_1_1)
+		schedule_delayed_work(&oct->mbox[0]->mbox_poll_wk.work,
+				      msecs_to_jiffies(0));
+
+	return 0;
+
+free_mbox:
+	/* mailboxes were stored at i * rings_per_vf, not at i; unwind the same way */
+	while (i--) {
+		q_no = i * oct->sriov_info.rings_per_vf;
+		vfree(oct->mbox[q_no]);
+		oct->mbox[q_no] = NULL;
+	}
+
+	return 1;
+}
+
+/* Cancel the poll work of every PF mailbox (one per possible VF) and free the
+ * mailbox. Always returns 0.
+ */
+static int cn23xx_free_pf_mbox(struct octeon_device *oct)
+{
+	u32 vf, ring;
+
+	for (vf = 0; vf < oct->sriov_info.max_vfs; vf++) {
+		ring = vf * oct->sriov_info.rings_per_vf;
+		cancel_delayed_work_sync(
+		    &oct->mbox[ring]->mbox_poll_wk.work);
+		vfree(oct->mbox[ring]);
+	}
+
+	return 0;
+}
+
 static int cn23xx_enable_io_queues(struct octeon_device *oct)
 {
 	u64 reg_val;
@@ -856,6 +979,29 @@ static u64 cn23xx_pf_msix_interrupt_handler(void *dev)
 	return ret;
 }
 
+/* For each allocated VF whose bit is set in the PF mailbox interrupt register,
+ * write that bit back and, if a message was read, queue that mailbox's work.
+ */
+static void cn23xx_handle_pf_mbox_intr(struct octeon_device *oct)
+{
+	u64 pending = readq(oct->mbox[0]->mbox_int_reg);
+	struct octeon_mbox *mbox;
+	u32 vf, ring;
+
+	for (vf = 0; vf < oct->sriov_info.num_vfs_alloced; vf++) {
+		ring = vf * oct->sriov_info.rings_per_vf;
+		if (!(pending & BIT_ULL(ring)))
+			continue;
+
+		writeq(BIT_ULL(ring), oct->mbox[0]->mbox_int_reg);
+
+		mbox = oct->mbox[ring];
+		if (octeon_mbox_read(mbox))
+			schedule_delayed_work(&mbox->mbox_poll_wk.work,
+					      msecs_to_jiffies(0));
+	}
+}
+
 static irqreturn_t cn23xx_interrupt_handler(void *dev)
 {
 	struct octeon_device *oct = (struct octeon_device *)dev;
@@ -871,6 +1017,10 @@ static irqreturn_t cn23xx_interrupt_handler(void *dev)
 		dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Error Intr: 0x%016llx\n",
 			oct->octeon_id, CVM_CAST64(intr64));
 
+	/* When VFs write into MBOX_SIG2 reg, this intr is set in PF */
+	if (intr64 & CN23XX_INTR_VF_MBOX)
+		cn23xx_handle_pf_mbox_intr(oct);
+
 	if (oct->msix_on != LIO_FLAG_MSIX_ENABLED) {
 		if (intr64 & CN23XX_INTR_PKT_DATA)
 			oct->int_status |= OCT_DEV_INTR_PKT_DATA;
@@ -961,6 +1111,13 @@ static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
 		intr_val = readq(cn23xx->intr_enb_reg64);
 		intr_val |= CN23XX_INTR_PKT_DATA;
 		writeq(intr_val, cn23xx->intr_enb_reg64);
+	} else if ((intr_flag & OCTEON_MBOX_INTR) &&
+		   (oct->sriov_info.max_vfs > 0)) {
+		if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) {
+			intr_val = readq(cn23xx->intr_enb_reg64);
+			intr_val |= CN23XX_INTR_VF_MBOX;
+			writeq(intr_val, cn23xx->intr_enb_reg64);
+		}
 	}
 }
 
@@ -976,6 +1133,13 @@ static void cn23xx_disable_pf_interrupt(struct octeon_device *oct, u8 intr_flag)
 		intr_val = readq(cn23xx->intr_enb_reg64);
 		intr_val &= ~CN23XX_INTR_PKT_DATA;
 		writeq(intr_val, cn23xx->intr_enb_reg64);
+	} else if ((intr_flag & OCTEON_MBOX_INTR) &&
+		   (oct->sriov_info.max_vfs > 0)) {
+		if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) {
+			intr_val = readq(cn23xx->intr_enb_reg64);
+			intr_val &= ~CN23XX_INTR_VF_MBOX;
+			writeq(intr_val, cn23xx->intr_enb_reg64);
+		}
 	}
 }
 
@@ -1048,50 +1212,59 @@ static void cn23xx_setup_reg_address(struct octeon_device *oct)
 
 static int cn23xx_sriov_config(struct octeon_device *oct)
 {
-	u32 total_rings;
 	struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip;
-	/* num_vfs is already filled for us */
+	u32 max_rings, total_rings, max_vfs, rings_per_vf;
 	u32 pf_srn, num_pf_rings;
+	u32 max_possible_vfs;
 
 	cn23xx->conf =
-	    (struct octeon_config *)oct_get_config_info(oct, LIO_23XX);
+		(struct octeon_config *)oct_get_config_info(oct, LIO_23XX);
 	switch (oct->rev_id) {
 	case OCTEON_CN23XX_REV_1_0:
-		total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0;
+		max_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0;
+		max_possible_vfs = CN23XX_MAX_VFS_PER_PF_PASS_1_0;
 		break;
 	case OCTEON_CN23XX_REV_1_1:
-		total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+		max_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1;
+		max_possible_vfs = CN23XX_MAX_VFS_PER_PF_PASS_1_1;
 		break;
 	default:
-		total_rings = CN23XX_MAX_RINGS_PER_PF;
+		max_rings = CN23XX_MAX_RINGS_PER_PF;
+		max_possible_vfs = CN23XX_MAX_VFS_PER_PF;
 		break;
 	}
-	if (!oct->sriov_info.num_pf_rings) {
-		if (total_rings > num_present_cpus())
-			num_pf_rings = num_present_cpus();
-		else
-			num_pf_rings = total_rings;
-	} else {
-		num_pf_rings = oct->sriov_info.num_pf_rings;
 
-		if (num_pf_rings > total_rings) {
-			dev_warn(&oct->pci_dev->dev,
-				 "num_queues_per_pf requested %u is more than available rings. Reducing to %u\n",
-				 num_pf_rings, total_rings);
-			num_pf_rings = total_rings;
-		}
-	}
+	if (max_rings <= num_present_cpus())
+		num_pf_rings = 1;
+	else
+		num_pf_rings = num_present_cpus();
+
+#ifdef CONFIG_PCI_IOV
+	max_vfs = min_t(u32,
+			(max_rings - num_pf_rings), max_possible_vfs);
+	rings_per_vf = 1;
+#else
+	max_vfs = 0;
+	rings_per_vf = 0;
+#endif
+
+	total_rings = num_pf_rings + max_vfs;
 
-	total_rings = num_pf_rings;
 	/* the first ring of the pf */
 	pf_srn = total_rings - num_pf_rings;
 
 	oct->sriov_info.trs = total_rings;
+	oct->sriov_info.max_vfs = max_vfs;
+	oct->sriov_info.rings_per_vf = rings_per_vf;
 	oct->sriov_info.pf_srn = pf_srn;
 	oct->sriov_info.num_pf_rings = num_pf_rings;
-	dev_dbg(&oct->pci_dev->dev, "trs:%d pf_srn:%d num_pf_rings:%d\n",
-		oct->sriov_info.trs, oct->sriov_info.pf_srn,
-		oct->sriov_info.num_pf_rings);
+	dev_notice(&oct->pci_dev->dev, "trs:%d max_vfs:%d rings_per_vf:%d pf_srn:%d num_pf_rings:%d\n",
+		   oct->sriov_info.trs, oct->sriov_info.max_vfs,
+		   oct->sriov_info.rings_per_vf, oct->sriov_info.pf_srn,
+		   oct->sriov_info.num_pf_rings);
+
+	oct->sriov_info.sriov_enabled = 0;
+
 	return 0;
 }
 
@@ -1119,6 +1292,9 @@ int setup_cn23xx_octeon_pf_device(struct octeon_device *oct)
 
 	oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs;
 	oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs;
+	oct->fn_list.setup_mbox = cn23xx_setup_pf_mbox;
+	oct->fn_list.free_mbox = cn23xx_free_pf_mbox;
+
 	oct->fn_list.process_interrupt_regs = cn23xx_interrupt_handler;
 	oct->fn_list.msix_interrupt_handler = cn23xx_pf_msix_interrupt_handler;
 
@@ -1209,8 +1385,7 @@ void cn23xx_dump_iq_regs(struct octeon_device *oct)
 		dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
 			q_no, CN23XX_SLI_IQ_PKT_CONTROL64(q_no),
 			CVM_CAST64(octeon_read_csr64
-				(oct,
-					CN23XX_SLI_IQ_PKT_CONTROL64(q_no))));
+				(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))));
 	}
 
 	pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
@@ -1235,3 +1410,24 @@ int cn23xx_fw_loaded(struct octeon_device *oct)
 	val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1);
 	return (val >> 1) & 1ULL;
 }
+
+void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
+					u8 *mac)
+{
+	if (oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vfidx)) {
+		struct octeon_mbox_cmd mbox_cmd;
+
+		mbox_cmd.msg.u64 = 0;
+		mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+		mbox_cmd.msg.s.resp_needed = 0;
+		mbox_cmd.msg.s.cmd = OCTEON_PF_CHANGED_VF_MACADDR;
+		mbox_cmd.msg.s.len = 1;
+		mbox_cmd.recv_len = 0;
+		mbox_cmd.recv_status = 0;
+		mbox_cmd.fn = NULL;
+		mbox_cmd.fn_arg = 0;
+		ether_addr_copy(mbox_cmd.msg.s.params, mac);
+		mbox_cmd.q_no = vfidx * oct->sriov_info.rings_per_vf;
+		octeon_mbox_write(oct, &mbox_cmd);
+	}
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
index 21b5c9051967..2fedd91f3df8 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h
@@ -1,34 +1,31 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  cn23xx_device.h
  * \brief Host Driver: Routines that perform CN23XX specific operations.
-*/
+ */
 
 #ifndef __CN23XX_PF_DEVICE_H__
 #define __CN23XX_PF_DEVICE_H__
 
 #include "cn23xx_pf_regs.h"
 
+#define LIO_CMD_WAIT_TM 100
+
 /* Register address and configuration for a CN23XX devices.
  * If device specific changes need to be made then add a struct to include
  * device specific fields as shown in the commented section
@@ -56,4 +53,7 @@ u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
 void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct);
 
 int cn23xx_fw_loaded(struct octeon_device *oct);
+
+void cn23xx_tell_vf_its_macaddr_changed(struct octeon_device *oct, int vfidx,
+					u8 *mac);
 #endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
index 03d79d95ab75..e6d4ad99cc38 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h
@@ -1,29 +1,24 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file cn23xx_regs.h
  * \brief Host Driver: Register Address and Register Mask values for
  * Octeon CN23XX devices.
-*/
+ */
 
 #ifndef __CN23XX_PF_REGS_H__
 #define __CN23XX_PF_REGS_H__
@@ -63,7 +58,7 @@
 
 #define     CN23XX_CONFIG_SRIOV_BAR_START	   0x19C
 #define     CN23XX_CONFIG_SRIOV_BARX(i)		\
-		(CN23XX_CONFIG_SRIOV_BAR_START + (i * 4))
+		(CN23XX_CONFIG_SRIOV_BAR_START + ((i) * 4))
 #define     CN23XX_CONFIG_SRIOV_BAR_PF		   0x08
 #define     CN23XX_CONFIG_SRIOV_BAR_64BIT	   0x04
 #define     CN23XX_CONFIG_SRIOV_BAR_IO		   0x01
@@ -513,7 +508,7 @@
 /* 4 Registers (64 - bit) */
 #define    CN23XX_SLI_S2M_PORT_CTL_START         0x23D80
 #define    CN23XX_SLI_S2M_PORTX_CTL(port)	\
-		(CN23XX_SLI_S2M_PORT_CTL_START + (port * 0x10))
+		(CN23XX_SLI_S2M_PORT_CTL_START + ((port) * 0x10))
 
 #define    CN23XX_SLI_MAC_NUMBER                 0x20050
 
@@ -554,26 +549,26 @@
  * Provides DMA Engine Queue Enable
  */
 #define    CN23XX_DPI_DMA_ENG0_ENB        0x0001df0000000080ULL
-#define    CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + (eng * 8))
+#define    CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + ((eng) * 8))
 
 /* 8 register (64-bit) - DPI_DMA(0..7)_REQQ_CTL
  * Provides control bits for transaction on 8 Queues
  */
 #define    CN23XX_DPI_DMA_REQQ0_CTL       0x0001df0000000180ULL
 #define    CN23XX_DPI_DMA_REQQ_CTL(q_no)	\
-		(CN23XX_DPI_DMA_REQQ0_CTL + (q_no * 8))
+		(CN23XX_DPI_DMA_REQQ0_CTL + ((q_no) * 8))
 
 /* 6 register (64-bit) - DPI_ENG(0..5)_BUF
  * Provides DMA Engine FIFO (Queue) Size
  */
 #define    CN23XX_DPI_DMA_ENG0_BUF        0x0001df0000000880ULL
 #define    CN23XX_DPI_DMA_ENG_BUF(eng)   \
-		(CN23XX_DPI_DMA_ENG0_BUF + (eng * 8))
+		(CN23XX_DPI_DMA_ENG0_BUF + ((eng) * 8))
 
 /* 4 Registers (64-bit) */
 #define    CN23XX_DPI_SLI_PRT_CFG_START   0x0001df0000000900ULL
 #define    CN23XX_DPI_SLI_PRTX_CFG(port)        \
-		(CN23XX_DPI_SLI_PRT_CFG_START + (port * 0x8))
+		(CN23XX_DPI_SLI_PRT_CFG_START + ((port) * 0x8))
 
 /* Masks for DPI_DMA_CONTROL Register */
 #define    CN23XX_DPI_DMA_COMMIT_MODE     BIT_ULL(58)
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
new file mode 100644
index 000000000000..b6117b6a1de2
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.c
@@ -0,0 +1,722 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/vmalloc.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "cn23xx_vf_device.h"
+#include "octeon_main.h"
+#include "octeon_mailbox.h"
+
+u32 cn23xx_vf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us)
+{
+	/* This gives the SLI clock per microsec */
+	u32 oqticks_per_us = (u32)oct->pfvf_hsword.coproc_tics_per_us;
+
+	/* This gives the clock cycles per millisecond */
+	oqticks_per_us *= 1000;
+
+	/* This gives the oq ticks (1024 core clock cycles) per millisecond */
+	oqticks_per_us /= 1024;
+
+	/* time_intr is in microseconds. The next 2 steps give the oq ticks
+	 * corresponding to time_intr.
+	 */
+	oqticks_per_us *= time_intr_in_us;
+	oqticks_per_us /= 1000;
+
+	return oqticks_per_us;
+}
+
+/* Reset num_queues VF IO rings; returns 0 on success, -1 on failure */
+static int cn23xx_vf_reset_io_queues(struct octeon_device *oct, u32 num_queues)
+{
+	int ret_val = 0;
+	u32 q_no;
+	u64 d64;
+
+	for (q_no = 0; q_no < num_queues; q_no++) {
+		/* set RST bit to 1. This bit applies to both IQ and OQ */
+		d64 = octeon_read_csr64(oct,
+					CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+		d64 |= CN23XX_PKT_INPUT_CTL_RST;
+		octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+				   d64);
+	}
+
+	/* wait, per ring, until RST is clear or both RST and QUIET are set */
+	for (q_no = 0; q_no < num_queues; q_no++) {
+		u32 loop = BUSY_READING_REG_VF_LOOP_COUNT;
+		u64 reg_val = octeon_read_csr64(oct,
+					CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+		while ((READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) &&
+		       !(READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_QUIET) &&
+		       loop) {
+			WRITE_ONCE(reg_val, octeon_read_csr64(
+			    oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no)));
+			loop--;
+		}
+		if (!loop) {
+			dev_err(&oct->pci_dev->dev,
+				"clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+				q_no);
+			return -1;
+		}
+		WRITE_ONCE(reg_val, READ_ONCE(reg_val) &
+			   ~CN23XX_PKT_INPUT_CTL_RST);
+		octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+				   READ_ONCE(reg_val));
+
+		WRITE_ONCE(reg_val, octeon_read_csr64(
+		    oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no)));
+		if (READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) {
+			dev_err(&oct->pci_dev->dev,
+				"clearing the reset failed for qno: %u\n", q_no);
+			ret_val = -1;
+		}
+	}
+
+	return ret_val;
+}
+
+static int cn23xx_vf_setup_global_input_regs(struct octeon_device *oct)
+{
+	struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
+	struct octeon_instr_queue *iq;
+	u64 q_no, intr_threshold;
+	u64 d64;
+
+	if (cn23xx_vf_reset_io_queues(oct, oct->sriov_info.rings_per_vf))
+		return -1;
+
+	for (q_no = 0; q_no < (oct->sriov_info.rings_per_vf); q_no++) {
+		void __iomem *inst_cnt_reg;
+
+		octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_DOORBELL(q_no),
+				   0xFFFFFFFF);
+		iq = oct->instr_queue[q_no];
+
+		if (iq)
+			inst_cnt_reg = iq->inst_cnt_reg;
+		else
+			inst_cnt_reg = (u8 *)oct->mmio[0].hw_addr +
+				       CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no);
+
+		d64 = octeon_read_csr64(oct,
+					CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no));
+		/* clear bit 60 before writing the count register back */
+		d64 &= 0xEFFFFFFFFFFFFFFFL;
+
+		octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no),
+				   d64);
+
+		/* Select ES, RO, NS, RDSIZE, DPTR Format#0 for
+		 * the Input Queues
+		 */
+		octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+				   CN23XX_PKT_INPUT_CTL_MASK);
+
+		/* set the wmark level to trigger PI_INT */
+		intr_threshold = CFG_GET_IQ_INTR_PKT(cn23xx->conf) &
+				 CN23XX_PKT_IN_DONE_WMARK_MASK;
+
+		writeq((readq(inst_cnt_reg) &
+			~(CN23XX_PKT_IN_DONE_WMARK_MASK <<
+			  CN23XX_PKT_IN_DONE_WMARK_BIT_POS)) |
+		       (intr_threshold << CN23XX_PKT_IN_DONE_WMARK_BIT_POS),
+		       inst_cnt_reg);
+	}
+	return 0;
+}
+
+static void cn23xx_vf_setup_global_output_regs(struct octeon_device *oct)
+{
+	u32 reg_val;
+	u32 q_no;
+
+	for (q_no = 0; q_no < (oct->sriov_info.rings_per_vf); q_no++) {
+		octeon_write_csr(oct, CN23XX_VF_SLI_OQ_PKTS_CREDIT(q_no),
+				 0xFFFFFFFF);
+
+		reg_val =
+		    octeon_read_csr(oct, CN23XX_VF_SLI_OQ_PKTS_SENT(q_no));
+		/* NOTE(review): no-op mask: a u32 has no bit 60 */
+		reg_val &= 0xEFFFFFFFFFFFFFFFL;
+		/* NOTE(review): result unused; overwritten by next read */
+		reg_val =
+		    octeon_read_csr(oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no));
+
+		/* set IPTR & DPTR */
+		reg_val |=
+		    (CN23XX_PKT_OUTPUT_CTL_IPTR | CN23XX_PKT_OUTPUT_CTL_DPTR);
+
+		/* reset BMODE */
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_BMODE);
+
+		/* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+		 * for Output Queue ScatterList reset ROR_P, NSR_P
+		 */
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR_P);
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR_P);
+
+#ifdef __LITTLE_ENDIAN_BITFIELD
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ES_P);
+#else
+		reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES_P);
+#endif
+		/* No Relaxed Ordering, No Snoop, 64-bit Byte swap
+		 * for Output Queue Data reset ROR, NSR
+		 */
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR);
+		reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR);
+		/* set the ES bit */
+		reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES);
+
+		/* write all the selected settings */
+		octeon_write_csr(oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no),
+				 reg_val);
+	}
+}
+
+static int cn23xx_setup_vf_device_regs(struct octeon_device *oct)
+{
+	if (cn23xx_vf_setup_global_input_regs(oct))
+		return -1;
+
+	cn23xx_vf_setup_global_output_regs(oct);
+
+	return 0;
+}
+
+static void cn23xx_setup_vf_iq_regs(struct octeon_device *oct, u32 iq_no)
+{
+	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
+	u64 pkt_in_done;
+
+	/* Write the start of the input queue's ring and its size */
+	octeon_write_csr64(oct, CN23XX_VF_SLI_IQ_BASE_ADDR64(iq_no),
+			   iq->base_addr_dma);
+	octeon_write_csr(oct, CN23XX_VF_SLI_IQ_SIZE(iq_no), iq->max_count);
+
+	/* Remember the doorbell & instruction count register addr
+	 * for this queue
+	 */
+	iq->doorbell_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_IQ_DOORBELL(iq_no);
+	iq->inst_cnt_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_IQ_INSTR_COUNT64(iq_no);
+	dev_dbg(&oct->pci_dev->dev, "InstQ[%d]:dbell reg @ 0x%p instcnt_reg @ 0x%p\n",
+		iq_no, iq->doorbell_reg, iq->inst_cnt_reg);
+
+	/* Store the current instruction counter (used in flush_iq
+	 * calculation)
+	 */
+	pkt_in_done = readq(iq->inst_cnt_reg);
+
+	if (oct->msix_on) {
+		/* Set CINT_ENB to enable IQ interrupt */
+		writeq((pkt_in_done | CN23XX_INTR_CINT_ENB),
+		       iq->inst_cnt_reg);
+	}
+	iq->reset_instr_cnt = 0;
+}
+
+static void cn23xx_setup_vf_oq_regs(struct octeon_device *oct, u32 oq_no)
+{
+	struct octeon_droq *droq = oct->droq[oq_no];
+
+	octeon_write_csr64(oct, CN23XX_VF_SLI_OQ_BASE_ADDR64(oq_no),
+			   droq->desc_ring_dma);
+	octeon_write_csr(oct, CN23XX_VF_SLI_OQ_SIZE(oq_no), droq->max_count);
+
+	octeon_write_csr(oct, CN23XX_VF_SLI_OQ_BUFF_INFO_SIZE(oq_no),
+			 (droq->buffer_size | (OCT_RH_SIZE << 16)));
+
+	/* Get the mapped address of the pkt_sent and pkts_credit regs */
+	droq->pkts_sent_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_OQ_PKTS_SENT(oq_no);
+	droq->pkts_credit_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_OQ_PKTS_CREDIT(oq_no);
+}
+
+static void cn23xx_vf_mbox_thread(struct work_struct *work)
+{
+	struct cavium_wk *wk = (struct cavium_wk *)work;
+	struct octeon_mbox *mbox = (struct octeon_mbox *)wk->ctxptr;
+
+	octeon_mbox_process_message(mbox);
+}
+
+static int cn23xx_free_vf_mbox(struct octeon_device *oct)
+{
+	cancel_delayed_work_sync(&oct->mbox[0]->mbox_poll_wk.work);
+	vfree(oct->mbox[0]);
+	return 0;
+}
+
+/* Allocate and initialise the VF's single mailbox and publish it as
+ * oct->mbox[0].  Returns 0 on success, 1 if the allocation fails.
+ */
+static int cn23xx_setup_vf_mbox(struct octeon_device *oct)
+{
+	struct octeon_mbox *mbox;
+
+	/* vzalloc() hands back zeroed memory, so no separate memset() */
+	mbox = vzalloc(sizeof(*mbox));
+	if (!mbox)
+		return 1;
+
+	spin_lock_init(&mbox->lock);
+
+	mbox->oct_dev = oct;
+	mbox->q_no = 0;
+	mbox->state = OCTEON_MBOX_STATE_IDLE;
+
+	/* VF mbox interrupt reg */
+	mbox->mbox_int_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_VF_SLI_PKT_MBOX_INT(0);
+	/* VF reads from SIG0 reg */
+	mbox->mbox_read_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_PKT_PF_VF_MBOX_SIG(0, 0);
+	/* VF writes into SIG1 reg */
+	mbox->mbox_write_reg =
+	    (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_PKT_PF_VF_MBOX_SIG(0, 1);
+
+	INIT_DELAYED_WORK(&mbox->mbox_poll_wk.work,
+			  cn23xx_vf_mbox_thread);
+
+	mbox->mbox_poll_wk.ctxptr = mbox;
+
+	oct->mbox[0] = mbox;
+
+	writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+
+	return 0;
+}
+
+static int cn23xx_enable_vf_io_queues(struct octeon_device *oct)
+{
+	u32 q_no;
+
+	for (q_no = 0; q_no < oct->num_iqs; q_no++) {
+		u64 reg_val;
+
+		/* set the corresponding IQ IS_64B bit */
+		if (oct->io_qmask.iq64B & BIT_ULL(q_no)) {
+			reg_val = octeon_read_csr64(
+			    oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+			reg_val |= CN23XX_PKT_INPUT_CTL_IS_64B;
+			octeon_write_csr64(
+			    oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+		}
+
+		/* set the corresponding IQ ENB bit */
+		if (oct->io_qmask.iq & BIT_ULL(q_no)) {
+			reg_val = octeon_read_csr64(
+			    oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+			reg_val |= CN23XX_PKT_INPUT_CTL_RING_ENB;
+			octeon_write_csr64(
+			    oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no), reg_val);
+		}
+	}
+	for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+		u32 reg_val;
+
+		/* set the corresponding OQ ENB bit */
+		if (oct->io_qmask.oq & BIT_ULL(q_no)) {
+			reg_val = octeon_read_csr(
+			    oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no));
+			reg_val |= CN23XX_PKT_OUTPUT_CTL_RING_ENB;
+			octeon_write_csr(
+			    oct, CN23XX_VF_SLI_OQ_PKT_CONTROL(q_no), reg_val);
+		}
+	}
+
+	return 0;
+}
+
+static void cn23xx_disable_vf_io_queues(struct octeon_device *oct)
+{
+	u32 num_queues = oct->num_iqs;
+
+	/* per HRM, rings can only be disabled via reset operation,
+	 * NOT via SLI_PKT()_INPUT/OUTPUT_CONTROL[ENB]
+	 */
+	if (num_queues < oct->num_oqs)
+		num_queues = oct->num_oqs;
+
+	cn23xx_vf_reset_io_queues(oct, num_queues);
+}
+
+void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct)
+{
+	struct octeon_mbox_cmd mbox_cmd;
+
+	mbox_cmd.msg.u64 = 0;
+	mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+	mbox_cmd.msg.s.resp_needed = 0;
+	mbox_cmd.msg.s.cmd = OCTEON_VF_FLR_REQUEST;
+	mbox_cmd.msg.s.len = 1;
+	mbox_cmd.q_no = 0;
+	mbox_cmd.recv_len = 0;
+	mbox_cmd.recv_status = 0;
+	mbox_cmd.fn = NULL;
+	mbox_cmd.fn_arg = 0;
+
+	octeon_mbox_write(oct, &mbox_cmd);
+}
+
+static void octeon_pfvf_hs_callback(struct octeon_device *oct,
+				    struct octeon_mbox_cmd *cmd,
+				    void *arg)
+{
+	u32 major = 0;
+
+	memcpy((uint8_t *)&oct->pfvf_hsword, cmd->msg.s.params,
+	       CN23XX_MAILBOX_MSGPARAM_SIZE);
+	if (cmd->recv_len > 1)  {
+		major = ((struct lio_version *)(cmd->data))->major;
+		major = major << 16;
+	}
+
+	atomic_set((atomic_t *)arg, major | 1);
+}
+
+/* Perform the initial PF/VF mailbox handshake (OCTEON_VF_ACTIVE request).
+ * The PF reply is copied into oct->pfvf_hsword by octeon_pfvf_hs_callback().
+ * Returns 0 on success, 1 on timeout or on a PF/VF major version mismatch.
+ */
+int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct)
+{
+	struct octeon_mbox_cmd mbox_cmd;
+	struct lio_version *vf_ver = (struct lio_version *)&mbox_cmd.data[0];
+	u32 pfmajor, vfmajor, ret;
+	u32 q_no, count = 0;
+	atomic_t status;
+
+	/* Sending VF_ACTIVE indication to the PF driver */
+	dev_dbg(&oct->pci_dev->dev, "requesting info from pf\n");
+
+	mbox_cmd.msg.u64 = 0;
+	mbox_cmd.msg.s.type = OCTEON_MBOX_REQUEST;
+	mbox_cmd.msg.s.resp_needed = 1;
+	mbox_cmd.msg.s.cmd = OCTEON_VF_ACTIVE;
+	mbox_cmd.msg.s.len = 2;
+	mbox_cmd.data[0] = 0;
+	vf_ver->major = LIQUIDIO_BASE_MAJOR_VERSION;
+	vf_ver->minor = LIQUIDIO_BASE_MINOR_VERSION;
+	vf_ver->micro = LIQUIDIO_BASE_MICRO_VERSION;
+	mbox_cmd.q_no = 0;
+	mbox_cmd.recv_len = 0;
+	mbox_cmd.recv_status = 0;
+	mbox_cmd.fn = (octeon_mbox_callback_t)octeon_pfvf_hs_callback;
+	mbox_cmd.fn_arg = &status;
+
+	/* Interrupts are not enabled at this point.
+	 * Enable them with default oq ticks
+	 */
+	oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
+
+	/* Clear status before sending so a fast PF reply is not wiped out */
+	atomic_set(&status, 0);
+	octeon_mbox_write(oct, &mbox_cmd);
+
+	do {
+		schedule_timeout_uninterruptible(1);
+	} while ((!atomic_read(&status)) && (count++ < 100000));
+
+	/* Disable the interrupt so that the interrupts will be reenabled
+	 * with the oq ticks received from the PF
+	 */
+	oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+	ret = atomic_read(&status);
+	if (!ret) {
+		dev_err(&oct->pci_dev->dev, "octeon_pfvf_handshake timeout\n");
+		return 1;
+	}
+
+	for (q_no = 0 ; q_no < oct->num_iqs ; q_no++)
+		oct->instr_queue[q_no]->txpciq.s.pkind = oct->pfvf_hsword.pkind;
+
+	vfmajor = LIQUIDIO_BASE_MAJOR_VERSION;
+	pfmajor = ret >> 16;
+	if (pfmajor != vfmajor) {
+		dev_err(&oct->pci_dev->dev,
+			"VF Liquidio driver (major version %d) is not compatible with Liquidio PF driver (major version %d)\n",
+			vfmajor, pfmajor);
+		return 1;
+	}
+
+	dev_dbg(&oct->pci_dev->dev,
+		"VF Liquidio driver (major version %d), Liquidio PF driver (major version %d)\n",
+		vfmajor, pfmajor);
+
+	dev_dbg(&oct->pci_dev->dev, "got data from pf pkind is %d\n",
+		oct->pfvf_hsword.pkind);
+
+	return 0;
+}
+
+static void cn23xx_handle_vf_mbox_intr(struct octeon_ioq_vector *ioq_vector)
+{
+	struct octeon_device *oct = ioq_vector->oct_dev;
+	u64 mbox_int_val;
+
+	if (!ioq_vector->droq_index) {
+		/* read and clear by writing 1 */
+		mbox_int_val = readq(oct->mbox[0]->mbox_int_reg);
+		writeq(mbox_int_val, oct->mbox[0]->mbox_int_reg);
+		if (octeon_mbox_read(oct->mbox[0]))
+			schedule_delayed_work(&oct->mbox[0]->mbox_poll_wk.work,
+					      msecs_to_jiffies(0));
+	}
+}
+
+static u64 cn23xx_vf_msix_interrupt_handler(void *dev)
+{
+	struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev;
+	struct octeon_device *oct = ioq_vector->oct_dev;
+	struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
+	u64 pkts_sent;
+	u64 ret = 0;
+
+	dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct);
+	pkts_sent = readq(droq->pkts_sent_reg);
+
+	/* If our device has interrupted, then proceed. Also check
+	 * for all f's if interrupt was triggered on an error
+	 * and the PCI read fails.
+	 */
+	if (!pkts_sent || (pkts_sent == 0xFFFFFFFFFFFFFFFFULL))
+		return ret;
+
+	/* Write count reg in sli_pkt_cnts to clear these int. */
+	if ((pkts_sent & CN23XX_INTR_PO_INT) ||
+	    (pkts_sent & CN23XX_INTR_PI_INT)) {
+		if (pkts_sent & CN23XX_INTR_PO_INT)
+			ret |= MSIX_PO_INT;
+	}
+
+	if (pkts_sent & CN23XX_INTR_PI_INT)
+		/* We will clear the count when we update the read_index. */
+		ret |= MSIX_PI_INT;
+
+	if (pkts_sent & CN23XX_INTR_MBOX_INT) {
+		cn23xx_handle_vf_mbox_intr(ioq_vector);
+		ret |= MSIX_MBOX_INT;
+	}
+
+	return ret;
+}
+
+static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq)
+{
+	u32 pkt_in_done = readl(iq->inst_cnt_reg);
+	u32 last_done;
+	u32 new_idx;
+
+	last_done = pkt_in_done - iq->pkt_in_done;
+	iq->pkt_in_done = pkt_in_done;
+
+	/* Modulo of the new index with the IQ size will give us
+	 * the new index.  The iq->reset_instr_cnt is always zero for
+	 * cn23xx, so no extra adjustments are needed.
+	 */
+	new_idx = (iq->octeon_read_index +
+		   (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) %
+		  iq->max_count;
+
+	return new_idx;
+}
+
+static void cn23xx_enable_vf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+	struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
+	u32 q_no, time_threshold;
+
+	if (intr_flag & OCTEON_OUTPUT_INTR) {
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			/* Set up interrupt packet and time thresholds
+			 * for all the OQs
+			 */
+			time_threshold = cn23xx_vf_get_oq_ticks(
+				oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf));
+
+			octeon_write_csr64(
+			    oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no),
+			    (CFG_GET_OQ_INTR_PKT(cn23xx->conf) |
+			     ((u64)time_threshold << 32)));
+		}
+	}
+
+	if (intr_flag & OCTEON_INPUT_INTR) {
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			/* Set CINT_ENB to enable IQ interrupt */
+			octeon_write_csr64(
+			    oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no),
+			    ((octeon_read_csr64(
+				  oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no)) &
+			      ~CN23XX_PKT_IN_DONE_CNT_MASK) |
+			     CN23XX_INTR_CINT_ENB));
+		}
+	}
+
+	/* Set queue-0 MBOX_ENB to enable VF mailbox interrupt */
+	if (intr_flag & OCTEON_MBOX_INTR) {
+		octeon_write_csr64(
+		    oct, CN23XX_VF_SLI_PKT_MBOX_INT(0),
+		    (octeon_read_csr64(oct, CN23XX_VF_SLI_PKT_MBOX_INT(0)) |
+		     CN23XX_INTR_MBOX_ENB));
+	}
+}
+
+static void cn23xx_disable_vf_interrupt(struct octeon_device *oct, u8 intr_flag)
+{
+	u32 q_no;
+
+	if (intr_flag & OCTEON_OUTPUT_INTR) {
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			/* Write all 1's in INT_LEVEL reg to disable PO_INT */
+			octeon_write_csr64(
+			    oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no),
+			    0x3fffffffffffff);
+		}
+	}
+	if (intr_flag & OCTEON_INPUT_INTR) {
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			octeon_write_csr64(
+			    oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no),
+			    (octeon_read_csr64(
+				 oct, CN23XX_VF_SLI_IQ_INSTR_COUNT64(q_no)) &
+			     ~(CN23XX_INTR_CINT_ENB |
+			       CN23XX_PKT_IN_DONE_CNT_MASK)));
+		}
+	}
+
+	if (intr_flag & OCTEON_MBOX_INTR) {
+		octeon_write_csr64(
+		    oct, CN23XX_VF_SLI_PKT_MBOX_INT(0),
+		    (octeon_read_csr64(oct, CN23XX_VF_SLI_PKT_MBOX_INT(0)) &
+		     ~CN23XX_INTR_MBOX_ENB));
+	}
+}
+
+/* Map BAR0, read the PF/VF numbers and ring count, clamp rings_per_vf and
+ * fill in oct->fn_list.  Returns 0 on success, 1 on failure.
+ */
+int cn23xx_setup_octeon_vf_device(struct octeon_device *oct)
+{
+	struct octeon_cn23xx_vf *cn23xx = (struct octeon_cn23xx_vf *)oct->chip;
+	u32 rings_per_vf;
+	u64 reg_val;
+
+	if (octeon_map_pci_barx(oct, 0, 0))
+		return 1;
+
+	/* INPUT_CONTROL[RPVF] gives the VF IOq count */
+	reg_val = octeon_read_csr64(oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(0));
+
+	oct->pf_num = (reg_val >> CN23XX_PKT_INPUT_CTL_PF_NUM_POS) &
+		      CN23XX_PKT_INPUT_CTL_PF_NUM_MASK;
+	oct->vf_num = (reg_val >> CN23XX_PKT_INPUT_CTL_VF_NUM_POS) &
+		      CN23XX_PKT_INPUT_CTL_VF_NUM_MASK;
+
+	reg_val = reg_val >> CN23XX_PKT_INPUT_CTL_RPVF_POS;
+
+	rings_per_vf = reg_val & CN23XX_PKT_INPUT_CTL_RPVF_MASK;
+
+	cn23xx->conf = oct_get_config_info(oct, LIO_23XX);
+	if (!cn23xx->conf) {
+		dev_err(&oct->pci_dev->dev, "%s No Config found for CN23XX\n",
+			__func__);
+		octeon_unmap_pci_barx(oct, 0);
+		return 1;
+	}
+
+	if (oct->sriov_info.rings_per_vf > rings_per_vf) {
+		dev_warn(&oct->pci_dev->dev,
+			 "num_queues:%d greater than PF configured rings_per_vf:%d. Reducing to %d.\n",
+			 oct->sriov_info.rings_per_vf, rings_per_vf,
+			 rings_per_vf);
+		oct->sriov_info.rings_per_vf = rings_per_vf;
+	} else {
+		if (rings_per_vf > num_present_cpus()) {
+			dev_warn(&oct->pci_dev->dev,
+				 "PF configured rings_per_vf:%d greater than num_cpu:%d. Using rings_per_vf:%d equal to num cpus\n",
+				 rings_per_vf,
+				 num_present_cpus(),
+				 num_present_cpus());
+			oct->sriov_info.rings_per_vf = num_present_cpus();
+		} else {
+			oct->sriov_info.rings_per_vf = rings_per_vf;
+		}
+	}
+
+	oct->fn_list.setup_iq_regs = cn23xx_setup_vf_iq_regs;
+	oct->fn_list.setup_oq_regs = cn23xx_setup_vf_oq_regs;
+	oct->fn_list.setup_mbox = cn23xx_setup_vf_mbox;
+	oct->fn_list.free_mbox = cn23xx_free_vf_mbox;
+
+	oct->fn_list.msix_interrupt_handler = cn23xx_vf_msix_interrupt_handler;
+
+	oct->fn_list.setup_device_regs = cn23xx_setup_vf_device_regs;
+	oct->fn_list.update_iq_read_idx = cn23xx_update_read_index;
+
+	oct->fn_list.enable_interrupt = cn23xx_enable_vf_interrupt;
+	oct->fn_list.disable_interrupt = cn23xx_disable_vf_interrupt;
+
+	oct->fn_list.enable_io_queues = cn23xx_enable_vf_io_queues;
+	oct->fn_list.disable_io_queues = cn23xx_disable_vf_io_queues;
+
+	return 0;
+}
+
+void cn23xx_dump_vf_iq_regs(struct octeon_device *oct)
+{
+	u32 regval, q_no;
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n",
+		CN23XX_VF_SLI_IQ_DOORBELL(0),
+		CVM_CAST64(octeon_read_csr64(
+					oct, CN23XX_VF_SLI_IQ_DOORBELL(0))));
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n",
+		CN23XX_VF_SLI_IQ_BASE_ADDR64(0),
+		CVM_CAST64(octeon_read_csr64(
+			oct, CN23XX_VF_SLI_IQ_BASE_ADDR64(0))));
+
+	dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n",
+		CN23XX_VF_SLI_IQ_SIZE(0),
+		CVM_CAST64(octeon_read_csr64(oct, CN23XX_VF_SLI_IQ_SIZE(0))));
+
+	for (q_no = 0; q_no < oct->sriov_info.rings_per_vf; q_no++) {
+		dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n",
+			q_no, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+			CVM_CAST64(octeon_read_csr64(
+				oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no))));
+	}
+
+	pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, &regval);
+	dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n",
+		CN23XX_CONFIG_PCIE_DEVCTL, regval);
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
new file mode 100644
index 000000000000..3f98c7334957
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_device.h
@@ -0,0 +1,50 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+/*! \file  cn23xx_vf_device.h
+ * \brief Host Driver: Routines that perform CN23XX specific operations.
+ */
+
+#ifndef __CN23XX_VF_DEVICE_H__
+#define __CN23XX_VF_DEVICE_H__
+
+#include "cn23xx_vf_regs.h"
+
+/* Register address and configuration for CN23XX VF devices.
+ * If device specific changes need to be made then add a struct to include
+ * device specific fields as shown in the commented section
+ */
+struct octeon_cn23xx_vf {
+	struct octeon_config *conf;
+};
+
+#define BUSY_READING_REG_VF_LOOP_COUNT		10000
+
+#define CN23XX_MAILBOX_MSGPARAM_SIZE		6
+
+#define MAX_VF_IP_OP_PENDING_PKT_COUNT		100
+
+void cn23xx_vf_ask_pf_to_do_flr(struct octeon_device *oct);
+
+int cn23xx_octeon_pfvf_handshake(struct octeon_device *oct);
+
+int cn23xx_setup_octeon_vf_device(struct octeon_device *oct);
+
+u32 cn23xx_vf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
+
+void cn23xx_dump_vf_initialized_regs(struct octeon_device *oct);
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_regs.h
new file mode 100644
index 000000000000..d33dd8f4226f
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_vf_regs.h
@@ -0,0 +1,274 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+/*! \file cn23xx_vf_regs.h
+ * \brief Host Driver: Register Address and Register Mask values for
+ * Octeon CN23XX vf functions.
+ */
+
+#ifndef __CN23XX_VF_REGS_H__
+#define __CN23XX_VF_REGS_H__
+
+#define     CN23XX_CONFIG_XPANSION_BAR             0x38
+
+#define     CN23XX_CONFIG_PCIE_CAP                 0x70
+#define     CN23XX_CONFIG_PCIE_DEVCAP              0x74
+#define     CN23XX_CONFIG_PCIE_DEVCTL              0x78
+#define     CN23XX_CONFIG_PCIE_LINKCAP             0x7C
+#define     CN23XX_CONFIG_PCIE_LINKCTL             0x80
+#define     CN23XX_CONFIG_PCIE_SLOTCAP             0x84
+#define     CN23XX_CONFIG_PCIE_SLOTCTL             0x88
+
+#define     CN23XX_CONFIG_PCIE_FLTMSK              0x720
+
+/* The input jabber is used to determine the TSO max size.
+ * Due to a H/W limitation, this needs to be reduced to 60000
+ * in order to use H/W TSO and avoid the WQE malformation
+ * PKO_BUG_24989_WQE_LEN
+ */
+#define    CN23XX_DEFAULT_INPUT_JABBER             0xEA60 /*60000*/
+
+/* ##############  BAR0 Registers ################ */
+
+/* Each Input Queue's registers are at a 0x20000-byte stride in BAR0 */
+#define    CN23XX_VF_IQ_OFFSET                     0x20000
+
+/*###################### REQUEST QUEUE #########################*/
+
+/* 64 registers for Input Queue Instr Count - SLI_PKT_IN_DONE0_CNTS */
+#define    CN23XX_VF_SLI_IQ_INSTR_COUNT_START64     0x10040
+
+/* 64 registers for Input Queues Start Addr - SLI_PKT0_INSTR_BADDR */
+#define    CN23XX_VF_SLI_IQ_BASE_ADDR_START64       0x10010
+
+/* 64 registers for Input Doorbell - SLI_PKT0_INSTR_BAOFF_DBELL */
+#define    CN23XX_VF_SLI_IQ_DOORBELL_START          0x10020
+
+/* 64 registers for Input Queue size - SLI_PKT0_INSTR_FIFO_RSIZE */
+#define    CN23XX_VF_SLI_IQ_SIZE_START              0x10030
+
+/* 64 registers (64-bit) - ES, RO, NS, Arbitration for Input Queue Data &
+ * gather list fetches. SLI_PKT(0..63)_INPUT_CONTROL.
+ */
+#define    CN23XX_VF_SLI_IQ_PKT_CONTROL_START64     0x10000
+
+/*------- Request Queue Macros ---------*/
+#define CN23XX_VF_SLI_IQ_PKT_CONTROL64(iq)		\
+	(CN23XX_VF_SLI_IQ_PKT_CONTROL_START64 + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_BASE_ADDR64(iq)		\
+	(CN23XX_VF_SLI_IQ_BASE_ADDR_START64 + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_SIZE(iq)			\
+	(CN23XX_VF_SLI_IQ_SIZE_START + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_DOORBELL(iq)			\
+	(CN23XX_VF_SLI_IQ_DOORBELL_START + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+#define CN23XX_VF_SLI_IQ_INSTR_COUNT64(iq)		\
+	(CN23XX_VF_SLI_IQ_INSTR_COUNT_START64 + ((iq) * CN23XX_VF_IQ_OFFSET))
+
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM                  BIT_ULL(32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM                 BIT(29)
+/* Number of instructions to be read in one MAC read request.
+ * setting to Max value(4)
+ */
+#define    CN23XX_PKT_INPUT_CTL_RDSIZE                  (3 << 25)
+#define    CN23XX_PKT_INPUT_CTL_IS_64B                  BIT(24)
+#define    CN23XX_PKT_INPUT_CTL_RST                     BIT(23)
+#define    CN23XX_PKT_INPUT_CTL_QUIET                   BIT(28)
+#define    CN23XX_PKT_INPUT_CTL_RING_ENB                BIT(22)
+#define    CN23XX_PKT_INPUT_CTL_DATA_NS                 BIT(8)
+#define    CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP        BIT(6)
+#define    CN23XX_PKT_INPUT_CTL_DATA_RO                 BIT(5)
+#define    CN23XX_PKT_INPUT_CTL_USE_CSR                 BIT(4)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_NS               BIT(3)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP      (2)
+#define    CN23XX_PKT_INPUT_CTL_GATHER_RO               (1)
+
+/** Rings per Virtual Function [RO] **/
+#define    CN23XX_PKT_INPUT_CTL_RPVF_MASK               (0x3F)
+#define    CN23XX_PKT_INPUT_CTL_RPVF_POS                (48)
+/* These bits[47:45][RO] give the Physical function number info within the MAC*/
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_MASK             (0x7)
+#define    CN23XX_PKT_INPUT_CTL_PF_NUM_POS              (45)
+/** These bits[44:32][RO] give the virtual function number info within the PF*/
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_MASK             (0x1FFF)
+#define    CN23XX_PKT_INPUT_CTL_VF_NUM_POS              (32)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_MASK            (0x3)
+#define    CN23XX_PKT_INPUT_CTL_MAC_NUM_POS             (29)
+#define    CN23XX_PKT_IN_DONE_WMARK_MASK                (0xFFFFULL)
+#define    CN23XX_PKT_IN_DONE_WMARK_BIT_POS             (32)
+#define    CN23XX_PKT_IN_DONE_CNT_MASK                  (0x00000000FFFFFFFFULL)
+
+#ifdef __LITTLE_ENDIAN_BITFIELD
+#define CN23XX_PKT_INPUT_CTL_MASK			\
+	(CN23XX_PKT_INPUT_CTL_RDSIZE			\
+	 | CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP	\
+	 | CN23XX_PKT_INPUT_CTL_USE_CSR)
+#else
+#define CN23XX_PKT_INPUT_CTL_MASK			\
+	(CN23XX_PKT_INPUT_CTL_RDSIZE			\
+	 | CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP	\
+	 | CN23XX_PKT_INPUT_CTL_USE_CSR			\
+	 | CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP)
+#endif
+
+/** Masks for SLI_PKT_IN_DONE(0..63)_CNTS Register */
+#define    CN23XX_IN_DONE_CNTS_PI_INT               BIT_ULL(62)
+#define    CN23XX_IN_DONE_CNTS_CINT_ENB             BIT_ULL(48)
+
+/*############################ OUTPUT QUEUE #########################*/
+
+/* 64 registers for Output queue control - SLI_PKT(0..63)_OUTPUT_CONTROL */
+#define    CN23XX_VF_SLI_OQ_PKT_CONTROL_START       0x10050
+
+/* 64 registers for Output queue buffer and info size - SLI_PKT0_OUT_SIZE */
+#define    CN23XX_VF_SLI_OQ0_BUFF_INFO_SIZE         0x10060
+
+/* 64 registers for Output Queue Start Addr - SLI_PKT0_SLIST_BADDR */
+#define    CN23XX_VF_SLI_OQ_BASE_ADDR_START64       0x10070
+
+/* 64 registers for Output Queue Packet Credits - SLI_PKT0_SLIST_BAOFF_DBELL */
+#define    CN23XX_VF_SLI_OQ_PKT_CREDITS_START       0x10080
+
+/* 64 registers for Output Queue size - SLI_PKT0_SLIST_FIFO_RSIZE */
+#define    CN23XX_VF_SLI_OQ_SIZE_START              0x10090
+
+/* 64 registers for Output Queue Packet Count - SLI_PKT0_CNTS */
+#define    CN23XX_VF_SLI_OQ_PKT_SENT_START          0x100B0
+
+/* 64 registers for Output Queue INT Levels - SLI_PKT0_INT_LEVELS */
+#define    CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64  0x100A0
+
+/* Each Output Queue's registers are at a 0x20000-byte stride in BAR0 */
+#define    CN23XX_VF_OQ_OFFSET                      0x20000
+
+/*------- Output Queue Macros ---------*/
+
+#define CN23XX_VF_SLI_OQ_PKT_CONTROL(oq)		\
+	(CN23XX_VF_SLI_OQ_PKT_CONTROL_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_BASE_ADDR64(oq)		\
+	(CN23XX_VF_SLI_OQ_BASE_ADDR_START64 + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_SIZE(oq)			\
+	(CN23XX_VF_SLI_OQ_SIZE_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_BUFF_INFO_SIZE(oq)		\
+	(CN23XX_VF_SLI_OQ0_BUFF_INFO_SIZE + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKTS_SENT(oq)		\
+	(CN23XX_VF_SLI_OQ_PKT_SENT_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKTS_CREDIT(oq)		\
+	(CN23XX_VF_SLI_OQ_PKT_CREDITS_START + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(oq)		\
+	(CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64 + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+/* Macros for accessing CNT and TIME separately from INT_LEVELS */
+#define CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_CNT(oq)	\
+	(CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64 + ((oq) * CN23XX_VF_OQ_OFFSET))
+
+#define CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_TIME(oq)	\
+	(CN23XX_VF_SLI_OQ_PKT_INT_LEVELS_START64 +	\
+	 ((oq) * CN23XX_VF_OQ_OFFSET) + 4)
+
+/*------------------ Masks ----------------*/
+#define    CN23XX_PKT_OUTPUT_CTL_TENB                  BIT(13)
+#define    CN23XX_PKT_OUTPUT_CTL_CENB                  BIT(12)
+#define    CN23XX_PKT_OUTPUT_CTL_IPTR                  BIT(11)
+#define    CN23XX_PKT_OUTPUT_CTL_ES                    BIT(9)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR                   BIT(8)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR                   BIT(7)
+#define    CN23XX_PKT_OUTPUT_CTL_DPTR                  BIT(6)
+#define    CN23XX_PKT_OUTPUT_CTL_BMODE                 BIT(5)
+#define    CN23XX_PKT_OUTPUT_CTL_ES_P                  BIT(3)
+#define    CN23XX_PKT_OUTPUT_CTL_NSR_P                 BIT(2)
+#define    CN23XX_PKT_OUTPUT_CTL_ROR_P                 BIT(1)
+#define    CN23XX_PKT_OUTPUT_CTL_RING_ENB              BIT(0)
+
+/*######################### Mailbox Reg Macros ########################*/
+#define    CN23XX_VF_SLI_PKT_MBOX_INT_START            0x10210
+#define    CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START         0x10200
+
+#define    CN23XX_SLI_MBOX_OFFSET                      0x20000
+#define    CN23XX_SLI_MBOX_SIG_IDX_OFFSET              0x8
+
+#define CN23XX_VF_SLI_PKT_MBOX_INT(q)	\
+	(CN23XX_VF_SLI_PKT_MBOX_INT_START + ((q) * CN23XX_SLI_MBOX_OFFSET))
+
+#define CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q, idx)		\
+	(CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START +		\
+	 ((q) * CN23XX_SLI_MBOX_OFFSET +		\
+	  (idx) * CN23XX_SLI_MBOX_SIG_IDX_OFFSET))
+
+/*######################## INTERRUPTS #########################*/
+
+#define    CN23XX_VF_SLI_INT_SUM_START		  0x100D0
+
+#define CN23XX_VF_SLI_INT_SUM(q)			\
+	(CN23XX_VF_SLI_INT_SUM_START + ((q) * CN23XX_VF_IQ_OFFSET))
+
+/*------------------ Interrupt Masks ----------------*/
+
+#define    CN23XX_INTR_PO_INT                   BIT_ULL(63)
+#define    CN23XX_INTR_PI_INT                   BIT_ULL(62)
+#define    CN23XX_INTR_MBOX_INT                 BIT_ULL(61)
+#define    CN23XX_INTR_RESEND                   BIT_ULL(60)
+
+#define    CN23XX_INTR_CINT_ENB                 BIT_ULL(48)
+#define    CN23XX_INTR_MBOX_ENB                 BIT(0)
+
+/*############################ MIO #########################*/
+#define    CN23XX_MIO_PTP_CLOCK_CFG       0x0001070000000f00ULL
+#define    CN23XX_MIO_PTP_CLOCK_LO        0x0001070000000f08ULL
+#define    CN23XX_MIO_PTP_CLOCK_HI        0x0001070000000f10ULL
+#define    CN23XX_MIO_PTP_CLOCK_COMP      0x0001070000000f18ULL
+#define    CN23XX_MIO_PTP_TIMESTAMP       0x0001070000000f20ULL
+#define    CN23XX_MIO_PTP_EVT_CNT         0x0001070000000f28ULL
+#define    CN23XX_MIO_PTP_CKOUT_THRESH_LO 0x0001070000000f30ULL
+#define    CN23XX_MIO_PTP_CKOUT_THRESH_HI 0x0001070000000f38ULL
+#define    CN23XX_MIO_PTP_CKOUT_HI_INCR   0x0001070000000f40ULL
+#define    CN23XX_MIO_PTP_CKOUT_LO_INCR   0x0001070000000f48ULL
+#define    CN23XX_MIO_PTP_PPS_THRESH_LO   0x0001070000000f50ULL
+#define    CN23XX_MIO_PTP_PPS_THRESH_HI   0x0001070000000f58ULL
+#define    CN23XX_MIO_PTP_PPS_HI_INCR     0x0001070000000f60ULL
+#define    CN23XX_MIO_PTP_PPS_LO_INCR     0x0001070000000f68ULL
+
+/*############################ RST #########################*/
+#define    CN23XX_RST_BOOT                0x0001180006001600ULL
+
+/*######################## MSIX TABLE #########################*/
+
+#define    CN23XX_MSIX_TABLE_ADDR_START    0x0
+#define    CN23XX_MSIX_TABLE_DATA_START    0x8
+
+#define    CN23XX_MSIX_TABLE_SIZE          0x10
+#define    CN23XX_MSIX_TABLE_ENTRIES       0x41
+
+#define    CN23XX_MSIX_ENTRY_VECTOR_CTL    BIT_ULL(32)
+
+#define CN23XX_MSIX_TABLE_ADDR(idx)		\
+	(CN23XX_MSIX_TABLE_ADDR_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
+#define CN23XX_MSIX_TABLE_DATA(idx)		\
+	(CN23XX_MSIX_TABLE_DATA_START + ((idx) * CN23XX_MSIX_TABLE_SIZE))
+
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
index e779af88621b..bdec051107a6 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -275,7 +271,6 @@ void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no)
 {
 	struct octeon_instr_queue *iq = oct->instr_queue[iq_no];
 
-	/* Disable Packet-by-Packet mode; No Parse Mode or Skip length */
 	octeon_write_csr64(oct, CN6XXX_SLI_IQ_PKT_INSTR_HDR64(iq_no), 0);
 
 	/* Write the start of the input queue's ring and its size  */
@@ -378,7 +373,7 @@ void lio_cn6xxx_disable_io_queues(struct octeon_device *oct)
 
 	/* Reset the doorbell register for each Input queue. */
 	for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-		if (!(oct->io_qmask.iq & (1ULL << i)))
+		if (!(oct->io_qmask.iq & BIT_ULL(i)))
 			continue;
 		octeon_write_csr(oct, CN6XXX_SLI_IQ_DOORBELL(i), 0xFFFFFFFF);
 		d32 = octeon_read_csr(oct, CN6XXX_SLI_IQ_DOORBELL(i));
@@ -400,9 +395,8 @@ void lio_cn6xxx_disable_io_queues(struct octeon_device *oct)
 	;
 
 	/* Reset the doorbell register for each Output queue. */
-	/* for (i = 0; i < oct->num_oqs; i++) { */
 	for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-		if (!(oct->io_qmask.oq & (1ULL << i)))
+		if (!(oct->io_qmask.oq & BIT_ULL(i)))
 			continue;
 		octeon_write_csr(oct, CN6XXX_SLI_OQ_PKTS_CREDIT(i), 0xFFFFFFFF);
 		d32 = octeon_read_csr(oct, CN6XXX_SLI_OQ_PKTS_CREDIT(i));
@@ -537,15 +531,14 @@ static int lio_cn6xxx_process_droq_intr_regs(struct octeon_device *oct)
 
 	oct->droq_intr = 0;
 
-	/* for (oq_no = 0; oq_no < oct->num_oqs; oq_no++) { */
 	for (oq_no = 0; oq_no < MAX_OCTEON_OUTPUT_QUEUES(oct); oq_no++) {
-		if (!(droq_mask & (1ULL << oq_no)))
+		if (!(droq_mask & BIT_ULL(oq_no)))
 			continue;
 
 		droq = oct->droq[oq_no];
 		pkt_count = octeon_droq_check_hw_for_pkts(droq);
 		if (pkt_count) {
-			oct->droq_intr |= (1ULL << oq_no);
+			oct->droq_intr |= BIT_ULL(oq_no);
 			if (droq->ops.poll_mode) {
 				u32 value;
 				u32 reg;
@@ -721,8 +714,6 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct)
 int lio_validate_cn6xxx_config_info(struct octeon_device *oct,
 				    struct octeon_config *conf6xxx)
 {
-	/* int total_instrs = 0; */
-
 	if (CFG_GET_IQ_MAX_Q(conf6xxx) > CN6XXX_MAX_INPUT_QUEUES) {
 		dev_err(&oct->pci_dev->dev, "%s: Num IQ (%d) exceeds Max (%d)\n",
 			__func__, CFG_GET_IQ_MAX_Q(conf6xxx),
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
index a40a91394079..8ed57134ee0c 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  cn66xx_device.h
  *  \brief Host Driver: Routines that perform CN66XX specific operations.
  */
@@ -96,8 +91,8 @@ void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip,
 				  struct octeon_reg_list *reg_list);
 u32 lio_cn6xxx_coprocessor_clock(struct octeon_device *oct);
 u32 lio_cn6xxx_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us);
-int lio_setup_cn66xx_octeon_device(struct octeon_device *);
+int lio_setup_cn66xx_octeon_device(struct octeon_device *oct);
 int lio_validate_cn6xxx_config_info(struct octeon_device *oct,
-				    struct octeon_config *);
+				    struct octeon_config *conf6xxx);
 
 #endif
diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h
index 5e3aff242ad3..b248966837b4 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_regs.h
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file cn66xx_regs.h
  *  \brief Host Driver: Register Address and Register Mask values for
  *  Octeon CN66XX devices.
@@ -443,10 +438,10 @@
 #define    CN6XXX_SLI_S2M_PORT0_CTL              0x3D80
 #define    CN6XXX_SLI_S2M_PORT1_CTL              0x3D90
 #define    CN6XXX_SLI_S2M_PORTX_CTL(port)        \
-	(CN6XXX_SLI_S2M_PORT0_CTL + (port * 0x10))
+	(CN6XXX_SLI_S2M_PORT0_CTL + ((port) * 0x10))
 
 #define    CN6XXX_SLI_INT_ENB64(port)            \
-	(CN6XXX_SLI_INT_ENB64_PORT0 + (port * 0x10))
+	(CN6XXX_SLI_INT_ENB64_PORT0 + ((port) * 0x10))
 
 #define    CN6XXX_SLI_MAC_NUMBER                 0x3E00
 
@@ -458,7 +453,7 @@
 #define    CN6XXX_PCI_BAR1_OFFSET                  0x8
 
 #define    CN6XXX_BAR1_REG(idx, port) \
-		(CN6XXX_BAR1_INDEX_START + (port * CN6XXX_PEM_OFFSET) + \
+		(CN6XXX_BAR1_INDEX_START + ((port) * CN6XXX_PEM_OFFSET) + \
 		(CN6XXX_PCI_BAR1_OFFSET * (idx)))
 
 /*############################ DPI #########################*/
@@ -476,17 +471,17 @@
 #define    CN6XXX_DPI_DMA_ENG0_ENB        0x0001df0000000080ULL
 
 #define    CN6XXX_DPI_DMA_ENG_ENB(q_no)   \
-	(CN6XXX_DPI_DMA_ENG0_ENB + (q_no * 8))
+	(CN6XXX_DPI_DMA_ENG0_ENB + ((q_no) * 8))
 
 #define    CN6XXX_DPI_DMA_ENG0_BUF        0x0001df0000000880ULL
 
 #define    CN6XXX_DPI_DMA_ENG_BUF(q_no)   \
-	(CN6XXX_DPI_DMA_ENG0_BUF + (q_no * 8))
+	(CN6XXX_DPI_DMA_ENG0_BUF + ((q_no) * 8))
 
 #define    CN6XXX_DPI_SLI_PRT0_CFG        0x0001df0000000900ULL
 #define    CN6XXX_DPI_SLI_PRT1_CFG        0x0001df0000000908ULL
 #define    CN6XXX_DPI_SLI_PRTX_CFG(port)        \
-	(CN6XXX_DPI_SLI_PRT0_CFG + (port * 0x10))
+	(CN6XXX_DPI_SLI_PRT0_CFG + ((port) * 0x10))
 
 #define    CN6XXX_DPI_DMA_COMMIT_MODE     BIT_ULL(58)
 #define    CN6XXX_DPI_DMA_PKT_HP          BIT_ULL(57)
diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
index dbf3566ead53..50b533ff58e6 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -76,7 +72,7 @@ static void lio_cn68xx_setup_pkt_ctl_regs(struct octeon_device *oct)
 	pktctl = octeon_read_csr64(oct, CN6XXX_SLI_PKT_CTL);
 
 	/* 68XX specific */
-	max_oqs = CFG_GET_OQ_MAX_Q(CHIP_FIELD(oct, cn6xxx, conf));
+	max_oqs = CFG_GET_OQ_MAX_Q(CHIP_CONF(oct, cn6xxx));
 	tx_pipe  = octeon_read_csr64(oct, CN68XX_SLI_TX_PIPE);
 	tx_pipe &= 0xffffffffff00ffffULL; /* clear out NUMP field */
 	tx_pipe |= max_oqs << 16; /* put max_oqs in NUMP field */
diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h
index ea7bdcce6044..66b8d6bf5ec4 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.h
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  cn68xx_device.h
  *  \brief Host Driver: Routines that perform CN68XX specific operations.
  */
diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h b/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h
index d45a0f4aaf1f..0b742f09e49d 100644
--- a/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h
+++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_regs.h
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file cn68xx_regs.h
  *  \brief Host Driver: Register Address and Register Mask values for
  *  Octeon CN68XX devices. The register map for CN66XX is the same
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index 201eddb3013a..f629c2fe04a4 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/if_vlan.h>
 #include "liquidio_common.h"
@@ -89,13 +85,6 @@ void octeon_update_tx_completion_counters(void *buf, int reqtype,
 	}
 
 	(*pkts_compl)++;
-/*TODO, Use some other pound define to suggest
- * the fact that iqs are not tied to netdevs
- * and can take traffic from different netdevs
- * hence bql reporting is done per packet
- * than in bulk. Usage of NO_NAPI in txq completion is
- * a little confusing
- */
 	*bytes_compl += skb->len;
 }
 
@@ -264,3 +253,34 @@ void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr)
 			nctrl->ncmd.s.cmd);
 	}
 }
+
+/* Apply a MAC address that the PF has assigned to this VF. */
+void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac)
+{
+	struct net_device *ndev;
+	struct lio *priv;
+	bool updated;
+
+	/* dev_addr is rewritten and the notifier chain runs under RTNL */
+	rtnl_lock();
+	ndev = oct->props[0].netdev;
+	priv = GET_LIO(ndev);
+	priv->linfo.macaddr_is_admin_asgnd = true;
+
+	updated = !ether_addr_equal(ndev->dev_addr, mac);
+	if (updated) {
+		ether_addr_copy(ndev->dev_addr, mac);
+		/* the cached copy lives at byte offset 2 of linfo.hw_addr */
+		ether_addr_copy(((u8 *)&priv->linfo.hw_addr) + 2, mac);
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, ndev);
+	}
+
+	rtnl_unlock();
+
+	/* Only log here: the PF already informed the firmware of the new
+	 * address, so there is no need to notify it again.
+	 */
+	if (updated)
+		dev_info(&oct->pci_dev->dev,
+			 "PF changed VF's MAC address to %pM\n", mac);
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
index f163e0abbeb2..b00c3002360e 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/netdevice.h>
 #include <linux/net_tstamp.h>
 #include <linux/pci.h>
@@ -33,6 +29,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
+#include "cn23xx_vf_device.h"
 
 static int octnet_get_link_stats(struct net_device *netdev);
 
@@ -74,9 +71,9 @@ enum {
 	INTERFACE_MODE_MIXED,
 };
 
-#define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0]))
 #define OCT_ETHTOOL_REGDUMP_LEN  4096
 #define OCT_ETHTOOL_REGDUMP_LEN_23XX  (4096 * 11)
+#define OCT_ETHTOOL_REGDUMP_LEN_23XX_VF  (4096 * 2)
 #define OCT_ETHTOOL_REGSVER  1
 
 /* statistics of PF */
@@ -87,9 +84,9 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"tx_bytes",
 	"rx_errors",	/*jabber_err+l2_err+frame_err */
 	"tx_errors",	/*fw_err_pko+fw_err_link+fw_err_drop */
-	"rx_dropped",   /*st->fromwire.total_rcvd - st->fromwire.fw_total_rcvd
-			*+st->fromwire.dmac_drop + st->fromwire.fw_err_drop
-			*/
+	"rx_dropped",   /*st->fromwire.total_rcvd - st->fromwire.fw_total_rcvd +
+			 *st->fromwire.dmac_drop + st->fromwire.fw_err_drop
+			 */
 	"tx_dropped",
 
 	"tx_total_sent",
@@ -152,6 +149,19 @@ static const char oct_stats_strings[][ETH_GSTRING_LEN] = {
 	"link_state_changes",
 };
 
+/* VF stat names, in the order lio_vf_get_ethtool_stats() fills data[] */
+static const char oct_vf_stats_strings[][ETH_GSTRING_LEN] = {
+	"rx_packets",
+	"tx_packets",
+	"rx_bytes",
+	"tx_bytes",
+	"rx_errors", /* jabber_err + l2_err + frame_err */
+	"tx_errors", /* fw_err_pko + fw_err_link + fw_err_drop */
+	"rx_dropped", /* total_rcvd - fw_total_rcvd + dmac_drop + fw_err_drop */
+	"tx_dropped",
+	"link_state_changes",
+};
+
 /* statistics of host tx queue */
 static const char oct_iq_stats_strings[][ETH_GSTRING_LEN] = {
 	"packets",		/*oct->instr_queue[iq_no]->stats.tx_done*/
@@ -197,25 +207,28 @@ static const char oct_priv_flags_strings[][ETH_GSTRING_LEN] = {
 #define OCTNIC_NCMD_AUTONEG_ON  0x1
 #define OCTNIC_NCMD_PHY_ON      0x2
 
-static int lio_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+static int lio_get_link_ksettings(struct net_device *netdev,
+				  struct ethtool_link_ksettings *ecmd)
 {
 	struct lio *lio = GET_LIO(netdev);
 	struct octeon_device *oct = lio->oct_dev;
 	struct oct_link_info *linfo;
+	u32 supported, advertising;
 
 	linfo = &lio->linfo;
 
 	if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
 	    linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
 	    linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
-		ecmd->port = PORT_FIBRE;
-		ecmd->supported =
-			(SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE |
-			 SUPPORTED_Pause);
-		ecmd->advertising =
-			(ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
-		ecmd->transceiver = XCVR_EXTERNAL;
-		ecmd->autoneg = AUTONEG_DISABLE;
+		ecmd->base.port = PORT_FIBRE;
+		supported = (SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE |
+			     SUPPORTED_Pause);
+		advertising = (ADVERTISED_10000baseT_Full | ADVERTISED_Pause);
+		ethtool_convert_legacy_u32_to_link_mode(
+			ecmd->link_modes.supported, supported);
+		ethtool_convert_legacy_u32_to_link_mode(
+			ecmd->link_modes.advertising, advertising);
+		ecmd->base.autoneg = AUTONEG_DISABLE;
 
 	} else {
 		dev_err(&oct->pci_dev->dev, "Unknown link interface reported %d\n",
@@ -223,11 +236,11 @@ static int lio_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
 	}
 
 	if (linfo->link.s.link_up) {
-		ethtool_cmd_speed_set(ecmd, linfo->link.s.speed);
-		ecmd->duplex = linfo->link.s.duplex;
+		ecmd->base.speed = linfo->link.s.speed;
+		ecmd->base.duplex = linfo->link.s.duplex;
 	} else {
-		ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
-		ecmd->duplex = DUPLEX_UNKNOWN;
+		ecmd->base.speed = SPEED_UNKNOWN;
+		ecmd->base.duplex = DUPLEX_UNKNOWN;
 	}
 
 	return 0;
@@ -251,6 +264,23 @@ lio_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
 }
 
 static void
+lio_get_vf_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	memset(drvinfo, 0, sizeof(*drvinfo));
+	strcpy(drvinfo->driver, "liquidio_vf");
+	strcpy(drvinfo->version, LIQUIDIO_VERSION);
+
+	/* drvinfo was zeroed above, so size - 1 copies stay NUL-terminated */
+	strncpy(drvinfo->fw_version, oct->fw_info.liquidio_firmware_version,
+		sizeof(drvinfo->fw_version) - 1);
+	strncpy(drvinfo->bus_info, pci_name(oct->pci_dev),
+		sizeof(drvinfo->bus_info) - 1);
+}
+
+static void
 lio_ethtool_get_channels(struct net_device *dev,
 			 struct ethtool_channels *channel)
 {
@@ -259,14 +289,14 @@ lio_ethtool_get_channels(struct net_device *dev,
 	u32 max_rx = 0, max_tx = 0, tx_count = 0, rx_count = 0;
 
 	if (OCTEON_CN6XXX(oct)) {
-		struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
+		struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
 
 		max_rx = CFG_GET_OQ_MAX_Q(conf6x);
 		max_tx = CFG_GET_IQ_MAX_Q(conf6x);
 		rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx);
 		tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx);
 	} else if (OCTEON_CN23XX_PF(oct)) {
-		struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+		struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_pf);
 
 		max_rx = CFG_GET_OQ_MAX_Q(conf23);
 		max_tx = CFG_GET_IQ_MAX_Q(conf23);
@@ -589,14 +619,14 @@ lio_ethtool_get_ringparam(struct net_device *netdev,
 	    rx_pending = 0;
 
 	if (OCTEON_CN6XXX(oct)) {
-		struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
+		struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
 
 		tx_max_pending = CN6XXX_MAX_IQ_DESCRIPTORS;
 		rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS;
 		rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx);
 		tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx);
 	} else if (OCTEON_CN23XX_PF(oct)) {
-		struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+		struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_pf);
 
 		tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS;
 		rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS;
@@ -757,9 +787,6 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	/*sum of oct->instr_queue[iq_no]->stats.tx_dropped */
 	data[i++] = CVM_CAST64(netstats->tx_dropped);
 
-	/*data[i++] = CVM_CAST64(stats->multicast); */
-	/*data[i++] = CVM_CAST64(stats->collisions); */
-
 	/* firmware tx stats */
 	/*per_core_stats[cvmx_get_core_num()].link_stats[mdata->from_ifidx].
 	 *fromhost.fw_total_sent
@@ -910,9 +937,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	/*lio->link_changes*/
 	data[i++] = CVM_CAST64(lio->link_changes);
 
-	/* TX  -- lio_update_stats(lio); */
 	for (j = 0; j < MAX_OCTEON_INSTR_QUEUES(oct_dev); j++) {
-		if (!(oct_dev->io_qmask.iq & (1ULL << j)))
+		if (!(oct_dev->io_qmask.iq & BIT_ULL(j)))
 			continue;
 		/*packets to network port*/
 		/*# of packets tx to network */
@@ -954,9 +980,8 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	}
 
 	/* RX */
-	/* for (j = 0; j < oct_dev->num_oqs; j++) { */
 	for (j = 0; j < MAX_OCTEON_OUTPUT_QUEUES(oct_dev); j++) {
-		if (!(oct_dev->io_qmask.oq & (1ULL << j)))
+		if (!(oct_dev->io_qmask.oq & BIT_ULL(j)))
 			continue;
 
 		/*packets send to TCP/IP network stack */
@@ -992,6 +1017,109 @@ lio_get_ethtool_stats(struct net_device *netdev,
 	}
 }
 
+static void lio_vf_get_ethtool_stats(struct net_device *netdev,
+				     struct ethtool_stats *stats
+				     __attribute__((unused)),
+				     u64 *data)
+{
+	struct net_device_stats *netstats = &netdev->stats;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct_dev = lio->oct_dev;
+	int i = 0, j, vj;
+
+	netdev->netdev_ops->ndo_get_stats(netdev);
+	/* sum of oct->droq[oq_no]->stats->rx_pkts_received */
+	data[i++] = CVM_CAST64(netstats->rx_packets);
+	/* sum of oct->instr_queue[iq_no]->stats.tx_done */
+	data[i++] = CVM_CAST64(netstats->tx_packets);
+	/* sum of oct->droq[oq_no]->stats->rx_bytes_received */
+	data[i++] = CVM_CAST64(netstats->rx_bytes);
+	/* sum of oct->instr_queue[iq_no]->stats.tx_tot_bytes */
+	data[i++] = CVM_CAST64(netstats->tx_bytes);
+	data[i++] = CVM_CAST64(netstats->rx_errors);
+	data[i++] = CVM_CAST64(netstats->tx_errors);
+	 /* sum of oct->droq[oq_no]->stats->rx_dropped +
+	  * oct->droq[oq_no]->stats->dropped_nodispatch +
+	  * oct->droq[oq_no]->stats->dropped_toomany +
+	  * oct->droq[oq_no]->stats->dropped_nomem
+	  */
+	data[i++] = CVM_CAST64(netstats->rx_dropped);
+	/* sum of oct->instr_queue[iq_no]->stats.tx_dropped */
+	data[i++] = CVM_CAST64(netstats->tx_dropped);
+	/* lio->link_changes */
+	data[i++] = CVM_CAST64(lio->link_changes);
+
+	for (vj = 0; vj < lio->linfo.num_txpciq; vj++) {
+		j = lio->linfo.txpciq[vj].s.q_no;
+
+		/* TX: packets to network port */
+		/* # of packets tx to network */
+		data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_done);
+		 /* # of bytes tx to network */
+		data[i++] = CVM_CAST64(
+				oct_dev->instr_queue[j]->stats.tx_tot_bytes);
+		/* # of packets dropped */
+		data[i++] = CVM_CAST64(
+				oct_dev->instr_queue[j]->stats.tx_dropped);
+		/* # of tx fails due to queue full */
+		data[i++] = CVM_CAST64(
+				oct_dev->instr_queue[j]->stats.tx_iq_busy);
+		/* # of gather entries sent (sgentry_sent) */
+		data[i++] = CVM_CAST64(
+				oct_dev->instr_queue[j]->stats.sgentry_sent);
+
+		/* instruction to firmware: data and control */
+		/* # of instructions to the queue */
+		data[i++] = CVM_CAST64(
+				oct_dev->instr_queue[j]->stats.instr_posted);
+		/* # of instructions processed */
+		data[i++] =
+		    CVM_CAST64(oct_dev->instr_queue[j]->stats.instr_processed);
+		/* # of instructions could not be processed */
+		data[i++] =
+		    CVM_CAST64(oct_dev->instr_queue[j]->stats.instr_dropped);
+		/* bytes sent through the queue */
+		data[i++] = CVM_CAST64(
+				oct_dev->instr_queue[j]->stats.bytes_sent);
+		/* # of tso requests */
+		data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_gso);
+		/* # of vxlan requests */
+		data[i++] = CVM_CAST64(oct_dev->instr_queue[j]->stats.tx_vxlan);
+		/* # of txq restarts */
+		data[i++] = CVM_CAST64(
+				oct_dev->instr_queue[j]->stats.tx_restart);
+	}
+
+	/* RX */
+	for (vj = 0; vj < lio->linfo.num_rxpciq; vj++) {
+		j = lio->linfo.rxpciq[vj].s.q_no;
+
+		/* packets sent to TCP/IP network stack */
+		/* # of packets to network stack */
+		data[i++] = CVM_CAST64(
+				oct_dev->droq[j]->stats.rx_pkts_received);
+		/* # of bytes to network stack */
+		data[i++] = CVM_CAST64(
+				oct_dev->droq[j]->stats.rx_bytes_received);
+		data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.dropped_nomem +
+				       oct_dev->droq[j]->stats.dropped_toomany +
+				       oct_dev->droq[j]->stats.rx_dropped);
+		data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.dropped_nomem);
+		data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.dropped_toomany);
+		data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.rx_dropped);
+
+		/* control and data path */
+		data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.pkts_received);
+		data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.bytes_received);
+		data[i++] =
+			CVM_CAST64(oct_dev->droq[j]->stats.dropped_nodispatch);
+
+		data[i++] = CVM_CAST64(oct_dev->droq[j]->stats.rx_vxlan);
+		data[i++] =
+		    CVM_CAST64(oct_dev->droq[j]->stats.rx_alloc_failure);
+	}
+}
+
 static void lio_get_priv_flags_strings(struct lio *lio, u8 *data)
 {
 	struct octeon_device *oct_dev = lio->oct_dev;
@@ -999,6 +1127,7 @@ static void lio_get_priv_flags_strings(struct lio *lio, u8 *data)
 
 	switch (oct_dev->chip_id) {
 	case OCTEON_CN23XX_PF_VID:
+	case OCTEON_CN23XX_VF_VID:
 		for (i = 0; i < ARRAY_SIZE(oct_priv_flags_strings); i++) {
 			sprintf(data, "%s", oct_priv_flags_strings[i]);
 			data += ETH_GSTRING_LEN;
@@ -1030,7 +1159,55 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 
 		num_iq_stats = ARRAY_SIZE(oct_iq_stats_strings);
 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct_dev); i++) {
-			if (!(oct_dev->io_qmask.iq & (1ULL << i)))
+			if (!(oct_dev->io_qmask.iq & BIT_ULL(i)))
+				continue;
+			for (j = 0; j < num_iq_stats; j++) {
+				sprintf(data, "tx-%d-%s", i,
+					oct_iq_stats_strings[j]);
+				data += ETH_GSTRING_LEN;
+			}
+		}
+
+		num_oq_stats = ARRAY_SIZE(oct_droq_stats_strings);
+		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct_dev); i++) {
+			if (!(oct_dev->io_qmask.oq & BIT_ULL(i)))
+				continue;
+			for (j = 0; j < num_oq_stats; j++) {
+				sprintf(data, "rx-%d-%s", i,
+					oct_droq_stats_strings[j]);
+				data += ETH_GSTRING_LEN;
+			}
+		}
+		break;
+
+	case ETH_SS_PRIV_FLAGS:
+		lio_get_priv_flags_strings(lio, data);
+		break;
+	default:
+		netif_info(lio, drv, lio->netdev, "Unknown Stringset !!\n");
+		break;
+	}
+}
+
+static void lio_vf_get_strings(struct net_device *netdev, u32 stringset,
+			       u8 *data)
+{
+	int num_iq_stats, num_oq_stats, i, j;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct_dev = lio->oct_dev;
+	int num_stats;
+
+	switch (stringset) {
+	case ETH_SS_STATS:
+		num_stats = ARRAY_SIZE(oct_vf_stats_strings);
+		for (j = 0; j < num_stats; j++) {
+			sprintf(data, "%s", oct_vf_stats_strings[j]);
+			data += ETH_GSTRING_LEN;
+		}
+
+		num_iq_stats = ARRAY_SIZE(oct_iq_stats_strings);
+		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct_dev); i++) {
+			if (!(oct_dev->io_qmask.iq & BIT_ULL(i)))
 				continue;
 			for (j = 0; j < num_iq_stats; j++) {
 				sprintf(data, "tx-%d-%s", i,
@@ -1040,9 +1217,8 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 		}
 
 		num_oq_stats = ARRAY_SIZE(oct_droq_stats_strings);
-		/* for (i = 0; i < oct_dev->num_oqs; i++) { */
 		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct_dev); i++) {
-			if (!(oct_dev->io_qmask.oq & (1ULL << i)))
+			if (!(oct_dev->io_qmask.oq & BIT_ULL(i)))
 				continue;
 			for (j = 0; j < num_oq_stats; j++) {
 				sprintf(data, "rx-%d-%s", i,
@@ -1067,6 +1243,7 @@ static int lio_get_priv_flags_ss_count(struct lio *lio)
 
 	switch (oct_dev->chip_id) {
 	case OCTEON_CN23XX_PF_VID:
+	case OCTEON_CN23XX_VF_VID:
 		return ARRAY_SIZE(oct_priv_flags_strings);
 	case OCTEON_CN68XX:
 	case OCTEON_CN66XX:
@@ -1094,6 +1271,23 @@ static int lio_get_sset_count(struct net_device *netdev, int sset)
 	}
 }
 
+static int lio_vf_get_sset_count(struct net_device *netdev, int sset)
+{
+	struct lio *priv = GET_LIO(netdev);
+	struct octeon_device *oct = priv->oct_dev;
+
+	if (sset == ETH_SS_PRIV_FLAGS)
+		return lio_get_priv_flags_ss_count(priv);
+	if (sset != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+	/* NOTE(review): sized from num_iqs/num_oqs, while the VF stats filler
+	 * walks linfo.num_txpciq/num_rxpciq; confirm these always agree.
+	 */
+	return (ARRAY_SIZE(oct_vf_stats_strings) +
+		ARRAY_SIZE(oct_iq_stats_strings) * oct->num_iqs +
+		ARRAY_SIZE(oct_droq_stats_strings) * oct->num_oqs);
+}
+
 static int lio_get_intr_coalesce(struct net_device *netdev,
 				 struct ethtool_coalesce *intr_coal)
 {
@@ -1106,6 +1300,7 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
 
 	switch (oct->chip_id) {
 	case OCTEON_CN23XX_PF_VID:
+	case OCTEON_CN23XX_VF_VID:
 		if (!intrmod_cfg->rx_enable) {
 			intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs;
 			intr_coal->rx_max_coalesced_frames =
@@ -1152,7 +1347,7 @@ static int lio_get_intr_coalesce(struct net_device *netdev,
 		intr_coal->rx_max_coalesced_frames_low =
 		    intrmod_cfg->rx_mincnt_trigger;
 	}
-	if (OCTEON_CN23XX_PF(oct) &&
+	if ((OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) &&
 	    (intrmod_cfg->tx_enable)) {
 		intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable;
 		intr_coal->tx_max_coalesced_frames_high =
@@ -1510,6 +1705,26 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal)
 		oct->intrmod.rx_frames = rx_max_coalesced_frames;
 		break;
 	}
+	case OCTEON_CN23XX_VF_VID: {
+		int q_no;
+
+		if (!intr_coal->rx_max_coalesced_frames)
+			rx_max_coalesced_frames = oct->intrmod.rx_frames;
+		else
+			rx_max_coalesced_frames =
+			    intr_coal->rx_max_coalesced_frames;
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			octeon_write_csr64(
+			    oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no),
+			    (octeon_read_csr64(
+				 oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no)) &
+			     (0x3fffff00000000UL)) |
+				rx_max_coalesced_frames);
+			/* consider writing to resend bit here */
+		}
+		oct->intrmod.rx_frames = rx_max_coalesced_frames;
+		break;
+	}
 	default:
 		return -EINVAL;
 	}
@@ -1563,6 +1778,27 @@ static int oct_cfg_rx_intrtime(struct lio *lio,
 		oct->intrmod.rx_usecs = rx_coalesce_usecs;
 		break;
 	}
+	case OCTEON_CN23XX_VF_VID: {
+		u64 time_threshold;
+		int q_no;
+
+		if (!intr_coal->rx_coalesce_usecs)
+			rx_coalesce_usecs = oct->intrmod.rx_usecs;
+		else
+			rx_coalesce_usecs = intr_coal->rx_coalesce_usecs;
+
+		time_threshold =
+		    cn23xx_vf_get_oq_ticks(oct, (u32)rx_coalesce_usecs);
+		for (q_no = 0; q_no < oct->num_oqs; q_no++) {
+			octeon_write_csr64(
+				oct, CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(q_no),
+				(oct->intrmod.rx_frames |
+				 (time_threshold << 32)));
+			/* consider setting resend bit */
+		}
+		oct->intrmod.rx_usecs = rx_coalesce_usecs;
+		break;
+	}
 	default:
 		return -EINVAL;
 	}
@@ -1584,6 +1820,7 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal
 	case OCTEON_CN68XX:
 	case OCTEON_CN66XX:
 		break;
+	case OCTEON_CN23XX_VF_VID:
 	case OCTEON_CN23XX_PF_VID: {
 		int q_no;
 
@@ -1642,6 +1879,7 @@ static int lio_set_intr_coalesce(struct net_device *netdev,
 		}
 		break;
 	case OCTEON_CN23XX_PF_VID:
+	case OCTEON_CN23XX_VF_VID:
 		break;
 	default:
 		return -EINVAL;
@@ -1704,86 +1942,6 @@ static int lio_get_ts_info(struct net_device *netdev,
 	return 0;
 }
 
-static int lio_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
-{
-	struct lio *lio = GET_LIO(netdev);
-	struct octeon_device *oct = lio->oct_dev;
-	struct oct_link_info *linfo;
-	struct octnic_ctrl_pkt nctrl;
-	int ret = 0;
-
-	/* get the link info */
-	linfo = &lio->linfo;
-
-	if (ecmd->autoneg != AUTONEG_ENABLE && ecmd->autoneg != AUTONEG_DISABLE)
-		return -EINVAL;
-
-	if (ecmd->autoneg == AUTONEG_DISABLE && ((ecmd->speed != SPEED_100 &&
-						  ecmd->speed != SPEED_10) ||
-						 (ecmd->duplex != DUPLEX_HALF &&
-						  ecmd->duplex != DUPLEX_FULL)))
-		return -EINVAL;
-
-	/* Ethtool Support is not provided for XAUI, RXAUI, and XFI Interfaces
-	 * as they operate at fixed Speed and Duplex settings
-	 */
-	if (linfo->link.s.if_mode == INTERFACE_MODE_XAUI ||
-	    linfo->link.s.if_mode == INTERFACE_MODE_RXAUI ||
-	    linfo->link.s.if_mode == INTERFACE_MODE_XFI) {
-		dev_info(&oct->pci_dev->dev,
-			 "Autonegotiation, duplex and speed settings cannot be modified.\n");
-		return -EINVAL;
-	}
-
-	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
-
-	nctrl.ncmd.u64 = 0;
-	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_SETTINGS;
-	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
-	nctrl.wait_time = 1000;
-	nctrl.netpndev = (u64)netdev;
-	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
-
-	/* Passing the parameters sent by ethtool like Speed, Autoneg & Duplex
-	 * to SE core application using ncmd.s.more & ncmd.s.param
-	 */
-	if (ecmd->autoneg == AUTONEG_ENABLE) {
-		/* Autoneg ON */
-		nctrl.ncmd.s.more = OCTNIC_NCMD_PHY_ON |
-				     OCTNIC_NCMD_AUTONEG_ON;
-		nctrl.ncmd.s.param1 = ecmd->advertising;
-	} else {
-		/* Autoneg OFF */
-		nctrl.ncmd.s.more = OCTNIC_NCMD_PHY_ON;
-
-		nctrl.ncmd.s.param2 = ecmd->duplex;
-
-		nctrl.ncmd.s.param1 = ecmd->speed;
-	}
-
-	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
-	if (ret < 0) {
-		dev_err(&oct->pci_dev->dev, "Failed to set settings\n");
-		return -1;
-	}
-
-	return 0;
-}
-
-static int lio_nway_reset(struct net_device *netdev)
-{
-	if (netif_running(netdev)) {
-		struct ethtool_cmd ecmd;
-
-		memset(&ecmd, 0, sizeof(struct ethtool_cmd));
-		ecmd.autoneg = 0;
-		ecmd.speed = 0;
-		ecmd.duplex = 0;
-		lio_set_settings(netdev, &ecmd);
-	}
-	return 0;
-}
-
 /* Return register dump len. */
 static int lio_get_regs_len(struct net_device *dev)
 {
@@ -1793,6 +1951,8 @@ static int lio_get_regs_len(struct net_device *dev)
 	switch (oct->chip_id) {
 	case OCTEON_CN23XX_PF_VID:
 		return OCT_ETHTOOL_REGDUMP_LEN_23XX;
+	case OCTEON_CN23XX_VF_VID:
+		return OCT_ETHTOOL_REGDUMP_LEN_23XX_VF;
 	default:
 		return OCT_ETHTOOL_REGDUMP_LEN;
 	}
@@ -2018,6 +2178,123 @@ static int cn23xx_read_csr_reg(char *s, struct octeon_device *oct)
 	return len;
 }
 
+static int cn23xx_vf_read_csr_reg(char *s, struct octeon_device *oct)
+{
+	int len = 0;
+	u32 reg;
+	int i;
+
+	/* VF ring CSRs. NOTE(review): IQ_INSTR_COUNT64 is dumped twice */
+
+	len += sprintf(s + len, "\n\t Octeon CSR Registers\n\n");
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_OQ_BUFF_INFO_SIZE(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_OUT_SIZE): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_IQ_INSTR_COUNT64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_OQ_PKTS_CREDIT(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_SLIST_BAOFF_DBELL): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_OQ_SIZE(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_SLIST_FIFO_RSIZE): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_OQ_PKT_CONTROL(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d__OUTPUT_CONTROL): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_OQ_BASE_ADDR64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_SLIST_BADDR): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_OQ_PKT_INT_LEVELS(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_INT_LEVELS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_OQ_PKTS_SENT(i);
+		len += sprintf(s + len, "\n[%08x] (SLI_PKT%d_CNTS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = 0x100c0 + i * CN23XX_VF_OQ_OFFSET;
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_ERROR_INFO): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = 0x100d0 + i * CN23XX_VF_IQ_OFFSET;
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_VF_INT_SUM): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_IQ_PKT_CONTROL64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_INPUT_CONTROL): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_IQ_BASE_ADDR64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_INSTR_BADDR): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_IQ_DOORBELL(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_INSTR_BAOFF_DBELL): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_IQ_SIZE(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT%d_INSTR_FIFO_RSIZE): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	for (i = 0; i < (oct->sriov_info.rings_per_vf); i++) {
+		reg = CN23XX_VF_SLI_IQ_INSTR_COUNT64(i);
+		len += sprintf(s + len,
+			       "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n",
+			       reg, i, (u64)octeon_read_csr64(oct, reg));
+	}
+
+	return len;
+}
+
 static int cn6xxx_read_csr_reg(char *s, struct octeon_device *oct)
 {
 	u32 reg;
@@ -2164,6 +2441,10 @@ static void lio_get_regs(struct net_device *dev,
 		memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX);
 		len += cn23xx_read_csr_reg(regbuf + len, oct);
 		break;
+	case OCTEON_CN23XX_VF_VID:
+		memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX_VF);
+		len += cn23xx_vf_read_csr_reg(regbuf + len, oct);
+		break;
 	case OCTEON_CN68XX:
 	case OCTEON_CN66XX:
 		memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN);
@@ -2194,7 +2475,7 @@ static int lio_set_priv_flags(struct net_device *netdev, u32 flags)
 }
 
 static const struct ethtool_ops lio_ethtool_ops = {
-	.get_settings		= lio_get_settings,
+	.get_link_ksettings	= lio_get_link_ksettings,
 	.get_link		= ethtool_op_get_link,
 	.get_drvinfo		= lio_get_drvinfo,
 	.get_ringparam		= lio_ethtool_get_ringparam,
@@ -2211,8 +2492,26 @@ static const struct ethtool_ops lio_ethtool_ops = {
 	.get_msglevel		= lio_get_msglevel,
 	.set_msglevel		= lio_set_msglevel,
 	.get_sset_count		= lio_get_sset_count,
-	.nway_reset		= lio_nway_reset,
-	.set_settings		= lio_set_settings,
+	.get_coalesce		= lio_get_intr_coalesce,
+	.set_coalesce		= lio_set_intr_coalesce,
+	.get_priv_flags		= lio_get_priv_flags,
+	.set_priv_flags		= lio_set_priv_flags,
+	.get_ts_info		= lio_get_ts_info,
+};
+
+static const struct ethtool_ops lio_vf_ethtool_ops = {
+	.get_link_ksettings	= lio_get_link_ksettings,
+	.get_link		= ethtool_op_get_link,
+	.get_drvinfo		= lio_get_vf_drvinfo,
+	.get_ringparam		= lio_ethtool_get_ringparam,
+	.get_channels		= lio_ethtool_get_channels,
+	.get_strings		= lio_vf_get_strings,
+	.get_ethtool_stats	= lio_vf_get_ethtool_stats,
+	.get_regs_len		= lio_get_regs_len,
+	.get_regs		= lio_get_regs,
+	.get_msglevel		= lio_get_msglevel,
+	.set_msglevel		= lio_set_msglevel,
+	.get_sset_count		= lio_vf_get_sset_count,
 	.get_coalesce		= lio_get_intr_coalesce,
 	.set_coalesce		= lio_set_intr_coalesce,
 	.get_priv_flags		= lio_get_priv_flags,
@@ -2222,5 +2521,11 @@ static const struct ethtool_ops lio_ethtool_ops = {
 
 void liquidio_set_ethtool_ops(struct net_device *netdev)
 {
-	netdev->ethtool_ops = &lio_ethtool_ops;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	if (OCTEON_CN23XX_VF(oct))
+		netdev->ethtool_ops = &lio_vf_ethtool_ops;
+	else
+		netdev->ethtool_ops = &lio_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c
index afc6f9dc8119..39a9665c9d00 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_main.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c
@@ -1,28 +1,22 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-#include <linux/version.h>
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/firmware.h>
-#include <linux/ptp_clock_kernel.h>
 #include <net/vxlan.h>
 #include <linux/kthread.h>
 #include "liquidio_common.h"
@@ -46,6 +40,7 @@ MODULE_VERSION(LIQUIDIO_VERSION);
 MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210SV_NAME LIO_FW_NAME_SUFFIX);
 MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_210NV_NAME LIO_FW_NAME_SUFFIX);
 MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_410NV_NAME LIO_FW_NAME_SUFFIX);
+MODULE_FIRMWARE(LIO_FW_DIR LIO_FW_BASE_NAME LIO_23XX_NAME LIO_FW_NAME_SUFFIX);
 
 static int ddr_timeout = 10000;
 module_param(ddr_timeout, int, 0644);
@@ -54,9 +49,6 @@ MODULE_PARM_DESC(ddr_timeout,
 
 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
 
-#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
-	(octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
-
 static int debug = -1;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
@@ -65,10 +57,6 @@ static char fw_type[LIO_MAX_FW_TYPE_LEN];
 module_param_string(fw_type, fw_type, sizeof(fw_type), 0000);
 MODULE_PARM_DESC(fw_type, "Type of firmware to be loaded. Default \"nic\"");
 
-static int conf_type;
-module_param(conf_type, int, 0);
-MODULE_PARM_DESC(conf_type, "select octeon configuration 0 default 1 ovs");
-
 static int ptp_enable = 1;
 
 /* Bit mask values for lio->ifstate */
@@ -180,6 +168,10 @@ struct octeon_device_priv {
 	unsigned long napi_mask;
 };
 
+#ifdef CONFIG_PCI_IOV
+static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs);
+#endif
+
 static int octeon_device_init(struct octeon_device *);
 static int liquidio_stop(struct net_device *netdev);
 static void liquidio_remove(struct pci_dev *pdev);
@@ -197,9 +189,8 @@ static void octeon_droq_bh(unsigned long pdev)
 	struct octeon_device_priv *oct_priv =
 		(struct octeon_device_priv *)oct->priv;
 
-	/* for (q_no = 0; q_no < oct->num_oqs; q_no++) { */
 	for (q_no = 0; q_no < MAX_OCTEON_OUTPUT_QUEUES(oct); q_no++) {
-		if (!(oct->io_qmask.oq & (1ULL << q_no)))
+		if (!(oct->io_qmask.oq & BIT_ULL(q_no)))
 			continue;
 		reschedule |= octeon_droq_process_packets(oct, oct->droq[q_no],
 							  MAX_PACKET_BUDGET);
@@ -234,7 +225,7 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct)
 		pending_pkts = 0;
 
 		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-			if (!(oct->io_qmask.oq & (1ULL << i)))
+			if (!(oct->io_qmask.oq & BIT_ULL(i)))
 				continue;
 			pkt_cnt += octeon_droq_check_hw_for_pkts(oct->droq[i]);
 		}
@@ -316,7 +307,7 @@ static inline void pcierror_quiesce_device(struct octeon_device *oct)
 	for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
 		struct octeon_instr_queue *iq;
 
-		if (!(oct->io_qmask.iq & (1ULL << i)))
+		if (!(oct->io_qmask.iq & BIT_ULL(i)))
 			continue;
 		iq = oct->instr_queue[i];
 
@@ -382,7 +373,6 @@ static void stop_pci_io(struct octeon_device *oct)
 	dev_dbg(&oct->pci_dev->dev, "Device state is now %s\n",
 		lio_get_state_string(&oct->status));
 
-	/* cn63xx_cleanup_aer_uncorrect_error_status(oct->pci_dev); */
 	/* making it a common function for all OCTEON models */
 	cleanup_aer_uncorrect_error_status(oct->pci_dev);
 }
@@ -518,6 +508,9 @@ static struct pci_driver liquidio_pci_driver = {
 	.suspend	= liquidio_suspend,
 	.resume		= liquidio_resume,
 #endif
+#ifdef CONFIG_PCI_IOV
+	.sriov_configure = liquidio_enable_sriov,
+#endif
 };
 
 /**
@@ -763,6 +756,7 @@ static void delete_glists(struct lio *lio)
 	}
 
 	kfree((void *)lio->glist);
+	kfree((void *)lio->glist_lock);
 }
 
 /**
@@ -933,7 +927,6 @@ static inline void update_link_status(struct net_device *netdev,
 
 		if (lio->linfo.link.s.link_up) {
 			netif_carrier_on(netdev);
-			/* start_txq(netdev); */
 			txqs_wake(netdev);
 		} else {
 			netif_carrier_off(netdev);
@@ -1011,7 +1004,7 @@ static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct)
 	if (oct->int_status & OCT_DEV_INTR_PKT_DATA) {
 		for (oq_no = 0; oq_no < MAX_OCTEON_OUTPUT_QUEUES(oct);
 		     oq_no++) {
-			if (!(oct->droq_intr & (1ULL << oq_no)))
+			if (!(oct->droq_intr & BIT_ULL(oq_no)))
 				continue;
 
 			droq = oct->droq[oq_no];
@@ -1322,6 +1315,7 @@ liquidio_probe(struct pci_dev *pdev,
 		complete(&first_stage);
 
 	if (octeon_device_init(oct_dev)) {
+		complete(&hs->init);
 		liquidio_remove(pdev);
 		return -ENOMEM;
 	}
@@ -1346,7 +1340,15 @@ liquidio_probe(struct pci_dev *pdev,
 			oct_dev->watchdog_task = kthread_create(
 			    liquidio_watchdog, oct_dev,
 			    "liowd/%02hhx:%02hhx.%hhx", bus, device, function);
-			wake_up_process(oct_dev->watchdog_task);
+			if (!IS_ERR(oct_dev->watchdog_task)) {
+				wake_up_process(oct_dev->watchdog_task);
+			} else {
+				oct_dev->watchdog_task = NULL;
+				dev_err(&oct_dev->pci_dev->dev,
+					"failed to create kernel_thread\n");
+				liquidio_remove(pdev);
+				return -1;
+			}
 		}
 	}
 
@@ -1410,6 +1412,8 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 		if (lio_wait_for_oq_pkts(oct))
 			dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
 
+	/* fallthrough */
+	case OCT_DEV_INTR_SET_DONE:
 		/* Disable interrupts  */
 		oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
 
@@ -1436,12 +1440,20 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 				pci_disable_msi(oct->pci_dev);
 		}
 
+	/* fallthrough */
+	case OCT_DEV_MSIX_ALLOC_VECTOR_DONE:
 		if (OCTEON_CN23XX_PF(oct))
 			octeon_free_ioq_vector(oct);
+
+	/* fallthrough */
+	case OCT_DEV_MBOX_SETUP_DONE:
+		if (OCTEON_CN23XX_PF(oct))
+			oct->fn_list.free_mbox(oct);
+
 	/* fallthrough */
 	case OCT_DEV_IN_RESET:
 	case OCT_DEV_DROQ_INIT_DONE:
-		/*atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);*/
+		/* Wait for any pending operations */
 		mdelay(100);
 		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
 			if (!(oct->io_qmask.oq & BIT_ULL(i)))
@@ -1472,6 +1484,10 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 				continue;
 			octeon_delete_instr_queue(oct, i);
 		}
+#ifdef CONFIG_PCI_IOV
+		if (oct->sriov_info.sriov_enabled)
+			pci_disable_sriov(oct->pci_dev);
+#endif
 		/* fallthrough */
 	case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
 		octeon_free_sc_buffer_pool(oct);
@@ -1491,10 +1507,13 @@ static void octeon_destroy_resources(struct octeon_device *oct)
 		octeon_unmap_pci_barx(oct, 1);
 
 		/* fallthrough */
-	case OCT_DEV_BEGIN_STATE:
+	case OCT_DEV_PCI_ENABLE_DONE:
+		pci_clear_master(oct->pci_dev);
 		/* Disable the device, releasing the PCI INT */
 		pci_disable_device(oct->pci_dev);
 
+		/* fallthrough */
+	case OCT_DEV_BEGIN_STATE:
 		/* Nothing to be done here either */
 		break;
 	}                       /* end switch (oct->status) */
@@ -1764,6 +1783,7 @@ static int octeon_pci_os_setup(struct octeon_device *oct)
 
 	if (dma_set_mask_and_coherent(&oct->pci_dev->dev, DMA_BIT_MASK(64))) {
 		dev_err(&oct->pci_dev->dev, "Unexpected DMA device capability\n");
+		pci_disable_device(oct->pci_dev);
 		return 1;
 	}
 
@@ -2426,7 +2446,6 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
 		 * Return back if tx_done is false.
 		 */
 		update_txq_status(oct, iq_no);
-		/*tx_done = (iq->flush_index == iq->octeon_read_index);*/
 	} else {
 		dev_err(&oct->pci_dev->dev, "%s:  iq (%d) num invalid\n",
 			__func__, iq_no);
@@ -2868,17 +2887,6 @@ static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
 	struct octnic_ctrl_pkt nctrl;
 	int ret = 0;
 
-	/* Limit the MTU to make sure the ethernet packets are between 68 bytes
-	 * and 16000 bytes
-	 */
-	if ((new_mtu < LIO_MIN_MTU_SIZE) ||
-	    (new_mtu > LIO_MAX_MTU_SIZE)) {
-		dev_err(&oct->pci_dev->dev, "Invalid MTU: %d\n", new_mtu);
-		dev_err(&oct->pci_dev->dev, "Valid range %d and %d\n",
-			LIO_MIN_MTU_SIZE, LIO_MAX_MTU_SIZE);
-		return -EINVAL;
-	}
-
 	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
 
 	nctrl.ncmd.u64 = 0;
@@ -3567,7 +3575,152 @@ static void liquidio_del_vxlan_port(struct net_device *netdev,
 				    OCTNET_CMD_VXLAN_PORT_DEL);
 }
 
-static struct net_device_ops lionetdevops = {
+/* Send a CHANGE_MACADDR control packet for one VF and cache its MAC. */
+static int __liquidio_set_vf_mac(struct net_device *netdev, int vfidx,
+				 u8 *mac, bool is_admin_assigned)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	u8 *mac_dst;
+
+	if (!is_valid_ether_addr(mac))
+		return -EINVAL;
+	if (vfidx < 0 || vfidx >= oct->sriov_info.max_vfs)
+		return -EINVAL;
+
+	/* Zeroing the whole packet also clears ncmd.u64 and udd[0]. */
+	memset(&nctrl, 0, sizeof(nctrl));
+
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = LIO_CMD_WAIT_TM;
+	nctrl.cb_fn = 0;
+
+	nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
+	/* vf_num (param1) counts from 1, vfidx counts from 0 */
+	nctrl.ncmd.s.param1 = vfidx + 1;
+	nctrl.ncmd.s.param2 = is_admin_assigned ? 1 : 0;
+	nctrl.ncmd.s.more = 1;
+
+	/* The MAC, in network byte order, fills bytes 2..7 of udd[0]. */
+	mac_dst = (u8 *)&nctrl.udd[0] + 2;
+	ether_addr_copy(mac_dst, mac);
+	oct->sriov_info.vf_macaddr[vfidx] = nctrl.udd[0];
+
+	octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	return 0;
+}
+
+static int liquidio_set_vf_mac(struct net_device *netdev, int vfidx, u8 *mac)
+{
+	struct lio *lio = GET_LIO(netdev);
+	int err;
+
+	/* "true": the address is passed on as an admin-assigned one */
+	err = __liquidio_set_vf_mac(netdev, vfidx, mac, true);
+	if (err)
+		return err;
+	cn23xx_tell_vf_its_macaddr_changed(lio->oct_dev, vfidx, mac);
+	return 0;
+}
+
+/* Set the 802.1Q VLAN id and priority of VF @vfidx; a zero @vlan sends the
+ * filter-delete command instead of the filter-add one.
+ */
+static int liquidio_set_vf_vlan(struct net_device *netdev, int vfidx,
+				u16 vlan, u8 qos, __be16 vlan_proto)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	u16 vlantci = 0;
+
+	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+		return -EINVAL;
+
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+
+	if (vlan >= VLAN_N_VID || qos > 7)
+		return -EINVAL;
+
+	/* Without a VLAN id the TCI stays zero and @qos is ignored. */
+	if (vlan)
+		vlantci = vlan | (u16)qos << VLAN_PRIO_SHIFT;
+
+	/* Nothing to send when the cached TCI already matches. */
+	if (oct->sriov_info.vf_vlantci[vfidx] == vlantci)
+		return 0;
+
+	memset(&nctrl, 0, sizeof(nctrl));
+
+	nctrl.ncmd.s.cmd = vlan ? OCTNET_CMD_ADD_VLAN_FILTER :
+				  OCTNET_CMD_DEL_VLAN_FILTER;
+	nctrl.ncmd.s.param1 = vlantci;
+	/* vf_num (param2) is 1 based while vfidx is 0 based */
+	nctrl.ncmd.s.param2 = vfidx + 1;
+	nctrl.ncmd.s.more = 0;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.cb_fn = 0;
+	nctrl.wait_time = LIO_CMD_WAIT_TM;
+
+	octnet_send_nic_ctrl_pkt(oct, &nctrl);
+
+	oct->sriov_info.vf_vlantci[vfidx] = vlantci;
+
+	return 0;
+}
+
+static int liquidio_get_vf_config(struct net_device *netdev, int vfidx,
+				  struct ifla_vf_info *ivi)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+		return -EINVAL;
+
+	/* Every field is answered from the values cached in sriov_info. */
+	ivi->vf = vfidx;
+	ether_addr_copy(ivi->mac,
+			(u8 *)&oct->sriov_info.vf_macaddr[vfidx] + 2);
+	ivi->vlan = oct->sriov_info.vf_vlantci[vfidx] & VLAN_VID_MASK;
+	ivi->qos = oct->sriov_info.vf_vlantci[vfidx] >> VLAN_PRIO_SHIFT;
+	ivi->linkstate = oct->sriov_info.vf_linkstate[vfidx];
+	return 0;
+}
+
+/* Send OCTNET_CMD_SET_VF_LINKSTATE for VF @vfidx unless the cache matches. */
+static int liquidio_set_vf_link_state(struct net_device *netdev, int vfidx,
+				      int linkstate)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+
+	if (vfidx < 0 || vfidx >= oct->sriov_info.num_vfs_alloced)
+		return -EINVAL;
+
+	if (oct->sriov_info.vf_linkstate[vfidx] == linkstate)
+		return 0;
+
+	memset(&nctrl, 0, sizeof(nctrl));
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = LIO_CMD_WAIT_TM;
+	nctrl.cb_fn = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_VF_LINKSTATE;
+	/* vf_num (param1) is 1 based while vfidx is 0 based */
+	nctrl.ncmd.s.param1 = vfidx + 1;
+	nctrl.ncmd.s.param2 = linkstate;
+	nctrl.ncmd.s.more = 0;
+
+	octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	oct->sriov_info.vf_linkstate[vfidx] = linkstate;
+
+	return 0;
+}
+
+static const struct net_device_ops lionetdevops = {
 	.ndo_open		= liquidio_open,
 	.ndo_stop		= liquidio_stop,
 	.ndo_start_xmit		= liquidio_xmit,
@@ -3584,6 +3737,11 @@ static struct net_device_ops lionetdevops = {
 	.ndo_set_features	= liquidio_set_features,
 	.ndo_udp_tunnel_add	= liquidio_add_vxlan_port,
 	.ndo_udp_tunnel_del	= liquidio_del_vxlan_port,
+	.ndo_set_vf_mac		= liquidio_set_vf_mac,
+	.ndo_set_vf_vlan	= liquidio_set_vf_vlan,
+	.ndo_get_vf_config	= liquidio_get_vf_config,
+	.ndo_set_vf_link_state  = liquidio_set_vf_link_state,
+	.ndo_select_queue	= select_q
 };
 
 /** \brief Entry point for the liquidio module
@@ -3595,7 +3753,7 @@ static int __init liquidio_init(void)
 
 	init_completion(&first_stage);
 
-	octeon_init_device_list(conf_type);
+	octeon_init_device_list(OCTEON_CONFIG_TYPE_DEFAULT);
 
 	if (liquidio_init_pci())
 		return -EINVAL;
@@ -3816,9 +3974,6 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 
 		SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
 
-		if (num_iqueues > 1)
-			lionetdevops.ndo_select_queue = select_q;
-
 		/* Associate the routines that will handle different
 		 * netdev tasks.
 		 */
@@ -3891,6 +4046,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 		netdev->hw_features = netdev->hw_features &
 			~NETIF_F_HW_VLAN_CTAG_RX;
 
+		/* MTU range: 68 - 16000 */
+		netdev->min_mtu = LIO_MIN_MTU_SIZE;
+		netdev->max_mtu = LIO_MAX_MTU_SIZE;
+
 		/* Point to the  properties for octeon device to which this
 		 * interface belongs.
 		 */
@@ -3902,6 +4061,19 @@ static int setup_nic_devices(struct octeon_device *octeon_dev)
 			"if%d gmx: %d hw_addr: 0x%llx\n", i,
 			lio->linfo.gmxport, CVM_CAST64(lio->linfo.hw_addr));
 
+		for (j = 0; j < octeon_dev->sriov_info.max_vfs; j++) {
+			u8 vfmac[ETH_ALEN];
+
+			random_ether_addr(&vfmac[0]);
+			if (__liquidio_set_vf_mac(netdev, j,
+						  &vfmac[0], false)) {
+				dev_err(&octeon_dev->pci_dev->dev,
+					"Error setting VF%d MAC address\n",
+					j);
+				goto setup_nic_dev_fail;
+			}
+		}
+
 		/* 64-bit swap required on LE machines */
 		octeon_swap_8B_data(&lio->linfo.hw_addr, 1);
 		for (j = 0; j < 6; j++)
@@ -3997,6 +4169,101 @@ setup_nic_wait_intr:
 	return -ENODEV;
 }
 
+#ifdef CONFIG_PCI_IOV
+/* Enable SR-IOV with the VF count stored in sriov_info.num_vfs_alloced.
+ * Returns that count, or the pci_enable_sriov() error code on failure.
+ */
+static int octeon_enable_sriov(struct octeon_device *oct)
+{
+	unsigned int num_vfs_alloced = oct->sriov_info.num_vfs_alloced;
+	struct pci_dev *vfdev = NULL;
+	u32 ring = 0;
+	int err;
+
+	/* Nothing to enable: hand back the VF count unchanged. */
+	if (!OCTEON_CN23XX_PF(oct) || !num_vfs_alloced)
+		return num_vfs_alloced;
+
+	err = pci_enable_sriov(oct->pci_dev, num_vfs_alloced);
+	if (err) {
+		dev_err(&oct->pci_dev->dev,
+			"OCTEON: Failed to enable PCI sriov: %d\n",
+			err);
+		oct->sriov_info.num_vfs_alloced = 0;
+		return err;
+	}
+	oct->sriov_info.sriov_enabled = 1;
+
+	/* Fill the lookup table that maps a DPI ring number to a VF pci_dev:
+	 * each VF whose physfn is this device takes the next slot, and slots
+	 * are rings_per_vf entries apart.
+	 */
+	while ((vfdev = pci_get_device(PCI_VENDOR_ID_CAVIUM,
+				       OCTEON_CN23XX_VF_VID, vfdev))) {
+		if (!vfdev->is_virtfn || vfdev->physfn != oct->pci_dev)
+			continue;
+
+		oct->sriov_info.dpiring_to_vfpcidev_lut[ring] = vfdev;
+		ring += oct->sriov_info.rings_per_vf;
+	}
+
+	return num_vfs_alloced;
+}
+
+static int lio_pci_sriov_disable(struct octeon_device *oct)
+{
+	int u;
+
+	if (pci_vfs_assigned(oct->pci_dev)) {
+		dev_err(&oct->pci_dev->dev, "VFs are still assigned to VMs.\n");
+		return -EPERM;
+	}
+
+	pci_disable_sriov(oct->pci_dev);
+
+	/* Clear every ring-to-VF slot. Stepping by rings_per_vf would never
+	 * terminate if that stride happened to be zero.
+	 */
+	for (u = 0; u < MAX_POSSIBLE_VFS; u++)
+		oct->sriov_info.dpiring_to_vfpcidev_lut[u] = NULL;
+
+	oct->sriov_info.num_vfs_alloced = 0;
+	dev_info(&oct->pci_dev->dev, "oct->pf_num:%d disabled VFs\n",
+		 oct->pf_num);
+
+	return 0;
+}
+
+/* sriov_configure callback: enable @num_vfs VFs, or disable SR-IOV if 0. */
+static int liquidio_enable_sriov(struct pci_dev *dev, int num_vfs)
+{
+	struct octeon_device *oct = pci_get_drvdata(dev);
+	int ret = 0;
+
+	if ((num_vfs == oct->sriov_info.num_vfs_alloced) &&
+	    (oct->sriov_info.sriov_enabled)) {
+		dev_info(&oct->pci_dev->dev, "oct->pf_num:%d already enabled num_vfs:%d\n",
+			 oct->pf_num, num_vfs);
+		return 0;
+	}
+
+	if (!num_vfs) {
+		ret = lio_pci_sriov_disable(oct);
+	} else if (num_vfs > oct->sriov_info.max_vfs) {
+		dev_err(&oct->pci_dev->dev,
+			"OCTEON: Max allowed VFs:%d user requested:%d\n",
+			oct->sriov_info.max_vfs, num_vfs);
+		ret = -EPERM;
+	} else {
+		oct->sriov_info.num_vfs_alloced = num_vfs;
+		ret = octeon_enable_sriov(oct);
+		dev_info(&oct->pci_dev->dev, "oct->pf_num:%d num_vfs:%d\n",
+			 oct->pf_num, num_vfs);
+	}
+	return ret;
+}
+#endif
+
 /**
  * \brief initialize the NIC
  * @param oct octeon device
@@ -4102,6 +4369,52 @@ static void nic_starter(struct work_struct *work)
 	complete(&handshake[oct->octeon_id].started);
 }
 
+/* Handle a VF driver notice: driver loaded, removed, or MAC changed. */
+static int
+octeon_recv_vf_drv_notice(struct octeon_recv_info *recv_info, void *buf)
+{
+	struct octeon_device *oct = (struct octeon_device *)buf;
+	struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
+	u64 *data = (u64 *)get_rbd(recv_pkt->buffer_ptr[0]);
+	int notice = recv_pkt->rh.r.ossp;
+	u64 vf_num = data[0];
+	int i, vf_idx;
+
+	/* the first 64-bit word of data is the vf_num */
+	octeon_swap_8B_data(&vf_num, 1);
+	vf_idx = (int)vf_num - 1;
+
+	/* vf_num comes from the packet: range-check before shifting/indexing */
+	if (vf_idx < 0 || vf_idx >= oct->sriov_info.max_vfs) {
+		dev_err(&oct->pci_dev->dev, "invalid VF number in notice\n");
+	} else if (notice == VF_DRV_LOADED) {
+		if (!(oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vf_idx))) {
+			oct->sriov_info.vf_drv_loaded_mask |= BIT_ULL(vf_idx);
+			dev_info(&oct->pci_dev->dev,
+				 "driver for VF%d was loaded\n", vf_idx);
+			try_module_get(THIS_MODULE);
+		}
+	} else if (notice == VF_DRV_REMOVED) {
+		if (oct->sriov_info.vf_drv_loaded_mask & BIT_ULL(vf_idx)) {
+			oct->sriov_info.vf_drv_loaded_mask &= ~BIT_ULL(vf_idx);
+			dev_info(&oct->pci_dev->dev,
+				 "driver for VF%d was removed\n", vf_idx);
+			module_put(THIS_MODULE);
+		}
+	} else if (notice == VF_DRV_MACADDR_CHANGED) {
+		oct->sriov_info.vf_macaddr[vf_idx] = data[1];
+		dev_info(&oct->pci_dev->dev,
+			 "VF driver changed VF%d's MAC address to %pM\n",
+			 vf_idx, (u8 *)&data[1] + 2);
+	}
+
+	for (i = 0; i < recv_pkt->buffer_count; i++)
+		recv_buffer_free(recv_pkt->buffer_ptr[i]);
+	octeon_free_recv_info(recv_info);
+
+	return 0;
+}
+
 /**
  * \brief Device initialization for each Octeon device that is probed
  * @param octeon_dev  octeon device
@@ -4121,6 +4434,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 	if (octeon_pci_os_setup(octeon_dev))
 		return 1;
 
+	atomic_set(&octeon_dev->status, OCT_DEV_PCI_ENABLE_DONE);
+
 	/* Identify the Octeon type and map the BAR address space. */
 	if (octeon_chip_specific_setup(octeon_dev)) {
 		dev_err(&octeon_dev->pci_dev->dev, "Chip specific setup failed\n");
@@ -4160,6 +4475,9 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 				    octeon_core_drv_init,
 				    octeon_dev);
 
+	octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC,
+				    OPCODE_NIC_VF_DRV_NOTICE,
+				    octeon_recv_vf_drv_notice, octeon_dev);
 	INIT_DELAYED_WORK(&octeon_dev->nic_poll_work.work, nic_starter);
 	octeon_dev->nic_poll_work.ctxptr = (void *)octeon_dev;
 	schedule_delayed_work(&octeon_dev->nic_poll_work.work,
@@ -4167,7 +4485,10 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
 	atomic_set(&octeon_dev->status, OCT_DEV_DISPATCH_INIT_DONE);
 
-	octeon_set_io_queues_off(octeon_dev);
+	if (octeon_set_io_queues_off(octeon_dev)) {
+		dev_err(&octeon_dev->pci_dev->dev, "setting io queues off failed\n");
+		return 1;
+	}
 
 	if (OCTEON_CN23XX_PF(octeon_dev)) {
 		ret = octeon_dev->fn_list.setup_device_regs(octeon_dev);
@@ -4189,9 +4510,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 	if (octeon_setup_instr_queues(octeon_dev)) {
 		dev_err(&octeon_dev->pci_dev->dev,
 			"instruction queue initialization failed\n");
-		/* On error, release any previously allocated queues */
-		for (j = 0; j < octeon_dev->num_iqs; j++)
-			octeon_delete_instr_queue(octeon_dev, j);
 		return 1;
 	}
 	atomic_set(&octeon_dev->status, OCT_DEV_INSTR_QUEUE_INIT_DONE);
@@ -4207,19 +4525,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 
 	if (octeon_setup_output_queues(octeon_dev)) {
 		dev_err(&octeon_dev->pci_dev->dev, "Output queue initialization failed\n");
-		/* Release any previously allocated queues */
-		for (j = 0; j < octeon_dev->num_oqs; j++)
-			octeon_delete_droq(octeon_dev, j);
 		return 1;
 	}
 
 	atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE);
 
 	if (OCTEON_CN23XX_PF(octeon_dev)) {
+		if (octeon_dev->fn_list.setup_mbox(octeon_dev)) {
+			dev_err(&octeon_dev->pci_dev->dev, "OCTEON: Mailbox setup failed\n");
+			return 1;
+		}
+		atomic_set(&octeon_dev->status, OCT_DEV_MBOX_SETUP_DONE);
+
 		if (octeon_allocate_ioq_vector(octeon_dev)) {
 			dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n");
 			return 1;
 		}
+		atomic_set(&octeon_dev->status, OCT_DEV_MSIX_ALLOC_VECTOR_DONE);
 
 	} else {
 		/* The input and output queue registers were setup earlier (the
@@ -4247,6 +4569,8 @@ static int octeon_device_init(struct octeon_device *octeon_dev)
 	/* Enable Octeon device interrupts */
 	octeon_dev->fn_list.enable_interrupt(octeon_dev, OCTEON_ALL_INTR);
 
+	atomic_set(&octeon_dev->status, OCT_DEV_INTR_SET_DONE);
+
 	/* Enable the input and output queues for this Octeon device */
 	ret = octeon_dev->fn_list.enable_io_queues(octeon_dev);
 	if (ret) {
diff --git a/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
new file mode 100644
index 000000000000..70d96c10c673
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/lio_vf_main.c
@@ -0,0 +1,3251 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#include <linux/pci.h>
+#include <net/vxlan.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "octeon_nic.h"
+#include "octeon_main.h"
+#include "octeon_network.h"
+#include "cn23xx_vf_device.h"
+
+MODULE_AUTHOR("Cavium Networks, <support@cavium.com>");
+MODULE_DESCRIPTION("Cavium LiquidIO Intelligent Server Adapter Virtual Function Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(LIQUIDIO_VERSION);
+
+static int debug = -1;
+module_param(debug, int, 0644);
+MODULE_PARM_DESC(debug, "NETIF_MSG debug bits");
+
+#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
+
+/* Bit mask values for lio->ifstate */
+#define   LIO_IFSTATE_DROQ_OPS             0x01
+#define   LIO_IFSTATE_REGISTERED           0x02
+#define   LIO_IFSTATE_RUNNING              0x04
+#define   LIO_IFSTATE_RX_TIMESTAMP_ENABLED 0x08
+
+struct liquidio_if_cfg_context {
+	int octeon_id;
+
+	wait_queue_head_t wc;
+
+	int cond;
+};
+
+struct liquidio_if_cfg_resp {
+	u64 rh;
+	struct liquidio_if_cfg_info cfg_info;
+	u64 status;
+};
+
+struct liquidio_rx_ctl_context {
+	int octeon_id;
+
+	wait_queue_head_t wc;
+
+	int cond;
+};
+
+struct oct_timestamp_resp {
+	u64 rh;
+	u64 timestamp;
+	u64 status;
+};
+
+union tx_info {
+	u64 u64;
+	struct {
+#ifdef __BIG_ENDIAN_BITFIELD
+		u16 gso_size;
+		u16 gso_segs;
+		u32 reserved;
+#else
+		u32 reserved;
+		u16 gso_segs;
+		u16 gso_size;
+#endif
+	} s;
+};
+
+#define OCTNIC_MAX_SG  (MAX_SKB_FRAGS)
+
+#define OCTNIC_GSO_MAX_HEADER_SIZE 128
+#define OCTNIC_GSO_MAX_SIZE \
+		(CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE)
+
+struct octnic_gather {
+	/* List manipulation. Next and prev pointers. */
+	struct list_head list;
+
+	/* Size of the gather component at sg in bytes. */
+	int sg_size;
+
+	/* Number of bytes that sg was adjusted to make it 8B-aligned. */
+	int adjust;
+
+	/* Gather component that can accommodate max sized fragment list
+	 * received from the IP layer.
+	 */
+	struct octeon_sg_entry *sg;
+};
+
+struct octeon_device_priv {
+	/* Tasklet structures for this device. */
+	struct tasklet_struct droq_tasklet;
+	unsigned long napi_mask;
+};
+
+static int
+liquidio_vf_probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static void liquidio_vf_remove(struct pci_dev *pdev);
+static int octeon_device_init(struct octeon_device *oct);
+static int liquidio_stop(struct net_device *netdev);
+
+/* Wait for the output queues to drain; returns packets still pending. */
+static int lio_wait_for_oq_pkts(struct octeon_device *oct)
+{
+	struct octeon_device_priv *oct_priv =
+	    (struct octeon_device_priv *)oct->priv;
+	int retry = MAX_VF_IP_OP_PENDING_PKT_COUNT;
+	int pkt_cnt, pending_pkts;
+	int i;
+
+	do {
+		pending_pkts = 0;
+		pkt_cnt = 0;
+		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
+			if (!(oct->io_qmask.oq & BIT_ULL(i)))
+				continue;
+			pkt_cnt += octeon_droq_check_hw_for_pkts(oct->droq[i]);
+		}
+		if (pkt_cnt > 0) {
+			pending_pkts += pkt_cnt;
+			tasklet_schedule(&oct_priv->droq_tasklet);
+		}
+		schedule_timeout_uninterruptible(1);
+	} while (retry-- && pending_pkts);
+
+	/* pkt_cnt is only a per-pass scratch sum; report what is left */
+	return pending_pkts;
+}
+
+/**
+ * \brief wait for all pending requests to complete
+ * @param oct Pointer to Octeon device
+ * @return 0 when the ordered list has no pending requests left, else 1
+ *
+ * Called during shutdown sequence
+ */
+static int wait_for_pending_requests(struct octeon_device *oct)
+{
+	atomic_t *pending =
+	    &oct->response_list[OCTEON_ORDERED_SC_LIST].pending_req_count;
+	int tries = MAX_VF_IP_OP_PENDING_PKT_COUNT;
+	int pcount = 0;
+
+	/* Re-check every HZ / 10 jiffies until empty or out of tries. */
+	while (tries-- > 0) {
+		pcount = atomic_read(pending);
+		if (!pcount)
+			break;
+
+		schedule_timeout_uninterruptible(HZ / 10);
+	}
+
+	return pcount ? 1 : 0;
+}
+
+/**
+ * \brief Cause device to go quiet so it can be safely removed/reset/etc
+ * @param oct Pointer to Octeon device
+ */
+static void pcierror_quiesce_device(struct octeon_device *oct)
+{
+	int i;
+
+	/* No queue is disabled by this function itself. It gives in-flight
+	 * work time to finish and then force-completes whatever is still
+	 * outstanding on the instruction queues and the ordered list.
+	 */
+
+	/* Sleep 100 jiffies to allow for in-flight requests */
+	schedule_timeout_uninterruptible(100);
+
+	if (wait_for_pending_requests(oct))
+		dev_err(&oct->pci_dev->dev, "There were pending requests\n");
+
+	/* Force all requests waiting to be fetched by OCTEON to complete. */
+	for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
+		struct octeon_instr_queue *iq;
+
+		if (!(oct->io_qmask.iq & BIT_ULL(i)))
+			continue;
+		iq = oct->instr_queue[i];
+
+		if (atomic_read(&iq->instr_pending)) {
+			spin_lock_bh(&iq->lock);
+			iq->fill_cnt = 0;
+			iq->octeon_read_index = iq->host_write_index;
+			iq->stats.instr_processed +=
+			    atomic_read(&iq->instr_pending);
+			lio_process_iq_request_list(oct, iq, 0);
+			spin_unlock_bh(&iq->lock);
+		}
+	}
+
+	/* Force all pending ordered list requests to time out. */
+	lio_process_ordered_list(oct, 1);
+
+	/* Output queue packets are not waited for in this function. */
+}
+
+/**
+ * \brief Cleanup PCI AER uncorrectable error status
+ * @param dev Pointer to PCI device
+ */
+static void cleanup_aer_uncorrect_error_status(struct pci_dev *dev)
+{
+	const int aer = 0x100;
+	u32 uncor, sever;
+
+	pr_info("%s :\n", __func__);
+
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, &uncor);
+	pci_read_config_dword(dev, aer + PCI_ERR_UNCOR_SEVER, &sever);
+
+	/* Normal channel selects the nonfatal bits, otherwise the fatal ones */
+	uncor &= (dev->error_state == pci_channel_io_normal) ? ~sever : sever;
+
+	pci_write_config_dword(dev, aer + PCI_ERR_UNCOR_STATUS, uncor);
+}
+
+/**
+ * \brief Stop all PCI IO to a given device
+ * @param oct Pointer to Octeon device
+ */
+static void stop_pci_io(struct octeon_device *oct)
+{
+	struct msix_entry *msix_entries;
+	int i;
+
+	/* No more instructions will be forwarded. */
+	atomic_set(&oct->status, OCT_DEV_IN_RESET);
+
+	for (i = 0; i < oct->ifcount; i++)
+		netif_device_detach(oct->props[i].netdev);
+
+	/* Disable interrupts */
+	oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+	pcierror_quiesce_device(oct);
+	if (oct->msix_on) {
+		msix_entries = (struct msix_entry *)oct->msix_entries;
+		for (i = 0; i < oct->num_msix_irqs; i++) {
+			/* drop the affinity hint, then free the vector's IRQ */
+			irq_set_affinity_hint(msix_entries[i].vector,
+					      NULL);
+			free_irq(msix_entries[i].vector,
+				 &oct->ioq_vector[i]);
+		}
+		pci_disable_msix(oct->pci_dev);
+		kfree(oct->msix_entries);
+		oct->msix_entries = NULL;
+		octeon_free_ioq_vector(oct);
+	}
+	dev_dbg(&oct->pci_dev->dev, "Device state is now %s\n",
+		lio_get_state_string(&oct->status));
+
+	/* making it a common function for all OCTEON models */
+	cleanup_aer_uncorrect_error_status(oct->pci_dev);
+
+	pci_disable_device(oct->pci_dev);
+}
+
+/**
+ * \brief PCI error_detected handler
+ * @param pdev Pointer to PCI device
+ * @param state The current pci connection state
+ *
+ * A normal channel state is reported as recoverable after the AER status
+ * cleanup; any other state stops PCI IO and asks for a disconnect.
+ */
+static pci_ers_result_t liquidio_pcie_error_detected(struct pci_dev *pdev,
+						     pci_channel_state_t state)
+{
+	struct octeon_device *oct = pci_get_drvdata(pdev);
+	struct device *dev = &oct->pci_dev->dev;
+
+	if (state != pci_channel_io_normal) {
+		/* Non-correctable Fatal errors */
+		dev_err(dev, "Non-correctable FATAL reported by PCI AER driver\n");
+		stop_pci_io(oct);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* Non-correctable Non-fatal errors */
+	dev_err(dev, "Non-correctable non-fatal error reported:\n");
+	cleanup_aer_uncorrect_error_status(oct->pci_dev);
+	return PCI_ERS_RESULT_CAN_RECOVER;
+}
+
+/* PCI error-recovery (AER) callbacks; only error_detected is provided */
+static const struct pci_error_handlers liquidio_vf_err_handler = {
+	.error_detected	= liquidio_pcie_error_detected,
+};
+
+/* PCI IDs claimed by this driver; the all-zero entry terminates the table */
+static const struct pci_device_id liquidio_vf_pci_tbl[] = {
+	{
+		.vendor		= PCI_VENDOR_ID_CAVIUM,
+		.device		= OCTEON_CN23XX_VF_VID,
+		.subvendor	= PCI_ANY_ID,
+		.subdevice	= PCI_ANY_ID,
+	},
+	{ 0 }
+};
+MODULE_DEVICE_TABLE(pci, liquidio_vf_pci_tbl);
+
+/* PCI driver glue: probe/remove entry points plus the AER error handlers */
+static struct pci_driver liquidio_vf_pci_driver = {
+	.name = "LiquidIO_VF",
+	.id_table = liquidio_vf_pci_tbl,
+	.probe = liquidio_vf_probe,
+	.remove = liquidio_vf_remove,
+	.err_handler = &liquidio_vf_err_handler,
+};
+
+/**
+ * \brief Test one or more interface state flags
+ * @param lio per-network private data
+ * @param state_flag flag state to check
+ * @returns non-zero if any bit of state_flag is set in lio->ifstate
+ */
+static int ifstate_check(struct lio *lio, int state_flag)
+{
+	int cur_state = atomic_read(&lio->ifstate);
+
+	return cur_state & state_flag;
+}
+
+/**
+ * \brief set interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to set
+ *
+ * The flag bits are OR-ed into lio->ifstate with a single atomic
+ * read-modify-write so that concurrent set/reset calls on other flags
+ * cannot be lost.
+ */
+static void ifstate_set(struct lio *lio, int state_flag)
+{
+	atomic_or(state_flag, &lio->ifstate);
+}
+
+/**
+ * \brief clear interface state
+ * @param lio per-network private data
+ * @param state_flag flag state to clear
+ *
+ * The flag bits are masked out of lio->ifstate with a single atomic
+ * read-modify-write so that concurrent set/reset calls on other flags
+ * cannot be lost.
+ */
+static void ifstate_reset(struct lio *lio, int state_flag)
+{
+	atomic_and(~state_flag, &lio->ifstate);
+}
+
+/**
+ * \brief Stop every Tx queue of a network device
+ * @param netdev network device
+ */
+static void txqs_stop(struct net_device *netdev)
+{
+	int q;
+
+	if (!netif_is_multiqueue(netdev)) {
+		netif_stop_queue(netdev);
+		return;
+	}
+
+	for (q = 0; q < netdev->num_tx_queues; q++)
+		netif_stop_subqueue(netdev, q);
+}
+
+/**
+ * \brief Start every Tx queue of a network device
+ * @param netdev network device
+ */
+static void txqs_start(struct net_device *netdev)
+{
+	int q;
+
+	if (!netif_is_multiqueue(netdev)) {
+		netif_start_queue(netdev);
+		return;
+	}
+
+	for (q = 0; q < netdev->num_tx_queues; q++)
+		netif_start_subqueue(netdev, q);
+}
+
+/**
+ * \brief Wake Tx queues and account each restart
+ * @param netdev network device
+ *
+ * On a multiqueue device only the sub-queues that are currently stopped
+ * are woken; the tx_restart counter of the matching instruction queue is
+ * bumped for each of them.
+ */
+static void txqs_wake(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	int i;
+
+	if (!netif_is_multiqueue(netdev)) {
+		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, lio->txq,
+					  tx_restart, 1);
+		netif_wake_queue(netdev);
+		return;
+	}
+
+	for (i = 0; i < netdev->num_tx_queues; i++) {
+		int qno = lio->linfo.txpciq[i % (lio->linfo.num_txpciq)]
+			      .s.q_no;
+
+		if (!__netif_subqueue_stopped(netdev, i))
+			continue;
+
+		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, qno, tx_restart, 1);
+		netif_wake_subqueue(netdev, i);
+	}
+}
+
+/**
+ * \brief Start the Tx queues, but only while the link is up
+ * @param netdev network device
+ */
+static void start_txq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (!lio->linfo.link.s.link_up)
+		return;
+
+	txqs_start(netdev);
+}
+
+/**
+ * \brief Wake a single Tx queue
+ * @param netdev network device
+ * @param q which queue to wake (ignored on single-queue devices)
+ */
+static void wake_q(struct net_device *netdev, int q)
+{
+	if (!netif_is_multiqueue(netdev)) {
+		netif_wake_queue(netdev);
+		return;
+	}
+
+	netif_wake_subqueue(netdev, q);
+}
+
+/**
+ * \brief Stop a single Tx queue
+ * @param netdev network device
+ * @param q which queue to stop (ignored on single-queue devices)
+ */
+static void stop_q(struct net_device *netdev, int q)
+{
+	if (!netif_is_multiqueue(netdev)) {
+		netif_stop_queue(netdev);
+		return;
+	}
+
+	netif_stop_subqueue(netdev, q);
+}
+
+/**
+ * Unlink and return the first node of a list.
+ * @param root list head
+ * @returns the removed node, or NULL when the list has no node to remove
+ */
+static struct list_head *list_delete_head(struct list_head *root)
+{
+	struct list_head *first = root->next;
+
+	/* A head that points to itself in both directions is empty */
+	if ((root->prev == root) && (first == root))
+		return NULL;
+
+	if (!first)
+		return NULL;
+
+	list_del(first);
+
+	return first;
+}
+
+/**
+ * \brief Delete gather lists
+ * @param lio per-network private data
+ *
+ * Frees every gather entry on each per-queue list, then the list and lock
+ * arrays themselves. Both array pointers are cleared afterwards so that a
+ * repeated call (for example from an error path followed by device
+ * teardown) is a harmless no-op instead of a use-after-free/double free.
+ */
+static void delete_glists(struct lio *lio)
+{
+	struct octnic_gather *g;
+	int i;
+
+	if (!lio->glist)
+		return;
+
+	for (i = 0; i < lio->linfo.num_txpciq; i++) {
+		do {
+			g = (struct octnic_gather *)
+			    list_delete_head(&lio->glist[i]);
+			if (g) {
+				/* Undo the alignment adjustment applied at
+				 * allocation time before freeing the buffer.
+				 */
+				if (g->sg)
+					kfree((void *)((unsigned long)g->sg -
+							g->adjust));
+				kfree(g);
+			}
+		} while (g);
+	}
+
+	kfree(lio->glist);
+	lio->glist = NULL;
+
+	kfree(lio->glist_lock);
+	lio->glist_lock = NULL;
+}
+
+/**
+ * \brief Setup gather lists
+ * @param lio per-network private data
+ * @param num_iqs number of per-queue gather lists (and locks) to create
+ * @returns 0 on success, 1 if any allocation failed
+ *
+ * Each list is populated with lio->tx_qsize gather entries. On failure
+ * everything allocated so far is released and the list/lock pointers in
+ * lio are left NULL so later cleanup cannot touch freed memory.
+ */
+static int setup_glists(struct lio *lio, int num_iqs)
+{
+	struct octnic_gather *g;
+	int i, j;
+
+	lio->glist_lock =
+	    kcalloc(num_iqs, sizeof(*lio->glist_lock), GFP_KERNEL);
+	if (!lio->glist_lock)
+		return 1;
+
+	lio->glist =
+	    kcalloc(num_iqs, sizeof(*lio->glist), GFP_KERNEL);
+	if (!lio->glist) {
+		kfree(lio->glist_lock);
+		lio->glist_lock = NULL;
+		return 1;
+	}
+
+	for (i = 0; i < num_iqs; i++) {
+		spin_lock_init(&lio->glist_lock[i]);
+
+		INIT_LIST_HEAD(&lio->glist[i]);
+
+		for (j = 0; j < lio->tx_qsize; j++) {
+			g = kzalloc(sizeof(*g), GFP_KERNEL);
+			if (!g)
+				break;
+
+			g->sg_size = ((ROUNDUP4(OCTNIC_MAX_SG) >> 2) *
+				      OCT_SG_ENTRY_SIZE);
+
+			/* 8 spare bytes leave room for the alignment fix-up */
+			g->sg = kmalloc(g->sg_size + 8, GFP_KERNEL);
+			if (!g->sg) {
+				kfree(g);
+				break;
+			}
+
+			/* The gather component should be aligned on 64-bit
+			 * boundary
+			 */
+			if (((unsigned long)g->sg) & 7) {
+				g->adjust = 8 - (((unsigned long)g->sg) & 7);
+				g->sg = (struct octeon_sg_entry *)
+					((unsigned long)g->sg + g->adjust);
+			}
+			list_add_tail(&g->list, &lio->glist[i]);
+		}
+
+		/* The inner loop ended early: an allocation failed */
+		if (j != lio->tx_qsize) {
+			delete_glists(lio);
+			/* Never leave dangling pointers behind for a later
+			 * cleanup pass.
+			 */
+			lio->glist = NULL;
+			lio->glist_lock = NULL;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Log the current link state of a registered interface
+ * @param netdev network device
+ *
+ * Nothing is printed until the interface has reached the REGISTERED state.
+ */
+static void print_link_info(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct oct_link_info *linfo = &lio->linfo;
+
+	if (!(atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED))
+		return;
+
+	if (!linfo->link.s.link_up) {
+		netif_info(lio, link, lio->netdev, "Link Down\n");
+		return;
+	}
+
+	netif_info(lio, link, lio->netdev, "%d Mbps %s Duplex UP\n",
+		   linfo->link.s.speed,
+		   (linfo->link.s.duplex) ? "Full" : "Half");
+}
+
+/**
+ * \brief Work handler that raises a NETDEV_CHANGEMTU notification
+ * @param work work_struct data structure
+ *
+ * The notifier chain is called with the RTNL lock held.
+ */
+static void octnet_link_status_change(struct work_struct *work)
+{
+	struct lio *lio;
+
+	lio = (struct lio *)((struct cavium_wk *)work)->ctxptr;
+
+	rtnl_lock();
+	call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev);
+	rtnl_unlock();
+}
+
+/**
+ * \brief Create the workqueue used to report MTU changes
+ * @param netdev network device
+ * @returns 0 on success, -1 if the workqueue could not be created
+ */
+static int setup_link_status_change_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct workqueue_struct *wq;
+
+	wq = alloc_workqueue("link-status", WQ_MEM_RECLAIM, 0);
+	lio->link_status_wq.wq = wq;
+	if (!wq) {
+		dev_err(&lio->oct_dev->pci_dev->dev, "unable to create cavium link status wq\n");
+		return -1;
+	}
+
+	/* The work item carries lio so the handler can find its netdev */
+	INIT_DELAYED_WORK(&lio->link_status_wq.wk.work,
+			  octnet_link_status_change);
+	lio->link_status_wq.wk.ctxptr = lio;
+
+	return 0;
+}
+
+/**
+ * \brief Tear down the link status workqueue, if one was created
+ * @param netdev network device
+ */
+static void cleanup_link_status_change_wq(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	if (!lio->link_status_wq.wq)
+		return;
+
+	/* Make sure no work is pending or running before destroying */
+	cancel_delayed_work_sync(&lio->link_status_wq.wk.work);
+	destroy_workqueue(lio->link_status_wq.wq);
+}
+
+/**
+ * \brief Update link status
+ * @param netdev network device
+ * @param ls link status structure
+ *
+ * Called on receipt of a link status response from the core application to
+ * update each interface's link status. The update is ignored while the
+ * interface is not open or when the reported status equals the cached one.
+ */
+static void update_link_status(struct net_device *netdev,
+			       union oct_link_status *ls)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	if (!(lio->intf_open) || (lio->linfo.link.u64 == ls->u64))
+		return;
+
+	lio->linfo.link.u64 = ls->u64;
+
+	print_link_info(netdev);
+	lio->link_changes++;
+
+	if (!lio->linfo.link.s.link_up) {
+		netif_carrier_off(netdev);
+		txqs_stop(netdev);
+	} else {
+		netif_carrier_on(netdev);
+		txqs_wake(netdev);
+	}
+
+	/* Only a reported MTU below the current one needs handling */
+	if (lio->linfo.link.s.mtu >= netdev->mtu)
+		return;
+
+	dev_warn(&oct->pci_dev->dev,
+		 "PF has changed the MTU for gmx port. Reducing the mtu from %d to %d\n",
+		 netdev->mtu, lio->linfo.link.s.mtu);
+	lio->mtu = lio->linfo.link.s.mtu;
+	netdev->mtu = lio->linfo.link.s.mtu;
+	queue_delayed_work(lio->link_status_wq.wq,
+			   &lio->link_status_wq.wk.work, 0);
+}
+
+/**
+ * \brief Re-evaluate the Tx queue state for one instruction queue
+ * @param oct octeon device
+ * @param iq_num index into oct->instr_queue
+ *
+ * Only multiqueue net devices are handled; for any other device this
+ * function does nothing.
+ */
+static void update_txq_status(struct octeon_device *oct, int iq_num)
+{
+	struct octeon_instr_queue *iq = oct->instr_queue[iq_num];
+	struct net_device *netdev;
+	struct lio *lio;
+
+	netdev = oct->props[iq->ifidx].netdev;
+	lio = GET_LIO(netdev);
+	if (netif_is_multiqueue(netdev)) {
+		/* Wake the sub-queue only if it is stopped, the link is up
+		 * and the instruction queue has room again.
+		 */
+		if (__netif_subqueue_stopped(netdev, iq->q_index) &&
+		    lio->linfo.link.s.link_up &&
+		    (!octnet_iq_is_full(oct, iq_num))) {
+			netif_wake_subqueue(netdev, iq->q_index);
+			INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq_num,
+						  tx_restart, 1);
+		} else {
+			/* NOTE(review): this branch runs whenever any of the
+			 * three conditions above is false, and it operates on
+			 * lio->txq rather than iq_num. Confirm that this
+			 * pairing of the else is intended.
+			 */
+			if (!octnet_iq_is_full(oct, lio->txq)) {
+				INCR_INSTRQUEUE_PKT_COUNT(
+				    lio->oct_dev, lio->txq, tx_restart, 1);
+				wake_q(netdev, lio->txq);
+			}
+		}
+	}
+}
+
+/**
+ * \brief Dispatch rx processing after an MSI-X interrupt
+ * @param droq output queue that raised the interrupt
+ * @param ret interrupt cause bits returned by the MSI-X handler
+ * @returns 1 if the droq tasklet was scheduled, 0 otherwise
+ */
+static
+int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret)
+{
+	struct octeon_device *oct = droq->oct_dev;
+	struct octeon_device_priv *oct_priv;
+
+	if (droq->ops.poll_mode) {
+		droq->ops.napi_fn(droq);
+		return 0;
+	}
+
+	/* MSIX_PI_INT alone needs no action here: this will be flushed
+	 * periodically by check iq db
+	 */
+	if (!(ret & MSIX_PO_INT))
+		return 0;
+
+	oct_priv = (struct octeon_device_priv *)oct->priv;
+	dev_err(&oct->pci_dev->dev,
+		"should not come here should not get rx when poll mode = 0 for vf\n");
+	tasklet_schedule(&oct_priv->droq_tasklet);
+
+	return 1;
+}
+
+/**
+ * \brief MSI-X interrupt handler for one ioq vector
+ * @param irq unused
+ * @param dev the octeon_ioq_vector registered with request_irq()
+ * @returns IRQ_HANDLED always
+ */
+static irqreturn_t
+liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev)
+{
+	struct octeon_ioq_vector *ioq_vector = dev;
+	struct octeon_device *oct = ioq_vector->oct_dev;
+	struct octeon_droq *droq = oct->droq[ioq_vector->droq_index];
+	u64 cause;
+
+	cause = oct->fn_list.msix_interrupt_handler(ioq_vector);
+
+	/* Hand off only when an output or input queue event was flagged */
+	if (cause & (MSIX_PO_INT | MSIX_PI_INT))
+		liquidio_schedule_msix_droq_pkt_handler(droq, cause);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * \brief Setup interrupt for octeon device
+ * @param oct octeon device
+ * @returns 0 on success (also when MSI-X is not in use), 1 on failure
+ *
+ *  When oct->msix_on is set, one MSI-X vector is enabled and one IRQ is
+ *  requested per ring of this VF (oct->sriov_info.rings_per_vf). On any
+ *  failure everything acquired so far is released again.
+ */
+static int octeon_setup_interrupt(struct octeon_device *oct)
+{
+	struct msix_entry *msix_entries;
+	int num_alloc_ioq_vectors;
+	int num_ioq_vectors;
+	int irqret;
+	int i;
+
+	if (oct->msix_on) {
+		oct->num_msix_irqs = oct->sriov_info.rings_per_vf;
+
+		oct->msix_entries = kcalloc(
+		    oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL);
+		if (!oct->msix_entries)
+			return 1;
+
+		msix_entries = (struct msix_entry *)oct->msix_entries;
+
+		for (i = 0; i < oct->num_msix_irqs; i++)
+			msix_entries[i].entry = i;
+		/* min == max: either all requested vectors or failure */
+		num_alloc_ioq_vectors = pci_enable_msix_range(
+						oct->pci_dev, msix_entries,
+						oct->num_msix_irqs,
+						oct->num_msix_irqs);
+		if (num_alloc_ioq_vectors < 0) {
+			dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n");
+			kfree(oct->msix_entries);
+			oct->msix_entries = NULL;
+			return 1;
+		}
+		dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n");
+
+		num_ioq_vectors = oct->num_msix_irqs;
+
+		for (i = 0; i < num_ioq_vectors; i++) {
+			irqret = request_irq(msix_entries[i].vector,
+					     liquidio_msix_intr_handler, 0,
+					     "octeon", &oct->ioq_vector[i]);
+			if (irqret) {
+				dev_err(&oct->pci_dev->dev,
+					"OCTEON: Request_irq failed for MSIX interrupt Error: %d\n",
+					irqret);
+
+				/* Unwind the IRQs requested before this one,
+				 * newest first.
+				 */
+				while (i) {
+					i--;
+					irq_set_affinity_hint(
+					    msix_entries[i].vector, NULL);
+					free_irq(msix_entries[i].vector,
+						 &oct->ioq_vector[i]);
+				}
+				pci_disable_msix(oct->pci_dev);
+				kfree(oct->msix_entries);
+				oct->msix_entries = NULL;
+				return 1;
+			}
+			oct->ioq_vector[i].vector = msix_entries[i].vector;
+			/* assign the cpu mask for this msix interrupt vector */
+			irq_set_affinity_hint(
+			    msix_entries[i].vector,
+			    (&oct->ioq_vector[i].affinity_mask));
+		}
+		dev_dbg(&oct->pci_dev->dev,
+			"OCTEON[%d]: MSI-X enabled\n", oct->octeon_id);
+	}
+	return 0;
+}
+
+/**
+ * \brief PCI probe handler
+ * @param pdev PCI device structure
+ * @param ent unused
+ * @returns 0 on success, -ENOMEM if the device could not be allocated or
+ *          initialized
+ */
+static int
+liquidio_vf_probe(struct pci_dev *pdev,
+		  const struct pci_device_id *ent __attribute__((unused)))
+{
+	struct octeon_device *oct;
+
+	oct = octeon_allocate_device(pdev->device,
+				     sizeof(struct octeon_device_priv));
+	if (!oct) {
+		dev_err(&pdev->dev, "Unable to allocate device\n");
+		return -ENOMEM;
+	}
+
+	oct->msix_on = LIO_FLAG_MSIX_ENABLED;
+
+	dev_info(&pdev->dev, "Initializing device %x:%x.\n",
+		 (u32)pdev->vendor, (u32)pdev->device);
+
+	/* Link the PCI device and the octeon device in both directions */
+	pci_set_drvdata(pdev, oct);
+	oct->pci_dev = pdev;
+
+	if (octeon_device_init(oct)) {
+		/* remove() releases whatever init managed to set up */
+		liquidio_vf_remove(pdev);
+		return -ENOMEM;
+	}
+
+	dev_dbg(&oct->pci_dev->dev, "Device is ready\n");
+
+	return 0;
+}
+
+/**
+ * \brief PCI FLR for each Octeon device.
+ * @param oct octeon device
+ *
+ * Saves the PCI state, waits for pending transactions to drain (up to
+ * roughly 100 ms plus 5 s), sets the FLR bit in the PCIe device control
+ * register and finally restores the saved PCI state.
+ */
+static void octeon_pci_flr(struct octeon_device *oct)
+{
+	u16 status;
+
+	pci_save_state(oct->pci_dev);
+
+	pci_cfg_access_lock(oct->pci_dev);
+
+	/* Quiesce the device completely */
+	pci_write_config_word(oct->pci_dev, PCI_COMMAND,
+			      PCI_COMMAND_INTX_DISABLE);
+
+	/* Wait for Transaction Pending bit clean */
+	msleep(100);
+	pcie_capability_read_word(oct->pci_dev, PCI_EXP_DEVSTA, &status);
+	if (status & PCI_EXP_DEVSTA_TRPND) {
+		dev_info(&oct->pci_dev->dev, "Function reset incomplete after 100ms, sleeping for 5 seconds\n");
+		ssleep(5);
+		pcie_capability_read_word(oct->pci_dev, PCI_EXP_DEVSTA,
+					  &status);
+		if (status & PCI_EXP_DEVSTA_TRPND)
+			dev_info(&oct->pci_dev->dev, "Function reset still incomplete after 5s, reset anyway\n");
+	}
+	/* Initiate the function level reset, then give it 100 ms */
+	pcie_capability_set_word(oct->pci_dev, PCI_EXP_DEVCTL,
+				 PCI_EXP_DEVCTL_BCR_FLR);
+	mdelay(100);
+
+	pci_cfg_access_unlock(oct->pci_dev);
+
+	pci_restore_state(oct->pci_dev);
+}
+
+/**
+ * \brief Destroy resources associated with octeon device
+ * @param oct octeon device
+ *
+ * The switch is entered at the case matching the current device state and
+ * falls through every later case, so each teardown step that applies to
+ * the stage the device has reached is run, most recent stage first.
+ */
+static void octeon_destroy_resources(struct octeon_device *oct)
+{
+	struct msix_entry *msix_entries;
+	int i;
+
+	switch (atomic_read(&oct->status)) {
+	case OCT_DEV_RUNNING:
+	case OCT_DEV_CORE_OK:
+		/* No more instructions will be forwarded. */
+		atomic_set(&oct->status, OCT_DEV_IN_RESET);
+
+		oct->app_mode = CVM_DRV_INVALID_APP;
+		dev_dbg(&oct->pci_dev->dev, "Device state is now %s\n",
+			lio_get_state_string(&oct->status));
+
+		schedule_timeout_uninterruptible(HZ / 10);
+
+		/* fallthrough */
+	case OCT_DEV_HOST_OK:
+		/* fallthrough */
+	case OCT_DEV_IO_QUEUES_DONE:
+		if (wait_for_pending_requests(oct))
+			dev_err(&oct->pci_dev->dev, "There were pending requests\n");
+
+		if (lio_wait_for_instr_fetch(oct))
+			dev_err(&oct->pci_dev->dev, "IQ had pending instructions\n");
+
+		/* Disable the input and output queues now. No more packets will
+		 * arrive from Octeon, but we should wait for all packet
+		 * processing to finish.
+		 */
+		oct->fn_list.disable_io_queues(oct);
+
+		if (lio_wait_for_oq_pkts(oct))
+			dev_err(&oct->pci_dev->dev, "OQ had pending packets\n");
+
+		/* fallthrough */
+	case OCT_DEV_INTR_SET_DONE:
+		/* Disable interrupts  */
+		oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR);
+
+		if (oct->msix_on) {
+			msix_entries = (struct msix_entry *)oct->msix_entries;
+			for (i = 0; i < oct->num_msix_irqs; i++) {
+				/* clear the affinity hint before freeing */
+				irq_set_affinity_hint(msix_entries[i].vector,
+						      NULL);
+				free_irq(msix_entries[i].vector,
+					 &oct->ioq_vector[i]);
+			}
+			pci_disable_msix(oct->pci_dev);
+			kfree(oct->msix_entries);
+			oct->msix_entries = NULL;
+		}
+		/* Soft reset the octeon device before exiting */
+		if (oct->pci_dev->reset_fn)
+			octeon_pci_flr(oct);
+		else
+			cn23xx_vf_ask_pf_to_do_flr(oct);
+
+		/* fallthrough */
+	case OCT_DEV_MSIX_ALLOC_VECTOR_DONE:
+		octeon_free_ioq_vector(oct);
+
+		/* fallthrough */
+	case OCT_DEV_MBOX_SETUP_DONE:
+		oct->fn_list.free_mbox(oct);
+
+		/* fallthrough */
+	case OCT_DEV_IN_RESET:
+	case OCT_DEV_DROQ_INIT_DONE:
+		mdelay(100);
+		/* Delete only the output queues marked in the queue mask */
+		for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
+			if (!(oct->io_qmask.oq & BIT_ULL(i)))
+				continue;
+			octeon_delete_droq(oct, i);
+		}
+
+		/* fallthrough */
+	case OCT_DEV_RESP_LIST_INIT_DONE:
+		octeon_delete_response_list(oct);
+
+		/* fallthrough */
+	case OCT_DEV_INSTR_QUEUE_INIT_DONE:
+		/* Delete only the instruction queues marked in the mask */
+		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
+			if (!(oct->io_qmask.iq & BIT_ULL(i)))
+				continue;
+			octeon_delete_instr_queue(oct, i);
+		}
+
+		/* fallthrough */
+	case OCT_DEV_SC_BUFF_POOL_INIT_DONE:
+		octeon_free_sc_buffer_pool(oct);
+
+		/* fallthrough */
+	case OCT_DEV_DISPATCH_INIT_DONE:
+		octeon_delete_dispatch_list(oct);
+		cancel_delayed_work_sync(&oct->nic_poll_work.work);
+
+		/* fallthrough */
+	case OCT_DEV_PCI_MAP_DONE:
+		octeon_unmap_pci_barx(oct, 0);
+		octeon_unmap_pci_barx(oct, 1);
+
+		/* fallthrough */
+	case OCT_DEV_PCI_ENABLE_DONE:
+		pci_clear_master(oct->pci_dev);
+		/* Disable the device, releasing the PCI INT */
+		pci_disable_device(oct->pci_dev);
+
+		/* fallthrough */
+	case OCT_DEV_BEGIN_STATE:
+		/* Nothing to be done here either */
+		break;
+	}
+}
+
+/**
+ * \brief Completion callback for the rx control command
+ * @param oct octeon device (re-resolved from the context's octeon_id)
+ * @param status status of request
+ * @param buf pointer to the soft command that completed
+ *
+ * Logs a failure status, marks the request as finished and wakes the
+ * waiter sleeping on the context's wait queue.
+ */
+static void rx_ctl_callback(struct octeon_device *oct,
+			    u32 status, void *buf)
+{
+	struct octeon_soft_command *sc = buf;
+	struct liquidio_rx_ctl_context *ctx =
+		(struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	oct = lio_get_device(ctx->octeon_id);
+	if (status)
+		dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n",
+			CVM_CAST64(status));
+
+	WRITE_ONCE(ctx->cond, 1);
+
+	/* This barrier is required to be sure that the response has been
+	 * written fully before waking up the handler
+	 */
+	wmb();
+
+	wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Send Rx control command
+ * @param lio per-network private data
+ * @param start_stop whether to start or stop
+ *
+ * Does nothing when the interface's rx state already equals start_stop.
+ * Otherwise a soft command is sent and the caller sleeps until the
+ * response callback fires; on completion the new rx state is recorded.
+ */
+static void send_rx_ctrl_cmd(struct lio *lio, int start_stop)
+{
+	struct octeon_device *oct = (struct octeon_device *)lio->oct_dev;
+	int ctx_size = sizeof(struct liquidio_rx_ctl_context);
+	struct liquidio_rx_ctl_context *ctx;
+	struct octeon_soft_command *sc;
+	union octnet_cmd *ncmd;
+	int retval;
+
+	if (oct->props[lio->ifidx].rx_on == start_stop)
+		return;
+
+	sc = (struct octeon_soft_command *)
+		octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE,
+					  16, ctx_size);
+	/* Allocation can fail; never dereference a NULL soft command */
+	if (!sc) {
+		netif_info(lio, rx_err, lio->netdev,
+			   "Failed to allocate soft command for RX Control message\n");
+		return;
+	}
+
+	ncmd = (union octnet_cmd *)sc->virtdptr;
+	ctx  = (struct liquidio_rx_ctl_context *)sc->ctxptr;
+
+	WRITE_ONCE(ctx->cond, 0);
+	ctx->octeon_id = lio_get_device_id(oct);
+	init_waitqueue_head(&ctx->wc);
+
+	ncmd->u64 = 0;
+	ncmd->s.cmd = OCTNET_CMD_RX_CTL;
+	ncmd->s.param1 = start_stop;
+
+	octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3));
+
+	sc->iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	octeon_prepare_soft_command(oct, sc, OPCODE_NIC,
+				    OPCODE_NIC_CMD, 0, 0, 0);
+
+	sc->callback = rx_ctl_callback;
+	sc->callback_arg = sc;
+	sc->wait_time = 5000;
+
+	retval = octeon_send_soft_command(oct, sc);
+	if (retval == IQ_SEND_FAILED) {
+		netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n");
+	} else {
+		/* Sleep on a wait queue till the cond flag indicates that the
+		 * response arrived or timed-out.
+		 *
+		 * NOTE(review): on -EINTR the soft command is not freed here;
+		 * presumably because the response callback may still
+		 * reference it - confirm who releases it in that case.
+		 */
+		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR)
+			return;
+		oct->props[lio->ifidx].rx_on = start_stop;
+	}
+
+	octeon_free_soft_command(oct, sc);
+}
+
+/**
+ * \brief Destroy NIC device interface
+ * @param oct octeon device
+ * @param ifidx which interface to destroy
+ *
+ * Stops the interface if it is running, disables NAPI, unregisters the
+ * net device if it was registered, and releases the link status
+ * workqueue, the gather lists and the net device itself.
+ */
+static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx)
+{
+	struct net_device *netdev = oct->props[ifidx].netdev;
+	struct napi_struct *napi, *tmp;
+	struct lio *lio;
+
+	if (!netdev) {
+		dev_err(&oct->pci_dev->dev, "%s No netdevice ptr for index %d\n",
+			__func__, ifidx);
+		return;
+	}
+
+	lio = GET_LIO(netdev);
+
+	dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n");
+
+	if (ifstate_check(lio, LIO_IFSTATE_RUNNING))
+		liquidio_stop(netdev);
+
+	if (oct->props[lio->ifidx].napi_enabled == 1) {
+		list_for_each_entry_safe(napi, tmp, &netdev->napi_list,
+					 dev_list)
+			napi_disable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 0;
+		oct->droq[0]->ops.poll_mode = 0;
+	}
+
+	if (ifstate_check(lio, LIO_IFSTATE_REGISTERED))
+		unregister_netdev(netdev);
+
+	cleanup_link_status_change_wq(netdev);
+	delete_glists(lio);
+	free_netdev(netdev);
+
+	/* Mark the slot as unused */
+	oct->props[ifidx].gmxport = -1;
+	oct->props[ifidx].netdev = NULL;
+}
+
+/**
+ * \brief Stop complete NIC functionality
+ * @param oct octeon device
+ * @returns 0 on success, 1 if no interface was ever set up
+ */
+static int liquidio_stop_nic_module(struct octeon_device *oct)
+{
+	int ifidx, q;
+
+	dev_dbg(&oct->pci_dev->dev, "Stopping network interfaces\n");
+	if (!oct->ifcount) {
+		dev_err(&oct->pci_dev->dev, "Init for Octeon was not completed\n");
+		return 1;
+	}
+
+	/* Mark command/response handling as offline */
+	spin_lock_bh(&oct->cmd_resp_wqlock);
+	oct->cmd_resp_state = OCT_DRV_OFFLINE;
+	spin_unlock_bh(&oct->cmd_resp_wqlock);
+
+	/* First detach the droq ops of every rx queue on every interface */
+	for (ifidx = 0; ifidx < oct->ifcount; ifidx++) {
+		struct lio *lio = GET_LIO(oct->props[ifidx].netdev);
+
+		for (q = 0; q < lio->linfo.num_rxpciq; q++)
+			octeon_unregister_droq_ops(oct,
+						   lio->linfo.rxpciq[q].s.q_no);
+	}
+
+	/* ... then tear the interfaces down */
+	for (ifidx = 0; ifidx < oct->ifcount; ifidx++)
+		liquidio_destroy_nic_device(oct, ifidx);
+
+	dev_dbg(&oct->pci_dev->dev, "Network interfaces stopped\n");
+
+	return 0;
+}
+
+/**
+ * \brief Cleans up resources at unload time
+ * @param pdev PCI device structure
+ */
+static void liquidio_vf_remove(struct pci_dev *pdev)
+{
+	struct octeon_device *oct = pci_get_drvdata(pdev);
+
+	dev_dbg(&oct->pci_dev->dev, "Stopping device\n");
+
+	/* The NIC module only needs stopping when the NIC app is active */
+	if (oct->app_mode == CVM_DRV_NIC_APP)
+		liquidio_stop_nic_module(oct);
+
+	/* Reset the octeon device and cleanup all memory allocated for
+	 * the octeon device by driver.
+	 */
+	octeon_destroy_resources(oct);
+
+	dev_info(&oct->pci_dev->dev, "Device removed\n");
+
+	/* This octeon device has been removed. Update the global
+	 * data structure to reflect this. Free the device structure.
+	 */
+	octeon_free_device_mem(oct);
+}
+
+/**
+ * \brief PCI initialization for each Octeon device.
+ * @param oct octeon device
+ * @returns 0 on success, 1 if the device could not be enabled or does not
+ *          accept a 64-bit DMA mask
+ */
+static int octeon_pci_os_setup(struct octeon_device *oct)
+{
+	struct pci_dev *pdev = oct->pci_dev;
+
+#ifdef CONFIG_PCI_IOV
+	/* setup PCI stuff first */
+	if (!pdev->physfn)
+		octeon_pci_flr(oct);
+#endif
+
+	if (pci_enable_device(pdev)) {
+		dev_err(&pdev->dev, "pci_enable_device failed\n");
+		return 1;
+	}
+
+	if (dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
+		dev_err(&pdev->dev, "Unexpected DMA device capability\n");
+		pci_disable_device(pdev);
+		return 1;
+	}
+
+	/* Enable PCI DMA Master. */
+	pci_set_master(pdev);
+
+	return 0;
+}
+
+/**
+ * \brief Map a network buffer to a gather-list/queue index
+ * @param lio per-network private data
+ * @param skb network buffer
+ * @returns 0 on single-queue devices, otherwise the skb's queue mapping
+ *          modulo the number of tx queues
+ */
+static int skb_iq(struct lio *lio, struct sk_buff *skb)
+{
+	if (!netif_is_multiqueue(lio->netdev))
+		return 0;
+
+	return skb->queue_mapping % lio->linfo.num_txpciq;
+}
+
+/**
+ * \brief Check Tx queue state for a given network buffer
+ * @param lio per-network private data
+ * @param skb network buffer
+ * @returns 0 if the instruction queue is full, 1 otherwise
+ *
+ * When the instruction queue has room and the matching net device queue
+ * is stopped, the queue is woken and the restart is counted.
+ */
+static int check_txq_state(struct lio *lio, struct sk_buff *skb)
+{
+	int q, iq;
+
+	if (!netif_is_multiqueue(lio->netdev)) {
+		iq = lio->txq;
+		q = iq;
+	} else {
+		q = skb->queue_mapping;
+		iq = lio->linfo.txpciq[(q % (lio->linfo.num_txpciq))].s.q_no;
+	}
+
+	if (octnet_iq_is_full(lio->oct_dev, iq))
+		return 0;
+
+	if (__netif_subqueue_stopped(lio->netdev, q)) {
+		INCR_INSTRQUEUE_PKT_COUNT(lio->oct_dev, iq, tx_restart, 1);
+		wake_q(lio->netdev, q);
+	}
+
+	return 1;
+}
+
+/**
+ * \brief Unmap and free network buffer
+ * @param buf pointer to the octnet_buf_free_info describing the buffer
+ */
+static void free_netbuf(void *buf)
+{
+	struct octnet_buf_free_info *finfo = buf;
+	struct sk_buff *skb = finfo->skb;
+	struct lio *lio = finfo->lio;
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev, finfo->dptr, skb->len,
+			 DMA_TO_DEVICE);
+
+	/* The queue state is checked while the skb is still valid */
+	check_txq_state(lio, skb);
+
+	tx_buffer_free(skb);
+}
+
+/**
+ * \brief Unmap and free gather buffer
+ * @param buf pointer to the octnet_buf_free_info describing the buffer
+ *
+ * Unmaps the linear part, every page fragment and the gather list itself,
+ * returns the gather entry to its per-queue free list and frees the skb.
+ */
+static void free_netsgbuf(void *buf)
+{
+	struct octnet_buf_free_info *finfo = buf;
+	struct sk_buff *skb = finfo->skb;
+	struct lio *lio = finfo->lio;
+	struct octnic_gather *g = finfo->g;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int i, iq;
+
+	/* Gather slot 0 holds the linear (non-paged) part of the skb */
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 g->sg[0].ptr[0], (skb->len - skb->data_len),
+			 DMA_TO_DEVICE);
+
+	/* Slot i holds fragment i - 1; four pointers per gather entry */
+	for (i = 1; i <= nr_frags; i++) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+		pci_unmap_page((lio->oct_dev)->pci_dev,
+			       g->sg[(i >> 2)].ptr[(i & 3)],
+			       frag->size, DMA_TO_DEVICE);
+	}
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 finfo->dptr, g->sg_size,
+			 DMA_TO_DEVICE);
+
+	iq = skb_iq(lio, skb);
+
+	spin_lock(&lio->glist_lock[iq]);
+	list_add_tail(&g->list, &lio->glist[iq]);
+	spin_unlock(&lio->glist_lock[iq]);
+
+	check_txq_state(lio, skb); /* mq support: sub-queue state check */
+
+	tx_buffer_free(skb);
+}
+
+/**
+ * \brief Unmap and free gather buffer with response
+ * @param buf pointer to the soft command whose callback_arg is the skb
+ *
+ * Same unmapping as the plain gather-buffer release, except that the skb
+ * itself is left alone.
+ */
+static void free_netsgbuf_with_resp(void *buf)
+{
+	struct octeon_soft_command *sc = buf;
+	struct sk_buff *skb = (struct sk_buff *)sc->callback_arg;
+	struct octnet_buf_free_info *finfo =
+		(struct octnet_buf_free_info *)&skb->cb;
+	struct lio *lio = finfo->lio;
+	struct octnic_gather *g = finfo->g;
+	int nr_frags = skb_shinfo(skb)->nr_frags;
+	int i, iq;
+
+	/* Gather slot 0 holds the linear (non-paged) part of the skb */
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 g->sg[0].ptr[0], (skb->len - skb->data_len),
+			 DMA_TO_DEVICE);
+
+	/* Slot i holds fragment i - 1; four pointers per gather entry */
+	for (i = 1; i <= nr_frags; i++) {
+		struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1];
+
+		pci_unmap_page((lio->oct_dev)->pci_dev,
+			       g->sg[(i >> 2)].ptr[(i & 3)],
+			       frag->size, DMA_TO_DEVICE);
+	}
+
+	dma_unmap_single(&lio->oct_dev->pci_dev->dev,
+			 finfo->dptr, g->sg_size,
+			 DMA_TO_DEVICE);
+
+	iq = skb_iq(lio, skb);
+
+	spin_lock(&lio->glist_lock[iq]);
+	list_add_tail(&g->list, &lio->glist[iq]);
+	spin_unlock(&lio->glist_lock[iq]);
+
+	/* Don't free the skb yet */
+
+	check_txq_state(lio, skb);
+}
+
+/**
+ * \brief Setup output queue
+ * @param oct octeon device
+ * @param q_no which queue
+ * @param num_descs how many descriptors
+ * @param desc_size size of each descriptor
+ * @param app_ctx application context
+ * @returns a negative value if droq creation failed, 0 if an already
+ *          existing default droq is used, otherwise the value returned by
+ *          octeon_create_droq()
+ */
+static int octeon_setup_droq(struct octeon_device *oct, int q_no, int num_descs,
+			     int desc_size, void *app_ctx)
+{
+	struct octeon_droq *droq;
+	int rc;
+
+	dev_dbg(&oct->pci_dev->dev, "Creating Droq: %d\n", q_no);
+
+	/* droq creation and local register settings. */
+	rc = octeon_create_droq(oct, q_no, num_descs, desc_size, app_ctx);
+	if (rc < 0)
+		return rc;
+
+	if (rc == 1) {
+		dev_dbg(&oct->pci_dev->dev, "Using default droq %d\n", q_no);
+		return 0;
+	}
+
+	/* Enable the droq queues */
+	octeon_set_droq_pkt_op(oct, q_no, 1);
+
+	/* Send Credit for Octeon Output queues. Credits are always
+	 * sent after the output queue is enabled.
+	 */
+	droq = oct->droq[q_no];
+	writel(droq->max_count, droq->pkts_credit_reg);
+
+	return rc;
+}
+
+/**
+ * \brief Callback for getting interface configuration
+ * @param oct octeon device (re-resolved from the context's octeon_id)
+ * @param status status of request (unused; the response status is used)
+ * @param buf pointer to the soft command that completed
+ *
+ * Copies the firmware version out of the response, then marks the request
+ * as finished and wakes the waiter. The completion flag is published only
+ * after the response has been consumed, so a waiter that observes the
+ * flag cannot proceed while the response is still being read here.
+ */
+static void if_cfg_callback(struct octeon_device *oct,
+			    u32 status __attribute__((unused)), void *buf)
+{
+	struct octeon_soft_command *sc = (struct octeon_soft_command *)buf;
+	struct liquidio_if_cfg_context *ctx;
+	struct liquidio_if_cfg_resp *resp;
+
+	resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+	ctx = (struct liquidio_if_cfg_context *)sc->ctxptr;
+
+	oct = lio_get_device(ctx->octeon_id);
+	if (resp->status)
+		dev_err(&oct->pci_dev->dev, "nic if cfg instruction failed. Status: %llx\n",
+			CVM_CAST64(resp->status));
+
+	snprintf(oct->fw_info.liquidio_firmware_version, 32, "%s",
+		 resp->cfg_info.liquidio_firmware_version);
+
+	WRITE_ONCE(ctx->cond, 1);
+
+	/* This barrier is required to be sure that the response has been
+	 * written fully before waking up the handler
+	 */
+	wmb();
+
+	wake_up_interruptible(&ctx->wc);
+}
+
+/**
+ * \brief Select queue based on hash
+ * @param dev Net device
+ * @param skb sk_buff structure
+ * @param accel_priv unused
+ * @param fallback unused
+ * @returns the skb's tx hash reduced modulo the number of tx queues
+ */
+static u16 select_q(struct net_device *dev, struct sk_buff *skb,
+		    void *accel_priv __attribute__((unused)),
+		    select_queue_fallback_t fallback __attribute__((unused)))
+{
+	struct lio *lio = GET_LIO(dev);
+	u32 hash = skb_tx_hash(dev, skb);
+
+	return (u16)(hash % (lio->linfo.num_txpciq));
+}
+
+/** Routine to push packets arriving on Octeon interface upto network layer.
+ * @param octeon_id - octeon device id (unused).
+ * @param skbuff    - skbuff struct to be passed to network layer.
+ * @param len       - size of total data received.
+ * @param rh        - Control header associated with the packet
+ * @param param     - additional control data with the packet; used here as
+ *                    the napi_struct embedded in the receiving droq
+ * @param arg       - farg registered in droq_ops; used here as the
+ *                    net_device (the buffer is freed if it is NULL)
+ */
+static void
+liquidio_push_packet(u32 octeon_id __attribute__((unused)),
+		     void *skbuff,
+		     u32 len,
+		     union octeon_rh *rh,
+		     void *param,
+		     void *arg)
+{
+	struct napi_struct *napi = param;
+	struct octeon_droq *droq =
+		container_of(param, struct octeon_droq, napi);
+	struct net_device *netdev = (struct net_device *)arg;
+	struct sk_buff *skb = (struct sk_buff *)skbuff;
+	u16 vtag = 0;
+
+	if (netdev) {
+		struct lio *lio = GET_LIO(netdev);
+		int packet_was_received;
+
+		/* Do not proceed if the interface is not in RUNNING state. */
+		if (!ifstate_check(lio, LIO_IFSTATE_RUNNING)) {
+			recv_buffer_free(skb);
+			droq->stats.rx_dropped++;
+			return;
+		}
+
+		skb->dev = netdev;
+
+		skb_record_rx_queue(skb, droq->q_no);
+		if (likely(len > MIN_SKB_SIZE)) {
+			struct octeon_skb_page_info *pg_info;
+			unsigned char *va;
+
+			pg_info = ((struct octeon_skb_page_info *)(skb->cb));
+			if (pg_info->page) {
+				/* For Paged allocation use the frags */
+				va = page_address(pg_info->page) +
+					pg_info->page_offset;
+				/* The first MIN_SKB_SIZE bytes go into the
+				 * linear area, the rest stays in the page as
+				 * an rx fragment.
+				 */
+				memcpy(skb->data, va, MIN_SKB_SIZE);
+				skb_put(skb, MIN_SKB_SIZE);
+				skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+						pg_info->page,
+						pg_info->page_offset +
+						MIN_SKB_SIZE,
+						len - MIN_SKB_SIZE,
+						LIO_RXBUFFER_SZ);
+			}
+		} else {
+			/* Small packet: copy it all into the linear area and
+			 * drop the page reference.
+			 */
+			struct octeon_skb_page_info *pg_info =
+				((struct octeon_skb_page_info *)(skb->cb));
+			skb_copy_to_linear_data(skb,
+						page_address(pg_info->page) +
+						pg_info->page_offset, len);
+			skb_put(skb, len);
+			put_page(pg_info->page);
+		}
+
+		/* Strip rh->r_dh.len 8-byte words from the front of the data */
+		skb_pull(skb, rh->r_dh.len * 8);
+		skb->protocol = eth_type_trans(skb, skb->dev);
+
+		if ((netdev->features & NETIF_F_RXCSUM) &&
+		    (((rh->r_dh.encap_on) &&
+		      (rh->r_dh.csum_verified & CNNIC_TUN_CSUM_VERIFIED)) ||
+		     (!(rh->r_dh.encap_on) &&
+		      (rh->r_dh.csum_verified & CNNIC_CSUM_VERIFIED))))
+			/* checksum has already been verified */
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+		else
+			skb->ip_summed = CHECKSUM_NONE;
+
+		/* Setting Encapsulation field on basis of status received
+		 * from the firmware
+		 */
+		if (rh->r_dh.encap_on) {
+			skb->encapsulation = 1;
+			skb->csum_level = 1;
+			droq->stats.rx_vxlan++;
+		}
+
+		/* inbound VLAN tag */
+		if ((netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+		    rh->r_dh.vlan) {
+			u16 priority = rh->r_dh.priority;
+			u16 vid = rh->r_dh.vlan;
+
+			vtag = (priority << VLAN_PRIO_SHIFT) | vid;
+			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vtag);
+		}
+
+		/* Anything other than GRO_DROP counts as received */
+		packet_was_received = (napi_gro_receive(napi, skb) != GRO_DROP);
+
+		if (packet_was_received) {
+			droq->stats.rx_bytes_received += len;
+			droq->stats.rx_pkts_received++;
+			netdev->last_rx = jiffies;
+		} else {
+			droq->stats.rx_dropped++;
+			netif_info(lio, rx_err, lio->netdev,
+				   "droq:%d  error rx_dropped:%llu\n",
+				   droq->q_no, droq->stats.rx_dropped);
+		}
+
+	} else {
+		recv_buffer_free(skb);
+	}
+}
+
+/**
+ * \brief Rx interrupt callback for NAPI mode: schedules the queue's NAPI poll
+ * @param arg pointer to the octeon output queue (struct octeon_droq)
+ */
+static void liquidio_vf_napi_drv_callback(void *arg)
+{
+	struct napi_struct *napi = &((struct octeon_droq *)arg)->napi;
+
+	napi_schedule_irqoff(napi);
+}
+
+/**
+ * \brief NAPI poll: process Rx descriptors, then flush the matching Tx queue
+ * @param napi NAPI structure embedded in the octeon_droq being polled
+ * @param budget maximum number of items to process
+ */
+static int liquidio_napi_poll(struct napi_struct *napi, int budget)
+{
+	struct octeon_instr_queue *iq;
+	struct octeon_device *oct;
+	struct octeon_droq *droq;
+	int tx_done = 0, iq_no;
+	int work_done;
+
+	droq = container_of(napi, struct octeon_droq, napi);
+	oct = droq->oct_dev;
+	iq_no = droq->q_no;
+
+	/* Handle Droq descriptors */
+	work_done = octeon_process_droq_poll_cmd(oct, droq->q_no,
+						 POLL_EVENT_PROCESS_PKTS,
+						 budget);
+
+	/* Flush the instruction queue that has the same number as the droq */
+	iq = oct->instr_queue[iq_no];
+	if (iq) {
+		/* Process iq buffers within the budget limits */
+		tx_done = octeon_flush_iq(oct, iq, 1, budget);
+		/* Update iq read-index rather than waiting for next interrupt.
+		 * A zero tx_done makes this poll report the full budget below.
+		 */
+		update_txq_status(oct, iq_no);
+	} else {
+		dev_err(&oct->pci_dev->dev, "%s: iq (%d) num invalid\n",
+			__func__, iq_no);
+	}
+
+	if ((work_done < budget) && (tx_done)) {
+		napi_complete(napi);
+		octeon_process_droq_poll_cmd(droq->oct_dev, droq->q_no,
+					     POLL_EVENT_ENABLE_INTR, 0);
+		return 0;
+	}
+
+	return (!tx_done) ? (budget) : (work_done);
+}
+
+/**
+ * \brief Setup input and output queues
+ * @param octeon_dev octeon device
+ * @param ifidx Interface index
+ * @returns 0 on success, 1 if creating any DROQ or IQ fails
+ *
+ * Note: Queues are named from the octeon device's point of view: an input
+ * queue carries egress packets, an output queue carries ingress packets.
+ */
+static int setup_io_queues(struct octeon_device *octeon_dev, int ifidx)
+{
+	struct octeon_droq_ops droq_ops;
+	struct net_device *netdev;
+	int cpu_id_modulus;
+	struct octeon_droq *droq;
+	struct napi_struct *napi;
+	int cpu_id;
+	int num_tx_descs;
+	struct lio *lio;
+	int retval = 0;
+	int q, q_no;
+
+	netdev = octeon_dev->props[ifidx].netdev;
+
+	lio = GET_LIO(netdev);
+
+	memset(&droq_ops, 0, sizeof(struct octeon_droq_ops));
+
+	droq_ops.fptr = liquidio_push_packet;
+	droq_ops.farg = netdev;
+
+	droq_ops.poll_mode = 1;
+	droq_ops.napi_fn = liquidio_vf_napi_drv_callback;
+	cpu_id = 0;
+	cpu_id_modulus = num_present_cpus();
+
+	/* set up DROQs. */
+	for (q = 0; q < lio->linfo.num_rxpciq; q++) {
+		q_no = lio->linfo.rxpciq[q].s.q_no;
+
+		retval = octeon_setup_droq(
+		    octeon_dev, q_no,
+		    CFG_GET_NUM_RX_DESCS_NIC_IF(octeon_get_conf(octeon_dev),
+						lio->ifidx),
+		    CFG_GET_NUM_RX_BUF_SIZE_NIC_IF(octeon_get_conf(octeon_dev),
+						   lio->ifidx),
+		    NULL);
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"%s : Runtime DROQ(RxQ) creation failed.\n",
+				__func__);
+			return 1;
+		}
+
+		droq = octeon_dev->droq[q_no];
+		napi = &droq->napi;
+		netif_napi_add(netdev, napi, liquidio_napi_poll, 64);
+
+		/* designate a CPU for this droq, round-robin over present CPUs */
+		droq->cpu_id = cpu_id;
+		cpu_id++;
+		if (cpu_id >= cpu_id_modulus)
+			cpu_id = 0;
+
+		octeon_register_droq_ops(octeon_dev, q_no, &droq_ops);
+	}
+
+	/* 23XX VF can send/recv control messages (via the first VF-owned
+	 * droq) from the firmware even if the ethX interface is down,
+	 * so that's why poll_mode must be off for the first droq.
+	 */
+	octeon_dev->droq[0]->ops.poll_mode = 0;
+
+	/* set up IQs. */
+	for (q = 0; q < lio->linfo.num_txpciq; q++) {
+		num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(
+		    octeon_get_conf(octeon_dev), lio->ifidx);
+		retval = octeon_setup_iq(octeon_dev, ifidx, q,
+					 lio->linfo.txpciq[q], num_tx_descs,
+					 netdev_get_tx_queue(netdev, q));
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				" %s : Runtime IQ(TxQ) creation failed.\n",
+				__func__);
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * \brief Net device open: enable NAPI, mark RUNNING, start Tx, request Rx
+ * @param netdev network device (this handler always returns 0)
+ */
+static int liquidio_open(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct napi_struct *napi, *n;
+
+	if (!oct->props[lio->ifidx].napi_enabled) {
+		list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list)
+			napi_enable(napi);
+
+		oct->props[lio->ifidx].napi_enabled = 1;
+		/* the first droq joins poll mode once NAPI has been enabled */
+		oct->droq[0]->ops.poll_mode = 1;
+	}
+
+	ifstate_set(lio, LIO_IFSTATE_RUNNING);
+
+	/* Ready for link status updates */
+	lio->intf_open = 1;
+
+	netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n");
+	start_txq(netdev);
+
+	/* tell Octeon to start forwarding packets to host */
+	send_rx_ctrl_cmd(lio, 1);
+
+	dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name);
+
+	return 0;
+}
+
+/**
+ * \brief Net device stop: report link down, stop Octeon Rx forwarding and Tx
+ * @param netdev network device (this handler always returns 0)
+ */
+static int liquidio_stop(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+
+	netif_info(lio, ifdown, lio->netdev, "Stopping interface!\n");
+	/* Inform that netif carrier is down */
+	lio->intf_open = 0;
+	lio->linfo.link.s.link_up = 0;
+
+	netif_carrier_off(netdev);
+	lio->link_changes++;
+
+	/* tell Octeon to stop forwarding packets to host */
+	send_rx_ctrl_cmd(lio, 0);
+
+	ifstate_reset(lio, LIO_IFSTATE_RUNNING);
+
+	txqs_stop(netdev);
+	/* NAPI stays enabled: nothing here undoes the napi_enable() from open */
+	dev_info(&oct->pci_dev->dev, "%s interface is stopped\n", netdev->name);
+
+	return 0;
+}
+
+/**
+ * \brief Build the octnet_ifflags mask matching the net device flags
+ * @param netdev network device
+ *
+ * Unicast is always set; the promiscuous, all-multicast, multicast and
+ * broadcast bits follow netdev->flags.
+ */
+static enum octnet_ifflags get_new_flags(struct net_device *netdev)
+{
+	enum octnet_ifflags mask = OCTNET_IFFLAG_UNICAST;
+
+	if (netdev->flags & IFF_BROADCAST)
+		mask |= OCTNET_IFFLAG_BROADCAST;
+
+	if (netdev->flags & IFF_PROMISC)
+		mask |= OCTNET_IFFLAG_PROMISC;
+
+	if (netdev->flags & IFF_MULTICAST) {
+		mask |= OCTNET_IFFLAG_MULTICAST;
+
+		/* The list is longer than we can handle, so fall back to
+		 * accepting every multicast address.
+		 */
+		if (netdev_mc_count(netdev) > MAX_OCTEON_MULTICAST_ADDR)
+			mask |= OCTNET_IFFLAG_ALLMULTI;
+	}
+
+	if (netdev->flags & IFF_ALLMULTI)
+		mask |= OCTNET_IFFLAG_ALLMULTI;
+
+	return mask;
+}
+
+static void liquidio_set_uc_list(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	struct netdev_hw_addr *ha;
+	u64 *mac;
+
+	if (lio->netdev_uc_count == netdev_uc_count(netdev))
+		return;
+	/* only the count is compared above; each address needs a udd[] slot */
+	if (netdev_uc_count(netdev) > MAX_NCTRL_UDD) {
+		dev_err(&oct->pci_dev->dev, "too many MAC addresses in netdev uc list\n");
+		return;
+	}
+
+	lio->netdev_uc_count = netdev_uc_count(netdev);
+	/* build the OCTNET_CMD_SET_UC_LIST control packet */
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_UC_LIST;
+	nctrl.ncmd.s.more = lio->netdev_uc_count;
+	nctrl.ncmd.s.param1 = oct->vf_num;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	/* copy each address into bytes 2..7 of its own u64 udd[] slot */
+	mac = &nctrl.udd[0];
+	netdev_for_each_uc_addr(ha, netdev) {
+		ether_addr_copy(((u8 *)mac) + 2, ha->addr);
+		mac++;
+	}
+
+	octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+}
+
+/**
+ * \brief Net device set_rx_mode: send the flags, mcast list, then uc list
+ * @param netdev network device
+ */
+static void liquidio_set_mcast_list(struct net_device *netdev)
+{
+	int mc_count = min(netdev_mc_count(netdev), MAX_OCTEON_MULTICAST_ADDR);
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	struct netdev_hw_addr *ha;
+	u64 *mc;
+	int ret;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	/* Create a ctrl pkt command to be sent to core app. */
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_SET_MULTI_LIST;
+	nctrl.ncmd.s.param1 = get_new_flags(netdev);
+	nctrl.ncmd.s.param2 = mc_count;
+	nctrl.ncmd.s.more = mc_count;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	/* copy the addresses into the udd, never more than mc_count of them */
+	mc = &nctrl.udd[0];
+	netdev_for_each_mc_addr(ha, netdev) {
+		*mc = 0;
+		ether_addr_copy(((u8 *)mc) + 2, ha->addr);
+		/* no need to swap bytes */
+		if (++mc >= &nctrl.udd[mc_count])
+			break;
+	}
+
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+
+	/* Apparently, any activity in this call from the kernel has to
+	 * be atomic. So we won't wait for response.
+	 */
+	nctrl.wait_time = 0;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "DEVFLAGS change failed in core (ret: 0x%x)\n",
+			ret);
+	}
+
+	liquidio_set_uc_list(netdev);
+}
+
+/**
+ * \brief Net device set_mac_address
+ * @param netdev network device; p points to a struct sockaddr (new MAC)
+ */
+static int liquidio_set_mac(struct net_device *netdev, void *p)
+{
+	struct sockaddr *addr = (struct sockaddr *)p;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	if (!is_valid_ether_addr(addr->sa_data))
+		return -EADDRNOTAVAIL;
+	/* nothing to send when the address is unchanged */
+	if (ether_addr_equal(addr->sa_data, netdev->dev_addr))
+		return 0;
+	/* macaddr_is_admin_asgnd set: refuse the change with -EPERM */
+	if (lio->linfo.macaddr_is_admin_asgnd)
+		return -EPERM;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_CHANGE_MACADDR;
+	nctrl.ncmd.s.param1 = 0;
+	nctrl.ncmd.s.more = 1;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+	nctrl.wait_time = 100;
+
+	nctrl.udd[0] = 0;
+	/* The MAC Address is presented in network byte order. */
+	ether_addr_copy((u8 *)&nctrl.udd[0] + 2, addr->sa_data);
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "MAC Address change failed\n");
+		return -ENOMEM;	/* every send failure is reported this way */
+	}
+	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+	ether_addr_copy(((u8 *)&lio->linfo.hw_addr) + 2, addr->sa_data);
+
+	return 0;
+}
+
+/**
+ * \brief Net device get_stats
+ * @param netdev network device
+ * @returns netdev->stats after refreshing its Tx/Rx totals
+ *
+ * Tx totals come from the instruction queues, Rx totals from the DROQs.
+ */
+static struct net_device_stats *liquidio_get_stats(struct net_device *netdev)
+{
+	struct net_device_stats *totals = &netdev->stats;
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	u64 tx_pkts = 0, tx_drop = 0, tx_bytes = 0;
+	u64 rx_pkts = 0, rx_drop = 0, rx_bytes = 0;
+	int q;
+
+	/* Transmit side: walk the instruction queues of this interface */
+	for (q = 0; q < lio->linfo.num_txpciq; q++) {
+		int iq_no = lio->linfo.txpciq[q].s.q_no;
+		struct oct_iq_stats *iq = &oct->instr_queue[iq_no]->stats;
+
+		tx_pkts += iq->tx_done;
+		tx_drop += iq->tx_dropped;
+		tx_bytes += iq->tx_tot_bytes;
+	}
+
+	totals->tx_packets = tx_pkts;
+	totals->tx_bytes = tx_bytes;
+	totals->tx_dropped = tx_drop;
+
+	/* Receive side: every DROQ drop reason counts as an Rx drop */
+	for (q = 0; q < lio->linfo.num_rxpciq; q++) {
+		int oq_no = lio->linfo.rxpciq[q].s.q_no;
+		struct oct_droq_stats *oq = &oct->droq[oq_no]->stats;
+
+		rx_pkts += oq->rx_pkts_received;
+		rx_drop += (oq->rx_dropped +
+			    oq->dropped_nodispatch +
+			    oq->dropped_toomany +
+			    oq->dropped_nomem);
+		rx_bytes += oq->rx_bytes_received;
+	}
+
+	totals->rx_bytes = rx_bytes;
+	totals->rx_packets = rx_pkts;
+	totals->rx_dropped = rx_drop;
+
+	return totals;
+}
+
+/**
+ * \brief Net device change_mtu: record and log the new MTU
+ * @param netdev network device
+ * @param new_mtu MTU to store in the lio and in the net device
+ */
+static int liquidio_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	const int old_mtu = netdev->mtu;
+
+	lio->mtu = new_mtu;
+	netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n",
+		   old_mtu, new_mtu);
+	dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n",
+		 netdev->name, old_mtu, new_mtu);
+	netdev->mtu = new_mtu;
+
+	return 0;
+}
+
+/**
+ * \brief Handler for SIOCSHWTSTAMP ioctl
+ * @param netdev network device
+ * @param ifr interface request; ifr_data points to a hwtstamp_config
+ * @returns 0, -EFAULT on a failed user copy, -EINVAL/-ERANGE on bad config
+ *
+ * Every accepted Rx filter other than NONE is reported back as FILTER_ALL.
+ */
+static int hwtstamp_ioctl(struct net_device *netdev, struct ifreq *ifr)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct hwtstamp_config cfg;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	/* Any flag bit is rejected */
+	if (cfg.flags)
+		return -EINVAL;
+
+	/* Only plain on/off Tx timestamping is accepted */
+	if (cfg.tx_type != HWTSTAMP_TX_ON && cfg.tx_type != HWTSTAMP_TX_OFF)
+		return -ERANGE;
+
+	/* Record the Rx timestamping state together with the filter choice */
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		ifstate_reset(lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED);
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		cfg.rx_filter = HWTSTAMP_FILTER_ALL;
+		ifstate_set(lio, LIO_IFSTATE_RX_TIMESTAMP_ENABLED);
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/* Hand the possibly adjusted configuration back to user space */
+	if (copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)))
+		return -EFAULT;
+
+	return 0;
+}
+
+/**
+ * \brief ioctl handler
+ * @param netdev network device
+ * @param ifr interface request
+ * @param cmd command
+ * @returns hwtstamp_ioctl() result for SIOCSHWTSTAMP, else -EOPNOTSUPP
+ */
+static int liquidio_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
+{
+	/* Hardware timestamp setup is the only request handled here */
+	if (cmd != SIOCSHWTSTAMP)
+		return -EOPNOTSUPP;
+
+	return hwtstamp_ioctl(netdev, ifr);
+}
+
+static void handle_timestamp(struct octeon_device *oct, u32 status, void *buf)
+{
+	struct sk_buff *skb = (struct sk_buff *)buf;
+	struct octnet_buf_free_info *finfo;
+	struct oct_timestamp_resp *resp;
+	struct octeon_soft_command *sc;
+	struct lio *lio;
+
+	finfo = (struct octnet_buf_free_info *)skb->cb;
+	lio = finfo->lio;
+	sc = finfo->sc;
+	oct = lio->oct_dev;	/* replaces the oct argument */
+	resp = (struct oct_timestamp_resp *)sc->virtrptr;
+	/* on failure the response timestamp is forced to zero */
+	if (status != OCTEON_REQUEST_DONE) {
+		dev_err(&oct->pci_dev->dev, "Tx timestamp instruction failed. Status: %llx\n",
+			CVM_CAST64(status));
+		resp->timestamp = 0;
+	}
+
+	octeon_swap_8B_data(&resp->timestamp, 1);
+
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) {
+		struct skb_shared_hwtstamps ts;
+		u64 ns = resp->timestamp;
+
+		netif_info(lio, tx_done, lio->netdev,
+			   "Got resulting SKBTX_HW_TSTAMP skb=%p ns=%016llu\n",
+			   skb, (unsigned long long)ns);
+		ts.hwtstamp = ns_to_ktime(ns + lio->ptp_adjust);
+		skb_tstamp_tx(skb, &ts);
+	}
+	/* the soft command and the skb are released on every path */
+	octeon_free_soft_command(oct, sc);
+	tx_buffer_free(skb);
+}
+
+/** \brief Send a data packet that will be timestamped
+ * @param oct octeon device
+ * @param ndata pointer to network data; a NORESP reqtype becomes its RESP form
+ * @param finfo pointer to private network data; finfo->sc is set here
+ */
+static int send_nic_timestamp_pkt(struct octeon_device *oct,
+				  struct octnic_data_pkt *ndata,
+				  struct octnet_buf_free_info *finfo)
+{
+	struct octeon_soft_command *sc;
+	int ring_doorbell;
+	struct lio *lio;
+	int retval;
+	u32 len;
+
+	lio = finfo->lio;
+
+	sc = octeon_alloc_soft_command_resp(oct, &ndata->cmd,
+					    sizeof(struct oct_timestamp_resp));
+	finfo->sc = sc;
+
+	if (!sc) {
+		dev_err(&oct->pci_dev->dev, "No memory for timestamped data packet\n");
+		return IQ_SEND_FAILED;
+	}
+	/* switch to the response-carrying variant of the request type */
+	if (ndata->reqtype == REQTYPE_NORESP_NET)
+		ndata->reqtype = REQTYPE_RESP_NET;
+	else if (ndata->reqtype == REQTYPE_NORESP_NET_SG)
+		ndata->reqtype = REQTYPE_RESP_NET_SG;
+	/* handle_timestamp() is installed as the soft command's callback */
+	sc->callback = handle_timestamp;
+	sc->callback_arg = finfo->skb;
+	sc->iq_no = ndata->q_no;
+
+	len = (u32)((struct octeon_instr_ih3 *)(&sc->cmd.cmd3.ih3))->dlengsz;
+
+	ring_doorbell = 1;
+
+	retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd,
+				     sc, len, ndata->reqtype);
+
+	if (retval == IQ_SEND_FAILED) {
+		dev_err(&oct->pci_dev->dev, "timestamp data packet failed status: %x\n",
+			retval);
+		octeon_free_soft_command(oct, sc);
+	} else {
+		netif_info(lio, tx_queued, lio->netdev, "Queued timestamp packet\n");
+	}
+
+	return retval;
+}
+
+/** \brief Transmit networks packets to the Octeon interface
+ * @param skb      packet to transmit; freed here when it has to be dropped
+ * @param netdev   pointer to network device
+ * @returns NETDEV_TX_OK when the packet was queued or dropped, NETDEV_TX_BUSY
+ *             when the instruction queue is full or a DMA mapping fails
+ */
+static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev)
+{
+	struct octnet_buf_free_info *finfo;
+	union octnic_cmd_setup cmdsetup;
+	struct octnic_data_pkt ndata;
+	struct octeon_instr_irh *irh;
+	struct oct_iq_stats *stats;
+	struct octeon_device *oct;
+	int q_idx = 0, iq_no = 0;
+	union tx_info *tx_info;
+	struct lio *lio;
+	int status = 0;
+	u64 dptr = 0;
+	u32 tag = 0;
+	int j;
+
+	lio = GET_LIO(netdev);
+	oct = lio->oct_dev;
+
+	if (netif_is_multiqueue(netdev)) {
+		q_idx = skb->queue_mapping;
+		q_idx = (q_idx % (lio->linfo.num_txpciq));
+		tag = q_idx;
+		iq_no = lio->linfo.txpciq[q_idx].s.q_no;
+	} else {
+		iq_no = lio->txq;
+	}
+
+	stats = &oct->instr_queue[iq_no]->stats;
+
+	/* Check for all conditions in which the current packet cannot be
+	 * transmitted.
+	 */
+	if (!(atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) ||
+	    (!lio->linfo.link.s.link_up) || (skb->len <= 0)) {
+		netif_info(lio, tx_err, lio->netdev, "Transmit failed link_status : %d\n",
+			   lio->linfo.link.s.link_up);
+		goto lio_xmit_failed;
+	}
+
+	/* Use space in skb->cb to store info used to unmap and
+	 * free the buffers.
+	 */
+	finfo = (struct octnet_buf_free_info *)skb->cb;
+	finfo->lio = lio;
+	finfo->skb = skb;
+	finfo->sc = NULL;
+
+	/* Prepare the attributes for the data to be passed to OSI. */
+	memset(&ndata, 0, sizeof(struct octnic_data_pkt));
+
+	ndata.buf = finfo;
+
+	ndata.q_no = iq_no;
+
+	if (netif_is_multiqueue(netdev)) {
+		if (octnet_iq_is_full(oct, ndata.q_no)) {
+			/* defer sending if queue is full */
+			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+				   ndata.q_no);
+			stats->tx_iq_busy++;
+			return NETDEV_TX_BUSY;
+		}
+	} else {
+		if (octnet_iq_is_full(oct, lio->txq)) {
+			/* defer sending if queue is full */
+			stats->tx_iq_busy++;
+			netif_info(lio, tx_err, lio->netdev, "Transmit failed iq:%d full\n",
+				   ndata.q_no);
+			return NETDEV_TX_BUSY;
+		}
+	}
+
+	ndata.datasize = skb->len;
+
+	cmdsetup.u64 = 0;
+	cmdsetup.s.iq_no = iq_no;
+
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb->encapsulation) {
+			cmdsetup.s.tnl_csum = 1;
+			stats->tx_vxlan++;
+		} else {
+			cmdsetup.s.transport_csum = 1;
+		}
+	}
+	if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
+		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+		cmdsetup.s.timestamp = 1;
+	}
+
+	if (!skb_shinfo(skb)->nr_frags) {
+		cmdsetup.s.u.datasize = skb->len;
+		octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+		/* Linear skb: map skb->data for the device in one piece */
+		dptr = dma_map_single(&oct->pci_dev->dev,
+				      skb->data,
+				      skb->len,
+				      DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 1\n",
+				__func__);
+			return NETDEV_TX_BUSY;
+		}
+
+		ndata.cmd.cmd3.dptr = dptr;
+		finfo->dptr = dptr;
+		ndata.reqtype = REQTYPE_NORESP_NET;
+
+	} else {
+		struct skb_frag_struct *frag;
+		struct octnic_gather *g;
+		int i, frags;
+
+		spin_lock(&lio->glist_lock[q_idx]);
+		g = (struct octnic_gather *)list_delete_head(
+		    &lio->glist[q_idx]);
+		spin_unlock(&lio->glist_lock[q_idx]);
+
+		if (!g) {
+			netif_info(lio, tx_err, lio->netdev,
+				   "Transmit scatter gather: glist null!\n");
+			goto lio_xmit_failed;
+		}
+
+		cmdsetup.s.gather = 1;
+		cmdsetup.s.u.gatherptrs = (skb_shinfo(skb)->nr_frags + 1);
+		octnet_prepare_pci_cmd(oct, &ndata.cmd, &cmdsetup, tag);
+
+		memset(g->sg, 0, g->sg_size);
+
+		g->sg[0].ptr[0] = dma_map_single(&oct->pci_dev->dev,
+						 skb->data,
+						 (skb->len - skb->data_len),
+						 DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, g->sg[0].ptr[0])) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 2\n",
+				__func__);
+			return NETDEV_TX_BUSY;
+		}
+		add_sg_size(&g->sg[0], (skb->len - skb->data_len), 0);
+
+		frags = skb_shinfo(skb)->nr_frags;
+		i = 1;
+		while (frags--) {
+			frag = &skb_shinfo(skb)->frags[i - 1];
+
+			g->sg[(i >> 2)].ptr[(i & 3)] =
+				dma_map_page(&oct->pci_dev->dev,
+					     frag->page.p,
+					     frag->page_offset,
+					     frag->size,
+					     DMA_TO_DEVICE);
+			if (dma_mapping_error(&oct->pci_dev->dev,
+					      g->sg[i >> 2].ptr[i & 3])) {
+				dma_unmap_single(&oct->pci_dev->dev,
+						 g->sg[0].ptr[0],
+						 skb->len - skb->data_len,
+						 DMA_TO_DEVICE);
+				for (j = 1; j < i; j++) {
+					frag = &skb_shinfo(skb)->frags[j - 1];
+					dma_unmap_page(&oct->pci_dev->dev,
+						       g->sg[j >> 2].ptr[j & 3],
+						       frag->size,
+						       DMA_TO_DEVICE);
+				}
+				dev_err(&oct->pci_dev->dev, "%s DMA mapping error 3\n",
+					__func__);
+				return NETDEV_TX_BUSY;
+			}
+
+			add_sg_size(&g->sg[(i >> 2)], frag->size, (i & 3));
+			i++;
+		}
+
+		dptr = dma_map_single(&oct->pci_dev->dev,
+				      g->sg, g->sg_size,
+				      DMA_TO_DEVICE);
+		if (dma_mapping_error(&oct->pci_dev->dev, dptr)) {
+			dev_err(&oct->pci_dev->dev, "%s DMA mapping error 4\n",
+				__func__);
+			dma_unmap_single(&oct->pci_dev->dev, g->sg[0].ptr[0],
+					 skb->len - skb->data_len,
+					 DMA_TO_DEVICE);
+			for (j = 1; j < i; j++) { /* sg slots 1..i-1 hold frags */
+				frag = &skb_shinfo(skb)->frags[j - 1];
+				dma_unmap_page(&oct->pci_dev->dev,
+					       g->sg[j >> 2].ptr[j & 3],
+					       frag->size, DMA_TO_DEVICE);
+			}
+			return NETDEV_TX_BUSY;
+		}
+
+		ndata.cmd.cmd3.dptr = dptr;
+		finfo->dptr = dptr;
+		finfo->g = g;
+
+		ndata.reqtype = REQTYPE_NORESP_NET_SG;
+	}
+
+	irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh;
+	tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0];
+
+	if (skb_shinfo(skb)->gso_size) {
+		tx_info->s.gso_size = skb_shinfo(skb)->gso_size;
+		tx_info->s.gso_segs = skb_shinfo(skb)->gso_segs;
+	}
+
+	/* HW insert VLAN tag */
+	if (skb_vlan_tag_present(skb)) {
+		irh->priority = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT;
+		irh->vlan = skb_vlan_tag_get(skb) & VLAN_VID_MASK;
+	}
+
+	if (unlikely(cmdsetup.s.timestamp))
+		status = send_nic_timestamp_pkt(oct, &ndata, finfo);
+	else
+		status = octnet_send_nic_data_pkt(oct, &ndata);
+	if (status == IQ_SEND_FAILED)
+		goto lio_xmit_failed;
+
+	netif_info(lio, tx_queued, lio->netdev, "Transmit queued successfully\n");
+
+	if (status == IQ_SEND_STOP) {
+		dev_err(&oct->pci_dev->dev, "Rcvd IQ_SEND_STOP signal; stopping IQ-%d\n",
+			iq_no);
+		stop_q(lio->netdev, q_idx);
+	}
+
+	netif_trans_update(netdev);
+
+	if (skb_shinfo(skb)->gso_size)
+		stats->tx_done += skb_shinfo(skb)->gso_segs;
+	else
+		stats->tx_done++;
+	stats->tx_tot_bytes += skb->len;
+
+	return NETDEV_TX_OK;
+
+lio_xmit_failed:
+	stats->tx_dropped++;
+	netif_info(lio, tx_err, lio->netdev, "IQ%d Transmit dropped:%llu\n",
+		   iq_no, stats->tx_dropped);
+	if (dptr)
+		dma_unmap_single(&oct->pci_dev->dev, dptr,
+				 ndata.datasize, DMA_TO_DEVICE);
+	tx_buffer_free(skb);
+	return NETDEV_TX_OK;
+}
+
+/** \brief Network device Tx timeout
+ * @param netdev    pointer to network device
+ *
+ * Logs the drop count, calls netif_trans_update() and wakes the Tx queues.
+ */
+static void liquidio_tx_timeout(struct net_device *netdev)
+{
+	struct lio *lio = GET_LIO(netdev);
+
+	netif_info(lio, tx_err, lio->netdev,
+		   "Transmit timeout tx_dropped:%lu, waking up queues now!!\n",
+		   netdev->stats.tx_dropped);
+	netif_trans_update(netdev);
+	txqs_wake(netdev);
+}
+
+/* Send an OCTNET_CMD_ADD_VLAN_FILTER control command for VLAN id vid. */
+static int
+liquidio_vlan_rx_add_vid(struct net_device *netdev,
+			 __be16 proto __attribute__((unused)), u16 vid)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int err;
+
+	memset(&nctrl, 0, sizeof(nctrl));
+
+	nctrl.ncmd.u64 = 0;
+	nctrl.ncmd.s.cmd = OCTNET_CMD_ADD_VLAN_FILTER;
+	nctrl.ncmd.s.param1 = vid;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+
+	err = octnet_send_nic_ctrl_pkt(oct, &nctrl);
+	if (err < 0)
+		dev_err(&oct->pci_dev->dev, "Add VLAN filter failed in core (ret: 0x%x)\n",
+			err);
+
+	return err;
+}
+
+/* Send an OCTNET_CMD_DEL_VLAN_FILTER control command for VLAN id vid. */
+static int
+liquidio_vlan_rx_kill_vid(struct net_device *netdev,
+			  __be16 proto __attribute__((unused)), u16 vid)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+
+	nctrl.ncmd.s.cmd = OCTNET_CMD_DEL_VLAN_FILTER;
+	nctrl.ncmd.s.param1 = vid;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "Del VLAN filter failed in core (ret: 0x%x)\n",
+			ret);
+	}
+	return ret;
+}
+
+/** Sending command to enable/disable RX checksum offload
+ * @param netdev                pointer to network device
+ * @param command               OCTNET_CMD_TNL_RX_CSUM_CTL
+ * @param rx_cmd                OCTNET_CMD_RXCSUM_ENABLE or _DISABLE
+ * @returns octnet_send_nic_ctrl_pkt() result; negative values are logged
+ */
+static int liquidio_set_rxcsum_command(struct net_device *netdev, int command,
+				       u8 rx_cmd)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	/* Zero every field first: nctrl lives on the stack */
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+	nctrl.ncmd.s.cmd = command;
+	nctrl.ncmd.s.param1 = rx_cmd;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev, "DEVFLAGS RXCSUM change failed in core (ret:0x%x)\n",
+			ret);
+	}
+	return ret;
+}
+
+/** Sending command to add/delete VxLAN UDP port to firmware
+ * @param netdev                pointer to network device
+ * @param command               OCTNET_CMD_VXLAN_PORT_CONFIG
+ * @param vxlan_port            VxLAN port to be added or deleted
+ * @param vxlan_cmd_bit         OCTNET_CMD_VXLAN_PORT_ADD or _DEL
+ * @returns octnet_send_nic_ctrl_pkt() result; negative values are logged
+ */
+static int liquidio_vxlan_port_command(struct net_device *netdev, int command,
+				       u16 vxlan_port, u8 vxlan_cmd_bit)
+{
+	struct lio *lio = GET_LIO(netdev);
+	struct octeon_device *oct = lio->oct_dev;
+	struct octnic_ctrl_pkt nctrl;
+	int ret = 0;
+
+	/* Zero every field first: nctrl lives on the stack */
+	memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt));
+	nctrl.ncmd.s.cmd = command;
+	nctrl.ncmd.s.more = vxlan_cmd_bit;
+	nctrl.ncmd.s.param1 = vxlan_port;
+	nctrl.iq_no = lio->linfo.txpciq[0].s.q_no;
+	nctrl.wait_time = 100;
+	nctrl.netpndev = (u64)netdev;
+	nctrl.cb_fn = liquidio_link_ctrl_cmd_completion;
+
+	ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl);
+	if (ret < 0) {
+		dev_err(&oct->pci_dev->dev,
+			"DEVFLAGS VxLAN port add/delete failed in core (ret : 0x%x)\n",
+			ret);
+	}
+	return ret;
+}
+
+/** \brief Net device fix features
+ * @param netdev  pointer to network device
+ * @param request features requested
+ * @returns the requested features minus the offloads the device cannot
+ *          provide
+ */
+static netdev_features_t liquidio_fix_features(struct net_device *netdev,
+					       netdev_features_t request)
+{
+	struct lio *lio = netdev_priv(netdev);
+
+	/* Clearing a bit that was not requested is a no-op: test capability only */
+	if (!(lio->dev_capability & NETIF_F_RXCSUM))
+		request &= ~NETIF_F_RXCSUM;
+
+	if (!(lio->dev_capability & NETIF_F_HW_CSUM))
+		request &= ~NETIF_F_HW_CSUM;
+
+	if (!(lio->dev_capability & NETIF_F_TSO))
+		request &= ~NETIF_F_TSO;
+
+	if (!(lio->dev_capability & NETIF_F_TSO6))
+		request &= ~NETIF_F_TSO6;
+
+	if (!(lio->dev_capability & NETIF_F_LRO))
+		request &= ~NETIF_F_LRO;
+
+	/* Disable LRO if RXCSUM is off */
+	if (!(request & NETIF_F_RXCSUM) && (netdev->features & NETIF_F_LRO) &&
+	    (lio->dev_capability & NETIF_F_LRO))
+		request &= ~NETIF_F_LRO;
+
+	return request;
+}
+
+/** \brief Net device set features
+ * @param netdev  pointer to network device
+ * @param features features to enable/disable
+ */
+static int liquidio_set_features(struct net_device *netdev,
+				 netdev_features_t features)
+{
+	struct lio *lio = netdev_priv(netdev);
+
+	/* Gate only the LRO command on an LRO toggle so RXCSUM changes still run */
+	if (((netdev->features ^ features) & NETIF_F_LRO) &&
+	    (lio->dev_capability & NETIF_F_LRO)) {
+		if (features & NETIF_F_LRO)
+			liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+					     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
+		else
+			liquidio_set_feature(netdev, OCTNET_CMD_LRO_DISABLE,
+					     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
+	}
+	if (!(netdev->features & NETIF_F_RXCSUM) &&
+	    (lio->enc_dev_capability & NETIF_F_RXCSUM) &&
+	    (features & NETIF_F_RXCSUM))
+		liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+					    OCTNET_CMD_RXCSUM_ENABLE);
+	else if ((netdev->features & NETIF_F_RXCSUM) &&
+		 (lio->enc_dev_capability & NETIF_F_RXCSUM) &&
+		 !(features & NETIF_F_RXCSUM))
+		liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+					    OCTNET_CMD_RXCSUM_DISABLE);
+
+	return 0;
+}
+
+/* udp_tunnel_add hook: only VXLAN ports reach liquidio_vxlan_port_command() */
+static void liquidio_add_vxlan_port(struct net_device *netdev,
+				    struct udp_tunnel_info *ti)
+{
+	if (ti->type == UDP_TUNNEL_TYPE_VXLAN) {
+		liquidio_vxlan_port_command(netdev,
+					    OCTNET_CMD_VXLAN_PORT_CONFIG,
+					    htons(ti->port),
+					    OCTNET_CMD_VXLAN_PORT_ADD);
+	}
+}
+
+/* udp_tunnel_del hook: only VXLAN ports reach liquidio_vxlan_port_command() */
+static void liquidio_del_vxlan_port(struct net_device *netdev,
+				    struct udp_tunnel_info *ti)
+{
+	if (ti->type == UDP_TUNNEL_TYPE_VXLAN) {
+		liquidio_vxlan_port_command(netdev,
+					    OCTNET_CMD_VXLAN_PORT_CONFIG,
+					    htons(ti->port),
+					    OCTNET_CMD_VXLAN_PORT_DEL);
+	}
+}
+
+static const struct net_device_ops lionetdevops = {
+	.ndo_open		= liquidio_open,
+	.ndo_stop		= liquidio_stop,
+	.ndo_start_xmit		= liquidio_xmit,
+	.ndo_get_stats		= liquidio_get_stats,
+	.ndo_set_mac_address	= liquidio_set_mac,
+	.ndo_set_rx_mode	= liquidio_set_mcast_list, /* mcast + uc lists */
+	.ndo_tx_timeout		= liquidio_tx_timeout,
+	.ndo_vlan_rx_add_vid    = liquidio_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid   = liquidio_vlan_rx_kill_vid,
+	.ndo_change_mtu		= liquidio_change_mtu,
+	.ndo_do_ioctl		= liquidio_ioctl, /* SIOCSHWTSTAMP only */
+	.ndo_fix_features	= liquidio_fix_features,
+	.ndo_set_features	= liquidio_set_features,
+	.ndo_udp_tunnel_add     = liquidio_add_vxlan_port, /* VXLAN only */
+	.ndo_udp_tunnel_del     = liquidio_del_vxlan_port, /* VXLAN only */
+	.ndo_select_queue	= select_q,
+};
+
+static int lio_nic_info(struct octeon_recv_info *recv_info, void *buf)
+{
+	struct octeon_device *oct = (struct octeon_device *)buf;
+	struct octeon_recv_pkt *recv_pkt = recv_info->recv_pkt;
+	union oct_link_status *ls;
+	int gmxport = 0;
+	int i;
+
+	if (recv_pkt->buffer_size[0] != sizeof(*ls)) {
+		dev_err(&oct->pci_dev->dev, "Malformed NIC_INFO, len=%d, ifidx=%d\n",
+			recv_pkt->buffer_size[0],
+			recv_pkt->rh.r_nic_info.gmxport);
+		goto nic_info_err;
+	}
+
+	gmxport = recv_pkt->rh.r_nic_info.gmxport;
+	ls = (union oct_link_status *)get_rbd(recv_pkt->buffer_ptr[0]);
+	/* swap the status in place: sizeof(*ls) / 8 words of 8 bytes each */
+	octeon_swap_8B_data((u64 *)ls, (sizeof(union oct_link_status)) >> 3);
+	/* hand the status to the first interface whose gmxport matches */
+	for (i = 0; i < oct->ifcount; i++) {
+		if (oct->props[i].gmxport == gmxport) {
+			update_link_status(oct->props[i].netdev, ls);
+			break;
+		}
+	}
+	/* reached on success too: the receive buffers are always released */
+nic_info_err:
+	for (i = 0; i < recv_pkt->buffer_count; i++)
+		recv_buffer_free(recv_pkt->buffer_ptr[i]);
+	octeon_free_recv_info(recv_info);
+	return 0;	/* malformed packets are reported as 0 as well */
+}
+
+/**
+ * \brief Setup network interfaces
+ * @param octeon_dev  octeon device
+ *
+ * Called during init time for each device. It assumes the NIC
+ * is already up and running.  The link information for each
+ * interface is taken from the firmware's IF_CFG response.
+ */
+static int setup_nic_devices(struct octeon_device *octeon_dev)
+{
+	int retval, num_iqueues, num_oqueues;
+	struct liquidio_if_cfg_context *ctx;
+	u32 resp_size, ctx_size, data_size;
+	struct liquidio_if_cfg_resp *resp;
+	struct octeon_soft_command *sc;
+	union oct_nic_if_cfg if_cfg;
+	struct octdev_props *props;
+	struct net_device *netdev;
+	struct lio_version *vdata;
+	struct lio *lio = NULL;
+	u8 mac[ETH_ALEN], i, j;
+	u32 ifidx_or_pfnum;
+
+	ifidx_or_pfnum = octeon_dev->pf_num;
+
+	/* This is to handle link status changes */
+	octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, OPCODE_NIC_INFO,
+				    lio_nic_info, octeon_dev);
+
+	/* REQTYPE_RESP_NET and REQTYPE_SOFT_COMMAND do not have free functions.
+	 * They are handled directly.
+	 */
+	octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_NORESP_NET,
+					free_netbuf);
+
+	octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_NORESP_NET_SG,
+					free_netsgbuf);
+
+	octeon_register_reqtype_free_fn(octeon_dev, REQTYPE_RESP_NET_SG,
+					free_netsgbuf_with_resp);
+
+	for (i = 0; i < octeon_dev->ifcount; i++) {
+		resp_size = sizeof(struct liquidio_if_cfg_resp);
+		ctx_size = sizeof(struct liquidio_if_cfg_context);
+		data_size = sizeof(struct lio_version);
+		sc = (struct octeon_soft_command *)
+			octeon_alloc_soft_command(octeon_dev, data_size,
+						  resp_size, ctx_size);
+		resp = (struct liquidio_if_cfg_resp *)sc->virtrptr;
+		ctx  = (struct liquidio_if_cfg_context *)sc->ctxptr;
+		vdata = (struct lio_version *)sc->virtdptr;
+
+		*((u64 *)vdata) = 0;
+		vdata->major = cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION);
+		vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION);
+		vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION);
+
+		WRITE_ONCE(ctx->cond, 0);
+		ctx->octeon_id = lio_get_device_id(octeon_dev);
+		init_waitqueue_head(&ctx->wc);
+
+		if_cfg.u64 = 0;
+
+		if_cfg.s.num_iqueues = octeon_dev->sriov_info.rings_per_vf;
+		if_cfg.s.num_oqueues = octeon_dev->sriov_info.rings_per_vf;
+		if_cfg.s.base_queue = 0;
+
+		sc->iq_no = 0;
+
+		octeon_prepare_soft_command(octeon_dev, sc, OPCODE_NIC,
+					    OPCODE_NIC_IF_CFG, 0, if_cfg.u64,
+					    0);
+
+		sc->callback = if_cfg_callback;
+		sc->callback_arg = sc;
+		sc->wait_time = 5000;
+
+		retval = octeon_send_soft_command(octeon_dev, sc);
+		if (retval == IQ_SEND_FAILED) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"iq/oq config failed status: %x\n", retval);
+			/* Soft instr is freed by driver in case of failure. */
+			goto setup_nic_dev_fail;
+		}
+
+		/* Sleep on a wait queue till the cond flag indicates that the
+		 * response arrived or timed-out.
+		 */
+		if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) {
+			dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n");
+			goto setup_nic_wait_intr;
+		}
+
+		retval = resp->status;
+		if (retval) {
+			dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		octeon_swap_8B_data((u64 *)(&resp->cfg_info),
+				    (sizeof(struct liquidio_if_cfg_info)) >> 3);
+
+		num_iqueues = hweight64(resp->cfg_info.iqmask);
+		num_oqueues = hweight64(resp->cfg_info.oqmask);
+
+		if (!(num_iqueues) || !(num_oqueues)) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"Got bad iqueues (%016llx) or oqueues (%016llx) from firmware.\n",
+				resp->cfg_info.iqmask, resp->cfg_info.oqmask);
+			goto setup_nic_dev_fail;
+		}
+		dev_dbg(&octeon_dev->pci_dev->dev,
+			"interface %d, iqmask %016llx, oqmask %016llx, numiqueues %d, numoqueues %d\n",
+			i, resp->cfg_info.iqmask, resp->cfg_info.oqmask,
+			num_iqueues, num_oqueues);
+
+		netdev = alloc_etherdev_mq(LIO_SIZE, num_iqueues);
+
+		if (!netdev) {
+			dev_err(&octeon_dev->pci_dev->dev, "Device allocation failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		SET_NETDEV_DEV(netdev, &octeon_dev->pci_dev->dev);
+
+		/* Associate the routines that will handle different
+		 * netdev tasks.
+		 */
+		netdev->netdev_ops = &lionetdevops;
+
+		lio = GET_LIO(netdev);
+
+		memset(lio, 0, sizeof(struct lio));
+
+		lio->ifidx = ifidx_or_pfnum;
+
+		props = &octeon_dev->props[i];
+		props->gmxport = resp->cfg_info.linfo.gmxport;
+		props->netdev = netdev;
+
+		lio->linfo.num_rxpciq = num_oqueues;
+		lio->linfo.num_txpciq = num_iqueues;
+
+		for (j = 0; j < num_oqueues; j++) {
+			lio->linfo.rxpciq[j].u64 =
+			    resp->cfg_info.linfo.rxpciq[j].u64;
+		}
+		for (j = 0; j < num_iqueues; j++) {
+			lio->linfo.txpciq[j].u64 =
+			    resp->cfg_info.linfo.txpciq[j].u64;
+		}
+
+		lio->linfo.hw_addr = resp->cfg_info.linfo.hw_addr;
+		lio->linfo.gmxport = resp->cfg_info.linfo.gmxport;
+		lio->linfo.link.u64 = resp->cfg_info.linfo.link.u64;
+		lio->linfo.macaddr_is_admin_asgnd =
+			resp->cfg_info.linfo.macaddr_is_admin_asgnd;
+
+		lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
+
+		lio->dev_capability = NETIF_F_HIGHDMA
+				      | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM
+				      | NETIF_F_SG | NETIF_F_RXCSUM
+				      | NETIF_F_TSO | NETIF_F_TSO6
+				      | NETIF_F_GRO
+				      | NETIF_F_LRO;
+		netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE);
+
+		/* Copy of transmit encapsulation capabilities:
+		 * TSO, TSO6, Checksums for this device
+		 */
+		lio->enc_dev_capability = NETIF_F_IP_CSUM
+					  | NETIF_F_IPV6_CSUM
+					  | NETIF_F_GSO_UDP_TUNNEL
+					  | NETIF_F_HW_CSUM | NETIF_F_SG
+					  | NETIF_F_RXCSUM
+					  | NETIF_F_TSO | NETIF_F_TSO6
+					  | NETIF_F_LRO;
+
+		netdev->hw_enc_features =
+		    (lio->enc_dev_capability & ~NETIF_F_LRO);
+		netdev->vlan_features = lio->dev_capability;
+		/* Add any unchangeable hw features */
+		lio->dev_capability |= NETIF_F_HW_VLAN_CTAG_FILTER |
+				       NETIF_F_HW_VLAN_CTAG_RX |
+				       NETIF_F_HW_VLAN_CTAG_TX;
+
+		netdev->features = (lio->dev_capability & ~NETIF_F_LRO);
+
+		netdev->hw_features = lio->dev_capability;
+
+		/* MTU range: 68 - 16000 */
+		netdev->min_mtu = LIO_MIN_MTU_SIZE;
+		netdev->max_mtu = LIO_MAX_MTU_SIZE;
+
+		/* Point to the properties for octeon device to which this
+		 * interface belongs.
+		 */
+		lio->oct_dev = octeon_dev;
+		lio->octprops = props;
+		lio->netdev = netdev;
+
+		dev_dbg(&octeon_dev->pci_dev->dev,
+			"if%d gmx: %d hw_addr: 0x%llx\n", i,
+			lio->linfo.gmxport, CVM_CAST64(lio->linfo.hw_addr));
+
+		/* 64-bit swap required on LE machines */
+		octeon_swap_8B_data(&lio->linfo.hw_addr, 1);
+		for (j = 0; j < ETH_ALEN; j++)
+			mac[j] = *((u8 *)(((u8 *)&lio->linfo.hw_addr) + 2 + j));
+
+		/* Copy MAC Address to OS network device structure */
+		ether_addr_copy(netdev->dev_addr, mac);
+
+		if (setup_io_queues(octeon_dev, i)) {
+			dev_err(&octeon_dev->pci_dev->dev, "I/O queues creation failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		ifstate_set(lio, LIO_IFSTATE_DROQ_OPS);
+
+		/* For VFs, enable Octeon device interrupts here,
+		 * as this is contingent upon IO queue setup
+		 */
+		octeon_dev->fn_list.enable_interrupt(octeon_dev,
+						     OCTEON_ALL_INTR);
+
+		/* By default all interfaces on a single Octeon use the same
+		 * tx and rx queues
+		 */
+		lio->txq = lio->linfo.txpciq[0].s.q_no;
+		lio->rxq = lio->linfo.rxpciq[0].s.q_no;
+
+		lio->tx_qsize = octeon_get_tx_qsize(octeon_dev, lio->txq);
+		lio->rx_qsize = octeon_get_rx_qsize(octeon_dev, lio->rxq);
+
+		if (setup_glists(lio, num_iqueues)) {
+			dev_err(&octeon_dev->pci_dev->dev,
+				"Gather list allocation failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		/* Register ethtool support */
+		liquidio_set_ethtool_ops(netdev);
+		if (lio->oct_dev->chip_id == OCTEON_CN23XX_VF_VID)
+			octeon_dev->priv_flags = OCT_PRIV_FLAG_DEFAULT;
+		else
+			octeon_dev->priv_flags = 0x0;
+
+		if (netdev->features & NETIF_F_LRO)
+			liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE,
+					     OCTNIC_LROIPV4 | OCTNIC_LROIPV6);
+
+		if ((debug != -1) && (debug & NETIF_MSG_HW))
+			liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE,
+					     0);
+
+		if (setup_link_status_change_wq(netdev))
+			goto setup_nic_dev_fail;
+
+		/* Register the network device with the OS */
+		if (register_netdev(netdev)) {
+			dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n");
+			goto setup_nic_dev_fail;
+		}
+
+		dev_dbg(&octeon_dev->pci_dev->dev,
+			"Setup NIC ifidx:%d mac:%02x%02x%02x%02x%02x%02x\n",
+			i, mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+		netif_carrier_off(netdev);
+		lio->link_changes++;
+
+		ifstate_set(lio, LIO_IFSTATE_REGISTERED);
+
+		/* Sending command to firmware to enable Rx checksum offload
+		 * by default at the time of setup of Liquidio driver for
+		 * this device
+		 */
+		liquidio_set_rxcsum_command(netdev, OCTNET_CMD_TNL_RX_CSUM_CTL,
+					    OCTNET_CMD_RXCSUM_ENABLE);
+		liquidio_set_feature(netdev, OCTNET_CMD_TNL_TX_CSUM_CTL,
+				     OCTNET_CMD_TXCSUM_ENABLE);
+
+		dev_dbg(&octeon_dev->pci_dev->dev,
+			"NIC ifidx:%d Setup successful\n", i);
+
+		octeon_free_soft_command(octeon_dev, sc);
+	}
+
+	return 0;
+
+setup_nic_dev_fail:
+
+	octeon_free_soft_command(octeon_dev, sc);
+
+setup_nic_wait_intr:
+
+	while (i--) {
+		dev_err(&octeon_dev->pci_dev->dev,
+			"NIC ifidx:%d Setup failed\n", i);
+		liquidio_destroy_nic_device(octeon_dev, i);
+	}
+	return -ENODEV;
+}
+
+/**
+ * \brief initialize the NIC
+ * @param oct octeon device
+ *
+ * This initialization routine is called once the Octeon device application is
+ * up and running
+ */
+static int liquidio_init_nic_module(struct octeon_device *oct)
+{
+	struct oct_intrmod_cfg *intrmod_cfg;
+	int num_nic_ports = 1;
+	int i, retval = 0;
+
+	dev_dbg(&oct->pci_dev->dev, "Initializing network interfaces\n");
+
+	/* Only the default iq and oq were initialized; initialize the rest
+	 * as well and run the port_config command for each port.
+	 */
+	oct->ifcount = num_nic_ports;
+	memset(oct->props, 0,
+	       sizeof(struct octdev_props) * num_nic_ports);
+
+	for (i = 0; i < MAX_OCTEON_LINKS; i++)
+		oct->props[i].gmxport = -1;
+
+	retval = setup_nic_devices(oct);
+	if (retval) {
+		dev_err(&oct->pci_dev->dev, "Setup NIC devices failed\n");
+		goto octnet_init_failure;
+	}
+
+	/* Initialize interrupt moderation params */
+	intrmod_cfg = &((struct octeon_device *)oct)->intrmod;
+	intrmod_cfg->rx_enable = 1;
+	intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL;
+	intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR;
+	intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR;
+	intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER;
+	intrmod_cfg->rx_maxtmr_trigger = LIO_INTRMOD_RXMAXTMR_TRIGGER;
+	intrmod_cfg->rx_mintmr_trigger = LIO_INTRMOD_RXMINTMR_TRIGGER;
+	intrmod_cfg->rx_mincnt_trigger = LIO_INTRMOD_RXMINCNT_TRIGGER;
+	intrmod_cfg->tx_enable = 1;
+	intrmod_cfg->tx_maxcnt_trigger = LIO_INTRMOD_TXMAXCNT_TRIGGER;
+	intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER;
+	intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct));
+	intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct));
+	intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct));
+	dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n");
+
+	return retval;
+
+octnet_init_failure:
+
+	oct->ifcount = 0;
+
+	return retval;
+}
+
+/**
+ * \brief Device initialization for each Octeon device that is probed
+ * @param oct  octeon device
+ */
+static int octeon_device_init(struct octeon_device *oct)
+{
+	u32 rev_id;
+	int j;
+
+	atomic_set(&oct->status, OCT_DEV_BEGIN_STATE);
+
+	/* Enable access to the octeon device and make its DMA capability
+	 * known to the OS.
+	 */
+	if (octeon_pci_os_setup(oct))
+		return 1;
+	atomic_set(&oct->status, OCT_DEV_PCI_ENABLE_DONE);
+
+	oct->chip_id = OCTEON_CN23XX_VF_VID;
+	pci_read_config_dword(oct->pci_dev, 8, &rev_id);
+	oct->rev_id = rev_id & 0xff;
+
+	if (cn23xx_setup_octeon_vf_device(oct))
+		return 1;
+
+	atomic_set(&oct->status, OCT_DEV_PCI_MAP_DONE);
+
+	oct->app_mode = CVM_DRV_NIC_APP;
+
+	/* Initialize the dispatch mechanism used to push packets arriving on
+	 * Octeon Output queues.
+	 */
+	if (octeon_init_dispatch_list(oct))
+		return 1;
+
+	atomic_set(&oct->status, OCT_DEV_DISPATCH_INIT_DONE);
+
+	if (octeon_set_io_queues_off(oct)) {
+		dev_err(&oct->pci_dev->dev, "setting io queues off failed\n");
+		return 1;
+	}
+
+	if (oct->fn_list.setup_device_regs(oct)) {
+		dev_err(&oct->pci_dev->dev, "device registers configuration failed\n");
+		return 1;
+	}
+
+	/* Initialize soft command buffer pool */
+	if (octeon_setup_sc_buffer_pool(oct)) {
+		dev_err(&oct->pci_dev->dev, "sc buffer pool allocation failed\n");
+		return 1;
+	}
+	atomic_set(&oct->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE);
+
+	/* Setup the data structures that manage this Octeon's Input queues. */
+	if (octeon_setup_instr_queues(oct)) {
+		dev_err(&oct->pci_dev->dev, "instruction queue initialization failed\n");
+		return 1;
+	}
+	atomic_set(&oct->status, OCT_DEV_INSTR_QUEUE_INIT_DONE);
+
+	/* Initialize lists to manage the requests of different types that
+	 * arrive from user & kernel applications for this octeon device.
+	 */
+	if (octeon_setup_response_list(oct)) {
+		dev_err(&oct->pci_dev->dev, "Response list allocation failed\n");
+		return 1;
+	}
+	atomic_set(&oct->status, OCT_DEV_RESP_LIST_INIT_DONE);
+
+	if (octeon_setup_output_queues(oct)) {
+		dev_err(&oct->pci_dev->dev, "Output queue initialization failed\n");
+		return 1;
+	}
+	atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);
+
+	if (oct->fn_list.setup_mbox(oct)) {
+		dev_err(&oct->pci_dev->dev, "Mailbox setup failed\n");
+		return 1;
+	}
+	atomic_set(&oct->status, OCT_DEV_MBOX_SETUP_DONE);
+
+	if (octeon_allocate_ioq_vector(oct)) {
+		dev_err(&oct->pci_dev->dev, "ioq vector allocation failed\n");
+		return 1;
+	}
+	atomic_set(&oct->status, OCT_DEV_MSIX_ALLOC_VECTOR_DONE);
+
+	dev_info(&oct->pci_dev->dev, "OCTEON_CN23XX VF Version: %s, %d ioqs\n",
+		 LIQUIDIO_VERSION, oct->sriov_info.rings_per_vf);
+
+	/* Setup the interrupt handler and record the INT SUM register address*/
+	if (octeon_setup_interrupt(oct))
+		return 1;
+
+	if (cn23xx_octeon_pfvf_handshake(oct))
+		return 1;
+
+	/* Enable Octeon device interrupts */
+	oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR);
+
+	atomic_set(&oct->status, OCT_DEV_INTR_SET_DONE);
+
+	/* Enable the input and output queues for this Octeon device */
+	if (oct->fn_list.enable_io_queues(oct)) {
+		dev_err(&oct->pci_dev->dev, "enabling io queues failed\n");
+		return 1;
+	}
+
+	atomic_set(&oct->status, OCT_DEV_IO_QUEUES_DONE);
+
+	atomic_set(&oct->status, OCT_DEV_HOST_OK);
+
+	/* Send Credit for Octeon Output queues. Credits are always sent after
+	 * the output queue is enabled.
+	 */
+	for (j = 0; j < oct->num_oqs; j++)
+		writel(oct->droq[j]->max_count, oct->droq[j]->pkts_credit_reg);
+
+	/* Packets can start arriving on the output queues from this point. */
+
+	atomic_set(&oct->status, OCT_DEV_CORE_OK);
+
+	atomic_set(&oct->status, OCT_DEV_RUNNING);
+
+	if (liquidio_init_nic_module(oct))
+		return 1;
+
+	return 0;
+}
+
+static int __init liquidio_vf_init(void)
+{
+	octeon_init_device_list(0);
+	return pci_register_driver(&liquidio_vf_pci_driver);
+}
+
+static void __exit liquidio_vf_exit(void)
+{
+	pci_unregister_driver(&liquidio_vf_pci_driver);
+
+	pr_info("LiquidIO_VF network module is now unloaded\n");
+}
+
+module_init(liquidio_vf_init);
+module_exit(liquidio_vf_exit);
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
index 0d990accb65e..ba329f6ca779 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*!  \file  liquidio_common.h
  *   \brief Common: Structures and macros used in PCI-NIC package by core and
  *   host driver.
@@ -68,12 +63,10 @@ enum octeon_tag_type {
  */
 #define OPCODE_CORE 0           /* used for generic core operations */
 #define OPCODE_NIC  1           /* used for NIC operations */
-#define OPCODE_LAST OPCODE_NIC
-
 /* Subcodes are used by host driver/apps to identify the sub-operation
  * for the core. They only need to by unique for a given subsystem.
  */
-#define OPCODE_SUBCODE(op, sub)       (((op & 0x0f) << 8) | ((sub) & 0x7f))
+#define OPCODE_SUBCODE(op, sub)       ((((op) & 0x0f) << 8) | ((sub) & 0x7f))
 
 /** OPCODE_CORE subcodes. For future use. */
 
@@ -89,13 +82,13 @@ enum octeon_tag_type {
 #define OPCODE_NIC_TIMESTAMP           0x07
 #define OPCODE_NIC_INTRMOD_CFG         0x08
 #define OPCODE_NIC_IF_CFG              0x09
+#define OPCODE_NIC_VF_DRV_NOTICE       0x0A
+#define VF_DRV_LOADED                  1
+#define VF_DRV_REMOVED                -1
+#define VF_DRV_MACADDR_CHANGED         2
 
 #define CORE_DRV_TEST_SCATTER_OP    0xFFF5
 
-#define OPCODE_SLOW_PATH(rh)  \
-	(OPCODE_SUBCODE(rh->r.opcode, rh->r.subcode) != \
-		OPCODE_SUBCODE(OPCODE_NIC, OPCODE_NIC_NW_DATA))
-
 /* Application codes advertised by the core driver initialization packet. */
 #define CVM_DRV_APP_START           0x0
 #define CVM_DRV_NO_APP              0
@@ -105,31 +98,15 @@ enum octeon_tag_type {
 #define CVM_DRV_INVALID_APP         (CVM_DRV_APP_START + 0x2)
 #define CVM_DRV_APP_END             (CVM_DRV_INVALID_APP - 1)
 
-/* Macro to increment index.
- * Index is incremented by count; if the sum exceeds
- * max, index is wrapped-around to the start.
- */
-#define INCR_INDEX(index, count, max)                \
-do {                                                 \
-	if (((index) + (count)) >= (max))            \
-		index = ((index) + (count)) - (max); \
-	else                                         \
-		index += (count);                    \
-} while (0)
-
-#define INCR_INDEX_BY1(index, max)	\
-do {                                    \
-	if ((++(index)) == (max))       \
-		index = 0;	        \
-} while (0)
-
-#define DECR_INDEX(index, count, max)                  \
-do {						       \
-	if ((count) > (index))                         \
-		index = ((max) - ((count - index)));   \
-	else                                           \
-		index -= count;			       \
-} while (0)
+static inline u32 incr_index(u32 index, u32 count, u32 max)
+{
+	if ((index + count) >= max)
+		index = index + count - max;
+	else
+		index += count;
+
+	return index;
+}
 
 #define OCT_BOARD_NAME 32
 #define OCT_SERIAL_LEN 64
@@ -235,6 +212,8 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry,
 
 #define   OCTNET_CMD_ID_ACTIVE         0x1a
 
+#define   OCTNET_CMD_SET_UC_LIST       0x1b
+#define   OCTNET_CMD_SET_VF_LINKSTATE  0x1c
 #define   OCTNET_CMD_VXLAN_PORT_ADD    0x0
 #define   OCTNET_CMD_VXLAN_PORT_DEL    0x1
 #define   OCTNET_CMD_RXCSUM_ENABLE     0x0
@@ -731,13 +710,15 @@ struct oct_link_info {
 
 #ifdef __BIG_ENDIAN_BITFIELD
 	u64 gmxport:16;
-	u64 rsvd:32;
+	u64 macaddr_is_admin_asgnd:1;
+	u64 rsvd:31;
 	u64 num_txpciq:8;
 	u64 num_rxpciq:8;
 #else
 	u64 num_rxpciq:8;
 	u64 num_txpciq:8;
-	u64 rsvd:32;
+	u64 rsvd:31;
+	u64 macaddr_is_admin_asgnd:1;
 	u64 gmxport:16;
 #endif
 
@@ -827,6 +808,16 @@ struct oct_link_stats {
 
 };
 
+static inline int opcode_slow_path(union octeon_rh *rh)
+{
+	u16 subcode1, subcode2;
+
+	subcode1 = OPCODE_SUBCODE((rh)->r.opcode, (rh)->r.subcode);
+	subcode2 = OPCODE_SUBCODE(OPCODE_NIC, OPCODE_NIC_NW_DATA);
+
+	return (subcode2 != subcode1);
+}
+
 #define LIO68XX_LED_CTRL_ADDR     0x3501
 #define LIO68XX_LED_CTRL_CFGON    0x1f
 #define LIO68XX_LED_CTRL_CFGOFF   0x100
diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
index 93819bd8602b..78a3685f6fe0 100644
--- a/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
+++ b/drivers/net/ethernet/cavium/liquidio/liquidio_image.h
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #ifndef _LIQUIDIO_IMAGE_H_
 #define _LIQUIDIO_IMAGE_H_
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
index c76556809ed1..1cb3514fc949 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file  octeon_config.h
  *  \brief Host Driver: Configuration data structures for the host driver.
  */
@@ -65,9 +60,15 @@
 #define   DEFAULT_NUM_NIC_PORTS_68XX_210NV  2
 
 /* CN23xx  IQ configuration macros */
+#define   CN23XX_MAX_VFS_PER_PF_PASS_1_0 8
+#define   CN23XX_MAX_VFS_PER_PF_PASS_1_1 31
+#define   CN23XX_MAX_VFS_PER_PF          63
+#define   CN23XX_MAX_RINGS_PER_VF        8
+
 #define   CN23XX_MAX_RINGS_PER_PF_PASS_1_0 12
 #define   CN23XX_MAX_RINGS_PER_PF_PASS_1_1 32
 #define   CN23XX_MAX_RINGS_PER_PF          64
+#define   CN23XX_MAX_RINGS_PER_VF          8
 
 #define   CN23XX_MAX_INPUT_QUEUES	CN23XX_MAX_RINGS_PER_PF
 #define   CN23XX_MAX_IQ_DESCRIPTORS	2048
@@ -466,4 +467,7 @@ struct octeon_config {
 
 #define MAX_POSSIBLE_OCTEON_INSTR_QUEUES	CN23XX_MAX_INPUT_QUEUES
 #define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES	CN23XX_MAX_OUTPUT_QUEUES
+
+#define MAX_POSSIBLE_VFS			64
+
 #endif /* __OCTEON_CONFIG_H__  */
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
index 01a50f3b0c8e..3265e0b7923e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /**
  * @file octeon_console.c
  */
@@ -76,9 +71,9 @@ MODULE_PARM_DESC(console_bitmask,
 #define OCTEON_CONSOLE_POLL_INTERVAL_MS  100    /* 10 times per second */
 
 /* First three members of cvmx_bootmem_desc are left in original
-** positions for backwards compatibility.
-** Assumes big endian target
-*/
+ * positions for backwards compatibility.
+ * Assumes big endian target
+ */
 struct cvmx_bootmem_desc {
 	/** spinlock to control access to list */
 	u32 lock;
@@ -143,46 +138,6 @@ struct octeon_pci_console_desc {
 };
 
 /**
- * This macro returns the size of a member of a structure.
- * Logically it is the same as "sizeof(s::field)" in C++, but
- * C lacks the "::" operator.
- */
-#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field)
-
-/**
- * This macro returns a member of the cvmx_bootmem_desc
- * structure. These members can't be directly addressed as
- * they might be in memory not directly reachable. In the case
- * where bootmem is compiled with LINUX_HOST, the structure
- * itself might be located on a remote Octeon. The argument
- * "field" is the member name of the cvmx_bootmem_desc to read.
- * Regardless of the type of the field, the return type is always
- * a u64.
- */
-#define CVMX_BOOTMEM_DESC_GET_FIELD(oct, field)                              \
-	__cvmx_bootmem_desc_get(oct, oct->bootmem_desc_addr,                 \
-				offsetof(struct cvmx_bootmem_desc, field),   \
-				SIZEOF_FIELD(struct cvmx_bootmem_desc, field))
-
-#define __cvmx_bootmem_lock(flags)	(flags = flags)
-#define __cvmx_bootmem_unlock(flags)	(flags = flags)
-
-/**
- * This macro returns a member of the
- * cvmx_bootmem_named_block_desc structure. These members can't
- * be directly addressed as they might be in memory not directly
- * reachable. In the case where bootmem is compiled with
- * LINUX_HOST, the structure itself might be located on a remote
- * Octeon. The argument "field" is the member name of the
- * cvmx_bootmem_named_block_desc to read. Regardless of the type
- * of the field, the return type is always a u64. The "addr"
- * parameter is the physical address of the structure.
- */
-#define CVMX_BOOTMEM_NAMED_GET_FIELD(oct, addr, field)                   \
-	__cvmx_bootmem_desc_get(oct, addr,                               \
-		offsetof(struct cvmx_bootmem_named_block_desc, field),   \
-		SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field))
-/**
  * \brief determines if a given console has debug enabled.
  * @param console console to check
  * @returns  1 = enabled. 0 otherwise
@@ -263,10 +218,15 @@ static int __cvmx_bootmem_check_version(struct octeon_device *oct,
 		oct->bootmem_desc_addr =
 			octeon_read_device_mem64(oct,
 						 BOOTLOADER_PCI_READ_DESC_ADDR);
-	major_version =
-		(u32)CVMX_BOOTMEM_DESC_GET_FIELD(oct, major_version);
-	minor_version =
-		(u32)CVMX_BOOTMEM_DESC_GET_FIELD(oct, minor_version);
+	major_version = (u32)__cvmx_bootmem_desc_get(
+			oct, oct->bootmem_desc_addr,
+			offsetof(struct cvmx_bootmem_desc, major_version),
+			FIELD_SIZEOF(struct cvmx_bootmem_desc, major_version));
+	minor_version = (u32)__cvmx_bootmem_desc_get(
+			oct, oct->bootmem_desc_addr,
+			offsetof(struct cvmx_bootmem_desc, minor_version),
+			FIELD_SIZEOF(struct cvmx_bootmem_desc, minor_version));
+
 	dev_dbg(&oct->pci_dev->dev, "%s: major_version=%d\n", __func__,
 		major_version);
 	if ((major_version > 3) ||
@@ -289,10 +249,20 @@ static const struct cvmx_bootmem_named_block_desc
 	u64 named_addr = cvmx_bootmem_phy_named_block_find(oct, name, flags);
 
 	if (named_addr) {
-		desc->base_addr = CVMX_BOOTMEM_NAMED_GET_FIELD(oct, named_addr,
-							       base_addr);
-		desc->size =
-			CVMX_BOOTMEM_NAMED_GET_FIELD(oct, named_addr, size);
+		desc->base_addr = __cvmx_bootmem_desc_get(
+				oct, named_addr,
+				offsetof(struct cvmx_bootmem_named_block_desc,
+					 base_addr),
+				FIELD_SIZEOF(
+					struct cvmx_bootmem_named_block_desc,
+					base_addr));
+		desc->size = __cvmx_bootmem_desc_get(oct, named_addr,
+				offsetof(struct cvmx_bootmem_named_block_desc,
+					 size),
+				FIELD_SIZEOF(
+					struct cvmx_bootmem_named_block_desc,
+					size));
+
 		strncpy(desc->name, name, sizeof(desc->name));
 		desc->name[sizeof(desc->name) - 1] = 0;
 		return &oct->bootmem_named_block_desc;
@@ -307,22 +277,41 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
 {
 	u64 result = 0;
 
-	__cvmx_bootmem_lock(flags);
 	if (!__cvmx_bootmem_check_version(oct, 3)) {
 		u32 i;
-		u64 named_block_array_addr =
-			CVMX_BOOTMEM_DESC_GET_FIELD(oct,
-						    named_block_array_addr);
-		u32 num_blocks = (u32)
-			CVMX_BOOTMEM_DESC_GET_FIELD(oct, nb_num_blocks);
-		u32 name_length = (u32)
-			CVMX_BOOTMEM_DESC_GET_FIELD(oct, named_block_name_len);
+
+		u64 named_block_array_addr = __cvmx_bootmem_desc_get(
+					oct, oct->bootmem_desc_addr,
+					offsetof(struct cvmx_bootmem_desc,
+						 named_block_array_addr),
+					FIELD_SIZEOF(struct cvmx_bootmem_desc,
+						     named_block_array_addr));
+		u32 num_blocks = (u32)__cvmx_bootmem_desc_get(
+					oct, oct->bootmem_desc_addr,
+					offsetof(struct cvmx_bootmem_desc,
+						 nb_num_blocks),
+					FIELD_SIZEOF(struct cvmx_bootmem_desc,
+						     nb_num_blocks));
+
+		u32 name_length = (u32)__cvmx_bootmem_desc_get(
+					oct, oct->bootmem_desc_addr,
+					offsetof(struct cvmx_bootmem_desc,
+						 named_block_name_len),
+					FIELD_SIZEOF(struct cvmx_bootmem_desc,
+						     named_block_name_len));
+
 		u64 named_addr = named_block_array_addr;
 
 		for (i = 0; i < num_blocks; i++) {
-			u64 named_size =
-				CVMX_BOOTMEM_NAMED_GET_FIELD(oct, named_addr,
-							     size);
+			u64 named_size = __cvmx_bootmem_desc_get(
+					oct, named_addr,
+					 offsetof(
+					struct cvmx_bootmem_named_block_desc,
+					size),
+					 FIELD_SIZEOF(
+					struct cvmx_bootmem_named_block_desc,
+					size));
+
 			if (name && named_size) {
 				char *name_tmp =
 					kmalloc(name_length + 1, GFP_KERNEL);
@@ -347,7 +336,6 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct,
 				sizeof(struct cvmx_bootmem_named_block_desc);
 		}
 	}
-	__cvmx_bootmem_unlock(flags);
 	return result;
 }
 
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
index 586b68899b06..a8df493a5012 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -32,6 +28,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
+#include "cn23xx_vf_device.h"
 
 /** Default configuration
  *  for CN66XX OCTEON Models.
@@ -520,11 +517,6 @@ static struct octeon_config default_cn23xx_conf = {
 	}
 };
 
-enum {
-	OCTEON_CONFIG_TYPE_DEFAULT = 0,
-	NUM_OCTEON_CONFS,
-};
-
 static struct octeon_config_ptr {
 	u32 conf_type;
 } oct_conf_info[MAX_OCTEON_DEVICES] = {
@@ -580,15 +572,17 @@ static void *__retrieve_octeon_config_info(struct octeon_device *oct,
 	switch (oct_conf_info[oct_id].conf_type) {
 	case OCTEON_CONFIG_TYPE_DEFAULT:
 		if (oct->chip_id == OCTEON_CN66XX) {
-			ret = (void *)&default_cn66xx_conf;
+			ret = &default_cn66xx_conf;
 		} else if ((oct->chip_id == OCTEON_CN68XX) &&
 			   (card_type == LIO_210NV)) {
-			ret =  (void *)&default_cn68xx_210nv_conf;
+			ret = &default_cn68xx_210nv_conf;
 		} else if ((oct->chip_id == OCTEON_CN68XX) &&
 			   (card_type == LIO_410NV)) {
-			ret =  (void *)&default_cn68xx_conf;
+			ret = &default_cn68xx_conf;
 		} else if (oct->chip_id == OCTEON_CN23XX_PF_VID) {
-			ret =  (void *)&default_cn23xx_conf;
+			ret = &default_cn23xx_conf;
+		} else if (oct->chip_id == OCTEON_CN23XX_VF_VID) {
+			ret = &default_cn23xx_conf;
 		}
 		break;
 	default:
@@ -604,6 +598,7 @@ static int __verify_octeon_config_info(struct octeon_device *oct, void *conf)
 	case OCTEON_CN68XX:
 		return lio_validate_cn6xxx_config_info(oct, conf);
 	case OCTEON_CN23XX_PF_VID:
+	case OCTEON_CN23XX_VF_VID:
 		return 0;
 	default:
 		break;
@@ -649,12 +644,12 @@ void octeon_free_device_mem(struct octeon_device *oct)
 	int i;
 
 	for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) {
-		if (oct->io_qmask.oq & (1ULL << i))
+		if (oct->io_qmask.oq & BIT_ULL(i))
 			vfree(oct->droq[i]);
 	}
 
 	for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-		if (oct->io_qmask.iq & (1ULL << i))
+		if (oct->io_qmask.iq & BIT_ULL(i))
 			vfree(oct->instr_queue[i]);
 	}
 
@@ -681,6 +676,9 @@ static struct octeon_device *octeon_allocate_device_mem(u32 pci_id,
 	case OCTEON_CN23XX_PF_VID:
 		configsize = sizeof(struct octeon_cn23xx_pf);
 		break;
+	case OCTEON_CN23XX_VF_VID:
+		configsize = sizeof(struct octeon_cn23xx_vf);
+		break;
 	default:
 		pr_err("%s: Unknown PCI Device: 0x%x\n",
 		       __func__,
@@ -756,6 +754,9 @@ octeon_allocate_ioq_vector(struct octeon_device  *oct)
 
 	if (OCTEON_CN23XX_PF(oct))
 		num_ioqs = oct->sriov_info.num_pf_rings;
+	else if (OCTEON_CN23XX_VF(oct))
+		num_ioqs = oct->sriov_info.rings_per_vf;
+
 	size = sizeof(struct octeon_ioq_vector) * num_ioqs;
 
 	oct->ioq_vector = vmalloc(size);
@@ -767,6 +768,7 @@ octeon_allocate_ioq_vector(struct octeon_device  *oct)
 		ioq_vector->oct_dev	= oct;
 		ioq_vector->iq_index	= i;
 		ioq_vector->droq_index	= i;
+		ioq_vector->mbox	= oct->mbox[i];
 
 		cpu_num = i % num_online_cpus();
 		cpumask_set_cpu(cpu_num, &ioq_vector->affinity_mask);
@@ -795,10 +797,11 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
 
 	if (OCTEON_CN6XXX(oct))
 		num_descs =
-			CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
+			CFG_GET_NUM_DEF_TX_DESCS(CHIP_CONF(oct, cn6xxx));
 	else if (OCTEON_CN23XX_PF(oct))
-		num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
-								conf));
+		num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_CONF(oct, cn23xx_pf));
+	else if (OCTEON_CN23XX_VF(oct))
+		num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_CONF(oct, cn23xx_vf));
 
 	oct->num_iqs = 0;
 
@@ -821,6 +824,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct)
 	if (octeon_init_instr_queue(oct, txpciq, num_descs)) {
 		/* prevent memory leak */
 		vfree(oct->instr_queue[0]);
+		oct->instr_queue[0] = NULL;
 		return 1;
 	}
 
@@ -837,14 +841,15 @@ int octeon_setup_output_queues(struct octeon_device *oct)
 
 	if (OCTEON_CN6XXX(oct)) {
 		num_descs =
-			CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf));
+			CFG_GET_NUM_DEF_RX_DESCS(CHIP_CONF(oct, cn6xxx));
 		desc_size =
-			CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn6xxx, conf));
+			CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn6xxx));
 	} else if (OCTEON_CN23XX_PF(oct)) {
-		num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn23xx_pf,
-								conf));
-		desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn23xx_pf,
-							       conf));
+		num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_CONF(oct, cn23xx_pf));
+		desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_pf));
+	} else if (OCTEON_CN23XX_VF(oct)) {
+		num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_CONF(oct, cn23xx_vf));
+		desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_CONF(oct, cn23xx_vf));
 	}
 	oct->num_oqs = 0;
 	oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node);
@@ -853,19 +858,63 @@ int octeon_setup_output_queues(struct octeon_device *oct)
 	if (!oct->droq[0])
 		return 1;
 
-	if (octeon_init_droq(oct, oq_no, num_descs, desc_size, NULL))
+	if (octeon_init_droq(oct, oq_no, num_descs, desc_size, NULL)) {
+		vfree(oct->droq[oq_no]);
+		oct->droq[oq_no] = NULL;
 		return 1;
+	}
 	oct->num_oqs++;
 
 	return 0;
 }
 
-void octeon_set_io_queues_off(struct octeon_device *oct)
+int octeon_set_io_queues_off(struct octeon_device *oct)
 {
 	if (OCTEON_CN6XXX(oct)) {
 		octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0);
 		octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0);
+	} else if (oct->chip_id == OCTEON_CN23XX_VF_VID) {
+		u32 q_no;
+
+		/* IOQs will already be in reset.
+		 * If RST bit is set, wait for quiet bit to be set.
+		 * Once quiet bit is set, clear the RST bit.
+		 * Each queue gets its own polling budget.
+		 */
+		for (q_no = 0; q_no < oct->sriov_info.rings_per_vf; q_no++) {
+			int loop = BUSY_READING_REG_VF_LOOP_COUNT;
+			u64 reg_val = octeon_read_csr64(
+				oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+
+			while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) &&
+			       !(reg_val &  CN23XX_PKT_INPUT_CTL_QUIET) &&
+			       loop) {
+				reg_val = octeon_read_csr64(
+				    oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+				loop--;
+			}
+			if (!loop) {
+				dev_err(&oct->pci_dev->dev,
+					"clearing the reset reg failed or setting the quiet reg failed for qno: %u\n",
+					q_no);
+				return -1;
+			}
+
+			reg_val = reg_val & ~CN23XX_PKT_INPUT_CTL_RST;
+			octeon_write_csr64(
+				oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no),
+				reg_val);
+
+			reg_val = octeon_read_csr64(
+				oct, CN23XX_VF_SLI_IQ_PKT_CONTROL64(q_no));
+			if (reg_val & CN23XX_PKT_INPUT_CTL_RST) {
+				dev_err(&oct->pci_dev->dev,
+					"unable to reset qno %u\n", q_no);
+				return -1;
+			}
+		}
 	}
+	return 0;
 }
 
 void octeon_set_droq_pkt_op(struct octeon_device *oct,
@@ -1070,10 +1119,10 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf)
 
 	if (OCTEON_CN6XXX(oct))
 		num_nic_ports =
-			CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn6xxx, conf));
+			CFG_GET_NUM_NIC_PORTS(CHIP_CONF(oct, cn6xxx));
 	else if (OCTEON_CN23XX_PF(oct))
 		num_nic_ports =
-			CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn23xx_pf, conf));
+			CFG_GET_NUM_NIC_PORTS(CHIP_CONF(oct, cn23xx_pf));
 
 	if (atomic_read(&oct->status) >= OCT_DEV_RUNNING) {
 		dev_err(&oct->pci_dev->dev, "Received CORE OK when device state is 0x%x\n",
@@ -1143,7 +1192,7 @@ int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no)
 
 {
 	if (oct && (q_no < MAX_OCTEON_INSTR_QUEUES(oct)) &&
-	    (oct->io_qmask.iq & (1ULL << q_no)))
+	    (oct->io_qmask.iq & BIT_ULL(q_no)))
 		return oct->instr_queue[q_no]->max_count;
 
 	return -1;
@@ -1152,7 +1201,7 @@ int octeon_get_tx_qsize(struct octeon_device *oct, u32 q_no)
 int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no)
 {
 	if (oct && (q_no < MAX_OCTEON_OUTPUT_QUEUES(oct)) &&
-	    (oct->io_qmask.oq & (1ULL << q_no)))
+	    (oct->io_qmask.oq & BIT_ULL(q_no)))
 		return oct->droq[q_no]->max_count;
 	return -1;
 }
@@ -1168,10 +1217,13 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct)
 
 	if (OCTEON_CN6XXX(oct)) {
 		default_oct_conf =
-			(struct octeon_config *)(CHIP_FIELD(oct, cn6xxx, conf));
+			(struct octeon_config *)(CHIP_CONF(oct, cn6xxx));
 	} else if (OCTEON_CN23XX_PF(oct)) {
 		default_oct_conf = (struct octeon_config *)
-			(CHIP_FIELD(oct, cn23xx_pf, conf));
+			(CHIP_CONF(oct, cn23xx_pf));
+	} else if (OCTEON_CN23XX_VF(oct)) {
+		default_oct_conf = (struct octeon_config *)
+			(CHIP_CONF(oct, cn23xx_vf));
 	}
 	return default_oct_conf;
 }
@@ -1322,7 +1374,7 @@ void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq)
 	/*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough
 	 *to trigger tx interrupts as well, if they are pending.
 	 */
-	if (oct && OCTEON_CN23XX_PF(oct)) {
+	if (oct && (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))) {
 		if (droq)
 			writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg);
 		/*we race with firmrware here. read and write the IN_DONE_CNTS*/
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index da15c2ae9330..18f6836250a6 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -1,25 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
-
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file octeon_device.h
  *  \brief Host Driver: This file defines the octeon device structure.
  */
@@ -38,6 +33,7 @@
 #define  OCTEON_CN68XX                0x0091
 #define  OCTEON_CN66XX                0x0092
 #define  OCTEON_CN23XX_PF_VID         0x9702
+#define  OCTEON_CN23XX_VF_VID         0x9712
 
 /**RevisionId for the chips */
 #define  OCTEON_CN23XX_REV_1_0        0x00
@@ -52,7 +48,14 @@ enum octeon_pci_swap_mode {
 	OCTEON_PCI_32BIT_LW_SWAP = 3
 };
 
+enum {
+	OCTEON_CONFIG_TYPE_DEFAULT = 0,
+	NUM_OCTEON_CONFS,
+};
+
+#define  OCTEON_INPUT_INTR    (1)
 #define  OCTEON_OUTPUT_INTR   (2)
+#define  OCTEON_MBOX_INTR     (4)
 #define  OCTEON_ALL_INTR      0xff
 
 /*---------------   PCI BAR1 index registers -------------*/
@@ -70,26 +73,30 @@ enum octeon_pci_swap_mode {
  *  as it is initialized.
  */
 #define    OCT_DEV_BEGIN_STATE            0x0
-#define    OCT_DEV_PCI_MAP_DONE           0x1
-#define    OCT_DEV_DISPATCH_INIT_DONE     0x2
-#define    OCT_DEV_INSTR_QUEUE_INIT_DONE  0x3
-#define    OCT_DEV_SC_BUFF_POOL_INIT_DONE 0x4
-#define    OCT_DEV_RESP_LIST_INIT_DONE    0x5
-#define    OCT_DEV_DROQ_INIT_DONE         0x6
-#define    OCT_DEV_IO_QUEUES_DONE         0x7
-#define    OCT_DEV_CONSOLE_INIT_DONE      0x8
-#define    OCT_DEV_HOST_OK                0x9
-#define    OCT_DEV_CORE_OK                0xa
-#define    OCT_DEV_RUNNING                0xb
-#define    OCT_DEV_IN_RESET               0xc
-#define    OCT_DEV_STATE_INVALID          0xd
+#define    OCT_DEV_PCI_ENABLE_DONE        0x1
+#define    OCT_DEV_PCI_MAP_DONE           0x2
+#define    OCT_DEV_DISPATCH_INIT_DONE     0x3
+#define    OCT_DEV_INSTR_QUEUE_INIT_DONE  0x4
+#define    OCT_DEV_SC_BUFF_POOL_INIT_DONE 0x5
+#define    OCT_DEV_RESP_LIST_INIT_DONE    0x6
+#define    OCT_DEV_DROQ_INIT_DONE         0x7
+#define    OCT_DEV_MBOX_SETUP_DONE        0x8
+#define    OCT_DEV_MSIX_ALLOC_VECTOR_DONE 0x9
+#define    OCT_DEV_INTR_SET_DONE          0xa
+#define    OCT_DEV_IO_QUEUES_DONE         0xb
+#define    OCT_DEV_CONSOLE_INIT_DONE      0xc
+#define    OCT_DEV_HOST_OK                0xd
+#define    OCT_DEV_CORE_OK                0xe
+#define    OCT_DEV_RUNNING                0xf
+#define    OCT_DEV_IN_RESET               0x10
+#define    OCT_DEV_STATE_INVALID          0x11
 
 #define    OCT_DEV_STATES                 OCT_DEV_STATE_INVALID
 
 /** Octeon Device interrupts
-  *  These interrupt bits are set in int_status filed of
-  *  octeon_device structure
-  */
+ * These interrupt bits are set in int_status field of
+ * octeon_device structure
+ */
 #define	   OCT_DEV_INTR_DMA0_FORCE	  0x01
 #define	   OCT_DEV_INTR_DMA1_FORCE	  0x02
 #define	   OCT_DEV_INTR_PKT_DATA	  0x04
@@ -208,6 +215,10 @@ struct octeon_fn_list {
 
 	irqreturn_t (*process_interrupt_regs)(void *);
 	u64 (*msix_interrupt_handler)(void *);
+
+	int (*setup_mbox)(struct octeon_device *);
+	int (*free_mbox)(struct octeon_device *);
+
 	int (*soft_reset)(struct octeon_device *);
 	int (*setup_device_regs)(struct octeon_device *);
 	void (*bar1_idx_setup)(struct octeon_device *, u64, u32, int);
@@ -284,6 +295,7 @@ struct octdev_props {
 #define LIO_FLAG_MSIX_ENABLED	0x1
 #define MSIX_PO_INT		0x1
 #define MSIX_PI_INT		0x2
+#define MSIX_MBOX_INT		0x4
 
 struct octeon_pf_vf_hs_word {
 #ifdef __LITTLE_ENDIAN_BITFIELD
@@ -322,14 +334,39 @@ struct octeon_pf_vf_hs_word {
 };
 
 struct octeon_sriov_info {
+	/* Number of rings assigned to VF */
+	u32	rings_per_vf;
+
+	/** Max Number of VF devices that can be enabled. This variable can be
+	 *  specified during load time or it will be derived after allocating
+	 *  PF queues. When max_vfs is derived then each VF will get one queue
+	 **/
+	u32	max_vfs;
+
+	/** Number of VF devices enabled using sysfs. */
+	u32	num_vfs_alloced;
+
 	/* Actual rings left for PF device */
 	u32	num_pf_rings;
 
-	/* SRN of PF usable IO queues   */
+	/* SRN of PF usable IO queues */
 	u32	pf_srn;
+
 	/* total pf rings */
 	u32	trs;
 
+	u32	sriov_enabled;
+
+	/*lookup table that maps DPI ring number to VF pci_dev struct pointer*/
+	struct pci_dev *dpiring_to_vfpcidev_lut[MAX_POSSIBLE_VFS];
+
+	u64	vf_macaddr[MAX_POSSIBLE_VFS];
+
+	u16	vf_vlantci[MAX_POSSIBLE_VFS];
+
+	int	vf_linkstate[MAX_POSSIBLE_VFS];
+
+	u64	vf_drv_loaded_mask;
 };
 
 struct octeon_ioq_vector {
@@ -337,6 +374,7 @@ struct octeon_ioq_vector {
 	int		        iq_index;
 	int		        droq_index;
 	int			vector;
+	struct octeon_mbox     *mbox;
 	struct cpumask		affinity_mask;
 	u32			ioq_num;
 };
@@ -365,8 +403,13 @@ struct octeon_device {
 
 	/** Octeon Chip type. */
 	u16 chip_id;
+
 	u16 rev_id;
+
 	u16 pf_num;
+
+	u16 vf_num;
+
 	/** This device's id - set by the driver. */
 	u32 octeon_id;
 
@@ -474,6 +517,9 @@ struct octeon_device {
 
 	int msix_on;
 
+	/** Mail Box details of each octeon queue. */
+	struct octeon_mbox  *mbox[MAX_POSSIBLE_VFS];
+
 	/** IOq information of it's corresponding MSI-X interrupt. */
 	struct octeon_ioq_vector    *ioq_vector;
 
@@ -490,11 +536,14 @@ struct octeon_device {
 
 #define  OCT_DRV_ONLINE 1
 #define  OCT_DRV_OFFLINE 2
-#define  OCTEON_CN6XXX(oct)           ((oct->chip_id == OCTEON_CN66XX) || \
-				       (oct->chip_id == OCTEON_CN68XX))
-#define  OCTEON_CN23XX_PF(oct)        (oct->chip_id == OCTEON_CN23XX_PF_VID)
-#define CHIP_FIELD(oct, TYPE, field)             \
-	(((struct octeon_ ## TYPE  *)(oct->chip))->field)
+#define  OCTEON_CN6XXX(oct)	({					\
+				 typeof(oct) _oct = (oct);		\
+				 ((_oct->chip_id == OCTEON_CN66XX) ||	\
+				  (_oct->chip_id == OCTEON_CN68XX));	})
+#define  OCTEON_CN23XX_PF(oct)        ((oct)->chip_id == OCTEON_CN23XX_PF_VID)
+#define  OCTEON_CN23XX_VF(oct)        ((oct)->chip_id == OCTEON_CN23XX_VF_VID)
+#define CHIP_CONF(oct, TYPE)             \
+	(((struct octeon_ ## TYPE  *)((oct)->chip))->conf)
 
 struct oct_intrmod_cmd {
 	struct octeon_device *oct_dev;
@@ -508,7 +557,7 @@ struct oct_intrmod_cmd {
 void octeon_init_device_list(int conf_type);
 
 /** Free memory for Input and Output queue structures for a octeon device */
-void octeon_free_device_mem(struct octeon_device *);
+void octeon_free_device_mem(struct octeon_device *oct);
 
 /* Look up a free entry in the octeon_device table and allocate resources
  * for the octeon_device structure for an octeon device. Called at init
@@ -606,16 +655,16 @@ void lio_pci_writeq(struct octeon_device *oct, u64 val, u64 addr);
 
 /* Routines for reading and writing CSRs */
 #define   octeon_write_csr(oct_dev, reg_off, value) \
-		writel(value, oct_dev->mmio[0].hw_addr + reg_off)
+		writel(value, (oct_dev)->mmio[0].hw_addr + (reg_off))
 
 #define   octeon_write_csr64(oct_dev, reg_off, val64) \
-		writeq(val64, oct_dev->mmio[0].hw_addr + reg_off)
+		writeq(val64, (oct_dev)->mmio[0].hw_addr + (reg_off))
 
 #define   octeon_read_csr(oct_dev, reg_off)         \
-		readl(oct_dev->mmio[0].hw_addr + reg_off)
+		readl((oct_dev)->mmio[0].hw_addr + (reg_off))
 
 #define   octeon_read_csr64(oct_dev, reg_off)         \
-		readq(oct_dev->mmio[0].hw_addr + reg_off)
+		readq((oct_dev)->mmio[0].hw_addr + (reg_off))
 
 /**
  * Checks if memory access is okay
@@ -724,7 +773,7 @@ int octeon_get_rx_qsize(struct octeon_device *oct, u32 q_no);
 /** Turns off the input and output queues for the device
  *  @param oct which octeon to disable
  */
-void octeon_set_io_queues_off(struct octeon_device *oct);
+int octeon_set_io_queues_off(struct octeon_device *oct);
 
 /** Turns on or off the given output queue for the device
  *  @param oct which octeon to change
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
index f60e5320daf4..0be87d119a97 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c
@@ -1,24 +1,20 @@
 /**********************************************************************
-* Author: Cavium, Inc.
-*
-* Contact: support@cavium.com
-*          Please include "LiquidIO" in the subject.
-*
-* Copyright (c) 2003-2015 Cavium, Inc.
-*
-* This file is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License, Version 2, as
-* published by the Free Software Foundation.
-*
-* This file is distributed in the hope that it will be useful, but
-* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
-* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
-* NONINFRINGEMENT.  See the GNU General Public License for more
-* details.
-*
-* This file may also be available under a different license from Cavium.
-* Contact Cavium, Inc. for more information
-**********************************************************************/
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/vmalloc.h>
@@ -32,9 +28,7 @@
 #include "cn66xx_regs.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
-
-#define     CVM_MIN(d1, d2)           (((d1) < (d2)) ? (d1) : (d2))
-#define     CVM_MAX(d1, d2)           (((d1) > (d2)) ? (d1) : (d2))
+#include "cn23xx_vf_device.h"
 
 struct niclist {
 	struct list_head list;
@@ -258,13 +252,18 @@ int octeon_init_droq(struct octeon_device *oct,
 	c_num_descs = num_descs;
 	c_buf_size = desc_size;
 	if (OCTEON_CN6XXX(oct)) {
-		struct octeon_config *conf6x = CHIP_FIELD(oct, cn6xxx, conf);
+		struct octeon_config *conf6x = CHIP_CONF(oct, cn6xxx);
 
 		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x);
 		c_refill_threshold =
 			(u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x);
 	} else if (OCTEON_CN23XX_PF(oct)) {
-		struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf);
+		struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_pf);
+
+		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
+		c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
+	} else if (OCTEON_CN23XX_VF(oct)) {
+		struct octeon_config *conf23 = CHIP_CONF(oct, cn23xx_vf);
 
 		c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23);
 		c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23);
@@ -337,7 +336,7 @@ int octeon_init_droq(struct octeon_device *oct,
 	/* For 56xx Pass1, this function won't be called, so no checks. */
 	oct->fn_list.setup_oq_regs(oct, q_no);
 
-	oct->io_qmask.oq |= (1ULL << q_no);
+	oct->io_qmask.oq |= BIT_ULL(q_no);
 
 	return 0;
 
@@ -409,7 +408,7 @@ static inline struct octeon_recv_info *octeon_create_recv_info(
 		recv_pkt->buffer_ptr[i] = droq->recv_buf_list[idx].buffer;
 		droq->recv_buf_list[idx].buffer = NULL;
 
-		INCR_INDEX_BY1(idx, droq->max_count);
+		idx = incr_index(idx, 1, droq->max_count);
 		bytes_left -= droq->buffer_size;
 		i++;
 		buf_cnt--;
@@ -440,14 +439,15 @@ octeon_droq_refill_pullup_descs(struct octeon_droq *droq,
 			droq->recv_buf_list[refill_index].buffer = NULL;
 			desc_ring[refill_index].buffer_ptr = 0;
 			do {
-				INCR_INDEX_BY1(droq->refill_idx,
-					       droq->max_count);
+				droq->refill_idx = incr_index(droq->refill_idx,
+							      1,
+							      droq->max_count);
 				desc_refilled++;
 				droq->refill_count--;
 			} while (droq->recv_buf_list[droq->refill_idx].
 				 buffer);
 		}
-		INCR_INDEX_BY1(refill_index, droq->max_count);
+		refill_index = incr_index(refill_index, 1, droq->max_count);
 	}                       /* while */
 	return desc_refilled;
 }
@@ -514,7 +514,8 @@ octeon_droq_refill(struct octeon_device *octeon_dev, struct octeon_droq *droq)
 		/* Reset any previous values in the length field. */
 		droq->info_list[droq->refill_idx].length = 0;
 
-		INCR_INDEX_BY1(droq->refill_idx, droq->max_count);
+		droq->refill_idx = incr_index(droq->refill_idx, 1,
+					      droq->max_count);
 		desc_refilled++;
 		droq->refill_count--;
 	}
@@ -599,7 +600,8 @@ static inline void octeon_droq_drop_packets(struct octeon_device *oct,
 			buf_cnt = 1;
 		}
 
-		INCR_INDEX(droq->read_idx, buf_cnt, droq->max_count);
+		droq->read_idx = incr_index(droq->read_idx, buf_cnt,
+					    droq->max_count);
 		droq->refill_count += buf_cnt;
 	}
 }
@@ -639,11 +641,12 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
 		rh = &info->rh;
 
 		total_len += (u32)info->length;
-		if (OPCODE_SLOW_PATH(rh)) {
+		if (opcode_slow_path(rh)) {
 			u32 buf_cnt;
 
 			buf_cnt = octeon_droq_dispatch_pkt(oct, droq, rh, info);
-			INCR_INDEX(droq->read_idx, buf_cnt, droq->max_count);
+			droq->read_idx = incr_index(droq->read_idx,
+						    buf_cnt, droq->max_count);
 			droq->refill_count += buf_cnt;
 		} else {
 			if (info->length <= droq->buffer_size) {
@@ -657,7 +660,8 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
 				droq->recv_buf_list[droq->read_idx].buffer =
 					NULL;
 
-				INCR_INDEX_BY1(droq->read_idx, droq->max_count);
+				droq->read_idx = incr_index(droq->read_idx, 1,
+							    droq->max_count);
 				droq->refill_count++;
 			} else {
 				nicbuf = octeon_fast_packet_alloc((u32)
@@ -689,8 +693,9 @@ octeon_droq_fast_process_packets(struct octeon_device *oct,
 					}
 
 					pkt_len += cpy_len;
-					INCR_INDEX_BY1(droq->read_idx,
-						       droq->max_count);
+					droq->read_idx =
+						incr_index(droq->read_idx, 1,
+							   droq->max_count);
 					droq->refill_count++;
 				}
 			}
@@ -804,9 +809,8 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct,
 	while (total_pkts_processed < budget) {
 		octeon_droq_check_hw_for_pkts(droq);
 
-		pkts_available =
-			CVM_MIN((budget - total_pkts_processed),
-				(u32)(atomic_read(&droq->pkts_pending)));
+		pkts_available = min((budget - total_pkts_processed),
+				     (u32)(atomic_read(&droq->pkts_pending)));
 
 		if (pkts_available == 0)
 			break;
@@ -891,6 +895,10 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd,
 			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
 		}
 		break;
+
+		case OCTEON_CN23XX_VF_VID:
+			lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]);
+		break;
 		}
 		return 0;
 	}
@@ -988,7 +996,8 @@ int octeon_create_droq(struct octeon_device *oct,
 	if (!droq)
 		droq = vmalloc(sizeof(*droq));
 	if (!droq)
-		goto create_droq_fail;
+		return -1;
+
 	memset(droq, 0, sizeof(struct octeon_droq));
 
 	/*Disable the pkt o/p for this Q  */
@@ -996,7 +1005,11 @@ int octeon_create_droq(struct octeon_device *oct,
 	oct->droq[q_no] = droq;
 
 	/* Initialize the Droq */
-	octeon_init_droq(oct, q_no, num_descs, desc_size, app_ctx);
+	if (octeon_init_droq(oct, q_no, num_descs, desc_size, app_ctx)) {
+		vfree(oct->droq[q_no]);
+		oct->droq[q_no] = NULL;
+		return -1;
+	}
 
 	oct->num_oqs++;
 
@@ -1009,8 +1022,4 @@ int octeon_create_droq(struct octeon_device *oct,
 	 * the same time.
 	 */
 	return 0;
-
-create_droq_fail:
-	octeon_delete_droq(oct, q_no);
-	return -ENOMEM;
 }
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
index 5be002d5dba4..e62074090681 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -13,13 +13,8 @@
  * This file is distributed in the hope that it will be useful, but
  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
- **********************************************************************/
-
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*!  \file  octeon_droq.h
  *   \brief Implementation of Octeon Output queues. "Output" is with
  *   respect to the Octeon device on the NIC. From this driver's point of
@@ -81,7 +76,7 @@ struct octeon_skb_page_info {
  *  the Octeon device. Since the descriptor ring keeps physical (bus)
  *  addresses, this field is required for the driver to keep track of
  *  the virtual address pointers.
-*/
+ */
 struct octeon_recv_buffer {
 	/** Packet buffer, including metadata. */
 	void *buffer;
@@ -121,7 +116,6 @@ struct oct_droq_stats {
 	/** Num of Packets dropped due to receive path failures. */
 	u64 rx_dropped;
 
-	/** Num of vxlan packets received; */
 	u64 rx_vxlan;
 
 	/** Num of failures of recv_buffer_alloc() */
@@ -359,7 +353,7 @@ struct octeon_droq {
  * @param  q_no       - droq no. ranges from 0 - 3.
  * @param app_ctx     - pointer to application context
  * @return Success: 0    Failure: 1
-*/
+ */
 int octeon_init_droq(struct octeon_device *oct_dev,
 		     u32 q_no,
 		     u32 num_descs,
@@ -372,7 +366,7 @@ int octeon_init_droq(struct octeon_device *oct_dev,
  *  @param oct_dev - pointer to the octeon device structure
  *  @param q_no    - droq no. ranges from 0 - 3.
  *  @return:    Success: 0    Failure: 1
-*/
+ */
 int octeon_delete_droq(struct octeon_device *oct_dev, u32 q_no);
 
 /** Register a change in droq operations. The ops field has a pointer to a
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
index e4d426ba18dc..e04ca8f0b4a7 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -13,13 +13,8 @@
  * This file is distributed in the hope that it will be useful, but
  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
- **********************************************************************/
-
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*!  \file  octeon_iq.h
  *   \brief Host Driver: Implementation of Octeon input queues. "Input" is
  *   with respect to the Octeon device on the NIC. From this driver's
@@ -69,7 +64,6 @@ struct oct_iq_stats {
 	u64 tx_vxlan; /* tunnel */
 	u64 tx_dmamap_fail;
 	u64 tx_restart;
-	/*u64 tx_timeout_count;*/
 };
 
 #define OCT_IQ_STATS_SIZE   (sizeof(struct oct_iq_stats))
@@ -78,7 +72,7 @@ struct oct_iq_stats {
  *  The input queue is used to post raw (instruction) mode data or packet
  *  data to Octeon device from the host. Each input queue (upto 4) for
  *  a Octeon device has one such structure to represent it.
-*/
+ */
 struct octeon_instr_queue {
 	struct octeon_device *oct_dev;
 
@@ -118,8 +112,8 @@ struct octeon_instr_queue {
 	u32 octeon_read_index;
 
 	/** This index aids in finding the window in the queue where Octeon
-	  * has read the commands.
-	  */
+	 *  has read the commands.
+	 */
 	u32 flush_index;
 
 	/** This field keeps track of the instructions pending in this queue. */
@@ -150,8 +144,8 @@ struct octeon_instr_queue {
 	u64 last_db_time;
 
 	/** The doorbell timeout. If the doorbell was not rung for this time and
-	  * fill_cnt is non-zero, ring the doorbell again.
-	  */
+	 * fill_cnt is non-zero, ring the doorbell again.
+	 */
 	u32 db_timeout;
 
 	/** Statistics for this input queue. */
@@ -309,6 +303,9 @@ struct octeon_sc_buffer_pool {
 	atomic_t alloc_buf_count;
 };
 
+#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
+		(((octeon_dev_ptr)->instr_queue[iq_no]->stats.field) += count)
+
 int octeon_setup_sc_buffer_pool(struct octeon_device *oct);
 int octeon_free_sc_buffer_pool(struct octeon_device *oct);
 struct octeon_soft_command *
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
new file mode 100644
index 000000000000..73696b427f06
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.c
@@ -0,0 +1,318 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include "liquidio_common.h"
+#include "octeon_droq.h"
+#include "octeon_iq.h"
+#include "response_manager.h"
+#include "octeon_device.h"
+#include "octeon_main.h"
+#include "octeon_mailbox.h"
+
+/**
+ * octeon_mbox_read:
+ * @mbox: Pointer mailbox
+ *
+ * Reads one 8-byte word from the mbox read register, advances the receive
+ * state machine and writes back the acknowledgment indicating completion
+ * of the read.  The first word of a message is the header; the remaining
+ * (header len - 1) words are stored in the data[] array of the command.
+ *
+ * Return: 1 when a complete message has been received or when an
+ * unexpected word was seen (OCTEON_MBOX_STATE_ERROR is then set),
+ * 0 otherwise.
+ */
+int octeon_mbox_read(struct octeon_mbox *mbox)
+{
+	union octeon_mbox_message msg;
+	struct octeon_mbox_cmd *cmd = NULL;
+	int ret = 0;
+
+	spin_lock(&mbox->lock);
+
+	msg.u64 = readq(mbox->mbox_read_reg);
+
+	/* ACK/SIG are handshake values, not message words. */
+	if ((msg.u64 == OCTEON_PFVFACK) || (msg.u64 == OCTEON_PFVFSIG)) {
+		spin_unlock(&mbox->lock);
+		return 0;
+	}
+
+	if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
+		cmd = &mbox->mbox_req;
+	} else if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
+		cmd = &mbox->mbox_resp;
+	} else if ((mbox->state & OCTEON_MBOX_STATE_IDLE) &&
+		   (msg.s.type == OCTEON_MBOX_REQUEST)) {
+		/* Header of a new request from the peer. */
+		mbox->state &= ~OCTEON_MBOX_STATE_IDLE;
+		mbox->state |= OCTEON_MBOX_STATE_REQUEST_RECEIVING;
+		mbox->mbox_req.msg.u64 = msg.u64;
+		mbox->mbox_req.q_no = mbox->q_no;
+		mbox->mbox_req.recv_len = 1;
+	} else if ((mbox->state & OCTEON_MBOX_STATE_RESPONSE_PENDING) &&
+		   (msg.s.type == OCTEON_MBOX_RESPONSE)) {
+		/* Header of the response to a request sent earlier. */
+		mbox->state &= ~OCTEON_MBOX_STATE_RESPONSE_PENDING;
+		mbox->state |= OCTEON_MBOX_STATE_RESPONSE_RECEIVING;
+		mbox->mbox_resp.msg.u64 = msg.u64;
+		mbox->mbox_resp.q_no = mbox->q_no;
+		mbox->mbox_resp.recv_len = 1;
+	} else {
+		/* Unexpected word for the current state. */
+		goto err;
+	}
+
+	if (cmd) {
+		/* recv_len counts the header, so data[] lags by one. */
+		if (cmd->recv_len > OCTEON_MBOX_DATA_MAX)
+			goto err;
+		cmd->data[cmd->recv_len - 1] = msg.u64;
+		cmd->recv_len++;
+	}
+
+	/* Completion is judged by the stored header, not the word just read. */
+	if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVING) {
+		if (mbox->mbox_req.recv_len >= mbox->mbox_req.msg.s.len) {
+			mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVING;
+			mbox->state |= OCTEON_MBOX_STATE_REQUEST_RECEIVED;
+			ret = 1;
+		}
+	} else if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVING) {
+		if (mbox->mbox_resp.recv_len >= mbox->mbox_resp.msg.s.len) {
+			mbox->state &= ~OCTEON_MBOX_STATE_RESPONSE_RECEIVING;
+			mbox->state |= OCTEON_MBOX_STATE_RESPONSE_RECEIVED;
+			ret = 1;
+		}
+	} else {
+		WARN_ON(1);
+	}
+
+	writeq(OCTEON_PFVFACK, mbox->mbox_read_reg);
+
+	spin_unlock(&mbox->lock);
+
+	return ret;
+
+err:
+	writeq(OCTEON_PFVFERR, mbox->mbox_read_reg);
+	mbox->state |= OCTEON_MBOX_STATE_ERROR;
+	spin_unlock(&mbox->lock);
+	return 1;
+}
+
+/**
+ * octeon_mbox_write:
+ * @oct: Pointer Octeon Device
+ * @mbox_cmd: Cmd to send to mailbox.
+ *
+ * Populates the queue specific mbox structure with cmd information and
+ * writes the cmd to the mbox register, one 8-byte word per handshake.
+ *
+ * Return: OCTEON_MBOX_STATUS_SUCCESS, OCTEON_MBOX_STATUS_BUSY (request while
+ * not idle) or OCTEON_MBOX_STATUS_FAILED (no request to answer, or timeout).
+ */
+int octeon_mbox_write(struct octeon_device *oct,
+		      struct octeon_mbox_cmd *mbox_cmd)
+{
+	struct octeon_mbox *mbox = oct->mbox[mbox_cmd->q_no];
+	u32 count, i, ret = OCTEON_MBOX_STATUS_SUCCESS;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mbox->lock, flags);
+
+	if ((mbox_cmd->msg.s.type == OCTEON_MBOX_RESPONSE) &&
+	    !(mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVED)) {
+		spin_unlock_irqrestore(&mbox->lock, flags);
+		return OCTEON_MBOX_STATUS_FAILED;
+	}
+
+	if ((mbox_cmd->msg.s.type == OCTEON_MBOX_REQUEST) &&
+	    !(mbox->state & OCTEON_MBOX_STATE_IDLE)) {
+		spin_unlock_irqrestore(&mbox->lock, flags);
+		return OCTEON_MBOX_STATUS_BUSY;
+	}
+
+	if (mbox_cmd->msg.s.type == OCTEON_MBOX_REQUEST) {
+		memcpy(&mbox->mbox_resp, mbox_cmd, sizeof(*mbox_cmd));
+		mbox->state = OCTEON_MBOX_STATE_RESPONSE_PENDING;
+	}
+
+	spin_unlock_irqrestore(&mbox->lock, flags);
+
+	count = 0;
+	while (readq(mbox->mbox_write_reg) != OCTEON_PFVFSIG) {
+		schedule_timeout_uninterruptible(LIO_MBOX_WRITE_WAIT_TIME);
+		if (count++ == LIO_MBOX_WRITE_WAIT_CNT) {
+			ret = OCTEON_MBOX_STATUS_FAILED;
+			break;
+		}
+	}
+
+	if (ret == OCTEON_MBOX_STATUS_SUCCESS) {
+		writeq(mbox_cmd->msg.u64, mbox->mbox_write_reg);
+		/* len counts the header too; len == 0 sends no data words. */
+		for (i = 0; i + 1 < mbox_cmd->msg.s.len; i++) {
+			count = 0;
+			while (readq(mbox->mbox_write_reg) != OCTEON_PFVFACK) {
+				schedule_timeout_uninterruptible(10);
+				if (count++ == LIO_MBOX_WRITE_WAIT_CNT) {
+					ret = OCTEON_MBOX_STATUS_FAILED;
+					break;
+				}
+			}
+			/* Never overwrite a word the peer has not ACKed. */
+			if (ret != OCTEON_MBOX_STATUS_SUCCESS)
+				break;
+			writeq(mbox_cmd->data[i], mbox->mbox_write_reg);
+		}
+	}
+
+	spin_lock_irqsave(&mbox->lock, flags);
+	if (mbox_cmd->msg.s.type == OCTEON_MBOX_RESPONSE) {
+		mbox->state = OCTEON_MBOX_STATE_IDLE;
+		writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+	} else if ((!mbox_cmd->msg.s.resp_needed) ||
+		   (ret == OCTEON_MBOX_STATUS_FAILED)) {
+		mbox->state &= ~OCTEON_MBOX_STATE_RESPONSE_PENDING;
+		if (!(mbox->state & (OCTEON_MBOX_STATE_REQUEST_RECEIVING |
+				     OCTEON_MBOX_STATE_REQUEST_RECEIVED)))
+			mbox->state = OCTEON_MBOX_STATE_IDLE;
+	}
+	spin_unlock_irqrestore(&mbox->lock, flags);
+
+	return ret;
+}
+
+/**
+ * octeon_mbox_process_cmd:
+ * @mbox: Pointer mailbox
+ * @mbox_cmd: Pointer to command received
+ *
+ * Acts on the cmd code of a received request; unknown codes do nothing.
+ */
+static int octeon_mbox_process_cmd(struct octeon_mbox *mbox,
+				   struct octeon_mbox_cmd *mbox_cmd)
+{
+	struct octeon_device *oct = mbox->oct_dev;
+	struct lio_version *version;
+
+	switch (mbox_cmd->msg.s.cmd) {
+	case OCTEON_VF_ACTIVE:
+		dev_dbg(&oct->pci_dev->dev, "got vfactive sending data back\n");
+		mbox_cmd->msg.s.type = OCTEON_MBOX_RESPONSE;
+		mbox_cmd->msg.s.resp_needed = 1;
+		mbox_cmd->msg.s.len = 2;
+		/* data[0] is cleared, then overlaid with the base version. */
+		mbox_cmd->data[0] = 0;
+		version = (struct lio_version *)&mbox_cmd->data[0];
+		version->major = LIQUIDIO_BASE_MAJOR_VERSION;
+		version->minor = LIQUIDIO_BASE_MINOR_VERSION;
+		version->micro = LIQUIDIO_BASE_MICRO_VERSION;
+		memcpy(mbox_cmd->msg.s.params, (uint8_t *)&oct->pfvf_hsword, 6);
+		/* Send the core config info to the corresponding active VF. */
+		octeon_mbox_write(oct, mbox_cmd);
+		break;
+
+	case OCTEON_VF_FLR_REQUEST:
+		dev_info(&oct->pci_dev->dev,
+			 "got a request for FLR from VF that owns DPI ring %u\n",
+			 mbox->q_no);
+		pcie_capability_set_word(
+			oct->sriov_info.dpiring_to_vfpcidev_lut[mbox->q_no],
+			PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_BCR_FLR);
+		break;
+
+	case OCTEON_PF_CHANGED_VF_MACADDR:
+		if (OCTEON_CN23XX_VF(oct))
+			octeon_pf_changed_vf_macaddr(oct,
+						     mbox_cmd->msg.s.params);
+		break;
+
+	default:
+		break;
+	}
+	return 0;
+}
+
+/**
+ * octeon_mbox_process_message:
+ * @mbox: Pointer mailbox
+ *
+ * Process the received mbox message.  Callbacks and command handlers are
+ * invoked only after the mailbox lock has been released.  Returns 0.
+ */
+int octeon_mbox_process_message(struct octeon_mbox *mbox)
+{
+	struct octeon_mbox_cmd mbox_cmd;
+	unsigned long flags;
+
+	spin_lock_irqsave(&mbox->lock, flags);
+
+	if (mbox->state & OCTEON_MBOX_STATE_ERROR) {
+		if (mbox->state & (OCTEON_MBOX_STATE_RESPONSE_PENDING |
+				   OCTEON_MBOX_STATE_RESPONSE_RECEIVING)) {
+			memcpy(&mbox_cmd, &mbox->mbox_resp, sizeof(mbox_cmd));
+			mbox->state = OCTEON_MBOX_STATE_IDLE;
+			writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+			spin_unlock_irqrestore(&mbox->lock, flags);
+			mbox_cmd.recv_status = 1;
+			if (mbox_cmd.fn)
+				mbox_cmd.fn(mbox->oct_dev, &mbox_cmd,
+					    mbox_cmd.fn_arg);
+			return 0;
+		}
+
+		mbox->state = OCTEON_MBOX_STATE_IDLE;
+		writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+		spin_unlock_irqrestore(&mbox->lock, flags);
+		return 0;
+	}
+
+	if (mbox->state & OCTEON_MBOX_STATE_RESPONSE_RECEIVED) {
+		memcpy(&mbox_cmd, &mbox->mbox_resp, sizeof(mbox_cmd));
+		mbox->state = OCTEON_MBOX_STATE_IDLE;
+		writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+		spin_unlock_irqrestore(&mbox->lock, flags);
+		mbox_cmd.recv_status = 0;
+		if (mbox_cmd.fn)
+			mbox_cmd.fn(mbox->oct_dev, &mbox_cmd, mbox_cmd.fn_arg);
+		return 0;
+	}
+
+	if (mbox->state & OCTEON_MBOX_STATE_REQUEST_RECEIVED) {
+		memcpy(&mbox_cmd, &mbox->mbox_req, sizeof(mbox_cmd));
+		if (!mbox_cmd.msg.s.resp_needed) {
+			mbox->state &= ~OCTEON_MBOX_STATE_REQUEST_RECEIVED;
+			if (!(mbox->state &
+			      OCTEON_MBOX_STATE_RESPONSE_PENDING))
+				mbox->state = OCTEON_MBOX_STATE_IDLE;
+			writeq(OCTEON_PFVFSIG, mbox->mbox_read_reg);
+		}
+
+		spin_unlock_irqrestore(&mbox->lock, flags);
+		octeon_mbox_process_cmd(mbox, &mbox_cmd);
+		return 0;
+	}
+
+	/* Unexpected state: drop the lock taken above before warning. */
+	spin_unlock_irqrestore(&mbox->lock, flags);
+	WARN_ON(1);
+	return 0;
+}
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
new file mode 100644
index 000000000000..fe60a3e6247b
--- /dev/null
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mailbox.h
@@ -0,0 +1,115 @@
+/**********************************************************************
+ * Author: Cavium, Inc.
+ *
+ * Contact: support@cavium.com
+ *          Please include "LiquidIO" in the subject.
+ *
+ * Copyright (c) 2003-2016 Cavium, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, Version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
+#ifndef __MAILBOX_H__
+#define __MAILBOX_H__
+
+/* Macros for Mail Box Communication */
+
+#define OCTEON_MBOX_DATA_MAX	32	/* u64 data words per mailbox cmd */
+
+#define OCTEON_VF_ACTIVE		0x1	/* mailbox cmd codes */
+#define OCTEON_VF_FLR_REQUEST		0x2
+#define OCTEON_PF_CHANGED_VF_MACADDR	0x4
+
+/* Handshake words: read acknowledgment, mailbox-free signal, error */
+#define OCTEON_PFVFACK			0xffffffffffffffff
+#define OCTEON_PFVFSIG			0x1122334455667788
+#define OCTEON_PFVFERR			0xDEADDEADDEADDEAD
+
+#define LIO_MBOX_WRITE_WAIT_CNT          1000	/* polls before giving up */
+#define LIO_MBOX_WRITE_WAIT_TIME           10	/* sleep between polls */
+
+enum octeon_mbox_cmd_status {	/* octeon_mbox_write() results */
+	OCTEON_MBOX_STATUS_SUCCESS = 0,	/* cmd was written out */
+	OCTEON_MBOX_STATUS_FAILED = 1,	/* bad state or handshake timeout */
+	OCTEON_MBOX_STATUS_BUSY = 2	/* request while mailbox not idle */
+};
+
+enum octeon_mbox_message_type {	/* value of msg.s.type */
+	OCTEON_MBOX_REQUEST = 0,
+	OCTEON_MBOX_RESPONSE = 1
+};
+
+union octeon_mbox_message {
+	u64 u64;		/* header as one 8-byte mailbox word */
+	struct {
+		u16 type : 1;		/* enum octeon_mbox_message_type */
+		u16 resp_needed : 1;	/* sender expects a response */
+		u16 cmd : 6;		/* cmd code, e.g. OCTEON_VF_ACTIVE */
+		u16 len : 8;		/* words in message, header included */
+		u8 params[6];		/* cmd specific bytes */
+	} s;
+};
+
+typedef void (*octeon_mbox_callback_t)(void *, void *, void *);
+
+struct octeon_mbox_cmd {
+	union octeon_mbox_message msg;	/* header word */
+	u64 data[OCTEON_MBOX_DATA_MAX];	/* words following the header */
+	u32 q_no;			/* mailbox (queue) number */
+	u32 recv_len;			/* words received so far, incl. header */
+	u32 recv_status;		/* 0: response received, 1: error */
+	octeon_mbox_callback_t fn;	/* response callback, may be NULL */
+	void *fn_arg;			/* third argument passed to fn */
+};
+
+enum octeon_mbox_state {	/* one distinct bit per state, tested with & */
+	OCTEON_MBOX_STATE_IDLE = 1,
+	OCTEON_MBOX_STATE_REQUEST_RECEIVING = 2,
+	OCTEON_MBOX_STATE_REQUEST_RECEIVED = 4,
+	OCTEON_MBOX_STATE_RESPONSE_PENDING = 8,
+	OCTEON_MBOX_STATE_RESPONSE_RECEIVING = 16,
+	OCTEON_MBOX_STATE_RESPONSE_RECEIVED = 32,
+	OCTEON_MBOX_STATE_ERROR = 64
+};
+
+struct octeon_mbox {	/* queue specific mailbox state */
+	/** A spinlock to protect access to this q_mbox. */
+	spinlock_t lock;
+
+	struct octeon_device *oct_dev;	/* passed to cmd handlers/callbacks */
+
+	u32 q_no;			/* copied into q_no of received cmds */
+
+	enum octeon_mbox_state state;	/* OCTEON_MBOX_STATE_* bit flags */
+
+	struct cavium_wk mbox_poll_wk;
+
+	/** SLI_MAC_PF_MBOX_INT for PF, SLI_PKT_MBOX_INT for VF. */
+	void *mbox_int_reg;
+
+	/** SLI_PKT_PF_VF_MBOX_SIG(0) for PF, SLI_PKT_PF_VF_MBOX_SIG(1) for VF.
+	 */
+	void *mbox_write_reg;
+
+	/** SLI_PKT_PF_VF_MBOX_SIG(1) for PF, SLI_PKT_PF_VF_MBOX_SIG(0) for VF.
+	 */
+	void *mbox_read_reg;
+
+	struct octeon_mbox_cmd mbox_req;	/* request being received */
+
+	struct octeon_mbox_cmd mbox_resp;	/* sent request, later reply */
+
+};
+
+int octeon_mbox_read(struct octeon_mbox *mbox);
+int octeon_mbox_write(struct octeon_device *oct,
+		      struct octeon_mbox_cmd *mbox_cmd);
+int octeon_mbox_process_message(struct octeon_mbox *mbox);
+
+#endif
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
index 366298f7bcb2..8cd389148166 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -13,13 +13,8 @@
  * This file is distributed in the hope that it will be useful, but
  * AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
- * NONINFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
- **********************************************************************/
-
+ * NONINFRINGEMENT.  See the GNU General Public License for more details.
+ ***********************************************************************/
 /*! \file octeon_main.h
  *  \brief Host Driver: This file is included by all host driver source files
  *  to include common definitions.
@@ -66,7 +61,7 @@ void octeon_update_tx_completion_counters(void *buf, int reqtype,
 					  unsigned int *bytes_compl);
 void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl,
 					unsigned int bytes_compl);
-
+void octeon_pf_changed_vf_macaddr(struct octeon_device *oct, u8 *mac);
 /** Swap 8B blocks */
 static inline void octeon_swap_8B_data(u64 *data, u32 blocks)
 {
@@ -78,10 +73,10 @@ static inline void octeon_swap_8B_data(u64 *data, u32 blocks)
 }
 
 /**
-  * \brief unmaps a PCI BAR
-  * @param oct Pointer to Octeon device
-  * @param baridx bar index
-  */
+ * \brief unmaps a PCI BAR
+ * @param oct Pointer to Octeon device
+ * @param baridx bar index
+ */
 static inline void octeon_unmap_pci_barx(struct octeon_device *oct, int baridx)
 {
 	dev_dbg(&oct->pci_dev->dev, "Freeing PCI mapped regions for Bar%d\n",
@@ -116,7 +111,7 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct,
 
 	mapped_len = oct->mmio[baridx].len;
 	if (!mapped_len)
-		return 1;
+		goto err_release_region;
 
 	if (max_map_len && (mapped_len > max_map_len))
 		mapped_len = max_map_len;
@@ -132,11 +127,15 @@ static inline int octeon_map_pci_barx(struct octeon_device *oct,
 	if (!oct->mmio[baridx].hw_addr) {
 		dev_err(&oct->pci_dev->dev, "error ioremap for bar %d\n",
 			baridx);
-		return 1;
+		goto err_release_region;
 	}
 	oct->mmio[baridx].done = 1;
 
 	return 0;
+
+err_release_region:
+	pci_release_region(oct->pci_dev, baridx * 2);
+	return 1;
 }
 
 static inline void *
@@ -203,24 +202,6 @@ out:
 	return errno;
 }
 
-static inline void
-sleep_atomic_cond(wait_queue_head_t *waitq, atomic_t *pcond)
-{
-	wait_queue_t we;
-
-	init_waitqueue_entry(&we, current);
-	add_wait_queue(waitq, &we);
-	while (!atomic_read(pcond)) {
-		set_current_state(TASK_INTERRUPTIBLE);
-		if (signal_pending(current))
-			goto out;
-		schedule();
-	}
-out:
-	set_current_state(TASK_RUNNING);
-	remove_wait_queue(waitq, &we);
-}
-
 /* Gives up the CPU for a timeout period.
  * Check that the condition is not true before we go to sleep for a
  * timeout period.
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
index 0dc081a99b30..13a18c9a7a51 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/netdevice.h>
 #include "liquidio_common.h"
@@ -39,7 +36,7 @@ octeon_toggle_bar1_swapmode(struct octeon_device *oct, u32 idx)
 	oct->fn_list.bar1_idx_write(oct, idx, mask);
 }
 #else
-#define octeon_toggle_bar1_swapmode(oct, idx) (oct = oct)
+#define octeon_toggle_bar1_swapmode(oct, idx)
 #endif
 
 static void
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.h b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.h
index 11b183377b44..bae2fdd89503 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.h
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*!  \file octeon_mem_ops.h
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
index e5d1debd05ad..6bb89419006e 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*!  \file  octeon_network.h
@@ -29,7 +26,7 @@
 #include <linux/ptp_clock_kernel.h>
 
 #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE)
-#define LIO_MIN_MTU_SIZE 68
+#define LIO_MIN_MTU_SIZE ETH_MIN_MTU
 
 struct oct_nic_stats_resp {
 	u64     rh;
@@ -126,12 +123,13 @@ struct lio {
 	/* work queue for  link status */
 	struct cavium_wq	link_status_wq;
 
+	int netdev_uc_count;
 };
 
 #define LIO_SIZE         (sizeof(struct lio))
 #define GET_LIO(netdev)  ((struct lio *)netdev_priv(netdev))
 
-#define CIU3_WDOG(c)                 (0x1010000020000ULL + (c << 3))
+#define CIU3_WDOG(c)                 (0x1010000020000ULL + ((c) << 3))
 #define CIU3_WDOG_MASK               12ULL
 #define LIO_MONITOR_WDOG_EXPIRE      1
 #define LIO_MONITOR_CORE_STUCK_MSGD  2
@@ -342,9 +340,9 @@ static inline void tx_buffer_free(void *buffer)
 }
 
 #define lio_dma_alloc(oct, size, dma_addr) \
-	dma_alloc_coherent(&oct->pci_dev->dev, size, dma_addr, GFP_KERNEL)
+	dma_alloc_coherent(&(oct)->pci_dev->dev, size, dma_addr, GFP_KERNEL)
 #define lio_dma_free(oct, size, virt_addr, dma_addr) \
-	dma_free_coherent(&oct->pci_dev->dev, size, virt_addr, dma_addr)
+	dma_free_coherent(&(oct)->pci_dev->dev, size, virt_addr, dma_addr)
 
 static inline
 void *get_rbd(struct sk_buff *skb)
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
index 40ac1fe88956..c3d6a8228362 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
index 4b8da67b995f..0c7a5c9b2932 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*!  \file octeon_nic.h
@@ -67,7 +64,7 @@ struct octnic_ctrl_pkt {
 	octnic_ctrl_pkt_cb_fn_t cb_fn;
 };
 
-#define MAX_UDD_SIZE(nctrl) (sizeof(nctrl->udd))
+#define MAX_UDD_SIZE(nctrl) (sizeof((nctrl)->udd))
 
 /** Structure of data information passed by the NIC module to the OSI
  * layer when forwarding data to Octeon device software.
diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c
index 90866bb50033..3ce66759e80a 100644
--- a/drivers/net/ethernet/cavium/liquidio/request_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
@@ -31,9 +28,7 @@
 #include "octeon_network.h"
 #include "cn66xx_device.h"
 #include "cn23xx_pf_device.h"
-
-#define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count)  \
-	(octeon_dev_ptr->instr_queue[iq_no]->stats.field += count)
+#include "cn23xx_vf_device.h"
 
 struct iq_post_status {
 	int status;
@@ -71,9 +66,12 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 	int numa_node = cpu_to_node(iq_no % num_online_cpus());
 
 	if (OCTEON_CN6XXX(oct))
-		conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf)));
+		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn6xxx)));
 	else if (OCTEON_CN23XX_PF(oct))
-		conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf)));
+		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_pf)));
+	else if (OCTEON_CN23XX_VF(oct))
+		conf = &(CFG_GET_IQ_CFG(CHIP_CONF(oct, cn23xx_vf)));
+
 	if (!conf) {
 		dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n",
 			oct->chip_id);
@@ -145,7 +143,7 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 
 	spin_lock_init(&iq->iq_flush_running_lock);
 
-	oct->io_qmask.iq |= (1ULL << iq_no);
+	oct->io_qmask.iq |= BIT_ULL(iq_no);
 
 	/* Set the 32B/64B mode for each input queue */
 	oct->io_qmask.iq64B |= ((conf->instr_type == 64) << iq_no);
@@ -157,6 +155,8 @@ int octeon_init_instr_queue(struct octeon_device *oct,
 						     WQ_MEM_RECLAIM,
 						     0);
 	if (!oct->check_db_wq[iq_no].wq) {
+		vfree(iq->request_list);
+		iq->request_list = NULL;
 		lio_dma_free(oct, q_size, iq->base_addr, iq->base_addr_dma);
 		dev_err(&oct->pci_dev->dev, "check db wq create failed for iq %d\n",
 			iq_no);
@@ -183,10 +183,13 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no)
 
 	if (OCTEON_CN6XXX(oct))
 		desc_size =
-		    CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf));
+		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn6xxx));
 	else if (OCTEON_CN23XX_PF(oct))
 		desc_size =
-		    CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf));
+		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_pf));
+	else if (OCTEON_CN23XX_VF(oct))
+		desc_size =
+		    CFG_GET_IQ_INSTR_TYPE(CHIP_CONF(oct, cn23xx_vf));
 
 	vfree(iq->request_list);
 
@@ -239,7 +242,9 @@ int octeon_setup_iq(struct octeon_device *oct,
 	}
 
 	oct->num_iqs++;
-	oct->fn_list.enable_io_queues(oct);
+	if (oct->fn_list.enable_io_queues(oct))
+		return 1;
+
 	return 0;
 }
 
@@ -250,9 +255,8 @@ int lio_wait_for_instr_fetch(struct octeon_device *oct)
 	do {
 		instr_cnt = 0;
 
-		/*for (i = 0; i < oct->num_iqs; i++) {*/
 		for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) {
-			if (!(oct->io_qmask.iq & (1ULL << i)))
+			if (!(oct->io_qmask.iq & BIT_ULL(i)))
 				continue;
 			pending =
 			    atomic_read(&oct->
@@ -319,7 +323,8 @@ __post_command2(struct octeon_instr_queue *iq, u8 *cmd)
 
 	/* "index" is returned, host_write_index is modified. */
 	st.index = iq->host_write_index;
-	INCR_INDEX_BY1(iq->host_write_index, iq->max_count);
+	iq->host_write_index = incr_index(iq->host_write_index, 1,
+					  iq->max_count);
 	iq->fill_cnt++;
 
 	/* Flush the command into memory. We need to be sure the data is in
@@ -389,7 +394,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
 		case REQTYPE_SOFT_COMMAND:
 			sc = buf;
 
-			if (OCTEON_CN23XX_PF(oct))
+			if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct))
 				irh = (struct octeon_instr_irh *)
 					&sc->cmd.cmd3.irh;
 			else
@@ -434,7 +439,7 @@ lio_process_iq_request_list(struct octeon_device *oct,
 
  skip_this:
 		inst_count++;
-		INCR_INDEX_BY1(old, iq->max_count);
+		old = incr_index(old, 1, iq->max_count);
 
 		if ((napi_budget) && (inst_count >= napi_budget))
 			break;
@@ -577,8 +582,6 @@ octeon_send_command(struct octeon_device *oct, u32 iq_no,
 	/* This is only done here to expedite packets being flushed
 	 * for cases where there are no IQ completion interrupts.
 	 */
-	/*if (iq->do_auto_flush)*/
-	/*	octeon_flush_iq(oct, iq, 2, 0);*/
 
 	return st.status;
 }
@@ -604,7 +607,7 @@ octeon_prepare_soft_command(struct octeon_device *oct,
 
 	oct_cfg = octeon_get_conf(oct);
 
-	if (OCTEON_CN23XX_PF(oct)) {
+	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
 		ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
 
 		ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind;
@@ -697,7 +700,7 @@ int octeon_send_soft_command(struct octeon_device *oct,
 	struct octeon_instr_irh *irh;
 	u32 len;
 
-	if (OCTEON_CN23XX_PF(oct)) {
+	if (OCTEON_CN23XX_PF(oct) || OCTEON_CN23XX_VF(oct)) {
 		ih3 =  (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3;
 		if (ih3->dlengsz) {
 			WARN_ON(!sc->dmadptr);
@@ -749,8 +752,10 @@ int octeon_setup_sc_buffer_pool(struct octeon_device *oct)
 			lio_dma_alloc(oct,
 				      SOFT_COMMAND_BUFFER_SIZE,
 					  (dma_addr_t *)&dma_addr);
-		if (!sc)
+		if (!sc) {
+			octeon_free_sc_buffer_pool(oct);
 			return 1;
+		}
 
 		sc->dma_addr = dma_addr;
 		sc->size = SOFT_COMMAND_BUFFER_SIZE;
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c
index be52178d8cb6..2fbaae96b505 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.c
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 #include <linux/pci.h>
 #include <linux/netdevice.h>
@@ -81,17 +78,14 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev,
 		spin_lock_bh(&ordered_sc_list->lock);
 
 		if (ordered_sc_list->head.next == &ordered_sc_list->head) {
-			/* ordered_sc_list is empty; there is
-			 * nothing to process
-			 */
-			spin_unlock_bh
-			    (&ordered_sc_list->lock);
+			spin_unlock_bh(&ordered_sc_list->lock);
 			return 1;
 		}
 
 		sc = (struct octeon_soft_command *)ordered_sc_list->
 		    head.next;
-		if (OCTEON_CN23XX_PF(octeon_dev)) {
+		if (OCTEON_CN23XX_PF(octeon_dev) ||
+		    OCTEON_CN23XX_VF(octeon_dev)) {
 			rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp;
 			rptr = sc->cmd.cmd3.rptr;
 		} else {
diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.h b/drivers/net/ethernet/cavium/liquidio/response_manager.h
index 7a48752dcb10..cbb2d84e8932 100644
--- a/drivers/net/ethernet/cavium/liquidio/response_manager.h
+++ b/drivers/net/ethernet/cavium/liquidio/response_manager.h
@@ -4,7 +4,7 @@
  * Contact: support@cavium.com
  *          Please include "LiquidIO" in the subject.
  *
- * Copyright (c) 2003-2015 Cavium, Inc.
+ * Copyright (c) 2003-2016 Cavium, Inc.
  *
  * This file is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License, Version 2, as
@@ -15,9 +15,6 @@
  * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or
  * NONINFRINGEMENT.  See the GNU General Public License for more
  * details.
- *
- * This file may also be available under a different license from Cavium.
- * Contact Cavium, Inc. for more information
  **********************************************************************/
 
 /*! \file response_manager.h
@@ -85,7 +82,6 @@ enum {
 /**  A value of 0x00000000 indicates no error i.e. success */
 #define DRIVER_ERROR_NONE                 0x00000000
 
-/**  (Major number: 0x0000; Minor Number: 0x0001) */
 #define DRIVER_ERROR_REQ_PENDING          0x00000001
 #define DRIVER_ERROR_REQ_TIMEOUT          0x00000003
 #define DRIVER_ERROR_REQ_EINTR            0x00000004
diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 4ab404f45b21..16e12c45904b 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -645,16 +645,6 @@ static int octeon_mgmt_change_mtu(struct net_device *netdev, int new_mtu)
 	struct octeon_mgmt *p = netdev_priv(netdev);
 	int size_without_fcs = new_mtu + OCTEON_MGMT_RX_HEADROOM;
 
-	/* Limit the MTU to make sure the ethernet packets are between
-	 * 64 bytes and 16383 bytes.
-	 */
-	if (size_without_fcs < 64 || size_without_fcs > 16383) {
-		dev_warn(p->dev, "MTU must be between %d and %d.\n",
-			 64 - OCTEON_MGMT_RX_HEADROOM,
-			 16383 - OCTEON_MGMT_RX_HEADROOM);
-		return -EINVAL;
-	}
-
 	netdev->mtu = new_mtu;
 
 	cvmx_write_csr(p->agl + AGL_GMX_RX_FRM_MAX, size_without_fcs);
@@ -1491,6 +1481,9 @@ static int octeon_mgmt_probe(struct platform_device *pdev)
 	netdev->netdev_ops = &octeon_mgmt_ops;
 	netdev->ethtool_ops = &octeon_mgmt_ethtool_ops;
 
+	netdev->min_mtu = 64 - OCTEON_MGMT_RX_HEADROOM;
+	netdev->max_mtu = 16383 - OCTEON_MGMT_RX_HEADROOM;
+
 	mac = of_get_mac_address(pdev->dev.of_node);
 
 	if (mac)
diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h
index 86bd93ce2ea3..e739c7153562 100644
--- a/drivers/net/ethernet/cavium/thunder/nic.h
+++ b/drivers/net/ethernet/cavium/thunder/nic.h
@@ -149,6 +149,12 @@ struct nicvf_rss_info {
 	u64 key[RSS_HASH_KEY_SIZE];
 } ____cacheline_aligned_in_smp;
 
+struct nicvf_pfc {	/* Pause-frame settings kept in struct nicvf (nic->pfc) */
+	u8    autoneg;	/* reported to ethtool as pause->autoneg */
+	u8    fc_rx;	/* RX pause setting, reported as pause->rx_pause */
+	u8    fc_tx;	/* TX pause setting, reported as pause->tx_pause */
+};
+
 enum rx_stats_reg_offset {
 	RX_OCTS = 0x0,
 	RX_UCAST = 0x1,
@@ -292,11 +298,13 @@ struct nicvf {
 	u8			node;
 	u8			cpi_alg;
 	bool			link_up;
+	u8			mac_type;
 	u8			duplex;
 	u32			speed;
 	bool			tns_mode;
 	bool			loopback_supported;
 	struct nicvf_rss_info	rss_info;
+	struct nicvf_pfc	pfc;
 	struct tasklet_struct	qs_err_task;
 	struct work_struct	reset_task;
 
@@ -357,6 +365,7 @@ struct nicvf {
 #define	NIC_MBOX_MSG_SNICVF_PTR		0x15	/* Send sqet nicvf ptr to PVF */
 #define	NIC_MBOX_MSG_LOOPBACK		0x16	/* Set interface in loopback */
 #define	NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17	/* Reset statistics counters */
+#define	NIC_MBOX_MSG_PFC		0x18	/* Pause frame control */
 #define	NIC_MBOX_MSG_CFG_DONE		0xF0	/* VF configuration done */
 #define	NIC_MBOX_MSG_SHUTDOWN		0xF1	/* VF is being shutdown */
 
@@ -446,6 +455,7 @@ struct bgx_stats_msg {
 /* Physical interface link status */
 struct bgx_link_status {
 	u8    msg;
+	u8    mac_type;
 	u8    link_up;
 	u8    duplex;
 	u32   speed;
@@ -498,6 +508,14 @@ struct reset_stat_cfg {
 	u16   sq_stat_mask;
 };
 
+struct pfc {		/* NIC_MBOX_MSG_PFC payload, a member of union nic_mbx */
+	u8    msg;	/* mailbox message id (NIC_MBOX_MSG_PFC) */
+	u8    get; /* 1: query the settings, 0: apply fc_rx/fc_tx */
+	u8    autoneg;	/* bgx_lmac_get_pfc() reports 0 here */
+	u8    fc_rx;	/* RX pause enable */
+	u8    fc_tx;	/* TX pause enable */
+};
+
 /* 128 bit shared memory between PF and each VF */
 union nic_mbx {
 	struct { u8 msg; }	msg;
@@ -516,6 +534,7 @@ union nic_mbx {
 	struct nicvf_ptr	nicvf;
 	struct set_loopback	lbk;
 	struct reset_stat_cfg	reset_stat;
+	struct pfc		pfc;
 };
 
 #define NIC_NODE_ID_MASK	0x03
diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c
index 6677b96e1f3f..767234e2e8f9 100644
--- a/drivers/net/ethernet/cavium/thunder/nic_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nic_main.c
@@ -809,6 +809,15 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk)
 
 	bgx_lmac_internal_loopback(nic->node, bgx_idx, lmac_idx, lbk->enable);
 
+	/* Enable moving average calculation.
+	 * Keep the LVL/AVG delay at the HW-enforced minimum so that not too
+	 * many packets sneak in between average calculations.
+	 */
+	nic_reg_write(nic, NIC_PF_CQ_AVG_CFG,
+		      (BIT_ULL(20) | 0x2ull << 14 | 0x1));
+	nic_reg_write(nic, NIC_PF_RRM_AVG_CFG,
+		      (BIT_ULL(20) | 0x3ull << 14 | 0x1));
+
 	return 0;
 }
 
@@ -889,6 +898,30 @@ static void nic_enable_vf(struct nicpf *nic, int vf, bool enable)
 	bgx_lmac_rx_tx_enable(nic->node, bgx, lmac, enable);
 }
 
+static void nic_pause_frame(struct nicpf *nic, int vf, struct pfc *cfg)
+{	/* Serve a VF's NIC_MBOX_MSG_PFC: report or apply LMAC pause config */
+	int bgx, lmac;
+	struct pfc pfc = {};	/* zeroed: bgx_lmac_get_pfc() may not fill it */
+	union nic_mbx mbx = {};
+
+	if (vf >= nic->num_vf_en)	/* vf out of range: no reply is sent */
+		return;
+	bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+	lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]);
+
+	if (cfg->get) {		/* query: reply with the current settings */
+		bgx_lmac_get_pfc(nic->node, bgx, lmac, &pfc);
+		mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+		mbx.pfc.autoneg = pfc.autoneg;
+		mbx.pfc.fc_rx = pfc.fc_rx;
+		mbx.pfc.fc_tx = pfc.fc_tx;
+		nic_send_msg_to_vf(nic, vf, &mbx);
+	} else {		/* update: apply cfg, then acknowledge */
+		bgx_lmac_set_pfc(nic->node, bgx, lmac, cfg);
+		nic_mbx_send_ack(nic, vf);
+	}
+}
+
 /* Interrupt handler to handle mailbox messages from VFs */
 static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 {
@@ -1028,6 +1061,9 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf)
 	case NIC_MBOX_MSG_RESET_STAT_COUNTER:
 		ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat);
 		break;
+	case NIC_MBOX_MSG_PFC:
+		nic_pause_frame(nic, vf, &mbx.pfc);
+		goto unlock;
 	default:
 		dev_err(&nic->pdev->dev,
 			"Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg);
@@ -1258,6 +1294,7 @@ static void nic_poll_for_link(struct work_struct *work)
 			mbx.link_status.link_up = link.link_up;
 			mbx.link_status.duplex = link.duplex;
 			mbx.link_status.speed = link.speed;
+			mbx.link_status.mac_type = link.mac_type;
 			nic_send_msg_to_vf(nic, vf, &mbx);
 		}
 	}
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
index 432bf6be57cb..2e74bbaa38e1 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_ethtool.c
@@ -116,29 +116,65 @@ static const unsigned int nicvf_n_hw_stats = ARRAY_SIZE(nicvf_hw_stats);
 static const unsigned int nicvf_n_drv_stats = ARRAY_SIZE(nicvf_drv_stats);
 static const unsigned int nicvf_n_queue_stats = ARRAY_SIZE(nicvf_queue_stats);
 
-static int nicvf_get_settings(struct net_device *netdev,
-			      struct ethtool_cmd *cmd)
+static int nicvf_get_link_ksettings(struct net_device *netdev,
+				    struct ethtool_link_ksettings *cmd)
 {
 	struct nicvf *nic = netdev_priv(netdev);
+	u32 supported, advertising;
 
-	cmd->supported = 0;
-	cmd->transceiver = XCVR_EXTERNAL;
+	supported = 0;
+	advertising = 0;
 
 	if (!nic->link_up) {
-		cmd->duplex = DUPLEX_UNKNOWN;
-		ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
+		cmd->base.duplex = DUPLEX_UNKNOWN;
+		cmd->base.speed = SPEED_UNKNOWN;
 		return 0;
 	}
 
-	if (nic->speed <= 1000) {
-		cmd->port = PORT_MII;
-		cmd->autoneg = AUTONEG_ENABLE;
-	} else {
-		cmd->port = PORT_FIBRE;
-		cmd->autoneg = AUTONEG_DISABLE;
+	switch (nic->speed) {
+	case SPEED_1000:
+		cmd->base.port = PORT_MII | PORT_TP;
+		cmd->base.autoneg = AUTONEG_ENABLE;
+		supported |= SUPPORTED_MII | SUPPORTED_TP;
+		supported |= SUPPORTED_1000baseT_Full |
+				  SUPPORTED_1000baseT_Half |
+				  SUPPORTED_100baseT_Full  |
+				  SUPPORTED_100baseT_Half  |
+				  SUPPORTED_10baseT_Full   |
+				  SUPPORTED_10baseT_Half;
+		supported |= SUPPORTED_Autoneg;
+		advertising |= ADVERTISED_1000baseT_Full |
+				    ADVERTISED_1000baseT_Half |
+				    ADVERTISED_100baseT_Full  |
+				    ADVERTISED_100baseT_Half  |
+				    ADVERTISED_10baseT_Full   |
+				    ADVERTISED_10baseT_Half;
+		break;
+	case SPEED_10000:
+		if (nic->mac_type == BGX_MODE_RXAUI) {
+			cmd->base.port = PORT_TP;
+			supported |= SUPPORTED_TP;
+		} else {
+			cmd->base.port = PORT_FIBRE;
+			supported |= SUPPORTED_FIBRE;
+		}
+		cmd->base.autoneg = AUTONEG_DISABLE;
+		supported |= SUPPORTED_10000baseT_Full;
+		break;
+	case SPEED_40000:
+		cmd->base.port = PORT_FIBRE;
+		cmd->base.autoneg = AUTONEG_DISABLE;
+		supported |= SUPPORTED_FIBRE;
+		supported |= SUPPORTED_40000baseCR4_Full;
+		break;
 	}
-	cmd->duplex = nic->duplex;
-	ethtool_cmd_speed_set(cmd, nic->speed);
+	cmd->base.duplex = nic->duplex;
+	cmd->base.speed = nic->speed;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						supported);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						advertising);
 
 	return 0;
 }
@@ -690,8 +726,56 @@ static int nicvf_set_channels(struct net_device *dev,
 	return err;
 }
 
+static void nicvf_get_pauseparam(struct net_device *dev,
+				 struct ethtool_pauseparam *pause)
+{	/* ethtool .get_pauseparam hook: query pause settings via the PF */
+	struct nicvf *nic = netdev_priv(dev);
+	union nic_mbx mbx = {};
+
+	/* Supported only for 10G/40G interfaces */
+	if ((nic->mac_type == BGX_MODE_SGMII) ||
+	    (nic->mac_type == BGX_MODE_QSGMII) ||
+	    (nic->mac_type == BGX_MODE_RGMII))
+		return;	/* *pause is left untouched */
+
+	mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+	mbx.pfc.get = 1;	/* 1 = query, 0 = set */
+	if (!nicvf_send_msg_to_pf(nic, &mbx)) {	/* 0 is treated as success */
+		pause->autoneg = nic->pfc.autoneg;
+		pause->rx_pause = nic->pfc.fc_rx;
+		pause->tx_pause = nic->pfc.fc_tx;
+	}
+}
+
+static int nicvf_set_pauseparam(struct net_device *dev,
+				struct ethtool_pauseparam *pause)
+{	/* ethtool .set_pauseparam hook: send rx/tx pause enables to the PF */
+	struct nicvf *nic = netdev_priv(dev);
+	union nic_mbx mbx = {};
+
+	/* Supported only for 10G/40G interfaces */
+	if ((nic->mac_type == BGX_MODE_SGMII) ||
+	    (nic->mac_type == BGX_MODE_QSGMII) ||
+	    (nic->mac_type == BGX_MODE_RGMII))
+		return -EOPNOTSUPP;
+
+	if (pause->autoneg)	/* pause autonegotiation is rejected */
+		return -EOPNOTSUPP;
+
+	mbx.pfc.msg = NIC_MBOX_MSG_PFC;
+	mbx.pfc.get = 0;	/* 0 = set, 1 = query */
+	mbx.pfc.fc_rx = pause->rx_pause;
+	mbx.pfc.fc_tx = pause->tx_pause;
+	if (nicvf_send_msg_to_pf(nic, &mbx))	/* non-zero means failure */
+		return -EAGAIN;
+
+	nic->pfc.fc_rx = pause->rx_pause;	/* cache only after success */
+	nic->pfc.fc_tx = pause->tx_pause;
+
+	return 0;
+}
+
 static const struct ethtool_ops nicvf_ethtool_ops = {
-	.get_settings		= nicvf_get_settings,
 	.get_link		= nicvf_get_link,
 	.get_drvinfo		= nicvf_get_drvinfo,
 	.get_msglevel		= nicvf_get_msglevel,
@@ -711,7 +795,10 @@ static const struct ethtool_ops nicvf_ethtool_ops = {
 	.set_rxfh		= nicvf_set_rxfh,
 	.get_channels		= nicvf_get_channels,
 	.set_channels		= nicvf_set_channels,
+	.get_pauseparam         = nicvf_get_pauseparam,
+	.set_pauseparam         = nicvf_set_pauseparam,
 	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_link_ksettings	= nicvf_get_link_ksettings,
 };
 
 void nicvf_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index 8a37012c9c89..2006f58b14b1 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -221,6 +221,7 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
 		nic->link_up = mbx.link_status.link_up;
 		nic->duplex = mbx.link_status.duplex;
 		nic->speed = mbx.link_status.speed;
+		nic->mac_type = mbx.link_status.mac_type;
 		if (nic->link_up) {
 			netdev_info(nic->netdev, "%s: Link is Up %d Mbps %s\n",
 				    nic->netdev->name, nic->speed,
@@ -255,6 +256,12 @@ static void  nicvf_handle_mbx_intr(struct nicvf *nic)
 		nic->pnicvf = (struct nicvf *)mbx.nicvf.nicvf;
 		nic->pf_acked = true;
 		break;
+	case NIC_MBOX_MSG_PFC:
+		nic->pfc.autoneg = mbx.pfc.autoneg;
+		nic->pfc.fc_rx = mbx.pfc.fc_rx;
+		nic->pfc.fc_tx = mbx.pfc.fc_tx;
+		nic->pf_acked = true;
+		break;
 	default:
 		netdev_err(nic->netdev,
 			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
@@ -637,6 +644,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx,
 	struct cmp_queue *cq = &qs->cq[cq_idx];
 	struct cqe_rx_t *cq_desc;
 	struct netdev_queue *txq;
+	struct snd_queue *sq;
 	unsigned int tx_pkts = 0, tx_bytes = 0;
 
 	spin_lock_bh(&cq->lock);
@@ -702,16 +710,20 @@ loop:
 
 done:
 	/* Wakeup TXQ if its stopped earlier due to SQ full */
-	if (tx_done) {
+	sq = &nic->qs->sq[cq_idx];
+	if (tx_done ||
+	    (atomic_read(&sq->free_cnt) >= MIN_SQ_DESC_PER_PKT_XMIT)) {
 		netdev = nic->pnicvf->netdev;
 		txq = netdev_get_tx_queue(netdev,
 					  nicvf_netdev_qidx(nic, cq_idx));
 		if (tx_pkts)
 			netdev_tx_completed_queue(txq, tx_pkts, tx_bytes);
 
-		nic = nic->pnicvf;
+		/* To read updated queue and carrier status */
+		smp_mb();
 		if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) {
-			netif_tx_start_queue(txq);
+			netif_tx_wake_queue(txq);
+			nic = nic->pnicvf;
 			this_cpu_inc(nic->drv_stats->txq_wake);
 			if (netif_msg_tx_err(nic))
 				netdev_warn(netdev,
@@ -1047,6 +1059,9 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
 	struct nicvf *nic = netdev_priv(netdev);
 	int qid = skb_get_queue_mapping(skb);
 	struct netdev_queue *txq = netdev_get_tx_queue(netdev, qid);
+	struct nicvf *snic;
+	struct snd_queue *sq;
+	int tmp;
 
 	/* Check for minimum packet length */
 	if (skb->len <= ETH_HLEN) {
@@ -1054,13 +1069,39 @@ static netdev_tx_t nicvf_xmit(struct sk_buff *skb, struct net_device *netdev)
 		return NETDEV_TX_OK;
 	}
 
-	if (!netif_tx_queue_stopped(txq) && !nicvf_sq_append_skb(nic, skb)) {
+	snic = nic;
+	/* Get secondary Qset's SQ structure */
+	if (qid >= MAX_SND_QUEUES_PER_QS) {
+		tmp = qid / MAX_SND_QUEUES_PER_QS;
+		snic = (struct nicvf *)nic->snicvf[tmp - 1];
+		if (!snic) {
+			netdev_warn(nic->netdev,
+				    "Secondary Qset#%d's ptr not initialized\n",
+				    tmp - 1);
+			dev_kfree_skb(skb);
+			return NETDEV_TX_OK;
+		}
+		qid = qid % MAX_SND_QUEUES_PER_QS;
+	}
+
+	sq = &snic->qs->sq[qid];
+	if (!netif_tx_queue_stopped(txq) &&
+	    !nicvf_sq_append_skb(snic, sq, skb, qid)) {
 		netif_tx_stop_queue(txq);
-		this_cpu_inc(nic->drv_stats->txq_stop);
-		if (netif_msg_tx_err(nic))
-			netdev_warn(netdev,
-				    "%s: Transmit ring full, stopping SQ%d\n",
-				    netdev->name, qid);
+
+		/* Barrier, so that stop_queue is visible to other CPUs */
+		smp_mb();
+
+		/* Check again, in case another CPU freed descriptors */
+		if (atomic_read(&sq->free_cnt) > MIN_SQ_DESC_PER_PKT_XMIT) {
+			netif_tx_wake_queue(txq);
+		} else {
+			this_cpu_inc(nic->drv_stats->txq_stop);
+			if (netif_msg_tx_err(nic))
+				netdev_warn(netdev,
+					    "%s: Transmit ring full, stopping SQ%d\n",
+					    netdev->name, qid);
+		}
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1291,20 +1332,17 @@ napi_del:
 static int nicvf_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct nicvf *nic = netdev_priv(netdev);
-
-	if (new_mtu > NIC_HW_MAX_FRS)
-		return -EINVAL;
-
-	if (new_mtu < NIC_HW_MIN_FRS)
-		return -EINVAL;
+	int orig_mtu = netdev->mtu;
 
 	netdev->mtu = new_mtu;
 
 	if (!netif_running(netdev))
 		return 0;
 
-	if (nicvf_update_hw_max_frs(nic, new_mtu))
+	if (nicvf_update_hw_max_frs(nic, new_mtu)) {
+		netdev->mtu = orig_mtu;
 		return -EINVAL;
+	}
 
 	return 0;
 }
@@ -1631,6 +1669,10 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->netdev_ops = &nicvf_netdev_ops;
 	netdev->watchdog_timeo = NICVF_TX_TIMEOUT;
 
+	/* MTU range: 64 - 9200 */
+	netdev->min_mtu = NIC_HW_MIN_FRS;
+	netdev->max_mtu = NIC_HW_MAX_FRS;
+
 	INIT_WORK(&nic->reset_task, nicvf_reset_task);
 
 	err = register_netdev(netdev);
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
index 747ef0882976..d2ac133e36f1 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c
@@ -544,14 +544,18 @@ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs,
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_BP_CFG;
-	mbx.rq.cfg = (1ULL << 63) | (1ULL << 62) | (qs->vnic_id << 0);
+	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+		     (RQ_PASS_RBDR_LVL << 16) | (RQ_PASS_CQ_LVL << 8) |
+		     (qs->vnic_id << 0);
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	/* RQ drop config
 	 * Enable CQ drop to reserve sufficient CQEs for all tx packets
 	 */
 	mbx.rq.msg = NIC_MBOX_MSG_RQ_DROP_CFG;
-	mbx.rq.cfg = (1ULL << 62) | (RQ_CQ_DROP << 8);
+	mbx.rq.cfg = BIT_ULL(63) | BIT_ULL(62) |
+		     (RQ_PASS_RBDR_LVL << 40) | (RQ_DROP_RBDR_LVL << 32) |
+		     (RQ_PASS_CQ_LVL << 16) | (RQ_DROP_CQ_LVL << 8);
 	nicvf_send_msg_to_pf(nic, &mbx);
 
 	if (!nic->sqs_mode && (qidx == 0)) {
@@ -650,6 +654,7 @@ static void nicvf_snd_queue_config(struct nicvf *nic, struct queue_set *qs,
 	sq_cfg.ldwb = 0;
 	sq_cfg.qsize = SND_QSIZE;
 	sq_cfg.tstmp_bgx_intf = 0;
+	sq_cfg.cq_limit = 0;
 	nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_CFG, qidx, *(u64 *)&sq_cfg);
 
 	/* Set threshold value for interrupt generation */
@@ -1185,30 +1190,12 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq,
 }
 
 /* Append an skb to a SQ for packet transfer. */
-int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb)
+int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
+			struct sk_buff *skb, u8 sq_num)
 {
 	int i, size;
 	int subdesc_cnt, tso_sqe = 0;
-	int sq_num, qentry;
-	struct queue_set *qs;
-	struct snd_queue *sq;
-
-	sq_num = skb_get_queue_mapping(skb);
-	if (sq_num >= MAX_SND_QUEUES_PER_QS) {
-		/* Get secondary Qset's SQ structure */
-		i = sq_num / MAX_SND_QUEUES_PER_QS;
-		if (!nic->snicvf[i - 1]) {
-			netdev_warn(nic->netdev,
-				    "Secondary Qset#%d's ptr not initialized\n",
-				    i - 1);
-			return 1;
-		}
-		nic = (struct nicvf *)nic->snicvf[i - 1];
-		sq_num = sq_num % MAX_SND_QUEUES_PER_QS;
-	}
-
-	qs = nic->qs;
-	sq = &qs->sq[sq_num];
+	int qentry;
 
 	subdesc_cnt = nicvf_sq_subdesc_required(nic, skb);
 	if (subdesc_cnt > atomic_read(&sq->free_cnt))
diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
index 2e3c940c1093..9e2104675bc9 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h
@@ -85,12 +85,26 @@
 
 #define MAX_CQES_FOR_TX		((SND_QUEUE_LEN / MIN_SQ_DESC_PER_PKT_XMIT) * \
 				 MAX_CQE_PER_PKT_XMIT)
-/* Calculate number of CQEs to reserve for all SQEs.
- * Its 1/256th level of CQ size.
- * '+ 1' to account for pipelining
+
+/* RED and Backpressure levels of CQ for pkt reception
+ * For CQ, level is a measure of emptiness i.e 0x0 means full
+ * eg: For CQ of size 4K, and for pass/drop levels of 160/144
+ * HW accepts pkt if unused CQE >= 2560
+ * RED accepts pkt if unused CQE < 2560 & >= 2304
+ * DROPs pkts if unused CQE < 2304
+ */
+#define RQ_PASS_CQ_LVL		160ULL
+#define RQ_DROP_CQ_LVL		144ULL
+
+/* RED and Backpressure levels of RBDR for pkt reception
+ * For RBDR, level is a measure of fullness i.e 0x0 means empty
+ * eg: For RBDR of size 8K, and for pass/drop levels of 8/0
+ * HW accepts pkt if unused RBs >= 256
+ * RED accepts pkt if unused RBs < 256 & >= 0
+ * DROPs pkts if unused RBs < 0
  */
-#define RQ_CQ_DROP		((256 / (CMP_QUEUE_LEN / \
-				 (CMP_QUEUE_LEN - MAX_CQES_FOR_TX))) + 1)
+#define RQ_PASS_RBDR_LVL	8ULL
+#define RQ_DROP_RBDR_LVL	0ULL
 
 /* Descriptor size in bytes */
 #define SND_QUEUE_DESC_SIZE	16
@@ -292,7 +306,8 @@ void nicvf_sq_disable(struct nicvf *nic, int qidx);
 void nicvf_put_sq_desc(struct snd_queue *sq, int desc_cnt);
 void nicvf_sq_free_used_descs(struct net_device *netdev,
 			      struct snd_queue *sq, int qidx);
-int nicvf_sq_append_skb(struct nicvf *nic, struct sk_buff *skb);
+int nicvf_sq_append_skb(struct nicvf *nic, struct snd_queue *sq,
+			struct sk_buff *skb, u8 sq_num);
 
 struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx);
 void nicvf_rbdr_task(unsigned long data);
diff --git a/drivers/net/ethernet/cavium/thunder/q_struct.h b/drivers/net/ethernet/cavium/thunder/q_struct.h
index 9e6d9876bfd0..f36347237a54 100644
--- a/drivers/net/ethernet/cavium/thunder/q_struct.h
+++ b/drivers/net/ethernet/cavium/thunder/q_struct.h
@@ -624,7 +624,9 @@ struct cq_cfg {
 
 struct sq_cfg {
 #if defined(__BIG_ENDIAN_BITFIELD)
-	u64 reserved_20_63:44;
+	u64 reserved_32_63:32;
+	u64 cq_limit:8;
+	u64 reserved_20_23:4;
 	u64 ena:1;
 	u64 reserved_18_18:1;
 	u64 reset:1;
@@ -642,7 +644,9 @@ struct sq_cfg {
 	u64 reset:1;
 	u64 reserved_18_18:1;
 	u64 ena:1;
-	u64 reserved_20_63:44;
+	u64 reserved_20_23:4;
+	u64 cq_limit:8;
+	u64 reserved_32_63:32;
 #endif
 };
 
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
index 050e21fbb147..9211c750e064 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c
@@ -161,6 +161,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status)
 		return;
 
 	lmac = &bgx->lmac[lmacid];
+	link->mac_type = lmac->lmac_type;
 	link->link_up = lmac->link_up;
 	link->duplex = lmac->last_duplex;
 	link->speed = lmac->last_speed;
@@ -211,6 +212,47 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable)
 }
 EXPORT_SYMBOL(bgx_lmac_rx_tx_enable);
 
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{	/* Read the LMAC's pause enables from BGX_SMUX_CBFC_CTL into *pause */
+	struct pfc *pfc = (struct pfc *)pause;	/* pause is a struct pfc */
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct lmac *lmac;
+	u64 cfg;
+
+	if (!bgx)		/* no such BGX: *pause is left untouched */
+		return;
+	lmac = &bgx->lmac[lmacid];
+	if (lmac->is_sgmii)	/* SGMII LMAC: *pause is left untouched */
+		return;
+
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+	pfc->fc_rx = !!(cfg & RX_EN);	/* store 0/1, not the raw bit mask */
+	pfc->fc_tx = !!(cfg & TX_EN);
+	pfc->autoneg = 0;
+}
+EXPORT_SYMBOL(bgx_lmac_get_pfc);
+
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause)
+{	/* Apply pause->fc_rx/fc_tx to the LMAC's BGX_SMUX_CBFC_CTL */
+	struct pfc *pfc = (struct pfc *)pause;	/* pause is a struct pfc */
+	struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx];
+	struct lmac *lmac;
+	u64 cfg;
+
+	if (!bgx)		/* no such BGX: nothing is written */
+		return;
+	lmac = &bgx->lmac[lmacid];
+	if (lmac->is_sgmii)	/* SGMII LMAC: nothing is written */
+		return;
+
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_CBFC_CTL);
+	cfg &= ~(RX_EN | TX_EN);	/* clear both, then set as requested */
+	cfg |= (pfc->fc_rx ? RX_EN : 0x00);
+	cfg |= (pfc->fc_tx ? TX_EN : 0x00);
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, cfg);
+}
+EXPORT_SYMBOL(bgx_lmac_set_pfc);
+
 static void bgx_sgmii_change_link_state(struct lmac *lmac)
 {
 	struct bgx *bgx = lmac->bgx;
@@ -524,6 +566,18 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac)
 	cfg |= SMU_TX_CTL_DIC_EN;
 	bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_CTL, cfg);
 
+	/* Enable receive and transmission of pause frames */
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_CBFC_CTL, ((0xffffULL << 32) |
+		      BCK_EN | DRP_EN | TX_EN | RX_EN));
+	/* Configure pause time and interval */
+	bgx_reg_write(bgx, lmacid,
+		      BGX_SMUX_TX_PAUSE_PKT_TIME, DEFAULT_PAUSE_TIME);
+	cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL);
+	cfg &= ~0xFFFFull;
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_PKT_INTERVAL,
+		      cfg | (DEFAULT_PAUSE_TIME - 0x1000));
+	bgx_reg_write(bgx, lmacid, BGX_SMUX_TX_PAUSE_ZERO, 0x01);
+
 	/* take lmac_count into account */
 	bgx_reg_modify(bgx, lmacid, BGX_SMUX_TX_THRESH, (0x100 - 1));
 	/* max packet size */
@@ -970,11 +1024,25 @@ static void bgx_set_lmac_config(struct bgx *bgx, u8 idx)
 		lmac_set_training(bgx, lmac, lmac->lmacid);
 		lmac_set_lane2sds(bgx, lmac);
 
-		/* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */
 		olmac = &bgx->lmac[idx + 1];
-		olmac->lmac_type = lmac->lmac_type;
+		/*  Check if the other LMAC on the same DLM is already
+		 *  configured by firmware. If so, keep that config; otherwise
+		 *  use the same config as LMAC 0/2.
+		 *  This check is needed as on 80xx only one lane of each of the
+		 *  DLM of BGX0 is used, so have to rely on firmware for
+		 *  distinguishing 80xx from 81xx.
+		 */
+		cmr_cfg = bgx_reg_read(bgx, idx + 1, BGX_CMRX_CFG);
+		lmac_type = (u8)((cmr_cfg >> 8) & 0x07);
+		lane_to_sds = (u8)(cmr_cfg & 0xFF);
+		if ((lmac_type == 0) && (lane_to_sds == 0xE4)) {
+			olmac->lmac_type = lmac->lmac_type;
+			lmac_set_lane2sds(bgx, olmac);
+		} else {
+			olmac->lmac_type = lmac_type;
+			olmac->lane_to_sds = lane_to_sds;
+		}
 		lmac_set_training(bgx, olmac, olmac->lmacid);
-		lmac_set_lane2sds(bgx, olmac);
 	}
 }
 
diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
index 01cc7c859131..c18ebfeb2039 100644
--- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
+++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h
@@ -27,6 +27,7 @@
 #define    MAX_BGX_CHANS_PER_LMAC		16
 #define    MAX_DMAC_PER_LMAC			8
 #define    MAX_FRAME_SIZE			9216
+#define    DEFAULT_PAUSE_TIME			0xFFFF
 
 #define	   BGX_ID_MASK				0x3
 
@@ -126,7 +127,10 @@
 #define  SMU_RX_CTL_STATUS			(3ull << 0)
 #define BGX_SMUX_TX_APPEND		0x20100
 #define  SMU_TX_APPEND_FCS_D			BIT_ULL(2)
+#define BGX_SMUX_TX_PAUSE_PKT_TIME	0x20110
 #define BGX_SMUX_TX_MIN_PKT		0x20118
+#define BGX_SMUX_TX_PAUSE_PKT_INTERVAL	0x20120
+#define BGX_SMUX_TX_PAUSE_ZERO		0x20138
 #define BGX_SMUX_TX_INT			0x20140
 #define BGX_SMUX_TX_CTL			0x20178
 #define  SMU_TX_CTL_DIC_EN			BIT_ULL(0)
@@ -136,6 +140,11 @@
 #define BGX_SMUX_CTL			0x20200
 #define  SMU_CTL_RX_IDLE			BIT_ULL(0)
 #define  SMU_CTL_TX_IDLE			BIT_ULL(1)
+#define	BGX_SMUX_CBFC_CTL		0x20218
+#define	RX_EN					BIT_ULL(0)
+#define	TX_EN					BIT_ULL(1)
+#define	BCK_EN					BIT_ULL(2)
+#define	DRP_EN					BIT_ULL(3)
 
 #define BGX_GMP_PCS_MRX_CTL		0x30000
 #define	 PCS_MRX_CTL_RST_AN			BIT_ULL(9)
@@ -207,6 +216,9 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac);
 void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status);
 void bgx_lmac_internal_loopback(int node, int bgx_idx,
 				int lmac_idx, bool enable);
+void bgx_lmac_get_pfc(int node, int bgx_idx, int lmacid, void *pause);
+void bgx_lmac_set_pfc(int node, int bgx_idx, int lmacid, void *pause);
+
 void xcv_init_hw(void);
 void xcv_setup_link(bool link_up, int link_speed);
 
diff --git a/drivers/net/ethernet/chelsio/cxgb/common.h b/drivers/net/ethernet/chelsio/cxgb/common.h
index 53b1f9478383..6916c62f2487 100644
--- a/drivers/net/ethernet/chelsio/cxgb/common.h
+++ b/drivers/net/ethernet/chelsio/cxgb/common.h
@@ -85,6 +85,11 @@ struct t1_rx_mode {
 #define SPEED_INVALID 0xffff
 #define DUPLEX_INVALID 0xff
 
+/* Max frame size PM3393 can handle. Includes Ethernet header and CRC. */
+#define PM3393_MAX_FRAME_SIZE 9600
+
+#define VSC7326_MAX_MTU 9600
+
 enum {
 	CHBT_BOARD_N110,
 	CHBT_BOARD_N210,
diff --git a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
index f5f1b0b51ebd..81d1d0bc7553 100644
--- a/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
+++ b/drivers/net/ethernet/chelsio/cxgb/cxgb2.c
@@ -825,8 +825,6 @@ static int t1_change_mtu(struct net_device *dev, int new_mtu)
 
 	if (!mac->ops->set_mtu)
 		return -EOPNOTSUPP;
-	if (new_mtu < 68)
-		return -EINVAL;
 	if ((ret = mac->ops->set_mtu(mac, new_mtu)))
 		return ret;
 	dev->mtu = new_mtu;
@@ -1101,6 +1099,22 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netif_napi_add(netdev, &adapter->napi, t1_poll, 64);
 
 		netdev->ethtool_ops = &t1_ethtool_ops;
+
+		switch (bi->board) {
+		case CHBT_BOARD_CHT110:
+		case CHBT_BOARD_N110:
+		case CHBT_BOARD_N210:
+		case CHBT_BOARD_CHT210:
+			netdev->max_mtu = PM3393_MAX_FRAME_SIZE -
+					  (ETH_HLEN + ETH_FCS_LEN);
+			break;
+		case CHBT_BOARD_CHN204:
+			netdev->max_mtu = VSC7326_MAX_MTU;
+			break;
+		default:
+			netdev->max_mtu = ETH_DATA_LEN;
+			break;
+		}
 	}
 
 	if (t1_init_sw_modules(adapter, bi) < 0) {
diff --git a/drivers/net/ethernet/chelsio/cxgb/pm3393.c b/drivers/net/ethernet/chelsio/cxgb/pm3393.c
index eb462d7db427..c27908e66f5e 100644
--- a/drivers/net/ethernet/chelsio/cxgb/pm3393.c
+++ b/drivers/net/ethernet/chelsio/cxgb/pm3393.c
@@ -47,9 +47,6 @@
 
 #define OFFSET(REG_ADDR)    ((REG_ADDR) << 2)
 
-/* Max frame size PM3393 can handle. Includes Ethernet header and CRC. */
-#define MAX_FRAME_SIZE  9600
-
 #define IPG 12
 #define TXXG_CONF1_VAL ((IPG << SUNI1x10GEXP_BITOFF_TXXG_IPGT) | \
 	SUNI1x10GEXP_BITMSK_TXXG_32BIT_ALIGN | SUNI1x10GEXP_BITMSK_TXXG_CRCEN | \
@@ -331,10 +328,7 @@ static int pm3393_set_mtu(struct cmac *cmac, int mtu)
 {
 	int enabled = cmac->instance->enabled;
 
-	/* MAX_FRAME_SIZE includes header + FCS, mtu doesn't */
-	mtu += 14 + 4;
-	if (mtu > MAX_FRAME_SIZE)
-		return -EINVAL;
+	mtu += ETH_HLEN + ETH_FCS_LEN;
 
 	/* Disable Rx/Tx MAC before configuring it. */
 	if (enabled)
diff --git a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
index 6f30b6f78553..bdc895bd2a46 100644
--- a/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
+++ b/drivers/net/ethernet/chelsio/cxgb/vsc7326.c
@@ -11,8 +11,6 @@
 /* 30 minutes for full statistics update */
 #define MAJOR_UPDATE_TICKS (1800 / STATS_TICK_SECS)
 
-#define MAX_MTU 9600
-
 /* The egress WM value 0x01a01fff should be used only when the
  * interface is down (MAC port disabled). This is a workaround
  * for disabling the T2/MAC flow-control. When the interface is
@@ -452,9 +450,6 @@ static int mac_set_mtu(struct cmac *mac, int mtu)
 {
 	int port = mac->instance->index;
 
-	if (mtu > MAX_MTU)
-		return -EINVAL;
-
 	/* max_len includes header and FCS */
 	vsc_write(mac->adapter, REG_MAX_LEN(port), mtu + 14 + 4);
 	return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
index 43da891fab97..092b3c16440b 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/cxgb3_main.c
@@ -2531,8 +2531,6 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
 	struct adapter *adapter = pi->adapter;
 	int ret;
 
-	if (new_mtu < 81)	/* accommodate SACK */
-		return -EINVAL;
 	if ((ret = t3_mac_set_mtu(&pi->mac, new_mtu)))
 		return ret;
 	dev->mtu = new_mtu;
@@ -3295,6 +3293,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 		netdev->netdev_ops = &cxgb_netdev_ops;
 		netdev->ethtool_ops = &cxgb_ethtool_ops;
+		netdev->min_mtu = 81;
+		netdev->max_mtu = ETH_MAX_MTU;
 	}
 
 	pci_set_drvdata(pdev, adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 2125903043fb..0bce1bf9ca0f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -635,6 +635,7 @@ struct tx_sw_desc;
 
 struct sge_txq {
 	unsigned int  in_use;       /* # of in-use Tx descriptors */
+	unsigned int  q_type;	    /* Q type Eth/Ctrl/Ofld */
 	unsigned int  size;         /* # of descriptors */
 	unsigned int  cidx;         /* SW consumer index */
 	unsigned int  pidx;         /* producer index */
@@ -665,7 +666,7 @@ struct sge_eth_txq {                /* state for an SGE Ethernet Tx queue */
 	unsigned long mapping_err;  /* # of I/O MMU packet mapping errors */
 } ____cacheline_aligned_in_smp;
 
-struct sge_ofld_txq {               /* state for an SGE offload Tx queue */
+struct sge_uld_txq {               /* state for an SGE offload Tx queue */
 	struct sge_txq q;
 	struct adapter *adap;
 	struct sk_buff_head sendq;  /* list of backpressured packets */
@@ -693,14 +694,20 @@ struct sge_uld_rxq_info {
 	u8 uld;			/* uld type */
 };
 
+struct sge_uld_txq_info {
+	struct sge_uld_txq *uldtxq; /* array of ntxq ULD Tx queues */
+	atomic_t users;		/* # of ULDs sharing these queues */
+	u16 ntxq;		/* # of egress uld queues */
+};
+
 struct sge {
 	struct sge_eth_txq ethtxq[MAX_ETH_QSETS];
-	struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS];
 	struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES];
 
 	struct sge_eth_rxq ethrxq[MAX_ETH_QSETS];
 	struct sge_rspq fw_evtq ____cacheline_aligned_in_smp;
 	struct sge_uld_rxq_info **uld_rxq_info;
+	struct sge_uld_txq_info **uld_txq_info;
 
 	struct sge_rspq intrq ____cacheline_aligned_in_smp;
 	spinlock_t intrq_lock;
@@ -1298,8 +1305,9 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 			  unsigned int cmplqid);
 int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
 			unsigned int cmplqid);
-int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
-			  struct net_device *dev, unsigned int iqid);
+int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
+			 struct net_device *dev, unsigned int iqid,
+			 unsigned int uld_type);
 irqreturn_t t4_sge_intr_msix(int irq, void *cookie);
 int t4_sge_init(struct adapter *adap);
 void t4_sge_start(struct adapter *adap);
@@ -1661,4 +1669,7 @@ int t4_uld_mem_alloc(struct adapter *adap);
 void t4_uld_clean_up(struct adapter *adap);
 void t4_register_netevent_notifier(void);
 void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl);
+void free_tx_desc(struct adapter *adap, struct sge_txq *q,
+		  unsigned int n, bool unmap);
+void free_txq(struct adapter *adap, struct sge_txq *q);
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 20455d082cb8..acc231293e4d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -2512,18 +2512,6 @@ do { \
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
-	} else if (ofld_idx < ofld_entries) {
-		const struct sge_ofld_txq *tx =
-			&adap->sge.ofldtxq[ofld_idx * 4];
-		int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx);
-
-		S("QType:", "OFLD-Txq");
-		T("TxQ ID:", q.cntxt_id);
-		T("TxQ size:", q.size);
-		T("TxQ inuse:", q.in_use);
-		T("TxQ CIDX:", q.cidx);
-		T("TxQ PIDX:", q.pidx);
-
 	} else if (ctrl_idx < ctrl_entries) {
 		const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4];
 		int n = min(4, adap->params.nports - 4 * ctrl_idx);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 57eb4e1345cb..66c37fac59b2 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -134,24 +134,6 @@ MODULE_FIRMWARE(FW5_FNAME);
 MODULE_FIRMWARE(FW6_FNAME);
 
 /*
- * Normally we're willing to become the firmware's Master PF but will be happy
- * if another PF has already become the Master and initialized the adapter.
- * Setting "force_init" will cause this driver to forcibly establish itself as
- * the Master PF and initialize the adapter.
- */
-static uint force_init;
-
-module_param(force_init, uint, 0644);
-MODULE_PARM_DESC(force_init, "Forcibly become Master PF and initialize adapter,"
-		 "deprecated parameter");
-
-static int dflt_msg_enable = DFLT_MSG_ENABLE;
-
-module_param(dflt_msg_enable, int, 0644);
-MODULE_PARM_DESC(dflt_msg_enable, "Chelsio T4 default message enable bitmap, "
-		 "deprecated parameter");
-
-/*
  * The driver uses the best interrupt scheme available on a platform in the
  * order MSI-X, MSI, legacy INTx interrupts.  This parameter determines which
  * of these schemes the driver may consider as follows:
@@ -179,16 +161,6 @@ MODULE_PARM_DESC(msi, "whether to use INTx (0), MSI (1) or MSI-X (2)");
  */
 static int rx_dma_offset = 2;
 
-#ifdef CONFIG_PCI_IOV
-/* Configure the number of PCI-E Virtual Function which are to be instantiated
- * on SR-IOV Capable Physical Functions.
- */
-static unsigned int num_vf[NUM_OF_PF_WITH_SRIOV];
-
-module_param_array(num_vf, uint, NULL, 0644);
-MODULE_PARM_DESC(num_vf, "number of VFs for each of PFs 0-3, deprecated parameter - please use the pci sysfs interface.");
-#endif
-
 /* TX Queue select used to determine what algorithm to use for selecting TX
  * queue. Select between the kernel provided function (select_queue=0) or user
  * cxgb_select_queue function (select_queue=1)
@@ -530,15 +502,15 @@ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp,
 
 		txq = q->adap->sge.egr_map[qid - q->adap->sge.egr_start];
 		txq->restarts++;
-		if ((u8 *)txq < (u8 *)q->adap->sge.ofldtxq) {
+		if (txq->q_type == CXGB4_TXQ_ETH) {
 			struct sge_eth_txq *eq;
 
 			eq = container_of(txq, struct sge_eth_txq, q);
 			netif_tx_wake_queue(eq->txq);
 		} else {
-			struct sge_ofld_txq *oq;
+			struct sge_uld_txq *oq;
 
-			oq = container_of(txq, struct sge_ofld_txq, q);
+			oq = container_of(txq, struct sge_uld_txq, q);
 			tasklet_schedule(&oq->qresume_tsk);
 		}
 	} else if (opcode == CPL_FW6_MSG || opcode == CPL_FW4_MSG) {
@@ -885,15 +857,6 @@ static int setup_sge_queues(struct adapter *adap)
 		}
 	}
 
-	j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */
-	for_each_ofldtxq(s, i) {
-		err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i],
-					    adap->port[i / j],
-					    s->fw_evtq.cntxt_id);
-		if (err)
-			goto freeout;
-	}
-
 	for_each_port(adap, i) {
 		/* Note that cmplqid below is 0 if we don't
 		 * have RDMA queues, and that's the right value.
@@ -1922,8 +1885,18 @@ static void disable_dbs(struct adapter *adap)
 
 	for_each_ethrxq(&adap->sge, i)
 		disable_txq_db(&adap->sge.ethtxq[i].q);
-	for_each_ofldtxq(&adap->sge, i)
-		disable_txq_db(&adap->sge.ofldtxq[i].q);
+	if (is_offload(adap)) {
+		struct sge_uld_txq_info *txq_info =
+			adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+
+		if (txq_info) {
+			for_each_ofldtxq(&adap->sge, i) {
+				struct sge_uld_txq *txq = &txq_info->uldtxq[i];
+
+				disable_txq_db(&txq->q);
+			}
+		}
+	}
 	for_each_port(adap, i)
 		disable_txq_db(&adap->sge.ctrlq[i].q);
 }
@@ -1934,8 +1907,18 @@ static void enable_dbs(struct adapter *adap)
 
 	for_each_ethrxq(&adap->sge, i)
 		enable_txq_db(adap, &adap->sge.ethtxq[i].q);
-	for_each_ofldtxq(&adap->sge, i)
-		enable_txq_db(adap, &adap->sge.ofldtxq[i].q);
+	if (is_offload(adap)) {
+		struct sge_uld_txq_info *txq_info =
+			adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+
+		if (txq_info) {
+			for_each_ofldtxq(&adap->sge, i) {
+				struct sge_uld_txq *txq = &txq_info->uldtxq[i];
+
+				enable_txq_db(adap, &txq->q);
+			}
+		}
+	}
 	for_each_port(adap, i)
 		enable_txq_db(adap, &adap->sge.ctrlq[i].q);
 }
@@ -2006,8 +1989,17 @@ static void recover_all_queues(struct adapter *adap)
 
 	for_each_ethrxq(&adap->sge, i)
 		sync_txq_pidx(adap, &adap->sge.ethtxq[i].q);
-	for_each_ofldtxq(&adap->sge, i)
-		sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q);
+	if (is_offload(adap)) {
+		struct sge_uld_txq_info *txq_info =
+			adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+		if (txq_info) {
+			for_each_ofldtxq(&adap->sge, i) {
+				struct sge_uld_txq *txq = &txq_info->uldtxq[i];
+
+				sync_txq_pidx(adap, &txq->q);
+			}
+		}
+	}
 	for_each_port(adap, i)
 		sync_txq_pidx(adap, &adap->sge.ctrlq[i].q);
 }
@@ -2502,8 +2494,6 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu)
 	int ret;
 	struct port_info *pi = netdev_priv(dev);
 
-	if (new_mtu < 81 || new_mtu > MAX_MTU)         /* accommodate SACK */
-		return -EINVAL;
 	ret = t4_set_rxmode(pi->adapter, pi->adapter->pf, pi->viid, new_mtu, -1,
 			    -1, -1, -1, true);
 	if (!ret)
@@ -3993,7 +3983,7 @@ static inline bool is_x_10g_port(const struct link_config *lc)
 static void cfg_queues(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
-	int i, n10g = 0, qidx = 0;
+	int i = 0, n10g = 0, qidx = 0;
 #ifndef CONFIG_CHELSIO_T4_DCB
 	int q10g = 0;
 #endif
@@ -4008,8 +3998,7 @@ static void cfg_queues(struct adapter *adap)
 		adap->params.crypto = 0;
 	}
 
-	for_each_port(adap, i)
-		n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
+	n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg);
 #ifdef CONFIG_CHELSIO_T4_DCB
 	/* For Data Center Bridging support we need to be able to support up
 	 * to 8 Traffic Priorities; each of which will be assigned to its
@@ -4077,9 +4066,6 @@ static void cfg_queues(struct adapter *adap)
 	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++)
 		s->ctrlq[i].q.size = 512;
 
-	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++)
-		s->ofldtxq[i].q.size = 1024;
-
 	init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64);
 	init_rspq(adap, &s->intrq, 0, 1, 512, 64);
 }
@@ -4715,7 +4701,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	adapter->name = pci_name(pdev);
 	adapter->mbox = func;
 	adapter->pf = func;
-	adapter->msg_enable = dflt_msg_enable;
+	adapter->msg_enable = DFLT_MSG_ENABLE;
 	memset(adapter->chan_map, 0xff, sizeof(adapter->chan_map));
 
 	spin_lock_init(&adapter->stats_lock);
@@ -4803,6 +4789,10 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
+		/* MTU range: 81 - 9600 */
+		netdev->min_mtu = 81;
+		netdev->max_mtu = MAX_MTU;
+
 		netdev->netdev_ops = &cxgb4_netdev_ops;
 #ifdef CONFIG_CHELSIO_T4_DCB
 		netdev->dcbnl_ops = &cxgb4_dcb_ops;
@@ -4931,6 +4921,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 */
 	for_each_port(adapter, i) {
 		pi = adap2pinfo(adapter, i);
+		adapter->port[i]->dev_port = pi->lport;
 		netif_set_real_num_tx_queues(adapter->port[i], pi->nqsets);
 		netif_set_real_num_rx_queues(adapter->port[i], pi->nqsets);
 
@@ -4970,17 +4961,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 sriov:
 #ifdef CONFIG_PCI_IOV
-	if (func < ARRAY_SIZE(num_vf) && num_vf[func] > 0) {
-		dev_warn(&pdev->dev,
-			 "Enabling SR-IOV VFs using the num_vf module "
-			 "parameter is deprecated - please use the pci sysfs "
-			 "interface instead.\n");
-		if (pci_enable_sriov(pdev, num_vf[func]) == 0)
-			dev_info(&pdev->dev,
-				 "instantiated %u virtual functions\n",
-				 num_vf[func]);
-	}
-
 	adapter = kzalloc(sizeof(*adapter), GFP_KERNEL);
 	if (!adapter) {
 		err = -ENOMEM;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
index 49d2debb334e..52af62e0ecb6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c
@@ -113,7 +113,7 @@ static int fill_action_fields(struct adapter *adap,
 		}
 
 		/* Re-direct to specified port in hardware. */
-		if (is_tcf_mirred_redirect(a)) {
+		if (is_tcf_mirred_egress_redirect(a)) {
 			struct net_device *n_dev;
 			unsigned int i, index;
 			bool found = false;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 2471ff465d5c..8098902c094a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -447,6 +447,106 @@ static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type)
 		quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq);
 }
 
+/* Free every queue in @txq_info whose descriptor ring (q.desc) is set. */
+static void
+free_sge_txq_uld(struct adapter *adap, struct sge_uld_txq_info *txq_info)
+{
+	const int count = txq_info->ntxq;
+	int idx;
+
+	for (idx = 0; idx < count; idx++) {
+		struct sge_uld_txq *uq = &txq_info->uldtxq[idx];
+
+		if (!uq || !uq->q.desc)
+			continue;
+		tasklet_kill(&uq->qresume_tsk);
+		t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0, uq->q.cntxt_id);
+		free_tx_desc(adap, &uq->q, uq->q.in_use, false);
+		kfree(uq->q.sdesc);
+		__skb_queue_purge(&uq->sendq);
+		free_txq(adap, &uq->q);
+	}
+}
+
+/* Create all ntxq ULD Tx queues, spreading them evenly over the ports. */
+static int
+alloc_sge_txq_uld(struct adapter *adap, struct sge_uld_txq_info *txq_info,
+		  unsigned int uld_type)
+{
+	const int total = txq_info->ntxq;
+	const int per_port = total / adap->params.nports;
+	int qidx;
+
+	for (qidx = 0; qidx < total; qidx++) {
+		struct sge_uld_txq *uq = txq_info->uldtxq + qidx;
+		int rc;
+
+		uq->q.size = 1024;	/* descriptors per queue */
+		rc = t4_sge_alloc_uld_txq(adap, uq, adap->port[qidx / per_port],
+					  adap->sge.fw_evtq.cntxt_id, uld_type);
+		if (!rc)
+			continue;
+		free_sge_txq_uld(adap, txq_info);	/* undo partial setup */
+		return rc;
+	}
+	return 0;
+}
+
+/* Drop one user of the Tx queue set behind @uld_type; free it on the last. */
+static void
+release_sge_txq_uld(struct adapter *adap, unsigned int uld_type)
+{
+	const int slot = TX_ULD(uld_type);
+	struct sge_uld_txq_info *info = adap->sge.uld_txq_info[slot];
+
+	if (!info || !atomic_dec_and_test(&info->users))
+		return;
+
+	free_sge_txq_uld(adap, info);
+	kfree(info->uldtxq);
+	kfree(info);
+	adap->sge.uld_txq_info[slot] = NULL;
+}
+
+/* Get (creating on first use) the Tx queue set for @uld_type; 0 or -errno. */
+static int
+setup_sge_txq_uld(struct adapter *adap, unsigned int uld_type,
+		  const struct cxgb4_uld_info *uld_info)
+{
+	int tx_uld_type = TX_ULD(uld_type);
+	struct sge_uld_txq_info *txq_info;
+	int i, ret;
+
+	/* Offload ULDs share one queue set; later users just take a ref */
+	txq_info = adap->sge.uld_txq_info[tx_uld_type];
+	if ((tx_uld_type == CXGB4_TX_OFLD) && txq_info &&
+	    (atomic_inc_return(&txq_info->users) > 1))
+		return 0;
+
+	txq_info = kzalloc(sizeof(*txq_info), GFP_KERNEL);
+	if (!txq_info)
+		return -ENOMEM;
+
+	/* one queue per online CPU at most, rounded up to a port multiple */
+	i = min_t(int, uld_info->ntxq, num_online_cpus());
+	txq_info->ntxq = roundup(i, adap->params.nports);
+	txq_info->uldtxq = kcalloc(txq_info->ntxq, sizeof(struct sge_uld_txq),
+				   GFP_KERNEL);
+	if (txq_info->uldtxq)
+		ret = alloc_sge_txq_uld(adap, txq_info, tx_uld_type);
+	else
+		ret = -ENOMEM;
+	if (ret) {
+		kfree(txq_info->uldtxq);	/* NULL-safe; queues already freed */
+		kfree(txq_info);
+		return ret;
+	}
+
+	atomic_inc(&txq_info->users);
+	adap->sge.uld_txq_info[tx_uld_type] = txq_info;
+	return 0;
+}
+
 static void uld_queue_init(struct adapter *adap, unsigned int uld_type,
 			   struct cxgb4_lld_info *lli)
 {
@@ -472,7 +572,15 @@ int t4_uld_mem_alloc(struct adapter *adap)
 	if (!s->uld_rxq_info)
 		goto err_uld;
 
+	s->uld_txq_info = kzalloc(CXGB4_TX_MAX *
+				  sizeof(struct sge_uld_txq_info *),
+				  GFP_KERNEL);
+	if (!s->uld_txq_info)
+		goto err_uld_rx;
 	return 0;
+
+err_uld_rx:
+	kfree(s->uld_rxq_info);
 err_uld:
 	kfree(adap->uld);
 	return -ENOMEM;
@@ -482,6 +590,7 @@ void t4_uld_mem_free(struct adapter *adap)
 {
 	struct sge *s = &adap->sge;
 
+	kfree(s->uld_txq_info);
 	kfree(s->uld_rxq_info);
 	kfree(adap->uld);
 }
@@ -616,6 +725,9 @@ int cxgb4_register_uld(enum cxgb4_uld type,
 			ret = -EBUSY;
 			goto free_irq;
 		}
+		ret = setup_sge_txq_uld(adap, type, p);
+		if (ret)
+			goto free_irq;
 		adap->uld[type] = *p;
 		uld_attach(adap, type);
 		adap_idx++;
@@ -644,6 +756,7 @@ out:
 			break;
 		adap->uld[type].handle = NULL;
 		adap->uld[type].add = NULL;
+		release_sge_txq_uld(adap, type);
 		if (adap->flags & FULL_INIT_DONE)
 			quiesce_rx_uld(adap, type);
 		if (adap->flags & USING_MSIX)
@@ -679,6 +792,7 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
 			continue;
 		adap->uld[type].handle = NULL;
 		adap->uld[type].add = NULL;
+		release_sge_txq_uld(adap, type);
 		if (adap->flags & FULL_INIT_DONE)
 			quiesce_rx_uld(adap, type);
 		if (adap->flags & USING_MSIX)
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 2996793b1aaa..4c856605fdfa 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -77,6 +77,8 @@ enum {
 
 /* Special asynchronous notification message */
 #define CXGB4_MSG_AN ((void *)1)
+/* Map a ULD type to the Tx queue set (enum cxgb4_tx_uld) it uses. */
+#define TX_ULD(u) ((u) == CXGB4_ULD_CRYPTO ? CXGB4_TX_CRYPTO : CXGB4_TX_OFLD)
 
 struct serv_entry {
 	void *data;
@@ -223,6 +225,19 @@ enum cxgb4_uld {
 	CXGB4_ULD_MAX
 };
 
+enum cxgb4_tx_uld {
+	CXGB4_TX_OFLD,		/* Tx queue set of every non-crypto ULD */
+	CXGB4_TX_CRYPTO,	/* Tx queue set of CXGB4_ULD_CRYPTO */
+	CXGB4_TX_MAX		/* # of sets; sizes sge.uld_txq_info[] */
+};
+
+enum cxgb4_txq_type {	/* values of sge_txq.q_type */
+	CXGB4_TXQ_ETH,		/* set by t4_sge_alloc_eth_txq() */
+	CXGB4_TXQ_ULD,		/* set by t4_sge_alloc_uld_txq() */
+	CXGB4_TXQ_CTRL,		/* set by t4_sge_alloc_ctrl_txq() */
+	CXGB4_TXQ_MAX
+};
+
 enum cxgb4_state {
 	CXGB4_STATE_UP,
 	CXGB4_STATE_START_RECOVERY,
@@ -316,6 +331,7 @@ struct cxgb4_uld_info {
 	void *handle;
 	unsigned int nrxq;
 	unsigned int rxq_size;
+	unsigned int ntxq;
 	bool ciq;
 	bool lro;
 	void *(*add)(const struct cxgb4_lld_info *p);
@@ -333,6 +349,7 @@ struct cxgb4_uld_info {
 int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
+int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb);
 unsigned int cxgb4_dbfifo_count(const struct net_device *dev, int lpfifo);
 unsigned int cxgb4_port_chan(const struct net_device *dev);
 unsigned int cxgb4_port_viid(const struct net_device *dev);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index e19a0ca8e5dd..9f606478c29c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -377,8 +377,8 @@ unmap:			dma_unmap_page(dev, be64_to_cpu(p->addr[0]),
  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
  *	Tx buffers.  Called with the Tx queue lock held.
  */
-static void free_tx_desc(struct adapter *adap, struct sge_txq *q,
-			 unsigned int n, bool unmap)
+void free_tx_desc(struct adapter *adap, struct sge_txq *q,
+		  unsigned int n, bool unmap)
 {
 	struct tx_sw_desc *d;
 	unsigned int cidx = q->cidx;
@@ -1543,7 +1543,7 @@ static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb)
  *	inability to map packets.  A periodic timer attempts to restart
  *	queues so marked.
  */
-static void txq_stop_maperr(struct sge_ofld_txq *q)
+static void txq_stop_maperr(struct sge_uld_txq *q)
 {
 	q->mapping_err++;
 	q->q.stops++;
@@ -1559,7 +1559,7 @@ static void txq_stop_maperr(struct sge_ofld_txq *q)
  *	Stops an offload Tx queue that has become full and modifies the packet
  *	being written to request a wakeup.
  */
-static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb)
+static void ofldtxq_stop(struct sge_uld_txq *q, struct sk_buff *skb)
 {
 	struct fw_wr_hdr *wr = (struct fw_wr_hdr *)skb->data;
 
@@ -1586,7 +1586,7 @@ static void ofldtxq_stop(struct sge_ofld_txq *q, struct sk_buff *skb)
  *	boolean "service_ofldq_running" to make sure that only one instance
  *	is ever running at a time ...
  */
-static void service_ofldq(struct sge_ofld_txq *q)
+static void service_ofldq(struct sge_uld_txq *q)
 {
 	u64 *pos, *before, *end;
 	int credits;
@@ -1706,7 +1706,7 @@ static void service_ofldq(struct sge_ofld_txq *q)
  *
  *	Send an offload packet through an SGE offload queue.
  */
-static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb)
+static int ofld_xmit(struct sge_uld_txq *q, struct sk_buff *skb)
 {
 	skb->priority = calc_tx_flits_ofld(skb);       /* save for restart */
 	spin_lock(&q->sendq.lock);
@@ -1735,7 +1735,7 @@ static int ofld_xmit(struct sge_ofld_txq *q, struct sk_buff *skb)
  */
 static void restart_ofldq(unsigned long data)
 {
-	struct sge_ofld_txq *q = (struct sge_ofld_txq *)data;
+	struct sge_uld_txq *q = (struct sge_uld_txq *)data;
 
 	spin_lock(&q->sendq.lock);
 	q->full = 0;            /* the queue actually is completely empty now */
@@ -1767,17 +1767,23 @@ static inline unsigned int is_ctrl_pkt(const struct sk_buff *skb)
 	return skb->queue_mapping & 1;
 }
 
-static inline int ofld_send(struct adapter *adap, struct sk_buff *skb)
+static inline int uld_send(struct adapter *adap, struct sk_buff *skb,
+			   unsigned int tx_uld_type)
 {
+	struct sge_uld_txq_info *txq_info = adap->sge.uld_txq_info[tx_uld_type];
 	unsigned int idx = skb_txq(skb);
 
 	if (unlikely(is_ctrl_pkt(skb))) {
 		/* Single ctrl queue is a requirement for LE workaround path */
 		if (adap->tids.nsftids)
 			idx = 0;
 		return ctrl_xmit(&adap->sge.ctrlq[idx], skb);
 	}
-	return ofld_xmit(&adap->sge.ofldtxq[idx], skb);
+	if (unlikely(!txq_info)) {	/* no Tx queues for this ULD type yet */
+		kfree_skb(skb);
+		return NET_XMIT_DROP;
+	}
+	return ofld_xmit(&txq_info->uldtxq[idx], skb);
 }
 
 /**
@@ -1794,7 +1800,7 @@ int t4_ofld_send(struct adapter *adap, struct sk_buff *skb)
 	int ret;
 
 	local_bh_disable();
-	ret = ofld_send(adap, skb);
+	ret = uld_send(adap, skb, CXGB4_TX_OFLD);
 	local_bh_enable();
 	return ret;
 }
@@ -1813,6 +1819,39 @@ int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(cxgb4_ofld_send);
 
+/**
+ *	t4_crypto_send - transmit a crypto packet
+ *	@adap: the adapter
+ *	@skb: the packet
+ *
+ *	Hands the packet to the crypto Tx queues with bottom halves disabled.
+ *	The packet's queue_mapping picks the Tx queue: bit 0 says whether it
+ *	goes out as a control or a regular packet, bits 1-15 give the queue.
+ */
+static int t4_crypto_send(struct adapter *adap, struct sk_buff *skb)
+{
+	int rc;
+
+	local_bh_disable();
+	rc = uld_send(adap, skb, CXGB4_TX_CRYPTO);
+	local_bh_enable();
+	return rc;
+}
+
+/**
+ *	cxgb4_crypto_send - exported t4_crypto_send() for upper-layer drivers
+ *	@dev: the net device whose adapter sends the packet
+ *	@skb: the packet
+ *	Return: whatever t4_crypto_send() returns.
+ */
+int cxgb4_crypto_send(struct net_device *dev, struct sk_buff *skb)
+{
+	struct adapter *adap = netdev2adap(dev);
+
+	return t4_crypto_send(adap, skb);
+}
+EXPORT_SYMBOL(cxgb4_crypto_send);
+
 static inline void copy_frags(struct sk_buff *skb,
 			      const struct pkt_gl *gl, unsigned int offset)
 {
@@ -2479,7 +2518,7 @@ static void sge_tx_timer_cb(unsigned long data)
 	for (i = 0; i < BITS_TO_LONGS(s->egr_sz); i++)
 		for (m = s->txq_maperr[i]; m; m &= m - 1) {
 			unsigned long id = __ffs(m) + i * BITS_PER_LONG;
-			struct sge_ofld_txq *txq = s->egr_map[id];
+			struct sge_uld_txq *txq = s->egr_map[id];
 
 			clear_bit(id, s->txq_maperr);
 			tasklet_schedule(&txq->qresume_tsk);
@@ -2799,6 +2838,7 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq,
 		return ret;
 	}
 
+	txq->q.q_type = CXGB4_TXQ_ETH;
 	init_txq(adap, &txq->q, FW_EQ_ETH_CMD_EQID_G(ntohl(c.eqid_pkd)));
 	txq->txq = netdevq;
 	txq->tso = txq->tx_cso = txq->vlan_ins = 0;
@@ -2852,6 +2892,7 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq,
 		return ret;
 	}
 
+	txq->q.q_type = CXGB4_TXQ_CTRL;
 	init_txq(adap, &txq->q, FW_EQ_CTRL_CMD_EQID_G(ntohl(c.cmpliqid_eqid)));
 	txq->adap = adap;
 	skb_queue_head_init(&txq->sendq);
@@ -2872,13 +2913,15 @@ int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid,
 	return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &param, &val);
 }
 
-int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
-			  struct net_device *dev, unsigned int iqid)
+int t4_sge_alloc_uld_txq(struct adapter *adap, struct sge_uld_txq *txq,
+			 struct net_device *dev, unsigned int iqid,
+			 unsigned int uld_type)
 {
 	int ret, nentries;
 	struct fw_eq_ofld_cmd c;
 	struct sge *s = &adap->sge;
 	struct port_info *pi = netdev_priv(dev);
+	int cmd = FW_EQ_OFLD_CMD;
 
 	/* Add status entries */
 	nentries = txq->q.size + s->stat_len / sizeof(struct tx_desc);
@@ -2891,7 +2934,9 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 		return -ENOMEM;
 
 	memset(&c, 0, sizeof(c));
-	c.op_to_vfn = htonl(FW_CMD_OP_V(FW_EQ_OFLD_CMD) | FW_CMD_REQUEST_F |
+	if (unlikely(uld_type == CXGB4_TX_CRYPTO))
+		cmd = FW_EQ_CTRL_CMD;
+	c.op_to_vfn = htonl(FW_CMD_OP_V(cmd) | FW_CMD_REQUEST_F |
 			    FW_CMD_WRITE_F | FW_CMD_EXEC_F |
 			    FW_EQ_OFLD_CMD_PFN_V(adap->pf) |
 			    FW_EQ_OFLD_CMD_VFN_V(0));
@@ -2919,6 +2964,7 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 		return ret;
 	}
 
+	txq->q.q_type = CXGB4_TXQ_ULD;
 	init_txq(adap, &txq->q, FW_EQ_OFLD_CMD_EQID_G(ntohl(c.eqid_pkd)));
 	txq->adap = adap;
 	skb_queue_head_init(&txq->sendq);
@@ -2928,7 +2974,7 @@ int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq,
 	return 0;
 }
 
-static void free_txq(struct adapter *adap, struct sge_txq *q)
+void free_txq(struct adapter *adap, struct sge_txq *q)
 {
 	struct sge *s = &adap->sge;
 
@@ -3025,21 +3071,6 @@ void t4_free_sge_resources(struct adapter *adap)
 		}
 	}
 
-	/* clean up offload Tx queues */
-	for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) {
-		struct sge_ofld_txq *q = &adap->sge.ofldtxq[i];
-
-		if (q->q.desc) {
-			tasklet_kill(&q->qresume_tsk);
-			t4_ofld_eq_free(adap, adap->mbox, adap->pf, 0,
-					q->q.cntxt_id);
-			free_tx_desc(adap, &q->q, q->q.in_use, false);
-			kfree(q->q.sdesc);
-			__skb_queue_purge(&q->sendq);
-			free_txq(adap, &q->q);
-		}
-	}
-
 	/* clean up control Tx queues */
 	for (i = 0; i < ARRAY_SIZE(adap->sge.ctrlq); i++) {
 		struct sge_ctrl_txq *cq = &adap->sge.ctrlq[i];
@@ -3092,12 +3123,34 @@ void t4_sge_stop(struct adapter *adap)
 	if (s->tx_timer.function)
 		del_timer_sync(&s->tx_timer);
 
-	for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) {
-		struct sge_ofld_txq *q = &s->ofldtxq[i];
+	if (is_offload(adap)) {
+		struct sge_uld_txq_info *txq_info;
+
+		txq_info = adap->sge.uld_txq_info[CXGB4_TX_OFLD];
+		if (txq_info) {
+			struct sge_uld_txq *txq = txq_info->uldtxq;
 
-		if (q->q.desc)
-			tasklet_kill(&q->qresume_tsk);
+			/* uldtxq[] holds ntxq queues; check each one */
+			for (i = 0; i < txq_info->ntxq; i++)
+				if (txq[i].q.desc)
+					tasklet_kill(&txq[i].qresume_tsk);
+		}
 	}
+
+	if (is_pci_uld(adap)) {
+		struct sge_uld_txq_info *txq_info;
+
+		txq_info = adap->sge.uld_txq_info[CXGB4_TX_CRYPTO];
+		if (txq_info) {
+			struct sge_uld_txq *txq = txq_info->uldtxq;
+
+			/* uldtxq[] holds ntxq queues; check each one */
+			for (i = 0; i < txq_info->ntxq; i++)
+				if (txq[i].q.desc)
+					tasklet_kill(&txq[i].qresume_tsk);
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(s->ctrlq); i++) {
 		struct sge_ctrl_txq *cq = &s->ctrlq[i];
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 20dec85da63d..e8139514d32c 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -7851,7 +7851,6 @@ int t4_port_init(struct adapter *adap, int mbox, int pf, int vf)
 			return ret;
 
 		memcpy(adap->port[i]->dev_addr, addr, ETH_ALEN);
-		adap->port[i]->dev_port = j;
 		j++;
 	}
 	return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
index df1573c4a659..ecf3ccc257bc 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_pci_id_tbl.h
@@ -168,6 +168,7 @@ CH_PCI_DEVICE_ID_TABLE_DEFINE_BEGIN
 	CH_PCI_ID_TABLE_FENTRY(0x509a),	/* Custom T520-CR */
 	CH_PCI_ID_TABLE_FENTRY(0x509b),	/* Custom T540-CR LOM */
 	CH_PCI_ID_TABLE_FENTRY(0x509c),	/* Custom T520-CR*/
+	CH_PCI_ID_TABLE_FENTRY(0x509d),	/* Custom T540-CR*/
 
 	/* T6 adapters:
 	 */
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 100b2cc064a3..0d1a134c8174 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -70,13 +70,6 @@
 			 NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\
 			 NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR)
 
-static int dflt_msg_enable = DFLT_MSG_ENABLE;
-
-module_param(dflt_msg_enable, int, 0644);
-MODULE_PARM_DESC(dflt_msg_enable,
-		 "default adapter ethtool message level bitmap, "
-		 "deprecated parameter");
-
 /*
  * The driver uses the best interrupt scheme available on a platform in the
  * order MSI-X then MSI.  This parameter determines which of these schemes the
@@ -1108,10 +1101,6 @@ static int cxgb4vf_change_mtu(struct net_device *dev, int new_mtu)
 	int ret;
 	struct port_info *pi = netdev_priv(dev);
 
-	/* accommodate SACK */
-	if (new_mtu < 81)
-		return -EINVAL;
-
 	ret = t4vf_set_rxmode(pi->adapter, pi->viid, new_mtu,
 			      -1, -1, -1, -1, true);
 	if (!ret)
@@ -2895,7 +2884,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 	 * Initialize adapter level features.
 	 */
 	adapter->name = pci_name(pdev);
-	adapter->msg_enable = dflt_msg_enable;
+	adapter->msg_enable = DFLT_MSG_ENABLE;
 	err = adap_init0(adapter);
 	if (err)
 		goto err_unmap_bar;
@@ -2966,9 +2955,12 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev,
 			netdev->features |= NETIF_F_HIGHDMA;
 
 		netdev->priv_flags |= IFF_UNICAST_FLT;
+		netdev->min_mtu = 81;
+		netdev->max_mtu = ETH_MAX_MTU;
 
 		netdev->netdev_ops = &cxgb4vf_netdev_ops;
 		netdev->ethtool_ops = &cxgb4vf_ethtool_ops;
+		netdev->dev_port = pi->port_id;
 
 		/*
 		 * Initialize the hardware/software state for the port.
diff --git a/drivers/net/ethernet/cirrus/cs89x0.c b/drivers/net/ethernet/cirrus/cs89x0.c
index c363b58552e9..3647b28e8de0 100644
--- a/drivers/net/ethernet/cirrus/cs89x0.c
+++ b/drivers/net/ethernet/cirrus/cs89x0.c
@@ -1266,7 +1266,6 @@ static const struct net_device_ops net_ops = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= net_poll_controller,
 #endif
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/cirrus/ep93xx_eth.c b/drivers/net/ethernet/cirrus/ep93xx_eth.c
index de9f7c97d916..a1de0d12927d 100644
--- a/drivers/net/ethernet/cirrus/ep93xx_eth.c
+++ b/drivers/net/ethernet/cirrus/ep93xx_eth.c
@@ -468,6 +468,9 @@ static void ep93xx_free_buffers(struct ep93xx_priv *ep)
 	struct device *dev = ep->dev->dev.parent;
 	int i;
 
+	if (!ep->descs)
+		return;
+
 	for (i = 0; i < RX_QUEUE_ENTRIES; i++) {
 		dma_addr_t d;
 
@@ -490,6 +493,7 @@ static void ep93xx_free_buffers(struct ep93xx_priv *ep)
 
 	dma_free_coherent(dev, sizeof(struct ep93xx_descs), ep->descs,
 							ep->descs_dma_addr);
+	ep->descs = NULL;
 }
 
 static int ep93xx_alloc_buffers(struct ep93xx_priv *ep)
@@ -749,7 +753,6 @@ static const struct net_device_ops ep93xx_netdev_ops = {
 	.ndo_start_xmit		= ep93xx_xmit,
 	.ndo_do_ioctl		= ep93xx_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
diff --git a/drivers/net/ethernet/cirrus/mac89x0.c b/drivers/net/ethernet/cirrus/mac89x0.c
index 07719676c305..b600fbbbf679 100644
--- a/drivers/net/ethernet/cirrus/mac89x0.c
+++ b/drivers/net/ethernet/cirrus/mac89x0.c
@@ -172,7 +172,6 @@ static const struct net_device_ops mac89x0_netdev_ops = {
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_set_mac_address	= set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 /* Probe for the CS8900 card in slot E.  We won't bother looking
diff --git a/drivers/net/ethernet/cisco/enic/enic.h b/drivers/net/ethernet/cisco/enic/enic.h
index 130f910e4785..9023c858715d 100644
--- a/drivers/net/ethernet/cisco/enic/enic.h
+++ b/drivers/net/ethernet/cisco/enic/enic.h
@@ -33,7 +33,7 @@
 
 #define DRV_NAME		"enic"
 #define DRV_DESCRIPTION		"Cisco VIC Ethernet NIC Driver"
-#define DRV_VERSION		"2.3.0.20"
+#define DRV_VERSION		"2.3.0.31"
 #define DRV_COPYRIGHT		"Copyright 2008-2013 Cisco Systems, Inc"
 
 #define ENIC_BARS_MAX		6
diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c
index 48f82ab6c25b..cdd7a1a59aa7 100644
--- a/drivers/net/ethernet/cisco/enic/enic_main.c
+++ b/drivers/net/ethernet/cisco/enic/enic_main.c
@@ -1166,12 +1166,18 @@ static void enic_rq_indicate_buf(struct vnic_rq *rq,
 		skb->protocol = eth_type_trans(skb, netdev);
 		skb_record_rx_queue(skb, q_number);
 		if (netdev->features & NETIF_F_RXHASH) {
-			skb_set_hash(skb, rss_hash,
-				     (rss_type &
-				      (NIC_CFG_RSS_HASH_TYPE_TCP_IPV6_EX |
-				       NIC_CFG_RSS_HASH_TYPE_TCP_IPV6 |
-				       NIC_CFG_RSS_HASH_TYPE_TCP_IPV4)) ?
-				     PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3);
+			switch (rss_type) {
+			case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv4:
+			case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6:
+			case CQ_ENET_RQ_DESC_RSS_TYPE_TCP_IPv6_EX:
+				skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L4);
+				break;
+			case CQ_ENET_RQ_DESC_RSS_TYPE_IPv4:
+			case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6:
+			case CQ_ENET_RQ_DESC_RSS_TYPE_IPv6_EX:
+				skb_set_hash(skb, rss_hash, PKT_HASH_TYPE_L3);
+				break;
+			}
 		}
 
 		/* Hardware does not provide whole packet checksum. It only
@@ -1843,9 +1849,6 @@ static int enic_change_mtu(struct net_device *netdev, int new_mtu)
 	struct enic *enic = netdev_priv(netdev);
 	int running = netif_running(netdev);
 
-	if (new_mtu < ENIC_MIN_MTU || new_mtu > ENIC_MAX_MTU)
-		return -EINVAL;
-
 	if (enic_is_dynamic(enic) || enic_is_sriov_vf(enic))
 		return -EOPNOTSUPP;
 
@@ -2751,6 +2754,10 @@ static int enic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 68 - 9000 */
+	netdev->min_mtu = ENIC_MIN_MTU;
+	netdev->max_mtu = ENIC_MAX_MTU;
+
 	err = register_netdev(netdev);
 	if (err) {
 		dev_err(dev, "Cannot register net device, aborting\n");
diff --git a/drivers/net/ethernet/cisco/enic/enic_res.h b/drivers/net/ethernet/cisco/enic/enic_res.h
index 69f60afd6577..81f98a8b60e9 100644
--- a/drivers/net/ethernet/cisco/enic/enic_res.h
+++ b/drivers/net/ethernet/cisco/enic/enic_res.h
@@ -30,7 +30,7 @@
 #define ENIC_MIN_RQ_DESCS		64
 #define ENIC_MAX_RQ_DESCS		4096
 
-#define ENIC_MIN_MTU			68
+#define ENIC_MIN_MTU			ETH_MIN_MTU
 #define ENIC_MAX_MTU			9000
 
 #define ENIC_MULTICAST_PERFECT_FILTERS	32
diff --git a/drivers/net/ethernet/davicom/dm9000.c b/drivers/net/ethernet/davicom/dm9000.c
index f45385f5c6e5..f1a81c52afe3 100644
--- a/drivers/net/ethernet/davicom/dm9000.c
+++ b/drivers/net/ethernet/davicom/dm9000.c
@@ -1382,7 +1382,6 @@ static const struct net_device_ops dm9000_netdev_ops = {
 	.ndo_tx_timeout		= dm9000_timeout,
 	.ndo_set_rx_mode	= dm9000_hash_table,
 	.ndo_do_ioctl		= dm9000_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_features	= dm9000_set_features,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
diff --git a/drivers/net/ethernet/dec/tulip/de2104x.c b/drivers/net/ethernet/dec/tulip/de2104x.c
index cadcee645f74..90c573b8ccaf 100644
--- a/drivers/net/ethernet/dec/tulip/de2104x.c
+++ b/drivers/net/ethernet/dec/tulip/de2104x.c
@@ -1956,7 +1956,6 @@ static const struct net_device_ops de_netdev_ops = {
 	.ndo_start_xmit		= de_start_xmit,
 	.ndo_get_stats		= de_get_stats,
 	.ndo_tx_timeout 	= de_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index 6620fc861c47..51fda3a6b13f 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -1085,7 +1085,6 @@ static const struct net_device_ops de4x5_netdev_ops = {
     .ndo_get_stats	= de4x5_get_stats,
     .ndo_set_rx_mode	= set_multicast_list,
     .ndo_do_ioctl	= de4x5_ioctl,
-    .ndo_change_mtu	= eth_change_mtu,
     .ndo_set_mac_address= eth_mac_addr,
     .ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/dec/tulip/dmfe.c b/drivers/net/ethernet/dec/tulip/dmfe.c
index 8ed0fd8b1dda..df4994919456 100644
--- a/drivers/net/ethernet/dec/tulip/dmfe.c
+++ b/drivers/net/ethernet/dec/tulip/dmfe.c
@@ -352,7 +352,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_stop		= dmfe_stop,
 	.ndo_start_xmit		= dmfe_start_xmit,
 	.ndo_set_rx_mode	= dmfe_set_filter_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/dec/tulip/tulip_core.c b/drivers/net/ethernet/dec/tulip/tulip_core.c
index bbde90bc74fe..5f1377449b8f 100644
--- a/drivers/net/ethernet/dec/tulip/tulip_core.c
+++ b/drivers/net/ethernet/dec/tulip/tulip_core.c
@@ -1282,7 +1282,6 @@ static const struct net_device_ops tulip_netdev_ops = {
 	.ndo_get_stats		= tulip_get_stats,
 	.ndo_do_ioctl 		= private_ioctl,
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/dec/tulip/uli526x.c b/drivers/net/ethernet/dec/tulip/uli526x.c
index e750b5ddc0fb..e1c4133b8787 100644
--- a/drivers/net/ethernet/dec/tulip/uli526x.c
+++ b/drivers/net/ethernet/dec/tulip/uli526x.c
@@ -269,7 +269,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_stop		= uli526x_stop,
 	.ndo_start_xmit		= uli526x_start_xmit,
 	.ndo_set_rx_mode	= uli526x_set_filter_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/dec/tulip/winbond-840.c b/drivers/net/ethernet/dec/tulip/winbond-840.c
index 1f62b9423851..feda96d585e7 100644
--- a/drivers/net/ethernet/dec/tulip/winbond-840.c
+++ b/drivers/net/ethernet/dec/tulip/winbond-840.c
@@ -353,7 +353,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_set_rx_mode	= set_rx_mode,
 	.ndo_do_ioctl		= netdev_ioctl,
 	.ndo_tx_timeout		= tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/dec/tulip/xircom_cb.c b/drivers/net/ethernet/dec/tulip/xircom_cb.c
index 0e721cedfa67..19e4ea15b504 100644
--- a/drivers/net/ethernet/dec/tulip/xircom_cb.c
+++ b/drivers/net/ethernet/dec/tulip/xircom_cb.c
@@ -174,7 +174,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_open		= xircom_open,
 	.ndo_stop		= xircom_close,
 	.ndo_start_xmit		= xircom_start_xmit,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 78f144696d6b..8c95a8a81e3c 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -76,7 +76,6 @@ static void rio_free_tx (struct net_device *dev, int irq);
 static void tx_error (struct net_device *dev, int tx_status);
 static int receive_packet (struct net_device *dev);
 static void rio_error (struct net_device *dev, int int_status);
-static int change_mtu (struct net_device *dev, int new_mtu);
 static void set_multicast (struct net_device *dev);
 static struct net_device_stats *get_stats (struct net_device *dev);
 static int clear_stats (struct net_device *dev);
@@ -106,7 +105,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_set_rx_mode	= set_multicast,
 	.ndo_do_ioctl		= rio_ioctl,
 	.ndo_tx_timeout		= rio_tx_timeout,
-	.ndo_change_mtu		= change_mtu,
 };
 
 static int
@@ -230,6 +228,10 @@ rio_probe1 (struct pci_dev *pdev, const struct pci_device_id *ent)
 #if 0
 	dev->features = NETIF_F_IP_CSUM;
 #endif
+	/* MTU range: 68 - 1536 or 8000 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = np->jumbo ? MAX_JUMBO : PACKET_SIZE;
+
 	pci_set_drvdata (pdev, dev);
 
 	ring_space = pci_alloc_consistent (pdev, TX_TOTAL_SIZE, &ring_dma);
@@ -1198,22 +1200,6 @@ clear_stats (struct net_device *dev)
 	return 0;
 }
 
-
-static int
-change_mtu (struct net_device *dev, int new_mtu)
-{
-	struct netdev_private *np = netdev_priv(dev);
-	int max = (np->jumbo) ? MAX_JUMBO : 1536;
-
-	if ((new_mtu < 68) || (new_mtu > max)) {
-		return -EINVAL;
-	}
-
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 static void
 set_multicast (struct net_device *dev)
 {
diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c
index 79d80090eac8..eab36acfc0d1 100644
--- a/drivers/net/ethernet/dlink/sundance.c
+++ b/drivers/net/ethernet/dlink/sundance.c
@@ -580,6 +580,10 @@ static int sundance_probe1(struct pci_dev *pdev,
 	dev->ethtool_ops = &ethtool_ops;
 	dev->watchdog_timeo = TX_TIMEOUT;
 
+	/* MTU range: 68 - 8191 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = 8191;
+
 	pci_set_drvdata(pdev, dev);
 
 	i = register_netdev(dev);
@@ -713,8 +717,6 @@ err_out_netdev:
 
 static int change_mtu(struct net_device *dev, int new_mtu)
 {
-	if ((new_mtu < 68) || (new_mtu > 8191)) /* Set by RxDMAFrameLen */
-		return -EINVAL;
 	if (netif_running(dev))
 		return -EBUSY;
 	dev->mtu = new_mtu;
diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index c3b64cdd0dec..2a17c59f69f9 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -767,7 +767,6 @@ static const struct net_device_ops dnet_netdev_ops = {
 	.ndo_do_ioctl		= dnet_ioctl,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int dnet_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/ec_bhf.c b/drivers/net/ethernet/ec_bhf.c
index f7b42483921c..57650953ff83 100644
--- a/drivers/net/ethernet/ec_bhf.c
+++ b/drivers/net/ethernet/ec_bhf.c
@@ -482,7 +482,6 @@ static const struct net_device_ops ec_bhf_netdev_ops = {
 	.ndo_open		= ec_bhf_open,
 	.ndo_stop		= ec_bhf_stop,
 	.ndo_get_stats64	= ec_bhf_get_stats,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr
 };
diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c
index 1fb5d7239254..0e74529a4209 100644
--- a/drivers/net/ethernet/emulex/benet/be_cmds.c
+++ b/drivers/net/ethernet/emulex/benet/be_cmds.c
@@ -90,7 +90,8 @@ static struct be_cmd_priv_map cmd_priv_map[] = {
 	{
 		OPCODE_COMMON_SET_HSW_CONFIG,
 		CMD_SUBSYSTEM_COMMON,
-		BE_PRIV_DEVCFG | BE_PRIV_VHADM
+		BE_PRIV_DEVCFG | BE_PRIV_VHADM |
+		BE_PRIV_DEVSEC
 	},
 	{
 		OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES,
diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c
index 93aa2939142a..7e1633bf5a22 100644
--- a/drivers/net/ethernet/emulex/benet/be_main.c
+++ b/drivers/net/ethernet/emulex/benet/be_main.c
@@ -1406,23 +1406,6 @@ drop:
 	return NETDEV_TX_OK;
 }
 
-static int be_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	struct be_adapter *adapter = netdev_priv(netdev);
-	struct device *dev = &adapter->pdev->dev;
-
-	if (new_mtu < BE_MIN_MTU || new_mtu > BE_MAX_MTU) {
-		dev_info(dev, "MTU must be between %d and %d bytes\n",
-			 BE_MIN_MTU, BE_MAX_MTU);
-		return -EINVAL;
-	}
-
-	dev_info(dev, "MTU changed from %d to %d bytes\n",
-		 netdev->mtu, new_mtu);
-	netdev->mtu = new_mtu;
-	return 0;
-}
-
 static inline bool be_in_all_promisc(struct be_adapter *adapter)
 {
 	return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
@@ -5215,7 +5198,6 @@ static const struct net_device_ops be_netdev_ops = {
 	.ndo_start_xmit		= be_xmit,
 	.ndo_set_rx_mode	= be_set_rx_mode,
 	.ndo_set_mac_address	= be_mac_addr_set,
-	.ndo_change_mtu		= be_change_mtu,
 	.ndo_get_stats64	= be_get_stats64,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_vlan_rx_add_vid	= be_vlan_add_vid,
@@ -5265,6 +5247,10 @@ static void be_netdev_init(struct net_device *netdev)
 	netdev->netdev_ops = &be_netdev_ops;
 
 	netdev->ethtool_ops = &be_ethtool_ops;
+
+	/* MTU range: 256 - 9000 */
+	netdev->min_mtu = BE_MIN_MTU;
+	netdev->max_mtu = BE_MAX_MTU;
 }
 
 static void be_cleanup(struct be_adapter *adapter)
diff --git a/drivers/net/ethernet/ethoc.c b/drivers/net/ethernet/ethoc.c
index c044667a0a25..45abc81f6f55 100644
--- a/drivers/net/ethernet/ethoc.c
+++ b/drivers/net/ethernet/ethoc.c
@@ -23,6 +23,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/of_net.h>
 #include <linux/module.h>
 #include <net/ethoc.h>
 
@@ -221,6 +222,9 @@ struct ethoc {
 	struct mii_bus *mdio;
 	struct clk *clk;
 	s8 phy_id;
+
+	int old_link;
+	int old_duplex;
 };
 
 /**
@@ -572,7 +576,7 @@ static irqreturn_t ethoc_interrupt(int irq, void *dev_id)
 
 	/* We always handle the dropped packet interrupt */
 	if (pending & INT_MASK_BUSY) {
-		dev_err(&dev->dev, "packet dropped\n");
+		dev_dbg(&dev->dev, "packet dropped\n");
 		dev->stats.rx_dropped++;
 	}
 
@@ -667,6 +671,32 @@ static int ethoc_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val)
 
 static void ethoc_mdio_poll(struct net_device *dev)
 {
+	struct ethoc *priv = netdev_priv(dev);
+	struct phy_device *phydev = dev->phydev;
+	bool changed = false;	/* set when link or duplex differs */
+	u32 mode;
+
+	if (priv->old_link != phydev->link) {
+		changed = true;
+		priv->old_link = phydev->link;
+	}
+
+	if (priv->old_duplex != phydev->duplex) {
+		changed = true;
+		priv->old_duplex = phydev->duplex;
+	}
+
+	if (!changed)
+		return;
+	/* link or duplex changed: copy the PHY's duplex into MODER_FULLD */
+	mode = ethoc_read(priv, MODER);
+	if (phydev->duplex == DUPLEX_FULL)
+		mode |= MODER_FULLD;
+	else
+		mode &= ~MODER_FULLD;
+	ethoc_write(priv, MODER, mode);
+
+	phy_print_status(phydev);
 }
 
 static int ethoc_mdio_probe(struct net_device *dev)
@@ -685,6 +715,9 @@ static int ethoc_mdio_probe(struct net_device *dev)
 		return -ENXIO;
 	}
 
+	priv->old_duplex = -1;
+	priv->old_link = -1;
+
 	err = phy_connect_direct(dev, phy, ethoc_mdio_poll,
 				 PHY_INTERFACE_MODE_GMII);
 	if (err) {
@@ -721,6 +754,9 @@ static int ethoc_open(struct net_device *dev)
 		netif_start_queue(dev);
 	}
 
+	priv->old_link = -1;
+	priv->old_duplex = -1;
+
 	phy_start(dev->phydev);
 	napi_enable(&priv->napi);
 
@@ -966,6 +1002,7 @@ static int ethoc_set_ringparam(struct net_device *dev,
 const struct ethtool_ops ethoc_ethtool_ops = {
 	.get_regs_len = ethoc_get_regs_len,
 	.get_regs = ethoc_get_regs,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = ethoc_get_ringparam,
 	.set_ringparam = ethoc_set_ringparam,
@@ -1122,11 +1159,9 @@ static int ethoc_probe(struct platform_device *pdev)
 		memcpy(netdev->dev_addr, pdata->hwaddr, IFHWADDRLEN);
 		priv->phy_id = pdata->phy_id;
 	} else {
-		const uint8_t *mac;
+		const void *mac;
 
-		mac = of_get_property(pdev->dev.of_node,
-				      "local-mac-address",
-				      NULL);
+		mac = of_get_mac_address(pdev->dev.of_node);
 		if (mac)
 			memcpy(netdev->dev_addr, mac, IFHWADDRLEN);
 		priv->phy_id = -1;
diff --git a/drivers/net/ethernet/fealnx.c b/drivers/net/ethernet/fealnx.c
index c08bd763172a..6967b287b6e7 100644
--- a/drivers/net/ethernet/fealnx.c
+++ b/drivers/net/ethernet/fealnx.c
@@ -472,7 +472,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_set_rx_mode	= set_rx_mode,
 	.ndo_do_ioctl		= mii_ioctl,
 	.ndo_tx_timeout		= fealnx_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig
index d1ca45fbb164..6e490fd2345d 100644
--- a/drivers/net/ethernet/freescale/Kconfig
+++ b/drivers/net/ethernet/freescale/Kconfig
@@ -8,7 +8,7 @@ config NET_VENDOR_FREESCALE
 	depends on FSL_SOC || QUICC_ENGINE || CPM1 || CPM2 || PPC_MPC512x || \
 		   M523x || M527x || M5272 || M528x || M520x || M532x || \
 		   ARCH_MXC || ARCH_MXS || (PPC_MPC52xx && PPC_BESTCOMM) || \
-		   ARCH_LAYERSCAPE
+		   ARCH_LAYERSCAPE || COMPILE_TEST
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -25,7 +25,7 @@ config FEC
 		   ARCH_MXC || SOC_IMX28)
 	default ARCH_MXC || SOC_IMX28 if ARM
 	select PHYLIB
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  Say Y here if you want to use the built-in 10/100 Fast ethernet
 	  controller on some Motorola ColdFire and Freescale i.MX processors.
@@ -65,6 +65,7 @@ config FSL_PQ_MDIO
 config FSL_XGMAC_MDIO
 	tristate "Freescale XGMAC MDIO"
 	select PHYLIB
+	depends on OF
 	select OF_MDIO
 	---help---
 	  This driver supports the MDIO bus on the Fman 10G Ethernet MACs, and
@@ -85,6 +86,7 @@ config UGETH_TX_ON_DEMAND
 
 config GIANFAR
 	tristate "Gianfar Ethernet"
+	depends on HAS_DMA
 	select FSL_PQ_MDIO
 	select PHYLIB
 	select CRC32
@@ -93,4 +95,6 @@ config GIANFAR
 	  and MPC86xx family of chips, the eTSEC on LS1021A and the FEC
 	  on the 8540.
 
+source "drivers/net/ethernet/freescale/dpaa/Kconfig"
+
 endif # NET_VENDOR_FREESCALE
diff --git a/drivers/net/ethernet/freescale/Makefile b/drivers/net/ethernet/freescale/Makefile
index cbe21dc7e37e..4a13115155c9 100644
--- a/drivers/net/ethernet/freescale/Makefile
+++ b/drivers/net/ethernet/freescale/Makefile
@@ -22,3 +22,4 @@ obj-$(CONFIG_UCC_GETH) += ucc_geth_driver.o
 ucc_geth_driver-objs := ucc_geth.o ucc_geth_ethtool.o
 
 obj-$(CONFIG_FSL_FMAN) += fman/
+obj-$(CONFIG_FSL_DPAA_ETH) += dpaa/
diff --git a/drivers/net/ethernet/freescale/dpaa/Kconfig b/drivers/net/ethernet/freescale/dpaa/Kconfig
new file mode 100644
index 000000000000..f3a3454805f9
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa/Kconfig
@@ -0,0 +1,10 @@
+menuconfig FSL_DPAA_ETH
+	tristate "DPAA Ethernet"
+	depends on FSL_SOC && FSL_DPAA && FSL_FMAN
+	select PHYLIB
+	select FSL_FMAN_MAC
+	---help---
+	  Data Path Acceleration Architecture Ethernet driver,
+	  supporting the Freescale QorIQ chips.
+	  Depends on the Freescale Buffer Manager and Queue Manager
+	  drivers and on the Frame Manager driver.
diff --git a/drivers/net/ethernet/freescale/dpaa/Makefile b/drivers/net/ethernet/freescale/dpaa/Makefile
new file mode 100644
index 000000000000..7db50bccb137
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the Freescale DPAA Ethernet controllers
+#
+
+# Include FMan headers
+FMAN        = $(srctree)/drivers/net/ethernet/freescale/fman
+ccflags-y += -I$(FMAN)
+
+obj-$(CONFIG_FSL_DPAA_ETH) += fsl_dpa.o
+
+fsl_dpa-objs += dpaa_eth.o dpaa_ethtool.o dpaa_eth_sysfs.o
+CFLAGS_dpaa_eth.o := -I$(src)
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
new file mode 100644
index 000000000000..3c48a84dec86
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -0,0 +1,2753 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/io.h>
+#include <linux/if_arp.h>
+#include <linux/if_vlan.h>
+#include <linux/icmp.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/net.h>
+#include <linux/skbuff.h>
+#include <linux/etherdevice.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include <linux/percpu.h>
+#include <linux/dma-mapping.h>
+#include <linux/sort.h>
+#include <soc/fsl/bman.h>
+#include <soc/fsl/qman.h>
+
+#include "fman.h"
+#include "fman_port.h"
+#include "mac.h"
+#include "dpaa_eth.h"
+
+/* CREATE_TRACE_POINTS only needs to be defined once. Other dpaa files
+ * using trace events only need to #include <trace/events/sched.h>
+ */
+#define CREATE_TRACE_POINTS
+#include "dpaa_eth_trace.h"
+
+static int debug = -1;
+module_param(debug, int, 0444);
+MODULE_PARM_DESC(debug, "Module/Driver verbosity level (0=none,...,16=all)");
+
+static u16 tx_timeout = 1000;
+module_param(tx_timeout, ushort, 0444);
+MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
+
+#define FM_FD_STAT_RX_ERRORS						\
+	(FM_FD_ERR_DMA | FM_FD_ERR_PHYSICAL	| \
+	 FM_FD_ERR_SIZE | FM_FD_ERR_CLS_DISCARD | \
+	 FM_FD_ERR_EXTRACTION | FM_FD_ERR_NO_SCHEME	| \
+	 FM_FD_ERR_PRS_TIMEOUT | FM_FD_ERR_PRS_ILL_INSTRUCT | \
+	 FM_FD_ERR_PRS_HDR_ERR)
+
+#define FM_FD_STAT_TX_ERRORS \
+	(FM_FD_ERR_UNSUPPORTED_FORMAT | \
+	 FM_FD_ERR_LENGTH | FM_FD_ERR_DMA)
+
+#define DPAA_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \
+			  NETIF_MSG_LINK | NETIF_MSG_IFUP | \
+			  NETIF_MSG_IFDOWN)
+
+#define DPAA_INGRESS_CS_THRESHOLD 0x10000000
+/* Ingress congestion threshold on FMan ports
+ * The size in bytes of the ingress tail-drop threshold on FMan ports.
+ * Traffic piling up above this value will be rejected by QMan and discarded
+ * by FMan.
+ */
+
+/* Size in bytes of the FQ taildrop threshold */
+#define DPAA_FQ_TD 0x200000
+
+#define DPAA_CS_THRESHOLD_1G 0x06000000
+/* Egress congestion threshold on 1G ports, range 0x1000 .. 0x10000000
+ * The size in bytes of the egress Congestion State notification threshold on
+ * 1G ports. The 1G dTSECs can quite easily be flooded by cores doing Tx in a
+ * tight loop (e.g. by sending UDP datagrams at "while(1) speed"),
+ * and the larger the frame size, the more acute the problem.
+ * So we have to find a balance between these factors:
+ * - avoiding the device staying congested for a prolonged time (risking
+ *   the netdev watchdog to fire - see also the tx_timeout module param);
+ * - affecting performance of protocols such as TCP, which otherwise
+ *   behave well under the congestion notification mechanism;
+ * - preventing the Tx cores from tightly-looping (as if the congestion
+ *   threshold was too low to be effective);
+ * - running out of memory if the CS threshold is set too high.
+ */
+
+#define DPAA_CS_THRESHOLD_10G 0x10000000
+/* The size in bytes of the egress Congestion State notification threshold on
+ * 10G ports, range 0x1000 .. 0x10000000
+ */
+
+/* Largest value that the FQD's OAL field can hold */
+#define FSL_QMAN_MAX_OAL	127
+
+/* Default alignment for start of data in an Rx FD */
+#define DPAA_FD_DATA_ALIGNMENT  16
+
+/* Values for the L3R field of the FM Parse Results
+ */
+/* L3 Type field: First IP Present IPv4 */
+#define FM_L3_PARSE_RESULT_IPV4	0x8000
+/* L3 Type field: First IP Present IPv6 */
+#define FM_L3_PARSE_RESULT_IPV6	0x4000
+/* Values for the L4R field of the FM Parse Results */
+/* L4 Type field: UDP */
+#define FM_L4_PARSE_RESULT_UDP	0x40
+/* L4 Type field: TCP */
+#define FM_L4_PARSE_RESULT_TCP	0x20
+
+#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
+#define DPAA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at once */
+
+#define FSL_DPAA_BPID_INV		0xff
+#define FSL_DPAA_ETH_MAX_BUF_COUNT	128
+#define FSL_DPAA_ETH_REFILL_THRESHOLD	80
+
+#define DPAA_TX_PRIV_DATA_SIZE	16
+#define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result)
+#define DPAA_TIME_STAMP_SIZE 8
+#define DPAA_HASH_RESULTS_SIZE 8
+#define DPAA_RX_PRIV_DATA_SIZE	(u16)(DPAA_TX_PRIV_DATA_SIZE + \
+					dpaa_rx_extra_headroom)
+
+#define DPAA_ETH_RX_QUEUES	128
+
+#define DPAA_ENQUEUE_RETRIES	100000
+
+enum port_type {RX, TX};
+
+struct fm_port_fqs {
+	struct dpaa_fq *tx_defq;
+	struct dpaa_fq *tx_errq;
+	struct dpaa_fq *rx_defq;
+	struct dpaa_fq *rx_errq;
+};
+
+/* All the dpa bps in use at any moment */
+static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];
+
+/* The raw buffer size must be cacheline aligned */
+#define DPAA_BP_RAW_SIZE 4096
+/* When using more than one buffer pool, the raw sizes are as follows:
+ * 1 bp: 4KB
+ * 2 bp: 2KB, 4KB
+ * 3 bp: 1KB, 2KB, 4KB
+ * 4 bp: 1KB, 2KB, 4KB, 8KB
+ */
+static inline size_t bpool_buffer_raw_size(u8 index, u8 cnt)
+{
+	size_t res = DPAA_BP_RAW_SIZE / 4;	/* doubled in the loop below */
+	u8 i;
+
+	for (i = (cnt < 3) ? cnt : 3; i < 3 + index; i++)
+		res *= 2;	/* once per step from min(cnt, 3) */
+	return res;
+}
+
+/* FMan-DMA requires 16-byte alignment for Rx buffers, but SKB_DATA_ALIGN is
+ * even stronger (SMP_CACHE_BYTES-aligned), so we just get away with that,
+ * via SKB_WITH_OVERHEAD(). We can't rely on netdev_alloc_frag() giving us
+ * half-page-aligned buffers, so we reserve some more space for start-of-buffer
+ * alignment.
+ */
+#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD((raw_size) - SMP_CACHE_BYTES)
+
+static int dpaa_max_frm;
+
+static int dpaa_rx_extra_headroom;
+
+#define dpaa_get_max_mtu()	\
+	(dpaa_max_frm - (VLAN_ETH_HLEN + ETH_FCS_LEN))
+
+static int dpaa_netdev_init(struct net_device *net_dev,
+			    const struct net_device_ops *dpaa_ops,
+			    u16 tx_timeout)
+{
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct device *dev = net_dev->dev.parent;
+	struct dpaa_percpu_priv *percpu_priv;
+	const u8 *mac_addr;
+	int i, err;
+
+	/* Although we access another CPU's private data here
+	 * we do it at initialization so it is safe
+	 */
+	for_each_possible_cpu(i) {
+		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
+		percpu_priv->net_dev = net_dev;
+	}
+
+	net_dev->netdev_ops = dpaa_ops;
+	mac_addr = priv->mac_dev->addr;
+
+	net_dev->mem_start = priv->mac_dev->res->start;
+	net_dev->mem_end = priv->mac_dev->res->end;
+	/* MTU range: ETH_MIN_MTU - dpaa_max_frm less VLAN header and FCS */
+	net_dev->min_mtu = ETH_MIN_MTU;
+	net_dev->max_mtu = dpaa_get_max_mtu();
+
+	net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
+				 NETIF_F_LLTX);
+
+	net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA;
+	/* The kernel enables GSO automatically, if we declare NETIF_F_SG.
+	 * For conformity, we'll still declare GSO explicitly.
+	 */
+	net_dev->features |= NETIF_F_GSO;
+
+	net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+	/* we do not want shared skbs on TX */
+	net_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+
+	net_dev->features |= net_dev->hw_features;
+	net_dev->vlan_features = net_dev->features;
+
+	memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len);
+	memcpy(net_dev->dev_addr, mac_addr, net_dev->addr_len);
+
+	net_dev->ethtool_ops = &dpaa_ethtool_ops;
+
+	net_dev->needed_headroom = priv->tx_headroom;
+	net_dev->watchdog_timeo = msecs_to_jiffies(tx_timeout);
+
+	/* start without the RUNNING flag, phylib controls it later */
+	netif_carrier_off(net_dev);
+
+	err = register_netdev(net_dev);
+	if (err < 0) {
+		dev_err(dev, "register_netdev() = %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static int dpaa_stop(struct net_device *net_dev)
+{
+	struct mac_device *mac_dev;
+	struct dpaa_priv *priv;
+	int i, err, error;
+
+	priv = netdev_priv(net_dev);
+	mac_dev = priv->mac_dev;
+
+	netif_tx_stop_all_queues(net_dev);
+	/* Allow the Fman (Tx) port to process in-flight frames before we
+	 * try switching it off.
+	 */
+	usleep_range(5000, 10000);
+
+	err = mac_dev->stop(mac_dev);
+	if (err < 0)
+		netif_err(priv, ifdown, net_dev, "mac_dev->stop() = %d\n",
+			  err);
+	/* a port disable failure replaces err, so the last one is returned */
+	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
+		error = fman_port_disable(mac_dev->port[i]);
+		if (error)
+			err = error;
+	}
+
+	if (net_dev->phydev)
+		phy_disconnect(net_dev->phydev);
+	net_dev->phydev = NULL;
+
+	return err;
+}
+
+static void dpaa_tx_timeout(struct net_device *net_dev)
+{
+	struct dpaa_percpu_priv *percpu_priv;
+	const struct dpaa_priv	*priv;
+
+	priv = netdev_priv(net_dev);
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+	/* log the time since the last transmit start and count a Tx error */
+	netif_crit(priv, timer, net_dev, "Transmit timeout latency: %u ms\n",
+		   jiffies_to_msecs(jiffies - dev_trans_start(net_dev)));
+
+	percpu_priv->stats.tx_errors++;	/* this CPU's counter */
+}
+
+/* Adds every possible CPU's per-CPU counters into *s, treating both structs
+ * as flat arrays of u64; *s is accumulated into, not cleared. Returns s.
+ */
+static struct rtnl_link_stats64 *dpaa_get_stats64(struct net_device *net_dev,
+						  struct rtnl_link_stats64 *s)
+{
+	int numstats = sizeof(struct rtnl_link_stats64) / sizeof(u64);
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct dpaa_percpu_priv *percpu_priv;
+	u64 *netstats = (u64 *)s;
+	u64 *cpustats;
+	int i, j;
+
+	for_each_possible_cpu(i) {
+		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
+
+		cpustats = (u64 *)&percpu_priv->stats;
+
+		/* add this CPU's counters, field by field */
+		for (j = 0; j < numstats; j++)
+			netstats[j] += cpustats[j];
+	}
+
+	return s;
+}
+
+/* Look up the mac_device that backs this DPAA Ethernet platform device.
+ *
+ * The MAC device-tree node comes from the platform data; the platform device
+ * bound to that node is located and its driver data is returned.
+ *
+ * Returns the mac_device on success, ERR_PTR(-ENODEV) when there is no
+ * platform data, or ERR_PTR(-EINVAL) when the MAC platform device or its
+ * driver data cannot be found.
+ */
+static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
+{
+	struct device *dpaa_dev = &pdev->dev;
+	struct dpaa_eth_data *eth_data = dpaa_dev->platform_data;
+	struct platform_device *mac_pdev;
+	struct device_node *mac_node;
+	struct mac_device *mac_dev;
+
+	if (!eth_data)
+		return ERR_PTR(-ENODEV);
+
+	mac_node = eth_data->mac_node;
+	mac_pdev = of_find_device_by_node(mac_node);
+	if (!mac_pdev)
+		dev_err(dpaa_dev, "of_find_device_by_node(%s) failed\n",
+			mac_node->full_name);
+
+	/* the node reference is dropped on both the success and error path */
+	of_node_put(mac_node);
+	if (!mac_pdev)
+		return ERR_PTR(-EINVAL);
+
+	mac_dev = dev_get_drvdata(&mac_pdev->dev);
+	if (!mac_dev) {
+		dev_err(dpaa_dev, "dev_get_drvdata(%s) failed\n",
+			dev_name(&mac_pdev->dev));
+		return ERR_PTR(-EINVAL);
+	}
+
+	return mac_dev;
+}
+
+/* Change the MAC address of the interface.
+ *
+ * The address is first updated in the net_device through eth_mac_addr() and
+ * then pushed to the MAC via mac_dev->change_addr(). If the MAC rejects it,
+ * the previous address is restored in the net_device.
+ *
+ * Returns 0 on success or the negative error of the failing step.
+ */
+static int dpaa_set_mac_address(struct net_device *net_dev, void *addr)
+{
+	const struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct mac_device *mac_dev;
+	struct sockaddr saved_addr;
+	int ret;
+
+	/* remember the current address so it can be restored on failure */
+	memcpy(saved_addr.sa_data, net_dev->dev_addr, ETH_ALEN);
+
+	ret = eth_mac_addr(net_dev, addr);
+	if (ret < 0) {
+		netif_err(priv, drv, net_dev, "eth_mac_addr() = %d\n", ret);
+		return ret;
+	}
+
+	mac_dev = priv->mac_dev;
+	ret = mac_dev->change_addr(mac_dev->fman_mac,
+				   (enet_addr_t *)net_dev->dev_addr);
+	if (ret >= 0)
+		return 0;
+
+	netif_err(priv, drv, net_dev, "mac_dev->change_addr() = %d\n",
+		  ret);
+	/* reverting to previous address */
+	eth_mac_addr(net_dev, &saved_addr);
+
+	return ret;
+}
+
+/* Propagate the Rx filtering state of the net_device to the MAC.
+ *
+ * Promiscuous mode is toggled in the MAC only when IFF_PROMISC differs from
+ * the cached mac_dev->promisc state; set_multi() is then always called.
+ * Failures are logged and otherwise ignored.
+ */
+static void dpaa_set_rx_mode(struct net_device *net_dev)
+{
+	const struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct mac_device *mac_dev = priv->mac_dev;
+	int ret;
+
+	if (!!(net_dev->flags & IFF_PROMISC) != mac_dev->promisc) {
+		mac_dev->promisc = !mac_dev->promisc;
+		ret = mac_dev->set_promisc(mac_dev->fman_mac,
+					   mac_dev->promisc);
+		if (ret < 0)
+			netif_err(priv, drv, net_dev,
+				  "mac_dev->set_promisc() = %d\n",
+				  ret);
+	}
+
+	ret = mac_dev->set_multi(net_dev, mac_dev);
+	if (ret < 0)
+		netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
+			  ret);
+}
+
+/* Return the dpaa_bp registered for @bpid in dpaa_bp_array, or NULL (with a
+ * warning) when @bpid is outside [0, BM_MAX_NUM_OF_POOLS).
+ */
+static struct dpaa_bp *dpaa_bpid2pool(int bpid)
+{
+	bool in_range = bpid >= 0 && bpid < BM_MAX_NUM_OF_POOLS;
+
+	if (WARN_ON(!in_range))
+		return NULL;
+
+	return dpaa_bp_array[bpid];
+}
+
+/* If a buffer pool is already registered for @bpid, take one more reference
+ * on it and return true; otherwise return false.
+ */
+static bool dpaa_bpid2pool_use(int bpid)
+{
+	if (!dpaa_bpid2pool(bpid))
+		return false;
+
+	atomic_inc(&dpaa_bp_array[bpid]->refs);
+	return true;
+}
+
+/* Register @dpaa_bp as the pool for @bpid and start its reference count at
+ * one. Called only once per bpid by dpaa_bp_alloc_pool().
+ */
+static void dpaa_bpid2pool_map(int bpid, struct dpaa_bp *dpaa_bp)
+{
+	struct dpaa_bp **slot = &dpaa_bp_array[bpid];
+
+	*slot = dpaa_bp;
+	atomic_set(&dpaa_bp->refs, 1);
+}
+
+/* Set up the buffer pool described by @dpaa_bp.
+ *
+ * When no bpid was specified, a new BMan pool is created and its id stored
+ * in @dpaa_bp. When a bpid was specified and a pool is already registered
+ * for it, only a reference is taken. Otherwise the pool is seeded through
+ * seed_cb (if set) and registered in the bpid map.
+ *
+ * Returns 0 on success, -EINVAL for an unconfigured pool, -ENODEV when the
+ * BMan pool cannot be created, or the seed_cb error.
+ */
+static int dpaa_bp_alloc_pool(struct dpaa_bp *dpaa_bp)
+{
+	int err;
+
+	if (dpaa_bp->size == 0 || dpaa_bp->config_count == 0) {
+		pr_err("%s: Buffer pool is not properly initialized! Missing size or initial number of buffers\n",
+		       __func__);
+		return -EINVAL;
+	}
+
+	if (dpaa_bp->bpid == FSL_DPAA_BPID_INV) {
+		dpaa_bp->pool = bman_new_pool();
+		if (!dpaa_bp->pool) {
+			pr_err("%s: bman_new_pool() failed\n",
+			       __func__);
+			return -ENODEV;
+		}
+
+		dpaa_bp->bpid = (u8)bman_get_bpid(dpaa_bp->pool);
+	} else if (dpaa_bpid2pool_use(dpaa_bp->bpid)) {
+		/* the pool was specified and already exists: share it */
+		return 0;
+	}
+
+	if (dpaa_bp->seed_cb) {
+		err = dpaa_bp->seed_cb(dpaa_bp);
+		if (err) {
+			pr_err("%s: pool seeding failed\n", __func__);
+			bman_free_pool(dpaa_bp->pool);
+			return err;
+		}
+	}
+
+	dpaa_bpid2pool_map(dpaa_bp->bpid, dpaa_bp);
+
+	return 0;
+}
+
+/* Remove and free all the buffers from the given buffer pool.
+ *
+ * Buffers are acquired from the pool 8 at a time; once an 8-buffer acquire
+ * fails the loop switches to acquiring one buffer at a time, and stops when
+ * a single-buffer acquire fails as well. Each acquired buffer is handed to
+ * bp->free_buf_cb when that callback is set.
+ */
+static void dpaa_bp_drain(struct dpaa_bp *bp)
+{
+	u8 num = 8;
+	int ret;
+
+	do {
+		struct bm_buffer bmb[8];
+		int i;
+
+		ret = bman_acquire(bp->pool, bmb, num);
+		if (ret < 0) {
+			if (num == 8) {
+				/* we have less than 8 buffers left;
+				 * drain them one by one
+				 */
+				num = 1;
+				/* "continue" jumps to the loop condition, so
+				 * ret must be positive to keep iterating
+				 */
+				ret = 1;
+				continue;
+			} else {
+				/* Pool is fully drained */
+				break;
+			}
+		}
+
+		if (bp->free_buf_cb)
+			for (i = 0; i < num; i++)
+				bp->free_buf_cb(bp, &bmb[i]);
+	} while (ret > 0);
+}
+
+/* Drop one reference on the pool registered for dpaa_bp->bpid; when the last
+ * reference goes away, drain the pool (if it has a free_buf_cb), unregister
+ * it from the bpid map and free the BMan pool.
+ */
+static void dpaa_bp_free(struct dpaa_bp *dpaa_bp)
+{
+	struct dpaa_bp *bp = dpaa_bpid2pool(dpaa_bp->bpid);
+
+	/* The bpid -> dpaa_bp mapping is established very late in the
+	 * allocation procedure; if something failed before that point the
+	 * pool was never configured and there is nothing to undo. Likewise,
+	 * nothing is torn down while other users still hold a reference.
+	 */
+	if (!bp || !atomic_dec_and_test(&bp->refs))
+		return;
+
+	if (bp->free_buf_cb)
+		dpaa_bp_drain(bp);
+
+	dpaa_bp_array[bp->bpid] = NULL;
+	bman_free_pool(bp->pool);
+}
+
+/* Release every buffer pool referenced by this interface. */
+static void dpaa_bps_free(struct dpaa_priv *priv)
+{
+	int idx = 0;
+
+	while (idx < DPAA_BPS_NUM) {
+		dpaa_bp_free(priv->dpaa_bps[idx]);
+		idx++;
+	}
+}
+
+/* Pick the work queue (WQ) of a frame queue from its type:
+ *	- Tx Confirmation queues go to WQ1.
+ *	- Rx Error and Tx Error queues go to WQ2 (giving them a better chance
+ *	  to be scheduled, in case there are many more FQs in WQ3).
+ *	- Rx Default and Tx queues go to WQ3 (no differentiation between
+ *	  Rx and Tx traffic).
+ * This ensures that Tx-confirmed buffers are timely released. In particular,
+ * it avoids congestion on the Tx Confirm FQs, which can pile up PFDRs if they
+ * are greatly outnumbered by other FQs in the system, while
+ * dequeue scheduling is round-robin.
+ *
+ * An unknown FQ type triggers a warning and leaves fq->wq untouched.
+ */
+static inline void dpaa_assign_wq(struct dpaa_fq *fq)
+{
+	switch (fq->fq_type) {
+	case FQ_TYPE_TX_CONFIRM:
+	case FQ_TYPE_TX_CONF_MQ:
+		fq->wq = 1;
+		return;
+	case FQ_TYPE_RX_ERROR:
+	case FQ_TYPE_TX_ERROR:
+		fq->wq = 2;
+		return;
+	case FQ_TYPE_RX_DEFAULT:
+	case FQ_TYPE_TX:
+		fq->wq = 3;
+		return;
+	default:
+		break;
+	}
+
+	WARN(1, "Invalid FQ type %d for FQID %d!\n",
+	     fq->fq_type, fq->fqid);
+}
+
+/* Allocate @count device-managed dpaa_fq descriptors of type @fq_type and
+ * append them to @list.
+ *
+ * @start is the first FQID to assign; consecutive descriptors get
+ * consecutive FQIDs. A @start of 0 leaves every FQID at 0.
+ *
+ * Returns the array of descriptors, or NULL on allocation failure.
+ */
+static struct dpaa_fq *dpaa_fq_alloc(struct device *dev,
+				     u32 start, u32 count,
+				     struct list_head *list,
+				     enum dpaa_fq_type fq_type)
+{
+	struct dpaa_fq *dpaa_fq;
+	u32 i;
+
+	/* devm_kcalloc() checks the count * size multiplication for
+	 * overflow, unlike an open-coded product passed to devm_kzalloc()
+	 */
+	dpaa_fq = devm_kcalloc(dev, count, sizeof(*dpaa_fq), GFP_KERNEL);
+	if (!dpaa_fq)
+		return NULL;
+
+	for (i = 0; i < count; i++) {
+		dpaa_fq[i].fq_type = fq_type;
+		dpaa_fq[i].fqid = start ? start + i : 0;
+		list_add_tail(&dpaa_fq[i].list, list);
+	}
+
+	for (i = 0; i < count; i++)
+		dpaa_assign_wq(dpaa_fq + i);
+
+	return dpaa_fq;
+}
+
+/* Allocate all the frame queue descriptors of one interface and add them to
+ * @list: Rx error, Rx default, DPAA_ETH_TXQ_NUM Tx confirmation, Tx error,
+ * default Tx confirmation and DPAA_ETH_TXQ_NUM Tx queues, in this order.
+ * The single-queue descriptors are also recorded in @port_fqs.
+ *
+ * Returns 0 on success or -ENOMEM if any allocation fails.
+ */
+static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
+			      struct fm_port_fqs *port_fqs)
+{
+	struct dpaa_fq *fq;
+
+	fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_ERROR);
+	if (!fq)
+		goto err_nomem;
+	port_fqs->rx_errq = fq;
+
+	fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_DEFAULT);
+	if (!fq)
+		goto err_nomem;
+	port_fqs->rx_defq = fq;
+
+	fq = dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list,
+			   FQ_TYPE_TX_CONF_MQ);
+	if (!fq)
+		goto err_nomem;
+
+	fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_ERROR);
+	if (!fq)
+		goto err_nomem;
+	port_fqs->tx_errq = fq;
+
+	fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_CONFIRM);
+	if (!fq)
+		goto err_nomem;
+	port_fqs->tx_defq = fq;
+
+	fq = dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX);
+	if (!fq)
+		goto err_nomem;
+
+	return 0;
+
+err_nomem:
+	dev_err(dev, "dpaa_fq_alloc() failed\n");
+	return -ENOMEM;
+}
+
+/* Pool channel shared by the driver; 0 means "not allocated yet". */
+static u32 rx_pool_channel;
+static DEFINE_SPINLOCK(rx_pool_channel_init);
+
+/* Return the shared pool channel, allocating it with qman_alloc_pool() on
+ * first use. The allocation is serialized by rx_pool_channel_init.
+ *
+ * Returns the channel, or -ENOMEM when no channel could be allocated.
+ */
+static int dpaa_get_channel(void)
+{
+	spin_lock(&rx_pool_channel_init);
+	if (!rx_pool_channel) {
+		u32 new_pool;
+
+		if (!qman_alloc_pool(&new_pool))
+			rx_pool_channel = new_pool;
+	}
+	spin_unlock(&rx_pool_channel_init);
+
+	if (rx_pool_channel)
+		return rx_pool_channel;
+
+	return -ENOMEM;
+}
+
+/* Hand the shared pool channel (rx_pool_channel) back to QMan.
+ * NOTE(review): rx_pool_channel is not reset here and no lock is taken;
+ * presumably callers guarantee this runs once, after all users are gone -
+ * confirm against the call sites.
+ */
+static void dpaa_release_channel(void)
+{
+	qman_release_pool(rx_pool_channel);
+}
+
+/* Add the pool channel @channel to the static dequeue set of the affine
+ * portal of every CPU reported by qman_affine_cpus().
+ */
+static void dpaa_eth_add_channel(u16 channel)
+{
+	const cpumask_t *affine_cpus = qman_affine_cpus();
+	u32 pool_mask = QM_SDQCR_CHANNELS_POOL_CONV(channel);
+	int cpu;
+
+	for_each_cpu(cpu, affine_cpus)
+		qman_p_static_dequeue_add(qman_get_affine_portal(cpu),
+					  pool_mask);
+}
+
+/* Congestion group state change notification callback.
+ * On entering congestion: record the start time, stop all the egress queues
+ * of the device and count the event.
+ * On leaving congestion: add the elapsed time to the congested total and
+ * wake the egress queues.
+ */
+static void dpaa_eth_cgscn(struct qman_portal *qm, struct qman_cgr *cgr,
+			   int congested)
+{
+	struct dpaa_priv *priv = container_of(cgr, struct dpaa_priv,
+					      cgr_data.cgr);
+
+	if (!congested) {
+		priv->cgr_data.congested_jiffies +=
+			jiffies - priv->cgr_data.congestion_start_jiffies;
+		netif_tx_wake_all_queues(priv->net_dev);
+		return;
+	}
+
+	priv->cgr_data.congestion_start_jiffies = jiffies;
+	netif_tx_stop_all_queues(priv->net_dev);
+	priv->cgr_data.cgr_congested_count++;
+}
+
+/* Allocate and create the congestion group record (CGR) of the interface.
+ *
+ * The CGR is created with congestion state change notifications (delivered
+ * to dpaa_eth_cgscn()) and congestion taildrop enabled, using a threshold
+ * that depends on whether the MAC supports 10G.
+ *
+ * Returns 0 (or the non-negative qman_create_cgr() result) on success, or a
+ * negative error; on qman_create_cgr() failure the CGR id is released.
+ */
+static int dpaa_eth_cgr_init(struct dpaa_priv *priv)
+{
+	struct qm_mcc_initcgr initcgr;
+	u32 cs_th;
+	int err;
+
+	err = qman_alloc_cgrid(&priv->cgr_data.cgr.cgrid);
+	if (err < 0) {
+		if (netif_msg_drv(priv))
+			pr_err("%s: Error %d allocating CGR ID\n",
+			       __func__, err);
+		goto out_error;
+	}
+	priv->cgr_data.cgr.cb = dpaa_eth_cgscn;
+
+	/* initcgr lives on the stack and only a few of its fields are set
+	 * below; clear it so no stack garbage is passed to qman_create_cgr()
+	 */
+	memset(&initcgr, 0, sizeof(initcgr));
+
+	/* Enable Congestion State Change Notifications and CS taildrop */
+	initcgr.we_mask = QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES;
+	initcgr.cgr.cscn_en = QM_CGR_EN;
+
+	/* Set different thresholds based on the MAC speed.
+	 * This may turn suboptimal if the MAC is reconfigured at a speed
+	 * lower than its max, e.g. if a dTSEC later negotiates a 100Mbps link.
+	 * In such cases, we ought to reconfigure the threshold, too.
+	 */
+	if (priv->mac_dev->if_support & SUPPORTED_10000baseT_Full)
+		cs_th = DPAA_CS_THRESHOLD_10G;
+	else
+		cs_th = DPAA_CS_THRESHOLD_1G;
+	qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
+
+	initcgr.we_mask |= QM_CGR_WE_CSTD_EN;
+	initcgr.cgr.cstd_en = QM_CGR_EN;
+
+	err = qman_create_cgr(&priv->cgr_data.cgr, QMAN_CGR_FLAG_USE_INIT,
+			      &initcgr);
+	if (err < 0) {
+		if (netif_msg_drv(priv))
+			pr_err("%s: Error %d creating CGR with ID %d\n",
+			       __func__, err, priv->cgr_data.cgr.cgrid);
+		qman_release_cgrid(priv->cgr_data.cgr.cgrid);
+		goto out_error;
+	}
+	if (netif_msg_drv(priv))
+		pr_debug("Created CGR %d for netdev with hwaddr %pM on QMan channel %d\n",
+			 priv->cgr_data.cgr.cgrid, priv->mac_dev->addr,
+			 priv->cgr_data.cgr.chan);
+
+out_error:
+	return err;
+}
+
+/* Prepare @fq as an ingress queue: start from @template, bind it to the
+ * interface's net_device and pool channel, and mark it as not enqueueable.
+ */
+static inline void dpaa_setup_ingress(const struct dpaa_priv *priv,
+				      struct dpaa_fq *fq,
+				      const struct qman_fq *template)
+{
+	fq->fq_base = *template;
+
+	fq->channel = priv->channel;
+	fq->flags = QMAN_FQ_FLAG_NO_ENQUEUE;
+	fq->net_dev = priv->net_dev;
+}
+
+/* Prepare @fq as an egress queue: start from @template and bind it to the
+ * interface's net_device. With a @port the queue targets that port's QMan
+ * channel through a direct-connect portal; without one it is flagged as not
+ * to be modified.
+ */
+static inline void dpaa_setup_egress(const struct dpaa_priv *priv,
+				     struct dpaa_fq *fq,
+				     struct fman_port *port,
+				     const struct qman_fq *template)
+{
+	fq->fq_base = *template;
+	fq->net_dev = priv->net_dev;
+
+	if (!port) {
+		fq->flags = QMAN_FQ_FLAG_NO_MODIFY;
+		return;
+	}
+
+	fq->flags = QMAN_FQ_FLAG_TO_DCPORTAL;
+	fq->channel = (u16)fman_port_get_qman_channel_id(port);
+}
+
+/* Configure every frame queue on priv->dpaa_fq_list according to its type
+ * and fill the egress_fqs[] / conf_fqs[] lookup arrays of the interface.
+ *
+ * @fq_cbs supplies the qman_fq template (callbacks) for each FQ type and
+ * @tx_port is the FMan port the Tx queues are bound to.
+ */
+static void dpaa_fq_setup(struct dpaa_priv *priv,
+			  const struct dpaa_fq_cbs *fq_cbs,
+			  struct fman_port *tx_port)
+{
+	int egress_cnt = 0, conf_cnt = 0, num_portals = 0, cpu;
+	const cpumask_t *affine_cpus = qman_affine_cpus();
+	u16 portals[NR_CPUS];
+	struct dpaa_fq *fq;
+
+	/* portals[] is only filled here, not read back in this function;
+	 * the count is what matters for the sanity check below
+	 */
+	for_each_cpu(cpu, affine_cpus)
+		portals[num_portals++] = qman_affine_channel(cpu);
+	if (num_portals == 0)
+		dev_err(priv->net_dev->dev.parent,
+			"No Qman software (affine) channels found\n");
+
+	/* Initialize each FQ in the list */
+	list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
+		switch (fq->fq_type) {
+		case FQ_TYPE_RX_DEFAULT:
+			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq);
+			break;
+		case FQ_TYPE_RX_ERROR:
+			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_errq);
+			break;
+		case FQ_TYPE_TX:
+			dpaa_setup_egress(priv, fq, tx_port,
+					  &fq_cbs->egress_ern);
+			/* If we have more Tx queues than the number of cores,
+			 * just ignore the extra ones.
+			 */
+			if (egress_cnt < DPAA_ETH_TXQ_NUM)
+				priv->egress_fqs[egress_cnt++] = &fq->fq_base;
+			break;
+		case FQ_TYPE_TX_CONF_MQ:
+			priv->conf_fqs[conf_cnt++] = &fq->fq_base;
+			/* fall through */
+		case FQ_TYPE_TX_CONFIRM:
+			dpaa_setup_ingress(priv, fq, &fq_cbs->tx_defq);
+			break;
+		case FQ_TYPE_TX_ERROR:
+			dpaa_setup_ingress(priv, fq, &fq_cbs->tx_errq);
+			break;
+		default:
+			dev_warn(priv->net_dev->dev.parent,
+				 "Unknown FQ type detected!\n");
+			break;
+		}
+	}
+
+	/* Make sure all CPUs receive a corresponding Tx queue: if fewer than
+	 * DPAA_ETH_TXQ_NUM Tx FQs were found, reuse them round-robin.
+	 * NOTE(review): this loop does not terminate if the list holds no
+	 * FQ_TYPE_TX entry at all - presumably the caller always allocates
+	 * some; confirm.
+	 */
+	while (egress_cnt < DPAA_ETH_TXQ_NUM) {
+		list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
+			if (fq->fq_type != FQ_TYPE_TX)
+				continue;
+			priv->egress_fqs[egress_cnt++] = &fq->fq_base;
+			if (egress_cnt == DPAA_ETH_TXQ_NUM)
+				break;
+		}
+	}
+}
+
+/* Return the index of @tx_fq in priv->egress_fqs[], or -EINVAL when it is
+ * not one of the interface's egress queues.
+ */
+static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv,
+				   struct qman_fq *tx_fq)
+{
+	int queue_id = 0;
+
+	while (queue_id < DPAA_ETH_TXQ_NUM) {
+		if (priv->egress_fqs[queue_id] == tx_fq)
+			return queue_id;
+		queue_id++;
+	}
+
+	return -EINVAL;
+}
+
+/* Create the QMan frame queue behind @dpaa_fq and, unless the queue is
+ * flagged QMAN_FQ_FLAG_NO_MODIFY, initialize and schedule it.
+ *
+ * A zero fqid requests a dynamically allocated FQID; the FQID actually used
+ * is written back to dpaa_fq->fqid. @td_enable additionally programs a
+ * per-FQ taildrop threshold.
+ *
+ * Returns 0 on success or the qman_create_fq()/qman_init_fq() error; when
+ * qman_init_fq() fails the created FQ is destroyed again.
+ */
+static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable)
+{
+	const struct dpaa_priv	*priv;
+	struct qman_fq *confq = NULL;
+	struct qm_mcc_initfq initfq;
+	struct device *dev;
+	struct qman_fq *fq;
+	int queue_id;
+	int err;
+
+	priv = netdev_priv(dpaa_fq->net_dev);
+	dev = dpaa_fq->net_dev->dev.parent;
+
+	if (dpaa_fq->fqid == 0)
+		dpaa_fq->flags |= QMAN_FQ_FLAG_DYNAMIC_FQID;
+
+	/* remembered so that teardown knows whether to retire/OOS the FQ */
+	dpaa_fq->init = !(dpaa_fq->flags & QMAN_FQ_FLAG_NO_MODIFY);
+
+	err = qman_create_fq(dpaa_fq->fqid, dpaa_fq->flags, &dpaa_fq->fq_base);
+	if (err) {
+		dev_err(dev, "qman_create_fq() failed\n");
+		return err;
+	}
+	fq = &dpaa_fq->fq_base;
+
+	if (dpaa_fq->init) {
+		memset(&initfq, 0, sizeof(initfq));
+
+		initfq.we_mask = QM_INITFQ_WE_FQCTRL;
+		/* Note: we may get to keep an empty FQ in cache */
+		initfq.fqd.fq_ctrl = QM_FQCTRL_PREFERINCACHE;
+
+		/* Try to reduce the number of portal interrupts for
+		 * Tx Confirmation FQs.
+		 */
+		if (dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM)
+			initfq.fqd.fq_ctrl |= QM_FQCTRL_HOLDACTIVE;
+
+		/* FQ placement */
+		initfq.we_mask |= QM_INITFQ_WE_DESTWQ;
+
+		qm_fqd_set_destwq(&initfq.fqd, dpaa_fq->channel, dpaa_fq->wq);
+
+		/* Put all egress queues in a congestion group of their own.
+		 * Sensu stricto, the Tx confirmation queues are Rx FQs,
+		 * rather than Tx - but they nonetheless account for the
+		 * memory footprint on behalf of egress traffic. We therefore
+		 * place them in the netdev's CGR, along with the Tx FQs.
+		 */
+		if (dpaa_fq->fq_type == FQ_TYPE_TX ||
+		    dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM ||
+		    dpaa_fq->fq_type == FQ_TYPE_TX_CONF_MQ) {
+			initfq.we_mask |= QM_INITFQ_WE_CGID;
+			initfq.fqd.fq_ctrl |= QM_FQCTRL_CGE;
+			initfq.fqd.cgid = (u8)priv->cgr_data.cgr.cgrid;
+			/* Set a fixed overhead accounting, in an attempt to
+			 * reduce the impact of fixed-size skb shells and the
+			 * driver's needed headroom on system memory. This is
+			 * especially the case when the egress traffic is
+			 * composed of small datagrams.
+			 * Unfortunately, QMan's OAL value is capped to an
+			 * insufficient value, but even that is better than
+			 * no overhead accounting at all.
+			 */
+			initfq.we_mask |= QM_INITFQ_WE_OAC;
+			qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG);
+			qm_fqd_set_oal(&initfq.fqd,
+				       min(sizeof(struct sk_buff) +
+				       priv->tx_headroom,
+				       (size_t)FSL_QMAN_MAX_OAL));
+		}
+
+		if (td_enable) {
+			initfq.we_mask |= QM_INITFQ_WE_TDTHRESH;
+			qm_fqd_set_taildrop(&initfq.fqd, DPAA_FQ_TD, 1);
+			/* NOTE(review): plain assignment, not "|=" - this
+			 * discards the PREFERINCACHE/HOLDACTIVE/CGE bits set
+			 * above. Looks unintended; confirm before relying on
+			 * td_enable.
+			 */
+			initfq.fqd.fq_ctrl = QM_FQCTRL_TDE;
+		}
+
+		if (dpaa_fq->fq_type == FQ_TYPE_TX) {
+			/* a Tx FQ gets ContextA set only when a confirmation
+			 * FQ exists at the same index
+			 */
+			queue_id = dpaa_tx_fq_to_id(priv, &dpaa_fq->fq_base);
+			if (queue_id >= 0)
+				confq = priv->conf_fqs[queue_id];
+			if (confq) {
+				initfq.we_mask |= QM_INITFQ_WE_CONTEXTA;
+			/* ContextA: OVOM=1(use contextA2 bits instead of ICAD)
+			 *	     A2V=1 (contextA A2 field is valid)
+			 *	     A0V=1 (contextA A0 field is valid)
+			 *	     B0V=1 (contextB field is valid)
+			 * ContextA A2: EBD=1 (deallocate buffers inside FMan)
+			 * ContextB B0(ASPID): 0 (absolute Virtual Storage ID)
+			 */
+				initfq.fqd.context_a.hi = 0x1e000000;
+				initfq.fqd.context_a.lo = 0x80000000;
+			}
+		}
+
+		/* Put all the ingress queues in our "ingress CGR". */
+		if (priv->use_ingress_cgr &&
+		    (dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
+		     dpaa_fq->fq_type == FQ_TYPE_RX_ERROR)) {
+			initfq.we_mask |= QM_INITFQ_WE_CGID;
+			initfq.fqd.fq_ctrl |= QM_FQCTRL_CGE;
+			initfq.fqd.cgid = (u8)priv->ingress_cgr.cgrid;
+			/* Set a fixed overhead accounting, just like for the
+			 * egress CGR.
+			 * NOTE(review): the Tx headroom is used for ingress
+			 * queues too - confirm this is intended.
+			 */
+			initfq.we_mask |= QM_INITFQ_WE_OAC;
+			qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG);
+			qm_fqd_set_oal(&initfq.fqd,
+				       min(sizeof(struct sk_buff) +
+				       priv->tx_headroom,
+				       (size_t)FSL_QMAN_MAX_OAL));
+		}
+
+		/* Initialization common to all ingress queues */
+		if (dpaa_fq->flags & QMAN_FQ_FLAG_NO_ENQUEUE) {
+			initfq.we_mask |= QM_INITFQ_WE_CONTEXTA;
+			initfq.fqd.fq_ctrl |=
+				QM_FQCTRL_HOLDACTIVE;
+			initfq.fqd.context_a.stashing.exclusive =
+				QM_STASHING_EXCL_DATA | QM_STASHING_EXCL_CTX |
+				QM_STASHING_EXCL_ANNOTATION;
+			qm_fqd_set_stashing(&initfq.fqd, 1, 2,
+					    DIV_ROUND_UP(sizeof(struct qman_fq),
+							 64));
+		}
+
+		err = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &initfq);
+		if (err < 0) {
+			dev_err(dev, "qman_init_fq(%u) = %d\n",
+				qman_fq_fqid(fq), err);
+			qman_destroy_fq(fq);
+			return err;
+		}
+	}
+
+	/* record the FQID in use (relevant for dynamically allocated ones) */
+	dpaa_fq->fqid = qman_fq_fqid(fq);
+
+	return 0;
+}
+
+/* Tear down one frame queue: retire it and take it out of service (only if
+ * it was initialized by dpaa_fq_init()), destroy it and unlink its
+ * descriptor from the FQ list.
+ *
+ * Teardown always runs to completion. Returns the qman_retire_fq() result,
+ * or the qman_oos_fq() error when retiring did not fail; 0 for a queue that
+ * was never initialized.
+ */
+static int dpaa_fq_free_entry(struct device *dev, struct qman_fq *fq)
+{
+	const struct dpaa_priv *priv;
+	struct dpaa_fq *dpaa_fq;
+	int err, error;
+
+	err = 0;
+
+	dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+	priv = netdev_priv(dpaa_fq->net_dev);
+
+	if (dpaa_fq->init) {
+		err = qman_retire_fq(fq, NULL);
+		if (err < 0 && netif_msg_drv(priv))
+			dev_err(dev, "qman_retire_fq(%u) = %d\n",
+				qman_fq_fqid(fq), err);
+
+		error = qman_oos_fq(fq);
+		if (error < 0) {
+			/* only the log line depends on the message level;
+			 * the error itself is always reported to the caller
+			 */
+			if (netif_msg_drv(priv))
+				dev_err(dev, "qman_oos_fq(%u) = %d\n",
+					qman_fq_fqid(fq), error);
+			if (err >= 0)
+				err = error;
+		}
+	}
+
+	qman_destroy_fq(fq);
+	list_del(&dpaa_fq->list);
+
+	return err;
+}
+
+/* Tear down every frame queue on @list.
+ *
+ * All entries are processed even if some fail. Returns 0, or the first
+ * negative error reported by dpaa_fq_free_entry().
+ */
+static int dpaa_fq_free(struct device *dev, struct list_head *list)
+{
+	struct dpaa_fq *dpaa_fq, *tmp;
+	int err, error;
+
+	err = 0;
+	list_for_each_entry_safe(dpaa_fq, tmp, list, list) {
+		/* pass the embedded qman_fq by member address rather than by
+		 * casting the container, so this does not depend on fq_base
+		 * being the first member of struct dpaa_fq
+		 */
+		error = dpaa_fq_free_entry(dev, &dpaa_fq->fq_base);
+		if (error < 0 && err >= 0)
+			err = error;
+	}
+
+	return err;
+}
+
+/* Configure and initialize the FMan Tx port.
+ *
+ * @errq and @defq provide the error and default (confirmation) FQIDs;
+ * @buf_layout provides the private data size of the buffer prefix.
+ * Each failing step is logged; the remaining steps are still attempted.
+ */
+static void dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
+				  struct dpaa_fq *defq,
+				  struct dpaa_buffer_layout *buf_layout)
+{
+	struct fman_buffer_prefix_content prefix;
+	struct fman_port_params params;
+
+	memset(&params, 0, sizeof(params));
+	params.specific_params.non_rx_params.err_fqid = errq->fqid;
+	params.specific_params.non_rx_params.dflt_fqid = defq->fqid;
+
+	memset(&prefix, 0, sizeof(prefix));
+	prefix.priv_data_size = buf_layout->priv_data_size;
+	prefix.pass_prs_result = true;
+	prefix.pass_hash_result = true;
+	prefix.pass_time_stamp = false;
+	prefix.data_align = DPAA_FD_DATA_ALIGNMENT;
+
+	if (fman_port_config(port, &params))
+		pr_err("%s: fman_port_config failed\n", __func__);
+
+	if (fman_port_cfg_buf_prefix_content(port, &prefix))
+		pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
+		       __func__);
+
+	if (fman_port_init(port))
+		pr_err("%s: fm_port_init failed\n", __func__);
+}
+
+/* Configure and initialize the FMan Rx port.
+ *
+ * @bps / @count describe the buffer pools the port draws from (at most as
+ * many as the port's ext_buf_pool[] array holds); @errq and @defq provide
+ * the error and default FQIDs; @buf_layout provides the private data size
+ * of the buffer prefix.
+ * Each failing step is logged; the remaining steps are still attempted.
+ */
+static void dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp **bps,
+				  size_t count, struct dpaa_fq *errq,
+				  struct dpaa_fq *defq,
+				  struct dpaa_buffer_layout *buf_layout)
+{
+	struct fman_buffer_prefix_content prefix;
+	struct fman_port_rx_params *rx_p;
+	struct fman_port_params params;
+	int pool_idx;
+
+	memset(&params, 0, sizeof(params));
+	memset(&prefix, 0, sizeof(prefix));
+
+	prefix.priv_data_size = buf_layout->priv_data_size;
+	prefix.pass_prs_result = true;
+	prefix.pass_hash_result = true;
+	prefix.pass_time_stamp = false;
+	prefix.data_align = DPAA_FD_DATA_ALIGNMENT;
+
+	rx_p = &params.specific_params.rx_params;
+	rx_p->err_fqid = errq->fqid;
+	rx_p->dflt_fqid = defq->fqid;
+
+	/* never describe more pools than the port parameters can hold */
+	count = min(ARRAY_SIZE(rx_p->ext_buf_pools.ext_buf_pool), count);
+	rx_p->ext_buf_pools.num_of_pools_used = (u8)count;
+	for (pool_idx = 0; pool_idx < count; pool_idx++) {
+		rx_p->ext_buf_pools.ext_buf_pool[pool_idx].id =
+			bps[pool_idx]->bpid;
+		rx_p->ext_buf_pools.ext_buf_pool[pool_idx].size =
+			(u16)bps[pool_idx]->size;
+	}
+
+	if (fman_port_config(port, &params))
+		pr_err("%s: fman_port_config failed\n", __func__);
+
+	if (fman_port_cfg_buf_prefix_content(port, &prefix))
+		pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
+		       __func__);
+
+	if (fman_port_init(port))
+		pr_err("%s: fm_port_init failed\n", __func__);
+}
+
+/* Initialize the Tx port and then the Rx port of @mac_dev, each with its own
+ * entry of @buf_layout and its error/default queues from @port_fqs.
+ * @dev is not used by this function.
+ */
+static void dpaa_eth_init_ports(struct mac_device *mac_dev,
+				struct dpaa_bp **bps, size_t count,
+				struct fm_port_fqs *port_fqs,
+				struct dpaa_buffer_layout *buf_layout,
+				struct device *dev)
+{
+	struct fman_port *rx_port = mac_dev->port[RX];
+	struct fman_port *tx_port = mac_dev->port[TX];
+
+	dpaa_eth_init_tx_port(tx_port, port_fqs->tx_errq, port_fqs->tx_defq,
+			      &buf_layout[TX]);
+	dpaa_eth_init_rx_port(rx_port, bps, count, port_fqs->rx_errq,
+			      port_fqs->rx_defq, &buf_layout[RX]);
+}
+
+/* Release @cnt buffers from @bmb into the pool of @dpaa_bp.
+ *
+ * Returns the number of buffers that actually went into the pool: @cnt on
+ * success, 0 when bman_release() failed. On failure the buffers are passed
+ * to free_buf_cb (when set) so that they are not leaked.
+ */
+static int dpaa_bman_release(const struct dpaa_bp *dpaa_bp,
+			     struct bm_buffer *bmb, int cnt)
+{
+	int err;
+
+	err = bman_release(dpaa_bp->pool, bmb, cnt);
+	if (likely(!err))
+		return cnt;
+
+	/* Should never occur, address anyway to avoid leaking the buffers */
+	WARN_ON(1);
+	if (dpaa_bp->free_buf_cb)
+		while (cnt-- > 0)
+			dpaa_bp->free_buf_cb(dpaa_bp, &bmb[cnt]);
+
+	/* nothing reached the pool; callers add the result to their buffer
+	 * count, so it must not be the requested count nor negative
+	 */
+	return 0;
+}
+
+/* Release the buffers referenced by the scatter/gather table @sgt back to
+ * their buffer pools.
+ *
+ * The table is walked up to and including its final entry. Consecutive
+ * entries that share a bpid are batched (up to DPAA_BUFF_RELEASE_MAX at a
+ * time) into a single dpaa_bman_release() call. The walk stops early if an
+ * entry names a bpid with no registered pool.
+ */
+static void dpaa_release_sgt_members(struct qm_sg_entry *sgt)
+{
+	struct bm_buffer bmb[DPAA_BUFF_RELEASE_MAX];
+	struct dpaa_bp *dpaa_bp;
+	int i = 0, j;
+
+	memset(bmb, 0, sizeof(bmb));
+
+	do {
+		/* pool of the first entry of this batch */
+		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+		if (!dpaa_bp)
+			return;
+
+		j = 0;
+		do {
+			WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
+
+			bm_buffer_set64(&bmb[j], qm_sg_entry_get64(&sgt[i]));
+
+			j++; i++;
+		/* keep batching while there is room, the entry just consumed
+		 * was not the final one and the next entry uses the same pool
+		 */
+		} while (j < ARRAY_SIZE(bmb) &&
+				!qm_sg_entry_is_final(&sgt[i - 1]) &&
+				sgt[i - 1].bpid == sgt[i].bpid);
+
+		dpaa_bman_release(dpaa_bp, bmb, j);
+	} while (!qm_sg_entry_is_final(&sgt[i - 1]));
+}
+
+/* Return the buffer(s) of frame descriptor @fd to the buffer pool(s).
+ *
+ * For a contiguous frame the single buffer is released to the pool named by
+ * fd->bpid. For a scatter/gather frame the buffers listed in the S/G table
+ * are released first, then the buffer holding the table itself is re-mapped
+ * and released. Nothing is done if fd->bpid has no registered pool.
+ * @net_dev is not used by this function.
+ */
+static void dpaa_fd_release(const struct net_device *net_dev,
+			    const struct qm_fd *fd)
+{
+	struct qm_sg_entry *sgt;
+	struct dpaa_bp *dpaa_bp;
+	struct bm_buffer bmb;
+	dma_addr_t addr;
+	void *vaddr;
+
+	bmb.data = 0;
+	bm_buffer_set64(&bmb, qm_fd_addr(fd));
+
+	dpaa_bp = dpaa_bpid2pool(fd->bpid);
+	if (!dpaa_bp)
+		return;
+
+	if (qm_fd_get_format(fd) == qm_fd_sg) {
+		vaddr = phys_to_virt(qm_fd_addr(fd));
+		sgt = vaddr + qm_fd_get_offset(fd);
+
+		/* unmap the table buffer before the CPU reads the S/G table */
+		dma_unmap_single(dpaa_bp->dev, qm_fd_addr(fd), dpaa_bp->size,
+				 DMA_FROM_DEVICE);
+
+		dpaa_release_sgt_members(sgt);
+
+		/* map the table buffer again so it can go back to the pool */
+		addr = dma_map_single(dpaa_bp->dev, vaddr, dpaa_bp->size,
+				      DMA_FROM_DEVICE);
+		if (dma_mapping_error(dpaa_bp->dev, addr)) {
+			/* NOTE(review): the table buffer is neither released
+			 * nor freed on this path - looks like a leak; confirm
+			 */
+			dev_err(dpaa_bp->dev, "DMA mapping failed");
+			return;
+		}
+		bm_buffer_set64(&bmb, addr);
+	}
+
+	dpaa_bman_release(dpaa_bp, &bmb, 1);
+}
+
+/* Bump the per-CPU enqueue rejection counter that matches the rejection
+ * code carried by @msg. Codes without a dedicated counter are ignored.
+ */
+static void count_ern(struct dpaa_percpu_priv *percpu_priv,
+		      const union qm_mr_entry *msg)
+{
+	switch (msg->ern.rc & QM_MR_RC_MASK) {
+	case QM_MR_RC_CGR_TAILDROP:
+		percpu_priv->ern_cnt.cg_tdrop++;
+		return;
+	case QM_MR_RC_WRED:
+		percpu_priv->ern_cnt.wred++;
+		return;
+	case QM_MR_RC_ERROR:
+		percpu_priv->ern_cnt.err_cond++;
+		return;
+	case QM_MR_RC_ORPWINDOW_EARLY:
+		percpu_priv->ern_cnt.early_window++;
+		return;
+	case QM_MR_RC_ORPWINDOW_LATE:
+		percpu_priv->ern_cnt.late_window++;
+		return;
+	case QM_MR_RC_FQ_TAILDROP:
+		percpu_priv->ern_cnt.fq_tdrop++;
+		return;
+	case QM_MR_RC_ORPWINDOW_RETIRED:
+		percpu_priv->ern_cnt.fq_retired++;
+		return;
+	case QM_MR_RC_ORP_ZERO:
+		percpu_priv->ern_cnt.orp_zero++;
+		return;
+	default:
+		return;
+	}
+}
+
+/* Request hardware checksum computation for an outgoing frame.
+ *
+ * Nothing is done unless the stack asked for checksum offload
+ * (CHECKSUM_PARTIAL). Otherwise the Parse Results area at @parse_results is
+ * filled in with the L3/L4 protocol types and header offsets, as if they
+ * came from the FMan parser, and the checksum bits are set in fd->cmd.
+ *
+ * Only IPv4 and IPv6 (optionally VLAN tagged) carrying TCP or UDP are
+ * supported.
+ *
+ * Returns 0 if all goes well (or HW csum doesn't apply), and -EIO for an
+ * unsupported L3 or L4 protocol.
+ *
+ * Note that this function may modify the fd->cmd field and the skb data buffer
+ * (the Parse Results area).
+ */
+static int dpaa_enable_tx_csum(struct dpaa_priv *priv,
+			       struct sk_buff *skb,
+			       struct qm_fd *fd,
+			       char *parse_results)
+{
+	struct fman_prs_result *prs = (struct fman_prs_result *)parse_results;
+	u16 l3_proto = ntohs(skb->protocol);
+	struct ipv6hdr *ip6;
+	struct iphdr *ip4;
+	u8 l4_proto;
+
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return 0;
+
+	/* Note: L3 csum seems to be already computed in sw, but we can't choose
+	 * L4 alone from the FM configuration anyway.
+	 */
+
+	/* For VLAN frames look at the encapsulated Ethernet type. The MAC
+	 * header can't always be assumed to be set correctly by the stack,
+	 * so it is reset to the beginning of skb->data first.
+	 */
+	if (l3_proto == ETH_P_8021Q) {
+		skb_reset_mac_header(skb);
+		l3_proto = ntohs(vlan_eth_hdr(skb)->h_vlan_encapsulated_proto);
+	}
+
+	/* L3 parse result and L4 protocol type */
+	if (l3_proto == ETH_P_IP) {
+		prs->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV4);
+		ip4 = ip_hdr(skb);
+		WARN_ON(!ip4);
+		l4_proto = ip4->protocol;
+	} else if (l3_proto == ETH_P_IPV6) {
+		prs->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV6);
+		ip6 = ipv6_hdr(skb);
+		WARN_ON(!ip6);
+		l4_proto = ip6->nexthdr;
+	} else {
+		/* We shouldn't even be here */
+		if (net_ratelimit())
+			netif_alert(priv, tx_err, priv->net_dev,
+				    "Can't compute HW csum for L3 proto 0x%x\n",
+				    ntohs(skb->protocol));
+		return -EIO;
+	}
+
+	/* L4 parse result */
+	if (l4_proto == IPPROTO_UDP) {
+		prs->l4r = FM_L4_PARSE_RESULT_UDP;
+	} else if (l4_proto == IPPROTO_TCP) {
+		prs->l4r = FM_L4_PARSE_RESULT_TCP;
+	} else {
+		if (net_ratelimit())
+			netif_alert(priv, tx_err, priv->net_dev,
+				    "Can't compute HW csum for L4 proto 0x%x\n",
+				    l4_proto);
+		return -EIO;
+	}
+
+	/* At index 0 is IPOffset_1 as defined in the Parse Results */
+	prs->ip_off[0] = (u8)skb_network_offset(skb);
+	prs->l4_off = (u8)skb_transport_offset(skb);
+
+	/* Enable L3 (and L4, if TCP or UDP) HW checksum. */
+	fd->cmd |= FM_FD_CMD_RPD | FM_FD_CMD_DTC;
+
+	/* On P1023 and similar platforms fd->cmd interpretation could
+	 * be disabled by setting CONTEXT_A bit ICMD; currently this bit
+	 * is not set so we do not need to check; in the future, if/when
+	 * using context_a we need to check this bit
+	 */
+
+	return 0;
+}
+
+/* Allocate, DMA-map and release up to 8 new buffers into the pool of
+ * @dpaa_bp.
+ *
+ * If an allocation or mapping fails part-way, the buffers prepared so far
+ * are still released to the pool. Returns the dpaa_bman_release() result
+ * for the buffers handed over, or 0 when not even one buffer could be
+ * prepared.
+ */
+static int dpaa_bp_add_8_bufs(const struct dpaa_bp *dpaa_bp)
+{
+	struct device *dev = dpaa_bp->dev;
+	struct bm_buffer bmb[8];
+	dma_addr_t addr;
+	void *raw_buf;
+	void *new_buf;
+	u8 i;
+
+	for (i = 0; i < 8; i++) {
+		raw_buf = netdev_alloc_frag(dpaa_bp->raw_size);
+		if (unlikely(!raw_buf)) {
+			dev_err(dev, "netdev_alloc_frag() failed, size %zu\n",
+				dpaa_bp->raw_size);
+			goto release_previous_buffs;
+		}
+		new_buf = PTR_ALIGN(raw_buf, SMP_CACHE_BYTES);
+
+		addr = dma_map_single(dev, new_buf,
+				      dpaa_bp->size, DMA_FROM_DEVICE);
+		if (unlikely(dma_mapping_error(dev, addr))) {
+			dev_err(dpaa_bp->dev, "DMA map failed\n");
+			/* this fragment will never reach the pool; give it
+			 * back instead of leaking it
+			 */
+			skb_free_frag(raw_buf);
+			goto release_previous_buffs;
+		}
+
+		bmb[i].data = 0;
+		bm_buffer_set64(&bmb[i], addr);
+	}
+
+	return dpaa_bman_release(dpaa_bp, bmb, i);
+
+release_previous_buffs:
+	WARN_ONCE(1, "dpaa_eth: failed to add buffers on Rx\n");
+
+	bm_buffer_set64(&bmb[i], 0);
+	/* Avoid releasing a completely null buffer; bman_release() requires
+	 * at least one buffer.
+	 */
+	if (likely(i))
+		return dpaa_bman_release(dpaa_bp, bmb, i);
+
+	return 0;
+}
+
+/* Seed the buffer pool: for every possible CPU, add buffers in batches of 8
+ * until "config_count" has been covered, accounting the buffers added in
+ * that CPU's counter. Always returns 0.
+ */
+static int dpaa_bp_seed(struct dpaa_bp *dpaa_bp)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		int *cpu_count = per_cpu_ptr(dpaa_bp->percpu_count, cpu);
+		int requested = 0;
+
+		/* Although we access another CPU's counters here
+		 * we do it at boot time so it is safe
+		 */
+		while (requested < dpaa_bp->config_count) {
+			*cpu_count += dpaa_bp_add_8_bufs(dpaa_bp);
+			requested += 8;
+		}
+	}
+
+	return 0;
+}
+
+/* Top up the buffer pool once the buffer count in *countptr has fallen
+ * below FSL_DPAA_ETH_REFILL_THRESHOLD, adding batches of buffers until
+ * FSL_DPAA_ETH_MAX_BUF_COUNT is reached. *countptr is updated with the new
+ * count.
+ *
+ * Returns 0 when no refill was needed or the pool was filled up, and
+ * -ENOMEM when the refill stopped short of the maximum.
+ */
+static int dpaa_eth_refill_bpool(struct dpaa_bp *dpaa_bp, int *countptr)
+{
+	int buf_count = *countptr;
+	int added;
+
+	if (likely(buf_count >= FSL_DPAA_ETH_REFILL_THRESHOLD))
+		return 0;
+
+	do {
+		added = dpaa_bp_add_8_bufs(dpaa_bp);
+		/* Avoid looping forever if we've temporarily run out of
+		 * memory. We'll try again at the next NAPI cycle.
+		 */
+		if (unlikely(!added))
+			break;
+		buf_count += added;
+	} while (buf_count < FSL_DPAA_ETH_MAX_BUF_COUNT);
+
+	*countptr = buf_count;
+	if (unlikely(buf_count < FSL_DPAA_ETH_MAX_BUF_COUNT))
+		return -ENOMEM;
+
+	return 0;
+}
+
+/* Refill every buffer pool of the interface, using the current CPU's buffer
+ * counter of each pool.
+ *
+ * Stops at the first problem: returns -EINVAL for a missing pool, or the
+ * dpaa_eth_refill_bpool() error; 0 when all pools were processed.
+ */
+static int dpaa_eth_refill_bpools(struct dpaa_priv *priv)
+{
+	int pool_idx;
+
+	for (pool_idx = 0; pool_idx < DPAA_BPS_NUM; pool_idx++) {
+		struct dpaa_bp *dpaa_bp = priv->dpaa_bps[pool_idx];
+		int ret;
+
+		if (!dpaa_bp)
+			return -EINVAL;
+
+		ret = dpaa_eth_refill_bpool(dpaa_bp,
+					    this_cpu_ptr(dpaa_bp->percpu_count));
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+/* Cleanup function for outgoing frame descriptors that were built on Tx path,
+ * either contiguous frames or scatter/gather ones.
+ * Skb freeing is not handled here.
+ *
+ * This function may be called on error paths in the Tx function, so guard
+ * against cases when not all fd relevant fields were filled in.
+ *
+ * For S/G frames the table holds 1 + nr_frags entries: sgt[0] describes the
+ * linear part of the skb and sgt[1..nr_frags] describe its page fragments.
+ * All of them are unmapped here, then the buffer holding the table is freed.
+ *
+ * Return the skb backpointer, since for S/G frames the buffer containing it
+ * gets freed here.
+ */
+static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
+					  const struct qm_fd *fd)
+{
+	const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
+	struct device *dev = priv->net_dev->dev.parent;
+	dma_addr_t addr = qm_fd_addr(fd);
+	const struct qm_sg_entry *sgt;
+	struct sk_buff **skbh, *skb;
+	int nr_frags, i;
+
+	/* the skb backpointer is stored at the start of the buffer */
+	skbh = (struct sk_buff **)phys_to_virt(addr);
+	skb = *skbh;
+
+	if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
+		nr_frags = skb_shinfo(skb)->nr_frags;
+		dma_unmap_single(dev, addr, qm_fd_get_offset(fd) +
+				 sizeof(struct qm_sg_entry) * (1 + nr_frags),
+				 dma_dir);
+
+		/* The sgt buffer has been allocated with netdev_alloc_frag(),
+		 * it's from lowmem.
+		 */
+		sgt = phys_to_virt(addr + qm_fd_get_offset(fd));
+
+		/* sgt[0] is from lowmem, was dma_map_single()-ed */
+		dma_unmap_single(dev, qm_sg_addr(&sgt[0]),
+				 qm_sg_entry_get_len(&sgt[0]), dma_dir);
+
+		/* remaining pages were mapped with skb_frag_dma_map();
+		 * the last fragment lives at index nr_frags, so the bound
+		 * is inclusive
+		 */
+		for (i = 1; i <= nr_frags; i++) {
+			WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
+
+			dma_unmap_page(dev, qm_sg_addr(&sgt[i]),
+				       qm_sg_entry_get_len(&sgt[i]), dma_dir);
+		}
+
+		/* Free the page frag that we allocated on Tx */
+		skb_free_frag(phys_to_virt(addr));
+	} else {
+		dma_unmap_single(dev, addr,
+				 skb_tail_pointer(skb) - (u8 *)skbh, dma_dir);
+	}
+
+	return skb;
+}
+
+/* Wrap a received contiguous buffer into a linear skb.
+ *
+ * The buffer is expected to leave room at its end for the skb shared info
+ * area. On any failure the buffer is freed and NULL is returned.
+ */
+static struct sk_buff *contig_fd_to_skb(const struct dpaa_priv *priv,
+					const struct qm_fd *fd)
+{
+	void *buf = phys_to_virt(qm_fd_addr(fd));
+	ssize_t headroom = qm_fd_get_offset(fd);
+	struct dpaa_bp *pool;
+	struct sk_buff *skb;
+
+	WARN_ON(!IS_ALIGNED((unsigned long)buf, SMP_CACHE_BYTES));
+
+	pool = dpaa_bpid2pool(fd->bpid);
+	if (!pool) {
+		skb_free_frag(buf);
+		return NULL;
+	}
+
+	skb = build_skb(buf, pool->size +
+			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
+	if (unlikely(!skb)) {
+		WARN_ONCE(1, "Build skb failure on Rx\n");
+		skb_free_frag(buf);
+		return NULL;
+	}
+
+	/* The frame data starts after the headroom reported by the FD */
+	WARN_ON(headroom != priv->rx_headroom);
+	skb_reserve(skb, headroom);
+	skb_put(skb, qm_fd_get_length(fd));
+
+	skb->ip_summed = CHECKSUM_NONE;
+
+	return skb;
+}
+
+/* Build an skb with the data of the first S/G entry in the linear portion and
+ * the rest of the frame as skb fragments.
+ *
+ * The page fragment holding the S/G Table is recycled here.
+ *
+ * Each data buffer taken from a pool decrements that pool's per-CPU count.
+ * Returns the skb, or NULL after freeing all the buffers of the frame when a
+ * pool lookup or the skb construction fails.
+ */
+static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
+				    const struct qm_fd *fd)
+{
+	ssize_t fd_off = qm_fd_get_offset(fd);
+	dma_addr_t addr = qm_fd_addr(fd);
+	const struct qm_sg_entry *sgt;
+	struct page *page, *head_page;
+	struct dpaa_bp *dpaa_bp;
+	void *vaddr, *sg_vaddr;
+	int frag_off, frag_len;
+	struct sk_buff *skb;
+	dma_addr_t sg_addr;
+	int page_offset;
+	unsigned int sz;
+	int *count_ptr;
+	int i;
+
+	vaddr = phys_to_virt(addr);
+	WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES));
+
+	/* Iterate through the SGT entries and add data buffers to the skb */
+	sgt = vaddr + fd_off;
+	for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) {
+		/* Extension bit is not supported */
+		WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
+
+		sg_addr = qm_sg_addr(&sgt[i]);
+		sg_vaddr = phys_to_virt(sg_addr);
+		WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr,
+				    SMP_CACHE_BYTES));
+
+		/* We may use multiple Rx pools */
+		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+		if (!dpaa_bp)
+			goto free_buffers;
+
+		count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+		dma_unmap_single(dpaa_bp->dev, sg_addr, dpaa_bp->size,
+				 DMA_FROM_DEVICE);
+		if (i == 0) {
+			sz = dpaa_bp->size +
+				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+			skb = build_skb(sg_vaddr, sz);
+			if (WARN_ON(unlikely(!skb)))
+				goto free_buffers;
+
+			skb->ip_summed = CHECKSUM_NONE;
+
+			/* Make sure forwarded skbs will have enough space
+			 * on Tx, if extra headers are added.
+			 */
+			WARN_ON(fd_off != priv->rx_headroom);
+			skb_reserve(skb, fd_off);
+			skb_put(skb, qm_sg_entry_get_len(&sgt[i]));
+		} else {
+			/* Not the first S/G entry; all data from buffer will
+			 * be added in an skb fragment; fragment index is offset
+			 * by one since first S/G entry was incorporated in the
+			 * linear part of the skb.
+			 *
+			 * Caution: 'page' may be a tail page.
+			 */
+			page = virt_to_page(sg_vaddr);
+			head_page = virt_to_head_page(sg_vaddr);
+
+			/* Compute offset in (possibly tail) page */
+			page_offset = ((unsigned long)sg_vaddr &
+					(PAGE_SIZE - 1)) +
+				(page_address(page) - page_address(head_page));
+			/* page_offset only refers to the beginning of sgt[i];
+			 * but the buffer itself may have an internal offset.
+			 */
+			frag_off = qm_sg_entry_get_off(&sgt[i]) + page_offset;
+			frag_len = qm_sg_entry_get_len(&sgt[i]);
+			/* skb_add_rx_frag() does no checking on the page; if
+			 * we pass it a tail page, we'll end up with
+			 * bad page accounting and eventually with segfaults.
+			 */
+			skb_add_rx_frag(skb, i - 1, head_page, frag_off,
+					frag_len, dpaa_bp->size);
+		}
+		/* Update the pool count for the current {cpu x bpool} */
+		(*count_ptr)--;
+
+		if (qm_sg_entry_is_final(&sgt[i]))
+			break;
+	}
+	WARN_ONCE(i == DPAA_SGT_MAX_ENTRIES, "No final bit on SGT\n");
+
+	/* free the SG table buffer */
+	skb_free_frag(vaddr);
+
+	return skb;
+
+free_buffers:
+	/* compensate sw bpool counter changes: entries 0 .. i-1 already had
+	 * their counter decremented above and the loop below decrements every
+	 * entry again, so undo the first decrement for all of them, entry 0
+	 * included.
+	 */
+	for (i--; i >= 0; i--) {
+		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+		if (dpaa_bp) {
+			count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+			(*count_ptr)++;
+		}
+	}
+	/* free all the SG entries */
+	for (i = 0; i < DPAA_SGT_MAX_ENTRIES ; i++) {
+		sg_addr = qm_sg_addr(&sgt[i]);
+		sg_vaddr = phys_to_virt(sg_addr);
+		skb_free_frag(sg_vaddr);
+		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
+		if (dpaa_bp) {
+			count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+			(*count_ptr)--;
+		}
+
+		if (qm_sg_entry_is_final(&sgt[i]))
+			break;
+	}
+	/* free the SGT fragment */
+	skb_free_frag(vaddr);
+
+	return NULL;
+}
+
+/* Describe a linear skb with a contiguous Tx frame descriptor.
+ *
+ * The buffer handed to the hardware starts tx_headroom bytes before
+ * skb->data; its first bytes hold a backpointer to the skb. Returns 0 on
+ * success, the negative result of dpaa_enable_tx_csum() if that fails, or
+ * -EINVAL if the DMA mapping fails. @offset is not used by this function.
+ */
+static int skb_to_contig_fd(struct dpaa_priv *priv,
+			    struct sk_buff *skb, struct qm_fd *fd,
+			    int *offset)
+{
+	struct net_device *net_dev = priv->net_dev;
+	struct device *dev = net_dev->dev.parent;
+	/* We are guaranteed to have at least tx_headroom bytes
+	 * available, so the buffer simply starts that far before the data.
+	 */
+	unsigned char *buf_start = skb->data - priv->tx_headroom;
+	struct sk_buff **backptr = (struct sk_buff **)buf_start;
+	dma_addr_t dma;
+	int ret;
+
+	fd->bpid = FSL_DPAA_BPID_INV;
+	*backptr = skb;
+
+	/* Enable L3/L4 hardware checksum computation.
+	 *
+	 * This has to precede dma_map_single(DMA_TO_DEVICE), because it may
+	 * need to write into the skb.
+	 */
+	ret = dpaa_enable_tx_csum(priv, skb, fd,
+				  ((char *)backptr) + DPAA_TX_PRIV_DATA_SIZE);
+	if (unlikely(ret < 0)) {
+		if (net_ratelimit())
+			netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
+				  ret);
+		return ret;
+	}
+
+	/* Fill in the rest of the FD fields */
+	qm_fd_set_contig(fd, priv->tx_headroom, skb->len);
+	fd->cmd |= FM_FD_CMD_FCO;
+
+	/* Map the entire buffer size that may be seen by FMan, but no more */
+	dma = dma_map_single(dev, backptr,
+			     skb_tail_pointer(skb) - buf_start,
+			     DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(dev, dma))) {
+		if (net_ratelimit())
+			netif_err(priv, tx_err, net_dev, "dma_map_single() failed\n");
+		return -EINVAL;
+	}
+	qm_fd_addr_set64(fd, dma);
+
+	return 0;
+}
+
+/* Describe a nonlinear skb with a scatter/gather Tx frame descriptor.
+ *
+ * A page fragment is allocated to hold tx_headroom bytes followed by a table
+ * of 1 + nr_frags entries: sgt[0] covers the linear part of the skb and
+ * sgt[i] covers skb fragment i - 1. The skb backpointer is stored at the
+ * start of that buffer.
+ *
+ * Returns 0 on success. On failure every mapping created so far is undone,
+ * the table buffer is freed and a negative errno is returned.
+ */
+static int skb_to_sg_fd(struct dpaa_priv *priv,
+			struct sk_buff *skb, struct qm_fd *fd)
+{
+	const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
+	const int nr_frags = skb_shinfo(skb)->nr_frags;
+	struct net_device *net_dev = priv->net_dev;
+	struct device *dev = net_dev->dev.parent;
+	struct qm_sg_entry *sgt;
+	struct sk_buff **skbh;
+	int i, j, err, sz;
+	void *buffer_start;
+	skb_frag_t *frag;
+	dma_addr_t addr;
+	size_t frag_len;
+	void *sgt_buf;
+
+	/* get a page frag to store the SGTable */
+	sz = SKB_DATA_ALIGN(priv->tx_headroom +
+		sizeof(struct qm_sg_entry) * (1 + nr_frags));
+	sgt_buf = netdev_alloc_frag(sz);
+	if (unlikely(!sgt_buf)) {
+		netdev_err(net_dev, "netdev_alloc_frag() failed for size %d\n",
+			   sz);
+		return -ENOMEM;
+	}
+
+	/* Enable L3/L4 hardware checksum computation.
+	 *
+	 * We must do this before dma_map_single(DMA_TO_DEVICE), because we may
+	 * need to write into the skb.
+	 */
+	err = dpaa_enable_tx_csum(priv, skb, fd,
+				  sgt_buf + DPAA_TX_PRIV_DATA_SIZE);
+	if (unlikely(err < 0)) {
+		if (net_ratelimit())
+			netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
+				  err);
+		goto csum_failed;
+	}
+
+	sgt = (struct qm_sg_entry *)(sgt_buf + priv->tx_headroom);
+	qm_sg_entry_set_len(&sgt[0], skb_headlen(skb));
+	sgt[0].bpid = FSL_DPAA_BPID_INV;
+	sgt[0].offset = 0;
+	addr = dma_map_single(dev, skb->data,
+			      skb_headlen(skb), dma_dir);
+	if (unlikely(dma_mapping_error(dev, addr))) {
+		dev_err(dev, "DMA mapping failed");
+		err = -EINVAL;
+		goto sg0_map_failed;
+	}
+	qm_sg_entry_set64(&sgt[0], addr);
+
+	/* Length of the last entry written so far; it is re-applied together
+	 * with the final bit after the loop. Starting from the head length
+	 * keeps sgt[0] correct should there be no page fragments at all.
+	 */
+	frag_len = skb_headlen(skb);
+
+	/* populate the rest of SGT entries */
+	for (i = 1; i <= nr_frags; i++) {
+		/* Take the length from the fragment being mapped in this
+		 * iteration, not from the previous one.
+		 */
+		frag = &skb_shinfo(skb)->frags[i - 1];
+		frag_len = frag->size;
+
+		WARN_ON(!skb_frag_page(frag));
+		addr = skb_frag_dma_map(dev, frag, 0,
+					frag_len, dma_dir);
+		if (unlikely(dma_mapping_error(dev, addr))) {
+			dev_err(dev, "DMA mapping failed");
+			err = -EINVAL;
+			goto sg_map_failed;
+		}
+
+		qm_sg_entry_set_len(&sgt[i], frag_len);
+		sgt[i].bpid = FSL_DPAA_BPID_INV;
+		sgt[i].offset = 0;
+
+		/* keep the offset in the address */
+		qm_sg_entry_set64(&sgt[i], addr);
+	}
+	/* Mark the last used entry as final */
+	qm_sg_entry_set_f(&sgt[i - 1], frag_len);
+
+	qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
+
+	/* DMA map the SGT page */
+	buffer_start = (void *)sgt - priv->tx_headroom;
+	skbh = (struct sk_buff **)buffer_start;
+	*skbh = skb;
+
+	addr = dma_map_single(dev, buffer_start, priv->tx_headroom +
+			      sizeof(struct qm_sg_entry) * (1 + nr_frags),
+			      dma_dir);
+	if (unlikely(dma_mapping_error(dev, addr))) {
+		dev_err(dev, "DMA mapping failed");
+		err = -EINVAL;
+		goto sgt_map_failed;
+	}
+
+	fd->bpid = FSL_DPAA_BPID_INV;
+	fd->cmd |= FM_FD_CMD_FCO;
+	qm_fd_addr_set64(fd, addr);
+
+	return 0;
+
+sgt_map_failed:
+sg_map_failed:
+	/* entries 0 .. i-1 were successfully mapped */
+	for (j = 0; j < i; j++)
+		dma_unmap_page(dev, qm_sg_addr(&sgt[j]),
+			       qm_sg_entry_get_len(&sgt[j]), dma_dir);
+sg0_map_failed:
+csum_failed:
+	skb_free_frag(sgt_buf);
+
+	return err;
+}
+
+/* Enqueue a prepared frame descriptor on the egress queue selected by @queue
+ * and update the per-CPU Tx statistics.
+ *
+ * The enqueue is attempted up to DPAA_ENQUEUE_RETRIES times while it keeps
+ * reporting -EBUSY. Returns 0 on success or the negative enqueue result.
+ */
+static inline int dpaa_xmit(struct dpaa_priv *priv,
+			    struct rtnl_link_stats64 *percpu_stats,
+			    int queue,
+			    struct qm_fd *fd)
+{
+	struct qman_fq *egress_fq = priv->egress_fqs[queue];
+	int attempt = 0;
+	int err;
+
+	if (fd->bpid == FSL_DPAA_BPID_INV)
+		fd->cmd |= qman_fq_fqid(priv->conf_fqs[queue]);
+
+	/* Trace this Tx fd */
+	trace_dpaa_tx_fd(priv->net_dev, egress_fq, fd);
+
+	while (attempt < DPAA_ENQUEUE_RETRIES) {
+		err = qman_enqueue(egress_fq, fd);
+		if (err != -EBUSY)
+			break;
+		attempt++;
+	}
+
+	if (likely(err >= 0)) {
+		percpu_stats->tx_packets++;
+		percpu_stats->tx_bytes += qm_fd_get_length(fd);
+		return 0;
+	}
+
+	percpu_stats->tx_errors++;
+	percpu_stats->tx_fifo_errors++;
+
+	return err;
+}
+
+/* Transmit entry point of the interface.
+ *
+ * Builds either a scatter/gather or a contiguous frame descriptor for the
+ * skb and enqueues it. The skb is always consumed: on any failure it is
+ * freed and tx_errors is incremented, and NETDEV_TX_OK is returned in all
+ * cases.
+ */
+static int dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+{
+	const int queue_mapping = skb_get_queue_mapping(skb);
+	bool nonlinear = skb_is_nonlinear(skb);
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa_percpu_priv *percpu_priv;
+	struct dpaa_priv *priv;
+	struct qm_fd fd;
+	int offset = 0;
+	int err = 0;
+
+	priv = netdev_priv(net_dev);
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+	percpu_stats = &percpu_priv->stats;
+
+	qm_fd_clear_fd(&fd);
+
+	if (!nonlinear) {
+		/* We're going to store the skb backpointer at the beginning
+		 * of the data buffer, so we need a privately owned skb
+		 *
+		 * We've made sure skb is not shared in dev->priv_flags,
+		 * we need to verify the skb head is not cloned
+		 */
+		if (skb_cow_head(skb, priv->tx_headroom))
+			goto enomem;
+
+		WARN_ON(skb_is_nonlinear(skb));
+	}
+
+	/* MAX_SKB_FRAGS is equal or larger than our DPAA_SGT_MAX_ENTRIES;
+	 * make sure we don't feed FMan with more fragments than it supports.
+	 */
+	if (nonlinear &&
+	    likely(skb_shinfo(skb)->nr_frags < DPAA_SGT_MAX_ENTRIES)) {
+		/* Just create a S/G fd based on the skb */
+		err = skb_to_sg_fd(priv, skb, &fd);
+		percpu_priv->tx_frag_skbuffs++;
+	} else {
+		/* If the egress skb contains more fragments than we support
+		 * we have no choice but to linearize it ourselves.
+		 */
+		if (unlikely(nonlinear)) {
+			if (__skb_linearize(skb))
+				goto enomem;
+
+			/* The skb is linear now and takes the contiguous
+			 * path, which writes the backpointer into its
+			 * headroom; apply the same private-head/headroom
+			 * check that linear skbs get above.
+			 */
+			if (skb_cow_head(skb, priv->tx_headroom))
+				goto enomem;
+		}
+
+		/* Finally, create a contig FD from this skb */
+		err = skb_to_contig_fd(priv, skb, &fd, &offset);
+	}
+	if (unlikely(err < 0))
+		goto skb_to_fd_failed;
+
+	if (likely(dpaa_xmit(priv, percpu_stats, queue_mapping, &fd) == 0))
+		return NETDEV_TX_OK;
+
+	/* The enqueue failed: undo the mappings made while building the fd */
+	dpaa_cleanup_tx_fd(priv, &fd);
+skb_to_fd_failed:
+enomem:
+	percpu_stats->tx_errors++;
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+/* Account for a frame received with errors and release its buffers.
+ *
+ * Logs the error bits of the FD status (rate limited), increments rx_errors
+ * and the per-cause counters matching the status bits, then hands the frame
+ * to dpaa_fd_release(). @fqid is not used by this function.
+ */
+static void dpaa_rx_error(struct net_device *net_dev,
+			  const struct dpaa_priv *priv,
+			  struct dpaa_percpu_priv *percpu_priv,
+			  const struct qm_fd *fd,
+			  u32 fqid)
+{
+	const u32 status = fd->status;
+
+	if (net_ratelimit())
+		netif_err(priv, hw, net_dev, "Err FD status = 0x%08x\n",
+			  status & FM_FD_STAT_RX_ERRORS);
+
+	percpu_priv->stats.rx_errors++;
+
+	if (status & FM_FD_ERR_DMA)
+		percpu_priv->rx_errors.dme++;
+
+	if (status & FM_FD_ERR_PHYSICAL)
+		percpu_priv->rx_errors.fpe++;
+
+	if (status & FM_FD_ERR_SIZE)
+		percpu_priv->rx_errors.fse++;
+
+	if (status & FM_FD_ERR_PRS_HDR_ERR)
+		percpu_priv->rx_errors.phe++;
+
+	dpaa_fd_release(net_dev, fd);
+}
+
+static void dpaa_tx_error(struct net_device *net_dev,
+			  const struct dpaa_priv *priv,
+			  struct dpaa_percpu_priv *percpu_priv,
+			  const struct qm_fd *fd,
+			  u32 fqid)
+{
+	struct sk_buff *skb;
+
+	if (net_ratelimit())
+		netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
+			   fd->status & FM_FD_STAT_TX_ERRORS);
+
+	percpu_priv->stats.tx_errors++;
+
+	skb = dpaa_cleanup_tx_fd(priv, fd);
+	dev_kfree_skb(skb);
+}
+
+/* NAPI poll callback: process up to @budget DQRR entries of the portal.
+ *
+ * When fewer than @budget entries were processed NAPI is completed; the
+ * portal's dequeue interrupt source is re-added in that case and also when
+ * the budget was used up while the NAPI context is marked down.
+ * Returns the number of entries processed.
+ */
+static int dpaa_eth_poll(struct napi_struct *napi, int budget)
+{
+	struct dpaa_napi_portal *np =
+			container_of(napi, struct dpaa_napi_portal, napi);
+	int cleaned = qman_p_poll_dqrr(np->p, budget);
+	bool done = cleaned < budget;
+
+	if (done)
+		napi_complete(napi);
+
+	if (done || np->down)
+		qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
+
+	return cleaned;
+}
+
+/* Handle a Tx confirmation frame.
+ *
+ * Counts (and logs, rate limited) a Tx error if the FD status carries any
+ * of the Tx error bits, increments the confirmation counter, undoes the Tx
+ * mappings of the frame and consumes the skb it was built from.
+ * @fqid is not used by this function.
+ */
+static void dpaa_tx_conf(struct net_device *net_dev,
+			 const struct dpaa_priv *priv,
+			 struct dpaa_percpu_priv *percpu_priv,
+			 const struct qm_fd *fd,
+			 u32 fqid)
+{
+	struct sk_buff	*skb;
+
+	/* keep the whole comparison inside the branch hint */
+	if (unlikely((fd->status & FM_FD_STAT_TX_ERRORS) != 0)) {
+		if (net_ratelimit())
+			netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
+				   fd->status & FM_FD_STAT_TX_ERRORS);
+
+		percpu_priv->stats.tx_errors++;
+	}
+
+	percpu_priv->tx_confirm++;
+
+	skb = dpaa_cleanup_tx_fd(priv, fd);
+
+	consume_skb(skb);
+}
+
+/* Defer frame processing to NAPI unless already running in softirq context.
+ *
+ * Returns 0 when called while serving a softirq (and not in hard IRQ), in
+ * which case the caller goes on processing. Otherwise the portal's dequeue
+ * interrupt source is removed, NAPI is scheduled for this CPU and 1 is
+ * returned.
+ */
+static inline int dpaa_eth_napi_schedule(struct dpaa_percpu_priv *percpu_priv,
+					 struct qman_portal *portal)
+{
+	if (likely(!in_irq() && in_serving_softirq()))
+		return 0;
+
+	/* Disable QMan IRQ and invoke NAPI */
+	qman_p_irqsource_remove(portal, QM_PIRQ_DQRI);
+
+	percpu_priv->np.p = portal;
+	napi_schedule(&percpu_priv->np.napi);
+	percpu_priv->in_interrupt++;
+
+	return 1;
+}
+
+static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
+					      struct qman_fq *fq,
+					      const struct qm_dqrr_entry *dq)
+{
+	struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+	struct dpaa_percpu_priv *percpu_priv;
+	struct net_device *net_dev;
+	struct dpaa_bp *dpaa_bp;
+	struct dpaa_priv *priv;
+
+	net_dev = dpaa_fq->net_dev;
+	priv = netdev_priv(net_dev);
+	dpaa_bp = dpaa_bpid2pool(dq->fd.bpid);
+	if (!dpaa_bp)
+		return qman_cb_dqrr_consume;
+
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+
+	if (dpaa_eth_napi_schedule(percpu_priv, portal))
+		return qman_cb_dqrr_stop;
+
+	if (dpaa_eth_refill_bpools(priv))
+		/* Unable to refill the buffer pool due to insufficient
+		 * system memory. Just release the frame back into the pool,
+		 * otherwise we'll soon end up with an empty buffer pool.
+		 */
+		dpaa_fd_release(net_dev, &dq->fd);
+	else
+		dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+
+	return qman_cb_dqrr_consume;
+}
+
+/* DQRR callback for the default Rx queue.
+ *
+ * Frames whose buffer pool ID cannot be resolved are consumed without
+ * further processing. Outside softirq context processing is deferred to
+ * NAPI and the dequeue is stopped. Otherwise the pools are refilled; if that
+ * fails, or if the FD status carries Rx error bits, the frame is released.
+ * Good frames are unmapped, turned into an skb (contiguous or S/G) and
+ * passed up the stack; Rx packet/byte counters are updated unless the stack
+ * reports NET_RX_DROP.
+ */
+static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
+						struct qman_fq *fq,
+						const struct qm_dqrr_entry *dq)
+{
+	struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa_percpu_priv *percpu_priv;
+	const struct qm_fd *fd = &dq->fd;
+	dma_addr_t addr = qm_fd_addr(fd);
+	enum qm_fd_format fd_format;
+	struct net_device *net_dev;
+	u32 fd_status = fd->status;
+	struct dpaa_bp *dpaa_bp;
+	struct dpaa_priv *priv;
+	unsigned int skb_len;
+	struct sk_buff *skb;
+	int *count_ptr;
+
+	net_dev = dpaa_fq->net_dev;
+	priv = netdev_priv(net_dev);
+	/* fd points at dq->fd, so this single lookup serves the whole path */
+	dpaa_bp = dpaa_bpid2pool(fd->bpid);
+	if (!dpaa_bp)
+		return qman_cb_dqrr_consume;
+
+	/* Trace the Rx fd */
+	trace_dpaa_rx_fd(net_dev, fq, fd);
+
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+	percpu_stats = &percpu_priv->stats;
+
+	if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal)))
+		return qman_cb_dqrr_stop;
+
+	/* Make sure we didn't run out of buffers */
+	if (unlikely(dpaa_eth_refill_bpools(priv))) {
+		/* Unable to refill the buffer pool due to insufficient
+		 * system memory. Just release the frame back into the pool,
+		 * otherwise we'll soon end up with an empty buffer pool.
+		 */
+		dpaa_fd_release(net_dev, fd);
+		return qman_cb_dqrr_consume;
+	}
+
+	if (unlikely((fd_status & FM_FD_STAT_RX_ERRORS) != 0)) {
+		if (net_ratelimit())
+			netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
+				   fd_status & FM_FD_STAT_RX_ERRORS);
+
+		percpu_stats->rx_errors++;
+		dpaa_fd_release(net_dev, fd);
+		return qman_cb_dqrr_consume;
+	}
+
+	dma_unmap_single(dpaa_bp->dev, addr, dpaa_bp->size, DMA_FROM_DEVICE);
+
+	/* prefetch the first 64 bytes of the frame or the SGT start */
+	prefetch(phys_to_virt(addr) + qm_fd_get_offset(fd));
+
+	fd_format = qm_fd_get_format(fd);
+	/* The only FD types that we may receive are contig and S/G */
+	WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg));
+
+	/* Account for either the contig buffer or the SGT buffer (depending on
+	 * which case we were in) having been removed from the pool.
+	 */
+	count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
+	(*count_ptr)--;
+
+	if (likely(fd_format == qm_fd_contig))
+		skb = contig_fd_to_skb(priv, fd);
+	else
+		skb = sg_fd_to_skb(priv, fd);
+	if (!skb)
+		return qman_cb_dqrr_consume;
+
+	skb->protocol = eth_type_trans(skb, net_dev);
+
+	/* the skb must not be touched once handed to the stack */
+	skb_len = skb->len;
+
+	if (unlikely(netif_receive_skb(skb) == NET_RX_DROP))
+		return qman_cb_dqrr_consume;
+
+	percpu_stats->rx_packets++;
+	percpu_stats->rx_bytes += skb_len;
+
+	return qman_cb_dqrr_consume;
+}
+
+static enum qman_cb_dqrr_result conf_error_dqrr(struct qman_portal *portal,
+						struct qman_fq *fq,
+						const struct qm_dqrr_entry *dq)
+{
+	struct dpaa_percpu_priv *percpu_priv;
+	struct net_device *net_dev;
+	struct dpaa_priv *priv;
+
+	net_dev = ((struct dpaa_fq *)fq)->net_dev;
+	priv = netdev_priv(net_dev);
+
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+
+	if (dpaa_eth_napi_schedule(percpu_priv, portal))
+		return qman_cb_dqrr_stop;
+
+	dpaa_tx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+
+	return qman_cb_dqrr_consume;
+}
+
+/* DQRR callback for the default Tx confirmation queue.
+ *
+ * The frame is traced first. Outside softirq context processing is then
+ * deferred to NAPI and the dequeue is stopped; otherwise the frame is
+ * handled by dpaa_tx_conf() and consumed.
+ */
+static enum qman_cb_dqrr_result conf_dflt_dqrr(struct qman_portal *portal,
+					       struct qman_fq *fq,
+					       const struct qm_dqrr_entry *dq)
+{
+	struct net_device *net_dev = ((struct dpaa_fq *)fq)->net_dev;
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct dpaa_percpu_priv *percpu_priv;
+
+	/* Trace the fd */
+	trace_dpaa_tx_conf_fd(net_dev, fq, &dq->fd);
+
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+	if (dpaa_eth_napi_schedule(percpu_priv, portal))
+		return qman_cb_dqrr_stop;
+
+	dpaa_tx_conf(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
+
+	return qman_cb_dqrr_consume;
+}
+
+/* Enqueue rejection (ERN) callback for egress queues.
+ *
+ * Counts the rejected frame as dropped, records the rejection via
+ * count_ern(), undoes the Tx mappings of the frame and frees its skb.
+ */
+static void egress_ern(struct qman_portal *portal,
+		       struct qman_fq *fq,
+		       const union qm_mr_entry *msg)
+{
+	struct net_device *net_dev = ((struct dpaa_fq *)fq)->net_dev;
+	const struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct dpaa_percpu_priv *percpu_priv;
+	struct sk_buff *rejected_skb;
+
+	percpu_priv = this_cpu_ptr(priv->percpu_priv);
+
+	percpu_priv->stats.tx_dropped++;
+	percpu_priv->stats.tx_fifo_errors++;
+	count_ern(percpu_priv, msg);
+
+	rejected_skb = dpaa_cleanup_tx_fd(priv, &msg->ern.fd);
+	dev_kfree_skb_any(rejected_skb);
+}
+
+/* Frame queue callbacks passed to dpaa_fq_setup() from dpaa_eth_probe():
+ * a DQRR handler for each of the rx_defq, tx_defq, rx_errq and tx_errq
+ * slots, plus the ERN handler for egress queues.
+ */
+static const struct dpaa_fq_cbs dpaa_fq_cbs = {
+	.rx_defq = { .cb = { .dqrr = rx_default_dqrr } },
+	.tx_defq = { .cb = { .dqrr = conf_dflt_dqrr } },
+	.rx_errq = { .cb = { .dqrr = rx_error_dqrr } },
+	.tx_errq = { .cb = { .dqrr = conf_error_dqrr } },
+	.egress_ern = { .cb = { .ern = egress_ern } }
+};
+
+/* Clear the 'down' mark and enable the NAPI context of every possible CPU */
+static void dpaa_eth_napi_enable(struct dpaa_priv *priv)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct dpaa_percpu_priv *pcpu =
+				per_cpu_ptr(priv->percpu_priv, cpu);
+
+		pcpu->np.down = 0;
+		napi_enable(&pcpu->np.napi);
+	}
+}
+
+/* Mark the NAPI context of every possible CPU as down, then disable it */
+static void dpaa_eth_napi_disable(struct dpaa_priv *priv)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct dpaa_percpu_priv *pcpu =
+				per_cpu_ptr(priv->percpu_priv, cpu);
+
+		pcpu->np.down = 1;
+		napi_disable(&pcpu->np.napi);
+	}
+}
+
+/* ndo_open handler: bring the interface up.
+ *
+ * Enables NAPI on all CPUs, attaches the PHY, enables the FMan ports,
+ * starts the MAC and finally starts the Tx queues.
+ *
+ * Returns 0 on success or a negative errno. On every failure path NAPI is
+ * disabled again, so that it is left in the same state as before the call.
+ */
+static int dpaa_open(struct net_device *net_dev)
+{
+	struct mac_device *mac_dev;
+	struct dpaa_priv *priv;
+	int err, i;
+
+	priv = netdev_priv(net_dev);
+	mac_dev = priv->mac_dev;
+	dpaa_eth_napi_enable(priv);
+
+	net_dev->phydev = mac_dev->init_phy(net_dev, priv->mac_dev);
+	if (!net_dev->phydev) {
+		netif_err(priv, ifup, net_dev, "init_phy() failed\n");
+		err = -ENODEV;
+		/* no port was enabled yet, only NAPI needs to be undone */
+		goto phy_init_failed;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
+		err = fman_port_enable(mac_dev->port[i]);
+		if (err)
+			goto mac_start_failed;
+	}
+
+	err = priv->mac_dev->start(mac_dev);
+	if (err < 0) {
+		netif_err(priv, ifup, net_dev, "mac_dev->start() = %d\n", err);
+		goto mac_start_failed;
+	}
+
+	netif_tx_start_all_queues(net_dev);
+
+	return 0;
+
+mac_start_failed:
+	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++)
+		fman_port_disable(mac_dev->port[i]);
+
+phy_init_failed:
+	dpaa_eth_napi_disable(priv);
+
+	return err;
+}
+
+/* ndo_stop handler: run dpaa_stop(), then disable NAPI on all CPUs.
+ * Returns the result of dpaa_stop().
+ */
+static int dpaa_eth_stop(struct net_device *net_dev)
+{
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	int ret = dpaa_stop(net_dev);
+
+	dpaa_eth_napi_disable(priv);
+
+	return ret;
+}
+
+/* Network device operations of the interface; handed to dpaa_netdev_init()
+ * from dpaa_eth_probe().
+ */
+static const struct net_device_ops dpaa_ops = {
+	.ndo_open = dpaa_open,
+	.ndo_start_xmit = dpaa_start_xmit,
+	.ndo_stop = dpaa_eth_stop,
+	.ndo_tx_timeout = dpaa_tx_timeout,
+	.ndo_get_stats64 = dpaa_get_stats64,
+	.ndo_set_mac_address = dpaa_set_mac_address,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_set_rx_mode = dpaa_set_rx_mode,
+};
+
+/* Register one NAPI context per possible CPU, all polled by dpaa_eth_poll()
+ * with the default weight. Always returns 0.
+ */
+static int dpaa_napi_add(struct net_device *net_dev)
+{
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct dpaa_percpu_priv *pcpu =
+				per_cpu_ptr(priv->percpu_priv, cpu);
+
+		netif_napi_add(net_dev, &pcpu->np.napi,
+			       dpaa_eth_poll, NAPI_POLL_WEIGHT);
+	}
+
+	return 0;
+}
+
+/* Unregister the NAPI context of every possible CPU */
+static void dpaa_napi_del(struct net_device *net_dev)
+{
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct dpaa_percpu_priv *pcpu =
+				per_cpu_ptr(priv->percpu_priv, cpu);
+
+		netif_napi_del(&pcpu->np.napi);
+	}
+}
+
+/* Buffer release callback of a pool (installed as free_buf_cb): unmap the
+ * buffer described by @bmb and free its page fragment.
+ */
+static inline void dpaa_bp_free_pf(const struct dpaa_bp *bp,
+				   struct bm_buffer *bmb)
+{
+	dma_addr_t buf_dma = bm_buf_addr(bmb);
+	void *buf = phys_to_virt(buf_dma);
+
+	dma_unmap_single(bp->dev, buf_dma, bp->size, DMA_FROM_DEVICE);
+	skb_free_frag(buf);
+}
+
+/* Alloc the dpaa_bp struct and configure default values.
+ *
+ * Both the structure and its per-CPU buffer counter are device-managed
+ * allocations tied to @dev.
+ *
+ * Returns the new pool descriptor, or ERR_PTR(-ENOMEM) if either allocation
+ * fails.
+ */
+static struct dpaa_bp *dpaa_bp_alloc(struct device *dev)
+{
+	struct dpaa_bp *dpaa_bp;
+
+	dpaa_bp = devm_kzalloc(dev, sizeof(*dpaa_bp), GFP_KERNEL);
+	if (!dpaa_bp)
+		return ERR_PTR(-ENOMEM);
+
+	dpaa_bp->bpid = FSL_DPAA_BPID_INV;
+	dpaa_bp->percpu_count = devm_alloc_percpu(dev, *dpaa_bp->percpu_count);
+	if (!dpaa_bp->percpu_count) {
+		/* the counter is dereferenced on every refill and Rx frame */
+		devm_kfree(dev, dpaa_bp);
+		return ERR_PTR(-ENOMEM);
+	}
+	dpaa_bp->config_count = FSL_DPAA_ETH_MAX_BUF_COUNT;
+
+	dpaa_bp->seed_cb = dpaa_bp_seed;
+	dpaa_bp->free_buf_cb = dpaa_bp_free_pf;
+
+	return dpaa_bp;
+}
+
+/* Place all ingress FQs (Rx Default, Rx Error) in a dedicated CGR.
+ * We won't be sending congestion notifications to FMan; for now, we just use
+ * this CGR to generate enqueue rejections to FMan in order to drop the frames
+ * before they reach our ingress queues and eat up memory.
+ *
+ * Returns a negative errno if the CGR ID allocation or the CGR creation
+ * fails; otherwise returns the (non-negative) result of qman_create_cgr()
+ * and sets priv->use_ingress_cgr.
+ */
+static int dpaa_ingress_cgr_init(struct dpaa_priv *priv)
+{
+	struct qm_mcc_initcgr initcgr;
+	u32 cs_th;
+	int err;
+
+	err = qman_alloc_cgrid(&priv->ingress_cgr.cgrid);
+	if (err < 0) {
+		if (netif_msg_drv(priv))
+			pr_err("Error %d allocating CGR ID\n", err);
+		goto out_error;
+	}
+
+	/* Only a few fields are set below; clear the rest so that no stack
+	 * garbage is handed to qman_create_cgr().
+	 */
+	memset(&initcgr, 0, sizeof(initcgr));
+
+	/* Enable CS TD, but disable Congestion State Change Notifications.
+	 * NOTE(review): cscn_en is assigned QM_CGR_EN although the write-enable
+	 * mask built here only names CS_THRES and CSTD_EN - confirm intent.
+	 */
+	initcgr.we_mask = QM_CGR_WE_CS_THRES;
+	initcgr.cgr.cscn_en = QM_CGR_EN;
+	cs_th = DPAA_INGRESS_CS_THRESHOLD;
+	qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
+
+	initcgr.we_mask |= QM_CGR_WE_CSTD_EN;
+	initcgr.cgr.cstd_en = QM_CGR_EN;
+
+	/* This CGR will be associated with the SWP affined to the current CPU.
+	 * However, we'll place all our ingress FQs in it.
+	 */
+	err = qman_create_cgr(&priv->ingress_cgr, QMAN_CGR_FLAG_USE_INIT,
+			      &initcgr);
+	if (err < 0) {
+		if (netif_msg_drv(priv))
+			pr_err("Error %d creating ingress CGR with ID %d\n",
+			       err, priv->ingress_cgr.cgrid);
+		qman_release_cgrid(priv->ingress_cgr.cgrid);
+		goto out_error;
+	}
+	if (netif_msg_drv(priv))
+		pr_debug("Created ingress CGR %d for netdev with hwaddr %pM\n",
+			 priv->ingress_cgr.cgrid, priv->mac_dev->addr);
+
+	priv->use_ingress_cgr = true;
+
+out_error:
+	return err;
+}
+
+static const struct of_device_id dpaa_match[];
+
+/* Compute the frame headroom required by a buffer layout.
+ *
+ * The frame headroom must accommodate:
+ * - the driver private data area
+ * - parse results, hash results, timestamp if selected
+ * If either hash results or time stamp are selected, both will
+ * be copied to/from the frame headroom, as TS is located between PR and
+ * HR in the IC and IC copy size has a granularity of 16bytes
+ * (see description of FMBM_RICP and FMBM_TICP registers in DPAARM)
+ *
+ * The result is rounded up to a multiple of DPAA_FD_DATA_ALIGNMENT when
+ * that alignment is non-zero.
+ */
+static inline u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl)
+{
+	u16 headroom = (u16)(bl->priv_data_size + DPAA_PARSE_RESULTS_SIZE +
+			     DPAA_TIME_STAMP_SIZE + DPAA_HASH_RESULTS_SIZE);
+
+	if (!DPAA_FD_DATA_ALIGNMENT)
+		return headroom;
+
+	return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT);
+}
+
+/* Platform driver probe: create and register one DPAA Ethernet interface.
+ *
+ * Allocates the net device, looks up the MAC, creates the buffer pools and
+ * frame queues, sets up the egress and ingress congestion groups,
+ * initializes the FMan ports, the per-CPU private data and NAPI, and
+ * finally calls dpaa_netdev_init().
+ *
+ * Returns 0 on success or a negative errno; on failure the resources
+ * acquired so far are released through the label chain at the end.
+ */
+static int dpaa_eth_probe(struct platform_device *pdev)
+{
+	struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM] = {NULL};
+	struct dpaa_percpu_priv *percpu_priv;
+	struct net_device *net_dev = NULL;
+	struct dpaa_fq *dpaa_fq, *tmp;
+	struct dpaa_priv *priv = NULL;
+	struct fm_port_fqs port_fqs;
+	struct mac_device *mac_dev;
+	int err = 0, i, channel;
+	struct device *dev;
+
+	dev = &pdev->dev;
+
+	/* Allocate this early, so we can store relevant information in
+	 * the private area
+	 */
+	net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM);
+	if (!net_dev) {
+		dev_err(dev, "alloc_etherdev_mq() failed\n");
+		/* err is still 0 here; report the failure to the caller */
+		err = -ENOMEM;
+		goto alloc_etherdev_mq_failed;
+	}
+
+	/* Do this here, so we can be verbose early */
+	SET_NETDEV_DEV(net_dev, dev);
+	dev_set_drvdata(dev, net_dev);
+
+	priv = netdev_priv(net_dev);
+	priv->net_dev = net_dev;
+
+	priv->msg_enable = netif_msg_init(debug, DPAA_MSG_DEFAULT);
+
+	mac_dev = dpaa_mac_dev_get(pdev);
+	if (IS_ERR(mac_dev)) {
+		dev_err(dev, "dpaa_mac_dev_get() failed\n");
+		err = PTR_ERR(mac_dev);
+		goto mac_probe_failed;
+	}
+
+	/* If fsl_fm_max_frm is set to a higher value than the all-common 1500,
+	 * we choose conservatively and let the user explicitly set a higher
+	 * MTU via ifconfig. Otherwise, the user may end up with different MTUs
+	 * in the same LAN.
+	 * If on the other hand fsl_fm_max_frm has been chosen below 1500,
+	 * start with the maximum allowed.
+	 */
+	net_dev->mtu = min(dpaa_get_max_mtu(), ETH_DATA_LEN);
+
+	netdev_dbg(net_dev, "Setting initial MTU on net device: %d\n",
+		   net_dev->mtu);
+
+	priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */
+	priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
+
+	/* device used for DMA mapping */
+	arch_setup_dma_ops(dev, 0, 0, NULL, false);
+	err = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(40));
+	if (err) {
+		dev_err(dev, "dma_coerce_mask_and_coherent() failed\n");
+		goto dev_mask_failed;
+	}
+
+	/* bp init */
+	for (i = 0; i < DPAA_BPS_NUM; i++) {
+		dpaa_bps[i] = dpaa_bp_alloc(dev);
+		if (IS_ERR(dpaa_bps[i])) {
+			err = PTR_ERR(dpaa_bps[i]);
+			/* the final cleanup loop must not dereference an
+			 * error pointer
+			 */
+			dpaa_bps[i] = NULL;
+			dpaa_bps_free(priv);
+			goto bp_create_failed;
+		}
+		/* the raw size of the buffers used for reception */
+		dpaa_bps[i]->raw_size = bpool_buffer_raw_size(i, DPAA_BPS_NUM);
+		/* avoid runtime computations by keeping the usable size here */
+		dpaa_bps[i]->size = dpaa_bp_size(dpaa_bps[i]->raw_size);
+		dpaa_bps[i]->dev = dev;
+
+		/* use the function-level err so the failure is returned */
+		err = dpaa_bp_alloc_pool(dpaa_bps[i]);
+		if (err < 0) {
+			dpaa_bps_free(priv);
+			priv->dpaa_bps[i] = NULL;
+			goto bp_create_failed;
+		}
+		priv->dpaa_bps[i] = dpaa_bps[i];
+	}
+
+	INIT_LIST_HEAD(&priv->dpaa_fq_list);
+
+	memset(&port_fqs, 0, sizeof(port_fqs));
+
+	err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs);
+	if (err < 0) {
+		dev_err(dev, "dpaa_alloc_all_fqs() failed\n");
+		goto fq_probe_failed;
+	}
+
+	priv->mac_dev = mac_dev;
+
+	channel = dpaa_get_channel();
+	if (channel < 0) {
+		dev_err(dev, "dpaa_get_channel() failed\n");
+		err = channel;
+		goto get_channel_failed;
+	}
+
+	priv->channel = (u16)channel;
+
+	/* Start a thread that will walk the CPUs with affine portals
+	 * and add this pool channel to each's dequeue mask.
+	 */
+	dpaa_eth_add_channel(priv->channel);
+
+	dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]);
+
+	/* Create a congestion group for this netdev, with
+	 * dynamically-allocated CGR ID.
+	 * Must be executed after probing the MAC, but before
+	 * assigning the egress FQs to the CGRs.
+	 */
+	err = dpaa_eth_cgr_init(priv);
+	if (err < 0) {
+		dev_err(dev, "Error initializing CGR\n");
+		goto tx_cgr_init_failed;
+	}
+
+	err = dpaa_ingress_cgr_init(priv);
+	if (err < 0) {
+		dev_err(dev, "Error initializing ingress CGR\n");
+		goto rx_cgr_init_failed;
+	}
+
+	/* Add the FQs to the interface, and make them active */
+	list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) {
+		err = dpaa_fq_init(dpaa_fq, false);
+		if (err < 0)
+			goto fq_alloc_failed;
+	}
+
+	priv->tx_headroom = dpaa_get_headroom(&priv->buf_layout[TX]);
+	priv->rx_headroom = dpaa_get_headroom(&priv->buf_layout[RX]);
+
+	/* All real interfaces need their ports initialized */
+	dpaa_eth_init_ports(mac_dev, dpaa_bps, DPAA_BPS_NUM, &port_fqs,
+			    &priv->buf_layout[0], dev);
+
+	priv->percpu_priv = devm_alloc_percpu(dev, *priv->percpu_priv);
+	if (!priv->percpu_priv) {
+		dev_err(dev, "devm_alloc_percpu() failed\n");
+		err = -ENOMEM;
+		goto alloc_percpu_failed;
+	}
+	for_each_possible_cpu(i) {
+		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
+		memset(percpu_priv, 0, sizeof(*percpu_priv));
+	}
+
+	/* Initialize NAPI */
+	err = dpaa_napi_add(net_dev);
+	if (err < 0)
+		goto napi_add_failed;
+
+	err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout);
+	if (err < 0)
+		goto netdev_init_failed;
+
+	dpaa_eth_sysfs_init(&net_dev->dev);
+
+	netif_info(priv, probe, net_dev, "Probed interface %s\n",
+		   net_dev->name);
+
+	return 0;
+
+netdev_init_failed:
+napi_add_failed:
+	dpaa_napi_del(net_dev);
+alloc_percpu_failed:
+	dpaa_fq_free(dev, &priv->dpaa_fq_list);
+fq_alloc_failed:
+	qman_delete_cgr_safe(&priv->ingress_cgr);
+	qman_release_cgrid(priv->ingress_cgr.cgrid);
+rx_cgr_init_failed:
+	qman_delete_cgr_safe(&priv->cgr_data.cgr);
+	qman_release_cgrid(priv->cgr_data.cgr.cgrid);
+tx_cgr_init_failed:
+get_channel_failed:
+fq_probe_failed:
+	/* all buffer pools exist by the time the FQs are allocated */
+	dpaa_bps_free(priv);
+bp_create_failed:
+dev_mask_failed:
+mac_probe_failed:
+	dev_set_drvdata(dev, NULL);
+	free_netdev(net_dev);
+alloc_etherdev_mq_failed:
+	for (i = 0; i < DPAA_BPS_NUM && dpaa_bps[i]; i++) {
+		if (atomic_read(&dpaa_bps[i]->refs) == 0)
+			devm_kfree(dev, dpaa_bps[i]);
+	}
+	return err;
+}
+
+/* Platform driver remove: tear down the interface created by the probe.
+ *
+ * Removes the sysfs entries, unregisters the net device, frees the frame
+ * queues, both congestion groups, the NAPI contexts and the buffer pools,
+ * then frees the net device. Returns the result of dpaa_fq_free().
+ */
+static int dpaa_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct net_device *net_dev = dev_get_drvdata(dev);
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	int ret;
+
+	dpaa_eth_sysfs_remove(dev);
+
+	dev_set_drvdata(dev, NULL);
+	unregister_netdev(net_dev);
+
+	ret = dpaa_fq_free(dev, &priv->dpaa_fq_list);
+
+	/* ingress CGR first, then the egress one */
+	qman_delete_cgr_safe(&priv->ingress_cgr);
+	qman_release_cgrid(priv->ingress_cgr.cgrid);
+	qman_delete_cgr_safe(&priv->cgr_data.cgr);
+	qman_release_cgrid(priv->cgr_data.cgr.cgrid);
+
+	dpaa_napi_del(net_dev);
+	dpaa_bps_free(priv);
+	free_netdev(net_dev);
+
+	return ret;
+}
+
+/* Platform device IDs handled by this driver: a single "dpaa-ethernet"
+ * entry followed by the empty terminating entry.
+ */
+static struct platform_device_id dpaa_devtype[] = {
+	{
+		.name = "dpaa-ethernet",
+		.driver_data = 0,
+	}, {
+	}
+};
+MODULE_DEVICE_TABLE(platform, dpaa_devtype);
+
+/* Platform driver descriptor: binds the devices listed in dpaa_devtype to
+ * dpaa_eth_probe() / dpaa_remove(). Registered in dpaa_load().
+ */
+static struct platform_driver dpaa_driver = {
+	.driver = {
+		.name = KBUILD_MODNAME,
+	},
+	.id_table = dpaa_devtype,
+	.probe = dpaa_eth_probe,
+	.remove = dpaa_remove
+};
+
+/* Module init: cache the FMan Rx extra headroom and maximum frame size, then
+ * register the platform driver. Returns the registration result.
+ */
+static int __init dpaa_load(void)
+{
+	int ret;
+
+	pr_debug("FSL DPAA Ethernet driver\n");
+
+	/* initialize dpaa_eth mirror values */
+	dpaa_rx_extra_headroom = fman_get_rx_extra_headroom();
+	dpaa_max_frm = fman_get_max_frm();
+
+	ret = platform_driver_register(&dpaa_driver);
+	if (ret >= 0)
+		return ret;
+
+	pr_err("Error, platform_driver_register() = %d\n", ret);
+
+	return ret;
+}
+module_init(dpaa_load);
+
+/* Module exit: unregister the platform driver, then release the channel.
+ *
+ * Only one channel is used, and it may only be released once all the
+ * interfaces have been removed, hence the ordering below.
+ */
+static void __exit dpaa_unload(void)
+{
+	platform_driver_unregister(&dpaa_driver);
+	dpaa_release_channel();
+}
+module_exit(dpaa_unload);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("FSL DPAA Ethernet driver");
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
new file mode 100644
index 000000000000..1f9aebf3f3c5
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.h
@@ -0,0 +1,185 @@
+/* Copyright 2008 - 2016 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __DPAA_H
+#define __DPAA_H
+
+#include <linux/netdevice.h>
+#include <soc/fsl/qman.h>
+#include <soc/fsl/bman.h>
+
+#include "fman.h"
+#include "mac.h"
+#include "dpaa_eth_trace.h"
+
+#define DPAA_ETH_TXQ_NUM	NR_CPUS
+
+#define DPAA_BPS_NUM 3 /* number of bpools per interface */
+
+/* Detailed FQ types, used for fine-grained WQ assignments (first value: 1) */
+enum dpaa_fq_type {
+	FQ_TYPE_RX_DEFAULT = 1, /* Rx Default FQs */
+	FQ_TYPE_RX_ERROR,	/* Rx Error FQs */
+	FQ_TYPE_TX,		/* "Real" Tx FQs */
+	FQ_TYPE_TX_CONFIRM,	/* Tx default Conf FQ (actually an Rx FQ) */
+	FQ_TYPE_TX_CONF_MQ,	/* Tx conf FQs (one for each Tx FQ) */
+	FQ_TYPE_TX_ERROR,	/* Tx Error FQs (these are actually Rx FQs) */
+};
+
+struct dpaa_fq {	/* driver-side state for one QMan frame queue */
+	struct qman_fq fq_base;
+	struct list_head list;	/* entry in dpaa_priv::dpaa_fq_list */
+	struct net_device *net_dev;
+	bool init;
+	u32 fqid;	/* id shown by the "fqids" sysfs attribute */
+	u32 flags;
+	u16 channel;
+	u8 wq;
+	enum dpaa_fq_type fq_type;	/* selects the "fqids" label */
+};
+
+struct dpaa_fq_cbs {	/* one qman_fq per FQ role */
+	struct qman_fq rx_defq;
+	struct qman_fq tx_defq;
+	struct qman_fq rx_errq;
+	struct qman_fq tx_errq;
+	struct qman_fq egress_ern;	/* NOTE(review): presumably Tx ERNs */
+};
+
+struct dpaa_bp {
+	/* device used in the DMA mapping operations */
+	struct device *dev;
+	/* current number of buffers in the buffer pool allotted to each CPU */
+	int __percpu *percpu_count;
+	/* all buffers allocated for this pool have this raw size */
+	size_t raw_size;
+	/* all buffers in this pool have this same usable size */
+	size_t size;
+	/* the buffer pools are initialized with config_count buffers for each
+	 * CPU; at runtime the number of buffers per CPU is constantly brought
+	 * back to this level
+	 */
+	u16 config_count;
+	u8 bpid;	/* pool id, listed by the "bpids" sysfs attribute */
+	struct bman_pool *pool;	/* BMan pool object backing this bpool */
+	/* bpool can be seeded before use by this cb */
+	int (*seed_cb)(struct dpaa_bp *);
+	/* bpool can be emptied before freeing by this cb */
+	void (*free_buf_cb)(const struct dpaa_bp *, struct bm_buffer *);
+	atomic_t refs;	/* NOTE(review): presumably a user count - confirm */
+};
+
+struct dpaa_rx_errors {	/* copied raw into ethtool stats: u64 fields only */
+	u64 dme;		/* DMA Error */
+	u64 fpe;		/* Frame Physical Error */
+	u64 fse;		/* Frame Size Error */
+	u64 phe;		/* Header Error */
+};
+
+/* QMan ERN counters, one per rejection code; copied raw to ethtool stats */
+struct dpaa_ern_cnt {
+	u64 cg_tdrop;		/* Congestion group taildrop */
+	u64 wred;		/* WRED congestion */
+	u64 err_cond;		/* Error condition */
+	u64 early_window;	/* Order restoration, frame too early */
+	u64 late_window;	/* Order restoration, frame too late */
+	u64 fq_tdrop;		/* FQ taildrop */
+	u64 fq_retired;		/* FQ is retired */
+	u64 orp_zero;		/* ORP disabled */
+};
+
+struct dpaa_napi_portal {	/* NAPI context paired with a QMan portal */
+	struct napi_struct napi;
+	struct qman_portal *p;
+	bool down;	/* NOTE(review): meaning not visible here - confirm */
+};
+
+struct dpaa_percpu_priv {	/* per-CPU state and counters of an interface */
+	struct net_device *net_dev;
+	struct dpaa_napi_portal np;
+	u64 in_interrupt;	/* ethtool "interrupts" */
+	u64 tx_confirm;		/* ethtool "tx confirm" */
+	/* fragmented (non-linear) skbuffs received from the stack */
+	u64 tx_frag_skbuffs;
+	struct rtnl_link_stats64 stats;
+	struct dpaa_rx_errors rx_errors;	/* summed over CPUs for ethtool */
+	struct dpaa_ern_cnt ern_cnt;		/* summed over CPUs for ethtool */
+};
+
+struct dpaa_buffer_layout {	/* element type of dpaa_priv::buf_layout[] */
+	u16 priv_data_size;
+};
+
+struct dpaa_priv {	/* driver data obtained through netdev_priv() */
+	struct dpaa_percpu_priv __percpu *percpu_priv;
+	struct dpaa_bp *dpaa_bps[DPAA_BPS_NUM];
+	/* Store here the needed Tx headroom for convenience and speed
+	 * (even though it can be computed based on the fields of buf_layout)
+	 */
+	u16 tx_headroom;
+	struct net_device *net_dev;
+	struct mac_device *mac_dev;	/* holds the pause settings (ethtool) */
+	struct qman_fq *egress_fqs[DPAA_ETH_TXQ_NUM];
+	struct qman_fq *conf_fqs[DPAA_ETH_TXQ_NUM];
+
+	u16 channel;
+	struct list_head dpaa_fq_list;	/* struct dpaa_fq, linked via ::list */
+
+	u32 msg_enable;	/* net_device message level */
+
+	struct {
+		/* All egress queues to a given net device belong to one
+		 * (and the same) congestion group.
+		 */
+		struct qman_cgr cgr;
+		/* If congested, when it began. Used for performance stats. */
+		u32 congestion_start_jiffies;
+		/* Number of jiffies the Tx port was congested. */
+		u32 congested_jiffies;
+		/* Counter for the number of times the CGR
+		 * entered congestion state
+		 */
+		u32 cgr_congested_count;
+	} cgr_data;
+	/* Use a per-port CGR for ingress traffic. */
+	bool use_ingress_cgr;
+	struct qman_cgr ingress_cgr;
+
+	/* NOTE(review): two layouts, presumably Rx and Tx - confirm order */
+	struct dpaa_buffer_layout buf_layout[2];
+	u16 rx_headroom;
+};
+
+/* from dpaa_ethtool.c */
+extern const struct ethtool_ops dpaa_ethtool_ops;
+
+/* from dpaa_eth_sysfs.c */
+void dpaa_eth_sysfs_remove(struct device *dev);
+void dpaa_eth_sysfs_init(struct device *dev);
+#endif	/* __DPAA_H */
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
new file mode 100644
index 000000000000..ec75d1c6fa89
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_sysfs.c
@@ -0,0 +1,165 @@
+/* Copyright 2008-2016 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/of_net.h>
+#include "dpaa_eth.h"
+#include "mac.h"
+
+/* sysfs "device_addr": start of the MAC's resource in hex, or "none" */
+static ssize_t dpaa_eth_show_addr(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct dpaa_priv *priv = netdev_priv(to_net_dev(dev));
+	unsigned long long start;
+
+	if (!priv->mac_dev)
+		return sprintf(buf, "none");
+	start = (unsigned long long)priv->mac_dev->res->start;
+	return sprintf(buf, "%llx", start);
+}
+
+/* Emit "<type>: <id>" or "<type>: <first> - <last>", bounded by PAGE_SIZE */
+static ssize_t dpaa_emit_fqids(char *buf, ssize_t bytes, const char *type,
+			       u32 first, u32 last)
+{
+	if (first == last)
+		return scnprintf(buf + bytes, PAGE_SIZE - bytes, "%s: %u\n",
+				 type, first);
+	return scnprintf(buf + bytes, PAGE_SIZE - bytes, "%s: %u - %u\n",
+			 type, first, last);
+}
+
+/* sysfs "fqids": list the interface's frame queue ids with their FQ type.
+ * Neighbouring entries of one type whose ids differ by 1 form one range.
+ */
+static ssize_t dpaa_eth_show_fqids(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct dpaa_priv *priv = netdev_priv(to_net_dev(dev));
+	struct dpaa_fq *prev = NULL;
+	const char *prevstr = NULL;
+	struct dpaa_fq *fq;
+	u32 first_fqid = 0;
+	u32 last_fqid = 0;
+	ssize_t bytes = 0;
+	const char *str;
+
+	list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
+		switch (fq->fq_type) {
+		case FQ_TYPE_RX_DEFAULT:
+			str = "Rx default";
+			break;
+		case FQ_TYPE_RX_ERROR:
+			str = "Rx error";
+			break;
+		case FQ_TYPE_TX_CONFIRM:
+			str = "Tx default confirmation";
+			break;
+		case FQ_TYPE_TX_CONF_MQ:
+			str = "Tx confirmation (mq)";
+			break;
+		case FQ_TYPE_TX_ERROR:
+			str = "Tx error";
+			break;
+		case FQ_TYPE_TX:
+			str = "Tx";
+			break;
+		default:
+			str = "Unknown";
+		}
+
+		if (prev && str == prevstr &&
+		    abs(fq->fqid - prev->fqid) == 1) {
+			/* still inside the current run: just extend it */
+			last_fqid = fq->fqid;
+		} else {
+			/* a run ended (if there was one): flush, start anew */
+			if (prev)
+				bytes += dpaa_emit_fqids(buf, bytes, prevstr,
+							 first_fqid, last_fqid);
+			first_fqid = fq->fqid;
+			last_fqid = fq->fqid;
+		}
+
+		prev = fq;
+		prevstr = str;
+	}
+
+	if (prev)
+		bytes += dpaa_emit_fqids(buf, bytes, prevstr,
+					 first_fqid, last_fqid);
+
+	return bytes;
+}
+
+/* sysfs "bpids": one pool id per line; scnprintf() bounds the offset */
+static ssize_t dpaa_eth_show_bpids(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct dpaa_priv *priv = netdev_priv(to_net_dev(dev));
+	ssize_t bytes = 0;
+	int i;
+
+	for (i = 0; i < DPAA_BPS_NUM; i++)
+		bytes += scnprintf(buf + bytes, PAGE_SIZE - bytes, "%u\n",
+				   priv->dpaa_bps[i]->bpid);
+	return bytes;
+}
+
+static struct device_attribute dpaa_eth_attrs[] = {	/* all read-only */
+	__ATTR(device_addr, 0444, dpaa_eth_show_addr, NULL),
+	__ATTR(fqids, 0444, dpaa_eth_show_fqids, NULL),
+	__ATTR(bpids, 0444, dpaa_eth_show_bpids, NULL),
+};
+
+void dpaa_eth_sysfs_init(struct device *dev)
+{
+	int idx, err = 0;	/* all-or-nothing creation of dpaa_eth_attrs */
+
+	for (idx = 0; !err && idx < ARRAY_SIZE(dpaa_eth_attrs); idx++)
+		err = device_create_file(dev, &dpaa_eth_attrs[idx]);
+	if (!err)
+		return;
+	dev_err(dev, "Error creating sysfs file\n");
+	for (idx--; idx > 0; )	/* idx - 1 is the entry that failed */
+		device_remove_file(dev, &dpaa_eth_attrs[--idx]);
+}
+
+void dpaa_eth_sysfs_remove(struct device *dev)
+{
+	struct device_attribute *attr = dpaa_eth_attrs;	/* first attribute */
+
+	while (attr < dpaa_eth_attrs + ARRAY_SIZE(dpaa_eth_attrs))
+		device_remove_file(dev, attr++);	/* in creation order */
+}
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h
new file mode 100644
index 000000000000..409c1dc39430
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth_trace.h
@@ -0,0 +1,141 @@
+/* Copyright 2013-2015 Freescale Semiconductor Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM	dpaa_eth
+
+#if !defined(_DPAA_ETH_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _DPAA_ETH_TRACE_H
+
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include "dpaa_eth.h"
+#include <linux/tracepoint.h>
+
+#define fd_format_name(format)	{ qm_fd_##format, #format }
+#define fd_format_list	\
+	fd_format_name(contig),	\
+	fd_format_name(sg)
+
+/* This is used to declare a class of events.
+ * Individual events of this type will be defined below.
+ */
+
+/* Store details about a frame descriptor, the id of the FQ on which it was
+ * transmitted/received and the name of the net device involved.
+ */
+DECLARE_EVENT_CLASS(dpaa_eth_fd,
+	/* Trace function prototype */
+	TP_PROTO(struct net_device *netdev,
+		 struct qman_fq *fq,
+		 const struct qm_fd *fd),
+
+	/* Repeat argument list here */
+	TP_ARGS(netdev, fq, fd),
+
+	/* A structure containing the relevant information we want to record.
+	 * Declare name and type for each normal element, name, type and size
+	 * for arrays. Use __string for variable length strings.
+	 */
+	TP_STRUCT__entry(
+		__field(u32,	fqid)
+		__field(u64,	fd_addr)
+		__field(u8,	fd_format)
+		__field(u16,	fd_offset)
+		__field(u32,	fd_length)
+		__field(u32,	fd_status)
+		__string(name,	netdev->name)
+	),
+
+	/* The function that assigns values to the above declared fields */
+	TP_fast_assign(
+		__entry->fqid = fq->fqid;
+		__entry->fd_addr = qm_fd_addr_get64(fd);
+		__entry->fd_format = qm_fd_get_format(fd);
+		__entry->fd_offset = qm_fd_get_offset(fd);
+		__entry->fd_length = qm_fd_get_length(fd);
+		__entry->fd_status = fd->status;
+		__assign_str(name, netdev->name);
+	),
+
+	/* Printed when the event fires; fd_format is shown by its symbolic name */
+	TP_printk("[%s] fqid=%d, fd: addr=0x%llx, format=%s, off=%u, len=%u, status=0x%08x",
+		  __get_str(name), __entry->fqid, __entry->fd_addr,
+		  __print_symbolic(__entry->fd_format, fd_format_list),
+		  __entry->fd_offset, __entry->fd_length, __entry->fd_status)
+);
+
+/* Now declare events of the above type. Format is:
+ * DEFINE_EVENT(class, name, proto, args), with proto and args same as for class
+ */
+
+/* Tx (egress) fd: event dpaa_tx_fd of class dpaa_eth_fd */
+DEFINE_EVENT(dpaa_eth_fd, dpaa_tx_fd,
+
+	TP_PROTO(struct net_device *netdev,
+		 struct qman_fq *fq,
+		 const struct qm_fd *fd),
+
+	TP_ARGS(netdev, fq, fd)
+);
+
+/* Rx fd: event dpaa_rx_fd of class dpaa_eth_fd */
+DEFINE_EVENT(dpaa_eth_fd, dpaa_rx_fd,
+
+	TP_PROTO(struct net_device *netdev,
+		 struct qman_fq *fq,
+		 const struct qm_fd *fd),
+
+	TP_ARGS(netdev, fq, fd)
+);
+
+/* Tx confirmation fd: event dpaa_tx_conf_fd of class dpaa_eth_fd */
+DEFINE_EVENT(dpaa_eth_fd, dpaa_tx_conf_fd,
+
+	TP_PROTO(struct net_device *netdev,
+		 struct qman_fq *fq,
+		 const struct qm_fd *fd),
+
+	TP_ARGS(netdev, fq, fd)
+);
+
+/* If only one event of a certain type needs to be declared, use TRACE_EVENT().
+ * The syntax is the same as for DECLARE_EVENT_CLASS().
+ */
+
+#endif /* _DPAA_ETH_TRACE_H */
+
+/* This must be outside ifdef _DPAA_ETH_TRACE_H */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE	dpaa_eth_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
new file mode 100644
index 000000000000..27e7044667d1
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_ethtool.c
@@ -0,0 +1,417 @@
+/* Copyright 2008-2016 Freescale Semiconductor, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *	 notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *	 notice, this list of conditions and the following disclaimer in the
+ *	 documentation and/or other materials provided with the distribution.
+ *     * Neither the name of Freescale Semiconductor nor the
+ *	 names of its contributors may be used to endorse or promote products
+ *	 derived from this software without specific prior written permission.
+ *
+ *
+ * ALTERNATIVELY, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") as published by the Free Software
+ * Foundation, either version 2 of that License or (at your option) any
+ * later version.
+ *
+ * THIS SOFTWARE IS PROVIDED BY Freescale Semiconductor ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL Freescale Semiconductor BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/string.h>
+
+#include "dpaa_eth.h"
+#include "mac.h"
+
+static const char dpaa_stats_percpu[][ETH_GSTRING_LEN] = {
+	"interrupts",	/* in_interrupt; order follows copy_stats() */
+	"rx packets",	/* stats.rx_packets */
+	"tx packets",	/* stats.tx_packets */
+	"tx confirm",	/* tx_confirm */
+	"tx S/G",	/* tx_frag_skbuffs */
+	"tx error",	/* stats.tx_errors */
+	"rx error",	/* stats.rx_errors */
+};
+
+static const char dpaa_stats_global[][ETH_GSTRING_LEN] = {
+	/* dpa rx errors, in struct dpaa_rx_errors field order */
+	"rx dma error",
+	"rx frame physical error",
+	"rx frame size error",
+	"rx header error",
+
+	/* QMan ERN counters, in struct dpaa_ern_cnt field order */
+	"qman cg_tdrop",
+	"qman wred",
+	"qman error cond",
+	"qman early window",
+	"qman late window",
+	"qman fq tdrop",
+	"qman fq retired",
+	"qman orp disabled",
+
+	/* congestion related stats */
+	"congestion time (ms)",
+	"entered congestion",
+	"congested (0/1)"
+};
+
+#define DPAA_STATS_PERCPU_LEN ARRAY_SIZE(dpaa_stats_percpu)
+#define DPAA_STATS_GLOBAL_LEN ARRAY_SIZE(dpaa_stats_global)
+
+/* ethtool get_settings: forward to the PHY; a missing PHY is not an error */
+static int dpaa_get_settings(struct net_device *net_dev,
+			     struct ethtool_cmd *et_cmd)
+{
+	struct phy_device *phydev = net_dev->phydev;
+
+	if (phydev)
+		return phy_ethtool_gset(phydev, et_cmd);
+
+	/* et_cmd is left as the caller passed it in */
+	netdev_dbg(net_dev, "phy device not initialized\n");
+
+	return 0;
+}
+
+/* ethtool set_settings: hand the request to the PHY, -ENODEV without one */
+static int dpaa_set_settings(struct net_device *net_dev,
+			     struct ethtool_cmd *et_cmd)
+{
+	struct phy_device *phydev = net_dev->phydev;
+	int ret = -ENODEV;
+
+	if (phydev)
+		ret = phy_ethtool_sset(phydev, et_cmd);
+	else
+		netdev_err(net_dev, "phy device not initialized\n");
+
+	if (phydev && ret < 0)
+		netdev_err(net_dev, "phy_ethtool_sset() = %d\n", ret);
+	return ret;
+}
+
+/* ethtool get_drvinfo: report driver name, version, fw version and bus info.
+ * Both version strings are the constant "0" (what "%X" printed for 0), so
+ * they are copied directly: no dead store of the snprintf() result and no
+ * truncation branch that could never be taken.
+ */
+static void dpaa_get_drvinfo(struct net_device *net_dev,
+			     struct ethtool_drvinfo *drvinfo)
+{
+	strlcpy(drvinfo->driver, KBUILD_MODNAME,
+		sizeof(drvinfo->driver));
+	strlcpy(drvinfo->version, "0", sizeof(drvinfo->version));
+	strlcpy(drvinfo->fw_version, "0", sizeof(drvinfo->fw_version));
+
+	/* NOTE(review): assumes the netdev's parent and grandparent devices
+	 * exist - confirm against the probe path.
+	 */
+	strlcpy(drvinfo->bus_info, dev_name(net_dev->dev.parent->parent),
+		sizeof(drvinfo->bus_info));
+}
+
+static u32 dpaa_get_msglevel(struct net_device *net_dev)
+{	/* ethtool get_msglevel: report the level stored in dpaa_priv */
+	return ((struct dpaa_priv *)netdev_priv(net_dev))->msg_enable;
+}
+
+static void dpaa_set_msglevel(struct net_device *net_dev,
+			      u32 msg_enable)
+{	/* ethtool set_msglevel: store the new level in dpaa_priv */
+	((struct dpaa_priv *)netdev_priv(net_dev))->msg_enable = msg_enable;
+}
+
+/* ethtool nway_reset: restart autonegotiation if the PHY has it enabled */
+static int dpaa_nway_reset(struct net_device *net_dev)
+{
+	struct phy_device *phydev = net_dev->phydev;
+	int ret;
+
+	if (!phydev) {
+		netdev_err(net_dev, "phy device not initialized\n");
+		return -ENODEV;
+	}
+
+	/* with autoneg off there is nothing to restart: report success */
+	if (!phydev->autoneg)
+		return 0;
+	ret = phy_start_aneg(phydev);
+	if (ret < 0)
+		netdev_err(net_dev, "phy_start_aneg() = %d\n", ret);
+	return ret;
+}
+
+/* ethtool get_pauseparam: report the pause settings kept in the MAC device.
+ * Without a PHY attached @epause is left untouched.
+ */
+static void dpaa_get_pauseparam(struct net_device *net_dev,
+				struct ethtool_pauseparam *epause)
+{
+	struct dpaa_priv *priv = netdev_priv(net_dev);
+	struct mac_device *mac_dev = priv->mac_dev;
+
+	if (!net_dev->phydev) {
+		netdev_err(net_dev, "phy device not initialized\n");
+		return;
+	}
+
+	epause->tx_pause = mac_dev->tx_pause_active;
+	epause->rx_pause = mac_dev->rx_pause_active;
+	epause->autoneg = mac_dev->autoneg_pause;
+}
+
+static int dpaa_set_pauseparam(struct net_device *net_dev,
+			       struct ethtool_pauseparam *epause)
+{	/* ethtool set_pauseparam handler */
+	struct mac_device *mac_dev;
+	struct phy_device *phydev;
+	bool rx_pause, tx_pause;
+	struct dpaa_priv *priv;
+	u32 newadv, oldadv;
+	int err;
+
+	priv = netdev_priv(net_dev);
+	mac_dev = priv->mac_dev;
+
+	phydev = net_dev->phydev;
+	if (!phydev) {
+		netdev_err(net_dev, "phy device not initialized\n");
+		return -ENODEV;
+	}
+	/* reject if the PHY lacks Pause, or lacks Asym_Pause and rx != tx */
+	if (!(phydev->supported & SUPPORTED_Pause) ||
+	    (!(phydev->supported & SUPPORTED_Asym_Pause) &&
+	    (epause->rx_pause != epause->tx_pause)))
+		return -EINVAL;
+
+	/* The MAC should know how to handle PAUSE frame autonegotiation before
+	 * adjust_link is triggered by a forced renegotiation of sym/asym PAUSE
+	 * settings.
+	 */
+	mac_dev->autoneg_pause = !!epause->autoneg;
+	mac_dev->rx_pause_req = !!epause->rx_pause;
+	mac_dev->tx_pause_req = !!epause->tx_pause;
+
+	/* Determine the sym/asym advertised PAUSE capabilities from the desired
+	 * rx/tx pause settings.
+	 */
+	newadv = 0;
+	if (epause->rx_pause)
+		newadv = ADVERTISED_Pause | ADVERTISED_Asym_Pause;
+	if (epause->tx_pause)
+		newadv |= ADVERTISED_Asym_Pause;
+
+	oldadv = phydev->advertising &
+			(ADVERTISED_Pause | ADVERTISED_Asym_Pause);
+
+	/* If there are differences between the old and the new advertised
+	 * values, restart PHY autonegotiation and advertise the new values.
+	 */
+	if (oldadv != newadv) {
+		phydev->advertising &= ~(ADVERTISED_Pause
+				| ADVERTISED_Asym_Pause);
+		phydev->advertising |= newadv;
+		if (phydev->autoneg) {
+			err = phy_start_aneg(phydev);
+			if (err < 0)
+				netdev_err(net_dev, "phy_start_aneg() = %d\n",
+					   err);
+		}
+	}
+	/* a phy_start_aneg() error is only logged: err is overwritten below */
+	fman_get_pause_cfg(mac_dev, &rx_pause, &tx_pause);
+	err = fman_set_mac_active_pause(mac_dev, rx_pause, tx_pause);
+	if (err < 0)
+		netdev_err(net_dev, "set_mac_active_pause() = %d\n", err);
+
+	return err;
+}
+
+/* ethtool get_sset_count (ETH_SS_STATS only): each per-CPU statistic and
+ * each buffer pool takes one slot per online CPU plus one for the total.
+ */
+static int dpaa_get_sset_count(struct net_device *net_dev, int type)
+{
+	unsigned int slots = num_online_cpus() + 1;
+	unsigned int total;
+
+	if (type != ETH_SS_STATS)
+		return -EOPNOTSUPP;
+
+	total = slots * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM) +
+		DPAA_STATS_GLOBAL_LEN;
+	return total;
+}
+
+/* Store one CPU's counters into @data and add them to the running totals.
+ * @data is laid out as consecutive rows of (num_cpus + 1) values, one row
+ * per statistic: slot @crr_cpu of a row is overwritten with this CPU's
+ * value, slot @num_cpus (the last one) accumulates the total. The rows for
+ * the DPAA_BPS_NUM buffer pool counts in @bp_count follow the counters.
+ */
+static void copy_stats(struct dpaa_percpu_priv *percpu_priv, int num_cpus,
+		       int crr_cpu, u64 *bp_count, u64 *data)
+{
+	/* per-CPU counters, in the same order as dpaa_stats_percpu[] */
+	const u64 vals[] = {
+		percpu_priv->in_interrupt,
+		percpu_priv->stats.rx_packets,
+		percpu_priv->stats.tx_packets,
+		percpu_priv->tx_confirm,
+		percpu_priv->tx_frag_skbuffs,
+		percpu_priv->stats.tx_errors,
+		percpu_priv->stats.rx_errors,
+	};
+	int stride = num_cpus + 1;
+	u64 *row = data;
+	int k;
+
+	for (k = 0; k < ARRAY_SIZE(vals); k++, row += stride) {
+		row[crr_cpu] = vals[k];
+		row[num_cpus] += vals[k];
+	}
+	/* buffer pool rows come right after the counter rows */
+	for (k = 0; k < DPAA_BPS_NUM; k++, row += stride) {
+		row[crr_cpu] = bp_count[k];
+		row[num_cpus] += bp_count[k];
+	}
+}
+
+/* ethtool get_ethtool_stats. Per-CPU slots are indexed densely, not by CPU
+ * id: with a sparse online mask an id >= num_cpus would overrun its row.
+ */
+static void dpaa_get_ethtool_stats(struct net_device *net_dev,
+				   struct ethtool_stats *stats, u64 *data)
+{
+	u64 bp_count[DPAA_BPS_NUM], cg_time, cg_num;
+	struct dpaa_percpu_priv *percpu_priv;
+	unsigned int num_cpus, offset, slot = 0;
+	struct dpaa_rx_errors rx_errors;
+	struct dpaa_ern_cnt ern_cnt;
+	struct dpaa_bp *dpaa_bp;
+	struct dpaa_priv *priv;
+	int total_stats, i, j;
+	bool cg_status;
+
+	total_stats = dpaa_get_sset_count(net_dev, ETH_SS_STATS);
+	priv     = netdev_priv(net_dev);
+	num_cpus = num_online_cpus();
+
+	memset(&bp_count, 0, sizeof(bp_count));
+	memset(&rx_errors, 0, sizeof(struct dpaa_rx_errors));
+	memset(&ern_cnt, 0, sizeof(struct dpaa_ern_cnt));
+	memset(data, 0, total_stats * sizeof(u64));
+
+	for_each_online_cpu(i) {
+		if (slot >= num_cpus)
+			break;	/* more CPUs online than sampled: no slot */
+		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
+		for (j = 0; j < DPAA_BPS_NUM; j++) {
+			dpaa_bp = priv->dpaa_bps[j];
+			if (!dpaa_bp->percpu_count)
+				continue;
+			bp_count[j] = *(per_cpu_ptr(dpaa_bp->percpu_count, i));
+		}
+		rx_errors.dme += percpu_priv->rx_errors.dme;
+		rx_errors.fpe += percpu_priv->rx_errors.fpe;
+		rx_errors.fse += percpu_priv->rx_errors.fse;
+		rx_errors.phe += percpu_priv->rx_errors.phe;
+		ern_cnt.cg_tdrop     += percpu_priv->ern_cnt.cg_tdrop;
+		ern_cnt.wred         += percpu_priv->ern_cnt.wred;
+		ern_cnt.err_cond     += percpu_priv->ern_cnt.err_cond;
+		ern_cnt.early_window += percpu_priv->ern_cnt.early_window;
+		ern_cnt.late_window  += percpu_priv->ern_cnt.late_window;
+		ern_cnt.fq_tdrop     += percpu_priv->ern_cnt.fq_tdrop;
+		ern_cnt.fq_retired   += percpu_priv->ern_cnt.fq_retired;
+		ern_cnt.orp_zero     += percpu_priv->ern_cnt.orp_zero;
+		copy_stats(percpu_priv, num_cpus, slot++, bp_count, data);
+	}
+
+	offset = (num_cpus + 1) * (DPAA_STATS_PERCPU_LEN + DPAA_BPS_NUM);
+	memcpy(data + offset, &rx_errors, sizeof(struct dpaa_rx_errors));
+	offset += sizeof(struct dpaa_rx_errors) / sizeof(u64);
+	memcpy(data + offset, &ern_cnt, sizeof(struct dpaa_ern_cnt));
+
+	/* gather congestion related counters */
+	cg_num    = 0;
+	cg_status = false;
+	cg_time   = jiffies_to_msecs(priv->cgr_data.congested_jiffies);
+	if (qman_query_cgr_congested(&priv->cgr_data.cgr, &cg_status) == 0) {
+		cg_num    = priv->cgr_data.cgr_congested_count;
+		/* reset congestion stats (like QMan API does) */
+		priv->cgr_data.congested_jiffies   = 0;
+		priv->cgr_data.cgr_congested_count = 0;
+	}
+	offset += sizeof(struct dpaa_ern_cnt) / sizeof(u64);
+	data[offset++] = cg_time;
+	data[offset++] = cg_num;
+	data[offset++] = cg_status;
+}
+
+/* ethtool get_strings: names for the values of dpaa_get_ethtool_stats(), in
+ * the same order - per-CPU statistics, buffer pool counts, global statistics.
+ */
+static void dpaa_get_strings(struct net_device *net_dev, u32 stringset,
+			     u8 *data)
+{
+	unsigned int stat, cpu, num_cpus = num_online_cpus();
+	char name[ETH_GSTRING_LEN] = { 0 };
+	u8 *dst = data;
+
+	/* one name per online CPU, then the total, for every statistic */
+	for (stat = 0; stat < DPAA_STATS_PERCPU_LEN; stat++) {
+		for (cpu = 0; cpu < num_cpus; cpu++, dst += ETH_GSTRING_LEN) {
+			snprintf(name, sizeof(name), "%s [CPU %d]",
+				 dpaa_stats_percpu[stat], cpu);
+			memcpy(dst, name, sizeof(name));
+		}
+		snprintf(name, sizeof(name), "%s [TOTAL]",
+			 dpaa_stats_percpu[stat]);
+		memcpy(dst, name, sizeof(name));
+		dst += ETH_GSTRING_LEN;
+	}
+
+	for (stat = 0; stat < DPAA_BPS_NUM; stat++) {
+		for (cpu = 0; cpu < num_cpus; cpu++, dst += ETH_GSTRING_LEN) {
+			snprintf(name, sizeof(name), "bpool %c [CPU %d]",
+				 'a' + stat, cpu);
+			memcpy(dst, name, sizeof(name));
+		}
+		snprintf(name, sizeof(name), "bpool %c [TOTAL]",
+			 'a' + stat);
+		memcpy(dst, name, sizeof(name));
+		dst += ETH_GSTRING_LEN;
+	}
+
+	/* the global names are already ETH_GSTRING_LEN wide: copy in bulk */
+	memcpy(dst, dpaa_stats_global, sizeof(dpaa_stats_global));
+}
+
+const struct ethtool_ops dpaa_ethtool_ops = {	/* declared in dpaa_eth.h */
+	.get_settings = dpaa_get_settings,
+	.set_settings = dpaa_set_settings,
+	.get_drvinfo = dpaa_get_drvinfo,
+	.get_msglevel = dpaa_get_msglevel,
+	.set_msglevel = dpaa_set_msglevel,
+	.nway_reset = dpaa_nway_reset,
+	.get_pauseparam = dpaa_get_pauseparam,
+	.set_pauseparam = dpaa_set_pauseparam,
+	.get_link = ethtool_op_get_link,	/* not defined in this file */
+	.get_sset_count = dpaa_get_sset_count,
+	.get_ethtool_stats = dpaa_get_ethtool_stats,
+	.get_strings = dpaa_get_strings,
+};
diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index c865135f3cb9..5ea740b4cf14 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -574,6 +574,8 @@ struct fec_enet_private {
 	unsigned int reload_period;
 	int pps_enable;
 	unsigned int next_counter;
+
+	u64 ethtool_stats[0];
 };
 
 void fec_ptp_init(struct platform_device *pdev);
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 5aa9d4ded214..38160c2bebcb 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1841,11 +1841,11 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
 		ret = clk_prepare_enable(fep->clk_ahb);
 		if (ret)
 			return ret;
-		if (fep->clk_enet_out) {
-			ret = clk_prepare_enable(fep->clk_enet_out);
-			if (ret)
-				goto failed_clk_enet_out;
-		}
+
+		ret = clk_prepare_enable(fep->clk_enet_out);
+		if (ret)
+			goto failed_clk_enet_out;
+
 		if (fep->clk_ptp) {
 			mutex_lock(&fep->ptp_clk_mutex);
 			ret = clk_prepare_enable(fep->clk_ptp);
@@ -1857,23 +1857,20 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
 			}
 			mutex_unlock(&fep->ptp_clk_mutex);
 		}
-		if (fep->clk_ref) {
-			ret = clk_prepare_enable(fep->clk_ref);
-			if (ret)
-				goto failed_clk_ref;
-		}
+
+		ret = clk_prepare_enable(fep->clk_ref);
+		if (ret)
+			goto failed_clk_ref;
 	} else {
 		clk_disable_unprepare(fep->clk_ahb);
-		if (fep->clk_enet_out)
-			clk_disable_unprepare(fep->clk_enet_out);
+		clk_disable_unprepare(fep->clk_enet_out);
 		if (fep->clk_ptp) {
 			mutex_lock(&fep->ptp_clk_mutex);
 			clk_disable_unprepare(fep->clk_ptp);
 			fep->ptp_clk_on = false;
 			mutex_unlock(&fep->ptp_clk_mutex);
 		}
-		if (fep->clk_ref)
-			clk_disable_unprepare(fep->clk_ref);
+		clk_disable_unprepare(fep->clk_ref);
 	}
 
 	return 0;
@@ -2313,14 +2310,26 @@ static const struct fec_stat {
 	{ "IEEE_rx_octets_ok", IEEE_R_OCTETS_OK },
 };
 
-static void fec_enet_get_ethtool_stats(struct net_device *dev,
-	struct ethtool_stats *stats, u64 *data)
+#define FEC_STATS_SIZE		(ARRAY_SIZE(fec_stats) * sizeof(u64))
+
+static void fec_enet_update_ethtool_stats(struct net_device *dev)
 {
 	struct fec_enet_private *fep = netdev_priv(dev);
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(fec_stats); i++)
-		data[i] = readl(fep->hwp + fec_stats[i].offset);
+		fep->ethtool_stats[i] = readl(fep->hwp + fec_stats[i].offset);
+}
+
+static void fec_enet_get_ethtool_stats(struct net_device *dev,
+				       struct ethtool_stats *stats, u64 *data)
+{
+	struct fec_enet_private *fep = netdev_priv(dev);
+
+	if (netif_running(dev))	/* refresh the cache only while the device is up */
+		fec_enet_update_ethtool_stats(dev);
+	/* Report the cached counters, whether just refreshed or saved earlier. */
+	memcpy(data, fep->ethtool_stats, FEC_STATS_SIZE);
 }
 
 static void fec_enet_get_strings(struct net_device *netdev,
@@ -2345,17 +2354,13 @@ static int fec_enet_get_sset_count(struct net_device *dev, int sset)
 		return -EOPNOTSUPP;
 	}
 }
-#endif /* !defined(CONFIG_M5272) */
 
-static int fec_enet_nway_reset(struct net_device *dev)
+#else	/* !defined(CONFIG_M5272) */
+#define FEC_STATS_SIZE	0
+static inline void fec_enet_update_ethtool_stats(struct net_device *dev)
 {
-	struct phy_device *phydev = dev->phydev;
-
-	if (!phydev)
-		return -ENODEV;
-
-	return genphy_restart_aneg(phydev);
 }
+#endif /* !defined(CONFIG_M5272) */
 
 /* ITR clock source is enet system clock (clk_ahb).
  * TCTT unit is cycle_ns * 64 cycle
@@ -2556,7 +2561,7 @@ static const struct ethtool_ops fec_enet_ethtool_ops = {
 	.get_drvinfo		= fec_enet_get_drvinfo,
 	.get_regs_len		= fec_enet_get_regs_len,
 	.get_regs		= fec_enet_get_regs,
-	.nway_reset		= fec_enet_nway_reset,
+	.nway_reset		= phy_ethtool_nway_reset,
 	.get_link		= ethtool_op_get_link,
 	.get_coalesce		= fec_enet_get_coalesce,
 	.set_coalesce		= fec_enet_set_coalesce,
@@ -2874,6 +2879,8 @@ fec_enet_close(struct net_device *ndev)
 	if (fep->quirks & FEC_QUIRK_ERR006687)
 		imx6q_cpuidle_fec_irqs_unused();
 
+	fec_enet_update_ethtool_stats(ndev);
+
 	fec_enet_clk_enable(ndev, false);
 	pinctrl_pm_select_sleep_state(&fep->pdev->dev);
 	pm_runtime_mark_last_busy(&fep->pdev->dev);
@@ -3055,7 +3062,6 @@ static const struct net_device_ops fec_netdev_ops = {
 	.ndo_stop		= fec_enet_close,
 	.ndo_start_xmit		= fec_enet_start_xmit,
 	.ndo_set_rx_mode	= set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_tx_timeout		= fec_timeout,
 	.ndo_set_mac_address	= fec_set_mac_address,
@@ -3180,6 +3186,8 @@ static int fec_enet_init(struct net_device *ndev)
 
 	fec_restart(ndev);
 
+	fec_enet_update_ethtool_stats(ndev);
+
 	return 0;
 }
 
@@ -3278,8 +3286,8 @@ fec_probe(struct platform_device *pdev)
 	fec_enet_get_queue_num(pdev, &num_tx_qs, &num_rx_qs);
 
 	/* Init network device */
-	ndev = alloc_etherdev_mqs(sizeof(struct fec_enet_private),
-				  num_tx_qs, num_rx_qs);
+	ndev = alloc_etherdev_mqs(sizeof(struct fec_enet_private) +
+				  FEC_STATS_SIZE, num_tx_qs, num_rx_qs);
 	if (!ndev)
 		return -ENOMEM;
 
@@ -3475,6 +3483,8 @@ failed_regulator:
 failed_clk_ipg:
 	fec_enet_clk_enable(ndev, false);
 failed_clk:
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
 failed_phy:
 	of_node_put(phy_node);
 failed_ioremap:
@@ -3488,6 +3498,7 @@ fec_drv_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	struct device_node *np = pdev->dev.of_node;
 
 	cancel_work_sync(&fep->tx_timeout_work);
 	fec_ptp_stop(pdev);
@@ -3495,6 +3506,8 @@ fec_drv_remove(struct platform_device *pdev)
 	fec_enet_mii_remove(fep);
 	if (fep->reg_phy)
 		regulator_disable(fep->reg_phy);
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
 	of_node_put(fep->phy_node);
 	free_netdev(ndev);
 
diff --git a/drivers/net/ethernet/freescale/fec_mpc52xx.c b/drivers/net/ethernet/freescale/fec_mpc52xx.c
index 446ae9d60c71..aa8cf5d2a53c 100644
--- a/drivers/net/ethernet/freescale/fec_mpc52xx.c
+++ b/drivers/net/ethernet/freescale/fec_mpc52xx.c
@@ -802,7 +802,6 @@ static const struct net_device_ops mpc52xx_fec_netdev_ops = {
 	.ndo_set_mac_address = mpc52xx_fec_set_mac_address,
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_do_ioctl = mpc52xx_fec_ioctl,
-	.ndo_change_mtu = eth_change_mtu,
 	.ndo_tx_timeout = mpc52xx_fec_tx_timeout,
 	.ndo_get_stats = mpc52xx_fec_get_stats,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c b/drivers/net/ethernet/freescale/fman/fman_memac.c
index 53ef51e3bd9e..71a5ded9d1de 100644
--- a/drivers/net/ethernet/freescale/fman/fman_memac.c
+++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
@@ -1107,6 +1107,9 @@ int memac_free(struct fman_mac *memac)
 {
 	free_init_resources(memac);
 
+	if (memac->pcsphy)
+		put_device(&memac->pcsphy->mdio.dev);
+
 	kfree(memac->memac_drv_param);
 	kfree(memac);
 
diff --git a/drivers/net/ethernet/freescale/fman/mac.c b/drivers/net/ethernet/freescale/fman/mac.c
index 8fe6b3e253fa..69ca42ce5dd5 100644
--- a/drivers/net/ethernet/freescale/fman/mac.c
+++ b/drivers/net/ethernet/freescale/fman/mac.c
@@ -879,19 +879,25 @@ static int mac_probe(struct platform_device *_of_dev)
 
 		priv->fixed_link = kzalloc(sizeof(*priv->fixed_link),
 					   GFP_KERNEL);
-		if (!priv->fixed_link)
+		if (!priv->fixed_link) {
+			err = -ENOMEM;
 			goto _return_dev_set_drvdata;
+		}
 
 		priv->phy_node = of_node_get(mac_node);
 		phy = of_phy_find_device(priv->phy_node);
-		if (!phy)
+		if (!phy) {
+			err = -EINVAL;
 			goto _return_dev_set_drvdata;
+		}
 
 		priv->fixed_link->link = phy->link;
 		priv->fixed_link->speed = phy->speed;
 		priv->fixed_link->duplex = phy->duplex;
 		priv->fixed_link->pause = phy->pause;
 		priv->fixed_link->asym_pause = phy->asym_pause;
+
+		put_device(&phy->mdio.dev);
 	}
 
 	err = mac_dev->init(mac_dev);
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
index dc120c148d97..d9f3a480ca1b 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c
@@ -118,22 +118,22 @@ static int fs_enet_napi(struct napi_struct *napi, int budget)
 			  BD_ENET_TX_RL | BD_ENET_TX_UN | BD_ENET_TX_CSL)) {
 
 			if (sc & BD_ENET_TX_HB)	/* No heartbeat */
-				fep->stats.tx_heartbeat_errors++;
+				dev->stats.tx_heartbeat_errors++;
 			if (sc & BD_ENET_TX_LC)	/* Late collision */
-				fep->stats.tx_window_errors++;
+				dev->stats.tx_window_errors++;
 			if (sc & BD_ENET_TX_RL)	/* Retrans limit */
-				fep->stats.tx_aborted_errors++;
+				dev->stats.tx_aborted_errors++;
 			if (sc & BD_ENET_TX_UN)	/* Underrun */
-				fep->stats.tx_fifo_errors++;
+				dev->stats.tx_fifo_errors++;
 			if (sc & BD_ENET_TX_CSL)	/* Carrier lost */
-				fep->stats.tx_carrier_errors++;
+				dev->stats.tx_carrier_errors++;
 
 			if (sc & (BD_ENET_TX_LC | BD_ENET_TX_RL | BD_ENET_TX_UN)) {
-				fep->stats.tx_errors++;
+				dev->stats.tx_errors++;
 				do_restart = 1;
 			}
 		} else
-			fep->stats.tx_packets++;
+			dev->stats.tx_packets++;
 
 		if (sc & BD_ENET_TX_READY) {
 			dev_warn(fep->dev,
@@ -145,7 +145,7 @@ static int fs_enet_napi(struct napi_struct *napi, int budget)
 		 * but we eventually sent the packet OK.
 		 */
 		if (sc & BD_ENET_TX_DEF)
-			fep->stats.collisions++;
+			dev->stats.collisions++;
 
 		/* unmap */
 		if (fep->mapped_as_page[dirtyidx])
@@ -212,19 +212,19 @@ static int fs_enet_napi(struct napi_struct *napi, int budget)
 		 */
 		if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL |
 			  BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) {
-			fep->stats.rx_errors++;
+			dev->stats.rx_errors++;
 			/* Frame too long or too short. */
 			if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH))
-				fep->stats.rx_length_errors++;
+				dev->stats.rx_length_errors++;
 			/* Frame alignment */
 			if (sc & (BD_ENET_RX_NO | BD_ENET_RX_CL))
-				fep->stats.rx_frame_errors++;
+				dev->stats.rx_frame_errors++;
 			/* CRC Error */
 			if (sc & BD_ENET_RX_CR)
-				fep->stats.rx_crc_errors++;
+				dev->stats.rx_crc_errors++;
 			/* FIFO overrun */
 			if (sc & BD_ENET_RX_OV)
-				fep->stats.rx_crc_errors++;
+				dev->stats.rx_crc_errors++;
 
 			skbn = fep->rx_skbuff[curidx];
 		} else {
@@ -233,9 +233,9 @@ static int fs_enet_napi(struct napi_struct *napi, int budget)
 			/*
 			 * Process the incoming frame.
 			 */
-			fep->stats.rx_packets++;
+			dev->stats.rx_packets++;
 			pkt_len = CBDR_DATLEN(bdp) - 4;	/* remove CRC */
-			fep->stats.rx_bytes += pkt_len + 4;
+			dev->stats.rx_bytes += pkt_len + 4;
 
 			if (pkt_len <= fpi->rx_copybreak) {
 				/* +2 to make IP header L1 cache aligned */
@@ -277,7 +277,7 @@ static int fs_enet_napi(struct napi_struct *napi, int budget)
 				received++;
 				netif_receive_skb(skb);
 			} else {
-				fep->stats.rx_dropped++;
+				dev->stats.rx_dropped++;
 				skbn = skb;
 			}
 		}
@@ -543,7 +543,7 @@ static int fs_enet_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	curidx = bdp - fep->tx_bd_base;
 
 	len = skb->len;
-	fep->stats.tx_bytes += len;
+	dev->stats.tx_bytes += len;
 	if (nr_frags)
 		len -= skb->data_len;
 	fep->tx_free -= nr_frags + 1;
@@ -619,7 +619,7 @@ static void fs_timeout(struct net_device *dev)
 	unsigned long flags;
 	int wake = 0;
 
-	fep->stats.tx_errors++;
+	dev->stats.tx_errors++;
 
 	spin_lock_irqsave(&fep->lock, flags);
 
@@ -774,12 +774,6 @@ static int fs_enet_close(struct net_device *dev)
 	return 0;
 }
 
-static struct net_device_stats *fs_enet_get_stats(struct net_device *dev)
-{
-	struct fs_enet_private *fep = netdev_priv(dev);
-	return &fep->stats;
-}
-
 /*************************************************************************/
 
 static void fs_get_drvinfo(struct net_device *dev,
@@ -813,11 +807,6 @@ static void fs_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 		regs->version = 0;
 }
 
-static int fs_nway_reset(struct net_device *dev)
-{
-	return 0;
-}
-
 static u32 fs_get_msglevel(struct net_device *dev)
 {
 	struct fs_enet_private *fep = netdev_priv(dev);
@@ -871,7 +860,7 @@ static int fs_set_tunable(struct net_device *dev,
 static const struct ethtool_ops fs_ethtool_ops = {
 	.get_drvinfo = fs_get_drvinfo,
 	.get_regs_len = fs_get_regs_len,
-	.nway_reset = fs_nway_reset,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_link = ethtool_op_get_link,
 	.get_msglevel = fs_get_msglevel,
 	.set_msglevel = fs_set_msglevel,
@@ -905,14 +894,12 @@ extern void fs_mii_disconnect(struct net_device *dev);
 static const struct net_device_ops fs_enet_netdev_ops = {
 	.ndo_open		= fs_enet_open,
 	.ndo_stop		= fs_enet_close,
-	.ndo_get_stats		= fs_enet_get_stats,
 	.ndo_start_xmit		= fs_enet_start_xmit,
 	.ndo_tx_timeout		= fs_timeout,
 	.ndo_set_rx_mode	= fs_set_multicast_list,
 	.ndo_do_ioctl		= fs_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= fs_enet_netpoll,
 #endif
@@ -980,7 +967,7 @@ static int fs_enet_probe(struct platform_device *ofdev)
 		err = clk_prepare_enable(clk);
 		if (err) {
 			ret = err;
-			goto out_free_fpi;
+			goto out_deregister_fixed_link;
 		}
 		fpi->clk_per = clk;
 	}
@@ -1061,6 +1048,9 @@ out_put:
 	of_node_put(fpi->phy_node);
 	if (fpi->clk_per)
 		clk_disable_unprepare(fpi->clk_per);
+out_deregister_fixed_link:
+	if (of_phy_is_fixed_link(ofdev->dev.of_node))
+		of_phy_deregister_fixed_link(ofdev->dev.of_node);
 out_free_fpi:
 	kfree(fpi);
 	return ret;
@@ -1079,6 +1069,8 @@ static int fs_enet_remove(struct platform_device *ofdev)
 	of_node_put(fep->fpi->phy_node);
 	if (fep->fpi->clk_per)
 		clk_disable_unprepare(fep->fpi->clk_per);
+	if (of_phy_is_fixed_link(ofdev->dev.of_node))
+		of_phy_deregister_fixed_link(ofdev->dev.of_node);
 	free_netdev(ndev);
 	return 0;
 }
diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
index fee24c822fad..5ce516c8a62a 100644
--- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
+++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h
@@ -137,7 +137,6 @@ struct fs_enet_private {
 	cbd_t __iomem *cur_rx;
 	cbd_t __iomem *cur_tx;
 	int tx_free;
-	struct net_device_stats stats;
 	struct timer_list phy_timer_list;
 	const struct phy_info *phy;
 	u32 msg_enable;
diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c
index 4b4f5bc0e279..756f7e763d5f 100644
--- a/drivers/net/ethernet/freescale/gianfar.c
+++ b/drivers/net/ethernet/freescale/gianfar.c
@@ -1312,6 +1312,7 @@ static void gfar_init_addr_hash_table(struct gfar_private *priv)
  */
 static int gfar_probe(struct platform_device *ofdev)
 {
+	struct device_node *np = ofdev->dev.of_node;
 	struct net_device *dev = NULL;
 	struct gfar_private *priv = NULL;
 	int err = 0, i;
@@ -1338,7 +1339,10 @@ static int gfar_probe(struct platform_device *ofdev)
 
 	/* Fill in the dev structure */
 	dev->watchdog_timeo = TX_TIMEOUT;
+	/* MTU range: 50 - 9586 */
 	dev->mtu = 1500;
+	dev->min_mtu = 50;
+	dev->max_mtu = GFAR_JUMBO_FRAME_SIZE - ETH_HLEN;
 	dev->netdev_ops = &gfar_netdev_ops;
 	dev->ethtool_ops = &gfar_ethtool_ops;
 
@@ -1462,6 +1466,8 @@ static int gfar_probe(struct platform_device *ofdev)
 	return 0;
 
 register_fail:
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
 	unmap_group_regs(priv);
 	gfar_free_rx_queues(priv);
 	gfar_free_tx_queues(priv);
@@ -1474,11 +1480,16 @@ register_fail:
 static int gfar_remove(struct platform_device *ofdev)
 {
 	struct gfar_private *priv = platform_get_drvdata(ofdev);
+	struct device_node *np = ofdev->dev.of_node;
 
 	of_node_put(priv->phy_node);
 	of_node_put(priv->tbi_node);
 
 	unregister_netdev(priv->ndev);
+
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
+
 	unmap_group_regs(priv);
 	gfar_free_rx_queues(priv);
 	gfar_free_tx_queues(priv);
@@ -2592,12 +2603,6 @@ static int gfar_set_mac_address(struct net_device *dev)
 static int gfar_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct gfar_private *priv = netdev_priv(dev);
-	int frame_size = new_mtu + ETH_HLEN;
-
-	if ((frame_size < 64) || (frame_size > GFAR_JUMBO_FRAME_SIZE)) {
-		netif_err(priv, drv, dev, "Invalid MTU setting\n");
-		return -EINVAL;
-	}
 
 	while (test_and_set_bit_lock(GFAR_RESETTING, &priv->state))
 		cpu_relax();
diff --git a/drivers/net/ethernet/freescale/gianfar_ptp.c b/drivers/net/ethernet/freescale/gianfar_ptp.c
index 57798814160d..721be13081f9 100644
--- a/drivers/net/ethernet/freescale/gianfar_ptp.c
+++ b/drivers/net/ethernet/freescale/gianfar_ptp.c
@@ -72,7 +72,7 @@ struct gianfar_ptp_registers {
 /* Bit definitions for the TMR_CTRL register */
 #define ALM1P                 (1<<31) /* Alarm1 output polarity */
 #define ALM2P                 (1<<30) /* Alarm2 output polarity */
-#define FS                    (1<<28) /* FIPER start indication */
+#define FIPERST               (1<<28) /* FIPER start indication */
 #define PP1L                  (1<<27) /* Fiper1 pulse loopback mode enabled. */
 #define PP2L                  (1<<26) /* Fiper2 pulse loopback mode enabled. */
 #define TCLK_PERIOD_SHIFT     (16) /* 1588 timer reference clock period. */
@@ -280,21 +280,26 @@ static irqreturn_t isr(int irq, void *priv)
  * PTP clock operations
  */
 
-static int ptp_gianfar_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+static int ptp_gianfar_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
 {
-	u64 adj;
-	u32 diff, tmr_add;
+	u64 adj, diff;
+	u32 tmr_add;
 	int neg_adj = 0;
 	struct etsects *etsects = container_of(ptp, struct etsects, caps);
 
-	if (ppb < 0) {
+	if (scaled_ppm < 0) {
 		neg_adj = 1;
-		ppb = -ppb;
+		scaled_ppm = -scaled_ppm;
 	}
 	tmr_add = etsects->tmr_add;
 	adj = tmr_add;
-	adj *= ppb;
-	diff = div_u64(adj, 1000000000ULL);
+
+	/* calculate diff as adj*(scaled_ppm/65536)/1000000
+	 * and round() to the nearest integer
+	 */
+	adj *= scaled_ppm;
+	diff = div_u64(adj, 8000000);
+	diff = (diff >> 13) + ((diff >> 12) & 1);
 
 	tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
 
@@ -415,7 +420,7 @@ static struct ptp_clock_info ptp_gianfar_caps = {
 	.n_per_out	= 0,
 	.n_pins		= 0,
 	.pps		= 1,
-	.adjfreq	= ptp_gianfar_adjfreq,
+	.adjfine	= ptp_gianfar_adjfine,
 	.adjtime	= ptp_gianfar_adjtime,
 	.gettime64	= ptp_gianfar_gettime,
 	.settime64	= ptp_gianfar_settime,
@@ -502,7 +507,7 @@ static int gianfar_ptp_probe(struct platform_device *dev)
 	gfar_write(&etsects->regs->tmr_fiper1, etsects->tmr_fiper1);
 	gfar_write(&etsects->regs->tmr_fiper2, etsects->tmr_fiper2);
 	set_alarm(etsects);
-	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FS|RTPE|TE|FRD);
+	gfar_write(&etsects->regs->tmr_ctrl,   tmr_ctrl|FIPERST|RTPE|TE|FRD);
 
 	spin_unlock_irqrestore(&etsects->lock, flags);
 
diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c
index 186ef8f16c80..53c5fcf1436c 100644
--- a/drivers/net/ethernet/freescale/ucc_geth.c
+++ b/drivers/net/ethernet/freescale/ucc_geth.c
@@ -3681,7 +3681,6 @@ static const struct net_device_ops ucc_geth_netdev_ops = {
 	.ndo_start_xmit		= ucc_geth_start_xmit,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= ucc_geth_set_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_rx_mode	= ucc_geth_set_multi,
 	.ndo_tx_timeout		= ucc_geth_timeout,
 	.ndo_do_ioctl		= ucc_geth_ioctl,
@@ -3868,9 +3867,8 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	dev = alloc_etherdev(sizeof(*ugeth));
 
 	if (dev == NULL) {
-		of_node_put(ug_info->tbi_node);
-		of_node_put(ug_info->phy_node);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto err_deregister_fixed_link;
 	}
 
 	ugeth = netdev_priv(dev);
@@ -3907,10 +3905,7 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 		if (netif_msg_probe(ugeth))
 			pr_err("%s: Cannot register net device, aborting\n",
 			       dev->name);
-		free_netdev(dev);
-		of_node_put(ug_info->tbi_node);
-		of_node_put(ug_info->phy_node);
-		return err;
+		goto err_free_netdev;
 	}
 
 	mac_addr = of_get_mac_address(np);
@@ -3923,16 +3918,29 @@ static int ucc_geth_probe(struct platform_device* ofdev)
 	ugeth->node = np;
 
 	return 0;
+
+err_free_netdev:
+	free_netdev(dev);
+err_deregister_fixed_link:
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
+	of_node_put(ug_info->tbi_node);
+	of_node_put(ug_info->phy_node);
+
+	return err;
 }
 
 static int ucc_geth_remove(struct platform_device* ofdev)
 {
 	struct net_device *dev = platform_get_drvdata(ofdev);
 	struct ucc_geth_private *ugeth = netdev_priv(dev);
+	struct device_node *np = ofdev->dev.of_node;
 
 	unregister_netdev(dev);
 	free_netdev(dev);
 	ucc_geth_memclean(ugeth);
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
 	of_node_put(ugeth->ug_info->tbi_node);
 	of_node_put(ugeth->ug_info->phy_node);
 
diff --git a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
index 812a968a78e9..8ba636f61b50 100644
--- a/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
+++ b/drivers/net/ethernet/freescale/ucc_geth_ethtool.c
@@ -332,13 +332,6 @@ static void uec_get_ethtool_stats(struct net_device *netdev,
 	}
 }
 
-static int uec_nway_reset(struct net_device *netdev)
-{
-	struct ucc_geth_private *ugeth = netdev_priv(netdev);
-
-	return phy_start_aneg(ugeth->phydev);
-}
-
 /* Report driver information */
 static void
 uec_get_drvinfo(struct net_device *netdev,
@@ -394,7 +387,7 @@ static const struct ethtool_ops uec_ethtool_ops = {
 	.get_regs               = uec_get_regs,
 	.get_msglevel           = uec_get_msglevel,
 	.set_msglevel           = uec_set_msglevel,
-	.nway_reset             = uec_nway_reset,
+	.nway_reset             = phy_ethtool_nway_reset,
 	.get_link               = ethtool_op_get_link,
 	.get_ringparam          = uec_get_ringparam,
 	.set_ringparam          = uec_set_ringparam,
diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
index 399cfd217288..51c4abc51bf4 100644
--- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
+++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c
@@ -225,7 +225,6 @@ static const struct net_device_ops fjn_netdev_ops = {
 	.ndo_tx_timeout 	= fjn_tx_timeout,
 	.ndo_set_config 	= fjn_config,
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c
index 39778892b3b3..854befde0a08 100644
--- a/drivers/net/ethernet/hisilicon/hip04_eth.c
+++ b/drivers/net/ethernet/hisilicon/hip04_eth.c
@@ -769,7 +769,6 @@ static const struct net_device_ops hip04_netdev_ops = {
 	.ndo_set_mac_address	= hip04_set_mac_address,
 	.ndo_tx_timeout         = hip04_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int hip04_alloc_ring(struct net_device *ndev, struct device *d)
@@ -898,7 +897,6 @@ static int hip04_mac_probe(struct platform_device *pdev)
 
 	INIT_WORK(&priv->tx_timeout_task, hip04_tx_timeout_task);
 
-	ether_setup(ndev);
 	ndev->netdev_ops = &hip04_netdev_ops;
 	ndev->ethtool_ops = &hip04_ethtool_ops;
 	ndev->watchdog_timeo = TX_TIMEOUT;
diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c
index ced185962ef8..49863068c59e 100644
--- a/drivers/net/ethernet/hisilicon/hisi_femac.c
+++ b/drivers/net/ethernet/hisilicon/hisi_femac.c
@@ -712,7 +712,6 @@ static const struct net_device_ops hisi_femac_netdev_ops = {
 	.ndo_do_ioctl		= hisi_femac_net_ioctl,
 	.ndo_set_mac_address	= hisi_femac_set_mac_address,
 	.ndo_set_rx_mode	= hisi_femac_net_set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static void hisi_femac_core_reset(struct hisi_femac_priv *priv)
diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
index e69a6bed31a9..ee7e9ce2f5b3 100644
--- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c
@@ -11,8 +11,10 @@
 #include <linux/interrupt.h>
 #include <linux/etherdevice.h>
 #include <linux/platform_device.h>
+#include <linux/of_device.h>
 #include <linux/of_net.h>
 #include <linux/of_mdio.h>
+#include <linux/reset.h>
 #include <linux/clk.h>
 #include <linux/circ_buf.h>
 
@@ -183,12 +185,28 @@
 #define DESC_DATA_LEN_OFF		16
 #define DESC_BUFF_LEN_OFF		0
 #define DESC_DATA_MASK			0x7ff
+#define DESC_SG				BIT(30)
+#define DESC_FRAGS_NUM_OFF		11
 
 /* DMA descriptor ring helpers */
 #define dma_ring_incr(n, s)		(((n) + 1) & ((s) - 1))
 #define dma_cnt(n)			((n) >> 5)
 #define dma_byte(n)			((n) << 5)
 
+#define HW_CAP_TSO			BIT(0)
+#define GEMAC_V1			0
+#define GEMAC_V2			(GEMAC_V1 | HW_CAP_TSO)
+#define HAS_CAP_TSO(hw_cap)		((hw_cap) & HW_CAP_TSO)
+
+#define PHY_RESET_DELAYS_PROPERTY	"hisilicon,phy-reset-delays-us"
+
+enum phy_reset_delays {	/* indexes into priv->phy_reset_delays[] (us) */
+	PRE_DELAY,	/* wait after the initial deassert */
+	PULSE,		/* wait while reset is asserted */
+	POST_DELAY,	/* wait after the final deassert */
+	DELAYS_NUM,	/* number of delay entries */
+};
+
 struct hix5hd2_desc {
 	__le32 buff_addr;
 	__le32 cmd;
@@ -201,6 +219,27 @@ struct hix5hd2_desc_sw {
 	unsigned int	size;
 };
 
+struct hix5hd2_sg_desc_ring {
+	struct sg_desc *desc;	/* CPU pointer from dma_alloc_coherent() */
+	dma_addr_t phys_addr;	/* DMA address of the same table */
+};
+
+struct frags_info {
+	__le32 addr;	/* DMA address of one mapped skb fragment */
+	__le32 size;	/* size of that fragment (frag->size) */
+};
+
+/* maximum number of skb fragments the hardware supports */
+#define SG_MAX_SKB_FRAGS	17
+struct sg_desc {
+	__le32 total_len;	/* skb->len of the whole frame */
+	__le32 resvd0;
+	__le32 linear_addr;	/* DMA address of the skb linear area */
+	__le32 linear_len;	/* skb_headlen() of the linear area */
+	/* reserve one extra frag entry for memory alignment */
+	struct frags_info frags[SG_MAX_SKB_FRAGS + 1];
+};
+
 #define QUEUE_NUMS	4
 struct hix5hd2_priv {
 	struct hix5hd2_desc_sw pool[QUEUE_NUMS];
@@ -208,6 +247,7 @@ struct hix5hd2_priv {
 #define rx_bq		pool[1]
 #define tx_bq		pool[2]
 #define tx_rq		pool[3]
+	struct hix5hd2_sg_desc_ring tx_ring;
 
 	void __iomem *base;
 	void __iomem *ctrl_base;
@@ -221,15 +261,30 @@ struct hix5hd2_priv {
 	struct device_node *phy_node;
 	phy_interface_t	phy_mode;
 
+	unsigned long hw_cap;
 	unsigned int speed;
 	unsigned int duplex;
 
-	struct clk *clk;
+	struct clk *mac_core_clk;
+	struct clk *mac_ifc_clk;
+	struct reset_control *mac_core_rst;
+	struct reset_control *mac_ifc_rst;
+	struct reset_control *phy_rst;
+	u32 phy_reset_delays[DELAYS_NUM];
 	struct mii_bus *bus;
 	struct napi_struct napi;
 	struct work_struct tx_timeout_task;
 };
 
+static inline void hix5hd2_mac_interface_reset(struct hix5hd2_priv *priv)
+{
+	if (!priv->mac_ifc_rst)
+		return;
+	/* Pulse the MAC interface reset: assert, then deassert right away. */
+	reset_control_assert(priv->mac_ifc_rst);
+	reset_control_deassert(priv->mac_ifc_rst);
+}
+
 static void hix5hd2_config_port(struct net_device *dev, u32 speed, u32 duplex)
 {
 	struct hix5hd2_priv *priv = netdev_priv(dev);
@@ -262,6 +317,7 @@ static void hix5hd2_config_port(struct net_device *dev, u32 speed, u32 duplex)
 	if (duplex)
 		val |= GMAC_FULL_DUPLEX;
 	writel_relaxed(val, priv->ctrl_base);
+	hix5hd2_mac_interface_reset(priv);
 
 	writel_relaxed(BIT_MODE_CHANGE_EN, priv->base + MODE_CHANGE_EN);
 	if (speed == SPEED_1000)
@@ -511,6 +567,27 @@ next:
 	return num;
 }
 
+static void hix5hd2_clean_sg_desc(struct hix5hd2_priv *priv,
+				  struct sk_buff *skb, u32 pos)
+{
+	struct sg_desc *desc;
+	dma_addr_t addr;
+	u32 len;
+	int i;
+
+	desc = priv->tx_ring.desc + pos;
+	/* Undo the linear-area mapping recorded in the SG descriptor. */
+	addr = le32_to_cpu(desc->linear_addr);
+	len = le32_to_cpu(desc->linear_len);
+	dma_unmap_single(priv->dev, addr, len, DMA_TO_DEVICE);
+	/* Undo one page mapping per skb fragment. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		addr = le32_to_cpu(desc->frags[i].addr);
+		len = le32_to_cpu(desc->frags[i].size);
+		dma_unmap_page(priv->dev, addr, len, DMA_TO_DEVICE);
+	}
+}
+
 static void hix5hd2_xmit_reclaim(struct net_device *dev)
 {
 	struct sk_buff *skb;
@@ -538,8 +615,15 @@ static void hix5hd2_xmit_reclaim(struct net_device *dev)
 		pkts_compl++;
 		bytes_compl += skb->len;
 		desc = priv->tx_rq.desc + pos;
-		addr = le32_to_cpu(desc->buff_addr);
-		dma_unmap_single(priv->dev, addr, skb->len, DMA_TO_DEVICE);
+
+		if (skb_shinfo(skb)->nr_frags) {
+			hix5hd2_clean_sg_desc(priv, skb, pos);
+		} else {
+			addr = le32_to_cpu(desc->buff_addr);
+			dma_unmap_single(priv->dev, addr, skb->len,
+					 DMA_TO_DEVICE);
+		}
+
 		priv->tx_skb[pos] = NULL;
 		dev_consume_skb_any(skb);
 		pos = dma_ring_incr(pos, TX_DESC_NUM);
@@ -600,12 +684,66 @@ static irqreturn_t hix5hd2_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static u32 hix5hd2_get_desc_cmd(struct sk_buff *skb, unsigned long hw_cap)
+{
+	u32 cmd = 0;
+
+	if (HAS_CAP_TSO(hw_cap)) {	/* TSO-capable MAC: SG flag and frag count */
+		if (skb_shinfo(skb)->nr_frags)
+			cmd |= DESC_SG;
+		cmd |= skb_shinfo(skb)->nr_frags << DESC_FRAGS_NUM_OFF;
+	} else {			/* otherwise: DESC_FL_FULL plus buffer length */
+		cmd |= DESC_FL_FULL |
+			((skb->len & DESC_DATA_MASK) << DESC_BUFF_LEN_OFF);
+	}
+	/* Common to both: masked skb->len as data length, plus DESC_VLD_BUSY. */
+	cmd |= (skb->len & DESC_DATA_MASK) << DESC_DATA_LEN_OFF;
+	cmd |= DESC_VLD_BUSY;
+
+	return cmd;
+}
+
+static int hix5hd2_fill_sg_desc(struct hix5hd2_priv *priv,
+				struct sk_buff *skb, u32 pos)
+{
+	struct sg_desc *desc;
+	dma_addr_t addr;
+	int ret;
+	int i;
+
+	desc = priv->tx_ring.desc + pos;
+	/* Record the total length, then map the skb linear area. */
+	desc->total_len = cpu_to_le32(skb->len);
+	addr = dma_map_single(priv->dev, skb->data, skb_headlen(skb),
+			      DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(priv->dev, addr)))
+		return -EINVAL;
+	desc->linear_addr = cpu_to_le32(addr);
+	desc->linear_len = cpu_to_le32(skb_headlen(skb));
+	/* Map frags. NOTE(review): error path leaves earlier maps in place. */
+	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+		int len = frag->size;
+
+		addr = skb_frag_dma_map(priv->dev, frag, 0, len, DMA_TO_DEVICE);
+		ret = dma_mapping_error(priv->dev, addr);
+		if (unlikely(ret))
+			return -EINVAL;
+		desc->frags[i].addr = cpu_to_le32(addr);
+		desc->frags[i].size = cpu_to_le32(len);
+	}
+
+	return 0;
+}
+
 static int hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct hix5hd2_priv *priv = netdev_priv(dev);
 	struct hix5hd2_desc *desc;
 	dma_addr_t addr;
 	u32 pos;
+	u32 cmd;
+	int ret;
 
 	/* software write pointer */
 	pos = dma_cnt(readl_relaxed(priv->base + TX_BQ_WR_ADDR));
@@ -616,18 +754,31 @@ static int hix5hd2_net_xmit(struct sk_buff *skb, struct net_device *dev)
 		return NETDEV_TX_BUSY;
 	}
 
-	addr = dma_map_single(priv->dev, skb->data, skb->len, DMA_TO_DEVICE);
-	if (dma_mapping_error(priv->dev, addr)) {
-		dev_kfree_skb_any(skb);
-		return NETDEV_TX_OK;
-	}
-
 	desc = priv->tx_bq.desc + pos;
+
+	cmd = hix5hd2_get_desc_cmd(skb, priv->hw_cap);
+	desc->cmd = cpu_to_le32(cmd);
+
+	if (skb_shinfo(skb)->nr_frags) {
+		ret = hix5hd2_fill_sg_desc(priv, skb, pos);
+		if (unlikely(ret)) {
+			dev_kfree_skb_any(skb);
+			dev->stats.tx_dropped++;
+			return NETDEV_TX_OK;
+		}
+		addr = priv->tx_ring.phys_addr + pos * sizeof(struct sg_desc);
+	} else {
+		addr = dma_map_single(priv->dev, skb->data, skb->len,
+				      DMA_TO_DEVICE);
+		if (unlikely(dma_mapping_error(priv->dev, addr))) {
+			dev_kfree_skb_any(skb);
+			dev->stats.tx_dropped++;
+			return NETDEV_TX_OK;
+		}
+	}
 	desc->buff_addr = cpu_to_le32(addr);
+
 	priv->tx_skb[pos] = skb;
-	desc->cmd = cpu_to_le32(DESC_VLD_BUSY | DESC_FL_FULL |
-				(skb->len & DESC_DATA_MASK) << DESC_DATA_LEN_OFF |
-				(skb->len & DESC_DATA_MASK) << DESC_BUFF_LEN_OFF);
 
 	/* ensure desc updated */
 	wmb();
@@ -681,16 +832,26 @@ static int hix5hd2_net_open(struct net_device *dev)
 	struct phy_device *phy;
 	int ret;
 
-	ret = clk_prepare_enable(priv->clk);
+	ret = clk_prepare_enable(priv->mac_core_clk);
+	if (ret < 0) {
+		netdev_err(dev, "failed to enable mac core clk %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(priv->mac_ifc_clk);
 	if (ret < 0) {
-		netdev_err(dev, "failed to enable clk %d\n", ret);
+		clk_disable_unprepare(priv->mac_core_clk);
+		netdev_err(dev, "failed to enable mac ifc clk %d\n", ret);
 		return ret;
 	}
 
 	phy = of_phy_connect(dev, priv->phy_node,
 			     &hix5hd2_adjust_link, 0, priv->phy_mode);
-	if (!phy)
+	if (!phy) {
+		clk_disable_unprepare(priv->mac_ifc_clk);
+		clk_disable_unprepare(priv->mac_core_clk);
 		return -ENODEV;
+	}
 
 	phy_start(phy);
 	hix5hd2_hw_init(priv);
@@ -721,7 +882,8 @@ static int hix5hd2_net_close(struct net_device *dev)
 		phy_disconnect(dev->phydev);
 	}
 
-	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->mac_ifc_clk);
+	clk_disable_unprepare(priv->mac_core_clk);
 
 	return 0;
 }
@@ -862,10 +1024,82 @@ error_free_pool:
 	return -ENOMEM;
 }
 
+static int hix5hd2_init_sg_desc_queue(struct hix5hd2_priv *priv)
+{
+	struct sg_desc *desc;
+	dma_addr_t phys_addr;
+
+	desc = (struct sg_desc *)dma_alloc_coherent(priv->dev,
+				TX_DESC_NUM * sizeof(struct sg_desc),
+				&phys_addr, GFP_KERNEL);
+	if (!desc)
+		return -ENOMEM;
+	/* Keep both the CPU pointer and the DMA address of the table. */
+	priv->tx_ring.desc = desc;
+	priv->tx_ring.phys_addr = phys_addr;
+
+	return 0;
+}
+
+static void hix5hd2_destroy_sg_desc_queue(struct hix5hd2_priv *priv)
+{
+	if (priv->tx_ring.desc) {	/* nothing to free when the table is unset */
+		dma_free_coherent(priv->dev,
+				  TX_DESC_NUM * sizeof(struct sg_desc),
+				  priv->tx_ring.desc, priv->tx_ring.phys_addr);
+		priv->tx_ring.desc = NULL;	/* a second call becomes a no-op */
+	}
+}
+
+static inline void hix5hd2_mac_core_reset(struct hix5hd2_priv *priv)
+{
+	if (!priv->mac_core_rst)
+		return;
+	/* Pulse the MAC core reset: assert, then deassert right away. */
+	reset_control_assert(priv->mac_core_rst);
+	reset_control_deassert(priv->mac_core_rst);
+}
+
+static void hix5hd2_sleep_us(u32 time_us)
+{
+	u32 time_ms;
+
+	if (!time_us)	/* zero means no delay was requested */
+		return;
+	/* Under 20 ms (rounded up) use usleep_range(); otherwise msleep(). */
+	time_ms = DIV_ROUND_UP(time_us, 1000);
+	if (time_ms < 20)
+		usleep_range(time_us, time_us + 500);
+	else
+		msleep(time_ms);
+}
+
+static void hix5hd2_phy_reset(struct hix5hd2_priv *priv)
+{
+	/* For the PHY hardware reset to succeed, the PHY must first be
+	 * held in the deasserted state; only then is the actual
+	 * assert/deassert reset pulse carried out.
+	 */
+	reset_control_deassert(priv->phy_rst);
+	hix5hd2_sleep_us(priv->phy_reset_delays[PRE_DELAY]);
+
+	reset_control_assert(priv->phy_rst);
+	/* Hold reset asserted long enough for it to take effect;
+	 * the required time depends on the PHY hardware.
+	 */
+	hix5hd2_sleep_us(priv->phy_reset_delays[PULSE]);
+	reset_control_deassert(priv->phy_rst);
+	/* Let the PHY settle before any later MDIO access. */
+	hix5hd2_sleep_us(priv->phy_reset_delays[POST_DELAY]);
+}
+
+static const struct of_device_id hix5hd2_of_match[];
+
 static int hix5hd2_dev_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *node = dev->of_node;
+	const struct of_device_id *of_id = NULL;
 	struct net_device *ndev;
 	struct hix5hd2_priv *priv;
 	struct resource *res;
@@ -883,6 +1117,13 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
 	priv->dev = dev;
 	priv->netdev = ndev;
 
+	of_id = of_match_device(hix5hd2_of_match, dev);
+	if (!of_id) {
+		ret = -EINVAL;
+		goto out_free_netdev;
+	}
+	priv->hw_cap = (unsigned long)of_id->data;
+
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	priv->base = devm_ioremap_resource(dev, res);
 	if (IS_ERR(priv->base)) {
@@ -897,23 +1138,55 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
 		goto out_free_netdev;
 	}
 
-	priv->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(priv->clk)) {
-		netdev_err(ndev, "failed to get clk\n");
+	priv->mac_core_clk = devm_clk_get(&pdev->dev, "mac_core");
+	if (IS_ERR(priv->mac_core_clk)) {
+		netdev_err(ndev, "failed to get mac core clk\n");
 		ret = -ENODEV;
 		goto out_free_netdev;
 	}
 
-	ret = clk_prepare_enable(priv->clk);
+	ret = clk_prepare_enable(priv->mac_core_clk);
 	if (ret < 0) {
-		netdev_err(ndev, "failed to enable clk %d\n", ret);
+		netdev_err(ndev, "failed to enable mac core clk %d\n", ret);
 		goto out_free_netdev;
 	}
 
+	priv->mac_ifc_clk = devm_clk_get(&pdev->dev, "mac_ifc");
+	if (IS_ERR(priv->mac_ifc_clk))
+		priv->mac_ifc_clk = NULL;
+
+	ret = clk_prepare_enable(priv->mac_ifc_clk);
+	if (ret < 0) {
+		netdev_err(ndev, "failed to enable mac ifc clk %d\n", ret);
+		goto out_disable_mac_core_clk;
+	}
+
+	priv->mac_core_rst = devm_reset_control_get(dev, "mac_core");
+	if (IS_ERR(priv->mac_core_rst))
+		priv->mac_core_rst = NULL;
+	hix5hd2_mac_core_reset(priv);
+
+	priv->mac_ifc_rst = devm_reset_control_get(dev, "mac_ifc");
+	if (IS_ERR(priv->mac_ifc_rst))
+		priv->mac_ifc_rst = NULL;
+
+	priv->phy_rst = devm_reset_control_get(dev, "phy");
+	if (IS_ERR(priv->phy_rst)) {
+		priv->phy_rst = NULL;
+	} else {
+		ret = of_property_read_u32_array(node,
+						 PHY_RESET_DELAYS_PROPERTY,
+						 priv->phy_reset_delays,
+						 DELAYS_NUM);
+		if (ret)
+			goto out_disable_clk;
+		hix5hd2_phy_reset(priv);
+	}
+
 	bus = mdiobus_alloc();
 	if (bus == NULL) {
 		ret = -ENOMEM;
-		goto out_free_netdev;
+		goto out_disable_clk;
 	}
 
 	bus->priv = priv;
@@ -972,22 +1245,38 @@ static int hix5hd2_dev_probe(struct platform_device *pdev)
 	ndev->ethtool_ops = &hix5hd2_ethtools_ops;
 	SET_NETDEV_DEV(ndev, dev);
 
+	if (HAS_CAP_TSO(priv->hw_cap))
+		ndev->hw_features |= NETIF_F_SG;
+
+	ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA;
+	ndev->vlan_features |= ndev->features;
+
 	ret = hix5hd2_init_hw_desc_queue(priv);
 	if (ret)
 		goto out_phy_node;
 
 	netif_napi_add(ndev, &priv->napi, hix5hd2_poll, NAPI_POLL_WEIGHT);
+
+	if (HAS_CAP_TSO(priv->hw_cap)) {
+		ret = hix5hd2_init_sg_desc_queue(priv);
+		if (ret)
+			goto out_destroy_queue;
+	}
+
 	ret = register_netdev(priv->netdev);
 	if (ret) {
 		netdev_err(ndev, "register_netdev failed!");
 		goto out_destroy_queue;
 	}
 
-	clk_disable_unprepare(priv->clk);
+	clk_disable_unprepare(priv->mac_ifc_clk);
+	clk_disable_unprepare(priv->mac_core_clk);
 
 	return ret;
 
 out_destroy_queue:
+	if (HAS_CAP_TSO(priv->hw_cap))
+		hix5hd2_destroy_sg_desc_queue(priv);
 	netif_napi_del(&priv->napi);
 	hix5hd2_destroy_hw_desc_queue(priv);
 out_phy_node:
@@ -996,6 +1285,10 @@ err_mdiobus:
 	mdiobus_unregister(bus);
 err_free_mdio:
 	mdiobus_free(bus);
+out_disable_clk:
+	clk_disable_unprepare(priv->mac_ifc_clk);
+out_disable_mac_core_clk:
+	clk_disable_unprepare(priv->mac_core_clk);
 out_free_netdev:
 	free_netdev(ndev);
 
@@ -1012,6 +1305,8 @@ static int hix5hd2_dev_remove(struct platform_device *pdev)
 	mdiobus_unregister(priv->bus);
 	mdiobus_free(priv->bus);
 
+	if (HAS_CAP_TSO(priv->hw_cap))
+		hix5hd2_destroy_sg_desc_queue(priv);
 	hix5hd2_destroy_hw_desc_queue(priv);
 	of_node_put(priv->phy_node);
 	cancel_work_sync(&priv->tx_timeout_task);
@@ -1021,7 +1316,10 @@ static int hix5hd2_dev_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id hix5hd2_of_match[] = {
-	{.compatible = "hisilicon,hix5hd2-gmac",},
+	{ .compatible = "hisilicon,hisi-gemac-v1", .data = (void *)GEMAC_V1 },
+	{ .compatible = "hisilicon,hisi-gemac-v2", .data = (void *)GEMAC_V2 },
+	{ .compatible = "hisilicon,hix5hd2-gemac", .data = (void *)GEMAC_V1 },
+	{ .compatible = "hisilicon,hi3798cv200-gemac", .data = (void *)GEMAC_V2 },
 	{},
 };
 
@@ -1029,7 +1327,7 @@ MODULE_DEVICE_TABLE(of, hix5hd2_of_match);
 
 static struct platform_driver hix5hd2_dev_driver = {
 	.driver = {
-		.name = "hix5hd2-gmac",
+		.name = "hisi-gemac",
 		.of_match_table = hix5hd2_of_match,
 	},
 	.probe = hix5hd2_dev_probe,
@@ -1038,6 +1336,6 @@ static struct platform_driver hix5hd2_dev_driver = {
 
 module_platform_driver(hix5hd2_dev_driver);
 
-MODULE_DESCRIPTION("HISILICON HIX5HD2 Ethernet driver");
+MODULE_DESCRIPTION("HISILICON Gigabit Ethernet MAC driver");
 MODULE_LICENSE("GPL v2");
-MODULE_ALIAS("platform:hix5hd2-gmac");
+MODULE_ALIAS("platform:hisi-gemac");
diff --git a/drivers/net/ethernet/hisilicon/hns/hnae.h b/drivers/net/ethernet/hisilicon/hns/hnae.h
index e093cbf26c8c..8016854796fb 100644
--- a/drivers/net/ethernet/hisilicon/hns/hnae.h
+++ b/drivers/net/ethernet/hisilicon/hns/hnae.h
@@ -99,6 +99,8 @@ enum hnae_led_state {
 #define HNS_RX_FLAG_L3ID_IPV6 0x1
 #define HNS_RX_FLAG_L4ID_UDP 0x0
 #define HNS_RX_FLAG_L4ID_TCP 0x1
+#define HNS_RX_FLAG_L4ID_SCTP 0x3
+
 
 #define HNS_TXD_ASID_S 0
 #define HNS_TXD_ASID_M (0xff << HNS_TXD_ASID_S)
@@ -426,8 +428,14 @@ enum hnae_media_type {
  *   get mac address
  * set_mac_addr()
  *   set mac address
+ * clr_mc_addr()
+ *   clear mcast tcam table
  * set_mc_addr()
  *   set multicast mode
+ * add_uc_addr()
+ *   add ucast address
+ * rm_uc_addr()
+ *   remove ucast address
  * set_mtu()
  *   set mtu
  * update_stats()
@@ -488,6 +496,11 @@ struct hnae_ae_ops {
 	void (*set_promisc_mode)(struct hnae_handle *handle, u32 en);
 	int (*get_mac_addr)(struct hnae_handle *handle, void **p);
 	int (*set_mac_addr)(struct hnae_handle *handle, void *p);
+	int (*add_uc_addr)(struct hnae_handle *handle,
+			   const unsigned char *addr);
+	int (*rm_uc_addr)(struct hnae_handle *handle,
+			  const unsigned char *addr);
+	int (*clr_mc_addr)(struct hnae_handle *handle);
 	int (*set_mc_addr)(struct hnae_handle *handle, void *addr);
 	int (*set_mtu)(struct hnae_handle *handle, int new_mtu);
 	void (*set_tso_stats)(struct hnae_handle *handle, int enable);
@@ -590,7 +603,7 @@ static inline int hnae_alloc_buffer_attach(struct hnae_ring *ring, int i)
 	if (ret)
 		return ret;
 
-	ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
+	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
 
 	return 0;
 }
@@ -621,14 +634,14 @@ static inline void hnae_replace_buffer(struct hnae_ring *ring, int i,
 
 	bops->unmap_buffer(ring, &ring->desc_cb[i]);
 	ring->desc_cb[i] = *res_cb;
-	ring->desc[i].addr = (__le64)ring->desc_cb[i].dma;
+	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma);
 	ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
 
 static inline void hnae_reuse_buffer(struct hnae_ring *ring, int i)
 {
 	ring->desc_cb[i].reuse_flag = 0;
-	ring->desc[i].addr = (__le64)(ring->desc_cb[i].dma
+	ring->desc[i].addr = cpu_to_le64(ring->desc_cb[i].dma
 		+ ring->desc_cb[i].page_offset);
 	ring->desc[i].rx.ipoff_bnum_pid_flag = 0;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
index 2d0cb609adc3..0a9cdf00b31a 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ae_adapt.c
@@ -18,9 +18,6 @@
 #include "hns_dsaf_rcb.h"
 
 #define AE_NAME_PORT_ID_IDX 6
-#define ETH_STATIC_REG	 1
-#define ETH_DUMP_REG	 5
-#define ETH_GSTRING_LEN	32
 
 static struct hns_mac_cb *hns_get_mac_cb(struct hnae_handle *handle)
 {
@@ -202,6 +199,28 @@ static int hns_ae_set_mac_address(struct hnae_handle *handle, void *p)
 	return 0;
 }
 
+/* add_uc_addr op: non-HNAE_PORT_SERVICE ports are rejected with -ENOSPC */
+static int hns_ae_add_uc_address(struct hnae_handle *handle,
+				 const unsigned char *addr)
+{
+	struct hns_mac_cb *cb = hns_get_mac_cb(handle);
+
+	if (cb->mac_type == HNAE_PORT_SERVICE)
+		return hns_mac_add_uc_addr(cb, handle->vf_id, addr);
+	return -ENOSPC;
+}
+
+/* rm_uc_addr op: non-HNAE_PORT_SERVICE ports are rejected with -ENOSPC */
+static int hns_ae_rm_uc_address(struct hnae_handle *handle,
+				const unsigned char *addr)
+{
+	struct hns_mac_cb *cb = hns_get_mac_cb(handle);
+
+	if (cb->mac_type == HNAE_PORT_SERVICE)
+		return hns_mac_rm_uc_addr(cb, handle->vf_id, addr);
+	return -ENOSPC;
+}
+
 static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr)
 {
 	int ret;
@@ -235,6 +254,16 @@ static int hns_ae_set_multicast_one(struct hnae_handle *handle, void *addr)
 	return ret;
 }
 
+/* clr_mc_addr op: a silent no-op (returns 0) on non-service ports */
+static int hns_ae_clr_multicast(struct hnae_handle *handle)
+{
+	struct hns_mac_cb *cb = hns_get_mac_cb(handle);
+
+	if (cb->mac_type == HNAE_PORT_SERVICE)
+		return hns_mac_clr_multicast(cb, handle->vf_id);
+	return 0;
+}
+
 static int hns_ae_set_mtu(struct hnae_handle *handle, int new_mtu)
 {
 	struct hns_mac_cb *mac_cb = hns_get_mac_cb(handle);
@@ -823,7 +852,10 @@ static struct hnae_ae_ops hns_dsaf_ops = {
 	.get_coalesce_range = hns_ae_get_coalesce_range,
 	.set_promisc_mode = hns_ae_set_promisc_mode,
 	.set_mac_addr = hns_ae_set_mac_address,
+	.add_uc_addr = hns_ae_add_uc_address,
+	.rm_uc_addr = hns_ae_rm_uc_address,
 	.set_mc_addr = hns_ae_set_multicast_one,
+	.clr_mc_addr = hns_ae_clr_multicast,
 	.set_mtu = hns_ae_set_mtu,
 	.update_stats = hns_ae_update_stats,
 	.set_tso_stats = hns_ae_set_tso_stats,
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
index 1e1eb92998fb..3382441fe7b5 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_gmac.c
@@ -37,8 +37,8 @@ static const struct mac_stats_string g_gmac_stats_string[] = {
 	{"gmac_rx_very_long_err", MAC_STATS_FIELD_OFF(rx_long_err)},
 	{"gmac_rx_runt_err", MAC_STATS_FIELD_OFF(rx_minto64)},
 	{"gmac_rx_short_err", MAC_STATS_FIELD_OFF(rx_under_min)},
-	{"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_bytes)},
-	{"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_pkts)},
+	{"gmac_rx_filt_pkt", MAC_STATS_FIELD_OFF(rx_filter_pkts)},
+	{"gmac_rx_octets_total_filt", MAC_STATS_FIELD_OFF(rx_filter_bytes)},
 	{"gmac_rx_overrun_cnt", MAC_STATS_FIELD_OFF(rx_fifo_overrun_err)},
 	{"gmac_rx_length_err", MAC_STATS_FIELD_OFF(rx_len_err)},
 	{"gmac_rx_fail_comma", MAC_STATS_FIELD_OFF(rx_comma_err)},
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index ec8c738af726..3239d27143b9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -263,6 +263,46 @@ int hns_mac_change_vf_addr(struct hns_mac_cb *mac_cb,
 	return 0;
 }
 
+/* Add @addr as a unicast entry for @vf_id; debug DSAFs return -ENOSPC. */
+int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+			const unsigned char *addr)
+{
+	struct dsaf_drv_mac_single_dest_entry entry;
+	int err;
+
+	if (HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev))
+		return -ENOSPC;
+
+	memset(&entry, 0, sizeof(entry));
+	err = hns_mac_get_inner_port_num(mac_cb, vf_id, &entry.port_num);
+	if (err)
+		return err;
+	memcpy(entry.addr, addr, sizeof(entry.addr));
+	entry.in_port_num = mac_cb->mac_id;
+
+	return hns_dsaf_set_mac_uc_entry(mac_cb->dsaf_dev, &entry);
+}
+
+/* Remove unicast entry @addr of @vf_id; debug DSAFs return -ENOSPC. */
+int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+		       const unsigned char *addr)
+{
+	struct dsaf_drv_mac_single_dest_entry entry;
+	int err;
+
+	if (HNS_DSAF_IS_DEBUG(mac_cb->dsaf_dev))
+		return -ENOSPC;
+
+	memset(&entry, 0, sizeof(entry));
+	err = hns_mac_get_inner_port_num(mac_cb, vf_id, &entry.port_num);
+	if (err)
+		return err;
+	memcpy(entry.addr, addr, sizeof(entry.addr));
+	entry.in_port_num = mac_cb->mac_id;
+
+	return hns_dsaf_rm_mac_addr(mac_cb->dsaf_dev, &entry);
+}
+
 int hns_mac_set_multi(struct hns_mac_cb *mac_cb,
 		      u32 port_num, char *addr, bool enable)
 {
@@ -330,13 +370,24 @@ int hns_mac_del_mac(struct hns_mac_cb *mac_cb, u32 vfn, char *mac)
 	return 0;
 }
 
+/* Clear the multicast entries of function @vfn on this MAC's port. */
+int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn)
+{
+	struct dsaf_device *dsaf = mac_cb->dsaf_dev;
+	u8 inner_port;
+	int err;
+
+	err = hns_mac_get_inner_port_num(mac_cb, vfn, &inner_port);
+	return err ? err :
+		hns_dsaf_clr_mac_mc_port(dsaf, mac_cb->mac_id, inner_port);
+}
+
 static void hns_mac_param_get(struct mac_params *param,
 			      struct hns_mac_cb *mac_cb)
 {
 	param->vaddr = (void *)mac_cb->vaddr;
 	param->mac_mode = hns_get_enet_interface(mac_cb);
-	memcpy(param->addr, mac_cb->addr_entry_idx[0].addr,
-	       MAC_NUM_OCTETS_PER_ADDR);
+	ether_addr_copy(param->addr, mac_cb->addr_entry_idx[0].addr);
 	param->mac_id = mac_cb->mac_id;
 	param->dev = mac_cb->dev;
 }
@@ -353,8 +404,7 @@ static int hns_mac_port_config_bc_en(struct hns_mac_cb *mac_cb,
 {
 	int ret;
 	struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
-	u8 addr[MAC_NUM_OCTETS_PER_ADDR]
-		= {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	struct dsaf_drv_mac_single_dest_entry mac_entry;
 
 	/* directy return ok in debug network mode */
@@ -389,8 +439,7 @@ int hns_mac_vm_config_bc_en(struct hns_mac_cb *mac_cb, u32 vmid, bool enable)
 	int ret;
 	struct dsaf_device *dsaf_dev = mac_cb->dsaf_dev;
 	u8 port_num;
-	u8 addr[MAC_NUM_OCTETS_PER_ADDR]
-		= {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+	u8 addr[ETH_ALEN] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
 	struct mac_entry_idx *uc_mac_entry;
 	struct dsaf_drv_mac_single_dest_entry mac_entry;
 
@@ -453,8 +502,7 @@ int hns_mac_set_mtu(struct hns_mac_cb *mac_cb, u32 new_mtu)
 	if (mac_cb->mac_type == HNAE_PORT_DEBUG)
 		max_frm = MAC_MAX_MTU_DBG;
 
-	if ((new_mtu < MAC_MIN_MTU) || (new_frm > max_frm) ||
-	    (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size))
+	if (new_frm > HNS_RCB_RING_MAX_BD_PER_PKT * buf_size)
 		return -EINVAL;
 
 	if (!drv->config_max_frame_length)
@@ -869,6 +917,13 @@ static int  hns_mac_get_info(struct hns_mac_cb *mac_cb)
 		}
 	}
 
+	if (fwnode_property_read_u8_array(mac_cb->fw_port, "mc-mac-mask",
+					  mac_cb->mc_mask, ETH_ALEN)) {
+		dev_warn(mac_cb->dev,
+			 "no mc-mac-mask property, set to default value.\n");
+		eth_broadcast_addr(mac_cb->mc_mask);
+	}
+
 	return 0;
 }
 
@@ -1082,6 +1137,8 @@ void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en)
 {
 	struct mac_driver *mac_ctrl_drv = hns_mac_get_drv(mac_cb);
 
+	hns_dsaf_set_promisc_tcam(mac_cb->dsaf_dev, mac_cb->mac_id, !!en);
+
 	if (mac_ctrl_drv->set_promiscuous)
 		mac_ctrl_drv->set_promiscuous(mac_ctrl_drv, en);
 }
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
index d3a1f72ece0e..2bb3d1e93c64 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.h
@@ -31,7 +31,7 @@ struct dsaf_device;
 #define MAC_MIN_MTU		68
 #define MAC_MAX_MTU_DBG		MAC_DEFAULT_MTU
 
-#define MAC_DEFAULT_PAUSE_TIME 0xff
+#define MAC_DEFAULT_PAUSE_TIME 0xffff
 
 #define MAC_GMAC_IDX 0
 #define MAC_XGMAC_IDX 1
@@ -56,9 +56,6 @@ struct dsaf_device;
 /*check mac addr multicast*/
 #define MAC_IS_MULTICAST(p)	((*((u8 *)((p) + 0)) & 0x01) ? (1) : (0))
 
-/**< Number of octets (8-bit bytes) in an ethernet address */
-#define MAC_NUM_OCTETS_PER_ADDR 6
-
 struct mac_priv {
 	void *mac;
 };
@@ -189,7 +186,7 @@ struct mac_statistics {
 
 /*mac para struct ,mac get param from nic or dsaf when initialize*/
 struct mac_params {
-	char addr[MAC_NUM_OCTETS_PER_ADDR];
+	char addr[ETH_ALEN];
 	void *vaddr; /*virtual address*/
 	struct device *dev;
 	u8 mac_id;
@@ -214,7 +211,7 @@ struct mac_info {
 };
 
 struct mac_entry_idx {
-	u8 addr[MAC_NUM_OCTETS_PER_ADDR];
+	u8 addr[ETH_ALEN];
 	u16 vlan_id:12;
 	u16 valid:1;
 	u16 qos:3;
@@ -317,6 +314,7 @@ struct hns_mac_cb {
 	u8 __iomem *serdes_vaddr;
 	struct regmap *serdes_ctrl;
 	struct regmap *cpld_ctrl;
+	char mc_mask[ETH_ALEN];
 	u32 cpld_ctrl_reg;
 	u32 port_rst_off;
 	u32 port_mode_off;
@@ -409,7 +407,7 @@ struct mac_driver {
 };
 
 struct mac_stats_string {
-	char desc[64];
+	char desc[ETH_GSTRING_LEN];
 	unsigned long offset;
 };
 
@@ -463,5 +461,10 @@ int hns_cpld_led_set_id(struct hns_mac_cb *mac_cb,
 void hns_mac_set_promisc(struct hns_mac_cb *mac_cb, u8 en);
 int hns_mac_get_inner_port_num(struct hns_mac_cb *mac_cb,
 			       u8 vmid, u8 *port_num);
+int hns_mac_add_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+			const unsigned char *addr);
+int hns_mac_rm_uc_addr(struct hns_mac_cb *mac_cb, u8 vf_id,
+		       const unsigned char *addr);
+int hns_mac_clr_multicast(struct hns_mac_cb *mac_cb, int vfn);
 
 #endif /* _HNS_DSAF_MAC_H */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
index 8ea3d95fa483..90dbda792614 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c
@@ -591,6 +591,16 @@ static void hns_dsaf_voq_bp_all_thrd_cfg(struct dsaf_device *dsaf_dev)
 	}
 }
 
+/* Write @ptbl_tcam_data to the TCAM match config registers, low then high. */
+static void hns_dsaf_tbl_tcam_match_cfg(struct dsaf_device *dsaf_dev,
+			struct dsaf_tbl_tcam_data *ptbl_tcam_data)
+{
+	dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_L_REG,
+		       ptbl_tcam_data->tbl_tcam_data_low);
+	dsaf_write_dev(dsaf_dev, DSAF_TBL_TCAM_MATCH_CFG_H_REG,
+		       ptbl_tcam_data->tbl_tcam_data_high);
+}
+
 /**
  * hns_dsaf_tbl_tcam_data_cfg - tbl
  * @dsaf_id: dsa fabric id
@@ -755,7 +765,7 @@ static void hns_dsaf_tbl_tcam_data_ucast_pul(
 
 void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en)
 {
-	if (!HNS_DSAF_IS_DEBUG(dsaf_dev))
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver) && !HNS_DSAF_IS_DEBUG(dsaf_dev))
 		dsaf_set_dev_bit(dsaf_dev, DSAF_CFG_0_REG,
 				 DSAF_CFG_MIX_MODE_S, !!en);
 }
@@ -894,15 +904,16 @@ static void hns_dsaf_tcam_uc_cfg(
 }
 
 /**
- * hns_dsaf_tcam_mc_cfg - INT
- * @dsaf_id: dsa fabric id
- * @address,
- * @ptbl_tcam_data,
- * @ptbl_tcam_mcast,
+ * hns_dsaf_tcam_mc_cfg - cfg the tcam for mc
+ * @dsaf_dev: dsa fabric device struct pointer
+ * @address: tcam index
+ * @ptbl_tcam_data: tcam data struct pointer
+ * @ptbl_tcam_mask: tcam mask struct pointer, it must be null for HNSv1
  */
 static void hns_dsaf_tcam_mc_cfg(
 	struct dsaf_device *dsaf_dev, u32 address,
 	struct dsaf_tbl_tcam_data *ptbl_tcam_data,
+	struct dsaf_tbl_tcam_data *ptbl_tcam_mask,
 	struct dsaf_tbl_tcam_mcast_cfg *ptbl_tcam_mcast)
 {
 	spin_lock_bh(&dsaf_dev->tcam_lock);
@@ -913,7 +924,11 @@ static void hns_dsaf_tcam_mc_cfg(
 	hns_dsaf_tbl_tcam_data_cfg(dsaf_dev, ptbl_tcam_data);
 	/*Write Tcam Mcast*/
 	hns_dsaf_tbl_tcam_mcast_cfg(dsaf_dev, ptbl_tcam_mcast);
-	/*Write Plus*/
+	/* Write Match Data */
+	if (ptbl_tcam_mask)
+		hns_dsaf_tbl_tcam_match_cfg(dsaf_dev, ptbl_tcam_mask);
+
+	/* Write Pulse */
 	hns_dsaf_tbl_tcam_data_mcast_pul(dsaf_dev);
 
 	spin_unlock_bh(&dsaf_dev->tcam_lock);
@@ -944,6 +959,16 @@ static void hns_dsaf_tcam_mc_invld(struct dsaf_device *dsaf_dev, u32 address)
 	spin_unlock_bh(&dsaf_dev->tcam_lock);
 }
 
+void hns_dsaf_tcam_addr_get(struct dsaf_drv_tbl_tcam_key *mac_key, u8 *addr)
+{
+	addr[0] = mac_key->high.bits.mac_0;	/* high word: MAC bytes 0-3 */
+	addr[1] = mac_key->high.bits.mac_1;
+	addr[2] = mac_key->high.bits.mac_2;
+	addr[3] = mac_key->high.bits.mac_3;
+	addr[4] = mac_key->low.bits.mac_4;	/* low word: MAC bytes 4-5 */
+	addr[5] = mac_key->low.bits.mac_5;
+}
+
 /**
  * hns_dsaf_tcam_uc_get - INT
  * @dsaf_id: dsa fabric id
@@ -1369,6 +1394,12 @@ static int hns_dsaf_init(struct dsaf_device *dsaf_dev)
 	if (HNS_DSAF_IS_DEBUG(dsaf_dev))
 		return 0;
 
+	if (AE_IS_VER1(dsaf_dev->dsaf_ver))
+		dsaf_dev->tcam_max_num = DSAF_TCAM_SUM;
+	else
+		dsaf_dev->tcam_max_num =
+			DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM;
+
 	spin_lock_init(&dsaf_dev->tcam_lock);
 	ret = hns_dsaf_init_hw(dsaf_dev);
 	if (ret)
@@ -1424,7 +1455,7 @@ static u16 hns_dsaf_find_soft_mac_entry(
 	u32 i;
 
 	soft_mac_entry = priv->soft_mac_tbl;
-	for (i = 0; i < DSAF_TCAM_SUM; i++) {
+	for (i = 0; i < dsaf_dev->tcam_max_num; i++) {
 		/* invall tab entry */
 		if ((soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX) &&
 		    (soft_mac_entry->tcam_key.high.val == mac_key->high.val) &&
@@ -1449,7 +1480,7 @@ static u16 hns_dsaf_find_empty_mac_entry(struct dsaf_device *dsaf_dev)
 	u32 i;
 
 	soft_mac_entry = priv->soft_mac_tbl;
-	for (i = 0; i < DSAF_TCAM_SUM; i++) {
+	for (i = 0; i < dsaf_dev->tcam_max_num; i++) {
 		/* inv all entry */
 		if (soft_mac_entry->index == DSAF_INVALID_ENTRY_IDX)
 			/* return find result --soft index */
@@ -1488,8 +1519,12 @@ static void hns_dsaf_set_mac_key(
 	mac_key->high.bits.mac_3 = addr[3];
 	mac_key->low.bits.mac_4 = addr[4];
 	mac_key->low.bits.mac_5 = addr[5];
-	mac_key->low.bits.vlan = vlan_id;
-	mac_key->low.bits.port = port;
+	dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_VLAN_M,
+		       DSAF_TBL_TCAM_KEY_VLAN_S, vlan_id);
+	dsaf_set_field(mac_key->low.bits.port_vlan, DSAF_TBL_TCAM_KEY_PORT_M,
+		       DSAF_TBL_TCAM_KEY_PORT_S, port);
+
+	mac_key->low.bits.port_vlan = le16_to_cpu(mac_key->low.bits.port_vlan);
 }
 
 /**
@@ -1507,6 +1542,7 @@ int hns_dsaf_set_mac_uc_entry(
 	struct dsaf_drv_priv *priv =
 	    (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
+	struct dsaf_tbl_tcam_data tcam_data;
 
 	/* mac addr check */
 	if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
@@ -1548,9 +1584,10 @@ int hns_dsaf_set_mac_uc_entry(
 	/* default config dvc to 0 */
 	mac_data.tbl_ucast_dvc = 0;
 	mac_data.tbl_ucast_out_port = mac_entry->port_num;
-	hns_dsaf_tcam_uc_cfg(
-		dsaf_dev, entry_index,
-		(struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+	tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+	tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+	hns_dsaf_tcam_uc_cfg(dsaf_dev, entry_index, &tcam_data, &mac_data);
 
 	/* config software entry */
 	soft_mac_entry += entry_index;
@@ -1561,6 +1598,55 @@ int hns_dsaf_set_mac_uc_entry(
 	return 0;
 }
 
+/* Remove the unicast entry described by @mac_entry from the DSAF table.
+ * Returns -EINVAL for an invalid MAC, 0 if no matching soft entry exists,
+ * -EFAULT if its out port differs, else hns_dsaf_del_mac_entry()'s result.
+ */
+int hns_dsaf_rm_mac_addr(
+	struct dsaf_device *dsaf_dev,
+	struct dsaf_drv_mac_single_dest_entry *mac_entry)
+{
+	struct dsaf_drv_tbl_tcam_key key;
+	struct dsaf_tbl_tcam_ucast_cfg ucast_cfg;
+	u16 idx;
+
+	/* mac addr check */
+	if (!is_valid_ether_addr(mac_entry->addr)) {
+		dev_err(dsaf_dev->dev, "rm_uc_addr %s Mac %pM err!\n",
+			dsaf_dev->ae_dev.name, mac_entry->addr);
+		return -EINVAL;
+	}
+
+	/* build the lookup key and search the software MAC table with it */
+	hns_dsaf_set_mac_key(dsaf_dev, &key, mac_entry->in_vlan_id,
+			     mac_entry->in_port_num, mac_entry->addr);
+
+	idx = hns_dsaf_find_soft_mac_entry(dsaf_dev, &key);
+	if (idx == DSAF_INVALID_ENTRY_IDX) {
+		/* can not find the tcam entry, return 0 */
+		dev_info(dsaf_dev->dev,
+			 "rm_uc_addr no tcam, %s Mac key(%#x:%#x)\n",
+			 dsaf_dev->ae_dev.name,
+			 key.high.val, key.low.val);
+		return 0;
+	}
+
+	dev_dbg(dsaf_dev->dev,
+		"rm_uc_addr, %s Mac key(%#x:%#x) entry_index%d\n",
+		dsaf_dev->ae_dev.name, key.high.val, key.low.val, idx);
+
+	/* fetch the hardware entry; key is handed over as the data buffer */
+	hns_dsaf_tcam_uc_get(dsaf_dev, idx,
+			     (struct dsaf_tbl_tcam_data *)&key, &ucast_cfg);
+
+	/* unicast entry not used locally should not clear */
+	if (mac_entry->port_num != ucast_cfg.tbl_ucast_out_port)
+		return -EFAULT;
+
+	return hns_dsaf_del_mac_entry(dsaf_dev, mac_entry->in_vlan_id,
+				      mac_entry->in_port_num, mac_entry->addr);
+}
+
 /**
  * hns_dsaf_set_mac_mc_entry - set mac mc-entry
  * @dsaf_dev: dsa fabric device struct pointer
@@ -1577,6 +1663,7 @@ int hns_dsaf_set_mac_mc_entry(
 	    (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
 	struct dsaf_drv_tbl_tcam_key tmp_mac_key;
+	struct dsaf_tbl_tcam_data tcam_data;
 
 	/* mac addr check */
 	if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
@@ -1609,9 +1696,12 @@ int hns_dsaf_set_mac_mc_entry(
 		       0, sizeof(mac_data.tbl_mcast_port_msk));
 	} else {
 		/* config hardware entry */
-		hns_dsaf_tcam_mc_get(
-			dsaf_dev, entry_index,
-			(struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data);
+		hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data,
+				     &mac_data);
+
+		tmp_mac_key.high.val =
+			le32_to_cpu(tcam_data.tbl_tcam_data_high);
+		tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 	}
 	mac_data.tbl_mcast_old_en = 0;
 	mac_data.tbl_mcast_item_vld = 1;
@@ -1623,9 +1713,11 @@ int hns_dsaf_set_mac_mc_entry(
 		dsaf_dev->ae_dev.name, mac_key.high.val,
 		mac_key.low.val, entry_index);
 
-	hns_dsaf_tcam_mc_cfg(
-		dsaf_dev, entry_index,
-		(struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+	tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+	tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+	hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data, NULL,
+			     &mac_data);
 
 	/* config software entry */
 	soft_mac_entry += entry_index;
@@ -1636,6 +1728,16 @@ int hns_dsaf_set_mac_mc_entry(
 	return 0;
 }
 
+/* In-place dst &= src over one ETH_ALEN-byte MAC address. */
+static void hns_dsaf_mc_mask_bit_clear(char *dst, const char *src)
+{
+	int i;
+
+	/* byte-wise: callers pass u8 buffers without u16 alignment */
+	for (i = 0; i < ETH_ALEN; i++)
+		dst[i] &= src[i];
+}
+
 /**
  * hns_dsaf_add_mac_mc_port - add mac mc-port
  * @dsaf_dev: dsa fabric device struct pointer
@@ -1646,11 +1748,15 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 {
 	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
 	struct dsaf_drv_tbl_tcam_key mac_key;
+	struct dsaf_drv_tbl_tcam_key mask_key;
+	struct dsaf_tbl_tcam_data *pmask_key = NULL;
 	struct dsaf_tbl_tcam_mcast_cfg mac_data;
-	struct dsaf_drv_priv *priv =
-	    (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
+	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
 	struct dsaf_drv_tbl_tcam_key tmp_mac_key;
+	struct dsaf_tbl_tcam_data tcam_data;
+	u8 mc_addr[ETH_ALEN];
+	u8 *mc_mask;
 	int mskid;
 
 	/*chechk mac addr */
@@ -1660,14 +1766,32 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 		return -EINVAL;
 	}
 
+	ether_addr_copy(mc_addr, mac_entry->addr);
+	mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask;
+	if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+		/* prepare for key data setting */
+		hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask);
+
+		/* config key mask */
+		hns_dsaf_set_mac_key(dsaf_dev, &mask_key,
+				     0x0,
+				     0xff,
+				     mc_mask);
+
+		mask_key.high.val = le32_to_cpu(mask_key.high.val);
+		mask_key.low.val = le32_to_cpu(mask_key.low.val);
+
+		pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
+	}
+
 	/*config key */
 	hns_dsaf_set_mac_key(
 		dsaf_dev, &mac_key, mac_entry->in_vlan_id,
-		mac_entry->in_port_num, mac_entry->addr);
+		mac_entry->in_port_num, mc_addr);
 
 	memset(&mac_data, 0, sizeof(struct dsaf_tbl_tcam_mcast_cfg));
 
-	/*check exist? */
+	/* check if the tcam is exist */
 	entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
 	if (entry_index == DSAF_INVALID_ENTRY_IDX) {
 		/*if hasnot , find a empty*/
@@ -1681,11 +1805,15 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 			return -EINVAL;
 		}
 	} else {
-		/*if exist, add in */
-		hns_dsaf_tcam_mc_get(
-			dsaf_dev, entry_index,
-			(struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data);
+		/* if exist, add in */
+		hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data,
+				     &mac_data);
+
+		tmp_mac_key.high.val =
+			le32_to_cpu(tcam_data.tbl_tcam_data_high);
+		tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 	}
+
 	/* config hardware entry */
 	if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) {
 		mskid = mac_entry->port_num;
@@ -1708,9 +1836,12 @@ int hns_dsaf_add_mac_mc_port(struct dsaf_device *dsaf_dev,
 		dsaf_dev->ae_dev.name, mac_key.high.val,
 		mac_key.low.val, entry_index);
 
-	hns_dsaf_tcam_mc_cfg(
-		dsaf_dev, entry_index,
-		(struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+	tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+	tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+	/* config mc entry with mask */
+	hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index, &tcam_data,
+			     pmask_key, &mac_data);
 
 	/*config software entry */
 	soft_mac_entry += entry_index;
@@ -1782,15 +1913,18 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 {
 	u16 entry_index = DSAF_INVALID_ENTRY_IDX;
 	struct dsaf_drv_tbl_tcam_key mac_key;
-	struct dsaf_drv_priv *priv =
-	    (struct dsaf_drv_priv *)hns_dsaf_dev_priv(dsaf_dev);
+	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
 	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
 	u16 vlan_id;
 	u8 in_port_num;
 	struct dsaf_tbl_tcam_mcast_cfg mac_data;
-	struct dsaf_drv_tbl_tcam_key tmp_mac_key;
+	struct dsaf_tbl_tcam_data tcam_data;
 	int mskid;
 	const u8 empty_msk[sizeof(mac_data.tbl_mcast_port_msk)] = {0};
+	struct dsaf_drv_tbl_tcam_key mask_key, tmp_mac_key;
+	struct dsaf_tbl_tcam_data *pmask_key = NULL;
+	u8 mc_addr[ETH_ALEN];
+	u8 *mc_mask;
 
 	if (!(void *)mac_entry) {
 		dev_err(dsaf_dev->dev,
@@ -1798,10 +1932,6 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 		return -EINVAL;
 	}
 
-	/*get key info*/
-	vlan_id = mac_entry->in_vlan_id;
-	in_port_num = mac_entry->in_port_num;
-
 	/*check mac addr */
 	if (MAC_IS_ALL_ZEROS(mac_entry->addr)) {
 		dev_err(dsaf_dev->dev, "del_port failed, addr %pM!\n",
@@ -1809,11 +1939,31 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 		return -EINVAL;
 	}
 
-	/*config key */
-	hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num,
-			     mac_entry->addr);
+	/* always mask vlan_id field */
+	ether_addr_copy(mc_addr, mac_entry->addr);
+	mc_mask = dsaf_dev->mac_cb[mac_entry->in_port_num]->mc_mask;
+
+	if (!AE_IS_VER1(dsaf_dev->dsaf_ver)) {
+		/* key data is the address with the masked-out bits cleared */
+		hns_dsaf_mc_mask_bit_clear(mc_addr, mc_mask);
+
+		/* key mask comes from mc_mask, matching add_mac_mc_port */
+		hns_dsaf_set_mac_key(dsaf_dev, &mask_key, 0x00, 0xff, mc_mask);
+
+		mask_key.high.val = le32_to_cpu(mask_key.high.val);
+		mask_key.low.val = le32_to_cpu(mask_key.low.val);
 
-	/*check is exist? */
+		pmask_key = (struct dsaf_tbl_tcam_data *)(&mask_key);
+	}
+
+	/* get key info */
+	vlan_id = mac_entry->in_vlan_id;
+	in_port_num = mac_entry->in_port_num;
+
+	/* config key */
+	hns_dsaf_set_mac_key(dsaf_dev, &mac_key, vlan_id, in_port_num, mc_addr);
+
+	/* check if the tcam entry is exist */
 	entry_index = hns_dsaf_find_soft_mac_entry(dsaf_dev, &mac_key);
 	if (entry_index == DSAF_INVALID_ENTRY_IDX) {
 		/*find none */
@@ -1829,10 +1979,11 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 		dsaf_dev->ae_dev.name, mac_key.high.val,
 		mac_key.low.val, entry_index);
 
-	/*read entry*/
-	hns_dsaf_tcam_mc_get(
-		dsaf_dev, entry_index,
-		(struct dsaf_tbl_tcam_data *)(&tmp_mac_key), &mac_data);
+	/* read entry */
+	hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+	tmp_mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+	tmp_mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 
 	/*del the port*/
 	if (mac_entry->port_num < DSAF_SERVICE_NW_NUM) {
@@ -1857,15 +2008,87 @@ int hns_dsaf_del_mac_mc_port(struct dsaf_device *dsaf_dev,
 		/* del soft entry */
 		soft_mac_entry += entry_index;
 		soft_mac_entry->index = DSAF_INVALID_ENTRY_IDX;
-	} else { /* not zer, just del port, updata*/
-		hns_dsaf_tcam_mc_cfg(
-			dsaf_dev, entry_index,
-			(struct dsaf_tbl_tcam_data *)(&mac_key), &mac_data);
+	} else { /* not zero, just del port, update */
+		tcam_data.tbl_tcam_data_high = cpu_to_le32(mac_key.high.val);
+		tcam_data.tbl_tcam_data_low = cpu_to_le32(mac_key.low.val);
+
+		hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index,
+				     &tcam_data,
+				     pmask_key, &mac_data);
 	}
 
 	return 0;
 }
 
+int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev, u8 mac_id,
+			     u8 port_num)
+{
+	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+	struct dsaf_drv_soft_mac_tbl *soft_mac_entry;
+	struct dsaf_tbl_tcam_mcast_cfg mac_data;
+	int ret = 0, i;
+
+	if (HNS_DSAF_IS_DEBUG(dsaf_dev))
+		return 0;
+
+	for (i = 0; i < DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM; i++) {
+		u8 addr[ETH_ALEN];
+		u8 port;
+
+		soft_mac_entry = priv->soft_mac_tbl + i;
+
+		hns_dsaf_tcam_addr_get(&soft_mac_entry->tcam_key, addr);
+		port = dsaf_get_field(
+				soft_mac_entry->tcam_key.low.bits.port_vlan,
+				DSAF_TBL_TCAM_KEY_PORT_M,
+				DSAF_TBL_TCAM_KEY_PORT_S);
+		/* valid entry of this mac holding a non-broadcast mc address */
+		if (soft_mac_entry->index != DSAF_INVALID_ENTRY_IDX &&
+		    port == mac_id &&
+		    is_multicast_ether_addr(addr) &&
+		    !is_broadcast_ether_addr(addr)) {
+			const u32 empty_msk[DSAF_PORT_MSK_NUM] = {0};
+			struct dsaf_drv_mac_single_dest_entry mac_entry;
+
+			/* stop delivering this multicast address to
+			 * port_num (the VF) by removing it from the entry.
+			 */
+			ether_addr_copy(mac_entry.addr, addr);
+			mac_entry.in_vlan_id = dsaf_get_field(
+				soft_mac_entry->tcam_key.low.bits.port_vlan,
+				DSAF_TBL_TCAM_KEY_VLAN_M,
+				DSAF_TBL_TCAM_KEY_VLAN_S);
+			mac_entry.in_port_num = mac_id;
+			mac_entry.port_num = port_num;
+			if (hns_dsaf_del_mac_mc_port(dsaf_dev, &mac_entry)) {
+				ret = -EINVAL;
+				continue;
+			}
+
+			/* also drop the mac port itself from this address
+			 * once no other port remains in the port mask
+			 */
+			hns_dsaf_tcam_mc_get(dsaf_dev, i,
+					     (struct dsaf_tbl_tcam_data *)
+					     (&soft_mac_entry->tcam_key),
+					     &mac_data);
+			dsaf_set_bit(mac_data.tbl_mcast_port_msk[mac_id / 32],
+				     mac_id % 32, 0);
+			if (!memcmp(mac_data.tbl_mcast_port_msk, empty_msk,
+				    sizeof(u32) * DSAF_PORT_MSK_NUM)) {
+				mac_entry.port_num = mac_id;
+				if (hns_dsaf_del_mac_mc_port(dsaf_dev,
+							     &mac_entry)) {
+					ret = -EINVAL;
+					continue;
+				}
+			}
+		}
+	}
+
+	return ret;	/* -EINVAL if any removal failed, else 0 */
+}
+
 /**
  * hns_dsaf_get_mac_uc_entry - get mac uc entry
  * @dsaf_dev: dsa fabric device struct pointer
@@ -1878,6 +2101,7 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
 	struct dsaf_drv_tbl_tcam_key mac_key;
 
 	struct dsaf_tbl_tcam_ucast_cfg mac_data;
+	struct dsaf_tbl_tcam_data tcam_data;
 
 	/* check macaddr */
 	if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
@@ -1906,9 +2130,12 @@ int hns_dsaf_get_mac_uc_entry(struct dsaf_device *dsaf_dev,
 		dsaf_dev->ae_dev.name, mac_key.high.val,
 		mac_key.low.val, entry_index);
 
-	/*read entry*/
-	hns_dsaf_tcam_uc_get(dsaf_dev, entry_index,
-			     (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data);
+	/* read entry */
+	hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+	mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+	mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
+
 	mac_entry->port_num = mac_data.tbl_ucast_out_port;
 
 	return 0;
@@ -1926,6 +2153,7 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
 	struct dsaf_drv_tbl_tcam_key mac_key;
 
 	struct dsaf_tbl_tcam_mcast_cfg mac_data;
+	struct dsaf_tbl_tcam_data tcam_data;
 
 	/*check mac addr */
 	if (MAC_IS_ALL_ZEROS(mac_entry->addr) ||
@@ -1955,8 +2183,10 @@ int hns_dsaf_get_mac_mc_entry(struct dsaf_device *dsaf_dev,
 		mac_key.low.val, entry_index);
 
 	/*read entry */
-	hns_dsaf_tcam_mc_get(dsaf_dev, entry_index,
-			     (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data);
+	hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+	mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+	mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 
 	mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
 	return 0;
@@ -1976,9 +2206,10 @@ int hns_dsaf_get_mac_entry_by_index(
 
 	struct dsaf_tbl_tcam_mcast_cfg mac_data;
 	struct dsaf_tbl_tcam_ucast_cfg mac_uc_data;
-	char mac_addr[MAC_NUM_OCTETS_PER_ADDR] = {0};
+	struct dsaf_tbl_tcam_data tcam_data;
+	char mac_addr[ETH_ALEN] = {0};
 
-	if (entry_index >= DSAF_TCAM_SUM) {
+	if (entry_index >= dsaf_dev->tcam_max_num) {
 		/* find none, del error */
 		dev_err(dsaf_dev->dev, "get_uc_entry failed, %s\n",
 			dsaf_dev->ae_dev.name);
@@ -1986,8 +2217,10 @@ int hns_dsaf_get_mac_entry_by_index(
 	}
 
 	/* mc entry, do read opt */
-	hns_dsaf_tcam_mc_get(dsaf_dev, entry_index,
-			     (struct dsaf_tbl_tcam_data *)&mac_key, &mac_data);
+	hns_dsaf_tcam_mc_get(dsaf_dev, entry_index, &tcam_data, &mac_data);
+
+	mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+	mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
 
 	mac_entry->port_mask[0] = mac_data.tbl_mcast_port_msk[0] & 0x3F;
 
@@ -2004,9 +2237,12 @@ int hns_dsaf_get_mac_entry_by_index(
 		/**mc donot do*/
 	} else {
 		/*is not mc, just uc... */
-		hns_dsaf_tcam_uc_get(dsaf_dev, entry_index,
-				     (struct dsaf_tbl_tcam_data *)&mac_key,
+		hns_dsaf_tcam_uc_get(dsaf_dev, entry_index, &tcam_data,
 				     &mac_uc_data);
+
+		mac_key.high.val = le32_to_cpu(tcam_data.tbl_tcam_data_high);
+		mac_key.low.val = le32_to_cpu(tcam_data.tbl_tcam_data_low);
+
 		mac_entry->port_mask[0] = (1 << mac_uc_data.tbl_ucast_out_port);
 	}
 
@@ -2670,6 +2906,59 @@ int hns_dsaf_get_regs_count(void)
 	return DSAF_DUMP_REGS_NUM;
 }
 
+/* Reserve one TCAM entry per port at the end of the table for promisc */
+#define dsaf_promisc_tcam_entry(port) \
+	(DSAF_TCAM_SUM - DSAFV2_MAC_FUZZY_TCAM_NUM + (port))
+/* Write @port's promisc TCAM entry: valid when @enable, otherwise with
+ * item_vld cleared. Does nothing if AE_IS_VER1() or HNS_DSAF_IS_DEBUG().
+ */
+void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
+			       u32 port, bool enable)
+{
+	struct dsaf_drv_priv *priv = hns_dsaf_dev_priv(dsaf_dev);
+	struct dsaf_drv_soft_mac_tbl *soft_mac_entry = priv->soft_mac_tbl;
+	u16 entry_index = dsaf_promisc_tcam_entry(port); /* reserved slot */
+	struct dsaf_drv_tbl_tcam_key tbl_tcam_data, tbl_tcam_mask;
+	struct dsaf_tbl_tcam_mcast_cfg mac_data = {0};
+
+	if ((AE_IS_VER1(dsaf_dev->dsaf_ver)) || HNS_DSAF_IS_DEBUG(dsaf_dev))
+		return;
+
+	/* zero key/mask on both paths; they are always logged and written */
+	memset(&tbl_tcam_data, 0, sizeof(tbl_tcam_data));
+	memset(&tbl_tcam_mask, 0, sizeof(tbl_tcam_mask));
+	if (enable) {
+		dsaf_set_field(tbl_tcam_data.low.bits.port_vlan,
+			       DSAF_TBL_TCAM_KEY_PORT_M,
+			       DSAF_TBL_TCAM_KEY_PORT_S, port);
+		dsaf_set_field(tbl_tcam_mask.low.bits.port_vlan,
+			       DSAF_TBL_TCAM_KEY_PORT_M,
+			       DSAF_TBL_TCAM_KEY_PORT_S, 0xf);
+
+		/* SUB_QID */
+		dsaf_set_bit(mac_data.tbl_mcast_port_msk[0],
+			     DSAF_SERVICE_NW_NUM, true);
+		mac_data.tbl_mcast_item_vld = true;	/* item_vld bit */
+	} else {
+		mac_data.tbl_mcast_item_vld = false;	/* item_vld bit */
+	}
+
+	dev_dbg(dsaf_dev->dev,
+		"set_promisc_entry, %s Mac key(%#x:%#x) entry_index%d\n",
+		dsaf_dev->ae_dev.name, tbl_tcam_data.high.val,
+		tbl_tcam_data.low.val, entry_index);
+
+	/* config promisc entry with mask */
+	hns_dsaf_tcam_mc_cfg(dsaf_dev, entry_index,
+			     (struct dsaf_tbl_tcam_data *)&tbl_tcam_data,
+			     (struct dsaf_tbl_tcam_data *)&tbl_tcam_mask,
+			     &mac_data);
+
+	/* config software entry */
+	soft_mac_entry += entry_index;
+	soft_mac_entry->index = enable ? entry_index : DSAF_INVALID_ENTRY_IDX;
+}
+
 /**
  * dsaf_probe - probo dsaf dev
  * @pdev: dasf platform device
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
index c494fc52be74..cef6bf46ae93 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h
@@ -35,8 +35,6 @@ struct hns_mac_cb;
 
 #define DSAF_CFG_READ_CNT   30
 
-#define MAC_NUM_OCTETS_PER_ADDR 6
-
 #define DSAF_DUMP_REGS_NUM 504
 #define DSAF_STATIC_NUM 28
 #define DSAF_V2_STATIC_NUM	44
@@ -165,7 +163,7 @@ enum dsaf_mode {
 /*mac entry, mc or uc entry*/
 struct dsaf_drv_mac_single_dest_entry {
 	/* mac addr, match the entry*/
-	u8 addr[MAC_NUM_OCTETS_PER_ADDR];
+	u8 addr[ETH_ALEN];
 	u16 in_vlan_id; /* value of VlanId */
 
 	/* the vld input port num, dsaf-mode fix 0, */
@@ -179,7 +177,7 @@ struct dsaf_drv_mac_single_dest_entry {
 /*only mc entry*/
 struct dsaf_drv_mac_multi_dest_entry {
 	/* mac addr, match the entry*/
-	u8 addr[MAC_NUM_OCTETS_PER_ADDR];
+	u8 addr[ETH_ALEN];
 	u16 in_vlan_id;
 	/* this mac addr output port,*/
 	/*	bit0-bit5 means Port0-Port5(1bit is vld)**/
@@ -308,8 +306,6 @@ struct dsaf_misc_op {
 	/* reset series function, it will be reset if the dereset is 0 */
 	void (*dsaf_reset)(struct dsaf_device *dsaf_dev, bool dereset);
 	void (*xge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset);
-	void (*xge_core_srst)(struct dsaf_device *dsaf_dev, u32 port,
-			      bool dereset);
 	void (*ge_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset);
 	void (*ppe_srst)(struct dsaf_device *dsaf_dev, u32 port, bool dereset);
 	void (*ppe_comm_srst)(struct dsaf_device *dsaf_dev, bool dereset);
@@ -343,6 +339,7 @@ struct dsaf_device {
 	enum hal_dsaf_mode dsaf_en;
 	enum hal_dsaf_tc_mode dsaf_tc_mode;
 	u32 dsaf_ver;
+	u16 tcam_max_num;	/* max TCAM entry for user except promisc */
 
 	struct ppe_common_cb *ppe_common[DSAF_COMM_DEV_NUM];
 	struct rcb_common_cb *rcb_common[DSAF_COMM_DEV_NUM];
@@ -360,6 +357,11 @@ static inline void *hns_dsaf_dev_priv(const struct dsaf_device *dsaf_dev)
 	return (void *)((u8 *)dsaf_dev + sizeof(*dsaf_dev));
 }
 
+#define DSAF_TBL_TCAM_KEY_PORT_S 0
+#define DSAF_TBL_TCAM_KEY_PORT_M (((1ULL << 4) - 1) << 0)
+#define DSAF_TBL_TCAM_KEY_VLAN_S 4
+#define DSAF_TBL_TCAM_KEY_VLAN_M (((1ULL << 12) - 1) << 4)
+
 struct dsaf_drv_tbl_tcam_key {
 	union {
 		struct {
@@ -373,11 +375,9 @@ struct dsaf_drv_tbl_tcam_key {
 	} high;
 	union {
 		struct {
-			u32 port:4; /* port id, */
-			/* dsaf-mode fixed 0, non-dsaf-mode port id*/
-			u32 vlan:12; /* vlan id */
-			u32 mac_5:8;
-			u32 mac_4:8;
+			u16 port_vlan;
+			u8 mac_5;
+			u8 mac_4;
 		} bits;
 
 		u32 val;
@@ -461,10 +461,19 @@ void hns_dsaf_get_strings(int stringset, u8 *data, int port,
 void hns_dsaf_get_regs(struct dsaf_device *ddev, u32 port, void *data);
 int hns_dsaf_get_regs_count(void);
 void hns_dsaf_set_promisc_mode(struct dsaf_device *dsaf_dev, u32 en);
+void hns_dsaf_set_promisc_tcam(struct dsaf_device *dsaf_dev,
+			       u32 port, bool enable);
 
 void hns_dsaf_get_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id,
 				  u32 *en);
 int hns_dsaf_set_rx_mac_pause_en(struct dsaf_device *dsaf_dev, int mac_id,
 				 u32 en);
+int hns_dsaf_rm_mac_addr(
+	struct dsaf_device *dsaf_dev,
+	struct dsaf_drv_mac_single_dest_entry *mac_entry);
+
+int hns_dsaf_clr_mac_mc_port(struct dsaf_device *dsaf_dev,
+			     u8 mac_id, u8 port_num);
+
 
 #endif /* __HNS_DSAF_MAIN_H__ */
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
index 67accce1d33d..a2c22d084ce9 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c
@@ -23,7 +23,6 @@ enum _dsm_op_index {
 enum _dsm_rst_type {
 	HNS_DSAF_RESET_FUNC     = 0x1,
 	HNS_PPE_RESET_FUNC      = 0x2,
-	HNS_XGE_CORE_RESET_FUNC = 0x3,
 	HNS_XGE_RESET_FUNC      = 0x4,
 	HNS_GE_RESET_FUNC       = 0x5,
 	HNS_DSAF_CHN_RESET_FUNC = 0x6,
@@ -213,26 +212,6 @@ static void hns_dsaf_xge_srst_by_port_acpi(struct dsaf_device *dsaf_dev,
 				   HNS_XGE_RESET_FUNC, port, dereset);
 }
 
-static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev,
-					   u32 port, bool dereset)
-{
-	u32 reg_val = 0;
-	u32 reg_addr;
-
-	if (port >= DSAF_XGE_NUM)
-		return;
-
-	reg_val |= XGMAC_TRX_CORE_SRST_M
-		<< dsaf_dev->mac_cb[port]->port_rst_off;
-
-	if (!dereset)
-		reg_addr = DSAF_SUB_SC_XGE_RESET_REQ_REG;
-	else
-		reg_addr = DSAF_SUB_SC_XGE_RESET_DREQ_REG;
-
-	dsaf_write_sub(dsaf_dev, reg_addr, reg_val);
-}
-
 /**
  * hns_dsaf_srst_chns - reset dsaf channels
  * @dsaf_dev: dsaf device struct pointer
@@ -293,14 +272,6 @@ void hns_dsaf_roce_srst_acpi(struct dsaf_device *dsaf_dev, bool dereset)
 				   HNS_ROCE_RESET_FUNC, 0, dereset);
 }
 
-static void
-hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev,
-				    u32 port, bool dereset)
-{
-	hns_dsaf_acpi_srst_by_port(dsaf_dev, HNS_OP_RESET_FUNC,
-				   HNS_XGE_CORE_RESET_FUNC, port, dereset);
-}
-
 static void hns_dsaf_ge_srst_by_port(struct dsaf_device *dsaf_dev, u32 port,
 				     bool dereset)
 {
@@ -597,7 +568,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 
 		misc_op->dsaf_reset = hns_dsaf_rst;
 		misc_op->xge_srst = hns_dsaf_xge_srst_by_port;
-		misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port;
 		misc_op->ge_srst = hns_dsaf_ge_srst_by_port;
 		misc_op->ppe_srst = hns_ppe_srst_by_port;
 		misc_op->ppe_comm_srst = hns_ppe_com_srst;
@@ -615,7 +585,6 @@ struct dsaf_misc_op *hns_misc_op_get(struct dsaf_device *dsaf_dev)
 
 		misc_op->dsaf_reset = hns_dsaf_rst_acpi;
 		misc_op->xge_srst = hns_dsaf_xge_srst_by_port_acpi;
-		misc_op->xge_core_srst = hns_dsaf_xge_core_srst_by_port_acpi;
 		misc_op->ge_srst = hns_dsaf_ge_srst_by_port_acpi;
 		misc_op->ppe_srst = hns_ppe_srst_by_port_acpi;
 		misc_op->ppe_comm_srst = hns_ppe_com_srst;
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
index 878950a42e6c..87226685f742 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h
@@ -41,6 +41,9 @@
 #define DSAF_SW_PORT_NUM	8
 #define DSAF_TOTAL_QUEUE_NUM	129
 
+/* reserve a tcam entry for each port to support promisc by fuzzy match */
+#define DSAFV2_MAC_FUZZY_TCAM_NUM    DSAF_MAX_PORT_NUM
+
 #define DSAF_TCAM_SUM		512
 #define DSAF_LINE_SUM		(2048 * 14)
 
@@ -297,6 +300,8 @@
 #define DSAF_TBL_LKUP_NUM_I_0_REG		0x50C0
 #define DSAF_TBL_LKUP_NUM_O_0_REG		0x50E0
 #define DSAF_TBL_UCAST_BCAST_MIS_INFO_0_0_REG	0x510C
+#define DSAF_TBL_TCAM_MATCH_CFG_H_REG		0x5130
+#define DSAF_TBL_TCAM_MATCH_CFG_L_REG		0x5134
 
 #define DSAF_INODE_FIFO_WL_0_REG		0x6000
 #define DSAF_ONODE_FIFO_WL_0_REG		0x6020
@@ -309,7 +314,6 @@
 #define PPE_COM_INTEN_REG			0x110
 #define PPE_COM_RINT_REG			0x114
 #define PPE_COM_INTSTS_REG			0x118
-#define PPE_COM_COMMON_CNT_CLR_CE_REG		0x1120
 #define PPE_COM_HIS_RX_PKT_QID_DROP_CNT_REG	0x300
 #define PPE_COM_HIS_RX_PKT_QID_OK_CNT_REG	0x600
 #define PPE_COM_HIS_TX_PKT_QID_ERR_CNT_REG	0x900
@@ -698,8 +702,6 @@
 #define XGMAC_RX_SYMBOLERRPKTS			0x0210
 #define XGMAC_RX_FCSERRPKTS			0x0218
 
-#define XGMAC_TRX_CORE_SRST_M			0x2080
-
 #define DSAF_SRAM_INIT_OVER_M 0xff
 #define DSAFV2_SRAM_INIT_OVER_M 0x3ff
 #define DSAF_SRAM_INIT_OVER_S 0
@@ -978,6 +980,11 @@
 #define XGMAC_ENABLE_TX_B		0
 #define XGMAC_ENABLE_RX_B		1
 
+#define XGMAC_UNIDIR_EN_B		0
+#define XGMAC_RF_TX_EN_B		1
+#define XGMAC_LF_RF_INSERT_S		2
+#define XGMAC_LF_RF_INSERT_M		(0x3 << XGMAC_LF_RF_INSERT_S)
+
 #define XGMAC_CTL_TX_FCS_B		0
 #define XGMAC_CTL_TX_PAD_B		1
 #define XGMAC_CTL_TX_PREAMBLE_TRANS_B	3
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
index 8f4f0e8da984..aae830a93050 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.c
@@ -108,6 +108,31 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value)
 }
 
 /**
+ * hns_xgmac_lf_rf_insert - set the LF/RF insert field of the xgmac
+ * @mac_drv: mac driver
+ * @mode: value for the LF/RF insert field (e.g. HNS_XGMAC_LF_INSERT)
+ */
+static void hns_xgmac_lf_rf_insert(struct mac_driver *mac_drv, u32 mode)
+{
+	dsaf_set_dev_field(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG,
+			   XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, mode);
+}
+
+/**
+ * hns_xgmac_lf_rf_control_init - initialize the LF/RF control register
+ * @mac_drv: mac driver
+ */
+static void hns_xgmac_lf_rf_control_init(struct mac_driver *mac_drv)
+{
+	u32 val = 0;
+
+	dsaf_set_bit(val, XGMAC_UNIDIR_EN_B, 0);
+	dsaf_set_bit(val, XGMAC_RF_TX_EN_B, 1);
+	dsaf_set_field(val, XGMAC_LF_RF_INSERT_M, XGMAC_LF_RF_INSERT_S, 0);
+	dsaf_write_reg(mac_drv, XGMAC_MAC_TX_LF_RF_CONTROL_REG, val);
+}
+
+/**
  *hns_xgmac_enable - enable xgmac port
  *@drv: mac driver
  *@mode: mode of mac port
@@ -115,12 +140,8 @@ static void hns_xgmac_rx_enable(struct mac_driver *drv, u32 value)
 static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-	struct dsaf_device *dsaf_dev
-		= (struct dsaf_device *)dev_get_drvdata(drv->dev);
-	u32 port = drv->mac_id;
 
-	dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 1);
-	mdelay(10);
+	hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_NO_LF_RF_INSERT);
 
 	/*enable XGE rX/tX */
 	if (mode == MAC_COMM_MODE_TX) {
@@ -143,9 +164,6 @@ static void hns_xgmac_enable(void *mac_drv, enum mac_commom_mode mode)
 static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode)
 {
 	struct mac_driver *drv = (struct mac_driver *)mac_drv;
-	struct dsaf_device *dsaf_dev
-		= (struct dsaf_device *)dev_get_drvdata(drv->dev);
-	u32 port = drv->mac_id;
 
 	if (mode == MAC_COMM_MODE_TX) {
 		hns_xgmac_tx_enable(drv, 0);
@@ -155,9 +173,7 @@ static void hns_xgmac_disable(void *mac_drv, enum mac_commom_mode mode)
 		hns_xgmac_tx_enable(drv, 0);
 		hns_xgmac_rx_enable(drv, 0);
 	}
-
-	mdelay(10);
-	dsaf_dev->misc_op->xge_core_srst(dsaf_dev, port, 0);
+	hns_xgmac_lf_rf_insert(drv, HNS_XGMAC_LF_INSERT);
 }
 
 /**
@@ -203,6 +219,7 @@ static void hns_xgmac_init(void *mac_drv)
 	dsaf_dev->misc_op->xge_srst(dsaf_dev, port, 1);
 
 	mdelay(100);
+	hns_xgmac_lf_rf_control_init(drv);
 	hns_xgmac_exc_irq_en(drv, 0);
 
 	hns_xgmac_pma_fec_enable(drv, 0x0, 0x0);
@@ -788,7 +805,7 @@ static int hns_xgmac_get_sset_count(int stringset)
  */
 static int hns_xgmac_get_regs_count(void)
 {
-	return ETH_XGMAC_DUMP_NUM;
+	return HNS_XGMAC_DUMP_NUM;
 }
 
 void *hns_xgmac_config(struct hns_mac_cb *mac_cb, struct mac_params *mac_param)
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h
index 139f7297c7b4..da6c5343d3e1 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_xgmac.h
@@ -10,6 +10,7 @@
 #ifndef _HNS_XGMAC_H
 #define _HNS_XGMAC_H
 
-#define ETH_XGMAC_DUMP_NUM		(214)
-
+#define HNS_XGMAC_DUMP_NUM		214
+#define HNS_XGMAC_NO_LF_RF_INSERT	0x0
+#define HNS_XGMAC_LF_INSERT		0x2
 #endif
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
index dff7b60345d8..672b64606321 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c
@@ -22,6 +22,7 @@
 
 #include "hnae.h"
 #include "hns_enet.h"
+#include "hns_dsaf_mac.h"
 
 #define NIC_MAX_Q_PER_VF 16
 #define HNS_NIC_TX_TIMEOUT (5 * HZ)
@@ -565,6 +566,71 @@ static void get_rx_desc_bnum(u32 bnum_flag, int *out_bnum)
 				   HNS_RXD_BUFNUM_M, HNS_RXD_BUFNUM_S);
 }
 
+static void hns_nic_rx_checksum(struct hns_nic_ring_data *ring_data,
+				struct sk_buff *skb, u32 flag)
+{
+	struct net_device *netdev = ring_data->napi.dev;
+	u32 l3id;
+	u32 l4id;
+
+	/* check if RX checksum offload is enabled */
+	if (unlikely(!(netdev->features & NETIF_F_RXCSUM)))
+		return;
+
+	/* In hardware, we only support checksum for the following protocols:
+	 * 1) IPv4,
+	 * 2) TCP(over IPv4 or IPv6),
+	 * 3) UDP(over IPv4 or IPv6),
+	 * 4) SCTP(over IPv4 or IPv6)
+	 * but we support many L3(IPv4, IPv6, MPLS, PPPoE etc) and L4(TCP,
+	 * UDP, GRE, SCTP, IGMP, ICMP etc.) protocols.
+	 *
+	 * Hardware limitation:
+	 * Our present hardware RX Descriptor lacks L3/L4 checksum "Status &
+	 * Error" bit (which usually can be used to indicate whether checksum
+	 * was calculated by the hardware and if there was any error encountered
+	 * during checksum calculation).
+	 *
+	 * Software workaround:
+	 * We do get info within the RX descriptor about the kind of L3/L4
+	 * protocol coming in the packet and the error status. These errors
+	 * might not just be checksum errors but could be related to version,
+	 * length of IPv4, UDP, TCP etc.
+	 * Because there is no way of knowing if it is a L3/L4 error due to bad
+	 * checksum or any other L3/L4 error, we will not (cannot) convey
+	 * checksum status for such cases to upper stack and will not maintain
+	 * the RX L3/L4 checksum counters as well.
+	 */
+
+	l3id = hnae_get_field(flag, HNS_RXD_L3ID_M, HNS_RXD_L3ID_S);
+	l4id = hnae_get_field(flag, HNS_RXD_L4ID_M, HNS_RXD_L4ID_S);
+
+	/* check L3 protocol for which checksum is supported */
+	if ((l3id != HNS_RX_FLAG_L3ID_IPV4) && (l3id != HNS_RX_FLAG_L3ID_IPV6))
+		return;
+
+	/* check for any (not just checksum) flagged L3 protocol errors */
+	if (unlikely(hnae_get_bit(flag, HNS_RXD_L3E_B)))
+		return;
+
+	/* we do not support checksum of fragmented packets */
+	if (unlikely(hnae_get_bit(flag, HNS_RXD_FRAG_B)))
+		return;
+
+	/* check L4 protocol for which checksum is supported */
+	if ((l4id != HNS_RX_FLAG_L4ID_TCP) &&
+	    (l4id != HNS_RX_FLAG_L4ID_UDP) &&
+	    (l4id != HNS_RX_FLAG_L4ID_SCTP))
+		return;
+
+	/* check for any (not just checksum) flagged L4 protocol errors */
+	if (unlikely(hnae_get_bit(flag, HNS_RXD_L4E_B)))
+		return;
+
+	/* no L3/L4 error was flagged, so treat the RX checksum as valid */
+	skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
 static int hns_nic_poll_rx_skb(struct hns_nic_ring_data *ring_data,
 			       struct sk_buff **out_skb, int *out_bnum)
 {
@@ -683,13 +749,10 @@ out_bnum_err:
 	ring->stats.rx_pkts++;
 	ring->stats.rx_bytes += skb->len;
 
-	if (unlikely(hnae_get_bit(bnum_flag, HNS_RXD_L3E_B) ||
-		     hnae_get_bit(bnum_flag, HNS_RXD_L4E_B))) {
-		ring->stats.l3l4_csum_err++;
-		return 0;
-	}
-
-	skb->ip_summed = CHECKSUM_UNNECESSARY;
+	/* indicate to upper stack if our hardware has already calculated
+	 * the RX checksum
+	 */
+	hns_nic_rx_checksum(ring_data, skb, bnum_flag);
 
 	return 0;
 }
@@ -1426,10 +1489,6 @@ static int hns_nic_change_mtu(struct net_device *ndev, int new_mtu)
 	struct hnae_handle *h = priv->ae_handle;
 	int ret;
 
-	/* MTU < 68 is an error and causes problems on some kernels */
-	if (new_mtu < 68)
-		return -EINVAL;
-
 	if (!h->dev->ops->set_mtu)
 		return -ENOTSUPP;
 
@@ -1496,6 +1555,29 @@ static netdev_features_t hns_nic_fix_features(
 	return features;
 }
 
+static int hns_nic_uc_sync(struct net_device *netdev, const unsigned char *addr)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_handle *h = priv->ae_handle;
+
+	if (h->dev->ops->add_uc_addr)
+		return h->dev->ops->add_uc_addr(h, addr);
+
+	return 0;	/* no add_uc_addr op in the AE ops: nothing to do */
+}
+
+static int hns_nic_uc_unsync(struct net_device *netdev,
+			     const unsigned char *addr)
+{
+	struct hns_nic_priv *priv = netdev_priv(netdev);
+	struct hnae_handle *h = priv->ae_handle;
+
+	if (h->dev->ops->rm_uc_addr)
+		return h->dev->ops->rm_uc_addr(h, addr);
+
+	return 0;	/* no rm_uc_addr op in the AE ops: nothing to do */
+}
+
 /**
  * nic_set_multicast_list - set mutl mac address
  * @netdev: net device
@@ -1514,6 +1596,10 @@ void hns_set_multicast_list(struct net_device *ndev)
 		return;
 	}
 
+	if (h->dev->ops->clr_mc_addr)
+		if (h->dev->ops->clr_mc_addr(h))
+			netdev_err(ndev, "clear multicast address fail\n");
+
 	if (h->dev->ops->set_mc_addr) {
 		netdev_for_each_mc_addr(ha, ndev)
 			if (h->dev->ops->set_mc_addr(h, ha->addr))
@@ -1534,6 +1620,9 @@ void hns_nic_set_rx_mode(struct net_device *ndev)
 	}
 
 	hns_set_multicast_list(ndev);
+
+	if (__dev_uc_sync(ndev, hns_nic_uc_sync, hns_nic_uc_unsync))
+		netdev_err(ndev, "sync uc address fail\n");
 }
 
 struct rtnl_link_stats64 *hns_nic_get_stats64(struct net_device *ndev,
@@ -1992,14 +2081,20 @@ static int hns_nic_dev_probe(struct platform_device *pdev)
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM;
 	ndev->vlan_features |= NETIF_F_SG | NETIF_F_GSO | NETIF_F_GRO;
 
+	/* MTU range: 68 - 9578 (v1) or 9706 (v2) */
+	ndev->min_mtu = MAC_MIN_MTU;
 	switch (priv->enet_ver) {
 	case AE_VERSION_2:
 		ndev->features |= NETIF_F_TSO | NETIF_F_TSO6;
 		ndev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 			NETIF_F_RXCSUM | NETIF_F_SG | NETIF_F_GSO |
 			NETIF_F_GRO | NETIF_F_TSO | NETIF_F_TSO6;
+		ndev->max_mtu = MAC_MAX_MTU_V2 -
+				(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
 		break;
 	default:
+		ndev->max_mtu = MAC_MAX_MTU -
+				(ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
 		break;
 	}
 
diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
index 87d5c94b2810..3ac2183dbd21 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c
@@ -1178,7 +1178,8 @@ static int hns_nic_nway_reset(struct net_device *netdev)
 	struct phy_device *phy = netdev->phydev;
 
 	if (netif_running(netdev)) {
-		if (phy)
+		/* if autoneg is disabled, don't restart auto-negotiation */
+		if (phy && phy->autoneg == AUTONEG_ENABLE)
 			ret = genphy_restart_aneg(phy);
 	}
 
diff --git a/drivers/net/ethernet/hp/hp100.c b/drivers/net/ethernet/hp/hp100.c
index 631dbc7b4dbb..1a31bee6e728 100644
--- a/drivers/net/ethernet/hp/hp100.c
+++ b/drivers/net/ethernet/hp/hp100.c
@@ -427,7 +427,6 @@ static const struct net_device_ops hp100_bm_netdev_ops = {
 	.ndo_start_xmit		= hp100_start_xmit_bm,
 	.ndo_get_stats 		= hp100_get_stats,
 	.ndo_set_rx_mode	= hp100_set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
@@ -438,7 +437,6 @@ static const struct net_device_ops hp100_netdev_ops = {
 	.ndo_start_xmit		= hp100_start_xmit,
 	.ndo_get_stats 		= hp100_get_stats,
 	.ndo_set_rx_mode	= hp100_set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c
index ce235b776793..945883842533 100644
--- a/drivers/net/ethernet/i825xx/82596.c
+++ b/drivers/net/ethernet/i825xx/82596.c
@@ -1118,7 +1118,6 @@ static const struct net_device_ops i596_netdev_ops = {
 	.ndo_start_xmit		= i596_start_xmit,
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_tx_timeout		= i596_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/i825xx/ether1.c b/drivers/net/ethernet/i825xx/ether1.c
index 5d353c660068..dc983450354b 100644
--- a/drivers/net/ethernet/i825xx/ether1.c
+++ b/drivers/net/ethernet/i825xx/ether1.c
@@ -981,7 +981,6 @@ static const struct net_device_ops ether1_netdev_ops = {
 	.ndo_set_rx_mode	= ether1_setmulticastlist,
 	.ndo_tx_timeout		= ether1_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
diff --git a/drivers/net/ethernet/i825xx/lib82596.c b/drivers/net/ethernet/i825xx/lib82596.c
index 3dbc53c21baa..e86773325cbe 100644
--- a/drivers/net/ethernet/i825xx/lib82596.c
+++ b/drivers/net/ethernet/i825xx/lib82596.c
@@ -1037,7 +1037,6 @@ static const struct net_device_ops i596_netdev_ops = {
 	.ndo_start_xmit		= i596_start_xmit,
 	.ndo_set_rx_mode	= set_multicast_list,
 	.ndo_tx_timeout		= i596_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/i825xx/sun3_82586.c b/drivers/net/ethernet/i825xx/sun3_82586.c
index 21c84cc9c871..8bb15a8c2a40 100644
--- a/drivers/net/ethernet/i825xx/sun3_82586.c
+++ b/drivers/net/ethernet/i825xx/sun3_82586.c
@@ -337,7 +337,6 @@ static const struct net_device_ops sun3_82586_netdev_ops = {
 	.ndo_get_stats		= sun3_82586_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int __init sun3_82586_probe1(struct net_device *dev,int ioaddr)
diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c
index bd719e25dd76..702446a93697 100644
--- a/drivers/net/ethernet/ibm/ehea/ehea_main.c
+++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c
@@ -1981,14 +1981,6 @@ out:
 	ehea_update_bcmc_registrations();
 }
 
-static int ehea_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < 68) || (new_mtu > EHEA_MAX_PACKET_SIZE))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static void xmit_common(struct sk_buff *skb, struct ehea_swqe *swqe)
 {
 	swqe->tx_control |= EHEA_SWQE_IMM_DATA_PRESENT | EHEA_SWQE_CRC;
@@ -2970,7 +2962,6 @@ static const struct net_device_ops ehea_netdev_ops = {
 	.ndo_set_mac_address	= ehea_set_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= ehea_set_multicast_list,
-	.ndo_change_mtu		= ehea_change_mtu,
 	.ndo_vlan_rx_add_vid	= ehea_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= ehea_vlan_rx_kill_vid,
 	.ndo_tx_timeout		= ehea_tx_watchdog,
@@ -3043,13 +3034,16 @@ static struct ehea_port *ehea_setup_single_port(struct ehea_adapter *adapter,
 			NETIF_F_IP_CSUM;
 	dev->watchdog_timeo = EHEA_WATCH_DOG_TIMEOUT;
 
+	/* MTU range: 68 - 9022 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = EHEA_MAX_PACKET_SIZE;
+
 	INIT_WORK(&port->reset_task, ehea_reset_port);
 	INIT_DELAYED_WORK(&port->stats_work, ehea_update_stats);
 
 	init_waitqueue_head(&port->swqe_avail_wq);
 	init_waitqueue_head(&port->restart_wq);
 
-	memset(&port->stats, 0, sizeof(struct net_device_stats));
 	ret = register_netdev(dev);
 	if (ret) {
 		pr_err("register_netdev failed. ret=%d\n", ret);
diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c
index 8f139197f1aa..52a69c925965 100644
--- a/drivers/net/ethernet/ibm/emac/core.c
+++ b/drivers/net/ethernet/ibm/emac/core.c
@@ -1099,9 +1099,6 @@ static int emac_change_mtu(struct net_device *ndev, int new_mtu)
 	struct emac_instance *dev = netdev_priv(ndev);
 	int ret = 0;
 
-	if (new_mtu < EMAC_MIN_MTU || new_mtu > dev->max_mtu)
-		return -EINVAL;
-
 	DBG(dev, "change_mtu(%d)" NL, new_mtu);
 
 	if (netif_running(ndev)) {
@@ -2564,7 +2561,7 @@ static int emac_init_config(struct emac_instance *dev)
 	if (emac_read_uint_prop(np, "cell-index", &dev->cell_index, 1))
 		return -ENXIO;
 	if (emac_read_uint_prop(np, "max-frame-size", &dev->max_mtu, 0))
-		dev->max_mtu = 1500;
+		dev->max_mtu = ETH_DATA_LEN;
 	if (emac_read_uint_prop(np, "rx-fifo-size", &dev->rx_fifo_size, 0))
 		dev->rx_fifo_size = 2048;
 	if (emac_read_uint_prop(np, "tx-fifo-size", &dev->tx_fifo_size, 0))
@@ -2718,7 +2715,6 @@ static const struct net_device_ops emac_netdev_ops = {
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= emac_set_mac_address,
 	.ndo_start_xmit		= emac_start_xmit,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static const struct net_device_ops emac_gige_netdev_ops = {
@@ -2891,6 +2887,10 @@ static int emac_probe(struct platform_device *ofdev)
 		ndev->netdev_ops = &emac_netdev_ops;
 	ndev->ethtool_ops = &emac_ethtool_ops;
 
+	/* MTU range: 46 - 1500 or whatever is in OF */
+	ndev->min_mtu = EMAC_MIN_MTU;
+	ndev->max_mtu = dev->max_mtu;
+
 	netif_carrier_off(ndev);
 
 	err = register_netdev(ndev);
diff --git a/drivers/net/ethernet/ibm/ibmveth.c b/drivers/net/ethernet/ibm/ibmveth.c
index ebe60719e489..fbece63395a8 100644
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@ -58,7 +58,7 @@ static struct kobj_type ktype_veth_pool;
 
 static const char ibmveth_driver_name[] = "ibmveth";
 static const char ibmveth_driver_string[] = "IBM Power Virtual Ethernet Driver";
-#define ibmveth_driver_version "1.05"
+#define ibmveth_driver_version "1.06"
 
 MODULE_AUTHOR("Santiago Leon <santil@linux.vnet.ibm.com>");
 MODULE_DESCRIPTION("IBM Power Virtual Ethernet Driver");
@@ -137,6 +137,11 @@ static inline int ibmveth_rxq_frame_offset(struct ibmveth_adapter *adapter)
 	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_OFF_MASK;
 }
 
+static inline int ibmveth_rxq_large_packet(struct ibmveth_adapter *adapter)
+{
+	return ibmveth_rxq_flags(adapter) & IBMVETH_RXQ_LRG_PKT;
+}
+
 static inline int ibmveth_rxq_frame_length(struct ibmveth_adapter *adapter)
 {
 	return be32_to_cpu(adapter->rx_queue.queue_addr[adapter->rx_queue.index].length);
@@ -1174,6 +1179,45 @@ map_failed:
 	goto retry_bounce;
 }
 
+static void ibmveth_rx_mss_helper(struct sk_buff *skb, u16 mss, int lrg_pkt)
+{
+	int offset = 0;
+
+	/* only TCP packets will be aggregated */
+	if (skb->protocol == htons(ETH_P_IP)) {
+		struct iphdr *iph = (struct iphdr *)skb->data;
+
+		if (iph->protocol == IPPROTO_TCP) {
+			offset = iph->ihl * 4;
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4;
+		} else {
+			return;
+		}
+	} else if (skb->protocol == htons(ETH_P_IPV6)) {
+		struct ipv6hdr *iph6 = (struct ipv6hdr *)skb->data;
+
+		if (iph6->nexthdr == IPPROTO_TCP) {
+			offset = sizeof(struct ipv6hdr);
+			skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;
+		} else {
+			return;
+		}
+	} else {
+		return;
+	}
+	/* if mss is not set through Large Packet bit/mss in rx buffer,
+	 * expect that the mss will be written to the tcp header checksum.
+	 */
+	if (lrg_pkt) {
+		skb_shinfo(skb)->gso_size = mss;
+	} else if (offset) {
+		struct tcphdr *tcph = (struct tcphdr *)(skb->data + offset);
+
+		skb_shinfo(skb)->gso_size = ntohs(tcph->check);
+		tcph->check = 0;
+	}
+}
+
 static int ibmveth_poll(struct napi_struct *napi, int budget)
 {
 	struct ibmveth_adapter *adapter =
@@ -1182,6 +1226,7 @@ static int ibmveth_poll(struct napi_struct *napi, int budget)
 	int frames_processed = 0;
 	unsigned long lpar_rc;
 	struct iphdr *iph;
+	u16 mss = 0;
 
 restart_poll:
 	while (frames_processed < budget) {
@@ -1199,9 +1244,21 @@ restart_poll:
 			int length = ibmveth_rxq_frame_length(adapter);
 			int offset = ibmveth_rxq_frame_offset(adapter);
 			int csum_good = ibmveth_rxq_csum_good(adapter);
+			int lrg_pkt = ibmveth_rxq_large_packet(adapter);
 
 			skb = ibmveth_rxq_get_buffer(adapter);
 
+			/* if the large packet bit is set in the rx queue
+			 * descriptor, the mss will be written by PHYP eight
+			 * bytes from the start of the rx buffer, which is
+			 * skb->data at this stage
+			 */
+			if (lrg_pkt) {
+				__be64 *rxmss = (__be64 *)(skb->data + 8);
+
+				mss = (u16)be64_to_cpu(*rxmss);
+			}
+
 			new_skb = NULL;
 			if (length < rx_copybreak)
 				new_skb = netdev_alloc_skb(netdev, length);
@@ -1235,11 +1292,15 @@ restart_poll:
 					if (iph->check == 0xffff) {
 						iph->check = 0;
 						iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
-						adapter->rx_large_packets++;
 					}
 				}
 			}
 
+			if (length > netdev->mtu + ETH_HLEN) {
+				ibmveth_rx_mss_helper(skb, mss, lrg_pkt);
+				adapter->rx_large_packets++;
+			}
+
 			napi_gro_receive(napi, skb);	/* send it up */
 
 			netdev->stats.rx_packets++;
@@ -1349,9 +1410,6 @@ static int ibmveth_change_mtu(struct net_device *dev, int new_mtu)
 	int i, rc;
 	int need_restart = 0;
 
-	if (new_mtu < IBMVETH_MIN_MTU)
-		return -EINVAL;
-
 	for (i = 0; i < IBMVETH_NUM_BUFF_POOLS; i++)
 		if (new_mtu_oh <= adapter->rx_buff_pool[i].buff_size)
 			break;
@@ -1551,6 +1609,9 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
 		netdev->hw_features |= NETIF_F_TSO;
 	}
 
+	netdev->min_mtu = IBMVETH_MIN_MTU;
+	netdev->max_mtu = ETH_MAX_MTU;
+
 	memcpy(netdev->dev_addr, mac_addr_p, ETH_ALEN);
 
 	if (firmware_has_feature(FW_FEATURE_CMO))
diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 4eade67fe30c..7acda04d034e 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -209,6 +209,7 @@ struct ibmveth_rx_q_entry {
 #define IBMVETH_RXQ_TOGGLE		0x80000000
 #define IBMVETH_RXQ_TOGGLE_SHIFT	31
 #define IBMVETH_RXQ_VALID		0x40000000
+#define IBMVETH_RXQ_LRG_PKT		0x04000000
 #define IBMVETH_RXQ_NO_CSUM		0x02000000
 #define IBMVETH_RXQ_CSUM_GOOD		0x01000000
 #define IBMVETH_RXQ_OFF_MASK		0x0000FFFF
diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
index 0fbf686f5e7c..c12596676bbb 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.c
+++ b/drivers/net/ethernet/ibm/ibmvnic.c
@@ -901,17 +901,6 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
 	return 0;
 }
 
-static int ibmvnic_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	struct ibmvnic_adapter *adapter = netdev_priv(netdev);
-
-	if (new_mtu > adapter->req_mtu || new_mtu < adapter->min_mtu)
-		return -EINVAL;
-
-	netdev->mtu = new_mtu;
-	return 0;
-}
-
 static void ibmvnic_tx_timeout(struct net_device *dev)
 {
 	struct ibmvnic_adapter *adapter = netdev_priv(dev);
@@ -1028,7 +1017,6 @@ static const struct net_device_ops ibmvnic_netdev_ops = {
 	.ndo_set_rx_mode	= ibmvnic_set_multi,
 	.ndo_set_mac_address	= ibmvnic_set_mac,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= ibmvnic_change_mtu,
 	.ndo_tx_timeout		= ibmvnic_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= ibmvnic_netpoll_controller,
@@ -2638,10 +2626,12 @@ static void handle_query_cap_rsp(union ibmvnic_crq *crq,
 		break;
 	case MIN_MTU:
 		adapter->min_mtu = be64_to_cpu(crq->query_capability.number);
+		netdev->min_mtu = adapter->min_mtu;
 		netdev_dbg(netdev, "min_mtu = %lld\n", adapter->min_mtu);
 		break;
 	case MAX_MTU:
 		adapter->max_mtu = be64_to_cpu(crq->query_capability.number);
+		netdev->max_mtu = adapter->max_mtu;
 		netdev_dbg(netdev, "max_mtu = %lld\n", adapter->max_mtu);
 		break;
 	case MAX_MULTICAST_FILTERS:
@@ -3667,6 +3657,8 @@ static void handle_crq_init_rsp(struct work_struct *work)
 
 	netdev->real_num_tx_queues = adapter->req_tx_queues;
 	netdev->mtu = adapter->req_mtu;
+	netdev->min_mtu = adapter->min_mtu;
+	netdev->max_mtu = adapter->max_mtu;
 
 	if (adapter->failover) {
 		adapter->failover = false;
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index c0e17433f623..1349b45f014d 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -58,7 +58,7 @@ config E1000E
 	tristate "Intel(R) PRO/1000 PCI-Express Gigabit Ethernet support"
 	depends on PCI && (!SPARC32 || BROKEN)
 	select CRC32
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports the PCI-Express Intel(R) PRO/1000 gigabit
 	  ethernet family of adapters. For PCI or PCI-X e1000 adapters,
@@ -83,7 +83,7 @@ config E1000E_HWTS
 config IGB
 	tristate "Intel(R) 82575/82576 PCI-Express Gigabit Ethernet support"
 	depends on PCI
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select I2C
 	select I2C_ALGOBIT
 	---help---
@@ -156,7 +156,7 @@ config IXGBE
 	tristate "Intel(R) 10GbE PCI Express adapters support"
 	depends on PCI
 	select MDIO
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Intel(R) 10GbE PCI Express family of
 	  adapters.  For more information on how to identify your adapter, go
@@ -213,7 +213,7 @@ config IXGBEVF
 
 config I40E
 	tristate "Intel(R) Ethernet Controller XL710 Family support"
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	depends on PCI
 	---help---
 	  This driver supports Intel(R) Ethernet Controller XL710 Family of
@@ -264,7 +264,7 @@ config FM10K
 	tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support"
 	default n
 	depends on PCI_MSI
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Intel(R) FM10000 Ethernet Switch Host
 	  Interface.  For more information on how to identify your adapter,
diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c
index 068789e694c9..25c6dfd500b4 100644
--- a/drivers/net/ethernet/intel/e100.c
+++ b/drivers/net/ethernet/intel/e100.c
@@ -2286,14 +2286,6 @@ static int e100_set_mac_address(struct net_device *netdev, void *p)
 	return 0;
 }
 
-static int e100_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	if (new_mtu < ETH_ZLEN || new_mtu > ETH_DATA_LEN)
-		return -EINVAL;
-	netdev->mtu = new_mtu;
-	return 0;
-}
-
 static int e100_asf(struct nic *nic)
 {
 	/* ASF can be enabled from eeprom */
@@ -2834,7 +2826,6 @@ static const struct net_device_ops e100_netdev_ops = {
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_rx_mode	= e100_set_multicast_list,
 	.ndo_set_mac_address	= e100_set_mac_address,
-	.ndo_change_mtu		= e100_change_mtu,
 	.ndo_do_ioctl		= e100_do_ioctl,
 	.ndo_tx_timeout		= e100_tx_timeout,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index f42129d09e2c..93fc6c67306b 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -1085,6 +1085,10 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	    hw->subsystem_vendor_id != PCI_VENDOR_ID_VMWARE)
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 46 - 16110 */
+	netdev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	netdev->max_mtu = MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+
 	adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw);
 
 	/* initialize eeprom parameters */
@@ -3549,13 +3553,7 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	int max_frame = new_mtu + ENET_HEADER_SIZE + ETHERNET_FCS_SIZE;
-
-	if ((max_frame < MINIMUM_ETHERNET_FRAME_SIZE) ||
-	    (max_frame > MAX_JUMBO_FRAME_SIZE)) {
-		e_err(probe, "Invalid MTU setting\n");
-		return -EINVAL;
-	}
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
 
 	/* Adapter-specific max frame size limits. */
 	switch (hw->mac_type) {
@@ -5257,8 +5255,8 @@ static void e1000_netpoll(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 
-	disable_irq(adapter->pdev->irq);
-	e1000_intr(adapter->pdev->irq, netdev);
+	if (disable_hardirq(adapter->pdev->irq))
+		e1000_intr(adapter->pdev->irq, netdev);
 	enable_irq(adapter->pdev->irq);
 }
 #endif
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 7017281ba2dc..ffcf35af4881 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5974,19 +5974,12 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
 	int max_frame = new_mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
 
 	/* Jumbo frame support */
-	if ((max_frame > (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN)) &&
+	if ((new_mtu > ETH_DATA_LEN) &&
 	    !(adapter->flags & FLAG_HAS_JUMBO_FRAMES)) {
 		e_err("Jumbo Frames not supported.\n");
 		return -EINVAL;
 	}
 
-	/* Supported frame sizes */
-	if ((new_mtu < (VLAN_ETH_ZLEN + ETH_FCS_LEN)) ||
-	    (max_frame > adapter->max_hw_frame_size)) {
-		e_err("Unsupported MTU setting\n");
-		return -EINVAL;
-	}
-
 	/* Jumbo frame workaround on 82579 and newer requires CRC be stripped */
 	if ((adapter->hw.mac.type >= e1000_pch2lan) &&
 	    !(adapter->flags2 & FLAG2_CRC_STRIPPING) &&
@@ -6762,13 +6755,13 @@ static void e1000_netpoll(struct net_device *netdev)
 		e1000_intr_msix(adapter->pdev->irq, netdev);
 		break;
 	case E1000E_INT_MODE_MSI:
-		disable_irq(adapter->pdev->irq);
-		e1000_intr_msi(adapter->pdev->irq, netdev);
+		if (disable_hardirq(adapter->pdev->irq))
+			e1000_intr_msi(adapter->pdev->irq, netdev);
 		enable_irq(adapter->pdev->irq);
 		break;
 	default:		/* E1000E_INT_MODE_LEGACY */
-		disable_irq(adapter->pdev->irq);
-		e1000_intr(adapter->pdev->irq, netdev);
+		if (disable_hardirq(adapter->pdev->irq))
+			e1000_intr(adapter->pdev->irq, netdev);
 		enable_irq(adapter->pdev->irq);
 		break;
 	}
@@ -7187,6 +7180,11 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->vlan_features |= NETIF_F_HIGHDMA;
 	}
 
+	/* MTU range: 68 - max_hw_frame_size */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = adapter->max_hw_frame_size -
+			  (VLAN_ETH_HLEN + ETH_FCS_LEN);
+
 	if (e1000e_enable_mng_pass_thru(&adapter->hw))
 		adapter->flags |= FLAG_MNG_PT_ENABLED;
 
diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
index 05629381be6b..bc5ef6eb3dd6 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c
@@ -706,16 +706,6 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev)
 	return err;
 }
 
-static int fm10k_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if (new_mtu < 68 || new_mtu > FM10K_MAX_JUMBO_FRAME_SIZE)
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 /**
  * fm10k_tx_timeout - Respond to a Tx Hang
  * @netdev: network interface device structure
@@ -1405,7 +1395,6 @@ static const struct net_device_ops fm10k_netdev_ops = {
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_start_xmit		= fm10k_xmit_frame,
 	.ndo_set_mac_address	= fm10k_set_mac,
-	.ndo_change_mtu		= fm10k_change_mtu,
 	.ndo_tx_timeout		= fm10k_tx_timeout,
 	.ndo_vlan_rx_add_vid	= fm10k_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= fm10k_vlan_rx_kill_vid,
@@ -1490,5 +1479,9 @@ struct net_device *fm10k_alloc_netdev(const struct fm10k_info *info)
 
 	dev->hw_features |= hw_features;
 
+	/* MTU range: 68 - 15342 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = FM10K_MAX_JUMBO_FRAME_SIZE;
+
 	return dev;
 }
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 6d61e443bdf8..ba8d30984bee 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -39,6 +39,7 @@
 #include <linux/iommu.h>
 #include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/hashtable.h>
 #include <linux/string.h>
 #include <linux/in.h>
 #include <linux/ip.h>
@@ -355,9 +356,11 @@ struct i40e_pf {
 #define I40E_FLAG_NO_DCB_SUPPORT		BIT_ULL(45)
 #define I40E_FLAG_USE_SET_LLDP_MIB		BIT_ULL(46)
 #define I40E_FLAG_STOP_FW_LLDP			BIT_ULL(47)
-#define I40E_FLAG_HAVE_10GBASET_PHY		BIT_ULL(48)
+#define I40E_FLAG_PHY_CONTROLS_LEDS		BIT_ULL(48)
 #define I40E_FLAG_PF_MAC			BIT_ULL(50)
 #define I40E_FLAG_TRUE_PROMISC_SUPPORT		BIT_ULL(51)
+#define I40E_FLAG_HAVE_CRT_RETIMER		BIT_ULL(52)
+#define I40E_FLAG_PTP_L4_CAPABLE		BIT_ULL(53)
 
 	/* tracks features that get auto disabled by errors */
 	u64 auto_disable_flags;
@@ -428,11 +431,13 @@ struct i40e_pf {
 	struct ptp_clock_info ptp_caps;
 	struct sk_buff *ptp_tx_skb;
 	struct hwtstamp_config tstamp_config;
-	unsigned long last_rx_ptp_check;
-	spinlock_t tmreg_lock; /* Used to protect the device time registers. */
+	struct mutex tmreg_lock; /* Used to protect the SYSTIME registers. */
 	u64 ptp_base_adj;
 	u32 tx_hwtstamp_timeouts;
 	u32 rx_hwtstamp_cleared;
+	u32 latch_event_flags;
+	spinlock_t ptp_rx_lock; /* Used to protect Rx timestamp registers. */
+	unsigned long latch_events[4];
 	bool ptp_tx;
 	bool ptp_rx;
 	u16 rss_table_size; /* HW RSS table size */
@@ -445,6 +450,20 @@ struct i40e_pf {
 	u16 phy_led_val;
 };
 
+/**
+ * i40e_addr_to_hkey - Convert a 6-byte MAC Address to a u64 hash key
+ * @macaddr: the MAC Address as the base key
+ *
+ * Simply copies the address and returns it as a u64 for hashing
+ **/
+static inline u64 i40e_addr_to_hkey(const u8 *macaddr)
+{
+	u64 key = 0;
+
+	ether_addr_copy((u8 *)&key, macaddr);
+	return key;
+}
+
 enum i40e_filter_state {
 	I40E_FILTER_INVALID = 0,	/* Invalid state */
 	I40E_FILTER_NEW,		/* New, not sent to FW yet */
@@ -454,13 +473,10 @@ enum i40e_filter_state {
 /* There is no 'removed' state; the filter struct is freed */
 };
 struct i40e_mac_filter {
-	struct list_head list;
+	struct hlist_node hlist;
 	u8 macaddr[ETH_ALEN];
 #define I40E_VLAN_ANY -1
 	s16 vlan;
-	u8 counter;		/* number of instances of this filter */
-	bool is_vf;		/* filter belongs to a VF */
-	bool is_netdev;		/* filter belongs to a netdev */
 	enum i40e_filter_state state;
 };
 
@@ -501,9 +517,11 @@ struct i40e_vsi {
 #define I40E_VSI_FLAG_VEB_OWNER		BIT(1)
 	unsigned long flags;
 
-	/* Per VSI lock to protect elements/list (MAC filter) */
-	spinlock_t mac_filter_list_lock;
-	struct list_head mac_filter_list;
+	/* Per VSI lock to protect elements/hash (MAC filter) */
+	spinlock_t mac_filter_hash_lock;
+	/* Fixed size hash table with 2^8 buckets for MAC filters */
+	DECLARE_HASHTABLE(mac_filter_hash, 8);
+	bool has_vlan_filter;
 
 	/* VSI stats */
 	struct rtnl_link_stats64 net_stats;
@@ -579,6 +597,7 @@ struct i40e_vsi {
 	u16 veb_idx;		/* index of VEB parent */
 	struct kobject *kobj;	/* sysfs object */
 	bool current_isup;	/* Sync 'link up' logging */
+	enum i40e_aq_link_speed current_speed;	/* Sync link speed logging */
 
 	void *priv;	/* client driver data reference. */
 
@@ -608,6 +627,8 @@ struct i40e_q_vector {
 	unsigned long hung_detected; /* Set/Reset for hung_detection logic */
 
 	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
+
 	struct rcu_head rcu;	/* to avoid race with update stats on free */
 	char name[I40E_INT_NAME_STR_LEN];
 	bool arm_wb_state;
@@ -705,6 +726,25 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut,
 		       u16 rss_table_size, u16 rss_size);
 struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id);
+/**
+ * i40e_find_vsi_by_type - Find and return the first VSI of the given type
+ * @pf: PF to search for VSI
+ * @type: Value indicating type of VSI we are looking for
+ **/
+static inline struct i40e_vsi *
+i40e_find_vsi_by_type(struct i40e_pf *pf, u16 type)
+{
+	int i;
+
+	for (i = 0; i < pf->num_alloc_vsi; i++) {
+		struct i40e_vsi *vsi = pf->vsi[i];
+
+		if (vsi && vsi->type == type)
+			return vsi;
+	}
+
+	return NULL;
+}
 void i40e_update_stats(struct i40e_vsi *vsi);
 void i40e_update_eth_stats(struct i40e_vsi *vsi);
 struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
@@ -721,16 +761,12 @@ u32 i40e_get_global_fd_count(struct i40e_pf *pf);
 bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features);
 void i40e_set_ethtool_ops(struct net_device *netdev);
 struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
-					u8 *macaddr, s16 vlan,
-					bool is_vf, bool is_netdev);
-void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan,
-		     bool is_vf, bool is_netdev);
+					const u8 *macaddr, s16 vlan);
+void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan);
 int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
 struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
 				u16 uplink, u32 param1);
 int i40e_vsi_release(struct i40e_vsi *vsi);
-struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, enum i40e_vsi_type type,
-				 struct i40e_vsi *start_vsi);
 #ifdef I40E_FCOE
 void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 			      struct i40e_vsi_context *ctxt,
@@ -740,7 +776,8 @@ void i40e_service_event_schedule(struct i40e_pf *pf);
 void i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id,
 				  u8 *msg, u16 len);
 
-int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool enable);
+int i40e_vsi_start_rings(struct i40e_vsi *vsi);
+void i40e_vsi_stop_rings(struct i40e_vsi *vsi);
 int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count);
 struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
 				u16 downlink_seid, u8 enabled_tc);
@@ -815,15 +852,15 @@ int i40e_open(struct net_device *netdev);
 int i40e_close(struct net_device *netdev);
 int i40e_vsi_open(struct i40e_vsi *vsi);
 void i40e_vlan_stripping_disable(struct i40e_vsi *vsi);
+int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
 int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid);
-int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid);
-struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-					     bool is_vf, bool is_netdev);
-int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-			  bool is_vf, bool is_netdev);
+void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid);
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid);
+struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi,
+					     const u8 *macaddr);
+int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr);
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
-struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
-				      bool is_vf, bool is_netdev);
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr);
 #ifdef I40E_FCOE
 int __i40e_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
 		    struct tc_to_netdev *tc);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 738b42a44f20..56fb27298936 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -964,11 +964,11 @@ i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
 	desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
 	desc_idx = ntc;
 
+	hw->aq.arq_last_status =
+		(enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
 	flags = le16_to_cpu(desc->flags);
 	if (flags & I40E_AQ_FLAG_ERR) {
 		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
-		hw->aq.arq_last_status =
-			(enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
 		i40e_debug(hw,
 			   I40E_DEBUG_AQ_MESSAGE,
 			   "AQRX: Event received with error 0x%X.\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
index 67e396b2b347..b2101a51534c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
@@ -1642,6 +1642,10 @@ enum i40e_aq_phy_type {
 	I40E_PHY_TYPE_1000BASE_LX		= 0x1C,
 	I40E_PHY_TYPE_1000BASE_T_OPTICAL	= 0x1D,
 	I40E_PHY_TYPE_20GBASE_KR2		= 0x1E,
+	I40E_PHY_TYPE_25GBASE_KR		= 0x1F,
+	I40E_PHY_TYPE_25GBASE_CR		= 0x20,
+	I40E_PHY_TYPE_25GBASE_SR		= 0x21,
+	I40E_PHY_TYPE_25GBASE_LR		= 0x22,
 	I40E_PHY_TYPE_MAX
 };
 
@@ -1650,6 +1654,7 @@ enum i40e_aq_phy_type {
 #define I40E_LINK_SPEED_10GB_SHIFT	0x3
 #define I40E_LINK_SPEED_40GB_SHIFT	0x4
 #define I40E_LINK_SPEED_20GB_SHIFT	0x5
+#define I40E_LINK_SPEED_25GB_SHIFT	0x6
 
 enum i40e_aq_link_speed {
 	I40E_LINK_SPEED_UNKNOWN	= 0,
@@ -1657,7 +1662,8 @@ enum i40e_aq_link_speed {
 	I40E_LINK_SPEED_1GB	= BIT(I40E_LINK_SPEED_1000MB_SHIFT),
 	I40E_LINK_SPEED_10GB	= BIT(I40E_LINK_SPEED_10GB_SHIFT),
 	I40E_LINK_SPEED_40GB	= BIT(I40E_LINK_SPEED_40GB_SHIFT),
-	I40E_LINK_SPEED_20GB	= BIT(I40E_LINK_SPEED_20GB_SHIFT)
+	I40E_LINK_SPEED_20GB	= BIT(I40E_LINK_SPEED_20GB_SHIFT),
+	I40E_LINK_SPEED_25GB	= BIT(I40E_LINK_SPEED_25GB_SHIFT),
 };
 
 struct i40e_aqc_module_desc {
@@ -1680,6 +1686,8 @@ struct i40e_aq_get_phy_abilities_resp {
 #define I40E_AQ_PHY_LINK_ENABLED	0x08
 #define I40E_AQ_PHY_AN_ENABLED		0x10
 #define I40E_AQ_PHY_FLAG_MODULE_QUAL	0x20
+#define I40E_AQ_PHY_FEC_ABILITY_KR	0x40
+#define I40E_AQ_PHY_FEC_ABILITY_RS	0x80
 	__le16	eee_capability;
 #define I40E_AQ_EEE_100BASE_TX		0x0002
 #define I40E_AQ_EEE_1000BASE_T		0x0004
@@ -1690,7 +1698,22 @@ struct i40e_aq_get_phy_abilities_resp {
 	__le32	eeer_val;
 	u8	d3_lpan;
 #define I40E_AQ_SET_PHY_D3_LPAN_ENA	0x01
-	u8	reserved[3];
+	u8	phy_type_ext;
+#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0X01
+#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
+#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
+#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+	u8	fec_cfg_curr_mod_ext_info;
+#define I40E_AQ_ENABLE_FEC_KR		0x01
+#define I40E_AQ_ENABLE_FEC_RS		0x02
+#define I40E_AQ_REQUEST_FEC_KR		0x04
+#define I40E_AQ_REQUEST_FEC_RS		0x08
+#define I40E_AQ_ENABLE_FEC_AUTO		0x10
+#define I40E_AQ_FEC
+#define I40E_AQ_MODULE_TYPE_EXT_MASK	0xE0
+#define I40E_AQ_MODULE_TYPE_EXT_SHIFT	5
+
+	u8	ext_comp_code;
 	u8	phy_id[4];
 	u8	module_type[3];
 	u8	qualified_module_count;
@@ -1712,7 +1735,20 @@ struct i40e_aq_set_phy_config { /* same bits as above in all */
 	__le16	eee_capability;
 	__le32	eeer;
 	u8	low_power_ctrl;
-	u8	reserved[3];
+	u8	phy_type_ext;
+#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0X01
+#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
+#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
+#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+	u8	fec_config;
+#define I40E_AQ_SET_FEC_ABILITY_KR	BIT(0)
+#define I40E_AQ_SET_FEC_ABILITY_RS	BIT(1)
+#define I40E_AQ_SET_FEC_REQUEST_KR	BIT(2)
+#define I40E_AQ_SET_FEC_REQUEST_RS	BIT(3)
+#define I40E_AQ_SET_FEC_AUTO		BIT(4)
+#define I40E_AQ_PHY_FEC_CONFIG_SHIFT	0x0
+#define I40E_AQ_PHY_FEC_CONFIG_MASK	(0x1F << I40E_AQ_PHY_FEC_CONFIG_SHIFT)
+	u8	reserved;
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
@@ -1792,9 +1828,18 @@ struct i40e_aqc_get_link_status {
 #define I40E_AQ_LINK_TX_DRAINED		0x01
 #define I40E_AQ_LINK_TX_FLUSHED		0x03
 #define I40E_AQ_LINK_FORCED_40G		0x10
+/* 25G Error Codes */
+#define I40E_AQ_25G_NO_ERR		0X00
+#define I40E_AQ_25G_NOT_PRESENT		0X01
+#define I40E_AQ_25G_NVM_CRC_ERR		0X02
+#define I40E_AQ_25G_SBUS_UCODE_ERR	0X03
+#define I40E_AQ_25G_SERDES_UCODE_ERR	0X04
+#define I40E_AQ_25G_NIMB_UCODE_ERR	0X05
 	u8	loopback; /* use defines from i40e_aqc_set_lb_mode */
 	__le16	max_frame_size;
 	u8	config;
+#define I40E_AQ_CONFIG_FEC_KR_ENA	0x01
+#define I40E_AQ_CONFIG_FEC_RS_ENA	0x02
 #define I40E_AQ_CONFIG_CRC_ENA		0x04
 #define I40E_AQ_CONFIG_PACING_MASK	0x78
 	u8	external_power_ability;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c
index 250db0b244b7..7fe72abc0b4a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.c
@@ -287,6 +287,7 @@ void i40e_notify_client_of_netdev_close(struct i40e_vsi *vsi, bool reset)
 			}
 			cdev->client->ops->close(&cdev->lan_info, cdev->client,
 						 reset);
+			clear_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
 			i40e_client_release_qvlist(&cdev->lan_info);
 		}
 	}
@@ -406,37 +407,6 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id,
 }
 
 /**
- * i40e_vsi_lookup - finds a matching VSI from the PF list starting at start_vsi
- * @pf: board private structure
- * @type: vsi type
- * @start_vsi: a VSI pointer from where to start the search
- *
- * Returns non NULL on success or NULL for failure
- **/
-struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf,
-				 enum i40e_vsi_type type,
-				 struct i40e_vsi *start_vsi)
-{
-	struct i40e_vsi *vsi;
-	int i = 0;
-
-	if (start_vsi) {
-		for (i = 0; i < pf->num_alloc_vsi; i++) {
-			vsi = pf->vsi[i];
-			if (vsi == start_vsi)
-				break;
-		}
-	}
-	for (; i < pf->num_alloc_vsi; i++) {
-		vsi = pf->vsi[i];
-		if (vsi && vsi->type == type)
-			return vsi;
-	}
-
-	return NULL;
-}
-
-/**
  * i40e_client_add_instance - add a client instance struct to the instance list
  * @pf: pointer to the board struct
  * @client: pointer to a client struct in the client list.
@@ -565,7 +535,7 @@ void i40e_client_subtask(struct i40e_pf *pf)
 			if (test_bit(__I40E_DOWN, &pf->vsi[pf->lan_vsi]->state))
 				continue;
 		} else {
-			dev_warn(&pf->pdev->dev, "This client %s is being instanciated at probe\n",
+			dev_warn(&pf->pdev->dev, "This client %s is being instantiated at probe\n",
 				 client->name);
 		}
 
@@ -575,29 +545,25 @@ void i40e_client_subtask(struct i40e_pf *pf)
 			continue;
 
 		if (!existing) {
-			/* Also up the ref_cnt for no. of instances of this
-			 * client.
-			 */
-			atomic_inc(&client->ref_cnt);
 			dev_info(&pf->pdev->dev, "Added instance of Client %s to PF%d bus=0x%02x func=0x%02x\n",
 				 client->name, pf->hw.pf_id,
 				 pf->hw.bus.device, pf->hw.bus.func);
 		}
 
 		mutex_lock(&i40e_client_instance_mutex);
-		/* Send an Open request to the client */
-		atomic_inc(&cdev->ref_cnt);
-		if (client->ops && client->ops->open)
-			ret = client->ops->open(&cdev->lan_info, client);
-		atomic_dec(&cdev->ref_cnt);
-		if (!ret) {
-			set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state);
-		} else {
-			/* remove client instance */
-			mutex_unlock(&i40e_client_instance_mutex);
-			i40e_client_del_instance(pf, client);
-			atomic_dec(&client->ref_cnt);
-			continue;
+		if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED,
+			      &cdev->state)) {
+			/* Send an Open request to the client */
+			if (client->ops && client->ops->open)
+				ret = client->ops->open(&cdev->lan_info,
+							client);
+			if (!ret) {
+				set_bit(__I40E_CLIENT_INSTANCE_OPENED,
+					&cdev->state);
+			} else {
+				/* remove client instance */
+				i40e_client_del_instance(pf, client);
+			}
 		}
 		mutex_unlock(&i40e_client_instance_mutex);
 	}
@@ -694,10 +660,6 @@ static int i40e_client_release(struct i40e_client *client)
 			continue;
 		pf = (struct i40e_pf *)cdev->lan_info.pf;
 		if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) {
-			if (atomic_read(&cdev->ref_cnt) > 0) {
-				ret = I40E_ERR_NOT_READY;
-				goto out;
-			}
 			if (client->ops && client->ops->close)
 				client->ops->close(&cdev->lan_info, client,
 						   false);
@@ -710,11 +672,9 @@ static int i40e_client_release(struct i40e_client *client)
 		}
 		/* delete the client instance from the list */
 		list_move(&cdev->list, &cdevs_tmp);
-		atomic_dec(&client->ref_cnt);
 		dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n",
 			 client->name);
 	}
-out:
 	mutex_unlock(&i40e_client_instance_mutex);
 
 	/* free the client device and release its vsi */
@@ -1040,17 +1000,10 @@ int i40e_unregister_client(struct i40e_client *client)
 		ret = -ENODEV;
 		goto out;
 	}
-	if (atomic_read(&client->ref_cnt) == 0) {
-		clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
-		list_del(&client->list);
-		pr_info("i40e: Unregistered client %s with return code %d\n",
-			client->name, ret);
-	} else {
-		ret = I40E_ERR_NOT_READY;
-		pr_err("i40e: Client %s failed unregister - client has open instances\n",
-		       client->name);
-	}
-
+	clear_bit(__I40E_CLIENT_REGISTERED, &client->state);
+	list_del(&client->list);
+	pr_info("i40e: Unregistered client %s with return code %d\n",
+		client->name, ret);
 out:
 	mutex_unlock(&i40e_client_mutex);
 	return ret;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h
index 38a6c36a6a0e..528bd79b05fe 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_client.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_client.h
@@ -203,8 +203,6 @@ struct i40e_client_instance {
 	struct i40e_info lan_info;
 	struct i40e_client *client;
 	unsigned long  state;
-	/* A count of all the in-progress calls to the client */
-	atomic_t ref_cnt;
 };
 
 struct i40e_client {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index 2154a34c1dd8..128735975caa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -53,6 +53,8 @@ static i40e_status i40e_set_mac_type(struct i40e_hw *hw)
 		case I40E_DEV_ID_10G_BASE_T4:
 		case I40E_DEV_ID_20G_KR2:
 		case I40E_DEV_ID_20G_KR2_A:
+		case I40E_DEV_ID_25G_B:
+		case I40E_DEV_ID_25G_SFP28:
 			hw->mac.type = I40E_MAC_XL710;
 			break;
 		case I40E_DEV_ID_KX_X722:
@@ -1183,6 +1185,8 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
 	case I40E_PHY_TYPE_1000BASE_LX:
 	case I40E_PHY_TYPE_40GBASE_SR4:
 	case I40E_PHY_TYPE_40GBASE_LR4:
+	case I40E_PHY_TYPE_25GBASE_LR:
+	case I40E_PHY_TYPE_25GBASE_SR:
 		media = I40E_MEDIA_TYPE_FIBER;
 		break;
 	case I40E_PHY_TYPE_100BASE_TX:
@@ -1197,6 +1201,7 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
 	case I40E_PHY_TYPE_10GBASE_SFPP_CU:
 	case I40E_PHY_TYPE_40GBASE_AOC:
 	case I40E_PHY_TYPE_10GBASE_AOC:
+	case I40E_PHY_TYPE_25GBASE_CR:
 		media = I40E_MEDIA_TYPE_DA;
 		break;
 	case I40E_PHY_TYPE_1000BASE_KX:
@@ -1204,6 +1209,7 @@ static enum i40e_media_type i40e_get_media_type(struct i40e_hw *hw)
 	case I40E_PHY_TYPE_10GBASE_KR:
 	case I40E_PHY_TYPE_40GBASE_KR4:
 	case I40E_PHY_TYPE_20GBASE_KR2:
+	case I40E_PHY_TYPE_25GBASE_KR:
 		media = I40E_MEDIA_TYPE_BACKPLANE;
 		break;
 	case I40E_PHY_TYPE_SGMII:
@@ -1608,8 +1614,10 @@ i40e_status i40e_aq_get_phy_capabilities(struct i40e_hw *hw,
 	if (hw->aq.asq_last_status == I40E_AQ_RC_EIO)
 		status = I40E_ERR_UNKNOWN_PHY;
 
-	if (report_init)
+	if (report_init) {
 		hw->phy.phy_types = le32_to_cpu(abilities->phy_type);
+		hw->phy.phy_types |= ((u64)abilities->phy_type_ext << 32);
+	}
 
 	return status;
 }
@@ -1701,10 +1709,13 @@ enum i40e_status_code i40e_set_fc(struct i40e_hw *hw, u8 *aq_failures,
 			config.abilities |= I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
 		/* Copy over all the old settings */
 		config.phy_type = abilities.phy_type;
+		config.phy_type_ext = abilities.phy_type_ext;
 		config.link_speed = abilities.link_speed;
 		config.eee_capability = abilities.eee_capability;
 		config.eeer = abilities.eeer_val;
 		config.low_power_ctrl = abilities.d3_lpan;
+		config.fec_config = abilities.fec_cfg_curr_mod_ext_info &
+				    I40E_AQ_PHY_FEC_CONFIG_MASK;
 		status = i40e_aq_set_phy_config(hw, &config, NULL);
 
 		if (status)
@@ -1849,12 +1860,13 @@ i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
 	else
 		hw_link_info->crc_enable = false;
 
-	if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_ENABLE))
+	if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_IS_ENABLED))
 		hw_link_info->lse_enable = true;
 	else
 		hw_link_info->lse_enable = false;
 
-	if ((hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 &&
+	if ((hw->mac.type == I40E_MAC_XL710) &&
+	    (hw->aq.fw_maj_ver < 4 || (hw->aq.fw_maj_ver == 4 &&
 	     hw->aq.fw_min_ver < 40)) && hw_link_info->phy_type == 0xE)
 		hw_link_info->phy_type = I40E_PHY_TYPE_10GBASE_SFPP_CU;
 
@@ -2169,6 +2181,40 @@ enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
 }
 
 /**
+ * i40e_aq_set_vsi_bc_promisc_on_vlan
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @enable: set broadcast promiscuous enable/disable for a given VLAN
+ * @vid: The VLAN tag filter - capture any broadcast packet with this VLAN tag
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+				u16 seid, bool enable, u16 vid,
+				struct i40e_asq_cmd_details *cmd_details)
+{
+	struct i40e_aq_desc desc;
+	struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+		(struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+	i40e_status status;
+	u16 flags = 0;
+
+	i40e_fill_default_direct_cmd_desc(&desc,
+					i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+	if (enable)
+		flags |= I40E_AQC_SET_VSI_PROMISC_BROADCAST;
+
+	cmd->promiscuous_flags = cpu_to_le16(flags);
+	cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+	cmd->seid = cpu_to_le16(seid);
+	cmd->vlan_tag = cpu_to_le16(vid | I40E_AQC_SET_VSI_VLAN_VALID);
+
+	status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+	return status;
+}
+
+/**
  * i40e_aq_set_vsi_broadcast
  * @hw: pointer to the hw struct
  * @seid: vsi number
@@ -2494,7 +2540,10 @@ i40e_status i40e_update_link_info(struct i40e_hw *hw)
 	if (status)
 		return status;
 
-	if (hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) {
+	/* extra checking needed to ensure link info to user is timely */
+	if ((hw->phy.link_info.link_info & I40E_AQ_MEDIA_AVAILABLE) &&
+	    ((hw->phy.link_info.link_info & I40E_AQ_LINK_UP) ||
+	     !(hw->phy.link_info_old.link_info & I40E_AQ_LINK_UP))) {
 		status = i40e_aq_get_phy_capabilities(hw, false, false,
 						      &abilities, NULL);
 		if (status)
@@ -3144,6 +3193,14 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
 			break;
 		case I40E_AQ_CAP_ID_MNG_MODE:
 			p->management_mode = number;
+			if (major_rev > 1) {
+				p->mng_protocols_over_mctp = logical_id;
+				i40e_debug(hw, I40E_DEBUG_INIT,
+					   "HW Capability: Protocols over MCTP = %d\n",
+					   p->mng_protocols_over_mctp);
+			} else {
+				p->mng_protocols_over_mctp = 0;
+			}
 			break;
 		case I40E_AQ_CAP_ID_NPAR_ACTIVE:
 			p->npar_enable = number;
@@ -3310,8 +3367,10 @@ static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
 	/* partition id is 1-based, and functions are evenly spread
 	 * across the ports as partitions
 	 */
-	hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
-	hw->num_partitions = num_functions / hw->num_ports;
+	if (hw->num_ports != 0) {
+		hw->partition_id = (hw->pf_id / hw->num_ports) + 1;
+		hw->num_partitions = num_functions / hw->num_ports;
+	}
 
 	/* additional HW specific goodies that might
 	 * someday be HW version specific
@@ -4391,7 +4450,92 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw,
 }
 
 /**
- * i40e_read_phy_register
+ * i40e_read_phy_register_clause22
+ * @hw: pointer to the HW structure
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Reads specified PHY register value
+ **/
+i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
+					    u16 reg, u8 phy_addr, u16 *value)
+{
+	i40e_status status = I40E_ERR_TIMEOUT;
+	u8 port_num = (u8)hw->func_caps.mdio_port_num;
+	u32 command = 0;
+	u16 retry = 1000;
+
+	command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE22_OPCODE_READ_MASK) |
+		  (I40E_MDIO_CLAUSE22_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK);
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		udelay(10);
+		retry--;
+	} while (retry);
+
+	if (status) {
+		i40e_debug(hw, I40E_DEBUG_PHY,
+			   "PHY: Can't write command to external PHY.\n");
+	} else {
+		command = rd32(hw, I40E_GLGEN_MSRWD(port_num));
+		*value = (command & I40E_GLGEN_MSRWD_MDIRDDATA_MASK) >>
+			 I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT;
+	}
+
+	return status;
+}
+
+/**
+ * i40e_write_phy_register_clause22
+ * @hw: pointer to the HW structure
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Writes specified PHY register value
+ **/
+i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
+					     u16 reg, u8 phy_addr, u16 value)
+{
+	i40e_status status = I40E_ERR_TIMEOUT;
+	u8 port_num = (u8)hw->func_caps.mdio_port_num;
+	u32 command  = 0;
+	u16 retry = 1000;
+
+	command = value << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT;
+	wr32(hw, I40E_GLGEN_MSRWD(port_num), command);
+
+	command = (reg << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
+		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
+		  (I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK) |
+		  (I40E_MDIO_CLAUSE22_STCODE_MASK) |
+		  (I40E_GLGEN_MSCA_MDICMD_MASK);
+
+	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
+	do {
+		command = rd32(hw, I40E_GLGEN_MSCA(port_num));
+		if (!(command & I40E_GLGEN_MSCA_MDICMD_MASK)) {
+			status = 0;
+			break;
+		}
+		udelay(10);
+		retry--;
+	} while (retry);
+
+	return status;
+}
+
+/**
+ * i40e_read_phy_register_clause45
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
@@ -4400,9 +4544,8 @@ i40e_status i40e_aq_configure_partition_bw(struct i40e_hw *hw,
  *
  * Reads specified PHY register value
  **/
-i40e_status i40e_read_phy_register(struct i40e_hw *hw,
-				   u8 page, u16 reg, u8 phy_addr,
-				   u16 *value)
+i40e_status i40e_read_phy_register_clause45(struct i40e_hw *hw,
+				u8 page, u16 reg, u8 phy_addr, u16 *value)
 {
 	i40e_status status = I40E_ERR_TIMEOUT;
 	u32 command = 0;
@@ -4412,8 +4555,8 @@ i40e_status i40e_read_phy_register(struct i40e_hw *hw,
 	command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
 		  (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
 		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-		  (I40E_MDIO_OPCODE_ADDRESS) |
-		  (I40E_MDIO_STCODE) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
 		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
 		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
 	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
@@ -4435,8 +4578,8 @@ i40e_status i40e_read_phy_register(struct i40e_hw *hw,
 
 	command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
 		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-		  (I40E_MDIO_OPCODE_READ) |
-		  (I40E_MDIO_STCODE) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_READ_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
 		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
 		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
 	status = I40E_ERR_TIMEOUT;
@@ -4466,7 +4609,7 @@ phy_read_end:
 }
 
 /**
- * i40e_write_phy_register
+ * i40e_write_phy_register_clause45
  * @hw: pointer to the HW structure
  * @page: registers page number
  * @reg: register address in the page
@@ -4475,9 +4618,8 @@ phy_read_end:
  *
  * Writes value to specified PHY register
  **/
-i40e_status i40e_write_phy_register(struct i40e_hw *hw,
-				    u8 page, u16 reg, u8 phy_addr,
-				    u16 value)
+i40e_status i40e_write_phy_register_clause45(struct i40e_hw *hw,
+				u8 page, u16 reg, u8 phy_addr, u16 value)
 {
 	i40e_status status = I40E_ERR_TIMEOUT;
 	u32 command = 0;
@@ -4487,8 +4629,8 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw,
 	command = (reg << I40E_GLGEN_MSCA_MDIADD_SHIFT) |
 		  (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
 		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-		  (I40E_MDIO_OPCODE_ADDRESS) |
-		  (I40E_MDIO_STCODE) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
 		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
 		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
 	wr32(hw, I40E_GLGEN_MSCA(port_num), command);
@@ -4512,8 +4654,8 @@ i40e_status i40e_write_phy_register(struct i40e_hw *hw,
 
 	command = (page << I40E_GLGEN_MSCA_DEVADD_SHIFT) |
 		  (phy_addr << I40E_GLGEN_MSCA_PHYADD_SHIFT) |
-		  (I40E_MDIO_OPCODE_WRITE) |
-		  (I40E_MDIO_STCODE) |
+		  (I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK) |
+		  (I40E_MDIO_CLAUSE45_STCODE_MASK) |
 		  (I40E_GLGEN_MSCA_MDICMD_MASK) |
 		  (I40E_GLGEN_MSCA_MDIINPROGEN_MASK);
 	status = I40E_ERR_TIMEOUT;
@@ -4534,6 +4676,78 @@ phy_write_end:
 }
 
 /**
+ * i40e_write_phy_register
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Writes value to specified PHY register
+ **/
+i40e_status i40e_write_phy_register(struct i40e_hw *hw,
+				    u8 page, u16 reg, u8 phy_addr, u16 value)
+{
+	i40e_status status;
+
+	switch (hw->device_id) {
+	case I40E_DEV_ID_1G_BASE_T_X722:
+		status = i40e_write_phy_register_clause22(hw, reg, phy_addr,
+							  value);
+		break;
+	case I40E_DEV_ID_10G_BASE_T:
+	case I40E_DEV_ID_10G_BASE_T4:
+	case I40E_DEV_ID_10G_BASE_T_X722:
+	case I40E_DEV_ID_25G_B:
+	case I40E_DEV_ID_25G_SFP28:
+		status = i40e_write_phy_register_clause45(hw, page, reg,
+							  phy_addr, value);
+		break;
+	default:
+		status = I40E_ERR_UNKNOWN_PHY;
+		break;
+	}
+
+	return status;
+}
+
+/**
+ * i40e_read_phy_register
+ * @hw: pointer to the HW structure
+ * @page: registers page number
+ * @reg: register address in the page
+ * @phy_addr: PHY address on MDIO interface
+ * @value: PHY register value
+ *
+ * Reads specified PHY register value
+ **/
+i40e_status i40e_read_phy_register(struct i40e_hw *hw,
+				   u8 page, u16 reg, u8 phy_addr, u16 *value)
+{
+	i40e_status status;
+
+	switch (hw->device_id) {
+	case I40E_DEV_ID_1G_BASE_T_X722:
+		status = i40e_read_phy_register_clause22(hw, reg, phy_addr,
+							 value);
+		break;
+	case I40E_DEV_ID_10G_BASE_T:
+	case I40E_DEV_ID_10G_BASE_T4:
+	case I40E_DEV_ID_10G_BASE_T_X722:
+	case I40E_DEV_ID_25G_B:
+	case I40E_DEV_ID_25G_SFP28:
+		status = i40e_read_phy_register_clause45(hw, page, reg,
+							 phy_addr, value);
+		break;
+	default:
+		status = I40E_ERR_UNKNOWN_PHY;
+		break;
+	}
+
+	return status;
+}
+
+/**
  * i40e_get_phy_address
  * @hw: pointer to the HW structure
  * @dev_num: PHY port num that address we want
@@ -4575,14 +4789,16 @@ i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
 
 	for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++,
 	     led_addr++) {
-		status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-						led_addr, phy_addr, &led_reg);
+		status = i40e_read_phy_register_clause45(hw,
+							 I40E_PHY_COM_REG_PAGE,
+							 led_addr, phy_addr,
+							 &led_reg);
 		if (status)
 			goto phy_blinking_end;
 		led_ctl = led_reg;
 		if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
 			led_reg = 0;
-			status = i40e_write_phy_register(hw,
+			status = i40e_write_phy_register_clause45(hw,
 							 I40E_PHY_COM_REG_PAGE,
 							 led_addr, phy_addr,
 							 led_reg);
@@ -4594,20 +4810,18 @@ i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
 
 	if (time > 0 && interval > 0) {
 		for (i = 0; i < time * 1000; i += interval) {
-			status = i40e_read_phy_register(hw,
-							I40E_PHY_COM_REG_PAGE,
-							led_addr, phy_addr,
-							&led_reg);
+			status = i40e_read_phy_register_clause45(hw,
+						I40E_PHY_COM_REG_PAGE,
+						led_addr, phy_addr, &led_reg);
 			if (status)
 				goto restore_config;
 			if (led_reg & I40E_PHY_LED_MANUAL_ON)
 				led_reg = 0;
 			else
 				led_reg = I40E_PHY_LED_MANUAL_ON;
-			status = i40e_write_phy_register(hw,
-							 I40E_PHY_COM_REG_PAGE,
-							 led_addr, phy_addr,
-							 led_reg);
+			status = i40e_write_phy_register_clause45(hw,
+						I40E_PHY_COM_REG_PAGE,
+						led_addr, phy_addr, led_reg);
 			if (status)
 				goto restore_config;
 			msleep(interval);
@@ -4615,8 +4829,9 @@ i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
 	}
 
 restore_config:
-	status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr,
-					 phy_addr, led_ctl);
+	status = i40e_write_phy_register_clause45(hw,
+						  I40E_PHY_COM_REG_PAGE,
+						  led_addr, phy_addr, led_ctl);
 
 phy_blinking_end:
 	return status;
@@ -4647,8 +4862,10 @@ i40e_status i40e_led_get_phy(struct i40e_hw *hw, u16 *led_addr,
 
 	for (gpio_led_port = 0; gpio_led_port < 3; gpio_led_port++,
 	     temp_addr++) {
-		status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-						temp_addr, phy_addr, &reg_val);
+		status = i40e_read_phy_register_clause45(hw,
+							 I40E_PHY_COM_REG_PAGE,
+							 temp_addr, phy_addr,
+							 &reg_val);
 		if (status)
 			return status;
 		*val = reg_val;
@@ -4681,41 +4898,42 @@ i40e_status i40e_led_set_phy(struct i40e_hw *hw, bool on,
 	i = rd32(hw, I40E_PFGEN_PORTNUM);
 	port_num = (u8)(i & I40E_PFGEN_PORTNUM_PORT_NUM_MASK);
 	phy_addr = i40e_get_phy_address(hw, port_num);
-
-	status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr,
-					phy_addr, &led_reg);
+	status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+						 led_addr, phy_addr, &led_reg);
 	if (status)
 		return status;
 	led_ctl = led_reg;
 	if (led_reg & I40E_PHY_LED_LINK_MODE_MASK) {
 		led_reg = 0;
-		status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-						 led_addr, phy_addr, led_reg);
+		status = i40e_write_phy_register_clause45(hw,
+							  I40E_PHY_COM_REG_PAGE,
+							  led_addr, phy_addr,
+							  led_reg);
 		if (status)
 			return status;
 	}
-	status = i40e_read_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-					led_addr, phy_addr, &led_reg);
+	status = i40e_read_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+						 led_addr, phy_addr, &led_reg);
 	if (status)
 		goto restore_config;
 	if (on)
 		led_reg = I40E_PHY_LED_MANUAL_ON;
 	else
 		led_reg = 0;
-	status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE,
-					 led_addr, phy_addr, led_reg);
+	status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+						  led_addr, phy_addr, led_reg);
 	if (status)
 		goto restore_config;
 	if (mode & I40E_PHY_LED_MODE_ORIG) {
 		led_ctl = (mode & I40E_PHY_LED_MODE_MASK);
-		status = i40e_write_phy_register(hw,
+		status = i40e_write_phy_register_clause45(hw,
 						 I40E_PHY_COM_REG_PAGE,
 						 led_addr, phy_addr, led_ctl);
 	}
 	return status;
 restore_config:
-	status = i40e_write_phy_register(hw, I40E_PHY_COM_REG_PAGE, led_addr,
-					 phy_addr, led_ctl);
+	status = i40e_write_phy_register_clause45(hw, I40E_PHY_COM_REG_PAGE,
+						  led_addr, phy_addr, led_ctl);
 	return status;
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index 0c1875b5b16d..f1f41f12902f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -134,7 +134,7 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 	struct rtnl_link_stats64 *nstat;
 	struct i40e_mac_filter *f;
 	struct i40e_vsi *vsi;
-	int i;
+	int i, bkt;
 
 	vsi = i40e_dbg_find_vsi(pf, seid);
 	if (!vsi) {
@@ -166,13 +166,13 @@ static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
 			 pf->hw.mac.addr,
 			 pf->hw.mac.san_addr,
 			 pf->hw.mac.port_addr);
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
 		dev_info(&pf->pdev->dev,
-			 "    mac_filter_list: %pM vid=%d, is_netdev=%d is_vf=%d counter=%d, state %s\n",
-			 f->macaddr, f->vlan, f->is_netdev, f->is_vf,
-			 f->counter, i40e_filter_state_string[f->state]);
+			 "    mac_filter_hash: %pM vid=%d, state %s\n",
+			 f->macaddr, f->vlan,
+			 i40e_filter_state_string[f->state]);
 	}
-	dev_info(&pf->pdev->dev, "    active_filters %d, promisc_threshold %d, overflow promisc %s\n",
+	dev_info(&pf->pdev->dev, "    active_filters %u, promisc_threshold %u, overflow promisc %s\n",
 		 vsi->active_filters, vsi->promisc_threshold,
 		 (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state) ?
 		  "ON" : "OFF"));
@@ -867,86 +867,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 
 		dev_info(&pf->pdev->dev, "deleting relay %d\n", veb_seid);
 		i40e_veb_release(pf->veb[i]);
-
-	} else if (strncmp(cmd_buf, "add macaddr", 11) == 0) {
-		struct i40e_mac_filter *f;
-		int vlan = 0;
-		u8 ma[6];
-		int ret;
-
-		cnt = sscanf(&cmd_buf[11],
-			     "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i",
-			     &vsi_seid,
-			     &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5],
-			     &vlan);
-		if (cnt == 7) {
-			vlan = 0;
-		} else if (cnt != 8) {
-			dev_info(&pf->pdev->dev,
-				 "add macaddr: bad command string, cnt=%d\n",
-				 cnt);
-			goto command_write_done;
-		}
-
-		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
-		if (!vsi) {
-			dev_info(&pf->pdev->dev,
-				 "add macaddr: VSI %d not found\n", vsi_seid);
-			goto command_write_done;
-		}
-
-		spin_lock_bh(&vsi->mac_filter_list_lock);
-		f = i40e_add_filter(vsi, ma, vlan, false, false);
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
-		ret = i40e_sync_vsi_filters(vsi);
-		if (f && !ret)
-			dev_info(&pf->pdev->dev,
-				 "add macaddr: %pM vlan=%d added to VSI %d\n",
-				 ma, vlan, vsi_seid);
-		else
-			dev_info(&pf->pdev->dev,
-				 "add macaddr: %pM vlan=%d to VSI %d failed, f=%p ret=%d\n",
-				 ma, vlan, vsi_seid, f, ret);
-
-	} else if (strncmp(cmd_buf, "del macaddr", 11) == 0) {
-		int vlan = 0;
-		u8 ma[6];
-		int ret;
-
-		cnt = sscanf(&cmd_buf[11],
-			     "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i",
-			     &vsi_seid,
-			     &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5],
-			     &vlan);
-		if (cnt == 7) {
-			vlan = 0;
-		} else if (cnt != 8) {
-			dev_info(&pf->pdev->dev,
-				 "del macaddr: bad command string, cnt=%d\n",
-				 cnt);
-			goto command_write_done;
-		}
-
-		vsi = i40e_dbg_find_vsi(pf, vsi_seid);
-		if (!vsi) {
-			dev_info(&pf->pdev->dev,
-				 "del macaddr: VSI %d not found\n", vsi_seid);
-			goto command_write_done;
-		}
-
-		spin_lock_bh(&vsi->mac_filter_list_lock);
-		i40e_del_filter(vsi, ma, vlan, false, false);
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
-		ret = i40e_sync_vsi_filters(vsi);
-		if (!ret)
-			dev_info(&pf->pdev->dev,
-				 "del macaddr: %pM vlan=%d removed from VSI %d\n",
-				 ma, vlan, vsi_seid);
-		else
-			dev_info(&pf->pdev->dev,
-				 "del macaddr: %pM vlan=%d from VSI %d failed, ret=%d\n",
-				 ma, vlan, vsi_seid, ret);
-
 	} else if (strncmp(cmd_buf, "add pvid", 8) == 0) {
 		i40e_status ret;
 		u16 vid;
@@ -1210,24 +1130,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 			dev_info(&pf->pdev->dev,
 				 "dump debug fwdata <cluster_id> <table_id> <index>\n");
 		}
-
-	} else if (strncmp(cmd_buf, "msg_enable", 10) == 0) {
-		u32 level;
-		cnt = sscanf(&cmd_buf[10], "%i", &level);
-		if (cnt) {
-			if (I40E_DEBUG_USER & level) {
-				pf->hw.debug_mask = level;
-				dev_info(&pf->pdev->dev,
-					 "set hw.debug_mask = 0x%08x\n",
-					 pf->hw.debug_mask);
-			}
-			pf->msg_enable = level;
-			dev_info(&pf->pdev->dev, "set msg_enable = 0x%08x\n",
-				 pf->msg_enable);
-		} else {
-			dev_info(&pf->pdev->dev, "msg_enable = 0x%08x\n",
-				 pf->msg_enable);
-		}
 	} else if (strncmp(cmd_buf, "pfr", 3) == 0) {
 		dev_info(&pf->pdev->dev, "debugfs: forcing PFR\n");
 		i40e_do_reset_safe(pf, BIT(__I40E_PF_RESET_REQUESTED));
@@ -1633,8 +1535,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 		dev_info(&pf->pdev->dev, "  del vsi [vsi_seid]\n");
 		dev_info(&pf->pdev->dev, "  add relay <uplink_seid> <vsi_seid>\n");
 		dev_info(&pf->pdev->dev, "  del relay <relay_seid>\n");
-		dev_info(&pf->pdev->dev, "  add macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n");
-		dev_info(&pf->pdev->dev, "  del macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n");
 		dev_info(&pf->pdev->dev, "  add pvid <vsi_seid> <vid>\n");
 		dev_info(&pf->pdev->dev, "  del pvid <vsi_seid>\n");
 		dev_info(&pf->pdev->dev, "  dump switch\n");
@@ -1644,7 +1544,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 		dev_info(&pf->pdev->dev, "  dump desc aq\n");
 		dev_info(&pf->pdev->dev, "  dump reset stats\n");
 		dev_info(&pf->pdev->dev, "  dump debug fwdata <cluster_id> <table_id> <index>\n");
-		dev_info(&pf->pdev->dev, "  msg_enable [level]\n");
 		dev_info(&pf->pdev->dev, "  read <reg>\n");
 		dev_info(&pf->pdev->dev, "  write <reg> <value>\n");
 		dev_info(&pf->pdev->dev, "  clear_stats vsi [seid]\n");
diff --git a/drivers/net/ethernet/intel/i40e/i40e_devids.h b/drivers/net/ethernet/intel/i40e/i40e_devids.h
index dd4457d29e98..8e46098bad57 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_devids.h
@@ -39,6 +39,8 @@
 #define I40E_DEV_ID_20G_KR2		0x1587
 #define I40E_DEV_ID_20G_KR2_A		0x1588
 #define I40E_DEV_ID_10G_BASE_T4		0x1589
+#define I40E_DEV_ID_25G_B		0x158A
+#define I40E_DEV_ID_25G_SFP28		0x158B
 #define I40E_DEV_ID_KX_X722		0x37CE
 #define I40E_DEV_ID_QSFP_X722		0x37CF
 #define I40E_DEV_ID_SFP_X722		0x37D0
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index 92bc8846f1ba..cc1465aac2ef 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -104,7 +104,7 @@ static const struct i40e_stats i40e_gstrings_misc_stats[] = {
  * The PF_STATs are appended to the netdev stats only when ethtool -S
  * is queried on the base PF netdev, not on the VMDq or FCoE netdev.
  */
-static struct i40e_stats i40e_gstrings_stats[] = {
+static const struct i40e_stats i40e_gstrings_stats[] = {
 	I40E_PF_STAT("rx_bytes", stats.eth.rx_bytes),
 	I40E_PF_STAT("tx_bytes", stats.eth.tx_bytes),
 	I40E_PF_STAT("rx_unicast", stats.eth.rx_unicast),
@@ -216,7 +216,6 @@ enum i40e_ethtool_test_id {
 	I40E_ETH_TEST_REG = 0,
 	I40E_ETH_TEST_EEPROM,
 	I40E_ETH_TEST_INTR,
-	I40E_ETH_TEST_LOOPBACK,
 	I40E_ETH_TEST_LINK,
 };
 
@@ -224,32 +223,27 @@ static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = {
 	"Register test  (offline)",
 	"Eeprom test    (offline)",
 	"Interrupt test (offline)",
-	"Loopback test  (offline)",
 	"Link test   (on/offline)"
 };
 
 #define I40E_TEST_LEN (sizeof(i40e_gstrings_test) / ETH_GSTRING_LEN)
 
-static const char i40e_priv_flags_strings_gl[][ETH_GSTRING_LEN] = {
+static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
 	"MFP",
 	"LinkPolling",
 	"flow-director-atr",
 	"veb-stats",
 	"hw-atr-eviction",
-	"vf-true-promisc-support",
 };
 
-#define I40E_PRIV_FLAGS_GL_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings_gl)
+#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings)
 
-static const char i40e_priv_flags_strings[][ETH_GSTRING_LEN] = {
-	"NPAR",
-	"LinkPolling",
-	"flow-director-atr",
-	"veb-stats",
-	"hw-atr-eviction",
+/* Private flags with a global effect, restricted to PF 0 */
+static const char i40e_gl_priv_flags_strings[][ETH_GSTRING_LEN] = {
+	"vf-true-promisc-support",
 };
 
-#define I40E_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_priv_flags_strings)
+#define I40E_GL_PRIV_FLAGS_STR_LEN ARRAY_SIZE(i40e_gl_priv_flags_strings)
 
 /**
  * i40e_partition_setting_complaint - generic complaint for MFP restriction
@@ -271,8 +265,9 @@ static void i40e_partition_setting_complaint(struct i40e_pf *pf)
 static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
 				     u32 *advertising)
 {
-	enum i40e_aq_capabilities_phy_type phy_types = pf->hw.phy.phy_types;
 	struct i40e_link_status *hw_link_info = &pf->hw.phy.link_info;
+	u64 phy_types = pf->hw.phy.phy_types;
+
 	*supported = 0x0;
 	*advertising = 0x0;
 
@@ -351,11 +346,13 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
 			*advertising |= ADVERTISED_20000baseKR2_Full;
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KR) {
-		*supported |= SUPPORTED_10000baseKR_Full |
-			      SUPPORTED_Autoneg;
+		if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+			*supported |= SUPPORTED_10000baseKR_Full |
+				      SUPPORTED_Autoneg;
 		*advertising |= ADVERTISED_Autoneg;
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_10GB)
-			*advertising |= ADVERTISED_10000baseKR_Full;
+			if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+				*advertising |= ADVERTISED_10000baseKR_Full;
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_10GBASE_KX4) {
 		*supported |= SUPPORTED_10000baseKX4_Full |
@@ -365,11 +362,20 @@ static void i40e_phy_type_to_ethtool(struct i40e_pf *pf, u32 *supported,
 			*advertising |= ADVERTISED_10000baseKX4_Full;
 	}
 	if (phy_types & I40E_CAP_PHY_TYPE_1000BASE_KX) {
-		*supported |= SUPPORTED_1000baseKX_Full |
-			      SUPPORTED_Autoneg;
+		if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+			*supported |= SUPPORTED_1000baseKX_Full |
+				      SUPPORTED_Autoneg;
 		*advertising |= ADVERTISED_Autoneg;
 		if (hw_link_info->requested_speeds & I40E_LINK_SPEED_1GB)
-			*advertising |= ADVERTISED_1000baseKX_Full;
+			if (!(pf->flags & I40E_FLAG_HAVE_CRT_RETIMER))
+				*advertising |= ADVERTISED_1000baseKX_Full;
+	}
+	if (phy_types & I40E_CAP_PHY_TYPE_25GBASE_KR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_CR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_SR ||
+	    phy_types & I40E_CAP_PHY_TYPE_25GBASE_LR) {
+		*supported |= SUPPORTED_Autoneg;
+		*advertising |= ADVERTISED_Autoneg;
 	}
 }
 
@@ -493,6 +499,14 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
 				     ADVERTISED_1000baseKX_Full |
 				     ADVERTISED_Autoneg;
 		break;
+	case I40E_PHY_TYPE_25GBASE_KR:
+	case I40E_PHY_TYPE_25GBASE_CR:
+	case I40E_PHY_TYPE_25GBASE_SR:
+	case I40E_PHY_TYPE_25GBASE_LR:
+		ecmd->supported = SUPPORTED_Autoneg;
+		ecmd->advertising = ADVERTISED_Autoneg;
+		/* TODO: add speeds when ethtool is ready to support them */
+		break;
 	default:
 		/* if we got here and link is up something bad is afoot */
 		netdev_info(netdev, "WARNING: Link is up but PHY type 0x%x is not recognized.\n",
@@ -514,6 +528,14 @@ static void i40e_get_settings_link_up(struct i40e_hw *hw,
 	case I40E_LINK_SPEED_40GB:
 		ethtool_cmd_speed_set(ecmd, SPEED_40000);
 		break;
+	case I40E_LINK_SPEED_25GB:
+#ifdef SPEED_25000
+		ethtool_cmd_speed_set(ecmd, SPEED_25000);
+#else
+		netdev_info(netdev,
+			    "Speed is 25G, display not supported by this version of ethtool.\n");
+#endif
+		break;
 	case I40E_LINK_SPEED_20GB:
 		ethtool_cmd_speed_set(ecmd, SPEED_20000);
 		break;
@@ -978,6 +1000,10 @@ static u32 i40e_get_msglevel(struct net_device *netdev)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
+	u32 debug_mask = pf->hw.debug_mask;
+
+	if (debug_mask)
+		netdev_info(netdev, "i40e debug_mask: 0x%08X\n", debug_mask);
 
 	return pf->msg_enable;
 }
@@ -989,7 +1015,8 @@ static void i40e_set_msglevel(struct net_device *netdev, u32 data)
 
 	if (I40E_DEBUG_USER & data)
 		pf->hw.debug_mask = data;
-	pf->msg_enable = data;
+	else
+		pf->msg_enable = data;
 }
 
 static int i40e_get_regs_len(struct net_device *netdev)
@@ -1191,10 +1218,9 @@ static void i40e_get_drvinfo(struct net_device *netdev,
 		sizeof(drvinfo->fw_version));
 	strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
 		sizeof(drvinfo->bus_info));
+	drvinfo->n_priv_flags = I40E_PRIV_FLAGS_STR_LEN;
 	if (pf->hw.pf_id == 0)
-		drvinfo->n_priv_flags = I40E_PRIV_FLAGS_GL_STR_LEN;
-	else
-		drvinfo->n_priv_flags = I40E_PRIV_FLAGS_STR_LEN;
+		drvinfo->n_priv_flags += I40E_GL_PRIV_FLAGS_STR_LEN;
 }
 
 static void i40e_get_ringparam(struct net_device *netdev,
@@ -1219,6 +1245,7 @@ static int i40e_set_ringparam(struct net_device *netdev,
 {
 	struct i40e_ring *tx_rings = NULL, *rx_rings = NULL;
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_hw *hw = &np->vsi->back->hw;
 	struct i40e_vsi *vsi = np->vsi;
 	struct i40e_pf *pf = vsi->back;
 	u32 new_rx_count, new_tx_count;
@@ -1311,10 +1338,6 @@ static int i40e_set_ringparam(struct net_device *netdev,
 		}
 
 		for (i = 0; i < vsi->num_queue_pairs; i++) {
-			/* this is to allow wr32 to have something to write to
-			 * during early allocation of Rx buffers
-			 */
-			u32 __iomem faketail = 0;
 			struct i40e_ring *ring;
 			u16 unused;
 
@@ -1326,7 +1349,10 @@ static int i40e_set_ringparam(struct net_device *netdev,
 			 */
 			rx_rings[i].desc = NULL;
 			rx_rings[i].rx_bi = NULL;
-			rx_rings[i].tail = (u8 __iomem *)&faketail;
+			/* this is to allow wr32 to have something to write to
+			 * during early allocation of Rx buffers
+			 */
+			rx_rings[i].tail = hw->hw_addr + I40E_PRTGEN_STATUS;
 			err = i40e_setup_rx_descriptors(&rx_rings[i]);
 			if (err)
 				goto rx_unwind;
@@ -1422,10 +1448,8 @@ static int i40e_get_sset_count(struct net_device *netdev, int sset)
 			return I40E_VSI_STATS_LEN(netdev);
 		}
 	case ETH_SS_PRIV_FLAGS:
-		if (pf->hw.pf_id == 0)
-			return I40E_PRIV_FLAGS_GL_STR_LEN;
-		else
-			return I40E_PRIV_FLAGS_STR_LEN;
+		return I40E_PRIV_FLAGS_STR_LEN +
+			(pf->hw.pf_id == 0 ? I40E_GL_PRIV_FLAGS_STR_LEN : 0);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1536,10 +1560,8 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
 
 	switch (stringset) {
 	case ETH_SS_TEST:
-		for (i = 0; i < I40E_TEST_LEN; i++) {
-			memcpy(data, i40e_gstrings_test[i], ETH_GSTRING_LEN);
-			data += ETH_GSTRING_LEN;
-		}
+		memcpy(data, i40e_gstrings_test,
+		       I40E_TEST_LEN * ETH_GSTRING_LEN);
 		break;
 	case ETH_SS_STATS:
 		for (i = 0; i < I40E_NETDEV_STATS_LEN; i++) {
@@ -1623,19 +1645,12 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset,
 		/* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
 		break;
 	case ETH_SS_PRIV_FLAGS:
-		if (pf->hw.pf_id == 0) {
-			for (i = 0; i < I40E_PRIV_FLAGS_GL_STR_LEN; i++) {
-				memcpy(data, i40e_priv_flags_strings_gl[i],
-				       ETH_GSTRING_LEN);
-				data += ETH_GSTRING_LEN;
-			}
-		} else {
-			for (i = 0; i < I40E_PRIV_FLAGS_STR_LEN; i++) {
-				memcpy(data, i40e_priv_flags_strings[i],
-				       ETH_GSTRING_LEN);
-				data += ETH_GSTRING_LEN;
-			}
-		}
+		memcpy(data, i40e_priv_flags_strings,
+		       I40E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
+		data += I40E_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN;
+		if (pf->hw.pf_id == 0)
+			memcpy(data, i40e_gl_priv_flags_strings,
+			       I40E_GL_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
 		break;
 	default:
 		break;
@@ -1666,8 +1681,19 @@ static int i40e_get_ts_info(struct net_device *dev,
 	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
 
 	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) |
-			   BIT(HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
-			   BIT(HWTSTAMP_FILTER_PTP_V2_EVENT);
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_EVENT) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_SYNC) |
+			   BIT(HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ);
+
+	if (pf->flags & I40E_FLAG_PTP_L4_CAPABLE)
+		info->rx_filters |= BIT(HWTSTAMP_FILTER_PTP_V1_L4_SYNC) |
+				    BIT(HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_EVENT) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_L4_EVENT) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_SYNC) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_L4_SYNC) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_DELAY_REQ) |
+				    BIT(HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ);
 
 	return 0;
 }
@@ -1739,17 +1765,6 @@ static int i40e_intr_test(struct net_device *netdev, u64 *data)
 	return *data;
 }
 
-static int i40e_loopback_test(struct net_device *netdev, u64 *data)
-{
-	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_pf *pf = np->vsi->back;
-
-	netif_info(pf, hw, netdev, "loopback test not implemented\n");
-	*data = 0;
-
-	return *data;
-}
-
 static inline bool i40e_active_vfs(struct i40e_pf *pf)
 {
 	struct i40e_vf *vfs = pf->vf;
@@ -1763,17 +1778,7 @@ static inline bool i40e_active_vfs(struct i40e_pf *pf)
 
 static inline bool i40e_active_vmdqs(struct i40e_pf *pf)
 {
-	struct i40e_vsi **vsi = pf->vsi;
-	int i;
-
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		if (!vsi[i])
-			continue;
-		if (vsi[i]->type == I40E_VSI_VMDQ2)
-			return true;
-	}
-
-	return false;
+	return !!i40e_find_vsi_by_type(pf, I40E_VSI_VMDQ2);
 }
 
 static void i40e_diag_test(struct net_device *netdev,
@@ -1795,7 +1800,6 @@ static void i40e_diag_test(struct net_device *netdev,
 			data[I40E_ETH_TEST_REG]		= 1;
 			data[I40E_ETH_TEST_EEPROM]	= 1;
 			data[I40E_ETH_TEST_INTR]	= 1;
-			data[I40E_ETH_TEST_LOOPBACK]	= 1;
 			data[I40E_ETH_TEST_LINK]	= 1;
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 			clear_bit(__I40E_TESTING, &pf->state);
@@ -1823,9 +1827,6 @@ static void i40e_diag_test(struct net_device *netdev,
 		if (i40e_intr_test(netdev, &data[I40E_ETH_TEST_INTR]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
-		if (i40e_loopback_test(netdev, &data[I40E_ETH_TEST_LOOPBACK]))
-			eth_test->flags |= ETH_TEST_FL_FAILED;
-
 		/* run reg test last, a reset is required after it */
 		if (i40e_reg_test(netdev, &data[I40E_ETH_TEST_REG]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
@@ -1846,7 +1847,6 @@ static void i40e_diag_test(struct net_device *netdev,
 		data[I40E_ETH_TEST_REG] = 0;
 		data[I40E_ETH_TEST_EEPROM] = 0;
 		data[I40E_ETH_TEST_INTR] = 0;
-		data[I40E_ETH_TEST_LOOPBACK] = 0;
 	}
 
 skip_ol_tests:
@@ -1925,7 +1925,7 @@ static int i40e_set_phys_id(struct net_device *netdev,
 
 	switch (state) {
 	case ETHTOOL_ID_ACTIVE:
-		if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) {
+		if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS)) {
 			pf->led_status = i40e_led_get(hw);
 		} else {
 			i40e_aq_set_phy_debug(hw, I40E_PHY_DEBUG_ALL, NULL);
@@ -1935,20 +1935,20 @@ static int i40e_set_phys_id(struct net_device *netdev,
 		}
 		return blink_freq;
 	case ETHTOOL_ID_ON:
-		if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY))
+		if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS))
 			i40e_led_set(hw, 0xf, false);
 		else
 			ret = i40e_led_set_phy(hw, true, pf->led_status, 0);
 		break;
 	case ETHTOOL_ID_OFF:
-		if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY))
+		if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS))
 			i40e_led_set(hw, 0x0, false);
 		else
 			ret = i40e_led_set_phy(hw, false, pf->led_status, 0);
 		break;
 	case ETHTOOL_ID_INACTIVE:
-		if (!(pf->flags & I40E_FLAG_HAVE_10GBASET_PHY)) {
-			i40e_led_set(hw, false, pf->led_status);
+		if (!(pf->flags & I40E_FLAG_PHY_CONTROLS_LEDS)) {
+			i40e_led_set(hw, pf->led_status, false);
 		} else {
 			ret = i40e_led_set_phy(hw, false, pf->led_status,
 					       (pf->phy_led_val |
diff --git a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
index 58e6c1570335..b077ef8b00fa 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_fcoe.c
@@ -1522,12 +1522,12 @@ void i40e_fcoe_config_netdev(struct net_device *netdev, struct i40e_vsi *vsi)
 	 * same PCI function.
 	 */
 	netdev->dev_port = 1;
-	spin_lock_bh(&vsi->mac_filter_list_lock);
-	i40e_add_filter(vsi, hw->mac.san_addr, 0, false, false);
-	i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0, false, false);
-	i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0, false, false);
-	i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0, false, false);
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	i40e_add_filter(vsi, hw->mac.san_addr, 0);
+	i40e_add_filter(vsi, (u8[6]) FC_FCOE_FLOGI_MAC, 0);
+	i40e_add_filter(vsi, FIP_ALL_FCOE_MACS, 0);
+	i40e_add_filter(vsi, FIP_ALL_ENODE_MACS, 0);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	/* use san mac */
 	ether_addr_copy(netdev->dev_addr, hw->mac.san_addr);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 31c97e3937a4..ad4cf639430e 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -41,7 +41,7 @@ static const char i40e_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 16
+#define DRV_VERSION_BUILD 25
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -86,6 +86,8 @@ static const struct pci_device_id i40e_pci_tbl[] = {
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_SFP_I_X722), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2), 0},
 	{PCI_VDEVICE(INTEL, I40E_DEV_ID_20G_KR2_A), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_B), 0},
+	{PCI_VDEVICE(INTEL, I40E_DEV_ID_25G_SFP28), 0},
 	/* required last entry */
 	{0, }
 };
@@ -93,8 +95,8 @@ MODULE_DEVICE_TABLE(pci, i40e_pci_tbl);
 
 #define I40E_MAX_VF_COUNT 128
 static int debug = -1;
-module_param(debug, int, 0);
-MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+module_param(debug, uint, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all), Debug mask (0x8XXXXXXX)");
 
 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
 MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
@@ -286,8 +288,7 @@ struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id)
 void i40e_service_event_schedule(struct i40e_pf *pf)
 {
 	if (!test_bit(__I40E_DOWN, &pf->state) &&
-	    !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) &&
-	    !test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state))
+	    !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state))
 		queue_work(i40e_wq, &pf->service_task);
 }
 
@@ -1145,25 +1146,22 @@ void i40e_update_stats(struct i40e_vsi *vsi)
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
  * @vlan: the vlan
- * @is_vf: make sure its a VF filter, else doesn't matter
- * @is_netdev: make sure its a netdev filter, else doesn't matter
  *
  * Returns ptr to the filter object or NULL
  **/
 static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
-						u8 *macaddr, s16 vlan,
-						bool is_vf, bool is_netdev)
+						const u8 *macaddr, s16 vlan)
 {
 	struct i40e_mac_filter *f;
+	u64 key;
 
 	if (!vsi || !macaddr)
 		return NULL;
 
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
+	key = i40e_addr_to_hkey(macaddr);
+	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
 		if ((ether_addr_equal(macaddr, f->macaddr)) &&
-		    (vlan == f->vlan)    &&
-		    (!is_vf || f->is_vf) &&
-		    (!is_netdev || f->is_netdev))
+		    (vlan == f->vlan))
 			return f;
 	}
 	return NULL;
@@ -1173,24 +1171,21 @@ static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
  * i40e_find_mac - Find a mac addr in the macvlan filters list
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address we are searching for
- * @is_vf: make sure its a VF filter, else doesn't matter
- * @is_netdev: make sure its a netdev filter, else doesn't matter
  *
  * Returns the first filter with the provided MAC address or NULL if
  * MAC address was not found
  **/
-struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
-				      bool is_vf, bool is_netdev)
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, const u8 *macaddr)
 {
 	struct i40e_mac_filter *f;
+	u64 key;
 
 	if (!vsi || !macaddr)
 		return NULL;
 
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
-		if ((ether_addr_equal(macaddr, f->macaddr)) &&
-		    (!is_vf || f->is_vf) &&
-		    (!is_netdev || f->is_netdev))
+	key = i40e_addr_to_hkey(macaddr);
+	hash_for_each_possible(vsi->mac_filter_hash, f, hlist, key) {
+		if ((ether_addr_equal(macaddr, f->macaddr)))
 			return f;
 	}
 	return NULL;
@@ -1204,86 +1199,132 @@ struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
  **/
 bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi)
 {
-	struct i40e_mac_filter *f;
+	/* If we have a PVID, always operate in VLAN mode */
+	if (vsi->info.pvid)
+		return true;
 
-	/* Only -1 for all the filters denotes not in vlan mode
-	 * so we have to go through all the list in order to make sure
+	/* We need to operate in VLAN mode whenever we have any filters with
+	 * a VLAN other than I40E_VLAN_ANY. We could check the table each
+	 * time, incurring search cost repeatedly. However, we can notice two
+	 * things:
+	 *
+	 * 1) the only place where we can gain a VLAN filter is in
+	 *    i40e_add_filter.
+	 *
+	 * 2) the only place where filters are actually removed is in
+	 *    i40e_sync_filters_subtask.
+	 *
+	 * Thus, we can simply use a boolean value, has_vlan_filter, which we
+	 * will set to true when we add a VLAN filter in i40e_add_filter. Then
+	 * we have to perform the full search after deleting filters in
+	 * i40e_sync_filters_subtask, but we already have to search
+	 * filters here and can perform the check at the same time. This
+	 * results in avoiding embedding a loop for VLAN mode inside another
+	 * loop over all the filters, and should maintain correctness as noted
+	 * above.
 	 */
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
-		if (f->vlan >= 0 || vsi->info.pvid)
-			return true;
-	}
-
-	return false;
+	return vsi->has_vlan_filter;
 }
 
 /**
- * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans
- * @vsi: the VSI to be searched
- * @macaddr: the mac address to be filtered
- * @is_vf: true if it is a VF
- * @is_netdev: true if it is a netdev
+ * i40e_correct_mac_vlan_filters - Correct non-VLAN filters if necessary
+ * @vsi: the VSI to configure
+ * @tmp_add_list: list of filters ready to be added
+ * @tmp_del_list: list of filters ready to be deleted
+ * @vlan_filters: the number of active VLAN filters
  *
- * Goes through all the macvlan filters and adds a
- * macvlan filter for each unique vlan that already exists
+ * Update VLAN=0 and VLAN=-1 (I40E_VLAN_ANY) filters properly so that they
+ * behave as expected. If we have any active VLAN filters remaining or about
+ * to be added then we need to update non-VLAN filters to be marked as VLAN=0
+ * so that they only match against untagged traffic. If we no longer have any
+ * active VLAN filters, we need to make all non-VLAN filters marked as VLAN=-1
+ * so that they match against both tagged and untagged traffic. In this way,
+ * we ensure that we correctly receive the desired traffic. This ensures that
+ * when we have an active VLAN we will receive only untagged traffic and
+ * traffic matching active VLANs. If we have no active VLANs then we will
+ * operate in non-VLAN mode and receive all traffic, tagged or untagged.
  *
- * Returns first filter found on success, else NULL
- **/
-struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-					     bool is_vf, bool is_netdev)
+ * Finally, in a similar fashion, this function also corrects filters when
+ * there is an active PVID assigned to this VSI.
+ *
+ * In case of memory allocation failure return -ENOMEM. Otherwise, return 0.
+ *
+ * This function is only expected to be called from within
+ * i40e_sync_vsi_filters.
+ *
+ * NOTE: This function expects to be called while under the
+ * mac_filter_hash_lock
+ */
+static int i40e_correct_mac_vlan_filters(struct i40e_vsi *vsi,
+					 struct hlist_head *tmp_add_list,
+					 struct hlist_head *tmp_del_list,
+					 int vlan_filters)
 {
-	struct i40e_mac_filter *f;
+	s16 pvid = le16_to_cpu(vsi->info.pvid);
+	struct i40e_mac_filter *f, *add_head;
+	struct hlist_node *h;
+	int bkt, new_vlan;
 
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
-		if (vsi->info.pvid)
-			f->vlan = le16_to_cpu(vsi->info.pvid);
-		if (!i40e_find_filter(vsi, macaddr, f->vlan,
-				      is_vf, is_netdev)) {
-			if (!i40e_add_filter(vsi, macaddr, f->vlan,
-					     is_vf, is_netdev))
-				return NULL;
-		}
-	}
+	/* pvid holds the PVID in CPU byte order (vsi->info.pvid is LE). A
+	 * filter needs to be replaced under the three following conditions:
+	 *
+	 * a) if we have a PVID assigned, then all filters which are
+	 *    not marked as VLAN=PVID must be replaced with filters that
+	 *    are.
+	 * b) otherwise, if we have any active VLANS, all filters
+	 *    which are marked as VLAN=-1 must be replaced with
+	 *    filters marked as VLAN=0
+	 * c) finally, if we do not have any active VLANS, all filters
+	 *    which are marked as VLAN=0 must be replaced with filters
+	 *    marked as VLAN=-1
+	 */
 
-	return list_first_entry_or_null(&vsi->mac_filter_list,
-					struct i40e_mac_filter, list);
-}
+	/* Update the filters about to be added in place */
+	hlist_for_each_entry(f, tmp_add_list, hlist) {
+		if (pvid && f->vlan != pvid)
+			f->vlan = pvid;
+		else if (vlan_filters && f->vlan == I40E_VLAN_ANY)
+			f->vlan = 0;
+		else if (!vlan_filters && f->vlan == 0)
+			f->vlan = I40E_VLAN_ANY;
+	}
+
+	/* Update the remaining active filters */
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		/* Decide whether this filter must change here and pick the
+		 * new VLAN inside the if block, so that the add-new/del-old
+		 * code is not duplicated per case.
+		 */
+		if ((pvid && f->vlan != pvid) ||
+		    (vlan_filters && f->vlan == I40E_VLAN_ANY) ||
+		    (!vlan_filters && f->vlan == 0)) {
+			/* Determine the new vlan we will be adding */
+			if (pvid)
+				new_vlan = pvid;
+			else if (vlan_filters)
+				new_vlan = 0;
+			else
+				new_vlan = I40E_VLAN_ANY;
 
-/**
- * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS
- * @vsi: the VSI to be searched
- * @macaddr: the mac address to be removed
- * @is_vf: true if it is a VF
- * @is_netdev: true if it is a netdev
- *
- * Removes a given MAC address from a VSI, regardless of VLAN
- *
- * Returns 0 for success, or error
- **/
-int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, u8 *macaddr,
-			  bool is_vf, bool is_netdev)
-{
-	struct i40e_mac_filter *f = NULL;
-	int changed = 0;
+			/* Create the new filter */
+			add_head = i40e_add_filter(vsi, f->macaddr, new_vlan);
+			if (!add_head)
+				return -ENOMEM;
 
-	WARN(!spin_is_locked(&vsi->mac_filter_list_lock),
-	     "Missing mac_filter_list_lock\n");
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
-		if ((ether_addr_equal(macaddr, f->macaddr)) &&
-		    (is_vf == f->is_vf) &&
-		    (is_netdev == f->is_netdev)) {
-			f->counter--;
-			changed = 1;
-			if (f->counter == 0)
-				f->state = I40E_FILTER_REMOVE;
+			/* Put the replacement filter into the add list */
+			hash_del(&add_head->hlist);
+			hlist_add_head(&add_head->hlist, tmp_add_list);
+
+			/* Put the original filter into the delete list */
+			f->state = I40E_FILTER_REMOVE;
+			hash_del(&f->hlist);
+			hlist_add_head(&f->hlist, tmp_del_list);
 		}
 	}
-	if (changed) {
-		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-		vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
-		return 0;
-	}
-	return -ENOENT;
+
+	vsi->has_vlan_filter = !!vlan_filters;
+
+	return 0;
 }
 
 /**
@@ -1324,36 +1365,32 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr)
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
  * @vlan: the vlan
- * @is_vf: make sure its a VF filter, else doesn't matter
- * @is_netdev: make sure its a netdev filter, else doesn't matter
  *
  * Returns ptr to the filter object or NULL when no memory available.
  *
- * NOTE: This function is expected to be called with mac_filter_list_lock
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
  * being held.
  **/
 struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
-					u8 *macaddr, s16 vlan,
-					bool is_vf, bool is_netdev)
+					const u8 *macaddr, s16 vlan)
 {
 	struct i40e_mac_filter *f;
-	int changed = false;
+	u64 key;
 
 	if (!vsi || !macaddr)
 		return NULL;
 
-	/* Do not allow broadcast filter to be added since broadcast filter
-	 * is added as part of add VSI for any newly created VSI except
-	 * FDIR VSI
-	 */
-	if (is_broadcast_ether_addr(macaddr))
-		return NULL;
-
-	f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
+	f = i40e_find_filter(vsi, macaddr, vlan);
 	if (!f) {
 		f = kzalloc(sizeof(*f), GFP_ATOMIC);
 		if (!f)
-			goto add_filter_out;
+			return NULL;
+
+		/* Update the boolean indicating if we need to function in
+		 * VLAN mode.
+		 */
+		if (vlan >= 0)
+			vsi->has_vlan_filter = true;
 
 		ether_addr_copy(f->macaddr, macaddr);
 		f->vlan = vlan;
@@ -1365,100 +1402,148 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 			f->state = I40E_FILTER_FAILED;
 		else
 			f->state = I40E_FILTER_NEW;
-		changed = true;
-		INIT_LIST_HEAD(&f->list);
-		list_add_tail(&f->list, &vsi->mac_filter_list);
-	}
+		INIT_HLIST_NODE(&f->hlist);
 
-	/* increment counter and add a new flag if needed */
-	if (is_vf) {
-		if (!f->is_vf) {
-			f->is_vf = true;
-			f->counter++;
-		}
-	} else if (is_netdev) {
-		if (!f->is_netdev) {
-			f->is_netdev = true;
-			f->counter++;
-		}
-	} else {
-		f->counter++;
-	}
+		key = i40e_addr_to_hkey(macaddr);
+		hash_add(vsi->mac_filter_hash, &f->hlist, key);
 
-	if (changed) {
 		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
 		vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
 	}
 
-add_filter_out:
+	/* If we're asked to add a filter that has been marked for removal, it
+	 * is safe to simply restore it to active state. __i40e_del_filter
+	 * will have simply deleted any filters which were previously marked
+	 * NEW or FAILED, so if it is currently marked REMOVE it must have
+	 * previously been ACTIVE. Since we haven't yet run the sync filters
+	 * task, just restore this filter to the ACTIVE state so that the
+	 * sync task leaves it in place
+	 */
+	if (f->state == I40E_FILTER_REMOVE)
+		f->state = I40E_FILTER_ACTIVE;
+
 	return f;
 }
 
 /**
- * i40e_del_filter - Remove a mac/vlan filter from the VSI
+ * __i40e_del_filter - Remove a specific filter from the VSI
+ * @vsi: VSI to remove from
+ * @f: the filter to remove from the list
+ *
+ * This function should be called instead of i40e_del_filter only if you know
+ * the exact filter you will remove already, such as via i40e_find_filter or
+ * i40e_find_mac.
+ *
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
+ * being held.
+ * ANOTHER NOTE: This function MUST be called from within the context of
+ * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
+ * instead of list_for_each_entry().
+ **/
+static void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
+{
+	if (!f)
+		return;
+
+	if ((f->state == I40E_FILTER_FAILED) ||
+	    (f->state == I40E_FILTER_NEW)) {
+		/* this one never got added by the FW. Just remove it,
+		 * no need to sync anything.
+		 */
+		hash_del(&f->hlist);
+		kfree(f);
+	} else {
+		f->state = I40E_FILTER_REMOVE;
+		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+		vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+	}
+}
+
+/**
+ * i40e_del_filter - Remove a MAC/VLAN filter from the VSI
  * @vsi: the VSI to be searched
  * @macaddr: the MAC address
- * @vlan: the vlan
- * @is_vf: make sure it's a VF filter, else doesn't matter
- * @is_netdev: make sure it's a netdev filter, else doesn't matter
+ * @vlan: the VLAN
  *
- * NOTE: This function is expected to be called with mac_filter_list_lock
+ * NOTE: This function is expected to be called with mac_filter_hash_lock
  * being held.
  * ANOTHER NOTE: This function MUST be called from within the context of
  * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
  * instead of list_for_each_entry().
  **/
-void i40e_del_filter(struct i40e_vsi *vsi,
-		     u8 *macaddr, s16 vlan,
-		     bool is_vf, bool is_netdev)
+void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
 {
 	struct i40e_mac_filter *f;
 
 	if (!vsi || !macaddr)
 		return;
 
-	f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
-	if (!f || f->counter == 0)
-		return;
+	f = i40e_find_filter(vsi, macaddr, vlan);
+	__i40e_del_filter(vsi, f);
+}
 
-	if (is_vf) {
-		if (f->is_vf) {
-			f->is_vf = false;
-			f->counter--;
-		}
-	} else if (is_netdev) {
-		if (f->is_netdev) {
-			f->is_netdev = false;
-			f->counter--;
-		}
-	} else {
-		/* make sure we don't remove a filter in use by VF or netdev */
-		int min_f = 0;
+/**
+ * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans
+ * @vsi: the VSI to be searched
+ * @macaddr: the mac address to be filtered
+ *
+ * Goes through all the macvlan filters and adds a macvlan filter for each
+ * unique vlan that already exists. If a PVID has been assigned, instead only
+ * add the macaddr to that VLAN.
+ *
+ * Returns last filter added on success, else NULL
+ **/
+struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi,
+					     const u8 *macaddr)
+{
+	struct i40e_mac_filter *f, *add = NULL;
+	struct hlist_node *h;
+	int bkt;
 
-		min_f += (f->is_vf ? 1 : 0);
-		min_f += (f->is_netdev ? 1 : 0);
+	if (vsi->info.pvid)
+		return i40e_add_filter(vsi, macaddr,
+				       le16_to_cpu(vsi->info.pvid));
 
-		if (f->counter > min_f)
-			f->counter--;
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		if (f->state == I40E_FILTER_REMOVE)
+			continue;
+		add = i40e_add_filter(vsi, macaddr, f->vlan);
+		if (!add)
+			return NULL;
 	}
 
-	/* counter == 0 tells sync_filters_subtask to
-	 * remove the filter from the firmware's list
-	 */
-	if (f->counter == 0) {
-		if ((f->state == I40E_FILTER_FAILED) ||
-		    (f->state == I40E_FILTER_NEW)) {
-			/* this one never got added by the FW. Just remove it,
-			 * no need to sync anything.
-			 */
-			list_del(&f->list);
-			kfree(f);
-		} else {
-			f->state = I40E_FILTER_REMOVE;
-			vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
-			vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+	return add;
+}
+
+/**
+ * i40e_del_mac_all_vlan - Remove a MAC filter from all VLANS
+ * @vsi: the VSI to be searched
+ * @macaddr: the mac address to be removed
+ *
+ * Removes a given MAC address from a VSI, regardless of VLAN
+ *
+ * Returns 0 if a matching filter was found, else -ENOENT
+ **/
+int i40e_del_mac_all_vlan(struct i40e_vsi *vsi, const u8 *macaddr)
+{
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	bool found = false;
+	int bkt;
+
+	/* Caller must hold mac_filter_hash_lock; let lockdep verify it */
+	lockdep_assert_held(&vsi->mac_filter_hash_lock);
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		if (ether_addr_equal(macaddr, f->macaddr)) {
+			__i40e_del_filter(vsi, f);
+			found = true;
 		}
 	}
+
+	if (found)
+		return 0;
+	else
+		return -ENOENT;
 }
 
 /**
@@ -1499,10 +1584,10 @@ static int i40e_set_mac(struct net_device *netdev, void *p)
 	else
 		netdev_info(netdev, "set new mac address %pM\n", addr->sa_data);
 
-	spin_lock_bh(&vsi->mac_filter_list_lock);
-	i40e_del_mac_all_vlan(vsi, netdev->dev_addr, false, true);
-	i40e_put_mac_in_vlan(vsi, addr->sa_data, false, true);
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	i40e_del_mac_all_vlan(vsi, netdev->dev_addr);
+	i40e_put_mac_in_vlan(vsi, addr->sa_data);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 	ether_addr_copy(netdev->dev_addr, addr->sa_data);
 	if (vsi->type == I40E_VSI_MAIN) {
 		i40e_status ret;
@@ -1666,6 +1751,52 @@ static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
 }
 
 /**
+ * i40e_addr_sync - Callback for dev_(mc|uc)_sync to add address
+ * @netdev: the netdevice
+ * @addr: address to add
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be added. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40e_addr_sync(struct net_device *netdev, const u8 *addr)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+	struct i40e_mac_filter *f;
+
+	if (i40e_is_vsi_in_vlan(vsi))
+		f = i40e_put_mac_in_vlan(vsi, addr);
+	else
+		f = i40e_add_filter(vsi, addr, I40E_VLAN_ANY);
+
+	if (f)
+		return 0;
+	else
+		return -ENOMEM;
+}
+
+/**
+ * i40e_addr_unsync - Callback for dev_(mc|uc)_sync to remove address
+ * @netdev: the netdevice
+ * @addr: address to remove
+ *
+ * Called by __dev_(mc|uc)_sync when an address needs to be removed. We call
+ * __dev_(uc|mc)_sync from .set_rx_mode and guarantee to hold the hash lock.
+ */
+static int i40e_addr_unsync(struct net_device *netdev, const u8 *addr)
+{
+	struct i40e_netdev_priv *np = netdev_priv(netdev);
+	struct i40e_vsi *vsi = np->vsi;
+
+	if (i40e_is_vsi_in_vlan(vsi))
+		i40e_del_mac_all_vlan(vsi, addr);
+	else
+		i40e_del_filter(vsi, addr, I40E_VLAN_ANY);
+
+	return 0;
+}
+
+/**
  * i40e_set_rx_mode - NDO callback to set the netdev filters
  * @netdev: network interface device structure
  **/
@@ -1676,62 +1807,14 @@ static void i40e_set_rx_mode(struct net_device *netdev)
 #endif
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	struct i40e_mac_filter *f, *ftmp;
 	struct i40e_vsi *vsi = np->vsi;
-	struct netdev_hw_addr *uca;
-	struct netdev_hw_addr *mca;
-	struct netdev_hw_addr *ha;
-
-	spin_lock_bh(&vsi->mac_filter_list_lock);
-
-	/* add addr if not already in the filter list */
-	netdev_for_each_uc_addr(uca, netdev) {
-		if (!i40e_find_mac(vsi, uca->addr, false, true)) {
-			if (i40e_is_vsi_in_vlan(vsi))
-				i40e_put_mac_in_vlan(vsi, uca->addr,
-						     false, true);
-			else
-				i40e_add_filter(vsi, uca->addr, I40E_VLAN_ANY,
-						false, true);
-		}
-	}
 
-	netdev_for_each_mc_addr(mca, netdev) {
-		if (!i40e_find_mac(vsi, mca->addr, false, true)) {
-			if (i40e_is_vsi_in_vlan(vsi))
-				i40e_put_mac_in_vlan(vsi, mca->addr,
-						     false, true);
-			else
-				i40e_add_filter(vsi, mca->addr, I40E_VLAN_ANY,
-						false, true);
-		}
-	}
-
-	/* remove filter if not in netdev list */
-	list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-
-		if (!f->is_netdev)
-			continue;
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
-		netdev_for_each_mc_addr(mca, netdev)
-			if (ether_addr_equal(mca->addr, f->macaddr))
-				goto bottom_of_search_loop;
+	__dev_uc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
+	__dev_mc_sync(netdev, i40e_addr_sync, i40e_addr_unsync);
 
-		netdev_for_each_uc_addr(uca, netdev)
-			if (ether_addr_equal(uca->addr, f->macaddr))
-				goto bottom_of_search_loop;
-
-		for_each_dev_addr(netdev, ha)
-			if (ether_addr_equal(ha->addr, f->macaddr))
-				goto bottom_of_search_loop;
-
-		/* f->macaddr wasn't found in uc, mc, or ha list so delete it */
-		i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY, false, true);
-
-bottom_of_search_loop:
-		continue;
-	}
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	/* check for other flag changes */
 	if (vsi->current_netdev_flags != vsi->netdev->flags) {
@@ -1746,21 +1829,26 @@ bottom_of_search_loop:
 }
 
 /**
- * i40e_undo_del_filter_entries - Undo the changes made to MAC filter entries
- * @vsi: pointer to vsi struct
+ * i40e_undo_filter_entries - Undo the changes made to MAC filter entries
+ * @vsi: Pointer to VSI struct
  * @from: Pointer to list which contains MAC filter entries - changes to
  *        those entries needs to be undone.
  *
- * MAC filter entries from list were slated to be removed from device.
+ * MAC filter entries from list were slated to be sent to firmware, either for
+ * addition or deletion.
  **/
-static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
-					 struct list_head *from)
+static void i40e_undo_filter_entries(struct i40e_vsi *vsi,
+				     struct hlist_head *from)
 {
-	struct i40e_mac_filter *f, *ftmp;
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+
+	hlist_for_each_entry_safe(f, h, from, hlist) {
+		u64 key = i40e_addr_to_hkey(f->macaddr);
 
-	list_for_each_entry_safe(f, ftmp, from, list) {
 		/* Move the element back into MAC filter list*/
-		list_move_tail(&f->list, &vsi->mac_filter_list);
+		hlist_del(&f->hlist);
+		hash_add(vsi->mac_filter_hash, &f->hlist, key);
 	}
 }
 
@@ -1770,7 +1858,6 @@ static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
  * @count: Number of filters added
  * @add_list: return data from fw
  * @head: pointer to first filter in current batch
- * @aq_err: status from fw
  *
  * MAC filter entries from list were slated to be added to device. Returns
  * number of successful filters. Note that 0 does NOT mean success!
@@ -1778,45 +1865,146 @@ static void i40e_undo_del_filter_entries(struct i40e_vsi *vsi,
 static int
 i40e_update_filter_state(int count,
 			 struct i40e_aqc_add_macvlan_element_data *add_list,
-			 struct i40e_mac_filter *add_head, int aq_err)
+			 struct i40e_mac_filter *add_head)
 {
 	int retval = 0;
 	int i;
 
-
-	if (!aq_err) {
-		retval = count;
-		/* Everything's good, mark all filters active. */
-		for (i = 0; i < count ; i++) {
-			add_head->state = I40E_FILTER_ACTIVE;
-			add_head = list_next_entry(add_head, list);
-		}
-	} else if (aq_err == I40E_AQ_RC_ENOSPC) {
-		/* Device ran out of filter space. Check the return value
-		 * for each filter to see which ones are active.
+	for (i = 0; i < count; i++) {
+		/* Always check status of each filter. We don't need to check
+		 * the firmware return status because we pre-set the filter
+		 * status to I40E_AQC_MM_ERR_NO_RES when sending the filter
+		 * request to the adminq. Thus, if it no longer matches then
+		 * we know the filter is active.
 		 */
-		for (i = 0; i < count ; i++) {
-			if (add_list[i].match_method ==
-			    I40E_AQC_MM_ERR_NO_RES) {
-				add_head->state = I40E_FILTER_FAILED;
-			} else {
-				add_head->state = I40E_FILTER_ACTIVE;
-				retval++;
-			}
-			add_head = list_next_entry(add_head, list);
-		}
-	} else {
-		/* Some other horrible thing happened, fail all filters */
-		retval = 0;
-		for (i = 0; i < count ; i++) {
+		if (add_list[i].match_method == I40E_AQC_MM_ERR_NO_RES) {
 			add_head->state = I40E_FILTER_FAILED;
-			add_head = list_next_entry(add_head, list);
+		} else {
+			add_head->state = I40E_FILTER_ACTIVE;
+			retval++;
 		}
+
+		add_head = hlist_entry(add_head->hlist.next,
+				       typeof(struct i40e_mac_filter),
+				       hlist);
 	}
+
 	return retval;
 }
 
 /**
+ * i40e_aqc_del_filters - Request firmware to delete a set of filters
+ * @vsi: ptr to the VSI
+ * @vsi_name: name to display in messages
+ * @list: the list of filters to send to firmware
+ * @num_del: the number of filters to delete
+ * @retval: Set to -EIO on failure to delete
+ *
+ * Send a request to firmware via AdminQ to delete a set of filters. Uses
+ * *retval instead of a return value so that success does not force *retval to
+ * be set to 0. This ensures that a sequence of calls to this function
+ * preserves the previous value of *retval on successful delete.
+ */
+static
+void i40e_aqc_del_filters(struct i40e_vsi *vsi, const char *vsi_name,
+			  struct i40e_aqc_remove_macvlan_element_data *list,
+			  int num_del, int *retval)
+{
+	struct i40e_hw *hw = &vsi->back->hw;
+	i40e_status aq_ret;
+	int aq_err;
+
+	aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, list, num_del, NULL);
+	aq_err = hw->aq.asq_last_status;
+
+	/* Explicitly ignore and do not report when firmware returns ENOENT */
+	if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
+		*retval = -EIO;
+		dev_info(&vsi->back->pdev->dev,
+			 "ignoring delete macvlan error on %s, err %s, aq_err %s\n",
+			 vsi_name, i40e_stat_str(hw, aq_ret),
+			 i40e_aq_str(hw, aq_err));
+	}
+}
+
+/**
+ * i40e_aqc_add_filters - Request firmware to add a set of filters
+ * @vsi: ptr to the VSI
+ * @vsi_name: name to display in messages
+ * @list: the list of filters to send to firmware
+ * @add_head: Position in the add hlist
+ * @num_add: the number of filters to add
+ * @promisc_changed: set to true on exit if promiscuous mode was forced on
+ *
+ * Send a request to firmware via AdminQ to add a chunk of filters. Will set
+ * promisc_changed to true if the firmware has run out of space for more
+ * filters.
+ */
+static
+void i40e_aqc_add_filters(struct i40e_vsi *vsi, const char *vsi_name,
+			  struct i40e_aqc_add_macvlan_element_data *list,
+			  struct i40e_mac_filter *add_head,
+			  int num_add, bool *promisc_changed)
+{
+	struct i40e_hw *hw = &vsi->back->hw;
+	int aq_err, fcnt;
+
+	i40e_aq_add_macvlan(hw, vsi->seid, list, num_add, NULL);
+	aq_err = hw->aq.asq_last_status;
+	fcnt = i40e_update_filter_state(num_add, list, add_head);
+
+	if (fcnt != num_add) {
+		*promisc_changed = true;
+		set_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state);
+		dev_warn(&vsi->back->pdev->dev,
+			 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
+			 i40e_aq_str(hw, aq_err),
+			 vsi_name);
+	}
+}
+
+/**
+ * i40e_aqc_broadcast_filter - Set promiscuous broadcast flags
+ * @vsi: pointer to the VSI
+ * @vsi_name: name to display in messages
+ * @f: filter data
+ *
+ * Sets or clears the promiscuous broadcast flags for VLAN filters so that
+ * broadcast frames are received. Assumes only broadcast filters are passed.
+ **/
+static
+void i40e_aqc_broadcast_filter(struct i40e_vsi *vsi, const char *vsi_name,
+			       struct i40e_mac_filter *f)
+{
+	bool enable = f->state == I40E_FILTER_NEW;
+	struct i40e_hw *hw = &vsi->back->hw;
+	i40e_status aq_ret;
+
+	if (f->vlan == I40E_VLAN_ANY) {
+		aq_ret = i40e_aq_set_vsi_broadcast(hw,
+						   vsi->seid,
+						   enable,
+						   NULL);
+	} else {
+		aq_ret = i40e_aq_set_vsi_bc_promisc_on_vlan(hw,
+							    vsi->seid,
+							    enable,
+							    f->vlan,
+							    NULL);
+	}
+
+	if (aq_ret) {
+		dev_warn(&vsi->back->pdev->dev,
+			 "Error %s setting broadcast promiscuous mode on %s\n",
+			 i40e_aq_str(hw, hw->aq.asq_last_status),
+			 vsi_name);
+		f->state = I40E_FILTER_FAILED;
+	} else if (enable) {
+		f->state = I40E_FILTER_ACTIVE;
+	}
+}
+
+/**
  * i40e_sync_vsi_filters - Update the VSI filter list to the HW
  * @vsi: ptr to the VSI
  *
@@ -1826,22 +2014,24 @@ i40e_update_filter_state(int count,
  **/
 int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 {
-	struct i40e_mac_filter *f, *ftmp, *add_head = NULL;
-	struct list_head tmp_add_list, tmp_del_list;
+	struct hlist_head tmp_add_list, tmp_del_list;
+	struct i40e_mac_filter *f, *add_head = NULL;
 	struct i40e_hw *hw = &vsi->back->hw;
+	unsigned int failed_filters = 0;
+	unsigned int vlan_filters = 0;
 	bool promisc_changed = false;
 	char vsi_name[16] = "PF";
 	int filter_list_len = 0;
-	u32 changed_flags = 0;
 	i40e_status aq_ret = 0;
-	int retval = 0;
+	u32 changed_flags = 0;
+	struct hlist_node *h;
 	struct i40e_pf *pf;
 	int num_add = 0;
 	int num_del = 0;
-	int aq_err = 0;
+	int retval = 0;
 	u16 cmd_flags;
 	int list_size;
-	int fcnt;
+	int bkt;
 
 	/* empty array typed pointers, kcalloc later */
 	struct i40e_aqc_add_macvlan_element_data *add_list;
@@ -1856,8 +2046,8 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 		vsi->current_netdev_flags = vsi->netdev->flags;
 	}
 
-	INIT_LIST_HEAD(&tmp_add_list);
-	INIT_LIST_HEAD(&tmp_del_list);
+	INIT_HLIST_HEAD(&tmp_add_list);
+	INIT_HLIST_HEAD(&tmp_del_list);
 
 	if (vsi->type == I40E_VSI_SRIOV)
 		snprintf(vsi_name, sizeof(vsi_name) - 1, "VF %d", vsi->vf_id);
@@ -1867,43 +2057,64 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 	if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
 		vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
 
-		spin_lock_bh(&vsi->mac_filter_list_lock);
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
 		/* Create a list of filters to delete. */
-		list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+		hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
 			if (f->state == I40E_FILTER_REMOVE) {
-				WARN_ON(f->counter != 0);
 				/* Move the element into temporary del_list */
-				list_move_tail(&f->list, &tmp_del_list);
-				vsi->active_filters--;
+				hash_del(&f->hlist);
+				hlist_add_head(&f->hlist, &tmp_del_list);
+
+				/* Avoid counting removed filters */
+				continue;
 			}
 			if (f->state == I40E_FILTER_NEW) {
-				WARN_ON(f->counter == 0);
-				/* Move the element into temporary add_list */
-				list_move_tail(&f->list, &tmp_add_list);
+				hash_del(&f->hlist);
+				hlist_add_head(&f->hlist, &tmp_add_list);
 			}
+
+			/* Count the number of active (current and new) VLAN
+			 * filters we have now. Does not count filters which
+			 * are marked for deletion.
+			 */
+			if (f->vlan > 0)
+				vlan_filters++;
 		}
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
+
+		retval = i40e_correct_mac_vlan_filters(vsi,
+						       &tmp_add_list,
+						       &tmp_del_list,
+						       vlan_filters);
+		if (retval)
+			goto err_no_memory_locked;
+
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 	}
 
 	/* Now process 'del_list' outside the lock */
-	if (!list_empty(&tmp_del_list)) {
+	if (!hlist_empty(&tmp_del_list)) {
 		filter_list_len = hw->aq.asq_buf_size /
 			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
 		list_size = filter_list_len *
 			    sizeof(struct i40e_aqc_remove_macvlan_element_data);
 		del_list = kzalloc(list_size, GFP_ATOMIC);
-		if (!del_list) {
-			/* Undo VSI's MAC filter entry element updates */
-			spin_lock_bh(&vsi->mac_filter_list_lock);
-			i40e_undo_del_filter_entries(vsi, &tmp_del_list);
-			spin_unlock_bh(&vsi->mac_filter_list_lock);
-			retval = -ENOMEM;
-			goto out;
-		}
+		if (!del_list)
+			goto err_no_memory;
 
-		list_for_each_entry_safe(f, ftmp, &tmp_del_list, list) {
+		hlist_for_each_entry_safe(f, h, &tmp_del_list, hlist) {
 			cmd_flags = 0;
 
+			/* handle broadcast filters by updating the broadcast
+			 * promiscuous flag instead of deleting a MAC filter.
+			 */
+			if (is_broadcast_ether_addr(f->macaddr)) {
+				i40e_aqc_broadcast_filter(vsi, vsi_name, f);
+
+				hlist_del(&f->hlist);
+				kfree(f);
+				continue;
+			}
+
 			/* add to delete list */
 			ether_addr_copy(del_list[num_del].mac_addr, f->macaddr);
 			if (f->vlan == I40E_VLAN_ANY) {
@@ -1920,73 +2131,57 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 
 			/* flush a full buffer */
 			if (num_del == filter_list_len) {
-				aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid,
-								del_list,
-								num_del, NULL);
-				aq_err = hw->aq.asq_last_status;
-				num_del = 0;
+				i40e_aqc_del_filters(vsi, vsi_name, del_list,
+						     num_del, &retval);
 				memset(del_list, 0, list_size);
-
-				/* Explicitly ignore and do not report when
-				 * firmware returns ENOENT.
-				 */
-				if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
-					retval = -EIO;
-					dev_info(&pf->pdev->dev,
-						 "ignoring delete macvlan error on %s, err %s, aq_err %s\n",
-						 vsi_name,
-						 i40e_stat_str(hw, aq_ret),
-						 i40e_aq_str(hw, aq_err));
-				}
+				num_del = 0;
 			}
 			/* Release memory for MAC filter entries which were
 			 * synced up with HW.
 			 */
-			list_del(&f->list);
+			hlist_del(&f->hlist);
 			kfree(f);
 		}
 
 		if (num_del) {
-			aq_ret = i40e_aq_remove_macvlan(hw, vsi->seid, del_list,
-							num_del, NULL);
-			aq_err = hw->aq.asq_last_status;
-			num_del = 0;
-
-			/* Explicitly ignore and do not report when firmware
-			 * returns ENOENT.
-			 */
-			if (aq_ret && !(aq_err == I40E_AQ_RC_ENOENT)) {
-				retval = -EIO;
-				dev_info(&pf->pdev->dev,
-					 "ignoring delete macvlan error on %s, err %s aq_err %s\n",
-					 vsi_name,
-					 i40e_stat_str(hw, aq_ret),
-					 i40e_aq_str(hw, aq_err));
-			}
+			i40e_aqc_del_filters(vsi, vsi_name, del_list,
+					     num_del, &retval);
 		}
 
 		kfree(del_list);
 		del_list = NULL;
 	}
 
-	if (!list_empty(&tmp_add_list)) {
+	if (!hlist_empty(&tmp_add_list)) {
 		/* Do all the adds now. */
 		filter_list_len = hw->aq.asq_buf_size /
 			       sizeof(struct i40e_aqc_add_macvlan_element_data);
 		list_size = filter_list_len *
 			       sizeof(struct i40e_aqc_add_macvlan_element_data);
 		add_list = kzalloc(list_size, GFP_ATOMIC);
-		if (!add_list) {
-			retval = -ENOMEM;
-			goto out;
-		}
+		if (!add_list)
+			goto err_no_memory;
+
 		num_add = 0;
-		list_for_each_entry(f, &tmp_add_list, list) {
+		hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) {
 			if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
 				     &vsi->state)) {
 				f->state = I40E_FILTER_FAILED;
 				continue;
 			}
+
+			/* handle broadcast filters by updating the broadcast
+			 * promiscuous flag instead of adding a MAC filter.
+			 */
+			if (is_broadcast_ether_addr(f->macaddr)) {
+				u64 key = i40e_addr_to_hkey(f->macaddr);
+				i40e_aqc_broadcast_filter(vsi, vsi_name, f);
+
+				hlist_del(&f->hlist);
+				hash_add(vsi->mac_filter_hash, &f->hlist, key);
+				continue;
+			}
+
 			/* add to add array */
 			if (num_add == 0)
 				add_head = f;
@@ -2000,88 +2195,70 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
 					cpu_to_le16((u16)(f->vlan));
 			}
 			add_list[num_add].queue_number = 0;
+			/* set invalid match method for later detection */
+			add_list[num_add].match_method = I40E_AQC_MM_ERR_NO_RES;
 			cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
 			add_list[num_add].flags = cpu_to_le16(cmd_flags);
 			num_add++;
 
 			/* flush a full buffer */
 			if (num_add == filter_list_len) {
-				aq_ret = i40e_aq_add_macvlan(hw, vsi->seid,
-							     add_list, num_add,
-							     NULL);
-				aq_err = hw->aq.asq_last_status;
-				fcnt = i40e_update_filter_state(num_add,
-								add_list,
-								add_head,
-								aq_ret);
-				vsi->active_filters += fcnt;
-
-				if (fcnt != num_add) {
-					promisc_changed = true;
-					set_bit(__I40E_FILTER_OVERFLOW_PROMISC,
-						&vsi->state);
-					vsi->promisc_threshold =
-						(vsi->active_filters * 3) / 4;
-					dev_warn(&pf->pdev->dev,
-						 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
-						 i40e_aq_str(hw, aq_err),
-						 vsi_name);
-				}
+				i40e_aqc_add_filters(vsi, vsi_name, add_list,
+						     add_head, num_add,
+						     &promisc_changed);
 				memset(add_list, 0, list_size);
 				num_add = 0;
 			}
 		}
 		if (num_add) {
-			aq_ret = i40e_aq_add_macvlan(hw, vsi->seid,
-						     add_list, num_add, NULL);
-			aq_err = hw->aq.asq_last_status;
-			fcnt = i40e_update_filter_state(num_add, add_list,
-							add_head, aq_ret);
-			vsi->active_filters += fcnt;
-			if (fcnt != num_add) {
-				promisc_changed = true;
-				set_bit(__I40E_FILTER_OVERFLOW_PROMISC,
-					&vsi->state);
-				vsi->promisc_threshold =
-						(vsi->active_filters * 3) / 4;
-				dev_warn(&pf->pdev->dev,
-					 "Error %s adding RX filters on %s, promiscuous mode forced on\n",
-					 i40e_aq_str(hw, aq_err), vsi_name);
-			}
+			i40e_aqc_add_filters(vsi, vsi_name, add_list, add_head,
+					     num_add, &promisc_changed);
 		}
 		/* Now move all of the filters from the temp add list back to
 		 * the VSI's list.
 		 */
-		spin_lock_bh(&vsi->mac_filter_list_lock);
-		list_for_each_entry_safe(f, ftmp, &tmp_add_list, list) {
-			list_move_tail(&f->list, &vsi->mac_filter_list);
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		hlist_for_each_entry_safe(f, h, &tmp_add_list, hlist) {
+			u64 key = i40e_addr_to_hkey(f->macaddr);
+
+			hlist_del(&f->hlist);
+			hash_add(vsi->mac_filter_hash, &f->hlist, key);
 		}
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 		kfree(add_list);
 		add_list = NULL;
 	}
 
-	/* Check to see if we can drop out of overflow promiscuous mode. */
+	/* Determine the number of active and failed filters. */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	vsi->active_filters = 0;
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
+		if (f->state == I40E_FILTER_ACTIVE)
+			vsi->active_filters++;
+		else if (f->state == I40E_FILTER_FAILED)
+			failed_filters++;
+	}
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* If promiscuous mode has changed, we need to calculate a new
+	 * threshold for when we are safe to exit
+	 */
+	if (promisc_changed)
+		vsi->promisc_threshold = (vsi->active_filters * 3) / 4;
+
+	/* Check if we are able to exit overflow promiscuous mode. We can
+	 * safely exit if we didn't just enter, we no longer have any failed
+	 * filters, and we have reduced filters below the threshold value.
+	 */
 	if (test_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state) &&
+	    !promisc_changed && !failed_filters &&
 	    (vsi->active_filters < vsi->promisc_threshold)) {
-		int failed_count = 0;
-		/* See if we have any failed filters. We can't drop out of
-		 * promiscuous until these have all been deleted.
-		 */
-		spin_lock_bh(&vsi->mac_filter_list_lock);
-		list_for_each_entry(f, &vsi->mac_filter_list, list) {
-			if (f->state == I40E_FILTER_FAILED)
-				failed_count++;
-		}
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
-		if (!failed_count) {
-			dev_info(&pf->pdev->dev,
-				 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
-				 vsi_name);
-			clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state);
-			promisc_changed = true;
-			vsi->promisc_threshold = 0;
-		}
+		dev_info(&pf->pdev->dev,
+			 "filter logjam cleared on %s, leaving overflow promiscuous mode\n",
+			 vsi_name);
+		clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state);
+		promisc_changed = true;
+		vsi->promisc_threshold = 0;
 	}
 
 	/* if the VF is not trusted do not do promisc */
@@ -2201,6 +2378,18 @@ out:
 
 	clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
 	return retval;
+
+err_no_memory:
+	/* Restore elements on the temporary add and delete lists */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+err_no_memory_locked:
+	i40e_undo_filter_entries(vsi, &tmp_del_list);
+	i40e_undo_filter_entries(vsi, &tmp_add_list);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+	clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
+	return -ENOMEM;
 }
 
 /**
@@ -2239,13 +2428,8 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
 static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
-	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	struct i40e_vsi *vsi = np->vsi;
 
-	/* MTU < 68 is an error and causes problems on some kernels */
-	if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER))
-		return -EINVAL;
-
 	netdev_info(netdev, "changing MTU from %d to %d\n",
 		    netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
@@ -2354,88 +2538,54 @@ static void i40e_vlan_rx_register(struct net_device *netdev, u32 features)
 }
 
 /**
- * i40e_vsi_add_vlan - Add vsi membership for given vlan
+ * i40e_add_vlan_all_mac - Add a MAC/VLAN filter for each existing MAC address
  * @vsi: the vsi being configured
  * @vid: vlan id to be added (0 = untagged only , -1 = any)
+ *
+ * This is a helper function for adding a new MAC/VLAN filter with the
+ * specified VLAN for each existing MAC address already in the hash table.
+ * This function does *not* perform any accounting to update filters based on
+ * VLAN mode.
+ *
+ * NOTE: this function expects to be called while under the
+ * mac_filter_hash_lock
  **/
-int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
+int i40e_add_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
 {
-	struct i40e_mac_filter *f, *ftmp, *add_f;
-	bool is_netdev, is_vf;
-
-	is_vf = (vsi->type == I40E_VSI_SRIOV);
-	is_netdev = !!(vsi->netdev);
-
-	/* Locked once because all functions invoked below iterates list*/
-	spin_lock_bh(&vsi->mac_filter_list_lock);
-
-	if (is_netdev) {
-		add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid,
-					is_vf, is_netdev);
-		if (!add_f) {
-			dev_info(&vsi->back->pdev->dev,
-				 "Could not add vlan filter %d for %pM\n",
-				 vid, vsi->netdev->dev_addr);
-			spin_unlock_bh(&vsi->mac_filter_list_lock);
-			return -ENOMEM;
-		}
-	}
+	struct i40e_mac_filter *f, *add_f;
+	struct hlist_node *h;
+	int bkt;
 
-	list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-		add_f = i40e_add_filter(vsi, f->macaddr, vid, is_vf, is_netdev);
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		if (f->state == I40E_FILTER_REMOVE)
+			continue;
+		add_f = i40e_add_filter(vsi, f->macaddr, vid);
 		if (!add_f) {
 			dev_info(&vsi->back->pdev->dev,
 				 "Could not add vlan filter %d for %pM\n",
 				 vid, f->macaddr);
-			spin_unlock_bh(&vsi->mac_filter_list_lock);
 			return -ENOMEM;
 		}
 	}
 
-	/* Now if we add a vlan tag, make sure to check if it is the first
-	 * tag (i.e. a "tag" -1 does exist) and if so replace the -1 "tag"
-	 * with 0, so we now accept untagged and specified tagged traffic
-	 * (and not all tags along with untagged)
-	 */
-	if (vid > 0) {
-		if (is_netdev && i40e_find_filter(vsi, vsi->netdev->dev_addr,
-						  I40E_VLAN_ANY,
-						  is_vf, is_netdev)) {
-			i40e_del_filter(vsi, vsi->netdev->dev_addr,
-					I40E_VLAN_ANY, is_vf, is_netdev);
-			add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, 0,
-						is_vf, is_netdev);
-			if (!add_f) {
-				dev_info(&vsi->back->pdev->dev,
-					 "Could not add filter 0 for %pM\n",
-					 vsi->netdev->dev_addr);
-				spin_unlock_bh(&vsi->mac_filter_list_lock);
-				return -ENOMEM;
-			}
-		}
-	}
+	return 0;
+}
 
-	/* Do not assume that I40E_VLAN_ANY should be reset to VLAN 0 */
-	if (vid > 0 && !vsi->info.pvid) {
-		list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-			if (!i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY,
-					      is_vf, is_netdev))
-				continue;
-			i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY,
-					is_vf, is_netdev);
-			add_f = i40e_add_filter(vsi, f->macaddr,
-						0, is_vf, is_netdev);
-			if (!add_f) {
-				dev_info(&vsi->back->pdev->dev,
-					 "Could not add filter 0 for %pM\n",
-					f->macaddr);
-				spin_unlock_bh(&vsi->mac_filter_list_lock);
-				return -ENOMEM;
-			}
-		}
-	}
+/**
+ * i40e_vsi_add_vlan - Add VSI membership for given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN id to be added (0 = untagged only , -1 = any)
+ **/
+int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
+{
+	int err;
 
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	/* Locked once because the helper invoked below iterates the hash */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	err = i40e_add_vlan_all_mac(vsi, vid);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+	if (err)
+		return err;
 
 	/* schedule our worker thread which will take care of
 	 * applying the new filter changes
@@ -2445,82 +2595,45 @@ int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
 }
 
 /**
- * i40e_vsi_kill_vlan - Remove vsi membership for given vlan
+ * i40e_rm_vlan_all_mac - Remove MAC/VLAN pair for all MAC with the given VLAN
  * @vsi: the vsi being configured
  * @vid: vlan id to be removed (0 = untagged only , -1 = any)
  *
- * Return: 0 on success or negative otherwise
- **/
-int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
+ * This function should be used to remove all VLAN filters which match the
+ * given VID. It does not schedule the service event and does not take the
+ * mac_filter_hash_lock so it may be combined with other operations under
+ * a single invocation of the mac_filter_hash_lock.
+ *
+ * NOTE: this function expects to be called while under the
+ * mac_filter_hash_lock
+ */
+void i40e_rm_vlan_all_mac(struct i40e_vsi *vsi, s16 vid)
 {
-	struct net_device *netdev = vsi->netdev;
-	struct i40e_mac_filter *f, *ftmp, *add_f;
-	bool is_vf, is_netdev;
-	int filter_count = 0;
-
-	is_vf = (vsi->type == I40E_VSI_SRIOV);
-	is_netdev = !!(netdev);
-
-	/* Locked once because all functions invoked below iterates list */
-	spin_lock_bh(&vsi->mac_filter_list_lock);
-
-	if (is_netdev)
-		i40e_del_filter(vsi, netdev->dev_addr, vid, is_vf, is_netdev);
-
-	list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list)
-		i40e_del_filter(vsi, f->macaddr, vid, is_vf, is_netdev);
-
-	/* go through all the filters for this VSI and if there is only
-	 * vid == 0 it means there are no other filters, so vid 0 must
-	 * be replaced with -1. This signifies that we should from now
-	 * on accept any traffic (with any tag present, or untagged)
-	 */
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
-		if (is_netdev) {
-			if (f->vlan &&
-			    ether_addr_equal(netdev->dev_addr, f->macaddr))
-				filter_count++;
-		}
-
-		if (f->vlan)
-			filter_count++;
-	}
-
-	if (!filter_count && is_netdev) {
-		i40e_del_filter(vsi, netdev->dev_addr, 0, is_vf, is_netdev);
-		f = i40e_add_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY,
-				    is_vf, is_netdev);
-		if (!f) {
-			dev_info(&vsi->back->pdev->dev,
-				 "Could not add filter %d for %pM\n",
-				 I40E_VLAN_ANY, netdev->dev_addr);
-			spin_unlock_bh(&vsi->mac_filter_list_lock);
-			return -ENOMEM;
-		}
-	}
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	int bkt;
 
-	if (!filter_count) {
-		list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
-			i40e_del_filter(vsi, f->macaddr, 0, is_vf, is_netdev);
-			add_f = i40e_add_filter(vsi, f->macaddr, I40E_VLAN_ANY,
-						is_vf, is_netdev);
-			if (!add_f) {
-				dev_info(&vsi->back->pdev->dev,
-					 "Could not add filter %d for %pM\n",
-					 I40E_VLAN_ANY, f->macaddr);
-				spin_unlock_bh(&vsi->mac_filter_list_lock);
-				return -ENOMEM;
-			}
-		}
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
+		if (f->vlan == vid)
+			__i40e_del_filter(vsi, f);
 	}
+}
 
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+/**
+ * i40e_vsi_kill_vlan - Remove VSI membership for given VLAN
+ * @vsi: the VSI being configured
+ * @vid: VLAN id to be removed (0 = untagged only , -1 = any)
+ **/
+void i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
+{
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	i40e_rm_vlan_all_mac(vsi, vid);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	/* schedule our worker thread which will take care of
 	 * applying the new filter changes
 	 */
 	i40e_service_event_schedule(vsi->back);
-	return 0;
 }
 
 /**
@@ -2542,7 +2655,7 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
 	struct i40e_vsi *vsi = np->vsi;
 	int ret = 0;
 
-	if (vid > 4095)
+	if (vid >= VLAN_N_VID)
 		return -EINVAL;
 
 	/* If the network stack called us with vid = 0 then
@@ -2554,7 +2667,7 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
 	if (vid)
 		ret = i40e_vsi_add_vlan(vsi, vid);
 
-	if (!ret && (vid < VLAN_N_VID))
+	if (!ret)
 		set_bit(vid, vsi->active_vlans);
 
 	return ret;
@@ -3322,6 +3435,33 @@ static irqreturn_t i40e_msix_clean_rings(int irq, void *data)
 }
 
 /**
+ * i40e_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ **/
+static void i40e_irq_affinity_notify(struct irq_affinity_notify *notify,
+				     const cpumask_t *mask)
+{
+	struct i40e_q_vector *q_vector =
+		container_of(notify, struct i40e_q_vector, affinity_notify);
+
+	q_vector->affinity_mask = *mask;
+}
+
+/**
+ * i40e_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ **/
+static void i40e_irq_affinity_release(struct kref *ref) {}
+
+/**
  * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts
  * @vsi: the VSI being configured
  * @basename: name for the vector
@@ -3336,10 +3476,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 	int rx_int_idx = 0;
 	int tx_int_idx = 0;
 	int vector, err;
+	int irq_num;
 
 	for (vector = 0; vector < q_vectors; vector++) {
 		struct i40e_q_vector *q_vector = vsi->q_vectors[vector];
 
+		irq_num = pf->msix_entries[base + vector].vector;
+
 		if (q_vector->tx.ring && q_vector->rx.ring) {
 			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
 				 "%s-%s-%d", basename, "TxRx", rx_int_idx++);
@@ -3354,7 +3497,7 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 			/* skip this unused q_vector */
 			continue;
 		}
-		err = request_irq(pf->msix_entries[base + vector].vector,
+		err = request_irq(irq_num,
 				  vsi->irq_handler,
 				  0,
 				  q_vector->name,
@@ -3364,9 +3507,13 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 				 "MSIX request_irq failed, error: %d\n", err);
 			goto free_queue_irqs;
 		}
+
+		/* register for affinity change notifications */
+		q_vector->affinity_notify.notify = i40e_irq_affinity_notify;
+		q_vector->affinity_notify.release = i40e_irq_affinity_release;
+		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
 		/* assign the mask for this irq */
-		irq_set_affinity_hint(pf->msix_entries[base + vector].vector,
-				      &q_vector->affinity_mask);
+		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
 	}
 
 	vsi->irqs_ready = true;
@@ -3375,10 +3522,10 @@ static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
 free_queue_irqs:
 	while (vector) {
 		vector--;
-		irq_set_affinity_hint(pf->msix_entries[base + vector].vector,
-				      NULL);
-		free_irq(pf->msix_entries[base + vector].vector,
-			 &(vsi->q_vectors[vector]));
+		irq_num = pf->msix_entries[base + vector].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &vsi->q_vectors[vector]);
 	}
 	return err;
 }
@@ -3480,7 +3627,7 @@ static irqreturn_t i40e_intr(int irq, void *data)
 	    (ena_mask & I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK)) {
 		ena_mask &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
 		icr0 &= ~I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK;
-		dev_info(&pf->pdev->dev, "cleared PE_CRITERR\n");
+		dev_dbg(&pf->pdev->dev, "cleared PE_CRITERR\n");
 	}
 
 	/* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
@@ -3973,30 +4120,36 @@ static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
 }
 
 /**
- * i40e_vsi_control_rings - Start or stop a VSI's rings
+ * i40e_vsi_start_rings - Start a VSI's rings
  * @vsi: the VSI being configured
- * @enable: start or stop the rings
  **/
-int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool request)
+int i40e_vsi_start_rings(struct i40e_vsi *vsi)
 {
 	int ret = 0;
 
 	/* do rx first for enable and last for disable */
-	if (request) {
-		ret = i40e_vsi_control_rx(vsi, request);
-		if (ret)
-			return ret;
-		ret = i40e_vsi_control_tx(vsi, request);
-	} else {
-		/* Ignore return value, we need to shutdown whatever we can */
-		i40e_vsi_control_tx(vsi, request);
-		i40e_vsi_control_rx(vsi, request);
-	}
+	ret = i40e_vsi_control_rx(vsi, true);
+	if (ret)
+		return ret;
+	ret = i40e_vsi_control_tx(vsi, true);
 
 	return ret;
 }
 
 /**
+ * i40e_vsi_stop_rings - Stop a VSI's rings
+ * @vsi: the VSI being configured
+ **/
+void i40e_vsi_stop_rings(struct i40e_vsi *vsi)
+{
+	/* do rx first for enable and last for disable
+	 * Ignore return value, we need to shutdown whatever we can
+	 */
+	i40e_vsi_control_tx(vsi, false);
+	i40e_vsi_control_rx(vsi, false);
+}
+
+/**
  * i40e_vsi_free_irq - Free the irq association with the OS
  * @vsi: the VSI being configured
  **/
@@ -4017,19 +4170,23 @@ static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
 
 		vsi->irqs_ready = false;
 		for (i = 0; i < vsi->num_q_vectors; i++) {
-			u16 vector = i + base;
+			int irq_num;
+			u16 vector;
+
+			vector = i + base;
+			irq_num = pf->msix_entries[vector].vector;
 
 			/* free only the irqs that were actually requested */
 			if (!vsi->q_vectors[i] ||
 			    !vsi->q_vectors[i]->num_ringpairs)
 				continue;
 
+			/* clear the affinity notifier in the IRQ descriptor */
+			irq_set_affinity_notifier(irq_num, NULL);
 			/* clear the affinity_mask in the IRQ descriptor */
-			irq_set_affinity_hint(pf->msix_entries[vector].vector,
-					      NULL);
-			synchronize_irq(pf->msix_entries[vector].vector);
-			free_irq(pf->msix_entries[vector].vector,
-				 vsi->q_vectors[i]);
+			irq_set_affinity_hint(irq_num, NULL);
+			synchronize_irq(irq_num);
+			free_irq(irq_num, vsi->q_vectors[i]);
 
 			/* Tear down the interrupt queue link list
 			 *
@@ -5116,12 +5273,16 @@ out:
  */
 void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 {
+	enum i40e_aq_link_speed new_speed;
 	char *speed = "Unknown";
 	char *fc = "Unknown";
 
-	if (vsi->current_isup == isup)
+	new_speed = vsi->back->hw.phy.link_info.link_speed;
+
+	if ((vsi->current_isup == isup) && (vsi->current_speed == new_speed))
 		return;
 	vsi->current_isup = isup;
+	vsi->current_speed = new_speed;
 	if (!isup) {
 		netdev_info(vsi->netdev, "NIC Link is Down\n");
 		return;
@@ -5143,6 +5304,9 @@ void i40e_print_link_message(struct i40e_vsi *vsi, bool isup)
 	case I40E_LINK_SPEED_20GB:
 		speed = "20 G";
 		break;
+	case I40E_LINK_SPEED_25GB:
+		speed = "25 G";
+		break;
 	case I40E_LINK_SPEED_10GB:
 		speed = "10 G";
 		break;
@@ -5190,7 +5354,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi)
 		i40e_configure_msi_and_legacy(vsi);
 
 	/* start rings */
-	err = i40e_vsi_control_rings(vsi, true);
+	err = i40e_vsi_start_rings(vsi);
 	if (err)
 		return err;
 
@@ -5287,7 +5451,7 @@ void i40e_down(struct i40e_vsi *vsi)
 		netif_tx_disable(vsi->netdev);
 	}
 	i40e_vsi_disable_irq(vsi);
-	i40e_vsi_control_rings(vsi, false);
+	i40e_vsi_stop_rings(vsi);
 	i40e_napi_disable_all(vsi);
 
 	for (i = 0; i < vsi->num_queue_pairs; i++) {
@@ -5833,19 +5997,6 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
 }
 
 /**
- * i40e_service_event_complete - Finish up the service event
- * @pf: board private structure
- **/
-static void i40e_service_event_complete(struct i40e_pf *pf)
-{
-	WARN_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state));
-
-	/* flush memory to make sure state is correct before next watchog */
-	smp_mb__before_atomic();
-	clear_bit(__I40E_SERVICE_SCHED, &pf->state);
-}
-
-/**
  * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters
  * @pf: board private structure
  **/
@@ -6670,7 +6821,6 @@ static int i40e_vsi_clear(struct i40e_vsi *vsi);
 static void i40e_fdir_sb_setup(struct i40e_pf *pf)
 {
 	struct i40e_vsi *vsi;
-	int i;
 
 	/* quick workaround for an NVM issue that leaves a critical register
 	 * uninitialized
@@ -6681,6 +6831,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
 			0xeacb7d61, 0xaa4f05b6, 0x9c5c89ed, 0xfc425ddb,
 			0xa4654832, 0xfc7461d4, 0x8f827619, 0xf5c63c21,
 			0x95b3a76d};
+		int i;
 
 		for (i = 0; i <= I40E_GLQF_HKEY_MAX_INDEX; i++)
 			wr32(&pf->hw, I40E_GLQF_HKEY(i), hkey[i]);
@@ -6690,13 +6841,7 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
 		return;
 
 	/* find existing VSI and see if it needs configuring */
-	vsi = NULL;
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
-			vsi = pf->vsi[i];
-			break;
-		}
-	}
+	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
 
 	/* create a new VSI if none exists */
 	if (!vsi) {
@@ -6718,15 +6863,12 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
  **/
 static void i40e_fdir_teardown(struct i40e_pf *pf)
 {
-	int i;
+	struct i40e_vsi *vsi;
 
 	i40e_fdir_filter_exit(pf);
-	for (i = 0; i < pf->num_alloc_vsi; i++) {
-		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
-			i40e_vsi_release(pf->vsi[i]);
-			break;
-		}
-	}
+	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
+	if (vsi)
+		i40e_vsi_release(vsi);
 }
 
 /**
@@ -7163,10 +7305,12 @@ static void i40e_service_task(struct work_struct *work)
 
 	/* don't bother with service tasks if a reset is in progress */
 	if (test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) {
-		i40e_service_event_complete(pf);
 		return;
 	}
 
+	if (test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state))
+		return;
+
 	i40e_detect_recover_hung(pf);
 	i40e_sync_filters_subtask(pf);
 	i40e_reset_subtask(pf);
@@ -7179,7 +7323,9 @@ static void i40e_service_task(struct work_struct *work)
 	i40e_sync_udp_filters_subtask(pf);
 	i40e_clean_adminq_subtask(pf);
 
-	i40e_service_event_complete(pf);
+	/* flush memory to make sure state is correct before next watchdog */
+	smp_mb__before_atomic();
+	clear_bit(__I40E_SERVICE_SCHED, &pf->state);
 
 	/* If the tasks have taken longer than one timer cycle or there
 	 * is more work to be done, reschedule the service task now
@@ -7354,7 +7500,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 				pf->rss_table_size : 64;
 	vsi->netdev_registered = false;
 	vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
-	INIT_LIST_HEAD(&vsi->mac_filter_list);
+	hash_init(vsi->mac_filter_hash);
 	vsi->irqs_ready = false;
 
 	ret = i40e_set_num_rings_in_vsi(vsi);
@@ -7369,7 +7515,7 @@ static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
 	i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
 
 	/* Initialize VSI lock */
-	spin_lock_init(&vsi->mac_filter_list_lock);
+	spin_lock_init(&vsi->mac_filter_hash_lock);
 	pf->vsi[vsi_idx] = vsi;
 	ret = vsi_idx;
 	goto unlock_pf;
@@ -8345,8 +8491,8 @@ int i40e_reconfig_rss_queues(struct i40e_pf *pf, int queue_count)
 
 		i40e_pf_config_rss(pf);
 	}
-	dev_info(&pf->pdev->dev, "RSS count/HW max RSS count:  %d/%d\n",
-		 pf->alloc_rss_size, pf->rss_size_max);
+	dev_info(&pf->pdev->dev, "User requested queue count/HW max RSS count:  %d/%d\n",
+		 vsi->req_queue_pairs, pf->rss_size_max);
 	return pf->alloc_rss_size;
 }
 
@@ -8489,15 +8635,6 @@ static int i40e_sw_init(struct i40e_pf *pf)
 	int err = 0;
 	int size;
 
-	pf->msg_enable = netif_msg_init(I40E_DEFAULT_MSG_ENABLE,
-				(NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK));
-	if (debug != -1 && debug != I40E_DEFAULT_MSG_ENABLE) {
-		if (I40E_DEBUG_USER & debug)
-			pf->hw.debug_mask = debug;
-		pf->msg_enable = netif_msg_init((debug & ~I40E_DEBUG_USER),
-						I40E_DEFAULT_MSG_ENABLE);
-	}
-
 	/* Set default capability flags */
 	pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
 		    I40E_FLAG_MSI_ENABLED     |
@@ -8605,7 +8742,8 @@ static int i40e_sw_init(struct i40e_pf *pf)
 			     I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE |
 			     I40E_FLAG_NO_PCI_LINK_CHECK |
 			     I40E_FLAG_USE_SET_LLDP_MIB |
-			     I40E_FLAG_GENEVE_OFFLOAD_CAPABLE;
+			     I40E_FLAG_GENEVE_OFFLOAD_CAPABLE |
+			     I40E_FLAG_PTP_L4_CAPABLE;
 	} else if ((pf->hw.aq.api_maj_ver > 1) ||
 		   ((pf->hw.aq.api_maj_ver == 1) &&
 		    (pf->hw.aq.api_min_ver > 4))) {
@@ -9037,10 +9175,6 @@ static int i40e_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
 				       0, 0, nlflags, filter_mask, NULL);
 }
 
-/* Hardware supports L4 tunnel length of 128B (=2^7) which includes
- * inner mac plus all inner ethertypes.
- */
-#define I40E_MAX_TUNNEL_HDR_LEN 128
 /**
  * i40e_features_check - Validate encapsulated packet conforms to limits
  * @skb: skb buff
@@ -9051,12 +9185,52 @@ static netdev_features_t i40e_features_check(struct sk_buff *skb,
 					     struct net_device *dev,
 					     netdev_features_t features)
 {
-	if (skb->encapsulation &&
-	    ((skb_inner_network_header(skb) - skb_transport_header(skb)) >
-	     I40E_MAX_TUNNEL_HDR_LEN))
-		return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame.  We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes.  If it is then we need to drop support for GSO.
+	 */
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+		features &= ~NETIF_F_GSO_MASK;
+
+	/* MACLEN can support at most 63 words */
+	len = skb_network_header(skb) - skb->data;
+	if (len & ~(63 * 2))
+		goto out_err;
+
+	/* IPLEN and EIPLEN can support at most 127 dwords */
+	len = skb_transport_header(skb) - skb_network_header(skb);
+	if (len & ~(127 * 4))
+		goto out_err;
+
+	if (skb->encapsulation) {
+		/* L4TUNLEN can support 127 words */
+		len = skb_inner_network_header(skb) - skb_transport_header(skb);
+		if (len & ~(127 * 2))
+			goto out_err;
+
+		/* IPLEN can support at most 127 dwords */
+		len = skb_inner_transport_header(skb) -
+		      skb_inner_network_header(skb);
+		if (len & ~(127 * 4))
+			goto out_err;
+	}
+
+	/* No need to validate L4LEN as TCP is the only protocol with a
+	 * flexible value and we support all possible values supported
+	 * by TCP, which is at most 15 dwords
+	 */
 
 	return features;
+out_err:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
 }
 
 static const struct net_device_ops i40e_netdev_ops = {
@@ -9109,6 +9283,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_netdev_priv *np;
 	struct net_device *netdev;
+	u8 broadcast[ETH_ALEN];
 	u8 mac_addr[ETH_ALEN];
 	int etherdev_size;
 
@@ -9169,20 +9344,38 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 		 * which must be replaced by a normal filter.
 		 */
 		i40e_rm_default_mac_filter(vsi, mac_addr);
-		spin_lock_bh(&vsi->mac_filter_list_lock);
-		i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, true);
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY);
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 	} else {
 		/* relate the VSI_VMDQ name to the VSI_MAIN name */
 		snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
 			 pf->vsi[pf->lan_vsi]->netdev->name);
 		random_ether_addr(mac_addr);
 
-		spin_lock_bh(&vsi->mac_filter_list_lock);
-		i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, false);
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
+		i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY);
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 	}
 
+	/* Add the broadcast filter so that we initially will receive
+	 * broadcast packets. Note that when a new VLAN is first added the
+	 * driver will convert all filters marked I40E_VLAN_ANY into VLAN
+	 * specific filters as part of transitioning into "vlan" operation.
+	 * When more VLANs are added, the driver will copy each existing MAC
+	 * filter and add it for the new VLAN.
+	 *
+	 * Broadcast filters are handled specially by
+	 * i40e_sync_filters_subtask, as the driver must set the broadcast
+	 * promiscuous bit instead of adding this directly as a MAC/VLAN
+	 * filter. The subtask will update the correct broadcast promiscuous
+	 * bits as VLANs become active or inactive.
+	 */
+	eth_broadcast_addr(broadcast);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+	i40e_add_filter(vsi, broadcast, I40E_VLAN_ANY);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
 	ether_addr_copy(netdev->dev_addr, mac_addr);
 	ether_addr_copy(netdev->perm_addr, mac_addr);
 
@@ -9198,6 +9391,11 @@ static int i40e_config_netdev(struct i40e_vsi *vsi)
 	i40e_fcoe_config_netdev(netdev, vsi);
 #endif
 
+	/* MTU range: 68 - 9706 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = I40E_MAX_RXBUFFER -
+			  (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+
 	return 0;
 }
 
@@ -9260,11 +9458,12 @@ int i40e_is_vsi_uplink_mode_veb(struct i40e_vsi *vsi)
 static int i40e_add_vsi(struct i40e_vsi *vsi)
 {
 	int ret = -ENODEV;
-	i40e_status aq_ret = 0;
 	struct i40e_pf *pf = vsi->back;
 	struct i40e_hw *hw = &pf->hw;
 	struct i40e_vsi_context ctxt;
-	struct i40e_mac_filter *f, *ftmp;
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
+	int bkt;
 
 	u8 enabled_tc = 0x1; /* TC0 enabled */
 	int f_count = 0;
@@ -9448,28 +9647,16 @@ static int i40e_add_vsi(struct i40e_vsi *vsi)
 		vsi->seid = ctxt.seid;
 		vsi->id = ctxt.vsi_number;
 	}
-	/* Except FDIR VSI, for all othet VSI set the broadcast filter */
-	if (vsi->type != I40E_VSI_FDIR) {
-		aq_ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, true, NULL);
-		if (aq_ret) {
-			ret = i40e_aq_rc_to_posix(aq_ret,
-						  hw->aq.asq_last_status);
-			dev_info(&pf->pdev->dev,
-				 "set brdcast promisc failed, err %s, aq_err %s\n",
-				 i40e_stat_str(hw, aq_ret),
-				 i40e_aq_str(hw, hw->aq.asq_last_status));
-		}
-	}
 
 	vsi->active_filters = 0;
 	clear_bit(__I40E_FILTER_OVERFLOW_PROMISC, &vsi->state);
-	spin_lock_bh(&vsi->mac_filter_list_lock);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 	/* If macvlan filters already exist, force them to get loaded */
-	list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist) {
 		f->state = I40E_FILTER_NEW;
 		f_count++;
 	}
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	if (f_count) {
 		vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
@@ -9499,11 +9686,12 @@ err:
  **/
 int i40e_vsi_release(struct i40e_vsi *vsi)
 {
-	struct i40e_mac_filter *f, *ftmp;
+	struct i40e_mac_filter *f;
+	struct hlist_node *h;
 	struct i40e_veb *veb = NULL;
 	struct i40e_pf *pf;
 	u16 uplink_seid;
-	int i, n;
+	int i, n, bkt;
 
 	pf = vsi->back;
 
@@ -9533,11 +9721,19 @@ int i40e_vsi_release(struct i40e_vsi *vsi)
 		i40e_vsi_disable_irq(vsi);
 	}
 
-	spin_lock_bh(&vsi->mac_filter_list_lock);
-	list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list)
-		i40e_del_filter(vsi, f->macaddr, f->vlan,
-				f->is_vf, f->is_netdev);
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
+
+	/* clear the sync flag on all filters */
+	if (vsi->netdev) {
+		__dev_uc_unsync(vsi->netdev, NULL);
+		__dev_mc_unsync(vsi->netdev, NULL);
+	}
+
+	/* make sure any remaining filters are marked for deletion */
+	hash_for_each_safe(vsi->mac_filter_hash, bkt, h, f, hlist)
+		__i40e_del_filter(vsi, f);
+
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	i40e_sync_vsi_filters(vsi);
 
@@ -10806,10 +11002,12 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	mutex_init(&hw->aq.asq_mutex);
 	mutex_init(&hw->aq.arq_mutex);
 
-	if (debug != -1) {
-		pf->msg_enable = pf->hw.debug_mask;
-		pf->msg_enable = debug;
-	}
+	pf->msg_enable = netif_msg_init(debug,
+					NETIF_MSG_DRV |
+					NETIF_MSG_PROBE |
+					NETIF_MSG_LINK);
+	if (debug < -1)
+		pf->hw.debug_mask = debug;
 
 	/* do a special CORER for clearing PXE mode once at init */
 	if (hw->revision_id == 0 &&
@@ -10951,7 +11149,7 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	err = i40e_init_pf_dcb(pf);
 	if (err) {
 		dev_info(&pdev->dev, "DCB init failed %d, disabled\n", err);
-		pf->flags &= ~(I40E_FLAG_DCB_CAPABLE & I40E_FLAG_DCB_ENABLED);
+		pf->flags &= ~(I40E_FLAG_DCB_CAPABLE | I40E_FLAG_DCB_ENABLED);
 		/* Continue without DCB enabled */
 	}
 #endif /* CONFIG_I40E_DCB */
@@ -11209,7 +11407,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		dev_dbg(&pf->pdev->dev, "get supported phy types ret =  %s last_status =  %s\n",
 			i40e_stat_str(&pf->hw, err),
 			i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-	pf->hw.phy.phy_types = le32_to_cpu(abilities.phy_type);
 
 	/* Add a filter to drop all Flow control frames from any VSI from being
 	 * transmitted. By doing so we stop a malicious VF from sending out
@@ -11221,9 +11418,10 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 						       pf->main_vsi_seid);
 
 	if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
-	    (pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
-		pf->flags |= I40E_FLAG_HAVE_10GBASET_PHY;
-
+		(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
+		pf->flags |= I40E_FLAG_PHY_CONTROLS_LEDS;
+	if (pf->hw.device_id == I40E_DEV_ID_SFP_I_X722)
+		pf->flags |= I40E_FLAG_HAVE_CRT_RETIMER;
 	/* print a string summarizing features */
 	i40e_print_features(pf);
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 954efe3118db..38ee18f11124 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -722,9 +722,20 @@ i40e_status i40e_nvmupd_command(struct i40e_hw *hw,
 			*((u16 *)&bytes[2]) = hw->nvm_wait_opcode;
 		}
 
+		/* Clear error status on read */
+		if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR)
+			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+
 		return 0;
 	}
 
+	/* Clear the error status even if it was not read, and log it */
+	if (hw->nvmupd_state == I40E_NVMUPD_STATE_ERROR) {
+		i40e_debug(hw, I40E_DEBUG_NVM,
+			   "Clearing I40E_NVMUPD_STATE_ERROR state without reading\n");
+		hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
+	}
+
 	switch (hw->nvmupd_state) {
 	case I40E_NVMUPD_STATE_INIT:
 		status = i40e_nvmupd_state_init(hw, cmd, bytes, perrno);
@@ -1074,6 +1085,11 @@ void i40e_nvmupd_check_wait_event(struct i40e_hw *hw, u16 opcode)
 		}
 		hw->nvm_wait_opcode = 0;
 
+		if (hw->aq.arq_last_status) {
+			hw->nvmupd_state = I40E_NVMUPD_STATE_ERROR;
+			return;
+		}
+
 		switch (hw->nvmupd_state) {
 		case I40E_NVMUPD_STATE_INIT_WAIT:
 			hw->nvmupd_state = I40E_NVMUPD_STATE_INIT;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index 4660c5abc855..2551fc827444 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -144,6 +144,9 @@ enum i40e_status_code i40e_aq_set_vsi_uc_promisc_on_vlan(struct i40e_hw *hw,
 							 u16 seid, bool enable,
 							 u16 vid,
 				struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_vsi_bc_promisc_on_vlan(struct i40e_hw *hw,
+				u16 seid, bool enable, u16 vid,
+				struct i40e_asq_cmd_details *cmd_details);
 i40e_status i40e_aq_set_vsi_vlan_promisc(struct i40e_hw *hw,
 				u16 seid, bool enable,
 				struct i40e_asq_cmd_details *cmd_details);
@@ -362,10 +365,18 @@ i40e_status i40e_aq_rx_ctl_write_register(struct i40e_hw *hw,
 				u32 reg_addr, u32 reg_val,
 				struct i40e_asq_cmd_details *cmd_details);
 void i40e_write_rx_ctl(struct i40e_hw *hw, u32 reg_addr, u32 reg_val);
-i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page,
-				   u16 reg, u8 phy_addr, u16 *value);
-i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page,
-				    u16 reg, u8 phy_addr, u16 value);
+i40e_status i40e_read_phy_register_clause22(struct i40e_hw *hw,
+					    u16 reg, u8 phy_addr, u16 *value);
+i40e_status i40e_write_phy_register_clause22(struct i40e_hw *hw,
+					     u16 reg, u8 phy_addr, u16 value);
+i40e_status i40e_read_phy_register_clause45(struct i40e_hw *hw,
+				u8 page, u16 reg, u8 phy_addr, u16 *value);
+i40e_status i40e_write_phy_register_clause45(struct i40e_hw *hw,
+				u8 page, u16 reg, u8 phy_addr, u16 value);
+i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+				   u8 phy_addr, u16 *value);
+i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+				    u8 phy_addr, u16 value);
 u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num);
 i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
 				    u32 time, u32 interval);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
index f1feceab758a..9e49ffafce28 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c
@@ -159,16 +159,15 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
 	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
 	struct timespec64 now, then;
-	unsigned long flags;
 
 	then = ns_to_timespec64(delta);
-	spin_lock_irqsave(&pf->tmreg_lock, flags);
+	mutex_lock(&pf->tmreg_lock);
 
 	i40e_ptp_read(pf, &now);
 	now = timespec64_add(now, then);
 	i40e_ptp_write(pf, (const struct timespec64 *)&now);
 
-	spin_unlock_irqrestore(&pf->tmreg_lock, flags);
+	mutex_unlock(&pf->tmreg_lock);
 
 	return 0;
 }
@@ -184,11 +183,10 @@ static int i40e_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 static int i40e_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 {
 	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
-	unsigned long flags;
 
-	spin_lock_irqsave(&pf->tmreg_lock, flags);
+	mutex_lock(&pf->tmreg_lock);
 	i40e_ptp_read(pf, ts);
-	spin_unlock_irqrestore(&pf->tmreg_lock, flags);
+	mutex_unlock(&pf->tmreg_lock);
 
 	return 0;
 }
@@ -205,11 +203,10 @@ static int i40e_ptp_settime(struct ptp_clock_info *ptp,
 			    const struct timespec64 *ts)
 {
 	struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps);
-	unsigned long flags;
 
-	spin_lock_irqsave(&pf->tmreg_lock, flags);
+	mutex_lock(&pf->tmreg_lock);
 	i40e_ptp_write(pf, ts);
-	spin_unlock_irqrestore(&pf->tmreg_lock, flags);
+	mutex_unlock(&pf->tmreg_lock);
 
 	return 0;
 }
@@ -230,6 +227,47 @@ static int i40e_ptp_feature_enable(struct ptp_clock_info *ptp,
 }
 
 /**
+ * i40e_ptp_get_rx_events - Read I40E_PRTTSYN_STAT_1 and latch events
+ * @pf: the PF data structure
+ *
+ * This function reads I40E_PRTTSYN_STAT_1 and updates the corresponding timers
+ * for noticed latch events. This allows the driver to keep track of the first
+ * time a latch event was noticed which will be used to help clear out Rx
+ * timestamps for packets that got dropped or lost.
+ *
+ * This function will return the current value of I40E_PRTTSYN_STAT_1 and is
+ * expected to be called only while under the ptp_rx_lock.
+ **/
+static u32 i40e_ptp_get_rx_events(struct i40e_pf *pf)
+{
+	struct i40e_hw *hw = &pf->hw;
+	u32 prttsyn_stat, new_latch_events;
+	int  i;
+
+	prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1);
+	new_latch_events = prttsyn_stat & ~pf->latch_event_flags;
+
+	/* Update the jiffies time for any newly latched timestamp. This
+	 * ensures that we store the time that we first discovered a timestamp
+	 * was latched by the hardware. The service task will later determine
+	 * if we should free the latch and drop that timestamp should too much
+	 * time pass. This flow ensures that we only update jiffies for new
+	 * events latched since the last time we checked, and not all events
+	 * currently latched, so that the service task accounting remains
+	 * accurate.
+	 */
+	for (i = 0; i < 4; i++) {
+		if (new_latch_events & BIT(i))
+			pf->latch_events[i] = jiffies;
+	}
+
+	/* Finally, we store the current status of the Rx timestamp latches */
+	pf->latch_event_flags = prttsyn_stat;
+
+	return prttsyn_stat;
+}
+
+/**
  * i40e_ptp_rx_hang - Detect error case when Rx timestamp registers are hung
  * @vsi: The VSI with the rings relevant to 1588
  *
@@ -242,10 +280,7 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi)
 {
 	struct i40e_pf *pf = vsi->back;
 	struct i40e_hw *hw = &pf->hw;
-	struct i40e_ring *rx_ring;
-	unsigned long rx_event;
-	u32 prttsyn_stat;
-	int n;
+	int i;
 
 	/* Since we cannot turn off the Rx timestamp logic if the device is
 	 * configured for Tx timestamping, we check if Rx timestamping is
@@ -255,42 +290,30 @@ void i40e_ptp_rx_hang(struct i40e_vsi *vsi)
 	if (!(pf->flags & I40E_FLAG_PTP) || !pf->ptp_rx)
 		return;
 
-	prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1);
+	spin_lock_bh(&pf->ptp_rx_lock);
 
-	/* Unless all four receive timestamp registers are latched, we are not
-	 * concerned about a possible PTP Rx hang, so just update the timeout
-	 * counter and exit.
-	 */
-	if (!(prttsyn_stat & ((I40E_PRTTSYN_STAT_1_RXT0_MASK <<
-			       I40E_PRTTSYN_STAT_1_RXT0_SHIFT) |
-			      (I40E_PRTTSYN_STAT_1_RXT1_MASK <<
-			       I40E_PRTTSYN_STAT_1_RXT1_SHIFT) |
-			      (I40E_PRTTSYN_STAT_1_RXT2_MASK <<
-			       I40E_PRTTSYN_STAT_1_RXT2_SHIFT) |
-			      (I40E_PRTTSYN_STAT_1_RXT3_MASK <<
-			       I40E_PRTTSYN_STAT_1_RXT3_SHIFT)))) {
-		pf->last_rx_ptp_check = jiffies;
-		return;
-	}
+	/* Update current latch times for Rx events */
+	i40e_ptp_get_rx_events(pf);
 
-	/* Determine the most recent watchdog or rx_timestamp event. */
-	rx_event = pf->last_rx_ptp_check;
-	for (n = 0; n < vsi->num_queue_pairs; n++) {
-		rx_ring = vsi->rx_rings[n];
-		if (time_after(rx_ring->last_rx_timestamp, rx_event))
-			rx_event = rx_ring->last_rx_timestamp;
+	/* Check all the currently latched Rx events and see whether they have
+	 * been latched for over a second. It is assumed that any timestamp
+	 * should have been cleared within this time, or else it was captured
+	 * for a dropped frame that the driver never received. Thus, we will
+	 * clear any timestamp that has been latched for over 1 second.
+	 */
+	for (i = 0; i < 4; i++) {
+		if ((pf->latch_event_flags & BIT(i)) &&
+		    time_is_before_jiffies(pf->latch_events[i] + HZ)) {
+			rd32(hw, I40E_PRTTSYN_RXTIME_H(i));
+			pf->latch_event_flags &= ~BIT(i);
+			pf->rx_hwtstamp_cleared++;
+			dev_warn(&pf->pdev->dev,
+				 "Clearing a missed Rx timestamp event for RXTIME[%d]\n",
+				 i);
+		}
 	}
 
-	/* Only need to read the high RXSTMP register to clear the lock */
-	if (time_is_before_jiffies(rx_event + 5 * HZ)) {
-		rd32(hw, I40E_PRTTSYN_RXTIME_H(0));
-		rd32(hw, I40E_PRTTSYN_RXTIME_H(1));
-		rd32(hw, I40E_PRTTSYN_RXTIME_H(2));
-		rd32(hw, I40E_PRTTSYN_RXTIME_H(3));
-		pf->last_rx_ptp_check = jiffies;
-		pf->rx_hwtstamp_cleared++;
-		WARN_ONCE(1, "Detected Rx timestamp register hang\n");
-	}
+	spin_unlock_bh(&pf->ptp_rx_lock);
 }
 
 /**
@@ -353,14 +376,25 @@ void i40e_ptp_rx_hwtstamp(struct i40e_pf *pf, struct sk_buff *skb, u8 index)
 
 	hw = &pf->hw;
 
-	prttsyn_stat = rd32(hw, I40E_PRTTSYN_STAT_1);
+	spin_lock_bh(&pf->ptp_rx_lock);
 
-	if (!(prttsyn_stat & BIT(index)))
+	/* Get current Rx events and update latch times */
+	prttsyn_stat = i40e_ptp_get_rx_events(pf);
+
+	/* TODO: Should we warn about missing Rx timestamp event? */
+	if (!(prttsyn_stat & BIT(index))) {
+		spin_unlock_bh(&pf->ptp_rx_lock);
 		return;
+	}
+
+	/* Clear the latched event since we're about to read its register */
+	pf->latch_event_flags &= ~BIT(index);
 
 	lo = rd32(hw, I40E_PRTTSYN_RXTIME_L(index));
 	hi = rd32(hw, I40E_PRTTSYN_RXTIME_H(index));
 
+	spin_unlock_bh(&pf->ptp_rx_lock);
+
 	ns = (((u64)hi) << 32) | lo;
 
 	i40e_ptp_convert_to_hwtstamp(skb_hwtstamps(skb), ns);
@@ -487,6 +521,8 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
 	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+		if (!(pf->flags & I40E_FLAG_PTP_L4_CAPABLE))
+			return -ERANGE;
 		pf->ptp_rx = true;
 		tsyntype = I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK |
 			   I40E_PRTTSYN_CTL1_TSYNTYPE_V1 |
@@ -494,19 +530,26 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
 		config->rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
 		break;
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
-	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
-	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+		if (!(pf->flags & I40E_FLAG_PTP_L4_CAPABLE))
+			return -ERANGE;
+		/* fall through */
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
 		pf->ptp_rx = true;
 		tsyntype = I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK |
-			   I40E_PRTTSYN_CTL1_TSYNTYPE_V2 |
-			   I40E_PRTTSYN_CTL1_UDP_ENA_MASK;
-		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+			   I40E_PRTTSYN_CTL1_TSYNTYPE_V2;
+		if (pf->flags & I40E_FLAG_PTP_L4_CAPABLE) {
+			tsyntype |= I40E_PRTTSYN_CTL1_UDP_ENA_MASK;
+			config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		} else {
+			config->rx_filter = HWTSTAMP_FILTER_PTP_V2_L2_EVENT;
+		}
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	default:
@@ -514,12 +557,15 @@ static int i40e_ptp_set_timestamp_mode(struct i40e_pf *pf,
 	}
 
 	/* Clear out all 1588-related registers to clear and unlatch them. */
+	spin_lock_bh(&pf->ptp_rx_lock);
 	rd32(hw, I40E_PRTTSYN_STAT_0);
 	rd32(hw, I40E_PRTTSYN_TXTIME_H);
 	rd32(hw, I40E_PRTTSYN_RXTIME_H(0));
 	rd32(hw, I40E_PRTTSYN_RXTIME_H(1));
 	rd32(hw, I40E_PRTTSYN_RXTIME_H(2));
 	rd32(hw, I40E_PRTTSYN_RXTIME_H(3));
+	pf->latch_event_flags = 0;
+	spin_unlock_bh(&pf->ptp_rx_lock);
 
 	/* Enable/disable the Tx timestamp interrupt based on user input. */
 	regval = rd32(hw, I40E_PRTTSYN_CTL0);
@@ -658,10 +704,8 @@ void i40e_ptp_init(struct i40e_pf *pf)
 		return;
 	}
 
-	/* we have to initialize the lock first, since we can't control
-	 * when the user will enter the PHC device entry points
-	 */
-	spin_lock_init(&pf->tmreg_lock);
+	mutex_init(&pf->tmreg_lock);
+	spin_lock_init(&pf->ptp_rx_lock);
 
 	/* ensure we have a clock device */
 	err = i40e_ptp_create_clock(pf);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 6287bf63c43c..352cf7cd2ef4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -122,14 +122,10 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
 	struct device *dev;
 	dma_addr_t dma;
 	u32 td_cmd = 0;
-	u16 delay = 0;
 	u16 i;
 
 	/* find existing FDIR VSI */
-	vsi = NULL;
-	for (i = 0; i < pf->num_alloc_vsi; i++)
-		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
-			vsi = pf->vsi[i];
+	vsi = i40e_find_vsi_by_type(pf, I40E_VSI_FDIR);
 	if (!vsi)
 		return -ENOENT;
 
@@ -137,15 +133,11 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
 	dev = tx_ring->dev;
 
 	/* we need two descriptors to add/del a filter and we can wait */
-	do {
-		if (I40E_DESC_UNUSED(tx_ring) > 1)
-			break;
+	for (i = I40E_FD_CLEAN_DELAY; I40E_DESC_UNUSED(tx_ring) < 2; i--) {
+		if (!i)
+			return -EAGAIN;
 		msleep_interruptible(1);
-		delay++;
-	} while (delay < I40E_FD_CLEAN_DELAY);
-
-	if (!(I40E_DESC_UNUSED(tx_ring) > 1))
-		return -EAGAIN;
+	}
 
 	dma = dma_map_single(dev, raw_packet,
 			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
@@ -335,22 +327,6 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
 	return err ? -EOPNOTSUPP : 0;
 }
 
-/**
- * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
-				    struct i40e_fdir_filter *fd_data,
-				    bool add)
-{
-	return -EOPNOTSUPP;
-}
-
 #define I40E_IP_DUMMY_PACKET_LEN 34
 /**
  * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
@@ -433,12 +409,6 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
 	case UDP_V4_FLOW:
 		ret = i40e_add_del_fdir_udpv4(vsi, input, add);
 		break;
-	case SCTP_V4_FLOW:
-		ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
-		break;
-	case IPV4_FLOW:
-		ret = i40e_add_del_fdir_ipv4(vsi, input, add);
-		break;
 	case IP_USER_FLOW:
 		switch (input->ip4_proto) {
 		case IPPROTO_TCP:
@@ -447,15 +417,16 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
 		case IPPROTO_UDP:
 			ret = i40e_add_del_fdir_udpv4(vsi, input, add);
 			break;
-		case IPPROTO_SCTP:
-			ret = i40e_add_del_fdir_sctpv4(vsi, input, add);
-			break;
-		default:
+		case IPPROTO_IP:
 			ret = i40e_add_del_fdir_ipv4(vsi, input, add);
 			break;
+		default:
+			/* We cannot support masking based on protocol */
+			goto unsupported_flow;
 		}
 		break;
 	default:
+unsupported_flow:
 		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
 			 input->flow_type);
 		ret = -EINVAL;
@@ -645,7 +616,7 @@ u32 i40e_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 	return 0;
 }
 
-#define WB_STRIDE 0x3
+#define WB_STRIDE 4
 
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
@@ -761,7 +732,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 		unsigned int j = i40e_get_tx_pending(tx_ring, false);
 
 		if (budget &&
-		    ((j / (WB_STRIDE + 1)) == 0) && (j != 0) &&
+		    ((j / WB_STRIDE) == 0) && (j > 0) &&
 		    !test_bit(__I40E_DOWN, &vsi->state) &&
 		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
 			tx_ring->arm_wb = true;
@@ -1246,7 +1217,6 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
 		 * because each write-back erases this info.
 		 */
 		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
-		rx_desc->read.hdr_addr = 0;
 
 		rx_desc++;
 		bi++;
@@ -1437,13 +1407,12 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
 	u64 qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
 	u32 rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
 			I40E_RXD_QW1_STATUS_SHIFT;
-	u32 rsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
+	u32 tsynvalid = rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK;
+	u32 tsyn = (rx_status & I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
 		   I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT;
 
-	if (unlikely(rsyn)) {
-		i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, rsyn);
-		rx_ring->last_rx_timestamp = jiffies;
-	}
+	if (unlikely(tsynvalid))
+		i40e_ptp_rx_hwtstamp(rx_ring->vsi->back, skb, tsyn);
 
 	i40e_rx_hash(rx_ring, rx_desc, skb, rx_ptype);
 
@@ -1767,7 +1736,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	while (likely(total_rx_packets < budget)) {
 		union i40e_rx_desc *rx_desc;
 		struct sk_buff *skb;
-		u32 rx_status;
 		u16 vlan_tag;
 		u8 rx_ptype;
 		u64 qword;
@@ -1781,21 +1749,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 
 		rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
-			   I40E_RXD_QW1_PTYPE_SHIFT;
-		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
-			    I40E_RXD_QW1_STATUS_SHIFT;
-
-		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
-			break;
-
 		/* status_error_len will always be zero for unused descriptors
 		 * because it's cleared in cleanup, and overlaps with hdr_addr
 		 * which is always zero because packet split isn't used, if the
 		 * hardware wrote DD then it will be non-zero
 		 */
-		if (!rx_desc->wb.qword1.status_error_len)
+		if (!i40e_test_staterr(rx_desc,
+				       BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
 			break;
 
 		/* This memory barrier is needed to keep us from reading
@@ -1829,6 +1789,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+			   I40E_RXD_QW1_PTYPE_SHIFT;
+
 		/* populate checksum, VLAN, and protocol */
 		i40e_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
@@ -2025,12 +1989,25 @@ int i40e_napi_poll(struct napi_struct *napi, int budget)
 
 	/* If work not completed, return budget and polling will return */
 	if (!clean_complete) {
+		const cpumask_t *aff_mask = &q_vector->affinity_mask;
+		int cpu_id = smp_processor_id();
+
+		/* It is possible that the interrupt affinity has changed but,
+		 * if the cpu is pegged at 100%, polling will never exit while
+		 * traffic continues and the interrupt will be stuck on this
+		 * cpu.  We check to make sure affinity is correct before we
+		 * continue to poll, otherwise we must stop polling so the
+		 * interrupt can move to the correct cpu.
+		 */
+		if (likely(cpumask_test_cpu(cpu_id, aff_mask) ||
+			   !(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))) {
 tx_only:
-		if (arm_wb) {
-			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
-			i40e_enable_wb_on_itr(vsi, q_vector);
+			if (arm_wb) {
+				q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+				i40e_enable_wb_on_itr(vsi, q_vector);
+			}
+			return budget;
 		}
-		return budget;
 	}
 
 	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
@@ -2038,12 +2015,19 @@ tx_only:
 
 	/* Work is done so exit the polling mode and re-enable the interrupt */
 	napi_complete_done(napi, work_done);
-	if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
-		i40e_update_enable_itr(vsi, q_vector);
-	} else { /* Legacy mode */
+
+	/* If we're prematurely stopping polling to fix the interrupt
+	 * affinity we want to make sure polling starts back up so we
+	 * issue a call to i40e_force_wb which triggers a SW interrupt.
+	 */
+	if (!clean_complete)
+		i40e_force_wb(vsi, q_vector);
+	else if (!(vsi->back->flags & I40E_FLAG_MSIX_ENABLED))
 		i40e_irq_dynamic_enable_icr0(vsi->back, false);
-	}
-	return 0;
+	else
+		i40e_update_enable_itr(vsi, q_vector);
+
+	return min(work_done, budget - 1);
 }
 
 /**
@@ -2716,9 +2700,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	u32 td_tag = 0;
 	dma_addr_t dma;
 	u16 gso_segs;
-	u16 desc_count = 0;
-	bool tail_bump = true;
-	bool do_rs = false;
+	u16 desc_count = 1;
 
 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -2801,8 +2783,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	/* set next_to_watch value indicating a packet is present */
-	first->next_to_watch = tx_desc;
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 
 	i++;
 	if (i == tx_ring->count)
@@ -2810,66 +2791,72 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	tx_ring->next_to_use = i;
 
-	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
+	/* write last descriptor with EOP bit */
+	td_cmd |= I40E_TX_DESC_CMD_EOP;
+
+	/* We can OR these values together as they both are checked against
+	 * 4 below and at this point desc_count will be used as a boolean value
+	 * after this if/else block.
+	 */
+	desc_count |= ++tx_ring->packet_stride;
+
 	/* Algorithm to optimize tail and RS bit setting:
-	 * if xmit_more is supported
-	 *	if xmit_more is true
-	 *		do not update tail and do not mark RS bit.
-	 *	if xmit_more is false and last xmit_more was false
-	 *		if every packet spanned less than 4 desc
-	 *			then set RS bit on 4th packet and update tail
-	 *			on every packet
-	 *		else
-	 *			update tail and set RS bit on every packet.
-	 *	if xmit_more is false and last_xmit_more was true
-	 *		update tail and set RS bit.
+	 * if queue is stopped
+	 *	mark RS bit
+	 *	reset packet counter
+	 * else if xmit_more is supported and is true
+	 *	advance packet counter to 4
+	 *	reset desc_count to 0
 	 *
-	 * Optimization: wmb to be issued only in case of tail update.
-	 * Also optimize the Descriptor WB path for RS bit with the same
-	 * algorithm.
+	 * if desc_count >= 4
+	 *	mark RS bit
+	 *	reset packet counter
+	 * if desc_count > 0
+	 *	update tail
 	 *
-	 * Note: If there are less than 4 packets
+	 * Note: If there are less than 4 descriptors
 	 * pending and interrupts were disabled the service task will
 	 * trigger a force WB.
 	 */
-	if (skb->xmit_more  &&
-	    !netif_xmit_stopped(txring_txq(tx_ring))) {
-		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-		tail_bump = false;
-	} else if (!skb->xmit_more &&
-		   !netif_xmit_stopped(txring_txq(tx_ring)) &&
-		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
-		   (tx_ring->packet_stride < WB_STRIDE) &&
-		   (desc_count < WB_STRIDE)) {
-		tx_ring->packet_stride++;
-	} else {
+	if (netif_xmit_stopped(txring_txq(tx_ring))) {
+		goto do_rs;
+	} else if (skb->xmit_more) {
+		/* set stride to arm on next packet and reset desc_count */
+		tx_ring->packet_stride = WB_STRIDE;
+		desc_count = 0;
+	} else if (desc_count >= WB_STRIDE) {
+do_rs:
+		/* write last descriptor with RS bit set */
+		td_cmd |= I40E_TX_DESC_CMD_RS;
 		tx_ring->packet_stride = 0;
-		tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-		do_rs = true;
 	}
-	if (do_rs)
-		tx_ring->packet_stride = 0;
 
 	tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag) |
-			cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
-						  I40E_TX_DESC_CMD_EOP) <<
-						  I40E_TXD_QW1_CMD_SHIFT);
+			build_ctob(td_cmd, td_offset, size, td_tag);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 *
+	 * We also use this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
 
 	/* notify HW of packet */
-	if (!tail_bump) {
-		prefetchw(tx_desc + 1);
-	} else {
-		/* Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.  (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
+	if (desc_count) {
 		writel(i, tx_ring->tail);
+
+		/* we need this if more than one processor can write to our tail
+		 * at a time, it synchronizes IO on IA64/Altix systems
+		 */
+		mmiowb();
 	}
+
 	return;
 
 dma_error:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 508840585645..e065321ce8ed 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -173,26 +173,37 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
 #define I40E_MAX_DATA_PER_TXD_ALIGNED \
 	(I40E_MAX_DATA_PER_TXD & ~(I40E_MAX_READ_REQ_SIZE - 1))
 
-/* This ugly bit of math is equivalent to DIV_ROUNDUP(size, X) where X is
- * the value I40E_MAX_DATA_PER_TXD_ALIGNED.  It is needed due to the fact
- * that 12K is not a power of 2 and division is expensive.  It is used to
- * approximate the number of descriptors used per linear buffer.  Note
- * that this will overestimate in some cases as it doesn't account for the
- * fact that we will add up to 4K - 1 in aligning the 12K buffer, however
- * the error should not impact things much as large buffers usually mean
- * we will use fewer descriptors then there are frags in an skb.
+/**
+ * i40e_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment.  For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + 1;
+ *
+ * Since the multiply and the two right shifts can be folded together, we
+ * can reorder the operations into:
+ *     return ((size * 85) >> 20) + 1;
  */
 static inline unsigned int i40e_txd_use_count(unsigned int size)
 {
-	const unsigned int max = I40E_MAX_DATA_PER_TXD_ALIGNED;
-	const unsigned int reciprocal = ((1ull << 32) - 1 + (max / 2)) / max;
-	unsigned int adjust = ~(u32)0;
-
-	/* if we rounded up on the reciprocal pull down the adjustment */
-	if ((max * reciprocal) > adjust)
-		adjust = ~(u32)(reciprocal - 1);
-
-	return (u32)((((u64)size * reciprocal) + adjust) >> 32);
+	return ((size * 85) >> 20) + 1;
 }
 
 /* Tx Descriptors needed, worst case */
@@ -307,15 +318,12 @@ struct i40e_ring {
 	u8 atr_sample_rate;
 	u8 atr_count;
 
-	unsigned long last_rx_timestamp;
-
 	bool ring_active;		/* is ring online or not */
 	bool arm_wb;		/* do something to arm write back */
 	u8 packet_stride;
 
 	u16 flags;
 #define I40E_TXR_FLAGS_WB_ON_ITR	BIT(0)
-#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2)
 
 	/* stats structs */
 	struct i40e_queue_stats	stats;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index bd5f13bef83c..edc0abdf4783 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -90,14 +90,23 @@ enum i40e_debug_mask {
 	I40E_DEBUG_ALL			= 0xFFFFFFFF
 };
 
-#define I40E_MDIO_STCODE                0
-#define I40E_MDIO_OPCODE_ADDRESS        0
-#define I40E_MDIO_OPCODE_WRITE          I40E_MASK(1, \
+#define I40E_MDIO_CLAUSE22_STCODE_MASK	I40E_MASK(1, \
+						  I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_MDIO_CLAUSE22_OPCODE_WRITE_MASK	I40E_MASK(1, \
 						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
-#define I40E_MDIO_OPCODE_READ_INC_ADDR  I40E_MASK(2, \
+#define I40E_MDIO_CLAUSE22_OPCODE_READ_MASK	I40E_MASK(2, \
 						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
-#define I40E_MDIO_OPCODE_READ           I40E_MASK(3, \
+
+#define I40E_MDIO_CLAUSE45_STCODE_MASK	I40E_MASK(0, \
+						  I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_ADDRESS_MASK	I40E_MASK(0, \
+						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_WRITE_MASK	I40E_MASK(1, \
 						  I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_READ_INC_ADDR_MASK	I40E_MASK(2, \
+						I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_MDIO_CLAUSE45_OPCODE_READ_MASK	I40E_MASK(3, \
+						I40E_GLGEN_MSCA_OPCODE_SHIFT)
 
 #define I40E_PHY_COM_REG_PAGE                   0x1E
 #define I40E_PHY_LED_LINK_MODE_MASK             0xF0
@@ -204,47 +213,59 @@ struct i40e_link_status {
 #define I40E_MODULE_TYPE_1000BASE_T	0x08
 };
 
-enum i40e_aq_capabilities_phy_type {
-	I40E_CAP_PHY_TYPE_SGMII		  = BIT(I40E_PHY_TYPE_SGMII),
-	I40E_CAP_PHY_TYPE_1000BASE_KX	  = BIT(I40E_PHY_TYPE_1000BASE_KX),
-	I40E_CAP_PHY_TYPE_10GBASE_KX4	  = BIT(I40E_PHY_TYPE_10GBASE_KX4),
-	I40E_CAP_PHY_TYPE_10GBASE_KR	  = BIT(I40E_PHY_TYPE_10GBASE_KR),
-	I40E_CAP_PHY_TYPE_40GBASE_KR4	  = BIT(I40E_PHY_TYPE_40GBASE_KR4),
-	I40E_CAP_PHY_TYPE_XAUI		  = BIT(I40E_PHY_TYPE_XAUI),
-	I40E_CAP_PHY_TYPE_XFI		  = BIT(I40E_PHY_TYPE_XFI),
-	I40E_CAP_PHY_TYPE_SFI		  = BIT(I40E_PHY_TYPE_SFI),
-	I40E_CAP_PHY_TYPE_XLAUI		  = BIT(I40E_PHY_TYPE_XLAUI),
-	I40E_CAP_PHY_TYPE_XLPPI		  = BIT(I40E_PHY_TYPE_XLPPI),
-	I40E_CAP_PHY_TYPE_40GBASE_CR4_CU  = BIT(I40E_PHY_TYPE_40GBASE_CR4_CU),
-	I40E_CAP_PHY_TYPE_10GBASE_CR1_CU  = BIT(I40E_PHY_TYPE_10GBASE_CR1_CU),
-	I40E_CAP_PHY_TYPE_10GBASE_AOC	  = BIT(I40E_PHY_TYPE_10GBASE_AOC),
-	I40E_CAP_PHY_TYPE_40GBASE_AOC	  = BIT(I40E_PHY_TYPE_40GBASE_AOC),
-	I40E_CAP_PHY_TYPE_100BASE_TX	  = BIT(I40E_PHY_TYPE_100BASE_TX),
-	I40E_CAP_PHY_TYPE_1000BASE_T	  = BIT(I40E_PHY_TYPE_1000BASE_T),
-	I40E_CAP_PHY_TYPE_10GBASE_T	  = BIT(I40E_PHY_TYPE_10GBASE_T),
-	I40E_CAP_PHY_TYPE_10GBASE_SR	  = BIT(I40E_PHY_TYPE_10GBASE_SR),
-	I40E_CAP_PHY_TYPE_10GBASE_LR	  = BIT(I40E_PHY_TYPE_10GBASE_LR),
-	I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU = BIT(I40E_PHY_TYPE_10GBASE_SFPP_CU),
-	I40E_CAP_PHY_TYPE_10GBASE_CR1	  = BIT(I40E_PHY_TYPE_10GBASE_CR1),
-	I40E_CAP_PHY_TYPE_40GBASE_CR4	  = BIT(I40E_PHY_TYPE_40GBASE_CR4),
-	I40E_CAP_PHY_TYPE_40GBASE_SR4	  = BIT(I40E_PHY_TYPE_40GBASE_SR4),
-	I40E_CAP_PHY_TYPE_40GBASE_LR4	  = BIT(I40E_PHY_TYPE_40GBASE_LR4),
-	I40E_CAP_PHY_TYPE_1000BASE_SX	  = BIT(I40E_PHY_TYPE_1000BASE_SX),
-	I40E_CAP_PHY_TYPE_1000BASE_LX	  = BIT(I40E_PHY_TYPE_1000BASE_LX),
-	I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL =
-					 BIT(I40E_PHY_TYPE_1000BASE_T_OPTICAL),
-	I40E_CAP_PHY_TYPE_20GBASE_KR2	  = BIT(I40E_PHY_TYPE_20GBASE_KR2)
-};
-
 struct i40e_phy_info {
 	struct i40e_link_status link_info;
 	struct i40e_link_status link_info_old;
 	bool get_link_info;
 	enum i40e_media_type media_type;
 	/* all the phy types the NVM is capable of */
-	enum i40e_aq_capabilities_phy_type phy_types;
-};
-
+	u64 phy_types;
+};
+
+#define I40E_CAP_PHY_TYPE_SGMII BIT_ULL(I40E_PHY_TYPE_SGMII)
+#define I40E_CAP_PHY_TYPE_1000BASE_KX BIT_ULL(I40E_PHY_TYPE_1000BASE_KX)
+#define I40E_CAP_PHY_TYPE_10GBASE_KX4 BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4)
+#define I40E_CAP_PHY_TYPE_10GBASE_KR BIT_ULL(I40E_PHY_TYPE_10GBASE_KR)
+#define I40E_CAP_PHY_TYPE_40GBASE_KR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4)
+#define I40E_CAP_PHY_TYPE_XAUI BIT_ULL(I40E_PHY_TYPE_XAUI)
+#define I40E_CAP_PHY_TYPE_XFI BIT_ULL(I40E_PHY_TYPE_XFI)
+#define I40E_CAP_PHY_TYPE_SFI BIT_ULL(I40E_PHY_TYPE_SFI)
+#define I40E_CAP_PHY_TYPE_XLAUI BIT_ULL(I40E_PHY_TYPE_XLAUI)
+#define I40E_CAP_PHY_TYPE_XLPPI BIT_ULL(I40E_PHY_TYPE_XLPPI)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4_CU BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_AOC BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_40GBASE_AOC BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_100BASE_TX BIT_ULL(I40E_PHY_TYPE_100BASE_TX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T BIT_ULL(I40E_PHY_TYPE_1000BASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_T BIT_ULL(I40E_PHY_TYPE_10GBASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_SR BIT_ULL(I40E_PHY_TYPE_10GBASE_SR)
+#define I40E_CAP_PHY_TYPE_10GBASE_LR BIT_ULL(I40E_PHY_TYPE_10GBASE_LR)
+#define I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1 BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_SR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_LR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4)
+#define I40E_CAP_PHY_TYPE_1000BASE_SX BIT_ULL(I40E_PHY_TYPE_1000BASE_SX)
+#define I40E_CAP_PHY_TYPE_1000BASE_LX BIT_ULL(I40E_PHY_TYPE_1000BASE_LX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL)
+#define I40E_CAP_PHY_TYPE_20GBASE_KR2 BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2)
+/* Defining the macro I40E_PHY_TYPE_OFFSET to implement a bit shift for
+ * some PHY types. There is an unused bit (31) in the I40E_CAP_PHY_TYPE_*
+ * bit fields but no corresponding gap in the i40e_aq_phy_type enumeration.
+ * So, a shift is needed to adjust for this with values larger than 31. The
+ * only affected values are I40E_PHY_TYPE_25GBASE_*.
+ */
+#define I40E_PHY_TYPE_OFFSET 1
+#define I40E_CAP_PHY_TYPE_25GBASE_KR BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_CR BIT_ULL(I40E_PHY_TYPE_25GBASE_CR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_SR BIT_ULL(I40E_PHY_TYPE_25GBASE_SR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \
+					     I40E_PHY_TYPE_OFFSET)
 #define I40E_HW_CAP_MAX_GPIO			30
 /* Capabilities of a PF or a VF or the whole device */
 struct i40e_hw_capabilities {
@@ -254,6 +275,10 @@ struct i40e_hw_capabilities {
 #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD	0x3
 
 	u32  management_mode;
+	u32  mng_protocols_over_mctp;
+#define I40E_MNG_PROTOCOL_PLDM		0x2
+#define I40E_MNG_PROTOCOL_OEM_COMMANDS	0x4
+#define I40E_MNG_PROTOCOL_NCSI		0x8
 	u32  npar_enable;
 	u32  os2bmc;
 	u32  valid_functions;
@@ -366,6 +391,7 @@ enum i40e_nvmupd_state {
 	I40E_NVMUPD_STATE_WRITING,
 	I40E_NVMUPD_STATE_INIT_WAIT,
 	I40E_NVMUPD_STATE_WRITE_WAIT,
+	I40E_NVMUPD_STATE_ERROR
 };
 
 /* nvm_access definition and its masks/shifts need to be accessible to
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
index f861d3109d1a..974ba2baf6ea 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
@@ -165,6 +165,10 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
 #define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00100000
 
+#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
+				    I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
+				    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
+
 struct i40e_virtchnl_vf_resource {
 	u16 num_vsis;
 	u16 num_queue_pairs;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 54b8ee2583f1..a6198b727e24 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -674,6 +674,7 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 	}
 	if (type == I40E_VSI_SRIOV) {
 		u64 hena = i40e_pf_get_default_rss_hena(pf);
+		u8 broadcast[ETH_ALEN];
 
 		vf->lan_vsi_idx = vsi->idx;
 		vf->lan_vsi_id = vsi->id;
@@ -686,17 +687,23 @@ static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
 		if (vf->port_vlan_id)
 			i40e_vsi_add_pvid(vsi, vf->port_vlan_id);
 
-		spin_lock_bh(&vsi->mac_filter_list_lock);
+		spin_lock_bh(&vsi->mac_filter_hash_lock);
 		if (is_valid_ether_addr(vf->default_lan_addr.addr)) {
 			f = i40e_add_filter(vsi, vf->default_lan_addr.addr,
-				       vf->port_vlan_id ? vf->port_vlan_id : -1,
-				       true, false);
+				       vf->port_vlan_id ?
+				       vf->port_vlan_id : -1);
 			if (!f)
 				dev_info(&pf->pdev->dev,
 					 "Could not add MAC filter %pM for VF %d\n",
 					vf->default_lan_addr.addr, vf->vf_id);
 		}
-		spin_unlock_bh(&vsi->mac_filter_list_lock);
+		eth_broadcast_addr(broadcast);
+		f = i40e_add_filter(vsi, broadcast,
+				    vf->port_vlan_id ? vf->port_vlan_id : -1);
+		if (!f)
+			dev_info(&pf->pdev->dev,
+				 "Could not allocate VF broadcast filter\n");
+		spin_unlock_bh(&vsi->mac_filter_hash_lock);
 		i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(0, vf->vf_id),
 				  (u32)hena);
 		i40e_write_rx_ctl(&pf->hw, I40E_VFQF_HENA1(1, vf->vf_id),
@@ -811,6 +818,7 @@ static void i40e_free_vf_res(struct i40e_vf *vf)
 		i40e_vsi_release(pf->vsi[vf->lan_vsi_idx]);
 		vf->lan_vsi_idx = 0;
 		vf->lan_vsi_id = 0;
+		vf->num_mac = 0;
 	}
 	msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
 
@@ -990,7 +998,7 @@ void i40e_reset_vf(struct i40e_vf *vf, bool flr)
 	if (vf->lan_vsi_idx == 0)
 		goto complete_reset;
 
-	i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], false);
+	i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 complete_reset:
 	/* reallocate VF resources to reset the VSI state */
 	i40e_free_vf_res(vf);
@@ -1031,8 +1039,7 @@ void i40e_free_vfs(struct i40e_pf *pf)
 	i40e_notify_client_of_vf_enable(pf, 0);
 	for (i = 0; i < pf->num_alloc_vfs; i++)
 		if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
-			i40e_vsi_control_rings(pf->vsi[pf->vf[i].lan_vsi_idx],
-					       false);
+			i40e_vsi_stop_rings(pf->vsi[pf->vf[i].lan_vsi_idx]);
 
 	/* Disable IOV before freeing resources. This lets any VF drivers
 	 * running in the host get themselves cleaned up before we yank
@@ -1449,9 +1456,9 @@ static void i40e_vc_reset_vf_msg(struct i40e_vf *vf)
 static inline int i40e_getnum_vf_vsi_vlan_filters(struct i40e_vsi *vsi)
 {
 	struct i40e_mac_filter *f;
-	int num_vlans = 0;
+	int num_vlans = 0, bkt;
 
-	list_for_each_entry(f, &vsi->mac_filter_list, list) {
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
 		if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID)
 			num_vlans++;
 	}
@@ -1481,6 +1488,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 	struct i40e_vsi *vsi;
 	bool alluni = false;
 	int aq_err = 0;
+	int bkt;
 
 	vsi = i40e_find_vsi_from_id(pf, info->vsi_id);
 	if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
@@ -1507,7 +1515,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 							    vf->port_vlan_id,
 							    NULL);
 	} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
-		list_for_each_entry(f, &vsi->mac_filter_list, list) {
+		hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
 			if (f->vlan < 0 || f->vlan > I40E_MAX_VLANID)
 				continue;
 			aq_ret = i40e_aq_set_vsi_mc_promisc_on_vlan(hw,
@@ -1535,7 +1543,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 				vf->vf_id,
 				i40e_stat_str(&pf->hw, aq_ret),
 				i40e_aq_str(&pf->hw, aq_err));
-			goto error_param_int;
+			goto error_param;
 		}
 	}
 
@@ -1557,7 +1565,7 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 							    vf->port_vlan_id,
 							    NULL);
 	} else if (i40e_getnum_vf_vsi_vlan_filters(vsi)) {
-		list_for_each_entry(f, &vsi->mac_filter_list, list) {
+		hash_for_each(vsi->mac_filter_hash, bkt, f, hlist) {
 			aq_ret = 0;
 			if (f->vlan >= 0 && f->vlan <= I40E_MAX_VLANID) {
 				aq_ret =
@@ -1580,15 +1588,16 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
 							     allmulti, NULL,
 							     true);
 		aq_err = pf->hw.aq.asq_last_status;
-		if (aq_ret)
+		if (aq_ret) {
 			dev_err(&pf->pdev->dev,
 				"VF %d failed to set unicast promiscuous mode %8.8x err %s aq_err %s\n",
 				vf->vf_id, info->flags,
 				i40e_stat_str(&pf->hw, aq_ret),
 				i40e_aq_str(&pf->hw, aq_err));
+			goto error_param;
+		}
 	}
 
-error_param_int:
 	if (!aq_ret) {
 		dev_info(&pf->pdev->dev,
 			 "VF %d successfully set unicast promiscuous mode\n",
@@ -1757,7 +1766,7 @@ static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 		goto error_param;
 	}
 
-	if (i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], true))
+	if (i40e_vsi_start_rings(pf->vsi[vf->lan_vsi_idx]))
 		aq_ret = I40E_ERR_TIMEOUT;
 error_param:
 	/* send the response to the VF */
@@ -1796,8 +1805,7 @@ static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 		goto error_param;
 	}
 
-	if (i40e_vsi_control_rings(pf->vsi[vf->lan_vsi_idx], false))
-		aq_ret = I40E_ERR_TIMEOUT;
+	i40e_vsi_stop_rings(pf->vsi[vf->lan_vsi_idx]);
 
 error_param:
 	/* send the response to the VF */
@@ -1927,20 +1935,18 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	/* Lock once, because all function inside for loop accesses VSI's
 	 * MAC filter list which needs to be protected using same lock.
 	 */
-	spin_lock_bh(&vsi->mac_filter_list_lock);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
 	/* add new addresses to the list */
 	for (i = 0; i < al->num_elements; i++) {
 		struct i40e_mac_filter *f;
 
-		f = i40e_find_mac(vsi, al->list[i].addr, true, false);
+		f = i40e_find_mac(vsi, al->list[i].addr);
 		if (!f) {
 			if (i40e_is_vsi_in_vlan(vsi))
-				f = i40e_put_mac_in_vlan(vsi, al->list[i].addr,
-							 true, false);
+				f = i40e_put_mac_in_vlan(vsi, al->list[i].addr);
 			else
-				f = i40e_add_filter(vsi, al->list[i].addr, -1,
-						    true, false);
+				f = i40e_add_filter(vsi, al->list[i].addr, -1);
 		}
 
 		if (!f) {
@@ -1948,13 +1954,13 @@ static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 				"Unable to add MAC filter %pM for VF %d\n",
 				 al->list[i].addr, vf->vf_id);
 			ret = I40E_ERR_PARAM;
-			spin_unlock_bh(&vsi->mac_filter_list_lock);
+			spin_unlock_bh(&vsi->mac_filter_hash_lock);
 			goto error_param;
 		} else {
 			vf->num_mac++;
 		}
 	}
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	/* program the updated filter list */
 	ret = i40e_sync_vsi_filters(vsi);
@@ -2003,18 +2009,18 @@ static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	}
 	vsi = pf->vsi[vf->lan_vsi_idx];
 
-	spin_lock_bh(&vsi->mac_filter_list_lock);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 	/* delete addresses from the list */
 	for (i = 0; i < al->num_elements; i++)
-		if (i40e_del_mac_all_vlan(vsi, al->list[i].addr, true, false)) {
+		if (i40e_del_mac_all_vlan(vsi, al->list[i].addr)) {
 			ret = I40E_ERR_INVALID_MAC_ADDR;
-			spin_unlock_bh(&vsi->mac_filter_list_lock);
+			spin_unlock_bh(&vsi->mac_filter_hash_lock);
 			goto error_param;
 		} else {
 			vf->num_mac--;
 		}
 
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	/* program the updated filter list */
 	ret = i40e_sync_vsi_filters(vsi);
@@ -2139,9 +2145,8 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 	}
 
 	for (i = 0; i < vfl->num_elements; i++) {
-		int ret = i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]);
-		if (!ret)
-			vf->num_vlan--;
+		i40e_vsi_kill_vlan(vsi, vfl->vlan_id[i]);
+		vf->num_vlan--;
 
 		if (test_bit(I40E_VF_STAT_UC_PROMISC, &vf->vf_states))
 			i40e_aq_set_vsi_uc_promisc_on_vlan(&pf->hw, vsi->seid,
@@ -2153,11 +2158,6 @@ static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
 							   false,
 							   vfl->vlan_id[i],
 							   NULL);
-
-		if (ret)
-			dev_err(&pf->pdev->dev,
-				"Unable to delete VLAN filter %d for VF %d, error %d\n",
-				vfl->vlan_id[i], vf->vf_id, ret);
 	}
 
 error_param:
@@ -2689,6 +2689,7 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	struct i40e_mac_filter *f;
 	struct i40e_vf *vf;
 	int ret = 0;
+	int bkt;
 
 	/* validate the request */
 	if (vf_id >= pf->num_alloc_vfs) {
@@ -2715,23 +2716,22 @@ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	}
 
 	/* Lock once because below invoked function add/del_filter requires
-	 * mac_filter_list_lock to be held
+	 * mac_filter_hash_lock to be held
 	 */
-	spin_lock_bh(&vsi->mac_filter_list_lock);
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
 	/* delete the temporary mac address */
 	if (!is_zero_ether_addr(vf->default_lan_addr.addr))
 		i40e_del_filter(vsi, vf->default_lan_addr.addr,
-				vf->port_vlan_id ? vf->port_vlan_id : -1,
-				true, false);
+				vf->port_vlan_id ? vf->port_vlan_id : -1);
 
 	/* Delete all the filters for this VSI - we're going to kill it
 	 * anyway.
 	 */
-	list_for_each_entry(f, &vsi->mac_filter_list, list)
-		i40e_del_filter(vsi, f->macaddr, f->vlan, true, false);
+	hash_for_each(vsi->mac_filter_hash, bkt, f, hlist)
+		i40e_del_filter(vsi, f->macaddr, f->vlan);
 
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
 
 	dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
 	/* program mac filter */
@@ -2766,7 +2766,6 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 	u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT);
 	struct i40e_netdev_priv *np = netdev_priv(netdev);
 	struct i40e_pf *pf = np->vsi->back;
-	bool is_vsi_in_vlan = false;
 	struct i40e_vsi *vsi;
 	struct i40e_vf *vf;
 	int ret = 0;
@@ -2803,11 +2802,10 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 		/* duplicate request, so just return success */
 		goto error_pvid;
 
-	spin_lock_bh(&vsi->mac_filter_list_lock);
-	is_vsi_in_vlan = i40e_is_vsi_in_vlan(vsi);
-	spin_unlock_bh(&vsi->mac_filter_list_lock);
+	/* Locked once because multiple functions below iterate list */
+	spin_lock_bh(&vsi->mac_filter_hash_lock);
 
-	if (le16_to_cpu(vsi->info.pvid) == 0 && is_vsi_in_vlan) {
+	if (le16_to_cpu(vsi->info.pvid) == 0 && i40e_is_vsi_in_vlan(vsi)) {
 		dev_err(&pf->pdev->dev,
 			"VF %d has already configured VLAN filters and the administrator is requesting a port VLAN override.\nPlease unload and reload the VF driver for this change to take effect.\n",
 			vf_id);
@@ -2830,19 +2828,23 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 	 */
 	if ((!(vlan_id || qos) ||
 	    vlanprio != le16_to_cpu(vsi->info.pvid)) &&
-	    vsi->info.pvid)
-		ret = i40e_vsi_add_vlan(vsi, I40E_VLAN_ANY);
-
-	if (vsi->info.pvid) {
-		/* kill old VLAN */
-		ret = i40e_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) &
-					       VLAN_VID_MASK));
+	    vsi->info.pvid) {
+		ret = i40e_add_vlan_all_mac(vsi, I40E_VLAN_ANY);
 		if (ret) {
 			dev_info(&vsi->back->pdev->dev,
-				 "remove VLAN failed, ret=%d, aq_err=%d\n",
-				 ret, pf->hw.aq.asq_last_status);
+				 "add VF VLAN failed, ret=%d aq_err=%d\n", ret,
+				 vsi->back->hw.aq.asq_last_status);
+			spin_unlock_bh(&vsi->mac_filter_hash_lock);
+			goto error_pvid;
 		}
 	}
+
+	if (vsi->info.pvid) {
+		/* remove all filters on the old VLAN */
+		i40e_rm_vlan_all_mac(vsi, (le16_to_cpu(vsi->info.pvid) &
+					   VLAN_VID_MASK));
+	}
+
 	if (vlan_id || qos)
 		ret = i40e_vsi_add_pvid(vsi, vlanprio);
 	else
@@ -2852,24 +2854,30 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id,
 		dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
 			 vlan_id, qos, vf_id);
 
-		/* add new VLAN filter */
-		ret = i40e_vsi_add_vlan(vsi, vlan_id);
+		/* add new VLAN filter for each MAC */
+		ret = i40e_add_vlan_all_mac(vsi, vlan_id);
 		if (ret) {
 			dev_info(&vsi->back->pdev->dev,
 				 "add VF VLAN failed, ret=%d aq_err=%d\n", ret,
 				 vsi->back->hw.aq.asq_last_status);
+			spin_unlock_bh(&vsi->mac_filter_hash_lock);
 			goto error_pvid;
 		}
-		/* Kill non-vlan MAC filters - ignore error return since
-		 * there might not be any non-vlan MAC filters.
-		 */
-		i40e_vsi_kill_vlan(vsi, I40E_VLAN_ANY);
+
+		/* remove the previously added non-VLAN MAC filters */
+		i40e_rm_vlan_all_mac(vsi, I40E_VLAN_ANY);
 	}
 
+	spin_unlock_bh(&vsi->mac_filter_hash_lock);
+
+	/* Schedule the worker thread to take care of applying changes */
+	i40e_service_event_schedule(vsi->back);
+
 	if (ret) {
 		dev_err(&pf->pdev->dev, "Unable to update VF vsi context\n");
 		goto error_pvid;
 	}
+
 	/* The Port VLAN needs to be saved across resets the same as the
 	 * default LAN MAC address.
 	 */
@@ -2926,6 +2934,9 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
 	case I40E_LINK_SPEED_40GB:
 		speed = 40000;
 		break;
+	case I40E_LINK_SPEED_25GB:
+		speed = 25000;
+		break;
 	case I40E_LINK_SPEED_20GB:
 		speed = 20000;
 		break;
@@ -2940,7 +2951,7 @@ int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate,
 	}
 
 	if (max_tx_rate > speed) {
-		dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.",
+		dev_err(&pf->pdev->dev, "Invalid max tx rate %d specified for VF %d.\n",
 			max_tx_rate, vf->vf_id);
 		ret = -EINVAL;
 		goto error;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
index 44f7ed7583dd..96385156b824 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq.c
@@ -912,11 +912,11 @@ i40e_status i40evf_clean_arq_element(struct i40e_hw *hw,
 	desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
 	desc_idx = ntc;
 
+	hw->aq.arq_last_status =
+		(enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
 	flags = le16_to_cpu(desc->flags);
 	if (flags & I40E_AQ_FLAG_ERR) {
 		ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
-		hw->aq.arq_last_status =
-			(enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
 		i40e_debug(hw,
 			   I40E_DEBUG_AQ_MESSAGE,
 			   "AQRX: Event received with error 0x%X.\n",
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
index 40b0eafd0c71..eeb9864bc5b1 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h
@@ -1639,6 +1639,10 @@ enum i40e_aq_phy_type {
 	I40E_PHY_TYPE_1000BASE_LX		= 0x1C,
 	I40E_PHY_TYPE_1000BASE_T_OPTICAL	= 0x1D,
 	I40E_PHY_TYPE_20GBASE_KR2		= 0x1E,
+	I40E_PHY_TYPE_25GBASE_KR		= 0x1F,
+	I40E_PHY_TYPE_25GBASE_CR		= 0x20,
+	I40E_PHY_TYPE_25GBASE_SR		= 0x21,
+	I40E_PHY_TYPE_25GBASE_LR		= 0x22,
 	I40E_PHY_TYPE_MAX
 };
 
@@ -1647,6 +1651,7 @@ enum i40e_aq_phy_type {
 #define I40E_LINK_SPEED_10GB_SHIFT	0x3
 #define I40E_LINK_SPEED_40GB_SHIFT	0x4
 #define I40E_LINK_SPEED_20GB_SHIFT	0x5
+#define I40E_LINK_SPEED_25GB_SHIFT	0x6
 
 enum i40e_aq_link_speed {
 	I40E_LINK_SPEED_UNKNOWN	= 0,
@@ -1654,7 +1659,8 @@ enum i40e_aq_link_speed {
 	I40E_LINK_SPEED_1GB	= BIT(I40E_LINK_SPEED_1000MB_SHIFT),
 	I40E_LINK_SPEED_10GB	= BIT(I40E_LINK_SPEED_10GB_SHIFT),
 	I40E_LINK_SPEED_40GB	= BIT(I40E_LINK_SPEED_40GB_SHIFT),
-	I40E_LINK_SPEED_20GB	= BIT(I40E_LINK_SPEED_20GB_SHIFT)
+	I40E_LINK_SPEED_20GB	= BIT(I40E_LINK_SPEED_20GB_SHIFT),
+	I40E_LINK_SPEED_25GB	= BIT(I40E_LINK_SPEED_25GB_SHIFT),
 };
 
 struct i40e_aqc_module_desc {
@@ -1677,6 +1683,8 @@ struct i40e_aq_get_phy_abilities_resp {
 #define I40E_AQ_PHY_LINK_ENABLED	0x08
 #define I40E_AQ_PHY_AN_ENABLED		0x10
 #define I40E_AQ_PHY_FLAG_MODULE_QUAL	0x20
+#define I40E_AQ_PHY_FEC_ABILITY_KR	0x40
+#define I40E_AQ_PHY_FEC_ABILITY_RS	0x80
 	__le16	eee_capability;
 #define I40E_AQ_EEE_100BASE_TX		0x0002
 #define I40E_AQ_EEE_1000BASE_T		0x0004
@@ -1687,7 +1695,22 @@ struct i40e_aq_get_phy_abilities_resp {
 	__le32	eeer_val;
 	u8	d3_lpan;
 #define I40E_AQ_SET_PHY_D3_LPAN_ENA	0x01
-	u8	reserved[3];
+	u8	phy_type_ext;
+#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0X01
+#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
+#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
+#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+	u8	fec_cfg_curr_mod_ext_info;
+#define I40E_AQ_ENABLE_FEC_KR		0x01
+#define I40E_AQ_ENABLE_FEC_RS		0x02
+#define I40E_AQ_REQUEST_FEC_KR		0x04
+#define I40E_AQ_REQUEST_FEC_RS		0x08
+#define I40E_AQ_ENABLE_FEC_AUTO		0x10
+#define I40E_AQ_FEC
+#define I40E_AQ_MODULE_TYPE_EXT_MASK	0xE0
+#define I40E_AQ_MODULE_TYPE_EXT_SHIFT	5
+
+	u8	ext_comp_code;
 	u8	phy_id[4];
 	u8	module_type[3];
 	u8	qualified_module_count;
@@ -1709,7 +1732,20 @@ struct i40e_aq_set_phy_config { /* same bits as above in all */
 	__le16	eee_capability;
 	__le32	eeer;
 	u8	low_power_ctrl;
-	u8	reserved[3];
+	u8	phy_type_ext;
+#define I40E_AQ_PHY_TYPE_EXT_25G_KR	0X01
+#define I40E_AQ_PHY_TYPE_EXT_25G_CR	0X02
+#define I40E_AQ_PHY_TYPE_EXT_25G_SR	0x04
+#define I40E_AQ_PHY_TYPE_EXT_25G_LR	0x08
+	u8	fec_config;
+#define I40E_AQ_SET_FEC_ABILITY_KR	BIT(0)
+#define I40E_AQ_SET_FEC_ABILITY_RS	BIT(1)
+#define I40E_AQ_SET_FEC_REQUEST_KR	BIT(2)
+#define I40E_AQ_SET_FEC_REQUEST_RS	BIT(3)
+#define I40E_AQ_SET_FEC_AUTO		BIT(4)
+#define I40E_AQ_PHY_FEC_CONFIG_SHIFT	0x0
+#define I40E_AQ_PHY_FEC_CONFIG_MASK	(0x1F << I40E_AQ_PHY_FEC_CONFIG_SHIFT)
+	u8	reserved;
 };
 
 I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
@@ -1789,9 +1825,18 @@ struct i40e_aqc_get_link_status {
 #define I40E_AQ_LINK_TX_DRAINED		0x01
 #define I40E_AQ_LINK_TX_FLUSHED		0x03
 #define I40E_AQ_LINK_FORCED_40G		0x10
+/* 25G Error Codes */
+#define I40E_AQ_25G_NO_ERR		0X00
+#define I40E_AQ_25G_NOT_PRESENT		0X01
+#define I40E_AQ_25G_NVM_CRC_ERR		0X02
+#define I40E_AQ_25G_SBUS_UCODE_ERR	0X03
+#define I40E_AQ_25G_SERDES_UCODE_ERR	0X04
+#define I40E_AQ_25G_NIMB_UCODE_ERR	0X05
 	u8	loopback; /* use defines from i40e_aqc_set_lb_mode */
 	__le16	max_frame_size;
 	u8	config;
+#define I40E_AQ_CONFIG_FEC_KR_ENA	0x01
+#define I40E_AQ_CONFIG_FEC_RS_ENA	0x02
 #define I40E_AQ_CONFIG_CRC_ENA		0x04
 #define I40E_AQ_CONFIG_PACING_MASK	0x78
 	u8	external_power_ability;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 7953c13451b9..aa63b7fb993d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -53,6 +53,8 @@ i40e_status i40e_set_mac_type(struct i40e_hw *hw)
 		case I40E_DEV_ID_10G_BASE_T4:
 		case I40E_DEV_ID_20G_KR2:
 		case I40E_DEV_ID_20G_KR2_A:
+		case I40E_DEV_ID_25G_B:
+		case I40E_DEV_ID_25G_SFP28:
 			hw->mac.type = I40E_MAC_XL710;
 			break;
 		case I40E_DEV_ID_SFP_X722:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_devids.h b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
index 70235706915e..21dcaee1ad1d 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_devids.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_devids.h
@@ -39,6 +39,8 @@
 #define I40E_DEV_ID_20G_KR2		0x1587
 #define I40E_DEV_ID_20G_KR2_A		0x1588
 #define I40E_DEV_ID_10G_BASE_T4		0x1589
+#define I40E_DEV_ID_25G_B		0x158A
+#define I40E_DEV_ID_25G_SFP28		0x158B
 #define I40E_DEV_ID_VF			0x154C
 #define I40E_DEV_ID_VF_HV		0x1571
 #define I40E_DEV_ID_SFP_X722		0x37D0
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index d89d52109efa..ba6c6bda0e22 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -115,6 +115,10 @@ i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page,
 				   u16 reg, u8 phy_addr, u16 *value);
 i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page,
 				    u16 reg, u8 phy_addr, u16 value);
+i40e_status i40e_read_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+				   u8 phy_addr, u16 *value);
+i40e_status i40e_write_phy_register(struct i40e_hw *hw, u8 page, u16 reg,
+				    u8 phy_addr, u16 value);
 u8 i40e_get_phy_address(struct i40e_hw *hw, u8 dev_num);
 i40e_status i40e_blink_phy_link_led(struct i40e_hw *hw,
 				    u32 time, u32 interval);
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 75f2a2cdd738..df67ef37b7f3 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -150,7 +150,7 @@ u32 i40evf_get_tx_pending(struct i40e_ring *ring, bool in_sw)
 	return 0;
 }
 
-#define WB_STRIDE 0x3
+#define WB_STRIDE 4
 
 /**
  * i40e_clean_tx_irq - Reclaim resources after transmit completes
@@ -266,7 +266,7 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi,
 		unsigned int j = i40evf_get_tx_pending(tx_ring, false);
 
 		if (budget &&
-		    ((j / (WB_STRIDE + 1)) == 0) && (j > 0) &&
+		    ((j / WB_STRIDE) == 0) && (j > 0) &&
 		    !test_bit(__I40E_DOWN, &vsi->state) &&
 		    (I40E_DESC_UNUSED(tx_ring) != tx_ring->count))
 			tx_ring->arm_wb = true;
@@ -705,7 +705,6 @@ bool i40evf_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
 		 * because each write-back erases this info.
 		 */
 		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
-		rx_desc->read.hdr_addr = 0;
 
 		rx_desc++;
 		bi++;
@@ -1209,7 +1208,6 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	while (likely(total_rx_packets < budget)) {
 		union i40e_rx_desc *rx_desc;
 		struct sk_buff *skb;
-		u32 rx_status;
 		u16 vlan_tag;
 		u8 rx_ptype;
 		u64 qword;
@@ -1223,21 +1221,13 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 
 		rx_desc = I40E_RX_DESC(rx_ring, rx_ring->next_to_clean);
 
-		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
-			   I40E_RXD_QW1_PTYPE_SHIFT;
-		rx_status = (qword & I40E_RXD_QW1_STATUS_MASK) >>
-			    I40E_RXD_QW1_STATUS_SHIFT;
-
-		if (!(rx_status & BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
-			break;
-
 		/* status_error_len will always be zero for unused descriptors
 		 * because it's cleared in cleanup, and overlaps with hdr_addr
 		 * which is always zero because packet split isn't used, if the
 		 * hardware wrote DD then it will be non-zero
 		 */
-		if (!rx_desc->wb.qword1.status_error_len)
+		if (!i40e_test_staterr(rx_desc,
+				       BIT(I40E_RX_DESC_STATUS_DD_SHIFT)))
 			break;
 
 		/* This memory barrier is needed to keep us from reading
@@ -1271,6 +1261,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 
+		qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+		rx_ptype = (qword & I40E_RXD_QW1_PTYPE_MASK) >>
+			   I40E_RXD_QW1_PTYPE_SHIFT;
+
 		/* populate checksum, VLAN, and protocol */
 		i40evf_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype);
 
@@ -1461,12 +1455,24 @@ int i40evf_napi_poll(struct napi_struct *napi, int budget)
 
 	/* If work not completed, return budget and polling will return */
 	if (!clean_complete) {
+		const cpumask_t *aff_mask = &q_vector->affinity_mask;
+		int cpu_id = smp_processor_id();
+
+		/* It is possible that the interrupt affinity has changed but,
+		 * if the cpu is pegged at 100%, polling will never exit while
+		 * traffic continues and the interrupt will be stuck on this
+		 * cpu.  We check to make sure affinity is correct before we
+		 * continue to poll, otherwise we must stop polling so the
+		 * interrupt can move to the correct cpu.
+		 */
+		if (likely(cpumask_test_cpu(cpu_id, aff_mask))) {
 tx_only:
-		if (arm_wb) {
-			q_vector->tx.ring[0].tx_stats.tx_force_wb++;
-			i40e_enable_wb_on_itr(vsi, q_vector);
+			if (arm_wb) {
+				q_vector->tx.ring[0].tx_stats.tx_force_wb++;
+				i40e_enable_wb_on_itr(vsi, q_vector);
+			}
+			return budget;
 		}
-		return budget;
 	}
 
 	if (vsi->back->flags & I40E_TXR_FLAGS_WB_ON_ITR)
@@ -1474,8 +1480,17 @@ tx_only:
 
 	/* Work is done so exit the polling mode and re-enable the interrupt */
 	napi_complete_done(napi, work_done);
-	i40e_update_enable_itr(vsi, q_vector);
-	return 0;
+
+	/* If we're prematurely stopping polling to fix the interrupt
+	 * affinity we want to make sure polling starts back up so we
+	 * issue a call to i40evf_force_wb which triggers a SW interrupt.
+	 */
+	if (!clean_complete)
+		i40evf_force_wb(vsi, q_vector);
+	else
+		i40e_update_enable_itr(vsi, q_vector);
+
+	return min(work_done, budget - 1);
 }
 
 /**
@@ -1935,9 +1950,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 	u32 td_tag = 0;
 	dma_addr_t dma;
 	u16 gso_segs;
-	u16 desc_count = 0;
-	bool tail_bump = true;
-	bool do_rs = false;
+	u16 desc_count = 1;
 
 	if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
 		td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
@@ -2020,8 +2033,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 		tx_bi = &tx_ring->tx_bi[i];
 	}
 
-	/* set next_to_watch value indicating a packet is present */
-	first->next_to_watch = tx_desc;
+	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 
 	i++;
 	if (i == tx_ring->count)
@@ -2029,66 +2041,72 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	tx_ring->next_to_use = i;
 
-	netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
 	i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
+	/* write last descriptor with EOP bit */
+	td_cmd |= I40E_TX_DESC_CMD_EOP;
+
+	/* We can OR these values together as they both are checked against
+	 * 4 below, and desc_count is only used as a boolean value after the
+	 * if/else block that follows.
+	 */
+	desc_count |= ++tx_ring->packet_stride;
+
 	/* Algorithm to optimize tail and RS bit setting:
-	 * if xmit_more is supported
-	 *	if xmit_more is true
-	 *		do not update tail and do not mark RS bit.
-	 *	if xmit_more is false and last xmit_more was false
-	 *		if every packet spanned less than 4 desc
-	 *			then set RS bit on 4th packet and update tail
-	 *			on every packet
-	 *		else
-	 *			update tail and set RS bit on every packet.
-	 *	if xmit_more is false and last_xmit_more was true
-	 *		update tail and set RS bit.
+	 * if queue is stopped
+	 *	mark RS bit
+	 *	reset packet counter
+	 * else if xmit_more is supported and is true
+	 *	advance packet counter to 4
+	 *	reset desc_count to 0
 	 *
-	 * Optimization: wmb to be issued only in case of tail update.
-	 * Also optimize the Descriptor WB path for RS bit with the same
-	 * algorithm.
+	 * if desc_count >= 4
+	 *	mark RS bit
+	 *	reset packet counter
+	 * if desc_count > 0
+	 *	update tail
 	 *
-	 * Note: If there are less than 4 packets
+	 * Note: If there are fewer than 4 descriptors
 	 * pending and interrupts were disabled the service task will
 	 * trigger a force WB.
 	 */
-	if (skb->xmit_more  &&
-	    !netif_xmit_stopped(txring_txq(tx_ring))) {
-		tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-		tail_bump = false;
-	} else if (!skb->xmit_more &&
-		   !netif_xmit_stopped(txring_txq(tx_ring)) &&
-		   (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) &&
-		   (tx_ring->packet_stride < WB_STRIDE) &&
-		   (desc_count < WB_STRIDE)) {
-		tx_ring->packet_stride++;
-	} else {
+	if (netif_xmit_stopped(txring_txq(tx_ring))) {
+		goto do_rs;
+	} else if (skb->xmit_more) {
+		/* set stride to arm on next packet and reset desc_count */
+		tx_ring->packet_stride = WB_STRIDE;
+		desc_count = 0;
+	} else if (desc_count >= WB_STRIDE) {
+do_rs:
+		/* write last descriptor with RS bit set */
+		td_cmd |= I40E_TX_DESC_CMD_RS;
 		tx_ring->packet_stride = 0;
-		tx_ring->flags &= ~I40E_TXR_FLAGS_LAST_XMIT_MORE_SET;
-		do_rs = true;
 	}
-	if (do_rs)
-		tx_ring->packet_stride = 0;
 
 	tx_desc->cmd_type_offset_bsz =
-			build_ctob(td_cmd, td_offset, size, td_tag) |
-			cpu_to_le64((u64)(do_rs ? I40E_TXD_CMD :
-						  I40E_TX_DESC_CMD_EOP) <<
-						  I40E_TXD_QW1_CMD_SHIFT);
+			build_ctob(td_cmd, td_offset, size, td_tag);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.
+	 *
+	 * We also use this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
 
 	/* notify HW of packet */
-	if (!tail_bump) {
-		prefetchw(tx_desc + 1);
-	} else {
-		/* Force memory writes to complete before letting h/w
-		 * know there are new descriptors to fetch.  (Only
-		 * applicable for weak-ordered memory model archs,
-		 * such as IA-64).
-		 */
-		wmb();
+	if (desc_count) {
 		writel(i, tx_ring->tail);
+
+		/* we need this if more than one processor can write to our tail
+		 * at a time, it synchronizes IO on IA64/Altix systems
+		 */
+		mmiowb();
 	}
+
 	return;
 
 dma_error:
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index abcdecabbc56..a5fc789f78eb 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -173,26 +173,37 @@ static inline bool i40e_test_staterr(union i40e_rx_desc *rx_desc,
 #define I40E_MAX_DATA_PER_TXD_ALIGNED \
 	(I40E_MAX_DATA_PER_TXD & ~(I40E_MAX_READ_REQ_SIZE - 1))
 
-/* This ugly bit of math is equivalent to DIV_ROUNDUP(size, X) where X is
- * the value I40E_MAX_DATA_PER_TXD_ALIGNED.  It is needed due to the fact
- * that 12K is not a power of 2 and division is expensive.  It is used to
- * approximate the number of descriptors used per linear buffer.  Note
- * that this will overestimate in some cases as it doesn't account for the
- * fact that we will add up to 4K - 1 in aligning the 12K buffer, however
- * the error should not impact things much as large buffers usually mean
- * we will use fewer descriptors then there are frags in an skb.
+/**
+ * i40e_txd_use_count  - estimate the number of descriptors needed for Tx
+ * @size: transmit request size in bytes
+ *
+ * Due to hardware alignment restrictions (4K alignment), we need to
+ * assume that we can have no more than 12K of data per descriptor, even
+ * though each descriptor can take up to 16K - 1 bytes of aligned memory.
+ * Thus, we need to divide by 12K. But division is slow! Instead,
+ * we decompose the operation into shifts and one relatively cheap
+ * multiply operation.
+ *
+ * To divide by 12K, we first divide by 4K, then divide by 3:
+ *     To divide by 4K, shift right by 12 bits
+ *     To divide by 3, multiply by 85, then divide by 256
+ *     (Divide by 256 is done by shifting right by 8 bits)
+ * Finally, we add one to round up. Because 256 isn't an exact multiple of
+ * 3, we'll underestimate near each multiple of 12K. This is actually more
+ * accurate as we have 4K - 1 of wiggle room that we can fit into the last
+ * segment.  For our purposes this is accurate out to 1M which is orders of
+ * magnitude greater than our largest possible GSO size.
+ *
+ * This would then be implemented as:
+ *     return (((size >> 12) * 85) >> 8) + 1;
+ *
+ * Since multiplication and division are commutative, we can reorder
+ * operations into:
+ *     return ((size * 85) >> 20) + 1;
  */
 static inline unsigned int i40e_txd_use_count(unsigned int size)
 {
-	const unsigned int max = I40E_MAX_DATA_PER_TXD_ALIGNED;
-	const unsigned int reciprocal = ((1ull << 32) - 1 + (max / 2)) / max;
-	unsigned int adjust = ~(u32)0;
-
-	/* if we rounded up on the reciprocal pull down the adjustment */
-	if ((max * reciprocal) > adjust)
-		adjust = ~(u32)(reciprocal - 1);
-
-	return (u32)((((u64)size * reciprocal) + adjust) >> 32);
+	return ((size * 85) >> 20) + 1;
 }
 
 /* Tx Descriptors needed, worst case */
@@ -309,7 +320,6 @@ struct i40e_ring {
 	bool ring_active;		/* is ring online or not */
 	bool arm_wb;		/* do something to arm write back */
 	u8 packet_stride;
-#define I40E_TXR_FLAGS_LAST_XMIT_MORE_SET BIT(2)
 
 	u16 flags;
 #define I40E_TXR_FLAGS_WB_ON_ITR	BIT(0)
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_type.h b/drivers/net/ethernet/intel/i40evf/i40e_type.h
index 97f96e0d9c4c..c85e8a31c072 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_type.h
@@ -187,47 +187,59 @@ struct i40e_link_status {
 #define I40E_MODULE_TYPE_1000BASE_T	0x08
 };
 
-enum i40e_aq_capabilities_phy_type {
-	I40E_CAP_PHY_TYPE_SGMII		  = BIT(I40E_PHY_TYPE_SGMII),
-	I40E_CAP_PHY_TYPE_1000BASE_KX	  = BIT(I40E_PHY_TYPE_1000BASE_KX),
-	I40E_CAP_PHY_TYPE_10GBASE_KX4	  = BIT(I40E_PHY_TYPE_10GBASE_KX4),
-	I40E_CAP_PHY_TYPE_10GBASE_KR	  = BIT(I40E_PHY_TYPE_10GBASE_KR),
-	I40E_CAP_PHY_TYPE_40GBASE_KR4	  = BIT(I40E_PHY_TYPE_40GBASE_KR4),
-	I40E_CAP_PHY_TYPE_XAUI		  = BIT(I40E_PHY_TYPE_XAUI),
-	I40E_CAP_PHY_TYPE_XFI		  = BIT(I40E_PHY_TYPE_XFI),
-	I40E_CAP_PHY_TYPE_SFI		  = BIT(I40E_PHY_TYPE_SFI),
-	I40E_CAP_PHY_TYPE_XLAUI		  = BIT(I40E_PHY_TYPE_XLAUI),
-	I40E_CAP_PHY_TYPE_XLPPI		  = BIT(I40E_PHY_TYPE_XLPPI),
-	I40E_CAP_PHY_TYPE_40GBASE_CR4_CU  = BIT(I40E_PHY_TYPE_40GBASE_CR4_CU),
-	I40E_CAP_PHY_TYPE_10GBASE_CR1_CU  = BIT(I40E_PHY_TYPE_10GBASE_CR1_CU),
-	I40E_CAP_PHY_TYPE_10GBASE_AOC	  = BIT(I40E_PHY_TYPE_10GBASE_AOC),
-	I40E_CAP_PHY_TYPE_40GBASE_AOC	  = BIT(I40E_PHY_TYPE_40GBASE_AOC),
-	I40E_CAP_PHY_TYPE_100BASE_TX	  = BIT(I40E_PHY_TYPE_100BASE_TX),
-	I40E_CAP_PHY_TYPE_1000BASE_T	  = BIT(I40E_PHY_TYPE_1000BASE_T),
-	I40E_CAP_PHY_TYPE_10GBASE_T	  = BIT(I40E_PHY_TYPE_10GBASE_T),
-	I40E_CAP_PHY_TYPE_10GBASE_SR	  = BIT(I40E_PHY_TYPE_10GBASE_SR),
-	I40E_CAP_PHY_TYPE_10GBASE_LR	  = BIT(I40E_PHY_TYPE_10GBASE_LR),
-	I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU = BIT(I40E_PHY_TYPE_10GBASE_SFPP_CU),
-	I40E_CAP_PHY_TYPE_10GBASE_CR1	  = BIT(I40E_PHY_TYPE_10GBASE_CR1),
-	I40E_CAP_PHY_TYPE_40GBASE_CR4	  = BIT(I40E_PHY_TYPE_40GBASE_CR4),
-	I40E_CAP_PHY_TYPE_40GBASE_SR4	  = BIT(I40E_PHY_TYPE_40GBASE_SR4),
-	I40E_CAP_PHY_TYPE_40GBASE_LR4	  = BIT(I40E_PHY_TYPE_40GBASE_LR4),
-	I40E_CAP_PHY_TYPE_1000BASE_SX	  = BIT(I40E_PHY_TYPE_1000BASE_SX),
-	I40E_CAP_PHY_TYPE_1000BASE_LX	  = BIT(I40E_PHY_TYPE_1000BASE_LX),
-	I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL =
-					 BIT(I40E_PHY_TYPE_1000BASE_T_OPTICAL),
-	I40E_CAP_PHY_TYPE_20GBASE_KR2	  = BIT(I40E_PHY_TYPE_20GBASE_KR2)
-};
-
 struct i40e_phy_info {
 	struct i40e_link_status link_info;
 	struct i40e_link_status link_info_old;
 	bool get_link_info;
 	enum i40e_media_type media_type;
 	/* all the phy types the NVM is capable of */
-	enum i40e_aq_capabilities_phy_type phy_types;
-};
-
+	u64 phy_types;
+};
+
+#define I40E_CAP_PHY_TYPE_SGMII BIT_ULL(I40E_PHY_TYPE_SGMII)
+#define I40E_CAP_PHY_TYPE_1000BASE_KX BIT_ULL(I40E_PHY_TYPE_1000BASE_KX)
+#define I40E_CAP_PHY_TYPE_10GBASE_KX4 BIT_ULL(I40E_PHY_TYPE_10GBASE_KX4)
+#define I40E_CAP_PHY_TYPE_10GBASE_KR BIT_ULL(I40E_PHY_TYPE_10GBASE_KR)
+#define I40E_CAP_PHY_TYPE_40GBASE_KR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_KR4)
+#define I40E_CAP_PHY_TYPE_XAUI BIT_ULL(I40E_PHY_TYPE_XAUI)
+#define I40E_CAP_PHY_TYPE_XFI BIT_ULL(I40E_PHY_TYPE_XFI)
+#define I40E_CAP_PHY_TYPE_SFI BIT_ULL(I40E_PHY_TYPE_SFI)
+#define I40E_CAP_PHY_TYPE_XLAUI BIT_ULL(I40E_PHY_TYPE_XLAUI)
+#define I40E_CAP_PHY_TYPE_XLPPI BIT_ULL(I40E_PHY_TYPE_XLPPI)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4_CU BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_AOC BIT_ULL(I40E_PHY_TYPE_10GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_40GBASE_AOC BIT_ULL(I40E_PHY_TYPE_40GBASE_AOC)
+#define I40E_CAP_PHY_TYPE_100BASE_TX BIT_ULL(I40E_PHY_TYPE_100BASE_TX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T BIT_ULL(I40E_PHY_TYPE_1000BASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_T BIT_ULL(I40E_PHY_TYPE_10GBASE_T)
+#define I40E_CAP_PHY_TYPE_10GBASE_SR BIT_ULL(I40E_PHY_TYPE_10GBASE_SR)
+#define I40E_CAP_PHY_TYPE_10GBASE_LR BIT_ULL(I40E_PHY_TYPE_10GBASE_LR)
+#define I40E_CAP_PHY_TYPE_10GBASE_SFPP_CU BIT_ULL(I40E_PHY_TYPE_10GBASE_SFPP_CU)
+#define I40E_CAP_PHY_TYPE_10GBASE_CR1 BIT_ULL(I40E_PHY_TYPE_10GBASE_CR1)
+#define I40E_CAP_PHY_TYPE_40GBASE_CR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_CR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_SR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_SR4)
+#define I40E_CAP_PHY_TYPE_40GBASE_LR4 BIT_ULL(I40E_PHY_TYPE_40GBASE_LR4)
+#define I40E_CAP_PHY_TYPE_1000BASE_SX BIT_ULL(I40E_PHY_TYPE_1000BASE_SX)
+#define I40E_CAP_PHY_TYPE_1000BASE_LX BIT_ULL(I40E_PHY_TYPE_1000BASE_LX)
+#define I40E_CAP_PHY_TYPE_1000BASE_T_OPTICAL \
+				BIT_ULL(I40E_PHY_TYPE_1000BASE_T_OPTICAL)
+#define I40E_CAP_PHY_TYPE_20GBASE_KR2 BIT_ULL(I40E_PHY_TYPE_20GBASE_KR2)
+/* Defining the macro I40E_PHY_TYPE_OFFSET to implement a bit shift for some
+ * PHY types. There is an unused bit (31) in the I40E_CAP_PHY_TYPE_* bit
+ * fields but no corresponding gap in the i40e_aq_phy_type enumeration. So,
+ * a shift is needed to adjust for this with values larger than 31. The
+ * only affected values are I40E_PHY_TYPE_25GBASE_*.
+ */
+#define I40E_PHY_TYPE_OFFSET 1
+#define I40E_CAP_PHY_TYPE_25GBASE_KR BIT_ULL(I40E_PHY_TYPE_25GBASE_KR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_CR BIT_ULL(I40E_PHY_TYPE_25GBASE_CR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_SR BIT_ULL(I40E_PHY_TYPE_25GBASE_SR + \
+					     I40E_PHY_TYPE_OFFSET)
+#define I40E_CAP_PHY_TYPE_25GBASE_LR BIT_ULL(I40E_PHY_TYPE_25GBASE_LR + \
+					     I40E_PHY_TYPE_OFFSET)
 #define I40E_HW_CAP_MAX_GPIO			30
 /* Capabilities of a PF or a VF or the whole device */
 struct i40e_hw_capabilities {
@@ -237,6 +249,10 @@ struct i40e_hw_capabilities {
 #define I40E_NVM_IMAGE_TYPE_UDP_CLOUD	0x3
 
 	u32  management_mode;
+	u32  mng_protocols_over_mctp;
+#define I40E_MNG_PROTOCOL_PLDM		0x2
+#define I40E_MNG_PROTOCOL_OEM_COMMANDS	0x4
+#define I40E_MNG_PROTOCOL_NCSI		0x8
 	u32  npar_enable;
 	u32  os2bmc;
 	u32  valid_functions;
@@ -348,6 +364,7 @@ enum i40e_nvmupd_state {
 	I40E_NVMUPD_STATE_WRITING,
 	I40E_NVMUPD_STATE_INIT_WAIT,
 	I40E_NVMUPD_STATE_WRITE_WAIT,
+	I40E_NVMUPD_STATE_ERROR
 };
 
 /* nvm_access definition and its masks/shifts need to be accessible to
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
index bd691ad86673..fc374f833aa9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h
@@ -162,6 +162,10 @@ struct i40e_virtchnl_vsi_resource {
 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF		0X00080000
 #define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM	0X00100000
 
+#define I40E_VF_BASE_MODE_OFFLOADS (I40E_VIRTCHNL_VF_OFFLOAD_L2 | \
+				    I40E_VIRTCHNL_VF_OFFLOAD_VLAN | \
+				    I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF)
+
 struct i40e_virtchnl_vf_resource {
 	u16 num_vsis;
 	u16 num_queue_pairs;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h
index c5fd724313c7..fffe4cf2c20b 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf.h
+++ b/drivers/net/ethernet/intel/i40evf/i40evf.h
@@ -107,7 +107,8 @@ struct i40e_q_vector {
 	int v_idx;	/* vector index in list */
 	char name[IFNAMSIZ + 9];
 	bool arm_wb_state;
-	cpumask_var_t affinity_mask;
+	cpumask_t affinity_mask;
+	struct irq_affinity_notify affinity_notify;
 };
 
 /* Helper macros to switch between ints/sec and what the register uses.
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
index a9940154eead..272d600c1ed0 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c
@@ -85,6 +85,14 @@ static int i40evf_get_settings(struct net_device *netdev,
 	case I40E_LINK_SPEED_40GB:
 		ethtool_cmd_speed_set(ecmd, SPEED_40000);
 		break;
+	case I40E_LINK_SPEED_25GB:
+#ifdef SPEED_25000
+		ethtool_cmd_speed_set(ecmd, SPEED_25000);
+#else
+		netdev_info(netdev,
+			    "Speed is 25G, display not supported by this version of ethtool.\n");
+#endif
+		break;
 	case I40E_LINK_SPEED_20GB:
 		ethtool_cmd_speed_set(ecmd, SPEED_20000);
 		break;
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index 14372810fc27..c0fc53361800 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -38,7 +38,7 @@ static const char i40evf_driver_string[] =
 
 #define DRV_VERSION_MAJOR 1
 #define DRV_VERSION_MINOR 6
-#define DRV_VERSION_BUILD 16
+#define DRV_VERSION_BUILD 25
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD) \
@@ -207,6 +207,9 @@ static void i40evf_misc_irq_disable(struct i40evf_adapter *adapter)
 {
 	struct i40e_hw *hw = &adapter->hw;
 
+	if (!adapter->msix_entries)
+		return;
+
 	wr32(hw, I40E_VFINT_DYN_CTL01, 0);
 
 	/* read flush */
@@ -496,6 +499,33 @@ static void i40evf_netpoll(struct net_device *netdev)
 
 #endif
 /**
+ * i40evf_irq_affinity_notify - Callback for affinity changes
+ * @notify: context as to what irq was changed
+ * @mask: the new affinity mask
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * so that we may register to receive changes to the irq affinity masks.
+ **/
+static void i40evf_irq_affinity_notify(struct irq_affinity_notify *notify,
+				       const cpumask_t *mask)
+{
+	struct i40e_q_vector *q_vector =
+		container_of(notify, struct i40e_q_vector, affinity_notify);
+
+	q_vector->affinity_mask = *mask;
+}
+
+/**
+ * i40evf_irq_affinity_release - Callback for affinity notifier release
+ * @ref: internal core kernel usage
+ *
+ * This is a callback function used by the irq_set_affinity_notifier function
+ * to inform the current notification subscriber that they will no longer
+ * receive notifications.
+ **/
+static void i40evf_irq_affinity_release(struct kref *ref) {}
+
+/**
  * i40evf_request_traffic_irqs - Initialize MSI-X interrupts
  * @adapter: board private structure
  *
@@ -507,6 +537,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 {
 	int vector, err, q_vectors;
 	int rx_int_idx = 0, tx_int_idx = 0;
+	int irq_num;
 
 	i40evf_irq_disable(adapter);
 	/* Decrement for Other and TCP Timer vectors */
@@ -514,6 +545,7 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 
 	for (vector = 0; vector < q_vectors; vector++) {
 		struct i40e_q_vector *q_vector = &adapter->q_vectors[vector];
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
 
 		if (q_vector->tx.ring && q_vector->rx.ring) {
 			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
@@ -532,21 +564,23 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 			/* skip this unused q_vector */
 			continue;
 		}
-		err = request_irq(
-			adapter->msix_entries[vector + NONQ_VECS].vector,
-			i40evf_msix_clean_rings,
-			0,
-			q_vector->name,
-			q_vector);
+		err = request_irq(irq_num,
+				  i40evf_msix_clean_rings,
+				  0,
+				  q_vector->name,
+				  q_vector);
 		if (err) {
 			dev_info(&adapter->pdev->dev,
 				 "Request_irq failed, error: %d\n", err);
 			goto free_queue_irqs;
 		}
+		/* register for affinity change notifications */
+		q_vector->affinity_notify.notify = i40evf_irq_affinity_notify;
+		q_vector->affinity_notify.release =
+						   i40evf_irq_affinity_release;
+		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
 		/* assign the mask for this irq */
-		irq_set_affinity_hint(
-			adapter->msix_entries[vector + NONQ_VECS].vector,
-			q_vector->affinity_mask);
+		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
 	}
 
 	return 0;
@@ -554,11 +588,10 @@ i40evf_request_traffic_irqs(struct i40evf_adapter *adapter, char *basename)
 free_queue_irqs:
 	while (vector) {
 		vector--;
-		irq_set_affinity_hint(
-			adapter->msix_entries[vector + NONQ_VECS].vector,
-			NULL);
-		free_irq(adapter->msix_entries[vector + NONQ_VECS].vector,
-			 &adapter->q_vectors[vector]);
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &adapter->q_vectors[vector]);
 	}
 	return err;
 }
@@ -599,16 +632,18 @@ static int i40evf_request_misc_irq(struct i40evf_adapter *adapter)
  **/
 static void i40evf_free_traffic_irqs(struct i40evf_adapter *adapter)
 {
-	int i;
-	int q_vectors;
+	int vector, irq_num, q_vectors;
+
+	if (!adapter->msix_entries)
+		return;
 
 	q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 
-	for (i = 0; i < q_vectors; i++) {
-		irq_set_affinity_hint(adapter->msix_entries[i+1].vector,
-				      NULL);
-		free_irq(adapter->msix_entries[i+1].vector,
-			 &adapter->q_vectors[i]);
+	for (vector = 0; vector < q_vectors; vector++) {
+		irq_num = adapter->msix_entries[vector + NONQ_VECS].vector;
+		irq_set_affinity_notifier(irq_num, NULL);
+		irq_set_affinity_hint(irq_num, NULL);
+		free_irq(irq_num, &adapter->q_vectors[vector]);
 	}
 }
 
@@ -622,6 +657,9 @@ static void i40evf_free_misc_irq(struct i40evf_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 
+	if (!adapter->msix_entries)
+		return;
+
 	free_irq(adapter->msix_entries[0].vector, netdev);
 }
 
@@ -1396,6 +1434,9 @@ static void i40evf_free_q_vectors(struct i40evf_adapter *adapter)
 	int q_idx, num_q_vectors;
 	int napi_vectors;
 
+	if (!adapter->q_vectors)
+		return;
+
 	num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
 	napi_vectors = adapter->num_active_queues;
 
@@ -1405,6 +1446,7 @@ static void i40evf_free_q_vectors(struct i40evf_adapter *adapter)
 			netif_napi_del(&q_vector->napi);
 	}
 	kfree(adapter->q_vectors);
+	adapter->q_vectors = NULL;
 }
 
 /**
@@ -1414,6 +1456,9 @@ static void i40evf_free_q_vectors(struct i40evf_adapter *adapter)
  **/
 void i40evf_reset_interrupt_capability(struct i40evf_adapter *adapter)
 {
+	if (!adapter->msix_entries)
+		return;
+
 	pci_disable_msix(adapter->pdev);
 	kfree(adapter->msix_entries);
 	adapter->msix_entries = NULL;
@@ -1664,6 +1709,49 @@ restart_watchdog:
 	schedule_work(&adapter->adminq_task);
 }
 
+static void i40evf_disable_vf(struct i40evf_adapter *adapter)
+{
+	struct i40evf_mac_filter *f, *ftmp;
+	struct i40evf_vlan_filter *fv, *fvtmp;
+
+	adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
+
+	if (netif_running(adapter->netdev)) {
+		set_bit(__I40E_DOWN, &adapter->vsi.state);
+		netif_carrier_off(adapter->netdev);
+		netif_tx_disable(adapter->netdev);
+		adapter->link_up = false;
+		i40evf_napi_disable_all(adapter);
+		i40evf_irq_disable(adapter);
+		i40evf_free_traffic_irqs(adapter);
+		i40evf_free_all_tx_resources(adapter);
+		i40evf_free_all_rx_resources(adapter);
+	}
+
+	/* Delete all of the filters, both MAC and VLAN. */
+	list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list, list) {
+		list_del(&f->list);
+		kfree(f);
+	}
+
+	list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list, list) {
+		list_del(&fv->list);
+		kfree(fv);
+	}
+
+	i40evf_free_misc_irq(adapter);
+	i40evf_reset_interrupt_capability(adapter);
+	i40evf_free_queues(adapter);
+	i40evf_free_q_vectors(adapter);
+	kfree(adapter->vf_res);
+	i40evf_shutdown_adminq(&adapter->hw);
+	adapter->netdev->flags &= ~IFF_UP;
+	clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
+	adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
+	adapter->state = __I40EVF_DOWN;
+	dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
+}
+
 #define I40EVF_RESET_WAIT_MS 10
 #define I40EVF_RESET_WAIT_COUNT 500
 /**
@@ -1717,60 +1805,21 @@ static void i40evf_reset_task(struct work_struct *work)
 
 	/* wait until the reset is complete and the PF is responding to us */
 	for (i = 0; i < I40EVF_RESET_WAIT_COUNT; i++) {
+		/* sleep first to make sure a minimum wait time is met */
+		msleep(I40EVF_RESET_WAIT_MS);
+
 		reg_val = rd32(hw, I40E_VFGEN_RSTAT) &
 			  I40E_VFGEN_RSTAT_VFR_STATE_MASK;
 		if (reg_val == I40E_VFR_VFACTIVE)
 			break;
-		msleep(I40EVF_RESET_WAIT_MS);
 	}
+
 	pci_set_master(adapter->pdev);
-	/* extra wait to make sure minimum wait is met */
-	msleep(I40EVF_RESET_WAIT_MS);
-	if (i == I40EVF_RESET_WAIT_COUNT) {
-		struct i40evf_mac_filter *ftmp;
-		struct i40evf_vlan_filter *fv, *fvtmp;
 
-		/* reset never finished */
+	if (i == I40EVF_RESET_WAIT_COUNT) {
 		dev_err(&adapter->pdev->dev, "Reset never finished (%x)\n",
 			reg_val);
-		adapter->flags |= I40EVF_FLAG_PF_COMMS_FAILED;
-
-		if (netif_running(adapter->netdev)) {
-			set_bit(__I40E_DOWN, &adapter->vsi.state);
-			netif_carrier_off(netdev);
-			netif_tx_disable(netdev);
-			adapter->link_up = false;
-			i40evf_napi_disable_all(adapter);
-			i40evf_irq_disable(adapter);
-			i40evf_free_traffic_irqs(adapter);
-			i40evf_free_all_tx_resources(adapter);
-			i40evf_free_all_rx_resources(adapter);
-		}
-
-		/* Delete all of the filters, both MAC and VLAN. */
-		list_for_each_entry_safe(f, ftmp, &adapter->mac_filter_list,
-					 list) {
-			list_del(&f->list);
-			kfree(f);
-		}
-
-		list_for_each_entry_safe(fv, fvtmp, &adapter->vlan_filter_list,
-					 list) {
-			list_del(&fv->list);
-			kfree(fv);
-		}
-
-		i40evf_free_misc_irq(adapter);
-		i40evf_reset_interrupt_capability(adapter);
-		i40evf_free_queues(adapter);
-		i40evf_free_q_vectors(adapter);
-		kfree(adapter->vf_res);
-		i40evf_shutdown_adminq(hw);
-		adapter->netdev->flags &= ~IFF_UP;
-		clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section);
-		adapter->flags &= ~I40EVF_FLAG_RESET_PENDING;
-		adapter->state = __I40EVF_DOWN;
-		dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n");
+		i40evf_disable_vf(adapter);
 		return; /* Do not attempt to reinit. It's dead, Jim. */
 	}
 
@@ -2133,10 +2182,6 @@ static struct net_device_stats *i40evf_get_stats(struct net_device *netdev)
 static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct i40evf_adapter *adapter = netdev_priv(netdev);
-	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
-
-	if ((new_mtu < 68) || (max_frame > I40E_MAX_RXBUFFER))
-		return -EINVAL;
 
 	netdev->mtu = new_mtu;
 	adapter->flags |= I40EVF_FLAG_RESET_NEEDED;
@@ -2145,6 +2190,64 @@ static int i40evf_change_mtu(struct net_device *netdev, int new_mtu)
 	return 0;
 }
 
+/**
+ * i40evf_features_check - Validate encapsulated packet conforms to limits
+ * @skb: skb buff
+ * @dev: This physical port's netdev
+ * @features: Offload features that the stack believes apply
+ **/
+static netdev_features_t i40evf_features_check(struct sk_buff *skb,
+					       struct net_device *dev,
+					       netdev_features_t features)
+{
+	size_t len;
+
+	/* No point in doing any of this if neither checksum nor GSO are
+	 * being requested for this frame.  We can rule out both by just
+	 * checking for CHECKSUM_PARTIAL
+	 */
+	if (skb->ip_summed != CHECKSUM_PARTIAL)
+		return features;
+
+	/* We cannot support GSO if the MSS is going to be less than
+	 * 64 bytes.  If it is then we need to drop support for GSO.
+	 */
+	if (skb_is_gso(skb) && (skb_shinfo(skb)->gso_size < 64))
+		features &= ~NETIF_F_GSO_MASK;
+
+	/* MACLEN can support at most 63 words */
+	len = skb_network_header(skb) - skb->data;
+	if (len & ~(63 * 2))
+		goto out_err;
+
+	/* IPLEN and EIPLEN can support at most 127 dwords */
+	len = skb_transport_header(skb) - skb_network_header(skb);
+	if (len & ~(127 * 4))
+		goto out_err;
+
+	if (skb->encapsulation) {
+		/* L4TUNLEN can support 127 words */
+		len = skb_inner_network_header(skb) - skb_transport_header(skb);
+		if (len & ~(127 * 2))
+			goto out_err;
+
+		/* IPLEN can support at most 127 dwords */
+		len = skb_inner_transport_header(skb) -
+		      skb_inner_network_header(skb);
+		if (len & ~(127 * 4))
+			goto out_err;
+	}
+
+	/* No need to validate L4LEN as TCP is the only protocol with a
+	 * flexible value and we support all possible values supported
+	 * by TCP, which is at most 15 dwords
+	 */
+
+	return features;
+out_err:
+	return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
+}
+
 #define I40EVF_VLAN_FEATURES (NETIF_F_HW_VLAN_CTAG_TX |\
 			      NETIF_F_HW_VLAN_CTAG_RX |\
 			      NETIF_F_HW_VLAN_CTAG_FILTER)
@@ -2179,6 +2282,7 @@ static const struct net_device_ops i40evf_netdev_ops = {
 	.ndo_tx_timeout		= i40evf_tx_timeout,
 	.ndo_vlan_rx_add_vid	= i40evf_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid	= i40evf_vlan_rx_kill_vid,
+	.ndo_features_check	= i40evf_features_check,
 	.ndo_fix_features	= i40evf_fix_features,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= i40evf_netpoll,
@@ -2424,6 +2528,10 @@ static void i40evf_init_task(struct work_struct *work)
 	i40evf_set_ethtool_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
 
+	/* MTU range: 68 - 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = I40E_MAX_RXBUFFER - (ETH_HLEN + ETH_FCS_LEN);
+
 	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
 		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
 			 adapter->hw.mac.addr);
@@ -2764,12 +2872,10 @@ static void i40evf_remove(struct pci_dev *pdev)
 		msleep(50);
 	}
 
-	if (adapter->msix_entries) {
-		i40evf_misc_irq_disable(adapter);
-		i40evf_free_misc_irq(adapter);
-		i40evf_reset_interrupt_capability(adapter);
-		i40evf_free_q_vectors(adapter);
-	}
+	i40evf_misc_irq_disable(adapter);
+	i40evf_free_misc_irq(adapter);
+	i40evf_reset_interrupt_capability(adapter);
+	i40evf_free_q_vectors(adapter);
 
 	if (adapter->watchdog_timer.function)
 		del_timer_sync(&adapter->watchdog_timer);
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
index ddf478d6322b..2059a8e88908 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c
@@ -836,6 +836,9 @@ static void i40evf_print_link_message(struct i40evf_adapter *adapter)
 	case I40E_LINK_SPEED_40GB:
 		speed = "40 G";
 		break;
+	case I40E_LINK_SPEED_25GB:
+		speed = "25 G";
+		break;
 	case I40E_LINK_SPEED_20GB:
 		speed = "20 G";
 		break;
diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h
index 2688180a7acd..8aee314332a8 100644
--- a/drivers/net/ethernet/intel/igb/e1000_defines.h
+++ b/drivers/net/ethernet/intel/igb/e1000_defines.h
@@ -357,7 +357,8 @@
 #define ETHERNET_IEEE_VLAN_TYPE 0x8100  /* 802.3ac packet */
 
 /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
-#define MAX_JUMBO_FRAME_SIZE	0x2600
+#define MAX_JUMBO_FRAME_SIZE		0x2600
+#define MAX_STD_JUMBO_FRAME_SIZE	9216
 
 /* PBA constants */
 #define E1000_PBA_34K 0x0022
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index edc9a6ac5169..cae24a8ccf47 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -2468,6 +2468,10 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 68 - 9216 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+
 	adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
 
 	/* before reading the NVM, reset the controller to put the device in a
@@ -4931,11 +4935,15 @@ static int igb_tso(struct igb_ring *tx_ring,
 
 	/* initialize outer IP header fields */
 	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+
 		/* IP header will have to cancel out any data that
 		 * is not a part of the outer IP header
 		 */
-		ip.v4->check = csum_fold(csum_add(lco_csum(skb),
-						  csum_unfold(l4.tcp->check)));
+		ip.v4->check = csum_fold(csum_partial(trans_start,
+						      csum_start - trans_start,
+						      0));
 		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
 
 		ip.v4->tot_len = 0;
@@ -5408,17 +5416,6 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu)
 	struct pci_dev *pdev = adapter->pdev;
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 
-	if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
-		dev_err(&pdev->dev, "Invalid MTU setting\n");
-		return -EINVAL;
-	}
-
-#define MAX_STD_JUMBO_FRAME_SIZE 9238
-	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
-		dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
-		return -EINVAL;
-	}
-
 	/* adjust max frame to be at least the size of a standard frame */
 	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
 		max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c
index a7895c4cbcc3..c30eea8399a7 100644
--- a/drivers/net/ethernet/intel/igb/igb_ptp.c
+++ b/drivers/net/ethernet/intel/igb/igb_ptp.c
@@ -226,7 +226,7 @@ static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb)
 	return 0;
 }
 
-static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
+static int igb_ptp_adjfine_82580(struct ptp_clock_info *ptp, long scaled_ppm)
 {
 	struct igb_adapter *igb = container_of(ptp, struct igb_adapter,
 					       ptp_caps);
@@ -235,13 +235,13 @@ static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb)
 	u64 rate;
 	u32 inca;
 
-	if (ppb < 0) {
+	if (scaled_ppm < 0) {
 		neg_adj = 1;
-		ppb = -ppb;
+		scaled_ppm = -scaled_ppm;
 	}
-	rate = ppb;
-	rate <<= 26;
-	rate = div_u64(rate, 1953125);
+	rate = scaled_ppm;
+	rate <<= 13;
+	rate = div_u64(rate, 15625);
 
 	inca = rate & INCVALUE_MASK;
 	if (neg_adj)
@@ -1103,7 +1103,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		adapter->ptp_caps.max_adj = 62499999;
 		adapter->ptp_caps.n_ext_ts = 0;
 		adapter->ptp_caps.pps = 0;
-		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+		adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576;
 		adapter->ptp_caps.gettime64 = igb_ptp_gettime_82576;
 		adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
@@ -1131,7 +1131,7 @@ void igb_ptp_init(struct igb_adapter *adapter)
 		adapter->ptp_caps.n_pins = IGB_N_SDP;
 		adapter->ptp_caps.pps = 1;
 		adapter->ptp_caps.pin_config = adapter->sdp_config;
-		adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580;
+		adapter->ptp_caps.adjfine = igb_ptp_adjfine_82580;
 		adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210;
 		adapter->ptp_caps.gettime64 = igb_ptp_gettime_i210;
 		adapter->ptp_caps.settime64 = igb_ptp_settime_i210;
diff --git a/drivers/net/ethernet/intel/igbvf/defines.h b/drivers/net/ethernet/intel/igbvf/defines.h
index ee1ef08d7fc4..f1789d192e24 100644
--- a/drivers/net/ethernet/intel/igbvf/defines.h
+++ b/drivers/net/ethernet/intel/igbvf/defines.h
@@ -85,7 +85,8 @@
 #define E1000_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
 #define E1000_TXD_STAT_DD	0x00000001 /* Desc Done */
 
-#define MAX_JUMBO_FRAME_SIZE	0x3F00
+#define MAX_JUMBO_FRAME_SIZE		0x3F00
+#define MAX_STD_JUMBO_FRAME_SIZE	9216
 
 /* 802.1q VLAN Packet Size */
 #define VLAN_TAG_SIZE		4    /* 802.3ac tag (not DMA'd) */
diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 12bb877df860..839ba110f7fb 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -1965,11 +1965,15 @@ static int igbvf_tso(struct igbvf_ring *tx_ring,
 
 	/* initialize outer IP header fields */
 	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+
 		/* IP header will have to cancel out any data that
 		 * is not a part of the outer IP header
 		 */
-		ip.v4->check = csum_fold(csum_add(lco_csum(skb),
-						  csum_unfold(l4.tcp->check)));
+		ip.v4->check = csum_fold(csum_partial(trans_start,
+						      csum_start - trans_start,
+						      0));
 		type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
 
 		ip.v4->tot_len = 0;
@@ -2356,16 +2360,6 @@ static int igbvf_change_mtu(struct net_device *netdev, int new_mtu)
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
 
-	if (new_mtu < 68 || new_mtu > INT_MAX - ETH_HLEN - ETH_FCS_LEN ||
-	    max_frame > MAX_JUMBO_FRAME_SIZE)
-		return -EINVAL;
-
-#define MAX_STD_JUMBO_FRAME_SIZE 9234
-	if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
-		dev_err(&adapter->pdev->dev, "MTU > 9216 not supported.\n");
-		return -EINVAL;
-	}
-
 	while (test_and_set_bit(__IGBVF_RESETTING, &adapter->state))
 		usleep_range(1000, 2000);
 	/* igbvf_down has a dependency on max_frame_size */
@@ -2786,6 +2780,10 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			    NETIF_F_HW_VLAN_CTAG_RX |
 			    NETIF_F_HW_VLAN_CTAG_TX;
 
+	/* MTU range: 68 - 9216 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
+
 	/*reset the controller to put the device in a known good state */
 	err = hw->mac.ops.reset_hw(hw);
 	if (err) {
diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
index 31f91459312f..5826b1ddedcf 100644
--- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c
+++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c
@@ -487,6 +487,10 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		netdev->vlan_features |= NETIF_F_HIGHDMA;
 	}
 
+	/* MTU range: 68 - 16114 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = IXGB_MAX_JUMBO_FRAME_SIZE - ETH_HLEN;
+
 	/* make sure the EEPROM is good */
 
 	if (!ixgb_validate_eeprom_checksum(&adapter->hw)) {
@@ -1619,18 +1623,6 @@ ixgb_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
 	int max_frame = new_mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
-	int old_max_frame = netdev->mtu + ENET_HEADER_SIZE + ENET_FCS_LENGTH;
-
-	/* MTU < 68 is an error for IPv4 traffic, just don't allow it */
-	if ((new_mtu < 68) ||
-	    (max_frame > IXGB_MAX_JUMBO_FRAME_SIZE + ENET_FCS_LENGTH)) {
-		netif_err(adapter, probe, adapter->netdev,
-			  "Invalid MTU setting %d\n", new_mtu);
-		return -EINVAL;
-	}
-
-	if (old_max_frame == max_frame)
-		return 0;
 
 	if (netif_running(netdev))
 		ixgb_down(adapter, true);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index b06e32d0d22a..ef81c3d8c295 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -1027,4 +1027,6 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 				  struct ixgbe_ring *tx_ring);
 u32 ixgbe_rss_indir_tbl_entries(struct ixgbe_adapter *adapter);
 void ixgbe_store_reta(struct ixgbe_adapter *adapter);
+s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm);
 #endif /* _IXGBE_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
index fb51be74dd4c..805ab319e578 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82598.c
@@ -367,7 +367,7 @@ static s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw)
 	}
 
 	/* Negotiate the fc mode to use */
-	ixgbe_fc_autoneg(hw);
+	hw->mac.ops.fc_autoneg(hw);
 
 	/* Disable any previous flow control settings */
 	fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL);
@@ -1179,6 +1179,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = {
 	.get_link_capabilities	= &ixgbe_get_link_capabilities_82598,
 	.led_on			= &ixgbe_led_on_generic,
 	.led_off		= &ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
 	.blink_led_start	= &ixgbe_blink_led_start_generic,
 	.blink_led_stop		= &ixgbe_blink_led_stop_generic,
 	.set_rar		= &ixgbe_set_rar_generic,
@@ -1193,6 +1194,7 @@ static const struct ixgbe_mac_operations mac_ops_82598 = {
 	.set_vfta		= &ixgbe_set_vfta_82598,
 	.fc_enable		= &ixgbe_fc_enable_82598,
 	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
 	.set_fw_drv_ver         = NULL,
 	.acquire_swfw_sync      = &ixgbe_acquire_swfw_sync,
 	.release_swfw_sync      = &ixgbe_release_swfw_sync,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
index 63b25006ac90..e00aaeb91827 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_82599.c
@@ -2204,6 +2204,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = {
 	.get_link_capabilities  = &ixgbe_get_link_capabilities_82599,
 	.led_on                 = &ixgbe_led_on_generic,
 	.led_off                = &ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
 	.blink_led_start        = &ixgbe_blink_led_start_generic,
 	.blink_led_stop         = &ixgbe_blink_led_stop_generic,
 	.set_rar                = &ixgbe_set_rar_generic,
@@ -2219,6 +2220,7 @@ static const struct ixgbe_mac_operations mac_ops_82599 = {
 	.set_vfta               = &ixgbe_set_vfta_generic,
 	.fc_enable              = &ixgbe_fc_enable_generic,
 	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
 	.set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
 	.init_uta_tables        = &ixgbe_init_uta_tables_generic,
 	.setup_sfp              = &ixgbe_setup_sfp_modules_82599,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 77d3039283f6..8832df3eba25 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -298,10 +298,12 @@ s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw)
 	IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext);
 	IXGBE_WRITE_FLUSH(hw);
 
-	/* Setup flow control */
-	ret_val = hw->mac.ops.setup_fc(hw);
-	if (ret_val)
-		return ret_val;
+	/* Setup flow control if a method for doing so is provided */
+	if (hw->mac.ops.setup_fc) {
+		ret_val = hw->mac.ops.setup_fc(hw);
+		if (ret_val)
+			return ret_val;
+	}
 
 	/* Cashe bit indicating need for crosstalk fix */
 	switch (hw->mac.type) {
@@ -390,6 +392,9 @@ s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw)
 		status = hw->mac.ops.start_hw(hw);
 	}
 
+	/* Initialize the LED link active for LED blink support */
+	hw->mac.ops.init_led_link_act(hw);
+
 	return status;
 }
 
@@ -773,6 +778,49 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw)
 }
 
 /**
+ *  ixgbe_init_led_link_act_generic - Store the LED index link/activity.
+ *  @hw: pointer to hardware structure
+ *
+ *  Store the index for the link active LED. This will be used to support
+ *  blinking the LED.
+ **/
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	u32 led_reg, led_mode;
+	u16 i;
+
+	led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL);
+
+	/* Get LED link active from the LEDCTL register */
+	for (i = 0; i < 4; i++) {
+		led_mode = led_reg >> IXGBE_LED_MODE_SHIFT(i);
+
+		if ((led_mode & IXGBE_LED_MODE_MASK_BASE) ==
+		    IXGBE_LED_LINK_ACTIVE) {
+			mac->led_link_act = i;
+			return 0;
+		}
+	}
+
+	/* If LEDCTL register does not have the LED link active set, then use
+	 * known MAC defaults.
+	 */
+	switch (hw->mac.type) {
+	case ixgbe_mac_x550em_a:
+		mac->led_link_act = 0;
+		break;
+	case ixgbe_mac_X550EM_x:
+		mac->led_link_act = 1;
+		break;
+	default:
+		mac->led_link_act = 2;
+	}
+
+	return 0;
+}
+
+/**
  *  ixgbe_led_on_generic - Turns on the software controllable LEDs.
  *  @hw: pointer to hardware structure
  *  @index: led number to turn on
@@ -2127,7 +2175,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
 	}
 
 	/* Negotiate the fc mode to use */
-	ixgbe_fc_autoneg(hw);
+	hw->mac.ops.fc_autoneg(hw);
 
 	/* Disable any previous flow control settings */
 	mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN);
@@ -2231,8 +2279,8 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
  *  Find the intersection between advertised settings and link partner's
  *  advertised settings
  **/
-static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
-			      u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
+s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg,
+		       u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm)
 {
 	if ((!(adv_reg)) ||  (!(lp_reg)))
 		return IXGBE_ERR_FC_NOT_NEGOTIATED;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
index 6d4c260d0cbd..5b3e3c65927e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.h
@@ -49,6 +49,7 @@ s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw);
 
 s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index);
 s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index);
+s32 ixgbe_init_led_link_act_generic(struct ixgbe_hw *hw);
 
 s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw);
 s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index f49f80380aa5..fd192bf29b26 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -2225,11 +2225,11 @@ static int ixgbe_set_phys_id(struct net_device *netdev,
 		return 2;
 
 	case ETHTOOL_ID_ON:
-		hw->mac.ops.led_on(hw, hw->bus.func);
+		hw->mac.ops.led_on(hw, hw->mac.led_link_act);
 		break;
 
 	case ETHTOOL_ID_OFF:
-		hw->mac.ops.led_off(hw, hw->bus.func);
+		hw->mac.ops.led_off(hw, hw->mac.led_link_act);
 		break;
 
 	case ETHTOOL_ID_INACTIVE:
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index bd93d823cc25..1e2f39ebd824 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -54,6 +54,7 @@
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_mirred.h>
+#include <net/vxlan.h>
 
 #include "ixgbe.h"
 #include "ixgbe_common.h"
@@ -3070,6 +3071,9 @@ static void ixgbe_free_irq(struct ixgbe_adapter *adapter)
 		return;
 	}
 
+	if (!adapter->msix_entries)
+		return;
+
 	for (vector = 0; vector < adapter->num_q_vectors; vector++) {
 		struct ixgbe_q_vector *q_vector = adapter->q_vector[vector];
 		struct msix_entry *entry = &adapter->msix_entries[vector];
@@ -5012,24 +5016,23 @@ fwd_queue_err:
 	return err;
 }
 
-static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
+static int ixgbe_upper_dev_walk(struct net_device *upper, void *data)
 {
-	struct net_device *upper;
-	struct list_head *iter;
-	int err;
+	if (netif_is_macvlan(upper)) {
+		struct macvlan_dev *dfwd = netdev_priv(upper);
+		struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
 
-	netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) {
-		if (netif_is_macvlan(upper)) {
-			struct macvlan_dev *dfwd = netdev_priv(upper);
-			struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
-
-			if (dfwd->fwd_priv) {
-				err = ixgbe_fwd_ring_up(upper, vadapter);
-				if (err)
-					continue;
-			}
-		}
+		if (dfwd->fwd_priv)
+			ixgbe_fwd_ring_up(upper, vadapter);
 	}
+
+	return 0;
+}
+
+static void ixgbe_configure_dfwd(struct ixgbe_adapter *adapter)
+{
+	netdev_walk_all_upper_dev_rcu(adapter->netdev,
+				      ixgbe_upper_dev_walk, NULL);
 }
 
 static void ixgbe_configure(struct ixgbe_adapter *adapter)
@@ -5448,12 +5451,25 @@ static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
 	spin_unlock(&adapter->fdir_perfect_lock);
 }
 
+static int ixgbe_disable_macvlan(struct net_device *upper, void *data)
+{
+	if (netif_is_macvlan(upper)) {
+		struct macvlan_dev *vlan = netdev_priv(upper);
+
+		if (vlan->fwd_priv) {
+			netif_tx_stop_all_queues(upper);
+			netif_carrier_off(upper);
+			netif_tx_disable(upper);
+		}
+	}
+
+	return 0;
+}
+
 void ixgbe_down(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct net_device *upper;
-	struct list_head *iter;
 	int i;
 
 	/* signal that we are down to the interrupt handler */
@@ -5477,17 +5493,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 	netif_tx_disable(netdev);
 
 	/* disable any upper devices */
-	netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) {
-		if (netif_is_macvlan(upper)) {
-			struct macvlan_dev *vlan = netdev_priv(upper);
-
-			if (vlan->fwd_priv) {
-				netif_tx_stop_all_queues(upper);
-				netif_carrier_off(upper);
-				netif_tx_disable(upper);
-			}
-		}
-	}
+	netdev_walk_all_upper_dev_rcu(adapter->netdev,
+				      ixgbe_disable_macvlan, NULL);
 
 	ixgbe_irq_disable(adapter);
 
@@ -5618,7 +5625,8 @@ static void ixgbe_init_dcb(struct ixgbe_adapter *adapter)
  * Fields are initialized based on PCI device information and
  * OS network device settings (MTU size).
  **/
-static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
+static int ixgbe_sw_init(struct ixgbe_adapter *adapter,
+			 const struct ixgbe_info *ii)
 {
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct pci_dev *pdev = adapter->pdev;
@@ -5634,6 +5642,9 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter)
 	hw->subsystem_vendor_id = pdev->subsystem_vendor;
 	hw->subsystem_device_id = pdev->subsystem_device;
 
+	/* get_invariants needs the device IDs */
+	ii->get_invariants(hw);
+
 	/* Set common capability flags and settings */
 	rss = min_t(int, ixgbe_max_rss_indices(adapter), num_online_cpus());
 	adapter->ring_feature[RING_F_RSS].limit = rss;
@@ -6049,11 +6060,6 @@ static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter)
 static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
-	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
-
-	/* MTU < 68 is an error and causes problems on some kernels */
-	if ((new_mtu < 68) || (max_frame > IXGBE_MAX_JUMBO_FRAME_SIZE))
-		return -EINVAL;
 
 	/*
 	 * For 82599EB we cannot allow legacy VFs to enable their receive
@@ -6062,7 +6068,7 @@ static int ixgbe_change_mtu(struct net_device *netdev, int new_mtu)
 	 */
 	if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) &&
 	    (adapter->hw.mac.type == ixgbe_mac_82599EB) &&
-	    (max_frame > (ETH_FRAME_LEN + ETH_FCS_LEN)))
+	    (new_mtu > ETH_DATA_LEN))
 		e_warn(probe, "Setting MTU > 1500 will disable legacy VFs\n");
 
 	e_info(probe, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
@@ -6728,6 +6734,18 @@ static void ixgbe_update_default_up(struct ixgbe_adapter *adapter)
 #endif
 }
 
+static int ixgbe_enable_macvlan(struct net_device *upper, void *data)
+{
+	if (netif_is_macvlan(upper)) {
+		struct macvlan_dev *vlan = netdev_priv(upper);
+
+		if (vlan->fwd_priv)
+			netif_tx_wake_all_queues(upper);
+	}
+
+	return 0;
+}
+
 /**
  * ixgbe_watchdog_link_is_up - update netif_carrier status and
  *                             print link up message
@@ -6737,8 +6755,6 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct ixgbe_hw *hw = &adapter->hw;
-	struct net_device *upper;
-	struct list_head *iter;
 	u32 link_speed = adapter->link_speed;
 	const char *speed_str;
 	bool flow_rx, flow_tx;
@@ -6809,14 +6825,8 @@ static void ixgbe_watchdog_link_is_up(struct ixgbe_adapter *adapter)
 
 	/* enable any upper devices */
 	rtnl_lock();
-	netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) {
-		if (netif_is_macvlan(upper)) {
-			struct macvlan_dev *vlan = netdev_priv(upper);
-
-			if (vlan->fwd_priv)
-				netif_tx_wake_all_queues(upper);
-		}
-	}
+	netdev_walk_all_upper_dev_rcu(adapter->netdev,
+				      ixgbe_enable_macvlan, NULL);
 	rtnl_unlock();
 
 	/* update the default user priority for VFs */
@@ -7277,11 +7287,15 @@ static int ixgbe_tso(struct ixgbe_ring *tx_ring,
 
 	/* initialize outer IP header fields */
 	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+
 		/* IP header will have to cancel out any data that
 		 * is not a part of the outer IP header
 		 */
-		ip.v4->check = csum_fold(csum_add(lco_csum(skb),
-						  csum_unfold(l4.tcp->check)));
+		ip.v4->check = csum_fold(csum_partial(trans_start,
+						      csum_start - trans_start,
+						      0));
 		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 
 		ip.v4->tot_len = 0;
@@ -7651,11 +7665,17 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 	/* snag network header to get L4 type and address */
 	skb = first->skb;
 	hdr.network = skb_network_header(skb);
+	if (unlikely(hdr.network <= skb->data))
+		return;
 	if (skb->encapsulation &&
 	    first->protocol == htons(ETH_P_IP) &&
-	    hdr.ipv4->protocol != IPPROTO_UDP) {
+	    hdr.ipv4->protocol == IPPROTO_UDP) {
 		struct ixgbe_adapter *adapter = q_vector->adapter;
 
+		if (unlikely(skb_tail_pointer(skb) < hdr.network +
+			     VXLAN_HEADROOM))
+			return;
+
 		/* verify the port is recognized as VXLAN */
 		if (adapter->vxlan_port &&
 		    udp_hdr(skb)->dest == adapter->vxlan_port)
@@ -7666,6 +7686,12 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 			hdr.network = skb_inner_network_header(skb);
 	}
 
+	/* Make sure we have at least [minimum IPv4 header + TCP]
+	 * or [IPv6 header] bytes
+	 */
+	if (unlikely(skb_tail_pointer(skb) < hdr.network + 40))
+		return;
+
 	/* Currently only IPv4/IPv6 with TCP is supported */
 	switch (hdr.ipv4->version) {
 	case IPVERSION:
@@ -7685,6 +7711,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring,
 	if (l4_proto != IPPROTO_TCP)
 		return;
 
+	if (unlikely(skb_tail_pointer(skb) < hdr.network +
+		     hlen + sizeof(struct tcphdr)))
+		return;
+
 	th = (struct tcphdr *)(hdr.network + hlen);
 
 	/* skip this packet since the socket is closing */
@@ -8350,12 +8380,38 @@ static int ixgbe_configure_clsu32_del_hnode(struct ixgbe_adapter *adapter,
 }
 
 #ifdef CONFIG_NET_CLS_ACT
+struct upper_walk_data {
+	struct ixgbe_adapter *adapter;
+	u64 action;
+	int ifindex;
+	u8 queue;
+};
+
+static int get_macvlan_queue(struct net_device *upper, void *_data)
+{
+	if (netif_is_macvlan(upper)) {
+		struct macvlan_dev *dfwd = netdev_priv(upper);
+		struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
+		struct upper_walk_data *data = _data;
+		struct ixgbe_adapter *adapter = data->adapter;
+		int ifindex = data->ifindex;
+
+		if (vadapter && vadapter->netdev->ifindex == ifindex) {
+			data->queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
+			data->action = data->queue;
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
 static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex,
 				  u8 *queue, u64 *action)
 {
 	unsigned int num_vfs = adapter->num_vfs, vf;
+	struct upper_walk_data data;
 	struct net_device *upper;
-	struct list_head *iter;
 
 	/* redirect to a SRIOV VF */
 	for (vf = 0; vf < num_vfs; ++vf) {
@@ -8373,17 +8429,16 @@ static int handle_redirect_action(struct ixgbe_adapter *adapter, int ifindex,
 	}
 
 	/* redirect to a offloaded macvlan netdev */
-	netdev_for_each_all_upper_dev_rcu(adapter->netdev, upper, iter) {
-		if (netif_is_macvlan(upper)) {
-			struct macvlan_dev *dfwd = netdev_priv(upper);
-			struct ixgbe_fwd_adapter *vadapter = dfwd->fwd_priv;
-
-			if (vadapter && vadapter->netdev->ifindex == ifindex) {
-				*queue = adapter->rx_ring[vadapter->rx_base_queue]->reg_idx;
-				*action = *queue;
-				return 0;
-			}
-		}
+	data.adapter = adapter;
+	data.ifindex = ifindex;
+	data.action = 0;
+	data.queue = 0;
+	if (netdev_walk_all_upper_dev_rcu(adapter->netdev,
+					  get_macvlan_queue, &data)) {
+		*action = data.action;
+		*queue = data.queue;
+
+		return 0;
 	}
 
 	return -EINVAL;
@@ -8410,7 +8465,7 @@ static int parse_tc_actions(struct ixgbe_adapter *adapter,
 		}
 
 		/* Redirect to a VF or a offloaded macvlan */
-		if (is_tcf_mirred_redirect(a)) {
+		if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
 
 			err = handle_redirect_action(adapter, ifindex, queue,
@@ -9477,6 +9532,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	hw->mac.ops   = *ii->mac_ops;
 	hw->mac.type  = ii->mac;
 	hw->mvals     = ii->mvals;
+	if (ii->link_ops)
+		hw->link.ops  = *ii->link_ops;
 
 	/* EEPROM */
 	hw->eeprom.ops = *ii->eeprom_ops;
@@ -9500,10 +9557,8 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	hw->phy.mdio.mdio_read = ixgbe_mdio_read;
 	hw->phy.mdio.mdio_write = ixgbe_mdio_write;
 
-	ii->get_invariants(hw);
-
 	/* setup the private structure */
-	err = ixgbe_sw_init(adapter);
+	err = ixgbe_sw_init(adapter, ii);
 	if (err)
 		goto err_sw_init;
 
@@ -9612,6 +9667,10 @@ skip_sriov:
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 	netdev->priv_flags |= IFF_SUPP_NOFCS;
 
+	/* MTU range: 68 - 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+
 #ifdef CONFIG_IXGBE_DCB
 	if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE)
 		netdev->dcbnl_ops = &dcbnl_ops;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
index 021ab9b89c71..3b8362085f57 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c
@@ -109,8 +109,8 @@ static u8 ixgbe_ones_comp_byte_add(u8 add1, u8 add2)
  *
  *  Returns an error code on error.
  */
-static s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
-					       u16 reg, u16 *val, bool lock)
+s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+					u16 reg, u16 *val, bool lock)
 {
 	u32 swfw_mask = hw->phy.phy_semaphore_mask;
 	int max_retry = 10;
@@ -178,36 +178,6 @@ fail:
 }
 
 /**
- *  ixgbe_read_i2c_combined_generic - Perform I2C read combined operation
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to read from
- *  @reg: I2C device register to read from
- *  @val: pointer to location to receive read value
- *
- *  Returns an error code on error.
- */
-s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
-				    u16 reg, u16 *val)
-{
-	return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true);
-}
-
-/**
- *  ixgbe_read_i2c_combined_generic_unlocked - Unlocked I2C read combined
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to read from
- *  @reg: I2C device register to read from
- *  @val: pointer to location to receive read value
- *
- *  Returns an error code on error.
- */
-s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
-					     u16 reg, u16 *val)
-{
-	return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false);
-}
-
-/**
  *  ixgbe_write_i2c_combined_generic_int - Perform I2C write combined operation
  *  @hw: pointer to the hardware structure
  *  @addr: I2C bus address to write to
@@ -217,8 +187,8 @@ s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
  *
  *  Returns an error code on error.
  */
-static s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
-						u16 reg, u16 val, bool lock)
+s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *hw, u8 addr,
+					 u16 reg, u16 val, bool lock)
 {
 	u32 swfw_mask = hw->phy.phy_semaphore_mask;
 	int max_retry = 1;
@@ -273,33 +243,39 @@ fail:
 }
 
 /**
- *  ixgbe_write_i2c_combined_generic - Perform I2C write combined operation
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to write to
- *  @reg: I2C device register to write to
- *  @val: value to write
+ *  ixgbe_probe_phy - Probe a single address for a PHY
+ *  @hw: pointer to hardware structure
+ *  @phy_addr: PHY address to probe
  *
- *  Returns an error code on error.
- */
-s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
-				     u8 addr, u16 reg, u16 val)
+ *  Returns true if PHY found
+ **/
+static bool ixgbe_probe_phy(struct ixgbe_hw *hw, u16 phy_addr)
 {
-	return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true);
-}
+	u16 ext_ability = 0;
 
-/**
- *  ixgbe_write_i2c_combined_generic_unlocked - Unlocked I2C write combined
- *  @hw: pointer to the hardware structure
- *  @addr: I2C bus address to write to
- *  @reg: I2C device register to write to
- *  @val: value to write
- *
- *  Returns an error code on error.
- */
-s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
-					      u8 addr, u16 reg, u16 val)
-{
-	return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false);
+	hw->phy.mdio.prtad = phy_addr;
+	if (mdio45_probe(&hw->phy.mdio, phy_addr) != 0)
+		return false;
+
+	if (ixgbe_get_phy_id(hw))
+		return false;
+
+	hw->phy.type = ixgbe_get_phy_type_from_id(hw->phy.id);
+
+	if (hw->phy.type == ixgbe_phy_unknown) {
+		hw->phy.ops.read_reg(hw,
+				     MDIO_PMA_EXTABLE,
+				     MDIO_MMD_PMAPMD,
+				     &ext_ability);
+		if (ext_ability &
+		    (MDIO_PMA_EXTABLE_10GBT |
+		     MDIO_PMA_EXTABLE_1000BT))
+			hw->phy.type = ixgbe_phy_cu_unknown;
+		else
+			hw->phy.type = ixgbe_phy_generic;
+	}
+
+	return true;
 }
 
 /**
@@ -311,7 +287,7 @@ s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
 s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
 {
 	u32 phy_addr;
-	u16 ext_ability = 0;
+	s32 status = IXGBE_ERR_PHY_ADDR_INVALID;
 
 	if (!hw->phy.phy_semaphore_mask) {
 		if (hw->bus.lan_id)
@@ -320,37 +296,34 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw)
 			hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM;
 	}
 
-	if (hw->phy.type == ixgbe_phy_unknown) {
-		for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
-			hw->phy.mdio.prtad = phy_addr;
-			if (mdio45_probe(&hw->phy.mdio, phy_addr) == 0) {
-				ixgbe_get_phy_id(hw);
-				hw->phy.type =
-					ixgbe_get_phy_type_from_id(hw->phy.id);
-
-				if (hw->phy.type == ixgbe_phy_unknown) {
-					hw->phy.ops.read_reg(hw,
-							     MDIO_PMA_EXTABLE,
-							     MDIO_MMD_PMAPMD,
-							     &ext_ability);
-					if (ext_ability &
-					    (MDIO_PMA_EXTABLE_10GBT |
-					     MDIO_PMA_EXTABLE_1000BT))
-						hw->phy.type =
-							 ixgbe_phy_cu_unknown;
-					else
-						hw->phy.type =
-							 ixgbe_phy_generic;
-				}
+	if (hw->phy.type != ixgbe_phy_unknown)
+		return 0;
 
-				return 0;
-			}
+	if (hw->phy.nw_mng_if_sel) {
+		phy_addr = (hw->phy.nw_mng_if_sel &
+			    IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >>
+			   IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT;
+		if (ixgbe_probe_phy(hw, phy_addr))
+			return 0;
+		else
+			return IXGBE_ERR_PHY_ADDR_INVALID;
+	}
+
+	for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) {
+		if (ixgbe_probe_phy(hw, phy_addr)) {
+			status = 0;
+			break;
 		}
-		/* indicate no PHY found */
-		hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
-		return IXGBE_ERR_PHY_ADDR_INVALID;
 	}
-	return 0;
+
+	/* Certain media types do not have a phy so an address will not
+	 * be found and the code will take this path.  Caller has to
+	 * decide if it is an error or not.
+	 */
+	if (status)
+		hw->phy.mdio.prtad = MDIO_PRTAD_NONE;
+
+	return status;
 }
 
 /**
@@ -416,7 +389,8 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
 	case TN1010_PHY_ID:
 		phy_type = ixgbe_phy_tn;
 		break;
-	case X550_PHY_ID:
+	case X550_PHY_ID2:
+	case X550_PHY_ID3:
 	case X540_PHY_ID:
 		phy_type = ixgbe_phy_aq;
 		break;
@@ -427,6 +401,7 @@ static enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id)
 		phy_type = ixgbe_phy_nl;
 		break;
 	case X557_PHY_ID:
+	case X557_PHY_ID2:
 		phy_type = ixgbe_phy_x550em_ext_t;
 		break;
 	default:
@@ -477,8 +452,7 @@ s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw)
 	 */
 	for (i = 0; i < 30; i++) {
 		msleep(100);
-		hw->phy.ops.read_reg(hw, MDIO_CTRL1,
-				     MDIO_MMD_PHYXS, &ctrl);
+		hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_PHYXS, &ctrl);
 		if (!(ctrl & MDIO_CTRL1_RESET)) {
 			udelay(2);
 			break;
@@ -705,53 +679,52 @@ s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw)
 
 	ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg);
 
-	if (speed & IXGBE_LINK_SPEED_10GB_FULL) {
-		/* Set or unset auto-negotiation 10G advertisement */
-		hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL,
-				     MDIO_MMD_AN,
-				     &autoneg_reg);
+	/* Set or unset auto-negotiation 10G advertisement */
+	hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, &autoneg_reg);
 
-		autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G;
-		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL)
-			autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G;
-
-		hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL,
-				      MDIO_MMD_AN,
-				      autoneg_reg);
-	}
+	autoneg_reg &= ~MDIO_AN_10GBT_CTRL_ADV10G;
+	if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) &&
+	    (speed & IXGBE_LINK_SPEED_10GB_FULL))
+		autoneg_reg |= MDIO_AN_10GBT_CTRL_ADV10G;
 
-	if (speed & IXGBE_LINK_SPEED_1GB_FULL) {
-		/* Set or unset auto-negotiation 1G advertisement */
-		hw->phy.ops.read_reg(hw,
-				     IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-				     MDIO_MMD_AN,
-				     &autoneg_reg);
+	hw->phy.ops.write_reg(hw, MDIO_AN_10GBT_CTRL, MDIO_MMD_AN, autoneg_reg);
 
-		autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
-		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL)
-			autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
+	hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+			     MDIO_MMD_AN, &autoneg_reg);
 
-		hw->phy.ops.write_reg(hw,
-				      IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-				      MDIO_MMD_AN,
-				      autoneg_reg);
+	if (hw->mac.type == ixgbe_mac_X550) {
+		/* Set or unset auto-negotiation 5G advertisement */
+		autoneg_reg &= ~IXGBE_MII_5GBASE_T_ADVERTISE;
+		if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_5GB_FULL) &&
+		    (speed & IXGBE_LINK_SPEED_5GB_FULL))
+			autoneg_reg |= IXGBE_MII_5GBASE_T_ADVERTISE;
+
+		/* Set or unset auto-negotiation 2.5G advertisement */
+		autoneg_reg &= ~IXGBE_MII_2_5GBASE_T_ADVERTISE;
+		if ((hw->phy.autoneg_advertised &
+		     IXGBE_LINK_SPEED_2_5GB_FULL) &&
+		    (speed & IXGBE_LINK_SPEED_2_5GB_FULL))
+			autoneg_reg |= IXGBE_MII_2_5GBASE_T_ADVERTISE;
 	}
 
-	if (speed & IXGBE_LINK_SPEED_100_FULL) {
-		/* Set or unset auto-negotiation 100M advertisement */
-		hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE,
-				     MDIO_MMD_AN,
-				     &autoneg_reg);
+	/* Set or unset auto-negotiation 1G advertisement */
+	autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE;
+	if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) &&
+	    (speed & IXGBE_LINK_SPEED_1GB_FULL))
+		autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE;
 
-		autoneg_reg &= ~(ADVERTISE_100FULL |
-				 ADVERTISE_100HALF);
-		if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL)
-			autoneg_reg |= ADVERTISE_100FULL;
+	hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
+			      MDIO_MMD_AN, autoneg_reg);
 
-		hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE,
-				      MDIO_MMD_AN,
-				      autoneg_reg);
-	}
+	/* Set or unset auto-negotiation 100M advertisement */
+	hw->phy.ops.read_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, &autoneg_reg);
+
+	autoneg_reg &= ~(ADVERTISE_100FULL | ADVERTISE_100HALF);
+	if ((hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) &&
+	    (speed & IXGBE_LINK_SPEED_100_FULL))
+		autoneg_reg |= ADVERTISE_100FULL;
+
+	hw->phy.ops.write_reg(hw, MDIO_AN_ADVERTISE, MDIO_MMD_AN, autoneg_reg);
 
 	/* Blocked by MNG FW so don't reset PHY */
 	if (ixgbe_check_reset_blocked(hw))
@@ -830,6 +803,7 @@ static s32 ixgbe_get_copper_speeds_supported(struct ixgbe_hw *hw)
 		hw->phy.speeds_supported |= IXGBE_LINK_SPEED_5GB_FULL;
 		break;
 	case ixgbe_mac_X550EM_x:
+	case ixgbe_mac_x550em_a:
 		hw->phy.speeds_supported &= ~IXGBE_LINK_SPEED_100_FULL;
 		break;
 	default:
@@ -2396,9 +2370,7 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
 	if (!on && ixgbe_mng_present(hw))
 		return 0;
 
-	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL,
-				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
-				      &reg);
+	status = hw->phy.ops.read_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, &reg);
 	if (status)
 		return status;
 
@@ -2410,8 +2382,6 @@ s32 ixgbe_set_copper_phy_power(struct ixgbe_hw *hw, bool on)
 		reg |= IXGBE_MDIO_PHY_SET_LOW_POWER_MODE;
 	}
 
-	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL,
-				       IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
-				       reg);
+	status = hw->phy.ops.write_reg(hw, MDIO_CTRL1, MDIO_MMD_VEND1, reg);
 	return status;
 }
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
index cc735ec3e045..ecf05f838fc5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.h
@@ -195,12 +195,8 @@ s32 ixgbe_read_i2c_sff8472_generic(struct ixgbe_hw *hw, u8 byte_offset,
 				   u8 *sff8472_data);
 s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset,
 				   u8 eeprom_data);
-s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
-				    u16 reg, u16 *val);
-s32 ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
-					     u16 reg, u16 *val);
-s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
-				     u16 reg, u16 val);
-s32 ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
-					      u16 reg, u16 val);
+s32 ixgbe_read_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+					u16 *val, bool lock);
+s32 ixgbe_write_i2c_combined_generic_int(struct ixgbe_hw *, u8 addr, u16 reg,
+					 u16 val, bool lock);
 #endif /* _IXGBE_PHY_H_ */
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
index 31d82e3abac8..cf21273db201 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h
@@ -874,19 +874,13 @@ struct ixgbe_thermal_sensor_data {
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_1GB	0x4 /* 1Gb/s */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_STATUS_10GB	0x6 /* 10Gb/s */
 
-#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG	0x20	/* 10G Control Reg */
 #define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400	/* 1G Provisioning 1 */
 #define IXGBE_MII_AUTONEG_XNP_TX_REG		0x17	/* 1G XNP Transmit */
-#define IXGBE_MII_AUTONEG_ADVERTISE_REG		0x10	/* 100M Advertisement */
-#define IXGBE_MII_10GBASE_T_ADVERTISE		0x1000	/* full duplex, bit:12*/
 #define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX	0x4000	/* full duplex, bit:14*/
 #define IXGBE_MII_1GBASE_T_ADVERTISE		0x8000	/* full duplex, bit:15*/
 #define IXGBE_MII_2_5GBASE_T_ADVERTISE		0x0400
 #define IXGBE_MII_5GBASE_T_ADVERTISE		0x0800
-#define IXGBE_MII_100BASE_T_ADVERTISE		0x0100	/* full duplex, bit:8 */
-#define IXGBE_MII_100BASE_T_ADVERTISE_HALF	0x0080	/* half duplex, bit:7 */
 #define IXGBE_MII_RESTART			0x200
-#define IXGBE_MII_AUTONEG_COMPLETE		0x20
 #define IXGBE_MII_AUTONEG_LINK_UP		0x04
 #define IXGBE_MII_AUTONEG_REG			0x0
 
@@ -1320,30 +1314,20 @@ struct ixgbe_thermal_sensor_data {
 /* MDIO definitions */
 
 #define IXGBE_MDIO_ZERO_DEV_TYPE		0x0
-#define IXGBE_MDIO_PMA_PMD_DEV_TYPE		0x1
 #define IXGBE_MDIO_PCS_DEV_TYPE		0x3
-#define IXGBE_MDIO_PHY_XS_DEV_TYPE		0x4
-#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE		0x7
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE	0x1E   /* Device 30 */
 #define IXGBE_TWINAX_DEV			1
 
 #define IXGBE_MDIO_COMMAND_TIMEOUT     100 /* PHY Timeout for 1 GB mode */
 
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL      0x0    /* VS1 Control Reg */
-#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS       0x1    /* VS1 Status Reg */
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS  0x0008 /* 1 = Link Up */
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0 - 10G, 1 - 1G */
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED    0x0018
 #define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED     0x0010
 
-#define IXGBE_MDIO_AUTO_NEG_CONTROL	0x0 /* AUTO_NEG Control Reg */
-#define IXGBE_MDIO_AUTO_NEG_STATUS	0x1 /* AUTO_NEG Status Reg */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_STAT	0xC800 /* AUTO_NEG Vendor Status Reg */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM  0xCC00 /* AUTO_NEG Vendor TX Reg */
 #define IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2 0xCC01 /* AUTO_NEG Vendor Tx Reg */
 #define IXGBE_MDIO_AUTO_NEG_VEN_LSC	0x1 /* AUTO_NEG Vendor Tx LSC */
-#define IXGBE_MDIO_AUTO_NEG_ADVT	0x10 /* AUTO_NEG Advt Reg */
-#define IXGBE_MDIO_AUTO_NEG_LP		0x13 /* AUTO_NEG LP Status Reg */
 #define IXGBE_MDIO_AUTO_NEG_EEE_ADVT	0x3C /* AUTO_NEG EEE Advt Reg */
 
 #define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE	 0x0800 /* Set low power mode */
@@ -1393,8 +1377,10 @@ struct ixgbe_thermal_sensor_data {
 #define TN1010_PHY_ID    0x00A19410
 #define TNX_FW_REV       0xB
 #define X540_PHY_ID      0x01540200
-#define X550_PHY_ID      0x01540220
+#define X550_PHY_ID2	0x01540223
+#define X550_PHY_ID3	0x01540221
 #define X557_PHY_ID      0x01540240
+#define X557_PHY_ID2	0x01540250
 #define QT2022_PHY_ID    0x0043A400
 #define ATH_PHY_ID       0x03429050
 #define AQ_FW_REV        0x20
@@ -3352,6 +3338,7 @@ struct ixgbe_mac_operations {
 	s32 (*led_off)(struct ixgbe_hw *, u32);
 	s32 (*blink_led_start)(struct ixgbe_hw *, u32);
 	s32 (*blink_led_stop)(struct ixgbe_hw *, u32);
+	s32 (*init_led_link_act)(struct ixgbe_hw *);
 
 	/* RAR, Multicast, VLAN */
 	s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32);
@@ -3372,6 +3359,7 @@ struct ixgbe_mac_operations {
 	/* Flow Control */
 	s32 (*fc_enable)(struct ixgbe_hw *);
 	s32 (*setup_fc)(struct ixgbe_hw *);
+	void (*fc_autoneg)(struct ixgbe_hw *);
 
 	/* Manageability interface */
 	s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8);
@@ -3410,16 +3398,28 @@ struct ixgbe_phy_operations {
 	s32 (*read_i2c_sff8472)(struct ixgbe_hw *, u8 , u8 *);
 	s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *);
 	s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8);
-	s32 (*read_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val);
-	s32 (*write_i2c_combined)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val);
 	s32 (*check_overtemp)(struct ixgbe_hw *);
 	s32 (*set_phy_power)(struct ixgbe_hw *, bool on);
 	s32 (*enter_lplu)(struct ixgbe_hw *);
 	s32 (*handle_lasi)(struct ixgbe_hw *hw);
-	s32 (*read_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
-					  u16 *value);
-	s32 (*write_i2c_combined_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
-					   u16 value);
+	s32 (*read_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+				      u8 *value);
+	s32 (*write_i2c_byte_unlocked)(struct ixgbe_hw *, u8 offset, u8 addr,
+				       u8 value);
+};
+
+struct ixgbe_link_operations {
+	s32 (*read_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 *val);
+	s32 (*read_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+				  u16 *val);
+	s32 (*write_link)(struct ixgbe_hw *, u8 addr, u16 reg, u16 val);
+	s32 (*write_link_unlocked)(struct ixgbe_hw *, u8 addr, u16 reg,
+				   u16 val);
+};
+
+struct ixgbe_link_info {
+	struct ixgbe_link_operations ops;
+	u8 addr;
 };
 
 struct ixgbe_eeprom_info {
@@ -3462,6 +3462,7 @@ struct ixgbe_mac_info {
 	u8				san_mac_rar_index;
 	struct ixgbe_thermal_sensor_data  thermal_sensor_data;
 	bool				set_lben;
+	u8				led_link_act;
 };
 
 struct ixgbe_phy_info {
@@ -3523,6 +3524,7 @@ struct ixgbe_hw {
 	struct ixgbe_addr_filter_info	addr_ctrl;
 	struct ixgbe_fc_info		fc;
 	struct ixgbe_phy_info		phy;
+	struct ixgbe_link_info		link;
 	struct ixgbe_eeprom_info	eeprom;
 	struct ixgbe_bus_info		bus;
 	struct ixgbe_mbx_info		mbx;
@@ -3546,6 +3548,7 @@ struct ixgbe_info {
 	const struct ixgbe_eeprom_operations	*eeprom_ops;
 	const struct ixgbe_phy_operations	*phy_ops;
 	const struct ixgbe_mbx_operations	*mbx_ops;
+	const struct ixgbe_link_operations	*link_ops;
 	const u32			*mvals;
 };
 
@@ -3593,17 +3596,35 @@ struct ixgbe_info {
 #define IXGBE_FUSES0_REV_MASK		(3u << 6)
 
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL(P)	((P) ? 0x8010 : 0x4010)
+#define IXGBE_KRM_LINK_S1(P)		((P) ? 0x8200 : 0x4200)
 #define IXGBE_KRM_LINK_CTRL_1(P)	((P) ? 0x820C : 0x420C)
 #define IXGBE_KRM_AN_CNTL_1(P)		((P) ? 0x822C : 0x422C)
 #define IXGBE_KRM_AN_CNTL_8(P)		((P) ? 0x8248 : 0x4248)
 #define IXGBE_KRM_SGMII_CTRL(P)		((P) ? 0x82A0 : 0x42A0)
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH(P)	((P) ? 0x836C : 0x436C)
 #define IXGBE_KRM_DSP_TXFFE_STATE_4(P)	((P) ? 0x8634 : 0x4634)
 #define IXGBE_KRM_DSP_TXFFE_STATE_5(P)	((P) ? 0x8638 : 0x4638)
 #define IXGBE_KRM_RX_TRN_LINKUP_CTRL(P)	((P) ? 0x8B00 : 0x4B00)
 #define IXGBE_KRM_PMD_DFX_BURNIN(P)	((P) ? 0x8E00 : 0x4E00)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20(P)	((P) ? 0x9054 : 0x5054)
 #define IXGBE_KRM_TX_COEFF_CTRL_1(P)	((P) ? 0x9520 : 0x5520)
 #define IXGBE_KRM_RX_ANA_CTL(P)		((P) ? 0x9A00 : 0x5A00)
 
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA		~(0x3 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR		BIT(20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_LR		(0x2 << 20)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN		BIT(25)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN		BIT(26)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN		BIT(27)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10M		~(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_100M		BIT(28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G		(0x2 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G		(0x3 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN		(0x4 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_2_5G		(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK		(0x7 << 28)
+#define IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART	BIT(31)
+
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_32B		BIT(9)
 #define IXGBE_KRM_PORT_CAR_GEN_CTRL_NELB_KRPCS		BIT(11)
 
@@ -3618,6 +3639,7 @@ struct ixgbe_info {
 #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KR		BIT(18)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KX		BIT(24)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_EEE_CAP_KR		BIT(26)
+#define IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE		BIT(28)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE		BIT(29)
 #define IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART		BIT(31)
 
@@ -3627,6 +3649,8 @@ struct ixgbe_info {
 #define IXGBE_KRM_AN_CNTL_8_LINEAR			BIT(0)
 #define IXGBE_KRM_AN_CNTL_8_LIMITING			BIT(1)
 
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE		BIT(10)
+#define IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE		BIT(11)
 #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_100_D	BIT(12)
 #define IXGBE_KRM_SGMII_CTRL_MAC_TAR_FORCE_10_D		BIT(19)
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
index f2b1d48a16c3..e2ff823ee202 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c
@@ -851,6 +851,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = {
 	.get_link_capabilities  = &ixgbe_get_copper_link_capabilities_generic,
 	.led_on                 = &ixgbe_led_on_generic,
 	.led_off                = &ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
 	.blink_led_start        = &ixgbe_blink_led_start_X540,
 	.blink_led_stop         = &ixgbe_blink_led_stop_X540,
 	.set_rar                = &ixgbe_set_rar_generic,
@@ -866,6 +867,7 @@ static const struct ixgbe_mac_operations mac_ops_X540 = {
 	.set_vfta               = &ixgbe_set_vfta_generic,
 	.fc_enable              = &ixgbe_fc_enable_generic,
 	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
 	.set_fw_drv_ver         = &ixgbe_set_fw_drv_ver_generic,
 	.init_uta_tables        = &ixgbe_init_uta_tables_generic,
 	.setup_sfp              = NULL,
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
index 7e6b9267ca9d..11fb433eb924 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c
@@ -28,11 +28,31 @@
 
 static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *, ixgbe_link_speed);
 static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *);
+static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *);
+static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *);
+static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *);
 
 static s32 ixgbe_get_invariants_X550_x(struct ixgbe_hw *hw)
 {
 	struct ixgbe_mac_info *mac = &hw->mac;
 	struct ixgbe_phy_info *phy = &hw->phy;
+	struct ixgbe_link_info *link = &hw->link;
+
+	/* Start with X540 invariants, since so similar */
+	ixgbe_get_invariants_X540(hw);
+
+	if (mac->ops.get_media_type(hw) != ixgbe_media_type_copper)
+		phy->ops.set_phy_power = NULL;
+
+	link->addr = IXGBE_CS4227;
+
+	return 0;
+}
+
+static s32 ixgbe_get_invariants_X550_a(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	struct ixgbe_phy_info *phy = &hw->phy;
 
 	/* Start with X540 invariants, since so simular */
 	ixgbe_get_invariants_X540(hw);
@@ -69,8 +89,7 @@ static void ixgbe_setup_mux_ctl(struct ixgbe_hw *hw)
  */
 static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
 {
-	return hw->phy.ops.read_i2c_combined_unlocked(hw, IXGBE_CS4227, reg,
-						      value);
+	return hw->link.ops.read_link_unlocked(hw, hw->link.addr, reg, value);
 }
 
 /**
@@ -83,8 +102,7 @@ static s32 ixgbe_read_cs4227(struct ixgbe_hw *hw, u16 reg, u16 *value)
  */
 static s32 ixgbe_write_cs4227(struct ixgbe_hw *hw, u16 reg, u16 value)
 {
-	return hw->phy.ops.write_i2c_combined_unlocked(hw, IXGBE_CS4227, reg,
-						       value);
+	return hw->link.ops.write_link_unlocked(hw, hw->link.addr, reg, value);
 }
 
 /**
@@ -322,6 +340,68 @@ static s32 ixgbe_write_phy_reg_x550em(struct ixgbe_hw *hw, u32 reg_addr,
 	return IXGBE_NOT_IMPLEMENTED;
 }
 
+/**
+ * ixgbe_read_i2c_combined_generic - Perform I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Returns an error code on error.
+ **/
+static s32 ixgbe_read_i2c_combined_generic(struct ixgbe_hw *hw, u8 addr,
+					   u16 reg, u16 *val)
+{
+	return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, true);
+}
+
+/**
+ * ixgbe_read_i2c_combined_generic_unlocked - Do I2C read combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to read from
+ * @reg: I2C device register to read from
+ * @val: pointer to location to receive read value
+ *
+ * Returns an error code on error.
+ **/
+static s32
+ixgbe_read_i2c_combined_generic_unlocked(struct ixgbe_hw *hw, u8 addr,
+					 u16 reg, u16 *val)
+{
+	return ixgbe_read_i2c_combined_generic_int(hw, addr, reg, val, false);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic - Perform I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Returns an error code on error.
+ **/
+static s32 ixgbe_write_i2c_combined_generic(struct ixgbe_hw *hw,
+					    u8 addr, u16 reg, u16 val)
+{
+	return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, true);
+}
+
+/**
+ * ixgbe_write_i2c_combined_generic_unlocked - Do I2C write combined operation
+ * @hw: pointer to the hardware structure
+ * @addr: I2C bus address to write to
+ * @reg: I2C device register to write to
+ * @val: value to write
+ *
+ * Returns an error code on error.
+ **/
+static s32
+ixgbe_write_i2c_combined_generic_unlocked(struct ixgbe_hw *hw,
+					  u8 addr, u16 reg, u16 val)
+{
+	return ixgbe_write_i2c_combined_generic_int(hw, addr, reg, val, false);
+}
+
 /** ixgbe_init_eeprom_params_X550 - Initialize EEPROM params
  *  @hw: pointer to hardware structure
  *
@@ -1128,47 +1208,17 @@ out:
 	return ret;
 }
 
-/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode.
+/**
+ *  ixgbe_setup_ixfi_x550em_x - MAC specific iXFI configuration
  *  @hw: pointer to hardware structure
- *  @speed: the link speed to force
  *
- *  Configures the integrated KR PHY to use iXFI mode. Used to connect an
- *  internal and external PHY at a specific speed, without autonegotiation.
+ *  iXFI configuration needed for ixgbe_mac_X550EM_x devices.
  **/
-static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+static s32 ixgbe_setup_ixfi_x550em_x(struct ixgbe_hw *hw)
 {
 	s32 status;
 	u32 reg_val;
 
-	/* Disable AN and force speed to 10G Serial. */
-	status = ixgbe_read_iosf_sb_reg_x550(hw,
-					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-					IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
-	if (status)
-		return status;
-
-	reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
-	reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
-
-	/* Select forced link speed for internal PHY. */
-	switch (*speed) {
-	case IXGBE_LINK_SPEED_10GB_FULL:
-		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
-		break;
-	case IXGBE_LINK_SPEED_1GB_FULL:
-		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
-		break;
-	default:
-		/* Other link speeds are not supported by internal KR PHY. */
-		return IXGBE_ERR_LINK_SETUP;
-	}
-
-	status = ixgbe_write_iosf_sb_reg_x550(hw,
-				IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
-	if (status)
-		return status;
-
 	/* Disable training protocol FSM. */
 	status = ixgbe_read_iosf_sb_reg_x550(hw,
 				IXGBE_KRM_RX_TRN_LINKUP_CTRL(hw->bus.lan_id),
@@ -1228,20 +1278,106 @@ static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
 	status = ixgbe_write_iosf_sb_reg_x550(hw,
 				IXGBE_KRM_TX_COEFF_CTRL_1(hw->bus.lan_id),
 				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
-	if (status)
+	return status;
+}
+
+/**
+ *  ixgbe_restart_an_internal_phy_x550em - restart autonegotiation for the
+ *  internal PHY
+ *  @hw: pointer to hardware structure
+ **/
+static s32 ixgbe_restart_an_internal_phy_x550em(struct ixgbe_hw *hw)
+{
+	s32 status;
+	u32 link_ctrl;
+
+	/* Restart auto-negotiation. */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &link_ctrl);
+
+	if (status) {
+		hw_dbg(hw, "Auto-negotiation did not complete\n");
 		return status;
+	}
 
-	/* Toggle port SW reset by AN reset. */
-	status = ixgbe_read_iosf_sb_reg_x550(hw,
+	link_ctrl |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+	status = hw->mac.ops.write_iosf_sb_reg(hw,
 				IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+				IXGBE_SB_IOSF_TARGET_KR_PHY, link_ctrl);
+
+	if (hw->mac.type == ixgbe_mac_x550em_a) {
+		u32 flx_mask_st20;
+
+		/* Indicate to FW that AN restart has been asserted */
+		status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_mask_st20);
+
+		if (status) {
+			hw_dbg(hw, "Auto-negotiation did not complete\n");
+			return status;
+		}
+
+		flx_mask_st20 |= IXGBE_KRM_PMD_FLX_MASK_ST20_FW_AN_RESTART;
+		status = hw->mac.ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, flx_mask_st20);
+	}
+
+	return status;
+}
+
+/** ixgbe_setup_ixfi_x550em - Configure the KR PHY for iXFI mode.
+ *  @hw: pointer to hardware structure
+ *  @speed: the link speed to force
+ *
+ *  Configures the integrated KR PHY to use iXFI mode. Used to connect an
+ *  internal and external PHY at a specific speed, without autonegotiation.
+ **/
+static s32 ixgbe_setup_ixfi_x550em(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+	s32 status;
+	u32 reg_val;
+
+	/* Disable AN and force speed to 10G Serial. */
+	status = ixgbe_read_iosf_sb_reg_x550(hw,
+					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
 	if (status)
 		return status;
 
-	reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+	reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_AN_ENABLE;
+	reg_val &= ~IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_MASK;
+
+	/* Select forced link speed for internal PHY. */
+	switch (*speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_10G;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_FORCE_SPEED_1G;
+		break;
+	default:
+		/* Other link speeds are not supported by internal KR PHY. */
+		return IXGBE_ERR_LINK_SETUP;
+	}
+
 	status = ixgbe_write_iosf_sb_reg_x550(hw,
 				IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
 				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	if (status)
+		return status;
+
+	/* Additional configuration needed for x550em_x */
+	if (hw->mac.type == ixgbe_mac_X550EM_x) {
+		status = ixgbe_setup_ixfi_x550em_x(hw);
+		if (status)
+			return status;
+	}
+
+	/* Toggle port SW reset by AN reset. */
+	status = ixgbe_restart_an_internal_phy_x550em(hw);
 
 	return status;
 }
@@ -1292,7 +1428,7 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
 				__always_unused bool autoneg_wait_to_complete)
 {
 	s32 status;
-	u16 slice, value;
+	u16 reg_slice, reg_val;
 	bool setup_linear = false;
 
 	/* Check if SFP module is supported and linear */
@@ -1308,71 +1444,68 @@ ixgbe_setup_mac_link_sfp_x550em(struct ixgbe_hw *hw,
 	if (status)
 		return status;
 
-	if (!(hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_INT_PHY_MODE)) {
-		/* Configure CS4227 LINE side to 10G SR. */
-		slice = IXGBE_CS4227_LINE_SPARE22_MSB + (hw->bus.lan_id << 12);
-		value = IXGBE_CS4227_SPEED_10G;
-		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-							  slice, value);
-		if (status)
-			goto i2c_err;
+	/* Configure internal PHY for KR/KX. */
+	ixgbe_setup_kr_speed_x550em(hw, speed);
 
-		slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
-		value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
-		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-							  slice, value);
-		if (status)
-			goto i2c_err;
-
-		/* Configure CS4227 for HOST connection rate then type. */
-		slice = IXGBE_CS4227_HOST_SPARE22_MSB + (hw->bus.lan_id << 12);
-		value = speed & IXGBE_LINK_SPEED_10GB_FULL ?
-			IXGBE_CS4227_SPEED_10G : IXGBE_CS4227_SPEED_1G;
-		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-							  slice, value);
-		if (status)
-			goto i2c_err;
+	/* Configure CS4227 LINE side to proper mode. */
+	reg_slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
+	if (setup_linear)
+		reg_val = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 0x1;
+	else
+		reg_val = (IXGBE_CS4227_EDC_MODE_SR << 1) | 0x1;
 
-		slice = IXGBE_CS4227_HOST_SPARE24_LSB + (hw->bus.lan_id << 12);
-		if (setup_linear)
-			value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
-		else
-			value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
-		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-							  slice, value);
-		if (status)
-			goto i2c_err;
+	status = hw->link.ops.write_link(hw, hw->link.addr, reg_slice,
+					 reg_val);
 
-		/* Setup XFI internal link. */
-		status = ixgbe_setup_ixfi_x550em(hw, &speed);
-		if (status) {
-			hw_dbg(hw, "setup_ixfi failed with %d\n", status);
-			return status;
-		}
-	} else {
-		/* Configure internal PHY for KR/KX. */
-		status = ixgbe_setup_kr_speed_x550em(hw, speed);
-		if (status) {
-			hw_dbg(hw, "setup_kr_speed failed with %d\n", status);
-			return status;
-		}
+	return status;
+}
 
-		/* Configure CS4227 LINE side to proper mode. */
-		slice = IXGBE_CS4227_LINE_SPARE24_LSB + (hw->bus.lan_id << 12);
-		if (setup_linear)
-			value = (IXGBE_CS4227_EDC_MODE_CX1 << 1) | 1;
-		else
-			value = (IXGBE_CS4227_EDC_MODE_SR << 1) | 1;
-		status = ixgbe_write_i2c_combined_generic(hw, IXGBE_CS4227,
-							  slice, value);
-		if (status)
-			goto i2c_err;
+/**
+ * ixgbe_setup_sfi_x550a - Configure the internal PHY for native SFI mode
+ * @hw: pointer to hardware structure
+ * @speed: the link speed to force
+ *
+ * Configures the integrated PHY for native SFI mode. Used to connect the
+ * internal PHY directly to an SFP cage, without autonegotiation.
+ **/
+static s32 ixgbe_setup_sfi_x550a(struct ixgbe_hw *hw, ixgbe_link_speed *speed)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+	s32 status;
+	u32 reg_val;
+
+	/* Disable all AN and force speed to 10G Serial. */
+	status = mac->ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+	if (status)
+		return status;
+
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+	reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+
+	/* Select forced link speed for internal PHY. */
+	switch (*speed) {
+	case IXGBE_LINK_SPEED_10GB_FULL:
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_10G;
+		break;
+	case IXGBE_LINK_SPEED_1GB_FULL:
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+		break;
+	default:
+		/* Other link speeds are not supported by internal PHY. */
+		return IXGBE_ERR_LINK_SETUP;
 	}
 
-	return 0;
+	status = mac->ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+
+	/* Toggle port SW reset by AN reset. */
+	status = ixgbe_restart_an_internal_phy_x550em(hw);
 
-i2c_err:
-	hw_dbg(hw, "combined i2c access failed with %d\n", status);
 	return status;
 }
 
@@ -1388,45 +1521,39 @@ ixgbe_setup_mac_link_sfp_n(struct ixgbe_hw *hw, ixgbe_link_speed speed,
 {
 	bool setup_linear = false;
 	u32 reg_phy_int;
-	s32 rc;
+	s32 ret_val;
 
 	/* Check if SFP module is supported and linear */
-	rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+	ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
 
 	/* If no SFP module present, then return success. Return success since
 	 * SFP not present error is not excepted in the setup MAC link flow.
 	 */
-	if (rc == IXGBE_ERR_SFP_NOT_PRESENT)
+	if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT)
 		return 0;
 
-	if (!rc)
-		return rc;
+	if (ret_val)
+		return ret_val;
 
-	/* Configure internal PHY for native SFI */
-	rc = hw->mac.ops.read_iosf_sb_reg(hw,
-					  IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id),
-					  IXGBE_SB_IOSF_TARGET_KR_PHY,
-					  &reg_phy_int);
-	if (rc)
-		return rc;
+	/* Configure internal PHY for native SFI based on module type */
+	ret_val = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_phy_int);
+	if (ret_val)
+		return ret_val;
 
-	if (setup_linear) {
-		reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LIMITING;
-		reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LINEAR;
-	} else {
-		reg_phy_int |= IXGBE_KRM_AN_CNTL_8_LIMITING;
-		reg_phy_int &= ~IXGBE_KRM_AN_CNTL_8_LINEAR;
-	}
+	reg_phy_int &= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_DA;
+	if (!setup_linear)
+		reg_phy_int |= IXGBE_KRM_PMD_FLX_MASK_ST20_SFI_10G_SR;
 
-	rc = hw->mac.ops.write_iosf_sb_reg(hw,
-					   IXGBE_KRM_AN_CNTL_8(hw->bus.lan_id),
-					   IXGBE_SB_IOSF_TARGET_KR_PHY,
-					   reg_phy_int);
-	if (rc)
-		return rc;
+	ret_val = hw->mac.ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_phy_int);
+	if (ret_val)
+		return ret_val;
 
-	/* Setup XFI/SFI internal link */
-	return ixgbe_setup_ixfi_x550em(hw, &speed);
+	/* Setup SFI internal link. */
+	return ixgbe_setup_sfi_x550a(hw, &speed);
 }
 
 /**
 
 /**
@@ -1442,19 +1569,19 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
 	u32 reg_slice, slice_offset;
 	bool setup_linear = false;
 	u16 reg_phy_ext;
-	s32 rc;
+	s32 ret_val;
 
 	/* Check if SFP module is supported and linear */
-	rc = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
+	ret_val = ixgbe_supported_sfp_modules_X550em(hw, &setup_linear);
 
 	/* If no SFP module present, then return success. Return success since
 	 * SFP not present error is not excepted in the setup MAC link flow.
 	 */
-	if (rc == IXGBE_ERR_SFP_NOT_PRESENT)
+	if (ret_val == IXGBE_ERR_SFP_NOT_PRESENT)
 		return 0;
 
-	if (!rc)
-		return rc;
+	if (ret_val)
+		return ret_val;
 
 	/* Configure internal PHY for KR/KX. */
 	ixgbe_setup_kr_speed_x550em(hw, speed);
@@ -1463,10 +1590,10 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed,
 		return IXGBE_ERR_PHY_ADDR_INVALID;
 
 	/* Get external PHY device id */
-	rc = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB,
+	ret_val = hw->phy.ops.read_reg(hw, IXGBE_CS4227_GLOBAL_ID_MSB,
 				  IXGBE_MDIO_ZERO_DEV_TYPE, &reg_phy_ext);
-	if (rc)
-		return rc;
+	if (ret_val)
+		return ret_val;
 
 	/* When configuring quad port CS4223, the MAC instance is part
 	 * of the slice offset.
@@ -1538,7 +1665,7 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
 				     bool link_up_wait_to_complete)
 {
 	u32 status;
-	u16 autoneg_status;
+	u16 i, autoneg_status;
 
 	if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_copper)
 		return IXGBE_ERR_CONFIG;
@@ -1550,14 +1677,18 @@ static s32 ixgbe_check_link_t_X550em(struct ixgbe_hw *hw,
 	if (status || !(*link_up))
 		return status;
 
-	 /* MAC link is up, so check external PHY link.
-	  * Read this twice back to back to indicate current status.
-	  */
-	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
-				      &autoneg_status);
-	if (status)
-		return status;
+	/* MAC link is up, so check external PHY link.
+	 * Link status is latching low, and can only be used to detect link
+	 * drop, and not the current status of the link without performing
+	 * back-to-back reads.
+	 */
+	for (i = 0; i < 2; i++) {
+		status = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
+					      &autoneg_status);
+
+		if (status)
+			return status;
+	}
 
 	/* If external PHY link is not up, then indicate link not up */
 	if (!(autoneg_status & IXGBE_MDIO_AUTO_NEG_LINK_STATUS))
@@ -1575,7 +1706,7 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed,
 		  __always_unused bool autoneg_wait_to_complete)
 {
 	struct ixgbe_mac_info *mac = &hw->mac;
-	u32 lval, sval;
+	u32 lval, sval, flx_val;
 	s32 rc;
 
 	rc = mac->ops.read_iosf_sb_reg(hw,
@@ -1609,14 +1740,55 @@ ixgbe_setup_sgmii(struct ixgbe_hw *hw, __always_unused ixgbe_link_speed speed,
 	if (rc)
 		return rc;
 
-	lval |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+	if (rc)
+		return rc;
+
+	rc = mac->ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &flx_val);
+	if (rc)
+		return rc;
+
+	flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_1G;
+	flx_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+	flx_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+
 	rc = mac->ops.write_iosf_sb_reg(hw,
-					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
-					IXGBE_SB_IOSF_TARGET_KR_PHY, lval);
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, flx_val);
+	if (rc)
+		return rc;
 
+	rc = ixgbe_restart_an_internal_phy_x550em(hw);
 	return rc;
 }
 
+/** ixgbe_init_mac_link_ops_X550em_a - Init mac link function pointers
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_init_mac_link_ops_X550em_a(struct ixgbe_hw *hw)
+{
+	struct ixgbe_mac_info *mac = &hw->mac;
+
+	switch (mac->ops.get_media_type(hw)) {
+	case ixgbe_media_type_fiber:
+		mac->ops.setup_fc = NULL;
+		mac->ops.fc_autoneg = ixgbe_fc_autoneg_fiber_x550em_a;
+		break;
+	case ixgbe_media_type_backplane:
+		mac->ops.fc_autoneg = ixgbe_fc_autoneg_backplane_x550em_a;
+		mac->ops.setup_fc = ixgbe_setup_fc_backplane_x550em_a;
+		break;
+	default:
+		break;
+	}
+}
+
 /** ixgbe_init_mac_link_ops_X550em - init mac link function pointers
  *  @hw: pointer to hardware structure
  **/
@@ -1664,6 +1836,10 @@ static void ixgbe_init_mac_link_ops_X550em(struct ixgbe_hw *hw)
 	default:
 		break;
 	}
+
+	/* Additional modification for X550em_a devices */
+	if (hw->mac.type == ixgbe_mac_x550em_a)
+		ixgbe_init_mac_link_ops_X550em_a(hw);
 }
 
 /** ixgbe_setup_sfp_modules_X550em - Setup SFP module
@@ -1740,7 +1916,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
 	/* Vendor alarm triggered */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
-				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      MDIO_MMD_VEND1,
 				      &reg);
 
 	if (status || !(reg & IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN))
@@ -1748,7 +1924,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
 	/* Vendor Auto-Neg alarm triggered or Global alarm 1 triggered */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_FLAG,
-				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      MDIO_MMD_VEND1,
 				      &reg);
 
 	if (status || !(reg & (IXGBE_MDIO_GLOBAL_AN_VEN_ALM_INT_EN |
@@ -1757,7 +1933,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
 	/* Global alarm triggered */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_ALARM_1,
-				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      MDIO_MMD_VEND1,
 				      &reg);
 
 	if (status)
@@ -1772,7 +1948,7 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 	if (reg & IXGBE_MDIO_GLOBAL_ALM_1_DEV_FAULT) {
 		/*  device fault alarm triggered */
 		status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_FAULT_MSG,
-					  IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+					  MDIO_MMD_VEND1,
 					  &reg);
 		if (status)
 			return status;
@@ -1787,14 +1963,14 @@ static s32 ixgbe_get_lasi_ext_t_x550em(struct ixgbe_hw *hw, bool *lsc)
 
 	/* Vendor alarm 2 triggered */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_CHIP_STD_INT_FLAG,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+				      MDIO_MMD_AN, &reg);
 
 	if (status || !(reg & IXGBE_MDIO_GLOBAL_STD_ALM2_INT))
 		return status;
 
 	/* link connect/disconnect event occurred */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM2,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+				      MDIO_MMD_AN, &reg);
 
 	if (status)
 		return status;
@@ -1826,20 +2002,20 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
 
 	/* Enable link status change alarm */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &reg);
+				      MDIO_MMD_AN, &reg);
 	if (status)
 		return status;
 
 	reg |= IXGBE_MDIO_PMA_TX_VEN_LASI_INT_EN;
 
 	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_PMA_TX_VEN_LASI_INT_MASK,
-				       IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg);
+				       MDIO_MMD_AN, reg);
 	if (status)
 		return status;
 
 	/* Enable high temperature failure and global fault alarms */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
-				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      MDIO_MMD_VEND1,
 				      &reg);
 	if (status)
 		return status;
@@ -1848,14 +2024,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
 		IXGBE_MDIO_GLOBAL_INT_DEV_FAULT_EN);
 
 	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_MASK,
-				       IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				       MDIO_MMD_VEND1,
 				       reg);
 	if (status)
 		return status;
 
 	/* Enable vendor Auto-Neg alarm and Global Interrupt Mask 1 alarm */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
-				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      MDIO_MMD_VEND1,
 				      &reg);
 	if (status)
 		return status;
@@ -1864,14 +2040,14 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
 		IXGBE_MDIO_GLOBAL_ALARM_1_INT);
 
 	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_VEN_MASK,
-				       IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				       MDIO_MMD_VEND1,
 				       reg);
 	if (status)
 		return status;
 
 	/* Enable chip-wide vendor alarm */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
-				      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				      MDIO_MMD_VEND1,
 				      &reg);
 	if (status)
 		return status;
@@ -1879,7 +2055,7 @@ static s32 ixgbe_enable_lasi_ext_t_x550em(struct ixgbe_hw *hw)
 	reg |= IXGBE_MDIO_GLOBAL_VEN_ALM_INT_EN;
 
 	status = hw->phy.ops.write_reg(hw, IXGBE_MDIO_GLOBAL_INT_CHIP_STD_MASK,
-				       IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+				       MDIO_MMD_VEND1,
 				       reg);
 
 	return status;
@@ -1945,13 +2121,31 @@ static s32 ixgbe_setup_kr_speed_x550em(struct ixgbe_hw *hw,
 	if (speed & IXGBE_LINK_SPEED_1GB_FULL)
 		reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_CAP_KX;
 
-	/* Restart auto-negotiation. */
-	reg_val |= IXGBE_KRM_LINK_CTRL_1_TETH_AN_RESTART;
 	status = hw->mac.ops.write_iosf_sb_reg(hw,
 					IXGBE_KRM_LINK_CTRL_1(hw->bus.lan_id),
 					IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
 
-	return status;
+	if (hw->mac.type == ixgbe_mac_x550em_a) {
+		/* Set lane mode to KR auto negotiation */
+		status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &reg_val);
+
+		if (status)
+			return status;
+
+		reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_MASK;
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_SPEED_AN;
+		reg_val |= IXGBE_KRM_PMD_FLX_MASK_ST20_AN_EN;
+		reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_AN37_EN;
+		reg_val &= ~IXGBE_KRM_PMD_FLX_MASK_ST20_SGMII_EN;
+
+		status = hw->mac.ops.write_iosf_sb_reg(hw,
+				IXGBE_KRM_PMD_FLX_MASK_ST20(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, reg_val);
+	}
+
+	return ixgbe_restart_an_internal_phy_x550em(hw);
 }
 
 /** ixgbe_setup_kx4_x550em - Configure the KX4 PHY.
@@ -2020,14 +2214,12 @@ static s32 ixgbe_ext_phy_t_x550em_get_link(struct ixgbe_hw *hw, bool *link_up)
 	*link_up = false;
 
 	/* read this twice back to back to indicate current status */
-	ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-				   IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+	ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
 				   &autoneg_status);
 	if (ret)
 		return ret;
 
-	ret = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS,
-				   IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+	ret = hw->phy.ops.read_reg(hw, MDIO_STAT1, MDIO_MMD_AN,
 				   &autoneg_status);
 	if (ret)
 		return ret;
@@ -2073,7 +2265,7 @@ static s32 ixgbe_setup_internal_phy_t_x550em(struct ixgbe_hw *hw)
 		return 0;
 
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      MDIO_MMD_AN,
 				      &speed);
 	if (status)
 		return status;
@@ -2134,10 +2326,10 @@ static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
 
 	/* To turn on the LED, set mode to ON. */
 	hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-			     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+			     MDIO_MMD_VEND1, &phy_data);
 	phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK;
 	hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-			      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+			      MDIO_MMD_VEND1, phy_data);
 
 	return 0;
 }
@@ -2156,10 +2348,10 @@ static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx)
 
 	/* To turn on the LED, set mode to ON. */
 	hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-			     IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data);
+			     MDIO_MMD_VEND1, &phy_data);
 	phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK;
 	hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx,
-			      IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data);
+			      MDIO_MMD_VEND1, phy_data);
 
 	return 0;
 }
@@ -2180,7 +2372,7 @@ static s32 ixgbe_get_lcd_t_x550em(struct ixgbe_hw *hw,
 	*lcd_speed = IXGBE_LINK_SPEED_UNKNOWN;
 
 	status = hw->phy.ops.read_reg(hw, IXGBE_AUTO_NEG_LP_STATUS,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      MDIO_MMD_AN,
 				      &an_lp_status);
 	if (status)
 		return status;
@@ -2281,6 +2473,90 @@ static s32 ixgbe_setup_fc_x550em(struct ixgbe_hw *hw)
 	return rc;
 }
 
+/**
+ *  ixgbe_fc_autoneg_backplane_x550em_a - Enable flow control IEEE clause 37
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_fc_autoneg_backplane_x550em_a(struct ixgbe_hw *hw)
+{
+	u32 link_s1, lp_an_page_high, an_cntl_1;
+	s32 status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+	ixgbe_link_speed speed;
+	bool link_up;
+
+	/* AN should have completed when the cable was plugged in.
+	 * Look for reasons to bail out.  Bail out if:
+	 * - FC autoneg is disabled, or if
+	 * - link is not up.
+	 */
+	if (hw->fc.disable_fc_autoneg) {
+		hw_err(hw, "Flow control autoneg is disabled\n");
+		goto out;
+	}
+
+	hw->mac.ops.check_link(hw, &speed, &link_up, false);
+	if (!link_up) {
+		hw_err(hw, "The link is down\n");
+		goto out;
+	}
+
+	/* Check that auto-negotiation has completed */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+					IXGBE_KRM_LINK_S1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, &link_s1);
+
+	if (status || (link_s1 & IXGBE_KRM_LINK_S1_MAC_AN_COMPLETE) == 0) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		status = IXGBE_ERR_FC_NOT_NEGOTIATED;
+		goto out;
+	}
+
+	/* Read the 10g AN autoc and LP ability registers and resolve
+	 * local flow control settings accordingly
+	 */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl_1);
+
+	if (status) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		goto out;
+	}
+
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+				IXGBE_KRM_LP_BASE_PAGE_HIGH(hw->bus.lan_id),
+				IXGBE_SB_IOSF_TARGET_KR_PHY, &lp_an_page_high);
+
+	if (status) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		goto out;
+	}
+
+	status = ixgbe_negotiate_fc(hw, an_cntl_1, lp_an_page_high,
+				    IXGBE_KRM_AN_CNTL_1_SYM_PAUSE,
+				    IXGBE_KRM_AN_CNTL_1_ASM_PAUSE,
+				    IXGBE_KRM_LP_BASE_PAGE_HIGH_SYM_PAUSE,
+				    IXGBE_KRM_LP_BASE_PAGE_HIGH_ASM_PAUSE);
+
+out:
+	if (!status) {
+		hw->fc.fc_was_autonegged = true;
+	} else {
+		hw->fc.fc_was_autonegged = false;
+		hw->fc.current_mode = hw->fc.requested_mode;
+	}
+}
+
+/**
+ *  ixgbe_fc_autoneg_fiber_x550em_a - passthrough FC settings
+ *  @hw: pointer to hardware structure
+ **/
+static void ixgbe_fc_autoneg_fiber_x550em_a(struct ixgbe_hw *hw)
+{
+	hw->fc.fc_was_autonegged = false;
+	hw->fc.current_mode = hw->fc.requested_mode;
+}
+
 /** ixgbe_enter_lplu_x550em - Transition to low power states
  *  @hw: pointer to hardware structure
  *
@@ -2327,7 +2603,7 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
 		return ixgbe_set_copper_phy_power(hw, false);
 
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_STAT,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      MDIO_MMD_AN,
 				      &speed);
 	if (status)
 		return status;
@@ -2349,20 +2625,20 @@ static s32 ixgbe_enter_lplu_t_x550em(struct ixgbe_hw *hw)
 
 	/* Clear AN completed indication */
 	status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_VENDOR_TX_ALARM,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      MDIO_MMD_AN,
 				      &autoneg_reg);
 	if (status)
 		return status;
 
-	status = hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+	status = hw->phy.ops.read_reg(hw, MDIO_AN_10GBT_CTRL,
+				      MDIO_MMD_AN,
 				      &an_10g_cntl_reg);
 	if (status)
 		return status;
 
 	status = hw->phy.ops.read_reg(hw,
 				      IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG,
-				      IXGBE_MDIO_AUTO_NEG_DEV_TYPE,
+				      MDIO_MMD_AN,
 				      &autoneg_reg);
 	if (status)
 		return status;
@@ -2520,7 +2796,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
 
 	status = hw->phy.ops.read_reg(hw,
 				      IXGBE_MDIO_TX_VENDOR_ALARMS_3,
-				      IXGBE_MDIO_PMA_PMD_DEV_TYPE,
+				      MDIO_MMD_PMAPMD,
 				      &reg);
 	if (status)
 		return status;
@@ -2531,7 +2807,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
 	if (reg & IXGBE_MDIO_TX_VENDOR_ALARMS_3_RST_MASK) {
 		status = hw->phy.ops.read_reg(hw,
 					IXGBE_MDIO_GLOBAL_RES_PR_10,
-					IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+					MDIO_MMD_VEND1,
 					&reg);
 		if (status)
 			return status;
@@ -2540,7 +2816,7 @@ static s32 ixgbe_init_ext_t_x550em(struct ixgbe_hw *hw)
 
 		status = hw->phy.ops.write_reg(hw,
 					IXGBE_MDIO_GLOBAL_RES_PR_10,
-					IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE,
+					MDIO_MMD_VEND1,
 					reg);
 		if (status)
 			return status;
@@ -2729,6 +3005,90 @@ static void ixgbe_set_source_address_pruning_X550(struct ixgbe_hw *hw,
 }
 
 /**
+ *  ixgbe_setup_fc_backplane_x550em_a - Set up flow control
+ *  @hw: pointer to hardware structure
+ *
+ *  Called at init time to set up flow control.
+ **/
+static s32 ixgbe_setup_fc_backplane_x550em_a(struct ixgbe_hw *hw)
+{
+	s32 status = 0;
+	u32 an_cntl = 0;
+
+	/* Validate the requested mode */
+	if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) {
+		hw_err(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n");
+		return IXGBE_ERR_INVALID_LINK_SETTINGS;
+	}
+
+	if (hw->fc.requested_mode == ixgbe_fc_default)
+		hw->fc.requested_mode = ixgbe_fc_full;
+
+	/* Set up the 1G and 10G flow control advertisement registers so the
+	 * HW will be able to do FC autoneg once the cable is plugged in.  If
+	 * we link at 10G, the 1G advertisement is harmless and vice versa.
+	 */
+	status = hw->mac.ops.read_iosf_sb_reg(hw,
+					IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, &an_cntl);
+
+	if (status) {
+		hw_dbg(hw, "Auto-Negotiation did not complete\n");
+		return status;
+	}
+
+	/* The possible values of fc.requested_mode are:
+	 * 0: Flow control is completely disabled
+	 * 1: Rx flow control is enabled (we can receive pause frames,
+	 *    but not send pause frames).
+	 * 2: Tx flow control is enabled (we can send pause frames but
+	 *    we do not support receiving pause frames).
+	 * 3: Both Rx and Tx flow control (symmetric) are enabled.
+	 * other: Invalid.
+	 */
+	switch (hw->fc.requested_mode) {
+	case ixgbe_fc_none:
+		/* Flow control completely disabled by software override. */
+		an_cntl &= ~(IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+			     IXGBE_KRM_AN_CNTL_1_ASM_PAUSE);
+		break;
+	case ixgbe_fc_tx_pause:
+		/* Tx Flow control is enabled, and Rx Flow control is
+		 * disabled by software override.
+		 */
+		an_cntl |= IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+		an_cntl &= ~IXGBE_KRM_AN_CNTL_1_SYM_PAUSE;
+		break;
+	case ixgbe_fc_rx_pause:
+		/* Rx Flow control is enabled and Tx Flow control is
+		 * disabled by software override. Since there really
+		 * isn't a way to advertise that we are capable of RX
+		 * Pause ONLY, we will advertise that we support both
+		 * symmetric and asymmetric Rx PAUSE, as such we fall
+		 * through to the fc_full statement.  Later, we will
+		 * disable the adapter's ability to send PAUSE frames.
+		 */
+	case ixgbe_fc_full:
+		/* Flow control (both Rx and Tx) is enabled by SW override. */
+		an_cntl |= IXGBE_KRM_AN_CNTL_1_SYM_PAUSE |
+			   IXGBE_KRM_AN_CNTL_1_ASM_PAUSE;
+		break;
+	default:
+		hw_err(hw, "Flow control param set incorrectly\n");
+		return IXGBE_ERR_CONFIG;
+	}
+
+	status = hw->mac.ops.write_iosf_sb_reg(hw,
+					IXGBE_KRM_AN_CNTL_1(hw->bus.lan_id),
+					IXGBE_SB_IOSF_TARGET_KR_PHY, an_cntl);
+
+	/* Restart auto-negotiation. */
+	status = ixgbe_restart_an_internal_phy_x550em(hw);
+
+	return status;
+}
+
+/**
  * ixgbe_set_mux - Set mux for port 1 access with CS4227
  * @hw: pointer to hardware structure
  * @state: set mux if 1, clear if 0
@@ -2934,6 +3294,7 @@ static const struct ixgbe_mac_operations mac_ops_X550 = {
 	X550_COMMON_MAC
 	.led_on			= ixgbe_led_on_generic,
 	.led_off		= ixgbe_led_off_generic,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
 	.reset_hw		= &ixgbe_reset_hw_X540,
 	.get_media_type		= &ixgbe_get_media_type_X540,
 	.get_san_mac_addr	= &ixgbe_get_san_mac_addr_generic,
@@ -2948,12 +3309,14 @@ static const struct ixgbe_mac_operations mac_ops_X550 = {
 	.prot_autoc_read	= prot_autoc_read_generic,
 	.prot_autoc_write	= prot_autoc_write_generic,
 	.setup_fc		= ixgbe_setup_fc_generic,
+	.fc_autoneg		= ixgbe_fc_autoneg,
 };
 
 static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
 	X550_COMMON_MAC
 	.led_on			= ixgbe_led_on_t_x550em,
 	.led_off		= ixgbe_led_off_t_x550em,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
 	.reset_hw		= &ixgbe_reset_hw_X550em,
 	.get_media_type		= &ixgbe_get_media_type_X550em,
 	.get_san_mac_addr	= NULL,
@@ -2966,6 +3329,7 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = {
 	.release_swfw_sync	= &ixgbe_release_swfw_sync_X550em,
 	.init_swfw_sync		= &ixgbe_init_swfw_sync_X540,
 	.setup_fc		= NULL, /* defined later */
+	.fc_autoneg		= ixgbe_fc_autoneg,
 	.read_iosf_sb_reg	= ixgbe_read_iosf_sb_reg_x550,
 	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550,
 };
@@ -2974,6 +3338,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = {
 	X550_COMMON_MAC
 	.led_on			= ixgbe_led_on_t_x550em,
 	.led_off		= ixgbe_led_off_t_x550em,
+	.init_led_link_act	= ixgbe_init_led_link_act_generic,
 	.reset_hw		= ixgbe_reset_hw_X550em,
 	.get_media_type		= ixgbe_get_media_type_X550em,
 	.get_san_mac_addr	= NULL,
@@ -2985,6 +3350,7 @@ static struct ixgbe_mac_operations mac_ops_x550em_a = {
 	.acquire_swfw_sync	= ixgbe_acquire_swfw_sync_x550em_a,
 	.release_swfw_sync	= ixgbe_release_swfw_sync_x550em_a,
 	.setup_fc		= ixgbe_setup_fc_x550em,
+	.fc_autoneg		= ixgbe_fc_autoneg,
 	.read_iosf_sb_reg	= ixgbe_read_iosf_sb_reg_x550a,
 	.write_iosf_sb_reg	= ixgbe_write_iosf_sb_reg_x550a,
 };
@@ -3036,11 +3402,6 @@ static const struct ixgbe_phy_operations phy_ops_X550EM_x = {
 	.identify		= &ixgbe_identify_phy_x550em,
 	.read_reg		= &ixgbe_read_phy_reg_generic,
 	.write_reg		= &ixgbe_write_phy_reg_generic,
-	.read_i2c_combined	= &ixgbe_read_i2c_combined_generic,
-	.write_i2c_combined	= &ixgbe_write_i2c_combined_generic,
-	.read_i2c_combined_unlocked = &ixgbe_read_i2c_combined_generic_unlocked,
-	.write_i2c_combined_unlocked =
-				     &ixgbe_write_i2c_combined_generic_unlocked,
 };
 
 static const struct ixgbe_phy_operations phy_ops_x550em_a = {
@@ -3053,6 +3414,13 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = {
 	.write_reg_mdi		= &ixgbe_write_phy_reg_mdi,
 };
 
+static const struct ixgbe_link_operations link_ops_x550em_x = {
+	.read_link		= ixgbe_read_i2c_combined_generic,
+	.write_link		= ixgbe_write_i2c_combined_generic,
+	.read_link_unlocked	= ixgbe_read_i2c_combined_generic_unlocked,
+	.write_link_unlocked	= ixgbe_write_i2c_combined_generic_unlocked,
+};
+
 static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = {
 	IXGBE_MVALS_INIT(X550)
 };
@@ -3083,11 +3451,12 @@ const struct ixgbe_info ixgbe_X550EM_x_info = {
 	.phy_ops		= &phy_ops_X550EM_x,
 	.mbx_ops		= &mbx_ops_generic,
 	.mvals			= ixgbe_mvals_X550EM_x,
+	.link_ops		= &link_ops_x550em_x,
 };
 
 const struct ixgbe_info ixgbe_x550em_a_info = {
 	.mac			= ixgbe_mac_x550em_a,
-	.get_invariants		= &ixgbe_get_invariants_X550_x,
+	.get_invariants		= &ixgbe_get_invariants_X550_a,
 	.mac_ops		= &mac_ops_x550em_a,
 	.eeprom_ops		= &eeprom_ops_X550EM_x,
 	.phy_ops		= &phy_ops_x550em_a,
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 7eaac3234049..6d4bef5803f2 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1498,6 +1498,9 @@ static void ixgbevf_free_irq(struct ixgbevf_adapter *adapter)
 {
 	int i, q_vectors;
 
+	if (!adapter->msix_entries)
+		return;
+
 	q_vectors = adapter->num_msix_vectors;
 	i = q_vectors - 1;
 
@@ -2552,6 +2555,9 @@ static void ixgbevf_free_q_vectors(struct ixgbevf_adapter *adapter)
  **/
 static void ixgbevf_reset_interrupt_capability(struct ixgbevf_adapter *adapter)
 {
+	if (!adapter->msix_entries)
+		return;
+
 	pci_disable_msix(adapter->pdev);
 	kfree(adapter->msix_entries);
 	adapter->msix_entries = NULL;
@@ -3329,11 +3335,15 @@ static int ixgbevf_tso(struct ixgbevf_ring *tx_ring,
 
 	/* initialize outer IP header fields */
 	if (ip.v4->version == 4) {
+		unsigned char *csum_start = skb_checksum_start(skb);
+		unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
+
 		/* IP header will have to cancel out any data that
 		 * is not a part of the outer IP header
 		 */
-		ip.v4->check = csum_fold(csum_add(lco_csum(skb),
-						  csum_unfold(l4.tcp->check)));
+		ip.v4->check = csum_fold(csum_partial(trans_start,
+						      csum_start - trans_start,
+						      0));
 		type_tucmd |= IXGBE_ADVTXD_TUCMD_IPV4;
 
 		ip.v4->tot_len = 0;
@@ -3742,24 +3752,8 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu)
 	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_hw *hw = &adapter->hw;
 	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
-	int max_possible_frame = MAXIMUM_ETHERNET_VLAN_SIZE;
 	int ret;
 
-	switch (adapter->hw.api_version) {
-	case ixgbe_mbox_api_11:
-	case ixgbe_mbox_api_12:
-		max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
-		break;
-	default:
-		if (adapter->hw.mac.type != ixgbe_mac_82599_vf)
-			max_possible_frame = IXGBE_MAX_JUMBO_FRAME_SIZE;
-		break;
-	}
-
-	/* MTU < 68 is an error and causes problems on some kernels */
-	if ((new_mtu < 68) || (max_frame > max_possible_frame))
-		return -EINVAL;
-
 	spin_lock_bh(&adapter->mbx_lock);
 	/* notify the PF of our intent to use this size of frame */
 	ret = hw->mac.ops.set_rlpml(hw, max_frame);
@@ -3810,11 +3804,10 @@ static int ixgbevf_suspend(struct pci_dev *pdev, pm_message_t state)
 		ixgbevf_free_irq(adapter);
 		ixgbevf_free_all_tx_resources(adapter);
 		ixgbevf_free_all_rx_resources(adapter);
+		ixgbevf_clear_interrupt_scheme(adapter);
 		rtnl_unlock();
 	}
 
-	ixgbevf_clear_interrupt_scheme(adapter);
-
 #ifdef CONFIG_PM
 	retval = pci_save_state(pdev);
 	if (retval)
@@ -4104,6 +4097,23 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 68 - 1504 or 9710 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	switch (adapter->hw.api_version) {
+	case ixgbe_mbox_api_11:
+	case ixgbe_mbox_api_12:
+		netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
+				  (ETH_HLEN + ETH_FCS_LEN);
+		break;
+	default:
+		if (adapter->hw.mac.type != ixgbe_mac_82599_vf)
+			netdev->max_mtu = IXGBE_MAX_JUMBO_FRAME_SIZE -
+					  (ETH_HLEN + ETH_FCS_LEN);
+		else
+			netdev->max_mtu = ETH_DATA_LEN + ETH_FCS_LEN;
+		break;
+	}
+
 	if (IXGBE_REMOVED(hw->hw_addr)) {
 		err = -EIO;
 		goto err_sw_init;
diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c
index 836ebd8ee768..f9fcab54783c 100644
--- a/drivers/net/ethernet/jme.c
+++ b/drivers/net/ethernet/jme.c
@@ -2357,14 +2357,6 @@ jme_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct jme_adapter *jme = netdev_priv(netdev);
 
-	if (new_mtu == jme->old_mtu)
-		return 0;
-
-	if (((new_mtu + ETH_HLEN) > MAX_ETHERNET_JUMBO_PACKET_SIZE) ||
-		((new_mtu) < IPV6_MIN_MTU))
-		return -EINVAL;
-
-
 	netdev->mtu = new_mtu;
 	netdev_update_features(netdev);
 
@@ -3063,6 +3055,10 @@ jme_init_one(struct pci_dev *pdev,
 	if (using_dac)
 		netdev->features	|=	NETIF_F_HIGHDMA;
 
+	/* MTU range: 1280 - 9202 */
+	netdev->min_mtu = IPV6_MIN_MTU;
+	netdev->max_mtu = MAX_ETHERNET_JUMBO_PACKET_SIZE - ETH_HLEN;
+
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	pci_set_drvdata(pdev, netdev);
 
diff --git a/drivers/net/ethernet/korina.c b/drivers/net/ethernet/korina.c
index 1799fe1415df..cbeea915f026 100644
--- a/drivers/net/ethernet/korina.c
+++ b/drivers/net/ethernet/korina.c
@@ -1085,7 +1085,6 @@ static const struct net_device_ops korina_netdev_ops = {
 	.ndo_set_rx_mode	= korina_multicast_list,
 	.ndo_tx_timeout		= korina_tx_timeout,
 	.ndo_do_ioctl		= korina_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/lantiq_etop.c b/drivers/net/ethernet/lantiq_etop.c
index 91e09d68b7e2..faea52da8dae 100644
--- a/drivers/net/ethernet/lantiq_etop.c
+++ b/drivers/net/ethernet/lantiq_etop.c
@@ -303,15 +303,9 @@ ltq_etop_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info)
 	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
 }
 
-static int
-ltq_etop_nway_reset(struct net_device *dev)
-{
-	return phy_start_aneg(dev->phydev);
-}
-
 static const struct ethtool_ops ltq_etop_ethtool_ops = {
 	.get_drvinfo = ltq_etop_get_drvinfo,
-	.nway_reset = ltq_etop_nway_reset,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
 	.set_link_ksettings = phy_ethtool_set_link_ksettings,
 };
@@ -519,18 +513,16 @@ ltq_etop_tx(struct sk_buff *skb, struct net_device *dev)
 static int
 ltq_etop_change_mtu(struct net_device *dev, int new_mtu)
 {
-	int ret = eth_change_mtu(dev, new_mtu);
+	struct ltq_etop_priv *priv = netdev_priv(dev);
+	unsigned long irq_flags;
 
-	if (!ret) {
-		struct ltq_etop_priv *priv = netdev_priv(dev);
-		unsigned long flags;
+	dev->mtu = new_mtu;
 
-		spin_lock_irqsave(&priv->lock, flags);
-		ltq_etop_w32((ETOP_PLEN_UNDER << 16) | new_mtu,
-			LTQ_ETOP_IGPLEN);
-		spin_unlock_irqrestore(&priv->lock, flags);
-	}
-	return ret;
+	/* Write the new MTU into LTQ_ETOP_IGPLEN while holding priv->lock */
+	spin_lock_irqsave(&priv->lock, irq_flags);
+	ltq_etop_w32((ETOP_PLEN_UNDER << 16) | new_mtu, LTQ_ETOP_IGPLEN);
+	spin_unlock_irqrestore(&priv->lock, irq_flags);
+	return 0;
 }
 
 static int
@@ -704,6 +696,7 @@ ltq_etop_probe(struct platform_device *pdev)
 	priv->pldata = dev_get_platdata(&pdev->dev);
 	priv->netdev = dev;
 	spin_lock_init(&priv->lock);
+	SET_NETDEV_DEV(dev, &pdev->dev);
 
 	for (i = 0; i < MAX_DMA_CHAN; i++) {
 		if (IS_TX(i))
diff --git a/drivers/net/ethernet/marvell/Kconfig b/drivers/net/ethernet/marvell/Kconfig
index 2664827ddecd..f4b7cf18fb0f 100644
--- a/drivers/net/ethernet/marvell/Kconfig
+++ b/drivers/net/ethernet/marvell/Kconfig
@@ -5,7 +5,7 @@
 config NET_VENDOR_MARVELL
 	bool "Marvell devices"
 	default y
-	depends on PCI || CPU_PXA168 || MV64X60 || PPC32 || PLAT_ORION || INET
+	depends on PCI || CPU_PXA168 || MV64X60 || PPC32 || PLAT_ORION || INET || COMPILE_TEST
 	---help---
 	  If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -18,7 +18,8 @@ if NET_VENDOR_MARVELL
 
 config MV643XX_ETH
 	tristate "Marvell Discovery (643XX) and Orion ethernet support"
-	depends on (MV64X60 || PPC32 || PLAT_ORION) && INET
+	depends on (MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST) && INET
+	depends on HAS_DMA
 	select PHYLIB
 	select MVMDIO
 	---help---
@@ -43,6 +44,7 @@ config MVMDIO
 config MVNETA_BM_ENABLE
 	tristate "Marvell Armada 38x/XP network interface BM support"
 	depends on MVNETA
+	depends on !64BIT
 	---help---
 	  This driver supports auxiliary block of the network
 	  interface units in the Marvell ARMADA XP and ARMADA 38x SoC
@@ -54,13 +56,15 @@ config MVNETA_BM_ENABLE
 	  buffer management.
 
 config MVNETA
-	tristate "Marvell Armada 370/38x/XP network interface support"
-	depends on PLAT_ORION
+	tristate "Marvell Armada 370/38x/XP/37xx network interface support"
+	depends on ARCH_MVEBU || COMPILE_TEST
+	depends on HAS_DMA
 	select MVMDIO
 	select FIXED_PHY
 	---help---
 	  This driver supports the network interface units in the
-	  Marvell ARMADA XP, ARMADA 370 and ARMADA 38x SoC family.
+	  Marvell ARMADA XP, ARMADA 370, ARMADA 38x and
+	  ARMADA 37xx SoC family.
 
 	  Note that this driver is distinct from the mv643xx_eth
 	  driver, which should be used for the older Marvell SoCs
@@ -68,16 +72,20 @@ config MVNETA
 
 config MVNETA_BM
 	tristate
+	depends on !64BIT
 	default y if MVNETA=y && MVNETA_BM_ENABLE!=n
 	default MVNETA_BM_ENABLE
 	select HWBM
+	select GENERIC_ALLOCATOR
 	help
 	  MVNETA_BM must not be 'm' if MVNETA=y, so this symbol ensures
 	  that all dependencies are met.
 
 config MVPP2
 	tristate "Marvell Armada 375 network interface support"
-	depends on MACH_ARMADA_375
+	depends on MACH_ARMADA_375 || COMPILE_TEST
+	depends on HAS_DMA
+	depends on !64BIT
 	select MVMDIO
 	---help---
 	  This driver supports the network interface units in the
diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 5b12022adf1f..5f62c3d70df9 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -384,8 +384,6 @@ struct mv643xx_eth_private {
 
 	struct net_device *dev;
 
-	struct phy_device *phy;
-
 	struct timer_list mib_counters_timer;
 	spinlock_t mib_counters_lock;
 	struct mib_counters mib_counters;
@@ -1236,7 +1234,7 @@ static void mv643xx_eth_adjust_link(struct net_device *dev)
 		     DISABLE_AUTO_NEG_FOR_FLOW_CTRL |
 		     DISABLE_AUTO_NEG_FOR_DUPLEX;
 
-	if (mp->phy->autoneg == AUTONEG_ENABLE) {
+	if (dev->phydev->autoneg == AUTONEG_ENABLE) {
 		/* enable auto negotiation */
 		pscr &= ~autoneg_disable;
 		goto out_write;
@@ -1244,7 +1242,7 @@ static void mv643xx_eth_adjust_link(struct net_device *dev)
 
 	pscr |= autoneg_disable;
 
-	if (mp->phy->speed == SPEED_1000) {
+	if (dev->phydev->speed == SPEED_1000) {
 		/* force gigabit, half duplex not supported */
 		pscr |= SET_GMII_SPEED_TO_1000;
 		pscr |= SET_FULL_DUPLEX_MODE;
@@ -1253,12 +1251,12 @@ static void mv643xx_eth_adjust_link(struct net_device *dev)
 
 	pscr &= ~SET_GMII_SPEED_TO_1000;
 
-	if (mp->phy->speed == SPEED_100)
+	if (dev->phydev->speed == SPEED_100)
 		pscr |= SET_MII_SPEED_TO_100;
 	else
 		pscr &= ~SET_MII_SPEED_TO_100;
 
-	if (mp->phy->duplex == DUPLEX_FULL)
+	if (dev->phydev->duplex == DUPLEX_FULL)
 		pscr |= SET_FULL_DUPLEX_MODE;
 	else
 		pscr &= ~SET_FULL_DUPLEX_MODE;
@@ -1499,55 +1497,69 @@ static const struct mv643xx_eth_stats mv643xx_eth_stats[] = {
 };
 
 static int
-mv643xx_eth_get_settings_phy(struct mv643xx_eth_private *mp,
-			     struct ethtool_cmd *cmd)
+mv643xx_eth_get_link_ksettings_phy(struct mv643xx_eth_private *mp,
+				   struct ethtool_link_ksettings *cmd)
 {
+	struct phy_device *phydev = mp->dev->phydev;
 	int err;
+	u32 legacy_supp, legacy_adv;
 
-	err = phy_read_status(mp->phy);
+	err = phy_read_status(phydev);
 	if (err == 0)
-		err = phy_ethtool_gset(mp->phy, cmd);
+		err = phy_ethtool_ksettings_get(phydev, cmd);
 
 	/*
 	 * The MAC does not support 1000baseT_Half.
 	 */
-	cmd->supported &= ~SUPPORTED_1000baseT_Half;
-	cmd->advertising &= ~ADVERTISED_1000baseT_Half;
+	ethtool_convert_link_mode_to_legacy_u32(&legacy_supp,
+						cmd->link_modes.supported);
+	legacy_supp &= ~SUPPORTED_1000baseT_Half;
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						legacy_supp);
+	ethtool_convert_link_mode_to_legacy_u32(&legacy_adv,
+						cmd->link_modes.advertising);
+	legacy_adv &= ~ADVERTISED_1000baseT_Half;
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						legacy_adv);
 
 	return err;
 }
 
 static int
-mv643xx_eth_get_settings_phyless(struct mv643xx_eth_private *mp,
-				 struct ethtool_cmd *cmd)
+mv643xx_eth_get_link_ksettings_phyless(struct mv643xx_eth_private *mp,
+				       struct ethtool_link_ksettings *cmd)
 {
 	u32 port_status;
+	struct ethtool_link_settings *base = &cmd->base;
 
 	port_status = rdlp(mp, PORT_STATUS);
 
-	cmd->supported = SUPPORTED_MII;
-	cmd->advertising = ADVERTISED_MII;
+	/*
+	 * No PHY: report plain MII, decode speed/duplex from PORT_STATUS.
+	 */
 	switch (port_status & PORT_SPEED_MASK) {
 	case PORT_SPEED_10:
-		ethtool_cmd_speed_set(cmd, SPEED_10);
+		base->speed = SPEED_10;
 		break;
 	case PORT_SPEED_100:
-		ethtool_cmd_speed_set(cmd, SPEED_100);
+		base->speed = SPEED_100;
 		break;
 	case PORT_SPEED_1000:
-		ethtool_cmd_speed_set(cmd, SPEED_1000);
+		base->speed = SPEED_1000;
 		break;
 	default:
-		cmd->speed = -1;
+		base->speed = SPEED_UNKNOWN;
 		break;
 	}
-	cmd->duplex = (port_status & FULL_DUPLEX) ? DUPLEX_FULL : DUPLEX_HALF;
-	cmd->port = PORT_MII;
-	cmd->phy_address = 0;
-	cmd->transceiver = XCVR_INTERNAL;
-	cmd->autoneg = AUTONEG_DISABLE;
-	cmd->maxtxpkt = 1;
-	cmd->maxrxpkt = 1;
+	base->duplex = (port_status & FULL_DUPLEX) ? DUPLEX_FULL : DUPLEX_HALF;
+	base->port = PORT_MII;
+	base->phy_address = 0;
+	base->autoneg = AUTONEG_DISABLE;
+
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported,
+						SUPPORTED_MII);
+	ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising,
+						ADVERTISED_MII);
 
 	return 0;
 }
@@ -1555,23 +1567,21 @@ mv643xx_eth_get_settings_phyless(struct mv643xx_eth_private *mp,
 static void
 mv643xx_eth_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
-	struct mv643xx_eth_private *mp = netdev_priv(dev);
 	wol->supported = 0;
 	wol->wolopts = 0;
-	if (mp->phy)
-		phy_ethtool_get_wol(mp->phy, wol);
+	if (dev->phydev)
+		phy_ethtool_get_wol(dev->phydev, wol);
 }
 
 static int
 mv643xx_eth_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
-	struct mv643xx_eth_private *mp = netdev_priv(dev);
 	int err;
 
-	if (mp->phy == NULL)
+	if (!dev->phydev)
 		return -EOPNOTSUPP;
 
-	err = phy_ethtool_set_wol(mp->phy, wol);
+	err = phy_ethtool_set_wol(dev->phydev, wol);
 	/* Given that mv643xx_eth works without the marvell-specific PHY driver,
 	 * this debugging hint is useful to have.
 	 */
@@ -1581,31 +1591,38 @@ mv643xx_eth_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 }
 
 static int
-mv643xx_eth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+mv643xx_eth_get_link_ksettings(struct net_device *dev,
+			       struct ethtool_link_ksettings *cmd)
 {
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
 
-	if (mp->phy != NULL)
-		return mv643xx_eth_get_settings_phy(mp, cmd);
+	if (dev->phydev)
+		return mv643xx_eth_get_link_ksettings_phy(mp, cmd);
 	else
-		return mv643xx_eth_get_settings_phyless(mp, cmd);
+		return mv643xx_eth_get_link_ksettings_phyless(mp, cmd);
 }
 
 static int
-mv643xx_eth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+mv643xx_eth_set_link_ksettings(struct net_device *dev,
+			       const struct ethtool_link_ksettings *cmd)
 {
-	struct mv643xx_eth_private *mp = netdev_priv(dev);
+	struct ethtool_link_ksettings c = *cmd;
+	u32 advertising;
 	int ret;
 
-	if (mp->phy == NULL)
+	if (!dev->phydev)
 		return -EINVAL;
 
 	/*
 	 * The MAC does not support 1000baseT_Half.
 	 */
-	cmd->advertising &= ~ADVERTISED_1000baseT_Half;
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						c.link_modes.advertising);
+	advertising &= ~ADVERTISED_1000baseT_Half;
+	ethtool_convert_legacy_u32_to_link_mode(c.link_modes.advertising,
+						advertising);
 
-	ret = phy_ethtool_sset(mp->phy, cmd);
+	ret = phy_ethtool_ksettings_set(dev->phydev, &c);
 	if (!ret)
 		mv643xx_eth_adjust_link(dev);
 	return ret;
@@ -1622,16 +1639,6 @@ static void mv643xx_eth_get_drvinfo(struct net_device *dev,
 	strlcpy(drvinfo->bus_info, "platform", sizeof(drvinfo->bus_info));
 }
 
-static int mv643xx_eth_nway_reset(struct net_device *dev)
-{
-	struct mv643xx_eth_private *mp = netdev_priv(dev);
-
-	if (mp->phy == NULL)
-		return -EINVAL;
-
-	return genphy_restart_aneg(mp->phy);
-}
-
 static int
 mv643xx_eth_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
@@ -1754,10 +1761,8 @@ static int mv643xx_eth_get_sset_count(struct net_device *dev, int sset)
 }
 
 static const struct ethtool_ops mv643xx_eth_ethtool_ops = {
-	.get_settings		= mv643xx_eth_get_settings,
-	.set_settings		= mv643xx_eth_set_settings,
 	.get_drvinfo		= mv643xx_eth_get_drvinfo,
-	.nway_reset		= mv643xx_eth_nway_reset,
+	.nway_reset		= phy_ethtool_nway_reset,
 	.get_link		= ethtool_op_get_link,
 	.get_coalesce		= mv643xx_eth_get_coalesce,
 	.set_coalesce		= mv643xx_eth_set_coalesce,
@@ -1769,6 +1774,8 @@ static const struct ethtool_ops mv643xx_eth_ethtool_ops = {
 	.get_ts_info		= ethtool_op_get_ts_info,
 	.get_wol                = mv643xx_eth_get_wol,
 	.set_wol                = mv643xx_eth_set_wol,
+	.get_link_ksettings	= mv643xx_eth_get_link_ksettings,
+	.set_link_ksettings	= mv643xx_eth_set_link_ksettings,
 };
 
 
@@ -2328,19 +2335,21 @@ static inline void oom_timer_wrapper(unsigned long data)
 
 static void port_start(struct mv643xx_eth_private *mp)
 {
+	struct net_device *dev = mp->dev;
 	u32 pscr;
 	int i;
 
 	/*
 	 * Perform PHY reset, if there is a PHY.
 	 */
-	if (mp->phy != NULL) {
-		struct ethtool_cmd cmd;
+	if (dev->phydev) {
+		struct ethtool_link_ksettings cmd;
 
-		mv643xx_eth_get_settings(mp->dev, &cmd);
-		phy_init_hw(mp->phy);
-		mv643xx_eth_set_settings(mp->dev, &cmd);
-		phy_start(mp->phy);
+		mv643xx_eth_get_link_ksettings(dev, &cmd);
+		phy_init_hw(dev->phydev);
+		mv643xx_eth_set_link_ksettings(
+			dev, (const struct ethtool_link_ksettings *)&cmd);
+		phy_start(dev->phydev);
 	}
 
 	/*
@@ -2352,7 +2361,7 @@ static void port_start(struct mv643xx_eth_private *mp)
 	wrlp(mp, PORT_SERIAL_CONTROL, pscr);
 
 	pscr |= DO_NOT_FORCE_LINK_FAIL;
-	if (mp->phy == NULL)
+	if (!dev->phydev)
 		pscr |= FORCE_LINK_PASS;
 	wrlp(mp, PORT_SERIAL_CONTROL, pscr);
 
@@ -2536,8 +2545,8 @@ static int mv643xx_eth_stop(struct net_device *dev)
 	del_timer_sync(&mp->rx_oom);
 
 	netif_carrier_off(dev);
-	if (mp->phy)
-		phy_stop(mp->phy);
+	if (dev->phydev)
+		phy_stop(dev->phydev);
 	free_irq(dev->irq, dev);
 
 	port_reset(mp);
@@ -2555,13 +2564,12 @@ static int mv643xx_eth_stop(struct net_device *dev)
 
 static int mv643xx_eth_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
-	struct mv643xx_eth_private *mp = netdev_priv(dev);
 	int ret;
 
-	if (mp->phy == NULL)
+	if (!dev->phydev)
 		return -ENOTSUPP;
 
-	ret = phy_mii_ioctl(mp->phy, ifr, cmd);
+	ret = phy_mii_ioctl(dev->phydev, ifr, cmd);
 	if (!ret)
 		mv643xx_eth_adjust_link(dev);
 	return ret;
@@ -2571,9 +2579,6 @@ static int mv643xx_eth_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
 
-	if (new_mtu < 64 || new_mtu > 9500)
-		return -EINVAL;
-
 	dev->mtu = new_mtu;
 	mv643xx_eth_recalc_skb_size(mp);
 	tx_set_rate(mp, 1000000000, 16777216);
@@ -3024,7 +3029,8 @@ static struct phy_device *phy_scan(struct mv643xx_eth_private *mp,
 
 static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex)
 {
-	struct phy_device *phy = mp->phy;
+	struct net_device *dev = mp->dev;
+	struct phy_device *phy = dev->phydev;
 
 	if (speed == 0) {
 		phy->autoneg = AUTONEG_ENABLE;
@@ -3042,6 +3048,7 @@ static void phy_init(struct mv643xx_eth_private *mp, int speed, int duplex)
 
 static void init_pscr(struct mv643xx_eth_private *mp, int speed, int duplex)
 {
+	struct net_device *dev = mp->dev;
 	u32 pscr;
 
 	pscr = rdlp(mp, PORT_SERIAL_CONTROL);
@@ -3051,7 +3058,7 @@ static void init_pscr(struct mv643xx_eth_private *mp, int speed, int duplex)
 	}
 
 	pscr = MAX_RX_PACKET_9700BYTE | SERIAL_PORT_CONTROL_RESERVED;
-	if (mp->phy == NULL) {
+	if (!dev->phydev) {
 		pscr |= DISABLE_AUTO_NEG_SPEED_GMII;
 		if (speed == SPEED_1000)
 			pscr |= SET_GMII_SPEED_TO_1000;
@@ -3090,6 +3097,7 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	struct mv643xx_eth_platform_data *pd;
 	struct mv643xx_eth_private *mp;
 	struct net_device *dev;
+	struct phy_device *phydev = NULL;
 	struct resource *res;
 	int err;
 
@@ -3146,18 +3154,18 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 
 	err = 0;
 	if (pd->phy_node) {
-		mp->phy = of_phy_connect(mp->dev, pd->phy_node,
-					 mv643xx_eth_adjust_link, 0,
-					 get_phy_mode(mp));
-		if (!mp->phy)
+		phydev = of_phy_connect(mp->dev, pd->phy_node,
+					mv643xx_eth_adjust_link, 0,
+					get_phy_mode(mp));
+		if (!phydev)
 			err = -ENODEV;
 		else
-			phy_addr_set(mp, mp->phy->mdio.addr);
+			phy_addr_set(mp, phydev->mdio.addr);
 	} else if (pd->phy_addr != MV643XX_ETH_PHY_NONE) {
-		mp->phy = phy_scan(mp, pd->phy_addr);
+		phydev = phy_scan(mp, pd->phy_addr);
 
-		if (IS_ERR(mp->phy))
-			err = PTR_ERR(mp->phy);
+		if (IS_ERR(phydev))
+			err = PTR_ERR(phydev);
 		else
 			phy_init(mp, pd->speed, pd->duplex);
 	}
@@ -3206,6 +3214,10 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
 	dev->priv_flags |= IFF_UNICAST_FLT;
 	dev->gso_max_segs = MV643XX_MAX_TSO_SEGS;
 
+	/* MTU range: 64 - 9500 */
+	dev->min_mtu = 64;
+	dev->max_mtu = 9500;
+
 	if (mp->shared->win_protect)
 		wrl(mp, WINDOW_PROTECT(mp->port_num), mp->shared->win_protect);
 
@@ -3239,10 +3251,11 @@ out:
 static int mv643xx_eth_remove(struct platform_device *pdev)
 {
 	struct mv643xx_eth_private *mp = platform_get_drvdata(pdev);
+	struct net_device *dev = mp->dev;
 
 	unregister_netdev(mp->dev);
-	if (mp->phy != NULL)
-		phy_disconnect(mp->phy);
+	if (dev->phydev)
+		phy_disconnect(dev->phydev);
 	cancel_work_sync(&mp->tx_timeout_task);
 
 	if (!IS_ERR(mp->clk))
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 0c0a45af950f..e05e22705cf7 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -296,6 +296,12 @@
 /* descriptor aligned size */
 #define MVNETA_DESC_ALIGNED_SIZE	32
 
+/* Number of bytes to be taken into account by HW when putting incoming data
+ * to the buffers. It is needed in case NET_SKB_PAD exceeds maximum packet
+ * offset supported in MVNETA_RXQ_CONFIG_REG(q) registers.
+ */
+#define MVNETA_RX_PKT_OFFSET_CORRECTION		64
+
 #define MVNETA_RX_PKT_SIZE(mtu) \
 	ALIGN((mtu) + MVNETA_MH_SIZE + MVNETA_VLAN_TAG_LEN + \
 	      ETH_HLEN + ETH_FCS_LEN,			     \
@@ -391,6 +397,9 @@ struct mvneta_port {
 	spinlock_t lock;
 	bool is_stopped;
 
+	u32 cause_rx_tx;
+	struct napi_struct napi;
+
 	/* Core clock */
 	struct clk *clk;
 	/* AXI clock */
@@ -416,6 +425,10 @@ struct mvneta_port {
 	u64 ethtool_stats[ARRAY_SIZE(mvneta_statistics)];
 
 	u32 indir[MVNETA_RSS_LU_TABLE_SIZE];
+
+	/* Flags for special SoC configurations */
+	bool neta_armada3700;
+	u16 rx_offset_correction;
 };
 
 /* The mvneta_tx_desc and mvneta_rx_desc structures describe the
@@ -561,6 +574,9 @@ struct mvneta_rx_queue {
 	u32 pkts_coal;
 	u32 time_coal;
 
+	/* Virtual address of the RX buffer */
+	void  **buf_virt_addr;
+
 	/* Virtual address of the RX DMA descriptors array */
 	struct mvneta_rx_desc *descs;
 
@@ -955,14 +971,9 @@ static int mvneta_mbus_io_win_set(struct mvneta_port *pp, u32 base, u32 wsize,
 	return 0;
 }
 
-/* Assign and initialize pools for port. In case of fail
- * buffer manager will remain disabled for current port.
- */
-static int mvneta_bm_port_init(struct platform_device *pdev,
-			       struct mvneta_port *pp)
+static  int mvneta_bm_port_mbus_init(struct mvneta_port *pp)
 {
-	struct device_node *dn = pdev->dev.of_node;
-	u32 long_pool_id, short_pool_id, wsize;
+	u32 wsize;
 	u8 target, attr;
 	int err;
 
@@ -981,6 +992,25 @@ static int mvneta_bm_port_init(struct platform_device *pdev,
 		netdev_info(pp->dev, "fail to configure mbus window to BM\n");
 		return err;
 	}
+	return 0;
+}
+
+/* Assign and initialize pools for port. In case of fail
+ * buffer manager will remain disabled for current port.
+ */
+static int mvneta_bm_port_init(struct platform_device *pdev,
+			       struct mvneta_port *pp)
+{
+	struct device_node *dn = pdev->dev.of_node;
+	u32 long_pool_id, short_pool_id;
+
+	if (!pp->neta_armada3700) {
+		int ret;
+
+		ret = mvneta_bm_port_mbus_init(pp);
+		if (ret)
+			return ret;
+	}
 
 	if (of_property_read_u32(dn, "bm,pool-long", &long_pool_id)) {
 		netdev_info(pp->dev, "missing long pool id\n");
@@ -1349,22 +1379,27 @@ static void mvneta_defaults_set(struct mvneta_port *pp)
 	for_each_present_cpu(cpu) {
 		int rxq_map = 0, txq_map = 0;
 		int rxq, txq;
+		if (!pp->neta_armada3700) {
+			for (rxq = 0; rxq < rxq_number; rxq++)
+				if ((rxq % max_cpu) == cpu)
+					rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
+
+			for (txq = 0; txq < txq_number; txq++)
+				if ((txq % max_cpu) == cpu)
+					txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
+
+			/* With only one TX queue we configure a special case
+			 * which will allow to get all the irq on a single
+			 * CPU
+			 */
+			if (txq_number == 1)
+				txq_map = (cpu == pp->rxq_def) ?
+					MVNETA_CPU_TXQ_ACCESS(1) : 0;
 
-		for (rxq = 0; rxq < rxq_number; rxq++)
-			if ((rxq % max_cpu) == cpu)
-				rxq_map |= MVNETA_CPU_RXQ_ACCESS(rxq);
-
-		for (txq = 0; txq < txq_number; txq++)
-			if ((txq % max_cpu) == cpu)
-				txq_map |= MVNETA_CPU_TXQ_ACCESS(txq);
-
-		/* With only one TX queue we configure a special case
-		 * which will allow to get all the irq on a single
-		 * CPU
-		 */
-		if (txq_number == 1)
-			txq_map = (cpu == pp->rxq_def) ?
-				MVNETA_CPU_TXQ_ACCESS(1) : 0;
+		} else {
+			txq_map = MVNETA_CPU_TXQ_ACCESS_ALL_MASK;
+			rxq_map = MVNETA_CPU_RXQ_ACCESS_ALL_MASK;
+		}
 
 		mvreg_write(pp, MVNETA_CPU_MAP(cpu), rxq_map | txq_map);
 	}
@@ -1573,10 +1608,14 @@ static void mvneta_tx_done_pkts_coal_set(struct mvneta_port *pp,
 
 /* Handle rx descriptor fill by setting buf_cookie and buf_phys_addr */
 static void mvneta_rx_desc_fill(struct mvneta_rx_desc *rx_desc,
-				u32 phys_addr, u32 cookie)
+				u32 phys_addr, void *virt_addr,
+				struct mvneta_rx_queue *rxq)
 {
-	rx_desc->buf_cookie = cookie;
+	int i;
+
 	rx_desc->buf_phys_addr = phys_addr;
+	i = rx_desc - rxq->descs;
+	rxq->buf_virt_addr[i] = virt_addr;
 }
 
 /* Decrement sent descriptors counter */
@@ -1781,7 +1820,8 @@ EXPORT_SYMBOL_GPL(mvneta_frag_free);
 
 /* Refill processing for SW buffer management */
 static int mvneta_rx_refill(struct mvneta_port *pp,
-			    struct mvneta_rx_desc *rx_desc)
+			    struct mvneta_rx_desc *rx_desc,
+			    struct mvneta_rx_queue *rxq)
 
 {
 	dma_addr_t phys_addr;
@@ -1799,7 +1839,8 @@ static int mvneta_rx_refill(struct mvneta_port *pp,
 		return -ENOMEM;
 	}
 
-	mvneta_rx_desc_fill(rx_desc, phys_addr, (u32)data);
+	phys_addr += pp->rx_offset_correction;
+	mvneta_rx_desc_fill(rx_desc, phys_addr, data, rxq);
 	return 0;
 }
 
@@ -1861,7 +1902,7 @@ static void mvneta_rxq_drop_pkts(struct mvneta_port *pp,
 
 	for (i = 0; i < rxq->size; i++) {
 		struct mvneta_rx_desc *rx_desc = rxq->descs + i;
-		void *data = (void *)rx_desc->buf_cookie;
+		void *data = rxq->buf_virt_addr[i];
 
 		dma_unmap_single(pp->dev->dev.parent, rx_desc->buf_phys_addr,
 				 MVNETA_RX_BUF_SIZE(pp->pkt_size), DMA_FROM_DEVICE);
@@ -1894,12 +1935,13 @@ static int mvneta_rx_swbm(struct mvneta_port *pp, int rx_todo,
 		unsigned char *data;
 		dma_addr_t phys_addr;
 		u32 rx_status, frag_size;
-		int rx_bytes, err;
+		int rx_bytes, err, index;
 
 		rx_done++;
 		rx_status = rx_desc->status;
 		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
-		data = (unsigned char *)rx_desc->buf_cookie;
+		index = rx_desc - rxq->descs;
+		data = rxq->buf_virt_addr[index];
 		phys_addr = rx_desc->buf_phys_addr;
 
 		if (!mvneta_rxq_desc_is_first_last(rx_status) ||
@@ -1918,7 +1960,7 @@ err_drop_frame:
 				goto err_drop_frame;
 
 			dma_sync_single_range_for_cpu(dev->dev.parent,
-						      rx_desc->buf_phys_addr,
+						      phys_addr,
 						      MVNETA_MH_SIZE + NET_SKB_PAD,
 						      rx_bytes,
 						      DMA_FROM_DEVICE);
@@ -1938,7 +1980,7 @@ err_drop_frame:
 		}
 
 		/* Refill processing */
-		err = mvneta_rx_refill(pp, rx_desc);
+		err = mvneta_rx_refill(pp, rx_desc, rxq);
 		if (err) {
 			netdev_err(dev, "Linux processing - Can't refill\n");
 			rxq->missed++;
@@ -2020,7 +2062,7 @@ static int mvneta_rx_hwbm(struct mvneta_port *pp, int rx_todo,
 		rx_done++;
 		rx_status = rx_desc->status;
 		rx_bytes = rx_desc->data_size - (ETH_FCS_LEN + MVNETA_MH_SIZE);
-		data = (unsigned char *)rx_desc->buf_cookie;
+		data = (u8 *)(uintptr_t)rx_desc->buf_cookie;
 		phys_addr = rx_desc->buf_phys_addr;
 		pool_id = MVNETA_RX_GET_BM_POOL_ID(rx_desc);
 		bm_pool = &pp->bm_priv->bm_pools[pool_id];
@@ -2610,6 +2652,17 @@ static void mvneta_set_rx_mode(struct net_device *dev)
 /* Interrupt handling - the callback for request_irq() */
 static irqreturn_t mvneta_isr(int irq, void *dev_id)
 {
+	struct mvneta_port *pp = (struct mvneta_port *)dev_id;
+
+	mvreg_write(pp, MVNETA_INTR_NEW_MASK, 0);
+	napi_schedule(&pp->napi);
+
+	return IRQ_HANDLED;
+}
+
+/* Interrupt handling - the callback for request_percpu_irq() */
+static irqreturn_t mvneta_percpu_isr(int irq, void *dev_id)
+{
 	struct mvneta_pcpu_port *port = (struct mvneta_pcpu_port *)dev_id;
 
 	disable_percpu_irq(port->pp->dev->irq);
@@ -2657,7 +2710,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports);
 
 	if (!netif_running(pp->dev)) {
-		napi_complete(&port->napi);
+		napi_complete(napi);
 		return rx_done;
 	}
 
@@ -2686,7 +2739,8 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 	 */
 	rx_queue = fls(((cause_rx_tx >> 8) & 0xff));
 
-	cause_rx_tx |= port->cause_rx_tx;
+	cause_rx_tx |= pp->neta_armada3700 ? pp->cause_rx_tx :
+		port->cause_rx_tx;
 
 	if (rx_queue) {
 		rx_queue = rx_queue - 1;
@@ -2700,11 +2754,27 @@ static int mvneta_poll(struct napi_struct *napi, int budget)
 
 	if (budget > 0) {
 		cause_rx_tx = 0;
-		napi_complete(&port->napi);
-		enable_percpu_irq(pp->dev->irq, 0);
+		napi_complete(napi);
+
+		if (pp->neta_armada3700) {
+			unsigned long flags;
+
+			local_irq_save(flags);
+			mvreg_write(pp, MVNETA_INTR_NEW_MASK,
+				    MVNETA_RX_INTR_MASK(rxq_number) |
+				    MVNETA_TX_INTR_MASK(txq_number) |
+				    MVNETA_MISCINTR_INTR_MASK);
+			local_irq_restore(flags);
+		} else {
+			enable_percpu_irq(pp->dev->irq, 0);
+		}
 	}
 
-	port->cause_rx_tx = cause_rx_tx;
+	if (pp->neta_armada3700)
+		pp->cause_rx_tx = cause_rx_tx;
+	else
+		port->cause_rx_tx = cause_rx_tx;
+
 	return rx_done;
 }
 
@@ -2716,7 +2786,7 @@ static int mvneta_rxq_fill(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 
 	for (i = 0; i < num; i++) {
 		memset(rxq->descs + i, 0, sizeof(struct mvneta_rx_desc));
-		if (mvneta_rx_refill(pp, rxq->descs + i) != 0) {
+		if (mvneta_rx_refill(pp, rxq->descs + i, rxq) != 0) {
 			netdev_err(pp->dev, "%s:rxq %d, %d of %d buffs  filled\n",
 				__func__, rxq->id, i, num);
 			break;
@@ -2773,7 +2843,7 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
 	mvreg_write(pp, MVNETA_RXQ_SIZE_REG(rxq->id), rxq->size);
 
 	/* Set Offset */
-	mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD);
+	mvneta_rxq_offset_set(pp, rxq, NET_SKB_PAD - pp->rx_offset_correction);
 
 	/* Set coalescing pkts and time */
 	mvneta_rx_pkts_coal_set(pp, rxq, rxq->pkts_coal);
@@ -2784,14 +2854,14 @@ static int mvneta_rxq_init(struct mvneta_port *pp,
 		mvneta_rxq_buf_size_set(pp, rxq,
 					MVNETA_RX_BUF_SIZE(pp->pkt_size));
 		mvneta_rxq_bm_disable(pp, rxq);
+		mvneta_rxq_fill(pp, rxq, rxq->size);
 	} else {
 		mvneta_rxq_bm_enable(pp, rxq);
 		mvneta_rxq_long_pool_set(pp, rxq);
 		mvneta_rxq_short_pool_set(pp, rxq);
+		mvneta_rxq_non_occup_desc_add(pp, rxq, rxq->size);
 	}
 
-	mvneta_rxq_fill(pp, rxq, rxq->size);
-
 	return 0;
 }
 
@@ -2974,11 +3044,16 @@ static void mvneta_start_dev(struct mvneta_port *pp)
 	/* start the Rx/Tx activity */
 	mvneta_port_enable(pp);
 
-	/* Enable polling on the port */
-	for_each_online_cpu(cpu) {
-		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+	if (!pp->neta_armada3700) {
+		/* Enable polling on the port */
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-		napi_enable(&port->napi);
+			napi_enable(&port->napi);
+		}
+	} else {
+		napi_enable(&pp->napi);
 	}
 
 	/* Unmask interrupts. It has to be done from each CPU */
@@ -3000,10 +3075,15 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 
 	phy_stop(ndev->phydev);
 
-	for_each_online_cpu(cpu) {
-		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+	if (!pp->neta_armada3700) {
+		for_each_online_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-		napi_disable(&port->napi);
+			napi_disable(&port->napi);
+		}
+	} else {
+		napi_disable(&pp->napi);
 	}
 
 	netif_carrier_off(pp->dev);
@@ -3024,29 +3104,6 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 	mvneta_rx_reset(pp);
 }
 
-/* Return positive if MTU is valid */
-static int mvneta_check_mtu_valid(struct net_device *dev, int mtu)
-{
-	if (mtu < 68) {
-		netdev_err(dev, "cannot change mtu to less than 68\n");
-		return -EINVAL;
-	}
-
-	/* 9676 == 9700 - 20 and rounding to 8 */
-	if (mtu > 9676) {
-		netdev_info(dev, "Illegal MTU value %d, round to 9676\n", mtu);
-		mtu = 9676;
-	}
-
-	if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
-		netdev_info(dev, "Illegal MTU value %d, rounding to %d\n",
-			mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8));
-		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
-	}
-
-	return mtu;
-}
-
 static void mvneta_percpu_enable(void *arg)
 {
 	struct mvneta_port *pp = arg;
@@ -3067,9 +3124,11 @@ static int mvneta_change_mtu(struct net_device *dev, int mtu)
 	struct mvneta_port *pp = netdev_priv(dev);
 	int ret;
 
-	mtu = mvneta_check_mtu_valid(dev, mtu);
-	if (mtu < 0)
-		return -EINVAL;
+	if (!IS_ALIGNED(MVNETA_RX_PKT_SIZE(mtu), 8)) {
+		netdev_info(dev, "Illegal MTU value %d, rounding to %d\n",
+			    mtu, ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8));
+		mtu = ALIGN(MVNETA_RX_PKT_SIZE(mtu), 8);
+	}
 
 	dev->mtu = mtu;
 
@@ -3434,31 +3493,37 @@ static int mvneta_open(struct net_device *dev)
 		goto err_cleanup_rxqs;
 
 	/* Connect to port interrupt line */
-	ret = request_percpu_irq(pp->dev->irq, mvneta_isr,
-				 MVNETA_DRIVER_NAME, pp->ports);
+	if (pp->neta_armada3700)
+		ret = request_irq(pp->dev->irq, mvneta_isr, 0,
+				  dev->name, pp);
+	else
+		ret = request_percpu_irq(pp->dev->irq, mvneta_percpu_isr,
+					 dev->name, pp->ports);
 	if (ret) {
 		netdev_err(pp->dev, "cannot request irq %d\n", pp->dev->irq);
 		goto err_cleanup_txqs;
 	}
 
-	/* Enable per-CPU interrupt on all the CPU to handle our RX
-	 * queue interrupts
-	 */
-	on_each_cpu(mvneta_percpu_enable, pp, true);
+	if (!pp->neta_armada3700) {
+		/* Enable per-CPU interrupt on all the CPUs to handle our RX
+		 * queue interrupts
+		 */
+		on_each_cpu(mvneta_percpu_enable, pp, true);
 
-	pp->is_stopped = false;
-	/* Register a CPU notifier to handle the case where our CPU
-	 * might be taken offline.
-	 */
-	ret = cpuhp_state_add_instance_nocalls(online_hpstate,
-					       &pp->node_online);
-	if (ret)
-		goto err_free_irq;
+		pp->is_stopped = false;
+		/* Register a CPU notifier to handle the case where our CPU
+		 * might be taken offline.
+		 */
+		ret = cpuhp_state_add_instance_nocalls(online_hpstate,
+						       &pp->node_online);
+		if (ret)
+			goto err_free_irq;
 
-	ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
-					       &pp->node_dead);
-	if (ret)
-		goto err_free_online_hp;
+		ret = cpuhp_state_add_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						       &pp->node_dead);
+		if (ret)
+			goto err_free_online_hp;
+	}
 
 	/* In default link is down */
 	netif_carrier_off(pp->dev);
@@ -3474,13 +3539,20 @@ static int mvneta_open(struct net_device *dev)
 	return 0;
 
 err_free_dead_hp:
-	cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
-					    &pp->node_dead);
+	if (!pp->neta_armada3700)
+		cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						    &pp->node_dead);
 err_free_online_hp:
-	cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
+	if (!pp->neta_armada3700)
+		cpuhp_state_remove_instance_nocalls(online_hpstate,
+						    &pp->node_online);
 err_free_irq:
-	on_each_cpu(mvneta_percpu_disable, pp, true);
-	free_percpu_irq(pp->dev->irq, pp->ports);
+	if (pp->neta_armada3700) {
+		free_irq(pp->dev->irq, pp);
+	} else {
+		on_each_cpu(mvneta_percpu_disable, pp, true);
+		free_percpu_irq(pp->dev->irq, pp->ports);
+	}
 err_cleanup_txqs:
 	mvneta_cleanup_txqs(pp);
 err_cleanup_rxqs:
@@ -3493,23 +3565,31 @@ static int mvneta_stop(struct net_device *dev)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 
-	/* Inform that we are stopping so we don't want to setup the
-	 * driver for new CPUs in the notifiers. The code of the
-	 * notifier for CPU online is protected by the same spinlock,
-	 * so when we get the lock, the notifer work is done.
-	 */
-	spin_lock(&pp->lock);
-	pp->is_stopped = true;
-	spin_unlock(&pp->lock);
+	if (!pp->neta_armada3700) {
+		/* Inform that we are stopping so we don't want to set up the
+		 * driver for new CPUs in the notifiers. The code of the
+		 * notifier for CPU online is protected by the same spinlock,
+		 * so when we get the lock, the notifier work is done.
+		 */
+		spin_lock(&pp->lock);
+		pp->is_stopped = true;
+		spin_unlock(&pp->lock);
 
-	mvneta_stop_dev(pp);
-	mvneta_mdio_remove(pp);
+		mvneta_stop_dev(pp);
+		mvneta_mdio_remove(pp);
+
+		cpuhp_state_remove_instance_nocalls(online_hpstate,
+						    &pp->node_online);
+		cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
+						    &pp->node_dead);
+		on_each_cpu(mvneta_percpu_disable, pp, true);
+		free_percpu_irq(dev->irq, pp->ports);
+	} else {
+		mvneta_stop_dev(pp);
+		mvneta_mdio_remove(pp);
+		free_irq(dev->irq, pp);
+	}
 
-	cpuhp_state_remove_instance_nocalls(online_hpstate, &pp->node_online);
-	cpuhp_state_remove_instance_nocalls(CPUHP_NET_MVNETA_DEAD,
-					    &pp->node_dead);
-	on_each_cpu(mvneta_percpu_disable, pp, true);
-	free_percpu_irq(dev->irq, pp->ports);
 	mvneta_cleanup_rxqs(pp);
 	mvneta_cleanup_txqs(pp);
 
@@ -3788,6 +3868,11 @@ static int mvneta_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
 				   const u8 *key, const u8 hfunc)
 {
 	struct mvneta_port *pp = netdev_priv(dev);
+
+	/* Current code for Armada 3700 doesn't support RSS features yet */
+	if (pp->neta_armada3700)
+		return -EOPNOTSUPP;
+
 	/* We require at least one supported parameter to be changed
 	 * and no change in any of the unsupported parameters
 	 */
@@ -3808,6 +3893,10 @@ static int mvneta_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
 {
 	struct mvneta_port *pp = netdev_priv(dev);
 
+	/* Current code for Armada 3700 doesn't support RSS features yet */
+	if (pp->neta_armada3700)
+		return -EOPNOTSUPP;
+
 	if (hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
 
@@ -3832,6 +3921,7 @@ static const struct net_device_ops mvneta_netdev_ops = {
 };
 
 const struct ethtool_ops mvneta_eth_tool_ops = {
+	.nway_reset	= phy_ethtool_nway_reset,
 	.get_link       = ethtool_op_get_link,
 	.set_coalesce   = mvneta_ethtool_set_coalesce,
 	.get_coalesce   = mvneta_ethtool_get_coalesce,
@@ -3885,6 +3975,11 @@ static int mvneta_init(struct device *dev, struct mvneta_port *pp)
 		rxq->size = pp->rx_ring_size;
 		rxq->pkts_coal = MVNETA_RX_COAL_PKTS;
 		rxq->time_coal = MVNETA_RX_COAL_USEC;
+		rxq->buf_virt_addr = devm_kmalloc(pp->dev->dev.parent,
+						  rxq->size * sizeof(void *),
+						  GFP_KERNEL);
+		if (!rxq->buf_virt_addr)
+			return -ENOMEM;
 	}
 
 	return 0;
@@ -3909,16 +4004,29 @@ static void mvneta_conf_mbus_windows(struct mvneta_port *pp,
 	win_enable = 0x3f;
 	win_protect = 0;
 
-	for (i = 0; i < dram->num_cs; i++) {
-		const struct mbus_dram_window *cs = dram->cs + i;
-		mvreg_write(pp, MVNETA_WIN_BASE(i), (cs->base & 0xffff0000) |
-			    (cs->mbus_attr << 8) | dram->mbus_dram_target_id);
+	if (dram) {
+		for (i = 0; i < dram->num_cs; i++) {
+			const struct mbus_dram_window *cs = dram->cs + i;
 
-		mvreg_write(pp, MVNETA_WIN_SIZE(i),
-			    (cs->size - 1) & 0xffff0000);
+			mvreg_write(pp, MVNETA_WIN_BASE(i),
+				    (cs->base & 0xffff0000) |
+				    (cs->mbus_attr << 8) |
+				    dram->mbus_dram_target_id);
 
-		win_enable &= ~(1 << i);
-		win_protect |= 3 << (2 * i);
+			mvreg_write(pp, MVNETA_WIN_SIZE(i),
+				    (cs->size - 1) & 0xffff0000);
+
+			win_enable &= ~(1 << i);
+			win_protect |= 3 << (2 * i);
+		}
+	} else {
+		/* For Armada3700 open default 4GB Mbus window, leaving
+		 * arbitration of target/attribute to a different layer
+		 * of configuration.
+		 */
+		mvreg_write(pp, MVNETA_WIN_SIZE(0), 0xffff0000);
+		win_enable &= ~BIT(0);
+		win_protect = 3;
 	}
 
 	mvreg_write(pp, MVNETA_BASE_ADDR_ENABLE, win_enable);
@@ -4039,8 +4147,19 @@ static int mvneta_probe(struct platform_device *pdev)
 
 	pp->rxq_def = rxq_def;
 
+	/* Set RX packet offset correction for platforms whose
+	 * NET_SKB_PAD exceeds 64B. It should be 64B for 64-bit
+	 * platforms and 0B for 32-bit ones.
+	 */
+	pp->rx_offset_correction =
+		max(0, NET_SKB_PAD - MVNETA_RX_PKT_OFFSET_CORRECTION);
+
 	pp->indir[0] = rxq_def;
 
+	/* Get special SoC configurations */
+	if (of_device_is_compatible(dn, "marvell,armada-3700-neta"))
+		pp->neta_armada3700 = true;
+
 	pp->clk = devm_clk_get(&pdev->dev, "core");
 	if (IS_ERR(pp->clk))
 		pp->clk = devm_clk_get(&pdev->dev, NULL);
@@ -4108,7 +4227,11 @@ static int mvneta_probe(struct platform_device *pdev)
 	pp->tx_csum_limit = tx_csum_limit;
 
 	dram_target_info = mv_mbus_dram_info();
-	if (dram_target_info)
+	/* Armada3700 requires setting default configuration of Mbus
+	 * windows, however without using filled mbus_dram_target_info
+	 * structure.
+	 */
+	if (dram_target_info || pp->neta_armada3700)
 		mvneta_conf_mbus_windows(pp, dram_target_info);
 
 	pp->tx_ring_size = MVNETA_MAX_TXD;
@@ -4141,11 +4264,20 @@ static int mvneta_probe(struct platform_device *pdev)
 		goto err_netdev;
 	}
 
-	for_each_present_cpu(cpu) {
-		struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu);
+	/* Armada3700 network controller does not support per-cpu
+	 * operation, so only a single NAPI should be initialized.
+	 */
+	if (pp->neta_armada3700) {
+		netif_napi_add(dev, &pp->napi, mvneta_poll, NAPI_POLL_WEIGHT);
+	} else {
+		for_each_present_cpu(cpu) {
+			struct mvneta_pcpu_port *port =
+				per_cpu_ptr(pp->ports, cpu);
 
-		netif_napi_add(dev, &port->napi, mvneta_poll, NAPI_POLL_WEIGHT);
-		port->pp = pp;
+			netif_napi_add(dev, &port->napi, mvneta_poll,
+				       NAPI_POLL_WEIGHT);
+			port->pp = pp;
+		}
 	}
 
 	dev->features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
@@ -4154,6 +4286,11 @@ static int mvneta_probe(struct platform_device *pdev)
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 	dev->gso_max_segs = MVNETA_MAX_TSO_SEGS;
 
+	/* MTU range: 68 - 9676 */
+	dev->min_mtu = ETH_MIN_MTU;
+	/* 9676 == 9700 - 20 and rounding to 8 */
+	dev->max_mtu = 9676;
+
 	err = register_netdev(dev);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register\n");
@@ -4191,6 +4328,8 @@ err_clk:
 	clk_disable_unprepare(pp->clk);
 err_put_phy_node:
 	of_node_put(phy_node);
+	if (of_phy_is_fixed_link(dn))
+		of_phy_deregister_fixed_link(dn);
 err_free_irq:
 	irq_dispose_mapping(dev->irq);
 err_free_netdev:
@@ -4202,6 +4341,7 @@ err_free_netdev:
 static int mvneta_remove(struct platform_device *pdev)
 {
 	struct net_device  *dev = platform_get_drvdata(pdev);
+	struct device_node *dn = pdev->dev.of_node;
 	struct mvneta_port *pp = netdev_priv(dev);
 
 	unregister_netdev(dev);
@@ -4209,6 +4349,8 @@ static int mvneta_remove(struct platform_device *pdev)
 	clk_disable_unprepare(pp->clk);
 	free_percpu(pp->ports);
 	free_percpu(pp->stats);
+	if (of_phy_is_fixed_link(dn))
+		of_phy_deregister_fixed_link(dn);
 	irq_dispose_mapping(dev->irq);
 	of_node_put(pp->phy_node);
 	free_netdev(dev);
@@ -4225,6 +4367,7 @@ static int mvneta_remove(struct platform_device *pdev)
 static const struct of_device_id mvneta_match[] = {
 	{ .compatible = "marvell,armada-370-neta" },
 	{ .compatible = "marvell,armada-xp-neta" },
+	{ .compatible = "marvell,armada-3700-neta" },
 	{ }
 };
 MODULE_DEVICE_TABLE(of, mvneta_match);
diff --git a/drivers/net/ethernet/marvell/mvpp2.c b/drivers/net/ethernet/marvell/mvpp2.c
index 1026c452e39d..dabc5418efcc 100644
--- a/drivers/net/ethernet/marvell/mvpp2.c
+++ b/drivers/net/ethernet/marvell/mvpp2.c
@@ -5453,29 +5453,6 @@ static void mvpp2_stop_dev(struct mvpp2_port *port)
 	phy_stop(ndev->phydev);
 }
 
-/* Return positive if MTU is valid */
-static inline int mvpp2_check_mtu_valid(struct net_device *dev, int mtu)
-{
-	if (mtu < 68) {
-		netdev_err(dev, "cannot change mtu to less than 68\n");
-		return -EINVAL;
-	}
-
-	/* 9676 == 9700 - 20 and rounding to 8 */
-	if (mtu > 9676) {
-		netdev_info(dev, "illegal MTU value %d, round to 9676\n", mtu);
-		mtu = 9676;
-	}
-
-	if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) {
-		netdev_info(dev, "illegal MTU value %d, round to %d\n", mtu,
-			    ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8));
-		mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
-	}
-
-	return mtu;
-}
-
 static int mvpp2_check_ringparam_valid(struct net_device *dev,
 				       struct ethtool_ringparam *ring)
 {
@@ -5717,10 +5694,10 @@ static int mvpp2_change_mtu(struct net_device *dev, int mtu)
 	struct mvpp2_port *port = netdev_priv(dev);
 	int err;
 
-	mtu = mvpp2_check_mtu_valid(dev, mtu);
-	if (mtu < 0) {
-		err = mtu;
-		goto error;
+	if (!IS_ALIGNED(MVPP2_RX_PKT_SIZE(mtu), 8)) {
+		netdev_info(dev, "illegal MTU value %d, round to %d\n", mtu,
+			    ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8));
+		mtu = ALIGN(MVPP2_RX_PKT_SIZE(mtu), 8);
 	}
 
 	if (!netif_running(dev)) {
@@ -5946,6 +5923,7 @@ static const struct net_device_ops mvpp2_netdev_ops = {
 };
 
 static const struct ethtool_ops mvpp2_eth_tool_ops = {
+	.nway_reset	= phy_ethtool_nway_reset,
 	.get_link	= ethtool_op_get_link,
 	.set_coalesce	= mvpp2_ethtool_set_coalesce,
 	.get_coalesce	= mvpp2_ethtool_get_coalesce,
@@ -6212,6 +6190,11 @@ static int mvpp2_port_probe(struct platform_device *pdev,
 	dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO;
 	dev->vlan_features |= features;
 
+	/* MTU range: 68 - 9676 */
+	dev->min_mtu = ETH_MIN_MTU;
+	/* 9676 == 9700 - 20 and rounding to 8 */
+	dev->max_mtu = 9676;
+
 	err = register_netdev(dev);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register netdev\n");
diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c
index 5d5000c8edf1..3af2814ada23 100644
--- a/drivers/net/ethernet/marvell/pxa168_eth.c
+++ b/drivers/net/ethernet/marvell/pxa168_eth.c
@@ -1209,9 +1209,6 @@ static int pxa168_eth_change_mtu(struct net_device *dev, int mtu)
 	int retval;
 	struct pxa168_eth_private *pep = netdev_priv(dev);
 
-	if ((mtu > 9500) || (mtu < 68))
-		return -EINVAL;
-
 	dev->mtu = mtu;
 	retval = set_port_config_ext(pep);
 
@@ -1396,6 +1393,7 @@ static void pxa168_get_drvinfo(struct net_device *dev,
 
 static const struct ethtool_ops pxa168_ethtool_ops = {
 	.get_drvinfo	= pxa168_get_drvinfo,
+	.nway_reset	= phy_ethtool_nway_reset,
 	.get_link	= ethtool_op_get_link,
 	.get_ts_info	= ethtool_op_get_ts_info,
 	.get_link_ksettings = pxa168_get_link_ksettings,
@@ -1459,6 +1457,10 @@ static int pxa168_eth_probe(struct platform_device *pdev)
 	dev->base_addr = 0;
 	dev->ethtool_ops = &pxa168_ethtool_ops;
 
+	/* MTU range: 68 - 9500 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = 9500;
+
 	INIT_WORK(&pep->tx_timeout_task, pxa168_eth_tx_timeout_task);
 
 	if (pdev->dev.of_node)
diff --git a/drivers/net/ethernet/marvell/skge.c b/drivers/net/ethernet/marvell/skge.c
index 7173836fe361..9146a514fb33 100644
--- a/drivers/net/ethernet/marvell/skge.c
+++ b/drivers/net/ethernet/marvell/skge.c
@@ -1048,7 +1048,7 @@ static const char *skge_pause(enum pause_status status)
 static void skge_link_up(struct skge_port *skge)
 {
 	skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG),
-		    LED_BLK_OFF|LED_SYNC_OFF|LED_ON);
+		    LED_BLK_OFF|LED_SYNC_OFF|LED_REG_ON);
 
 	netif_carrier_on(skge->netdev);
 	netif_wake_queue(skge->netdev);
@@ -1062,7 +1062,7 @@ static void skge_link_up(struct skge_port *skge)
 
 static void skge_link_down(struct skge_port *skge)
 {
-	skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_OFF);
+	skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_REG_OFF);
 	netif_carrier_off(skge->netdev);
 	netif_stop_queue(skge->netdev);
 
@@ -2668,7 +2668,7 @@ static int skge_down(struct net_device *dev)
 	if (hw->ports == 1)
 		free_irq(hw->pdev->irq, hw);
 
-	skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_OFF);
+	skge_write8(skge->hw, SK_REG(skge->port, LNK_LED_REG), LED_REG_OFF);
 	if (is_genesis(hw))
 		genesis_stop(skge);
 	else
@@ -2900,9 +2900,6 @@ static int skge_change_mtu(struct net_device *dev, int new_mtu)
 {
 	int err;
 
-	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-		return -EINVAL;
-
 	if (!netif_running(dev)) {
 		dev->mtu = new_mtu;
 		return 0;
@@ -3857,6 +3854,10 @@ static struct net_device *skge_devinit(struct skge_hw *hw, int port,
 	dev->watchdog_timeo = TX_WATCHDOG;
 	dev->irq = hw->pdev->irq;
 
+	/* MTU range: 60 - 9000 */
+	dev->min_mtu = ETH_ZLEN;
+	dev->max_mtu = ETH_JUMBO_MTU;
+
 	if (highmem)
 		dev->features |= NETIF_F_HIGHDMA;
 
diff --git a/drivers/net/ethernet/marvell/skge.h b/drivers/net/ethernet/marvell/skge.h
index a2eb34115844..3ea151ff9c43 100644
--- a/drivers/net/ethernet/marvell/skge.h
+++ b/drivers/net/ethernet/marvell/skge.h
@@ -662,8 +662,8 @@ enum {
 	LED_BLK_OFF	= 1<<4,	/* Link LED Blinking Off */
 	LED_SYNC_ON	= 1<<3,	/* Use Sync Wire to switch LED */
 	LED_SYNC_OFF	= 1<<2,	/* Disable Sync Wire Input */
-	LED_ON	= 1<<1,	/* switch LED on */
-	LED_OFF	= 1<<0,	/* switch LED off */
+	LED_REG_ON	= 1<<1,	/* switch LED on */
+	LED_REG_OFF	= 1<<0,	/* switch LED off */
 };
 
 /* Receive GMAC FIFO (YUKON) */
diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c
index 941c8e2c944e..b60ad0e56a9f 100644
--- a/drivers/net/ethernet/marvell/sky2.c
+++ b/drivers/net/ethernet/marvell/sky2.c
@@ -2398,16 +2398,6 @@ static int sky2_change_mtu(struct net_device *dev, int new_mtu)
 	u16 ctl, mode;
 	u32 imask;
 
-	/* MTU size outside the spec */
-	if (new_mtu < ETH_ZLEN || new_mtu > ETH_JUMBO_MTU)
-		return -EINVAL;
-
-	/* MTU > 1500 on yukon FE and FE+ not allowed */
-	if (new_mtu > ETH_DATA_LEN &&
-	    (hw->chip_id == CHIP_ID_YUKON_FE ||
-	     hw->chip_id == CHIP_ID_YUKON_FE_P))
-		return -EINVAL;
-
 	if (!netif_running(dev)) {
 		dev->mtu = new_mtu;
 		netdev_update_features(dev);
@@ -4808,6 +4798,14 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
 
 	dev->features |= dev->hw_features;
 
+	/* MTU range: 60 - 1500 or 9000 */
+	dev->min_mtu = ETH_ZLEN;
+	if (hw->chip_id == CHIP_ID_YUKON_FE ||
+	    hw->chip_id == CHIP_ID_YUKON_FE_P)
+		dev->max_mtu = ETH_DATA_LEN;
+	else
+		dev->max_mtu = ETH_JUMBO_MTU;
+
 	/* try to get mac address in the following order:
 	 * 1) from device tree data
 	 * 2) from internal registers set by bootloader
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 4a62ffd7729d..3dd87889e67e 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -318,6 +318,8 @@ static int mtk_phy_connect(struct net_device *dev)
 	return 0;
 
 err_phy:
+	if (of_phy_is_fixed_link(mac->of_node))
+		of_phy_deregister_fixed_link(mac->of_node);
 	of_node_put(np);
 	dev_err(eth->dev, "%s: invalid phy\n", __func__);
 	return -EINVAL;
@@ -843,7 +845,7 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 drop:
 	spin_unlock(&eth->page_lock);
 	stats->tx_dropped++;
-	dev_kfree_skb(skb);
+	dev_kfree_skb_any(skb);
 	return NETDEV_TX_OK;
 }
 
@@ -1923,6 +1925,8 @@ static void mtk_uninit(struct net_device *dev)
 	struct mtk_eth *eth = mac->hw;
 
 	phy_disconnect(dev->phydev);
+	if (of_phy_is_fixed_link(mac->of_node))
+		of_phy_deregister_fixed_link(mac->of_node);
 	mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0);
 	mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0);
 }
@@ -2243,7 +2247,6 @@ static const struct net_device_ops mtk_netdev_ops = {
 	.ndo_set_mac_address	= mtk_set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_do_ioctl		= mtk_do_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_tx_timeout		= mtk_tx_timeout,
 	.ndo_get_stats64        = mtk_get_stats64,
 	.ndo_fix_features	= mtk_fix_features,
diff --git a/drivers/net/ethernet/mellanox/mlx4/Kconfig b/drivers/net/ethernet/mellanox/mlx4/Kconfig
index 5098e7f21987..22b1cc012bc9 100644
--- a/drivers/net/ethernet/mellanox/mlx4/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx4/Kconfig
@@ -7,7 +7,7 @@ config MLX4_EN
 	depends on MAY_USE_DEVLINK
 	depends on PCI
 	select MLX4_CORE
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports Mellanox Technologies ConnectX Ethernet
 	  devices.
diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c
index e36bebcab3f2..a49072b4fa52 100644
--- a/drivers/net/ethernet/mellanox/mlx4/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c
@@ -2679,15 +2679,13 @@ struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev)
 	if (!mailbox)
 		return ERR_PTR(-ENOMEM);
 
-	mailbox->buf = pci_pool_alloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
-				      &mailbox->dma);
+	mailbox->buf = pci_pool_zalloc(mlx4_priv(dev)->cmd.pool, GFP_KERNEL,
+				       &mailbox->dma);
 	if (!mailbox->buf) {
 		kfree(mailbox);
 		return ERR_PTR(-ENOMEM);
 	}
 
-	memset(mailbox->buf, 0, MLX4_MAILBOX_SIZE);
-
 	return mailbox;
 }
 EXPORT_SYMBOL_GPL(mlx4_alloc_cmd_mailbox);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
index e3be7e44ff51..09dd3776db76 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c
@@ -65,7 +65,7 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv,
 	cq->buf_size = cq->size * mdev->dev->caps.cqe_size;
 
 	cq->ring = ring;
-	cq->is_tx = mode;
+	cq->type = mode;
 	cq->vector = mdev->dev->caps.num_comp_vectors;
 
 	/* Allocate HW buffers on provided NUMA node.
@@ -104,7 +104,7 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	*cq->mcq.arm_db    = 0;
 	memset(cq->buf, 0, cq->buf_size);
 
-	if (cq->is_tx == RX) {
+	if (cq->type == RX) {
 		if (!mlx4_is_eq_vector_valid(mdev->dev, priv->port,
 					     cq->vector)) {
 			cq->vector = cpumask_first(priv->rx_ring[cq->ring]->affinity_mask);
@@ -127,25 +127,17 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 		/* For TX we use the same irq per
 		ring we assigned for the RX    */
 		struct mlx4_en_cq *rx_cq;
-		int xdp_index;
-
-		/* The xdp tx irq must align with the rx ring that forwards to
-		 * it, so reindex these from 0. This should only happen when
-		 * tx_ring_num is not a multiple of rx_ring_num.
-		 */
-		xdp_index = (priv->xdp_ring_num - priv->tx_ring_num) + cq_idx;
-		if (xdp_index >= 0)
-			cq_idx = xdp_index;
+
 		cq_idx = cq_idx % priv->rx_ring_num;
 		rx_cq = priv->rx_cq[cq_idx];
 		cq->vector = rx_cq->vector;
 	}
 
-	if (!cq->is_tx)
+	if (cq->type == RX)
 		cq->size = priv->rx_ring[cq->ring]->actual_size;
 
-	if ((cq->is_tx && priv->hwtstamp_config.tx_type) ||
-	    (!cq->is_tx && priv->hwtstamp_config.rx_filter))
+	if ((cq->type != RX && priv->hwtstamp_config.tx_type) ||
+	    (cq->type == RX && priv->hwtstamp_config.rx_filter))
 		timestamp_en = 1;
 
 	err = mlx4_cq_alloc(mdev->dev, cq->size, &cq->wqres.mtt,
@@ -154,10 +146,10 @@ int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
 	if (err)
 		goto free_eq;
 
-	cq->mcq.comp  = cq->is_tx ? mlx4_en_tx_irq : mlx4_en_rx_irq;
+	cq->mcq.comp  = cq->type != RX ? mlx4_en_tx_irq : mlx4_en_rx_irq;
 	cq->mcq.event = mlx4_en_cq_event;
 
-	if (cq->is_tx)
+	if (cq->type != RX)
 		netif_tx_napi_add(cq->dev, &cq->napi, mlx4_en_poll_tx_cq,
 				  NAPI_POLL_WEIGHT);
 	else
@@ -181,7 +173,7 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
 
 	mlx4_free_hwq_res(mdev->dev, &cq->wqres, cq->buf_size);
 	if (mlx4_is_eq_vector_valid(mdev->dev, priv->port, cq->vector) &&
-	    cq->is_tx == RX)
+	    cq->type == RX)
 		mlx4_release_eq(priv->mdev->dev, cq->vector);
 	cq->vector = 0;
 	cq->buf_size = 0;
@@ -193,10 +185,6 @@ void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq **pcq)
 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq)
 {
 	napi_disable(&cq->napi);
-	if (!cq->is_tx) {
-		napi_hash_del(&cq->napi);
-		synchronize_rcu();
-	}
 	netif_napi_del(&cq->napi);
 
 	mlx4_cq_free(priv->mdev->dev, &cq->mcq);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index bdda17d2ea0f..d9c9f86a30df 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -49,16 +49,19 @@
 
 static int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
 {
-	int i;
+	int i, t;
 	int err = 0;
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		priv->tx_cq[i]->moder_cnt = priv->tx_frames;
-		priv->tx_cq[i]->moder_time = priv->tx_usecs;
-		if (priv->port_up) {
-			err = mlx4_en_set_cq_moder(priv, priv->tx_cq[i]);
-			if (err)
-				return err;
+	for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		for (i = 0; i < priv->tx_ring_num[t]; i++) {
+			priv->tx_cq[t][i]->moder_cnt = priv->tx_frames;
+			priv->tx_cq[t][i]->moder_time = priv->tx_usecs;
+			if (priv->port_up) {
+				err = mlx4_en_set_cq_moder(priv,
+							   priv->tx_cq[t][i]);
+				if (err)
+					return err;
+			}
 		}
 	}
 
@@ -192,6 +195,10 @@ static const char main_strings[][ETH_GSTRING_LEN] = {
 	"tx_prio_7_packets", "tx_prio_7_bytes",
 	"tx_novlan_packets", "tx_novlan_bytes",
 
+	/* xdp statistics */
+	"rx_xdp_drop",
+	"rx_xdp_tx",
+	"rx_xdp_tx_full",
 };
 
 static const char mlx4_en_test_names[][ETH_GSTRING_LEN]= {
@@ -336,8 +343,8 @@ static int mlx4_en_get_sset_count(struct net_device *dev, int sset)
 	switch (sset) {
 	case ETH_SS_STATS:
 		return bitmap_iterator_count(&it) +
-			(priv->tx_ring_num * 2) +
-			(priv->rx_ring_num * 3);
+			(priv->tx_ring_num[TX] * 2) +
+			(priv->rx_ring_num * (3 + NUM_XDP_STATS));
 	case ETH_SS_TEST:
 		return MLX4_EN_NUM_SELF_TEST - !(priv->mdev->dev->caps.flags
 					& MLX4_DEV_CAP_FLAG_UC_LOOPBACK) * 2;
@@ -360,6 +367,8 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 
 	spin_lock_bh(&priv->stats_lock);
 
+	mlx4_en_fold_software_stats(dev);
+
 	for (i = 0; i < NUM_MAIN_STATS; i++, bitmap_iterator_inc(&it))
 		if (bitmap_iterator_test(&it))
 			data[index++] = ((unsigned long *)&dev->stats)[i];
@@ -397,14 +406,21 @@ static void mlx4_en_get_ethtool_stats(struct net_device *dev,
 		if (bitmap_iterator_test(&it))
 			data[index++] = ((unsigned long *)&priv->pkstats)[i];
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		data[index++] = priv->tx_ring[i]->packets;
-		data[index++] = priv->tx_ring[i]->bytes;
+	for (i = 0; i < NUM_XDP_STATS; i++, bitmap_iterator_inc(&it))
+		if (bitmap_iterator_test(&it))
+			data[index++] = ((unsigned long *)&priv->xdp_stats)[i];
+
+	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+		data[index++] = priv->tx_ring[TX][i]->packets;
+		data[index++] = priv->tx_ring[TX][i]->bytes;
 	}
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		data[index++] = priv->rx_ring[i]->packets;
 		data[index++] = priv->rx_ring[i]->bytes;
 		data[index++] = priv->rx_ring[i]->dropped;
+		data[index++] = priv->rx_ring[i]->xdp_drop;
+		data[index++] = priv->rx_ring[i]->xdp_tx;
+		data[index++] = priv->rx_ring[i]->xdp_tx_full;
 	}
 	spin_unlock_bh(&priv->stats_lock);
 
@@ -467,7 +483,13 @@ static void mlx4_en_get_strings(struct net_device *dev,
 				strcpy(data + (index++) * ETH_GSTRING_LEN,
 				       main_strings[strings]);
 
-		for (i = 0; i < priv->tx_ring_num; i++) {
+		for (i = 0; i < NUM_XDP_STATS; i++, strings++,
+		     bitmap_iterator_inc(&it))
+			if (bitmap_iterator_test(&it))
+				strcpy(data + (index++) * ETH_GSTRING_LEN,
+				       main_strings[strings]);
+
+		for (i = 0; i < priv->tx_ring_num[TX]; i++) {
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"tx%d_packets", i);
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
@@ -480,6 +502,12 @@ static void mlx4_en_get_strings(struct net_device *dev,
 				"rx%d_bytes", i);
 			sprintf(data + (index++) * ETH_GSTRING_LEN,
 				"rx%d_dropped", i);
+			sprintf(data + (index++) * ETH_GSTRING_LEN,
+				"rx%d_xdp_drop", i);
+			sprintf(data + (index++) * ETH_GSTRING_LEN,
+				"rx%d_xdp_tx", i);
+			sprintf(data + (index++) * ETH_GSTRING_LEN,
+				"rx%d_xdp_tx_full", i);
 		}
 		break;
 	case ETH_SS_PRIV_FLAGS:
@@ -1060,7 +1088,7 @@ static int mlx4_en_set_ringparam(struct net_device *dev,
 
 	if (rx_size == (priv->port_up ? priv->rx_ring[0]->actual_size :
 					priv->rx_ring[0]->size) &&
-	    tx_size == priv->tx_ring[0]->size)
+	    tx_size == priv->tx_ring[TX][0]->size)
 		return 0;
 
 	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
@@ -1105,7 +1133,7 @@ static void mlx4_en_get_ringparam(struct net_device *dev,
 	param->tx_max_pending = MLX4_EN_MAX_TX_SIZE;
 	param->rx_pending = priv->port_up ?
 		priv->rx_ring[0]->actual_size : priv->rx_ring[0]->size;
-	param->tx_pending = priv->tx_ring[0]->size;
+	param->tx_pending = priv->tx_ring[TX][0]->size;
 }
 
 static u32 mlx4_en_get_rxfh_indir_size(struct net_device *dev)
@@ -1710,7 +1738,7 @@ static void mlx4_en_get_channels(struct net_device *dev,
 	channel->max_tx = MLX4_EN_MAX_TX_RING_P_UP;
 
 	channel->rx_count = priv->rx_ring_num;
-	channel->tx_count = priv->tx_ring_num / MLX4_EN_NUM_UP;
+	channel->tx_count = priv->tx_ring_num[TX] / MLX4_EN_NUM_UP;
 }
 
 static int mlx4_en_set_channels(struct net_device *dev,
@@ -1721,6 +1749,7 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	struct mlx4_en_port_profile new_prof;
 	struct mlx4_en_priv *tmp;
 	int port_up = 0;
+	int xdp_count;
 	int err = 0;
 
 	if (channel->other_count || channel->combined_count ||
@@ -1729,20 +1758,25 @@ static int mlx4_en_set_channels(struct net_device *dev,
 	    !channel->tx_count || !channel->rx_count)
 		return -EINVAL;
 
-	if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) {
-		en_err(priv, "Minimum %d tx channels required with XDP on\n",
-		       priv->xdp_ring_num / MLX4_EN_NUM_UP + 1);
-		return -EINVAL;
-	}
-
 	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
 	if (!tmp)
 		return -ENOMEM;
 
 	mutex_lock(&mdev->state_lock);
+	xdp_count = priv->tx_ring_num[TX_XDP] ? channel->rx_count : 0;
+	if (channel->tx_count * MLX4_EN_NUM_UP + xdp_count > MAX_TX_RINGS) {
+		err = -EINVAL;
+		en_err(priv,
+		       "Total number of TX and XDP rings (%d) exceeds the maximum supported (%d)\n",
+		       channel->tx_count * MLX4_EN_NUM_UP + xdp_count,
+		       MAX_TX_RINGS);
+		goto out;
+	}
+
 	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
 	new_prof.num_tx_rings_p_up = channel->tx_count;
-	new_prof.tx_ring_num = channel->tx_count * MLX4_EN_NUM_UP;
+	new_prof.tx_ring_num[TX] = channel->tx_count * MLX4_EN_NUM_UP;
+	new_prof.tx_ring_num[TX_XDP] = xdp_count;
 	new_prof.rx_ring_num = channel->rx_count;
 
 	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
@@ -1756,14 +1790,13 @@ static int mlx4_en_set_channels(struct net_device *dev,
 
 	mlx4_en_safe_replace_resources(priv, tmp);
 
-	netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
-							priv->xdp_ring_num);
+	netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
 	if (dev->num_tc)
 		mlx4_en_setup_tc(dev, MLX4_EN_NUM_UP);
 
-	en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num);
+	en_warn(priv, "Using %d TX rings\n", priv->tx_ring_num[TX]);
 	en_warn(priv, "Using %d RX rings\n", priv->rx_ring_num);
 
 	if (port_up) {
@@ -1774,8 +1807,8 @@ static int mlx4_en_set_channels(struct net_device *dev,
 
 	err = mlx4_en_moderation_update(priv);
 out:
-	kfree(tmp);
 	mutex_unlock(&mdev->state_lock);
+	kfree(tmp);
 	return err;
 }
 
@@ -1823,11 +1856,15 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
 	int ret = 0;
 
 	if (bf_enabled_new != bf_enabled_old) {
+		int t;
+
 		if (bf_enabled_new) {
 			bool bf_supported = true;
 
-			for (i = 0; i < priv->tx_ring_num; i++)
-				bf_supported &= priv->tx_ring[i]->bf_alloced;
+			for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+				for (i = 0; i < priv->tx_ring_num[t]; i++)
+					bf_supported &=
+						priv->tx_ring[t][i]->bf_alloced;
 
 			if (!bf_supported) {
 				en_err(priv, "BlueFlame is not supported\n");
@@ -1839,8 +1876,10 @@ static int mlx4_en_set_priv_flags(struct net_device *dev, u32 flags)
 			priv->pflags &= ~MLX4_EN_PRIV_FLAGS_BLUEFLAME;
 		}
 
-		for (i = 0; i < priv->tx_ring_num; i++)
-			priv->tx_ring[i]->bf_enabled = bf_enabled_new;
+		for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+			for (i = 0; i < priv->tx_ring_num[t]; i++)
+				priv->tx_ring[t][i]->bf_enabled =
+					bf_enabled_new;
 
 		en_info(priv, "BlueFlame %s\n",
 			bf_enabled_new ?  "Enabled" : "Disabled");
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index bf7628db098a..36a7a54bbb82 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -169,7 +169,7 @@ static int mlx4_en_get_profile(struct mlx4_en_dev *mdev)
 		params->prof[i].tx_ppp = pfctx;
 		params->prof[i].tx_ring_size = MLX4_EN_DEF_TX_RING_SIZE;
 		params->prof[i].rx_ring_size = MLX4_EN_DEF_RX_RING_SIZE;
-		params->prof[i].tx_ring_num = params->num_tx_rings_p_up *
+		params->prof[i].tx_ring_num[TX] = params->num_tx_rings_p_up *
 			MLX4_EN_NUM_UP;
 		params->prof[i].rss_rings = 0;
 		params->prof[i].inline_thold = inline_thold;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index a60f635da78b..bcd955339058 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -51,6 +51,9 @@
 #include "mlx4_en.h"
 #include "en_port.h"
 
+#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
+				   XDP_PACKET_HEADROOM))
+
 int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -1217,8 +1220,8 @@ static void mlx4_en_netpoll(struct net_device *dev)
 	struct mlx4_en_cq *cq;
 	int i;
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		cq = priv->tx_cq[i];
+	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+		cq = priv->tx_cq[TX][i];
 		napi_schedule(&cq->napi);
 	}
 }
@@ -1302,12 +1305,14 @@ static void mlx4_en_tx_timeout(struct net_device *dev)
 	if (netif_msg_timer(priv))
 		en_warn(priv, "Tx timeout called on port:%d\n", priv->port);
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
+	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+		struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][i];
+
 		if (!netif_tx_queue_stopped(netdev_get_tx_queue(dev, i)))
 			continue;
 		en_warn(priv, "TX timeout on queue: %d, QP: 0x%x, CQ: 0x%x, Cons: 0x%x, Prod: 0x%x\n",
-			i, priv->tx_ring[i]->qpn, priv->tx_ring[i]->cqn,
-			priv->tx_ring[i]->cons, priv->tx_ring[i]->prod);
+			i, tx_ring->qpn, tx_ring->sp_cqn,
+			tx_ring->cons, tx_ring->prod);
 	}
 
 	priv->port_stats.tx_timeout++;
@@ -1322,6 +1327,7 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 
 	spin_lock_bh(&priv->stats_lock);
+	mlx4_en_fold_software_stats(dev);
 	netdev_stats_to_stats64(stats, &dev->stats);
 	spin_unlock_bh(&priv->stats_lock);
 
@@ -1331,7 +1337,7 @@ mlx4_en_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_cq *cq;
-	int i;
+	int i, t;
 
 	/* If we haven't received a specific coalescing setting
 	 * (module param), we set the moderation parameters as follows:
@@ -1356,10 +1362,12 @@ static void mlx4_en_set_default_moderation(struct mlx4_en_priv *priv)
 		priv->last_moder_bytes[i] = 0;
 	}
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		cq = priv->tx_cq[i];
-		cq->moder_cnt = priv->tx_frames;
-		cq->moder_time = priv->tx_usecs;
+	for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		for (i = 0; i < priv->tx_ring_num[t]; i++) {
+			cq = priv->tx_cq[t][i];
+			cq->moder_cnt = priv->tx_frames;
+			cq->moder_time = priv->tx_usecs;
+		}
 	}
 
 	/* Reset auto-moderation params */
@@ -1390,10 +1398,8 @@ static void mlx4_en_auto_moderation(struct mlx4_en_priv *priv)
 		return;
 
 	for (ring = 0; ring < priv->rx_ring_num; ring++) {
-		spin_lock_bh(&priv->stats_lock);
-		rx_packets = priv->rx_ring[ring]->packets;
-		rx_bytes = priv->rx_ring[ring]->bytes;
-		spin_unlock_bh(&priv->stats_lock);
+		rx_packets = READ_ONCE(priv->rx_ring[ring]->packets);
+		rx_bytes = READ_ONCE(priv->rx_ring[ring]->bytes);
 
 		rx_pkt_diff = ((unsigned long) (rx_packets -
 				priv->last_moder_packets[ring]));
@@ -1529,19 +1535,13 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx)
 static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv,
 				      int tx_ring_idx)
 {
-	struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx];
-	int rr_index;
+	struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX_XDP][tx_ring_idx];
+	int rr_index = tx_ring_idx;
 
-	rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx;
-	if (rr_index >= 0) {
-		tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc;
-		tx_ring->recycle_ring = priv->rx_ring[rr_index];
-		en_dbg(DRV, priv,
-		       "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n",
-		       tx_ring_idx, rr_index);
-	} else {
-		tx_ring->recycle_ring = NULL;
-	}
+	tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc;
+	tx_ring->recycle_ring = priv->rx_ring[rr_index];
+	en_dbg(DRV, priv, "Set tx_ring[%d][%d]->recycle_ring = rx_ring[%d]\n",
+	       TX_XDP, tx_ring_idx, rr_index);
 }
 
 int mlx4_en_start_port(struct net_device *dev)
@@ -1551,9 +1551,8 @@ int mlx4_en_start_port(struct net_device *dev)
 	struct mlx4_en_cq *cq;
 	struct mlx4_en_tx_ring *tx_ring;
 	int rx_index = 0;
-	int tx_index = 0;
 	int err = 0;
-	int i;
+	int i, t;
 	int j;
 	u8 mc_list[16] = {0};
 
@@ -1638,43 +1637,51 @@ int mlx4_en_start_port(struct net_device *dev)
 		goto rss_err;
 
 	/* Configure tx cq's and rings */
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		/* Configure cq */
-		cq = priv->tx_cq[i];
-		err = mlx4_en_activate_cq(priv, cq, i);
-		if (err) {
-			en_err(priv, "Failed allocating Tx CQ\n");
-			goto tx_err;
-		}
-		err = mlx4_en_set_cq_moder(priv, cq);
-		if (err) {
-			en_err(priv, "Failed setting cq moderation parameters\n");
-			mlx4_en_deactivate_cq(priv, cq);
-			goto tx_err;
-		}
-		en_dbg(DRV, priv, "Resetting index of collapsed CQ:%d to -1\n", i);
-		cq->buf->wqe_index = cpu_to_be16(0xffff);
-
-		/* Configure ring */
-		tx_ring = priv->tx_ring[i];
-		err = mlx4_en_activate_tx_ring(priv, tx_ring, cq->mcq.cqn,
-			i / priv->num_tx_rings_p_up);
-		if (err) {
-			en_err(priv, "Failed allocating Tx ring\n");
-			mlx4_en_deactivate_cq(priv, cq);
-			goto tx_err;
-		}
-		tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
+	for (t = 0 ; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		u8 num_tx_rings_p_up = t == TX ? priv->num_tx_rings_p_up : 1;
 
-		mlx4_en_init_recycle_ring(priv, i);
+		for (i = 0; i < priv->tx_ring_num[t]; i++) {
+			/* Configure cq */
+			cq = priv->tx_cq[t][i];
+			err = mlx4_en_activate_cq(priv, cq, i);
+			if (err) {
+				en_err(priv, "Failed allocating Tx CQ\n");
+				goto tx_err;
+			}
+			err = mlx4_en_set_cq_moder(priv, cq);
+			if (err) {
+				en_err(priv, "Failed setting cq moderation parameters\n");
+				mlx4_en_deactivate_cq(priv, cq);
+				goto tx_err;
+			}
+			en_dbg(DRV, priv,
+			       "Resetting index of collapsed CQ:%d to -1\n", i);
+			cq->buf->wqe_index = cpu_to_be16(0xffff);
+
+			/* Configure ring */
+			tx_ring = priv->tx_ring[t][i];
+			err = mlx4_en_activate_tx_ring(priv, tx_ring,
+						       cq->mcq.cqn,
+						       i / num_tx_rings_p_up);
+			if (err) {
+				en_err(priv, "Failed allocating Tx ring\n");
+				mlx4_en_deactivate_cq(priv, cq);
+				goto tx_err;
+			}
+			if (t != TX_XDP) {
+				tx_ring->tx_queue = netdev_get_tx_queue(dev, i);
+				tx_ring->recycle_ring = NULL;
+			} else {
+				mlx4_en_init_recycle_ring(priv, i);
+			}
 
-		/* Arm CQ for TX completions */
-		mlx4_en_arm_cq(priv, cq);
+			/* Arm CQ for TX completions */
+			mlx4_en_arm_cq(priv, cq);
 
-		/* Set initial ownership of all Tx TXBBs to SW (1) */
-		for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
-			*((u32 *) (tx_ring->buf + j)) = 0xffffffff;
-		++tx_index;
+			/* Set initial ownership of all Tx TXBBs to SW (1) */
+			for (j = 0; j < tx_ring->buf_size; j += STAMP_STRIDE)
+				*((u32 *)(tx_ring->buf + j)) = 0xffffffff;
+		}
 	}
 
 	/* Configure port */
@@ -1749,9 +1756,18 @@ int mlx4_en_start_port(struct net_device *dev)
 	return 0;
 
 tx_err:
-	while (tx_index--) {
-		mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[tx_index]);
-		mlx4_en_deactivate_cq(priv, priv->tx_cq[tx_index]);
+	if (t == MLX4_EN_NUM_TX_TYPES) {
+		t--;
+		i = priv->tx_ring_num[t];
+	}
+	while (t >= 0) {
+		while (i--) {
+			mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]);
+			mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]);
+		}
+		if (!t--)
+			break;
+		i = priv->tx_ring_num[t];
 	}
 	mlx4_en_destroy_drop_qp(priv);
 rss_err:
@@ -1776,7 +1792,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 	struct mlx4_en_dev *mdev = priv->mdev;
 	struct mlx4_en_mc_list *mclist, *tmp;
 	struct ethtool_flow_id *flow, *tmp_flow;
-	int i;
+	int i, t;
 	u8 mc_list[16] = {0};
 
 	if (!priv->port_up) {
@@ -1796,8 +1812,12 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 
 	netif_tx_disable(dev);
 
+	spin_lock_bh(&priv->stats_lock);
+	mlx4_en_fold_software_stats(dev);
 	/* Set port as not active */
 	priv->port_up = false;
+	spin_unlock_bh(&priv->stats_lock);
+
 	priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev);
 
 	/* Promsicuous mode */
@@ -1862,14 +1882,17 @@ void mlx4_en_stop_port(struct net_device *dev, int detach)
 	mlx4_en_destroy_drop_qp(priv);
 
 	/* Free TX Rings */
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[i]);
-		mlx4_en_deactivate_cq(priv, priv->tx_cq[i]);
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		for (i = 0; i < priv->tx_ring_num[t]; i++) {
+			mlx4_en_deactivate_tx_ring(priv, priv->tx_ring[t][i]);
+			mlx4_en_deactivate_cq(priv, priv->tx_cq[t][i]);
+		}
 	}
 	msleep(10);
 
-	for (i = 0; i < priv->tx_ring_num; i++)
-		mlx4_en_free_tx_buf(dev, priv->tx_ring[i]);
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++)
+		for (i = 0; i < priv->tx_ring_num[t]; i++)
+			mlx4_en_free_tx_buf(dev, priv->tx_ring[t][i]);
 
 	if (mdev->dev->caps.steering_mode != MLX4_STEERING_MODE_A0)
 		mlx4_en_delete_rss_steer_rules(priv);
@@ -1918,6 +1941,7 @@ static void mlx4_en_clear_stats(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_tx_ring **tx_ring;
 	int i;
 
 	if (!mlx4_is_slave(mdev->dev))
@@ -1935,15 +1959,16 @@ static void mlx4_en_clear_stats(struct net_device *dev)
 	       sizeof(priv->tx_priority_flowstats));
 	memset(&priv->pf_stats, 0, sizeof(priv->pf_stats));
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		priv->tx_ring[i]->bytes = 0;
-		priv->tx_ring[i]->packets = 0;
-		priv->tx_ring[i]->tx_csum = 0;
-		priv->tx_ring[i]->tx_dropped = 0;
-		priv->tx_ring[i]->queue_stopped = 0;
-		priv->tx_ring[i]->wake_queue = 0;
-		priv->tx_ring[i]->tso_packets = 0;
-		priv->tx_ring[i]->xmit_more = 0;
+	tx_ring = priv->tx_ring[TX];
+	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+		tx_ring[i]->bytes = 0;
+		tx_ring[i]->packets = 0;
+		tx_ring[i]->tx_csum = 0;
+		tx_ring[i]->tx_dropped = 0;
+		tx_ring[i]->queue_stopped = 0;
+		tx_ring[i]->wake_queue = 0;
+		tx_ring[i]->tso_packets = 0;
+		tx_ring[i]->xmit_more = 0;
 	}
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		priv->rx_ring[i]->bytes = 0;
@@ -1999,17 +2024,20 @@ static int mlx4_en_close(struct net_device *dev)
 
 static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 {
-	int i;
+	int i, t;
 
 #ifdef CONFIG_RFS_ACCEL
 	priv->dev->rx_cpu_rmap = NULL;
 #endif
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		if (priv->tx_ring && priv->tx_ring[i])
-			mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
-		if (priv->tx_cq && priv->tx_cq[i])
-			mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		for (i = 0; i < priv->tx_ring_num[t]; i++) {
+			if (priv->tx_ring[t] && priv->tx_ring[t][i])
+				mlx4_en_destroy_tx_ring(priv,
+							&priv->tx_ring[t][i]);
+			if (priv->tx_cq[t] && priv->tx_cq[t][i])
+				mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]);
+		}
 	}
 
 	for (i = 0; i < priv->rx_ring_num; i++) {
@@ -2025,20 +2053,22 @@ static void mlx4_en_free_resources(struct mlx4_en_priv *priv)
 static int mlx4_en_alloc_resources(struct mlx4_en_priv *priv)
 {
 	struct mlx4_en_port_profile *prof = priv->prof;
-	int i;
+	int i, t;
 	int node;
 
 	/* Create tx Rings */
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		node = cpu_to_node(i % num_online_cpus());
-		if (mlx4_en_create_cq(priv, &priv->tx_cq[i],
-				      prof->tx_ring_size, i, TX, node))
-			goto err;
-
-		if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[i],
-					   prof->tx_ring_size, TXBB_SIZE,
-					   node, i))
-			goto err;
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		for (i = 0; i < priv->tx_ring_num[t]; i++) {
+			node = cpu_to_node(i % num_online_cpus());
+			if (mlx4_en_create_cq(priv, &priv->tx_cq[t][i],
+					      prof->tx_ring_size, i, t, node))
+				goto err;
+
+			if (mlx4_en_create_tx_ring(priv, &priv->tx_ring[t][i],
+						   prof->tx_ring_size,
+						   TXBB_SIZE, node, i))
+				goto err;
+		}
 	}
 
 	/* Create rx Rings */
@@ -2070,31 +2100,28 @@ err:
 		if (priv->rx_cq[i])
 			mlx4_en_destroy_cq(priv, &priv->rx_cq[i]);
 	}
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		if (priv->tx_ring[i])
-			mlx4_en_destroy_tx_ring(priv, &priv->tx_ring[i]);
-		if (priv->tx_cq[i])
-			mlx4_en_destroy_cq(priv, &priv->tx_cq[i]);
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		for (i = 0; i < priv->tx_ring_num[t]; i++) {
+			if (priv->tx_ring[t][i])
+				mlx4_en_destroy_tx_ring(priv,
+							&priv->tx_ring[t][i]);
+			if (priv->tx_cq[t][i])
+				mlx4_en_destroy_cq(priv, &priv->tx_cq[t][i]);
+		}
 	}
 	return -ENOMEM;
 }
 
-static void mlx4_en_shutdown(struct net_device *dev)
-{
-	rtnl_lock();
-	netif_device_detach(dev);
-	mlx4_en_close(dev);
-	rtnl_unlock();
-}
 
 static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
 			     struct mlx4_en_priv *src,
 			     struct mlx4_en_port_profile *prof)
 {
+	int t;
+
 	memcpy(&dst->hwtstamp_config, &prof->hwtstamp_config,
 	       sizeof(dst->hwtstamp_config));
 	dst->num_tx_rings_p_up = src->mdev->profile.num_tx_rings_p_up;
-	dst->tx_ring_num = prof->tx_ring_num;
 	dst->rx_ring_num = prof->rx_ring_num;
 	dst->flags = prof->flags;
 	dst->mdev = src->mdev;
@@ -2104,33 +2131,50 @@ static int mlx4_en_copy_priv(struct mlx4_en_priv *dst,
 	dst->stride = roundup_pow_of_two(sizeof(struct mlx4_en_rx_desc) +
 					 DS_SIZE * MLX4_EN_MAX_RX_FRAGS);
 
-	dst->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS,
-				GFP_KERNEL);
-	if (!dst->tx_ring)
-		return -ENOMEM;
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		dst->tx_ring_num[t] = prof->tx_ring_num[t];
+		if (!dst->tx_ring_num[t])
+			continue;
 
-	dst->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS,
-			      GFP_KERNEL);
-	if (!dst->tx_cq) {
-		kfree(dst->tx_ring);
-		return -ENOMEM;
+		dst->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) *
+					  MAX_TX_RINGS, GFP_KERNEL);
+		if (!dst->tx_ring[t])
+			goto err_free_tx;
+
+		dst->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
+					MAX_TX_RINGS, GFP_KERNEL);
+		if (!dst->tx_cq[t]) {
+			kfree(dst->tx_ring[t]);
+			goto err_free_tx;
+		}
 	}
+
 	return 0;
+
+err_free_tx:
+	while (t--) {
+		kfree(dst->tx_ring[t]);
+		kfree(dst->tx_cq[t]);
+	}
+	return -ENOMEM;
 }
 
 static void mlx4_en_update_priv(struct mlx4_en_priv *dst,
 				struct mlx4_en_priv *src)
 {
+	int t;
 	memcpy(dst->rx_ring, src->rx_ring,
 	       sizeof(struct mlx4_en_rx_ring *) * src->rx_ring_num);
 	memcpy(dst->rx_cq, src->rx_cq,
 	       sizeof(struct mlx4_en_cq *) * src->rx_ring_num);
 	memcpy(&dst->hwtstamp_config, &src->hwtstamp_config,
 	       sizeof(dst->hwtstamp_config));
-	dst->tx_ring_num = src->tx_ring_num;
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		dst->tx_ring_num[t] = src->tx_ring_num[t];
+		dst->tx_ring[t] = src->tx_ring[t];
+		dst->tx_cq[t] = src->tx_cq[t];
+	}
 	dst->rx_ring_num = src->rx_ring_num;
-	dst->tx_ring = src->tx_ring;
-	dst->tx_cq = src->tx_cq;
 	memcpy(dst->prof, src->prof, sizeof(struct mlx4_en_port_profile));
 }
 
@@ -2138,14 +2182,18 @@ int mlx4_en_try_alloc_resources(struct mlx4_en_priv *priv,
 				struct mlx4_en_priv *tmp,
 				struct mlx4_en_port_profile *prof)
 {
+	int t;
+
 	mlx4_en_copy_priv(tmp, priv, prof);
 
 	if (mlx4_en_alloc_resources(tmp)) {
 		en_warn(priv,
 			"%s: Resource allocation failed, using previous configuration\n",
 			__func__);
-		kfree(tmp->tx_ring);
-		kfree(tmp->tx_cq);
+		for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+			kfree(tmp->tx_ring[t]);
+			kfree(tmp->tx_cq[t]);
+		}
 		return -ENOMEM;
 	}
 	return 0;
@@ -2162,8 +2210,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
-	bool shutdown = mdev->dev->persist->interface_state &
-					    MLX4_INTERFACE_STATE_SHUTDOWN;
+	int t;
 
 	en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port);
 
@@ -2171,10 +2218,7 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	if (priv->registered) {
 		devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev,
 							      priv->port));
-		if (shutdown)
-			mlx4_en_shutdown(dev);
-		else
-			unregister_netdev(dev);
+		unregister_netdev(dev);
 	}
 
 	if (priv->allocated)
@@ -2200,11 +2244,25 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 	mlx4_en_free_resources(priv);
 	mutex_unlock(&mdev->state_lock);
 
-	kfree(priv->tx_ring);
-	kfree(priv->tx_cq);
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		kfree(priv->tx_ring[t]);
+		kfree(priv->tx_cq[t]);
+	}
 
-	if (!shutdown)
-		free_netdev(dev);
+	free_netdev(dev);
+}
+
+static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	if (mtu > MLX4_EN_MAX_XDP_MTU) {
+		en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
+		       mtu, MLX4_EN_MAX_XDP_MTU);
+		return false;
+	}
+
+	return true;
 }
 
 static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
@@ -2216,15 +2274,10 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 	en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
 		 dev->mtu, new_mtu);
 
-	if ((new_mtu < MLX4_EN_MIN_MTU) || (new_mtu > priv->max_mtu)) {
-		en_err(priv, "Bad MTU size:%d.\n", new_mtu);
-		return -EPERM;
-	}
-	if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
-		en_err(priv, "MTU size:%d requires frags but XDP running\n",
-		       new_mtu);
-		return -EOPNOTSUPP;
-	}
+	if (priv->tx_ring_num[TX_XDP] &&
+	    !mlx4_en_check_xdp_mtu(dev, new_mtu))
+		return -ENOTSUPP;
+
 	dev->mtu = new_mtu;
 
 	if (netif_running(dev)) {
@@ -2611,7 +2664,7 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb,
 static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 maxrate)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[queue_index];
+	struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[TX][queue_index];
 	struct mlx4_update_qp_params params;
 	int err;
 
@@ -2639,18 +2692,21 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_en_dev *mdev = priv->mdev;
+	struct mlx4_en_port_profile new_prof;
 	struct bpf_prog *old_prog;
+	struct mlx4_en_priv *tmp;
+	int tx_changed = 0;
 	int xdp_ring_num;
 	int port_up = 0;
 	int err;
 	int i;
 
-	xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0;
+	xdp_ring_num = prog ? priv->rx_ring_num : 0;
 
 	/* No need to reconfigure buffers when simply swapping the
 	 * program for a new one.
 	 */
-	if (priv->xdp_ring_num == xdp_ring_num) {
+	if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) {
 		if (prog) {
 			prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
 			if (IS_ERR(prog))
@@ -2669,33 +2725,47 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		return 0;
 	}
 
-	if (priv->num_frags > 1) {
-		en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
+	if (!mlx4_en_check_xdp_mtu(dev, dev->mtu))
 		return -EOPNOTSUPP;
-	}
 
-	if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) {
-		en_err(priv,
-		       "Minimum %d tx channels required to run XDP\n",
-		       (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP);
-		return -EINVAL;
-	}
+	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
 
 	if (prog) {
 		prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
-		if (IS_ERR(prog))
-			return PTR_ERR(prog);
+		if (IS_ERR(prog)) {
+			err = PTR_ERR(prog);
+			goto out;
+		}
 	}
 
 	mutex_lock(&mdev->state_lock);
+	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
+	new_prof.tx_ring_num[TX_XDP] = xdp_ring_num;
+
+	if (priv->tx_ring_num[TX] + xdp_ring_num > MAX_TX_RINGS) {
+		tx_changed = 1;
+		new_prof.tx_ring_num[TX] =
+			MAX_TX_RINGS - ALIGN(xdp_ring_num, MLX4_EN_NUM_UP);
+		en_warn(priv, "Reducing the number of TX rings, to not exceed the max total rings number.\n");
+	}
+
+	err = mlx4_en_try_alloc_resources(priv, tmp, &new_prof);
+	if (err) {
+		if (prog)
+			bpf_prog_sub(prog, priv->rx_ring_num - 1);
+		goto unlock_out;
+	}
+
 	if (priv->port_up) {
 		port_up = 1;
 		mlx4_en_stop_port(dev, 1);
 	}
 
-	priv->xdp_ring_num = xdp_ring_num;
-	netif_set_real_num_tx_queues(dev, priv->tx_ring_num -
-							priv->xdp_ring_num);
+	mlx4_en_safe_replace_resources(priv, tmp);
+	if (tx_changed)
+		netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
 
 	for (i = 0; i < priv->rx_ring_num; i++) {
 		old_prog = rcu_dereference_protected(
@@ -2715,15 +2785,18 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		}
 	}
 
+unlock_out:
 	mutex_unlock(&mdev->state_lock);
-	return 0;
+out:
+	kfree(tmp);
+	return err;
 }
 
 static bool mlx4_xdp_attached(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 
-	return !!priv->xdp_ring_num;
+	return !!priv->tx_ring_num[TX_XDP];
 }
 
 static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp)
@@ -3060,6 +3133,10 @@ void mlx4_en_set_stats_bitmap(struct mlx4_dev *dev,
 
 	if (!mlx4_is_slave(dev))
 		bitmap_set(stats_bitmap->bitmap, last_i, NUM_PKT_STATS);
+	last_i += NUM_PKT_STATS;
+
+	bitmap_set(stats_bitmap->bitmap, last_i, NUM_XDP_STATS);
+	last_i += NUM_XDP_STATS;
 }
 
 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
@@ -3067,7 +3144,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 {
 	struct net_device *dev;
 	struct mlx4_en_priv *priv;
-	int i;
+	int i, t;
 	int err;
 
 	dev = alloc_etherdev_mqs(sizeof(struct mlx4_en_priv),
@@ -3075,7 +3152,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	if (dev == NULL)
 		return -ENOMEM;
 
-	netif_set_real_num_tx_queues(dev, prof->tx_ring_num);
+	netif_set_real_num_tx_queues(dev, prof->tx_ring_num[TX]);
 	netif_set_real_num_rx_queues(dev, prof->rx_ring_num);
 
 	SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev);
@@ -3112,21 +3189,27 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	priv->ctrl_flags = cpu_to_be32(MLX4_WQE_CTRL_CQ_UPDATE |
 			MLX4_WQE_CTRL_SOLICITED);
 	priv->num_tx_rings_p_up = mdev->profile.num_tx_rings_p_up;
-	priv->tx_ring_num = prof->tx_ring_num;
 	priv->tx_work_limit = MLX4_EN_DEFAULT_TX_WORK;
 	netdev_rss_key_fill(priv->rss_key, sizeof(priv->rss_key));
 
-	priv->tx_ring = kzalloc(sizeof(struct mlx4_en_tx_ring *) * MAX_TX_RINGS,
-				GFP_KERNEL);
-	if (!priv->tx_ring) {
-		err = -ENOMEM;
-		goto out;
-	}
-	priv->tx_cq = kzalloc(sizeof(struct mlx4_en_cq *) * MAX_TX_RINGS,
-			      GFP_KERNEL);
-	if (!priv->tx_cq) {
-		err = -ENOMEM;
-		goto out;
+	for (t = 0; t < MLX4_EN_NUM_TX_TYPES; t++) {
+		priv->tx_ring_num[t] = prof->tx_ring_num[t];
+		if (!priv->tx_ring_num[t])
+			continue;
+
+		priv->tx_ring[t] = kzalloc(sizeof(struct mlx4_en_tx_ring *) *
+					   MAX_TX_RINGS, GFP_KERNEL);
+		if (!priv->tx_ring[t]) {
+			err = -ENOMEM;
+			goto err_free_tx;
+		}
+		priv->tx_cq[t] = kzalloc(sizeof(struct mlx4_en_cq *) *
+					 MAX_TX_RINGS, GFP_KERNEL);
+		if (!priv->tx_cq[t]) {
+			kfree(priv->tx_ring[t]);
+			err = -ENOMEM;
+			goto out;
+		}
 	}
 	priv->rx_ring_num = prof->rx_ring_num;
 	priv->cqe_factor = (mdev->dev->caps.cqe_size == 64) ? 1 : 0;
@@ -3209,7 +3292,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 	else
 		dev->netdev_ops = &mlx4_netdev_ops;
 	dev->watchdog_timeo = MLX4_EN_WATCHDOG_TIMEOUT;
-	netif_set_real_num_tx_queues(dev, priv->tx_ring_num);
+	netif_set_real_num_tx_queues(dev, priv->tx_ring_num[TX]);
 	netif_set_real_num_rx_queues(dev, priv->rx_ring_num);
 
 	dev->ethtool_ops = &mlx4_en_ethtool_ops;
@@ -3299,13 +3382,17 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 		dev->gso_partial_features = NETIF_F_GSO_UDP_TUNNEL_CSUM;
 	}
 
+	/* MTU range: 46 - hw-specific max */
+	dev->min_mtu = MLX4_EN_MIN_MTU;
+	dev->max_mtu = priv->max_mtu;
+
 	mdev->pndev[port] = dev;
 	mdev->upper[port] = NULL;
 
 	netif_carrier_off(dev);
 	mlx4_en_set_default_moderation(priv);
 
-	en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num);
+	en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num[TX]);
 	en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num);
 
 	mlx4_en_update_loopback_state(priv->dev, priv->dev->features);
@@ -3365,6 +3452,11 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
 
 	return 0;
 
+err_free_tx:
+	while (t--) {
+		kfree(priv->tx_ring[t]);
+		kfree(priv->tx_cq[t]);
+	}
 out:
 	mlx4_en_destroy_netdev(dev);
 	return err;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_port.c b/drivers/net/ethernet/mellanox/mlx4/en_port.c
index 59473a0ebcdf..9166d90e7328 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_port.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_port.c
@@ -147,6 +147,39 @@ static unsigned long en_stats_adder(__be64 *start, __be64 *next, int num)
 	return ret;
 }
 
+void mlx4_en_fold_software_stats(struct net_device *dev)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+	struct mlx4_en_dev *mdev = priv->mdev;
+	unsigned long packets, bytes;
+	int i;
+
+	if (!priv->port_up || mlx4_is_master(mdev->dev))
+		return;
+
+	packets = 0;
+	bytes = 0;
+	for (i = 0; i < priv->rx_ring_num; i++) {
+		const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+		packets += READ_ONCE(ring->packets);
+		bytes   += READ_ONCE(ring->bytes);
+	}
+	dev->stats.rx_packets = packets;
+	dev->stats.rx_bytes = bytes;
+
+	packets = 0;
+	bytes = 0;
+	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+		const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+		packets += READ_ONCE(ring->packets);
+		bytes   += READ_ONCE(ring->bytes);
+	}
+	dev->stats.tx_packets = packets;
+	dev->stats.tx_bytes = bytes;
+}
+
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 {
 	struct mlx4_counter tmp_counter_stats;
@@ -159,6 +192,7 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 	u64 in_mod = reset << 8 | port;
 	int err;
 	int i, counter_index;
+	unsigned long sw_tx_dropped = 0;
 	unsigned long sw_rx_dropped = 0;
 
 	mailbox = mlx4_alloc_cmd_mailbox(mdev->dev);
@@ -174,40 +208,42 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 
 	spin_lock_bh(&priv->stats_lock);
 
-	stats->rx_packets = 0;
-	stats->rx_bytes = 0;
+	mlx4_en_fold_software_stats(dev);
+
 	priv->port_stats.rx_chksum_good = 0;
 	priv->port_stats.rx_chksum_none = 0;
 	priv->port_stats.rx_chksum_complete = 0;
+	priv->xdp_stats.rx_xdp_drop    = 0;
+	priv->xdp_stats.rx_xdp_tx      = 0;
+	priv->xdp_stats.rx_xdp_tx_full = 0;
 	for (i = 0; i < priv->rx_ring_num; i++) {
-		stats->rx_packets += priv->rx_ring[i]->packets;
-		stats->rx_bytes += priv->rx_ring[i]->bytes;
-		sw_rx_dropped += priv->rx_ring[i]->dropped;
-		priv->port_stats.rx_chksum_good += priv->rx_ring[i]->csum_ok;
-		priv->port_stats.rx_chksum_none += priv->rx_ring[i]->csum_none;
-		priv->port_stats.rx_chksum_complete += priv->rx_ring[i]->csum_complete;
+		const struct mlx4_en_rx_ring *ring = priv->rx_ring[i];
+
+		sw_rx_dropped			+= READ_ONCE(ring->dropped);
+		priv->port_stats.rx_chksum_good += READ_ONCE(ring->csum_ok);
+		priv->port_stats.rx_chksum_none += READ_ONCE(ring->csum_none);
+		priv->port_stats.rx_chksum_complete += READ_ONCE(ring->csum_complete);
+		priv->xdp_stats.rx_xdp_drop	+= READ_ONCE(ring->xdp_drop);
+		priv->xdp_stats.rx_xdp_tx	+= READ_ONCE(ring->xdp_tx);
+		priv->xdp_stats.rx_xdp_tx_full	+= READ_ONCE(ring->xdp_tx_full);
 	}
-	stats->tx_packets = 0;
-	stats->tx_bytes = 0;
-	stats->tx_dropped = 0;
 	priv->port_stats.tx_chksum_offload = 0;
 	priv->port_stats.queue_stopped = 0;
 	priv->port_stats.wake_queue = 0;
 	priv->port_stats.tso_packets = 0;
 	priv->port_stats.xmit_more = 0;
 
-	for (i = 0; i < priv->tx_ring_num; i++) {
-		const struct mlx4_en_tx_ring *ring = priv->tx_ring[i];
-
-		stats->tx_packets += ring->packets;
-		stats->tx_bytes += ring->bytes;
-		stats->tx_dropped += ring->tx_dropped;
-		priv->port_stats.tx_chksum_offload += ring->tx_csum;
-		priv->port_stats.queue_stopped     += ring->queue_stopped;
-		priv->port_stats.wake_queue        += ring->wake_queue;
-		priv->port_stats.tso_packets       += ring->tso_packets;
-		priv->port_stats.xmit_more         += ring->xmit_more;
+	for (i = 0; i < priv->tx_ring_num[TX]; i++) {
+		const struct mlx4_en_tx_ring *ring = priv->tx_ring[TX][i];
+
+		sw_tx_dropped			   += READ_ONCE(ring->tx_dropped);
+		priv->port_stats.tx_chksum_offload += READ_ONCE(ring->tx_csum);
+		priv->port_stats.queue_stopped     += READ_ONCE(ring->queue_stopped);
+		priv->port_stats.wake_queue        += READ_ONCE(ring->wake_queue);
+		priv->port_stats.tso_packets       += READ_ONCE(ring->tso_packets);
+		priv->port_stats.xmit_more         += READ_ONCE(ring->xmit_more);
 	}
+
 	if (mlx4_is_master(mdev->dev)) {
 		stats->rx_packets = en_stats_adder(&mlx4_en_stats->RTOT_prio_0,
 						   &mlx4_en_stats->RTOT_prio_1,
@@ -245,7 +281,8 @@ int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset)
 	stats->rx_length_errors = be32_to_cpu(mlx4_en_stats->RdropLength);
 	stats->rx_crc_errors = be32_to_cpu(mlx4_en_stats->RCRC);
 	stats->rx_fifo_errors = be32_to_cpu(mlx4_en_stats->RdropOvflw);
-	stats->tx_dropped += be32_to_cpu(mlx4_en_stats->TDROP);
+	stats->tx_dropped = be32_to_cpu(mlx4_en_stats->TDROP) +
+			    sw_tx_dropped;
 
 	/* RX stats */
 	priv->pkstats.rx_multicast_packets = stats->multicast;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index f2e8beddcf44..3c37e216bbf3 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -96,7 +96,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
 	const struct mlx4_en_frag_info *frag_info;
 	struct page *page;
-	dma_addr_t dma;
 	int i;
 
 	for (i = 0; i < priv->num_frags; i++) {
@@ -115,9 +114,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 
 	for (i = 0; i < priv->num_frags; i++) {
 		frags[i] = ring_alloc[i];
-		dma = ring_alloc[i].dma + ring_alloc[i].page_offset;
+		frags[i].page_offset += priv->frag_info[i].rx_headroom;
+		rx_desc->data[i].addr = cpu_to_be64(frags[i].dma +
+						    frags[i].page_offset);
 		ring_alloc[i] = page_alloc[i];
-		rx_desc->data[i].addr = cpu_to_be64(dma);
 	}
 
 	return 0;
@@ -250,7 +250,8 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
 
 	if (ring->page_cache.index > 0) {
 		frags[0] = ring->page_cache.buf[--ring->page_cache.index];
-		rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
+		rx_desc->data[0].addr = cpu_to_be64(frags[0].dma +
+						    frags[0].page_offset);
 		return 0;
 	}
 
@@ -688,18 +689,23 @@ out_loopback:
 	dev_kfree_skb_any(skb);
 }
 
-static void mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
-				     struct mlx4_en_rx_ring *ring)
+static bool mlx4_en_refill_rx_buffers(struct mlx4_en_priv *priv,
+				      struct mlx4_en_rx_ring *ring)
 {
-	int index = ring->prod & ring->size_mask;
+	u32 missing = ring->actual_size - (ring->prod - ring->cons);
 
-	while ((u32) (ring->prod - ring->cons) < ring->actual_size) {
-		if (mlx4_en_prepare_rx_desc(priv, ring, index,
+	/* Try to batch allocations, but not too much. */
+	if (missing < 8)
+		return false;
+	do {
+		if (mlx4_en_prepare_rx_desc(priv, ring,
+					    ring->prod & ring->size_mask,
 					    GFP_ATOMIC | __GFP_COLD))
 			break;
 		ring->prod++;
-		index = ring->prod & ring->size_mask;
-	}
+	} while (--missing);
+
+	return true;
 }
 
 /* When hardware doesn't strip the vlan, we need to calculate the checksum
@@ -788,7 +794,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	struct bpf_prog *xdp_prog;
 	int doorbell_pending;
 	struct sk_buff *skb;
-	int tx_index;
 	int index;
 	int nr;
 	unsigned int length;
@@ -808,7 +813,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(ring->xdp_prog);
 	doorbell_pending = 0;
-	tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring;
 
 	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
 	 * descriptor offset can be deduced from the CQE index instead of
@@ -877,8 +881,6 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		 */
 		length = be32_to_cpu(cqe->byte_cnt);
 		length -= ring->fcs_del;
-		ring->bytes += length;
-		ring->packets++;
 		l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) &&
 			(cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL));
 
@@ -888,6 +890,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		if (xdp_prog) {
 			struct xdp_buff xdp;
 			dma_addr_t dma;
+			void *orig_data;
 			u32 act;
 
 			dma = be64_to_cpu(rx_desc->data[0].addr);
@@ -895,31 +898,43 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 						priv->frag_info[0].frag_size,
 						DMA_FROM_DEVICE);
 
-			xdp.data = page_address(frags[0].page) +
-							frags[0].page_offset;
+			xdp.data_hard_start = page_address(frags[0].page);
+			xdp.data = xdp.data_hard_start + frags[0].page_offset;
 			xdp.data_end = xdp.data + length;
+			orig_data = xdp.data;
 
 			act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+			if (xdp.data != orig_data) {
+				length = xdp.data_end - xdp.data;
+				frags[0].page_offset = xdp.data -
+					xdp.data_hard_start;
+			}
+
 			switch (act) {
 			case XDP_PASS:
 				break;
 			case XDP_TX:
-				if (likely(!mlx4_en_xmit_frame(frags, dev,
-							length, tx_index,
+				if (likely(!mlx4_en_xmit_frame(ring, frags, dev,
+							length, cq->ring,
 							&doorbell_pending)))
 					goto consumed;
-				goto xdp_drop; /* Drop on xmit failure */
+				goto xdp_drop_no_cnt; /* Drop on xmit failure */
 			default:
 				bpf_warn_invalid_xdp_action(act);
 			case XDP_ABORTED:
 			case XDP_DROP:
-xdp_drop:
+				ring->xdp_drop++;
+xdp_drop_no_cnt:
 				if (likely(mlx4_en_rx_recycle(ring, frags)))
 					goto consumed;
 				goto next;
 			}
 		}
 
+		ring->bytes += length;
+		ring->packets++;
+
 		if (likely(dev->features & NETIF_F_RXCSUM)) {
 			if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP |
 						      MLX4_CQE_STATUS_UDP)) {
@@ -1081,15 +1096,20 @@ consumed:
 
 out:
 	rcu_read_unlock();
-	if (doorbell_pending)
-		mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]);
 
+	if (polled) {
+		if (doorbell_pending)
+			mlx4_en_xmit_doorbell(priv->tx_ring[TX_XDP][cq->ring]);
+
+		mlx4_cq_set_ci(&cq->mcq);
+		wmb(); /* ensure HW sees CQ consumer before we post new buffers */
+		ring->cons = cq->mcq.cons_index;
+	}
 	AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled);
-	mlx4_cq_set_ci(&cq->mcq);
-	wmb(); /* ensure HW sees CQ consumer before we post new buffers */
-	ring->cons = cq->mcq.cons_index;
-	mlx4_en_refill_rx_buffers(priv, ring);
-	mlx4_en_update_rx_prod_db(ring);
+
+	if (mlx4_en_refill_rx_buffers(priv, ring))
+		mlx4_en_update_rx_prod_db(ring);
+
 	return polled;
 }
 
@@ -1131,14 +1151,17 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 			return budget;
 
 		/* Current cpu is not according to smp_irq_affinity -
-		 * probably affinity changed. need to stop this NAPI
-		 * poll, and restart it on the right CPU
+		 * probably affinity changed. Need to stop this NAPI
+		 * poll, and restart it on the right CPU.
+		 * Try to avoid returning a too small value (like 0),
+		 * to not fool net_rx_action() and its netdev_budget
 		 */
-		done = 0;
+		if (done)
+			done--;
 	}
 	/* Done for now */
-	napi_complete_done(napi, done);
-	mlx4_en_arm_cq(priv, cq);
+	if (napi_complete_done(napi, done))
+		mlx4_en_arm_cq(priv, cq);
 	return done;
 }
 
@@ -1151,37 +1174,41 @@ static const int frag_sizes[] = {
 
 void mlx4_en_calc_rx_buf(struct net_device *dev)
 {
-	enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE;
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
-	int order = MLX4_EN_ALLOC_PREFER_ORDER;
-	u32 align = SMP_CACHE_BYTES;
-	int buf_size = 0;
 	int i = 0;
 
 	/* bpf requires buffers to be set up as 1 packet per page.
 	 * This only works when num_frags == 1.
 	 */
-	if (priv->xdp_ring_num) {
-		dma_dir = PCI_DMA_BIDIRECTIONAL;
-		/* This will gain efficient xdp frame recycling at the expense
-		 * of more costly truesize accounting
+	if (priv->tx_ring_num[TX_XDP]) {
+		priv->frag_info[0].order = 0;
+		priv->frag_info[0].frag_size = eff_mtu;
+		priv->frag_info[0].frag_prefix_size = 0;
+		/* This will gain efficient xdp frame recycling at the
+		 * expense of more costly truesize accounting
 		 */
-		align = PAGE_SIZE;
-		order = 0;
-	}
-
-	while (buf_size < eff_mtu) {
-		priv->frag_info[i].order = order;
-		priv->frag_info[i].frag_size =
-			(eff_mtu > buf_size + frag_sizes[i]) ?
-				frag_sizes[i] : eff_mtu - buf_size;
-		priv->frag_info[i].frag_prefix_size = buf_size;
-		priv->frag_info[i].frag_stride =
-				ALIGN(priv->frag_info[i].frag_size, align);
-		priv->frag_info[i].dma_dir = dma_dir;
-		buf_size += priv->frag_info[i].frag_size;
-		i++;
+		priv->frag_info[0].frag_stride = PAGE_SIZE;
+		priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
+		priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM;
+		i = 1;
+	} else {
+		int buf_size = 0;
+
+		while (buf_size < eff_mtu) {
+			priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER;
+			priv->frag_info[i].frag_size =
+				(eff_mtu > buf_size + frag_sizes[i]) ?
+					frag_sizes[i] : eff_mtu - buf_size;
+			priv->frag_info[i].frag_prefix_size = buf_size;
+			priv->frag_info[i].frag_stride =
+				ALIGN(priv->frag_info[i].frag_size,
+				      SMP_CACHE_BYTES);
+			priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
+			priv->frag_info[i].rx_headroom = 0;
+			buf_size += priv->frag_info[i].frag_size;
+			i++;
+		}
 	}
 
 	priv->num_frags = i;
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
index c06346a82496..95290e1fc9fe 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_selftest.c
@@ -68,7 +68,7 @@ static int mlx4_en_test_loopback_xmit(struct mlx4_en_priv *priv)
 	memcpy(ethh->h_dest, priv->dev->dev_addr, ETH_ALEN);
 	eth_zero_addr(ethh->h_source);
 	ethh->h_proto = htons(ETH_P_ARP);
-	skb_set_mac_header(skb, 0);
+	skb_reset_mac_header(skb);
 	for (i = 0; i < packet_size; ++i)	/* fill our packet */
 		packet[i] = (unsigned char)(i & 0xff);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index e2509bba3e7c..5886ad78058f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -66,7 +66,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 
 	ring->size = size;
 	ring->size_mask = size - 1;
-	ring->stride = stride;
+	ring->sp_stride = stride;
 	ring->full_size = ring->size - HEADROOM - MAX_DESC_TXBBS;
 
 	tmp = size * sizeof(struct mlx4_en_tx_info);
@@ -90,22 +90,22 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 			goto err_info;
 		}
 	}
-	ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE);
+	ring->buf_size = ALIGN(size * ring->sp_stride, MLX4_EN_PAGE_SIZE);
 
 	/* Allocate HW buffers on provided NUMA node */
 	set_dev_node(&mdev->dev->persist->pdev->dev, node);
-	err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+	err = mlx4_alloc_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 	set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node);
 	if (err) {
 		en_err(priv, "Failed allocating hwq resources\n");
 		goto err_bounce;
 	}
 
-	ring->buf = ring->wqres.buf.direct.buf;
+	ring->buf = ring->sp_wqres.buf.direct.buf;
 
 	en_dbg(DRV, priv, "Allocated TX ring (addr:%p) - buf:%p size:%d buf_size:%d dma:%llx\n",
 	       ring, ring->buf, ring->size, ring->buf_size,
-	       (unsigned long long) ring->wqres.buf.direct.map);
+	       (unsigned long long) ring->sp_wqres.buf.direct.map);
 
 	err = mlx4_qp_reserve_range(mdev->dev, 1, 1, &ring->qpn,
 				    MLX4_RESERVE_ETH_BF_QP);
@@ -114,12 +114,12 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 		goto err_hwq_res;
 	}
 
-	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->qp, GFP_KERNEL);
+	err = mlx4_qp_alloc(mdev->dev, ring->qpn, &ring->sp_qp, GFP_KERNEL);
 	if (err) {
 		en_err(priv, "Failed allocating qp %d\n", ring->qpn);
 		goto err_reserve;
 	}
-	ring->qp.event = mlx4_en_sqp_event;
+	ring->sp_qp.event = mlx4_en_sqp_event;
 
 	err = mlx4_bf_alloc(mdev->dev, &ring->bf, node);
 	if (err) {
@@ -141,7 +141,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 	if (queue_index < priv->num_tx_rings_p_up)
 		cpumask_set_cpu(cpumask_local_spread(queue_index,
 						     priv->mdev->dev->numa_node),
-				&ring->affinity_mask);
+				&ring->sp_affinity_mask);
 
 	*pring = ring;
 	return 0;
@@ -149,7 +149,7 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv,
 err_reserve:
 	mlx4_qp_release_range(mdev->dev, ring->qpn, 1);
 err_hwq_res:
-	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 err_bounce:
 	kfree(ring->bounce_buf);
 	ring->bounce_buf = NULL;
@@ -171,10 +171,10 @@ void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv,
 
 	if (ring->bf_alloced)
 		mlx4_bf_free(mdev->dev, &ring->bf);
-	mlx4_qp_remove(mdev->dev, &ring->qp);
-	mlx4_qp_free(mdev->dev, &ring->qp);
+	mlx4_qp_remove(mdev->dev, &ring->sp_qp);
+	mlx4_qp_free(mdev->dev, &ring->sp_qp);
 	mlx4_qp_release_range(priv->mdev->dev, ring->qpn, 1);
-	mlx4_free_hwq_res(mdev->dev, &ring->wqres, ring->buf_size);
+	mlx4_free_hwq_res(mdev->dev, &ring->sp_wqres, ring->buf_size);
 	kfree(ring->bounce_buf);
 	ring->bounce_buf = NULL;
 	kvfree(ring->tx_info);
@@ -190,7 +190,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 	struct mlx4_en_dev *mdev = priv->mdev;
 	int err;
 
-	ring->cqn = cq;
+	ring->sp_cqn = cq;
 	ring->prod = 0;
 	ring->cons = 0xffffffff;
 	ring->last_nr_txbb = 1;
@@ -198,21 +198,21 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
 	memset(ring->buf, 0, ring->buf_size);
 	ring->free_tx_desc = mlx4_en_free_tx_desc;
 
-	ring->qp_state = MLX4_QP_STATE_RST;
-	ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8);
+	ring->sp_qp_state = MLX4_QP_STATE_RST;
+	ring->doorbell_qpn = cpu_to_be32(ring->sp_qp.qpn << 8);
 	ring->mr_key = cpu_to_be32(mdev->mr.key);
 
-	mlx4_en_fill_qp_context(priv, ring->size, ring->stride, 1, 0, ring->qpn,
-				ring->cqn, user_prio, &ring->context);
+	mlx4_en_fill_qp_context(priv, ring->size, ring->sp_stride, 1, 0, ring->qpn,
+				ring->sp_cqn, user_prio, &ring->sp_context);
 	if (ring->bf_alloced)
-		ring->context.usr_page =
+		ring->sp_context.usr_page =
 			cpu_to_be32(mlx4_to_hw_uar_index(mdev->dev,
 							 ring->bf.uar->index));
 
-	err = mlx4_qp_to_ready(mdev->dev, &ring->wqres.mtt, &ring->context,
-			       &ring->qp, &ring->qp_state);
-	if (!cpumask_empty(&ring->affinity_mask))
-		netif_set_xps_queue(priv->dev, &ring->affinity_mask,
+	err = mlx4_qp_to_ready(mdev->dev, &ring->sp_wqres.mtt, &ring->sp_context,
+			       &ring->sp_qp, &ring->sp_qp_state);
+	if (!cpumask_empty(&ring->sp_affinity_mask))
+		netif_set_xps_queue(priv->dev, &ring->sp_affinity_mask,
 				    ring->queue_index);
 
 	return err;
@@ -223,8 +223,8 @@ void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
 {
 	struct mlx4_en_dev *mdev = priv->mdev;
 
-	mlx4_qp_modify(mdev->dev, NULL, ring->qp_state,
-		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->qp);
+	mlx4_qp_modify(mdev->dev, NULL, ring->sp_qp_state,
+		       MLX4_QP_STATE_RST, NULL, 0, 0, &ring->sp_qp);
 }
 
 static inline bool mlx4_en_is_tx_ring_full(struct mlx4_en_tx_ring *ring)
@@ -354,7 +354,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 	struct mlx4_en_rx_alloc frame = {
 		.page = tx_info->page,
 		.dma = tx_info->map0_dma,
-		.page_offset = 0,
+		.page_offset = XDP_PACKET_HEADROOM,
 		.page_size = PAGE_SIZE,
 	};
 
@@ -392,7 +392,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 		cnt++;
 	}
 
-	netdev_tx_reset_queue(ring->tx_queue);
+	if (ring->tx_queue)
+		netdev_tx_reset_queue(ring->tx_queue);
 
 	if (cnt)
 		en_dbg(DRV, priv, "Freed %d uncompleted tx descriptors\n", cnt);
@@ -405,7 +406,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev,
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_cq *mcq = &cq->mcq;
-	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->ring];
+	struct mlx4_en_tx_ring *ring = priv->tx_ring[cq->type][cq->ring];
 	struct mlx4_cqe *cqe;
 	u16 index;
 	u16 new_index, ring_index, stamp_index;
@@ -807,7 +808,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 	bool bf_ok;
 
 	tx_ind = skb_get_queue_mapping(skb);
-	ring = priv->tx_ring[tx_ind];
+	ring = priv->tx_ring[TX][tx_ind];
 
 	if (!priv->port_up)
 		goto tx_drop;
@@ -1078,7 +1079,8 @@ tx_drop:
 	return NETDEV_TX_OK;
 }
 
-netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
+			       struct mlx4_en_rx_alloc *frame,
 			       struct net_device *dev, unsigned int length,
 			       int tx_ind, int *doorbell_pending)
 {
@@ -1101,7 +1103,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
 	BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE,
 			 "mlx4_en_xmit_frame requires minimum size tx desc");
 
-	ring = priv->tx_ring[tx_ind];
+	ring = priv->tx_ring[TX_XDP][tx_ind];
 
 	if (!priv->port_up)
 		goto tx_drop;
@@ -1130,7 +1132,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
 	tx_info->page = frame->page;
 	frame->page = NULL;
 	tx_info->map0_dma = dma;
-	tx_info->map0_byte_count = length;
+	tx_info->map0_byte_count = PAGE_SIZE;
 	tx_info->nr_txbb = nr_txbb;
 	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
 	tx_info->data_offset = (void *)data - (void *)tx_desc;
@@ -1139,9 +1141,10 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
 	tx_info->linear = 1;
 	tx_info->inl = 0;
 
-	dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE);
+	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
+					 length, PCI_DMA_TODEVICE);
 
-	data->addr = cpu_to_be64(dma);
+	data->addr = cpu_to_be64(dma + frame->page_offset);
 	data->lkey = ring->mr_key;
 	dma_wmb();
 	data->byte_count = cpu_to_be32(length);
@@ -1153,8 +1156,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
 		((ring->prod & ring->size) ?
 		 cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0);
 
-	ring->packets++;
-	ring->bytes += tx_info->nr_bytes;
+	rx_ring->xdp_tx++;
 	AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length);
 
 	ring->prod += nr_txbb;
@@ -1178,7 +1180,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
 	return NETDEV_TX_OK;
 
 tx_drop_count:
-	ring->tx_dropped++;
+	rx_ring->xdp_tx_full++;
 tx_drop:
 	return NETDEV_TX_BUSY;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 6f4e67bc3538..75d07fa9d0b1 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -4147,11 +4147,8 @@ static void mlx4_shutdown(struct pci_dev *pdev)
 
 	mlx4_info(persist->dev, "mlx4_shutdown was called\n");
 	mutex_lock(&persist->interface_state_mutex);
-	if (persist->interface_state & MLX4_INTERFACE_STATE_UP) {
-		/* Notify mlx4 clients that the kernel is being shut down */
-		persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN;
+	if (persist->interface_state & MLX4_INTERFACE_STATE_UP)
 		mlx4_unload_one(pdev);
-	}
 	mutex_unlock(&persist->interface_state_mutex);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c
index 94b891c118c1..1a670b681555 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c
@@ -1457,7 +1457,12 @@ EXPORT_SYMBOL_GPL(mlx4_multicast_detach);
 int mlx4_flow_steer_promisc_add(struct mlx4_dev *dev, u8 port,
 				u32 qpn, enum mlx4_net_trans_promisc_mode mode)
 {
-	struct mlx4_net_trans_rule rule;
+	struct mlx4_net_trans_rule rule = {
+		.queue_mode = MLX4_NET_TRANS_Q_FIFO,
+		.exclusive = 0,
+		.allow_loopback = 1,
+	};
+
 	u64 *regid_p;
 
 	switch (mode) {
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index a3528dd1e72e..ba1c6cd0cc79 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -207,8 +207,11 @@ enum {
  */
 
 enum cq_type {
-	RX = 0,
-	TX = 1,
+	/* keep tx types first */
+	TX,
+	TX_XDP,
+#define MLX4_EN_NUM_TX_TYPES (TX_XDP + 1)
+	RX,
 };
 
 
@@ -278,46 +281,50 @@ struct mlx4_en_tx_ring {
 	u32			last_nr_txbb;
 	u32			cons;
 	unsigned long		wake_queue;
+	struct netdev_queue	*tx_queue;
+	u32			(*free_tx_desc)(struct mlx4_en_priv *priv,
+						struct mlx4_en_tx_ring *ring,
+						int index, u8 owner,
+						u64 timestamp, int napi_mode);
+	struct mlx4_en_rx_ring	*recycle_ring;
 
 	/* cache line used and dirtied in mlx4_en_xmit() */
 	u32			prod ____cacheline_aligned_in_smp;
+	unsigned int		tx_dropped;
 	unsigned long		bytes;
 	unsigned long		packets;
 	unsigned long		tx_csum;
 	unsigned long		tso_packets;
 	unsigned long		xmit_more;
-	unsigned int		tx_dropped;
 	struct mlx4_bf		bf;
-	unsigned long		queue_stopped;
 
 	/* Following part should be mostly read */
-	cpumask_t		affinity_mask;
-	struct mlx4_qp		qp;
-	struct mlx4_hwq_resources wqres;
+	__be32			doorbell_qpn;
+	__be32			mr_key;
 	u32			size; /* number of TXBBs */
 	u32			size_mask;
-	u16			stride;
 	u32			full_size;
-	u16			cqn;	/* index of port CQ associated with this ring */
 	u32			buf_size;
-	__be32			doorbell_qpn;
-	__be32			mr_key;
 	void			*buf;
 	struct mlx4_en_tx_info	*tx_info;
-	struct mlx4_en_rx_ring	*recycle_ring;
-	u32			(*free_tx_desc)(struct mlx4_en_priv *priv,
-						struct mlx4_en_tx_ring *ring,
-						int index, u8 owner,
-						u64 timestamp, int napi_mode);
-	u8			*bounce_buf;
-	struct mlx4_qp_context	context;
 	int			qpn;
-	enum mlx4_qp_state	qp_state;
 	u8			queue_index;
 	bool			bf_enabled;
 	bool			bf_alloced;
-	struct netdev_queue	*tx_queue;
-	int			hwtstamp_tx_type;
+	u8			hwtstamp_tx_type;
+	u8			*bounce_buf;
+
+	/* Not used in fast path
+	 * Only queue_stopped might be used if BQL is not properly working.
+	 */
+	unsigned long		queue_stopped;
+	struct mlx4_hwq_resources sp_wqres;
+	struct mlx4_qp		sp_qp;
+	struct mlx4_qp_context	sp_context;
+	cpumask_t		sp_affinity_mask;
+	enum mlx4_qp_state	sp_qp_state;
+	u16			sp_stride;
+	u16			sp_cqn;	/* index of port CQ associated with this ring */
 } ____cacheline_aligned_in_smp;
 
 struct mlx4_en_rx_desc {
@@ -347,6 +354,9 @@ struct mlx4_en_rx_ring {
 	unsigned long csum_ok;
 	unsigned long csum_none;
 	unsigned long csum_complete;
+	unsigned long xdp_drop;
+	unsigned long xdp_tx;
+	unsigned long xdp_tx_full;
 	unsigned long dropped;
 	int hwtstamp_rx_filter;
 	cpumask_var_t affinity_mask;
@@ -361,7 +371,7 @@ struct mlx4_en_cq {
 	int size;
 	int buf_size;
 	int vector;
-	enum cq_type is_tx;
+	enum cq_type type;
 	u16 moder_time;
 	u16 moder_cnt;
 	struct mlx4_cqe *buf;
@@ -372,7 +382,7 @@ struct mlx4_en_cq {
 
 struct mlx4_en_port_profile {
 	u32 flags;
-	u32 tx_ring_num;
+	u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
 	u32 rx_ring_num;
 	u32 tx_ring_size;
 	u32 rx_ring_size;
@@ -465,7 +475,8 @@ struct mlx4_en_frag_info {
 	u16 frag_prefix_size;
 	u32 frag_stride;
 	enum dma_data_direction dma_dir;
-	int order;
+	u16 order;
+	u16 rx_headroom;
 };
 
 #ifdef CONFIG_MLX4_EN_DCB
@@ -569,17 +580,16 @@ struct mlx4_en_priv {
 	u32 flags;
 	u8 num_tx_rings_p_up;
 	u32 tx_work_limit;
-	u32 tx_ring_num;
+	u32 tx_ring_num[MLX4_EN_NUM_TX_TYPES];
 	u32 rx_ring_num;
 	u32 rx_skb_size;
 	struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
 	u16 num_frags;
 	u16 log_rx_info;
-	int xdp_ring_num;
 
-	struct mlx4_en_tx_ring **tx_ring;
+	struct mlx4_en_tx_ring **tx_ring[MLX4_EN_NUM_TX_TYPES];
 	struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS];
-	struct mlx4_en_cq **tx_cq;
+	struct mlx4_en_cq **tx_cq[MLX4_EN_NUM_TX_TYPES];
 	struct mlx4_en_cq *rx_cq[MAX_RX_RINGS];
 	struct mlx4_qp drop_qp;
 	struct work_struct rx_mode_task;
@@ -597,6 +607,7 @@ struct mlx4_en_priv {
 	struct mlx4_en_flow_stats_rx rx_flowstats;
 	struct mlx4_en_flow_stats_tx tx_flowstats;
 	struct mlx4_en_port_stats port_stats;
+	struct mlx4_en_xdp_stats xdp_stats;
 	struct mlx4_en_stats_bitmap stats_bitmap;
 	struct list_head mc_list;
 	struct list_head curr_list;
@@ -685,7 +696,8 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq);
 u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb,
 			 void *accel_priv, select_queue_fallback_t fallback);
 netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev);
-netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame,
+netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
+			       struct mlx4_en_rx_alloc *frame,
 			       struct net_device *dev, unsigned int length,
 			       int tx_ind, int *doorbell_pending);
 void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring);
@@ -744,6 +756,7 @@ void mlx4_en_rx_irq(struct mlx4_cq *mcq);
 int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, struct mlx4_en_priv *priv);
 
+void mlx4_en_fold_software_stats(struct net_device *dev);
 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
 int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
 
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
index 7fd466c0b929..48641cb0367f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_stats.h
@@ -55,6 +55,13 @@ struct mlx4_en_perf_stats {
 #define NUM_PERF_COUNTERS		6
 };
 
+struct mlx4_en_xdp_stats {
+	unsigned long rx_xdp_drop;	/* total of per-ring xdp_drop */
+	unsigned long rx_xdp_tx;	/* total of per-ring xdp_tx */
+	unsigned long rx_xdp_tx_full;	/* total of per-ring xdp_tx_full */
+#define NUM_XDP_STATS		3
+};
+
 #define NUM_MAIN_STATS	21
 
 #define MLX4_NUM_PRIORITIES	8
@@ -107,7 +114,8 @@ enum {
 };
 
 #define NUM_ALL_STATS	(NUM_MAIN_STATS + NUM_PORT_STATS + NUM_PKT_STATS + \
-			 NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS)
+			 NUM_FLOW_STATS + NUM_PERF_STATS + NUM_PF_STATS + \
+			 NUM_XDP_STATS)
 
 #define MLX4_FIND_NETDEV_STAT(n) (offsetof(struct net_device_stats, n) / \
 				  sizeof(((struct net_device_stats *)0)->n))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index aae46884bf93..ddb4ca4ff930 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -14,12 +14,10 @@ config MLX5_CORE
 config MLX5_CORE_EN
 	bool "Mellanox Technologies ConnectX-4 Ethernet support"
 	depends on NETDEVICES && ETHERNET && PCI && MLX5_CORE
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	default n
 	---help---
 	  Ethernet support in Mellanox Technologies ConnectX-4 NIC.
-	  Ethernet and Infiniband support in ConnectX-4 are currently mutually
-	  exclusive.
 
 config MLX5_CORE_EN_DCB
 	bool "Data Center Bridging (DCB) Support"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 0343725d7f44..9f43beb86250 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -8,6 +8,6 @@ mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \
 		en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \
 		en_rx.o en_rx_am.o en_txrx.o en_clock.o vxlan.o \
-		en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o
+		en_tc.o en_arfs.o en_rep.o en_fs_ethtool.o en_selftest.o
 
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) +=  en_dcbnl.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
index 2c6e3c7b7417..66bd213f35ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/alloc.c
@@ -106,6 +106,63 @@ void mlx5_buf_free(struct mlx5_core_dev *dev, struct mlx5_buf *buf)
 }
 EXPORT_SYMBOL_GPL(mlx5_buf_free);
 
+/* Allocate @size bytes as per-page coherent DMA fragments; 0 or -ENOMEM. */
+int mlx5_frag_buf_alloc_node(struct mlx5_core_dev *dev, int size,
+			     struct mlx5_frag_buf *buf, int node)
+{
+	int i;
+
+	buf->size = size;
+	buf->npages = DIV_ROUND_UP(size, PAGE_SIZE);
+	buf->page_shift = PAGE_SHIFT;
+	buf->frags = kcalloc(buf->npages, sizeof(*buf->frags), GFP_KERNEL);
+	if (!buf->frags)
+		return -ENOMEM;
+
+	for (i = 0; i < buf->npages; i++) {
+		struct mlx5_buf_list *frag = &buf->frags[i];
+		int frag_sz = min_t(int, size, PAGE_SIZE);
+
+		frag->buf = mlx5_dma_zalloc_coherent_node(dev, frag_sz,
+							  &frag->map, node);
+		if (!frag->buf)
+			goto err_free_buf;
+		if (frag->map & ((1 << buf->page_shift) - 1)) {
+			dma_free_coherent(&dev->pdev->dev, frag_sz,
+					  frag->buf, frag->map);
+			mlx5_core_warn(dev, "unexpected map alignment: %pad, page_shift=%d\n",
+				       &frag->map, buf->page_shift);
+			goto err_free_buf;
+		}
+		size -= frag_sz;
+	}
+
+	return 0;
+
+err_free_buf:
+	/* only the last fragment can be partial, so these are full pages */
+	while (i--)
+		dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, buf->frags[i].buf,
+				  buf->frags[i].map);
+	kfree(buf->frags);
+	return -ENOMEM;
+}
+
+/* Free each DMA fragment of @buf, then the fragment table itself. */
+void mlx5_frag_buf_free(struct mlx5_core_dev *dev, struct mlx5_frag_buf *buf)
+{
+	int left = buf->size;
+	int idx, len;
+
+	for (idx = 0; idx < buf->npages; idx++, left -= len) {
+		struct mlx5_buf_list *frag = &buf->frags[idx];
+
+		len = min_t(int, left, PAGE_SIZE);
+		dma_free_coherent(&dev->pdev->dev, len, frag->buf, frag->map);
+	}
+	kfree(buf->frags);
+}
+
 static struct mlx5_db_pgdir *mlx5_alloc_db_pgdir(struct mlx5_core_dev *dev,
 						 int node)
 {
@@ -230,3 +287,12 @@ void mlx5_fill_page_array(struct mlx5_buf *buf, __be64 *pas)
 	}
 }
 EXPORT_SYMBOL_GPL(mlx5_fill_page_array);
+
+void mlx5_fill_page_frag_array(struct mlx5_frag_buf *buf, __be64 *pas)
+{
+	int i;
+
+	for (i = 0; i < buf->npages; i++)
+		pas[i] = cpu_to_be64(buf->frags[i].map);
+}
+EXPORT_SYMBOL_GPL(mlx5_fill_page_frag_array);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 1e639f886021..3797cc7c1288 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -54,14 +54,6 @@ enum {
 };
 
 enum {
-	NUM_LONG_LISTS	  = 2,
-	NUM_MED_LISTS	  = 64,
-	LONG_LIST_SIZE	  = (2ULL * 1024 * 1024 * 1024 / PAGE_SIZE) * 8 + 16 +
-				MLX5_CMD_DATA_BLOCK_SIZE,
-	MED_LIST_SIZE	  = 16 + MLX5_CMD_DATA_BLOCK_SIZE,
-};
-
-enum {
 	MLX5_CMD_DELIVERY_STAT_OK			= 0x0,
 	MLX5_CMD_DELIVERY_STAT_SIGNAT_ERR		= 0x1,
 	MLX5_CMD_DELIVERY_STAT_TOK_ERR			= 0x2,
@@ -268,11 +260,6 @@ static void dump_buf(void *buf, int size, int data_only, int offset)
 		pr_debug("\n");
 }
 
-enum {
-	MLX5_DRIVER_STATUS_ABORTED = 0xfe,
-	MLX5_DRIVER_SYND = 0xbadd00de,
-};
-
 static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 				       u32 *synd, u8 *status)
 {
@@ -318,6 +305,8 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT:
 	case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_DESTROY_QOS_PARA_VPORT:
 		return MLX5_CMD_STAT_OK;
 
 	case MLX5_CMD_OP_QUERY_HCA_CAP:
@@ -419,11 +408,14 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op,
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE:
 	case MLX5_CMD_OP_CREATE_FLOW_GROUP:
 	case MLX5_CMD_OP_QUERY_FLOW_GROUP:
-
 	case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY:
 	case MLX5_CMD_OP_ALLOC_FLOW_COUNTER:
 	case MLX5_CMD_OP_QUERY_FLOW_COUNTER:
 	case MLX5_CMD_OP_ALLOC_ENCAP_HEADER:
+	case MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_QUERY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT:
+	case MLX5_CMD_OP_CREATE_QOS_PARA_VPORT:
 		*status = MLX5_DRIVER_STATUS_ABORTED;
 		*synd = MLX5_DRIVER_SYND;
 		return -EIO;
@@ -580,6 +572,12 @@ const char *mlx5_command_str(int command)
 	MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE);
 	MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER);
 	MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER);
+	MLX5_COMMAND_STR_CASE(CREATE_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(DESTROY_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(QUERY_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(MODIFY_SCHEDULING_ELEMENT);
+	MLX5_COMMAND_STR_CASE(CREATE_QOS_PARA_VPORT);
+	MLX5_COMMAND_STR_CASE(DESTROY_QOS_PARA_VPORT);
 	default: return "unknown command opcode";
 	}
 }
@@ -1063,14 +1061,13 @@ static struct mlx5_cmd_mailbox *alloc_cmd_box(struct mlx5_core_dev *dev,
 	if (!mailbox)
 		return ERR_PTR(-ENOMEM);
 
-	mailbox->buf = pci_pool_alloc(dev->cmd.pool, flags,
-				      &mailbox->dma);
+	mailbox->buf = pci_pool_zalloc(dev->cmd.pool, flags,
+				       &mailbox->dma);
 	if (!mailbox->buf) {
 		mlx5_core_dbg(dev, "failed allocation\n");
 		kfree(mailbox);
 		return ERR_PTR(-ENOMEM);
 	}
-	memset(mailbox->buf, 0, sizeof(struct mlx5_cmd_prot_block));
 	mailbox->next = NULL;
 
 	return mailbox;
@@ -1361,10 +1358,10 @@ static void free_msg(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *msg)
 {
 	unsigned long flags;
 
-	if (msg->cache) {
-		spin_lock_irqsave(&msg->cache->lock, flags);
-		list_add_tail(&msg->list, &msg->cache->head);
-		spin_unlock_irqrestore(&msg->cache->lock, flags);
+	if (msg->parent) {
+		spin_lock_irqsave(&msg->parent->lock, flags);
+		list_add_tail(&msg->list, &msg->parent->head);
+		spin_unlock_irqrestore(&msg->parent->lock, flags);
 	} else {
 		mlx5_free_cmd_msg(dev, msg);
 	}
@@ -1461,30 +1458,37 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size,
 				      gfp_t gfp)
 {
 	struct mlx5_cmd_msg *msg = ERR_PTR(-ENOMEM);
+	struct cmd_msg_cache *ch = NULL;
 	struct mlx5_cmd *cmd = &dev->cmd;
-	struct cache_ent *ent = NULL;
-
-	if (in_size > MED_LIST_SIZE && in_size <= LONG_LIST_SIZE)
-		ent = &cmd->cache.large;
-	else if (in_size > 16 && in_size <= MED_LIST_SIZE)
-		ent = &cmd->cache.med;
-
-	if (ent) {
-		spin_lock_irq(&ent->lock);
-		if (!list_empty(&ent->head)) {
-			msg = list_entry(ent->head.next, typeof(*msg), list);
-			/* For cached lists, we must explicitly state what is
-			 * the real size
-			 */
-			msg->len = in_size;
-			list_del(&msg->list);
+	int i;
+
+	if (in_size <= 16)
+		goto cache_miss;
+
+	for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+		ch = &cmd->cache[i];
+		if (in_size > ch->max_inbox_size)
+			continue;
+		spin_lock_irq(&ch->lock);
+		if (list_empty(&ch->head)) {
+			spin_unlock_irq(&ch->lock);
+			continue;
 		}
-		spin_unlock_irq(&ent->lock);
+		msg = list_entry(ch->head.next, typeof(*msg), list);
+		/* For cached lists, we must explicitly state what is
+		 * the real size
+		 */
+		msg->len = in_size;
+		list_del(&msg->list);
+		spin_unlock_irq(&ch->lock);
+		break;
 	}
 
-	if (IS_ERR(msg))
-		msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
+	if (!IS_ERR(msg))
+		return msg;
 
+cache_miss:
+	msg = mlx5_alloc_cmd_msg(dev, gfp, in_size, 0);
 	return msg;
 }
 
@@ -1582,58 +1586,56 @@ EXPORT_SYMBOL(mlx5_cmd_exec_cb);
 
 static void destroy_msg_cache(struct mlx5_core_dev *dev)
 {
-	struct mlx5_cmd *cmd = &dev->cmd;
+	struct cmd_msg_cache *ch;
 	struct mlx5_cmd_msg *msg;
 	struct mlx5_cmd_msg *n;
+	int i;
 
-	list_for_each_entry_safe(msg, n, &cmd->cache.large.head, list) {
-		list_del(&msg->list);
-		mlx5_free_cmd_msg(dev, msg);
-	}
-
-	list_for_each_entry_safe(msg, n, &cmd->cache.med.head, list) {
-		list_del(&msg->list);
-		mlx5_free_cmd_msg(dev, msg);
+	for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) {
+		ch = &dev->cmd.cache[i];
+		list_for_each_entry_safe(msg, n, &ch->head, list) {
+			list_del(&msg->list);
+			mlx5_free_cmd_msg(dev, msg);
+		}
 	}
 }
 
-static int create_msg_cache(struct mlx5_core_dev *dev)
+static unsigned cmd_cache_num_ent[MLX5_NUM_COMMAND_CACHES] = {
+	512, 32, 16, 8, 2
+};
+
+static unsigned cmd_cache_ent_size[MLX5_NUM_COMMAND_CACHES] = {
+	16 + MLX5_CMD_DATA_BLOCK_SIZE,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 2,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 16,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 256,
+	16 + MLX5_CMD_DATA_BLOCK_SIZE * 512,
+};
+
+static void create_msg_cache(struct mlx5_core_dev *dev)
 {
 	struct mlx5_cmd *cmd = &dev->cmd;
+	struct cmd_msg_cache *ch;
 	struct mlx5_cmd_msg *msg;
-	int err;
 	int i;
-
-	spin_lock_init(&cmd->cache.large.lock);
-	INIT_LIST_HEAD(&cmd->cache.large.head);
-	spin_lock_init(&cmd->cache.med.lock);
-	INIT_LIST_HEAD(&cmd->cache.med.head);
-
-	for (i = 0; i < NUM_LONG_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, LONG_LIST_SIZE, 0);
-		if (IS_ERR(msg)) {
-			err = PTR_ERR(msg);
-			goto ex_err;
+	int k;
+
+	/* Initialize and fill the caches with initial entries */
+	for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) {
+		ch = &cmd->cache[k];
+		spin_lock_init(&ch->lock);
+		INIT_LIST_HEAD(&ch->head);
+		ch->num_ent = cmd_cache_num_ent[k];
+		ch->max_inbox_size = cmd_cache_ent_size[k];
+		for (i = 0; i < ch->num_ent; i++) {
+			msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL | __GFP_NOWARN,
+						 ch->max_inbox_size, 0);
+			if (IS_ERR(msg))
+				break;
+			msg->parent = ch;
+			list_add_tail(&msg->list, &ch->head);
 		}
-		msg->cache = &cmd->cache.large;
-		list_add_tail(&msg->list, &cmd->cache.large.head);
 	}
-
-	for (i = 0; i < NUM_MED_LISTS; i++) {
-		msg = mlx5_alloc_cmd_msg(dev, GFP_KERNEL, MED_LIST_SIZE, 0);
-		if (IS_ERR(msg)) {
-			err = PTR_ERR(msg);
-			goto ex_err;
-		}
-		msg->cache = &cmd->cache.med;
-		list_add_tail(&msg->list, &cmd->cache.med.head);
-	}
-
-	return 0;
-
-ex_err:
-	destroy_msg_cache(dev);
-	return err;
 }
 
 static int alloc_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
@@ -1756,11 +1758,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 
 	cmd->mode = CMD_MODE_POLLING;
 
-	err = create_msg_cache(dev);
-	if (err) {
-		dev_err(&dev->pdev->dev, "failed to create command cache\n");
-		goto err_free_page;
-	}
+	create_msg_cache(dev);
 
 	set_wqname(dev);
 	cmd->wq = create_singlethread_workqueue(cmd->wq_name);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 7a43502a89cc..951dbd58594d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -77,9 +77,9 @@
 						 MLX5_MPWRQ_WQE_PAGE_ORDER)
 
 #define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
-#define MLX5E_REQUIRED_MTTS(rqs, wqes)\
-	(rqs * wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
-#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) <= U16_MAX)
+#define MLX5E_REQUIRED_MTTS(wqes)		\
+	(wqes * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8))
+#define MLX5E_VALID_NUM_MTTS(num_mtts) (MLX5_MTT_OCTW(num_mtts) - 1 <= U16_MAX)
 
 #define MLX5_UMR_ALIGN				(2048)
 #define MLX5_MPWRQ_SMALL_PACKET_THRESHOLD	(128)
@@ -150,12 +150,6 @@ static inline int mlx5_max_log_rq_size(int wq_type)
 	}
 }
 
-enum {
-	MLX5E_INLINE_MODE_L2,
-	MLX5E_INLINE_MODE_VPORT_CONTEXT,
-	MLX5_INLINE_MODE_NOT_REQUIRED,
-};
-
 struct mlx5e_tx_wqe {
 	struct mlx5_wqe_ctrl_seg ctrl;
 	struct mlx5_wqe_eth_seg  eth;
@@ -173,22 +167,28 @@ struct mlx5e_umr_wqe {
 	struct mlx5_wqe_data_seg       data;
 };
 
+extern const char mlx5e_self_tests[][ETH_GSTRING_LEN];
+
 static const char mlx5e_priv_flags[][ETH_GSTRING_LEN] = {
 	"rx_cqe_moder",
+	"rx_cqe_compress",
 };
 
 enum mlx5e_priv_flag {
 	MLX5E_PFLAG_RX_CQE_BASED_MODER = (1 << 0),
+	MLX5E_PFLAG_RX_CQE_COMPRESS = (1 << 1),
 };
 
-#define MLX5E_SET_PRIV_FLAG(priv, pflag, enable)    \
-	do {                                        \
-		if (enable)                         \
-			priv->pflags |= pflag;      \
-		else                                \
-			priv->pflags &= ~pflag;     \
+#define MLX5E_SET_PFLAG(priv, pflag, enable)			\
+	do {							\
+		if (enable)					\
+			(priv)->params.pflags |= (pflag);	\
+		else						\
+			(priv)->params.pflags &= ~(pflag);	\
 	} while (0)
 
+#define MLX5E_GET_PFLAG(priv, pflag) (!!((priv)->params.pflags & (pflag)))
+
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 #define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
 #endif
@@ -207,8 +207,7 @@ struct mlx5e_params {
 	u16 num_channels;
 	u8  num_tc;
 	u8  rx_cq_period_mode;
-	bool rx_cqe_compress_admin;
-	bool rx_cqe_compress;
+	bool rx_cqe_compress_def;
 	struct mlx5e_cq_moder rx_cq_moderation;
 	struct mlx5e_cq_moder tx_cq_moderation;
 	u16 min_rx_wqes;
@@ -220,12 +219,34 @@ struct mlx5e_params {
 	u8  toeplitz_hash_key[40];
 	u32 indirection_rqt[MLX5E_INDIR_RQT_SIZE];
 	bool vlan_strip_disable;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-	struct ieee_ets ets;
-#endif
 	bool rx_am_enabled;
 	u32 lro_timeout;
+	u32 pflags;
+};
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+struct mlx5e_cee_config {
+	/* bw pct for priority group */
+	u8                         pg_bw_pct[CEE_DCBX_MAX_PGS];
+	u8                         prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+	bool                       pfc_setting[CEE_DCBX_MAX_PRIO];
+	bool                       pfc_enable;
+};
+
+enum {
+	MLX5_DCB_CHG_RESET,
+	MLX5_DCB_NO_CHG,
+	MLX5_DCB_CHG_NO_RESET,
+};
+
+struct mlx5e_dcbx {
+	enum mlx5_dcbx_oper_mode   mode;
+	struct mlx5e_cee_config    cee_cfg; /* pending configuration */
+
+	/* The only setting that cannot be read from FW */
+	u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
 };
+#endif
 
 struct mlx5e_tstamp {
 	rwlock_t                   lock;
@@ -241,7 +262,7 @@ struct mlx5e_tstamp {
 };
 
 enum {
-	MLX5E_RQ_STATE_FLUSH,
+	MLX5E_RQ_STATE_ENABLED,
 	MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS,
 	MLX5E_RQ_STATE_AM,
 };
@@ -265,7 +286,7 @@ struct mlx5e_cq {
 	u16                        decmprs_wqe_counter;
 
 	/* control */
-	struct mlx5_wq_ctrl        wq_ctrl;
+	struct mlx5_frag_wq_ctrl   wq_ctrl;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_rq;
@@ -326,7 +347,6 @@ struct mlx5e_rq {
 		struct {
 			struct mlx5e_mpw_info *info;
 			void                  *mtt_no_align;
-			u32                    mtt_offset;
 		} mpwqe;
 	};
 	struct {
@@ -361,6 +381,7 @@ struct mlx5e_rq {
 	u32                    rqn;
 	struct mlx5e_channel  *channel;
 	struct mlx5e_priv     *priv;
+	struct mlx5_core_mkey  umr_mkey;
 } ____cacheline_aligned_in_smp;
 
 struct mlx5e_umr_dma_info {
@@ -394,7 +415,7 @@ struct mlx5e_sq_dma {
 };
 
 enum {
-	MLX5E_SQ_STATE_FLUSH,
+	MLX5E_SQ_STATE_ENABLED,
 	MLX5E_SQ_STATE_BF_ENABLE,
 };
 
@@ -524,7 +545,7 @@ struct mlx5e_vxlan_db {
 
 struct mlx5e_l2_rule {
 	u8  addr[ETH_ALEN + 2];
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 };
 
 struct mlx5e_flow_table {
@@ -545,10 +566,10 @@ struct mlx5e_tc_table {
 struct mlx5e_vlan_table {
 	struct mlx5e_flow_table		ft;
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
-	struct mlx5_flow_rule	*active_vlans_rule[VLAN_N_VID];
-	struct mlx5_flow_rule	*untagged_rule;
-	struct mlx5_flow_rule	*any_vlan_rule;
-	bool          filter_disabled;
+	struct mlx5_flow_handle	*active_vlans_rule[VLAN_N_VID];
+	struct mlx5_flow_handle	*untagged_rule;
+	struct mlx5_flow_handle	*any_vlan_rule;
+	bool		filter_disabled;
 };
 
 struct mlx5e_l2_table {
@@ -566,14 +587,14 @@ struct mlx5e_l2_table {
 /* L3/L4 traffic type classifier */
 struct mlx5e_ttc_table {
 	struct mlx5e_flow_table  ft;
-	struct mlx5_flow_rule	 *rules[MLX5E_NUM_TT];
+	struct mlx5_flow_handle	 *rules[MLX5E_NUM_TT];
 };
 
 #define ARFS_HASH_SHIFT BITS_PER_BYTE
 #define ARFS_HASH_SIZE BIT(BITS_PER_BYTE)
 struct arfs_table {
 	struct mlx5e_flow_table  ft;
-	struct mlx5_flow_rule    *default_rule;
+	struct mlx5_flow_handle	 *default_rule;
 	struct hlist_head	 rules_hash[ARFS_HASH_SIZE];
 };
 
@@ -668,7 +689,6 @@ struct mlx5e_priv {
 
 	unsigned long              state;
 	struct mutex               state_lock; /* Protects Interface state */
-	struct mlx5_core_mkey      umr_mkey;
 	struct mlx5e_rq            drop_rq;
 
 	struct mlx5e_channel     **channel;
@@ -688,12 +708,15 @@ struct mlx5e_priv {
 	struct work_struct         tx_timeout_work;
 	struct delayed_work        update_stats_work;
 
-	u32                        pflags;
 	struct mlx5_core_dev      *mdev;
 	struct net_device         *netdev;
 	struct mlx5e_stats         stats;
 	struct mlx5e_tstamp        tstamp;
 	u16 q_counter;
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+	struct mlx5e_dcbx          dcbx;
+#endif
+
 	const struct mlx5e_profile *profile;
 	void                      *ppriv;
 };
@@ -735,6 +758,9 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv);
 void mlx5e_init_l2_addr(struct mlx5e_priv *priv);
 void mlx5e_destroy_flow_table(struct mlx5e_flow_table *ft);
+int mlx5e_self_test_num(struct mlx5e_priv *priv);
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+		     u64 *buf);
 int mlx5e_ethtool_get_flow(struct mlx5e_priv *priv, struct ethtool_rxnfc *info,
 			   int location);
 int mlx5e_ethtool_get_all_flows(struct mlx5e_priv *priv,
@@ -811,8 +837,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
 
 static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
 {
-	return rq->mpwqe.mtt_offset +
-		wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
+	return wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8);
 }
 
 static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev)
@@ -825,6 +850,7 @@ extern const struct ethtool_ops mlx5e_ethtool_ops;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
 extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
 int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
 #endif
 
 #ifndef CONFIG_RFS_ACCEL
@@ -860,7 +886,8 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 		       struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
-int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev);
+int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
+				     bool enable_uc_lb);
 
 struct mlx5_eswitch_rep;
 int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
@@ -874,6 +901,7 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
 int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr);
 void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
+void mlx5e_update_hw_rep_counters(struct mlx5e_priv *priv);
 
 int mlx5e_create_direct_rqts(struct mlx5e_priv *priv);
 void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt);
@@ -890,8 +918,16 @@ struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev,
 void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv);
 int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
 void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev);
-struct rtnl_link_stats64 *
-mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 u32 mlx5e_choose_lro_timeout(struct mlx5_core_dev *mdev, u32 wanted_timeout);
+void mlx5e_add_vxlan_port(struct net_device *netdev,
+			  struct udp_tunnel_info *ti);
+void mlx5e_del_vxlan_port(struct net_device *netdev,
+			  struct udp_tunnel_info *ti);
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+			    void *sp);
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id);
 
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
+bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv);
 #endif /* __MLX5_EN_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
index a8cb38789774..68419a01db36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_arfs.c
@@ -56,7 +56,7 @@ struct arfs_tuple {
 struct arfs_rule {
 	struct mlx5e_priv	*priv;
 	struct work_struct      arfs_work;
-	struct mlx5_flow_rule   *rule;
+	struct mlx5_flow_handle *rule;
 	struct hlist_node	hlist;
 	int			rxq;
 	/* Flow ID passed to ndo_rx_flow_steer */
@@ -104,7 +104,7 @@ static int arfs_disable(struct mlx5e_priv *priv)
 		tt = arfs_get_tt(i);
 		/* Modify ttc rules destination to bypass the aRFS tables*/
 		err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
-						   &dest);
+						   &dest, NULL);
 		if (err) {
 			netdev_err(priv->netdev,
 				   "%s: modify ttc destination failed\n",
@@ -137,7 +137,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 		tt = arfs_get_tt(i);
 		/* Modify ttc rules destination to point on the aRFS FTs */
 		err = mlx5_modify_rule_destination(priv->fs.ttc.rules[tt],
-						   &dest);
+						   &dest, NULL);
 		if (err) {
 			netdev_err(priv->netdev,
 				   "%s: modify ttc destination failed err=%d\n",
@@ -151,7 +151,7 @@ int mlx5e_arfs_enable(struct mlx5e_priv *priv)
 
 static void arfs_destroy_table(struct arfs_table *arfs_t)
 {
-	mlx5_del_flow_rule(arfs_t->default_rule);
+	mlx5_del_flow_rules(arfs_t->default_rule);
 	mlx5e_destroy_flow_table(&arfs_t->ft);
 }
 
@@ -174,6 +174,11 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 				 enum arfs_type type)
 {
 	struct arfs_table *arfs_t = &priv->fs.arfs.arfs_tables[type];
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5_flow_destination dest;
 	struct mlx5e_tir *tir = priv->indir_tir;
 	struct mlx5_flow_spec *spec;
@@ -205,10 +210,9 @@ static int arfs_add_default_rule(struct mlx5e_priv *priv,
 		goto out;
 	}
 
-	arfs_t->default_rule = mlx5_add_flow_rule(arfs_t->ft.t, spec,
-						  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-						  MLX5_FS_DEFAULT_FLOW_TAG,
-						  &dest);
+	arfs_t->default_rule = mlx5_add_flow_rules(arfs_t->ft.t, spec,
+						   &flow_act,
+						   &dest, 1);
 	if (IS_ERR(arfs_t->default_rule)) {
 		err = PTR_ERR(arfs_t->default_rule);
 		arfs_t->default_rule = NULL;
@@ -324,7 +328,7 @@ static int arfs_create_table(struct mlx5e_priv *priv,
 	int err;
 
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL);
+				       MLX5E_ARFS_TABLE_SIZE, MLX5E_ARFS_FT_LEVEL, 0);
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
 		ft->t = NULL;
@@ -396,7 +400,7 @@ static void arfs_may_expire_flow(struct mlx5e_priv *priv)
 	spin_unlock_bh(&priv->fs.arfs.arfs_lock);
 	hlist_for_each_entry_safe(arfs_rule, htmp, &del_list, hlist) {
 		if (arfs_rule->rule)
-			mlx5_del_flow_rule(arfs_rule->rule);
+			mlx5_del_flow_rules(arfs_rule->rule);
 		hlist_del(&arfs_rule->hlist);
 		kfree(arfs_rule);
 	}
@@ -420,7 +424,7 @@ static void arfs_del_rules(struct mlx5e_priv *priv)
 	hlist_for_each_entry_safe(rule, htmp, &del_list, hlist) {
 		cancel_work_sync(&rule->arfs_work);
 		if (rule->rule)
-			mlx5_del_flow_rule(rule->rule);
+			mlx5_del_flow_rules(rule->rule);
 		hlist_del(&rule->hlist);
 		kfree(rule);
 	}
@@ -462,12 +466,17 @@ static struct arfs_table *arfs_get_table(struct mlx5e_arfs_tables *arfs,
 	return NULL;
 }
 
-static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
-					    struct arfs_rule *arfs_rule)
+static struct mlx5_flow_handle *arfs_add_rule(struct mlx5e_priv *priv,
+					      struct arfs_rule *arfs_rule)
 {
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5e_arfs_tables *arfs = &priv->fs.arfs;
 	struct arfs_tuple *tuple = &arfs_rule->tuple;
-	struct mlx5_flow_rule *rule = NULL;
+	struct mlx5_flow_handle *rule = NULL;
 	struct mlx5_flow_destination dest;
 	struct arfs_table *arfs_table;
 	struct mlx5_flow_spec *spec;
@@ -544,9 +553,7 @@ static struct mlx5_flow_rule *arfs_add_rule(struct mlx5e_priv *priv,
 	}
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dest.tir_num = priv->direct_tir[arfs_rule->rxq].tirn;
-	rule = mlx5_add_flow_rule(ft, spec, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				  MLX5_FS_DEFAULT_FLOW_TAG,
-				  &dest);
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: add rule(filter id=%d, rq idx=%d) failed, err=%d\n",
@@ -559,14 +566,14 @@ out:
 }
 
 static void arfs_modify_rule_rq(struct mlx5e_priv *priv,
-				struct mlx5_flow_rule *rule, u16 rxq)
+				struct mlx5_flow_handle *rule, u16 rxq)
 {
 	struct mlx5_flow_destination dst;
 	int err = 0;
 
 	dst.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dst.tir_num = priv->direct_tir[rxq].tirn;
-	err =  mlx5_modify_rule_destination(rule, &dst);
+	err =  mlx5_modify_rule_destination(rule, &dst, NULL);
 	if (err)
 		netdev_warn(priv->netdev,
 			    "Failed to modfiy aRFS rule destination to rq=%d\n", rxq);
@@ -578,7 +585,7 @@ static void arfs_handle_work(struct work_struct *work)
 						   struct arfs_rule,
 						   arfs_work);
 	struct mlx5e_priv *priv = arfs_rule->priv;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 
 	mutex_lock(&priv->state_lock);
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 13dc388667b6..2cd8e56a573b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -94,7 +94,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
 	switch (config.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
 		/* Reset CQE compression to Admin default */
-		mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin);
+		mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_def);
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_SOME:
@@ -111,6 +111,7 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
 		/* Disable CQE compression */
+		netdev_warn(dev, "Disabling cqe compression");
 		mlx5e_modify_rx_cqe_compression(priv, false);
 		config.rx_filter = HWTSTAMP_FILTER_ALL;
 		break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 029e856f72a0..f175518ff07a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -137,7 +137,8 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 	mlx5_unmap_free_uar(mdev, &res->cq_uar);
 }
 
-int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
+int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
+				     bool enable_uc_lb)
 {
 	struct mlx5e_tir *tir;
 	void *in;
@@ -149,6 +150,10 @@ int mlx5e_refresh_tirs_self_loopback_enable(struct mlx5_core_dev *mdev)
 	if (!in)
 		return -ENOMEM;
 
+	if (enable_uc_lb)
+		MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
+			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST_);
+
 	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
 	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 762af16ed021..7f6c225666c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -38,16 +38,77 @@
 #define MLX5E_100MB (100000)
 #define MLX5E_1GB   (1000000)
 
+#define MLX5E_CEE_STATE_UP    1
+#define MLX5E_CEE_STATE_DOWN  0
+
+/* If dcbx mode is non-host set the dcbx mode to host.
+ */
+static int mlx5e_dcbnl_set_dcbx_mode(struct mlx5e_priv *priv,
+				     enum mlx5_dcbx_oper_mode mode)
+{
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u32 param[MLX5_ST_SZ_DW(dcbx_param)];
+	int err;
+
+	err = mlx5_query_port_dcbx_param(mdev, param);
+	if (err)
+		return err;
+
+	MLX5_SET(dcbx_param, param, version_admin, mode);
+	if (mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+		MLX5_SET(dcbx_param, param, willing_admin, 1);
+
+	return mlx5_set_port_dcbx_param(mdev, param);
+}
+
+static int mlx5e_dcbnl_switch_to_host_mode(struct mlx5e_priv *priv)
+{
+	struct mlx5e_dcbx *dcbx = &priv->dcbx;
+	int err;
+
+	if (!MLX5_CAP_GEN(priv->mdev, dcbx))
+		return 0;
+
+	if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+		return 0;
+
+	err = mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_HOST);
+	if (err)
+		return err;
+
+	dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+	return 0;
+}
+
 static int mlx5e_dcbnl_ieee_getets(struct net_device *netdev,
 				   struct ieee_ets *ets)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int err = 0;
+	int i;
 
 	if (!MLX5_CAP_GEN(priv->mdev, ets))
 		return -ENOTSUPP;
 
-	memcpy(ets, &priv->params.ets, sizeof(*ets));
-	return 0;
+	ets->ets_cap = mlx5_max_tc(priv->mdev) + 1;
+	for (i = 0; i < ets->ets_cap; i++) {
+		err = mlx5_query_port_prio_tc(mdev, i, &ets->prio_tc[i]);
+		if (err)
+			return err;
+	}
+
+	for (i = 0; i < ets->ets_cap; i++) {
+		err = mlx5_query_port_tc_bw_alloc(mdev, i, &ets->tc_tx_bw[i]);
+		if (err)
+			return err;
+		if (ets->tc_tx_bw[i] < MLX5E_MAX_BW_ALLOC)
+			priv->dcbx.tc_tsa[i] = IEEE_8021QAZ_TSA_ETS;
+	}
+
+	memcpy(ets->tc_tsa, priv->dcbx.tc_tsa, sizeof(ets->tc_tsa));
+
+	return err;
 }
 
 enum {
@@ -110,9 +171,6 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 	int max_tc = mlx5_max_tc(mdev);
 	int err;
 
-	if (!MLX5_CAP_GEN(mdev, ets))
-		return -ENOTSUPP;
-
 	mlx5e_build_tc_group(ets, tc_group, max_tc);
 	mlx5e_build_tc_tx_bw(ets, tc_tx_bw, tc_group, max_tc);
 
@@ -124,7 +182,14 @@ int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 	if (err)
 		return err;
 
-	return mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+	err = mlx5_set_port_tc_bw_alloc(mdev, tc_tx_bw);
+
+	if (err)
+		return err;
+
+	memcpy(priv->dcbx.tc_tsa, ets->tc_tsa, sizeof(ets->tc_tsa));
+
+	return err;
 }
 
 static int mlx5e_dbcnl_validate_ets(struct net_device *netdev,
@@ -170,6 +235,9 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	int err;
 
+	if (!MLX5_CAP_GEN(priv->mdev, ets))
+		return -ENOTSUPP;
+
 	err = mlx5e_dbcnl_validate_ets(netdev, ets);
 	if (err)
 		return err;
@@ -178,9 +246,6 @@ static int mlx5e_dcbnl_ieee_setets(struct net_device *netdev,
 	if (err)
 		return err;
 
-	memcpy(&priv->params.ets, ets, sizeof(*ets));
-	priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
-
 	return 0;
 }
 
@@ -222,13 +287,39 @@ static int mlx5e_dcbnl_ieee_setpfc(struct net_device *dev,
 
 static u8 mlx5e_dcbnl_getdcbx(struct net_device *dev)
 {
-	return DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_dcbx *dcbx = &priv->dcbx;
+	u8 mode = DCB_CAP_DCBX_VER_IEEE | DCB_CAP_DCBX_VER_CEE;
+
+	if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_HOST)
+		mode |= DCB_CAP_DCBX_HOST;
+
+	return mode;
 }
 
 static u8 mlx5e_dcbnl_setdcbx(struct net_device *dev, u8 mode)
 {
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+	if ((!mode) && MLX5_CAP_GEN(priv->mdev, dcbx)) {
+		if (dcbx->mode == MLX5E_DCBX_PARAM_VER_OPER_AUTO)
+			return 0;
+
+		/* set dcbx to fw controlled */
+		if (!mlx5e_dcbnl_set_dcbx_mode(priv, MLX5E_DCBX_PARAM_VER_OPER_AUTO)) {
+			dcbx->mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+			return 0;
+		}
+
+		return 1;
+	}
+
+	if (mlx5e_dcbnl_switch_to_host_mode(netdev_priv(dev)))
+		return 1;
+
 	if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
-	    (mode & DCB_CAP_DCBX_VER_CEE) ||
+	    !(mode & DCB_CAP_DCBX_VER_CEE) ||
 	    !(mode & DCB_CAP_DCBX_VER_IEEE) ||
 	    !(mode & DCB_CAP_DCBX_HOST))
 		return 1;
@@ -304,6 +395,284 @@ static int mlx5e_dcbnl_ieee_setmaxrate(struct net_device *netdev,
 	return mlx5_modify_port_ets_rate_limit(mdev, max_bw_value, max_bw_unit);
 }
 
+static u8 mlx5e_dcbnl_setall(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	struct ieee_ets ets;
+	struct ieee_pfc pfc;
+	int err = -ENOTSUPP;
+	int i;
+
+	if (!MLX5_CAP_GEN(mdev, ets))
+		goto out;
+
+	memset(&ets, 0, sizeof(ets));
+	memset(&pfc, 0, sizeof(pfc));
+
+	ets.ets_cap = IEEE_8021QAZ_MAX_TCS;
+	for (i = 0; i < CEE_DCBX_MAX_PGS; i++) {
+		ets.tc_tx_bw[i] = cee_cfg->pg_bw_pct[i];
+		ets.tc_rx_bw[i] = cee_cfg->pg_bw_pct[i];
+		ets.tc_tsa[i]   = IEEE_8021QAZ_TSA_ETS;
+		ets.prio_tc[i]  = cee_cfg->prio_to_pg_map[i];
+	}
+
+	err = mlx5e_dbcnl_validate_ets(netdev, &ets);
+	if (err) {
+		netdev_err(netdev,
+			   "%s, Failed to validate ETS: %d\n", __func__, err);
+		goto out;
+	}
+
+	err = mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+	if (err) {
+		netdev_err(netdev,
+			   "%s, Failed to set ETS: %d\n", __func__, err);
+		goto out;
+	}
+
+	/* Set PFC */
+	pfc.pfc_cap = mlx5_max_tc(mdev) + 1;
+	if (!cee_cfg->pfc_enable)
+		pfc.pfc_en = 0;
+	else
+		for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+			pfc.pfc_en |= cee_cfg->pfc_setting[i] << i;
+
+	err = mlx5e_dcbnl_ieee_setpfc(netdev, &pfc);
+	if (err) {
+		netdev_err(netdev,
+			   "%s, Failed to set PFC: %d\n", __func__, err);
+		goto out;
+	}
+out:
+	return err ? MLX5_DCB_NO_CHG : MLX5_DCB_CHG_RESET;
+}
+
+static u8 mlx5e_dcbnl_getstate(struct net_device *netdev)
+{
+	return MLX5E_CEE_STATE_UP;
+}
+
+static void mlx5e_dcbnl_getpermhwaddr(struct net_device *netdev,
+				      u8 *perm_addr)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	if (!perm_addr)
+		return;
+
+	mlx5_query_nic_vport_mac_address(priv->mdev, 0, perm_addr);
+}
+
+static void mlx5e_dcbnl_setpgtccfgtx(struct net_device *netdev,
+				     int priority, u8 prio_type,
+				     u8 pgid, u8 bw_pct, u8 up_map)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	if (pgid >= CEE_DCBX_MAX_PGS) {
+		netdev_err(netdev,
+			   "%s, priority group is out of range\n", __func__);
+		return;
+	}
+
+	cee_cfg->prio_to_pg_map[priority] = pgid;
+}
+
+static void mlx5e_dcbnl_setpgbwgcfgtx(struct net_device *netdev,
+				      int pgid, u8 bw_pct)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if (pgid >= CEE_DCBX_MAX_PGS) {
+		netdev_err(netdev,
+			   "%s, priority group is out of range\n", __func__);
+		return;
+	}
+
+	cee_cfg->pg_bw_pct[pgid] = bw_pct;
+}
+
+static void mlx5e_dcbnl_getpgtccfgtx(struct net_device *netdev,
+				     int priority, u8 *prio_type,
+				     u8 *pgid, u8 *bw_pct, u8 *up_map)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	*prio_type = 0;
+	*bw_pct = 0;
+	*up_map = 0;
+
+	if (mlx5_query_port_prio_tc(mdev, priority, pgid))
+		*pgid = 0;
+}
+
+static void mlx5e_dcbnl_getpgbwgcfgtx(struct net_device *netdev,
+				      int pgid, u8 *bw_pct)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (pgid >= CEE_DCBX_MAX_PGS) {
+		netdev_err(netdev,
+			   "%s, priority group is out of range\n", __func__);
+		return;
+	}
+
+	if (mlx5_query_port_tc_bw_alloc(mdev, pgid, bw_pct))
+		*bw_pct = 0;
+}
+
+static void mlx5e_dcbnl_setpfccfg(struct net_device *netdev,
+				  int priority, u8 setting)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	if (setting > 1)
+		return;
+
+	cee_cfg->pfc_setting[priority] = setting;
+}
+
+static int
+mlx5e_dcbnl_get_priority_pfc(struct net_device *netdev,
+			     int priority, u8 *setting)
+{
+	struct ieee_pfc pfc;
+	int err;
+
+	err = mlx5e_dcbnl_ieee_getpfc(netdev, &pfc);
+
+	if (err)
+		*setting = 0;
+	else
+		*setting = (pfc.pfc_en >> priority) & 0x01;
+
+	return err;
+}
+
+static void mlx5e_dcbnl_getpfccfg(struct net_device *netdev,
+				  int priority, u8 *setting)
+{
+	if (priority >= CEE_DCBX_MAX_PRIO) {
+		netdev_err(netdev,
+			   "%s, priority is out of range\n", __func__);
+		return;
+	}
+
+	if (!setting)
+		return;
+
+	mlx5e_dcbnl_get_priority_pfc(netdev, priority, setting);
+}
+
+static u8 mlx5e_dcbnl_getcap(struct net_device *netdev,
+			     int capid, u8 *cap)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	u8 rval = 0;
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PG:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_UP2TC:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_PG_TCS:
+		*cap = 1 << mlx5_max_tc(mdev);
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 1 << mlx5_max_tc(mdev);
+		break;
+	case DCB_CAP_ATTR_GSP:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_BCN:
+		*cap = false;
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = (DCB_CAP_DCBX_LLD_MANAGED |
+			DCB_CAP_DCBX_VER_CEE |
+			DCB_CAP_DCBX_STATIC);
+		break;
+	default:
+		*cap = 0;
+		rval = 1;
+		break;
+	}
+
+	return rval;
+}
+
+static int mlx5e_dcbnl_getnumtcs(struct net_device *netdev,
+				 int tcs_id, u8 *num)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	switch (tcs_id) {
+	case DCB_NUMTCS_ATTR_PG:
+	case DCB_NUMTCS_ATTR_PFC:
+		*num = mlx5_max_tc(mdev) + 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static u8 mlx5e_dcbnl_getpfcstate(struct net_device *netdev)
+{
+	struct ieee_pfc pfc;
+
+	if (mlx5e_dcbnl_ieee_getpfc(netdev, &pfc))
+		return MLX5E_CEE_STATE_DOWN;
+
+	return pfc.pfc_en ? MLX5E_CEE_STATE_UP : MLX5E_CEE_STATE_DOWN;
+}
+
+static void mlx5e_dcbnl_setpfcstate(struct net_device *netdev, u8 state)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_cee_config *cee_cfg = &priv->dcbx.cee_cfg;
+
+	if ((state != MLX5E_CEE_STATE_UP) && (state != MLX5E_CEE_STATE_DOWN))
+		return;
+
+	cee_cfg->pfc_enable = state;
+}
+
 const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_getets	= mlx5e_dcbnl_ieee_getets,
 	.ieee_setets	= mlx5e_dcbnl_ieee_setets,
@@ -313,4 +682,70 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_setpfc	= mlx5e_dcbnl_ieee_setpfc,
 	.getdcbx	= mlx5e_dcbnl_getdcbx,
 	.setdcbx	= mlx5e_dcbnl_setdcbx,
+
+/* CEE interfaces */
+	.setall         = mlx5e_dcbnl_setall,
+	.getstate       = mlx5e_dcbnl_getstate,
+	.getpermhwaddr  = mlx5e_dcbnl_getpermhwaddr,
+
+	.setpgtccfgtx   = mlx5e_dcbnl_setpgtccfgtx,
+	.setpgbwgcfgtx  = mlx5e_dcbnl_setpgbwgcfgtx,
+	.getpgtccfgtx   = mlx5e_dcbnl_getpgtccfgtx,
+	.getpgbwgcfgtx  = mlx5e_dcbnl_getpgbwgcfgtx,
+
+	.setpfccfg      = mlx5e_dcbnl_setpfccfg,
+	.getpfccfg      = mlx5e_dcbnl_getpfccfg,
+	.getcap         = mlx5e_dcbnl_getcap,
+	.getnumtcs      = mlx5e_dcbnl_getnumtcs,
+	.getpfcstate    = mlx5e_dcbnl_getpfcstate,
+	.setpfcstate    = mlx5e_dcbnl_setpfcstate,
 };
+
+static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
+					enum mlx5_dcbx_oper_mode *mode)
+{
+	u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+	*mode = MLX5E_DCBX_PARAM_VER_OPER_HOST;
+
+	if (!mlx5_query_port_dcbx_param(priv->mdev, out))
+		*mode = MLX5_GET(dcbx_param, out, version_oper);
+
+	/* From driver's point of view, we only care if the mode
+	 * is host (HOST) or non-host (AUTO)
+	 */
+	if (*mode != MLX5E_DCBX_PARAM_VER_OPER_HOST)
+		*mode = MLX5E_DCBX_PARAM_VER_OPER_AUTO;
+}
+
+static void mlx5e_ets_init(struct mlx5e_priv *priv)
+{
+	int i;
+	struct ieee_ets ets;
+
+	memset(&ets, 0, sizeof(ets));
+	ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
+	for (i = 0; i < ets.ets_cap; i++) {
+		ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
+		ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
+		ets.prio_tc[i] = i;
+	}
+
+	memcpy(priv->dcbx.tc_tsa, ets.tc_tsa, sizeof(ets.tc_tsa));
+
+	/* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
+	ets.prio_tc[0] = 1;
+	ets.prio_tc[1] = 0;
+
+	mlx5e_dcbnl_ieee_setets_core(priv, &ets);
+}
+
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv)
+{
+	struct mlx5e_dcbx *dcbx = &priv->dcbx;
+
+	if (MLX5_CAP_GEN(priv->mdev, dcbx))
+		mlx5e_dcbnl_query_dcbx_mode(priv, &dcbx->mode);
+
+	mlx5e_ets_init(priv);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 27ff401cec20..352462af8d51 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -171,11 +171,17 @@ static int mlx5e_get_sset_count(struct net_device *dev, int sset)
 		return NUM_SW_COUNTERS +
 		       MLX5E_NUM_Q_CNTRS(priv) +
 		       NUM_VPORT_COUNTERS + NUM_PPORT_COUNTERS +
+		       NUM_PCIE_COUNTERS +
 		       MLX5E_NUM_RQ_STATS(priv) +
 		       MLX5E_NUM_SQ_STATS(priv) +
-		       MLX5E_NUM_PFC_COUNTERS(priv);
+		       MLX5E_NUM_PFC_COUNTERS(priv) +
+		       ARRAY_SIZE(mlx5e_pme_status_desc) +
+		       ARRAY_SIZE(mlx5e_pme_error_desc);
+
 	case ETH_SS_PRIV_FLAGS:
 		return ARRAY_SIZE(mlx5e_priv_flags);
+	case ETH_SS_TEST:
+		return mlx5e_self_test_num(priv);
 	/* fallthrough */
 	default:
 		return -EOPNOTSUPP;
@@ -213,6 +219,14 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 		strcpy(data + (idx++) * ETH_GSTRING_LEN,
 		       pport_2819_stats_desc[i].format);
 
+	for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       pcie_perf_stats_desc[i].format);
+
+	for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN,
+		       pcie_tas_stats_desc[i].format);
+
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
 			sprintf(data + (idx++) * ETH_GSTRING_LEN,
@@ -237,6 +251,13 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data)
 		}
 	}
 
+	/* port module event counters */
+	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_status_desc[i].format);
+
+	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
+		strcpy(data + (idx++) * ETH_GSTRING_LEN, mlx5e_pme_error_desc[i].format);
+
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return;
 
@@ -267,6 +288,9 @@ static void mlx5e_get_strings(struct net_device *dev,
 		break;
 
 	case ETH_SS_TEST:
+		for (i = 0; i < mlx5e_self_test_num(priv); i++)
+			strcpy(data + i * ETH_GSTRING_LEN,
+			       mlx5e_self_tests[i]);
 		break;
 
 	case ETH_SS_STATS:
@@ -279,6 +303,7 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
 				    struct ethtool_stats *stats, u64 *data)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_priv *mlx5_priv;
 	int i, j, tc, prio, idx = 0;
 	unsigned long pfc_combined;
 
@@ -314,6 +339,14 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
 		data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.RFC_2819_counters,
 						  pport_2819_stats_desc, i);
 
+	for (i = 0; i < NUM_PCIE_PERF_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_perf_counters,
+						  pcie_perf_stats_desc, i);
+
+	for (i = 0; i < NUM_PCIE_TAS_COUNTERS; i++)
+		data[idx++] = MLX5E_READ_CTR32_BE(&priv->stats.pcie.pcie_tas_counters,
+						  pcie_tas_stats_desc, i);
+
 	for (prio = 0; prio < NUM_PPORT_PRIO; prio++) {
 		for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++)
 			data[idx++] = MLX5E_READ_CTR64_BE(&priv->stats.pport.per_prio_counters[prio],
@@ -335,6 +368,16 @@ static void mlx5e_get_ethtool_stats(struct net_device *dev,
 		}
 	}
 
+	/* port module event counters */
+	mlx5_priv =  &priv->mdev->priv;
+	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_status_desc); i++)
+		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.status_counters,
+						   mlx5e_pme_status_desc, i);
+
+	for (i = 0; i < ARRAY_SIZE(mlx5e_pme_error_desc); i++)
+		data[idx++] = MLX5E_READ_CTR64_CPU(mlx5_priv->pme_stats.error_counters,
+						   mlx5e_pme_error_desc, i);
+
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return;
 
@@ -456,8 +499,7 @@ static int mlx5e_set_ringparam(struct net_device *dev,
 		return -EINVAL;
 	}
 
-	num_mtts = MLX5E_REQUIRED_MTTS(priv->params.num_channels,
-				       rx_pending_wqes);
+	num_mtts = MLX5E_REQUIRED_MTTS(rx_pending_wqes);
 	if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
 	    !MLX5E_VALID_NUM_MTTS(num_mtts)) {
 		netdev_info(dev, "%s: rx_pending (%d) request can't be satisfied, try to reduce.\n",
@@ -522,7 +564,6 @@ static int mlx5e_set_channels(struct net_device *dev,
 	unsigned int count = ch->combined_count;
 	bool arfs_enabled;
 	bool was_opened;
-	u32 num_mtts;
 	int err = 0;
 
 	if (!count) {
@@ -541,14 +582,6 @@ static int mlx5e_set_channels(struct net_device *dev,
 		return -EINVAL;
 	}
 
-	num_mtts = MLX5E_REQUIRED_MTTS(count, BIT(priv->params.log_rq_size));
-	if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ &&
-	    !MLX5E_VALID_NUM_MTTS(num_mtts)) {
-		netdev_info(dev, "%s: rx count (%d) request can't be satisfied, try to reduce.\n",
-			    __func__, count);
-		return -EINVAL;
-	}
-
 	if (priv->params.num_channels == count)
 		return 0;
 
@@ -1438,6 +1471,35 @@ static int set_pflag_rx_cqe_based_moder(struct net_device *netdev, bool enable)
 	return err;
 }
 
+static int set_pflag_rx_cqe_compress(struct net_device *netdev,
+				     bool enable)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int err = 0;
+	bool reset;
+
+	if (!MLX5_CAP_GEN(mdev, cqe_compression))
+		return -ENOTSUPP;
+
+	if (enable && priv->tstamp.hwtstamp_config.rx_filter != HWTSTAMP_FILTER_NONE) {
+		netdev_err(netdev, "Can't enable cqe compression while timestamping is enabled.\n");
+		return -EINVAL;
+	}
+
+	reset = test_bit(MLX5E_STATE_OPENED, &priv->state);
+
+	if (reset)
+		mlx5e_close_locked(netdev);
+
+	MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, enable);
+	priv->params.rx_cqe_compress_def = enable;
+
+	if (reset)
+		err = mlx5e_open_locked(netdev);
+	return err;
+}
+
 static int mlx5e_handle_pflag(struct net_device *netdev,
 			      u32 wanted_flags,
 			      enum mlx5e_priv_flag flag,
@@ -1445,7 +1507,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	bool enable = !!(wanted_flags & flag);
-	u32 changes = wanted_flags ^ priv->pflags;
+	u32 changes = wanted_flags ^ priv->params.pflags;
 	int err;
 
 	if (!(changes & flag))
@@ -1458,7 +1520,7 @@ static int mlx5e_handle_pflag(struct net_device *netdev,
 		return err;
 	}
 
-	MLX5E_SET_PRIV_FLAG(priv, flag, enable);
+	MLX5E_SET_PFLAG(priv, flag, enable);
 	return 0;
 }
 
@@ -1468,20 +1530,26 @@ static int mlx5e_set_priv_flags(struct net_device *netdev, u32 pflags)
 	int err;
 
 	mutex_lock(&priv->state_lock);
-
 	err = mlx5e_handle_pflag(netdev, pflags,
 				 MLX5E_PFLAG_RX_CQE_BASED_MODER,
 				 set_pflag_rx_cqe_based_moder);
+	if (err)
+		goto out;
+
+	err = mlx5e_handle_pflag(netdev, pflags,
+				 MLX5E_PFLAG_RX_CQE_COMPRESS,
+				 set_pflag_rx_cqe_compress);
 
+out:
 	mutex_unlock(&priv->state_lock);
-	return err ? -EINVAL : 0;
+	return err;
 }
 
 static u32 mlx5e_get_priv_flags(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
-	return priv->pflags;
+	return priv->params.pflags;
 }
 
 static int mlx5e_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
@@ -1535,5 +1603,6 @@ const struct ethtool_ops mlx5e_ethtool_ops = {
 	.get_module_info   = mlx5e_get_module_info,
 	.get_module_eeprom = mlx5e_get_module_eeprom,
 	.get_priv_flags    = mlx5e_get_priv_flags,
-	.set_priv_flags    = mlx5e_set_priv_flags
+	.set_priv_flags    = mlx5e_set_priv_flags,
+	.self_test         = mlx5e_self_test,
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
index 36fbc6b21a33..1fe80de5d68f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c
@@ -158,9 +158,14 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 				 enum mlx5e_vlan_rule_type rule_type,
 				 u16 vid, struct mlx5_flow_spec *spec)
 {
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5_flow_table *ft = priv->fs.vlan.ft.t;
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule **rule_p;
+	struct mlx5_flow_handle **rule_p;
 	int err = 0;
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
@@ -187,10 +192,7 @@ static int __mlx5e_add_vlan_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
-	*rule_p = mlx5_add_flow_rule(ft, spec,
-				     MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				     MLX5_FS_DEFAULT_FLOW_TAG,
-				     &dest);
+	*rule_p = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 
 	if (IS_ERR(*rule_p)) {
 		err = PTR_ERR(*rule_p);
@@ -229,20 +231,20 @@ static void mlx5e_del_vlan_rule(struct mlx5e_priv *priv,
 	switch (rule_type) {
 	case MLX5E_VLAN_RULE_TYPE_UNTAGGED:
 		if (priv->fs.vlan.untagged_rule) {
-			mlx5_del_flow_rule(priv->fs.vlan.untagged_rule);
+			mlx5_del_flow_rules(priv->fs.vlan.untagged_rule);
 			priv->fs.vlan.untagged_rule = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_ANY_VID:
 		if (priv->fs.vlan.any_vlan_rule) {
-			mlx5_del_flow_rule(priv->fs.vlan.any_vlan_rule);
+			mlx5_del_flow_rules(priv->fs.vlan.any_vlan_rule);
 			priv->fs.vlan.any_vlan_rule = NULL;
 		}
 		break;
 	case MLX5E_VLAN_RULE_TYPE_MATCH_VID:
 		mlx5e_vport_context_update_vlans(priv);
 		if (priv->fs.vlan.active_vlans_rule[vid]) {
-			mlx5_del_flow_rule(priv->fs.vlan.active_vlans_rule[vid]);
+			mlx5_del_flow_rules(priv->fs.vlan.active_vlans_rule[vid]);
 			priv->fs.vlan.active_vlans_rule[vid] = NULL;
 		}
 		mlx5e_vport_context_update_vlans(priv);
@@ -560,7 +562,7 @@ static void mlx5e_cleanup_ttc_rules(struct mlx5e_ttc_table *ttc)
 
 	for (i = 0; i < MLX5E_NUM_TT; i++) {
 		if (!IS_ERR_OR_NULL(ttc->rules[i])) {
-			mlx5_del_flow_rule(ttc->rules[i]);
+			mlx5_del_flow_rules(ttc->rules[i]);
 			ttc->rules[i] = NULL;
 		}
 	}
@@ -616,13 +618,19 @@ static struct {
 	},
 };
 
-static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
-						      struct mlx5_flow_table *ft,
-						      struct mlx5_flow_destination *dest,
-						      u16 etype,
-						      u8 proto)
+static struct mlx5_flow_handle *
+mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
+			struct mlx5_flow_table *ft,
+			struct mlx5_flow_destination *dest,
+			u16 etype,
+			u8 proto)
 {
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
+	struct mlx5_flow_handle *rule;
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
@@ -643,10 +651,7 @@ static struct mlx5_flow_rule *mlx5e_generate_ttc_rule(struct mlx5e_priv *priv,
 		MLX5_SET(fte_match_param, spec->match_value, outer_headers.ethertype, etype);
 	}
 
-	rule = mlx5_add_flow_rule(ft, spec,
-				  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				  MLX5_FS_DEFAULT_FLOW_TAG,
-				  dest);
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dest, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: add rule failed\n", __func__);
@@ -660,7 +665,7 @@ static int mlx5e_generate_ttc_table_rules(struct mlx5e_priv *priv)
 {
 	struct mlx5_flow_destination dest;
 	struct mlx5e_ttc_table *ttc;
-	struct mlx5_flow_rule **rules;
+	struct mlx5_flow_handle **rules;
 	struct mlx5_flow_table *ft;
 	int tt;
 	int err;
@@ -776,7 +781,7 @@ static int mlx5e_create_ttc_table(struct mlx5e_priv *priv)
 	int err;
 
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL);
+				       MLX5E_TTC_TABLE_SIZE, MLX5E_TTC_FT_LEVEL, 0);
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
 		ft->t = NULL;
@@ -801,7 +806,7 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
 				   struct mlx5e_l2_rule *ai)
 {
 	if (!IS_ERR_OR_NULL(ai->rule)) {
-		mlx5_del_flow_rule(ai->rule);
+		mlx5_del_flow_rules(ai->rule);
 		ai->rule = NULL;
 	}
 }
@@ -809,6 +814,11 @@ static void mlx5e_del_l2_flow_rule(struct mlx5e_priv *priv,
 static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 				  struct mlx5e_l2_rule *ai, int type)
 {
+	struct mlx5_flow_act flow_act = {
+		.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+		.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG,
+		.encap_id = 0,
+	};
 	struct mlx5_flow_table *ft = priv->fs.l2.ft.t;
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_spec *spec;
@@ -847,9 +857,7 @@ static int mlx5e_add_l2_flow_rule(struct mlx5e_priv *priv,
 		break;
 	}
 
-	ai->rule = mlx5_add_flow_rule(ft, spec,
-				      MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				      MLX5_FS_DEFAULT_FLOW_TAG, &dest);
+	ai->rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
 	if (IS_ERR(ai->rule)) {
 		netdev_err(priv->netdev, "%s: add l2 rule(mac:%pM) failed\n",
 			   __func__, mv_dmac);
@@ -947,7 +955,7 @@ static int mlx5e_create_l2_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL);
+				       MLX5E_L2_TABLE_SIZE, MLX5E_L2_FT_LEVEL, 0);
 
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
@@ -1037,7 +1045,7 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv)
 
 	ft->num_groups = 0;
 	ft->t = mlx5_create_flow_table(priv->fs.ns, MLX5E_NIC_PRIO,
-				       MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL);
+				       MLX5E_VLAN_TABLE_SIZE, MLX5E_VLAN_FT_LEVEL, 0);
 
 	if (IS_ERR(ft->t)) {
 		err = PTR_ERR(ft->t);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index d17c24227900..3691451c728c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -36,7 +36,7 @@
 struct mlx5e_ethtool_rule {
 	struct list_head             list;
 	struct ethtool_rx_flow_spec  flow_spec;
-	struct mlx5_flow_rule        *rule;
+	struct mlx5_flow_handle	     *rule;
 	struct mlx5e_ethtool_table   *eth_ft;
 };
 
@@ -99,7 +99,7 @@ static struct mlx5e_ethtool_table *get_flow_table(struct mlx5e_priv *priv,
 			   MLX5E_ETHTOOL_NUM_ENTRIES);
 	ft = mlx5_create_auto_grouped_flow_table(ns, prio,
 						 table_size,
-						 MLX5E_ETHTOOL_NUM_GROUPS, 0);
+						 MLX5E_ETHTOOL_NUM_GROUPS, 0, 0);
 	if (IS_ERR(ft))
 		return (void *)ft;
 
@@ -284,15 +284,16 @@ static bool outer_header_zero(u32 *match_criteria)
 						  size - 1);
 }
 
-static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
-						    struct mlx5_flow_table *ft,
-						    struct ethtool_rx_flow_spec *fs)
+static struct mlx5_flow_handle *
+add_ethtool_flow_rule(struct mlx5e_priv *priv,
+		      struct mlx5_flow_table *ft,
+		      struct ethtool_rx_flow_spec *fs)
 {
 	struct mlx5_flow_destination *dst = NULL;
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	int err = 0;
-	u32 action;
 
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec)
@@ -303,7 +304,7 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
 		goto free;
 
 	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
-		action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	} else {
 		dst = kzalloc(sizeof(*dst), GFP_KERNEL);
 		if (!dst) {
@@ -313,12 +314,12 @@ static struct mlx5_flow_rule *add_ethtool_flow_rule(struct mlx5e_priv *priv,
 
 		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 		dst->tir_num = priv->direct_tir[fs->ring_cookie].tirn;
-		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+		flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	}
 
 	spec->match_criteria_enable = (!outer_header_zero(spec->match_criteria));
-	rule = mlx5_add_flow_rule(ft, spec, action,
-				  MLX5_FS_DEFAULT_FLOW_TAG, dst);
+	flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+	rule = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
 	if (IS_ERR(rule)) {
 		err = PTR_ERR(rule);
 		netdev_err(priv->netdev, "%s: failed to add ethtool steering rule: %d\n",
@@ -335,7 +336,7 @@ static void del_ethtool_rule(struct mlx5e_priv *priv,
 			     struct mlx5e_ethtool_rule *eth_rule)
 {
 	if (eth_rule->rule)
-		mlx5_del_flow_rule(eth_rule->rule);
+		mlx5_del_flow_rules(eth_rule->rule);
 	list_del(&eth_rule->list);
 	priv->fs.ethtool.tot_num_rules--;
 	put_flow_table(eth_rule->eth_ft);
@@ -475,7 +476,7 @@ int mlx5e_ethtool_flow_replace(struct mlx5e_priv *priv,
 {
 	struct mlx5e_ethtool_table *eth_ft;
 	struct mlx5e_ethtool_rule *eth_rule;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	int num_tuples;
 	int err;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 84e8b250e2af..cbfa38fc72c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -84,7 +84,8 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
 	switch (priv->params.rq_wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
-		priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ?
+		priv->params.mpwqe_log_stride_sz =
+			MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) ?
 			MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
 			MLX5_MPWRQ_LOG_STRIDE_SIZE;
 		priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
@@ -101,7 +102,7 @@ static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type)
 		       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
 		       BIT(priv->params.log_rq_size),
 		       BIT(priv->params.mpwqe_log_stride_sz),
-		       priv->params.rx_cqe_compress_admin);
+		       MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS));
 }
 
 static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv)
@@ -290,12 +291,36 @@ static void mlx5e_update_q_counter(struct mlx5e_priv *priv)
 				      &qcnt->rx_out_of_buffer);
 }
 
+static void mlx5e_update_pcie_counters(struct mlx5e_priv *priv)
+{
+	struct mlx5e_pcie_stats *pcie_stats = &priv->stats.pcie;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int sz = MLX5_ST_SZ_BYTES(mpcnt_reg);
+	void *out;
+	u32 *in;
+
+	in = mlx5_vzalloc(sz);
+	if (!in)
+		return;
+
+	out = pcie_stats->pcie_perf_counters;
+	MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_PERFORMANCE_COUNTERS_GROUP);
+	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
+
+	out = pcie_stats->pcie_tas_counters;
+	MLX5_SET(mpcnt_reg, in, grp, MLX5_PCIE_TIMERS_AND_STATES_COUNTERS_GROUP);
+	mlx5_core_access_reg(mdev, in, sz, out, sz, MLX5_REG_MPCNT, 0, 0);
+
+	kvfree(in);
+}
+
 void mlx5e_update_stats(struct mlx5e_priv *priv)
 {
 	mlx5e_update_q_counter(priv);
 	mlx5e_update_vport_counters(priv);
 	mlx5e_update_pport_counters(priv);
 	mlx5e_update_sw_counters(priv);
+	mlx5e_update_pcie_counters(priv);
 }
 
 void mlx5e_update_stats_work(struct work_struct *work)
@@ -446,14 +471,50 @@ static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq)
 	kfree(rq->mpwqe.info);
 }
 
-static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv,
+				 u64 npages, u8 page_shift,
+				 struct mlx5_core_mkey *umr_mkey)
 {
-	struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+	struct mlx5_core_dev *mdev = priv->mdev;
+	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
+	void *mkc;
+	u32 *in;
+	int err;
 
-	if (rep && rep->vport != FDB_UPLINK_VPORT)
-		return true;
+	if (!MLX5E_VALID_NUM_MTTS(npages))
+		return -EINVAL;
 
-	return false;
+	in = mlx5_vzalloc(inlen);
+	if (!in)
+		return -ENOMEM;
+
+	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
+
+	MLX5_SET(mkc, mkc, free, 1);
+	MLX5_SET(mkc, mkc, umr_en, 1);
+	MLX5_SET(mkc, mkc, lw, 1);
+	MLX5_SET(mkc, mkc, lr, 1);
+	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+
+	MLX5_SET(mkc, mkc, qpn, 0xffffff);
+	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
+	MLX5_SET64(mkc, mkc, len, npages << page_shift);
+	MLX5_SET(mkc, mkc, translations_octword_size,
+		 MLX5_MTT_OCTW(npages));
+	MLX5_SET(mkc, mkc, log_page_size, page_shift);
+
+	err = mlx5_core_create_mkey(mdev, umr_mkey, in, inlen);
+
+	kvfree(in);
+	return err;
+}
+
+static int mlx5e_create_rq_umr_mkey(struct mlx5e_rq *rq)
+{
+	struct mlx5e_priv *priv = rq->priv;
+	u64 num_mtts = MLX5E_REQUIRED_MTTS(BIT(priv->params.log_rq_size));
+
+	return mlx5e_create_umr_mkey(priv, num_mtts, PAGE_SHIFT, &rq->umr_mkey);
 }
 
 static int mlx5e_create_rq(struct mlx5e_channel *c,
@@ -489,7 +550,13 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 	rq->channel = c;
 	rq->ix      = c->ix;
 	rq->priv    = c->priv;
-	rq->xdp_prog = priv->xdp_prog;
+
+	rq->xdp_prog = priv->xdp_prog ? bpf_prog_inc(priv->xdp_prog) : NULL;
+	if (IS_ERR(rq->xdp_prog)) {
+		err = PTR_ERR(rq->xdp_prog);
+		rq->xdp_prog = NULL;
+		goto err_rq_wq_destroy;
+	}
 
 	rq->buff.map_dir = DMA_FROM_DEVICE;
 	if (rq->xdp_prog)
@@ -506,18 +573,20 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 		rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
 		rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe;
 
-		rq->mpwqe.mtt_offset = c->ix *
-			MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size));
-
 		rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
 		rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
 
 		rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
 		byte_count = rq->buff.wqe_sz;
-		rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key);
-		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+
+		err = mlx5e_create_rq_umr_mkey(rq);
 		if (err)
 			goto err_rq_wq_destroy;
+		rq->mkey_be = cpu_to_be32(rq->umr_mkey.key);
+
+		err = mlx5e_rq_alloc_mpwqe_info(rq, c);
+		if (err)
+			goto err_destroy_umr_mkey;
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info),
@@ -566,12 +635,14 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
 	rq->page_cache.head = 0;
 	rq->page_cache.tail = 0;
 
-	if (rq->xdp_prog)
-		bpf_prog_add(rq->xdp_prog, 1);
-
 	return 0;
 
+err_destroy_umr_mkey:
+	mlx5_core_destroy_mkey(mdev, &rq->umr_mkey);
+
 err_rq_wq_destroy:
+	if (rq->xdp_prog)
+		bpf_prog_put(rq->xdp_prog);
 	mlx5_wq_destroy(&rq->wq_ctrl);
 
 	return err;
@@ -587,6 +658,7 @@ static void mlx5e_destroy_rq(struct mlx5e_rq *rq)
 	switch (rq->wq_type) {
 	case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
 		mlx5e_rq_free_mpwqe_info(rq);
+		mlx5_core_destroy_mkey(rq->priv->mdev, &rq->umr_mkey);
 		break;
 	default: /* MLX5_WQ_TYPE_LINKED_LIST */
 		kfree(rq->dma_info);
@@ -759,6 +831,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
 	if (err)
 		goto err_destroy_rq;
 
+	set_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 	err = mlx5e_modify_rq_state(rq, MLX5_RQC_STATE_RST, MLX5_RQC_STATE_RDY);
 	if (err)
 		goto err_disable_rq;
@@ -773,6 +846,7 @@ static int mlx5e_open_rq(struct mlx5e_channel *c,
 	return 0;
 
 err_disable_rq:
+	clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 	mlx5e_disable_rq(rq);
 err_destroy_rq:
 	mlx5e_destroy_rq(rq);
@@ -782,7 +856,7 @@ err_destroy_rq:
 
 static void mlx5e_close_rq(struct mlx5e_rq *rq)
 {
-	set_bit(MLX5E_RQ_STATE_FLUSH, &rq->state);
+	clear_bit(MLX5E_RQ_STATE_ENABLED, &rq->state);
 	napi_synchronize(&rq->channel->napi); /* prevent mlx5e_post_rx_wqes */
 	cancel_work_sync(&rq->am.work);
 
@@ -938,7 +1012,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
 	sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
 	sq->max_inline  = param->max_inline;
 	sq->min_inline_mode =
-		MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5E_INLINE_MODE_VPORT_CONTEXT ?
+		MLX5_CAP_ETH(mdev, wqe_inline_mode) == MLX5_CAP_INLINE_MODE_VPORT_CONTEXT ?
 		param->min_inline_mode : 0;
 
 	err = mlx5e_alloc_sq_db(sq, cpu_to_node(c->cpu));
@@ -1006,7 +1080,6 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
 	MLX5_SET(sqc,  sqc, min_wqe_inline_mode, sq->min_inline_mode);
 	MLX5_SET(sqc,  sqc, state,		MLX5_SQC_STATE_RST);
 	MLX5_SET(sqc,  sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1);
-	MLX5_SET(sqc,  sqc, flush_in_error_en,	1);
 
 	MLX5_SET(wq,   wq, wq_type,       MLX5_WQ_TYPE_CYCLIC);
 	MLX5_SET(wq,   wq, uar_page,      sq->uar.index);
@@ -1083,6 +1156,7 @@ static int mlx5e_open_sq(struct mlx5e_channel *c,
 	if (err)
 		goto err_destroy_sq;
 
+	set_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	err = mlx5e_modify_sq(sq, MLX5_SQC_STATE_RST, MLX5_SQC_STATE_RDY,
 			      false, 0);
 	if (err)
@@ -1096,6 +1170,7 @@ static int mlx5e_open_sq(struct mlx5e_channel *c,
 	return 0;
 
 err_disable_sq:
+	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	mlx5e_disable_sq(sq);
 err_destroy_sq:
 	mlx5e_destroy_sq(sq);
@@ -1112,7 +1187,7 @@ static inline void netif_tx_disable_queue(struct netdev_queue *txq)
 
 static void mlx5e_close_sq(struct mlx5e_sq *sq)
 {
-	set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
+	clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 	/* prevent netif_tx_wake_queue */
 	napi_synchronize(&sq->channel->napi);
 
@@ -1181,7 +1256,7 @@ static int mlx5e_create_cq(struct mlx5e_channel *c,
 
 static void mlx5e_destroy_cq(struct mlx5e_cq *cq)
 {
-	mlx5_wq_destroy(&cq->wq_ctrl);
+	mlx5_cqwq_destroy(&cq->wq_ctrl);
 }
 
 static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
@@ -1198,7 +1273,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 	int err;
 
 	inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
-		sizeof(u64) * cq->wq_ctrl.buf.npages;
+		sizeof(u64) * cq->wq_ctrl.frag_buf.npages;
 	in = mlx5_vzalloc(inlen);
 	if (!in)
 		return -ENOMEM;
@@ -1207,15 +1282,15 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
 
 	memcpy(cqc, param->cqc, sizeof(param->cqc));
 
-	mlx5_fill_page_array(&cq->wq_ctrl.buf,
-			     (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
+	mlx5_fill_page_frag_array(&cq->wq_ctrl.frag_buf,
+				  (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas));
 
 	mlx5_vector2eqn(mdev, param->eq_ix, &eqn, &irqn_not_used);
 
 	MLX5_SET(cqc,   cqc, cq_period_mode, param->cq_period_mode);
 	MLX5_SET(cqc,   cqc, c_eqn,         eqn);
 	MLX5_SET(cqc,   cqc, uar_page,      mcq->uar->index);
-	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
+	MLX5_SET(cqc,   cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
 					    MLX5_ADAPTER_PAGE_SHIFT);
 	MLX5_SET64(cqc, cqc, dbr_addr,      cq->wq_ctrl.db.dma);
 
@@ -1533,7 +1608,6 @@ err_close_icosq_cq:
 
 err_napi_del:
 	netif_napi_del(&c->napi);
-	napi_hash_del(&c->napi);
 	kfree(c);
 
 	return err;
@@ -1554,9 +1628,6 @@ static void mlx5e_close_channel(struct mlx5e_channel *c)
 	mlx5e_close_cq(&c->icosq.cq);
 	netif_napi_del(&c->napi);
 
-	napi_hash_del(&c->napi);
-	synchronize_rcu();
-
 	kfree(c);
 }
 
@@ -1649,7 +1720,7 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
 	}
 
 	MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
-	if (priv->params.rx_cqe_compress) {
+	if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS)) {
 		MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
 		MLX5_SET(cqc, cqc, cqe_comp_en, 1);
 	}
@@ -2121,7 +2192,7 @@ int mlx5e_open_locked(struct net_device *netdev)
 		goto err_clear_state_opened_flag;
 	}
 
-	err = mlx5e_refresh_tirs_self_loopback_enable(priv->mdev);
+	err = mlx5e_refresh_tirs_self_loopback(priv->mdev, false);
 	if (err) {
 		netdev_err(netdev, "%s: mlx5e_refresh_tirs_self_loopback_enable failed, %d\n",
 			   __func__, err);
@@ -2639,7 +2710,7 @@ mqprio:
 	return mlx5e_setup_tc(dev, tc->tc);
 }
 
-struct rtnl_link_stats64 *
+static struct rtnl_link_stats64 *
 mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
@@ -2647,13 +2718,20 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	struct mlx5e_vport_stats *vstats = &priv->stats.vport;
 	struct mlx5e_pport_stats *pstats = &priv->stats.pport;
 
-	stats->rx_packets = sstats->rx_packets;
-	stats->rx_bytes   = sstats->rx_bytes;
-	stats->tx_packets = sstats->tx_packets;
-	stats->tx_bytes   = sstats->tx_bytes;
+	if (mlx5e_is_uplink_rep(priv)) {
+		stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
+		stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
+		stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
+		stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+	} else {
+		stats->rx_packets = sstats->rx_packets;
+		stats->rx_bytes   = sstats->rx_bytes;
+		stats->tx_packets = sstats->tx_packets;
+		stats->tx_bytes   = sstats->tx_bytes;
+		stats->tx_dropped = sstats->tx_queue_dropped;
+	}
 
 	stats->rx_dropped = priv->stats.qcnt.rx_out_of_buffer;
-	stats->tx_dropped = sstats->tx_queue_dropped;
 
 	stats->rx_length_errors =
 		PPORT_802_3_GET(pstats, a_in_range_length_errors) +
@@ -2850,31 +2928,13 @@ static int mlx5e_set_features(struct net_device *netdev,
 	return err ? -EINVAL : 0;
 }
 
-#define MXL5_HW_MIN_MTU 64
-#define MXL5E_MIN_MTU (MXL5_HW_MIN_MTU + ETH_FCS_LEN)
-
 static int mlx5e_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5_core_dev *mdev = priv->mdev;
 	bool was_opened;
-	u16 max_mtu;
-	u16 min_mtu;
 	int err = 0;
 	bool reset;
 
-	mlx5_query_port_max_mtu(mdev, &max_mtu, 1);
-
-	max_mtu = MLX5E_HW2SW_MTU(max_mtu);
-	min_mtu = MLX5E_HW2SW_MTU(MXL5E_MIN_MTU);
-
-	if (new_mtu > max_mtu || new_mtu < min_mtu) {
-		netdev_err(netdev,
-			   "%s: Bad MTU (%d), valid range is: [%d..%d]\n",
-			   __func__, new_mtu, min_mtu, max_mtu);
-		return -EINVAL;
-	}
-
 	mutex_lock(&priv->state_lock);
 
 	reset = !priv->params.lro_en &&
@@ -2944,6 +3004,20 @@ static int mlx5e_set_vf_trust(struct net_device *dev, int vf, bool setting)
 
 	return mlx5_eswitch_set_vport_trust(mdev->priv.eswitch, vf + 1, setting);
 }
+
+static int mlx5e_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+			     int max_tx_rate)
+{	/* .ndo_set_vf_rate handler: applies a max TX rate to a VF's vport */
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (min_tx_rate) /* any non-zero minimum rate is refused */
+		return -EOPNOTSUPP;
+
+	return mlx5_eswitch_set_vport_rate(mdev->priv.eswitch, vf + 1,
+					   max_tx_rate); /* vf is passed on as vf + 1, as in mlx5e_set_vf_trust() */
+}
+
 static int mlx5_vport_link2ifla(u8 esw_link)
 {
 	switch (esw_link) {
@@ -3000,8 +3074,8 @@ static int mlx5e_get_vf_stats(struct net_device *dev,
 					    vf_stats);
 }
 
-static void mlx5e_add_vxlan_port(struct net_device *netdev,
-				 struct udp_tunnel_info *ti)
+void mlx5e_add_vxlan_port(struct net_device *netdev,
+			  struct udp_tunnel_info *ti)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -3014,8 +3088,8 @@ static void mlx5e_add_vxlan_port(struct net_device *netdev,
 	mlx5e_vxlan_queue_work(priv, ti->sa_family, be16_to_cpu(ti->port), 1);
 }
 
-static void mlx5e_del_vxlan_port(struct net_device *netdev,
-				 struct udp_tunnel_info *ti)
+void mlx5e_del_vxlan_port(struct net_device *netdev,
+			  struct udp_tunnel_info *ti)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 
@@ -3092,7 +3166,7 @@ static void mlx5e_tx_timeout(struct net_device *dev)
 		if (!netif_xmit_stopped(netdev_get_tx_queue(dev, i)))
 			continue;
 		sched_work = true;
-		set_bit(MLX5E_SQ_STATE_FLUSH, &sq->state);
+		clear_bit(MLX5E_SQ_STATE_ENABLED, &sq->state);
 		netdev_err(dev, "TX timeout on queue: %d, SQ: 0x%x, CQ: 0x%x, SQ Cons: 0x%x SQ Prod: 0x%x\n",
 			   i, sq->sqn, sq->cq.mcq.cqn, sq->cc, sq->pc);
 	}
@@ -3109,6 +3183,11 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 	bool reset, was_opened;
 	int i;
 
+	if (prog && prog->xdp_adjust_head) {
+		netdev_err(netdev, "Does not support bpf_xdp_adjust_head()\n");
+		return -EOPNOTSUPP;
+	}
+
 	mutex_lock(&priv->state_lock);
 
 	if ((netdev->features & NETIF_F_LRO) && prog) {
@@ -3123,11 +3202,21 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 
 	if (was_opened && reset)
 		mlx5e_close_locked(netdev);
+	if (was_opened && !reset) {
+		/* num_channels is invariant here, so we can take the
+		 * batched reference right upfront.
+		 */
+		prog = bpf_prog_add(prog, priv->params.num_channels);
+		if (IS_ERR(prog)) {
+			err = PTR_ERR(prog);
+			goto unlock;
+		}
+	}
 
-	/* exchange programs */
+	/* exchange programs; we keep the extra prog reference we got from
+	 * the caller as long as we don't fail from this point onwards.
+	 */
 	old_prog = xchg(&priv->xdp_prog, prog);
-	if (prog)
-		bpf_prog_add(prog, 1);
 	if (old_prog)
 		bpf_prog_put(old_prog);
 
@@ -3143,17 +3232,16 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
 	/* exchanging programs w/o reset, we update ref counts on behalf
 	 * of the channels RQs here.
 	 */
-	bpf_prog_add(prog, priv->params.num_channels);
 	for (i = 0; i < priv->params.num_channels; i++) {
 		struct mlx5e_channel *c = priv->channel[i];
 
-		set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+		clear_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
 		napi_synchronize(&c->napi);
 		/* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */
 
 		old_prog = xchg(&c->rq.xdp_prog, prog);
 
-		clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state);
+		set_bit(MLX5E_RQ_STATE_ENABLED, &c->rq.state);
 		/* napi_schedule in case we have missed anything */
 		set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags);
 		napi_schedule(&c->napi);
@@ -3251,6 +3339,7 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 	.ndo_set_vf_vlan         = mlx5e_set_vf_vlan,
 	.ndo_set_vf_spoofchk     = mlx5e_set_vf_spoofchk,
 	.ndo_set_vf_trust        = mlx5e_set_vf_trust,
+	.ndo_set_vf_rate         = mlx5e_set_vf_rate,
 	.ndo_get_vf_config       = mlx5e_get_vf_config,
 	.ndo_set_vf_link_state   = mlx5e_set_vf_link_state,
 	.ndo_get_vf_stats        = mlx5e_get_vf_stats,
@@ -3259,6 +3348,8 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = {
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller     = mlx5e_netpoll,
 #endif
+	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
+	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
 };
 
 static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev)
@@ -3295,24 +3386,6 @@ u16 mlx5e_get_max_inline_cap(struct mlx5_core_dev *mdev)
 	       2 /*sizeof(mlx5e_tx_wqe.inline_hdr_start)*/;
 }
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-static void mlx5e_ets_init(struct mlx5e_priv *priv)
-{
-	int i;
-
-	priv->params.ets.ets_cap = mlx5_max_tc(priv->mdev) + 1;
-	for (i = 0; i < priv->params.ets.ets_cap; i++) {
-		priv->params.ets.tc_tx_bw[i] = MLX5E_MAX_BW_ALLOC;
-		priv->params.ets.tc_tsa[i] = IEEE_8021QAZ_TSA_VENDOR;
-		priv->params.ets.prio_tc[i] = i;
-	}
-
-	/* tclass[prio=0]=1, tclass[prio=1]=0, tclass[prio=i]=i (for i>1) */
-	priv->params.ets.prio_tc[0] = 1;
-	priv->params.ets.prio_tc[1] = 0;
-}
-#endif
-
 void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
 				   u32 *indirection_rqt, int len,
 				   int num_channels)
@@ -3387,14 +3460,13 @@ static void mlx5e_query_min_inline(struct mlx5_core_dev *mdev,
 				   u8 *min_inline_mode)
 {
 	switch (MLX5_CAP_ETH(mdev, wqe_inline_mode)) {
-	case MLX5E_INLINE_MODE_L2:
+	case MLX5_CAP_INLINE_MODE_L2:
 		*min_inline_mode = MLX5_INLINE_MODE_L2;
 		break;
-	case MLX5E_INLINE_MODE_VPORT_CONTEXT:
-		mlx5_query_nic_vport_min_inline(mdev,
-						min_inline_mode);
+	case MLX5_CAP_INLINE_MODE_VPORT_CONTEXT:
+		mlx5_query_nic_vport_min_inline(mdev, 0, min_inline_mode);
 		break;
-	case MLX5_INLINE_MODE_NOT_REQUIRED:
+	case MLX5_CAP_INLINE_MODE_NOT_REQUIRED:
 		*min_inline_mode = MLX5_INLINE_MODE_NONE;
 		break;
 	}
@@ -3436,17 +3508,16 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
 	priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
 
 	/* set CQE compression */
-	priv->params.rx_cqe_compress_admin = false;
+	priv->params.rx_cqe_compress_def = false;
 	if (MLX5_CAP_GEN(mdev, cqe_compression) &&
 	    MLX5_CAP_GEN(mdev, vport_group_manager)) {
 		mlx5e_get_max_linkspeed(mdev, &link_speed);
 		mlx5e_get_pci_bw(mdev, &pci_bw);
 		mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
 			      link_speed, pci_bw);
-		priv->params.rx_cqe_compress_admin =
+		priv->params.rx_cqe_compress_def =
 			cqe_compress_heuristic(link_speed, pci_bw);
 	}
-	priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
 
 	mlx5e_set_rq_priv_params(priv);
 	if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ)
@@ -3477,12 +3548,9 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev,
 		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
 
 	/* Initialize pflags */
-	MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
-			    priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
-
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-	mlx5e_ets_init(priv);
-#endif
+	MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER,
+			priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE);
+	MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, priv->params.rx_cqe_compress_def);
 
 	mutex_init(&priv->state_lock);
 
@@ -3520,7 +3588,8 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 	if (MLX5_CAP_GEN(mdev, vport_group_manager)) {
 		netdev->netdev_ops = &mlx5e_netdev_ops_sriov;
 #ifdef CONFIG_MLX5_CORE_EN_DCB
-		netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+		if (MLX5_CAP_GEN(mdev, qos))
+			netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
 #endif
 	} else {
 		netdev->netdev_ops = &mlx5e_netdev_ops_basic;
@@ -3616,43 +3685,6 @@ static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv)
 	mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
 }
 
-static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv)
-{
-	struct mlx5_core_dev *mdev = priv->mdev;
-	u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev),
-					 BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW));
-	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
-	void *mkc;
-	u32 *in;
-	int err;
-
-	in = mlx5_vzalloc(inlen);
-	if (!in)
-		return -ENOMEM;
-
-	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
-
-	npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages);
-
-	MLX5_SET(mkc, mkc, free, 1);
-	MLX5_SET(mkc, mkc, umr_en, 1);
-	MLX5_SET(mkc, mkc, lw, 1);
-	MLX5_SET(mkc, mkc, lr, 1);
-	MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
-
-	MLX5_SET(mkc, mkc, qpn, 0xffffff);
-	MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn);
-	MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT);
-	MLX5_SET(mkc, mkc, translations_octword_size,
-		 MLX5_MTT_OCTW(npages));
-	MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT);
-
-	err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen);
-
-	kvfree(in);
-	return err;
-}
-
 static void mlx5e_nic_init(struct mlx5_core_dev *mdev,
 			   struct net_device *netdev,
 			   const struct mlx5e_profile *profile,
@@ -3674,6 +3706,9 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv)
 
 	if (MLX5_CAP_GEN(mdev, vport_group_manager))
 		mlx5_eswitch_unregister_vport_rep(esw, 0);
+
+	if (priv->xdp_prog)
+		bpf_prog_put(priv->xdp_prog);
 }
 
 static int mlx5e_init_nic_rx(struct mlx5e_priv *priv)
@@ -3756,7 +3791,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
 	}
 
 #ifdef CONFIG_MLX5_CORE_EN_DCB
-	mlx5e_dcbnl_ieee_setets_core(priv, &priv->params.ets);
+	mlx5e_dcbnl_initialize(priv);
 #endif
 	return 0;
 }
@@ -3784,7 +3819,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 		rep.load = mlx5e_nic_rep_load;
 		rep.unload = mlx5e_nic_rep_unload;
 		rep.vport = FDB_UPLINK_VPORT;
-		rep.priv_data = priv;
+		rep.netdev = netdev;
 		mlx5_eswitch_register_vport_rep(esw, 0, &rep);
 	}
 }
@@ -3849,21 +3884,16 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 {
 	const struct mlx5e_profile *profile;
 	struct mlx5e_priv *priv;
+	u16 max_mtu;
 	int err;
 
 	priv = netdev_priv(netdev);
 	profile = priv->profile;
 	clear_bit(MLX5E_STATE_DESTROYING, &priv->state);
 
-	err = mlx5e_create_umr_mkey(priv);
-	if (err) {
-		mlx5_core_err(mdev, "create umr mkey failed, %d\n", err);
-		goto out;
-	}
-
 	err = profile->init_tx(priv);
 	if (err)
-		goto err_destroy_umr_mkey;
+		goto out;
 
 	err = mlx5e_open_drop_rq(priv);
 	if (err) {
@@ -3879,6 +3909,11 @@ int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 
 	mlx5e_init_l2_addr(priv);
 
+	/* MTU range: 68 - hw-specific max */
+	netdev->min_mtu = ETH_MIN_MTU;
+	mlx5_query_port_max_mtu(priv->mdev, &max_mtu, 1);
+	netdev->max_mtu = MLX5E_HW2SW_MTU(max_mtu);
+
 	mlx5e_set_dev_port_mtu(netdev);
 
 	if (profile->enable)
@@ -3898,9 +3933,6 @@ err_close_drop_rq:
 err_cleanup_tx:
 	profile->cleanup_tx(priv);
 
-err_destroy_umr_mkey:
-	mlx5_core_destroy_mkey(mdev, &priv->umr_mkey);
-
 out:
 	return err;
 }
@@ -3949,7 +3981,6 @@ void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev)
 	profile->cleanup_rx(priv);
 	mlx5e_close_drop_rq(priv);
 	profile->cleanup_tx(priv);
-	mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey);
 	cancel_delayed_work_sync(&priv->update_stats_work);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index bf1c09ca73c0..850378893b25 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -72,7 +72,29 @@ static void mlx5e_rep_get_strings(struct net_device *dev,
 	}
 }
 
-static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv)
+static void mlx5e_rep_update_hw_counters(struct mlx5e_priv *priv)
+{	/* refreshes priv->stats.vf_vport from the eswitch counters of rep->vport */
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct rtnl_link_stats64 *vport_stats;
+	struct ifla_vf_stats vf_stats;
+	int err;
+
+	err = mlx5_eswitch_get_vport_stats(esw, rep->vport, &vf_stats);
+	if (err) {
+		pr_warn("vport %d error %d reading stats\n", rep->vport, err);
+		return; /* previously stored vf_vport values are left untouched */
+	}
+
+	vport_stats = &priv->stats.vf_vport;
+	/* swap tx and rx: the query's tx counters are stored as our rx, and vice versa */
+	vport_stats->rx_packets = vf_stats.tx_packets;
+	vport_stats->rx_bytes   = vf_stats.tx_bytes;
+	vport_stats->tx_packets = vf_stats.rx_packets;
+	vport_stats->tx_bytes   = vf_stats.rx_bytes;
+}
+
+static void mlx5e_rep_update_sw_counters(struct mlx5e_priv *priv)
 {
 	struct mlx5e_sw_stats *s = &priv->stats.sw;
 	struct mlx5e_rq_stats *rq_stats;
@@ -95,6 +117,12 @@ static void mlx5e_update_sw_rep_counters(struct mlx5e_priv *priv)
 	}
 }
 
+static void mlx5e_rep_update_stats(struct mlx5e_priv *priv)
+{	/* .update_stats of mlx5e_rep_profile: refreshes both counter sets */
+	mlx5e_rep_update_sw_counters(priv); /* fills priv->stats.sw */
+	mlx5e_rep_update_hw_counters(priv); /* fills priv->stats.vf_vport */
+}
+
 static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
 					struct ethtool_stats *stats, u64 *data)
 {
@@ -106,7 +134,7 @@ static void mlx5e_rep_get_ethtool_stats(struct net_device *dev,
 
 	mutex_lock(&priv->state_lock);
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
-		mlx5e_update_sw_rep_counters(priv);
+		mlx5e_rep_update_sw_counters(priv);
 	mutex_unlock(&priv->state_lock);
 
 	for (i = 0; i < NUM_VPORT_REP_COUNTERS; i++)
@@ -180,7 +208,8 @@ int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv)
 
 int mlx5e_nic_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5e_priv *priv = rep->priv_data;
+	struct net_device *netdev = rep->netdev;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
 
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
 		return mlx5e_add_sqs_fwd_rules(priv);
@@ -198,7 +227,8 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
 void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
 			  struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5e_priv *priv = rep->priv_data;
+	struct net_device *netdev = rep->netdev;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
 
 	if (test_bit(MLX5E_STATE_OPENED, &priv->state))
 		mlx5e_remove_sqs_fwd_rules(priv);
@@ -208,6 +238,35 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw,
 	mlx5e_tc_init(priv);
 }
 
+static int mlx5e_rep_open(struct net_device *dev)
+{	/* .ndo_open of mlx5e_netdev_ops_rep: mlx5e_open() plus vport admin state UP */
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	int err;
+
+	err = mlx5e_open(dev);
+	if (err)
+		return err;
+
+	err = mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_UP);
+	if (!err) /* carrier is raised only when the vport state change succeeded */
+		netif_carrier_on(dev);
+
+	return 0; /* a set_vport_state failure is not propagated to the caller */
+}
+
+static int mlx5e_rep_close(struct net_device *dev)
+{	/* .ndo_stop of mlx5e_netdev_ops_rep: vport admin state DOWN, then mlx5e_close() */
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	(void)mlx5_eswitch_set_vport_state(esw, rep->vport, MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* result deliberately discarded */
+
+	return mlx5e_close(dev); /* only mlx5e_close()'s status reaches the caller */
+}
+
 static int mlx5e_rep_get_phys_port_name(struct net_device *dev,
 					char *buf, size_t len)
 {
@@ -230,6 +289,14 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
 	if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS))
 		return -EOPNOTSUPP;
 
+	if (tc->egress_dev) {
+		struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+		struct net_device *uplink_dev = mlx5_eswitch_get_uplink_netdev(esw);
+
+		return uplink_dev->netdev_ops->ndo_setup_tc(uplink_dev, handle,
+							    proto, tc);
+	}
+
 	switch (tc->type) {
 	case TC_SETUP_CLSFLOWER:
 		switch (tc->cls_flower->command) {
@@ -245,17 +312,92 @@ static int mlx5e_rep_ndo_setup_tc(struct net_device *dev, u32 handle,
 	}
 }
 
+bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
+{	/* uplink rep: rep on FDB_UPLINK_VPORT while eswitch mode is SRIOV_OFFLOADS */
+	struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	if (rep && rep->vport == FDB_UPLINK_VPORT && esw->mode == SRIOV_OFFLOADS)
+		return true;
+
+	return false; /* also the answer when priv->ppriv is NULL */
+}
+
+bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv)
+{	/* true for any rep whose vport is not FDB_UPLINK_VPORT */
+	struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv;
+
+	if (rep && rep->vport != FDB_UPLINK_VPORT)
+		return true;
+
+	return false; /* NULL priv->ppriv or the uplink vport */
+}
+
+bool mlx5e_has_offload_stats(const struct net_device *dev, int attr_id)
+{	/* .ndo_has_offload_stats: true only for CPU_HIT on a VF-vport or uplink rep */
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		if (mlx5e_is_vf_vport_rep(priv) || mlx5e_is_uplink_rep(priv))
+			return true;
+	} /* other attr ids, or a non-rep device, fall out to the return below */
+
+	return false;
+}
+
+static int
+mlx5e_get_sw_stats64(const struct net_device *dev,
+		     struct rtnl_link_stats64 *stats)
+{	/* copies the driver's software counters (priv->stats.sw) into *stats */
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_sw_stats *sstats = &priv->stats.sw;
+
+	stats->rx_packets = sstats->rx_packets;
+	stats->rx_bytes   = sstats->rx_bytes;
+	stats->tx_packets = sstats->tx_packets;
+	stats->tx_bytes   = sstats->tx_bytes;
+
+	stats->tx_dropped = sstats->tx_queue_dropped; /* other fields of *stats are not written */
+
+	return 0; /* always succeeds */
+}
+
+int mlx5e_get_offload_stats(int attr_id, const struct net_device *dev,
+			    void *sp)
+{	/* .ndo_get_offload_stats: only IFLA_OFFLOAD_XSTATS_CPU_HIT is served */
+	switch (attr_id) {
+	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
+		return mlx5e_get_sw_stats64(dev, sp); /* sp is used as a struct rtnl_link_stats64 * */
+	}
+
+	return -EINVAL; /* any other attr_id */
+}
+
+static struct rtnl_link_stats64 *
+mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
+{	/* .ndo_get_stats64 of mlx5e_netdev_ops_rep */
+	struct mlx5e_priv *priv = netdev_priv(dev);
+
+	memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); /* cached copy written by mlx5e_rep_update_hw_counters() */
+	return stats; /* the caller's buffer is handed back */
+}
+
 static const struct switchdev_ops mlx5e_rep_switchdev_ops = {
 	.switchdev_port_attr_get	= mlx5e_attr_get,
 };
 
 static const struct net_device_ops mlx5e_netdev_ops_rep = {
-	.ndo_open                = mlx5e_open,
-	.ndo_stop                = mlx5e_close,
+	.ndo_open                = mlx5e_rep_open,
+	.ndo_stop                = mlx5e_rep_close,
 	.ndo_start_xmit          = mlx5e_xmit,
 	.ndo_get_phys_port_name  = mlx5e_rep_get_phys_port_name,
 	.ndo_setup_tc            = mlx5e_rep_ndo_setup_tc,
-	.ndo_get_stats64         = mlx5e_get_stats,
+	.ndo_get_stats64         = mlx5e_rep_get_stats,
+	.ndo_udp_tunnel_add      = mlx5e_add_vxlan_port,
+	.ndo_udp_tunnel_del      = mlx5e_del_vxlan_port,
+	.ndo_has_offload_stats	 = mlx5e_has_offload_stats,
+	.ndo_get_offload_stats	 = mlx5e_get_offload_stats,
 };
 
 static void mlx5e_build_rep_netdev_priv(struct mlx5_core_dev *mdev,
@@ -328,7 +470,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_eswitch_rep *rep = priv->ppriv;
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	int err;
 	int i;
 
@@ -360,7 +502,7 @@ static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 	return 0;
 
 err_del_flow_rule:
-	mlx5_del_flow_rule(rep->vport_rx_rule);
+	mlx5_del_flow_rules(rep->vport_rx_rule);
 err_destroy_direct_tirs:
 	mlx5e_destroy_direct_tirs(priv);
 err_destroy_direct_rqts:
@@ -375,7 +517,7 @@ static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 	int i;
 
 	mlx5e_tc_cleanup(priv);
-	mlx5_del_flow_rule(rep->vport_rx_rule);
+	mlx5_del_flow_rules(rep->vport_rx_rule);
 	mlx5e_destroy_direct_tirs(priv);
 	for (i = 0; i < priv->params.num_channels; i++)
 		mlx5e_destroy_rqt(priv, &priv->direct_tir[i].rqt);
@@ -405,7 +547,7 @@ static struct mlx5e_profile mlx5e_rep_profile = {
 	.cleanup_rx		= mlx5e_cleanup_rep_rx,
 	.init_tx		= mlx5e_init_rep_tx,
 	.cleanup_tx		= mlx5e_cleanup_nic_tx,
-	.update_stats           = mlx5e_update_sw_rep_counters,
+	.update_stats           = mlx5e_rep_update_stats,
 	.max_nch		= mlx5e_get_rep_max_num_channels,
 	.max_tc			= 1,
 };
@@ -423,7 +565,7 @@ int mlx5e_vport_rep_load(struct mlx5_eswitch *esw,
 		return -EINVAL;
 	}
 
-	rep->priv_data = netdev_priv(netdev);
+	rep->netdev = netdev;
 
 	err = mlx5e_attach_netdev(esw->dev, netdev);
 	if (err) {
@@ -445,7 +587,7 @@ err_detach_netdev:
 	mlx5e_detach_netdev(esw->dev, netdev);
 
 err_destroy_netdev:
-	mlx5e_destroy_netdev(esw->dev, rep->priv_data);
+	mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev));
 
 	return err;
 
@@ -454,10 +596,9 @@ err_destroy_netdev:
 void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw,
 			    struct mlx5_eswitch_rep *rep)
 {
-	struct mlx5e_priv *priv = rep->priv_data;
-	struct net_device *netdev = priv->netdev;
+	struct net_device *netdev = rep->netdev;
 
 	unregister_netdev(netdev);
 	mlx5e_detach_netdev(esw->dev, netdev);
-	mlx5e_destroy_netdev(esw->dev, priv);
+	mlx5e_destroy_netdev(esw->dev, netdev_priv(netdev));
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index c6de6fba5843..0e2fb3ed1790 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -164,14 +164,14 @@ void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val)
 
 	mutex_lock(&priv->state_lock);
 
-	if (priv->params.rx_cqe_compress == val)
+	if (MLX5E_GET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS) == val)
 		goto unlock;
 
 	was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
 	if (was_opened)
 		mlx5e_close_locked(priv->netdev);
 
-	priv->params.rx_cqe_compress = val;
+	MLX5E_SET_PFLAG(priv, MLX5E_PFLAG_RX_CQE_COMPRESS, val);
 
 	if (was_opened)
 		mlx5e_open_locked(priv->netdev);
@@ -340,7 +340,7 @@ static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix)
 	while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) {
 		sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
 		sq->db.ico_wqe[pi].num_wqebbs = 1;
-		mlx5e_send_nop(sq, true);
+		mlx5e_send_nop(sq, false);
 	}
 
 	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
@@ -412,7 +412,7 @@ void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq)
 
 	clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state);
 
-	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) {
+	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state))) {
 		mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]);
 		return;
 	}
@@ -445,7 +445,7 @@ void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 }
 
 #define RQ_CANNOT_POST(rq) \
-	(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state) || \
+	(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state) || \
 	 test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state))
 
 bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq)
@@ -737,10 +737,10 @@ static inline
 struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 			     u16 wqe_counter, u32 cqe_bcnt)
 {
-	struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog);
 	struct mlx5e_dma_info *di;
 	struct sk_buff *skb;
 	void *va, *data;
+	bool consumed;
 
 	di             = &rq->dma_info[wqe_counter];
 	va             = page_address(di->page);
@@ -759,7 +759,11 @@ struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 		return NULL;
 	}
 
-	if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt))
+	rcu_read_lock();
+	consumed = mlx5e_xdp_handle(rq, READ_ONCE(rq->xdp_prog), di, data,
+				    cqe_bcnt);
+	rcu_read_unlock();
+	if (consumed)
 		return NULL; /* page/packet was consumed by XDP */
 
 	skb = build_skb(va, RQ_PAGE_SIZE(rq));
@@ -924,7 +928,7 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 	struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq;
 	int work_done = 0;
 
-	if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state)))
+	if (unlikely(!test_bit(MLX5E_RQ_STATE_ENABLED, &rq->state)))
 		return 0;
 
 	if (cq->decmprs_left)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
new file mode 100644
index 000000000000..65442c36a6e1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -0,0 +1,352 @@
+/*
+ * Copyright (c) 2016, Mellanox Technologies, Ltd.  All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <net/udp.h>
+#include "en.h"
+
+enum {
+	MLX5E_ST_LINK_STATE,
+	MLX5E_ST_LINK_SPEED,
+	MLX5E_ST_HEALTH_INFO,
+#ifdef CONFIG_INET
+	MLX5E_ST_LOOPBACK,	/* only present when CONFIG_INET is set */
+#endif
+	MLX5E_ST_NUM,		/* count; sizes the name and handler tables */
+};
+
+const char mlx5e_self_tests[MLX5E_ST_NUM][ETH_GSTRING_LEN] = {
+	"Link Test",		/* MLX5E_ST_LINK_STATE */
+	"Speed Test",		/* MLX5E_ST_LINK_SPEED */
+	"Health Test",		/* MLX5E_ST_HEALTH_INFO */
+#ifdef CONFIG_INET
+	"Loopback Test",	/* MLX5E_ST_LOOPBACK */
+#endif
+};
+
+int mlx5e_self_test_num(struct mlx5e_priv *priv)
+{
+	return ARRAY_SIZE(mlx5e_self_tests); /* priv is not consulted */
+}
+
+static int mlx5e_test_health_info(struct mlx5e_priv *priv)
+{
+	struct mlx5_core_health *health = &priv->mdev->priv.health;
+
+	return health->sick ? 1 : 0;	/* non-zero marks the test as failed */
+}
+
+static int mlx5e_test_link_state(struct mlx5e_priv *priv)
+{
+	u8 port_state;
+
+	if (!netif_carrier_ok(priv->netdev))
+		return 1;	/* no carrier: fail without querying the vport */
+
+	port_state = mlx5_query_vport_state(priv->mdev, MLX5_QUERY_VPORT_STATE_IN_OP_MOD_VNIC_VPORT, 0);
+	return port_state == VPORT_STATE_UP ? 0 : 1; /* pass only when UP */
+}
+
+static int mlx5e_test_link_speed(struct mlx5e_priv *priv)
+{
+	u32 out[MLX5_ST_SZ_DW(ptys_reg)];
+	u32 eth_proto_oper;
+	int i;
+
+	if (!netif_carrier_ok(priv->netdev))
+		return 1;	/* no carrier: fail without querying PTYS */
+
+	if (mlx5_query_port_ptys(priv->mdev, out, sizeof(out), MLX5_PTYS_EN, 1))
+		return 1;	/* PTYS query failed */
+
+	eth_proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper);
+	for (i = 0; i < MLX5E_LINK_MODES_NUMBER; i++) {
+		if (eth_proto_oper & MLX5E_PROT_MASK(i))
+			return 0;	/* a known link mode bit is set: pass */
+	}
+	return 1;	/* no known link mode bit set in eth_proto_oper */
+}
+
+#ifdef CONFIG_INET
+/* loopback test */
+#define MLX5E_TEST_PKT_SIZE (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD - NET_IP_ALIGN)
+static const char mlx5e_test_text[ETH_GSTRING_LEN] = "MLX5E SELF TEST";
+#define MLX5E_TEST_MAGIC 0x5AEED15C001ULL
+
+struct mlx5ehdr {
+	__be32 version;			/* written as 0 by the sender */
+	__be64 magic;			/* MLX5E_TEST_MAGIC, checked on receive */
+	char   text[ETH_GSTRING_LEN];	/* copy of mlx5e_test_text */
+};
+
+static struct sk_buff *mlx5e_test_get_udp_skb(struct mlx5e_priv *priv)
+{
+	struct sk_buff *skb = NULL;
+	struct mlx5ehdr *mlxh;
+	struct ethhdr *ethh;
+	struct udphdr *udph;
+	struct iphdr *iph;
+	int datalen, iplen;
+
+	datalen = MLX5E_TEST_PKT_SIZE -	/* UDP payload: test header + padding */
+		  (sizeof(*ethh) + sizeof(*iph) + sizeof(*udph));
+
+	skb = netdev_alloc_skb(priv->netdev, MLX5E_TEST_PKT_SIZE);
+	if (!skb) {
+		netdev_err(priv->netdev, "\tFailed to alloc loopback skb\n");
+		return NULL;
+	}
+
+	prefetchw(skb->data);
+	skb_reserve(skb, NET_IP_ALIGN);
+
+	/* Push the Ethernet header, then append the IPv4 and UDP headers */
+	ethh = (struct ethhdr *)skb_push(skb, ETH_HLEN);
+	skb_reset_mac_header(skb);
+
+	skb_set_network_header(skb, skb->len);
+	iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr));
+
+	skb_set_transport_header(skb, skb->len);
+	udph = (struct udphdr *)skb_put(skb, sizeof(struct udphdr));
+
+	/* Fill ETH header: destination is our own MAC, source is all-zero */
+	ether_addr_copy(ethh->h_dest, priv->netdev->dev_addr);
+	eth_zero_addr(ethh->h_source);
+	ethh->h_proto = htons(ETH_P_IP);
+
+	/* Fill UDP header */
+	udph->source = htons(9);
+	udph->dest = htons(9); /* Discard Protocol */
+	udph->len = htons(datalen + sizeof(struct udphdr));
+	udph->check = 0;
+
+	/* Fill IP header: zero source and destination addresses */
+	iph->ihl = 5;
+	iph->ttl = 32;
+	iph->version = 4;
+	iph->protocol = IPPROTO_UDP;
+	iplen = sizeof(struct iphdr) + sizeof(struct udphdr) + datalen;
+	iph->tot_len = htons(iplen);
+	iph->frag_off = 0;
+	iph->saddr = 0;
+	iph->daddr = 0;
+	iph->tos = 0;
+	iph->id = 0;
+	ip_send_check(iph);
+
+	/* Fill test header, then zero the remainder of the payload */
+	mlxh = (struct mlx5ehdr *)skb_put(skb, sizeof(*mlxh));
+	mlxh->version = 0;
+	mlxh->magic = cpu_to_be64(MLX5E_TEST_MAGIC);
+	strlcpy(mlxh->text, mlx5e_test_text, sizeof(mlxh->text));
+	datalen -= sizeof(*mlxh);
+	memset(skb_put(skb, datalen), 0, datalen);
+
+	skb->csum = 0;
+	skb->ip_summed = CHECKSUM_PARTIAL;
+	udp4_hwcsum(skb, iph->saddr, iph->daddr);
+
+	skb->protocol = htons(ETH_P_IP);
+	skb->pkt_type = PACKET_HOST;
+	skb->dev = priv->netdev;
+
+	return skb;	/* caller owns the skb; NULL is returned on alloc failure */
+}
+
+struct mlx5e_lbt_priv {
+	struct packet_type pt;	/* ETH_P_ALL hook registered for the test */
+	struct completion comp;	/* completed when the test frame is seen */
+	bool loopback_ok;	/* set by mlx5e_test_loopback_validate() */
+};
+
+static int
+mlx5e_test_loopback_validate(struct sk_buff *skb,
+			     struct net_device *ndev,
+			     struct packet_type *pt,
+			     struct net_device *orig_ndev)
+{
+	struct mlx5e_lbt_priv *lbtp = pt->af_packet_priv;
+	struct mlx5ehdr *mlxh;
+	struct ethhdr *ethh;
+	struct udphdr *udph;
+	struct iphdr *iph;
+
+	/* We are only going to peek, no need to clone the SKB */
+	if (skb->protocol != htons(ETH_P_IP))
+		goto out;
+
+	if (MLX5E_TEST_PKT_SIZE - ETH_HLEN > skb_headlen(skb))
+		goto out;	/* linear area too short for the test frame */
+
+	ethh = (struct ethhdr *)skb_mac_header(skb);
+	if (!ether_addr_equal(ethh->h_dest, orig_ndev->dev_addr))
+		goto out;
+
+	iph = ip_hdr(skb);
+	if (iph->protocol != IPPROTO_UDP)
+		goto out;
+
+	udph = udp_hdr(skb);
+	if (udph->dest != htons(9))
+		goto out;
+
+	mlxh = (struct mlx5ehdr *)((char *)udph + sizeof(*udph));
+	if (mlxh->magic != cpu_to_be64(MLX5E_TEST_MAGIC))
+		goto out; /* so close ! */
+
+	/* bingo: record success and wake the waiter in mlx5e_test_loopback() */
+	lbtp->loopback_ok = true;
+	complete(&lbtp->comp);
+out:
+	kfree_skb(skb);	/* the skb is released on every path */
+	return 0;
+}
+
+static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
+				     struct mlx5e_lbt_priv *lbtp)
+{
+	int err = 0;
+
+	err = mlx5e_refresh_tirs_self_loopback(priv->mdev, true);
+	if (err) {
+		netdev_err(priv->netdev,
+			   "\tFailed to enable UC loopback err(%d)\n", err);
+		return err;	/* nothing registered yet, nothing to undo */
+	}
+
+	lbtp->loopback_ok = false;
+	init_completion(&lbtp->comp);
+
+	lbtp->pt.type = htons(ETH_P_ALL);	/* hook sees every protocol */
+	lbtp->pt.func = mlx5e_test_loopback_validate;
+	lbtp->pt.dev = priv->netdev;
+	lbtp->pt.af_packet_priv = lbtp;	/* handed back to the validate hook */
+	dev_add_pack(&lbtp->pt);
+	return err;
+}
+
+static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
+					struct mlx5e_lbt_priv *lbtp)
+{
+	dev_remove_pack(&lbtp->pt);	/* undo dev_add_pack() from setup */
+	mlx5e_refresh_tirs_self_loopback(priv->mdev, false); /* result ignored */
+}
+
+#define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
+static int mlx5e_test_loopback(struct mlx5e_priv *priv)
+{
+	struct mlx5e_lbt_priv *lbtp;
+	struct sk_buff *skb = NULL;
+	int err;
+
+	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+		netdev_err(priv->netdev,
+			   "\tCan't perform loopback test while device is down\n");
+		return -ENODEV;
+	}
+
+	lbtp = kzalloc(sizeof(*lbtp), GFP_KERNEL);
+	if (!lbtp)
+		return -ENOMEM;
+	lbtp->loopback_ok = false;
+
+	err = mlx5e_test_loopback_setup(priv, lbtp);
+	if (err)
+		goto out;	/* setup registered nothing, only free lbtp */
+
+	skb = mlx5e_test_get_udp_skb(priv);
+	if (!skb) {
+		err = -ENOMEM;
+		goto cleanup;
+	}
+
+	skb_set_queue_mapping(skb, 0);
+	err = dev_queue_xmit(skb);
+	if (err) {
+		netdev_err(priv->netdev,
+			   "\tFailed to xmit loopback packet err(%d)\n",
+			   err);
+		goto cleanup;
+	}
+
+	/* A timeout is not an error by itself; loopback_ok decides below. */
+	wait_for_completion_timeout(&lbtp->comp, MLX5E_LB_VERIFY_TIMEOUT);
+	err = !lbtp->loopback_ok;	/* 0 = frame seen, 1 = not seen in time */
+
+cleanup:
+	mlx5e_test_loopback_cleanup(priv, lbtp);
+out:
+	kfree(lbtp);
+	return err;
+}
+#endif
+
+static int (*mlx5e_st_func[MLX5E_ST_NUM])(struct mlx5e_priv *) = {
+	mlx5e_test_link_state,		/* MLX5E_ST_LINK_STATE */
+	mlx5e_test_link_speed,		/* MLX5E_ST_LINK_SPEED */
+	mlx5e_test_health_info,		/* MLX5E_ST_HEALTH_INFO */
+#ifdef CONFIG_INET
+	mlx5e_test_loopback,		/* MLX5E_ST_LOOPBACK */
+#endif
+};
+
+void mlx5e_self_test(struct net_device *ndev, struct ethtool_test *etest,
+		     u64 *buf)
+{
+	struct mlx5e_priv *priv = netdev_priv(ndev);
+	int i;
+
+	memset(buf, 0, sizeof(u64) * MLX5E_ST_NUM);	/* one slot per test */
+
+	mutex_lock(&priv->state_lock);	/* held across all test handlers */
+	netdev_info(ndev, "Self test begin..\n");
+
+	for (i = 0; i < MLX5E_ST_NUM; i++) {
+		netdev_info(ndev, "\t[%d] %s start..\n",
+			    i, mlx5e_self_tests[i]);
+		buf[i] = mlx5e_st_func[i](priv);	/* int result stored as u64 */
+		netdev_info(ndev, "\t[%d] %s end: result(%lld)\n",
+			    i, mlx5e_self_tests[i], buf[i]);
+	}
+
+	mutex_unlock(&priv->state_lock);
+
+	for (i = 0; i < MLX5E_ST_NUM; i++) {
+		if (buf[i]) {	/* any non-zero result fails the whole run */
+			etest->flags |= ETH_TEST_FL_FAILED;
+			break;
+		}
+	}
+	netdev_info(ndev, "Self test out: status flags(0x%x)\n",
+		    etest->flags);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 57452fdc5154..f202f872f57f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -39,7 +39,7 @@
 #define MLX5E_READ_CTR32_CPU(ptr, dsc, i) \
 	(*(u32 *)((char *)ptr + dsc[i].offset))
 #define MLX5E_READ_CTR32_BE(ptr, dsc, i) \
-	be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
+	be32_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset))
 
 #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld)
 #define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld)
@@ -276,6 +276,32 @@ static const struct counter_desc pport_per_prio_pfc_stats_desc[] = {
 	{ "rx_%s_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) },
 };
 
+#define PCIE_PERF_OFF(c) \
+	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_perf_cntrs_grp_data_layout.c)
+#define PCIE_PERF_GET(pcie_stats, c) \
+	MLX5_GET(mpcnt_reg, pcie_stats->pcie_perf_counters, \
+		 counter_set.pcie_perf_cntrs_grp_data_layout.c)
+#define PCIE_TAS_OFF(c) \
+	MLX5_BYTE_OFF(mpcnt_reg, counter_set.pcie_tas_cntrs_grp_data_layout.c)
+#define PCIE_TAS_GET(pcie_stats, c) \
+	MLX5_GET(mpcnt_reg, pcie_stats->pcie_tas_counters, \
+		 counter_set.pcie_tas_cntrs_grp_data_layout.c)
+
+struct mlx5e_pcie_stats {
+	__be64 pcie_perf_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; /* PCIE_PERF_GET() */
+	__be64 pcie_tas_counters[MLX5_ST_SZ_QW(mpcnt_reg)]; /* PCIE_TAS_GET() */
+};
+
+static const struct counter_desc pcie_perf_stats_desc[] = {
+	{ "rx_pci_signal_integrity", PCIE_PERF_OFF(rx_errors) },
+	{ "tx_pci_signal_integrity", PCIE_PERF_OFF(tx_errors) },
+};
+
+static const struct counter_desc pcie_tas_stats_desc[] = {
+	{ "tx_pci_transport_nonfatal_msg", PCIE_TAS_OFF(non_fatal_err_msg_sent) },
+	{ "tx_pci_transport_fatal_msg", PCIE_TAS_OFF(fatal_err_msg_sent) },
+};
+
 struct mlx5e_rq_stats {
 	u64 packets;
 	u64 bytes;
@@ -360,6 +386,8 @@ static const struct counter_desc sq_stats_desc[] = {
 #define NUM_PPORT_802_3_COUNTERS	ARRAY_SIZE(pport_802_3_stats_desc)
 #define NUM_PPORT_2863_COUNTERS		ARRAY_SIZE(pport_2863_stats_desc)
 #define NUM_PPORT_2819_COUNTERS		ARRAY_SIZE(pport_2819_stats_desc)
+#define NUM_PCIE_PERF_COUNTERS		ARRAY_SIZE(pcie_perf_stats_desc)
+#define NUM_PCIE_TAS_COUNTERS		ARRAY_SIZE(pcie_tas_stats_desc)
 #define NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS \
 	ARRAY_SIZE(pport_per_prio_traffic_stats_desc)
 #define NUM_PPORT_PER_PRIO_PFC_COUNTERS \
@@ -369,6 +397,7 @@ static const struct counter_desc sq_stats_desc[] = {
 					 NUM_PPORT_2819_COUNTERS  + \
 					 NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS * \
 					 NUM_PPORT_PRIO)
+#define NUM_PCIE_COUNTERS		(NUM_PCIE_PERF_COUNTERS + NUM_PCIE_TAS_COUNTERS)
 #define NUM_RQ_STATS			ARRAY_SIZE(rq_stats_desc)
 #define NUM_SQ_STATS			ARRAY_SIZE(sq_stats_desc)
 
@@ -377,6 +406,25 @@ struct mlx5e_stats {
 	struct mlx5e_qcounter_stats qcnt;
 	struct mlx5e_vport_stats vport;
 	struct mlx5e_pport_stats pport;
+	struct mlx5e_pcie_stats pcie;
+	struct rtnl_link_stats64 vf_vport;
+};
+
+static const struct counter_desc mlx5e_pme_status_desc[] = {
+	{ "module_plug", 0 },
+	{ "module_unplug", 8 },
+};
+
+static const struct counter_desc mlx5e_pme_error_desc[] = {
+	{ "module_pwr_budget_exd", 0 },  /* power budget exceed */
+	{ "module_long_range", 8 },      /* long range for non MLNX cable */
+	{ "module_bus_stuck", 16 },      /* bus stuck (I2C or data shorted) */
+	{ "module_no_eeprom", 24 },      /* no eeprom/retry time out */
+	{ "module_enforce_part", 32 },   /* enforce part number list */
+	{ "module_unknown_id", 40 },     /* unknown identifier */
+	{ "module_high_temp", 48 },      /* high temperature */
+	{ "module_bad_shorted", 56 },    /* bad or shorted cable/module */
+	{ "module_unknown_status", 64 },
 };
 
 #endif /* __MLX5_EN_STATS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 6bb21b31cfeb..f8829b517156 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -31,6 +31,7 @@
  */
 
 #include <net/flow_dissector.h>
+#include <net/sch_generic.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
 #include <net/tc_act/tc_skbedit.h>
@@ -40,28 +41,43 @@
 #include <net/switchdev.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/tc_act/tc_vlan.h>
+#include <net/tc_act/tc_tunnel_key.h>
+#include <net/vxlan.h>
 #include "en.h"
 #include "en_tc.h"
 #include "eswitch.h"
+#include "vxlan.h"
 
 struct mlx5e_tc_flow {
 	struct rhash_head	node;
 	u64			cookie;
-	struct mlx5_flow_rule	*rule;
+	struct mlx5_flow_handle *rule;
+	struct list_head	encap; /* flows sharing the same encap */
 	struct mlx5_esw_flow_attr *attr;
 };
 
+enum {
+	MLX5_HEADER_TYPE_VXLAN = 0x0,
+	MLX5_HEADER_TYPE_NVGRE = 0x1,
+};
+
 #define MLX5E_TC_TABLE_NUM_ENTRIES 1024
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 
-static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
-						    struct mlx5_flow_spec *spec,
-						    u32 action, u32 flow_tag)
+static struct mlx5_flow_handle *
+mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
+		      struct mlx5_flow_spec *spec,
+		      u32 action, u32 flow_tag)
 {
 	struct mlx5_core_dev *dev = priv->mdev;
 	struct mlx5_flow_destination dest = { 0 };
+	struct mlx5_flow_act flow_act = {
+		.action = action,
+		.flow_tag = flow_tag,
+		.encap_id = 0,
+	};
 	struct mlx5_fc *counter = NULL;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	bool table_created = false;
 
 	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
@@ -82,7 +98,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 							    MLX5E_TC_PRIO,
 							    MLX5E_TC_TABLE_NUM_ENTRIES,
 							    MLX5E_TC_TABLE_NUM_GROUPS,
-							    0);
+							    0, 0);
 		if (IS_ERR(priv->fs.tc.t)) {
 			netdev_err(priv->netdev,
 				   "Failed to create tc offload table\n");
@@ -94,9 +110,7 @@ static struct mlx5_flow_rule *mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 	}
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	rule = mlx5_add_flow_rule(priv->fs.tc.t, spec,
-				  action, flow_tag,
-				  &dest);
+	rule = mlx5_add_flow_rules(priv->fs.tc.t, spec, &flow_act, &dest, 1);
 
 	if (IS_ERR(rule))
 		goto err_add_rule;
@@ -114,9 +128,10 @@ err_create_ft:
 	return rule;
 }
 
-static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
-						    struct mlx5_flow_spec *spec,
-						    struct mlx5_esw_flow_attr *attr)
+static struct mlx5_flow_handle *
+mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
+		      struct mlx5_flow_spec *spec,
+		      struct mlx5_esw_flow_attr *attr)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	int err;
@@ -128,19 +143,39 @@ static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 	return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
 }
 
+static void mlx5e_detach_encap(struct mlx5e_priv *priv,
+			       struct mlx5e_tc_flow *flow) {
+	struct list_head *next = flow->encap.next;	/* sampled before unlink */
+
+	list_del(&flow->encap);
+	if (list_empty(next)) {	/* true only if next is a now-empty list head */
+		struct mlx5_encap_entry *e;
+
+		e = list_entry(next, struct mlx5_encap_entry, flows);
+		if (e->n) {
+			mlx5_encap_dealloc(priv->mdev, e->encap_id);
+			neigh_release(e->n);
+		}
+		hlist_del_rcu(&e->encap_hlist);
+		kfree(e);	/* NOTE(review): freed with no RCU grace period */
+	}
+}
+
 static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
-			      struct mlx5_flow_rule *rule,
-			      struct mlx5_esw_flow_attr *attr)
+			      struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 	struct mlx5_fc *counter = NULL;
 
-	counter = mlx5_flow_rule_counter(rule);
+	counter = mlx5_flow_rule_counter(flow->rule);
 
-	if (esw && esw->mode == SRIOV_OFFLOADS)
-		mlx5_eswitch_del_vlan_action(esw, attr);
+	mlx5_del_flow_rules(flow->rule);
 
-	mlx5_del_flow_rule(rule);
+	if (esw && esw->mode == SRIOV_OFFLOADS) {
+		mlx5_eswitch_del_vlan_action(esw, flow->attr);
+		if (flow->attr->action & MLX5_FLOW_CONTEXT_ACTION_ENCAP)
+			mlx5e_detach_encap(priv, flow);
+	}
 
 	mlx5_fc_destroy(priv->mdev, counter);
 
@@ -150,8 +185,125 @@ static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
 	}
 }
 
-static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
-			    struct tc_cls_flower_offload *f)
+static void parse_vxlan_attr(struct mlx5_flow_spec *spec,
+			     struct tc_cls_flower_offload *f)
+{
+	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				       outer_headers);
+	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				       outer_headers);
+	void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				    misc_parameters);
+	void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				    misc_parameters);
+
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_protocol);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP);
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) {
+		struct flow_dissector_key_keyid *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_KEYID,
+						  f->key);
+		struct flow_dissector_key_keyid *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_KEYID,
+						  f->mask);
+		MLX5_SET(fte_match_set_misc, misc_c, vxlan_vni,	/* VNI mask */
+			 be32_to_cpu(mask->keyid));
+		MLX5_SET(fte_match_set_misc, misc_v, vxlan_vni,	/* VNI value */
+			 be32_to_cpu(key->keyid));
+	}
+}
+
+static int parse_tunnel_attr(struct mlx5e_priv *priv,
+			     struct mlx5_flow_spec *spec,
+			     struct tc_cls_flower_offload *f)
+{
+	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+				       outer_headers);
+	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				       outer_headers);
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) {
+		struct flow_dissector_key_ports *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_PORTS,
+						  f->key);
+		struct flow_dissector_key_ports *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_PORTS,
+						  f->mask);
+
+		/* Full udp dst port must be given: every mask byte is 0xff */
+		if (memchr_inv(&mask->dst, 0xff, sizeof(mask->dst)))
+			return -EOPNOTSUPP;
+
+		/* udp src port isn't supported: its mask must be all zero */
+		if (memchr_inv(&mask->src, 0, sizeof(mask->src)))
+			return -EOPNOTSUPP;
+
+		if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->dst)) &&
+		    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap))
+			parse_vxlan_attr(spec, f);
+		else
+			return -EOPNOTSUPP;
+
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+			 udp_dport, ntohs(mask->dst));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+			 udp_dport, ntohs(key->dst));
+
+	} else { /* udp dst port must be given */
+			return -EOPNOTSUPP;
+	}
+
+	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) {
+		struct flow_dissector_key_ipv4_addrs *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+						  f->key);
+		struct flow_dissector_key_ipv4_addrs *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
+						  f->mask);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
+			 ntohl(mask->src));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+			 src_ipv4_src_ipv6.ipv4_layout.ipv4,
+			 ntohl(key->src));
+
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
+			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+			 ntohl(mask->dst));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
+			 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
+			 ntohl(key->dst));
+	}
+
+	/* The outer ethertype is always matched as IPv4 */
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ethertype);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP);
+
+	/* Enforce DMAC when offloading incoming tunneled flows.
+	 * Flow counters require a match on the DMAC.
+	 */
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_47_16);
+	MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, dmac_15_0);
+	ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
+				     dmac_47_16), priv->netdev->dev_addr);
+
+	/* let software handle IP fragments: match only frag == 0 */
+	MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag, 0);
+
+	return 0;
+}
+
+static int __parse_cls_flower(struct mlx5e_priv *priv,
+			      struct mlx5_flow_spec *spec,
+			      struct tc_cls_flower_offload *f,
+			      u8 *min_inline)
 {
 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
 				       outer_headers);
@@ -160,6 +312,8 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 	u16 addr_type = 0;
 	u8 ip_proto = 0;
 
+	*min_inline = MLX5_INLINE_MODE_L2;
+
 	if (f->dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
 	      BIT(FLOW_DISSECTOR_KEY_BASIC) |
@@ -167,18 +321,61 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 	      BIT(FLOW_DISSECTOR_KEY_VLAN) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
 	      BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
-	      BIT(FLOW_DISSECTOR_KEY_PORTS))) {
+	      BIT(FLOW_DISSECTOR_KEY_PORTS) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
+	      BIT(FLOW_DISSECTOR_KEY_ENC_PORTS)	|
+	      BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL))) {
 		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
 			    f->dissector->used_keys);
 		return -EOPNOTSUPP;
 	}
 
+	if ((dissector_uses_key(f->dissector,
+				FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) ||
+	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_KEYID) ||
+	     dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) &&
+	    dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_dissector_key_control *key =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_ENC_CONTROL,
+						  f->key);
+		switch (key->addr_type) {
+		case FLOW_DISSECTOR_KEY_IPV4_ADDRS:
+			if (parse_tunnel_attr(priv, spec, f))
+				return -EOPNOTSUPP;
+			break;
+		default:
+			return -EOPNOTSUPP;
+		}
+
+		/* In decap flow, header pointers should point to the inner
+		 * headers, outer header were already set by parse_tunnel_attr
+		 */
+		headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+					 inner_headers);
+		headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+					 inner_headers);
+	}
+
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_CONTROL)) {
 		struct flow_dissector_key_control *key =
 			skb_flow_dissector_target(f->dissector,
 						  FLOW_DISSECTOR_KEY_CONTROL,
 						  f->key);
+
+		struct flow_dissector_key_control *mask =
+			skb_flow_dissector_target(f->dissector,
+						  FLOW_DISSECTOR_KEY_CONTROL,
+						  f->mask);
 		addr_type = key->addr_type;
+
+		if (mask->flags & FLOW_DIS_IS_FRAGMENT) {
+			MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
+			MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
+				 key->flags & FLOW_DIS_IS_FRAGMENT);
+		}
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_BASIC)) {
@@ -201,6 +398,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 			 mask->ip_proto);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
 			 key->ip_proto);
+
+		if (mask->ip_proto)
+			*min_inline = MLX5_INLINE_MODE_IP;
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
@@ -271,6 +471,9 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
 		       &key->dst, sizeof(key->dst));
+
+		if (mask->src || mask->dst)
+			*min_inline = MLX5_INLINE_MODE_IP;
 	}
 
 	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
@@ -296,6 +499,10 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
 				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
 		       &key->dst, sizeof(key->dst));
+
+		if (ipv6_addr_type(&mask->src) != IPV6_ADDR_ANY ||
+		    ipv6_addr_type(&mask->dst) != IPV6_ADDR_ANY)
+			*min_inline = MLX5_INLINE_MODE_IP;
 	}
 
 	if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_PORTS)) {
@@ -336,11 +543,39 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec
 				   "Only UDP and TCP transport are supported\n");
 			return -EINVAL;
 		}
+
+		if (mask->src || mask->dst)
+			*min_inline = MLX5_INLINE_MODE_TCP_UDP;
 	}
 
 	return 0;
 }
 
+static int parse_cls_flower(struct mlx5e_priv *priv,
+			    struct mlx5_flow_spec *spec,
+			    struct tc_cls_flower_offload *f)
+{
+	struct mlx5_core_dev *dev = priv->mdev;
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	struct mlx5_eswitch_rep *rep = priv->ppriv;
+	u8 min_inline;
+	int err;
+
+	err = __parse_cls_flower(priv, spec, f, &min_inline);
+	/* esw is NULL-checked like in mlx5e_tc_del_flow(); rep is only read in offloads mode */
+	if (!err && esw && esw->mode == SRIOV_OFFLOADS &&
+	    rep->vport != FDB_UPLINK_VPORT) {
+		if (min_inline > esw->offloads.inline_mode) {
+			netdev_warn(priv->netdev,
+				    "Flow is not offloaded due to min inline setting, required %d actual %d\n",
+				    min_inline, esw->offloads.inline_mode);
+			return -EOPNOTSUPP;
+		}
+	}
+
+	return err;	/* 0, or the error from __parse_cls_flower() */
+}
+
 static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 				u32 *action, u32 *flow_tag)
 {
@@ -387,11 +622,243 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 	return 0;
 }
 
+static inline int cmp_encap_info(struct mlx5_encap_info *a,
+				 struct mlx5_encap_info *b)
+{
+	return memcmp(a, b, sizeof(*a));	/* 0 when all bytes are equal */
+}
+
+static inline int hash_encap_info(struct mlx5_encap_info *info)
+{
+	return jhash(info, sizeof(*info), 0);	/* hashes every byte of *info */
+}
+
+static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
+				   struct net_device *mirred_dev,
+				   struct net_device **out_dev,
+				   struct flowi4 *fl4,
+				   struct neighbour **out_n,
+				   __be32 *saddr,
+				   int *out_ttl)
+{
+	struct rtable *rt;
+	struct neighbour *n = NULL;
+	int ttl;
+
+#if IS_ENABLED(CONFIG_INET)
+	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
+	if (IS_ERR(rt)) {
+		pr_warn("%s: no route to %pI4\n", __func__, &fl4->daddr);
+		return -EOPNOTSUPP;
+	}
+#else
+	return -EOPNOTSUPP;
+#endif
+
+	if (!switchdev_port_same_parent_id(priv->netdev, rt->dst.dev)) {
+		pr_warn("%s: Can't offload the flow, netdevices aren't on the same HW e-switch\n",
+			__func__);
+		ip_rt_put(rt);
+		return -EOPNOTSUPP;
+	}
+
+	ttl = ip4_dst_hoplimit(&rt->dst);
+	*out_dev = rt->dst.dev;	/* must be read before ip_rt_put() below */
+	n = dst_neigh_lookup(&rt->dst, &fl4->daddr);
+	ip_rt_put(rt);	/* rt must not be touched past this point */
+	if (!n)
+		return -ENOMEM;	/* *out_dev was written but is not meaningful */
+
+	*out_n = n;	/* caller now owns the neighbour reference */
+	*saddr = fl4->saddr;
+	*out_ttl = ttl;
+
+	return 0;
+}
+
+static int gen_vxlan_header_ipv4(struct net_device *out_dev,
+				 char buf[],
+				 unsigned char h_dest[ETH_ALEN],
+				 int ttl,
+				 __be32 daddr,
+				 __be32 saddr,
+				 __be16 udp_dst_port,
+				 __be32 vx_vni)
+{
+	int encap_size = VXLAN_HLEN + sizeof(struct iphdr) + ETH_HLEN;
+	struct ethhdr *eth = (struct ethhdr *)buf;
+	struct iphdr  *ip = (struct iphdr *)((char *)eth + sizeof(struct ethhdr));
+	struct udphdr *udp = (struct udphdr *)((char *)ip + sizeof(struct iphdr));
+	struct vxlanhdr *vxh = (struct vxlanhdr *)((char *)udp + sizeof(struct udphdr));
+
+	memset(buf, 0, encap_size);	/* fields not assigned below stay zero */
+
+	ether_addr_copy(eth->h_dest, h_dest);
+	ether_addr_copy(eth->h_source, out_dev->dev_addr);
+	eth->h_proto = htons(ETH_P_IP);
+
+	ip->daddr = daddr;
+	ip->saddr = saddr;
+
+	ip->ttl = ttl;
+	ip->protocol = IPPROTO_UDP;
+	ip->version = 0x4;
+	ip->ihl = 0x5;
+
+	udp->dest = udp_dst_port;	/* UDP source, len and check stay zero */
+	vxh->vx_flags = VXLAN_HF_VNI;
+	vxh->vx_vni = vxlan_vni_field(vx_vni);
+
+	return encap_size;	/* number of bytes written at the start of buf */
+}
+
+static int mlx5e_create_encap_header_ipv4(struct mlx5e_priv *priv,
+					  struct net_device *mirred_dev,
+					  struct mlx5_encap_entry *e,
+					  struct net_device **out_dev)
+{
+	int max_encap_size = MLX5_CAP_ESW(priv->mdev, max_encap_header_size);
+	struct flowi4 fl4 = {};
+	struct neighbour *n = NULL;	/* stays NULL until the lookup succeeds */
+	char *encap_header;
+	int encap_size;
+	__be32 saddr;
+	int ttl;
+	int err;
+
+	encap_header = kzalloc(max_encap_size, GFP_KERNEL);
+	if (!encap_header)
+		return -ENOMEM;
+
+	switch (e->tunnel_type) {
+	case MLX5_HEADER_TYPE_VXLAN:
+		fl4.flowi4_proto = IPPROTO_UDP;
+		fl4.fl4_dport = e->tun_info.tp_dst;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+	fl4.daddr = e->tun_info.daddr;
+	err = mlx5e_route_lookup_ipv4(priv, mirred_dev, out_dev,
+				      &fl4, &n, &saddr, &ttl);
+	if (err)
+		goto out;
+
+	if (!(n->nud_state & NUD_VALID)) {	/* neighbour not resolved */
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	e->n = n;
+	e->out_dev = *out_dev;
+	neigh_ha_snapshot(e->h_dest, n, *out_dev);
+
+	switch (e->tunnel_type) {
+	case MLX5_HEADER_TYPE_VXLAN:
+		encap_size = gen_vxlan_header_ipv4(*out_dev, encap_header,
+						   e->h_dest, ttl,
+						   e->tun_info.daddr,
+						   saddr, e->tun_info.tp_dst,
+						   e->tun_info.tun_id);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
+	err = mlx5_encap_alloc(priv->mdev, e->tunnel_type,
+			       encap_size, encap_header, &e->encap_id);
+out:
+	if (err && n)	/* on failure drop the ref taken by the route lookup */
+		neigh_release(n);	/* e->n may dangle; the caller frees e */
+	kfree(encap_header);
+	return err;
+}
+
+static int mlx5e_attach_encap(struct mlx5e_priv *priv,
+			      struct ip_tunnel_info *tun_info,
+			      struct net_device *mirred_dev,
+			      struct mlx5_esw_flow_attr *attr)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	unsigned short family = ip_tunnel_info_af(tun_info);
+	struct ip_tunnel_key *key = &tun_info->key;
+	struct mlx5_encap_info info = {}; /* zeroed: hashed/compared bytewise */
+	struct mlx5_encap_entry *e;
+	struct net_device *out_dev;
+	uintptr_t hash_key;
+	bool found = false;
+	int tunnel_type;
+	int err;
+
+	/* udp dst port must be given */
+	if (!memchr_inv(&key->tp_dst, 0, sizeof(key->tp_dst)))
+		return -EOPNOTSUPP;
+
+	if (mlx5e_vxlan_lookup_port(priv, be16_to_cpu(key->tp_dst)) &&
+	    MLX5_CAP_ESW(priv->mdev, vxlan_encap_decap)) {
+		info.tp_dst = key->tp_dst;
+		info.tun_id = tunnel_id_to_key32(key->tun_id);
+		tunnel_type = MLX5_HEADER_TYPE_VXLAN;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	switch (family) {
+	case AF_INET:
+		info.daddr = key->u.ipv4.dst;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	hash_key = hash_encap_info(&info);
+
+	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
+				   encap_hlist, hash_key) {
+		if (!cmp_encap_info(&e->tun_info, &info)) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found) {	/* reuse the existing entry for identical tunnel info */
+		attr->encap = e;
+		return 0;
+	}
+
+	e = kzalloc(sizeof(*e), GFP_KERNEL);
+	if (!e)
+		return -ENOMEM;
+
+	e->tun_info = info;
+	e->tunnel_type = tunnel_type;
+	INIT_LIST_HEAD(&e->flows);
+
+	err = mlx5e_create_encap_header_ipv4(priv, mirred_dev, e, &out_dev);
+	if (err)
+		goto out_err;
+
+	attr->encap = e;	/* publish only after the header was allocated */
+	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
+
+	return err;
+
+out_err:
+	kfree(e);
+	return err;
+}
+
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
-				struct mlx5_esw_flow_attr *attr)
+				struct mlx5e_tc_flow *flow)
 {
+	struct mlx5_esw_flow_attr *attr = flow->attr;
+	struct ip_tunnel_info *info = NULL;
 	const struct tc_action *a;
 	LIST_HEAD(actions);
+	bool encap = false;
+	int err;
 
 	if (tc_no_actions(exts))
 		return -EINVAL;
@@ -407,22 +874,44 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
-		if (is_tcf_mirred_redirect(a)) {
+		if (is_tcf_mirred_egress_redirect(a)) {
 			int ifindex = tcf_mirred_ifindex(a);
 			struct net_device *out_dev;
 			struct mlx5e_priv *out_priv;
 
 			out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex);
 
-			if (!switchdev_port_same_parent_id(priv->netdev, out_dev)) {
+			if (switchdev_port_same_parent_id(priv->netdev,
+							  out_dev)) {
+				attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+					MLX5_FLOW_CONTEXT_ACTION_COUNT;
+				out_priv = netdev_priv(out_dev);
+				attr->out_rep = out_priv->ppriv;
+			} else if (encap) {
+				err = mlx5e_attach_encap(priv, info,
+							 out_dev, attr);
+				if (err)
+					return err;
+				list_add(&flow->encap, &attr->encap->flows);
+				attr->action |= MLX5_FLOW_CONTEXT_ACTION_ENCAP |
+					MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+					MLX5_FLOW_CONTEXT_ACTION_COUNT;
+				out_priv = netdev_priv(attr->encap->out_dev);
+				attr->out_rep = out_priv->ppriv;
+			} else {
 				pr_err("devices %s %s not on same switch HW, can't offload forwarding\n",
 				       priv->netdev->name, out_dev->name);
 				return -EINVAL;
 			}
+			continue;
+		}
 
-			attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-			out_priv = netdev_priv(out_dev);
-			attr->out_rep = out_priv->ppriv;
+		if (is_tcf_tunnel_set(a)) {
+			info = tcf_tunnel_info(a);
+			if (info)
+				encap = true;
+			else
+				return -EOPNOTSUPP;
 			continue;
 		}
 
@@ -439,6 +928,11 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts,
 			continue;
 		}
 
+		if (is_tcf_tunnel_release(a)) {
+			attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+			continue;
+		}
+
 		return -EINVAL;
 	}
 	return 0;
@@ -453,25 +947,17 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	u32 flow_tag, action;
 	struct mlx5e_tc_flow *flow;
 	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_rule *old = NULL;
-	struct mlx5_esw_flow_attr *old_attr = NULL;
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 
 	if (esw && esw->mode == SRIOV_OFFLOADS)
 		fdb_flow = true;
 
-	flow = rhashtable_lookup_fast(&tc->ht, &f->cookie,
-				      tc->ht_params);
-	if (flow) {
-		old = flow->rule;
-		old_attr = flow->attr;
-	} else {
-		if (fdb_flow)
-			flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr),
-				       GFP_KERNEL);
-		else
-			flow = kzalloc(sizeof(*flow), GFP_KERNEL);
-	}
+	if (fdb_flow)
+		flow = kzalloc(sizeof(*flow) +
+			       sizeof(struct mlx5_esw_flow_attr),
+			       GFP_KERNEL);
+	else
+		flow = kzalloc(sizeof(*flow), GFP_KERNEL);
 
 	spec = mlx5_vzalloc(sizeof(*spec));
 	if (!spec || !flow) {
@@ -487,7 +973,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 
 	if (fdb_flow) {
 		flow->attr  = (struct mlx5_esw_flow_attr *)(flow + 1);
-		err = parse_tc_fdb_actions(priv, f->exts, flow->attr);
+		err = parse_tc_fdb_actions(priv, f->exts, flow);
 		if (err < 0)
 			goto err_free;
 		flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr);
@@ -508,17 +994,13 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol,
 	if (err)
 		goto err_del_rule;
 
-	if (old)
-		mlx5e_tc_del_flow(priv, old, old_attr);
-
 	goto out;
 
 err_del_rule:
-	mlx5_del_flow_rule(flow->rule);
+	mlx5_del_flow_rules(flow->rule);
 
 err_free:
-	if (!old)
-		kfree(flow);
+	kfree(flow);
 out:
 	kvfree(spec);
 	return err;
@@ -537,7 +1019,8 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv,
 
 	rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params);
 
-	mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
+	mlx5e_tc_del_flow(priv, flow);
+
 
 	kfree(flow);
 
@@ -594,7 +1077,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg)
 	struct mlx5e_tc_flow *flow = ptr;
 	struct mlx5e_priv *priv = arg;
 
-	mlx5e_tc_del_flow(priv, flow->rule, flow->attr);
+	mlx5e_tc_del_flow(priv, flow);
 	kfree(flow);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 70a717382357..cfb68371c397 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -409,7 +409,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 
 	sq = container_of(cq, struct mlx5e_sq, cq);
 
-	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
 		return false;
 
 	npkts = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 5703f19a6a24..e5c12a732aa1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -56,7 +56,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 	struct mlx5_cqe64 *cqe;
 	u16 sqcc;
 
-	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
 		return;
 
 	cqe = mlx5e_get_cqe(cq);
@@ -113,7 +113,7 @@ static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq)
 
 	sq = container_of(cq, struct mlx5e_sq, cq);
 
-	if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state)))
+	if (unlikely(!test_bit(MLX5E_SQ_STATE_ENABLED, &sq->state)))
 		return false;
 
 	/* sq->cc must be updated only after mlx5_cqwq_update_db_record(),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index aaca09002ca6..8ffcc8808e50 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -139,6 +139,8 @@ static const char *eqe_type_str(u8 type)
 		return "MLX5_EVENT_TYPE_PORT_CHANGE";
 	case MLX5_EVENT_TYPE_GPIO_EVENT:
 		return "MLX5_EVENT_TYPE_GPIO_EVENT";
+	case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+		return "MLX5_EVENT_TYPE_PORT_MODULE_EVENT";
 	case MLX5_EVENT_TYPE_REMOTE_CONFIG:
 		return "MLX5_EVENT_TYPE_REMOTE_CONFIG";
 	case MLX5_EVENT_TYPE_DB_BF_CONGESTION:
@@ -285,6 +287,11 @@ static int mlx5_eq_int(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 			mlx5_eswitch_vport_event(dev->priv.eswitch, eqe);
 			break;
 #endif
+
+		case MLX5_EVENT_TYPE_PORT_MODULE_EVENT:
+			mlx5_port_module_event(dev, eqe);
+			break;
+
 		default:
 			mlx5_core_warn(dev, "Unhandled event 0x%x on EQ 0x%x\n",
 				       eqe->type, eq->eqn);
@@ -469,7 +476,7 @@ void mlx5_eq_cleanup(struct mlx5_core_dev *dev)
 int mlx5_start_eqs(struct mlx5_core_dev *dev)
 {
 	struct mlx5_eq_table *table = &dev->priv.eq_table;
-	u32 async_event_mask = MLX5_ASYNC_EVENT_MASK;
+	u64 async_event_mask = MLX5_ASYNC_EVENT_MASK;
 	int err;
 
 	if (MLX5_CAP_GEN(dev, pg))
@@ -480,6 +487,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 	    mlx5_core_is_pf(dev))
 		async_event_mask |= (1ull << MLX5_EVENT_TYPE_NIC_VPORT_CHANGE);
 
+	if (MLX5_CAP_GEN(dev, port_module_event))
+		async_event_mask |= (1ull << MLX5_EVENT_TYPE_PORT_MODULE_EVENT);
+	else
+		mlx5_core_dbg(dev, "port_module_event is not set\n");
+
 	err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
 				 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
 				 "mlx5_cmd_eq", &dev->priv.uuari.uars[0]);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index be1f7333ab7f..d6807c3cc461 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -56,7 +56,7 @@ struct esw_uc_addr {
 /* E-Switch MC FDB table hash node */
 struct esw_mc_addr { /* SRIOV only */
 	struct l2addr_node     node;
-	struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */
+	struct mlx5_flow_handle *uplink_rule; /* Forward to uplink rule */
 	u32                    refcnt;
 };
 
@@ -65,7 +65,7 @@ struct vport_addr {
 	struct l2addr_node     node;
 	u8                     action;
 	u32                    vport;
-	struct mlx5_flow_rule *flow_rule; /* SRIOV only */
+	struct mlx5_flow_handle *flow_rule; /* SRIOV only */
 	/* A flag indicating that mac was added due to mc promiscuous vport */
 	bool mc_promisc;
 };
@@ -237,13 +237,14 @@ static void del_l2_table_entry(struct mlx5_core_dev *dev, u32 index)
 }
 
 /* E-Switch FDB */
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 			 u8 mac_c[ETH_ALEN], u8 mac_v[ETH_ALEN])
 {
 	int match_header = (is_zero_ether_addr(mac_c) ? 0 :
 			    MLX5_MATCH_OUTER_HEADERS);
-	struct mlx5_flow_rule *flow_rule = NULL;
+	struct mlx5_flow_handle *flow_rule = NULL;
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
 	struct mlx5_flow_spec *spec;
 	void *mv_misc = NULL;
@@ -285,10 +286,10 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 		  "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
 		  dmac_v, dmac_c, vport);
 	spec->match_criteria_enable = match_header;
+	flow_act.action =  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	flow_rule =
-		mlx5_add_flow_rule(esw->fdb_table.fdb, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				   0, &dest);
+		mlx5_add_flow_rules(esw->fdb_table.fdb, spec,
+				    &flow_act, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		esw_warn(esw->dev,
 			 "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n",
@@ -300,7 +301,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule,
 	return flow_rule;
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
 {
 	u8 mac_c[ETH_ALEN];
@@ -309,7 +310,7 @@ esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
 	return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac);
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
 {
 	u8 mac_c[ETH_ALEN];
@@ -322,7 +323,7 @@ esw_fdb_set_vport_allmulti_rule(struct mlx5_eswitch *esw, u32 vport)
 	return __esw_fdb_set_vport_rule(esw, vport, false, mac_c, mac_v);
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 esw_fdb_set_vport_promisc_rule(struct mlx5_eswitch *esw, u32 vport)
 {
 	u8 mac_c[ETH_ALEN];
@@ -361,7 +362,7 @@ static int esw_create_legacy_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	memset(flow_group_in, 0, inlen);
 
 	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
-	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0);
+	fdb = mlx5_create_flow_table(root_ns, 0, table_size, 0, 0);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
@@ -515,7 +516,7 @@ static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	del_l2_table_entry(esw->dev, esw_uc->table_index);
 
 	if (vaddr->flow_rule)
-		mlx5_del_flow_rule(vaddr->flow_rule);
+		mlx5_del_flow_rules(vaddr->flow_rule);
 	vaddr->flow_rule = NULL;
 
 	l2addr_hash_del(esw_uc);
@@ -562,7 +563,7 @@ static void update_allmulti_vports(struct mlx5_eswitch *esw,
 		case MLX5_ACTION_DEL:
 			if (!iter_vaddr)
 				continue;
-			mlx5_del_flow_rule(iter_vaddr->flow_rule);
+			mlx5_del_flow_rules(iter_vaddr->flow_rule);
 			l2addr_hash_del(iter_vaddr);
 			break;
 		}
@@ -632,7 +633,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 		  esw_mc->uplink_rule);
 
 	if (vaddr->flow_rule)
-		mlx5_del_flow_rule(vaddr->flow_rule);
+		mlx5_del_flow_rules(vaddr->flow_rule);
 	vaddr->flow_rule = NULL;
 
 	/* If the multicast mac is added as a result of mc promiscuous vport,
@@ -645,7 +646,7 @@ static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
 	update_allmulti_vports(esw, vaddr, esw_mc);
 
 	if (esw_mc->uplink_rule)
-		mlx5_del_flow_rule(esw_mc->uplink_rule);
+		mlx5_del_flow_rules(esw_mc->uplink_rule);
 
 	l2addr_hash_del(esw_mc);
 	return 0;
@@ -828,14 +829,14 @@ static void esw_apply_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num,
 								UPLINK_VPORT);
 		allmulti_addr->refcnt++;
 	} else if (vport->allmulti_rule) {
-		mlx5_del_flow_rule(vport->allmulti_rule);
+		mlx5_del_flow_rules(vport->allmulti_rule);
 		vport->allmulti_rule = NULL;
 
 		if (--allmulti_addr->refcnt > 0)
 			goto promisc;
 
 		if (allmulti_addr->uplink_rule)
-			mlx5_del_flow_rule(allmulti_addr->uplink_rule);
+			mlx5_del_flow_rules(allmulti_addr->uplink_rule);
 		allmulti_addr->uplink_rule = NULL;
 	}
 
@@ -847,7 +848,7 @@ promisc:
 		vport->promisc_rule = esw_fdb_set_vport_promisc_rule(esw,
 								     vport_num);
 	} else if (vport->promisc_rule) {
-		mlx5_del_flow_rule(vport->promisc_rule);
+		mlx5_del_flow_rules(vport->promisc_rule);
 		vport->promisc_rule = NULL;
 	}
 }
@@ -1018,10 +1019,10 @@ static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
 					   struct mlx5_vport *vport)
 {
 	if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan))
-		mlx5_del_flow_rule(vport->egress.allowed_vlan);
+		mlx5_del_flow_rules(vport->egress.allowed_vlan);
 
 	if (!IS_ERR_OR_NULL(vport->egress.drop_rule))
-		mlx5_del_flow_rule(vport->egress.drop_rule);
+		mlx5_del_flow_rules(vport->egress.drop_rule);
 
 	vport->egress.allowed_vlan = NULL;
 	vport->egress.drop_rule = NULL;
@@ -1179,10 +1180,10 @@ static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
 					    struct mlx5_vport *vport)
 {
 	if (!IS_ERR_OR_NULL(vport->ingress.drop_rule))
-		mlx5_del_flow_rule(vport->ingress.drop_rule);
+		mlx5_del_flow_rules(vport->ingress.drop_rule);
 
 	if (!IS_ERR_OR_NULL(vport->ingress.allow_rule))
-		mlx5_del_flow_rule(vport->ingress.allow_rule);
+		mlx5_del_flow_rules(vport->ingress.allow_rule);
 
 	vport->ingress.drop_rule = NULL;
 	vport->ingress.allow_rule = NULL;
@@ -1212,6 +1213,7 @@ static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
 static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 				    struct mlx5_vport *vport)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 	u8 *smac_v;
@@ -1264,10 +1266,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 	}
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 	vport->ingress.allow_rule =
-		mlx5_add_flow_rule(vport->ingress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->ingress.acl, spec,
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->ingress.allow_rule)) {
 		err = PTR_ERR(vport->ingress.allow_rule);
 		esw_warn(esw->dev,
@@ -1278,10 +1280,10 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
 	}
 
 	memset(spec, 0, sizeof(*spec));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	vport->ingress.drop_rule =
-		mlx5_add_flow_rule(vport->ingress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_DROP,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->ingress.acl, spec,
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->ingress.drop_rule)) {
 		err = PTR_ERR(vport->ingress.drop_rule);
 		esw_warn(esw->dev,
@@ -1301,6 +1303,7 @@ out:
 static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 				   struct mlx5_vport *vport)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
@@ -1338,10 +1341,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan);
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
 	vport->egress.allowed_vlan =
-		mlx5_add_flow_rule(vport->egress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->egress.acl, spec,
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->egress.allowed_vlan)) {
 		err = PTR_ERR(vport->egress.allowed_vlan);
 		esw_warn(esw->dev,
@@ -1353,10 +1356,10 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw,
 
 	/* Drop others rule (star rule) */
 	memset(spec, 0, sizeof(*spec));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
 	vport->egress.drop_rule =
-		mlx5_add_flow_rule(vport->egress.acl, spec,
-				   MLX5_FLOW_CONTEXT_ACTION_DROP,
-				   0, NULL);
+		mlx5_add_flow_rules(vport->egress.acl, spec,
+				    &flow_act, NULL, 0);
 	if (IS_ERR(vport->egress.drop_rule)) {
 		err = PTR_ERR(vport->egress.drop_rule);
 		esw_warn(esw->dev,
@@ -1369,6 +1372,147 @@ out:
 	return err;
 }
 
+/* Vport QoS management */
+/* Create the root TSAR; a no-op returning 0 without e-switch QoS support. */
+static int esw_create_tsar(struct mlx5_eswitch *esw)
+{
+	u32 root_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+	struct mlx5_core_dev *mdev = esw->dev;
+	int rc;
+
+	if (!(MLX5_CAP_GEN(mdev, qos) && MLX5_CAP_QOS(mdev, esw_scheduling)))
+		return 0;
+	if (esw->qos.enabled)
+		return -EEXIST;
+
+	rc = mlx5_create_scheduling_element_cmd(mdev,
+						SCHEDULING_HIERARCHY_E_SWITCH,
+						&root_ctx,
+						&esw->qos.root_tsar_id);
+	if (!rc) {
+		esw->qos.enabled = true;
+		return 0;
+	}
+
+	esw_warn(esw->dev, "E-Switch create TSAR failed (%d)\n", rc);
+	return rc;
+}
+
+/* Tear down the root TSAR; the enabled flag is cleared even on failure. */
+static void esw_destroy_tsar(struct mlx5_eswitch *esw)
+{
+	int rc;
+
+	if (!esw->qos.enabled)
+		return;
+
+	rc = mlx5_destroy_scheduling_element_cmd(esw->dev,
+						 SCHEDULING_HIERARCHY_E_SWITCH,
+						 esw->qos.root_tsar_id);
+	esw->qos.enabled = false;
+	if (rc)
+		esw_warn(esw->dev, "E-Switch destroy TSAR failed (%d)\n", rc);
+}
+
+/*
+ * Add @vport_num as a vport element under the root TSAR, using
+ * @initial_max_rate as max_average_bw. Returns 0 when QoS is not in use.
+ */
+static int esw_vport_enable_qos(struct mlx5_eswitch *esw, int vport_num,
+				u32 initial_max_rate)
+{
+	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+	struct mlx5_vport *evport = &esw->vports[vport_num];
+	struct mlx5_core_dev *mdev = esw->dev;
+	void *attrs = MLX5_ADDR_OF(scheduling_context, &ctx,
+				   element_attributes);
+	int rc;
+
+	if (!(esw->qos.enabled && MLX5_CAP_GEN(mdev, qos) &&
+	      MLX5_CAP_QOS(mdev, esw_scheduling)))
+		return 0;
+	if (evport->qos.enabled)
+		return -EEXIST;
+
+	MLX5_SET(scheduling_context, &ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+	MLX5_SET(scheduling_context, &ctx, parent_element_id,
+		 esw->qos.root_tsar_id);
+	MLX5_SET(scheduling_context, &ctx, max_average_bw, initial_max_rate);
+	MLX5_SET(vport_element, attrs, vport_number, vport_num);
+
+	rc = mlx5_create_scheduling_element_cmd(mdev,
+						SCHEDULING_HIERARCHY_E_SWITCH,
+						&ctx, &evport->qos.esw_tsar_ix);
+	if (rc) {
+		esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n",
+			 vport_num, rc);
+		return rc;
+	}
+
+	evport->qos.enabled = true;
+	return 0;
+}
+
+/* Remove the vport's scheduling element; failure is logged, state is reset. */
+static void esw_vport_disable_qos(struct mlx5_eswitch *esw, int vport_num)
+{
+	struct mlx5_vport *evport = &esw->vports[vport_num];
+	int rc;
+
+	if (!evport->qos.enabled)
+		return;
+
+	rc = mlx5_destroy_scheduling_element_cmd(esw->dev,
+						 SCHEDULING_HIERARCHY_E_SWITCH,
+						 evport->qos.esw_tsar_ix);
+	evport->qos.enabled = false;
+	if (rc)
+		esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n",
+			 vport_num, rc);
+}
+
+/*
+ * Update max_average_bw of an already attached vport scheduling element.
+ * Returns -EOPNOTSUPP without e-switch QoS caps, -EIO if the vport has no
+ * enabled element, otherwise the result of the modify command.
+ */
+static int esw_vport_qos_config(struct mlx5_eswitch *esw, int vport_num,
+				u32 max_rate)
+{
+	u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+	struct mlx5_vport *evport = &esw->vports[vport_num];
+	struct mlx5_core_dev *mdev = esw->dev;
+	u32 modify_mask =
+		MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW;
+	void *attrs;
+	int rc;
+
+	if (!(MLX5_CAP_GEN(mdev, qos) && MLX5_CAP_QOS(mdev, esw_scheduling)))
+		return -EOPNOTSUPP;
+	if (!evport->qos.enabled)
+		return -EIO;
+
+	attrs = MLX5_ADDR_OF(scheduling_context, &ctx, element_attributes);
+	MLX5_SET(vport_element, attrs, vport_number, vport_num);
+	MLX5_SET(scheduling_context, &ctx, element_type,
+		 SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
+	MLX5_SET(scheduling_context, &ctx, parent_element_id,
+		 esw->qos.root_tsar_id);
+	MLX5_SET(scheduling_context, &ctx, max_average_bw, max_rate);
+
+	/* Only max_average_bw is flagged for modification */
+	rc = mlx5_modify_scheduling_element_cmd(mdev,
+						SCHEDULING_HIERARCHY_E_SWITCH,
+						&ctx, evport->qos.esw_tsar_ix,
+						modify_mask);
+	if (rc)
+		esw_warn(esw->dev, "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n",
+			 vport_num, rc);
+
+	return rc;
+}
+
 static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
 {
 	((u8 *)node_guid)[7] = mac[0];
@@ -1404,6 +1548,7 @@ static void esw_apply_vport_conf(struct mlx5_eswitch *esw,
 		esw_vport_egress_config(esw, vport);
 	}
 }
+
 static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 			     int enable_events)
 {
@@ -1417,6 +1562,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
 	/* Restore old vport configuration */
 	esw_apply_vport_conf(esw, vport);
 
+	/* Attach vport to the eswitch rate limiter; failure is only logged */
+	if (esw_vport_enable_qos(esw, vport_num, vport->info.max_rate))
+		esw_warn(esw->dev, "Failed to attach vport %d to eswitch rate limiter\n", vport_num);
+
 	/* Sync with current vport context */
 	vport->enabled_events = enable_events;
 	vport->enabled = true;
@@ -1455,7 +1604,7 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
 	 */
 	esw_vport_change_handle_locked(vport);
 	vport->enabled_events = 0;
-
+	esw_vport_disable_qos(esw, vport_num);
 	if (vport_num && esw->mode == SRIOV_LEGACY) {
 		mlx5_modify_vport_admin_state(esw->dev,
 					      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
@@ -1501,6 +1650,10 @@ int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode)
 	if (err)
 		goto abort;
 
+	err = esw_create_tsar(esw);	/* failure is only warned about here */
+	if (err)
+		esw_warn(esw->dev, "Failed to create eswitch TSAR\n");
+
 	enabled_events = (mode == SRIOV_LEGACY) ? SRIOV_VPORT_EVENTS : UC_ADDR_CHANGE;
 	for (i = 0; i <= nvfs; i++)
 		esw_enable_vport(esw, i, enabled_events);
@@ -1535,7 +1688,9 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
 		esw_disable_vport(esw, i);
 
 	if (mc_promisc && mc_promisc->uplink_rule)
-		mlx5_del_flow_rule(mc_promisc->uplink_rule);
+		mlx5_del_flow_rules(mc_promisc->uplink_rule);
+
+	esw_destroy_tsar(esw);
 
 	if (esw->mode == SRIOV_LEGACY)
 		esw_destroy_legacy_fdb_table(esw);
@@ -1627,6 +1782,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 		goto abort;
 	}
 
+	hash_init(esw->offloads.encap_tbl);
 	mutex_init(&esw->state_lock);
 
 	for (vport_num = 0; vport_num < total_vports; vport_num++) {
@@ -1642,6 +1798,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	esw->total_vports = total_vports;
 	esw->enabled_vports = 0;
 	esw->mode = SRIOV_NONE;
+	esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
 
 	dev->priv.eswitch = esw;
 	return 0;
@@ -1795,6 +1952,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 	ivi->qos = evport->info.qos;
 	ivi->spoofchk = evport->info.spoofchk;
 	ivi->trusted = evport->info.trusted;
+	ivi->max_tx_rate = evport->info.max_rate;
 	mutex_unlock(&esw->state_lock);
 
 	return 0;
@@ -1888,6 +2046,27 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 	return 0;
 }
 
+/* Apply @max_rate to @vport; info.max_rate is updated only on success. */
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw,
+				int vport, u32 max_rate)
+{
+	int rc;
+
+	if (!ESW_ALLOWED(esw))
+		return -EPERM;
+	if (!LEGAL_VPORT(esw, vport))
+		return -EINVAL;
+
+	/* Configure the element and cache the rate while holding state_lock */
+	mutex_lock(&esw->state_lock);
+	rc = esw_vport_qos_config(esw, vport, max_rate);
+	if (rc == 0)
+		esw->vports[vport].info.max_rate = max_rate;
+	mutex_unlock(&esw->state_lock);
+
+	return rc;
+}
+
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 				 int vport,
 				 struct ifla_vf_stats *vf_stats)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 2e2938e08cda..8661dd3f542c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -97,16 +97,16 @@ struct vport_ingress {
 	struct mlx5_flow_group *allow_spoofchk_only_grp;
 	struct mlx5_flow_group *allow_untagged_only_grp;
 	struct mlx5_flow_group *drop_grp;
-	struct mlx5_flow_rule  *allow_rule;
-	struct mlx5_flow_rule  *drop_rule;
+	struct mlx5_flow_handle  *allow_rule;
+	struct mlx5_flow_handle  *drop_rule;
 };
 
 struct vport_egress {
 	struct mlx5_flow_table *acl;
 	struct mlx5_flow_group *allowed_vlans_grp;
 	struct mlx5_flow_group *drop_grp;
-	struct mlx5_flow_rule  *allowed_vlan;
-	struct mlx5_flow_rule  *drop_rule;
+	struct mlx5_flow_handle  *allowed_vlan;
+	struct mlx5_flow_handle  *drop_rule;
 };
 
 struct mlx5_vport_info {
@@ -115,6 +115,7 @@ struct mlx5_vport_info {
 	u8                      qos;
 	u64                     node_guid;
 	int                     link_state;
+	u32                     max_rate;
 	bool                    spoofchk;
 	bool                    trusted;
 };
@@ -124,8 +125,8 @@ struct mlx5_vport {
 	int                     vport;
 	struct hlist_head       uc_list[MLX5_L2_ADDR_HASH_SIZE];
 	struct hlist_head       mc_list[MLX5_L2_ADDR_HASH_SIZE];
-	struct mlx5_flow_rule   *promisc_rule;
-	struct mlx5_flow_rule   *allmulti_rule;
+	struct mlx5_flow_handle *promisc_rule;
+	struct mlx5_flow_handle *allmulti_rule;
 	struct work_struct      vport_change_handler;
 
 	struct vport_ingress    ingress;
@@ -133,6 +134,11 @@ struct mlx5_vport {
 
 	struct mlx5_vport_info  info;
 
+	struct {
+		bool            enabled;
+		u32             esw_tsar_ix;
+	} qos;
+
 	bool                    enabled;
 	u16                     enabled_events;
 };
@@ -156,7 +162,7 @@ struct mlx5_eswitch_fdb {
 			struct mlx5_flow_table *fdb;
 			struct mlx5_flow_group *send_to_vport_grp;
 			struct mlx5_flow_group *miss_grp;
-			struct mlx5_flow_rule  *miss_rule;
+			struct mlx5_flow_handle *miss_rule;
 			int vlan_push_pop_refcount;
 		} offloads;
 	};
@@ -169,7 +175,7 @@ enum {
 };
 
 struct mlx5_esw_sq {
-	struct mlx5_flow_rule	*send_to_vport_rule;
+	struct mlx5_flow_handle	*send_to_vport_rule;
 	struct list_head	 list;
 };
 
@@ -180,9 +186,9 @@ struct mlx5_eswitch_rep {
 					 struct mlx5_eswitch_rep *rep);
 	u16		       vport;
 	u8		       hw_id[ETH_ALEN];
-	void		      *priv_data;
+	struct net_device      *netdev;
 
-	struct mlx5_flow_rule *vport_rx_rule;
+	struct mlx5_flow_handle *vport_rx_rule;
 	struct list_head       vport_sqs_list;
 	u16		       vlan;
 	u32		       vlan_refcount;
@@ -193,6 +199,8 @@ struct mlx5_esw_offload {
 	struct mlx5_flow_table *ft_offloads;
 	struct mlx5_flow_group *vport_rx_group;
 	struct mlx5_eswitch_rep *vport_reps;
+	DECLARE_HASHTABLE(encap_tbl, 8);
+	u8 inline_mode;
 };
 
 struct mlx5_eswitch {
@@ -209,6 +217,12 @@ struct mlx5_eswitch {
 	 */
 	struct mutex            state_lock;
 	struct esw_mc_addr      *mc_promisc;
+
+	struct {
+		bool            enabled;
+		u32             root_tsar_id;
+	} qos;
+
 	struct mlx5_esw_offload offloads;
 	int                     mode;
 };
@@ -234,6 +248,8 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 				    int vport, bool spoofchk);
 int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw,
 				 int vport_num, bool setting);
+int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw,
+				int vport, u32 max_rate);
 int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
 				  int vport, struct ifla_vf_info *ivi);
 int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
@@ -243,11 +259,11 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 struct mlx5_flow_spec;
 struct mlx5_esw_flow_attr;
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
 				struct mlx5_esw_flow_attr *attr);
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn);
 
 enum {
@@ -258,6 +274,24 @@ enum {
 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP  0x40
 #define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80
 
+struct mlx5_encap_info {	/* tunnel parameters used to look up an encap entry */
+	__be32 daddr;		/* tunnel key IPv4 destination address */
+	__be32 tun_id;		/* tunnel id, converted with tunnel_id_to_key32() */
+	__be16 tp_dst;		/* tunnel key UDP destination port */
+};
+
+struct mlx5_encap_entry {	/* one offloaded encap header, shared by flows */
+	struct hlist_node encap_hlist;	/* node in esw->offloads.encap_tbl */
+	struct list_head flows;		/* flows attached via their ->encap list node */
+	u32 encap_id;			/* copied into flow_act.encap_id when a rule is added */
+	struct neighbour *n;		/* NOTE(review): presumably the neighbour for daddr - confirm */
+	struct mlx5_encap_info tun_info;	/* key compared by cmp_encap_info() on lookup */
+	unsigned char h_dest[ETH_ALEN];	/* destination eth addr	*/
+
+	struct net_device *out_dev;	/* its netdev_priv()->ppriv becomes attr->out_rep */
+	int tunnel_type;		/* e.g. MLX5_HEADER_TYPE_VXLAN */
+};
+
 struct mlx5_esw_flow_attr {
 	struct mlx5_eswitch_rep *in_rep;
 	struct mlx5_eswitch_rep *out_rep;
@@ -265,6 +299,7 @@ struct mlx5_esw_flow_attr {
 	int	action;
 	u16	vlan;
 	bool	vlan_handled;
+	struct mlx5_encap_entry *encap;
 };
 
 int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
@@ -275,11 +310,15 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
 
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode);
 int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode);
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode);
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode);
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode);
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
 				     struct mlx5_eswitch_rep *rep);
 void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 				       int vport_index);
+struct net_device *mlx5_eswitch_get_uplink_netdev(struct mlx5_eswitch *esw);
 
 int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw,
 				 struct mlx5_esw_flow_attr *attr);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index d239f5d0ea36..466e161010f7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -43,33 +43,36 @@ enum {
 	FDB_SLOW_PATH
 };
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 				struct mlx5_flow_spec *spec,
 				struct mlx5_esw_flow_attr *attr)
 {
-	struct mlx5_flow_destination dest = { 0 };
+	struct mlx5_flow_destination dest[2] = {};
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_fc *counter = NULL;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
 	void *misc;
-	int action;
+	int i = 0;
 
 	if (esw->mode != SRIOV_OFFLOADS)
 		return ERR_PTR(-EOPNOTSUPP);
 
 	/* per flow vlan pop/push is emulated, don't set that into the firmware */
-	action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
+	flow_act.action = attr->action & ~(MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH | MLX5_FLOW_CONTEXT_ACTION_VLAN_POP);
 
-	if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
-		dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
-		dest.vport_num = attr->out_rep->vport;
-		action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	} else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
+		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+		dest[i].vport_num = attr->out_rep->vport;
+		i++;
+	}
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		counter = mlx5_fc_create(esw->dev, true);
 		if (IS_ERR(counter))
 			return ERR_CAST(counter);
-		dest.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest.counter = counter;
+		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		dest[i].counter = counter;
+		i++;
 	}
 
 	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters);
@@ -80,10 +83,14 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 
 	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS |
 				      MLX5_MATCH_MISC_PARAMETERS;
+	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
+		spec->match_criteria_enable |= MLX5_MATCH_INNER_HEADERS;
 
-	rule = mlx5_add_flow_rule((struct mlx5_flow_table *)esw->fdb_table.fdb,
-				  spec, action, 0, &dest);
+	if (attr->encap)
+		flow_act.encap_id = attr->encap->encap_id;
 
+	rule = mlx5_add_flow_rules((struct mlx5_flow_table *)esw->fdb_table.fdb,
+				   spec, &flow_act, dest, i);
 	if (IS_ERR(rule))
 		mlx5_fc_destroy(esw->dev, counter);
 
@@ -270,11 +277,12 @@ out:
 	return err;
 }
 
-static struct mlx5_flow_rule *
+static struct mlx5_flow_handle *
 mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
 	void *misc;
 
@@ -296,10 +304,10 @@ mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn
 	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = vport;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
-				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				       0, &dest);
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+					&flow_act, &dest, 1);
 	if (IS_ERR(flow_rule))
 		esw_warn(esw->dev, "FDB: Failed to add send to vport rule err %ld\n", PTR_ERR(flow_rule));
 out:
@@ -316,7 +324,7 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw,
 		return;
 
 	list_for_each_entry_safe(esw_sq, tmp, &rep->vport_sqs_list, list) {
-		mlx5_del_flow_rule(esw_sq->send_to_vport_rule);
+		mlx5_del_flow_rules(esw_sq->send_to_vport_rule);
 		list_del(&esw_sq->list);
 		kfree(esw_sq);
 	}
@@ -326,7 +334,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw,
 				 struct mlx5_eswitch_rep *rep,
 				 u16 *sqns_array, int sqns_num)
 {
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_esw_sq *esw_sq;
 	int err;
 	int i;
@@ -362,8 +370,9 @@ out_err:
 
 static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule *flow_rule = NULL;
+	struct mlx5_flow_handle *flow_rule = NULL;
 	struct mlx5_flow_spec *spec;
 	int err = 0;
 
@@ -376,10 +385,10 @@ static int esw_add_fdb_miss_rule(struct mlx5_eswitch *esw)
 
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
 	dest.vport_num = 0;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 
-	flow_rule = mlx5_add_flow_rule(esw->fdb_table.offloads.fdb, spec,
-				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				       0, &dest);
+	flow_rule = mlx5_add_flow_rules(esw->fdb_table.offloads.fdb, spec,
+					&flow_act, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		err = PTR_ERR(flow_rule);
 		esw_warn(esw->dev,  "FDB: Failed to add miss flow rule err %d\n", err);
@@ -406,6 +415,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	u32 *flow_group_in;
 	void *match_criteria;
 	int table_size, ix, err = 0;
+	u32 flags = 0;
 
 	flow_group_in = mlx5_vzalloc(inlen);
 	if (!flow_group_in)
@@ -420,9 +430,14 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	esw_debug(dev, "Create offloads FDB table, log_max_size(%d)\n",
 		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
 
+	if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, encap) &&
+	    MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
+		flags |= MLX5_FLOW_TABLE_TUNNEL_EN;
+
 	fdb = mlx5_create_auto_grouped_flow_table(root_ns, FDB_FAST_PATH,
 						  ESW_OFFLOADS_NUM_ENTRIES,
-						  ESW_OFFLOADS_NUM_GROUPS, 0);
+						  ESW_OFFLOADS_NUM_GROUPS, 0,
+						  flags);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create Fast path FDB Table err %d\n", err);
@@ -431,7 +446,7 @@ static int esw_create_offloads_fdb_table(struct mlx5_eswitch *esw, int nvports)
 	esw->fdb_table.fdb = fdb;
 
 	table_size = nvports + MAX_PF_SQ + 1;
-	fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0);
+	fdb = mlx5_create_flow_table(root_ns, FDB_SLOW_PATH, table_size, 0, 0);
 	if (IS_ERR(fdb)) {
 		err = PTR_ERR(fdb);
 		esw_warn(dev, "Failed to create slow path FDB Table err %d\n", err);
@@ -502,7 +517,7 @@ static void esw_destroy_offloads_fdb_table(struct mlx5_eswitch *esw)
 		return;
 
 	esw_debug(esw->dev, "Destroy offloads FDB Table\n");
-	mlx5_del_flow_rule(esw->fdb_table.offloads.miss_rule);
+	mlx5_del_flow_rules(esw->fdb_table.offloads.miss_rule);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.send_to_vport_grp);
 	mlx5_destroy_flow_group(esw->fdb_table.offloads.miss_grp);
 
@@ -523,7 +538,7 @@ static int esw_create_offloads_table(struct mlx5_eswitch *esw)
 		return -ENOMEM;
 	}
 
-	ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0);
+	ft_offloads = mlx5_create_flow_table(ns, 0, dev->priv.sriov.num_vfs + 2, 0, 0);
 	if (IS_ERR(ft_offloads)) {
 		err = PTR_ERR(ft_offloads);
 		esw_warn(esw->dev, "Failed to create offloads table, err %d\n", err);
@@ -586,11 +601,12 @@ static void esw_destroy_vport_rx_group(struct mlx5_eswitch *esw)
 	mlx5_destroy_flow_group(esw->offloads.vport_rx_group);
 }
 
-struct mlx5_flow_rule *
+struct mlx5_flow_handle *
 mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 {
+	struct mlx5_flow_act flow_act = {0};
 	struct mlx5_flow_destination dest;
-	struct mlx5_flow_rule *flow_rule;
+	struct mlx5_flow_handle *flow_rule;
 	struct mlx5_flow_spec *spec;
 	void *misc;
 
@@ -611,9 +627,9 @@ mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn)
 	dest.type = MLX5_FLOW_DESTINATION_TYPE_TIR;
 	dest.tir_num = tirn;
 
-	flow_rule = mlx5_add_flow_rule(esw->offloads.ft_offloads, spec,
-				       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
-				       0, &dest);
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	flow_rule = mlx5_add_flow_rules(esw->offloads.ft_offloads, spec,
+				       &flow_act, &dest, 1);
 	if (IS_ERR(flow_rule)) {
 		esw_warn(esw->dev, "fs offloads: Failed to add vport rx rule err %ld\n", PTR_ERR(flow_rule));
 		goto out;
@@ -641,6 +657,14 @@ static int esw_offloads_start(struct mlx5_eswitch *esw)
 		if (err1)
 			esw_warn(esw->dev, "Failed setting eswitch back to legacy, err %d\n", err);
 	}
+	if (esw->offloads.inline_mode == MLX5_INLINE_MODE_NONE) {
+		if (mlx5_eswitch_inline_mode_get(esw,
+						 num_vfs,
+						 &esw->offloads.inline_mode)) {
+			esw->offloads.inline_mode = MLX5_INLINE_MODE_L2;
+			esw_warn(esw->dev, "Inline mode is different between vports\n");
+		}
+	}
 	return err;
 }
 
@@ -755,6 +779,50 @@ static int esw_mode_to_devlink(u16 mlx5_mode, u16 *mode)
 	return 0;
 }
 
+static int esw_inline_mode_from_devlink(u8 mode, u8 *mlx5_mode)
+{
+	switch (mode) {
+	case DEVLINK_ESWITCH_INLINE_MODE_NONE:
+		*mlx5_mode = MLX5_INLINE_MODE_NONE;
+		break;
+	case DEVLINK_ESWITCH_INLINE_MODE_LINK:
+		*mlx5_mode = MLX5_INLINE_MODE_L2;
+		break;
+	case DEVLINK_ESWITCH_INLINE_MODE_NETWORK:
+		*mlx5_mode = MLX5_INLINE_MODE_IP;
+		break;
+	case DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT:
+		*mlx5_mode = MLX5_INLINE_MODE_TCP_UDP;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int esw_inline_mode_to_devlink(u8 mlx5_mode, u8 *mode)
+{
+	switch (mlx5_mode) {
+	case MLX5_INLINE_MODE_NONE:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_NONE;
+		break;
+	case MLX5_INLINE_MODE_L2:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_LINK;
+		break;
+	case MLX5_INLINE_MODE_IP:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_NETWORK;
+		break;
+	case MLX5_INLINE_MODE_TCP_UDP:
+		*mode = DEVLINK_ESWITCH_INLINE_MODE_TRANSPORT;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode)
 {
 	struct mlx5_core_dev *dev;
@@ -799,6 +867,95 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode)
 	return esw_mode_to_devlink(dev->priv.eswitch->mode, mode);
 }
 
+int mlx5_devlink_eswitch_inline_mode_set(struct devlink *devlink, u8 mode)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+	int num_vports = esw->enabled_vports;
+	int err;
+	int vport;
+	u8 mlx5_mode;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		return -EOPNOTSUPP;
+
+	err = esw_inline_mode_from_devlink(mode, &mlx5_mode);
+	if (err)
+		goto out;
+
+	for (vport = 1; vport < num_vports; vport++) {
+		err = mlx5_modify_nic_vport_min_inline(dev, vport, mlx5_mode);
+		if (err) {
+			esw_warn(dev, "Failed to set min inline on vport %d\n",
+				 vport);
+			goto revert_inline_mode;
+		}
+	}
+
+	esw->offloads.inline_mode = mlx5_mode;
+	return 0;
+
+revert_inline_mode:
+	while (--vport > 0)
+		mlx5_modify_nic_vport_min_inline(dev,
+						 vport,
+						 esw->offloads.inline_mode);
+out:
+	return err;
+}
+
+int mlx5_devlink_eswitch_inline_mode_get(struct devlink *devlink, u8 *mode)
+{
+	struct mlx5_core_dev *dev = devlink_priv(devlink);
+	struct mlx5_eswitch *esw = dev->priv.eswitch;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		return -EOPNOTSUPP;
+
+	return esw_inline_mode_to_devlink(esw->offloads.inline_mode, mode);
+}
+
+int mlx5_eswitch_inline_mode_get(struct mlx5_eswitch *esw, int nvfs, u8 *mode)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	int vport;
+	u8 prev_mlx5_mode, mlx5_mode = MLX5_INLINE_MODE_L2;
+
+	if (!MLX5_CAP_GEN(dev, vport_group_manager))
+		return -EOPNOTSUPP;
+
+	if (esw->mode == SRIOV_NONE)
+		return -EOPNOTSUPP;
+
+	if (MLX5_CAP_ETH(dev, wqe_inline_mode) !=
+	    MLX5_CAP_INLINE_MODE_VPORT_CONTEXT)
+		return -EOPNOTSUPP;
+
+	for (vport = 1; vport <= nvfs; vport++) {
+		mlx5_query_nic_vport_min_inline(dev, vport, &mlx5_mode);
+		if (vport > 1 && prev_mlx5_mode != mlx5_mode)
+			return -EINVAL;
+		prev_mlx5_mode = mlx5_mode;
+	}
+
+	*mode = mlx5_mode;
+	return 0;
+}
+
 void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 				     int vport_index,
 				     struct mlx5_eswitch_rep *__rep)
@@ -813,7 +970,7 @@ void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw,
 	rep->load   = __rep->load;
 	rep->unload = __rep->unload;
 	rep->vport  = __rep->vport;
-	rep->priv_data = __rep->priv_data;
+	rep->netdev = __rep->netdev;
 	ether_addr_copy(rep->hw_id, __rep->hw_id);
 
 	INIT_LIST_HEAD(&rep->vport_sqs_list);
@@ -833,3 +990,13 @@ void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw,
 
 	rep->valid = false;
 }
+
+struct net_device *mlx5_eswitch_get_uplink_netdev(struct mlx5_eswitch *esw)
+{
+#define UPLINK_REP_INDEX 0
+	struct mlx5_esw_offload *offloads = &esw->offloads;
+	struct mlx5_eswitch_rep *rep;
+
+	rep = &offloads->vport_reps[UPLINK_REP_INDEX];
+	return rep->netdev;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 113c32326333..c4478ecd8056 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -37,6 +37,7 @@
 #include "fs_core.h"
 #include "fs_cmd.h"
 #include "mlx5_core.h"
+#include "eswitch.h"
 
 int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev,
 			    struct mlx5_flow_table *ft)
@@ -61,8 +62,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 			       enum fs_flow_table_op_mod op_mod,
 			       enum fs_flow_table_type type, unsigned int level,
 			       unsigned int log_size, struct mlx5_flow_table
-			       *next_ft, unsigned int *table_id)
+			       *next_ft, unsigned int *table_id, u32 flags)
 {
+	int en_encap_decap = !!(flags & MLX5_FLOW_TABLE_TUNNEL_EN);
 	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
 	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
 	int err;
@@ -78,6 +80,9 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 		MLX5_SET(create_flow_table_in, in, other_vport, 1);
 	}
 
+	MLX5_SET(create_flow_table_in, in, decap_en, en_encap_decap);
+	MLX5_SET(create_flow_table_in, in, encap_en, en_encap_decap);
+
 	switch (op_mod) {
 	case FS_FT_OP_MOD_NORMAL:
 		if (next_ft) {
@@ -243,6 +248,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev,
 	MLX5_SET(flow_context, in_flow_context, group_id, group_id);
 	MLX5_SET(flow_context, in_flow_context, flow_tag, fte->flow_tag);
 	MLX5_SET(flow_context, in_flow_context, action, fte->action);
+	MLX5_SET(flow_context, in_flow_context, encap_id, fte->encap_id);
 	in_match_value = MLX5_ADDR_OF(flow_context, in_flow_context,
 				      match_value);
 	memcpy(in_match_value, &fte->val, MLX5_ST_SZ_BYTES(fte_match_param));
@@ -453,27 +459,32 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
 	*bytes = MLX5_GET64(traffic_counter, stats, octets);
 }
 
-#define MAX_ENCAP_SIZE (128)
-
-int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
-			 int header_type,
-			 size_t size,
-			 void *encap_header,
-			 u32 *encap_id)
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+		     int header_type,
+		     size_t size,
+		     void *encap_header,
+		     u32 *encap_id)
 {
+	int max_encap_size = MLX5_CAP_ESW(dev, max_encap_header_size);
 	u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)];
-	u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) +
-	      (MAX_ENCAP_SIZE / sizeof(u32))];
-	void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in,
-					     encap_header);
-	void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in,
-				    encap_header);
-	int inlen = header - (void *)in  + size;
+	void *encap_header_in;
+	void *header;
+	int inlen;
 	int err;
+	u32 *in;
 
-	if (size > MAX_ENCAP_SIZE)
+	if (size > MLX5_CAP_ESW(dev, max_encap_header_size))
 		return -EINVAL;
 
+	in = kzalloc(MLX5_ST_SZ_BYTES(alloc_encap_header_in) + max_encap_size,
+		     GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, encap_header);
+	header = MLX5_ADDR_OF(encap_header_in, encap_header_in, encap_header);
+	inlen = header - (void *)in  + size;
+
 	memset(in, 0, inlen);
 	MLX5_SET(alloc_encap_header_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_ENCAP_HEADER);
@@ -485,10 +496,11 @@ int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
 	*encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id);
+	kfree(in);
 	return err;
 }
 
-void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id)
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id)
 {
 	u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)];
 	u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
index c5bc4686c832..8fad80688536 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h
@@ -38,7 +38,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev,
 			       enum fs_flow_table_op_mod op_mod,
 			       enum fs_flow_table_type type, unsigned int level,
 			       unsigned int log_size, struct mlx5_flow_table
-			       *next_ft, unsigned int *table_id);
+			       *next_ft, unsigned int *table_id, u32 flags);
 
 int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev,
 				struct mlx5_flow_table *ft);
@@ -89,11 +89,4 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev,
 			  struct mlx5_cmd_fc_bulk *b, u16 id,
 			  u64 *packets, u64 *bytes);
 
-int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev,
-			 int header_type,
-			 size_t size,
-			 void *encap_header,
-			 u32 *encap_id);
-void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id);
-
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 914e5466f729..a263d8904a4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -153,6 +153,11 @@ static void del_rule(struct fs_node *node);
 static void del_flow_table(struct fs_node *node);
 static void del_flow_group(struct fs_node *node);
 static void del_fte(struct fs_node *node);
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+				struct mlx5_flow_destination *d2);
+static struct mlx5_flow_rule *
+find_flow_rule(struct fs_fte *fte,
+	       struct mlx5_flow_destination *dest);
 
 static void tree_init_node(struct fs_node *node,
 			   unsigned int refcount,
@@ -369,6 +374,7 @@ static void del_rule(struct fs_node *node)
 	struct mlx5_core_dev *dev = get_dev(node);
 	int match_len = MLX5_ST_SZ_BYTES(fte_match_param);
 	int err;
+	bool update_fte = false;
 
 	match_value = mlx5_vzalloc(match_len);
 	if (!match_value) {
@@ -387,13 +393,23 @@ static void del_rule(struct fs_node *node)
 		list_del(&rule->next_ft);
 		mutex_unlock(&rule->dest_attr.ft->lock);
 	}
+
+	if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER  &&
+	    --fte->dests_size) {
+		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
+		fte->action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		update_fte = true;
+		goto out;
+	}
+
 	if ((fte->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) &&
 	    --fte->dests_size) {
 		modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST),
-		err = mlx5_cmd_update_fte(dev, ft,
-					  fg->id,
-					  modify_mask,
-					  fte);
+		update_fte = true;
+	}
+out:
+	if (update_fte && fte->dests_size) {
+		err = mlx5_cmd_update_fte(dev, ft, fg->id, modify_mask, fte);
 		if (err)
 			mlx5_core_warn(dev,
 				       "%s can't del rule fg id=%d fte_index=%d\n",
@@ -444,8 +460,7 @@ static void del_flow_group(struct fs_node *node)
 			       fg->id, ft->id);
 }
 
-static struct fs_fte *alloc_fte(u8 action,
-				u32 flow_tag,
+static struct fs_fte *alloc_fte(struct mlx5_flow_act *flow_act,
 				u32 *match_value,
 				unsigned int index)
 {
@@ -457,9 +472,10 @@ static struct fs_fte *alloc_fte(u8 action,
 
 	memcpy(fte->val, match_value, sizeof(fte->val));
 	fte->node.type =  FS_TYPE_FLOW_ENTRY;
-	fte->flow_tag = flow_tag;
+	fte->flow_tag = flow_act->flow_tag;
 	fte->index = index;
-	fte->action = action;
+	fte->action = flow_act->action;
+	fte->encap_id = flow_act->encap_id;
 
 	return fte;
 }
@@ -489,7 +505,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in)
 
 static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte,
 						enum fs_flow_table_type table_type,
-						enum fs_flow_table_op_mod op_mod)
+						enum fs_flow_table_op_mod op_mod,
+						u32 flags)
 {
 	struct mlx5_flow_table *ft;
 
@@ -503,6 +520,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft
 	ft->type = table_type;
 	ft->vport = vport;
 	ft->max_fte = max_fte;
+	ft->flags = flags;
 	INIT_LIST_HEAD(&ft->fwd_rules);
 	mutex_init(&ft->lock);
 
@@ -641,8 +659,8 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio
 	return err;
 }
 
-int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
-				 struct mlx5_flow_destination *dest)
+static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
+					 struct mlx5_flow_destination *dest)
 {
 	struct mlx5_flow_table *ft;
 	struct mlx5_flow_group *fg;
@@ -667,6 +685,28 @@ int mlx5_modify_rule_destination(struct mlx5_flow_rule *rule,
 	return err;
 }
 
+int mlx5_modify_rule_destination(struct mlx5_flow_handle *handle,
+				 struct mlx5_flow_destination *new_dest,
+				 struct mlx5_flow_destination *old_dest)
+{
+	int i;
+
+	if (!old_dest) {
+		if (handle->num_rules != 1)
+			return -EINVAL;
+		return _mlx5_modify_rule_destination(handle->rule[0],
+						     new_dest);
+	}
+
+	for (i = 0; i < handle->num_rules; i++) {
+		if (mlx5_flow_dests_cmp(new_dest, &handle->rule[i]->dest_attr))
+			return _mlx5_modify_rule_destination(handle->rule[i],
+							     new_dest);
+	}
+
+	return -EINVAL;
+}
+
 /* Modify/set FWD rules that point on old_next_ft to point on new_next_ft  */
 static int connect_fwd_rules(struct mlx5_core_dev *dev,
 			     struct mlx5_flow_table *new_next_ft,
@@ -689,7 +729,7 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
 	list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
 	mutex_unlock(&old_next_ft->lock);
 	list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
-		err = mlx5_modify_rule_destination(iter, &dest);
+		err = _mlx5_modify_rule_destination(iter, &dest);
 		if (err)
 			pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
 			       new_next_ft->id);
@@ -739,7 +779,8 @@ static void list_add_flow_table(struct mlx5_flow_table *ft,
 static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
 							enum fs_flow_table_op_mod op_mod,
 							u16 vport, int prio,
-							int max_fte, u32 level)
+							int max_fte, u32 level,
+							u32 flags)
 {
 	struct mlx5_flow_table *next_ft = NULL;
 	struct mlx5_flow_table *ft;
@@ -772,7 +813,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 			      vport,
 			      max_fte ? roundup_pow_of_two(max_fte) : 0,
 			      root->table_type,
-			      op_mod);
+			      op_mod, flags);
 	if (!ft) {
 		err = -ENOMEM;
 		goto unlock_root;
@@ -782,7 +823,8 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0;
 	next_ft = find_next_chained_ft(fs_prio);
 	err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type,
-					 ft->level, log_table_sz, next_ft, &ft->id);
+					 ft->level, log_table_sz, next_ft, &ft->id,
+					 ft->flags);
 	if (err)
 		goto free_ft;
 
@@ -807,10 +849,11 @@ unlock_root:
 
 struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns,
 					       int prio, int max_fte,
-					       u32 level)
+					       u32 level,
+					       u32 flags)
 {
 	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio,
-					max_fte, level);
+					max_fte, level, flags);
 }
 
 struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns,
@@ -818,7 +861,7 @@ struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace
 						     u32 level, u16 vport)
 {
 	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio,
-					max_fte, level);
+					max_fte, level, 0);
 }
 
 struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
@@ -826,7 +869,7 @@ struct mlx5_flow_table *mlx5_create_lag_demux_flow_table(
 					       int prio, u32 level)
 {
 	return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0,
-					level);
+					level, 0);
 }
 EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table);
 
@@ -834,14 +877,15 @@ struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_nam
 							    int prio,
 							    int num_flow_table_entries,
 							    int max_num_groups,
-							    u32 level)
+							    u32 level,
+							    u32 flags)
 {
 	struct mlx5_flow_table *ft;
 
 	if (max_num_groups > num_flow_table_entries)
 		return ERR_PTR(-EINVAL);
 
-	ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level);
+	ft = mlx5_create_flow_table(ns, prio, num_flow_table_entries, level, flags);
 	if (IS_ERR(ft))
 		return ft;
 
@@ -918,55 +962,133 @@ static struct mlx5_flow_rule *alloc_rule(struct mlx5_flow_destination *dest)
 	return rule;
 }
 
-/* fte should not be deleted while calling this function */
-static struct mlx5_flow_rule *add_rule_fte(struct fs_fte *fte,
-					   struct mlx5_flow_group *fg,
-					   struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *alloc_handle(int num_rules)
+{
+	struct mlx5_flow_handle *handle;
+
+	handle = kzalloc(sizeof(*handle) + sizeof(handle->rule[0]) *
+			  num_rules, GFP_KERNEL);
+	if (!handle)
+		return NULL;
+
+	handle->num_rules = num_rules;
+
+	return handle;
+}
+
+static void destroy_flow_handle(struct fs_fte *fte,
+				struct mlx5_flow_handle *handle,
+				struct mlx5_flow_destination *dest,
+				int i)
+{
+	for (; --i >= 0;) {
+		if (atomic_dec_and_test(&handle->rule[i]->node.refcount)) {
+			fte->dests_size--;
+			list_del(&handle->rule[i]->node.list);
+			kfree(handle->rule[i]);
+		}
+	}
+	kfree(handle);
+}
+
+static struct mlx5_flow_handle *
+create_flow_handle(struct fs_fte *fte,
+		   struct mlx5_flow_destination *dest,
+		   int dest_num,
+		   int *modify_mask,
+		   bool *new_rule)
 {
+	struct mlx5_flow_handle *handle;
+	struct mlx5_flow_rule *rule = NULL;
+	static int count = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS);
+	static int dst = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
+	int type;
+	int i = 0;
+
+	handle = alloc_handle((dest_num) ? dest_num : 1);
+	if (!handle)
+		return ERR_PTR(-ENOMEM);
+
+	do {
+		if (dest) {
+			rule = find_flow_rule(fte, dest + i);
+			if (rule) {
+				atomic_inc(&rule->node.refcount);
+				goto rule_found;
+			}
+		}
+
+		*new_rule = true;
+		rule = alloc_rule(dest + i);
+		if (!rule)
+			goto free_rules;
+
+		/* Add dest to dests list- we need flow tables to be in the
+		 * end of the list for forward to next prio rules.
+		 */
+		tree_init_node(&rule->node, 1, del_rule);
+		if (dest &&
+		    dest[i].type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
+			list_add(&rule->node.list, &fte->node.children);
+		else
+			list_add_tail(&rule->node.list, &fte->node.children);
+		if (dest) {
+			fte->dests_size++;
+
+			type = dest[i].type ==
+				MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+			*modify_mask |= type ? count : dst;
+		}
+rule_found:
+		handle->rule[i] = rule;
+	} while (++i < dest_num);
+
+	return handle;
+
+free_rules:
+	destroy_flow_handle(fte, handle, dest, i);
+	return ERR_PTR(-ENOMEM);
+}
+
+/* fte should not be deleted while calling this function */
+static struct mlx5_flow_handle *
+add_rule_fte(struct fs_fte *fte,
+	     struct mlx5_flow_group *fg,
+	     struct mlx5_flow_destination *dest,
+	     int dest_num,
+	     bool update_action)
+{
+	struct mlx5_flow_handle *handle;
 	struct mlx5_flow_table *ft;
-	struct mlx5_flow_rule *rule;
 	int modify_mask = 0;
 	int err;
+	bool new_rule = false;
 
-	rule = alloc_rule(dest);
-	if (!rule)
-		return ERR_PTR(-ENOMEM);
+	handle = create_flow_handle(fte, dest, dest_num, &modify_mask,
+				    &new_rule);
+	if (IS_ERR(handle) || !new_rule)
+		goto out;
 
-	fs_get_obj(ft, fg->node.parent);
-	/* Add dest to dests list- we need flow tables to be in the
-	 * end of the list for forward to next prio rules.
-	 */
-	tree_init_node(&rule->node, 1, del_rule);
-	if (dest && dest->type != MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE)
-		list_add(&rule->node.list, &fte->node.children);
-	else
-		list_add_tail(&rule->node.list, &fte->node.children);
-	if (dest) {
-		fte->dests_size++;
+	if (update_action)
+		modify_mask |= BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION);
 
-		modify_mask |= dest->type == MLX5_FLOW_DESTINATION_TYPE_COUNTER ?
-			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS) :
-			BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST);
-	}
-
-	if (fte->dests_size == 1 || !dest)
+	fs_get_obj(ft, fg->node.parent);
+	if (!(fte->status & FS_FTE_STATUS_EXISTING))
 		err = mlx5_cmd_create_fte(get_dev(&ft->node),
 					  ft, fg->id, fte);
 	else
 		err = mlx5_cmd_update_fte(get_dev(&ft->node),
 					  ft, fg->id, modify_mask, fte);
 	if (err)
-		goto free_rule;
+		goto free_handle;
 
 	fte->status |= FS_FTE_STATUS_EXISTING;
 
-	return rule;
+out:
+	return handle;
 
-free_rule:
-	list_del(&rule->node.list);
-	kfree(rule);
-	if (dest)
-		fte->dests_size--;
+free_handle:
+	destroy_flow_handle(fte, handle, dest, handle->num_rules);
 	return ERR_PTR(err);
 }
 
@@ -995,15 +1117,14 @@ static unsigned int get_free_fte_index(struct mlx5_flow_group *fg,
 /* prev is output, prev->next = new_fte */
 static struct fs_fte *create_fte(struct mlx5_flow_group *fg,
 				 u32 *match_value,
-				 u8 action,
-				 u32 flow_tag,
+				 struct mlx5_flow_act *flow_act,
 				 struct list_head **prev)
 {
 	struct fs_fte *fte;
 	int index;
 
 	index = get_free_fte_index(fg, prev);
-	fte = alloc_fte(action, flow_tag, match_value, index);
+	fte = alloc_fte(flow_act, match_value, index);
 	if (IS_ERR(fte))
 		return fte;
 
@@ -1067,71 +1188,81 @@ out:
 	return fg;
 }
 
+static bool mlx5_flow_dests_cmp(struct mlx5_flow_destination *d1,
+				struct mlx5_flow_destination *d2)
+{
+	if (d1->type == d2->type) {
+		if ((d1->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
+		     d1->vport_num == d2->vport_num) ||
+		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
+		     d1->ft == d2->ft) ||
+		    (d1->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
+		     d1->tir_num == d2->tir_num))
+			return true;
+	}
+
+	return false;
+}
+
 static struct mlx5_flow_rule *find_flow_rule(struct fs_fte *fte,
 					     struct mlx5_flow_destination *dest)
 {
 	struct mlx5_flow_rule *rule;
 
 	list_for_each_entry(rule, &fte->node.children, node.list) {
-		if (rule->dest_attr.type == dest->type) {
-			if ((dest->type == MLX5_FLOW_DESTINATION_TYPE_VPORT &&
-			     dest->vport_num == rule->dest_attr.vport_num) ||
-			    (dest->type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
-			     dest->ft == rule->dest_attr.ft) ||
-			    (dest->type == MLX5_FLOW_DESTINATION_TYPE_TIR &&
-			     dest->tir_num == rule->dest_attr.tir_num))
-				return rule;
-		}
+		if (mlx5_flow_dests_cmp(&rule->dest_attr, dest))
+			return rule;
 	}
 	return NULL;
 }
 
-static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
-					  u32 *match_value,
-					  u8 action,
-					  u32 flow_tag,
-					  struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
+					    u32 *match_value,
+					    struct mlx5_flow_act *flow_act,
+					    struct mlx5_flow_destination *dest,
+					    int dest_num)
 {
-	struct fs_fte *fte;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *handle;
 	struct mlx5_flow_table *ft;
 	struct list_head *prev;
+	struct fs_fte *fte;
+	int i;
 
 	nested_lock_ref_node(&fg->node, FS_MUTEX_PARENT);
 	fs_for_each_fte(fte, fg) {
 		nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
 		if (compare_match_value(&fg->mask, match_value, &fte->val) &&
-		    action == fte->action && flow_tag == fte->flow_tag) {
-			rule = find_flow_rule(fte, dest);
-			if (rule) {
-				atomic_inc(&rule->node.refcount);
-				unlock_ref_node(&fte->node);
-				unlock_ref_node(&fg->node);
-				return rule;
+		    (flow_act->action & fte->action) &&
+		    flow_act->flow_tag == fte->flow_tag) {
+			int old_action = fte->action;
+
+			fte->action |= flow_act->action;
+			handle = add_rule_fte(fte, fg, dest, dest_num,
+					      old_action != flow_act->action);
+			if (IS_ERR(handle)) {
+				fte->action = old_action;
+				goto unlock_fte;
+			} else {
+				goto add_rules;
 			}
-			rule = add_rule_fte(fte, fg, dest);
-			unlock_ref_node(&fte->node);
-			if (IS_ERR(rule))
-				goto unlock_fg;
-			else
-				goto add_rule;
 		}
 		unlock_ref_node(&fte->node);
 	}
 	fs_get_obj(ft, fg->node.parent);
 	if (fg->num_ftes >= fg->max_ftes) {
-		rule = ERR_PTR(-ENOSPC);
+		handle = ERR_PTR(-ENOSPC);
 		goto unlock_fg;
 	}
 
-	fte = create_fte(fg, match_value, action, flow_tag, &prev);
+	fte = create_fte(fg, match_value, flow_act, &prev);
 	if (IS_ERR(fte)) {
-		rule = (void *)fte;
+		handle = (void *)fte;
 		goto unlock_fg;
 	}
 	tree_init_node(&fte->node, 0, del_fte);
-	rule = add_rule_fte(fte, fg, dest);
-	if (IS_ERR(rule)) {
+	nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
+	handle = add_rule_fte(fte, fg, dest, dest_num, false);
+	if (IS_ERR(handle)) {
 		kfree(fte);
 		goto unlock_fg;
 	}
@@ -1140,19 +1271,24 @@ static struct mlx5_flow_rule *add_rule_fg(struct mlx5_flow_group *fg,
 
 	tree_add_node(&fte->node, &fg->node);
 	list_add(&fte->node.list, prev);
-add_rule:
-	tree_add_node(&rule->node, &fte->node);
+add_rules:
+	for (i = 0; i < handle->num_rules; i++) {
+		if (atomic_read(&handle->rule[i]->node.refcount) == 1)
+			tree_add_node(&handle->rule[i]->node, &fte->node);
+	}
+unlock_fte:
+	unlock_ref_node(&fte->node);
 unlock_fg:
 	unlock_ref_node(&fg->node);
-	return rule;
+	return handle;
 }
 
-struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_rule *rule)
+struct mlx5_fc *mlx5_flow_rule_counter(struct mlx5_flow_handle *handle)
 {
 	struct mlx5_flow_rule *dst;
 	struct fs_fte *fte;
 
-	fs_get_obj(fte, rule->node.parent);
+	fs_get_obj(fte, handle->rule[0]->node.parent);
 
 	fs_for_each_dst(dst, fte) {
 		if (dst->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER)
@@ -1170,8 +1306,8 @@ static bool counter_is_valid(struct mlx5_fc *counter, u32 action)
 	if (!counter)
 		return false;
 
-	/* Hardware support counter for a drop action only */
-	return action == (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_COUNT);
+	return (action & (MLX5_FLOW_CONTEXT_ACTION_DROP |
+			  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST));
 }
 
 static bool dest_is_valid(struct mlx5_flow_destination *dest,
@@ -1191,18 +1327,22 @@ static bool dest_is_valid(struct mlx5_flow_destination *dest,
 	return true;
 }
 
-static struct mlx5_flow_rule *
-_mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		   struct mlx5_flow_spec *spec,
-		    u32 action,
-		    u32 flow_tag,
-		    struct mlx5_flow_destination *dest)
+static struct mlx5_flow_handle *
+_mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+		     struct mlx5_flow_spec *spec,
+		     struct mlx5_flow_act *flow_act,
+		     struct mlx5_flow_destination *dest,
+		     int dest_num)
+
 {
 	struct mlx5_flow_group *g;
-	struct mlx5_flow_rule *rule;
+	struct mlx5_flow_handle *rule;
+	int i;
 
-	if (!dest_is_valid(dest, action, ft))
-		return ERR_PTR(-EINVAL);
+	for (i = 0; i < dest_num; i++) {
+		if (!dest_is_valid(&dest[i], flow_act->action, ft))
+			return ERR_PTR(-EINVAL);
+	}
 
 	nested_lock_ref_node(&ft->node, FS_MUTEX_GRANDPARENT);
 	fs_for_each_fg(g, ft)
@@ -1211,7 +1351,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 					   g->mask.match_criteria,
 					   spec->match_criteria)) {
 			rule = add_rule_fg(g, spec->match_value,
-					   action, flow_tag, dest);
+					   flow_act, dest, dest_num);
 			if (!IS_ERR(rule) || PTR_ERR(rule) != -ENOSPC)
 				goto unlock;
 		}
@@ -1223,8 +1363,7 @@ _mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 		goto unlock;
 	}
 
-	rule = add_rule_fg(g, spec->match_value,
-			   action, flow_tag, dest);
+	rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num);
 	if (IS_ERR(rule)) {
 		/* Remove assumes refcount > 0 and autogroup creates a group
 		 * with a refcount = 0.
@@ -1245,22 +1384,22 @@ static bool fwd_next_prio_supported(struct mlx5_flow_table *ft)
 		(MLX5_CAP_FLOWTABLE(get_dev(&ft->node), nic_rx_multi_path_tirs)));
 }
 
-struct mlx5_flow_rule *
-mlx5_add_flow_rule(struct mlx5_flow_table *ft,
-		   struct mlx5_flow_spec *spec,
-		   u32 action,
-		   u32 flow_tag,
-		   struct mlx5_flow_destination *dest)
+struct mlx5_flow_handle *
+mlx5_add_flow_rules(struct mlx5_flow_table *ft,
+		    struct mlx5_flow_spec *spec,
+		    struct mlx5_flow_act *flow_act,
+		    struct mlx5_flow_destination *dest,
+		    int dest_num)
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	struct mlx5_flow_destination gen_dest;
 	struct mlx5_flow_table *next_ft = NULL;
-	struct mlx5_flow_rule *rule = NULL;
-	u32 sw_action = action;
+	struct mlx5_flow_handle *handle = NULL;
+	u32 sw_action = flow_act->action;
 	struct fs_prio *prio;
 
 	fs_get_obj(prio, ft->node.parent);
-	if (action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+	if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
 		if (!fwd_next_prio_supported(ft))
 			return ERR_PTR(-EOPNOTSUPP);
 		if (dest)
@@ -1271,34 +1410,40 @@ mlx5_add_flow_rule(struct mlx5_flow_table *ft,
 			gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
 			gen_dest.ft = next_ft;
 			dest = &gen_dest;
-			action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+			dest_num = 1;
+			flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 		} else {
 			mutex_unlock(&root->chain_lock);
 			return ERR_PTR(-EOPNOTSUPP);
 		}
 	}
 
-	rule = _mlx5_add_flow_rule(ft, spec, action, flow_tag, dest);
+	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, dest_num);
 
 	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
-		if (!IS_ERR_OR_NULL(rule) &&
-		    (list_empty(&rule->next_ft))) {
+		if (!IS_ERR_OR_NULL(handle) &&
+		    (list_empty(&handle->rule[0]->next_ft))) {
 			mutex_lock(&next_ft->lock);
-			list_add(&rule->next_ft, &next_ft->fwd_rules);
+			list_add(&handle->rule[0]->next_ft,
+				 &next_ft->fwd_rules);
 			mutex_unlock(&next_ft->lock);
-			rule->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+			handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 		}
 		mutex_unlock(&root->chain_lock);
 	}
-	return rule;
+	return handle;
 }
-EXPORT_SYMBOL(mlx5_add_flow_rule);
+EXPORT_SYMBOL(mlx5_add_flow_rules);
 
-void mlx5_del_flow_rule(struct mlx5_flow_rule *rule)
+void mlx5_del_flow_rules(struct mlx5_flow_handle *handle)
 {
-	tree_remove_node(&rule->node);
+	int i;
+
+	for (i = handle->num_rules - 1; i >= 0; i--)
+		tree_remove_node(&handle->rule[i]->node);
+	kfree(handle);
 }
-EXPORT_SYMBOL(mlx5_del_flow_rule);
+EXPORT_SYMBOL(mlx5_del_flow_rules);
 
 /* Assuming prio->node.children(flow tables) is sorted by level */
 static struct mlx5_flow_table *find_next_ft(struct mlx5_flow_table *ft)
@@ -1678,7 +1823,7 @@ static int create_anchor_flow_table(struct mlx5_flow_steering *steering)
 	ns = mlx5_get_flow_namespace(steering->dev, MLX5_FLOW_NAMESPACE_ANCHOR);
 	if (!ns)
 		return -EINVAL;
-	ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL);
+	ft = mlx5_create_flow_table(ns, ANCHOR_PRIO, ANCHOR_SIZE, ANCHOR_LEVEL, 0);
 	if (IS_ERR(ft)) {
 		mlx5_core_err(steering->dev, "Failed to create last anchor flow table");
 		return PTR_ERR(ft);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 71ff03bceabb..8e668c63f69e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -94,6 +94,11 @@ struct mlx5_flow_rule {
 	u32					sw_action;
 };
 
+struct mlx5_flow_handle {
+	int num_rules;			/* number of entries in rule[] */
+	struct mlx5_flow_rule *rule[];	/* flexible array of rule pointers */
+};
+
 /* Type of children is mlx5_flow_group */
 struct mlx5_flow_table {
 	struct fs_node			node;
@@ -112,6 +117,7 @@ struct mlx5_flow_table {
 	struct mutex			lock;
 	/* FWD rules that point on this flow table */
 	struct list_head		fwd_rules;
+	u32				flags;
 };
 
 struct mlx5_fc_cache {
@@ -145,6 +151,7 @@ struct fs_fte {
 	u32				flow_tag;
 	u32				index;
 	u32				action;
+	u32				encap_id;
 	enum fs_fte_status		status;
 	struct mlx5_fc			*counter;
 };
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 3b7c6a9f2b5f..7b4c339a8a9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -62,13 +62,13 @@ MODULE_DESCRIPTION("Mellanox Connect-IB, ConnectX-4 core driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRIVER_VERSION);
 
-int mlx5_core_debug_mask;
-module_param_named(debug_mask, mlx5_core_debug_mask, int, 0644);
+unsigned int mlx5_core_debug_mask;
+module_param_named(debug_mask, mlx5_core_debug_mask, uint, 0644);
 MODULE_PARM_DESC(debug_mask, "debug mask: 1 = dump cmd data, 2 = dump cmd exec time, 3 = both. Default=0");
 
 #define MLX5_DEFAULT_PROF	2
-static int prof_sel = MLX5_DEFAULT_PROF;
-module_param_named(prof_sel, prof_sel, int, 0444);
+static unsigned int prof_sel = MLX5_DEFAULT_PROF;
+module_param_named(prof_sel, prof_sel, uint, 0444);
 MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2");
 
 enum {
@@ -174,6 +174,41 @@ static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili)
 	return err;
 }
 
+/* Report "Linux,<DRIVER_NAME>,<DRIVER_VERSION>" to firmware through the
+ * SET_DRIVER_VERSION command.  Does nothing when the driver_version
+ * capability is not set.  The result of mlx5_cmd_exec() is not checked, so
+ * this function cannot fail its caller.
+ */
+static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
+{
+	int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
+					      driver_version);
+	u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0};
+	u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0};
+	char *string;
+
+	if (!MLX5_CAP_GEN(dev, driver_version))
+		return;
+
+	string = MLX5_ADDR_OF(set_driver_version_in, in, driver_version);
+
+	/* One bounded call builds the whole string.  A strncpy()/strncat()
+	 * chain that passes the space left in the field as the strncat()
+	 * count is off by one: strncat() stores a NUL after up to that
+	 * many characters, so a string filling the field writes one byte
+	 * past it.  snprintf() writes at most driver_ver_sz bytes including
+	 * the NUL and truncates a string that is too long.
+	 */
+	snprintf(string, driver_ver_sz, "Linux,%s,%s",
+		 DRIVER_NAME, DRIVER_VERSION);
+
+	/* Send the command */
+	MLX5_SET(set_driver_version_in, in, opcode,
+		 MLX5_CMD_OP_SET_DRIVER_VERSION);
+
+	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 static int set_dma_caps(struct pci_dev *pdev)
 {
 	int err;
@@ -732,13 +767,15 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 		u8 status;
 
 		mlx5_cmd_mbox_status(query_out, &status, &syndrome);
-		if (status == MLX5_CMD_STAT_BAD_OP_ERR) {
-			pr_debug("Only ISSI 0 is supported\n");
-			return 0;
+		if (!status || syndrome == MLX5_DRIVER_SYND) {
+			mlx5_core_err(dev, "Failed to query ISSI err(%d) status(%d) synd(%d)\n",
+				      err, status, syndrome);
+			return err;
 		}
 
-		pr_err("failed to query ISSI err(%d)\n", err);
-		return err;
+		mlx5_core_warn(dev, "Query ISSI is not supported by FW, ISSI is 0\n");
+		dev->issi = 0;
+		return 0;
 	}
 
 	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
@@ -752,7 +789,8 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
 				    set_out, sizeof(set_out));
 		if (err) {
-			pr_err("failed to set ISSI=1 err(%d)\n", err);
+			mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
+				      err);
 			return err;
 		}
 
@@ -1014,6 +1052,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
 		goto err_pagealloc_stop;
 	}
 
+	mlx5_set_driver_version(dev);
+
 	mlx5_start_health_poll(dev);
 
 	err = mlx5_query_hca_caps(dev);
@@ -1201,6 +1241,8 @@ static const struct devlink_ops mlx5_devlink_ops = {
 #ifdef CONFIG_MLX5_CORE_EN
 	.eswitch_mode_set = mlx5_devlink_eswitch_mode_set,
 	.eswitch_mode_get = mlx5_devlink_eswitch_mode_get,
+	.eswitch_inline_mode_set = mlx5_devlink_eswitch_inline_mode_set,
+	.eswitch_inline_mode_get = mlx5_devlink_eswitch_inline_mode_get,
 #endif
 };
 
@@ -1227,13 +1269,6 @@ static int init_one(struct pci_dev *pdev,
 
 	dev->pdev = pdev;
 	dev->event = mlx5_core_event;
-
-	if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) {
-		mlx5_core_warn(dev,
-			       "selected profile out of range, selecting default (%d)\n",
-			       MLX5_DEFAULT_PROF);
-		prof_sel = MLX5_DEFAULT_PROF;
-	}
 	dev->profile = &profile[prof_sel];
 
 	INIT_LIST_HEAD(&priv->ctx_list);
@@ -1423,6 +1458,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
 	{ PCI_VDEVICE(MELLANOX, 0x1017) },			/* ConnectX-5, PCIe 3.0 */
 	{ PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5 VF */
 	{ PCI_VDEVICE(MELLANOX, 0x1019) },			/* ConnectX-5, PCIe 4.0 */
+	{ PCI_VDEVICE(MELLANOX, 0x101a), MLX5_PCI_DEV_IS_VF},	/* ConnectX-5, PCIe 4.0 VF */
 	{ 0, }
 };
 
@@ -1450,10 +1486,22 @@ static struct pci_driver mlx5_core_driver = {
 	.sriov_configure   = mlx5_core_sriov_configure,
 };
 
+/* Reset an out-of-range prof_sel module parameter to MLX5_DEFAULT_PROF. */
+static void mlx5_core_verify_params(void)
+{
+	if (prof_sel >= ARRAY_SIZE(profile)) {
+		/* prof_sel is unsigned int, so it is printed with %u */
+		pr_warn("mlx5_core: WARNING: Invalid module parameter prof_sel %u, valid range 0-%zu, changing back to default(%d)\n",
+			prof_sel, ARRAY_SIZE(profile) - 1, MLX5_DEFAULT_PROF);
+		prof_sel = MLX5_DEFAULT_PROF;
+	}
+}
+
 static int __init init(void)
 {
 	int err;
 
+	mlx5_core_verify_params();
 	mlx5_register_debugfs();
 
 	err = pci_register_driver(&mlx5_core_driver);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index 187662c8ea96..e0a8fbdd1446 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -44,11 +44,11 @@
 
 #define MLX5_TOTAL_VPORTS(mdev) (1 + pci_sriov_get_totalvfs(mdev->pdev))
 
-extern int mlx5_core_debug_mask;
+extern uint mlx5_core_debug_mask;
 
 #define mlx5_core_dbg(__dev, format, ...)				\
-	dev_dbg(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format,	\
-		 (__dev)->priv.name, __func__, __LINE__, current->pid,	\
+	dev_dbg(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,		\
+		 __func__, __LINE__, current->pid,			\
 		 ##__VA_ARGS__)
 
 #define mlx5_core_dbg_mask(__dev, mask, format, ...)			\
@@ -63,8 +63,8 @@ do {									\
 	       ##__VA_ARGS__)
 
 #define mlx5_core_warn(__dev, format, ...)				\
-	dev_warn(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format,	\
-		(__dev)->priv.name, __func__, __LINE__, current->pid,	\
+	dev_warn(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format,	\
+		 __func__, __LINE__, current->pid,			\
 		##__VA_ARGS__)
 
 #define mlx5_core_info(__dev, format, ...)				\
@@ -75,12 +75,18 @@ enum {
 	MLX5_CMD_TIME, /* print command execution time */
 };
 
+enum {
+	MLX5_DRIVER_STATUS_ABORTED = 0xfe,
+	MLX5_DRIVER_SYND = 0xbadd00de,	/* tested in mlx5_core_set_issi() */
+};
+
 int mlx5_query_hca_caps(struct mlx5_core_dev *dev);
 int mlx5_query_board_id(struct mlx5_core_dev *dev);
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev);
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev);
 void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event,
 		     unsigned long param);
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe);
 void mlx5_enter_error_state(struct mlx5_core_dev *dev);
 void mlx5_disable_device(struct mlx5_core_dev *dev);
 void mlx5_recover_device(struct mlx5_core_dev *dev);
@@ -92,6 +98,13 @@ int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs);
 bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev);
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id);
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id);
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *context, u32 *element_id);
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *context, u32 element_id,
+				       u32 modify_bitmask);
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+					u32 element_id);
 int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev);
 cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev);
 u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx);
@@ -114,6 +127,12 @@ struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
 void mlx5_dev_list_lock(void);
 void mlx5_dev_list_unlock(void);
 int mlx5_dev_list_trylock(void);
+int mlx5_encap_alloc(struct mlx5_core_dev *dev,
+		     int header_type,
+		     size_t size,
+		     void *encap_header,
+		     u32 *encap_id);
+void mlx5_encap_dealloc(struct mlx5_core_dev *dev, u32 encap_id);
 
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index 34e7184e23c9..d2ec9d232a70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -548,6 +548,26 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev)
 	return num_tc - 1;
 }
 
+int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out)
+{
+	u32 in[MLX5_ST_SZ_DW(dcbx_param)] = {0};
+
+	MLX5_SET(dcbx_param, in, port_number, 1);
+	/* sizeof(in) is also passed as the length of the caller's out buffer */
+	return  mlx5_core_access_reg(mdev, in, sizeof(in), out,
+				    sizeof(in), MLX5_REG_DCBX_PARAM, 0, 0);
+}
+
+int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in)
+{
+	u32 out[MLX5_ST_SZ_DW(dcbx_param)];
+
+	MLX5_SET(dcbx_param, in, port_number, 1);
+	/* port_number is written into the caller's in; its length is sizeof(out) */
+	return mlx5_core_access_reg(mdev, in, sizeof(out), out,
+				    sizeof(out), MLX5_REG_DCBX_PARAM, 0, 1);
+}
+
 int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 {
 	u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
@@ -572,6 +592,28 @@ int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_prio_tc);
 
+/* Query MLX5_REG_QTCT for priority prio on port 1 and store the tclass field
+ * of the reply in *tc.  *tc is written only when the register access succeeds.
+ */
+int mlx5_query_port_prio_tc(struct mlx5_core_dev *mdev,
+			    u8 prio, u8 *tc)
+{
+	u32 out[MLX5_ST_SZ_DW(qtct_reg)] = {0};
+	u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0};
+	int ret;
+
+	MLX5_SET(qtct_reg, in, prio, prio);
+	MLX5_SET(qtct_reg, in, port_number, 1);
+
+	ret = mlx5_core_access_reg(mdev, in, sizeof(in), out,
+				   sizeof(out), MLX5_REG_QTCT, 0, 0);
+	if (!ret)
+		*tc = MLX5_GET(qtct_reg, out, tclass);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_prio_tc);
+
 static int mlx5_set_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *in,
 				   int inlen)
 {
@@ -625,6 +667,27 @@ int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw)
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_tc_bw_alloc);
 
+/* Read the register via mlx5_query_port_qetcr_reg() and store the
+ * bw_allocation of tc_configuration[tc] in *bw_pct.  NOTE(review): tc is
+ * not range checked here -- confirm callers bound it. */
+int mlx5_query_port_tc_bw_alloc(struct mlx5_core_dev *mdev,
+				u8 tc, u8 *bw_pct)
+{
+	u32 out[MLX5_ST_SZ_DW(qetc_reg)];
+	void *tc_conf;
+	int ret;
+
+	ret = mlx5_query_port_qetcr_reg(mdev, out, sizeof(out));
+	if (ret)
+		return ret;
+
+	tc_conf = MLX5_ADDR_OF(qetc_reg, out, tc_configuration[tc]);
+	*bw_pct = MLX5_GET(ets_tcn_config_reg, tc_conf, bw_allocation);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mlx5_query_port_tc_bw_alloc);
+
 int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev,
 				    u8 *max_bw_value,
 				    u8 *max_bw_units)
@@ -746,3 +809,60 @@ void mlx5_query_port_fcs(struct mlx5_core_dev *mdev, bool *supported,
 	*supported = !!(MLX5_GET(pcmr_reg, out, fcs_cap));
 	*enabled = !!(MLX5_GET(pcmr_reg, out, fcs_chk));
 }
+
+static const char * const mlx5_pme_status[MLX5_MODULE_STATUS_NUM] = {
+	"Cable plugged",   /* MLX5_MODULE_STATUS_PLUGGED    = 0x1 */
+	"Cable unplugged", /* MLX5_MODULE_STATUS_UNPLUGGED  = 0x2 */
+	"Cable error",     /* MLX5_MODULE_STATUS_ERROR      = 0x3 */
+};
+
+static const char * const mlx5_pme_error[MLX5_MODULE_EVENT_ERROR_NUM] = {
+	"Power budget exceeded",
+	"Long Range for non MLNX cable",
+	"Bus stuck(I2C or data shorted)",
+	"No EEPROM/retry timeout",
+	"Enforce part number list",
+	"Unknown identifier",
+	"High Temperature",
+	"Bad or shorted cable/module",
+	"Unknown status",	/* used for the clamped "unknown" error type */
+};
+
+/* Count a port module event in priv->pme_stats and log it (rate limited). */
+void mlx5_port_module_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe)
+{
+	enum port_module_event_status_type module_status;
+	enum port_module_event_error_type error_type;
+	struct mlx5_eqe_port_module *module_event_eqe;
+	struct mlx5_priv *priv = &dev->priv;
+	u8 module_num;
+
+	module_event_eqe = &eqe->data.port_module;
+	module_num = module_event_eqe->module;
+	module_status = module_event_eqe->module_status &
+			PORT_MODULE_EVENT_MODULE_STATUS_MASK;
+	error_type = module_event_eqe->error_type &
+		     PORT_MODULE_EVENT_ERROR_TYPE_MASK;
+
+	/* Status 0 is skipped: both status tables are indexed by status - 1 */
+	if (module_status && module_status < MLX5_MODULE_STATUS_ERROR) {
+		priv->pme_stats.status_counters[module_status - 1]++;
+	} else if (module_status == MLX5_MODULE_STATUS_ERROR) {
+		if (error_type >= MLX5_MODULE_EVENT_ERROR_UNKNOWN)
+			/* Unknown error type */
+			error_type = MLX5_MODULE_EVENT_ERROR_UNKNOWN;
+		priv->pme_stats.error_counters[error_type]++;
+	}
+
+	if (!printk_ratelimit())
+		return;
+
+	if (module_status && module_status < MLX5_MODULE_STATUS_ERROR)
+		mlx5_core_info(dev, "Port module event: module %u, %s\n",
+			       module_num, mlx5_pme_status[module_status - 1]);
+	else if (module_status == MLX5_MODULE_STATUS_ERROR)
+		mlx5_core_info(dev,
+			       "Port module event[error]: module %u, %s, %s\n",
+			       module_num, mlx5_pme_status[module_status - 1],
+			       mlx5_pme_error[error_type]);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index 104902a93a0b..e651e4c02867 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -36,6 +36,71 @@
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
+/* Scheduling element fw management.  The helpers below return the result of
+ * mlx5_cmd_exec(); their reply buffers use each command's _out layout.
+ */
+int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *ctx, u32 *element_id)
+{
+	u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(create_scheduling_element_out)] = {0};
+	void *schedc;
+	int err;
+
+	schedc = MLX5_ADDR_OF(create_scheduling_element_in, in,
+			      scheduling_context);
+	MLX5_SET(create_scheduling_element_in, in, opcode,
+		 MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT);
+	MLX5_SET(create_scheduling_element_in, in, scheduling_hierarchy,
+		 hierarchy);
+	memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	if (!err)	/* *element_id is written on success only */
+		*element_id = MLX5_GET(create_scheduling_element_out, out,
+				       scheduling_element_id);
+	return err;
+}
+
+int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+				       void *ctx, u32 element_id,
+				       u32 modify_bitmask)
+{
+	u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_out)] = {0};
+	void *schedc;
+
+	schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in,
+			      scheduling_context);
+	MLX5_SET(modify_scheduling_element_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_SCHEDULING_ELEMENT);
+	MLX5_SET(modify_scheduling_element_in, in, scheduling_element_id,
+		 element_id);
+	MLX5_SET(modify_scheduling_element_in, in, modify_bitmask,
+		 modify_bitmask);
+	MLX5_SET(modify_scheduling_element_in, in, scheduling_hierarchy,
+		 hierarchy);
+	memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
+int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
+					u32 element_id)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_out)] = {0};
+
+	MLX5_SET(destroy_scheduling_element_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
+	MLX5_SET(destroy_scheduling_element_in, in, scheduling_element_id,
+		 element_id);
+	MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy,
+		 hierarchy);
+
+	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+}
+
 /* Finds an entry where we can register the given rate
  * If the rate already exists, return the entry where it is registered,
  * otherwise return the first available entry.
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 525f17af108e..269e4401c342 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -113,15 +113,17 @@ static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
 	return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 }
 
-void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
-				     u8 *min_inline_mode)
+int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
+				    u16 vport, u8 *min_inline)
 {
 	u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+	int err;
 
-	mlx5_query_nic_vport_context(mdev, 0, out, sizeof(out));
-
-	*min_inline_mode = MLX5_GET(query_nic_vport_context_out, out,
-				    nic_vport_context.min_wqe_inline_mode);
+	err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+	if (!err)
+		*min_inline = MLX5_GET(query_nic_vport_context_out, out,
+				       nic_vport_context.min_wqe_inline_mode);
+	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
index 821a087c7ae2..921673c42bc9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c
@@ -101,13 +101,15 @@ err_db_free:
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
-		     struct mlx5_wq_ctrl *wq_ctrl)
+		     struct mlx5_frag_wq_ctrl *wq_ctrl)
 {
 	int err;
 
-	wq->log_stride = 6 + MLX5_GET(cqc, cqc, cqe_sz);
-	wq->log_sz = MLX5_GET(cqc, cqc, log_cq_size);
-	wq->sz_m1 = (1 << wq->log_sz) - 1;
+	wq->log_stride	= 6 + MLX5_GET(cqc, cqc, cqe_sz);
+	wq->log_sz	= MLX5_GET(cqc, cqc, log_cq_size);
+	wq->sz_m1	= (1 << wq->log_sz) - 1;
+	wq->log_frag_strides = PAGE_SHIFT - wq->log_stride;
+	wq->frag_sz_m1	= (1 << wq->log_frag_strides) - 1;
 
 	err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node);
 	if (err) {
@@ -115,14 +117,16 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		return err;
 	}
 
-	err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
-				  &wq_ctrl->buf, param->buf_numa_node);
+	err = mlx5_frag_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq),
+				       &wq_ctrl->frag_buf,
+				       param->buf_numa_node);
 	if (err) {
-		mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err);
+		mlx5_core_warn(mdev, "mlx5_frag_buf_alloc_node() failed, %d\n",
+			       err);
 		goto err_db_free;
 	}
 
-	wq->buf = wq_ctrl->buf.direct.buf;
+	wq->frag_buf = wq_ctrl->frag_buf;
 	wq->db  = wq_ctrl->db.db;
 
 	wq_ctrl->mdev = mdev;
@@ -184,3 +188,9 @@ void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl)
 	mlx5_buf_free(wq_ctrl->mdev, &wq_ctrl->buf);
 	mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
 }
+
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl)
+{
+	mlx5_frag_buf_free(wq_ctrl->mdev, &wq_ctrl->frag_buf);
+	mlx5_db_free(wq_ctrl->mdev, &wq_ctrl->db);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
index 6c2a8f95093c..d8afed898c31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h
@@ -47,6 +47,12 @@ struct mlx5_wq_ctrl {
 	struct mlx5_db		db;
 };
 
+struct mlx5_frag_wq_ctrl {
+	struct mlx5_core_dev	*mdev;		/* set by mlx5_cqwq_create() */
+	struct mlx5_frag_buf	frag_buf;	/* freed by mlx5_cqwq_destroy() */
+	struct mlx5_db		db;		/* freed by mlx5_cqwq_destroy() */
+};
+
 struct mlx5_wq_cyc {
 	void			*buf;
 	__be32			*db;
@@ -55,12 +61,14 @@ struct mlx5_wq_cyc {
 };
 
 struct mlx5_cqwq {
-	void			*buf;
+	struct mlx5_frag_buf	frag_buf;
 	__be32			*db;
 	u32			sz_m1;
+	u32			frag_sz_m1;
 	u32			cc; /* consumer counter */
 	u8			log_sz;
 	u8			log_stride;
+	u8			log_frag_strides;
 };
 
 struct mlx5_wq_ll {
@@ -81,7 +89,7 @@ u32 mlx5_wq_cyc_get_size(struct mlx5_wq_cyc *wq);
 
 int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 		     void *cqc, struct mlx5_cqwq *wq,
-		     struct mlx5_wq_ctrl *wq_ctrl);
+		     struct mlx5_frag_wq_ctrl *wq_ctrl);
 u32 mlx5_cqwq_get_size(struct mlx5_cqwq *wq);
 
 int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
@@ -90,6 +98,7 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param,
 u32 mlx5_wq_ll_get_size(struct mlx5_wq_ll *wq);
 
 void mlx5_wq_destroy(struct mlx5_wq_ctrl *wq_ctrl);
+void mlx5_cqwq_destroy(struct mlx5_frag_wq_ctrl *wq_ctrl);
 
 static inline u16 mlx5_wq_cyc_ctr2ix(struct mlx5_wq_cyc *wq, u16 ctr)
 {
@@ -116,7 +125,10 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq)
 
 static inline void *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix)
 {
-	return wq->buf + (ix << wq->log_stride);
+	unsigned int frag = (ix >> wq->log_frag_strides);
+
+	return wq->frag_buf.frags[frag].buf +
+		((wq->frag_sz_m1 & ix) << wq->log_stride);
 }
 
 static inline u32 mlx5_cqwq_get_wrap_cnt(struct mlx5_cqwq *wq)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Kconfig b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
index 5989f7cb5462..16f44b9aa076 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlxsw/Kconfig
@@ -19,6 +19,15 @@ config MLXSW_CORE_HWMON
 	---help---
 	  Say Y here if you want to expose HWMON interface on mlxsw devices.
 
+config MLXSW_CORE_THERMAL
+	bool "Thermal zone support for Mellanox Technologies Switch ASICs"
+	depends on MLXSW_CORE && THERMAL
+	depends on !(MLXSW_CORE=y && THERMAL=m)
+	default y
+	---help---
+	 Say Y here if you want to automatically control fan speed according
+	 to the ambient temperature reported by the ASIC.
+
 config MLXSW_PCI
 	tristate "PCI bus implementation for Mellanox Technologies Switch ASICs"
 	depends on PCI && HAS_DMA && HAS_IOMEM && MLXSW_CORE
@@ -29,9 +38,30 @@ config MLXSW_PCI
 	  To compile this driver as a module, choose M here: the
 	  module will be called mlxsw_pci.
 
+config MLXSW_I2C
+	tristate "I2C bus implementation for Mellanox Technologies Switch ASICs"
+	depends on I2C && MLXSW_CORE
+	default m
+	---help---
+	  This is I2C bus implementation for Mellanox Technologies Switch ASICs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called mlxsw_i2c.
+
+config MLXSW_SWITCHIB
+	tristate "Mellanox Technologies SwitchIB and SwitchIB-2 support"
+	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
+	default m
+	---help---
+	  This driver supports Mellanox Technologies SwitchIB and SwitchIB-2
+	  Infiniband Switch ASICs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called mlxsw_switchib.
+
 config MLXSW_SWITCHX2
 	tristate "Mellanox Technologies SwitchX-2 support"
-	depends on MLXSW_CORE && NET_SWITCHDEV
+	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV
 	default m
 	---help---
 	  This driver supports Mellanox Technologies SwitchX-2 Ethernet
@@ -42,7 +72,7 @@ config MLXSW_SWITCHX2
 
 config MLXSW_SPECTRUM
 	tristate "Mellanox Technologies Spectrum support"
-	depends on MLXSW_CORE && NET_SWITCHDEV && VLAN_8021Q
+	depends on MLXSW_CORE && MLXSW_PCI && NET_SWITCHDEV && VLAN_8021Q
 	default m
 	---help---
 	  This driver supports Mellanox Technologies Spectrum Ethernet
@@ -58,3 +88,14 @@ config MLXSW_SPECTRUM_DCB
 	---help---
 	  Say Y here if you want to use Data Center Bridging (DCB) in the
 	  driver.
+
+config MLXSW_MINIMAL
+	tristate "Mellanox Technologies minimal I2C support"
+	depends on MLXSW_CORE && MLXSW_I2C
+	default m
+	---help---
+	  This driver supports I2C access for Mellanox Technologies Switch
+	  ASICs.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called mlxsw_minimal.
diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index d20ae1838a64..fe8dadba15ab 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -1,8 +1,13 @@
 obj-$(CONFIG_MLXSW_CORE)	+= mlxsw_core.o
 mlxsw_core-objs			:= core.o
 mlxsw_core-$(CONFIG_MLXSW_CORE_HWMON) += core_hwmon.o
+mlxsw_core-$(CONFIG_MLXSW_CORE_THERMAL) += core_thermal.o
 obj-$(CONFIG_MLXSW_PCI)		+= mlxsw_pci.o
 mlxsw_pci-objs			:= pci.o
+obj-$(CONFIG_MLXSW_I2C)		+= mlxsw_i2c.o
+mlxsw_i2c-objs			:= i2c.o
+obj-$(CONFIG_MLXSW_SWITCHIB)	+= mlxsw_switchib.o
+mlxsw_switchib-objs		:= switchib.o
 obj-$(CONFIG_MLXSW_SWITCHX2)	+= mlxsw_switchx2.o
 mlxsw_switchx2-objs		:= switchx2.o
 obj-$(CONFIG_MLXSW_SPECTRUM)	+= mlxsw_spectrum.o
@@ -10,3 +15,5 @@ mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_switchdev.o spectrum_router.o \
 				   spectrum_kvdl.o
 mlxsw_spectrum-$(CONFIG_MLXSW_SPECTRUM_DCB)	+= spectrum_dcb.o
+obj-$(CONFIG_MLXSW_MINIMAL)	+= mlxsw_minimal.o
+mlxsw_minimal-objs		:= minimal.o
diff --git a/drivers/net/ethernet/mellanox/mlxsw/cmd.h b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
index 28271bedd957..56e19b0d2f8f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/cmd.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/cmd.h
@@ -513,6 +513,11 @@ static inline int mlxsw_cmd_unmap_fa(struct mlxsw_core *mlxsw_core)
  * are no more sources in the table, will return resource id 0xFFF to indicate
  * it.
  */
+
+#define MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID 0xffff
+#define MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES 100
+#define MLXSW_CMD_QUERY_RESOURCES_PER_QUERY 32
+
 static inline int mlxsw_cmd_query_resources(struct mlxsw_core *mlxsw_core,
 					    char *out_mbox, int index)
 {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c
index aa33d58b9f81..57a98849551b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.c
@@ -67,6 +67,7 @@
 #include "trap.h"
 #include "emad.h"
 #include "reg.h"
+#include "resources.h"
 
 static LIST_HEAD(mlxsw_core_driver_list);
 static DEFINE_SPINLOCK(mlxsw_core_driver_list_lock);
@@ -76,6 +77,7 @@ static const char mlxsw_core_driver_name[] = "mlxsw_core";
 static struct dentry *mlxsw_core_dbg_root;
 
 static struct workqueue_struct *mlxsw_wq;
+static struct workqueue_struct *mlxsw_owq;
 
 struct mlxsw_core_pcpu_stats {
 	u64			trap_rx_packets[MLXSW_TRAP_ID_MAX];
@@ -89,6 +91,23 @@ struct mlxsw_core_pcpu_stats {
 	u32			port_rx_invalid;
 };
 
+struct mlxsw_core_port {
+	struct devlink_port devlink_port; /* embedded; see __dl_port() */
+	void *port_driver_priv; /* NULL-tested by mlxsw_core_port_check() */
+	u8 local_port; /* also this entry's index in mlxsw_core->ports[] */
+};
+
+void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port)
+{
+	return mlxsw_core_port->port_driver_priv; /* may be NULL */
+}
+EXPORT_SYMBOL(mlxsw_core_port_driver_priv);
+
+static bool mlxsw_core_port_check(struct mlxsw_core_port *mlxsw_core_port)
+{
+	return mlxsw_core_port->port_driver_priv != NULL; /* priv attached? */
+}
+
 struct mlxsw_core {
 	struct mlxsw_driver *driver;
 	const struct mlxsw_bus *bus;
@@ -111,8 +130,10 @@ struct mlxsw_core {
 	struct {
 		u8 *mapping; /* lag_id+port_index to local_port mapping */
 	} lag;
-	struct mlxsw_resources resources;
+	struct mlxsw_res res;
 	struct mlxsw_hwmon *hwmon;
+	struct mlxsw_thermal *thermal;
+	struct mlxsw_core_port ports[MLXSW_PORT_MAX_PORTS];
 	unsigned long driver_priv[0];
 	/* driver_priv has to be always the last item */
 };
@@ -552,33 +573,18 @@ free_skb:
 	dev_kfree_skb(skb);
 }
 
-static const struct mlxsw_rx_listener mlxsw_emad_rx_listener = {
-	.func = mlxsw_emad_rx_listener_func,
-	.local_port = MLXSW_PORT_DONT_CARE,
-	.trap_id = MLXSW_TRAP_ID_ETHEMAD,
-};
-
-static int mlxsw_emad_traps_set(struct mlxsw_core *mlxsw_core)
-{
-	char htgt_pl[MLXSW_REG_HTGT_LEN];
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
-	int err;
-
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD);
-	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
-	if (err)
-		return err;
-
-	mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
-			    MLXSW_TRAP_ID_ETHEMAD);
-	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
-}
+static const struct mlxsw_listener mlxsw_emad_rx_listener =
+	MLXSW_RXL(mlxsw_emad_rx_listener_func, ETHEMAD, TRAP_TO_CPU, false,
+		  EMAD, DISCARD);
 
 static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
 {
 	u64 tid;
 	int err;
 
+	if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
+		return 0;
+
 	/* Set the upper 32 bits of the transaction ID field to a random
 	 * number. This allows us to discard EMADs addressed to other
 	 * devices.
@@ -590,39 +596,33 @@ static int mlxsw_emad_init(struct mlxsw_core *mlxsw_core)
 	INIT_LIST_HEAD(&mlxsw_core->emad.trans_list);
 	spin_lock_init(&mlxsw_core->emad.trans_list_lock);
 
-	err = mlxsw_core_rx_listener_register(mlxsw_core,
-					      &mlxsw_emad_rx_listener,
-					      mlxsw_core);
+	err = mlxsw_core_trap_register(mlxsw_core, &mlxsw_emad_rx_listener,
+				       mlxsw_core);
 	if (err)
 		return err;
 
-	err = mlxsw_emad_traps_set(mlxsw_core);
+	err = mlxsw_core->driver->basic_trap_groups_set(mlxsw_core);
 	if (err)
 		goto err_emad_trap_set;
-
 	mlxsw_core->emad.use_emad = true;
 
 	return 0;
 
 err_emad_trap_set:
-	mlxsw_core_rx_listener_unregister(mlxsw_core,
-					  &mlxsw_emad_rx_listener,
-					  mlxsw_core);
+	mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
+				   mlxsw_core);
 	return err;
 }
 
 static void mlxsw_emad_fini(struct mlxsw_core *mlxsw_core)
 {
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
 
-	mlxsw_core->emad.use_emad = false;
-	mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
-			    MLXSW_TRAP_ID_ETHEMAD);
-	mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+	if (!(mlxsw_core->bus->features & MLXSW_BUS_F_TXRX))
+		return;
 
-	mlxsw_core_rx_listener_unregister(mlxsw_core,
-					  &mlxsw_emad_rx_listener,
-					  mlxsw_core);
+	mlxsw_core->emad.use_emad = false;
+	mlxsw_core_trap_unregister(mlxsw_core, &mlxsw_emad_rx_listener,
+				   mlxsw_core);
 }
 
 static struct sk_buff *mlxsw_emad_alloc(const struct mlxsw_core *mlxsw_core,
@@ -822,17 +822,6 @@ static struct mlxsw_driver *mlxsw_core_driver_get(const char *kind)
 
 	spin_lock(&mlxsw_core_driver_list_lock);
 	mlxsw_driver = __driver_find(kind);
-	if (!mlxsw_driver) {
-		spin_unlock(&mlxsw_core_driver_list_lock);
-		request_module(MLXSW_MODULE_ALIAS_PREFIX "%s", kind);
-		spin_lock(&mlxsw_core_driver_list_lock);
-		mlxsw_driver = __driver_find(kind);
-	}
-	if (mlxsw_driver) {
-		if (!try_module_get(mlxsw_driver->owner))
-			mlxsw_driver = NULL;
-	}
-
 	spin_unlock(&mlxsw_core_driver_list_lock);
 	return mlxsw_driver;
 }
@@ -844,9 +833,6 @@ static void mlxsw_core_driver_put(const char *kind)
 	spin_lock(&mlxsw_core_driver_list_lock);
 	mlxsw_driver = __driver_find(kind);
 	spin_unlock(&mlxsw_core_driver_list_lock);
-	if (!mlxsw_driver)
-		return;
-	module_put(mlxsw_driver->owner);
 }
 
 static int mlxsw_core_debugfs_init(struct mlxsw_core *mlxsw_core)
@@ -933,6 +919,21 @@ static void *__dl_port(struct devlink_port *devlink_port)
 	return container_of(devlink_port, struct mlxsw_core_port, devlink_port);
 }
 
+/* devlink port_type_set op: forward the request to the driver's
+ * port_type_set callback, or fail with -EOPNOTSUPP if it has none.
+ */
+static int mlxsw_devlink_port_type_set(struct devlink_port *devlink_port,
+				       enum devlink_port_type port_type)
+{
+	struct mlxsw_core_port *port = __dl_port(devlink_port);
+	struct mlxsw_core *core = devlink_priv(devlink_port->devlink);
+
+	if (!core->driver->port_type_set)
+		return -EOPNOTSUPP;
+
+	return core->driver->port_type_set(core, port->local_port, port_type);
+}
+
 static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port,
 					  unsigned int sb_index, u16 pool_index,
 					  u32 *p_threshold)
@@ -941,7 +942,8 @@ static int mlxsw_devlink_sb_port_pool_get(struct devlink_port *devlink_port,
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
 	struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-	if (!mlxsw_driver->sb_port_pool_get)
+	if (!mlxsw_driver->sb_port_pool_get ||
+	    !mlxsw_core_port_check(mlxsw_core_port))
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_port_pool_get(mlxsw_core_port, sb_index,
 					      pool_index, p_threshold);
@@ -955,7 +957,8 @@ static int mlxsw_devlink_sb_port_pool_set(struct devlink_port *devlink_port,
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
 	struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-	if (!mlxsw_driver->sb_port_pool_set)
+	if (!mlxsw_driver->sb_port_pool_set ||
+	    !mlxsw_core_port_check(mlxsw_core_port))
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_port_pool_set(mlxsw_core_port, sb_index,
 					      pool_index, threshold);
@@ -971,7 +974,8 @@ mlxsw_devlink_sb_tc_pool_bind_get(struct devlink_port *devlink_port,
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
 	struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-	if (!mlxsw_driver->sb_tc_pool_bind_get)
+	if (!mlxsw_driver->sb_tc_pool_bind_get ||
+	    !mlxsw_core_port_check(mlxsw_core_port))
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_tc_pool_bind_get(mlxsw_core_port, sb_index,
 						 tc_index, pool_type,
@@ -988,7 +992,8 @@ mlxsw_devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
 	struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-	if (!mlxsw_driver->sb_tc_pool_bind_set)
+	if (!mlxsw_driver->sb_tc_pool_bind_set ||
+	    !mlxsw_core_port_check(mlxsw_core_port))
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_tc_pool_bind_set(mlxsw_core_port, sb_index,
 						 tc_index, pool_type,
@@ -1026,7 +1031,8 @@ mlxsw_devlink_sb_occ_port_pool_get(struct devlink_port *devlink_port,
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
 	struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-	if (!mlxsw_driver->sb_occ_port_pool_get)
+	if (!mlxsw_driver->sb_occ_port_pool_get ||
+	    !mlxsw_core_port_check(mlxsw_core_port))
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_occ_port_pool_get(mlxsw_core_port, sb_index,
 						  pool_index, p_cur, p_max);
@@ -1042,7 +1048,8 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
 	struct mlxsw_driver *mlxsw_driver = mlxsw_core->driver;
 	struct mlxsw_core_port *mlxsw_core_port = __dl_port(devlink_port);
 
-	if (!mlxsw_driver->sb_occ_tc_port_bind_get)
+	if (!mlxsw_driver->sb_occ_tc_port_bind_get ||
+	    !mlxsw_core_port_check(mlxsw_core_port))
 		return -EOPNOTSUPP;
 	return mlxsw_driver->sb_occ_tc_port_bind_get(mlxsw_core_port,
 						     sb_index, tc_index,
@@ -1050,6 +1057,7 @@ mlxsw_devlink_sb_occ_tc_port_bind_get(struct devlink_port *devlink_port,
 }
 
 static const struct devlink_ops mlxsw_devlink_ops = {
+	.port_type_set			= mlxsw_devlink_port_type_set,
 	.port_split			= mlxsw_devlink_port_split,
 	.port_unsplit			= mlxsw_devlink_port_unsplit,
 	.sb_pool_get			= mlxsw_devlink_sb_pool_get,
@@ -1101,14 +1109,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	}
 
 	err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile,
-			      &mlxsw_core->resources);
+			      &mlxsw_core->res);
 	if (err)
 		goto err_bus_init;
 
-	if (mlxsw_core->resources.max_lag_valid &&
-	    mlxsw_core->resources.max_ports_in_lag_valid) {
-		alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag *
-			mlxsw_core->resources.max_ports_in_lag;
+	if (MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG) &&
+	    MLXSW_CORE_RES_VALID(mlxsw_core, MAX_LAG_MEMBERS)) {
+		alloc_size = sizeof(u8) *
+			MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG) *
+			MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS);
 		mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL);
 		if (!mlxsw_core->lag.mapping) {
 			err = -ENOMEM;
@@ -1128,9 +1137,16 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	if (err)
 		goto err_hwmon_init;
 
-	err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+	err = mlxsw_thermal_init(mlxsw_core, mlxsw_bus_info,
+				 &mlxsw_core->thermal);
 	if (err)
-		goto err_driver_init;
+		goto err_thermal_init;
+
+	if (mlxsw_driver->init) {
+		err = mlxsw_driver->init(mlxsw_core, mlxsw_bus_info);
+		if (err)
+			goto err_driver_init;
+	}
 
 	err = mlxsw_core_debugfs_init(mlxsw_core);
 	if (err)
@@ -1139,8 +1155,11 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info,
 	return 0;
 
 err_debugfs_init:
-	mlxsw_core->driver->fini(mlxsw_core);
+	if (mlxsw_core->driver->fini)
+		mlxsw_core->driver->fini(mlxsw_core);
 err_driver_init:
+	mlxsw_thermal_fini(mlxsw_core->thermal);
+err_thermal_init:
 err_hwmon_init:
 	devlink_unregister(devlink);
 err_devlink_register:
@@ -1165,11 +1184,13 @@ void mlxsw_core_bus_device_unregister(struct mlxsw_core *mlxsw_core)
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
 
 	mlxsw_core_debugfs_fini(mlxsw_core);
-	mlxsw_core->driver->fini(mlxsw_core);
+	if (mlxsw_core->driver->fini)
+		mlxsw_core->driver->fini(mlxsw_core);
+	mlxsw_thermal_fini(mlxsw_core->thermal);
 	devlink_unregister(devlink);
 	mlxsw_emad_fini(mlxsw_core);
-	mlxsw_core->bus->fini(mlxsw_core->bus_priv);
 	kfree(mlxsw_core->lag.mapping);
+	mlxsw_core->bus->fini(mlxsw_core->bus_priv);
 	free_percpu(mlxsw_core->pcpu_stats);
 	devlink_free(devlink);
 	mlxsw_core_driver_put(device_kind);
@@ -1346,6 +1367,75 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_event_listener_unregister);
 
+/* Register @listener through the event or the RX path, per ->is_event. */
+static int mlxsw_core_listener_register(struct mlxsw_core *mlxsw_core,
+					const struct mlxsw_listener *listener,
+					void *priv)
+{
+	if (!listener->is_event)
+		return mlxsw_core_rx_listener_register(mlxsw_core,
+						&listener->u.rx_listener,
+						priv);
+	return mlxsw_core_event_listener_register(mlxsw_core,
+						  &listener->u.event_listener,
+						  priv);
+}
+
+/* Unregister @listener from the event or the RX path, per ->is_event. */
+static void mlxsw_core_listener_unregister(struct mlxsw_core *mlxsw_core,
+				      const struct mlxsw_listener *listener,
+				      void *priv)
+{
+	if (!listener->is_event)
+		mlxsw_core_rx_listener_unregister(mlxsw_core,
+						&listener->u.rx_listener, priv);
+	else
+		mlxsw_core_event_listener_unregister(mlxsw_core,
+						     &listener->u.event_listener,
+						     priv);
+}
+
+/* Register @listener, then write its action to the HPKT register. If the
+ * write fails the listener is unregistered again and the error returned.
+ */
+int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
+			     const struct mlxsw_listener *listener, void *priv)
+{
+	char hpkt_pl[MLXSW_REG_HPKT_LEN];
+	int err;
+
+	err = mlxsw_core_listener_register(mlxsw_core, listener, priv);
+	if (err)
+		return err;
+
+	/* HPKT payload: action, trap ID, trap group and control flag. */
+	mlxsw_reg_hpkt_pack(hpkt_pl, listener->action, listener->trap_id,
+			    listener->trap_group, listener->is_ctrl);
+	err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+	if (err)
+		mlxsw_core_listener_unregister(mlxsw_core, listener, priv);
+
+	return err;
+}
+EXPORT_SYMBOL(mlxsw_core_trap_register);
+
+void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
+				const struct mlxsw_listener *listener,
+				void *priv)
+{
+	if (!listener->is_event) { /* HPKT is rewritten for RX listeners only */
+		char hpkt_pl[MLXSW_REG_HPKT_LEN];
+
+		mlxsw_reg_hpkt_pack(hpkt_pl, listener->unreg_action,
+				    listener->trap_id, listener->trap_group,
+				    listener->is_ctrl);
+		mlxsw_reg_write(mlxsw_core, MLXSW_REG(hpkt), hpkt_pl);
+	}
+
+	mlxsw_core_listener_unregister(mlxsw_core, listener, priv);
+}
+EXPORT_SYMBOL(mlxsw_core_trap_unregister);
+
 static u64 mlxsw_core_tid_get(struct mlxsw_core *mlxsw_core)
 {
 	return atomic64_inc_return(&mlxsw_core->emad.tid);
@@ -1615,7 +1705,7 @@ EXPORT_SYMBOL(mlxsw_core_skb_receive);
 static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core,
 					u16 lag_id, u8 port_index)
 {
-	return mlxsw_core->resources.max_ports_in_lag * lag_id +
+	return MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS) * lag_id +
 	       port_index;
 }
 
@@ -1644,7 +1734,7 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
 {
 	int i;
 
-	for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) {
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_core, MAX_LAG_MEMBERS); i++) {
 		int index = mlxsw_core_lag_mapping_index(mlxsw_core,
 							 lag_id, i);
 
@@ -1654,34 +1744,97 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
 }
 EXPORT_SYMBOL(mlxsw_core_lag_mapping_clear);
 
-struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core)
+bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core,
+			  enum mlxsw_res_id res_id)
 {
-	return &mlxsw_core->resources;
+	return mlxsw_res_valid(&mlxsw_core->res, res_id);
 }
-EXPORT_SYMBOL(mlxsw_core_resources_get);
+EXPORT_SYMBOL(mlxsw_core_res_valid);
 
-int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core,
-			 struct mlxsw_core_port *mlxsw_core_port, u8 local_port,
-			 struct net_device *dev, bool split, u32 split_group)
+u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
+		       enum mlxsw_res_id res_id)
+{
+	return mlxsw_res_get(&mlxsw_core->res, res_id);
+}
+EXPORT_SYMBOL(mlxsw_core_res_get);
+
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_core);
+	struct mlxsw_core_port *mlxsw_core_port =
+					&mlxsw_core->ports[local_port];
 	struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
+	int err;
 
-	if (split)
-		devlink_port_split_set(devlink_port, split_group);
-	devlink_port_type_eth_set(devlink_port, dev);
-	return devlink_port_register(devlink, devlink_port, local_port);
+	mlxsw_core_port->local_port = local_port;
+	err = devlink_port_register(devlink, devlink_port, local_port);
+	if (err)
+		memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
+	return err;
 }
 EXPORT_SYMBOL(mlxsw_core_port_init);
 
-void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port)
+void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port)
 {
+	struct mlxsw_core_port *mlxsw_core_port =
+					&mlxsw_core->ports[local_port];
 	struct devlink_port *devlink_port = &mlxsw_core_port->devlink_port;
 
 	devlink_port_unregister(devlink_port);
+	memset(mlxsw_core_port, 0, sizeof(*mlxsw_core_port));
 }
 EXPORT_SYMBOL(mlxsw_core_port_fini);
 
+/* Attach driver priv, record split info if any, set devlink type to eth. */
+void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+			     void *port_driver_priv, struct net_device *dev,
+			     bool split, u32 split_group)
+{
+	struct mlxsw_core_port *port = &mlxsw_core->ports[local_port];
+	struct devlink_port *dl_port = &port->devlink_port;
+
+	port->port_driver_priv = port_driver_priv;
+	if (split)
+		devlink_port_split_set(dl_port, split_group);
+	devlink_port_type_eth_set(dl_port, dev);
+}
+EXPORT_SYMBOL(mlxsw_core_port_eth_set);
+
+/* Store the driver's port priv and set the devlink port type to IB. */
+void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+			    void *port_driver_priv)
+{
+	struct mlxsw_core_port *port = &mlxsw_core->ports[local_port];
+	struct devlink_port *dl_port = &port->devlink_port;
+
+	port->port_driver_priv = port_driver_priv;
+	devlink_port_type_ib_set(dl_port, NULL);
+}
+EXPORT_SYMBOL(mlxsw_core_port_ib_set);
+
+/* Store @port_driver_priv and clear the devlink port type. */
+void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
+			   void *port_driver_priv)
+{
+	struct mlxsw_core_port *port = &mlxsw_core->ports[local_port];
+	struct devlink_port *dl_port = &port->devlink_port;
+
+	port->port_driver_priv = port_driver_priv;
+	devlink_port_type_clear(dl_port);
+}
+EXPORT_SYMBOL(mlxsw_core_port_clear);
+
+/* Report the devlink port type currently recorded for @local_port. */
+enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
+						u8 local_port)
+{
+	struct mlxsw_core_port *port = &mlxsw_core->ports[local_port];
+	struct devlink_port *dl_port = &port->devlink_port;
+
+	return dl_port->type;
+}
+EXPORT_SYMBOL(mlxsw_core_port_type_get);
+
 static void mlxsw_core_buf_dump_dbg(struct mlxsw_core *mlxsw_core,
 				    const char *buf, size_t size)
 {
@@ -1748,6 +1901,18 @@ int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay)
 }
 EXPORT_SYMBOL(mlxsw_core_schedule_dw);
 
+int mlxsw_core_schedule_odw(struct delayed_work *dwork, unsigned long delay)
+{
+	return queue_delayed_work(mlxsw_owq, dwork, delay); /* ordered wq */
+}
+EXPORT_SYMBOL(mlxsw_core_schedule_odw);
+
+void mlxsw_core_flush_owq(void)
+{
+	flush_workqueue(mlxsw_owq); /* the ordered workqueue */
+}
+EXPORT_SYMBOL(mlxsw_core_flush_owq);
+
 static int __init mlxsw_core_module_init(void)
 {
 	int err;
@@ -1755,6 +1920,12 @@ static int __init mlxsw_core_module_init(void)
 	mlxsw_wq = alloc_workqueue(mlxsw_core_driver_name, WQ_MEM_RECLAIM, 0);
 	if (!mlxsw_wq)
 		return -ENOMEM;
+	mlxsw_owq = alloc_ordered_workqueue("%s_ordered", WQ_MEM_RECLAIM,
+					    mlxsw_core_driver_name);
+	if (!mlxsw_owq) {
+		err = -ENOMEM;
+		goto err_alloc_ordered_workqueue;
+	}
 	mlxsw_core_dbg_root = debugfs_create_dir(mlxsw_core_driver_name, NULL);
 	if (!mlxsw_core_dbg_root) {
 		err = -ENOMEM;
@@ -1763,6 +1934,8 @@ static int __init mlxsw_core_module_init(void)
 	return 0;
 
 err_debugfs_create_dir:
+	destroy_workqueue(mlxsw_owq);
+err_alloc_ordered_workqueue:
 	destroy_workqueue(mlxsw_wq);
 	return err;
 }
@@ -1770,6 +1943,7 @@ err_debugfs_create_dir:
 static void __exit mlxsw_core_module_exit(void)
 {
 	debugfs_remove_recursive(mlxsw_core_dbg_root);
+	destroy_workqueue(mlxsw_owq);
 	destroy_workqueue(mlxsw_wq);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h
index c4f550b6f783..a7f94fbc898b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/core.h
@@ -48,17 +48,11 @@
 
 #include "trap.h"
 #include "reg.h"
-
 #include "cmd.h"
-
-#define MLXSW_MODULE_ALIAS_PREFIX "mlxsw-driver-"
-#define MODULE_MLXSW_DRIVER_ALIAS(kind)	\
-	MODULE_ALIAS(MLXSW_MODULE_ALIAS_PREFIX kind)
-
-#define MLXSW_DEVICE_KIND_SWITCHX2 "switchx2"
-#define MLXSW_DEVICE_KIND_SPECTRUM "spectrum"
+#include "resources.h"
 
 struct mlxsw_core;
+struct mlxsw_core_port;
 struct mlxsw_driver;
 struct mlxsw_bus;
 struct mlxsw_bus_info;
@@ -96,6 +90,50 @@ struct mlxsw_event_listener {
 	enum mlxsw_event_trap_id trap_id;
 };
 
+struct mlxsw_listener {
+	u16 trap_id;
+	union {
+		struct mlxsw_rx_listener rx_listener;
+		struct mlxsw_event_listener event_listener;
+	} u; /* member in use is selected by is_event */
+	enum mlxsw_reg_hpkt_action action; /* HPKT action set on register */
+	enum mlxsw_reg_hpkt_action unreg_action; /* action on RX unregister */
+	u8 trap_group;
+	bool is_ctrl; /* should go via control buffer or not */
+	bool is_event; /* true: event listener, false: RX listener */
+};
+
+/* Static initializer for an RX-packet struct mlxsw_listener. */
+#define MLXSW_RXL(_func, _trap_id, _action, _is_ctrl, _trap_group,	\
+		  _unreg_action)					\
+	{								\
+		.trap_id = MLXSW_TRAP_ID_##_trap_id,			\
+		.u.rx_listener = {					\
+			.func = _func,					\
+			.local_port = MLXSW_PORT_DONT_CARE,		\
+			.trap_id = MLXSW_TRAP_ID_##_trap_id,		\
+		},							\
+		.action = MLXSW_REG_HPKT_ACTION_##_action,		\
+		.unreg_action = MLXSW_REG_HPKT_ACTION_##_unreg_action,	\
+		.trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group,	\
+		.is_ctrl = _is_ctrl,					\
+		.is_event = false,					\
+	}
+
+/* Static initializer for an event struct mlxsw_listener (trap to CPU). */
+#define MLXSW_EVENTL(_func, _trap_id, _trap_group)			\
+	{								\
+		.trap_id = MLXSW_TRAP_ID_##_trap_id,			\
+		.u.event_listener = {					\
+			.func = _func,					\
+			.trap_id = MLXSW_TRAP_ID_##_trap_id,		\
+		},							\
+		.action = MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,		\
+		.trap_group = MLXSW_REG_HTGT_TRAP_GROUP_##_trap_group,	\
+		.is_ctrl = false,					\
+		.is_event = true,					\
+	}
+
 int mlxsw_core_rx_listener_register(struct mlxsw_core *mlxsw_core,
 				    const struct mlxsw_rx_listener *rxl,
 				    void *priv);
@@ -110,6 +148,13 @@ void mlxsw_core_event_listener_unregister(struct mlxsw_core *mlxsw_core,
 					  const struct mlxsw_event_listener *el,
 					  void *priv);
 
+int mlxsw_core_trap_register(struct mlxsw_core *mlxsw_core,
+			     const struct mlxsw_listener *listener,
+			     void *priv);
+void mlxsw_core_trap_unregister(struct mlxsw_core *mlxsw_core,
+				const struct mlxsw_listener *listener,
+				void *priv);
+
 typedef void mlxsw_reg_trans_cb_t(struct mlxsw_core *mlxsw_core, char *payload,
 				  size_t payload_len, unsigned long cb_priv);
 
@@ -148,25 +193,22 @@ u8 mlxsw_core_lag_mapping_get(struct mlxsw_core *mlxsw_core,
 void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core,
 				  u16 lag_id, u8 local_port);
 
-struct mlxsw_core_port {
-	struct devlink_port devlink_port;
-};
-
-static inline void *
-mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port)
-{
-	/* mlxsw_core_port is ensured to always be the first field in driver
-	 * port structure.
-	 */
-	return mlxsw_core_port;
-}
-
-int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core,
-			 struct mlxsw_core_port *mlxsw_core_port, u8 local_port,
-			 struct net_device *dev, bool split, u32 split_group);
-void mlxsw_core_port_fini(struct mlxsw_core_port *mlxsw_core_port);
+void *mlxsw_core_port_driver_priv(struct mlxsw_core_port *mlxsw_core_port);
+int mlxsw_core_port_init(struct mlxsw_core *mlxsw_core, u8 local_port);
+void mlxsw_core_port_fini(struct mlxsw_core *mlxsw_core, u8 local_port);
+void mlxsw_core_port_eth_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+			     void *port_driver_priv, struct net_device *dev,
+			     bool split, u32 split_group);
+void mlxsw_core_port_ib_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+			    void *port_driver_priv);
+void mlxsw_core_port_clear(struct mlxsw_core *mlxsw_core, u8 local_port,
+			   void *port_driver_priv);
+enum devlink_port_type mlxsw_core_port_type_get(struct mlxsw_core *mlxsw_core,
+						u8 local_port);
 
 int mlxsw_core_schedule_dw(struct delayed_work *dwork, unsigned long delay);
+int mlxsw_core_schedule_odw(struct delayed_work *dwork, unsigned long delay);
+void mlxsw_core_flush_owq(void);
 
 #define MLXSW_CONFIG_PROFILE_SWID_COUNT 8
 
@@ -221,11 +263,13 @@ struct mlxsw_config_profile {
 struct mlxsw_driver {
 	struct list_head list;
 	const char *kind;
-	struct module *owner;
 	size_t priv_size;
 	int (*init)(struct mlxsw_core *mlxsw_core,
 		    const struct mlxsw_bus_info *mlxsw_bus_info);
 	void (*fini)(struct mlxsw_core *mlxsw_core);
+	int (*basic_trap_groups_set)(struct mlxsw_core *mlxsw_core);
+	int (*port_type_set)(struct mlxsw_core *mlxsw_core, u8 local_port,
+			     enum devlink_port_type new_type);
 	int (*port_split)(struct mlxsw_core *mlxsw_core, u8 local_port,
 			  unsigned int count);
 	int (*port_unsplit)(struct mlxsw_core *mlxsw_core, u8 local_port);
@@ -266,45 +310,25 @@ struct mlxsw_driver {
 	const struct mlxsw_config_profile *profile;
 };
 
-struct mlxsw_resources {
-	u32	max_span_valid:1,
-		max_lag_valid:1,
-		max_ports_in_lag_valid:1,
-		kvd_size_valid:1,
-		kvd_single_min_size_valid:1,
-		kvd_double_min_size_valid:1,
-		max_virtual_routers_valid:1,
-		max_system_ports_valid:1,
-		max_vlan_groups_valid:1,
-		max_regions_valid:1,
-		max_rif_valid:1;
-	u8      max_span;
-	u8	max_lag;
-	u8	max_ports_in_lag;
-	u32	kvd_size;
-	u32	kvd_single_min_size;
-	u32	kvd_double_min_size;
-	u16     max_virtual_routers;
-	u16	max_system_ports;
-	u16	max_vlan_groups;
-	u16	max_regions;
-	u16	max_rif;
+bool mlxsw_core_res_valid(struct mlxsw_core *mlxsw_core,
+			  enum mlxsw_res_id res_id);
 
-	/* Internal resources.
-	 * Determined by the SW, not queried from the HW.
-	 */
-	u32	kvd_single_size;
-	u32	kvd_double_size;
-	u32	kvd_linear_size;
-};
+#define MLXSW_CORE_RES_VALID(res, short_res_id)			\
+	mlxsw_core_res_valid(res, MLXSW_RES_ID_##short_res_id)
+
+u64 mlxsw_core_res_get(struct mlxsw_core *mlxsw_core,
+		       enum mlxsw_res_id res_id);
 
-struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core);
+#define MLXSW_CORE_RES_GET(res, short_res_id)			\
+	mlxsw_core_res_get(res, MLXSW_RES_ID_##short_res_id)
+
+#define MLXSW_BUS_F_TXRX	BIT(0)
 
 struct mlxsw_bus {
 	const char *kind;
 	int (*init)(void *bus_priv, struct mlxsw_core *mlxsw_core,
 		    const struct mlxsw_config_profile *profile,
-		    struct mlxsw_resources *resources);
+		    struct mlxsw_res *res);
 	void (*fini)(void *bus_priv);
 	bool (*skb_transmit_busy)(void *bus_priv,
 				  const struct mlxsw_tx_info *tx_info);
@@ -315,6 +339,7 @@ struct mlxsw_bus {
 			char *in_mbox, size_t in_mbox_size,
 			char *out_mbox, size_t out_mbox_size,
 			u8 *p_status);
+	u8 features;
 };
 
 struct mlxsw_bus_info {
@@ -350,4 +375,28 @@ static inline int mlxsw_hwmon_init(struct mlxsw_core *mlxsw_core,
 
 #endif
 
+struct mlxsw_thermal;
+
+#ifdef CONFIG_MLXSW_CORE_THERMAL
+
+int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core,
+		       const struct mlxsw_bus_info *mlxsw_bus_info,
+		       struct mlxsw_thermal **p_thermal);
+void mlxsw_thermal_fini(struct mlxsw_thermal *thermal);
+
+#else
+
+static inline int mlxsw_thermal_init(struct mlxsw_core *mlxsw_core,
+				     const struct mlxsw_bus_info *mlxsw_bus_info,
+				     struct mlxsw_thermal **p_thermal)
+{
+	return 0; /* stub: *p_thermal is not written */
+}
+
+static inline void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
+{
+}
+
+#endif
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
index 1ac8bf187168..ab710e37af99 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_hwmon.c
@@ -262,7 +262,7 @@ static void mlxsw_hwmon_attr_add(struct mlxsw_hwmon *mlxsw_hwmon,
 
 static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
 {
-	char mtcap_pl[MLXSW_REG_MTCAP_LEN];
+	char mtcap_pl[MLXSW_REG_MTCAP_LEN] = {0};
 	char mtmp_pl[MLXSW_REG_MTMP_LEN];
 	u8 sensor_count;
 	int i;
@@ -295,7 +295,7 @@ static int mlxsw_hwmon_temp_init(struct mlxsw_hwmon *mlxsw_hwmon)
 
 static int mlxsw_hwmon_fans_init(struct mlxsw_hwmon *mlxsw_hwmon)
 {
-	char mfcr_pl[MLXSW_REG_MFCR_LEN];
+	char mfcr_pl[MLXSW_REG_MFCR_LEN] = {0};
 	enum mlxsw_reg_mfcr_pwm_frequency freq;
 	unsigned int type_index;
 	unsigned int num;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
new file mode 100644
index 000000000000..d866c98c1a97
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
@@ -0,0 +1,442 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/core_thermal.c
+ * Copyright (c) 2016 Ivan Vecera <cera@cera.cz>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/sysfs.h>
+#include <linux/thermal.h>
+#include <linux/err.h>
+
+#include "core.h"
+
+#define MLXSW_THERMAL_POLL_INT	1000	/* ms */
+#define MLXSW_THERMAL_MAX_TEMP	110000	/* 110C */
+#define MLXSW_THERMAL_MAX_STATE	10
+#define MLXSW_THERMAL_MAX_DUTY	255
+
+struct mlxsw_thermal_trip {
+	int	type;
+	int	temp;
+	int	min_state;
+	int	max_state;
+};
+
+static const struct mlxsw_thermal_trip default_thermal_trips[] = {
+	{	/* In range - 0-40% PWM */
+		.type		= THERMAL_TRIP_ACTIVE,
+		.temp		= 75000,
+		.min_state	= 0,
+		.max_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
+	},
+	{	/* High - 40-100% PWM */
+		.type		= THERMAL_TRIP_ACTIVE,
+		.temp		= 80000,
+		.min_state	= (4 * MLXSW_THERMAL_MAX_STATE) / 10,
+		.max_state	= MLXSW_THERMAL_MAX_STATE,
+	},
+	{
+		/* Very high - 100% PWM */
+		.type		= THERMAL_TRIP_ACTIVE,
+		.temp		= 85000,
+		.min_state	= MLXSW_THERMAL_MAX_STATE,
+		.max_state	= MLXSW_THERMAL_MAX_STATE,
+	},
+	{	/* Warning */
+		.type		= THERMAL_TRIP_HOT,
+		.temp		= 105000,
+		.min_state	= MLXSW_THERMAL_MAX_STATE,
+		.max_state	= MLXSW_THERMAL_MAX_STATE,
+	},
+	{	/* Critical - soft poweroff */
+		.type		= THERMAL_TRIP_CRITICAL,
+		.temp		= MLXSW_THERMAL_MAX_TEMP,
+		.min_state	= MLXSW_THERMAL_MAX_STATE,
+		.max_state	= MLXSW_THERMAL_MAX_STATE,
+	}
+};
+
+#define MLXSW_THERMAL_NUM_TRIPS	ARRAY_SIZE(default_thermal_trips)
+
+/* Make sure all trips are writable */
+#define MLXSW_THERMAL_TRIP_MASK	(BIT(MLXSW_THERMAL_NUM_TRIPS) - 1)
+
+struct mlxsw_thermal {
+	struct mlxsw_core *core;
+	const struct mlxsw_bus_info *bus_info;
+	struct thermal_zone_device *tzdev;
+	struct thermal_cooling_device *cdevs[MLXSW_MFCR_PWMS_MAX];
+	struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS];
+	enum thermal_device_mode mode;
+};
+
+static inline u8 mlxsw_state_to_duty(int state)
+{
+	return DIV_ROUND_CLOSEST(state * MLXSW_THERMAL_MAX_DUTY,
+				 MLXSW_THERMAL_MAX_STATE);
+}
+
+static inline int mlxsw_duty_to_state(u8 duty)
+{
+	return DIV_ROUND_CLOSEST(duty * MLXSW_THERMAL_MAX_STATE,
+				 MLXSW_THERMAL_MAX_DUTY);
+}
+
+static int mlxsw_get_cooling_device_idx(struct mlxsw_thermal *thermal,
+					struct thermal_cooling_device *cdev)
+{
+	int i;
+
+	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
+		if (thermal->cdevs[i] == cdev)
+			return i;
+
+	return -ENODEV;
+}
+
+static int mlxsw_thermal_bind(struct thermal_zone_device *tzdev,
+			      struct thermal_cooling_device *cdev)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+	struct device *dev = thermal->bus_info->dev;
+	int i, err;
+
+	/* If the cooling device is one of ours bind it */
+	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+		return 0;
+
+	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+		const struct mlxsw_thermal_trip *trip = &thermal->trips[i];
+
+		err = thermal_zone_bind_cooling_device(tzdev, i, cdev,
+						       trip->max_state,
+						       trip->min_state,
+						       THERMAL_WEIGHT_DEFAULT);
+		if (err < 0) {
+			dev_err(dev, "Failed to bind cooling device to trip %d\n", i);
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int mlxsw_thermal_unbind(struct thermal_zone_device *tzdev,
+				struct thermal_cooling_device *cdev)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+	struct device *dev = thermal->bus_info->dev;
+	int i;
+	int err;
+
+	/* Nothing to unbind unless the cooling device is one of ours */
+	if (mlxsw_get_cooling_device_idx(thermal, cdev) < 0)
+		return 0;
+
+	for (i = 0; i < MLXSW_THERMAL_NUM_TRIPS; i++) {
+		err = thermal_zone_unbind_cooling_device(tzdev, i, cdev);
+		if (err < 0) {
+			dev_err(dev, "Failed to unbind cooling device\n");
+			return err;
+		}
+	}
+	return 0;
+}
+
+static int mlxsw_thermal_get_mode(struct thermal_zone_device *tzdev,
+				  enum thermal_device_mode *mode)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	*mode = thermal->mode;
+
+	return 0;
+}
+
+static int mlxsw_thermal_set_mode(struct thermal_zone_device *tzdev,
+				  enum thermal_device_mode mode)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	mutex_lock(&tzdev->lock);
+
+	if (mode == THERMAL_DEVICE_ENABLED)
+		tzdev->polling_delay = MLXSW_THERMAL_POLL_INT;
+	else
+		tzdev->polling_delay = 0;
+
+	mutex_unlock(&tzdev->lock);
+
+	thermal->mode = mode;
+	thermal_zone_device_update(tzdev, THERMAL_EVENT_UNSPECIFIED);
+
+	return 0;
+}
+
+static int mlxsw_thermal_get_temp(struct thermal_zone_device *tzdev,
+				  int *p_temp)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+	struct device *dev = thermal->bus_info->dev;
+	char mtmp_pl[MLXSW_REG_MTMP_LEN];
+	unsigned int temp;
+	int err;
+
+	mlxsw_reg_mtmp_pack(mtmp_pl, 0, false, false);
+
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mtmp), mtmp_pl);
+	if (err) {
+		dev_err(dev, "Failed to query temp sensor\n");
+		return err;
+	}
+	mlxsw_reg_mtmp_unpack(mtmp_pl, &temp, NULL, NULL);
+
+	*p_temp = (int) temp;
+	return 0;
+}
+
+static int mlxsw_thermal_get_trip_type(struct thermal_zone_device *tzdev,
+				       int trip,
+				       enum thermal_trip_type *p_type)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	*p_type = thermal->trips[trip].type;
+	return 0;
+}
+
+static int mlxsw_thermal_get_trip_temp(struct thermal_zone_device *tzdev,
+				       int trip, int *p_temp)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS)
+		return -EINVAL;
+
+	*p_temp = thermal->trips[trip].temp;
+	return 0;
+}
+
+static int mlxsw_thermal_set_trip_temp(struct thermal_zone_device *tzdev,
+				       int trip, int temp)
+{
+	struct mlxsw_thermal *thermal = tzdev->devdata;
+
+	if (trip < 0 || trip >= MLXSW_THERMAL_NUM_TRIPS ||
+	    temp > MLXSW_THERMAL_MAX_TEMP)
+		return -EINVAL;
+
+	thermal->trips[trip].temp = temp;
+	return 0;
+}
+
+static struct thermal_zone_device_ops mlxsw_thermal_ops = {
+	.bind = mlxsw_thermal_bind,
+	.unbind = mlxsw_thermal_unbind,
+	.get_mode = mlxsw_thermal_get_mode,
+	.set_mode = mlxsw_thermal_set_mode,
+	.get_temp = mlxsw_thermal_get_temp,
+	.get_trip_type	= mlxsw_thermal_get_trip_type,
+	.get_trip_temp	= mlxsw_thermal_get_trip_temp,
+	.set_trip_temp	= mlxsw_thermal_set_trip_temp,
+};
+
+static int mlxsw_thermal_get_max_state(struct thermal_cooling_device *cdev,
+				       unsigned long *p_state)
+{
+	*p_state = MLXSW_THERMAL_MAX_STATE;
+	return 0;
+}
+
+static int mlxsw_thermal_get_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long *p_state)
+
+{
+	struct mlxsw_thermal *thermal = cdev->devdata;
+	struct device *dev = thermal->bus_info->dev;
+	char mfsc_pl[MLXSW_REG_MFSC_LEN];
+	int err, idx;
+	u8 duty;
+
+	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
+	if (idx < 0)
+		return idx;
+
+	mlxsw_reg_mfsc_pack(mfsc_pl, idx, 0);
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
+	if (err) {
+		dev_err(dev, "Failed to query PWM duty\n");
+		return err;
+	}
+
+	duty = mlxsw_reg_mfsc_pwm_duty_cycle_get(mfsc_pl);
+	*p_state = mlxsw_duty_to_state(duty);
+	return 0;
+}
+
+static int mlxsw_thermal_set_cur_state(struct thermal_cooling_device *cdev,
+				       unsigned long state)
+{
+	struct mlxsw_thermal *thermal = cdev->devdata;
+	struct device *dev = thermal->bus_info->dev;
+	char mfsc_pl[MLXSW_REG_MFSC_LEN];
+	int err, idx;
+
+	/* Larger states would be truncated by the u8 duty conversion */
+	if (state > MLXSW_THERMAL_MAX_STATE)
+		return -EINVAL;
+	idx = mlxsw_get_cooling_device_idx(thermal, cdev);
+	if (idx < 0)
+		return idx;
+
+	mlxsw_reg_mfsc_pack(mfsc_pl, idx, mlxsw_state_to_duty(state));
+	err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsc), mfsc_pl);
+	if (err)
+		dev_err(dev, "Failed to write PWM duty\n");
+	return err;
+}
+
+static const struct thermal_cooling_device_ops mlxsw_cooling_ops = {
+	.get_max_state	= mlxsw_thermal_get_max_state,
+	.get_cur_state	= mlxsw_thermal_get_cur_state,
+	.set_cur_state	= mlxsw_thermal_set_cur_state,
+};
+
+int mlxsw_thermal_init(struct mlxsw_core *core,
+		       const struct mlxsw_bus_info *bus_info,
+		       struct mlxsw_thermal **p_thermal)
+{
+	char mfcr_pl[MLXSW_REG_MFCR_LEN] = { 0 };
+	enum mlxsw_reg_mfcr_pwm_frequency freq;
+	struct device *dev = bus_info->dev;
+	struct mlxsw_thermal *thermal;
+	u16 tacho_active;
+	u8 pwm_active;
+	int err, i;
+
+	thermal = devm_kzalloc(dev, sizeof(*thermal),
+			       GFP_KERNEL);
+	if (!thermal)
+		return -ENOMEM;
+
+	thermal->core = core;
+	thermal->bus_info = bus_info;
+	memcpy(thermal->trips, default_thermal_trips, sizeof(thermal->trips));
+
+	err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfcr), mfcr_pl);
+	if (err) {
+		dev_err(dev, "Failed to probe PWMs\n");
+		goto err_free_thermal;
+	}
+	mlxsw_reg_mfcr_unpack(mfcr_pl, &freq, &tacho_active, &pwm_active);
+
+	for (i = 0; i < MLXSW_MFCR_TACHOS_MAX; i++) {
+		if (tacho_active & BIT(i)) {
+			char mfsl_pl[MLXSW_REG_MFSL_LEN];
+
+			mlxsw_reg_mfsl_pack(mfsl_pl, i, 0, 0);
+
+			/* We need to query the register to preserve maximum */
+			err = mlxsw_reg_query(thermal->core, MLXSW_REG(mfsl),
+					      mfsl_pl);
+			if (err)
+				goto err_free_thermal;
+
+			/* set the minimal RPMs to 0 */
+			mlxsw_reg_mfsl_tach_min_set(mfsl_pl, 0);
+			err = mlxsw_reg_write(thermal->core, MLXSW_REG(mfsl),
+					      mfsl_pl);
+			if (err)
+				goto err_free_thermal;
+		}
+	}
+	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
+		if (pwm_active & BIT(i)) {
+			struct thermal_cooling_device *cdev;
+
+			cdev = thermal_cooling_device_register("Fan", thermal,
+							&mlxsw_cooling_ops);
+			if (IS_ERR(cdev)) {
+				err = PTR_ERR(cdev);
+				dev_err(dev, "Failed to register cooling device\n");
+				goto err_unreg_cdevs;
+			}
+			thermal->cdevs[i] = cdev;
+		}
+	}
+
+	thermal->tzdev = thermal_zone_device_register("mlxsw",
+						      MLXSW_THERMAL_NUM_TRIPS,
+						      MLXSW_THERMAL_TRIP_MASK,
+						      thermal,
+						      &mlxsw_thermal_ops,
+						      NULL, 0,
+						      MLXSW_THERMAL_POLL_INT);
+	if (IS_ERR(thermal->tzdev)) {
+		err = PTR_ERR(thermal->tzdev);
+		dev_err(dev, "Failed to register thermal zone\n");
+		goto err_unreg_cdevs;
+	}
+
+	thermal->mode = THERMAL_DEVICE_ENABLED;
+	*p_thermal = thermal;
+	return 0;
+err_unreg_cdevs:
+	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++)
+		if (thermal->cdevs[i])
+			thermal_cooling_device_unregister(thermal->cdevs[i]);
+err_free_thermal:
+	devm_kfree(dev, thermal);
+	return err;
+}
+
+void mlxsw_thermal_fini(struct mlxsw_thermal *thermal)
+{
+	int i;
+
+	if (thermal->tzdev) {
+		thermal_zone_device_unregister(thermal->tzdev);
+		thermal->tzdev = NULL;
+	}
+
+	for (i = 0; i < MLXSW_MFCR_PWMS_MAX; i++) {
+		if (thermal->cdevs[i]) {
+			thermal_cooling_device_unregister(thermal->cdevs[i]);
+			thermal->cdevs[i] = NULL;
+		}
+	}
+
+	devm_kfree(thermal->bus_info->dev, thermal);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.c b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
new file mode 100644
index 000000000000..e50c8db2602a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.c
@@ -0,0 +1,582 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/i2c.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/mutex.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/slab.h>
+
+#include "cmd.h"
+#include "core.h"
+#include "i2c.h"
+
+static const char mlxsw_i2c_driver_name[] = "mlxsw_i2c";
+
+#define MLXSW_I2C_CIR2_BASE		0x72000
+#define MLXSW_I2C_CIR_STATUS_OFF	0x18
+#define MLXSW_I2C_CIR2_OFF_STATUS	(MLXSW_I2C_CIR2_BASE + \
+					 MLXSW_I2C_CIR_STATUS_OFF)
+#define MLXSW_I2C_OPMOD_SHIFT		12
+#define MLXSW_I2C_GO_BIT_SHIFT		23
+#define MLXSW_I2C_CIR_CTRL_STATUS_SHIFT	24
+#define MLXSW_I2C_GO_BIT		BIT(MLXSW_I2C_GO_BIT_SHIFT)
+#define MLXSW_I2C_GO_OPMODE		BIT(MLXSW_I2C_OPMOD_SHIFT)
+#define MLXSW_I2C_SET_IMM_CMD		(MLXSW_I2C_GO_OPMODE | \
+					 MLXSW_CMD_OPCODE_QUERY_FW)
+#define MLXSW_I2C_PUSH_IMM_CMD		(MLXSW_I2C_GO_BIT | \
+					 MLXSW_I2C_SET_IMM_CMD)
+#define MLXSW_I2C_SET_CMD		(MLXSW_CMD_OPCODE_ACCESS_REG)
+#define MLXSW_I2C_PUSH_CMD		(MLXSW_I2C_GO_BIT | MLXSW_I2C_SET_CMD)
+#define MLXSW_I2C_TLV_HDR_SIZE		0x10
+#define MLXSW_I2C_ADDR_WIDTH		4
+#define MLXSW_I2C_PUSH_CMD_SIZE		(MLXSW_I2C_ADDR_WIDTH + 4)
+#define MLXSW_I2C_READ_SEMA_SIZE	4
+#define MLXSW_I2C_PREP_SIZE		(MLXSW_I2C_ADDR_WIDTH + 28)
+#define MLXSW_I2C_MBOX_SIZE		20
+#define MLXSW_I2C_MBOX_OUT_PARAM_OFF	12
+#define MLXSW_I2C_MAX_BUFF_SIZE		32
+#define MLXSW_I2C_MBOX_OFFSET_BITS	20
+#define MLXSW_I2C_MBOX_SIZE_BITS	12
+#define MLXSW_I2C_ADDR_BUF_SIZE		4
+#define MLXSW_I2C_BLK_MAX		32
+#define MLXSW_I2C_RETRY			5
+#define MLXSW_I2C_TIMEOUT_MSECS		5000
+
+/**
+ * struct mlxsw_i2c - device private data:
+ * @cmd.mb_size_in: input mailbox size;
+ * @cmd.mb_off_in: input mailbox offset in register space;
+ * @cmd.mb_size_out: output mailbox size;
+ * @cmd.mb_off_out: output mailbox offset in register space;
+ * @cmd.lock: command execution lock;
+ * @dev: I2C device;
+ * @core: switch core pointer;
+ * @bus_info: bus info block;
+ */
+struct mlxsw_i2c {
+	struct {
+		u32 mb_size_in;
+		u32 mb_off_in;
+		u32 mb_size_out;
+		u32 mb_off_out;
+		struct mutex lock;
+	} cmd;
+	struct device *dev;
+	struct mlxsw_core *core;
+	struct mlxsw_bus_info bus_info;
+};
+
+#define MLXSW_I2C_READ_MSG(_client, _addr_buf, _buf, _len) {	\
+	{ .addr = (_client)->addr,				\
+	  .buf = (_addr_buf),					\
+	  .len = MLXSW_I2C_ADDR_BUF_SIZE,			\
+	  .flags = 0 },						\
+	{ .addr = (_client)->addr,				\
+	  .buf = (_buf),					\
+	  .len = (_len),					\
+	  .flags = I2C_M_RD } }
+
+#define MLXSW_I2C_WRITE_MSG(_client, _buf, _len)		\
+	{ .addr = (_client)->addr,				\
+	  .buf = (u8 *)(_buf),					\
+	  .len = (_len),					\
+	  .flags = 0 }
+
+/* Routine decodes in and out mail boxes offset and size from two BE words. */
+static inline void
+mlxsw_i2c_convert_mbox(struct mlxsw_i2c *mlxsw_i2c, u8 *buf)
+{
+	u32 tmp;
+
+	/* Each word: low 20 bits hold the offset, high 12 bits the size */
+	tmp = be32_to_cpup((__be32 *) buf);
+	mlxsw_i2c->cmd.mb_off_in = tmp &
+				   GENMASK(MLXSW_I2C_MBOX_OFFSET_BITS - 1, 0);
+	mlxsw_i2c->cmd.mb_size_in = (tmp & GENMASK(31,
+					MLXSW_I2C_MBOX_OFFSET_BITS)) >>
+					MLXSW_I2C_MBOX_OFFSET_BITS;
+
+	tmp = be32_to_cpup((__be32 *) (buf + MLXSW_I2C_ADDR_WIDTH));
+	mlxsw_i2c->cmd.mb_off_out = tmp &
+				    GENMASK(MLXSW_I2C_MBOX_OFFSET_BITS - 1, 0);
+	mlxsw_i2c->cmd.mb_size_out = (tmp & GENMASK(31,
+					MLXSW_I2C_MBOX_OFFSET_BITS)) >>
+					MLXSW_I2C_MBOX_OFFSET_BITS;
+}
+
+/* Routine returns TLV header size plus 4 * the 11-bit length that follows it. */
+static inline int mlxsw_i2c_get_reg_size(u8 *in_mbox)
+{
+	u16  tmp = be16_to_cpup((__be16 *) (in_mbox + MLXSW_I2C_TLV_HDR_SIZE));
+
+	return (tmp & 0x7ff) * 4 + MLXSW_I2C_TLV_HDR_SIZE;
+}
+
+/* Routine stores device internal offset, in network order, at start of buf. */
+static inline void mlxsw_i2c_set_slave_addr(u8 *buf, u32 off)
+{
+	__be32 *val = (__be32 *) buf;
+
+	*val = htonl(off);
+}
+
+/* Routine waits until go bit is cleared and reports the status byte. */
+static int mlxsw_i2c_wait_go_bit(struct i2c_client *client,
+				 struct mlxsw_i2c *mlxsw_i2c, u8 *p_status)
+{
+	u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+	u8 buf[MLXSW_I2C_READ_SEMA_SIZE];
+	int len = MLXSW_I2C_READ_SEMA_SIZE;
+	struct i2c_msg read_sema[] =
+		MLXSW_I2C_READ_MSG(client, addr_buf, buf, len);
+	bool wait_done = false;
+	unsigned long end;
+	int i = 0, err;
+
+	mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_OFF_STATUS);
+
+	end = jiffies + msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+	do {
+		u32 ctrl;
+
+		err = i2c_transfer(client->adapter, read_sema,
+				   ARRAY_SIZE(read_sema));
+		/* buf is only valid once both messages have completed */
+		if (err == ARRAY_SIZE(read_sema)) {
+			ctrl = be32_to_cpu(*(__be32 *) buf);
+			if (!(ctrl & MLXSW_I2C_GO_BIT)) {
+				wait_done = true;
+				*p_status = ctrl >>
+					    MLXSW_I2C_CIR_CTRL_STATUS_SHIFT;
+				break;
+			}
+		}
+		cond_resched();
+	} while ((time_before(jiffies, end)) || (i++ < MLXSW_I2C_RETRY));
+
+	if (wait_done) {
+		if (*p_status)
+			err = -EIO;
+	} else {
+		return -ETIMEDOUT;
+	}
+
+	return err > 0 ? 0 : err;
+}
+
+/* Routine posts a command to ASIC through mail box. */
+static int mlxsw_i2c_write_cmd(struct i2c_client *client,
+			       struct mlxsw_i2c *mlxsw_i2c,
+			       int immediate)
+{
+	__be32 push_cmd_buf[MLXSW_I2C_PUSH_CMD_SIZE / 4] = {
+		0, cpu_to_be32(MLXSW_I2C_PUSH_IMM_CMD)
+	};
+	__be32 prep_cmd_buf[MLXSW_I2C_PREP_SIZE / 4] = {
+		0, 0, 0, 0, 0, 0,
+		cpu_to_be32(client->adapter->nr & 0xffff),
+		cpu_to_be32(MLXSW_I2C_SET_IMM_CMD)
+	};
+	struct i2c_msg push_cmd =
+		MLXSW_I2C_WRITE_MSG(client, push_cmd_buf,
+				    MLXSW_I2C_PUSH_CMD_SIZE);
+	struct i2c_msg prep_cmd =
+		MLXSW_I2C_WRITE_MSG(client, prep_cmd_buf, MLXSW_I2C_PREP_SIZE);
+	int err;
+
+	if (!immediate) {
+		push_cmd_buf[1] = cpu_to_be32(MLXSW_I2C_PUSH_CMD);
+		prep_cmd_buf[7] = cpu_to_be32(MLXSW_I2C_SET_CMD);
+	}
+	mlxsw_i2c_set_slave_addr((u8 *)prep_cmd_buf,
+				 MLXSW_I2C_CIR2_BASE);
+	mlxsw_i2c_set_slave_addr((u8 *)push_cmd_buf,
+				 MLXSW_I2C_CIR2_OFF_STATUS);
+
+	/* Prepare Command Interface Register for transaction */
+	err = i2c_transfer(client->adapter, &prep_cmd, 1);
+	if (err < 0)
+		return err;
+	else if (err != 1)
+		return -EIO;
+
+	/* Write out Command Interface Register GO bit to push transaction */
+	err = i2c_transfer(client->adapter, &push_cmd, 1);
+	if (err < 0)
+		return err;
+	else if (err != 1)
+		return -EIO;
+
+	return 0;
+}
+
+/* Routine reads mail box offsets from ASIC; returns 0 or negative errno. */
+static int mlxsw_i2c_get_mbox(struct i2c_client *client,
+			      struct mlxsw_i2c *mlxsw_i2c)
+{
+	u8 addr_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+	u8 buf[MLXSW_I2C_MBOX_SIZE];
+	struct i2c_msg mbox_cmd[] =
+		MLXSW_I2C_READ_MSG(client, addr_buf, buf, MLXSW_I2C_MBOX_SIZE);
+	int err;
+
+	/* Read mail boxes offsets. */
+	mlxsw_i2c_set_slave_addr(addr_buf, MLXSW_I2C_CIR2_BASE);
+	err = i2c_transfer(client->adapter, mbox_cmd, 2);
+	if (err != 2) {
+		dev_err(&client->dev, "Could not obtain mail boxes\n");
+		/* A short transfer is positive; never report it as success */
+		if (err < 0)
+			return err;
+		return -EIO;
+	}
+
+	/* Convert mail boxes. */
+	mlxsw_i2c_convert_mbox(mlxsw_i2c, &buf[MLXSW_I2C_MBOX_OUT_PARAM_OFF]);
+
+	return 0;
+}
+
+/* Routine sends I2C write transaction to ASIC device. */
+static int
+mlxsw_i2c_write(struct device *dev, size_t in_mbox_size, u8 *in_mbox, int num,
+		u8 *p_status)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+	unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+	u8 tran_buf[MLXSW_I2C_MAX_BUFF_SIZE + MLXSW_I2C_ADDR_BUF_SIZE];
+	int off = mlxsw_i2c->cmd.mb_off_in, chunk_size, i, j;
+	unsigned long end;
+	struct i2c_msg write_tran =
+		MLXSW_I2C_WRITE_MSG(client, tran_buf, MLXSW_I2C_PUSH_CMD_SIZE);
+	int err;
+
+	for (i = 0; i < num; i++) {
+		chunk_size = (in_mbox_size > MLXSW_I2C_BLK_MAX) ?
+			     MLXSW_I2C_BLK_MAX : in_mbox_size;
+		write_tran.len = MLXSW_I2C_ADDR_WIDTH + chunk_size;
+		mlxsw_i2c_set_slave_addr(tran_buf, off);
+		/* Every chunk before this one was a full block, so index the
+		 * source by block size; only the last chunk may be shorter.
+		 */
+		memcpy(&tran_buf[MLXSW_I2C_ADDR_BUF_SIZE], in_mbox +
+		       MLXSW_I2C_BLK_MAX * i, chunk_size);
+
+		j = 0;
+		end = jiffies + timeout;
+		do {
+			err = i2c_transfer(client->adapter, &write_tran, 1);
+			if (err == 1)
+				break;
+
+			cond_resched();
+		} while ((time_before(jiffies, end)) ||
+			 (j++ < MLXSW_I2C_RETRY));
+
+		if (err != 1)
+			return err ? err : -EIO;
+
+		off += chunk_size;
+		in_mbox_size -= chunk_size;
+	}
+
+	/* Prepare and write out Command Interface Register for transaction. */
+	err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 0);
+	if (err) {
+		dev_err(&client->dev, "Could not start transaction\n");
+		return -EIO;
+	}
+
+	/* Wait until go bit is cleared. */
+	err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, p_status);
+	if (err) {
+		dev_err(&client->dev, "HW semaphore is not released\n");
+		return err;
+	}
+
+	/* Validate transaction completion status. */
+	if (*p_status) {
+		dev_err(&client->dev, "Bad transaction completion status %x\n",
+			*p_status);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+/* Routine executes I2C command and reads back out_mbox when one is given. */
+static int
+mlxsw_i2c_cmd(struct device *dev, size_t in_mbox_size, u8 *in_mbox,
+	      size_t out_mbox_size, u8 *out_mbox, u8 *status)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+	unsigned long timeout = msecs_to_jiffies(MLXSW_I2C_TIMEOUT_MSECS);
+	u8 tran_buf[MLXSW_I2C_ADDR_BUF_SIZE];
+	int num, chunk_size, reg_size, i, j;
+	int off = mlxsw_i2c->cmd.mb_off_out;
+	unsigned long end;
+	struct i2c_msg read_tran[] =
+		MLXSW_I2C_READ_MSG(client, tran_buf, NULL, 0);
+	int err;
+
+	WARN_ON(in_mbox_size % sizeof(u32) || out_mbox_size % sizeof(u32));
+
+	reg_size = mlxsw_i2c_get_reg_size(in_mbox);
+	num = reg_size / MLXSW_I2C_BLK_MAX;
+	if (reg_size % MLXSW_I2C_BLK_MAX)
+		num++;
+
+	if (mutex_lock_interruptible(&mlxsw_i2c->cmd.lock) < 0) {
+		dev_err(&client->dev, "Could not acquire lock\n");
+		return -EINVAL;
+	}
+
+	err = mlxsw_i2c_write(dev, reg_size, in_mbox, num, status);
+	if (err)
+		goto cmd_fail;
+
+	/* No out mailbox in case of write transaction. */
+	if (!out_mbox) {
+		mutex_unlock(&mlxsw_i2c->cmd.lock);
+		return 0;
+	}
+
+	/* Send read transaction to get output mailbox content. */
+	read_tran[1].buf = out_mbox;
+	for (i = 0; i < num; i++) {
+		chunk_size = (reg_size > MLXSW_I2C_BLK_MAX) ?
+			     MLXSW_I2C_BLK_MAX : reg_size;
+		read_tran[1].len = chunk_size;
+		mlxsw_i2c_set_slave_addr(tran_buf, off);
+
+		j = 0;
+		end = jiffies + timeout;
+		do {
+			err = i2c_transfer(client->adapter, read_tran,
+					   ARRAY_SIZE(read_tran));
+			if (err == ARRAY_SIZE(read_tran))
+				break;
+
+			cond_resched();
+		} while ((time_before(jiffies, end)) ||
+			 (j++ < MLXSW_I2C_RETRY));
+
+		if (err != ARRAY_SIZE(read_tran)) {
+			/* A short transfer is positive, not an errno */
+			if (err >= 0)
+				err = -EIO;
+			goto cmd_fail;
+		}
+
+		off += chunk_size;
+		reg_size -= chunk_size;
+		read_tran[1].buf += chunk_size;
+	}
+
+	mutex_unlock(&mlxsw_i2c->cmd.lock);
+
+	return 0;
+
+cmd_fail:
+	mutex_unlock(&mlxsw_i2c->cmd.lock);
+	return err;
+}
+
+static int mlxsw_i2c_cmd_exec(void *bus_priv, u16 opcode, u8 opcode_mod,
+			      u32 in_mod, bool out_mbox_direct,
+			      char *in_mbox, size_t in_mbox_size,
+			      char *out_mbox, size_t out_mbox_size,
+			      u8 *status)
+{
+	struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+	return mlxsw_i2c_cmd(mlxsw_i2c->dev, in_mbox_size, in_mbox,
+			     out_mbox_size, out_mbox, status);
+}
+
+static bool mlxsw_i2c_skb_transmit_busy(void *bus_priv,
+					const struct mlxsw_tx_info *tx_info)
+{
+	return false;
+}
+
+static int mlxsw_i2c_skb_transmit(void *bus_priv, struct sk_buff *skb,
+				  const struct mlxsw_tx_info *tx_info)
+{
+	return 0;
+}
+
+static int
+mlxsw_i2c_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
+	       const struct mlxsw_config_profile *profile,
+	       struct mlxsw_res *resources)
+{
+	struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+	mlxsw_i2c->core = mlxsw_core;
+
+	return 0;
+}
+
+static void mlxsw_i2c_fini(void *bus_priv)
+{
+	struct mlxsw_i2c *mlxsw_i2c = bus_priv;
+
+	mlxsw_i2c->core = NULL;
+}
+
+static const struct mlxsw_bus mlxsw_i2c_bus = {
+	.kind			= "i2c",
+	.init			= mlxsw_i2c_init,
+	.fini			= mlxsw_i2c_fini,
+	.skb_transmit_busy	= mlxsw_i2c_skb_transmit_busy,
+	.skb_transmit		= mlxsw_i2c_skb_transmit,
+	.cmd_exec		= mlxsw_i2c_cmd_exec,
+};
+
+static int mlxsw_i2c_probe(struct i2c_client *client,
+			   const struct i2c_device_id *id)
+{
+	struct mlxsw_i2c *mlxsw_i2c;
+	u8 status;
+	int err;
+
+	mlxsw_i2c = devm_kzalloc(&client->dev, sizeof(*mlxsw_i2c), GFP_KERNEL);
+	if (!mlxsw_i2c)
+		return -ENOMEM;
+
+	i2c_set_clientdata(client, mlxsw_i2c);
+	mutex_init(&mlxsw_i2c->cmd.lock);
+
+	/* In order to use mailboxes through the i2c, special area is reserved
+	 * on the i2c address space that can be used for input and output
+	 * mailboxes. Such mailboxes are called local mailboxes. When using a
+	 * local mailbox, software should specify 0 as the Input/Output
+	 * parameters. The location of the Local Mailbox addresses on the i2c
+	 * space can be retrieved through the QUERY_FW command.
+	 * For this purpose QUERY_FW is to be issued with opcode modifier equal
+	 * 0x01. For such command the output parameter is an immediate value.
+	 * Here QUERY_FW command is invoked for ASIC probing and for getting
+	 * local mailboxes addresses from immediate output parameters.
+	 */
+
+	/* Prepare and write out Command Interface Register for transaction */
+	err = mlxsw_i2c_write_cmd(client, mlxsw_i2c, 1);
+	if (err) {
+		dev_err(&client->dev, "Could not start transaction\n");
+		goto errout;
+	}
+
+	/* Wait until go bit is cleared. */
+	err = mlxsw_i2c_wait_go_bit(client, mlxsw_i2c, &status);
+	if (err) {
+		dev_err(&client->dev, "HW semaphore is not released\n");
+		goto errout;
+	}
+
+	/* Validate transaction completion status. */
+	if (status) {
+		dev_err(&client->dev, "Bad transaction completion status %x\n",
+			status);
+		err = -EIO;
+		goto errout;
+	}
+
+	/* Get mailbox offsets. */
+	err = mlxsw_i2c_get_mbox(client, mlxsw_i2c);
+	if (err < 0) {
+		dev_err(&client->dev, "Fail to get mailboxes\n");
+		goto errout;
+	}
+
+	dev_info(&client->dev, "%s mb size=%x off=0x%08x out mb size=%x off=0x%08x\n",
+		 id->name, mlxsw_i2c->cmd.mb_size_in,
+		 mlxsw_i2c->cmd.mb_off_in, mlxsw_i2c->cmd.mb_size_out,
+		 mlxsw_i2c->cmd.mb_off_out);
+
+	/* Register device bus. */
+	mlxsw_i2c->bus_info.device_kind = id->name;
+	mlxsw_i2c->bus_info.device_name = client->name;
+	mlxsw_i2c->bus_info.dev = &client->dev;
+	mlxsw_i2c->dev = &client->dev;
+
+	err = mlxsw_core_bus_device_register(&mlxsw_i2c->bus_info,
+					     &mlxsw_i2c_bus, mlxsw_i2c);
+	if (err) {
+		dev_err(&client->dev, "Fail to register core bus\n");
+		goto errout;
+	}
+
+	return 0;
+
+errout:
+	i2c_set_clientdata(client, NULL);
+	mutex_destroy(&mlxsw_i2c->cmd.lock);
+	return err;
+}
+
+static int mlxsw_i2c_remove(struct i2c_client *client)
+{
+	struct mlxsw_i2c *mlxsw_i2c = i2c_get_clientdata(client);
+
+	mlxsw_core_bus_device_unregister(mlxsw_i2c->core);
+	mutex_destroy(&mlxsw_i2c->cmd.lock);
+
+	return 0;
+}
+
+int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver)
+{
+	i2c_driver->probe = mlxsw_i2c_probe;
+	i2c_driver->remove = mlxsw_i2c_remove;
+	return i2c_add_driver(i2c_driver);
+}
+EXPORT_SYMBOL(mlxsw_i2c_driver_register);
+
+void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver)
+{
+	i2c_del_driver(i2c_driver);
+}
+EXPORT_SYMBOL(mlxsw_i2c_driver_unregister);
+
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox switch I2C interface driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/ethernet/mellanox/mlxsw/i2c.h b/drivers/net/ethernet/mellanox/mlxsw/i2c.h
new file mode 100644
index 000000000000..daa24b213ea4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/i2c.h
@@ -0,0 +1,60 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/i2c.h
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_I2C_H
+#define _MLXSW_I2C_H
+
+#include <linux/i2c.h>
+
+#if IS_ENABLED(CONFIG_MLXSW_I2C)
+
+int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver);
+void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver);
+
+#else
+
+/* Stub used when CONFIG_MLXSW_I2C is disabled: always fails with -ENODEV. */
+static inline int mlxsw_i2c_driver_register(struct i2c_driver *i2c_driver)
+{
+	return -ENODEV;
+}
+
+/* Stub used when CONFIG_MLXSW_I2C is disabled: intentionally a no-op. */
+static inline void mlxsw_i2c_driver_unregister(struct i2c_driver *i2c_driver)
+{
+}
+
+#endif
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/ib.h b/drivers/net/ethernet/mellanox/mlxsw/ib.h
new file mode 100644
index 000000000000..ce313aaa6336
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/ib.h
@@ -0,0 +1,39 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/ib.h
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Elad Raz <eladr@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef _MLXSW_IB_H
+#define _MLXSW_IB_H
+
+#define MLXSW_IB_DEFAULT_MTU 4096
+
+#endif /* _MLXSW_IB_H */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/item.h b/drivers/net/ethernet/mellanox/mlxsw/item.h
index a94dbda6590b..3c95e3ddd9c2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/item.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/item.h
@@ -55,7 +55,7 @@ struct mlxsw_item {
 };
 
 static inline unsigned int
-__mlxsw_item_offset(struct mlxsw_item *item, unsigned short index,
+__mlxsw_item_offset(const struct mlxsw_item *item, unsigned short index,
 		    size_t typesize)
 {
 	BUG_ON(index && !item->step);
@@ -72,7 +72,8 @@ __mlxsw_item_offset(struct mlxsw_item *item, unsigned short index,
 		typesize);
 }
 
-static inline u16 __mlxsw_item_get16(char *buf, struct mlxsw_item *item,
+static inline u16 __mlxsw_item_get16(const char *buf,
+				     const struct mlxsw_item *item,
 				     unsigned short index)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u16));
@@ -87,7 +88,7 @@ static inline u16 __mlxsw_item_get16(char *buf, struct mlxsw_item *item,
 	return tmp;
 }
 
-static inline void __mlxsw_item_set16(char *buf, struct mlxsw_item *item,
+static inline void __mlxsw_item_set16(char *buf, const struct mlxsw_item *item,
 				      unsigned short index, u16 val)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index,
@@ -105,7 +106,8 @@ static inline void __mlxsw_item_set16(char *buf, struct mlxsw_item *item,
 	b[offset] = cpu_to_be16(tmp);
 }
 
-static inline u32 __mlxsw_item_get32(char *buf, struct mlxsw_item *item,
+static inline u32 __mlxsw_item_get32(const char *buf,
+				     const struct mlxsw_item *item,
 				     unsigned short index)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u32));
@@ -120,7 +122,7 @@ static inline u32 __mlxsw_item_get32(char *buf, struct mlxsw_item *item,
 	return tmp;
 }
 
-static inline void __mlxsw_item_set32(char *buf, struct mlxsw_item *item,
+static inline void __mlxsw_item_set32(char *buf, const struct mlxsw_item *item,
 				      unsigned short index, u32 val)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index,
@@ -138,7 +140,8 @@ static inline void __mlxsw_item_set32(char *buf, struct mlxsw_item *item,
 	b[offset] = cpu_to_be32(tmp);
 }
 
-static inline u64 __mlxsw_item_get64(char *buf, struct mlxsw_item *item,
+static inline u64 __mlxsw_item_get64(const char *buf,
+				     const struct mlxsw_item *item,
 				     unsigned short index)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64));
@@ -153,7 +156,7 @@ static inline u64 __mlxsw_item_get64(char *buf, struct mlxsw_item *item,
 	return tmp;
 }
 
-static inline void __mlxsw_item_set64(char *buf, struct mlxsw_item *item,
+static inline void __mlxsw_item_set64(char *buf, const struct mlxsw_item *item,
 				      unsigned short index, u64 val)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index, sizeof(u64));
@@ -170,8 +173,8 @@ static inline void __mlxsw_item_set64(char *buf, struct mlxsw_item *item,
 	b[offset] = cpu_to_be64(tmp);
 }
 
-static inline void __mlxsw_item_memcpy_from(char *buf, char *dst,
-					    struct mlxsw_item *item,
+static inline void __mlxsw_item_memcpy_from(const char *buf, char *dst,
+					    const struct mlxsw_item *item,
 					    unsigned short index)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char));
@@ -180,7 +183,7 @@ static inline void __mlxsw_item_memcpy_from(char *buf, char *dst,
 }
 
 static inline void __mlxsw_item_memcpy_to(char *buf, const char *src,
-					  struct mlxsw_item *item,
+					  const struct mlxsw_item *item,
 					  unsigned short index)
 {
 	unsigned int offset = __mlxsw_item_offset(item, index, sizeof(char));
@@ -189,7 +192,8 @@ static inline void __mlxsw_item_memcpy_to(char *buf, const char *src,
 }
 
 static inline u16
-__mlxsw_item_bit_array_offset(struct mlxsw_item *item, u16 index, u8 *shift)
+__mlxsw_item_bit_array_offset(const struct mlxsw_item *item,
+			      u16 index, u8 *shift)
 {
 	u16 max_index, be_index;
 	u16 offset;		/* byte offset inside the array */
@@ -212,7 +216,8 @@ __mlxsw_item_bit_array_offset(struct mlxsw_item *item, u16 index, u8 *shift)
 	return item->offset + offset;
 }
 
-static inline u8 __mlxsw_item_bit_array_get(char *buf, struct mlxsw_item *item,
+static inline u8 __mlxsw_item_bit_array_get(const char *buf,
+					    const struct mlxsw_item *item,
 					    u16 index)
 {
 	u8 shift, tmp;
@@ -224,7 +229,8 @@ static inline u8 __mlxsw_item_bit_array_get(char *buf, struct mlxsw_item *item,
 	return tmp;
 }
 
-static inline void __mlxsw_item_bit_array_set(char *buf, struct mlxsw_item *item,
+static inline void __mlxsw_item_bit_array_set(char *buf,
+					      const struct mlxsw_item *item,
 					      u16 index, u8 val)
 {
 	u8 shift, tmp;
@@ -254,7 +260,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.size = {.bits = _sizebits,},						\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
-static inline u16 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf)		\
+static inline u16 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf)	\
 {										\
 	return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname), 0);	\
 }										\
@@ -275,7 +281,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
 static inline u16								\
-mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index)	\
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\
 {										\
 	return __mlxsw_item_get16(buf, &__ITEM_NAME(_type, _cname, _iname),	\
 				  index);					\
@@ -295,7 +301,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.size = {.bits = _sizebits,},						\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
-static inline u32 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf)		\
+static inline u32 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf)	\
 {										\
 	return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname), 0);	\
 }										\
@@ -316,7 +322,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
 static inline u32								\
-mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index)	\
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\
 {										\
 	return __mlxsw_item_get32(buf, &__ITEM_NAME(_type, _cname, _iname),	\
 				  index);					\
@@ -336,7 +342,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.size = {.bits = _sizebits,},						\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
-static inline u64 mlxsw_##_type##_##_cname##_##_iname##_get(char *buf)		\
+static inline u64 mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf)	\
 {										\
 	return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname), 0);	\
 }										\
@@ -357,7 +363,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
 static inline u64								\
-mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, unsigned short index)	\
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, unsigned short index)\
 {										\
 	return __mlxsw_item_get64(buf, &__ITEM_NAME(_type, _cname, _iname),	\
 				  index);					\
@@ -377,7 +383,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
 static inline void								\
-mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(char *buf, char *dst)		\
+mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf, char *dst)	\
 {										\
 	__mlxsw_item_memcpy_from(buf, dst,					\
 				 &__ITEM_NAME(_type, _cname, _iname), 0);	\
@@ -399,7 +405,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
 static inline void								\
-mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(char *buf,			\
+mlxsw_##_type##_##_cname##_##_iname##_memcpy_from(const char *buf,		\
 						  unsigned short index,		\
 						  char *dst)			\
 {										\
@@ -424,7 +430,7 @@ static struct mlxsw_item __ITEM_NAME(_type, _cname, _iname) = {			\
 	.name = #_type "_" #_cname "_" #_iname,					\
 };										\
 static inline u8								\
-mlxsw_##_type##_##_cname##_##_iname##_get(char *buf, u16 index)			\
+mlxsw_##_type##_##_cname##_##_iname##_get(const char *buf, u16 index)		\
 {										\
 	return __mlxsw_item_bit_array_get(buf,					\
 					  &__ITEM_NAME(_type, _cname, _iname),	\
diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
new file mode 100644
index 000000000000..3dd16267b76c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c
@@ -0,0 +1,97 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/minimal.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Vadim Pasternak <vadimp@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mod_devicetable.h>
+#include <linux/types.h>
+
+#include "core.h"
+#include "i2c.h"
+
+static const char mlxsw_minimal_driver_name[] = "mlxsw_minimal";
+
+static const struct mlxsw_config_profile mlxsw_minimal_config_profile;
+
+static struct mlxsw_driver mlxsw_minimal_driver = {
+	.kind		= mlxsw_minimal_driver_name,
+	.priv_size	= 1,
+	.profile	= &mlxsw_minimal_config_profile,
+};
+
+static const struct i2c_device_id mlxsw_minimal_i2c_id[] = {
+	{ "mlxsw_minimal", 0},
+	{ },
+};
+
+static struct i2c_driver mlxsw_minimal_i2c_driver = {
+	.driver.name = "mlxsw_minimal",
+	.class = I2C_CLASS_HWMON,
+	.id_table = mlxsw_minimal_i2c_id,
+};
+
+/* Module init: register the mlxsw core driver, then the I2C bus driver.
+ * If the I2C registration fails, the core registration is rolled back.
+ */
+static int __init mlxsw_minimal_module_init(void)
+{
+	int rc;
+
+	rc = mlxsw_core_driver_register(&mlxsw_minimal_driver);
+	if (rc)
+		return rc;
+
+	rc = mlxsw_i2c_driver_register(&mlxsw_minimal_i2c_driver);
+	if (!rc)
+		return 0;
+
+	/* I2C registration failed: unwind the core driver registration. */
+	mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+	return rc;
+}
+
+static void __exit mlxsw_minimal_module_exit(void)
+{
+	mlxsw_i2c_driver_unregister(&mlxsw_minimal_i2c_driver);
+	mlxsw_core_driver_unregister(&mlxsw_minimal_driver);
+}
+
+module_init(mlxsw_minimal_module_init);
+module_exit(mlxsw_minimal_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Vadim Pasternak <vadimp@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox minimal driver");
+MODULE_DEVICE_TABLE(i2c, mlxsw_minimal_i2c_id);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c
index 912f71f84209..a223c85dfde0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c
@@ -48,33 +48,17 @@
 #include <linux/seq_file.h>
 #include <linux/string.h>
 
+#include "pci_hw.h"
 #include "pci.h"
 #include "core.h"
 #include "cmd.h"
 #include "port.h"
+#include "resources.h"
 
 static const char mlxsw_pci_driver_name[] = "mlxsw_pci";
 
-static const struct pci_device_id mlxsw_pci_id_table[] = {
-	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0},
-	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
-	{0, }
-};
-
 static struct dentry *mlxsw_pci_dbg_root;
 
-static const char *mlxsw_pci_device_kind_get(const struct pci_device_id *id)
-{
-	switch (id->device) {
-	case PCI_DEVICE_ID_MELLANOX_SWITCHX2:
-		return MLXSW_DEVICE_KIND_SWITCHX2;
-	case PCI_DEVICE_ID_MELLANOX_SPECTRUM:
-		return MLXSW_DEVICE_KIND_SPECTRUM;
-	default:
-		BUG();
-	}
-}
-
 #define mlxsw_pci_write32(mlxsw_pci, reg, val) \
 	iowrite32be(val, (mlxsw_pci)->hw_addr + (MLXSW_PCI_ ## reg))
 #define mlxsw_pci_read32(mlxsw_pci, reg) \
@@ -238,8 +222,9 @@ static bool mlxsw_pci_elem_hw_owned(struct mlxsw_pci_queue *q, bool owner_bit)
 	return owner_bit != !!(q->consumer_counter & q->count);
 }
 
-static char *mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q,
-					 u32 (*get_elem_owner_func)(char *))
+static char *
+mlxsw_pci_queue_sw_elem_get(struct mlxsw_pci_queue *q,
+			    u32 (*get_elem_owner_func)(const char *))
 {
 	struct mlxsw_pci_queue_elem_info *elem_info;
 	char *elem;
@@ -1154,76 +1139,8 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci,
 	mlxsw_cmd_mbox_config_profile_swid_config_mask_set(mbox, index, mask);
 }
 
-#define MLXSW_RESOURCES_TABLE_END_ID 0xffff
-#define MLXSW_MAX_SPAN_ID 0x2420
-#define MLXSW_MAX_LAG_ID 0x2520
-#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521
-#define MLXSW_KVD_SIZE_ID 0x1001
-#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002
-#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003
-#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01
-#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502
-#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906
-#define MLXSW_MAX_REGIONS_ID 0x2901
-#define MLXSW_MAX_RIF_ID 0x2C02
-#define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100
-#define MLXSW_RESOURCES_PER_QUERY 32
-
-static void mlxsw_pci_resources_query_parse(int id, u64 val,
-					    struct mlxsw_resources *resources)
-{
-	switch (id) {
-	case MLXSW_MAX_SPAN_ID:
-		resources->max_span = val;
-		resources->max_span_valid = 1;
-		break;
-	case MLXSW_MAX_LAG_ID:
-		resources->max_lag = val;
-		resources->max_lag_valid = 1;
-		break;
-	case MLXSW_MAX_PORTS_IN_LAG_ID:
-		resources->max_ports_in_lag = val;
-		resources->max_ports_in_lag_valid = 1;
-		break;
-	case MLXSW_KVD_SIZE_ID:
-		resources->kvd_size = val;
-		resources->kvd_size_valid = 1;
-		break;
-	case MLXSW_KVD_SINGLE_MIN_SIZE_ID:
-		resources->kvd_single_min_size = val;
-		resources->kvd_single_min_size_valid = 1;
-		break;
-	case MLXSW_KVD_DOUBLE_MIN_SIZE_ID:
-		resources->kvd_double_min_size = val;
-		resources->kvd_double_min_size_valid = 1;
-		break;
-	case MLXSW_MAX_VIRTUAL_ROUTERS_ID:
-		resources->max_virtual_routers = val;
-		resources->max_virtual_routers_valid = 1;
-		break;
-	case MLXSW_MAX_SYSTEM_PORT_ID:
-		resources->max_system_ports = val;
-		resources->max_system_ports_valid = 1;
-		break;
-	case MLXSW_MAX_VLAN_GROUPS_ID:
-		resources->max_vlan_groups = val;
-		resources->max_vlan_groups_valid = 1;
-		break;
-	case MLXSW_MAX_REGIONS_ID:
-		resources->max_regions = val;
-		resources->max_regions_valid = 1;
-		break;
-	case MLXSW_MAX_RIF_ID:
-		resources->max_rif = val;
-		resources->max_rif_valid = 1;
-		break;
-	default:
-		break;
-	}
-}
-
 static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox,
-				     struct mlxsw_resources *resources,
+				     struct mlxsw_res *res,
 				     u8 query_enabled)
 {
 	int index, i;
@@ -1237,19 +1154,20 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox,
 
 	mlxsw_cmd_mbox_zero(mbox);
 
-	for (index = 0; index < MLXSW_RESOURCES_QUERY_MAX_QUERIES; index++) {
+	for (index = 0; index < MLXSW_CMD_QUERY_RESOURCES_MAX_QUERIES;
+	     index++) {
 		err = mlxsw_cmd_query_resources(mlxsw_pci->core, mbox, index);
 		if (err)
 			return err;
 
-		for (i = 0; i < MLXSW_RESOURCES_PER_QUERY; i++) {
+		for (i = 0; i < MLXSW_CMD_QUERY_RESOURCES_PER_QUERY; i++) {
 			id = mlxsw_cmd_mbox_query_resource_id_get(mbox, i);
 			data = mlxsw_cmd_mbox_query_resource_data_get(mbox, i);
 
-			if (id == MLXSW_RESOURCES_TABLE_END_ID)
+			if (id == MLXSW_CMD_QUERY_RESOURCES_TABLE_END_ID)
 				return 0;
 
-			mlxsw_pci_resources_query_parse(id, data, resources);
+			mlxsw_res_parse(res, id, data);
 		}
 	}
 
@@ -1259,13 +1177,14 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox,
 	return -EIO;
 }
 
-static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile,
-					   struct mlxsw_resources *resources)
+static int
+mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile,
+				struct mlxsw_res *res)
 {
-	u32 singles_size, doubles_size, linear_size;
+	u32 single_size, double_size, linear_size;
 
-	if (!resources->kvd_single_min_size_valid ||
-	    !resources->kvd_double_min_size_valid ||
+	if (!MLXSW_RES_VALID(res, KVD_SINGLE_MIN_SIZE) ||
+	    !MLXSW_RES_VALID(res, KVD_DOUBLE_MIN_SIZE) ||
 	    !profile->used_kvd_split_data)
 		return -EIO;
 
@@ -1277,31 +1196,31 @@ static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *pr
 	 * Both sizes must be a multiplications of the
 	 * granularity from the profile.
 	 */
-	doubles_size = (resources->kvd_size - linear_size);
-	doubles_size *= profile->kvd_hash_double_parts;
-	doubles_size /= (profile->kvd_hash_double_parts +
-			 profile->kvd_hash_single_parts);
-	doubles_size /= profile->kvd_hash_granularity;
-	doubles_size *= profile->kvd_hash_granularity;
-	singles_size = resources->kvd_size - doubles_size -
-		       linear_size;
+	double_size = MLXSW_RES_GET(res, KVD_SIZE) - linear_size;
+	double_size *= profile->kvd_hash_double_parts;
+	double_size /= profile->kvd_hash_double_parts +
+		       profile->kvd_hash_single_parts;
+	double_size /= profile->kvd_hash_granularity;
+	double_size *= profile->kvd_hash_granularity;
+	single_size = MLXSW_RES_GET(res, KVD_SIZE) - double_size -
+		      linear_size;
 
 	/* Check results are legal. */
-	if (singles_size < resources->kvd_single_min_size ||
-	    doubles_size < resources->kvd_double_min_size ||
-	    resources->kvd_size < linear_size)
+	if (single_size < MLXSW_RES_GET(res, KVD_SINGLE_MIN_SIZE) ||
+	    double_size < MLXSW_RES_GET(res, KVD_DOUBLE_MIN_SIZE) ||
+	    MLXSW_RES_GET(res, KVD_SIZE) < linear_size)
 		return -EIO;
 
-	resources->kvd_single_size = singles_size;
-	resources->kvd_double_size = doubles_size;
-	resources->kvd_linear_size = linear_size;
+	MLXSW_RES_SET(res, KVD_SINGLE_SIZE, single_size);
+	MLXSW_RES_SET(res, KVD_DOUBLE_SIZE, double_size);
+	MLXSW_RES_SET(res, KVD_LINEAR_SIZE, linear_size);
 
 	return 0;
 }
 
 static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
 				    const struct mlxsw_config_profile *profile,
-				    struct mlxsw_resources *resources)
+				    struct mlxsw_res *res)
 {
 	int i;
 	int err;
@@ -1390,22 +1309,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox,
 		mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set(
 			mbox, profile->adaptive_routing_group_cap);
 	}
-	if (resources->kvd_size_valid) {
-		err = mlxsw_pci_profile_get_kvd_sizes(profile, resources);
+	if (MLXSW_RES_VALID(res, KVD_SIZE)) {
+		err = mlxsw_pci_profile_get_kvd_sizes(profile, res);
 		if (err)
 			return err;
 
 		mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1);
 		mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox,
-						resources->kvd_linear_size);
+					MLXSW_RES_GET(res, KVD_LINEAR_SIZE));
 		mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox,
 									   1);
 		mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox,
-						resources->kvd_single_size);
+					MLXSW_RES_GET(res, KVD_SINGLE_SIZE));
 		mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set(
 								mbox, 1);
 		mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox,
-						resources->kvd_double_size);
+					MLXSW_RES_GET(res, KVD_DOUBLE_SIZE));
 	}
 
 	for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++)
@@ -1543,7 +1462,7 @@ static void mlxsw_pci_mbox_free(struct mlxsw_pci *mlxsw_pci,
 
 static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 			  const struct mlxsw_config_profile *profile,
-			  struct mlxsw_resources *resources)
+			  struct mlxsw_res *res)
 {
 	struct mlxsw_pci *mlxsw_pci = bus_priv;
 	struct pci_dev *pdev = mlxsw_pci->pdev;
@@ -1602,12 +1521,12 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 	if (err)
 		goto err_boardinfo;
 
-	err = mlxsw_pci_resources_query(mlxsw_pci, mbox, resources,
+	err = mlxsw_pci_resources_query(mlxsw_pci, mbox, res,
 					profile->resource_query_enable);
 	if (err)
 		goto err_query_resources;
 
-	err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources);
+	err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, res);
 	if (err)
 		goto err_config_profile;
 
@@ -1617,7 +1536,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core,
 
 	err = request_irq(mlxsw_pci->msix_entry.vector,
 			  mlxsw_pci_eq_irq_handler, 0,
-			  mlxsw_pci_driver_name, mlxsw_pci);
+			  mlxsw_pci->bus_info.device_kind, mlxsw_pci);
 	if (err) {
 		dev_err(&pdev->dev, "IRQ request failed\n");
 		goto err_request_eq_irq;
@@ -1836,6 +1755,7 @@ static const struct mlxsw_bus mlxsw_pci_bus = {
 	.skb_transmit_busy	= mlxsw_pci_skb_transmit_busy,
 	.skb_transmit		= mlxsw_pci_skb_transmit,
 	.cmd_exec		= mlxsw_pci_cmd_exec,
+	.features		= MLXSW_BUS_F_TXRX,
 };
 
 static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
@@ -1863,6 +1783,7 @@ static int mlxsw_pci_sw_reset(struct mlxsw_pci *mlxsw_pci,
 
 static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
+	const char *driver_name = pdev->driver->name;
 	struct mlxsw_pci *mlxsw_pci;
 	int err;
 
@@ -1876,7 +1797,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_pci_enable_device;
 	}
 
-	err = pci_request_regions(pdev, mlxsw_pci_driver_name);
+	err = pci_request_regions(pdev, driver_name);
 	if (err) {
 		dev_err(&pdev->dev, "pci_request_regions failed\n");
 		goto err_pci_request_regions;
@@ -1927,7 +1848,7 @@ static int mlxsw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_msix_init;
 	}
 
-	mlxsw_pci->bus_info.device_kind = mlxsw_pci_device_kind_get(id);
+	mlxsw_pci->bus_info.device_kind = driver_name;
 	mlxsw_pci->bus_info.device_name = pci_name(mlxsw_pci->pdev);
 	mlxsw_pci->bus_info.dev = &pdev->dev;
 
@@ -1979,33 +1900,30 @@ static void mlxsw_pci_remove(struct pci_dev *pdev)
 	kfree(mlxsw_pci);
 }
 
-static struct pci_driver mlxsw_pci_driver = {
-	.name		= mlxsw_pci_driver_name,
-	.id_table	= mlxsw_pci_id_table,
-	.probe		= mlxsw_pci_probe,
-	.remove		= mlxsw_pci_remove,
-};
+int mlxsw_pci_driver_register(struct pci_driver *pci_driver)
+{
+	pci_driver->probe = mlxsw_pci_probe;
+	pci_driver->remove = mlxsw_pci_remove;
+	return pci_register_driver(pci_driver);
+}
+EXPORT_SYMBOL(mlxsw_pci_driver_register);
 
-static int __init mlxsw_pci_module_init(void)
+void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver)
 {
-	int err;
+	pci_unregister_driver(pci_driver);
+}
+EXPORT_SYMBOL(mlxsw_pci_driver_unregister);
 
+static int __init mlxsw_pci_module_init(void)
+{
 	mlxsw_pci_dbg_root = debugfs_create_dir(mlxsw_pci_driver_name, NULL);
 	if (!mlxsw_pci_dbg_root)
 		return -ENOMEM;
-	err = pci_register_driver(&mlxsw_pci_driver);
-	if (err)
-		goto err_register_driver;
 	return 0;
-
-err_register_driver:
-	debugfs_remove_recursive(mlxsw_pci_dbg_root);
-	return err;
 }
 
 static void __exit mlxsw_pci_module_exit(void)
 {
-	pci_unregister_driver(&mlxsw_pci_driver);
 	debugfs_remove_recursive(mlxsw_pci_dbg_root);
 }
 
@@ -2015,4 +1933,3 @@ module_exit(mlxsw_pci_module_exit);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox switch PCI interface driver");
-MODULE_DEVICE_TABLE(pci, mlxsw_pci_id_table);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.h b/drivers/net/ethernet/mellanox/mlxsw/pci.h
index d942a3e6fa41..d65582325cd5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/pci.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci.h
@@ -1,7 +1,7 @@
 /*
  * drivers/net/ethernet/mellanox/mlxsw/pci.h
- * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
- * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -35,197 +35,31 @@
 #ifndef _MLXSW_PCI_H
 #define _MLXSW_PCI_H
 
-#include <linux/bitops.h>
+#include <linux/pci.h>
 
-#include "item.h"
+#define PCI_DEVICE_ID_MELLANOX_SWITCHX2		0xc738
+#define PCI_DEVICE_ID_MELLANOX_SPECTRUM		0xcb84
+#define PCI_DEVICE_ID_MELLANOX_SWITCHIB		0xcb20
+#define PCI_DEVICE_ID_MELLANOX_SWITCHIB2	0xcf08
 
-#define PCI_DEVICE_ID_MELLANOX_SWITCHX2	0xc738
-#define PCI_DEVICE_ID_MELLANOX_SPECTRUM	0xcb84
-#define MLXSW_PCI_BAR0_SIZE		(1024 * 1024) /* 1MB */
-#define MLXSW_PCI_PAGE_SIZE		4096
+#if IS_ENABLED(CONFIG_MLXSW_PCI)
 
-#define MLXSW_PCI_CIR_BASE			0x71000
-#define MLXSW_PCI_CIR_IN_PARAM_HI		MLXSW_PCI_CIR_BASE
-#define MLXSW_PCI_CIR_IN_PARAM_LO		(MLXSW_PCI_CIR_BASE + 0x04)
-#define MLXSW_PCI_CIR_IN_MODIFIER		(MLXSW_PCI_CIR_BASE + 0x08)
-#define MLXSW_PCI_CIR_OUT_PARAM_HI		(MLXSW_PCI_CIR_BASE + 0x0C)
-#define MLXSW_PCI_CIR_OUT_PARAM_LO		(MLXSW_PCI_CIR_BASE + 0x10)
-#define MLXSW_PCI_CIR_TOKEN			(MLXSW_PCI_CIR_BASE + 0x14)
-#define MLXSW_PCI_CIR_CTRL			(MLXSW_PCI_CIR_BASE + 0x18)
-#define MLXSW_PCI_CIR_CTRL_GO_BIT		BIT(23)
-#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT		BIT(22)
-#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT	12
-#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT		24
-#define MLXSW_PCI_CIR_TIMEOUT_MSECS		1000
+int mlxsw_pci_driver_register(struct pci_driver *pci_driver);
+void mlxsw_pci_driver_unregister(struct pci_driver *pci_driver);
 
-#define MLXSW_PCI_SW_RESET			0xF0010
-#define MLXSW_PCI_SW_RESET_RST_BIT		BIT(0)
-#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	5000
-#define MLXSW_PCI_FW_READY			0xA1844
-#define MLXSW_PCI_FW_READY_MASK			0xFF
-#define MLXSW_PCI_FW_READY_MAGIC		0x5E
+#else
 
-#define MLXSW_PCI_DOORBELL_SDQ_OFFSET		0x000
-#define MLXSW_PCI_DOORBELL_RDQ_OFFSET		0x200
-#define MLXSW_PCI_DOORBELL_CQ_OFFSET		0x400
-#define MLXSW_PCI_DOORBELL_EQ_OFFSET		0x600
-#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET	0x800
-#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET	0xA00
+static inline int
+mlxsw_pci_driver_register(struct pci_driver *pci_driver)
+{
+	return 0;
+}
 
-#define MLXSW_PCI_DOORBELL(offset, type_offset, num)	\
-	((offset) + (type_offset) + (num) * 4)
+static inline void
+mlxsw_pci_driver_unregister(struct pci_driver *pci_driver)
+{
+}
 
-#define MLXSW_PCI_CQS_MAX	96
-#define MLXSW_PCI_EQS_COUNT	2
-#define MLXSW_PCI_EQ_ASYNC_NUM	0
-#define MLXSW_PCI_EQ_COMP_NUM	1
-
-#define MLXSW_PCI_AQ_PAGES	8
-#define MLXSW_PCI_AQ_SIZE	(MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
-#define MLXSW_PCI_WQE_SIZE	32 /* 32 bytes per element */
-#define MLXSW_PCI_CQE_SIZE	16 /* 16 bytes per element */
-#define MLXSW_PCI_EQE_SIZE	16 /* 16 bytes per element */
-#define MLXSW_PCI_WQE_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
-#define MLXSW_PCI_CQE_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE)
-#define MLXSW_PCI_EQE_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
-#define MLXSW_PCI_EQE_UPDATE_COUNT	0x80
-
-#define MLXSW_PCI_WQE_SG_ENTRIES	3
-#define MLXSW_PCI_WQE_TYPE_ETHERNET	0xA
-
-/* pci_wqe_c
- * If set it indicates that a completion should be reported upon
- * execution of this descriptor.
- */
-MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1);
-
-/* pci_wqe_lp
- * Local Processing, set if packet should be processed by the local
- * switch hardware:
- * For Ethernet EMAD (Direct Route and non Direct Route) -
- * must be set if packet destination is local device
- * For InfiniBand CTL - must be set if packet destination is local device
- * Otherwise it must be clear
- * Local Process packets must not exceed the size of 2K (including payload
- * and headers).
- */
-MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1);
-
-/* pci_wqe_type
- * Packet type.
- */
-MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4);
-
-/* pci_wqe_byte_count
- * Size of i-th scatter/gather entry, 0 if entry is unused.
- */
-MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
-
-/* pci_wqe_address
- * Physical address of i-th scatter/gather entry.
- * Gather Entries must be 2Byte aligned.
- */
-MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
-
-/* pci_cqe_lag
- * Packet arrives from a port which is a LAG
- */
-MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
-
-/* pci_cqe_system_port/lag_id
- * When lag=0: System port on which the packet was received
- * When lag=1:
- * bits [15:4] LAG ID on which the packet was received
- * bits [3:0] sub_port on which the packet was received
- */
-MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
-MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
-MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
-
-/* pci_cqe_wqe_counter
- * WQE count of the WQEs completed on the associated dqn
- */
-MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
-
-/* pci_cqe_byte_count
- * Byte count of received packets including additional two
- * Reserved Bytes that are append to the end of the frame.
- * Reserved for Send CQE.
- */
-MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
-
-/* pci_cqe_trap_id
- * Trap ID that captured the packet.
- */
-MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8);
-
-/* pci_cqe_crc
- * Length include CRC. Indicates the length field includes
- * the packet's CRC.
- */
-MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1);
-
-/* pci_cqe_e
- * CQE with Error.
- */
-MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1);
-
-/* pci_cqe_sr
- * 1 - Send Queue
- * 0 - Receive Queue
- */
-MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1);
-
-/* pci_cqe_dqn
- * Descriptor Queue (DQ) Number.
- */
-MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5);
-
-/* pci_cqe_owner
- * Ownership bit.
- */
-MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1);
-
-/* pci_eqe_event_type
- * Event type.
- */
-MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8);
-#define MLXSW_PCI_EQE_EVENT_TYPE_COMP	0x00
-#define MLXSW_PCI_EQE_EVENT_TYPE_CMD	0x0A
-
-/* pci_eqe_event_sub_type
- * Event type.
- */
-MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
-
-/* pci_eqe_cqn
- * Completion Queue that triggeret this EQE.
- */
-MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
-
-/* pci_eqe_owner
- * Ownership bit.
- */
-MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1);
-
-/* pci_eqe_cmd_token
- * Command completion event - token
- */
-MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16);
-
-/* pci_eqe_cmd_status
- * Command completion event - status
- */
-MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8);
-
-/* pci_eqe_cmd_out_param_h
- * Command completion event - output parameter - higher part
- */
-MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32);
-
-/* pci_eqe_cmd_out_param_l
- * Command completion event - output parameter - lower part
- */
-MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32);
+#endif
 
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
new file mode 100644
index 000000000000..d147ddd97997
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
@@ -0,0 +1,229 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/pci_hw.h
+ * Copyright (c) 2015-2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2015-2016 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_PCI_HW_H
+#define _MLXSW_PCI_HW_H
+
+#include <linux/bitops.h>
+
+#include "item.h"
+
+#define MLXSW_PCI_BAR0_SIZE		(1024 * 1024) /* 1MB */
+#define MLXSW_PCI_PAGE_SIZE		4096
+
+#define MLXSW_PCI_CIR_BASE			0x71000
+#define MLXSW_PCI_CIR_IN_PARAM_HI		MLXSW_PCI_CIR_BASE
+#define MLXSW_PCI_CIR_IN_PARAM_LO		(MLXSW_PCI_CIR_BASE + 0x04)
+#define MLXSW_PCI_CIR_IN_MODIFIER		(MLXSW_PCI_CIR_BASE + 0x08)
+#define MLXSW_PCI_CIR_OUT_PARAM_HI		(MLXSW_PCI_CIR_BASE + 0x0C)
+#define MLXSW_PCI_CIR_OUT_PARAM_LO		(MLXSW_PCI_CIR_BASE + 0x10)
+#define MLXSW_PCI_CIR_TOKEN			(MLXSW_PCI_CIR_BASE + 0x14)
+#define MLXSW_PCI_CIR_CTRL			(MLXSW_PCI_CIR_BASE + 0x18)
+#define MLXSW_PCI_CIR_CTRL_GO_BIT		BIT(23)
+#define MLXSW_PCI_CIR_CTRL_EVREQ_BIT		BIT(22)
+#define MLXSW_PCI_CIR_CTRL_OPCODE_MOD_SHIFT	12
+#define MLXSW_PCI_CIR_CTRL_STATUS_SHIFT		24
+#define MLXSW_PCI_CIR_TIMEOUT_MSECS		1000
+
+#define MLXSW_PCI_SW_RESET			0xF0010
+#define MLXSW_PCI_SW_RESET_RST_BIT		BIT(0)
+#define MLXSW_PCI_SW_RESET_TIMEOUT_MSECS	5000
+#define MLXSW_PCI_FW_READY			0xA1844
+#define MLXSW_PCI_FW_READY_MASK			0xFFFF
+#define MLXSW_PCI_FW_READY_MAGIC		0x5E
+
+#define MLXSW_PCI_DOORBELL_SDQ_OFFSET		0x000
+#define MLXSW_PCI_DOORBELL_RDQ_OFFSET		0x200
+#define MLXSW_PCI_DOORBELL_CQ_OFFSET		0x400
+#define MLXSW_PCI_DOORBELL_EQ_OFFSET		0x600
+#define MLXSW_PCI_DOORBELL_ARM_CQ_OFFSET	0x800
+#define MLXSW_PCI_DOORBELL_ARM_EQ_OFFSET	0xA00
+
+#define MLXSW_PCI_DOORBELL(offset, type_offset, num)	\
+	((offset) + (type_offset) + (num) * 4)
+
+#define MLXSW_PCI_CQS_MAX	96
+#define MLXSW_PCI_EQS_COUNT	2
+#define MLXSW_PCI_EQ_ASYNC_NUM	0
+#define MLXSW_PCI_EQ_COMP_NUM	1
+
+#define MLXSW_PCI_AQ_PAGES	8
+#define MLXSW_PCI_AQ_SIZE	(MLXSW_PCI_PAGE_SIZE * MLXSW_PCI_AQ_PAGES)
+#define MLXSW_PCI_WQE_SIZE	32 /* 32 bytes per element */
+#define MLXSW_PCI_CQE_SIZE	16 /* 16 bytes per element */
+#define MLXSW_PCI_EQE_SIZE	16 /* 16 bytes per element */
+#define MLXSW_PCI_WQE_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_WQE_SIZE)
+#define MLXSW_PCI_CQE_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_CQE_SIZE)
+#define MLXSW_PCI_EQE_COUNT	(MLXSW_PCI_AQ_SIZE / MLXSW_PCI_EQE_SIZE)
+#define MLXSW_PCI_EQE_UPDATE_COUNT	0x80
+
+#define MLXSW_PCI_WQE_SG_ENTRIES	3
+#define MLXSW_PCI_WQE_TYPE_ETHERNET	0xA
+
+/* pci_wqe_c
+ * If set it indicates that a completion should be reported upon
+ * execution of this descriptor.
+ */
+MLXSW_ITEM32(pci, wqe, c, 0x00, 31, 1);
+
+/* pci_wqe_lp
+ * Local Processing, set if packet should be processed by the local
+ * switch hardware:
+ * For Ethernet EMAD (Direct Route and non Direct Route) -
+ * must be set if packet destination is local device
+ * For InfiniBand CTL - must be set if packet destination is local device
+ * Otherwise it must be clear
+ * Local Process packets must not exceed the size of 2K (including payload
+ * and headers).
+ */
+MLXSW_ITEM32(pci, wqe, lp, 0x00, 30, 1);
+
+/* pci_wqe_type
+ * Packet type.
+ */
+MLXSW_ITEM32(pci, wqe, type, 0x00, 23, 4);
+
+/* pci_wqe_byte_count
+ * Size of i-th scatter/gather entry, 0 if entry is unused.
+ */
+MLXSW_ITEM16_INDEXED(pci, wqe, byte_count, 0x02, 0, 14, 0x02, 0x00, false);
+
+/* pci_wqe_address
+ * Physical address of i-th scatter/gather entry.
+ * Gather Entries must be 2Byte aligned.
+ */
+MLXSW_ITEM64_INDEXED(pci, wqe, address, 0x08, 0, 64, 0x8, 0x0, false);
+
+/* pci_cqe_lag
+ * Packet arrives from a port which is a LAG
+ */
+MLXSW_ITEM32(pci, cqe, lag, 0x00, 23, 1);
+
+/* pci_cqe_system_port/lag_id
+ * When lag=0: System port on which the packet was received
+ * When lag=1:
+ * bits [15:4] LAG ID on which the packet was received
+ * bits [3:0] sub_port on which the packet was received
+ */
+MLXSW_ITEM32(pci, cqe, system_port, 0x00, 0, 16);
+MLXSW_ITEM32(pci, cqe, lag_id, 0x00, 4, 12);
+MLXSW_ITEM32(pci, cqe, lag_port_index, 0x00, 0, 4);
+
+/* pci_cqe_wqe_counter
+ * WQE count of the WQEs completed on the associated dqn
+ */
+MLXSW_ITEM32(pci, cqe, wqe_counter, 0x04, 16, 16);
+
+/* pci_cqe_byte_count
+ * Byte count of received packets including additional two
+ * Reserved Bytes that are appended to the end of the frame.
+ * Reserved for Send CQE.
+ */
+MLXSW_ITEM32(pci, cqe, byte_count, 0x04, 0, 14);
+
+/* pci_cqe_trap_id
+ * Trap ID that captured the packet.
+ */
+MLXSW_ITEM32(pci, cqe, trap_id, 0x08, 0, 8);
+
+/* pci_cqe_crc
+ * Length include CRC. Indicates the length field includes
+ * the packet's CRC.
+ */
+MLXSW_ITEM32(pci, cqe, crc, 0x0C, 8, 1);
+
+/* pci_cqe_e
+ * CQE with Error.
+ */
+MLXSW_ITEM32(pci, cqe, e, 0x0C, 7, 1);
+
+/* pci_cqe_sr
+ * 1 - Send Queue
+ * 0 - Receive Queue
+ */
+MLXSW_ITEM32(pci, cqe, sr, 0x0C, 6, 1);
+
+/* pci_cqe_dqn
+ * Descriptor Queue (DQ) Number.
+ */
+MLXSW_ITEM32(pci, cqe, dqn, 0x0C, 1, 5);
+
+/* pci_cqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, cqe, owner, 0x0C, 0, 1);
+
+/* pci_eqe_event_type
+ * Event type.
+ */
+MLXSW_ITEM32(pci, eqe, event_type, 0x0C, 24, 8);
+#define MLXSW_PCI_EQE_EVENT_TYPE_COMP	0x00
+#define MLXSW_PCI_EQE_EVENT_TYPE_CMD	0x0A
+
+/* pci_eqe_event_sub_type
+ * Event sub-type.
+ */
+MLXSW_ITEM32(pci, eqe, event_sub_type, 0x0C, 16, 8);
+
+/* pci_eqe_cqn
+ * Completion Queue that triggered this EQE.
+ */
+MLXSW_ITEM32(pci, eqe, cqn, 0x0C, 8, 7);
+
+/* pci_eqe_owner
+ * Ownership bit.
+ */
+MLXSW_ITEM32(pci, eqe, owner, 0x0C, 0, 1);
+
+/* pci_eqe_cmd_token
+ * Command completion event - token
+ */
+MLXSW_ITEM32(pci, eqe, cmd_token, 0x08, 16, 16);
+
+/* pci_eqe_cmd_status
+ * Command completion event - status
+ */
+MLXSW_ITEM32(pci, eqe, cmd_status, 0x08, 0, 8);
+
+/* pci_eqe_cmd_out_param_h
+ * Command completion event - output parameter - higher part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_h, 0x0C, 0, 32);
+
+/* pci_eqe_cmd_out_param_l
+ * Command completion event - output parameter - lower part
+ */
+MLXSW_ITEM32(pci, eqe, cmd_out_param_l, 0x10, 0, 32);
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/port.h b/drivers/net/ethernet/mellanox/mlxsw/port.h
index af371a82c35b..3d42146473b3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/port.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/port.h
@@ -44,6 +44,7 @@
 
 #define MLXSW_PORT_SWID_DISABLED_PORT	255
 #define MLXSW_PORT_SWID_ALL_SWIDS	254
+#define MLXSW_PORT_SWID_TYPE_IB		1
 #define MLXSW_PORT_SWID_TYPE_ETH	2
 
 #define MLXSW_PORT_MID			0xd000
@@ -51,6 +52,9 @@
 #define MLXSW_PORT_MAX_PHY_PORTS	0x40
 #define MLXSW_PORT_MAX_PORTS		(MLXSW_PORT_MAX_PHY_PORTS + 1)
 
+#define MLXSW_PORT_MAX_IB_PHY_PORTS	36
+#define MLXSW_PORT_MAX_IB_PORTS		(MLXSW_PORT_MAX_IB_PHY_PORTS + 1)
+
 #define MLXSW_PORT_DEVID_BITS_OFFSET	10
 #define MLXSW_PORT_PHY_BITS_OFFSET	4
 #define MLXSW_PORT_PHY_BITS_MASK	(MLXSW_PORT_MAX_PHY_PORTS - 1)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6460c7256f2b..1357fe04391b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -48,8 +48,16 @@
 struct mlxsw_reg_info {
 	u16 id;
 	u16 len; /* In u8 */
+	const char *name;
 };
 
+#define MLXSW_REG_DEFINE(_name, _id, _len)				\
+static const struct mlxsw_reg_info mlxsw_reg_##_name = {		\
+	.id = _id,							\
+	.len = _len,							\
+	.name = #_name,							\
+}
+
 #define MLXSW_REG(type) (&mlxsw_reg_##type)
 #define MLXSW_REG_LEN(type) MLXSW_REG(type)->len
 #define MLXSW_REG_ZERO(type, payload) memset(payload, 0, MLXSW_REG(type)->len)
@@ -61,10 +69,7 @@ struct mlxsw_reg_info {
 #define MLXSW_REG_SGCR_ID 0x2000
 #define MLXSW_REG_SGCR_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_sgcr = {
-	.id = MLXSW_REG_SGCR_ID,
-	.len = MLXSW_REG_SGCR_LEN,
-};
+MLXSW_REG_DEFINE(sgcr, MLXSW_REG_SGCR_ID, MLXSW_REG_SGCR_LEN);
 
 /* reg_sgcr_llb
  * Link Local Broadcast (Default=0)
@@ -87,10 +92,7 @@ static inline void mlxsw_reg_sgcr_pack(char *payload, bool llb)
 #define MLXSW_REG_SPAD_ID 0x2002
 #define MLXSW_REG_SPAD_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_spad = {
-	.id = MLXSW_REG_SPAD_ID,
-	.len = MLXSW_REG_SPAD_LEN,
-};
+MLXSW_REG_DEFINE(spad, MLXSW_REG_SPAD_ID, MLXSW_REG_SPAD_LEN);
 
 /* reg_spad_base_mac
  * Base MAC address for the switch partitions.
@@ -109,10 +111,7 @@ MLXSW_ITEM_BUF(reg, spad, base_mac, 0x02, 6);
 #define MLXSW_REG_SMID_ID 0x2007
 #define MLXSW_REG_SMID_LEN 0x240
 
-static const struct mlxsw_reg_info mlxsw_reg_smid = {
-	.id = MLXSW_REG_SMID_ID,
-	.len = MLXSW_REG_SMID_LEN,
-};
+MLXSW_REG_DEFINE(smid, MLXSW_REG_SMID_ID, MLXSW_REG_SMID_LEN);
 
 /* reg_smid_swid
  * Switch partition ID.
@@ -156,10 +155,7 @@ static inline void mlxsw_reg_smid_pack(char *payload, u16 mid,
 #define MLXSW_REG_SSPR_ID 0x2008
 #define MLXSW_REG_SSPR_LEN 0x8
 
-static const struct mlxsw_reg_info mlxsw_reg_sspr = {
-	.id = MLXSW_REG_SSPR_ID,
-	.len = MLXSW_REG_SSPR_LEN,
-};
+MLXSW_REG_DEFINE(sspr, MLXSW_REG_SSPR_ID, MLXSW_REG_SSPR_LEN);
 
 /* reg_sspr_m
  * Master - if set, then the record describes the master system port.
@@ -215,10 +211,7 @@ static inline void mlxsw_reg_sspr_pack(char *payload, u8 local_port)
 #define MLXSW_REG_SFDAT_ID 0x2009
 #define MLXSW_REG_SFDAT_LEN 0x8
 
-static const struct mlxsw_reg_info mlxsw_reg_sfdat = {
-	.id = MLXSW_REG_SFDAT_ID,
-	.len = MLXSW_REG_SFDAT_LEN,
-};
+MLXSW_REG_DEFINE(sfdat, MLXSW_REG_SFDAT_ID, MLXSW_REG_SFDAT_LEN);
 
 /* reg_sfdat_swid
  * Switch partition ID.
@@ -256,10 +249,7 @@ static inline void mlxsw_reg_sfdat_pack(char *payload, u32 age_time)
 #define MLXSW_REG_SFD_LEN (MLXSW_REG_SFD_BASE_LEN +	\
 			   MLXSW_REG_SFD_REC_LEN * MLXSW_REG_SFD_REC_MAX_COUNT)
 
-static const struct mlxsw_reg_info mlxsw_reg_sfd = {
-	.id = MLXSW_REG_SFD_ID,
-	.len = MLXSW_REG_SFD_LEN,
-};
+MLXSW_REG_DEFINE(sfd, MLXSW_REG_SFD_ID, MLXSW_REG_SFD_LEN);
 
 /* reg_sfd_swid
  * Switch partition ID for queries. Reserved on Write.
@@ -580,10 +570,7 @@ mlxsw_reg_sfd_mc_pack(char *payload, int rec_index,
 #define MLXSW_REG_SFN_LEN (MLXSW_REG_SFN_BASE_LEN +	\
 			   MLXSW_REG_SFN_REC_LEN * MLXSW_REG_SFN_REC_MAX_COUNT)
 
-static const struct mlxsw_reg_info mlxsw_reg_sfn = {
-	.id = MLXSW_REG_SFN_ID,
-	.len = MLXSW_REG_SFN_LEN,
-};
+MLXSW_REG_DEFINE(sfn, MLXSW_REG_SFN_ID, MLXSW_REG_SFN_LEN);
 
 /* reg_sfn_swid
  * Switch partition ID.
@@ -701,10 +688,7 @@ static inline void mlxsw_reg_sfn_mac_lag_unpack(char *payload, int rec_index,
 #define MLXSW_REG_SPMS_ID 0x200D
 #define MLXSW_REG_SPMS_LEN 0x404
 
-static const struct mlxsw_reg_info mlxsw_reg_spms = {
-	.id = MLXSW_REG_SPMS_ID,
-	.len = MLXSW_REG_SPMS_LEN,
-};
+MLXSW_REG_DEFINE(spms, MLXSW_REG_SPMS_ID, MLXSW_REG_SPMS_LEN);
 
 /* reg_spms_local_port
  * Local port number.
@@ -748,10 +732,7 @@ static inline void mlxsw_reg_spms_vid_pack(char *payload, u16 vid,
 #define MLXSW_REG_SPVID_ID 0x200E
 #define MLXSW_REG_SPVID_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_spvid = {
-	.id = MLXSW_REG_SPVID_ID,
-	.len = MLXSW_REG_SPVID_LEN,
-};
+MLXSW_REG_DEFINE(spvid, MLXSW_REG_SPVID_ID, MLXSW_REG_SPVID_LEN);
 
 /* reg_spvid_local_port
  * Local port number.
@@ -792,10 +773,7 @@ static inline void mlxsw_reg_spvid_pack(char *payload, u8 local_port, u16 pvid)
 #define MLXSW_REG_SPVM_LEN (MLXSW_REG_SPVM_BASE_LEN +	\
 		    MLXSW_REG_SPVM_REC_LEN * MLXSW_REG_SPVM_REC_MAX_COUNT)
 
-static const struct mlxsw_reg_info mlxsw_reg_spvm = {
-	.id = MLXSW_REG_SPVM_ID,
-	.len = MLXSW_REG_SPVM_LEN,
-};
+MLXSW_REG_DEFINE(spvm, MLXSW_REG_SPVM_ID, MLXSW_REG_SPVM_LEN);
 
 /* reg_spvm_pt
  * Priority tagged. If this bit is set, packets forwarded to the port with
@@ -891,10 +869,7 @@ static inline void mlxsw_reg_spvm_pack(char *payload, u8 local_port,
 #define MLXSW_REG_SPAFT_ID 0x2010
 #define MLXSW_REG_SPAFT_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_spaft = {
-	.id = MLXSW_REG_SPAFT_ID,
-	.len = MLXSW_REG_SPAFT_LEN,
-};
+MLXSW_REG_DEFINE(spaft, MLXSW_REG_SPAFT_ID, MLXSW_REG_SPAFT_LEN);
 
 /* reg_spaft_local_port
  * Local port number.
@@ -947,10 +922,7 @@ static inline void mlxsw_reg_spaft_pack(char *payload, u8 local_port,
 #define MLXSW_REG_SFGC_ID 0x2011
 #define MLXSW_REG_SFGC_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_sfgc = {
-	.id = MLXSW_REG_SFGC_ID,
-	.len = MLXSW_REG_SFGC_LEN,
-};
+MLXSW_REG_DEFINE(sfgc, MLXSW_REG_SFGC_ID, MLXSW_REG_SFGC_LEN);
 
 enum mlxsw_reg_sfgc_type {
 	MLXSW_REG_SFGC_TYPE_BROADCAST,
@@ -1045,10 +1017,7 @@ mlxsw_reg_sfgc_pack(char *payload, enum mlxsw_reg_sfgc_type type,
 #define MLXSW_REG_SFTR_ID 0x2012
 #define MLXSW_REG_SFTR_LEN 0x420
 
-static const struct mlxsw_reg_info mlxsw_reg_sftr = {
-	.id = MLXSW_REG_SFTR_ID,
-	.len = MLXSW_REG_SFTR_LEN,
-};
+MLXSW_REG_DEFINE(sftr, MLXSW_REG_SFTR_ID, MLXSW_REG_SFTR_LEN);
 
 /* reg_sftr_swid
  * Switch partition ID with which to associate the port.
@@ -1118,10 +1087,7 @@ static inline void mlxsw_reg_sftr_pack(char *payload,
 #define MLXSW_REG_SFDF_ID 0x2013
 #define MLXSW_REG_SFDF_LEN 0x14
 
-static const struct mlxsw_reg_info mlxsw_reg_sfdf = {
-	.id = MLXSW_REG_SFDF_ID,
-	.len = MLXSW_REG_SFDF_LEN,
-};
+MLXSW_REG_DEFINE(sfdf, MLXSW_REG_SFDF_ID, MLXSW_REG_SFDF_LEN);
 
 /* reg_sfdf_swid
  * Switch partition ID.
@@ -1205,10 +1171,7 @@ MLXSW_ITEM32(reg, sfdf, lag_fid_lag_id, 0x08, 0, 10);
 #define MLXSW_REG_SLDR_ID 0x2014
 #define MLXSW_REG_SLDR_LEN 0x0C /* counting in only one port in list */
 
-static const struct mlxsw_reg_info mlxsw_reg_sldr = {
-	.id = MLXSW_REG_SLDR_ID,
-	.len = MLXSW_REG_SLDR_LEN,
-};
+MLXSW_REG_DEFINE(sldr, MLXSW_REG_SLDR_ID, MLXSW_REG_SLDR_LEN);
 
 enum mlxsw_reg_sldr_op {
 	/* Indicates a creation of a new LAG-ID, lag_id must be valid */
@@ -1288,10 +1251,7 @@ static inline void mlxsw_reg_sldr_lag_remove_port_pack(char *payload, u8 lag_id,
 #define MLXSW_REG_SLCR_ID 0x2015
 #define MLXSW_REG_SLCR_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_slcr = {
-	.id = MLXSW_REG_SLCR_ID,
-	.len = MLXSW_REG_SLCR_LEN,
-};
+MLXSW_REG_DEFINE(slcr, MLXSW_REG_SLCR_ID, MLXSW_REG_SLCR_LEN);
 
 enum mlxsw_reg_slcr_pp {
 	/* Global Configuration (for all ports) */
@@ -1404,10 +1364,7 @@ static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash)
 #define MLXSW_REG_SLCOR_ID 0x2016
 #define MLXSW_REG_SLCOR_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_slcor = {
-	.id = MLXSW_REG_SLCOR_ID,
-	.len = MLXSW_REG_SLCOR_LEN,
-};
+MLXSW_REG_DEFINE(slcor, MLXSW_REG_SLCOR_ID, MLXSW_REG_SLCOR_LEN);
 
 enum mlxsw_reg_slcor_col {
 	/* Port is added with collector disabled */
@@ -1490,10 +1447,7 @@ static inline void mlxsw_reg_slcor_col_disable_pack(char *payload,
 #define MLXSW_REG_SPMLR_ID 0x2018
 #define MLXSW_REG_SPMLR_LEN 0x8
 
-static const struct mlxsw_reg_info mlxsw_reg_spmlr = {
-	.id = MLXSW_REG_SPMLR_ID,
-	.len = MLXSW_REG_SPMLR_LEN,
-};
+MLXSW_REG_DEFINE(spmlr, MLXSW_REG_SPMLR_ID, MLXSW_REG_SPMLR_LEN);
 
 /* reg_spmlr_local_port
  * Local port number.
@@ -1544,10 +1498,7 @@ static inline void mlxsw_reg_spmlr_pack(char *payload, u8 local_port,
 #define MLXSW_REG_SVFA_ID 0x201C
 #define MLXSW_REG_SVFA_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_svfa = {
-	.id = MLXSW_REG_SVFA_ID,
-	.len = MLXSW_REG_SVFA_LEN,
-};
+MLXSW_REG_DEFINE(svfa, MLXSW_REG_SVFA_ID, MLXSW_REG_SVFA_LEN);
 
 /* reg_svfa_swid
  * Switch partition ID.
@@ -1636,10 +1587,7 @@ static inline void mlxsw_reg_svfa_pack(char *payload, u8 local_port,
 #define MLXSW_REG_SVPE_ID 0x201E
 #define MLXSW_REG_SVPE_LEN 0x4
 
-static const struct mlxsw_reg_info mlxsw_reg_svpe = {
-	.id = MLXSW_REG_SVPE_ID,
-	.len = MLXSW_REG_SVPE_LEN,
-};
+MLXSW_REG_DEFINE(svpe, MLXSW_REG_SVPE_ID, MLXSW_REG_SVPE_LEN);
 
 /* reg_svpe_local_port
  * Local port number
@@ -1672,10 +1620,7 @@ static inline void mlxsw_reg_svpe_pack(char *payload, u8 local_port,
 #define MLXSW_REG_SFMR_ID 0x201F
 #define MLXSW_REG_SFMR_LEN 0x18
 
-static const struct mlxsw_reg_info mlxsw_reg_sfmr = {
-	.id = MLXSW_REG_SFMR_ID,
-	.len = MLXSW_REG_SFMR_LEN,
-};
+MLXSW_REG_DEFINE(sfmr, MLXSW_REG_SFMR_ID, MLXSW_REG_SFMR_LEN);
 
 enum mlxsw_reg_sfmr_op {
 	MLXSW_REG_SFMR_OP_CREATE_FID,
@@ -1762,10 +1707,7 @@ static inline void mlxsw_reg_sfmr_pack(char *payload,
 			      MLXSW_REG_SPVMLR_REC_LEN * \
 			      MLXSW_REG_SPVMLR_REC_MAX_COUNT)
 
-static const struct mlxsw_reg_info mlxsw_reg_spvmlr = {
-	.id = MLXSW_REG_SPVMLR_ID,
-	.len = MLXSW_REG_SPVMLR_LEN,
-};
+MLXSW_REG_DEFINE(spvmlr, MLXSW_REG_SPVMLR_ID, MLXSW_REG_SPVMLR_LEN);
 
 /* reg_spvmlr_local_port
  * Local ingress port.
@@ -1815,6 +1757,146 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port,
 	}
 }
 
+/* QPCR - QoS Policer Configuration Register
+ * -----------------------------------------
+ * The QPCR register is used to create policers - that limit
+ * the rate of bytes or packets via some trap group.
+ */
+#define MLXSW_REG_QPCR_ID 0x4004
+#define MLXSW_REG_QPCR_LEN 0x28
+
+MLXSW_REG_DEFINE(qpcr, MLXSW_REG_QPCR_ID, MLXSW_REG_QPCR_LEN);
+
+enum mlxsw_reg_qpcr_g {
+	MLXSW_REG_QPCR_G_GLOBAL = 2,
+	MLXSW_REG_QPCR_G_STORM_CONTROL = 3,
+};
+
+/* reg_qpcr_g
+ * The policer type.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, g, 0x00, 14, 2);
+
+/* reg_qpcr_pid
+ * Policer ID.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, qpcr, pid, 0x00, 0, 14);
+
+/* reg_qpcr_color_aware
+ * Is the policer aware of colors.
+ * Must be 0 (unaware) for cpu port.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, color_aware, 0x04, 15, 1);
+
+/* reg_qpcr_bytes
+ * Whether the policer limit is in bytes per sec or packets per sec.
+ * 0 - packets
+ * 1 - bytes
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, bytes, 0x04, 14, 1);
+
+enum mlxsw_reg_qpcr_ir_units {
+	MLXSW_REG_QPCR_IR_UNITS_M,
+	MLXSW_REG_QPCR_IR_UNITS_K,
+};
+
+/* reg_qpcr_ir_units
+ * Policer's units for cir and eir fields (for bytes limits only)
+ * 1 - 10^3
+ * 0 - 10^6
+ * Access: OP
+ */
+MLXSW_ITEM32(reg, qpcr, ir_units, 0x04, 12, 1);
+
+enum mlxsw_reg_qpcr_rate_type {
+	MLXSW_REG_QPCR_RATE_TYPE_SINGLE = 1,
+	MLXSW_REG_QPCR_RATE_TYPE_DOUBLE = 2,
+};
+
+/* reg_qpcr_rate_type
+ * Policer can have one limit (single rate) or 2 limits with specific operation
+ * for packets that exceed the lower rate but not the upper one.
+ * (For cpu port must be single rate)
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, rate_type, 0x04, 8, 2);
+
+/* reg_qpcr_cbs
+ * Policer's committed burst size.
+ * The policer is working with time slices of 50 nano sec. By default every
+ * slice is granted the proportionate share of the committed rate. If we want to
+ * allow a slice to exceed that share (while still keeping the rate per sec) we
+ * can allow burst. The burst size is between the default proportionate share
+ * (and no lower than 8) to 32Gb. (Even though giving a number higher than the
+ * committed rate will result in exceeding the rate). The burst size must be a
+ * power of 2 and will be determined by 2^cbs.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cbs, 0x08, 24, 6);
+
+/* reg_qpcr_cir
+ * Policer's committed rate.
+ * The rate used for single rate, the lower rate for double rate.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, cir, 0x0C, 0, 32);
+
+/* reg_qpcr_eir
+ * Policer's exceed rate.
+ * The higher rate for double rate, reserved for single rate.
+ * Lower rate for double rate policer.
+ * For bytes limits, the rate will be this value * the unit from ir_units.
+ * (Resolution error is up to 1%).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, qpcr, eir, 0x10, 0, 32);
+
+#define MLXSW_REG_QPCR_DOUBLE_RATE_ACTION 2
+
+/* reg_qpcr_exceed_action
+ * What to do with packets between the 2 limits for double rate.
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, exceed_action, 0x14, 0, 4);
+
+enum mlxsw_reg_qpcr_action {
+	/* Discard */
+	MLXSW_REG_QPCR_ACTION_DISCARD = 1,
+	/* Forward and set color to red.
+	 * If the packet is intended to cpu port, it will be dropped.
+	 */
+	MLXSW_REG_QPCR_ACTION_FORWARD = 2,
+};
+
+/* reg_qpcr_violate_action
+ * What to do with packets that cross the cir limit (for single rate) or the eir
+ * limit (for double rate).
+ * Access: RW for unbounded policer. RO for bounded policer.
+ */
+MLXSW_ITEM32(reg, qpcr, violate_action, 0x18, 0, 4);
+
+static inline void mlxsw_reg_qpcr_pack(char *payload, u16 pid,
+				       enum mlxsw_reg_qpcr_ir_units ir_units,
+				       bool bytes, u32 cir, u16 cbs)
+{
+	MLXSW_REG_ZERO(qpcr, payload);
+	mlxsw_reg_qpcr_pid_set(payload, pid);
+	mlxsw_reg_qpcr_g_set(payload, MLXSW_REG_QPCR_G_GLOBAL);
+	mlxsw_reg_qpcr_rate_type_set(payload, MLXSW_REG_QPCR_RATE_TYPE_SINGLE);
+	mlxsw_reg_qpcr_violate_action_set(payload,
+					  MLXSW_REG_QPCR_ACTION_DISCARD);
+	mlxsw_reg_qpcr_cir_set(payload, cir);
+	mlxsw_reg_qpcr_ir_units_set(payload, ir_units);
+	mlxsw_reg_qpcr_bytes_set(payload, bytes);
+	mlxsw_reg_qpcr_cbs_set(payload, cbs);
+}
+
 /* QTCT - QoS Switch Traffic Class Table
  * -------------------------------------
  * Configures the mapping between the packet switch priority and the
@@ -1823,10 +1905,7 @@ static inline void mlxsw_reg_spvmlr_pack(char *payload, u8 local_port,
 #define MLXSW_REG_QTCT_ID 0x400A
 #define MLXSW_REG_QTCT_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_qtct = {
-	.id = MLXSW_REG_QTCT_ID,
-	.len = MLXSW_REG_QTCT_LEN,
-};
+MLXSW_REG_DEFINE(qtct, MLXSW_REG_QTCT_ID, MLXSW_REG_QTCT_LEN);
 
 /* reg_qtct_local_port
  * Local port number.
@@ -1875,10 +1954,7 @@ static inline void mlxsw_reg_qtct_pack(char *payload, u8 local_port,
 #define MLXSW_REG_QEEC_ID 0x400D
 #define MLXSW_REG_QEEC_LEN 0x1C
 
-static const struct mlxsw_reg_info mlxsw_reg_qeec = {
-	.id = MLXSW_REG_QEEC_ID,
-	.len = MLXSW_REG_QEEC_LEN,
-};
+MLXSW_REG_DEFINE(qeec, MLXSW_REG_QEEC_ID, MLXSW_REG_QEEC_LEN);
 
 /* reg_qeec_local_port
  * Local port number.
@@ -2000,10 +2076,7 @@ static inline void mlxsw_reg_qeec_pack(char *payload, u8 local_port,
 #define MLXSW_REG_PMLP_ID 0x5002
 #define MLXSW_REG_PMLP_LEN 0x40
 
-static const struct mlxsw_reg_info mlxsw_reg_pmlp = {
-	.id = MLXSW_REG_PMLP_ID,
-	.len = MLXSW_REG_PMLP_LEN,
-};
+MLXSW_REG_DEFINE(pmlp, MLXSW_REG_PMLP_ID, MLXSW_REG_PMLP_LEN);
 
 /* reg_pmlp_rxtx
  * 0 - Tx value is used for both Tx and Rx.
@@ -2059,10 +2132,7 @@ static inline void mlxsw_reg_pmlp_pack(char *payload, u8 local_port)
 #define MLXSW_REG_PMTU_ID 0x5003
 #define MLXSW_REG_PMTU_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_pmtu = {
-	.id = MLXSW_REG_PMTU_ID,
-	.len = MLXSW_REG_PMTU_LEN,
-};
+MLXSW_REG_DEFINE(pmtu, MLXSW_REG_PMTU_ID, MLXSW_REG_PMTU_LEN);
 
 /* reg_pmtu_local_port
  * Local port number.
@@ -2116,10 +2186,7 @@ static inline void mlxsw_reg_pmtu_pack(char *payload, u8 local_port,
 #define MLXSW_REG_PTYS_ID 0x5004
 #define MLXSW_REG_PTYS_LEN 0x40
 
-static const struct mlxsw_reg_info mlxsw_reg_ptys = {
-	.id = MLXSW_REG_PTYS_ID,
-	.len = MLXSW_REG_PTYS_LEN,
-};
+MLXSW_REG_DEFINE(ptys, MLXSW_REG_PTYS_ID, MLXSW_REG_PTYS_LEN);
 
 /* reg_ptys_local_port
  * Local port number.
@@ -2127,6 +2194,7 @@ static const struct mlxsw_reg_info mlxsw_reg_ptys = {
  */
 MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8);
 
+#define MLXSW_REG_PTYS_PROTO_MASK_IB	BIT(0)
 #define MLXSW_REG_PTYS_PROTO_MASK_ETH	BIT(2)
 
 /* reg_ptys_proto_mask
@@ -2185,18 +2253,61 @@ MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4);
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_cap, 0x0C, 0, 32);
 
+/* reg_ptys_ib_link_width_cap
+ * IB port supported widths.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_cap, 0x10, 16, 16);
+
+#define MLXSW_REG_PTYS_IB_SPEED_SDR	BIT(0)
+#define MLXSW_REG_PTYS_IB_SPEED_DDR	BIT(1)
+#define MLXSW_REG_PTYS_IB_SPEED_QDR	BIT(2)
+#define MLXSW_REG_PTYS_IB_SPEED_FDR10	BIT(3)
+#define MLXSW_REG_PTYS_IB_SPEED_FDR	BIT(4)
+#define MLXSW_REG_PTYS_IB_SPEED_EDR	BIT(5)
+
+/* reg_ptys_ib_proto_cap
+ * IB port supported speeds and protocols.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_cap, 0x10, 0, 16);
+
 /* reg_ptys_eth_proto_admin
  * Speed and protocol to set port to.
  * Access: RW
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32);
 
+/* reg_ptys_ib_link_width_admin
+ * IB width to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_admin, 0x1C, 16, 16);
+
+/* reg_ptys_ib_proto_admin
+ * IB speeds and protocols to set port to.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_admin, 0x1C, 0, 16);
+
 /* reg_ptys_eth_proto_oper
  * The current speed and protocol configured for the port.
  * Access: RO
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
 
+/* reg_ptys_ib_link_width_oper
+ * The current IB width to set port to.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_link_width_oper, 0x28, 16, 16);
+
+/* reg_ptys_ib_proto_oper
+ * The current IB speed and protocol.
+ * Access: RO
+ */
+MLXSW_ITEM32(reg, ptys, ib_proto_oper, 0x28, 0, 16);
+
 /* reg_ptys_eth_proto_lp_advertise
  * The protocols that were advertised by the link partner during
  * autonegotiation.
@@ -2204,8 +2315,8 @@ MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32);
  */
 MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32);
 
-static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port,
-				       u32 proto_admin)
+static inline void mlxsw_reg_ptys_eth_pack(char *payload, u8 local_port,
+					   u32 proto_admin)
 {
 	MLXSW_REG_ZERO(ptys, payload);
 	mlxsw_reg_ptys_local_port_set(payload, local_port);
@@ -2213,9 +2324,10 @@ static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port,
 	mlxsw_reg_ptys_eth_proto_admin_set(payload, proto_admin);
 }
 
-static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap,
-					 u32 *p_eth_proto_adm,
-					 u32 *p_eth_proto_oper)
+static inline void mlxsw_reg_ptys_eth_unpack(char *payload,
+					     u32 *p_eth_proto_cap,
+					     u32 *p_eth_proto_adm,
+					     u32 *p_eth_proto_oper)
 {
 	if (p_eth_proto_cap)
 		*p_eth_proto_cap = mlxsw_reg_ptys_eth_proto_cap_get(payload);
@@ -2225,6 +2337,33 @@ static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap,
 		*p_eth_proto_oper = mlxsw_reg_ptys_eth_proto_oper_get(payload);
 }
 
+static inline void mlxsw_reg_ptys_ib_pack(char *payload, u8 local_port,
+					  u16 proto_admin, u16 link_width)
+{
+	MLXSW_REG_ZERO(ptys, payload);
+	mlxsw_reg_ptys_local_port_set(payload, local_port);
+	mlxsw_reg_ptys_proto_mask_set(payload, MLXSW_REG_PTYS_PROTO_MASK_IB);
+	mlxsw_reg_ptys_ib_proto_admin_set(payload, proto_admin);
+	mlxsw_reg_ptys_ib_link_width_admin_set(payload, link_width);
+}
+
+static inline void mlxsw_reg_ptys_ib_unpack(char *payload, u16 *p_ib_proto_cap,
+					    u16 *p_ib_link_width_cap,
+					    u16 *p_ib_proto_oper,
+					    u16 *p_ib_link_width_oper)
+{
+	if (p_ib_proto_cap)
+		*p_ib_proto_cap = mlxsw_reg_ptys_ib_proto_cap_get(payload);
+	if (p_ib_link_width_cap)
+		*p_ib_link_width_cap =
+			mlxsw_reg_ptys_ib_link_width_cap_get(payload);
+	if (p_ib_proto_oper)
+		*p_ib_proto_oper = mlxsw_reg_ptys_ib_proto_oper_get(payload);
+	if (p_ib_link_width_oper)
+		*p_ib_link_width_oper =
+			mlxsw_reg_ptys_ib_link_width_oper_get(payload);
+}
+
 /* PPAD - Port Physical Address Register
  * -------------------------------------
  * The PPAD register configures the per port physical MAC address.
@@ -2232,10 +2371,7 @@ static inline void mlxsw_reg_ptys_unpack(char *payload, u32 *p_eth_proto_cap,
 #define MLXSW_REG_PPAD_ID 0x5005
 #define MLXSW_REG_PPAD_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_ppad = {
-	.id = MLXSW_REG_PPAD_ID,
-	.len = MLXSW_REG_PPAD_LEN,
-};
+MLXSW_REG_DEFINE(ppad, MLXSW_REG_PPAD_ID, MLXSW_REG_PPAD_LEN);
 
 /* reg_ppad_single_base_mac
  * 0: base_mac, local port should be 0 and mac[7:0] is
@@ -2273,10 +2409,7 @@ static inline void mlxsw_reg_ppad_pack(char *payload, bool single_base_mac,
 #define MLXSW_REG_PAOS_ID 0x5006
 #define MLXSW_REG_PAOS_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_paos = {
-	.id = MLXSW_REG_PAOS_ID,
-	.len = MLXSW_REG_PAOS_LEN,
-};
+MLXSW_REG_DEFINE(paos, MLXSW_REG_PAOS_ID, MLXSW_REG_PAOS_LEN);
 
 /* reg_paos_swid
  * Switch partition ID with which to associate the port.
@@ -2356,10 +2489,7 @@ static inline void mlxsw_reg_paos_pack(char *payload, u8 local_port,
 #define MLXSW_REG_PFCC_ID 0x5007
 #define MLXSW_REG_PFCC_LEN 0x20
 
-static const struct mlxsw_reg_info mlxsw_reg_pfcc = {
-	.id = MLXSW_REG_PFCC_ID,
-	.len = MLXSW_REG_PFCC_LEN,
-};
+MLXSW_REG_DEFINE(pfcc, MLXSW_REG_PFCC_ID, MLXSW_REG_PFCC_LEN);
 
 /* reg_pfcc_local_port
  * Local port number.
@@ -2495,10 +2625,7 @@ static inline void mlxsw_reg_pfcc_pack(char *payload, u8 local_port)
 #define MLXSW_REG_PPCNT_ID 0x5008
 #define MLXSW_REG_PPCNT_LEN 0x100
 
-static const struct mlxsw_reg_info mlxsw_reg_ppcnt = {
-	.id = MLXSW_REG_PPCNT_ID,
-	.len = MLXSW_REG_PPCNT_LEN,
-};
+MLXSW_REG_DEFINE(ppcnt, MLXSW_REG_PPCNT_ID, MLXSW_REG_PPCNT_LEN);
 
 /* reg_ppcnt_swid
  * For HCA: must be always 0.
@@ -2761,6 +2888,27 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
 	mlxsw_reg_ppcnt_prio_tc_set(payload, prio_tc);
 }
 
+/* PLIB - Port Local to InfiniBand Port
+ * ------------------------------------
+ * The PLIB register performs mapping from Local Port into InfiniBand Port.
+ */
+#define MLXSW_REG_PLIB_ID 0x500A
+#define MLXSW_REG_PLIB_LEN 0x10
+
+MLXSW_REG_DEFINE(plib, MLXSW_REG_PLIB_ID, MLXSW_REG_PLIB_LEN);
+
+/* reg_plib_local_port
+ * Local port number.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, plib, local_port, 0x00, 16, 8);
+
+/* reg_plib_ib_port
+ * InfiniBand port remapping for local_port.
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, plib, ib_port, 0x00, 0, 8);
+
 /* PPTB - Port Prio To Buffer Register
  * -----------------------------------
  * Configures the switch priority to buffer table.
@@ -2768,10 +2916,7 @@ static inline void mlxsw_reg_ppcnt_pack(char *payload, u8 local_port,
 #define MLXSW_REG_PPTB_ID 0x500B
 #define MLXSW_REG_PPTB_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_pptb = {
-	.id = MLXSW_REG_PPTB_ID,
-	.len = MLXSW_REG_PPTB_LEN,
-};
+MLXSW_REG_DEFINE(pptb, MLXSW_REG_PPTB_ID, MLXSW_REG_PPTB_LEN);
 
 enum {
 	MLXSW_REG_PPTB_MM_UM,
@@ -2865,10 +3010,7 @@ static inline void mlxsw_reg_pptb_prio_to_buff_pack(char *payload, u8 prio,
 #define MLXSW_REG_PBMC_ID 0x500C
 #define MLXSW_REG_PBMC_LEN 0x6C
 
-static const struct mlxsw_reg_info mlxsw_reg_pbmc = {
-	.id = MLXSW_REG_PBMC_ID,
-	.len = MLXSW_REG_PBMC_LEN,
-};
+MLXSW_REG_DEFINE(pbmc, MLXSW_REG_PBMC_ID, MLXSW_REG_PBMC_LEN);
 
 /* reg_pbmc_local_port
  * Local port number.
@@ -2978,10 +3120,7 @@ static inline void mlxsw_reg_pbmc_lossless_buffer_pack(char *payload,
 #define MLXSW_REG_PSPA_ID 0x500D
 #define MLXSW_REG_PSPA_LEN 0x8
 
-static const struct mlxsw_reg_info mlxsw_reg_pspa = {
-	.id = MLXSW_REG_PSPA_ID,
-	.len = MLXSW_REG_PSPA_LEN,
-};
+MLXSW_REG_DEFINE(pspa, MLXSW_REG_PSPA_ID, MLXSW_REG_PSPA_LEN);
 
 /* reg_pspa_swid
  * Switch partition ID.
@@ -3017,10 +3156,7 @@ static inline void mlxsw_reg_pspa_pack(char *payload, u8 swid, u8 local_port)
 #define MLXSW_REG_HTGT_ID 0x7002
 #define MLXSW_REG_HTGT_LEN 0x100
 
-static const struct mlxsw_reg_info mlxsw_reg_htgt = {
-	.id = MLXSW_REG_HTGT_ID,
-	.len = MLXSW_REG_HTGT_LEN,
-};
+MLXSW_REG_DEFINE(htgt, MLXSW_REG_HTGT_ID, MLXSW_REG_HTGT_LEN);
 
 /* reg_htgt_swid
  * Switch partition ID.
@@ -3038,8 +3174,21 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
 
 enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
-	MLXSW_REG_HTGT_TRAP_GROUP_RX,
-	MLXSW_REG_HTGT_TRAP_GROUP_CTRL,
+	MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
+	MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
 };
 
 /* reg_htgt_trap_group
@@ -3061,6 +3210,8 @@ enum {
  */
 MLXSW_ITEM32(reg, htgt, pide, 0x04, 15, 1);
 
+#define MLXSW_REG_HTGT_INVALID_POLICER 0xff
+
 /* reg_htgt_pid
  * Policer ID for the trap group.
  * Access: RW
@@ -3086,6 +3237,8 @@ MLXSW_ITEM32(reg, htgt, mirror_action, 0x08, 8, 2);
  */
 MLXSW_ITEM32(reg, htgt, mirroring_agent, 0x08, 0, 3);
 
+#define MLXSW_REG_HTGT_DEFAULT_PRIORITY 0
+
 /* reg_htgt_priority
  * Trap group priority.
  * In case a packet matches multiple classification rules, the packet will
@@ -3099,52 +3252,47 @@ MLXSW_ITEM32(reg, htgt, mirroring_agent, 0x08, 0, 3);
  */
 MLXSW_ITEM32(reg, htgt, priority, 0x0C, 0, 4);
 
+#define MLXSW_REG_HTGT_DEFAULT_TC 7
+
 /* reg_htgt_local_path_cpu_tclass
  * CPU ingress traffic class for the trap group.
  * Access: RW
  */
 MLXSW_ITEM32(reg, htgt, local_path_cpu_tclass, 0x10, 16, 6);
 
-#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_EMAD	0x15
-#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_RX	0x14
-#define MLXSW_REG_HTGT_LOCAL_PATH_RDQ_CTRL	0x13
-
+enum mlxsw_reg_htgt_local_path_rdq {
+	MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL = 0x13,
+	MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX = 0x14,
+	MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD = 0x15,
+	MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD = 0x15,
+};
 /* reg_htgt_local_path_rdq
  * Receive descriptor queue (RDQ) to use for the trap group.
  * Access: RW
  */
 MLXSW_ITEM32(reg, htgt, local_path_rdq, 0x10, 0, 6);
 
-static inline void mlxsw_reg_htgt_pack(char *payload,
-				       enum mlxsw_reg_htgt_trap_group group)
+static inline void mlxsw_reg_htgt_pack(char *payload, u8 group, u8 policer_id,
+				       u8 priority, u8 tc)
 {
-	u8 swid, rdq;
-
 	MLXSW_REG_ZERO(htgt, payload);
-	switch (group) {
-	case MLXSW_REG_HTGT_TRAP_GROUP_EMAD:
-		swid = MLXSW_PORT_SWID_ALL_SWIDS;
-		rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_EMAD;
-		break;
-	case MLXSW_REG_HTGT_TRAP_GROUP_RX:
-		swid = 0;
-		rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_RX;
-		break;
-	case MLXSW_REG_HTGT_TRAP_GROUP_CTRL:
-		swid = 0;
-		rdq = MLXSW_REG_HTGT_LOCAL_PATH_RDQ_CTRL;
-		break;
+
+	if (policer_id == MLXSW_REG_HTGT_INVALID_POLICER) {
+		mlxsw_reg_htgt_pide_set(payload,
+					MLXSW_REG_HTGT_POLICER_DISABLE);
+	} else {
+		mlxsw_reg_htgt_pide_set(payload,
+					MLXSW_REG_HTGT_POLICER_ENABLE);
+		mlxsw_reg_htgt_pid_set(payload, policer_id);
 	}
-	mlxsw_reg_htgt_swid_set(payload, swid);
+
 	mlxsw_reg_htgt_type_set(payload, MLXSW_REG_HTGT_PATH_TYPE_LOCAL);
 	mlxsw_reg_htgt_trap_group_set(payload, group);
-	mlxsw_reg_htgt_pide_set(payload, MLXSW_REG_HTGT_POLICER_DISABLE);
-	mlxsw_reg_htgt_pid_set(payload, 0);
 	mlxsw_reg_htgt_mirror_action_set(payload, MLXSW_REG_HTGT_TRAP_TO_CPU);
 	mlxsw_reg_htgt_mirroring_agent_set(payload, 0);
-	mlxsw_reg_htgt_priority_set(payload, 0);
-	mlxsw_reg_htgt_local_path_cpu_tclass_set(payload, 7);
-	mlxsw_reg_htgt_local_path_rdq_set(payload, rdq);
+	mlxsw_reg_htgt_priority_set(payload, priority);
+	mlxsw_reg_htgt_local_path_cpu_tclass_set(payload, tc);
+	mlxsw_reg_htgt_local_path_rdq_set(payload, group);
 }
 
 /* HPKT - Host Packet Trap
@@ -3154,10 +3302,7 @@ static inline void mlxsw_reg_htgt_pack(char *payload,
 #define MLXSW_REG_HPKT_ID 0x7003
 #define MLXSW_REG_HPKT_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_hpkt = {
-	.id = MLXSW_REG_HPKT_ID,
-	.len = MLXSW_REG_HPKT_LEN,
-};
+MLXSW_REG_DEFINE(hpkt, MLXSW_REG_HPKT_ID, MLXSW_REG_HPKT_LEN);
 
 enum {
 	MLXSW_REG_HPKT_ACK_NOT_REQUIRED,
@@ -3221,6 +3366,7 @@ enum {
 
 /* reg_hpkt_ctrl
  * Configure dedicated buffer resources for control packets.
+ * Ignored by SwitchX-2.
  * 0 - Keep factory defaults.
  * 1 - Do not use control buffer for this trap ID.
  * 2 - Use control buffer for this trap ID.
@@ -3228,25 +3374,18 @@ enum {
  */
 MLXSW_ITEM32(reg, hpkt, ctrl, 0x04, 16, 2);
 
-static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id)
+static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id,
+				       enum mlxsw_reg_htgt_trap_group trap_group,
+				       bool is_ctrl)
 {
-	enum mlxsw_reg_htgt_trap_group trap_group;
-
 	MLXSW_REG_ZERO(hpkt, payload);
 	mlxsw_reg_hpkt_ack_set(payload, MLXSW_REG_HPKT_ACK_NOT_REQUIRED);
 	mlxsw_reg_hpkt_action_set(payload, action);
-	switch (trap_id) {
-	case MLXSW_TRAP_ID_ETHEMAD:
-	case MLXSW_TRAP_ID_PUDE:
-		trap_group = MLXSW_REG_HTGT_TRAP_GROUP_EMAD;
-		break;
-	default:
-		trap_group = MLXSW_REG_HTGT_TRAP_GROUP_RX;
-		break;
-	}
 	mlxsw_reg_hpkt_trap_group_set(payload, trap_group);
 	mlxsw_reg_hpkt_trap_id_set(payload, trap_id);
-	mlxsw_reg_hpkt_ctrl_set(payload, MLXSW_REG_HPKT_CTRL_PACKET_DEFAULT);
+	mlxsw_reg_hpkt_ctrl_set(payload, is_ctrl ?
+				MLXSW_REG_HPKT_CTRL_PACKET_USE_BUFFER :
+				MLXSW_REG_HPKT_CTRL_PACKET_NO_BUFFER);
 }
 
 /* RGCR - Router General Configuration Register
@@ -3256,10 +3395,7 @@ static inline void mlxsw_reg_hpkt_pack(char *payload, u8 action, u16 trap_id)
 #define MLXSW_REG_RGCR_ID 0x8001
 #define MLXSW_REG_RGCR_LEN 0x28
 
-static const struct mlxsw_reg_info mlxsw_reg_rgcr = {
-	.id = MLXSW_REG_RGCR_ID,
-	.len = MLXSW_REG_RGCR_LEN,
-};
+MLXSW_REG_DEFINE(rgcr, MLXSW_REG_RGCR_ID, MLXSW_REG_RGCR_LEN);
 
 /* reg_rgcr_ipv4_en
  * IPv4 router enable.
@@ -3330,10 +3466,7 @@ static inline void mlxsw_reg_rgcr_pack(char *payload, bool ipv4_en)
 #define MLXSW_REG_RITR_ID 0x8002
 #define MLXSW_REG_RITR_LEN 0x40
 
-static const struct mlxsw_reg_info mlxsw_reg_ritr = {
-	.id = MLXSW_REG_RITR_ID,
-	.len = MLXSW_REG_RITR_LEN,
-};
+MLXSW_REG_DEFINE(ritr, MLXSW_REG_RITR_ID, MLXSW_REG_RITR_LEN);
 
 /* reg_ritr_enable
  * Enables routing on the router interface.
@@ -3533,10 +3666,7 @@ static inline void mlxsw_reg_ritr_pack(char *payload, bool enable,
 #define MLXSW_REG_RATR_ID 0x8008
 #define MLXSW_REG_RATR_LEN 0x2C
 
-static const struct mlxsw_reg_info mlxsw_reg_ratr = {
-	.id = MLXSW_REG_RATR_ID,
-	.len = MLXSW_REG_RATR_LEN,
-};
+MLXSW_REG_DEFINE(ratr, MLXSW_REG_RATR_ID, MLXSW_REG_RATR_LEN);
 
 enum mlxsw_reg_ratr_op {
 	/* Read */
@@ -3663,10 +3793,7 @@ static inline void mlxsw_reg_ratr_eth_entry_pack(char *payload,
 #define MLXSW_REG_RALTA_ID 0x8010
 #define MLXSW_REG_RALTA_LEN 0x04
 
-static const struct mlxsw_reg_info mlxsw_reg_ralta = {
-	.id = MLXSW_REG_RALTA_ID,
-	.len = MLXSW_REG_RALTA_LEN,
-};
+MLXSW_REG_DEFINE(ralta, MLXSW_REG_RALTA_ID, MLXSW_REG_RALTA_LEN);
 
 /* reg_ralta_op
  * opcode (valid for Write, must be 0 on Read)
@@ -3718,10 +3845,7 @@ static inline void mlxsw_reg_ralta_pack(char *payload, bool alloc,
 #define MLXSW_REG_RALST_ID 0x8011
 #define MLXSW_REG_RALST_LEN 0x104
 
-static const struct mlxsw_reg_info mlxsw_reg_ralst = {
-	.id = MLXSW_REG_RALST_ID,
-	.len = MLXSW_REG_RALST_LEN,
-};
+MLXSW_REG_DEFINE(ralst, MLXSW_REG_RALST_ID, MLXSW_REG_RALST_LEN);
 
 /* reg_ralst_root_bin
  * The bin number of the root bin.
@@ -3788,10 +3912,7 @@ static inline void mlxsw_reg_ralst_bin_pack(char *payload, u8 bin_number,
 #define MLXSW_REG_RALTB_ID 0x8012
 #define MLXSW_REG_RALTB_LEN 0x04
 
-static const struct mlxsw_reg_info mlxsw_reg_raltb = {
-	.id = MLXSW_REG_RALTB_ID,
-	.len = MLXSW_REG_RALTB_LEN,
-};
+MLXSW_REG_DEFINE(raltb, MLXSW_REG_RALTB_ID, MLXSW_REG_RALTB_LEN);
 
 /* reg_raltb_virtual_router
  * Virtual Router ID
@@ -3832,10 +3953,7 @@ static inline void mlxsw_reg_raltb_pack(char *payload, u16 virtual_router,
 #define MLXSW_REG_RALUE_ID 0x8013
 #define MLXSW_REG_RALUE_LEN 0x38
 
-static const struct mlxsw_reg_info mlxsw_reg_ralue = {
-	.id = MLXSW_REG_RALUE_ID,
-	.len = MLXSW_REG_RALUE_LEN,
-};
+MLXSW_REG_DEFINE(ralue, MLXSW_REG_RALUE_ID, MLXSW_REG_RALUE_LEN);
 
 /* reg_ralue_protocol
  * Protocol.
@@ -4095,10 +4213,7 @@ mlxsw_reg_ralue_act_ip2me_pack(char *payload)
 #define MLXSW_REG_RAUHT_ID 0x8014
 #define MLXSW_REG_RAUHT_LEN 0x74
 
-static const struct mlxsw_reg_info mlxsw_reg_rauht = {
-	.id = MLXSW_REG_RAUHT_ID,
-	.len = MLXSW_REG_RAUHT_LEN,
-};
+MLXSW_REG_DEFINE(rauht, MLXSW_REG_RAUHT_ID, MLXSW_REG_RAUHT_LEN);
 
 enum mlxsw_reg_rauht_type {
 	MLXSW_REG_RAUHT_TYPE_IPV4,
@@ -4234,10 +4349,7 @@ static inline void mlxsw_reg_rauht_pack4(char *payload,
 #define MLXSW_REG_RALEU_ID 0x8015
 #define MLXSW_REG_RALEU_LEN 0x28
 
-static const struct mlxsw_reg_info mlxsw_reg_raleu = {
-	.id = MLXSW_REG_RALEU_ID,
-	.len = MLXSW_REG_RALEU_LEN,
-};
+MLXSW_REG_DEFINE(raleu, MLXSW_REG_RALEU_ID, MLXSW_REG_RALEU_LEN);
 
 /* reg_raleu_protocol
  * Protocol.
@@ -4309,10 +4421,7 @@ static inline void mlxsw_reg_raleu_pack(char *payload,
 		MLXSW_REG_RAUHTD_REC_MAX_NUM * MLXSW_REG_RAUHTD_REC_LEN)
 #define MLXSW_REG_RAUHTD_IPV4_ENT_PER_REC 4
 
-static const struct mlxsw_reg_info mlxsw_reg_rauhtd = {
-	.id = MLXSW_REG_RAUHTD_ID,
-	.len = MLXSW_REG_RAUHTD_LEN,
-};
+MLXSW_REG_DEFINE(rauhtd, MLXSW_REG_RAUHTD_ID, MLXSW_REG_RAUHTD_LEN);
 
 #define MLXSW_REG_RAUHTD_FILTER_A BIT(0)
 #define MLXSW_REG_RAUHTD_FILTER_RIF BIT(3)
@@ -4444,10 +4553,7 @@ static inline void mlxsw_reg_rauhtd_ent_ipv4_unpack(char *payload,
 #define MLXSW_REG_MFCR_ID 0x9001
 #define MLXSW_REG_MFCR_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_mfcr = {
-	.id = MLXSW_REG_MFCR_ID,
-	.len = MLXSW_REG_MFCR_LEN,
-};
+MLXSW_REG_DEFINE(mfcr, MLXSW_REG_MFCR_ID, MLXSW_REG_MFCR_LEN);
 
 enum mlxsw_reg_mfcr_pwm_frequency {
 	MLXSW_REG_MFCR_PWM_FEQ_11HZ = 0x00,
@@ -4464,7 +4570,7 @@ enum mlxsw_reg_mfcr_pwm_frequency {
  * Controls the frequency of the PWM signal.
  * Access: RW
  */
-MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 6);
+MLXSW_ITEM32(reg, mfcr, pwm_frequency, 0x00, 0, 7);
 
 #define MLXSW_MFCR_TACHOS_MAX 10
 
@@ -4507,10 +4613,7 @@ mlxsw_reg_mfcr_unpack(char *payload,
 #define MLXSW_REG_MFSC_ID 0x9002
 #define MLXSW_REG_MFSC_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_mfsc = {
-	.id = MLXSW_REG_MFSC_ID,
-	.len = MLXSW_REG_MFSC_LEN,
-};
+MLXSW_REG_DEFINE(mfsc, MLXSW_REG_MFSC_ID, MLXSW_REG_MFSC_LEN);
 
 /* reg_mfsc_pwm
  * Fan pwm to control / monitor.
@@ -4541,10 +4644,7 @@ static inline void mlxsw_reg_mfsc_pack(char *payload, u8 pwm,
 #define MLXSW_REG_MFSM_ID 0x9003
 #define MLXSW_REG_MFSM_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_mfsm = {
-	.id = MLXSW_REG_MFSM_ID,
-	.len = MLXSW_REG_MFSM_LEN,
-};
+MLXSW_REG_DEFINE(mfsm, MLXSW_REG_MFSM_ID, MLXSW_REG_MFSM_LEN);
 
 /* reg_mfsm_tacho
  * Fan tachometer index.
@@ -4564,6 +4664,54 @@ static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho)
 	mlxsw_reg_mfsm_tacho_set(payload, tacho);
 }
 
+/* MFSL - Management Fan Speed Limit Register
+ * ------------------------------------------
+ * The Fan Speed Limit register is used to configure the fan speed
+ * event / interrupt notification mechanism. Fan speed thresholds are
+ * defined for both under-speed and over-speed.
+ */
+#define MLXSW_REG_MFSL_ID 0x9004
+#define MLXSW_REG_MFSL_LEN 0x0C
+
+MLXSW_REG_DEFINE(mfsl, MLXSW_REG_MFSL_ID, MLXSW_REG_MFSL_LEN);
+
+/* reg_mfsl_tacho
+ * Fan tachometer index.
+ * Access: Index
+ */
+MLXSW_ITEM32(reg, mfsl, tacho, 0x00, 24, 4);
+
+/* reg_mfsl_tach_min
+ * Tachometer minimum value (minimum RPM).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsl, tach_min, 0x04, 0, 16);
+
+/* reg_mfsl_tach_max
+ * Tachometer maximum value (maximum RPM).
+ * Access: RW
+ */
+MLXSW_ITEM32(reg, mfsl, tach_max, 0x08, 0, 16);
+
+static inline void mlxsw_reg_mfsl_pack(char *payload, u8 tacho,
+				       u16 tach_min, u16 tach_max)
+{
+	MLXSW_REG_ZERO(mfsl, payload);
+	mlxsw_reg_mfsl_tacho_set(payload, tacho);
+	mlxsw_reg_mfsl_tach_min_set(payload, tach_min);
+	mlxsw_reg_mfsl_tach_max_set(payload, tach_max);
+}
+
+static inline void mlxsw_reg_mfsl_unpack(char *payload, u8 tacho,
+					 u16 *p_tach_min, u16 *p_tach_max)
+{
+	if (p_tach_min)
+		*p_tach_min = mlxsw_reg_mfsl_tach_min_get(payload);
+
+	if (p_tach_max)
+		*p_tach_max = mlxsw_reg_mfsl_tach_max_get(payload);
+}
+
 /* MTCAP - Management Temperature Capabilities
  * -------------------------------------------
  * This register exposes the capabilities of the device and
@@ -4572,10 +4720,7 @@ static inline void mlxsw_reg_mfsm_pack(char *payload, u8 tacho)
 #define MLXSW_REG_MTCAP_ID 0x9009
 #define MLXSW_REG_MTCAP_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_mtcap = {
-	.id = MLXSW_REG_MTCAP_ID,
-	.len = MLXSW_REG_MTCAP_LEN,
-};
+MLXSW_REG_DEFINE(mtcap, MLXSW_REG_MTCAP_ID, MLXSW_REG_MTCAP_LEN);
 
 /* reg_mtcap_sensor_count
  * Number of sensors supported by the device.
@@ -4593,10 +4738,7 @@ MLXSW_ITEM32(reg, mtcap, sensor_count, 0x00, 0, 7);
 #define MLXSW_REG_MTMP_ID 0x900A
 #define MLXSW_REG_MTMP_LEN 0x20
 
-static const struct mlxsw_reg_info mlxsw_reg_mtmp = {
-	.id = MLXSW_REG_MTMP_ID,
-	.len = MLXSW_REG_MTMP_LEN,
-};
+MLXSW_REG_DEFINE(mtmp, MLXSW_REG_MTMP_ID, MLXSW_REG_MTMP_LEN);
 
 /* reg_mtmp_sensor_index
  * Sensors index to access.
@@ -4679,10 +4821,7 @@ static inline void mlxsw_reg_mtmp_unpack(char *payload, unsigned int *p_temp,
 #define MLXSW_REG_MPAT_ID 0x901A
 #define MLXSW_REG_MPAT_LEN 0x78
 
-static const struct mlxsw_reg_info mlxsw_reg_mpat = {
-	.id = MLXSW_REG_MPAT_ID,
-	.len = MLXSW_REG_MPAT_LEN,
-};
+MLXSW_REG_DEFINE(mpat, MLXSW_REG_MPAT_ID, MLXSW_REG_MPAT_LEN);
 
 /* reg_mpat_pa_id
  * Port Analyzer ID.
@@ -4742,10 +4881,7 @@ static inline void mlxsw_reg_mpat_pack(char *payload, u8 pa_id,
 #define MLXSW_REG_MPAR_ID 0x901B
 #define MLXSW_REG_MPAR_LEN 0x08
 
-static const struct mlxsw_reg_info mlxsw_reg_mpar = {
-	.id = MLXSW_REG_MPAR_ID,
-	.len = MLXSW_REG_MPAR_LEN,
-};
+MLXSW_REG_DEFINE(mpar, MLXSW_REG_MPAR_ID, MLXSW_REG_MPAR_LEN);
 
 /* reg_mpar_local_port
  * The local port to mirror the packets from.
@@ -4795,10 +4931,7 @@ static inline void mlxsw_reg_mpar_pack(char *payload, u8 local_port,
 #define MLXSW_REG_MLCR_ID 0x902B
 #define MLXSW_REG_MLCR_LEN 0x0C
 
-static const struct mlxsw_reg_info mlxsw_reg_mlcr = {
-	.id = MLXSW_REG_MLCR_ID,
-	.len = MLXSW_REG_MLCR_LEN,
-};
+MLXSW_REG_DEFINE(mlcr, MLXSW_REG_MLCR_ID, MLXSW_REG_MLCR_LEN);
 
 /* reg_mlcr_local_port
  * Local port number.
@@ -4839,10 +4972,7 @@ static inline void mlxsw_reg_mlcr_pack(char *payload, u8 local_port,
 #define MLXSW_REG_SBPR_ID 0xB001
 #define MLXSW_REG_SBPR_LEN 0x14
 
-static const struct mlxsw_reg_info mlxsw_reg_sbpr = {
-	.id = MLXSW_REG_SBPR_ID,
-	.len = MLXSW_REG_SBPR_LEN,
-};
+MLXSW_REG_DEFINE(sbpr, MLXSW_REG_SBPR_ID, MLXSW_REG_SBPR_LEN);
 
 /* shared direstion enum for SBPR, SBCM, SBPM */
 enum mlxsw_reg_sbxx_dir {
@@ -4899,10 +5029,7 @@ static inline void mlxsw_reg_sbpr_pack(char *payload, u8 pool,
 #define MLXSW_REG_SBCM_ID 0xB002
 #define MLXSW_REG_SBCM_LEN 0x28
 
-static const struct mlxsw_reg_info mlxsw_reg_sbcm = {
-	.id = MLXSW_REG_SBCM_ID,
-	.len = MLXSW_REG_SBCM_LEN,
-};
+MLXSW_REG_DEFINE(sbcm, MLXSW_REG_SBCM_ID, MLXSW_REG_SBCM_LEN);
 
 /* reg_sbcm_local_port
  * Local port number.
@@ -4979,10 +5106,7 @@ static inline void mlxsw_reg_sbcm_pack(char *payload, u8 local_port, u8 pg_buff,
 #define MLXSW_REG_SBPM_ID 0xB003
 #define MLXSW_REG_SBPM_LEN 0x28
 
-static const struct mlxsw_reg_info mlxsw_reg_sbpm = {
-	.id = MLXSW_REG_SBPM_ID,
-	.len = MLXSW_REG_SBPM_LEN,
-};
+MLXSW_REG_DEFINE(sbpm, MLXSW_REG_SBPM_ID, MLXSW_REG_SBPM_LEN);
 
 /* reg_sbpm_local_port
  * Local port number.
@@ -5073,10 +5197,7 @@ static inline void mlxsw_reg_sbpm_unpack(char *payload, u32 *p_buff_occupancy,
 #define MLXSW_REG_SBMM_ID 0xB004
 #define MLXSW_REG_SBMM_LEN 0x28
 
-static const struct mlxsw_reg_info mlxsw_reg_sbmm = {
-	.id = MLXSW_REG_SBMM_ID,
-	.len = MLXSW_REG_SBMM_LEN,
-};
+MLXSW_REG_DEFINE(sbmm, MLXSW_REG_SBMM_ID, MLXSW_REG_SBMM_LEN);
 
 /* reg_sbmm_prio
  * Switch Priority.
@@ -5135,10 +5256,7 @@ static inline void mlxsw_reg_sbmm_pack(char *payload, u8 prio, u32 min_buff,
 			    MLXSW_REG_SBSR_REC_LEN *	\
 			    MLXSW_REG_SBSR_REC_MAX_COUNT)
 
-static const struct mlxsw_reg_info mlxsw_reg_sbsr = {
-	.id = MLXSW_REG_SBSR_ID,
-	.len = MLXSW_REG_SBSR_LEN,
-};
+MLXSW_REG_DEFINE(sbsr, MLXSW_REG_SBSR_ID, MLXSW_REG_SBSR_LEN);
 
 /* reg_sbsr_clr
  * Clear Max Buffer Occupancy. When this bit is set, the max_buff_occupancy
@@ -5228,10 +5346,7 @@ static inline void mlxsw_reg_sbsr_rec_unpack(char *payload, int rec_index,
 #define MLXSW_REG_SBIB_ID 0xB006
 #define MLXSW_REG_SBIB_LEN 0x10
 
-static const struct mlxsw_reg_info mlxsw_reg_sbib = {
-	.id = MLXSW_REG_SBIB_ID,
-	.len = MLXSW_REG_SBIB_LEN,
-};
+MLXSW_REG_DEFINE(sbib, MLXSW_REG_SBIB_ID, MLXSW_REG_SBIB_LEN);
 
 /* reg_sbib_local_port
  * Local port number
@@ -5256,132 +5371,83 @@ static inline void mlxsw_reg_sbib_pack(char *payload, u8 local_port,
 	mlxsw_reg_sbib_buff_size_set(payload, buff_size);
 }
 
+static const struct mlxsw_reg_info *mlxsw_reg_infos[] = {
+	MLXSW_REG(sgcr),
+	MLXSW_REG(spad),
+	MLXSW_REG(smid),
+	MLXSW_REG(sspr),
+	MLXSW_REG(sfdat),
+	MLXSW_REG(sfd),
+	MLXSW_REG(sfn),
+	MLXSW_REG(spms),
+	MLXSW_REG(spvid),
+	MLXSW_REG(spvm),
+	MLXSW_REG(spaft),
+	MLXSW_REG(sfgc),
+	MLXSW_REG(sftr),
+	MLXSW_REG(sfdf),
+	MLXSW_REG(sldr),
+	MLXSW_REG(slcr),
+	MLXSW_REG(slcor),
+	MLXSW_REG(spmlr),
+	MLXSW_REG(svfa),
+	MLXSW_REG(svpe),
+	MLXSW_REG(sfmr),
+	MLXSW_REG(spvmlr),
+	MLXSW_REG(qpcr),
+	MLXSW_REG(qtct),
+	MLXSW_REG(qeec),
+	MLXSW_REG(pmlp),
+	MLXSW_REG(pmtu),
+	MLXSW_REG(ptys),
+	MLXSW_REG(ppad),
+	MLXSW_REG(paos),
+	MLXSW_REG(pfcc),
+	MLXSW_REG(ppcnt),
+	MLXSW_REG(plib),
+	MLXSW_REG(pptb),
+	MLXSW_REG(pbmc),
+	MLXSW_REG(pspa),
+	MLXSW_REG(htgt),
+	MLXSW_REG(hpkt),
+	MLXSW_REG(rgcr),
+	MLXSW_REG(ritr),
+	MLXSW_REG(ratr),
+	MLXSW_REG(ralta),
+	MLXSW_REG(ralst),
+	MLXSW_REG(raltb),
+	MLXSW_REG(ralue),
+	MLXSW_REG(rauht),
+	MLXSW_REG(raleu),
+	MLXSW_REG(rauhtd),
+	MLXSW_REG(mfcr),
+	MLXSW_REG(mfsc),
+	MLXSW_REG(mfsm),
+	MLXSW_REG(mfsl),
+	MLXSW_REG(mtcap),
+	MLXSW_REG(mtmp),
+	MLXSW_REG(mpat),
+	MLXSW_REG(mpar),
+	MLXSW_REG(mlcr),
+	MLXSW_REG(sbpr),
+	MLXSW_REG(sbcm),
+	MLXSW_REG(sbpm),
+	MLXSW_REG(sbmm),
+	MLXSW_REG(sbsr),
+	MLXSW_REG(sbib),
+};
+
 static inline const char *mlxsw_reg_id_str(u16 reg_id)
 {
-	switch (reg_id) {
-	case MLXSW_REG_SGCR_ID:
-		return "SGCR";
-	case MLXSW_REG_SPAD_ID:
-		return "SPAD";
-	case MLXSW_REG_SMID_ID:
-		return "SMID";
-	case MLXSW_REG_SSPR_ID:
-		return "SSPR";
-	case MLXSW_REG_SFDAT_ID:
-		return "SFDAT";
-	case MLXSW_REG_SFD_ID:
-		return "SFD";
-	case MLXSW_REG_SFN_ID:
-		return "SFN";
-	case MLXSW_REG_SPMS_ID:
-		return "SPMS";
-	case MLXSW_REG_SPVID_ID:
-		return "SPVID";
-	case MLXSW_REG_SPVM_ID:
-		return "SPVM";
-	case MLXSW_REG_SPAFT_ID:
-		return "SPAFT";
-	case MLXSW_REG_SFGC_ID:
-		return "SFGC";
-	case MLXSW_REG_SFTR_ID:
-		return "SFTR";
-	case MLXSW_REG_SFDF_ID:
-		return "SFDF";
-	case MLXSW_REG_SLDR_ID:
-		return "SLDR";
-	case MLXSW_REG_SLCR_ID:
-		return "SLCR";
-	case MLXSW_REG_SLCOR_ID:
-		return "SLCOR";
-	case MLXSW_REG_SPMLR_ID:
-		return "SPMLR";
-	case MLXSW_REG_SVFA_ID:
-		return "SVFA";
-	case MLXSW_REG_SVPE_ID:
-		return "SVPE";
-	case MLXSW_REG_SFMR_ID:
-		return "SFMR";
-	case MLXSW_REG_SPVMLR_ID:
-		return "SPVMLR";
-	case MLXSW_REG_QTCT_ID:
-		return "QTCT";
-	case MLXSW_REG_QEEC_ID:
-		return "QEEC";
-	case MLXSW_REG_PMLP_ID:
-		return "PMLP";
-	case MLXSW_REG_PMTU_ID:
-		return "PMTU";
-	case MLXSW_REG_PTYS_ID:
-		return "PTYS";
-	case MLXSW_REG_PPAD_ID:
-		return "PPAD";
-	case MLXSW_REG_PAOS_ID:
-		return "PAOS";
-	case MLXSW_REG_PFCC_ID:
-		return "PFCC";
-	case MLXSW_REG_PPCNT_ID:
-		return "PPCNT";
-	case MLXSW_REG_PPTB_ID:
-		return "PPTB";
-	case MLXSW_REG_PBMC_ID:
-		return "PBMC";
-	case MLXSW_REG_PSPA_ID:
-		return "PSPA";
-	case MLXSW_REG_HTGT_ID:
-		return "HTGT";
-	case MLXSW_REG_HPKT_ID:
-		return "HPKT";
-	case MLXSW_REG_RGCR_ID:
-		return "RGCR";
-	case MLXSW_REG_RITR_ID:
-		return "RITR";
-	case MLXSW_REG_RATR_ID:
-		return "RATR";
-	case MLXSW_REG_RALTA_ID:
-		return "RALTA";
-	case MLXSW_REG_RALST_ID:
-		return "RALST";
-	case MLXSW_REG_RALTB_ID:
-		return "RALTB";
-	case MLXSW_REG_RALUE_ID:
-		return "RALUE";
-	case MLXSW_REG_RAUHT_ID:
-		return "RAUHT";
-	case MLXSW_REG_RALEU_ID:
-		return "RALEU";
-	case MLXSW_REG_RAUHTD_ID:
-		return "RAUHTD";
-	case MLXSW_REG_MFCR_ID:
-		return "MFCR";
-	case MLXSW_REG_MFSC_ID:
-		return "MFSC";
-	case MLXSW_REG_MFSM_ID:
-		return "MFSM";
-	case MLXSW_REG_MTCAP_ID:
-		return "MTCAP";
-	case MLXSW_REG_MPAT_ID:
-		return "MPAT";
-	case MLXSW_REG_MPAR_ID:
-		return "MPAR";
-	case MLXSW_REG_MTMP_ID:
-		return "MTMP";
-	case MLXSW_REG_MLCR_ID:
-		return "MLCR";
-	case MLXSW_REG_SBPR_ID:
-		return "SBPR";
-	case MLXSW_REG_SBCM_ID:
-		return "SBCM";
-	case MLXSW_REG_SBPM_ID:
-		return "SBPM";
-	case MLXSW_REG_SBMM_ID:
-		return "SBMM";
-	case MLXSW_REG_SBSR_ID:
-		return "SBSR";
-	case MLXSW_REG_SBIB_ID:
-		return "SBIB";
-	default:
-		return "*UNKNOWN*";
+	const struct mlxsw_reg_info *reg_info;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_reg_infos); i++) {
+		reg_info = mlxsw_reg_infos[i];
+		if (reg_info->id == reg_id)
+			return reg_info->name;
 	}
+	return "*UNKNOWN*";
 }
 
 /* PUDE - Port Up / Down Event
diff --git a/drivers/net/ethernet/mellanox/mlxsw/resources.h b/drivers/net/ethernet/mellanox/mlxsw/resources.h
new file mode 100644
index 000000000000..3c2171dbdba4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/resources.h
@@ -0,0 +1,127 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/resources.h
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MLXSW_RESOURCES_H
+#define _MLXSW_RESOURCES_H
+
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+enum mlxsw_res_id {
+	MLXSW_RES_ID_KVD_SIZE,
+	MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE,
+	MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE,
+	MLXSW_RES_ID_MAX_TRAP_GROUPS,
+	MLXSW_RES_ID_MAX_SPAN,
+	MLXSW_RES_ID_MAX_SYSTEM_PORT,
+	MLXSW_RES_ID_MAX_LAG,
+	MLXSW_RES_ID_MAX_LAG_MEMBERS,
+	MLXSW_RES_ID_MAX_BUFFER_SIZE,
+	MLXSW_RES_ID_MAX_CPU_POLICERS,
+	MLXSW_RES_ID_MAX_VRS,
+	MLXSW_RES_ID_MAX_RIFS,
+
+	/* Internal resources.
+	 * Determined by the SW, not queried from the HW.
+	 */
+	MLXSW_RES_ID_KVD_SINGLE_SIZE,
+	MLXSW_RES_ID_KVD_DOUBLE_SIZE,
+	MLXSW_RES_ID_KVD_LINEAR_SIZE,
+
+	__MLXSW_RES_ID_MAX,
+};
+
+static const u16 mlxsw_res_ids[] = {	/* indexed by enum mlxsw_res_id */
+	[MLXSW_RES_ID_KVD_SIZE] = 0x1001,
+	[MLXSW_RES_ID_KVD_SINGLE_MIN_SIZE] = 0x1002,
+	[MLXSW_RES_ID_KVD_DOUBLE_MIN_SIZE] = 0x1003,
+	[MLXSW_RES_ID_MAX_TRAP_GROUPS] = 0x2201,
+	[MLXSW_RES_ID_MAX_SPAN] = 0x2420,
+	[MLXSW_RES_ID_MAX_SYSTEM_PORT] = 0x2502,
+	[MLXSW_RES_ID_MAX_LAG] = 0x2520,
+	[MLXSW_RES_ID_MAX_LAG_MEMBERS] = 0x2521,
+	[MLXSW_RES_ID_MAX_BUFFER_SIZE] = 0x2802,	/* Bytes */
+	[MLXSW_RES_ID_MAX_CPU_POLICERS] = 0x2A13,
+	[MLXSW_RES_ID_MAX_VRS] = 0x2C01,
+	[MLXSW_RES_ID_MAX_RIFS] = 0x2C02,
+};
+
+struct mlxsw_res {
+	bool valid[__MLXSW_RES_ID_MAX];
+	u64 values[__MLXSW_RES_ID_MAX];
+};
+
+static inline bool mlxsw_res_valid(struct mlxsw_res *res,
+				   enum mlxsw_res_id res_id)
+{
+	return res->valid[res_id];
+}
+
+#define MLXSW_RES_VALID(res, short_res_id)			\
+	mlxsw_res_valid(res, MLXSW_RES_ID_##short_res_id)
+
+static inline u64 mlxsw_res_get(struct mlxsw_res *res,
+				enum mlxsw_res_id res_id)
+{
+	if (WARN_ON(!res->valid[res_id]))	/* value was never set */
+		return 0;
+	return res->values[res_id];
+}
+
+#define MLXSW_RES_GET(res, short_res_id)			\
+	mlxsw_res_get(res, MLXSW_RES_ID_##short_res_id)
+
+static inline void mlxsw_res_set(struct mlxsw_res *res,
+				 enum mlxsw_res_id res_id, u64 value)
+{
+	res->valid[res_id] = true;
+	res->values[res_id] = value;
+}
+
+#define MLXSW_RES_SET(res, short_res_id, value)			\
+	mlxsw_res_set(res, MLXSW_RES_ID_##short_res_id, value)
+
+/* Record value under the entry matching id; unknown ids are ignored. */
+static inline void mlxsw_res_parse(struct mlxsw_res *res, u16 id, u64 value)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_res_ids); i++)
+		if (mlxsw_res_ids[i] == id)
+			break;
+	if (i < ARRAY_SIZE(mlxsw_res_ids))
+		mlxsw_res_set(res, i, value);
+}
+
+#endif
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index dda5761e91bc..fece974b4edd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
@@ -53,12 +54,12 @@
 #include <linux/dcbnl.h>
 #include <linux/inetdevice.h>
 #include <net/switchdev.h>
-#include <generated/utsrelease.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_mirred.h>
 #include <net/netevent.h>
 
 #include "spectrum.h"
+#include "pci.h"
 #include "core.h"
 #include "reg.h"
 #include "port.h"
@@ -156,7 +157,7 @@ static void mlxsw_sp_txhdr_construct(struct sk_buff *skb,
 
 static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
 {
-	char spad_pl[MLXSW_REG_SPAD_LEN];
+	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
 	int err;
 
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(spad), spad_pl);
@@ -168,14 +169,13 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
 
 static int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	if (!resources->max_span_valid)
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_SPAN))
 		return -EIO;
 
-	mlxsw_sp->span.entries_count = resources->max_span;
+	mlxsw_sp->span.entries_count = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+							  MAX_SPAN);
 	mlxsw_sp->span.entries = kcalloc(mlxsw_sp->span.entries_count,
 					 sizeof(struct mlxsw_sp_span_entry),
 					 GFP_KERNEL);
@@ -857,7 +857,7 @@ mlxsw_sp_port_get_sw_stats64(const struct net_device *dev,
 	return 0;
 }
 
-static bool mlxsw_sp_port_has_offload_stats(int attr_id)
+static bool mlxsw_sp_port_has_offload_stats(const struct net_device *dev, int attr_id)
 {
 	switch (attr_id) {
 	case IFLA_OFFLOAD_XSTATS_CPU_HIT:
@@ -1239,8 +1239,10 @@ static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	tcf_exts_to_list(cls->exts, &actions);
 	list_for_each_entry(a, &actions, list) {
-		if (!is_tcf_mirred_mirror(a) || protocol != htons(ETH_P_ALL))
+		if (!is_tcf_mirred_egress_mirror(a) ||
+		    protocol != htons(ETH_P_ALL)) {
 			return -ENOTSUPP;
+		}
 
 		err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port, cls,
 							    a, ingress);
@@ -1413,7 +1415,7 @@ err_port_pause_configure:
 
 struct mlxsw_sp_port_hw_stats {
 	char str[ETH_GSTRING_LEN];
-	u64 (*getter)(char *payload);
+	u64 (*getter)(const char *payload);
 };
 
 static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_stats[] = {
@@ -1534,7 +1536,7 @@ static struct mlxsw_sp_port_hw_stats mlxsw_sp_port_hw_prio_stats[] = {
 
 #define MLXSW_SP_PORT_HW_PRIO_STATS_LEN ARRAY_SIZE(mlxsw_sp_port_hw_prio_stats)
 
-static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(char *ppcnt_pl)
+static u64 mlxsw_reg_ppcnt_tc_transmit_queue_bytes_get(const char *ppcnt_pl)
 {
 	u64 transmit_queue = mlxsw_reg_ppcnt_tc_transmit_queue_get(ppcnt_pl);
 
@@ -2002,12 +2004,12 @@ static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev,
 	int err;
 
 	autoneg = mlxsw_sp_port->link.autoneg;
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
-	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
-			      &eth_proto_oper);
+	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+				  &eth_proto_oper);
 
 	mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd);
 
@@ -2036,11 +2038,11 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
 	bool autoneg;
 	int err;
 
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port, 0);
 	err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
-	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
+	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, NULL, NULL);
 
 	autoneg = cmd->base.autoneg == AUTONEG_ENABLE;
 	eth_proto_new = autoneg ?
@@ -2053,7 +2055,8 @@ mlxsw_sp_port_set_link_ksettings(struct net_device *dev,
 		return -EINVAL;
 	}
 
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
+				eth_proto_new);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 	if (err)
 		return err;
@@ -2091,8 +2094,8 @@ mlxsw_sp_port_speed_by_width_set(struct mlxsw_sp_port *mlxsw_sp_port, u8 width)
 	u32 eth_proto_admin;
 
 	eth_proto_admin = mlxsw_sp_to_ptys_upper_speed(upper_speed);
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port,
-			    eth_proto_admin);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sp_port->local_port,
+				eth_proto_admin);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl);
 }
 
@@ -2210,8 +2213,8 @@ static int mlxsw_sp_port_pvid_vport_destroy(struct mlxsw_sp_port *mlxsw_sp_port)
 	return mlxsw_sp_port_kill_vid(mlxsw_sp_port->dev, 0, 1);
 }
 
-static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
-				bool split, u8 module, u8 width, u8 lane)
+static int __mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				  bool split, u8 module, u8 width, u8 lane)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct net_device *dev;
@@ -2221,6 +2224,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	dev = alloc_etherdev(sizeof(struct mlxsw_sp_port));
 	if (!dev)
 		return -ENOMEM;
+	SET_NETDEV_DEV(dev, mlxsw_sp->bus_info->dev);
 	mlxsw_sp_port = netdev_priv(dev);
 	mlxsw_sp_port->dev = dev;
 	mlxsw_sp_port->mlxsw_sp = mlxsw_sp;
@@ -2284,6 +2288,9 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 			 NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_TC;
 	dev->hw_features |= NETIF_F_HW_TC;
 
+	dev->min_mtu = 0;
+	dev->max_mtu = ETH_MAX_MTU;
+
 	/* Each packet needs to have a Tx header (metadata) on top all other
 	 * headers.
 	 */
@@ -2352,20 +2359,12 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_register_netdev;
 	}
 
-	err = mlxsw_core_port_init(mlxsw_sp->core, &mlxsw_sp_port->core_port,
-				   mlxsw_sp_port->local_port, dev,
-				   mlxsw_sp_port->split, module);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n",
-			mlxsw_sp_port->local_port);
-		goto err_core_port_init;
-	}
-
+	mlxsw_core_port_eth_set(mlxsw_sp->core, mlxsw_sp_port->local_port,
+				mlxsw_sp_port, dev, mlxsw_sp_port->split,
+				module);
 	mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0);
 	return 0;
 
-err_core_port_init:
-	unregister_netdev(dev);
 err_register_netdev:
 	mlxsw_sp->ports[local_port] = NULL;
 	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
@@ -2394,14 +2393,34 @@ err_port_active_vlans_alloc:
 	return err;
 }
 
-static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+/* Initialize the core port for local_port, then create the Spectrum port
+ * on top of it. The core port is finalized again if creation fails.
+ */
+static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
+				bool split, u8 module, u8 width, u8 lane)
+{
+	int err;
+
+	err = mlxsw_core_port_init(mlxsw_sp->core, local_port);
+	if (err) {
+		dev_err(mlxsw_sp->bus_info->dev, "Port %d: Failed to init core port\n",
+			local_port);
+		return err;
+	}
+	/* Forward split as given so that split ports are marked as such. */
+	err = __mlxsw_sp_port_create(mlxsw_sp, local_port, split,
+				     module, width, lane);
+	if (err)
+		mlxsw_core_port_fini(mlxsw_sp->core, local_port);
+	return err;
+}
+
+static void __mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
 
-	if (!mlxsw_sp_port)
-		return;
 	cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw);
-	mlxsw_core_port_fini(&mlxsw_sp_port->core_port);
+	mlxsw_core_port_clear(mlxsw_sp->core, local_port, mlxsw_sp);
 	unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */
 	mlxsw_sp->ports[local_port] = NULL;
 	mlxsw_sp_port_switchdev_fini(mlxsw_sp_port);
@@ -2417,12 +2436,24 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	free_netdev(mlxsw_sp_port->dev);
 }
 
+static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{
+	__mlxsw_sp_port_remove(mlxsw_sp, local_port);
+	mlxsw_core_port_fini(mlxsw_sp->core, local_port);
+}
+
+static bool mlxsw_sp_port_created(struct mlxsw_sp *mlxsw_sp, u8 local_port)
+{
+	return mlxsw_sp->ports[local_port] != NULL;
+}
+
 static void mlxsw_sp_ports_remove(struct mlxsw_sp *mlxsw_sp)
 {
 	int i;
 
 	for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++)
-		mlxsw_sp_port_remove(mlxsw_sp, i);
+		if (mlxsw_sp_port_created(mlxsw_sp, i))
+			mlxsw_sp_port_remove(mlxsw_sp, i);
 	kfree(mlxsw_sp->ports);
 }
 
@@ -2446,8 +2477,8 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 		if (!width)
 			continue;
 		mlxsw_sp->port_to_module[i] = module;
-		err = mlxsw_sp_port_create(mlxsw_sp, i, false, module, width,
-					   lane);
+		err = mlxsw_sp_port_create(mlxsw_sp, i, false,
+					   module, width, lane);
 		if (err)
 			goto err_port_create;
 	}
@@ -2456,7 +2487,8 @@ static int mlxsw_sp_ports_create(struct mlxsw_sp *mlxsw_sp)
 err_port_create:
 err_port_module_info_get:
 	for (i--; i >= 1; i--)
-		mlxsw_sp_port_remove(mlxsw_sp, i);
+		if (mlxsw_sp_port_created(mlxsw_sp, i))
+			mlxsw_sp_port_remove(mlxsw_sp, i);
 	kfree(mlxsw_sp->ports);
 	return err;
 }
@@ -2498,7 +2530,8 @@ static int mlxsw_sp_port_split_create(struct mlxsw_sp *mlxsw_sp, u8 base_port,
 
 err_port_create:
 	for (i--; i >= 0; i--)
-		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+			mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 	i = count;
 err_port_swid_set:
 	for (i--; i >= 0; i--)
@@ -2588,7 +2621,8 @@ static int mlxsw_sp_port_split(struct mlxsw_core *mlxsw_core, u8 local_port,
 	}
 
 	for (i = 0; i < count; i++)
-		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+			mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
 	err = mlxsw_sp_port_split_create(mlxsw_sp, base_port, module, count);
 	if (err) {
@@ -2633,7 +2667,8 @@ static int mlxsw_sp_port_unsplit(struct mlxsw_core *mlxsw_core, u8 local_port)
 		base_port = base_port + 2;
 
 	for (i = 0; i < count; i++)
-		mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
+		if (mlxsw_sp_port_created(mlxsw_sp, base_port + i))
+			mlxsw_sp_port_remove(mlxsw_sp, base_port + i);
 
 	mlxsw_sp_port_unsplit_create(mlxsw_sp, base_port, count);
 
@@ -2663,54 +2698,8 @@ static void mlxsw_sp_pude_event_func(const struct mlxsw_reg_info *reg,
 	}
 }
 
-static struct mlxsw_event_listener mlxsw_sp_pude_event = {
-	.func = mlxsw_sp_pude_event_func,
-	.trap_id = MLXSW_TRAP_ID_PUDE,
-};
-
-static int mlxsw_sp_event_register(struct mlxsw_sp *mlxsw_sp,
-				   enum mlxsw_event_trap_id trap_id)
-{
-	struct mlxsw_event_listener *el;
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
-	int err;
-
-	switch (trap_id) {
-	case MLXSW_TRAP_ID_PUDE:
-		el = &mlxsw_sp_pude_event;
-		break;
-	}
-	err = mlxsw_core_event_listener_register(mlxsw_sp->core, el, mlxsw_sp);
-	if (err)
-		return err;
-
-	mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, trap_id);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-	if (err)
-		goto err_event_trap_set;
-
-	return 0;
-
-err_event_trap_set:
-	mlxsw_core_event_listener_unregister(mlxsw_sp->core, el, mlxsw_sp);
-	return err;
-}
-
-static void mlxsw_sp_event_unregister(struct mlxsw_sp *mlxsw_sp,
-				      enum mlxsw_event_trap_id trap_id)
-{
-	struct mlxsw_event_listener *el;
-
-	switch (trap_id) {
-	case MLXSW_TRAP_ID_PUDE:
-		el = &mlxsw_sp_pude_event;
-		break;
-	}
-	mlxsw_core_event_listener_unregister(mlxsw_sp->core, el, mlxsw_sp);
-}
-
-static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port,
-				      void *priv)
+static void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
+					      u8 local_port, void *priv)
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
@@ -2738,107 +2727,212 @@ static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port,
 					   void *priv)
 {
 	skb->offload_fwd_mark = 1;
-	return mlxsw_sp_rx_listener_func(skb, local_port, priv);
+	return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
+}
+
+#define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
+	MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action,	\
+		  _is_ctrl, SP_##_trap_group, DISCARD)
+
+#define MLXSW_SP_RXL_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
+	MLXSW_RXL(mlxsw_sp_rx_listener_mark_func, _trap_id, _action,	\
+		_is_ctrl, SP_##_trap_group, DISCARD)
+
+#define MLXSW_SP_EVENTL(_func, _trap_id)		\
+	MLXSW_EVENTL(_func, _trap_id, SP_EVENT)
+
+static const struct mlxsw_listener mlxsw_sp_listener[] = {
+	/* Events */
+	MLXSW_SP_EVENTL(mlxsw_sp_pude_event_func, PUDE),
+	/* L2 traps */
+	MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
+	MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
+	MLXSW_SP_RXL_NO_MARK(LLDP, TRAP_TO_CPU, LLDP, true),
+	MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
+	MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, IGMP, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, TRAP_TO_CPU, IGMP, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, IGMP, false),
+	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
+	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
+	/* L3 traps */
+	MLXSW_SP_RXL_NO_MARK(MTUERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_NO_MARK(TTLERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_NO_MARK(LBERROR, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(OSPF, TRAP_TO_CPU, OSPF, false),
+	MLXSW_SP_RXL_NO_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
+	MLXSW_SP_RXL_NO_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
+	MLXSW_SP_RXL_NO_MARK(HOST_MISS_IPV4, TRAP_TO_CPU, ARP_MISS, false),
+	MLXSW_SP_RXL_NO_MARK(BGP_IPV4, TRAP_TO_CPU, BGP_IPV4, false),
+};
+
+static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
+{
+	char qpcr_pl[MLXSW_REG_QPCR_LEN];
+	enum mlxsw_reg_qpcr_ir_units ir_units;
+	int max_cpu_policers;
+	bool is_bytes;
+	u8 burst_size;
+	u32 rate;
+	int i, err;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_CPU_POLICERS))
+		return -EIO;
+
+	max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS);
+
+	ir_units = MLXSW_REG_QPCR_IR_UNITS_M;	/* same for every policer */
+	for (i = 0; i < max_cpu_policers; i++) {
+		is_bytes = false;	/* only IP2ME overrides this below */
+		switch (i) {
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+			rate = 128;
+			burst_size = 7;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+			rate = 16 * 1024;
+			burst_size = 10;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+			rate = 1024;
+			burst_size = 7;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+			is_bytes = true;
+			rate = 4 * 1024;
+			burst_size = 4;
+			break;
+		default:
+			continue;	/* no policer written for this index */
+		}
+
+		mlxsw_reg_qpcr_pack(qpcr_pl, i, ir_units, is_bytes, rate,
+				    burst_size);
+		err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(qpcr), qpcr_pl);
+		if (err)
+			return err;	/* stop at the first failed write */
+	}
+
+	return 0;
 }
 
-#define MLXSW_SP_RXL(_func, _trap_id, _action)			\
-	{							\
-		.func = _func,					\
-		.local_port = MLXSW_PORT_DONT_CARE,		\
-		.trap_id = MLXSW_TRAP_ID_##_trap_id,		\
-		.action = MLXSW_REG_HPKT_ACTION_##_action,	\
+static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+	char htgt_pl[MLXSW_REG_HTGT_LEN];
+	enum mlxsw_reg_htgt_trap_group i;
+	int max_cpu_policers;
+	int max_trap_groups;
+	u8 priority, tc;
+	u16 policer_id;
+	int err;
+
+	if (!MLXSW_CORE_RES_VALID(mlxsw_core, MAX_TRAP_GROUPS))
+		return -EIO;
+
+	max_trap_groups = MLXSW_CORE_RES_GET(mlxsw_core, MAX_TRAP_GROUPS);
+	max_cpu_policers = MLXSW_CORE_RES_GET(mlxsw_core, MAX_CPU_POLICERS);
+
+	for (i = 0; i < max_trap_groups; i++) {
+		policer_id = i;
+		switch (i) {
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
+			priority = 5;
+			tc = 5;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP_IPV4:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
+			priority = 4;
+			tc = 4;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+			priority = 3;
+			tc = 3;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+			priority = 2;
+			tc = 2;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP_MISS:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
+			priority = 1;
+			tc = 1;
+			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT:
+			priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY;
+			tc = MLXSW_REG_HTGT_DEFAULT_TC;
+			policer_id = MLXSW_REG_HTGT_INVALID_POLICER;
+			break;
+		default:
+			continue;
+		}
+
+		if (max_cpu_policers <= policer_id &&
+		    policer_id != MLXSW_REG_HTGT_INVALID_POLICER)
+			return -EIO;
+
+		mlxsw_reg_htgt_pack(htgt_pl, i, policer_id, priority, tc);
+		err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+		if (err)
+			return err;
 	}
 
-static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = {
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU),
-	/* Traps for specific L2 packet types, not trapped as FDB MC */
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU),
-	/* L3 traps */
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU),
-	MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU),
-};
+	return 0;
+}
 
 static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
 {
-	char htgt_pl[MLXSW_REG_HTGT_LEN];
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
 	int i;
 	int err;
 
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_RX);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
+	err = mlxsw_sp_cpu_policers_set(mlxsw_sp->core);
 	if (err)
 		return err;
 
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_CTRL);
-	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
+	err = mlxsw_sp_trap_groups_set(mlxsw_sp->core);
 	if (err)
 		return err;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) {
-		err = mlxsw_core_rx_listener_register(mlxsw_sp->core,
-						      &mlxsw_sp_rx_listener[i],
-						      mlxsw_sp);
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+		err = mlxsw_core_trap_register(mlxsw_sp->core,
+					       &mlxsw_sp_listener[i],
+					       mlxsw_sp);
 		if (err)
-			goto err_rx_listener_register;
+			goto err_listener_register;
 
-		mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action,
-				    mlxsw_sp_rx_listener[i].trap_id);
-		err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-		if (err)
-			goto err_rx_trap_set;
 	}
 	return 0;
 
-err_rx_trap_set:
-	mlxsw_core_rx_listener_unregister(mlxsw_sp->core,
-					  &mlxsw_sp_rx_listener[i],
-					  mlxsw_sp);
-err_rx_listener_register:
+err_listener_register:
 	for (i--; i >= 0; i--) {
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
-				    mlxsw_sp_rx_listener[i].trap_id);
-		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-
-		mlxsw_core_rx_listener_unregister(mlxsw_sp->core,
-						  &mlxsw_sp_rx_listener[i],
-						  mlxsw_sp);
+		mlxsw_core_trap_unregister(mlxsw_sp->core,
+					   &mlxsw_sp_listener[i],
+					   mlxsw_sp);
 	}
 	return err;
 }
 
 static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_rx_listener); i++) {
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_DISCARD,
-				    mlxsw_sp_rx_listener[i].trap_id);
-		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl);
-
-		mlxsw_core_rx_listener_unregister(mlxsw_sp->core,
-						  &mlxsw_sp_rx_listener[i],
-						  mlxsw_sp);
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener); i++) {
+		mlxsw_core_trap_unregister(mlxsw_sp->core,
+					   &mlxsw_sp_listener[i],
+					   mlxsw_sp);
 	}
 }
 
@@ -2889,7 +2983,6 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp)
 
 static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	char slcr_pl[MLXSW_REG_SLCR_LEN];
 	int err;
 
@@ -2906,11 +2999,11 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		return err;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid))
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG) ||
+	    !MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_LAG_MEMBERS))
 		return -EIO;
 
-	mlxsw_sp->lags = kcalloc(resources->max_lag,
+	mlxsw_sp->lags = kcalloc(MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LAG),
 				 sizeof(struct mlxsw_sp_upper),
 				 GFP_KERNEL);
 	if (!mlxsw_sp->lags)
@@ -2924,6 +3017,17 @@ static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp)
 	kfree(mlxsw_sp->lags);
 }
 
+static int mlxsw_sp_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+	char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+			    MLXSW_REG_HTGT_INVALID_POLICER,
+			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+			    MLXSW_REG_HTGT_DEFAULT_TC);
+	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
 static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 			 const struct mlxsw_bus_info *mlxsw_bus_info)
 {
@@ -2942,16 +3046,10 @@ static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core,
 		return err;
 	}
 
-	err = mlxsw_sp_event_register(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
-	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Failed to register for PUDE events\n");
-		return err;
-	}
-
 	err = mlxsw_sp_traps_init(mlxsw_sp);
 	if (err) {
-		dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps for RX\n");
-		goto err_rx_listener_register;
+		dev_err(mlxsw_sp->bus_info->dev, "Failed to set traps\n");
+		return err;
 	}
 
 	err = mlxsw_sp_flood_init(mlxsw_sp);
@@ -3011,8 +3109,6 @@ err_lag_init:
 err_buffers_init:
 err_flood_init:
 	mlxsw_sp_traps_fini(mlxsw_sp);
-err_rx_listener_register:
-	mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
 	return err;
 }
 
@@ -3027,7 +3123,6 @@ static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core)
 	mlxsw_sp_lag_fini(mlxsw_sp);
 	mlxsw_sp_buffers_fini(mlxsw_sp);
 	mlxsw_sp_traps_fini(mlxsw_sp);
-	mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE);
 	WARN_ON(!list_empty(&mlxsw_sp->vfids.list));
 	WARN_ON(!list_empty(&mlxsw_sp->fids));
 }
@@ -3065,11 +3160,11 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = {
 };
 
 static struct mlxsw_driver mlxsw_sp_driver = {
-	.kind				= MLXSW_DEVICE_KIND_SPECTRUM,
-	.owner				= THIS_MODULE,
+	.kind				= mlxsw_sp_driver_name,
 	.priv_size			= sizeof(struct mlxsw_sp),
 	.init				= mlxsw_sp_init,
 	.fini				= mlxsw_sp_fini,
+	.basic_trap_groups_set		= mlxsw_sp_basic_trap_groups_set,
 	.port_split			= mlxsw_sp_port_split,
 	.port_unsplit			= mlxsw_sp_port_unsplit,
 	.sb_pool_get			= mlxsw_sp_sb_pool_get,
@@ -3092,19 +3187,30 @@ static bool mlxsw_sp_port_dev_check(const struct net_device *dev)
 	return dev->netdev_ops == &mlxsw_sp_port_netdev_ops;
 }
 
+/* Lower-device walk callback: when lower_dev is an mlxsw_sp port, store its
+ * private data through data and return 1; otherwise return 0.
+ */
+static int mlxsw_lower_dev_walk(struct net_device *lower_dev, void *data)
+{
+	struct mlxsw_sp_port **p_port = data;
+
+	if (!mlxsw_sp_port_dev_check(lower_dev))
+		return 0;
+	*p_port = netdev_priv(lower_dev);
+	return 1;
+}
+
 static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find(struct net_device *dev)
 {
-	struct net_device *lower_dev;
-	struct list_head *iter;
+	struct mlxsw_sp_port *port;
 
 	if (mlxsw_sp_port_dev_check(dev))
 		return netdev_priv(dev);
 
-	netdev_for_each_all_lower_dev(dev, lower_dev, iter) {
-		if (mlxsw_sp_port_dev_check(lower_dev))
-			return netdev_priv(lower_dev);
-	}
-	return NULL;
+	port = NULL;
+	netdev_walk_all_lower_dev(dev, mlxsw_lower_dev_walk, &port);
+
+	return port;
 }
 
 static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
@@ -3117,17 +3223,15 @@ static struct mlxsw_sp *mlxsw_sp_lower_get(struct net_device *dev)
 
 static struct mlxsw_sp_port *mlxsw_sp_port_dev_lower_find_rcu(struct net_device *dev)
 {
-	struct net_device *lower_dev;
-	struct list_head *iter;
+	struct mlxsw_sp_port *port;
 
 	if (mlxsw_sp_port_dev_check(dev))
 		return netdev_priv(dev);
 
-	netdev_for_each_all_lower_dev_rcu(dev, lower_dev, iter) {
-		if (mlxsw_sp_port_dev_check(lower_dev))
-			return netdev_priv(lower_dev);
-	}
-	return NULL;
+	port = NULL;
+	netdev_walk_all_lower_dev_rcu(dev, mlxsw_lower_dev_walk, &port);
+
+	return port;
 }
 
 struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev)
@@ -3171,11 +3275,9 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r,
 
 static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_rif; i++)
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
 		if (!mlxsw_sp->rifs[i])
 			return i;
 
@@ -3698,14 +3800,15 @@ static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port,
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	u8 local_port = mlxsw_sp_port->local_port;
 	u16 lag_id = mlxsw_sp_port->lag_id;
-	struct mlxsw_resources *resources;
+	u64 max_lag_members;
 	int i, count = 0;
 
 	if (!mlxsw_sp_port->lagged)
 		return true;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_ports_in_lag; i++) {
+	max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+					     MAX_LAG_MEMBERS);
+	for (i = 0; i < max_lag_members; i++) {
 		struct mlxsw_sp_port *lag_port;
 
 		lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
@@ -3911,13 +4014,13 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp,
 				  struct net_device *lag_dev,
 				  u16 *p_lag_id)
 {
-	struct mlxsw_resources *resources;
 	struct mlxsw_sp_upper *lag;
 	int free_lag_id = -1;
+	u64 max_lag;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_lag; i++) {
+	max_lag = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_LAG);
+	for (i = 0; i < max_lag; i++) {
 		lag = mlxsw_sp_lag_get(mlxsw_sp, i);
 		if (lag->ref_count) {
 			if (lag->dev == lag_dev) {
@@ -3951,11 +4054,12 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp,
 				       u16 lag_id, u8 *p_port_index)
 {
-	struct mlxsw_resources *resources;
+	u64 max_lag_members;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_ports_in_lag; i++) {
+	max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+					     MAX_LAG_MEMBERS);
+	for (i = 0; i < max_lag_members; i++) {
 		if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) {
 			*p_port_index = i;
 			return 0;
@@ -4652,6 +4756,16 @@ static struct notifier_block mlxsw_sp_router_netevent_nb __read_mostly = {
 	.notifier_call = mlxsw_sp_router_netevent_event,
 };
 
+static const struct pci_device_id mlxsw_sp_pci_id_table[] = {
+	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SPECTRUM), 0},
+	{0, },
+};
+
+static struct pci_driver mlxsw_sp_pci_driver = {
+	.name = mlxsw_sp_driver_name,
+	.id_table = mlxsw_sp_pci_id_table,
+};
+
 static int __init mlxsw_sp_module_init(void)
 {
 	int err;
@@ -4663,8 +4777,15 @@ static int __init mlxsw_sp_module_init(void)
 	err = mlxsw_core_driver_register(&mlxsw_sp_driver);
 	if (err)
 		goto err_core_driver_register;
+
+	err = mlxsw_pci_driver_register(&mlxsw_sp_pci_driver);
+	if (err)
+		goto err_pci_driver_register;
+
 	return 0;
 
+err_pci_driver_register:
+	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
 err_core_driver_register:
 	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
@@ -4674,6 +4795,7 @@ err_core_driver_register:
 
 static void __exit mlxsw_sp_module_exit(void)
 {
+	mlxsw_pci_driver_unregister(&mlxsw_sp_pci_driver);
 	mlxsw_core_driver_unregister(&mlxsw_sp_driver);
 	unregister_netevent_notifier(&mlxsw_sp_router_netevent_nb);
 	unregister_inetaddr_notifier(&mlxsw_sp_inetaddr_nb);
@@ -4686,4 +4808,4 @@ module_exit(mlxsw_sp_module_exit);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox Spectrum driver");
-MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SPECTRUM);
+MODULE_DEVICE_TABLE(pci, mlxsw_sp_pci_id_table);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 97bbc1d21df8..cc1af19d699a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -316,7 +316,6 @@ struct mlxsw_sp_port_pcpu_stats {
 };
 
 struct mlxsw_sp_port {
-	struct mlxsw_core_port core_port; /* must be first */
 	struct net_device *dev;
 	struct mlxsw_sp_port_pcpu_stats __percpu *pcpu_stats;
 	struct mlxsw_sp *mlxsw_sp;
@@ -479,12 +478,9 @@ static inline struct mlxsw_sp_rif *
 mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp,
 			 const struct net_device *dev)
 {
-	struct mlxsw_resources *resources;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-
-	for (i = 0; i < resources->max_rif; i++)
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
 		if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev)
 			return mlxsw_sp->rifs[i];
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index bcaed8a38037..a7468262f118 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -611,6 +611,9 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core,
 	u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size);
 	enum mlxsw_reg_sbpr_mode mode;
 
+	if (size > MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_BUFFER_SIZE))
+		return -EINVAL;
+
 	mode = (enum mlxsw_reg_sbpr_mode) threshold_type;
 	return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size);
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index e83072da6272..53126bf68ea9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -382,12 +382,10 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp)
 
 static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	struct mlxsw_sp_vr *vr;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_virtual_routers; i++) {
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 		vr = &mlxsw_sp->router.vrs[i];
 		if (!vr->used)
 			return vr;
@@ -429,14 +427,12 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp,
 					    u32 tb_id,
 					    enum mlxsw_sp_l3proto proto)
 {
-	struct mlxsw_resources *resources;
 	struct mlxsw_sp_vr *vr;
 	int i;
 
 	tb_id = mlxsw_sp_fix_tb_id(tb_id);
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_virtual_routers; i++) {
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 		vr = &mlxsw_sp->router.vrs[i];
 		if (vr->used && vr->proto == proto && vr->tb_id == tb_id)
 			return vr;
@@ -572,21 +568,20 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr)
 
 static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	struct mlxsw_sp_vr *vr;
+	u64 max_vrs;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	if (!resources->max_virtual_routers_valid)
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_VRS))
 		return -EIO;
 
-	mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers,
-				       sizeof(struct mlxsw_sp_vr),
+	max_vrs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS);
+	mlxsw_sp->router.vrs = kcalloc(max_vrs, sizeof(struct mlxsw_sp_vr),
 				       GFP_KERNEL);
 	if (!mlxsw_sp->router.vrs)
 		return -ENOMEM;
 
-	for (i = 0; i < resources->max_virtual_routers; i++) {
+	for (i = 0; i < max_vrs; i++) {
 		vr = &mlxsw_sp->router.vrs[i];
 		vr->id = i;
 	}
@@ -598,6 +593,14 @@ static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp);
 
 static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp)
 {
+	/* At this stage we're guaranteed not to have new incoming
+	 * FIB notifications and the work queue is free from FIBs
+	 * sitting on top of mlxsw netdevs. However, we can still
+	 * have other FIBs queued. Flush the queue before flushing
+	 * the device's tables. No need for locks, as we're the only
+	 * writer.
+	 */
+	mlxsw_core_flush_owq();
 	mlxsw_sp_router_fib_flush(mlxsw_sp);
 	kfree(mlxsw_sp->router.vrs);
 }
@@ -1872,14 +1875,12 @@ static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp)
 
 static void mlxsw_sp_router_fib_flush(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	struct mlxsw_sp_fib_entry *fib_entry;
 	struct mlxsw_sp_fib_entry *tmp;
 	struct mlxsw_sp_vr *vr;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_virtual_routers; i++) {
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_VRS); i++) {
 		vr = &mlxsw_sp->router.vrs[i];
 
 		if (!vr->used)
@@ -1903,6 +1904,9 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
 {
 	int err;
 
+	if (mlxsw_sp->router.aborted)
+		return;
+	dev_warn(mlxsw_sp->bus_info->dev, "FIB abort triggered. Note that FIB entries are no longer being offloaded to this device.\n");
 	mlxsw_sp_router_fib_flush(mlxsw_sp);
 	mlxsw_sp->router.aborted = true;
 	err = mlxsw_sp_router_set_abort_trap(mlxsw_sp);
@@ -1912,21 +1916,21 @@ static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp)
 
 static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
+	u64 max_rifs;
 	int err;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	if (!resources->max_rif_valid)
+	if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
 		return -EIO;
 
-	mlxsw_sp->rifs = kcalloc(resources->max_rif,
-				 sizeof(struct mlxsw_sp_rif *), GFP_KERNEL);
+	max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+	mlxsw_sp->rifs = kcalloc(max_rifs, sizeof(struct mlxsw_sp_rif *),
+				 GFP_KERNEL);
 	if (!mlxsw_sp->rifs)
 		return -ENOMEM;
 
 	mlxsw_reg_rgcr_pack(rgcr_pl, true);
-	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif);
+	mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
 	err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 	if (err)
 		goto err_rgcr_fail;
@@ -1940,47 +1944,101 @@ err_rgcr_fail:
 
 static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	struct mlxsw_resources *resources;
 	char rgcr_pl[MLXSW_REG_RGCR_LEN];
 	int i;
 
 	mlxsw_reg_rgcr_pack(rgcr_pl, false);
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl);
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_rif; i++)
+	for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++)
 		WARN_ON_ONCE(mlxsw_sp->rifs[i]);
 
 	kfree(mlxsw_sp->rifs);
 }
 
-static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
-				     unsigned long event, void *ptr)
+struct mlxsw_sp_fib_event_work {
+	struct delayed_work dw;
+	struct fib_entry_notifier_info fen_info;
+	struct mlxsw_sp *mlxsw_sp;
+	unsigned long event;
+};
+
+static void mlxsw_sp_router_fib_event_work(struct work_struct *work)
 {
-	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
-	struct fib_entry_notifier_info *fen_info = ptr;
+	struct mlxsw_sp_fib_event_work *fib_work =
+		container_of(work, struct mlxsw_sp_fib_event_work, dw.work);
+	struct mlxsw_sp *mlxsw_sp = fib_work->mlxsw_sp;
 	int err;
 
-	if (!net_eq(fen_info->info.net, &init_net))
-		return NOTIFY_DONE;
-
-	switch (event) {
+	/* Protect internal structures from changes */
+	rtnl_lock();
+	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_ADD:
-		err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info);
+		err = mlxsw_sp_router_fib4_add(mlxsw_sp, &fib_work->fen_info);
 		if (err)
 			mlxsw_sp_router_fib4_abort(mlxsw_sp);
+		fib_info_put(fib_work->fen_info.fi);
 		break;
 	case FIB_EVENT_ENTRY_DEL:
-		mlxsw_sp_router_fib4_del(mlxsw_sp, fen_info);
+		mlxsw_sp_router_fib4_del(mlxsw_sp, &fib_work->fen_info);
+		fib_info_put(fib_work->fen_info.fi);
 		break;
 	case FIB_EVENT_RULE_ADD: /* fall through */
 	case FIB_EVENT_RULE_DEL:
 		mlxsw_sp_router_fib4_abort(mlxsw_sp);
 		break;
 	}
+	rtnl_unlock();
+	kfree(fib_work);
+}
+
+/* Called with rcu_read_lock() */
+static int mlxsw_sp_router_fib_event(struct notifier_block *nb,
+				     unsigned long event, void *ptr)
+{
+	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
+	struct mlxsw_sp_fib_event_work *fib_work;
+	struct fib_notifier_info *info = ptr;
+
+	if (!net_eq(info->net, &init_net))
+		return NOTIFY_DONE;
+
+	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+	if (WARN_ON(!fib_work))
+		return NOTIFY_BAD;
+
+	INIT_DELAYED_WORK(&fib_work->dw, mlxsw_sp_router_fib_event_work);
+	fib_work->mlxsw_sp = mlxsw_sp;
+	fib_work->event = event;
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
+		/* Take reference on fib_info to prevent it from being
+		 * freed while work is queued. Release it afterwards.
+		 */
+		fib_info_hold(fib_work->fen_info.fi);
+		break;
+	}
+
+	mlxsw_core_schedule_odw(&fib_work->dw, 0);
+
 	return NOTIFY_DONE;
 }
 
+static void mlxsw_sp_router_fib_dump_flush(struct notifier_block *nb)
+{
+	struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb);
+
+	/* Flush pending FIB notifications and then flush the device's
+	 * table before requesting another dump. The FIB notification
+	 * block is unregistered, so no need to take RTNL.
+	 */
+	mlxsw_core_flush_owq();
+	mlxsw_sp_router_fib_flush(mlxsw_sp);
+}
+
 int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 {
 	int err;
@@ -1996,14 +2054,20 @@ int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_vrs_init;
 
-	err =  mlxsw_sp_neigh_init(mlxsw_sp);
+	err = mlxsw_sp_neigh_init(mlxsw_sp);
 	if (err)
 		goto err_neigh_init;
 
 	mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event;
-	register_fib_notifier(&mlxsw_sp->fib_nb);
+	err = register_fib_notifier(&mlxsw_sp->fib_nb,
+				    mlxsw_sp_router_fib_dump_flush);
+	if (err)
+		goto err_register_fib_notifier;
+
 	return 0;
 
+err_register_fib_notifier:
+	mlxsw_sp_neigh_fini(mlxsw_sp);
 err_neigh_init:
 	mlxsw_sp_vrs_fini(mlxsw_sp);
 err_vrs_init:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
index 1e2c8eca3af1..b87ba7d36bc4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c
@@ -1196,11 +1196,12 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp,
 						   u16 lag_id)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port;
-	struct mlxsw_resources *resources;
+	u64 max_lag_members;
 	int i;
 
-	resources = mlxsw_core_resources_get(mlxsw_sp->core);
-	for (i = 0; i < resources->max_ports_in_lag; i++) {
+	max_lag_members = MLXSW_CORE_RES_GET(mlxsw_sp->core,
+					     MAX_LAG_MEMBERS);
+	for (i = 0; i < max_lag_members; i++) {
 		mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i);
 		if (mlxsw_sp_port)
 			return mlxsw_sp_port;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchib.c b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
new file mode 100644
index 000000000000..74341fe0eb25
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchib.c
@@ -0,0 +1,605 @@
+/*
+ * drivers/net/ethernet/mellanox/mlxsw/switchib.c
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Elad Raz <eladr@mellanox.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/slab.h>
+#include <linux/device.h>
+#include <linux/skbuff.h>
+#include <linux/if_vlan.h>
+#include <net/switchdev.h>
+
+#include "pci.h"
+#include "core.h"
+#include "reg.h"
+#include "port.h"
+#include "trap.h"
+#include "txheader.h"
+#include "ib.h"
+
+static const char mlxsw_sib_driver_name[] = "mlxsw_switchib";
+static const char mlxsw_sib2_driver_name[] = "mlxsw_switchib2";
+
+struct mlxsw_sib_port;
+
+struct mlxsw_sib {
+	struct mlxsw_sib_port **ports;
+	struct mlxsw_core *core;
+	const struct mlxsw_bus_info *bus_info;
+};
+
+struct mlxsw_sib_port {
+	struct mlxsw_sib *mlxsw_sib;
+	u8 local_port;
+	struct {
+		u8 module;
+	} mapping;
+};
+
+/* tx_v1_hdr_version
+ * Tx header version.
+ * Must be set to 1.
+ */
+MLXSW_ITEM32(tx_v1, hdr, version, 0x00, 28, 4);
+
+/* tx_v1_hdr_ctl
+ * Packet control type.
+ * 0 - Ethernet control (e.g. EMADs, LACP)
+ * 1 - Ethernet data
+ */
+MLXSW_ITEM32(tx_v1, hdr, ctl, 0x00, 26, 2);
+
+/* tx_v1_hdr_proto
+ * Packet protocol type. Must be set to 1 (Ethernet).
+ */
+MLXSW_ITEM32(tx_v1, hdr, proto, 0x00, 21, 3);
+
+/* tx_v1_hdr_swid
+ * Switch partition ID. Must be set to 0.
+ */
+MLXSW_ITEM32(tx_v1, hdr, swid, 0x00, 12, 3);
+
+/* tx_v1_hdr_control_tclass
+ * Indicates if the packet should use the control TClass and not one
+ * of the data TClasses.
+ */
+MLXSW_ITEM32(tx_v1, hdr, control_tclass, 0x00, 6, 1);
+
+/* tx_v1_hdr_port_mid
+ * Destination local port for unicast packets.
+ * Destination multicast ID for multicast packets.
+ *
+ * Control packets are directed to a specific egress port, while data
+ * packets are transmitted through the CPU port (0) into the switch partition,
+ * where forwarding rules are applied.
+ */
+MLXSW_ITEM32(tx_v1, hdr, port_mid, 0x04, 16, 16);
+
+/* tx_v1_hdr_type
+ * 0 - Data packets
+ * 6 - Control packets
+ */
+MLXSW_ITEM32(tx_v1, hdr, type, 0x0C, 0, 4);
+
+static void
+mlxsw_sib_tx_v1_hdr_construct(struct sk_buff *skb,
+			      const struct mlxsw_tx_info *tx_info)
+{
+	char *txhdr = skb_push(skb, MLXSW_TXHDR_LEN);
+
+	memset(txhdr, 0, MLXSW_TXHDR_LEN);
+
+	mlxsw_tx_v1_hdr_version_set(txhdr, MLXSW_TXHDR_VERSION_1);
+	mlxsw_tx_v1_hdr_ctl_set(txhdr, MLXSW_TXHDR_ETH_CTL);
+	mlxsw_tx_v1_hdr_proto_set(txhdr, MLXSW_TXHDR_PROTO_ETH);
+	mlxsw_tx_v1_hdr_swid_set(txhdr, 0);
+	mlxsw_tx_v1_hdr_control_tclass_set(txhdr, 1);
+	mlxsw_tx_v1_hdr_port_mid_set(txhdr, tx_info->local_port);
+	mlxsw_tx_v1_hdr_type_set(txhdr, MLXSW_TXHDR_TYPE_CONTROL);
+}
+
+static int
+mlxsw_sib_port_admin_status_set(struct mlxsw_sib_port *mlxsw_sib_port,
+				bool is_up)
+{
+	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+	char paos_pl[MLXSW_REG_PAOS_LEN];
+
+	mlxsw_reg_paos_pack(paos_pl, mlxsw_sib_port->local_port,
+			    is_up ? MLXSW_PORT_ADMIN_STATUS_UP :
+			    MLXSW_PORT_ADMIN_STATUS_DOWN);
+	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(paos), paos_pl);
+}
+
+static int mlxsw_sib_port_mtu_set(struct mlxsw_sib_port *mlxsw_sib_port,
+				  u16 mtu)
+{
+	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+	char pmtu_pl[MLXSW_REG_PMTU_LEN];
+	int max_mtu;
+	int err;
+
+	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, 0);
+	err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
+	if (err)
+		return err;
+	max_mtu = mlxsw_reg_pmtu_max_mtu_get(pmtu_pl);
+
+	if (mtu > max_mtu)
+		return -EINVAL;
+
+	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sib_port->local_port, mtu);
+	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pmtu), pmtu_pl);
+}
+
+static int mlxsw_sib_port_set(struct mlxsw_sib_port *mlxsw_sib_port, u8 port)
+{
+	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+	char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
+	int err;
+
+	mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sib_port->local_port);
+	mlxsw_reg_plib_ib_port_set(plib_pl, port);
+	err = mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(plib), plib_pl);
+	return err;
+}
+
+static int mlxsw_sib_port_swid_set(struct mlxsw_sib_port *mlxsw_sib_port,
+				   u8 swid)
+{
+	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+	char pspa_pl[MLXSW_REG_PSPA_LEN];
+
+	mlxsw_reg_pspa_pack(pspa_pl, swid, mlxsw_sib_port->local_port);
+	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(pspa), pspa_pl);
+}
+
+static int mlxsw_sib_port_module_info_get(struct mlxsw_sib *mlxsw_sib,
+					  u8 local_port, u8 *p_module,
+					  u8 *p_width)
+{
+	char pmlp_pl[MLXSW_REG_PMLP_LEN];
+	int err;
+
+	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
+	err = mlxsw_reg_query(mlxsw_sib->core, MLXSW_REG(pmlp), pmlp_pl);
+	if (err)
+		return err;
+	*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+	*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
+	return 0;
+}
+
+static int mlxsw_sib_port_speed_set(struct mlxsw_sib_port *mlxsw_sib_port,
+				    u16 speed, u16 width)
+{
+	struct mlxsw_sib *mlxsw_sib = mlxsw_sib_port->mlxsw_sib;
+	char ptys_pl[MLXSW_REG_PTYS_LEN];
+
+	mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sib_port->local_port, speed,
+			       width);
+	return mlxsw_reg_write(mlxsw_sib->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+static bool mlxsw_sib_port_created(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+	return mlxsw_sib->ports[local_port] != NULL;
+}
+
+static int __mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
+				   u8 module, u8 width)
+{
+	struct mlxsw_sib_port *mlxsw_sib_port;
+	int err;
+
+	mlxsw_sib_port = kzalloc(sizeof(*mlxsw_sib_port), GFP_KERNEL);
+	if (!mlxsw_sib_port)
+		return -ENOMEM;
+	mlxsw_sib_port->mlxsw_sib = mlxsw_sib;
+	mlxsw_sib_port->local_port = local_port;
+	mlxsw_sib_port->mapping.module = module;
+
+	err = mlxsw_sib_port_swid_set(mlxsw_sib_port, 0);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set SWID\n",
+			mlxsw_sib_port->local_port);
+		goto err_port_swid_set;
+	}
+
+	/* Expose the IB port number as its front panel name */
+	err = mlxsw_sib_port_set(mlxsw_sib_port, module + 1);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set IB port\n",
+			mlxsw_sib_port->local_port);
+		goto err_port_ib_set;
+	}
+
+	/* Supports all speeds from SDR to FDR (bitmask) and supports bus
+	 * widths of 1x, 2x and 4x (3-bit bitmask)
+	 */
+	err = mlxsw_sib_port_speed_set(mlxsw_sib_port,
+				       MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
+				       BIT(3) - 1);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set speed\n",
+			mlxsw_sib_port->local_port);
+		goto err_port_speed_set;
+	}
+
+	/* Change to the maximum MTU the device supports, the SMA will take
+	 * care of the active MTU
+	 */
+	err = mlxsw_sib_port_mtu_set(mlxsw_sib_port, MLXSW_IB_DEFAULT_MTU);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to set MTU\n",
+			mlxsw_sib_port->local_port);
+		goto err_port_mtu_set;
+	}
+
+	err = mlxsw_sib_port_admin_status_set(mlxsw_sib_port, true);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
+			mlxsw_sib_port->local_port);
+		goto err_port_admin_set;
+	}
+
+	mlxsw_core_port_ib_set(mlxsw_sib->core, mlxsw_sib_port->local_port,
+			       mlxsw_sib_port);
+	mlxsw_sib->ports[local_port] = mlxsw_sib_port;
+	return 0;
+
+err_port_admin_set:
+err_port_mtu_set:
+err_port_speed_set:
+err_port_ib_set:
+	mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+	kfree(mlxsw_sib_port);
+	return err;
+}
+
+static int mlxsw_sib_port_create(struct mlxsw_sib *mlxsw_sib, u8 local_port,
+				 u8 module, u8 width)
+{
+	int err;
+
+	err = mlxsw_core_port_init(mlxsw_sib->core, local_port);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Port %d: Failed to init core port\n",
+			local_port);
+		return err;
+	}
+	err = __mlxsw_sib_port_create(mlxsw_sib, local_port, module, width);
+	if (err)
+		goto err_port_create;
+
+	return 0;
+
+err_port_create:
+	mlxsw_core_port_fini(mlxsw_sib->core, local_port);
+	return err;
+}
+
+static void __mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+	struct mlxsw_sib_port *mlxsw_sib_port = mlxsw_sib->ports[local_port];
+
+	mlxsw_core_port_clear(mlxsw_sib->core, local_port, mlxsw_sib);
+	mlxsw_sib->ports[local_port] = NULL;
+	mlxsw_sib_port_admin_status_set(mlxsw_sib_port, false);
+	mlxsw_sib_port_swid_set(mlxsw_sib_port, MLXSW_PORT_SWID_DISABLED_PORT);
+	kfree(mlxsw_sib_port);
+}
+
+static void mlxsw_sib_port_remove(struct mlxsw_sib *mlxsw_sib, u8 local_port)
+{
+	__mlxsw_sib_port_remove(mlxsw_sib, local_port);
+	mlxsw_core_port_fini(mlxsw_sib->core, local_port);
+}
+
+static void mlxsw_sib_ports_remove(struct mlxsw_sib *mlxsw_sib)
+{
+	int i;
+
+	for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++)
+		if (mlxsw_sib_port_created(mlxsw_sib, i))
+			mlxsw_sib_port_remove(mlxsw_sib, i);
+	kfree(mlxsw_sib->ports);
+}
+
+static int mlxsw_sib_ports_create(struct mlxsw_sib *mlxsw_sib)
+{
+	size_t alloc_size;
+	u8 module, width;
+	int i;
+	int err;
+
+	alloc_size = sizeof(struct mlxsw_sib_port *) * MLXSW_PORT_MAX_IB_PORTS;
+	mlxsw_sib->ports = kzalloc(alloc_size, GFP_KERNEL);
+	if (!mlxsw_sib->ports)
+		return -ENOMEM;
+
+	for (i = 1; i < MLXSW_PORT_MAX_IB_PORTS; i++) {
+		err = mlxsw_sib_port_module_info_get(mlxsw_sib, i, &module,
+						     &width);
+		if (err)
+			goto err_port_module_info_get;
+		if (!width)
+			continue;
+		err = mlxsw_sib_port_create(mlxsw_sib, i, module, width);
+		if (err)
+			goto err_port_create;
+	}
+	return 0;
+
+err_port_create:
+err_port_module_info_get:
+	for (i--; i >= 1; i--)
+		if (mlxsw_sib_port_created(mlxsw_sib, i))
+			mlxsw_sib_port_remove(mlxsw_sib, i);
+	kfree(mlxsw_sib->ports);
+	return err;
+}
+
+static void
+mlxsw_sib_pude_ib_event_func(struct mlxsw_sib_port *mlxsw_sib_port,
+			     enum mlxsw_reg_pude_oper_status status)
+{
+	if (status == MLXSW_PORT_OPER_STATUS_UP)
+		pr_info("ib link for port %d - up\n",
+			mlxsw_sib_port->mapping.module + 1);
+	else
+		pr_info("ib link for port %d - down\n",
+			mlxsw_sib_port->mapping.module + 1);
+}
+
+static void mlxsw_sib_pude_event_func(const struct mlxsw_reg_info *reg,
+				      char *pude_pl, void *priv)
+{
+	struct mlxsw_sib *mlxsw_sib = priv;
+	struct mlxsw_sib_port *mlxsw_sib_port;
+	enum mlxsw_reg_pude_oper_status status;
+	u8 local_port;
+
+	local_port = mlxsw_reg_pude_local_port_get(pude_pl);
+	mlxsw_sib_port = mlxsw_sib->ports[local_port];
+	if (!mlxsw_sib_port) {
+		dev_warn(mlxsw_sib->bus_info->dev, "Port %d: Link event received for non-existent port\n",
+			 local_port);
+		return;
+	}
+
+	status = mlxsw_reg_pude_oper_status_get(pude_pl);
+	mlxsw_sib_pude_ib_event_func(mlxsw_sib_port, status);
+}
+
+static const struct mlxsw_listener mlxsw_sib_listener[] = {
+	MLXSW_EVENTL(mlxsw_sib_pude_event_func, PUDE, EMAD),
+};
+
+static int mlxsw_sib_taps_init(struct mlxsw_sib *mlxsw_sib)
+{
+	int i;
+	int err;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
+		err = mlxsw_core_trap_register(mlxsw_sib->core,
+					       &mlxsw_sib_listener[i],
+					       mlxsw_sib);
+		if (err)
+			goto err_rx_listener_register;
+	}
+
+	return 0;
+
+err_rx_listener_register:
+	for (i--; i >= 0; i--) {
+		mlxsw_core_trap_unregister(mlxsw_sib->core,
+					   &mlxsw_sib_listener[i],
+					   mlxsw_sib);
+	}
+
+	return err;
+}
+
+static void mlxsw_sib_traps_fini(struct mlxsw_sib *mlxsw_sib)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sib_listener); i++) {
+		mlxsw_core_trap_unregister(mlxsw_sib->core,
+					   &mlxsw_sib_listener[i], mlxsw_sib);
+	}
+}
+
+static int mlxsw_sib_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+	char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+			    MLXSW_REG_HTGT_INVALID_POLICER,
+			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+			    MLXSW_REG_HTGT_DEFAULT_TC);
+	mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
+	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+					MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SIB_EMAD);
+	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
+static int mlxsw_sib_init(struct mlxsw_core *mlxsw_core,
+			  const struct mlxsw_bus_info *mlxsw_bus_info)
+{
+	struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
+	int err;
+
+	mlxsw_sib->core = mlxsw_core;
+	mlxsw_sib->bus_info = mlxsw_bus_info;
+
+	err = mlxsw_sib_ports_create(mlxsw_sib);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Failed to create ports\n");
+		return err;
+	}
+
+	err = mlxsw_sib_taps_init(mlxsw_sib);
+	if (err) {
+		dev_err(mlxsw_sib->bus_info->dev, "Failed to set traps\n");
+		goto err_traps_init_err;
+	}
+
+	return 0;
+
+err_traps_init_err:
+	mlxsw_sib_ports_remove(mlxsw_sib);
+	return err;
+}
+
+static void mlxsw_sib_fini(struct mlxsw_core *mlxsw_core)
+{
+	struct mlxsw_sib *mlxsw_sib = mlxsw_core_driver_priv(mlxsw_core);
+
+	mlxsw_sib_traps_fini(mlxsw_sib);
+	mlxsw_sib_ports_remove(mlxsw_sib);
+}
+
+static struct mlxsw_config_profile mlxsw_sib_config_profile = {
+	.used_max_system_port		= 1,
+	.max_system_port		= 48000,
+	.used_max_ib_mc			= 1,
+	.max_ib_mc			= 27,
+	.used_max_pkey			= 1,
+	.max_pkey			= 32,
+	.swid_config			= {
+		{
+			.used_type	= 1,
+			.type		= MLXSW_PORT_SWID_TYPE_IB,
+		}
+	},
+	.resource_query_enable		= 0,
+};
+
+static struct mlxsw_driver mlxsw_sib_driver = {
+	.kind			= mlxsw_sib_driver_name,
+	.priv_size		= sizeof(struct mlxsw_sib),
+	.init			= mlxsw_sib_init,
+	.fini			= mlxsw_sib_fini,
+	.basic_trap_groups_set	= mlxsw_sib_basic_trap_groups_set,
+	.txhdr_construct	= mlxsw_sib_tx_v1_hdr_construct,
+	.txhdr_len		= MLXSW_TXHDR_LEN,
+	.profile		= &mlxsw_sib_config_profile,
+};
+
+static struct mlxsw_driver mlxsw_sib2_driver = {
+	.kind			= mlxsw_sib2_driver_name,
+	.priv_size		= sizeof(struct mlxsw_sib),
+	.init			= mlxsw_sib_init,
+	.fini			= mlxsw_sib_fini,
+	.basic_trap_groups_set	= mlxsw_sib_basic_trap_groups_set,
+	.txhdr_construct	= mlxsw_sib_tx_v1_hdr_construct,
+	.txhdr_len		= MLXSW_TXHDR_LEN,
+	.profile		= &mlxsw_sib_config_profile,
+};
+
+static const struct pci_device_id mlxsw_sib_pci_id_table[] = {
+	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB), 0},
+	{0, },
+};
+
+static struct pci_driver mlxsw_sib_pci_driver = {
+	.name = mlxsw_sib_driver_name,
+	.id_table = mlxsw_sib_pci_id_table,
+};
+
+static const struct pci_device_id mlxsw_sib2_pci_id_table[] = {
+	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHIB2), 0},
+	{0, },
+};
+
+static struct pci_driver mlxsw_sib2_pci_driver = {
+	.name = mlxsw_sib2_driver_name,
+	.id_table = mlxsw_sib2_pci_id_table,
+};
+
+static int __init mlxsw_sib_module_init(void)
+{
+	int err;
+
+	err = mlxsw_core_driver_register(&mlxsw_sib_driver);
+	if (err)
+		return err;
+
+	err = mlxsw_core_driver_register(&mlxsw_sib2_driver);
+	if (err)
+		goto err_sib2_driver_register;
+
+	err = mlxsw_pci_driver_register(&mlxsw_sib_pci_driver);
+	if (err)
+		goto err_sib_pci_driver_register;
+
+	err = mlxsw_pci_driver_register(&mlxsw_sib2_pci_driver);
+	if (err)
+		goto err_sib2_pci_driver_register;
+
+	return 0;
+
+err_sib2_pci_driver_register:
+	mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
+err_sib_pci_driver_register:
+	mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
+err_sib2_driver_register:
+	mlxsw_core_driver_unregister(&mlxsw_sib_driver);
+	return err;
+}
+
+static void __exit mlxsw_sib_module_exit(void)
+{
+	mlxsw_pci_driver_unregister(&mlxsw_sib2_pci_driver);
+	mlxsw_pci_driver_unregister(&mlxsw_sib_pci_driver);
+	mlxsw_core_driver_unregister(&mlxsw_sib2_driver);
+	mlxsw_core_driver_unregister(&mlxsw_sib_driver);
+}
+
+module_init(mlxsw_sib_module_init);
+module_exit(mlxsw_sib_module_exit);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_AUTHOR("Elad Raz <eladr@mellanox.com>");
+MODULE_DESCRIPTION("Mellanox SwitchIB and SwitchIB-2 driver");
+MODULE_ALIAS("mlxsw_switchib2");
+MODULE_DEVICE_TABLE(pci, mlxsw_sib_pci_id_table);
+MODULE_DEVICE_TABLE(pci, mlxsw_sib2_pci_id_table);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 92bda8703f87..150ccf5192a9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -3,7 +3,7 @@
  * Copyright (c) 2015 Mellanox Technologies. All rights reserved.
  * Copyright (c) 2015 Jiri Pirko <jiri@mellanox.com>
  * Copyright (c) 2015 Ido Schimmel <idosch@mellanox.com>
- * Copyright (c) 2015 Elad Raz <eladr@mellanox.com>
+ * Copyright (c) 2015-2016 Elad Raz <eladr@mellanox.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
@@ -37,6 +37,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/types.h>
+#include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/slab.h>
@@ -44,13 +45,14 @@
 #include <linux/skbuff.h>
 #include <linux/if_vlan.h>
 #include <net/switchdev.h>
-#include <generated/utsrelease.h>
 
+#include "pci.h"
 #include "core.h"
 #include "reg.h"
 #include "port.h"
 #include "trap.h"
 #include "txheader.h"
+#include "ib.h"
 
 static const char mlxsw_sx_driver_name[] = "mlxsw_switchx2";
 static const char mlxsw_sx_driver_version[] = "1.0";
@@ -74,11 +76,13 @@ struct mlxsw_sx_port_pcpu_stats {
 };
 
 struct mlxsw_sx_port {
-	struct mlxsw_core_port core_port; /* must be first */
 	struct net_device *dev;
 	struct mlxsw_sx_port_pcpu_stats __percpu *pcpu_stats;
 	struct mlxsw_sx *mlxsw_sx;
 	u8 local_port;
+	struct {
+		u8 module;
+	} mapping;
 };
 
 /* tx_hdr_version
@@ -214,14 +218,14 @@ static int mlxsw_sx_port_oper_status_get(struct mlxsw_sx_port *mlxsw_sx_port,
 	return 0;
 }
 
-static int mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, u16 mtu)
+static int __mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port,
+				   u16 mtu)
 {
 	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
 	char pmtu_pl[MLXSW_REG_PMTU_LEN];
 	int max_mtu;
 	int err;
 
-	mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
 	mlxsw_reg_pmtu_pack(pmtu_pl, mlxsw_sx_port->local_port, 0);
 	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
 	if (err)
@@ -235,6 +239,32 @@ static int mlxsw_sx_port_mtu_set(struct mlxsw_sx_port *mlxsw_sx_port, u16 mtu)
 	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(pmtu), pmtu_pl);
 }
 
+static int mlxsw_sx_port_mtu_eth_set(struct mlxsw_sx_port *mlxsw_sx_port,
+				     u16 mtu)
+{
+	mtu += MLXSW_TXHDR_LEN + ETH_HLEN;
+	return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+}
+
+static int mlxsw_sx_port_mtu_ib_set(struct mlxsw_sx_port *mlxsw_sx_port,
+				    u16 mtu)
+{
+	return __mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+}
+
+static int mlxsw_sx_port_ib_port_set(struct mlxsw_sx_port *mlxsw_sx_port,
+				     u8 ib_port)
+{
+	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+	char plib_pl[MLXSW_REG_PLIB_LEN] = {0};
+
+	/* Fill a zeroed PLIB payload with local port and IB port, write it. */
+	mlxsw_reg_plib_local_port_set(plib_pl, mlxsw_sx_port->local_port);
+	mlxsw_reg_plib_ib_port_set(plib_pl, ib_port);
+
+	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(plib), plib_pl);
+}
+
 static int mlxsw_sx_port_swid_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 swid)
 {
 	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
@@ -254,18 +284,19 @@ mlxsw_sx_port_system_port_mapping_set(struct mlxsw_sx_port *mlxsw_sx_port)
 	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sspr), sspr_pl);
 }
 
-static int mlxsw_sx_port_module_check(struct mlxsw_sx_port *mlxsw_sx_port,
-				      bool *p_usable)
+static int mlxsw_sx_port_module_info_get(struct mlxsw_sx *mlxsw_sx,
+					 u8 local_port, u8 *p_module,
+					 u8 *p_width)
 {
-	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
 	char pmlp_pl[MLXSW_REG_PMLP_LEN];
 	int err;
 
-	mlxsw_reg_pmlp_pack(pmlp_pl, mlxsw_sx_port->local_port);
+	mlxsw_reg_pmlp_pack(pmlp_pl, local_port);
 	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(pmlp), pmlp_pl);
 	if (err)
 		return err;
-	*p_usable = mlxsw_reg_pmlp_width_get(pmlp_pl) ? true : false;
+	*p_module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0);
+	*p_width = mlxsw_reg_pmlp_width_get(pmlp_pl);
 	return 0;
 }
 
@@ -343,7 +374,7 @@ static int mlxsw_sx_port_change_mtu(struct net_device *dev, int mtu)
 	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
 	int err;
 
-	err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, mtu);
+	err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, mtu);
 	if (err)
 		return err;
 	dev->mtu = mtu;
@@ -382,12 +413,26 @@ mlxsw_sx_port_get_stats64(struct net_device *dev,
 	return stats;
 }
 
+static int mlxsw_sx_port_get_phys_port_name(struct net_device *dev, char *name,
+					    size_t len)
+{
+	struct mlxsw_sx_port *mlxsw_sx_port = netdev_priv(dev);
+	int err;
+
+	err = snprintf(name, len, "p%d", mlxsw_sx_port->mapping.module + 1);
+	if (err >= len)
+		return -EINVAL;
+
+	return 0;
+}
+
 static const struct net_device_ops mlxsw_sx_port_netdev_ops = {
 	.ndo_open		= mlxsw_sx_port_open,
 	.ndo_stop		= mlxsw_sx_port_stop,
 	.ndo_start_xmit		= mlxsw_sx_port_xmit,
 	.ndo_change_mtu		= mlxsw_sx_port_change_mtu,
 	.ndo_get_stats64	= mlxsw_sx_port_get_stats64,
+	.ndo_get_phys_port_name = mlxsw_sx_port_get_phys_port_name,
 };
 
 static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
@@ -410,7 +455,7 @@ static void mlxsw_sx_port_get_drvinfo(struct net_device *dev,
 
 struct mlxsw_sx_port_hw_stats {
 	char str[ETH_GSTRING_LEN];
-	u64 (*getter)(char *payload);
+	u64 (*getter)(const char *payload);
 };
 
 static const struct mlxsw_sx_port_hw_stats mlxsw_sx_port_hw_stats[] = {
@@ -642,6 +687,7 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
 };
 
 #define MLXSW_SX_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sx_port_link_mode)
+#define MLXSW_SX_PORT_BASE_SPEED 10000 /* Mb/s */
 
 static u32 mlxsw_sx_from_ptys_supported_port(u32 ptys_eth_proto)
 {
@@ -741,14 +787,14 @@ static int mlxsw_sx_port_get_settings(struct net_device *dev,
 	u32 eth_proto_oper;
 	int err;
 
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
 	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 	if (err) {
 		netdev_err(dev, "Failed to get proto");
 		return err;
 	}
-	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap,
-			      &eth_proto_admin, &eth_proto_oper);
+	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap,
+				  &eth_proto_admin, &eth_proto_oper);
 
 	cmd->supported = mlxsw_sx_from_ptys_supported_port(eth_proto_cap) |
 			 mlxsw_sx_from_ptys_supported_link(eth_proto_cap) |
@@ -789,6 +835,18 @@ static u32 mlxsw_sx_to_ptys_speed(u32 speed)
 	return ptys_proto;
 }
 
+/* OR the PTYS masks of all link modes whose speed is <= upper_speed. */
+static u32 mlxsw_sx_to_ptys_upper_speed(u32 upper_speed)
+{
+	u32 proto_mask = 0;
+	int idx;
+
+	for (idx = MLXSW_SX_PORT_LINK_MODE_LEN - 1; idx >= 0; idx--)
+		if (mlxsw_sx_port_link_mode[idx].speed <= upper_speed)
+			proto_mask |= mlxsw_sx_port_link_mode[idx].mask;
+	return proto_mask;
+}
+
 static int mlxsw_sx_port_set_settings(struct net_device *dev,
 				      struct ethtool_cmd *cmd)
 {
@@ -808,13 +866,14 @@ static int mlxsw_sx_port_set_settings(struct net_device *dev,
 		mlxsw_sx_to_ptys_advert_link(cmd->advertising) :
 		mlxsw_sx_to_ptys_speed(speed);
 
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port, 0);
 	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 	if (err) {
 		netdev_err(dev, "Failed to get proto");
 		return err;
 	}
-	mlxsw_reg_ptys_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin, NULL);
+	mlxsw_reg_ptys_eth_unpack(ptys_pl, &eth_proto_cap, &eth_proto_admin,
+				  NULL);
 
 	eth_proto_new = eth_proto_new & eth_proto_cap;
 	if (!eth_proto_new) {
@@ -824,7 +883,8 @@ static int mlxsw_sx_port_set_settings(struct net_device *dev,
 	if (eth_proto_new == eth_proto_admin)
 		return 0;
 
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, eth_proto_new);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
+				eth_proto_new);
 	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 	if (err) {
 		netdev_err(dev, "Failed to set proto admin");
@@ -888,7 +948,7 @@ static const struct switchdev_ops mlxsw_sx_port_switchdev_ops = {
 
 static int mlxsw_sx_hw_id_get(struct mlxsw_sx *mlxsw_sx)
 {
-	char spad_pl[MLXSW_REG_SPAD_LEN];
+	char spad_pl[MLXSW_REG_SPAD_LEN] = {0};
 	int err;
 
 	err = mlxsw_reg_query(mlxsw_sx->core, MLXSW_REG(spad), spad_pl);
@@ -935,13 +995,28 @@ static int mlxsw_sx_port_stp_state_set(struct mlxsw_sx_port *mlxsw_sx_port,
 	return err;
 }
 
-static int mlxsw_sx_port_speed_set(struct mlxsw_sx_port *mlxsw_sx_port,
-				   u32 speed)
+static int mlxsw_sx_port_ib_speed_set(struct mlxsw_sx_port *mlxsw_sx_port,
+				      u16 speed, u16 width)
 {
 	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
 	char ptys_pl[MLXSW_REG_PTYS_LEN];
 
-	mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sx_port->local_port, speed);
+	mlxsw_reg_ptys_ib_pack(ptys_pl, mlxsw_sx_port->local_port, speed,
+			       width);
+	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
+}
+
+static int
+mlxsw_sx_port_speed_by_width_set(struct mlxsw_sx_port *mlxsw_sx_port, u8 width)
+{
+	struct mlxsw_sx *mlxsw_sx = mlxsw_sx_port->mlxsw_sx;
+	u32 upper_speed = MLXSW_SX_PORT_BASE_SPEED * width;
+	char ptys_pl[MLXSW_REG_PTYS_LEN];
+	u32 eth_proto_admin;
+
+	eth_proto_admin = mlxsw_sx_to_ptys_upper_speed(upper_speed);
+	mlxsw_reg_ptys_eth_pack(ptys_pl, mlxsw_sx_port->local_port,
+				eth_proto_admin);
 	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(ptys), ptys_pl);
 }
 
@@ -956,20 +1031,22 @@ mlxsw_sx_port_mac_learning_mode_set(struct mlxsw_sx_port *mlxsw_sx_port,
 	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(spmlr), spmlr_pl);
 }
 
-static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+static int __mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+				      u8 module, u8 width)
 {
 	struct mlxsw_sx_port *mlxsw_sx_port;
 	struct net_device *dev;
-	bool usable;
 	int err;
 
 	dev = alloc_etherdev(sizeof(struct mlxsw_sx_port));
 	if (!dev)
 		return -ENOMEM;
+	SET_NETDEV_DEV(dev, mlxsw_sx->bus_info->dev);
 	mlxsw_sx_port = netdev_priv(dev);
 	mlxsw_sx_port->dev = dev;
 	mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
 	mlxsw_sx_port->local_port = local_port;
+	mlxsw_sx_port->mapping.module = module;
 
 	mlxsw_sx_port->pcpu_stats =
 		netdev_alloc_pcpu_stats(struct mlxsw_sx_port_pcpu_stats);
@@ -994,24 +1071,14 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
 	dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_LLTX | NETIF_F_SG |
 			 NETIF_F_VLAN_CHALLENGED;
 
+	dev->min_mtu = 0;
+	dev->max_mtu = ETH_MAX_MTU;
+
 	/* Each packet needs to have a Tx header (metadata) on top all other
 	 * headers.
 	 */
 	dev->needed_headroom = MLXSW_TXHDR_LEN;
 
-	err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to check module\n",
-			mlxsw_sx_port->local_port);
-		goto err_port_module_check;
-	}
-
-	if (!usable) {
-		dev_dbg(mlxsw_sx->bus_info->dev, "Port %d: Not usable, skipping initialization\n",
-			mlxsw_sx_port->local_port);
-		goto port_not_usable;
-	}
-
 	err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
 	if (err) {
 		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
@@ -1026,15 +1093,14 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
 		goto err_port_swid_set;
 	}
 
-	err = mlxsw_sx_port_speed_set(mlxsw_sx_port,
-				      MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4);
+	err = mlxsw_sx_port_speed_by_width_set(mlxsw_sx_port, width);
 	if (err) {
 		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
 			mlxsw_sx_port->local_port);
 		goto err_port_speed_set;
 	}
 
-	err = mlxsw_sx_port_mtu_set(mlxsw_sx_port, ETH_DATA_LEN);
+	err = mlxsw_sx_port_mtu_eth_set(mlxsw_sx_port, ETH_DATA_LEN);
 	if (err) {
 		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
 			mlxsw_sx_port->local_port);
@@ -1069,19 +1135,11 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port)
 		goto err_register_netdev;
 	}
 
-	err = mlxsw_core_port_init(mlxsw_sx->core, &mlxsw_sx_port->core_port,
-				   mlxsw_sx_port->local_port, dev, false, 0);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n",
-			mlxsw_sx_port->local_port);
-		goto err_core_port_init;
-	}
-
+	mlxsw_core_port_eth_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
+				mlxsw_sx_port, dev, false, 0);
 	mlxsw_sx->ports[local_port] = mlxsw_sx_port;
 	return 0;
 
-err_core_port_init:
-	unregister_netdev(dev);
 err_register_netdev:
 err_port_mac_learning_mode_set:
 err_port_stp_state_set:
@@ -1091,8 +1149,6 @@ err_port_speed_set:
 	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
 err_port_swid_set:
 err_port_system_port_mapping_set:
-port_not_usable:
-err_port_module_check:
 err_dev_addr_get:
 	free_percpu(mlxsw_sx_port->pcpu_stats);
 err_alloc_stats:
@@ -1100,31 +1156,168 @@ err_alloc_stats:
 	return err;
 }
 
-static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+static int mlxsw_sx_port_eth_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+				    u8 module, u8 width)
+{
+	int err;
+
+	err = mlxsw_core_port_init(mlxsw_sx->core, local_port);
+	if (err) {
+		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to init core port\n",
+			local_port);
+		return err;
+	}
+	err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module, width);
+	if (err)
+		goto err_port_create;
+
+	return 0;
+
+err_port_create:
+	mlxsw_core_port_fini(mlxsw_sx->core, local_port);
+	return err;
+}
+
+static void __mlxsw_sx_port_eth_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
 {
 	struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
 
-	if (!mlxsw_sx_port)
-		return;
-	mlxsw_core_port_fini(&mlxsw_sx_port->core_port);
+	mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
 	unregister_netdev(mlxsw_sx_port->dev); /* This calls ndo_stop */
+	mlxsw_sx->ports[local_port] = NULL;
 	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
 	free_percpu(mlxsw_sx_port->pcpu_stats);
 	free_netdev(mlxsw_sx_port->dev);
 }
 
+static bool mlxsw_sx_port_created(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+	return mlxsw_sx->ports[local_port] != NULL;
+}
+
+static int __mlxsw_sx_port_ib_create(struct mlxsw_sx *mlxsw_sx, u8 local_port,
+				     u8 module, u8 width)
+{
+	struct mlxsw_sx_port *mlxsw_sx_port;
+	int err;
+
+	mlxsw_sx_port = kzalloc(sizeof(*mlxsw_sx_port), GFP_KERNEL);
+	if (!mlxsw_sx_port)
+		return -ENOMEM;
+	mlxsw_sx_port->mlxsw_sx = mlxsw_sx;
+	mlxsw_sx_port->local_port = local_port;
+	mlxsw_sx_port->mapping.module = module;
+
+	err = mlxsw_sx_port_system_port_mapping_set(mlxsw_sx_port);
+	if (err) {
+		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set system port mapping\n",
+			mlxsw_sx_port->local_port);
+		goto err_port_system_port_mapping_set;
+	}
+
+	/* Adding port to Infiniband swid (1) */
+	err = mlxsw_sx_port_swid_set(mlxsw_sx_port, 1);
+	if (err) {
+		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set SWID\n",
+			mlxsw_sx_port->local_port);
+		goto err_port_swid_set;
+	}
+
+	/* Expose the IB port number as its front panel name */
+	err = mlxsw_sx_port_ib_port_set(mlxsw_sx_port, module + 1);
+	if (err) {
+		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set IB port\n",
+			mlxsw_sx_port->local_port);
+		goto err_port_ib_set;
+	}
+
+	/* Support all speeds from SDR to FDR (bitmask) and bus widths of
+	 * 1x, 2x and 4x (3-bit bitmask)
+	 */
+	err = mlxsw_sx_port_ib_speed_set(mlxsw_sx_port,
+					 MLXSW_REG_PTYS_IB_SPEED_EDR - 1,
+					 BIT(3) - 1);
+	if (err) {
+		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set speed\n",
+			mlxsw_sx_port->local_port);
+		goto err_port_speed_set;
+	}
+
+	/* Change to the maximum MTU the device supports, the SMA will take
+	 * care of the active MTU
+	 */
+	err = mlxsw_sx_port_mtu_ib_set(mlxsw_sx_port, MLXSW_IB_DEFAULT_MTU);
+	if (err) {
+		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to set MTU\n",
+			mlxsw_sx_port->local_port);
+		goto err_port_mtu_set;
+	}
+
+	err = mlxsw_sx_port_admin_status_set(mlxsw_sx_port, true);
+	if (err) {
+		dev_err(mlxsw_sx->bus_info->dev, "Port %d: Failed to change admin state to UP\n",
+			mlxsw_sx_port->local_port);
+		goto err_port_admin_set;
+	}
+
+	mlxsw_core_port_ib_set(mlxsw_sx->core, mlxsw_sx_port->local_port,
+			       mlxsw_sx_port);
+	mlxsw_sx->ports[local_port] = mlxsw_sx_port;
+	return 0;
+
+err_port_admin_set:
+err_port_mtu_set:
+err_port_speed_set:
+err_port_ib_set:
+	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+err_port_swid_set:
+err_port_system_port_mapping_set:
+	kfree(mlxsw_sx_port);
+	return err;
+}
+
+static void __mlxsw_sx_port_ib_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+	struct mlxsw_sx_port *mlxsw_sx_port = mlxsw_sx->ports[local_port];
+
+	mlxsw_core_port_clear(mlxsw_sx->core, local_port, mlxsw_sx);
+	mlxsw_sx->ports[local_port] = NULL;
+	mlxsw_sx_port_admin_status_set(mlxsw_sx_port, false);
+	mlxsw_sx_port_swid_set(mlxsw_sx_port, MLXSW_PORT_SWID_DISABLED_PORT);
+	kfree(mlxsw_sx_port);
+}
+
+static void __mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+	enum devlink_port_type port_type =
+		mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
+
+	if (port_type == DEVLINK_PORT_TYPE_ETH)
+		__mlxsw_sx_port_eth_remove(mlxsw_sx, local_port);
+	else if (port_type == DEVLINK_PORT_TYPE_IB)
+		__mlxsw_sx_port_ib_remove(mlxsw_sx, local_port);
+}
+
+static void mlxsw_sx_port_remove(struct mlxsw_sx *mlxsw_sx, u8 local_port)
+{
+	__mlxsw_sx_port_remove(mlxsw_sx, local_port);
+	mlxsw_core_port_fini(mlxsw_sx->core, local_port);
+}
+
 static void mlxsw_sx_ports_remove(struct mlxsw_sx *mlxsw_sx)
 {
 	int i;
 
 	for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++)
-		mlxsw_sx_port_remove(mlxsw_sx, i);
+		if (mlxsw_sx_port_created(mlxsw_sx, i))
+			mlxsw_sx_port_remove(mlxsw_sx, i);
 	kfree(mlxsw_sx->ports);
 }
 
 static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx)
 {
 	size_t alloc_size;
+	u8 module, width;
 	int i;
 	int err;
 
@@ -1134,25 +1327,57 @@ static int mlxsw_sx_ports_create(struct mlxsw_sx *mlxsw_sx)
 		return -ENOMEM;
 
 	for (i = 1; i < MLXSW_PORT_MAX_PORTS; i++) {
-		err = mlxsw_sx_port_create(mlxsw_sx, i);
+		err = mlxsw_sx_port_module_info_get(mlxsw_sx, i, &module,
+						    &width);
+		if (err)
+			goto err_port_module_info_get;
+		if (!width)
+			continue;
+		err = mlxsw_sx_port_eth_create(mlxsw_sx, i, module, width);
 		if (err)
 			goto err_port_create;
 	}
 	return 0;
 
 err_port_create:
+err_port_module_info_get:
 	for (i--; i >= 1; i--)
-		mlxsw_sx_port_remove(mlxsw_sx, i);
+		if (mlxsw_sx_port_created(mlxsw_sx, i))
+			mlxsw_sx_port_remove(mlxsw_sx, i);
 	kfree(mlxsw_sx->ports);
 	return err;
 }
 
+static void mlxsw_sx_pude_eth_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
+					 enum mlxsw_reg_pude_oper_status status)
+{
+	if (status == MLXSW_PORT_OPER_STATUS_UP) {
+		netdev_info(mlxsw_sx_port->dev, "link up\n");
+		netif_carrier_on(mlxsw_sx_port->dev);
+	} else {
+		netdev_info(mlxsw_sx_port->dev, "link down\n");
+		netif_carrier_off(mlxsw_sx_port->dev);
+	}
+}
+
+static void mlxsw_sx_pude_ib_event_func(struct mlxsw_sx_port *mlxsw_sx_port,
+					enum mlxsw_reg_pude_oper_status status)
+{
+	int ib_port = mlxsw_sx_port->mapping.module + 1;
+
+	if (status == MLXSW_PORT_OPER_STATUS_UP)
+		pr_info("ib link for port %d - up\n", ib_port);
+	else
+		pr_info("ib link for port %d - down\n", ib_port);
+}
+
 static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg,
 				     char *pude_pl, void *priv)
 {
 	struct mlxsw_sx *mlxsw_sx = priv;
 	struct mlxsw_sx_port *mlxsw_sx_port;
 	enum mlxsw_reg_pude_oper_status status;
+	enum devlink_port_type port_type;
 	u8 local_port;
 
 	local_port = mlxsw_reg_pude_local_port_get(pude_pl);
@@ -1164,59 +1389,11 @@ static void mlxsw_sx_pude_event_func(const struct mlxsw_reg_info *reg,
 	}
 
 	status = mlxsw_reg_pude_oper_status_get(pude_pl);
-	if (status == MLXSW_PORT_OPER_STATUS_UP) {
-		netdev_info(mlxsw_sx_port->dev, "link up\n");
-		netif_carrier_on(mlxsw_sx_port->dev);
-	} else {
-		netdev_info(mlxsw_sx_port->dev, "link down\n");
-		netif_carrier_off(mlxsw_sx_port->dev);
-	}
-}
-
-static struct mlxsw_event_listener mlxsw_sx_pude_event = {
-	.func = mlxsw_sx_pude_event_func,
-	.trap_id = MLXSW_TRAP_ID_PUDE,
-};
-
-static int mlxsw_sx_event_register(struct mlxsw_sx *mlxsw_sx,
-				   enum mlxsw_event_trap_id trap_id)
-{
-	struct mlxsw_event_listener *el;
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
-	int err;
-
-	switch (trap_id) {
-	case MLXSW_TRAP_ID_PUDE:
-		el = &mlxsw_sx_pude_event;
-		break;
-	}
-	err = mlxsw_core_event_listener_register(mlxsw_sx->core, el, mlxsw_sx);
-	if (err)
-		return err;
-
-	mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD, trap_id);
-	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-	if (err)
-		goto err_event_trap_set;
-
-	return 0;
-
-err_event_trap_set:
-	mlxsw_core_event_listener_unregister(mlxsw_sx->core, el, mlxsw_sx);
-	return err;
-}
-
-static void mlxsw_sx_event_unregister(struct mlxsw_sx *mlxsw_sx,
-				      enum mlxsw_event_trap_id trap_id)
-{
-	struct mlxsw_event_listener *el;
-
-	switch (trap_id) {
-	case MLXSW_TRAP_ID_PUDE:
-		el = &mlxsw_sx_pude_event;
-		break;
-	}
-	mlxsw_core_event_listener_unregister(mlxsw_sx->core, el, mlxsw_sx);
+	port_type = mlxsw_core_port_type_get(mlxsw_sx->core, local_port);
+	if (port_type == DEVLINK_PORT_TYPE_ETH)
+		mlxsw_sx_pude_eth_event_func(mlxsw_sx_port, status);
+	else if (port_type == DEVLINK_PORT_TYPE_IB)
+		mlxsw_sx_pude_ib_event_func(mlxsw_sx_port, status);
 }
 
 static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port,
@@ -1244,142 +1421,110 @@ static void mlxsw_sx_rx_listener_func(struct sk_buff *skb, u8 local_port,
 	netif_receive_skb(skb);
 }
 
-static const struct mlxsw_rx_listener mlxsw_sx_rx_listener[] = {
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_FDB_MC,
-	},
-	/* Traps for specific L2 packet types, not trapped as FDB MC */
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_STP,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_LACP,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_EAPOL,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_LLDP,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_MMRP,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_MVRP,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_RPVST,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_DHCP,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_QUERY,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE,
-	},
-	{
-		.func = mlxsw_sx_rx_listener_func,
-		.local_port = MLXSW_PORT_DONT_CARE,
-		.trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT,
-	},
+static int mlxsw_sx_port_type_set(struct mlxsw_core *mlxsw_core, u8 local_port,
+				  enum devlink_port_type new_type)
+{
+	struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
+	u8 module, width;
+	int err;
+
+	if (new_type == DEVLINK_PORT_TYPE_AUTO)
+		return -EOPNOTSUPP;
+
+	__mlxsw_sx_port_remove(mlxsw_sx, local_port);
+	err = mlxsw_sx_port_module_info_get(mlxsw_sx, local_port, &module,
+					    &width);
+	if (err)
+		goto err_port_module_info_get;
+
+	if (new_type == DEVLINK_PORT_TYPE_ETH)
+		err = __mlxsw_sx_port_eth_create(mlxsw_sx, local_port, module,
+						 width);
+	else if (new_type == DEVLINK_PORT_TYPE_IB)
+		err = __mlxsw_sx_port_ib_create(mlxsw_sx, local_port, module,
+						width);
+
+err_port_module_info_get:
+	return err;
+}
+
+#define MLXSW_SX_RXL(_trap_id) \
+	MLXSW_RXL(mlxsw_sx_rx_listener_func, _trap_id, TRAP_TO_CPU,	\
+		  false, SX2_RX, FORWARD)
+
+static const struct mlxsw_listener mlxsw_sx_listener[] = {
+	MLXSW_EVENTL(mlxsw_sx_pude_event_func, PUDE, EMAD),
+	MLXSW_SX_RXL(FDB_MC),
+	MLXSW_SX_RXL(STP),
+	MLXSW_SX_RXL(LACP),
+	MLXSW_SX_RXL(EAPOL),
+	MLXSW_SX_RXL(LLDP),
+	MLXSW_SX_RXL(MMRP),
+	MLXSW_SX_RXL(MVRP),
+	MLXSW_SX_RXL(RPVST),
+	MLXSW_SX_RXL(DHCP),
+	MLXSW_SX_RXL(IGMP_QUERY),
+	MLXSW_SX_RXL(IGMP_V1_REPORT),
+	MLXSW_SX_RXL(IGMP_V2_REPORT),
+	MLXSW_SX_RXL(IGMP_V2_LEAVE),
+	MLXSW_SX_RXL(IGMP_V3_REPORT),
 };
 
 static int mlxsw_sx_traps_init(struct mlxsw_sx *mlxsw_sx)
 {
 	char htgt_pl[MLXSW_REG_HTGT_LEN];
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
 	int i;
 	int err;
 
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_RX);
+	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
+			    MLXSW_REG_HTGT_INVALID_POLICER,
+			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+			    MLXSW_REG_HTGT_DEFAULT_TC);
+	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+					  MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_RX);
+
 	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
 	if (err)
 		return err;
 
-	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_CTRL);
+	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
+			    MLXSW_REG_HTGT_INVALID_POLICER,
+			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+			    MLXSW_REG_HTGT_DEFAULT_TC);
+	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+					MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_CTRL);
+
 	err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(htgt), htgt_pl);
 	if (err)
 		return err;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sx_rx_listener); i++) {
-		err = mlxsw_core_rx_listener_register(mlxsw_sx->core,
-						      &mlxsw_sx_rx_listener[i],
-						      mlxsw_sx);
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
+		err = mlxsw_core_trap_register(mlxsw_sx->core,
+					       &mlxsw_sx_listener[i],
+					       mlxsw_sx);
 		if (err)
-			goto err_rx_listener_register;
+			goto err_listener_register;
 
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU,
-				    mlxsw_sx_rx_listener[i].trap_id);
-		err = mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-		if (err)
-			goto err_rx_trap_set;
 	}
 	return 0;
 
-err_rx_trap_set:
-	mlxsw_core_rx_listener_unregister(mlxsw_sx->core,
-					  &mlxsw_sx_rx_listener[i],
-					  mlxsw_sx);
-err_rx_listener_register:
+err_listener_register:
 	for (i--; i >= 0; i--) {
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
-				    mlxsw_sx_rx_listener[i].trap_id);
-		mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-
-		mlxsw_core_rx_listener_unregister(mlxsw_sx->core,
-						  &mlxsw_sx_rx_listener[i],
-						  mlxsw_sx);
+		mlxsw_core_trap_unregister(mlxsw_sx->core,
+					   &mlxsw_sx_listener[i],
+					   mlxsw_sx);
 	}
 	return err;
 }
 
 static void mlxsw_sx_traps_fini(struct mlxsw_sx *mlxsw_sx)
 {
-	char hpkt_pl[MLXSW_REG_HPKT_LEN];
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sx_rx_listener); i++) {
-		mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_FORWARD,
-				    mlxsw_sx_rx_listener[i].trap_id);
-		mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(hpkt), hpkt_pl);
-
-		mlxsw_core_rx_listener_unregister(mlxsw_sx->core,
-						  &mlxsw_sx_rx_listener[i],
-						  mlxsw_sx);
+	for (i = 0; i < ARRAY_SIZE(mlxsw_sx_listener); i++) {
+		mlxsw_core_trap_unregister(mlxsw_sx->core,
+					   &mlxsw_sx_listener[i],
+					   mlxsw_sx);
 	}
 }
 
@@ -1451,6 +1596,20 @@ static int mlxsw_sx_flood_init(struct mlxsw_sx *mlxsw_sx)
 	return mlxsw_reg_write(mlxsw_sx->core, MLXSW_REG(sgcr), sgcr_pl);
 }
 
+static int mlxsw_sx_basic_trap_groups_set(struct mlxsw_core *mlxsw_core)
+{
+	char htgt_pl[MLXSW_REG_HTGT_LEN];
+
+	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
+			    MLXSW_REG_HTGT_INVALID_POLICER,
+			    MLXSW_REG_HTGT_DEFAULT_PRIORITY,
+			    MLXSW_REG_HTGT_DEFAULT_TC);
+	mlxsw_reg_htgt_swid_set(htgt_pl, MLXSW_PORT_SWID_ALL_SWIDS);
+	mlxsw_reg_htgt_local_path_rdq_set(htgt_pl,
+					MLXSW_REG_HTGT_LOCAL_PATH_RDQ_SX2_EMAD);
+	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
+}
+
 static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
 			 const struct mlxsw_bus_info *mlxsw_bus_info)
 {
@@ -1472,16 +1631,10 @@ static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
 		return err;
 	}
 
-	err = mlxsw_sx_event_register(mlxsw_sx, MLXSW_TRAP_ID_PUDE);
-	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Failed to register for PUDE events\n");
-		goto err_event_register;
-	}
-
 	err = mlxsw_sx_traps_init(mlxsw_sx);
 	if (err) {
-		dev_err(mlxsw_sx->bus_info->dev, "Failed to set traps for RX\n");
-		goto err_rx_listener_register;
+		dev_err(mlxsw_sx->bus_info->dev, "Failed to set traps\n");
+		goto err_listener_register;
 	}
 
 	err = mlxsw_sx_flood_init(mlxsw_sx);
@@ -1494,9 +1647,7 @@ static int mlxsw_sx_init(struct mlxsw_core *mlxsw_core,
 
 err_flood_init:
 	mlxsw_sx_traps_fini(mlxsw_sx);
-err_rx_listener_register:
-	mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE);
-err_event_register:
+err_listener_register:
 	mlxsw_sx_ports_remove(mlxsw_sx);
 	return err;
 }
@@ -1506,7 +1657,6 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core)
 	struct mlxsw_sx *mlxsw_sx = mlxsw_core_driver_priv(mlxsw_core);
 
 	mlxsw_sx_traps_fini(mlxsw_sx);
-	mlxsw_sx_event_unregister(mlxsw_sx, MLXSW_TRAP_ID_PUDE);
 	mlxsw_sx_ports_remove(mlxsw_sx);
 }
 
@@ -1529,36 +1679,66 @@ static struct mlxsw_config_profile mlxsw_sx_config_profile = {
 	.used_flood_mode		= 1,
 	.flood_mode			= 3,
 	.used_max_ib_mc			= 1,
-	.max_ib_mc			= 0,
+	.max_ib_mc			= 6,
 	.used_max_pkey			= 1,
 	.max_pkey			= 0,
 	.swid_config			= {
 		{
 			.used_type	= 1,
 			.type		= MLXSW_PORT_SWID_TYPE_ETH,
+		},
+		{
+			.used_type	= 1,
+			.type		= MLXSW_PORT_SWID_TYPE_IB,
 		}
 	},
 	.resource_query_enable		= 0,
 };
 
 static struct mlxsw_driver mlxsw_sx_driver = {
-	.kind			= MLXSW_DEVICE_KIND_SWITCHX2,
-	.owner			= THIS_MODULE,
+	.kind			= mlxsw_sx_driver_name,
 	.priv_size		= sizeof(struct mlxsw_sx),
 	.init			= mlxsw_sx_init,
 	.fini			= mlxsw_sx_fini,
+	.basic_trap_groups_set	= mlxsw_sx_basic_trap_groups_set,
 	.txhdr_construct	= mlxsw_sx_txhdr_construct,
 	.txhdr_len		= MLXSW_TXHDR_LEN,
 	.profile		= &mlxsw_sx_config_profile,
+	.port_type_set		= mlxsw_sx_port_type_set,
+};
+
+static const struct pci_device_id mlxsw_sx_pci_id_table[] = {
+	{PCI_VDEVICE(MELLANOX, PCI_DEVICE_ID_MELLANOX_SWITCHX2), 0},
+	{0, },
+};
+
+static struct pci_driver mlxsw_sx_pci_driver = {
+	.name = mlxsw_sx_driver_name,
+	.id_table = mlxsw_sx_pci_id_table,
 };
 
 static int __init mlxsw_sx_module_init(void)
 {
-	return mlxsw_core_driver_register(&mlxsw_sx_driver);
+	int err;
+
+	err = mlxsw_core_driver_register(&mlxsw_sx_driver);
+	if (err)
+		return err;
+
+	err = mlxsw_pci_driver_register(&mlxsw_sx_pci_driver);
+	if (err)
+		goto err_pci_driver_register;
+
+	return 0;
+
+err_pci_driver_register:
+	mlxsw_core_driver_unregister(&mlxsw_sx_driver);
+	return err;
 }
 
 static void __exit mlxsw_sx_module_exit(void)
 {
+	mlxsw_pci_driver_unregister(&mlxsw_sx_pci_driver);
 	mlxsw_core_driver_unregister(&mlxsw_sx_driver);
 }
 
@@ -1568,4 +1748,4 @@ module_exit(mlxsw_sx_module_exit);
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_AUTHOR("Jiri Pirko <jiri@mellanox.com>");
 MODULE_DESCRIPTION("Mellanox SwitchX-2 driver");
-MODULE_MLXSW_DRIVER_ALIAS(MLXSW_DEVICE_KIND_SWITCHX2);
+MODULE_DEVICE_TABLE(pci, mlxsw_sx_pci_id_table);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index ed8e30186400..7ab275deacac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -62,6 +62,7 @@ enum {
 	MLXSW_TRAP_ID_OSPF = 0x55,
 	MLXSW_TRAP_ID_IP2ME = 0x5F,
 	MLXSW_TRAP_ID_RTR_INGRESS0 = 0x70,
+	MLXSW_TRAP_ID_BGP_IPV4 = 0x88,
 	MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
 
 	MLXSW_TRAP_ID_MAX = 0x1FF
diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 1edc973df4c4..e7e1aff40bd9 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1063,7 +1063,6 @@ static const struct net_device_ops ks8851_netdev_ops = {
 	.ndo_start_xmit		= ks8851_start_xmit,
 	.ndo_set_mac_address	= ks8851_set_mac_address,
 	.ndo_set_rx_mode	= ks8851_set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
index 2fc5cd56c0a8..db628078a4e6 100644
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ b/drivers/net/ethernet/micrel/ks8851_mll.c
@@ -1285,7 +1285,6 @@ static const struct net_device_ops ks_netdev_ops = {
 	.ndo_start_xmit		= ks_start_xmit,
 	.ndo_set_mac_address	= ks_set_mac_address,
 	.ndo_set_rx_mode	= ks_set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/micrel/ksz884x.c b/drivers/net/ethernet/micrel/ksz884x.c
index 280e761d3a97..97f6ef1fa7d0 100644
--- a/drivers/net/ethernet/micrel/ksz884x.c
+++ b/drivers/net/ethernet/micrel/ksz884x.c
@@ -5807,24 +5807,19 @@ static int netdev_change_mtu(struct net_device *dev, int new_mtu)
 	if (hw->dev_count > 1)
 		if (dev != hw_priv->dev)
 			return 0;
-	if (new_mtu < 60)
-		return -EINVAL;
 
-	if (dev->mtu != new_mtu) {
-		hw_mtu = new_mtu + ETHERNET_HEADER_SIZE + 4;
-		if (hw_mtu > MAX_RX_BUF_SIZE)
-			return -EINVAL;
-		if (hw_mtu > REGULAR_RX_BUF_SIZE) {
-			hw->features |= RX_HUGE_FRAME;
-			hw_mtu = MAX_RX_BUF_SIZE;
-		} else {
-			hw->features &= ~RX_HUGE_FRAME;
-			hw_mtu = REGULAR_RX_BUF_SIZE;
-		}
-		hw_mtu = (hw_mtu + 3) & ~3;
-		hw_priv->mtu = hw_mtu;
-		dev->mtu = new_mtu;
+	hw_mtu = new_mtu + ETHERNET_HEADER_SIZE + 4;
+	if (hw_mtu > REGULAR_RX_BUF_SIZE) {
+		hw->features |= RX_HUGE_FRAME;
+		hw_mtu = MAX_RX_BUF_SIZE;
+	} else {
+		hw->features &= ~RX_HUGE_FRAME;
+		hw_mtu = REGULAR_RX_BUF_SIZE;
 	}
+	hw_mtu = (hw_mtu + 3) & ~3;
+	hw_priv->mtu = hw_mtu;
+	dev->mtu = new_mtu;
+
 	return 0;
 }
 
@@ -7099,6 +7094,12 @@ static int pcidev_init(struct pci_dev *pdev, const struct pci_device_id *id)
 
 		dev->netdev_ops = &netdev_ops;
 		dev->ethtool_ops = &netdev_ethtool_ops;
+
+		/* MTU range: 60 - 1894 */
+		dev->min_mtu = ETH_ZLEN;
+		dev->max_mtu = MAX_RX_BUF_SIZE -
+			       (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+
 		if (register_netdev(dev))
 			goto pcidev_init_reg_err;
 		port_set_power_saving(port, true);
diff --git a/drivers/net/ethernet/microchip/enc28j60.c b/drivers/net/ethernet/microchip/enc28j60.c
index 0a26b11ca8f6..045b9106c0ff 100644
--- a/drivers/net/ethernet/microchip/enc28j60.c
+++ b/drivers/net/ethernet/microchip/enc28j60.c
@@ -1544,7 +1544,6 @@ static const struct net_device_ops enc28j60_netdev_ops = {
 	.ndo_set_rx_mode	= enc28j60_set_multicast_list,
 	.ndo_set_mac_address	= enc28j60_set_mac_address,
 	.ndo_tx_timeout		= enc28j60_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index 4367dd6879a2..9774b50cff6e 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -444,7 +444,6 @@ static struct net_device_ops moxart_netdev_ops = {
 	.ndo_set_rx_mode	= moxart_mac_set_rx_mode,
 	.ndo_set_mac_address	= moxart_set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int moxart_mac_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 6d1a956e3f77..e506ca876d0d 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -289,7 +289,7 @@ static char *myri10ge_fw_names[MYRI10GE_MAX_BOARDS] =
     {[0 ... (MYRI10GE_MAX_BOARDS - 1)] = NULL };
 module_param_array_named(myri10ge_fw_names, myri10ge_fw_names, charp, NULL,
 			 0444);
-MODULE_PARM_DESC(myri10ge_fw_name, "Firmware image names per board");
+MODULE_PARM_DESC(myri10ge_fw_names, "Firmware image names per board");
 
 static int myri10ge_ecrc_enable = 1;
 module_param(myri10ge_ecrc_enable, int, S_IRUGO);
@@ -3232,10 +3232,6 @@ static int myri10ge_change_mtu(struct net_device *dev, int new_mtu)
 	struct myri10ge_priv *mgp = netdev_priv(dev);
 	int error = 0;
 
-	if ((new_mtu < 68) || (ETH_HLEN + new_mtu > MYRI10GE_MAX_ETHER_MTU)) {
-		netdev_err(dev, "new mtu (%d) is not valid\n", new_mtu);
-		return -EINVAL;
-	}
 	netdev_info(dev, "changing mtu from %d to %d\n", dev->mtu, new_mtu);
 	if (mgp->running) {
 		/* if we change the mtu on an active device, we must
@@ -4086,13 +4082,19 @@ static int myri10ge_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	myri10ge_setup_dca(mgp);
 #endif
 	pci_set_drvdata(pdev, mgp);
-	if ((myri10ge_initial_mtu + ETH_HLEN) > MYRI10GE_MAX_ETHER_MTU)
-		myri10ge_initial_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
-	if ((myri10ge_initial_mtu + ETH_HLEN) < 68)
-		myri10ge_initial_mtu = 68;
 
-	netdev->netdev_ops = &myri10ge_netdev_ops;
+	/* MTU range: 68 - 9000 */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = MYRI10GE_MAX_ETHER_MTU - ETH_HLEN;
+
+	if (myri10ge_initial_mtu > netdev->max_mtu)
+		myri10ge_initial_mtu = netdev->max_mtu;
+	if (myri10ge_initial_mtu < netdev->min_mtu)
+		myri10ge_initial_mtu = netdev->min_mtu;
+
 	netdev->mtu = myri10ge_initial_mtu;
+
+	netdev->netdev_ops = &myri10ge_netdev_ops;
 	netdev->hw_features = mgp->features | NETIF_F_RXCSUM;
 
 	/* fake NETIF_F_HW_VLAN_CTAG_RX for good GRO performance */
diff --git a/drivers/net/ethernet/natsemi/jazzsonic.c b/drivers/net/ethernet/natsemi/jazzsonic.c
index acf3f11e38cc..a6caeb567c0d 100644
--- a/drivers/net/ethernet/natsemi/jazzsonic.c
+++ b/drivers/net/ethernet/natsemi/jazzsonic.c
@@ -110,7 +110,6 @@ static const struct net_device_ops sonic_netdev_ops = {
 	.ndo_get_stats		= sonic_get_stats,
 	.ndo_set_rx_mode	= sonic_multicast_list,
 	.ndo_tx_timeout		= sonic_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
diff --git a/drivers/net/ethernet/natsemi/macsonic.c b/drivers/net/ethernet/natsemi/macsonic.c
index d98f5b8a1c66..3ca6ae7caf55 100644
--- a/drivers/net/ethernet/natsemi/macsonic.c
+++ b/drivers/net/ethernet/natsemi/macsonic.c
@@ -190,7 +190,6 @@ static const struct net_device_ops macsonic_netdev_ops = {
 	.ndo_tx_timeout		= sonic_tx_timeout,
 	.ndo_get_stats		= sonic_get_stats,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
diff --git a/drivers/net/ethernet/natsemi/natsemi.c b/drivers/net/ethernet/natsemi/natsemi.c
index ed89029ff75b..22b0821c1da0 100644
--- a/drivers/net/ethernet/natsemi/natsemi.c
+++ b/drivers/net/ethernet/natsemi/natsemi.c
@@ -929,6 +929,10 @@ static int natsemi_probe1(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev->ethtool_ops = &ethtool_ops;
 
+	/* MTU range: 64 - 2024 */
+	dev->min_mtu = ETH_ZLEN + ETH_FCS_LEN;
+	dev->max_mtu = NATSEMI_RX_LIMIT - NATSEMI_HEADERS;
+
 	if (mtu)
 		dev->mtu = mtu;
 
@@ -2526,9 +2530,6 @@ static void __set_rx_mode(struct net_device *dev)
 
 static int natsemi_change_mtu(struct net_device *dev, int new_mtu)
 {
-	if (new_mtu < 64 || new_mtu > NATSEMI_RX_LIMIT-NATSEMI_HEADERS)
-		return -EINVAL;
-
 	dev->mtu = new_mtu;
 
 	/* synchronized against open : rtnl_lock() held by caller */
diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c
index 569ade6cf85c..93c4bdc0cdca 100644
--- a/drivers/net/ethernet/natsemi/ns83820.c
+++ b/drivers/net/ethernet/natsemi/ns83820.c
@@ -919,7 +919,7 @@ netdev_mangle_me_harder_failed:
 				ndev->stats.rx_dropped++;
 			}
 		} else {
-			kfree_skb(skb);
+			dev_kfree_skb_irq(skb);
 		}
 
 		nr++;
@@ -1679,14 +1679,6 @@ static void ns83820_getmac(struct ns83820 *dev, u8 *mac)
 	}
 }
 
-static int ns83820_change_mtu(struct net_device *ndev, int new_mtu)
-{
-	if (new_mtu > RX_BUF_SIZE)
-		return -EINVAL;
-	ndev->mtu = new_mtu;
-	return 0;
-}
-
 static void ns83820_set_multicast(struct net_device *ndev)
 {
 	struct ns83820 *dev = PRIV(ndev);
@@ -1933,7 +1925,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_stop		= ns83820_stop,
 	.ndo_start_xmit		= ns83820_hard_start_xmit,
 	.ndo_get_stats		= ns83820_get_stats,
-	.ndo_change_mtu		= ns83820_change_mtu,
 	.ndo_set_rx_mode	= ns83820_set_multicast,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
@@ -2190,6 +2181,8 @@ static int ns83820_init_one(struct pci_dev *pci_dev,
 	ndev->features |= NETIF_F_SG;
 	ndev->features |= NETIF_F_IP_CSUM;
 
+	ndev->min_mtu = 0;
+
 #ifdef NS83820_VLAN_ACCEL_SUPPORT
 	/* We also support hardware vlan acceleration */
 	ndev->features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
diff --git a/drivers/net/ethernet/natsemi/xtsonic.c b/drivers/net/ethernet/natsemi/xtsonic.c
index 7007d212f3e4..9ee0f69a83c0 100644
--- a/drivers/net/ethernet/natsemi/xtsonic.c
+++ b/drivers/net/ethernet/natsemi/xtsonic.c
@@ -124,7 +124,6 @@ static const struct net_device_ops xtsonic_netdev_ops = {
 	.ndo_set_rx_mode	= sonic_multicast_list,
 	.ndo_tx_timeout		= sonic_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c
index eaa37c079a7c..564f682fa4dc 100644
--- a/drivers/net/ethernet/neterion/s2io.c
+++ b/drivers/net/ethernet/neterion/s2io.c
@@ -6678,11 +6678,6 @@ static int s2io_change_mtu(struct net_device *dev, int new_mtu)
 	struct s2io_nic *sp = netdev_priv(dev);
 	int ret = 0;
 
-	if ((new_mtu < MIN_MTU) || (new_mtu > S2IO_JUMBO_SIZE)) {
-		DBG_PRINT(ERR_DBG, "%s: MTU size is invalid.\n", dev->name);
-		return -EPERM;
-	}
-
 	dev->mtu = new_mtu;
 	if (netif_running(dev)) {
 		s2io_stop_all_tx_queue(sp);
@@ -8019,6 +8014,10 @@ s2io_init_nic(struct pci_dev *pdev, const struct pci_device_id *pre)
 		config->mc_start_offset = S2IO_HERC_MC_ADDR_START_OFFSET;
 	}
 
+	/* MTU range: 46 - 9600 */
+	dev->min_mtu = MIN_MTU;
+	dev->max_mtu = S2IO_JUMBO_SIZE;
+
 	/* store mac addresses from CAM to s2io_nic structure */
 	do_s2io_store_unicast_mc(sp);
 
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-config.h b/drivers/net/ethernet/neterion/vxge/vxge-config.h
index 6ce4412fcc1a..cfa970417f81 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-config.h
+++ b/drivers/net/ethernet/neterion/vxge/vxge-config.h
@@ -27,7 +27,7 @@
 	(((size) - (((u64)adrs) & ((size)-1))) & ((size)-1))
 #endif
 
-#define VXGE_HW_MIN_MTU				68
+#define VXGE_HW_MIN_MTU				ETH_MIN_MTU
 #define VXGE_HW_MAX_MTU				9600
 #define VXGE_HW_DEFAULT_MTU			1500
 
diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c
index e0993eba5df3..e07b936f64ec 100644
--- a/drivers/net/ethernet/neterion/vxge/vxge-main.c
+++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c
@@ -3074,11 +3074,6 @@ static int vxge_change_mtu(struct net_device *dev, int new_mtu)
 
 	vxge_debug_entryexit(vdev->level_trace,
 		"%s:%d", __func__, __LINE__);
-	if ((new_mtu < VXGE_HW_MIN_MTU) || (new_mtu > VXGE_HW_MAX_MTU)) {
-		vxge_debug_init(vdev->level_err,
-			"%s: mtu size is invalid", dev->name);
-		return -EPERM;
-	}
 
 	/* check if device is down already */
 	if (unlikely(!is_vxge_card_up(vdev))) {
@@ -3462,6 +3457,10 @@ static int vxge_device_register(struct __vxge_hw_device *hldev,
 			"%s : using High DMA", __func__);
 	}
 
+	/* MTU range: 68 - 9600 */
+	ndev->min_mtu = VXGE_HW_MIN_MTU;
+	ndev->max_mtu = VXGE_HW_MAX_MTU;
+
 	ret = register_netdev(ndev);
 	if (ret) {
 		vxge_debug_init(vxge_hw_device_trace_level_get(hldev),
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
index 87aa8a3e9112..76a19f1796af 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h
@@ -62,6 +62,7 @@ enum nfp_bpf_action_type {
 	NN_ACT_TC_DROP,
 	NN_ACT_TC_REDIR,
 	NN_ACT_DIRECT,
+	NN_ACT_XDP,
 };
 
 /* Software register representation, hardware encoding in asm.h */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
index f8df5300f49c..335beb8b8b45 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c
@@ -1126,7 +1126,7 @@ static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 				     meta->insn.src_reg * 2, true, 4);
 }
 
-static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_ldx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	if (meta->insn.off == offsetof(struct sk_buff, len))
 		emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2),
@@ -1134,12 +1134,42 @@ static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	else
 		return -ENOTSUPP;
 
-	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+	return 0;
+}
+
+static int mem_ldx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	u32 dst = reg_both(meta->insn.dst_reg * 2);
+
+	if (meta->insn.off != offsetof(struct xdp_md, data) &&
+	    meta->insn.off != offsetof(struct xdp_md, data_end))
+		return -ENOTSUPP;
+
+	emit_alu(nfp_prog, dst, reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT);
+
+	if (meta->insn.off == offsetof(struct xdp_md, data))
+		return 0;
+
+	emit_alu(nfp_prog, dst,	dst, ALU_OP_ADD, NFP_BPF_ABI_LEN);
 
 	return 0;
 }
 
-static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	int ret;
+
+	if (nfp_prog->act == NN_ACT_XDP)
+		ret = mem_ldx4_xdp(nfp_prog, meta);
+	else
+		ret = mem_ldx4_skb(nfp_prog, meta);
+
+	wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+	return ret;
+}
+
+static int mem_stx4_skb(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	if (meta->insn.off == offsetof(struct sk_buff, mark))
 		return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2);
@@ -1147,6 +1177,18 @@ static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 	return -ENOTSUPP;
 }
 
+static int mem_stx4_xdp(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	return -ENOTSUPP;
+}
+
+static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+	if (nfp_prog->act == NN_ACT_XDP)
+		return mem_stx4_xdp(nfp_prog, meta);
+	return mem_stx4_skb(nfp_prog, meta);
+}
+
 static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
 {
 	if (meta->insn.off < 0) /* TODO */
@@ -1530,6 +1572,47 @@ static void nfp_outro_tc_da(struct nfp_prog *nfp_prog)
 	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
 }
 
+static void nfp_outro_xdp(struct nfp_prog *nfp_prog)
+{
+	/* XDP return codes:
+	 *   0 aborted  0x82 -> drop,  count as stat3
+	 *   1    drop  0x22 -> drop,  count as stat1
+	 *   2    pass  0x11 -> pass,  count as stat0
+	 *   3      tx  0x44 -> redir, count as stat2
+	 *   * unknown  0x82 -> drop,  count as stat3
+	 */
+	/* Target for aborts */
+	nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x82), SHF_SC_L_SHF, 16);
+
+	/* Target for normal exits */
+	nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog);
+
+	/* if R0 > 3 jump to abort */
+	emit_alu(nfp_prog, reg_none(), reg_imm(3), ALU_OP_SUB, reg_b(0));
+	emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0);
+
+	wrp_immed(nfp_prog, reg_b(2), 0x44112282);
+
+	emit_shf(nfp_prog, reg_a(1),
+		 reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 3);
+
+	emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0));
+	emit_shf(nfp_prog, reg_b(2),
+		 reg_imm(0xff), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0);
+
+	emit_br_def(nfp_prog, nfp_prog->tgt_done, 2);
+
+	emit_alu(nfp_prog, reg_a(0),
+		 reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS);
+	emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16);
+}
+
 static void nfp_outro(struct nfp_prog *nfp_prog)
 {
 	switch (nfp_prog->act) {
@@ -1540,6 +1623,9 @@ static void nfp_outro(struct nfp_prog *nfp_prog)
 	case NN_ACT_TC_REDIR:
 		nfp_outro_tc_legacy(nfp_prog);
 		break;
+	case NN_ACT_XDP:
+		nfp_outro_xdp(nfp_prog);
+		break;
 	}
 }
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
index 144cae87f63a..b3361f9b8e5c 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c
@@ -80,6 +80,9 @@ nfp_bpf_check_exit(struct nfp_prog *nfp_prog,
 {
 	const struct bpf_reg_state *reg0 = &env->cur_state.regs[0];
 
+	if (nfp_prog->act == NN_ACT_XDP)
+		return 0;
+
 	if (reg0->type != CONST_IMM) {
 		pr_info("unsupported exit state: %d, imm: %llx\n",
 			reg0->type, reg0->imm);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h
index ed824e11a1e3..2115f446031e 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h
@@ -75,7 +75,6 @@
 
 /* Default size for MTU and freelist buffer sizes */
 #define NFP_NET_DEFAULT_MTU		1500
-#define NFP_NET_DEFAULT_RX_BUFSZ	2048
 
 /* Maximum number of bytes prepended to a packet */
 #define NFP_NET_MAX_PREPEND		64
@@ -88,6 +87,9 @@
 /* Queue/Ring definitions */
 #define NFP_NET_MAX_TX_RINGS	64	/* Max. # of Tx rings per device */
 #define NFP_NET_MAX_RX_RINGS	64	/* Max. # of Rx rings per device */
+#define NFP_NET_MAX_R_VECS	(NFP_NET_MAX_TX_RINGS > NFP_NET_MAX_RX_RINGS ? \
+				 NFP_NET_MAX_TX_RINGS : NFP_NET_MAX_RX_RINGS)
+#define NFP_NET_MAX_IRQS	(NFP_NET_NON_Q_VECTORS + NFP_NET_MAX_R_VECS)
 
 #define NFP_NET_MIN_TX_DESCS	256	/* Min. # of Tx descs per ring */
 #define NFP_NET_MIN_RX_DESCS	256	/* Min. # of Rx descs per ring */
@@ -102,6 +104,10 @@
 /* Offload definitions */
 #define NFP_NET_N_VXLAN_PORTS	(NFP_NET_CFG_VXLAN_SZ / sizeof(__be16))
 
+#define NFP_NET_RX_BUF_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
+#define NFP_NET_RX_BUF_NON_DATA	(NFP_NET_RX_BUF_HEADROOM +		\
+				 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 /* Forward declarations */
 struct nfp_net;
 struct nfp_net_r_vector;
@@ -165,7 +171,10 @@ struct nfp_net_tx_desc {
  *		on the head's buffer). Equal to skb->len for non-TSO packets.
  */
 struct nfp_net_tx_buf {
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		void *frag;
+	};
 	dma_addr_t dma_addr;
 	short int fidx;
 	u16 pkt_cnt;
@@ -278,11 +287,11 @@ struct nfp_net_rx_hash {
 
 /**
  * struct nfp_net_rx_buf - software RX buffer descriptor
- * @skb:	sk_buff associated with this buffer
+ * @frag:	page fragment buffer
  * @dma_addr:	DMA mapping address of the buffer
  */
 struct nfp_net_rx_buf {
-	struct sk_buff *skb;
+	void *frag;
 	dma_addr_t dma_addr;
 };
 
@@ -335,6 +344,7 @@ struct nfp_net_rx_ring {
  * @napi:           NAPI structure for this ring vec
  * @tx_ring:        Pointer to TX ring
  * @rx_ring:        Pointer to RX ring
+ * @xdp_ring:	    Pointer to an extra TX ring for XDP
  * @irq_idx:        Index into MSI-X table
  * @rx_sync:	    Seqlock for atomic updates of RX stats
  * @rx_pkts:        Number of received packets
@@ -378,6 +388,8 @@ struct nfp_net_r_vector {
 	u64 hw_csum_rx_inner_ok;
 	u64 hw_csum_rx_error;
 
+	struct nfp_net_tx_ring *xdp_ring;
+
 	struct u64_stats_sync tx_sync;
 	u64 tx_pkts;
 	u64 tx_bytes;
@@ -421,12 +433,13 @@ struct nfp_stat_pair {
  * @netdev:             Backpointer to net_device structure
  * @nfp_fallback:       Is the driver used in fallback mode?
  * @is_vf:              Is the driver attached to a VF?
- * @is_nfp3200:         Is the driver for a NFP-3200 card?
  * @fw_loaded:          Is the firmware loaded?
  * @bpf_offload_skip_sw:  Offloaded BPF program will not be rerun by cls_bpf
+ * @bpf_offload_xdp:	Offloaded BPF program is XDP
  * @ctrl:               Local copy of the control register/word.
  * @fl_bufsz:           Currently configured size of the freelist buffers
  * @rx_offset:		Offset in the RX buffers where packet data starts
+ * @xdp_prog:		Installed XDP program
  * @cpp:                Pointer to the CPP handle
  * @nfp_dev_cpp:        Pointer to the NFP Device handle
  * @ctrl_area:          Pointer to the CPP area for the control BAR
@@ -446,12 +459,13 @@ struct nfp_stat_pair {
  * @max_tx_rings:       Maximum number of TX rings supported by the Firmware
  * @max_rx_rings:       Maximum number of RX rings supported by the Firmware
  * @num_tx_rings:       Currently configured number of TX rings
+ * @num_stack_tx_rings:	Number of TX rings used by the stack (not XDP)
  * @num_rx_rings:       Currently configured number of RX rings
  * @txd_cnt:            Size of the TX ring in number of descriptors
  * @rxd_cnt:            Size of the RX ring in number of descriptors
  * @tx_rings:           Array of pre-allocated TX ring structures
  * @rx_rings:           Array of pre-allocated RX ring structures
- * @num_irqs:	        Number of allocated interrupt vectors
+ * @max_r_vecs:	        Number of allocated interrupt vectors for RX/TX
  * @num_r_vecs:         Number of used ring vectors
  * @r_vecs:             Pre-allocated array of ring vectors
  * @irq_entries:        Pre-allocated array of MSI-X entries
@@ -487,15 +501,17 @@ struct nfp_net {
 
 	unsigned nfp_fallback:1;
 	unsigned is_vf:1;
-	unsigned is_nfp3200:1;
 	unsigned fw_loaded:1;
 	unsigned bpf_offload_skip_sw:1;
+	unsigned bpf_offload_xdp:1;
 
 	u32 ctrl;
 	u32 fl_bufsz;
 
 	u32 rx_offset;
 
+	struct bpf_prog *xdp_prog;
+
 	struct nfp_net_tx_ring *tx_rings;
 	struct nfp_net_rx_ring *rx_rings;
 
@@ -524,11 +540,12 @@ struct nfp_net {
 	struct timer_list rx_filter_stats_timer;
 	spinlock_t rx_filter_lock;
 
-	int max_tx_rings;
-	int max_rx_rings;
+	unsigned int max_tx_rings;
+	unsigned int max_rx_rings;
 
-	int num_tx_rings;
-	int num_rx_rings;
+	unsigned int num_tx_rings;
+	unsigned int num_stack_tx_rings;
+	unsigned int num_rx_rings;
 
 	int stride_tx;
 	int stride_rx;
@@ -536,11 +553,10 @@ struct nfp_net {
 	int txd_cnt;
 	int rxd_cnt;
 
-	u8 num_irqs;
-	u8 num_r_vecs;
-	struct nfp_net_r_vector r_vecs[NFP_NET_MAX_TX_RINGS];
-	struct msix_entry irq_entries[NFP_NET_NON_Q_VECTORS +
-				      NFP_NET_MAX_TX_RINGS];
+	unsigned int max_r_vecs;
+	unsigned int num_r_vecs;
+	struct nfp_net_r_vector r_vecs[NFP_NET_MAX_R_VECS];
+	struct msix_entry irq_entries[NFP_NET_MAX_IRQS];
 
 	irq_handler_t lsc_handler;
 	char lsc_name[IFNAMSIZ + 8];
@@ -580,6 +596,13 @@ struct nfp_net {
 	struct dentry *debugfs_dir;
 };
 
+struct nfp_net_ring_set {
+	unsigned int n_rings;
+	unsigned int mtu;
+	unsigned int dcnt;
+	void *rings;
+};
+
 /* Functions to read/write from/to a BAR
  * Performs any endian conversion necessary.
  */
@@ -593,16 +616,13 @@ static inline void nn_writeb(struct nfp_net *nn, int off, u8 val)
 	writeb(val, nn->ctrl_bar + off);
 }
 
-/* NFP-3200 can't handle 16-bit accesses too well */
 static inline u16 nn_readw(struct nfp_net *nn, int off)
 {
-	WARN_ON_ONCE(nn->is_nfp3200);
 	return readw(nn->ctrl_bar + off);
 }
 
 static inline void nn_writew(struct nfp_net *nn, int off, u16 val)
 {
-	WARN_ON_ONCE(nn->is_nfp3200);
 	writew(val, nn->ctrl_bar + off);
 }
 
@@ -650,7 +670,7 @@ static inline void nn_pci_flush(struct nfp_net *nn)
 #define NFP_QCP_QUEUE_STS_HI			0x000c
 #define NFP_QCP_QUEUE_STS_HI_WRITEPTR_mask	0x3ffff
 
-/* The offset of a QCP queues in the PCIe Target (same on NFP3200 and NFP6000 */
+/* The offset of a QCP queue in the PCIe Target */
 #define NFP_PCIE_QUEUE(_q) (0x80000 + (NFP_QCP_QUEUE_ADDR_SZ * ((_q) & 0xff)))
 
 /* nfp_qcp_ptr - Read or Write Pointer of a queue */
@@ -757,8 +777,9 @@ extern const char nfp_net_driver_version[];
 void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
 			    void __iomem *ctrl_bar);
 
-struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
-				     int max_tx_rings, int max_rx_rings);
+struct nfp_net *
+nfp_net_netdev_alloc(struct pci_dev *pdev,
+		     unsigned int max_tx_rings, unsigned int max_rx_rings);
 void nfp_net_netdev_free(struct nfp_net *nn);
 int nfp_net_netdev_init(struct net_device *netdev);
 void nfp_net_netdev_clean(struct net_device *netdev);
@@ -770,7 +791,9 @@ void nfp_net_rss_write_key(struct nfp_net *nn);
 void nfp_net_coalesce_write_cfg(struct nfp_net *nn);
 int nfp_net_irqs_alloc(struct nfp_net *nn);
 void nfp_net_irqs_disable(struct nfp_net *nn);
-int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt);
+int
+nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
+		      struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx);
 
 #ifdef CONFIG_NFP_NET_DEBUG
 void nfp_net_debugfs_create(void);
@@ -796,8 +819,6 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn)
 #endif /* CONFIG_NFP_NET_DEBUG */
 
 void nfp_net_filter_stats_timer(unsigned long data);
-int
-nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
-		    struct tc_cls_bpf_offload *cls_bpf);
+int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf);
 
 #endif /* _NFP_NET_H_ */
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index aee3fd2b6538..e8d448109e03 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -41,6 +41,7 @@
  *          Chris Telfer <chris.telfer@netronome.com>
  */
 
+#include <linux/bpf.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -50,6 +51,7 @@
 #include <linux/interrupt.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <linux/page_ref.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
 #include <linux/msi.h>
@@ -80,6 +82,22 @@ void nfp_net_get_fw_version(struct nfp_net_fw_version *fw_ver,
 	put_unaligned_le32(reg, fw_ver);
 }
 
+static dma_addr_t
+nfp_net_dma_map_rx(struct nfp_net *nn, void *frag, unsigned int bufsz,
+		   int direction)
+{
+	return dma_map_single(&nn->pdev->dev, frag + NFP_NET_RX_BUF_HEADROOM,
+			      bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+}
+
+static void
+nfp_net_dma_unmap_rx(struct nfp_net *nn, dma_addr_t dma_addr,
+		     unsigned int bufsz, int direction)
+{
+	dma_unmap_single(&nn->pdev->dev, dma_addr,
+			 bufsz - NFP_NET_RX_BUF_NON_DATA, direction);
+}
+
 /* Firmware reconfig
  *
  * Firmware reconfig may take a while so we have two versions of it -
@@ -249,43 +267,14 @@ int nfp_net_reconfig(struct nfp_net *nn, u32 update)
  */
 
 /**
- * nfp_net_irq_unmask_msix() - Unmask MSI-X after automasking
- * @nn:       NFP Network structure
- * @entry_nr: MSI-X table entry
- *
- * Clear the MSI-X table mask bit for the given entry bypassing Linux irq
- * handling subsystem.  Use *only* to reenable automasked vectors.
- */
-static void nfp_net_irq_unmask_msix(struct nfp_net *nn, unsigned int entry_nr)
-{
-	struct list_head *msi_head = &nn->pdev->dev.msi_list;
-	struct msi_desc *entry;
-	u32 off;
-
-	/* All MSI-Xs have the same mask_base */
-	entry = list_first_entry(msi_head, struct msi_desc, list);
-
-	off = (PCI_MSIX_ENTRY_SIZE * entry_nr) +
-		PCI_MSIX_ENTRY_VECTOR_CTRL;
-	writel(0, entry->mask_base + off);
-	readl(entry->mask_base);
-}
-
-/**
  * nfp_net_irq_unmask() - Unmask automasked interrupt
  * @nn:       NFP Network structure
  * @entry_nr: MSI-X table entry
  *
- * If MSI-X auto-masking is enabled clear the mask bit, otherwise
- * clear the ICR for the entry.
+ * Clear the ICR for the IRQ entry.
  */
 static void nfp_net_irq_unmask(struct nfp_net *nn, unsigned int entry_nr)
 {
-	if (nn->ctrl & NFP_NET_CFG_CTRL_MSIXAUTO) {
-		nfp_net_irq_unmask_msix(nn, entry_nr);
-		return;
-	}
-
 	nn_writeb(nn, NFP_NET_CFG_ICR(entry_nr), NFP_NET_CFG_ICR_UNMASKED);
 	nn_pci_flush(nn);
 }
@@ -320,28 +309,6 @@ static int nfp_net_msix_alloc(struct nfp_net *nn, int nr_vecs)
 }
 
 /**
- * nfp_net_irqs_wanted() - Work out how many interrupt vectors we want
- * @nn:       NFP Network structure
- *
- * We want a vector per CPU (or ring), whatever is smaller plus
- * NFP_NET_NON_Q_VECTORS for LSC etc.
- *
- * Return: Number of interrupts wanted
- */
-static int nfp_net_irqs_wanted(struct nfp_net *nn)
-{
-	int ncpus;
-	int vecs;
-
-	ncpus = num_online_cpus();
-
-	vecs = max_t(int, nn->num_tx_rings, nn->num_rx_rings);
-	vecs = min_t(int, vecs, ncpus);
-
-	return vecs + NFP_NET_NON_Q_VECTORS;
-}
-
-/**
  * nfp_net_irqs_alloc() - allocates MSI-X irqs
  * @nn:       NFP Network structure
  *
@@ -350,22 +317,24 @@ static int nfp_net_irqs_wanted(struct nfp_net *nn)
 int nfp_net_irqs_alloc(struct nfp_net *nn)
 {
 	int wanted_irqs;
+	unsigned int n;
 
-	wanted_irqs = nfp_net_irqs_wanted(nn);
+	wanted_irqs = nn->num_r_vecs + NFP_NET_NON_Q_VECTORS;
 
-	nn->num_irqs = nfp_net_msix_alloc(nn, wanted_irqs);
-	if (nn->num_irqs == 0) {
+	n = nfp_net_msix_alloc(nn, wanted_irqs);
+	if (n == 0) {
 		nn_err(nn, "Failed to allocate MSI-X IRQs\n");
 		return 0;
 	}
 
-	nn->num_r_vecs = nn->num_irqs - NFP_NET_NON_Q_VECTORS;
+	nn->max_r_vecs = n - NFP_NET_NON_Q_VECTORS;
+	nn->num_r_vecs = nn->max_r_vecs;
 
-	if (nn->num_irqs < wanted_irqs)
+	if (n < wanted_irqs)
 		nn_warn(nn, "Unable to allocate %d vectors. Got %d instead\n",
-			wanted_irqs, nn->num_irqs);
+			wanted_irqs, n);
 
-	return nn->num_irqs;
+	return n;
 }
 
 /**
@@ -515,18 +484,19 @@ static void nfp_net_irqs_assign(struct net_device *netdev)
 	struct nfp_net_r_vector *r_vec;
 	int r;
 
-	/* Assumes nn->num_tx_rings == nn->num_rx_rings */
-	if (nn->num_tx_rings > nn->num_r_vecs) {
-		nn_warn(nn, "More rings (%d) than vectors (%d).\n",
-			nn->num_tx_rings, nn->num_r_vecs);
-		nn->num_tx_rings = nn->num_r_vecs;
-		nn->num_rx_rings = nn->num_r_vecs;
-	}
+	if (nn->num_rx_rings > nn->num_r_vecs ||
+	    nn->num_tx_rings > nn->num_r_vecs)
+		nn_warn(nn, "More rings (%d,%d) than vectors (%d).\n",
+			nn->num_rx_rings, nn->num_tx_rings, nn->num_r_vecs);
+
+	nn->num_rx_rings = min(nn->num_r_vecs, nn->num_rx_rings);
+	nn->num_tx_rings = min(nn->num_r_vecs, nn->num_tx_rings);
+	nn->num_stack_tx_rings = nn->num_tx_rings;
 
 	nn->lsc_handler = nfp_net_irq_lsc;
 	nn->exn_handler = nfp_net_irq_exn;
 
-	for (r = 0; r < nn->num_r_vecs; r++) {
+	for (r = 0; r < nn->max_r_vecs; r++) {
 		r_vec = &nn->r_vecs[r];
 		r_vec->nfp_net = nn;
 		r_vec->handler = nfp_net_irq_rxtx;
@@ -605,7 +575,7 @@ static void nfp_net_aux_irq_free(struct nfp_net *nn, u32 ctrl_offset,
  *
  * Return: True if the ring is full.
  */
-static inline int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
+static int nfp_net_tx_full(struct nfp_net_tx_ring *tx_ring, int dcnt)
 {
 	return (tx_ring->wr_p - tx_ring->rd_p) >= (tx_ring->cnt - dcnt);
 }
@@ -745,6 +715,13 @@ static void nfp_net_tx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 	u64_stats_update_end(&r_vec->tx_sync);
 }
 
+static void nfp_net_tx_xmit_more_flush(struct nfp_net_tx_ring *tx_ring)
+{
+	wmb();
+	nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
+	tx_ring->wr_ptr_add = 0;
+}
+
 /**
  * nfp_net_tx() - Main transmit entry point
  * @skb:    SKB to transmit
@@ -790,7 +767,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 	if (dma_mapping_error(&nn->pdev->dev, dma_addr))
 		goto err_free;
 
-	wr_idx = tx_ring->wr_p % tx_ring->cnt;
+	wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
 
 	/* Stash the soft descriptor of the head then initialize it */
 	txbuf = &tx_ring->txbufs[wr_idx];
@@ -834,7 +811,7 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 			if (dma_mapping_error(&nn->pdev->dev, dma_addr))
 				goto err_unmap;
 
-			wr_idx = (wr_idx + 1) % tx_ring->cnt;
+			wr_idx = (wr_idx + 1) & (tx_ring->cnt - 1);
 			tx_ring->txbufs[wr_idx].skb = skb;
 			tx_ring->txbufs[wr_idx].dma_addr = dma_addr;
 			tx_ring->txbufs[wr_idx].fidx = f;
@@ -859,12 +836,8 @@ static int nfp_net_tx(struct sk_buff *skb, struct net_device *netdev)
 		nfp_net_tx_ring_stop(nd_q, tx_ring);
 
 	tx_ring->wr_ptr_add += nr_frags + 1;
-	if (!skb->xmit_more || netif_xmit_stopped(nd_q)) {
-		/* force memory write before we let HW know */
-		wmb();
-		nfp_qcp_wr_ptr_add(tx_ring->qcp_q, tx_ring->wr_ptr_add);
-		tx_ring->wr_ptr_add = 0;
-	}
+	if (!skb->xmit_more || netif_xmit_stopped(nd_q))
+		nfp_net_tx_xmit_more_flush(tx_ring);
 
 	skb_tx_timestamp(skb);
 
@@ -929,7 +902,7 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 		todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p;
 
 	while (todo--) {
-		idx = tx_ring->rd_p % tx_ring->cnt;
+		idx = tx_ring->rd_p & (tx_ring->cnt - 1);
 		tx_ring->rd_p++;
 
 		skb = tx_ring->txbufs[idx].skb;
@@ -986,6 +959,56 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
 }
 
+static void nfp_net_xdp_complete(struct nfp_net_tx_ring *tx_ring)
+{
+	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
+	struct nfp_net *nn = r_vec->nfp_net;
+	u32 done_pkts = 0, done_bytes = 0;
+	int idx, todo;
+	u32 qcp_rd_p;
+
+	/* Work out how many descriptors have been transmitted */
+	qcp_rd_p = nfp_qcp_rd_ptr_read(tx_ring->qcp_q);
+
+	if (qcp_rd_p == tx_ring->qcp_rd_p)
+		return;
+
+	if (qcp_rd_p > tx_ring->qcp_rd_p)
+		todo = qcp_rd_p - tx_ring->qcp_rd_p;
+	else
+		todo = qcp_rd_p + tx_ring->cnt - tx_ring->qcp_rd_p;
+
+	while (todo--) {
+		idx = tx_ring->rd_p & (tx_ring->cnt - 1);
+		tx_ring->rd_p++;
+
+		if (!tx_ring->txbufs[idx].frag)
+			continue;
+
+		nfp_net_dma_unmap_rx(nn, tx_ring->txbufs[idx].dma_addr,
+				     nn->fl_bufsz, DMA_BIDIRECTIONAL);
+		__free_page(virt_to_page(tx_ring->txbufs[idx].frag));
+
+		done_pkts++;
+		done_bytes += tx_ring->txbufs[idx].real_len;
+
+		tx_ring->txbufs[idx].dma_addr = 0;
+		tx_ring->txbufs[idx].frag = NULL;
+		tx_ring->txbufs[idx].fidx = -2;
+	}
+
+	tx_ring->qcp_rd_p = qcp_rd_p;
+
+	u64_stats_update_begin(&r_vec->tx_sync);
+	r_vec->tx_bytes += done_bytes;
+	r_vec->tx_pkts += done_pkts;
+	u64_stats_update_end(&r_vec->tx_sync);
+
+	WARN_ONCE(tx_ring->wr_p - tx_ring->rd_p > tx_ring->cnt,
+		  "TX ring corruption rd_p=%u wr_p=%u cnt=%u\n",
+		  tx_ring->rd_p, tx_ring->wr_p, tx_ring->cnt);
+}
+
 /**
  * nfp_net_tx_ring_reset() - Free any untransmitted buffers and reset pointers
  * @nn:		NFP Net device
@@ -996,39 +1019,47 @@ static void nfp_net_tx_complete(struct nfp_net_tx_ring *tx_ring)
 static void
 nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
 {
+	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	const struct skb_frag_struct *frag;
-	struct netdev_queue *nd_q;
 	struct pci_dev *pdev = nn->pdev;
+	struct netdev_queue *nd_q;
 
 	while (tx_ring->rd_p != tx_ring->wr_p) {
-		int nr_frags, fidx, idx;
-		struct sk_buff *skb;
+		struct nfp_net_tx_buf *tx_buf;
+		int idx;
 
-		idx = tx_ring->rd_p % tx_ring->cnt;
-		skb = tx_ring->txbufs[idx].skb;
-		nr_frags = skb_shinfo(skb)->nr_frags;
-		fidx = tx_ring->txbufs[idx].fidx;
+		idx = tx_ring->rd_p & (tx_ring->cnt - 1);
+		tx_buf = &tx_ring->txbufs[idx];
 
-		if (fidx == -1) {
-			/* unmap head */
-			dma_unmap_single(&pdev->dev,
-					 tx_ring->txbufs[idx].dma_addr,
-					 skb_headlen(skb), DMA_TO_DEVICE);
+		if (tx_ring == r_vec->xdp_ring) {
+			nfp_net_dma_unmap_rx(nn, tx_buf->dma_addr,
+					     nn->fl_bufsz, DMA_BIDIRECTIONAL);
+			__free_page(virt_to_page(tx_ring->txbufs[idx].frag));
 		} else {
-			/* unmap fragment */
-			frag = &skb_shinfo(skb)->frags[fidx];
-			dma_unmap_page(&pdev->dev,
-				       tx_ring->txbufs[idx].dma_addr,
-				       skb_frag_size(frag), DMA_TO_DEVICE);
-		}
+			struct sk_buff *skb = tx_ring->txbufs[idx].skb;
+			int nr_frags = skb_shinfo(skb)->nr_frags;
+
+			if (tx_buf->fidx == -1) {
+				/* unmap head */
+				dma_unmap_single(&pdev->dev, tx_buf->dma_addr,
+						 skb_headlen(skb),
+						 DMA_TO_DEVICE);
+			} else {
+				/* unmap fragment */
+				frag = &skb_shinfo(skb)->frags[tx_buf->fidx];
+				dma_unmap_page(&pdev->dev, tx_buf->dma_addr,
+					       skb_frag_size(frag),
+					       DMA_TO_DEVICE);
+			}
 
-		/* check for last gather fragment */
-		if (fidx == nr_frags - 1)
-			dev_kfree_skb_any(skb);
+			/* check for last gather fragment */
+			if (tx_buf->fidx == nr_frags - 1)
+				dev_kfree_skb_any(skb);
+		}
 
-		tx_ring->txbufs[idx].dma_addr = 0;
-		tx_ring->txbufs[idx].skb = NULL;
-		tx_ring->txbufs[idx].fidx = -2;
+		tx_buf->dma_addr = 0;
+		tx_buf->skb = NULL;
+		tx_buf->fidx = -2;
 
 		tx_ring->qcp_rd_p++;
 		tx_ring->rd_p++;
@@ -1040,6 +1071,9 @@ nfp_net_tx_ring_reset(struct nfp_net *nn, struct nfp_net_tx_ring *tx_ring)
 	tx_ring->qcp_rd_p = 0;
 	tx_ring->wr_ptr_add = 0;
 
+	if (tx_ring == r_vec->xdp_ring)
+		return;
+
 	nd_q = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
 	netdev_tx_reset_queue(nd_q);
 }
@@ -1049,7 +1083,7 @@ static void nfp_net_tx_timeout(struct net_device *netdev)
 	struct nfp_net *nn = netdev_priv(netdev);
 	int i;
 
-	for (i = 0; i < nn->num_tx_rings; i++) {
+	for (i = 0; i < nn->netdev->real_num_tx_queues; i++) {
 		if (!netif_tx_queue_stopped(netdev_get_tx_queue(netdev, i)))
 			continue;
 		nn_warn(nn, "TX timeout on ring: %d\n", i);
@@ -1059,69 +1093,112 @@ static void nfp_net_tx_timeout(struct net_device *netdev)
 
 /* Receive processing
  */
+static unsigned int
+nfp_net_calc_fl_bufsz(struct nfp_net *nn, unsigned int mtu)
+{
+	unsigned int fl_bufsz;
 
-/**
- * nfp_net_rx_space() - return the number of free slots on the RX ring
- * @rx_ring:   RX ring structure
- *
- * Make sure we leave at least one slot free.
- *
- * Return: True if there is space on the RX ring
- */
-static inline int nfp_net_rx_space(struct nfp_net_rx_ring *rx_ring)
+	fl_bufsz = NFP_NET_RX_BUF_HEADROOM;
+	if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
+		fl_bufsz += NFP_NET_MAX_PREPEND;
+	else
+		fl_bufsz += nn->rx_offset;
+	fl_bufsz += ETH_HLEN + VLAN_HLEN * 2 + mtu;
+
+	fl_bufsz = SKB_DATA_ALIGN(fl_bufsz);
+	fl_bufsz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
+	return fl_bufsz;
+}
+
+static void
+nfp_net_free_frag(void *frag, bool xdp)
 {
-	return (rx_ring->cnt - 1) - (rx_ring->wr_p - rx_ring->rd_p);
+	if (!xdp)
+		skb_free_frag(frag);
+	else
+		__free_page(virt_to_page(frag));
 }
 
 /**
- * nfp_net_rx_alloc_one() - Allocate and map skb for RX
+ * nfp_net_rx_alloc_one() - Allocate and map page frag for RX
  * @rx_ring:	RX ring structure of the skb
  * @dma_addr:	Pointer to storage for DMA address (output param)
  * @fl_bufsz:	size of freelist buffers
+ * @xdp:	Whether XDP is enabled
  *
- * This function will allcate a new skb, map it for DMA.
+ * This function will allocate a new page frag and map it for DMA.
  *
- * Return: allocated skb or NULL on failure.
+ * Return: allocated page frag or NULL on failure.
  */
-static struct sk_buff *
+static void *
 nfp_net_rx_alloc_one(struct nfp_net_rx_ring *rx_ring, dma_addr_t *dma_addr,
-		     unsigned int fl_bufsz)
+		     unsigned int fl_bufsz, bool xdp)
 {
 	struct nfp_net *nn = rx_ring->r_vec->nfp_net;
-	struct sk_buff *skb;
+	int direction;
+	void *frag;
 
-	skb = netdev_alloc_skb(nn->netdev, fl_bufsz);
-	if (!skb) {
-		nn_warn_ratelimit(nn, "Failed to alloc receive SKB\n");
+	if (!xdp)
+		frag = netdev_alloc_frag(fl_bufsz);
+	else
+		frag = page_address(alloc_page(GFP_KERNEL | __GFP_COLD));
+	if (!frag) {
+		nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
 		return NULL;
 	}
 
-	*dma_addr = dma_map_single(&nn->pdev->dev, skb->data,
-				   fl_bufsz, DMA_FROM_DEVICE);
+	direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+
+	*dma_addr = nfp_net_dma_map_rx(nn, frag, fl_bufsz, direction);
 	if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
-		dev_kfree_skb_any(skb);
+		nfp_net_free_frag(frag, xdp);
 		nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
 		return NULL;
 	}
 
-	return skb;
+	return frag;
+}
+
+static void *
+nfp_net_napi_alloc_one(struct nfp_net *nn, int direction, dma_addr_t *dma_addr)
+{
+	void *frag;
+
+	if (!nn->xdp_prog)
+		frag = napi_alloc_frag(nn->fl_bufsz);
+	else
+		frag = page_address(alloc_page(GFP_ATOMIC | __GFP_COLD));
+	if (!frag) {
+		nn_warn_ratelimit(nn, "Failed to alloc receive page frag\n");
+		return NULL;
+	}
+
+	*dma_addr = nfp_net_dma_map_rx(nn, frag, nn->fl_bufsz, direction);
+	if (dma_mapping_error(&nn->pdev->dev, *dma_addr)) {
+		nfp_net_free_frag(frag, nn->xdp_prog);
+		nn_warn_ratelimit(nn, "Failed to map DMA RX buffer\n");
+		return NULL;
+	}
+
+	return frag;
 }
 
 /**
  * nfp_net_rx_give_one() - Put mapped skb on the software and hardware rings
  * @rx_ring:	RX ring structure
- * @skb:	Skb to put on rings
+ * @frag:	page fragment buffer
  * @dma_addr:	DMA address of skb mapping
  */
 static void nfp_net_rx_give_one(struct nfp_net_rx_ring *rx_ring,
-				struct sk_buff *skb, dma_addr_t dma_addr)
+				void *frag, dma_addr_t dma_addr)
 {
 	unsigned int wr_idx;
 
-	wr_idx = rx_ring->wr_p % rx_ring->cnt;
+	wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1);
 
 	/* Stash SKB and DMA address away */
-	rx_ring->rxbufs[wr_idx].skb = skb;
+	rx_ring->rxbufs[wr_idx].frag = frag;
 	rx_ring->rxbufs[wr_idx].dma_addr = dma_addr;
 
 	/* Fill freelist descriptor */
@@ -1153,12 +1230,12 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
 	unsigned int wr_idx, last_idx;
 
 	/* Move the empty entry to the end of the list */
-	wr_idx = rx_ring->wr_p % rx_ring->cnt;
+	wr_idx = rx_ring->wr_p & (rx_ring->cnt - 1);
 	last_idx = rx_ring->cnt - 1;
 	rx_ring->rxbufs[wr_idx].dma_addr = rx_ring->rxbufs[last_idx].dma_addr;
-	rx_ring->rxbufs[wr_idx].skb = rx_ring->rxbufs[last_idx].skb;
+	rx_ring->rxbufs[wr_idx].frag = rx_ring->rxbufs[last_idx].frag;
 	rx_ring->rxbufs[last_idx].dma_addr = 0;
-	rx_ring->rxbufs[last_idx].skb = NULL;
+	rx_ring->rxbufs[last_idx].frag = NULL;
 
 	memset(rx_ring->rxds, 0, sizeof(*rx_ring->rxds) * rx_ring->cnt);
 	rx_ring->wr_p = 0;
@@ -1170,15 +1247,17 @@ static void nfp_net_rx_ring_reset(struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_ring_bufs_free() - Free any buffers currently on the RX ring
  * @nn:		NFP Net device
  * @rx_ring:	RX ring to remove buffers from
+ * @xdp:	Whether XDP is enabled
  *
  * Assumes that the device is stopped and buffers are in [0, ring->cnt - 1)
  * entries.  After device is disabled nfp_net_rx_ring_reset() must be called
  * to restore required ring geometry.
  */
 static void
-nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
+nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+			  bool xdp)
 {
-	struct pci_dev *pdev = nn->pdev;
+	int direction = xdp ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 	unsigned int i;
 
 	for (i = 0; i < rx_ring->cnt - 1; i++) {
@@ -1186,14 +1265,14 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 		 * fails to allocate enough buffers and calls here to free
 		 * already allocated ones.
 		 */
-		if (!rx_ring->rxbufs[i].skb)
+		if (!rx_ring->rxbufs[i].frag)
 			continue;
 
-		dma_unmap_single(&pdev->dev, rx_ring->rxbufs[i].dma_addr,
-				 rx_ring->bufsz, DMA_FROM_DEVICE);
-		dev_kfree_skb_any(rx_ring->rxbufs[i].skb);
+		nfp_net_dma_unmap_rx(nn, rx_ring->rxbufs[i].dma_addr,
+				     rx_ring->bufsz, direction);
+		nfp_net_free_frag(rx_ring->rxbufs[i].frag, xdp);
 		rx_ring->rxbufs[i].dma_addr = 0;
-		rx_ring->rxbufs[i].skb = NULL;
+		rx_ring->rxbufs[i].frag = NULL;
 	}
 }
 
@@ -1201,9 +1280,11 @@ nfp_net_rx_ring_bufs_free(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
  * nfp_net_rx_ring_bufs_alloc() - Fill RX ring with buffers (don't give to FW)
  * @nn:		NFP Net device
  * @rx_ring:	RX ring to remove buffers from
+ * @xdp:	Whether XDP is enabled
  */
 static int
-nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
+nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+			   bool xdp)
 {
 	struct nfp_net_rx_buf *rxbufs;
 	unsigned int i;
@@ -1211,11 +1292,11 @@ nfp_net_rx_ring_bufs_alloc(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring)
 	rxbufs = rx_ring->rxbufs;
 
 	for (i = 0; i < rx_ring->cnt - 1; i++) {
-		rxbufs[i].skb =
+		rxbufs[i].frag =
 			nfp_net_rx_alloc_one(rx_ring, &rxbufs[i].dma_addr,
-					     rx_ring->bufsz);
-		if (!rxbufs[i].skb) {
-			nfp_net_rx_ring_bufs_free(nn, rx_ring);
+					     rx_ring->bufsz, xdp);
+		if (!rxbufs[i].frag) {
+			nfp_net_rx_ring_bufs_free(nn, rx_ring, xdp);
 			return -ENOMEM;
 		}
 	}
@@ -1232,7 +1313,7 @@ static void nfp_net_rx_ring_fill_freelist(struct nfp_net_rx_ring *rx_ring)
 	unsigned int i;
 
 	for (i = 0; i < rx_ring->cnt - 1; i++)
-		nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].skb,
+		nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[i].frag,
 				    rx_ring->rxbufs[i].dma_addr);
 }
 
@@ -1359,6 +1440,87 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
 	return data;
 }
 
+static void
+nfp_net_rx_drop(struct nfp_net_r_vector *r_vec, struct nfp_net_rx_ring *rx_ring,
+		struct nfp_net_rx_buf *rxbuf, struct sk_buff *skb)
+{
+	u64_stats_update_begin(&r_vec->rx_sync);
+	r_vec->rx_drops++;
+	u64_stats_update_end(&r_vec->rx_sync);
+
+	/* skb is built based on the frag, free_skb() would free the frag
+	 * so to be able to reuse it we need an extra ref.
+	 */
+	if (skb && rxbuf && skb->head == rxbuf->frag)
+		page_ref_inc(virt_to_head_page(rxbuf->frag));
+	if (rxbuf)
+		nfp_net_rx_give_one(rx_ring, rxbuf->frag, rxbuf->dma_addr);
+	if (skb)
+		dev_kfree_skb_any(skb);
+}
+
+static void
+nfp_net_tx_xdp_buf(struct nfp_net *nn, struct nfp_net_rx_ring *rx_ring,
+		   struct nfp_net_tx_ring *tx_ring,
+		   struct nfp_net_rx_buf *rxbuf, unsigned int pkt_off,
+		   unsigned int pkt_len)
+{
+	struct nfp_net_tx_buf *txbuf;
+	struct nfp_net_tx_desc *txd;
+	dma_addr_t new_dma_addr;
+	void *new_frag;
+	int wr_idx;
+
+	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
+		nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+		return;
+	}
+
+	new_frag = nfp_net_napi_alloc_one(nn, DMA_BIDIRECTIONAL, &new_dma_addr);
+	if (unlikely(!new_frag)) {
+		nfp_net_rx_drop(rx_ring->r_vec, rx_ring, rxbuf, NULL);
+		return;
+	}
+	nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+
+	wr_idx = tx_ring->wr_p & (tx_ring->cnt - 1);
+
+	/* Stash the soft descriptor of the head then initialize it */
+	txbuf = &tx_ring->txbufs[wr_idx];
+	txbuf->frag = rxbuf->frag;
+	txbuf->dma_addr = rxbuf->dma_addr;
+	txbuf->fidx = -1;
+	txbuf->pkt_cnt = 1;
+	txbuf->real_len = pkt_len;
+
+	dma_sync_single_for_device(&nn->pdev->dev, rxbuf->dma_addr + pkt_off,
+				   pkt_len, DMA_TO_DEVICE);
+
+	/* Build TX descriptor */
+	txd = &tx_ring->txds[wr_idx];
+	txd->offset_eop = PCIE_DESC_TX_EOP;
+	txd->dma_len = cpu_to_le16(pkt_len);
+	nfp_desc_set_dma_addr(txd, rxbuf->dma_addr + pkt_off);
+	txd->data_len = cpu_to_le16(pkt_len);
+
+	txd->flags = 0;
+	txd->mss = 0;
+	txd->l4_offset = 0;
+
+	tx_ring->wr_p++;
+	tx_ring->wr_ptr_add++;
+}
+
+static int nfp_net_run_xdp(struct bpf_prog *prog, void *data, unsigned int len)
+{
+	struct xdp_buff xdp;
+
+	xdp.data = data;
+	xdp.data_end = data + len;
+
+	return bpf_prog_run_xdp(prog, &xdp);
+}
+
 /**
  * nfp_net_rx() - receive up to @budget packets on @rx_ring
  * @rx_ring:   RX ring to receive from
@@ -1368,62 +1530,39 @@ nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb,
  * more cleanly separate packet receive code from other bookkeeping
  * functions performed in the napi poll function.
  *
- * There are differences between the NFP-3200 firmware and the
- * NFP-6000 firmware.  The NFP-3200 firmware uses a dedicated RX queue
- * to indicate that new packets have arrived.  The NFP-6000 does not
- * have this queue and uses the DD bit in the RX descriptor. This
- * method cannot be used on the NFP-3200 as it causes a race
- * condition: The RX ring write pointer on the NFP-3200 is updated
- * after packets (and descriptors) have been DMAed.  If the DD bit is
- * used and subsequently the read pointer is updated this may lead to
- * the RX queue to underflow (if the firmware has not yet update the
- * write pointer).  Therefore we use slightly ugly conditional code
- * below to handle the differences.  We may, in the future update the
- * NFP-3200 firmware to behave the same as the firmware on the
- * NFP-6000.
- *
  * Return: Number of packets received.
  */
 static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 {
 	struct nfp_net_r_vector *r_vec = rx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
-	unsigned int data_len, meta_len;
-	int avail = 0, pkts_polled = 0;
-	struct sk_buff *skb, *new_skb;
-	struct nfp_net_rx_desc *rxd;
-	dma_addr_t new_dma_addr;
-	u32 qcp_wr_p;
+	struct nfp_net_tx_ring *tx_ring;
+	struct bpf_prog *xdp_prog;
+	unsigned int true_bufsz;
+	struct sk_buff *skb;
+	int pkts_polled = 0;
+	int rx_dma_map_dir;
 	int idx;
 
-	if (nn->is_nfp3200) {
-		/* Work out how many packets arrived */
-		qcp_wr_p = nfp_qcp_wr_ptr_read(rx_ring->qcp_rx);
-		idx = rx_ring->rd_p % rx_ring->cnt;
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(nn->xdp_prog);
+	rx_dma_map_dir = xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
+	true_bufsz = xdp_prog ? PAGE_SIZE : nn->fl_bufsz;
+	tx_ring = r_vec->xdp_ring;
 
-		if (qcp_wr_p == idx)
-			/* No new packets */
-			return 0;
-
-		if (qcp_wr_p > idx)
-			avail = qcp_wr_p - idx;
-		else
-			avail = qcp_wr_p + rx_ring->cnt - idx;
-	} else {
-		avail = budget + 1;
-	}
+	while (pkts_polled < budget) {
+		unsigned int meta_len, data_len, data_off, pkt_len, pkt_off;
+		struct nfp_net_rx_buf *rxbuf;
+		struct nfp_net_rx_desc *rxd;
+		dma_addr_t new_dma_addr;
+		void *new_frag;
 
-	while (avail > 0 && pkts_polled < budget) {
-		idx = rx_ring->rd_p % rx_ring->cnt;
+		idx = rx_ring->rd_p & (rx_ring->cnt - 1);
 
 		rxd = &rx_ring->rxds[idx];
-		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD)) {
-			if (nn->is_nfp3200)
-				nn_dbg(nn, "RX descriptor not valid (DD)%d:%u rxd[0]=%#x rxd[1]=%#x\n",
-				       rx_ring->idx, idx,
-				       rxd->vals[0], rxd->vals[1]);
+		if (!(rxd->rxd.meta_len_dd & PCIE_DESC_RX_DD))
 			break;
-		}
+
 		/* Memory barrier to ensure that we won't do other reads
 		 * before the DD bit.
 		 */
@@ -1431,27 +1570,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 		rx_ring->rd_p++;
 		pkts_polled++;
-		avail--;
-
-		skb = rx_ring->rxbufs[idx].skb;
-
-		new_skb = nfp_net_rx_alloc_one(rx_ring, &new_dma_addr,
-					       nn->fl_bufsz);
-		if (!new_skb) {
-			nfp_net_rx_give_one(rx_ring, rx_ring->rxbufs[idx].skb,
-					    rx_ring->rxbufs[idx].dma_addr);
-			u64_stats_update_begin(&r_vec->rx_sync);
-			r_vec->rx_drops++;
-			u64_stats_update_end(&r_vec->rx_sync);
-			continue;
-		}
-
-		dma_unmap_single(&nn->pdev->dev,
-				 rx_ring->rxbufs[idx].dma_addr,
-				 nn->fl_bufsz, DMA_FROM_DEVICE);
-
-		nfp_net_rx_give_one(rx_ring, new_skb, new_dma_addr);
 
+		rxbuf =	&rx_ring->rxbufs[idx];
 		/*         < meta_len >
 		 *  <-- [rx_offset] -->
 		 *  ---------------------------------------------------------
@@ -1466,19 +1586,66 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		 */
 		meta_len = rxd->rxd.meta_len_dd & PCIE_DESC_RX_META_LEN_MASK;
 		data_len = le16_to_cpu(rxd->rxd.data_len);
+		pkt_len = data_len - meta_len;
 
 		if (nn->rx_offset == NFP_NET_CFG_RX_OFFSET_DYNAMIC)
-			skb_reserve(skb, meta_len);
+			pkt_off = meta_len;
 		else
-			skb_reserve(skb, nn->rx_offset);
-		skb_put(skb, data_len - meta_len);
+			pkt_off = nn->rx_offset;
+		data_off = NFP_NET_RX_BUF_HEADROOM + pkt_off;
 
 		/* Stats update */
 		u64_stats_update_begin(&r_vec->rx_sync);
 		r_vec->rx_pkts++;
-		r_vec->rx_bytes += skb->len;
+		r_vec->rx_bytes += pkt_len;
 		u64_stats_update_end(&r_vec->rx_sync);
 
+		if (xdp_prog && !(rxd->rxd.flags & PCIE_DESC_RX_BPF &&
+				  nn->bpf_offload_xdp)) {
+			int act;
+
+			dma_sync_single_for_cpu(&nn->pdev->dev,
+						rxbuf->dma_addr + pkt_off,
+						pkt_len, DMA_FROM_DEVICE);
+			act = nfp_net_run_xdp(xdp_prog, rxbuf->frag + data_off,
+					      pkt_len);
+			switch (act) {
+			case XDP_PASS:
+				break;
+			case XDP_TX:
+				nfp_net_tx_xdp_buf(nn, rx_ring, tx_ring, rxbuf,
+						   pkt_off, pkt_len);
+				continue;
+			default:
+				bpf_warn_invalid_xdp_action(act);
+			case XDP_ABORTED:
+			case XDP_DROP:
+				nfp_net_rx_give_one(rx_ring, rxbuf->frag,
+						    rxbuf->dma_addr);
+				continue;
+			}
+		}
+
+		skb = build_skb(rxbuf->frag, true_bufsz);
+		if (unlikely(!skb)) {
+			nfp_net_rx_drop(r_vec, rx_ring, rxbuf, NULL);
+			continue;
+		}
+		new_frag = nfp_net_napi_alloc_one(nn, rx_dma_map_dir,
+						  &new_dma_addr);
+		if (unlikely(!new_frag)) {
+			nfp_net_rx_drop(r_vec, rx_ring, rxbuf, skb);
+			continue;
+		}
+
+		nfp_net_dma_unmap_rx(nn, rxbuf->dma_addr, nn->fl_bufsz,
+				     rx_dma_map_dir);
+
+		nfp_net_rx_give_one(rx_ring, new_frag, new_dma_addr);
+
+		skb_reserve(skb, data_off);
+		skb_put(skb, pkt_len);
+
 		if (nn->fw_ver.major <= 3) {
 			nfp_net_set_hash_desc(nn->netdev, skb, rxd);
 		} else if (meta_len) {
@@ -1486,12 +1653,8 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 
 			end = nfp_net_parse_meta(nn->netdev, skb, meta_len);
 			if (unlikely(end != skb->data)) {
-				u64_stats_update_begin(&r_vec->rx_sync);
-				r_vec->rx_drops++;
-				u64_stats_update_end(&r_vec->rx_sync);
-
-				dev_kfree_skb_any(skb);
 				nn_warn_ratelimit(nn, "invalid RX packet metadata\n");
+				nfp_net_rx_drop(r_vec, rx_ring, NULL, skb);
 				continue;
 			}
 		}
@@ -1508,8 +1671,9 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 		napi_gro_receive(&rx_ring->r_vec->napi, skb);
 	}
 
-	if (nn->is_nfp3200)
-		nfp_qcp_rd_ptr_add(rx_ring->qcp_rx, pkts_polled);
+	if (xdp_prog && tx_ring->wr_ptr_add)
+		nfp_net_tx_xmit_more_flush(tx_ring);
+	rcu_read_unlock();
 
 	return pkts_polled;
 }
@@ -1525,21 +1689,19 @@ static int nfp_net_poll(struct napi_struct *napi, int budget)
 {
 	struct nfp_net_r_vector *r_vec =
 		container_of(napi, struct nfp_net_r_vector, napi);
-	struct nfp_net_rx_ring *rx_ring = r_vec->rx_ring;
-	struct nfp_net_tx_ring *tx_ring = r_vec->tx_ring;
-	struct nfp_net *nn = r_vec->nfp_net;
-	struct netdev_queue *txq;
-	unsigned int pkts_polled;
-
-	tx_ring = &nn->tx_rings[rx_ring->idx];
-	txq = netdev_get_tx_queue(nn->netdev, tx_ring->idx);
-	nfp_net_tx_complete(tx_ring);
+	unsigned int pkts_polled = 0;
 
-	pkts_polled = nfp_net_rx(rx_ring, budget);
+	if (r_vec->tx_ring)
+		nfp_net_tx_complete(r_vec->tx_ring);
+	if (r_vec->rx_ring) {
+		pkts_polled = nfp_net_rx(r_vec->rx_ring, budget);
+		if (r_vec->xdp_ring)
+			nfp_net_xdp_complete(r_vec->xdp_ring);
+	}
 
 	if (pkts_polled < budget) {
 		napi_complete_done(napi, pkts_polled);
-		nfp_net_irq_unmask(nn, r_vec->irq_idx);
+		nfp_net_irq_unmask(r_vec->nfp_net, r_vec->irq_idx);
 	}
 
 	return pkts_polled;
@@ -1575,10 +1737,12 @@ static void nfp_net_tx_ring_free(struct nfp_net_tx_ring *tx_ring)
  * nfp_net_tx_ring_alloc() - Allocate resource for a TX ring
  * @tx_ring:   TX Ring structure to allocate
  * @cnt:       Ring buffer count
+ * @is_xdp:    True if ring will be used for XDP
  *
  * Return: 0 on success, negative errno otherwise.
  */
-static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt)
+static int
+nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt, bool is_xdp)
 {
 	struct nfp_net_r_vector *r_vec = tx_ring->r_vec;
 	struct nfp_net *nn = r_vec->nfp_net;
@@ -1598,11 +1762,14 @@ static int nfp_net_tx_ring_alloc(struct nfp_net_tx_ring *tx_ring, u32 cnt)
 	if (!tx_ring->txbufs)
 		goto err_alloc;
 
-	netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask, tx_ring->idx);
+	if (!is_xdp)
+		netif_set_xps_queue(nn->netdev, &r_vec->affinity_mask,
+				    tx_ring->idx);
 
-	nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p\n",
+	nn_dbg(nn, "TxQ%02d: QCidx=%02d cnt=%d dma=%#llx host=%p %s\n",
 	       tx_ring->idx, tx_ring->qcidx,
-	       tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds);
+	       tx_ring->cnt, (unsigned long long)tx_ring->dma, tx_ring->txds,
+	       is_xdp ? "XDP" : "");
 
 	return 0;
 
@@ -1612,23 +1779,29 @@ err_alloc:
 }
 
 static struct nfp_net_tx_ring *
-nfp_net_shadow_tx_rings_prepare(struct nfp_net *nn, u32 buf_cnt)
+nfp_net_tx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
+			    unsigned int num_stack_tx_rings)
 {
 	struct nfp_net_tx_ring *rings;
 	unsigned int r;
 
-	rings = kcalloc(nn->num_tx_rings, sizeof(*rings), GFP_KERNEL);
+	rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
 	if (!rings)
 		return NULL;
 
-	for (r = 0; r < nn->num_tx_rings; r++) {
-		nfp_net_tx_ring_init(&rings[r], nn->tx_rings[r].r_vec, r);
+	for (r = 0; r < s->n_rings; r++) {
+		int bias = 0;
+
+		if (r >= num_stack_tx_rings)
+			bias = num_stack_tx_rings;
 
-		if (nfp_net_tx_ring_alloc(&rings[r], buf_cnt))
+		nfp_net_tx_ring_init(&rings[r], &nn->r_vecs[r - bias], r);
+
+		if (nfp_net_tx_ring_alloc(&rings[r], s->dcnt, bias))
 			goto err_free_prev;
 	}
 
-	return rings;
+	return s->rings = rings;
 
 err_free_prev:
 	while (r--)
@@ -1637,28 +1810,27 @@ err_free_prev:
 	return NULL;
 }
 
-static struct nfp_net_tx_ring *
-nfp_net_shadow_tx_rings_swap(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+static void
+nfp_net_tx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
 {
-	struct nfp_net_tx_ring *old = nn->tx_rings;
-	unsigned int r;
+	struct nfp_net_ring_set new = *s;
 
-	for (r = 0; r < nn->num_tx_rings; r++)
-		old[r].r_vec->tx_ring = &rings[r];
+	s->dcnt = nn->txd_cnt;
+	s->rings = nn->tx_rings;
+	s->n_rings = nn->num_tx_rings;
 
-	nn->tx_rings = rings;
-	return old;
+	nn->txd_cnt = new.dcnt;
+	nn->tx_rings = new.rings;
+	nn->num_tx_rings = new.n_rings;
 }
 
 static void
-nfp_net_shadow_tx_rings_free(struct nfp_net *nn, struct nfp_net_tx_ring *rings)
+nfp_net_tx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s)
 {
+	struct nfp_net_tx_ring *rings = s->rings;
 	unsigned int r;
 
-	if (!rings)
-		return;
-
-	for (r = 0; r < nn->num_tx_rings; r++)
+	for (r = 0; r < s->n_rings; r++)
 		nfp_net_tx_ring_free(&rings[r]);
 
 	kfree(rings);
@@ -1730,31 +1902,32 @@ err_alloc:
 }
 
 static struct nfp_net_rx_ring *
-nfp_net_shadow_rx_rings_prepare(struct nfp_net *nn, unsigned int fl_bufsz,
-				u32 buf_cnt)
+nfp_net_rx_ring_set_prepare(struct nfp_net *nn, struct nfp_net_ring_set *s,
+			    bool xdp)
 {
+	unsigned int fl_bufsz =	nfp_net_calc_fl_bufsz(nn, s->mtu);
 	struct nfp_net_rx_ring *rings;
 	unsigned int r;
 
-	rings = kcalloc(nn->num_rx_rings, sizeof(*rings), GFP_KERNEL);
+	rings = kcalloc(s->n_rings, sizeof(*rings), GFP_KERNEL);
 	if (!rings)
 		return NULL;
 
-	for (r = 0; r < nn->num_rx_rings; r++) {
-		nfp_net_rx_ring_init(&rings[r], nn->rx_rings[r].r_vec, r);
+	for (r = 0; r < s->n_rings; r++) {
+		nfp_net_rx_ring_init(&rings[r], &nn->r_vecs[r], r);
 
-		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, buf_cnt))
+		if (nfp_net_rx_ring_alloc(&rings[r], fl_bufsz, s->dcnt))
 			goto err_free_prev;
 
-		if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r]))
+		if (nfp_net_rx_ring_bufs_alloc(nn, &rings[r], xdp))
 			goto err_free_ring;
 	}
 
-	return rings;
+	return s->rings = rings;
 
 err_free_prev:
 	while (r--) {
-		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+		nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
 err_free_ring:
 		nfp_net_rx_ring_free(&rings[r]);
 	}
@@ -1762,35 +1935,50 @@ err_free_ring:
 	return NULL;
 }
 
-static struct nfp_net_rx_ring *
-nfp_net_shadow_rx_rings_swap(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+static void
+nfp_net_rx_ring_set_swap(struct nfp_net *nn, struct nfp_net_ring_set *s)
 {
-	struct nfp_net_rx_ring *old = nn->rx_rings;
-	unsigned int r;
+	struct nfp_net_ring_set new = *s;
 
-	for (r = 0; r < nn->num_rx_rings; r++)
-		old[r].r_vec->rx_ring = &rings[r];
+	s->mtu = nn->netdev->mtu;
+	s->dcnt = nn->rxd_cnt;
+	s->rings = nn->rx_rings;
+	s->n_rings = nn->num_rx_rings;
 
-	nn->rx_rings = rings;
-	return old;
+	nn->netdev->mtu = new.mtu;
+	nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, new.mtu);
+	nn->rxd_cnt = new.dcnt;
+	nn->rx_rings = new.rings;
+	nn->num_rx_rings = new.n_rings;
 }
 
 static void
-nfp_net_shadow_rx_rings_free(struct nfp_net *nn, struct nfp_net_rx_ring *rings)
+nfp_net_rx_ring_set_free(struct nfp_net *nn, struct nfp_net_ring_set *s,
+			 bool xdp)
 {
+	struct nfp_net_rx_ring *rings = s->rings;
 	unsigned int r;
 
-	if (!rings)
-		return;
-
-	for (r = 0; r < nn->num_r_vecs; r++) {
-		nfp_net_rx_ring_bufs_free(nn, &rings[r]);
+	for (r = 0; r < s->n_rings; r++) {
+		nfp_net_rx_ring_bufs_free(nn, &rings[r], xdp);
 		nfp_net_rx_ring_free(&rings[r]);
 	}
 
 	kfree(rings);
 }
 
+static void
+nfp_net_vector_assign_rings(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
+			    int idx)
+{
+	r_vec->rx_ring = idx < nn->num_rx_rings ? &nn->rx_rings[idx] : NULL;
+	r_vec->tx_ring =
+		idx < nn->num_stack_tx_rings ? &nn->tx_rings[idx] : NULL;
+
+	r_vec->xdp_ring = idx < nn->num_tx_rings - nn->num_stack_tx_rings ?
+		&nn->tx_rings[nn->num_stack_tx_rings + idx] : NULL;
+}
+
 static int
 nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 		       int idx)
@@ -1798,25 +1986,20 @@ nfp_net_prepare_vector(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
 	struct msix_entry *entry = &nn->irq_entries[r_vec->irq_idx];
 	int err;
 
-	r_vec->tx_ring = &nn->tx_rings[idx];
-	nfp_net_tx_ring_init(r_vec->tx_ring, r_vec, idx);
-
-	r_vec->rx_ring = &nn->rx_rings[idx];
-	nfp_net_rx_ring_init(r_vec->rx_ring, r_vec, idx);
+	/* Setup NAPI */
+	netif_napi_add(nn->netdev, &r_vec->napi,
+		       nfp_net_poll, NAPI_POLL_WEIGHT);
 
 	snprintf(r_vec->name, sizeof(r_vec->name),
 		 "%s-rxtx-%d", nn->netdev->name, idx);
 	err = request_irq(entry->vector, r_vec->handler, 0, r_vec->name, r_vec);
 	if (err) {
+		netif_napi_del(&r_vec->napi);
 		nn_err(nn, "Error requesting IRQ %d\n", entry->vector);
 		return err;
 	}
 	disable_irq(entry->vector);
 
-	/* Setup NAPI */
-	netif_napi_add(nn->netdev, &r_vec->napi,
-		       nfp_net_poll, NAPI_POLL_WEIGHT);
-
 	irq_set_affinity_hint(entry->vector, &r_vec->affinity_mask);
 
 	nn_dbg(nn, "RV%02d: irq=%03d/%03d\n", idx, entry->vector, entry->entry);
@@ -1879,13 +2062,13 @@ void nfp_net_coalesce_write_cfg(struct nfp_net *nn)
 	/* copy RX interrupt coalesce parameters */
 	value = (nn->rx_coalesce_max_frames << 16) |
 		(factor * nn->rx_coalesce_usecs);
-	for (i = 0; i < nn->num_r_vecs; i++)
+	for (i = 0; i < nn->num_rx_rings; i++)
 		nn_writel(nn, NFP_NET_CFG_RXR_IRQ_MOD(i), value);
 
 	/* copy TX interrupt coalesce parameters */
 	value = (nn->tx_coalesce_max_frames << 16) |
 		(factor * nn->tx_coalesce_usecs);
-	for (i = 0; i < nn->num_r_vecs; i++)
+	for (i = 0; i < nn->num_tx_rings; i++)
 		nn_writel(nn, NFP_NET_CFG_TXR_IRQ_MOD(i), value);
 }
 
@@ -1901,9 +2084,8 @@ static void nfp_net_write_mac_addr(struct nfp_net *nn)
 {
 	nn_writel(nn, NFP_NET_CFG_MACADDR + 0,
 		  get_unaligned_be32(nn->netdev->dev_addr));
-	/* We can't do writew for NFP-3200 compatibility */
-	nn_writel(nn, NFP_NET_CFG_MACADDR + 4,
-		  get_unaligned_be16(nn->netdev->dev_addr + 4) << 16);
+	nn_writew(nn, NFP_NET_CFG_MACADDR + 6,
+		  get_unaligned_be16(nn->netdev->dev_addr + 4));
 }
 
 static void nfp_net_vec_clear_ring_data(struct nfp_net *nn, unsigned int idx)
@@ -1944,27 +2126,33 @@ static void nfp_net_clear_config_and_disable(struct nfp_net *nn)
 	if (err)
 		nn_err(nn, "Could not disable device: %d\n", err);
 
-	for (r = 0; r < nn->num_r_vecs; r++) {
-		nfp_net_rx_ring_reset(nn->r_vecs[r].rx_ring);
-		nfp_net_tx_ring_reset(nn, nn->r_vecs[r].tx_ring);
+	for (r = 0; r < nn->num_rx_rings; r++)
+		nfp_net_rx_ring_reset(&nn->rx_rings[r]);
+	for (r = 0; r < nn->num_tx_rings; r++)
+		nfp_net_tx_ring_reset(nn, &nn->tx_rings[r]);
+	for (r = 0; r < nn->num_r_vecs; r++)
 		nfp_net_vec_clear_ring_data(nn, r);
-	}
 
 	nn->ctrl = new_ctrl;
 }
 
 static void
-nfp_net_vec_write_ring_data(struct nfp_net *nn, struct nfp_net_r_vector *r_vec,
-			    unsigned int idx)
+nfp_net_rx_ring_hw_cfg_write(struct nfp_net *nn,
+			     struct nfp_net_rx_ring *rx_ring, unsigned int idx)
 {
 	/* Write the DMA address, size and MSI-X info to the device */
-	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), r_vec->rx_ring->dma);
-	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(r_vec->rx_ring->cnt));
-	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), r_vec->irq_idx);
+	nn_writeq(nn, NFP_NET_CFG_RXR_ADDR(idx), rx_ring->dma);
+	nn_writeb(nn, NFP_NET_CFG_RXR_SZ(idx), ilog2(rx_ring->cnt));
+	nn_writeb(nn, NFP_NET_CFG_RXR_VEC(idx), rx_ring->r_vec->irq_idx);
+}
 
-	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), r_vec->tx_ring->dma);
-	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(r_vec->tx_ring->cnt));
-	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), r_vec->irq_idx);
+static void
+nfp_net_tx_ring_hw_cfg_write(struct nfp_net *nn,
+			     struct nfp_net_tx_ring *tx_ring, unsigned int idx)
+{
+	nn_writeq(nn, NFP_NET_CFG_TXR_ADDR(idx), tx_ring->dma);
+	nn_writeb(nn, NFP_NET_CFG_TXR_SZ(idx), ilog2(tx_ring->cnt));
+	nn_writeb(nn, NFP_NET_CFG_TXR_VEC(idx), tx_ring->r_vec->irq_idx);
 }
 
 static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
@@ -1989,8 +2177,10 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
 		update |= NFP_NET_CFG_UPDATE_IRQMOD;
 	}
 
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_vec_write_ring_data(nn, &nn->r_vecs[r], r);
+	for (r = 0; r < nn->num_tx_rings; r++)
+		nfp_net_tx_ring_hw_cfg_write(nn, &nn->tx_rings[r], r);
+	for (r = 0; r < nn->num_rx_rings; r++)
+		nfp_net_rx_ring_hw_cfg_write(nn, &nn->rx_rings[r], r);
 
 	nn_writeq(nn, NFP_NET_CFG_TXRS_ENABLE, nn->num_tx_rings == 64 ?
 		  0xffffffffffffffffULL : ((u64)1 << nn->num_tx_rings) - 1);
@@ -2016,8 +2206,8 @@ static int __nfp_net_set_config_and_enable(struct nfp_net *nn)
 
 	nn->ctrl = new_ctrl;
 
-	for (r = 0; r < nn->num_r_vecs; r++)
-		nfp_net_rx_ring_fill_freelist(nn->r_vecs[r].rx_ring);
+	for (r = 0; r < nn->num_rx_rings; r++)
+		nfp_net_rx_ring_fill_freelist(&nn->rx_rings[r]);
 
 	/* Since reconfiguration requests while NFP is down are ignored we
 	 * have to wipe the entire VXLAN configuration and reinitialize it.
@@ -2068,6 +2258,15 @@ static void nfp_net_open_stack(struct nfp_net *nn)
 static int nfp_net_netdev_open(struct net_device *netdev)
 {
 	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net_ring_set rx = {
+		.n_rings = nn->num_rx_rings,
+		.mtu = nn->netdev->mtu,
+		.dcnt = nn->rxd_cnt,
+	};
+	struct nfp_net_ring_set tx = {
+		.n_rings = nn->num_tx_rings,
+		.dcnt = nn->txd_cnt,
+	};
 	int err, r;
 
 	if (nn->ctrl & NFP_NET_CFG_CTRL_ENABLE) {
@@ -2092,39 +2291,29 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 		goto err_free_exn;
 	disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector);
 
-	nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings),
-			       GFP_KERNEL);
+	for (r = 0; r < nn->num_r_vecs; r++) {
+		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
+		if (err)
+			goto err_cleanup_vec_p;
+	}
+
+	nn->rx_rings = nfp_net_rx_ring_set_prepare(nn, &rx, nn->xdp_prog);
 	if (!nn->rx_rings) {
 		err = -ENOMEM;
-		goto err_free_lsc;
+		goto err_cleanup_vec;
 	}
-	nn->tx_rings = kcalloc(nn->num_tx_rings, sizeof(*nn->tx_rings),
-			       GFP_KERNEL);
+
+	nn->tx_rings = nfp_net_tx_ring_set_prepare(nn, &tx,
+						   nn->num_stack_tx_rings);
 	if (!nn->tx_rings) {
 		err = -ENOMEM;
 		goto err_free_rx_rings;
 	}
 
-	for (r = 0; r < nn->num_r_vecs; r++) {
-		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
-		if (err)
-			goto err_free_prev_vecs;
+	for (r = 0; r < nn->max_r_vecs; r++)
+		nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
 
-		err = nfp_net_tx_ring_alloc(nn->r_vecs[r].tx_ring, nn->txd_cnt);
-		if (err)
-			goto err_cleanup_vec_p;
-
-		err = nfp_net_rx_ring_alloc(nn->r_vecs[r].rx_ring,
-					    nn->fl_bufsz, nn->rxd_cnt);
-		if (err)
-			goto err_free_tx_ring_p;
-
-		err = nfp_net_rx_ring_bufs_alloc(nn, nn->r_vecs[r].rx_ring);
-		if (err)
-			goto err_flush_rx_ring_p;
-	}
-
-	err = netif_set_real_num_tx_queues(netdev, nn->num_tx_rings);
+	err = netif_set_real_num_tx_queues(netdev, nn->num_stack_tx_rings);
 	if (err)
 		goto err_free_rings;
 
@@ -2154,21 +2343,14 @@ static int nfp_net_netdev_open(struct net_device *netdev)
 	return 0;
 
 err_free_rings:
+	nfp_net_tx_ring_set_free(nn, &tx);
+err_free_rx_rings:
+	nfp_net_rx_ring_set_free(nn, &rx, nn->xdp_prog);
+err_cleanup_vec:
 	r = nn->num_r_vecs;
-err_free_prev_vecs:
-	while (r--) {
-		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
-err_flush_rx_ring_p:
-		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
-err_free_tx_ring_p:
-		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
 err_cleanup_vec_p:
+	while (r--)
 		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
-	}
-	kfree(nn->tx_rings);
-err_free_rx_rings:
-	kfree(nn->rx_rings);
-err_free_lsc:
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_LSC, NFP_NET_IRQ_LSC_IDX);
 err_free_exn:
 	nfp_net_aux_irq_free(nn, NFP_NET_CFG_EXN, NFP_NET_IRQ_EXN_IDX);
@@ -2203,12 +2385,14 @@ static void nfp_net_close_free_all(struct nfp_net *nn)
 {
 	unsigned int r;
 
-	for (r = 0; r < nn->num_r_vecs; r++) {
-		nfp_net_rx_ring_bufs_free(nn, nn->r_vecs[r].rx_ring);
-		nfp_net_rx_ring_free(nn->r_vecs[r].rx_ring);
-		nfp_net_tx_ring_free(nn->r_vecs[r].tx_ring);
-		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
+	for (r = 0; r < nn->num_rx_rings; r++) {
+		nfp_net_rx_ring_bufs_free(nn, &nn->rx_rings[r], nn->xdp_prog);
+		nfp_net_rx_ring_free(&nn->rx_rings[r]);
 	}
+	for (r = 0; r < nn->num_tx_rings; r++)
+		nfp_net_tx_ring_free(&nn->tx_rings[r]);
+	for (r = 0; r < nn->num_r_vecs; r++)
+		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 
 	kfree(nn->rx_rings);
 	kfree(nn->tx_rings);
@@ -2271,94 +2455,135 @@ static void nfp_net_set_rx_mode(struct net_device *netdev)
 	nn->ctrl = new_ctrl;
 }
 
-static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
+static void nfp_net_rss_init_itbl(struct nfp_net *nn)
 {
-	unsigned int old_mtu, old_fl_bufsz, new_fl_bufsz;
-	struct nfp_net *nn = netdev_priv(netdev);
-	struct nfp_net_rx_ring *tmp_rings;
-	int err;
-
-	if (new_mtu < 68 || new_mtu > nn->max_mtu) {
-		nn_err(nn, "New MTU (%d) is not valid\n", new_mtu);
-		return -EINVAL;
-	}
-
-	old_mtu = netdev->mtu;
-	old_fl_bufsz = nn->fl_bufsz;
-	new_fl_bufsz = NFP_NET_MAX_PREPEND + ETH_HLEN + VLAN_HLEN * 2 + new_mtu;
-
-	if (!netif_running(netdev)) {
-		netdev->mtu = new_mtu;
-		nn->fl_bufsz = new_fl_bufsz;
-		return 0;
-	}
+	int i;
 
-	/* Prepare new rings */
-	tmp_rings = nfp_net_shadow_rx_rings_prepare(nn, new_fl_bufsz,
-						    nn->rxd_cnt);
-	if (!tmp_rings)
-		return -ENOMEM;
+	for (i = 0; i < sizeof(nn->rss_itbl); i++)
+		nn->rss_itbl[i] =
+			ethtool_rxfh_indir_default(i, nn->num_rx_rings);
+}
 
-	/* Stop device, swap in new rings, try to start the firmware */
-	nfp_net_close_stack(nn);
-	nfp_net_clear_config_and_disable(nn);
+static int
+nfp_net_ring_swap_enable(struct nfp_net *nn, unsigned int *num_vecs,
+			 unsigned int *stack_tx_rings,
+			 struct bpf_prog **xdp_prog,
+			 struct nfp_net_ring_set *rx,
+			 struct nfp_net_ring_set *tx)
+{
+	unsigned int r;
+	int err;
 
-	tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
+	if (rx)
+		nfp_net_rx_ring_set_swap(nn, rx);
+	if (tx)
+		nfp_net_tx_ring_set_swap(nn, tx);
 
-	netdev->mtu = new_mtu;
-	nn->fl_bufsz = new_fl_bufsz;
+	swap(*num_vecs, nn->num_r_vecs);
+	swap(*stack_tx_rings, nn->num_stack_tx_rings);
+	*xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
 
-	err = nfp_net_set_config_and_enable(nn);
-	if (err) {
-		const int err_new = err;
+	for (r = 0; r <	nn->max_r_vecs; r++)
+		nfp_net_vector_assign_rings(nn, &nn->r_vecs[r], r);
 
-		/* Try with old configuration and old rings */
-		tmp_rings = nfp_net_shadow_rx_rings_swap(nn, tmp_rings);
+	if (!netif_is_rxfh_configured(nn->netdev))
+		nfp_net_rss_init_itbl(nn);
 
-		netdev->mtu = old_mtu;
-		nn->fl_bufsz = old_fl_bufsz;
+	err = netif_set_real_num_rx_queues(nn->netdev,
+					   nn->num_rx_rings);
+	if (err)
+		return err;
 
-		err = __nfp_net_set_config_and_enable(nn);
+	if (nn->netdev->real_num_tx_queues != nn->num_stack_tx_rings) {
+		err = netif_set_real_num_tx_queues(nn->netdev,
+						   nn->num_stack_tx_rings);
 		if (err)
-			nn_err(nn, "Can't restore MTU - FW communication failed (%d,%d)\n",
-			       err_new, err);
+			return err;
 	}
 
-	nfp_net_shadow_rx_rings_free(nn, tmp_rings);
+	return __nfp_net_set_config_and_enable(nn);
+}
 
-	nfp_net_open_stack(nn);
+static int
+nfp_net_check_config(struct nfp_net *nn, struct bpf_prog *xdp_prog,
+		     struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+{
+	/* XDP-enabled tests */
+	if (!xdp_prog)
+		return 0;
+	if (rx && nfp_net_calc_fl_bufsz(nn, rx->mtu) > PAGE_SIZE) {
+		nn_warn(nn, "MTU too large w/ XDP enabled\n");
+		return -EINVAL;
+	}
+	if (tx && tx->n_rings > nn->max_tx_rings) {
+		nn_warn(nn, "Insufficient number of TX rings w/ XDP enabled\n");
+		return -EINVAL;
+	}
 
-	return err;
+	return 0;
 }
 
-int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
-{
-	struct nfp_net_tx_ring *tx_rings = NULL;
-	struct nfp_net_rx_ring *rx_rings = NULL;
-	u32 old_rxd_cnt, old_txd_cnt;
+static void
+nfp_net_ring_reconfig_down(struct nfp_net *nn, struct bpf_prog **xdp_prog,
+			   struct nfp_net_ring_set *rx,
+			   struct nfp_net_ring_set *tx,
+			   unsigned int stack_tx_rings, unsigned int num_vecs)
+{
+	nn->netdev->mtu = rx ? rx->mtu : nn->netdev->mtu;
+	nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, nn->netdev->mtu);
+	nn->rxd_cnt = rx ? rx->dcnt : nn->rxd_cnt;
+	nn->txd_cnt = tx ? tx->dcnt : nn->txd_cnt;
+	nn->num_rx_rings = rx ? rx->n_rings : nn->num_rx_rings;
+	nn->num_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
+	nn->num_stack_tx_rings = stack_tx_rings;
+	nn->num_r_vecs = num_vecs;
+	*xdp_prog = xchg(&nn->xdp_prog, *xdp_prog);
+
+	if (!netif_is_rxfh_configured(nn->netdev))
+		nfp_net_rss_init_itbl(nn);
+}
+
+int
+nfp_net_ring_reconfig(struct nfp_net *nn, struct bpf_prog **xdp_prog,
+		      struct nfp_net_ring_set *rx, struct nfp_net_ring_set *tx)
+{
+	unsigned int stack_tx_rings, num_vecs, r;
 	int err;
 
+	stack_tx_rings = tx ? tx->n_rings : nn->num_tx_rings;
+	if (*xdp_prog)
+		stack_tx_rings -= rx ? rx->n_rings : nn->num_rx_rings;
+
+	num_vecs = max(rx ? rx->n_rings : nn->num_rx_rings, stack_tx_rings);
+
+	err = nfp_net_check_config(nn, *xdp_prog, rx, tx);
+	if (err)
+		return err;
+
 	if (!netif_running(nn->netdev)) {
-		nn->rxd_cnt = rxd_cnt;
-		nn->txd_cnt = txd_cnt;
+		nfp_net_ring_reconfig_down(nn, xdp_prog, rx, tx,
+					   stack_tx_rings, num_vecs);
 		return 0;
 	}
 
-	old_rxd_cnt = nn->rxd_cnt;
-	old_txd_cnt = nn->txd_cnt;
-
 	/* Prepare new rings */
-	if (nn->rxd_cnt != rxd_cnt) {
-		rx_rings = nfp_net_shadow_rx_rings_prepare(nn, nn->fl_bufsz,
-							   rxd_cnt);
-		if (!rx_rings)
-			return -ENOMEM;
+	for (r = nn->num_r_vecs; r < num_vecs; r++) {
+		err = nfp_net_prepare_vector(nn, &nn->r_vecs[r], r);
+		if (err) {
+			num_vecs = r;
+			goto err_cleanup_vecs;
+		}
 	}
-	if (nn->txd_cnt != txd_cnt) {
-		tx_rings = nfp_net_shadow_tx_rings_prepare(nn, txd_cnt);
-		if (!tx_rings) {
-			nfp_net_shadow_rx_rings_free(nn, rx_rings);
-			return -ENOMEM;
+	if (rx) {
+		if (!nfp_net_rx_ring_set_prepare(nn, rx, *xdp_prog)) {
+			err = -ENOMEM;
+			goto err_cleanup_vecs;
+		}
+	}
+	if (tx) {
+		if (!nfp_net_tx_ring_set_prepare(nn, tx, stack_tx_rings)) {
+			err = -ENOMEM;
+			goto err_free_rx;
 		}
 	}
 
@@ -2366,39 +2591,51 @@ int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
 	nfp_net_close_stack(nn);
 	nfp_net_clear_config_and_disable(nn);
 
-	if (rx_rings)
-		rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
-	if (tx_rings)
-		tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
-
-	nn->rxd_cnt = rxd_cnt;
-	nn->txd_cnt = txd_cnt;
-
-	err = nfp_net_set_config_and_enable(nn);
+	err = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
+				       xdp_prog, rx, tx);
 	if (err) {
-		const int err_new = err;
-
-		/* Try with old configuration and old rings */
-		if (rx_rings)
-			rx_rings = nfp_net_shadow_rx_rings_swap(nn, rx_rings);
-		if (tx_rings)
-			tx_rings = nfp_net_shadow_tx_rings_swap(nn, tx_rings);
+		int err2;
 
-		nn->rxd_cnt = old_rxd_cnt;
-		nn->txd_cnt = old_txd_cnt;
+		nfp_net_clear_config_and_disable(nn);
 
-		err = __nfp_net_set_config_and_enable(nn);
-		if (err)
+		/* Try with old configuration and old rings */
+		err2 = nfp_net_ring_swap_enable(nn, &num_vecs, &stack_tx_rings,
+						xdp_prog, rx, tx);
+		if (err2)
 			nn_err(nn, "Can't restore ring config - FW communication failed (%d,%d)\n",
-			       err_new, err);
+			       err, err2);
 	}
+	for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
 
-	nfp_net_shadow_rx_rings_free(nn, rx_rings);
-	nfp_net_shadow_tx_rings_free(nn, tx_rings);
+	if (rx)
+		nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
+	if (tx)
+		nfp_net_tx_ring_set_free(nn, tx);
 
 	nfp_net_open_stack(nn);
 
 	return err;
+
+err_free_rx:
+	if (rx)
+		nfp_net_rx_ring_set_free(nn, rx, *xdp_prog);
+err_cleanup_vecs:
+	for (r = num_vecs - 1; r >= nn->num_r_vecs; r--)
+		nfp_net_cleanup_vector(nn, &nn->r_vecs[r]);
+	return err;
+}
+
+static int nfp_net_change_mtu(struct net_device *netdev, int new_mtu)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	struct nfp_net_ring_set rx = {
+		.n_rings = nn->num_rx_rings,
+		.mtu = new_mtu,
+		.dcnt = nn->rxd_cnt,
+	};
+
+	return nfp_net_ring_reconfig(nn, &nn->xdp_prog, &rx, NULL);
 }
 
 static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev,
@@ -2455,8 +2692,12 @@ nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto,
 	if (proto != htons(ETH_P_ALL))
 		return -ENOTSUPP;
 
-	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn))
-		return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf);
+	if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) {
+		if (!nn->bpf_offload_xdp)
+			return nfp_net_bpf_offload(nn, tc->cls_bpf);
+		else
+			return -EBUSY;
+	}
 
 	return -EINVAL;
 }
@@ -2664,6 +2905,91 @@ static void nfp_net_del_vxlan_port(struct net_device *netdev,
 		nfp_net_set_vxlan_port(nn, idx, 0);
 }
 
+static int nfp_net_xdp_offload(struct nfp_net *nn, struct bpf_prog *prog)
+{
+	struct tc_cls_bpf_offload cmd = {
+		.prog = prog,
+	};
+	int ret;
+
+	if (!nfp_net_ebpf_capable(nn))
+		return -EINVAL;
+	/* Choose ADD, REPLACE or DESTROY from the current offload state */
+	if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) {
+		if (!nn->bpf_offload_xdp)	/* BPF on, but not for XDP */
+			return prog ? -EBUSY : 0;
+		cmd.command = prog ? TC_CLSBPF_REPLACE : TC_CLSBPF_DESTROY;
+	} else {
+		if (!prog)	/* BPF not enabled, nothing to remove */
+			return 0;
+		cmd.command = TC_CLSBPF_ADD;
+	}
+
+	ret = nfp_net_bpf_offload(nn, &cmd);
+	/* Stop offload if replace not possible */
+	if (ret && cmd.command == TC_CLSBPF_REPLACE)
+		nfp_net_xdp_offload(nn, NULL);
+	nn->bpf_offload_xdp = prog && !ret;
+	return ret;
+}
+
+static int nfp_net_xdp_setup(struct nfp_net *nn, struct bpf_prog *prog)
+{
+	struct nfp_net_ring_set rx = {
+		.n_rings = nn->num_rx_rings,
+		.mtu = nn->netdev->mtu,
+		.dcnt = nn->rxd_cnt,
+	};
+	struct nfp_net_ring_set tx = {
+		.n_rings = nn->num_tx_rings,
+		.dcnt = nn->txd_cnt,
+	};
+	int err;
+
+	if (prog && prog->xdp_adjust_head) {
+		nn_err(nn, "Does not support bpf_xdp_adjust_head()\n");
+		return -EOPNOTSUPP;
+	}
+	if (!prog && !nn->xdp_prog)	/* already detached */
+		return 0;
+	if (prog && nn->xdp_prog) {	/* swap programs, keep rings */
+		prog = xchg(&nn->xdp_prog, prog);
+		bpf_prog_put(prog);
+		nfp_net_xdp_offload(nn, nn->xdp_prog);
+		return 0;
+	}
+	/* Enabling XDP adds a TX ring per RX ring, disabling removes them */
+	tx.n_rings += prog ? nn->num_rx_rings : -nn->num_rx_rings;
+
+	/* We need RX reconfig to remap the buffers (BIDIR vs FROM_DEV) */
+	err = nfp_net_ring_reconfig(nn, &prog, &rx, &tx);
+	if (err)
+		return err;
+
+	/* @prog got swapped and is now the old one */
+	if (prog)
+		bpf_prog_put(prog);
+
+	nfp_net_xdp_offload(nn, nn->xdp_prog);
+
+	return 0;
+}
+
+static int nfp_net_xdp(struct net_device *netdev, struct netdev_xdp *xdp)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	switch (xdp->command) {	/* .ndo_xdp entry point */
+	case XDP_SETUP_PROG:	/* attach, replace or detach (NULL prog) */
+		return nfp_net_xdp_setup(nn, xdp->prog);
+	case XDP_QUERY_PROG:	/* report whether a program is attached */
+		xdp->prog_attached = !!nn->xdp_prog;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops nfp_net_netdev_ops = {
 	.ndo_open		= nfp_net_netdev_open,
 	.ndo_stop		= nfp_net_netdev_close,
@@ -2678,6 +3004,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
 	.ndo_features_check	= nfp_net_features_check,
 	.ndo_udp_tunnel_add	= nfp_net_add_vxlan_port,
 	.ndo_udp_tunnel_del	= nfp_net_del_vxlan_port,
+	.ndo_xdp		= nfp_net_xdp,
 };
 
 /**
@@ -2686,8 +3013,7 @@ static const struct net_device_ops nfp_net_netdev_ops = {
  */
 void nfp_net_info(struct nfp_net *nn)
 {
-	nn_info(nn, "Netronome %s %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
-		nn->is_nfp3200 ? "NFP-32xx" : "NFP-6xxx",
+	nn_info(nn, "Netronome NFP-6xxx %sNetdev: TxQs=%d/%d RxQs=%d/%d\n",
 		nn->is_vf ? "VF " : "",
 		nn->num_tx_rings, nn->max_tx_rings,
 		nn->num_rx_rings, nn->max_rx_rings);
@@ -2728,11 +3054,11 @@ void nfp_net_info(struct nfp_net *nn)
  * Return: NFP Net device structure, or ERR_PTR on error.
  */
 struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
-				     int max_tx_rings, int max_rx_rings)
+				     unsigned int max_tx_rings,
+				     unsigned int max_rx_rings)
 {
 	struct net_device *netdev;
 	struct nfp_net *nn;
-	int nqs;
 
 	netdev = alloc_etherdev_mqs(sizeof(struct nfp_net),
 				    max_tx_rings, max_rx_rings);
@@ -2748,9 +3074,12 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev,
 	nn->max_tx_rings = max_tx_rings;
 	nn->max_rx_rings = max_rx_rings;
 
-	nqs = netif_get_num_default_rss_queues();
-	nn->num_tx_rings = min_t(int, nqs, max_tx_rings);
-	nn->num_rx_rings = min_t(int, nqs, max_rx_rings);
+	nn->num_tx_rings = min_t(unsigned int, max_tx_rings, num_online_cpus());
+	nn->num_rx_rings = min_t(unsigned int, max_rx_rings,
+				 netif_get_num_default_rss_queues());
+
+	nn->num_r_vecs = max(nn->num_tx_rings, nn->num_rx_rings);
+	nn->num_r_vecs = min_t(unsigned int, nn->num_r_vecs, num_online_cpus());
 
 	nn->txd_cnt = NFP_NET_TX_DESCS_DEFAULT;
 	nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT;
@@ -2782,13 +3111,9 @@ void nfp_net_netdev_free(struct nfp_net *nn)
  */
 static void nfp_net_rss_init(struct nfp_net *nn)
 {
-	int i;
-
 	netdev_rss_key_fill(nn->rss_key, NFP_NET_CFG_RSS_KEY_SZ);
 
-	for (i = 0; i < sizeof(nn->rss_itbl); i++)
-		nn->rss_itbl[i] =
-			ethtool_rxfh_indir_default(i, nn->num_rx_rings);
+	nfp_net_rss_init_itbl(nn);
 
 	/* Enable IPv4/IPv6 TCP by default */
 	nn->rss_cfg = NFP_NET_CFG_RSS_IPV4_TCP |
@@ -2826,12 +3151,18 @@ int nfp_net_netdev_init(struct net_device *netdev)
 
 	nfp_net_write_mac_addr(nn);
 
+	/* Determine RX packet/metadata boundary offset */
+	if (nn->fw_ver.major >= 2)
+		nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
+	else
+		nn->rx_offset = NFP_NET_RX_OFFSET;
+
 	/* Set default MTU and Freelist buffer size */
 	if (nn->max_mtu < NFP_NET_DEFAULT_MTU)
 		netdev->mtu = nn->max_mtu;
 	else
 		netdev->mtu = NFP_NET_DEFAULT_MTU;
-	nn->fl_bufsz = NFP_NET_DEFAULT_RX_BUFSZ;
+	nn->fl_bufsz = nfp_net_calc_fl_bufsz(nn, netdev->mtu);
 
 	/* Advertise/enable offloads based on capabilities
 	 *
@@ -2902,18 +3233,6 @@ int nfp_net_netdev_init(struct net_device *netdev)
 		nn->ctrl |= NFP_NET_CFG_CTRL_IRQMOD;
 	}
 
-	/* On NFP-3200 enable MSI-X auto-masking, if supported and the
-	 * interrupts are not shared.
-	 */
-	if (nn->is_nfp3200 && nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO)
-		nn->ctrl |= NFP_NET_CFG_CTRL_MSIXAUTO;
-
-	/* On NFP4000/NFP6000, determine RX packet/metadata boundary offset */
-	if (nn->fw_ver.major >= 2)
-		nn->rx_offset = nn_readl(nn, NFP_NET_CFG_RX_OFFSET);
-	else
-		nn->rx_offset = NFP_NET_RX_OFFSET;
-
 	/* Stash the re-configuration queue away.  First odd queue in TX Bar */
 	nn->qcp_cfg = nn->tx_bar + NFP_QCP_QUEUE_ADDR_SZ;
 
@@ -2927,9 +3246,13 @@ int nfp_net_netdev_init(struct net_device *netdev)
 		return err;
 
 	/* Finalise the netdev setup */
-	ether_setup(netdev);
 	netdev->netdev_ops = &nfp_net_netdev_ops;
 	netdev->watchdog_timeo = msecs_to_jiffies(5 * 1000);
+
+	/* MTU range: 68 - hw-specific max */
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = nn->max_mtu;
+
 	netif_carrier_off(netdev);
 
 	nfp_net_set_ethtool_ops(netdev);
@@ -2944,5 +3267,16 @@ int nfp_net_netdev_init(struct net_device *netdev)
  */
 void nfp_net_netdev_clean(struct net_device *netdev)
 {
-	unregister_netdev(netdev);
+	struct nfp_net *nn = netdev_priv(netdev);
+
+	/* Unregister first: this closes the device if it is still up, so
+	 * the datapath can no longer be running the XDP program when our
+	 * reference to it is dropped below.
+	 */
+	unregister_netdev(nn->netdev);
+
+	if (nn->xdp_prog)
+		bpf_prog_put(nn->xdp_prog);
+	if (nn->bpf_offload_xdp)
+		nfp_net_xdp_offload(nn, NULL);
 }
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
index 93b10b441acb..385ba355c965 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h
@@ -50,7 +50,7 @@
 /**
  * Configuration BAR size.
  *
- * The configuration BAR is 8K in size, but on the NFP6000, due to
+ * The configuration BAR is 8K in size, but due to
  * THB-350, 32k needs to be reserved.
  */
 #define NFP_NET_CFG_BAR_SZ              (32 * 1024)
@@ -186,18 +186,13 @@
 #define NFP_NET_CFG_START_RXQ           0x004c
 
 /**
- * NFP-3200 workaround (0x0050 - 0x0058)
- * @NFP_NET_CFG_SPARE_ADDR:  DMA address for ME code to use (e.g. YDS-155 fix)
- */
-#define NFP_NET_CFG_SPARE_ADDR          0x0050
-/**
- * NFP6000/NFP4000 - Prepend configuration
+ * Prepend configuration
  */
 #define NFP_NET_CFG_RX_OFFSET		0x0050
 #define NFP_NET_CFG_RX_OFFSET_DYNAMIC		0	/* Prepend mode */
 
 /**
- * NFP6000/NFP4000 - VXLAN/UDP encap configuration
+ * VXLAN/UDP encap configuration
  * @NFP_NET_CFG_VXLAN_PORT:	Base address of table of tunnels' UDP dst ports
  * @NFP_NET_CFG_VXLAN_SZ:	Size of the UDP port table in bytes
  */
@@ -205,7 +200,7 @@
 #define NFP_NET_CFG_VXLAN_SZ		  0x0008
 
 /**
- * NFP6000 - BPF section
+ * BPF section
  * @NFP_NET_CFG_BPF_ABI:	BPF ABI version
  * @NFP_NET_CFG_BPF_CAP:	BPF capabilities
  * @NFP_NET_CFG_BPF_MAX_LEN:	Maximum size of JITed BPF code in bytes
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
index f7c9a5bc4aa3..c66f3f954aa8 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_debugfs.c
@@ -44,8 +44,8 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
 	struct nfp_net_r_vector *r_vec = file->private;
 	struct nfp_net_rx_ring *rx_ring;
 	struct nfp_net_rx_desc *rxd;
-	struct sk_buff *skb;
 	struct nfp_net *nn;
+	void *frag;
 	int i;
 
 	rtnl_lock();
@@ -73,10 +73,9 @@ static int nfp_net_debugfs_rx_q_read(struct seq_file *file, void *data)
 		seq_printf(file, "%04d: 0x%08x 0x%08x", i,
 			   rxd->vals[0], rxd->vals[1]);
 
-		skb = READ_ONCE(rx_ring->rxbufs[i].skb);
-		if (skb)
-			seq_printf(file, " skb->head=%p skb->data=%p",
-				   skb->head, skb->data);
+		frag = READ_ONCE(rx_ring->rxbufs[i].frag);
+		if (frag)
+			seq_printf(file, " frag=%p", frag);
 
 		if (rx_ring->rxbufs[i].dma_addr)
 			seq_printf(file, " dma_addr=%pad",
@@ -115,6 +114,16 @@ static const struct file_operations nfp_rx_q_fops = {
 	.llseek = seq_lseek
 };
 
+static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f);
+
+static const struct file_operations nfp_tx_q_fops = {
+	.owner = THIS_MODULE,
+	.open = nfp_net_debugfs_tx_q_open,
+	.release = single_release,
+	.read = seq_read,
+	.llseek = seq_lseek
+};
+
 static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
 {
 	struct nfp_net_r_vector *r_vec = file->private;
@@ -127,10 +136,13 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
 
 	rtnl_lock();
 
-	if (!r_vec->nfp_net || !r_vec->tx_ring)
+	if (debugfs_real_fops(file->file) == &nfp_tx_q_fops)
+		tx_ring = r_vec->tx_ring;
+	else
+		tx_ring = r_vec->xdp_ring;
+	if (!r_vec->nfp_net || !tx_ring)
 		goto out;
 	nn = r_vec->nfp_net;
-	tx_ring = r_vec->tx_ring;
 	if (!netif_running(nn->netdev))
 		goto out;
 
@@ -149,9 +161,14 @@ static int nfp_net_debugfs_tx_q_read(struct seq_file *file, void *data)
 			   txd->vals[2], txd->vals[3]);
 
 		skb = READ_ONCE(tx_ring->txbufs[i].skb);
-		if (skb)
-			seq_printf(file, " skb->head=%p skb->data=%p",
-				   skb->head, skb->data);
+		if (skb) {
+			if (tx_ring == r_vec->tx_ring)
+				seq_printf(file, " skb->head=%p skb->data=%p",
+					   skb->head, skb->data);
+			else
+				seq_printf(file, " frag=%p", skb);
+		}
+
 		if (tx_ring->txbufs[i].dma_addr)
 			seq_printf(file, " dma_addr=%pad",
 				   &tx_ring->txbufs[i].dma_addr);
@@ -177,7 +194,7 @@ static int nfp_net_debugfs_tx_q_open(struct inode *inode, struct file *f)
 	return single_open(f, nfp_net_debugfs_tx_q_read, inode->i_private);
 }
 
-static const struct file_operations nfp_tx_q_fops = {
+static const struct file_operations nfp_xdp_q_fops = {
 	.owner = THIS_MODULE,
 	.open = nfp_net_debugfs_tx_q_open,
 	.release = single_release,
@@ -187,7 +204,7 @@ static const struct file_operations nfp_tx_q_fops = {
 
 void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
 {
-	struct dentry *queues, *tx, *rx;
+	struct dentry *queues, *tx, *rx, *xdp;
 	char int_name[16];
 	int i;
 
@@ -205,16 +222,19 @@ void nfp_net_debugfs_adapter_add(struct nfp_net *nn)
 
 	rx = debugfs_create_dir("rx", queues);
 	tx = debugfs_create_dir("tx", queues);
-	if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx))
+	xdp = debugfs_create_dir("xdp", queues);
+	if (IS_ERR_OR_NULL(rx) || IS_ERR_OR_NULL(tx) || IS_ERR_OR_NULL(xdp))
 		return;
 
-	for (i = 0; i < nn->num_rx_rings; i++) {
+	for (i = 0; i < min(nn->max_rx_rings, nn->max_r_vecs); i++) {
 		sprintf(int_name, "%d", i);
 		debugfs_create_file(int_name, S_IRUSR, rx,
 				    &nn->r_vecs[i], &nfp_rx_q_fops);
+		debugfs_create_file(int_name, S_IRUSR, xdp,
+				    &nn->r_vecs[i], &nfp_xdp_q_fops);
 	}
 
-	for (i = 0; i < nn->num_tx_rings; i++) {
+	for (i = 0; i < min(nn->max_tx_rings, nn->max_r_vecs); i++) {
 		sprintf(int_name, "%d", i);
 		debugfs_create_file(int_name, S_IRUSR, tx,
 				    &nn->r_vecs[i], &nfp_tx_q_fops);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 3418f2277e9d..1b26e9646574 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -158,6 +158,28 @@ static void nfp_net_get_ringparam(struct net_device *netdev,
 	ring->tx_pending = nn->txd_cnt;
 }
 
+static int nfp_net_set_ring_size(struct nfp_net *nn, u32 rxd_cnt, u32 txd_cnt)
+{
+	struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
+	struct nfp_net_ring_set rx = {
+		.n_rings = nn->num_rx_rings,
+		.mtu = nn->netdev->mtu,
+		.dcnt = rxd_cnt,
+	};
+	struct nfp_net_ring_set tx = {
+		.n_rings = nn->num_tx_rings,
+		.dcnt = txd_cnt,
+	};
+
+	if (nn->rxd_cnt != rxd_cnt)
+		reconfig_rx = &rx;
+	if (nn->txd_cnt != txd_cnt)
+		reconfig_tx = &tx;
+	/* A NULL ring set makes the reconfig keep that direction as it is */
+	return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
+				     reconfig_rx, reconfig_tx);
+}
+
 static int nfp_net_set_ringparam(struct net_device *netdev,
 				 struct ethtool_ringparam *ring)
 {
@@ -614,6 +636,76 @@ static int nfp_net_set_coalesce(struct net_device *netdev,
 	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_IRQMOD);
 }
 
+static void nfp_net_get_channels(struct net_device *netdev,
+				 struct ethtool_channels *channel)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	unsigned int num_tx_rings;
+
+	num_tx_rings = nn->num_tx_rings;
+	if (nn->xdp_prog)	/* leave out one TX ring per RX ring */
+		num_tx_rings -= nn->num_rx_rings;
+	/* Maxima are bounded by both the ring and the vector limits */
+	channel->max_rx = min(nn->max_rx_rings, nn->max_r_vecs);
+	channel->max_tx = min(nn->max_tx_rings, nn->max_r_vecs);
+	channel->max_combined = min(channel->max_rx, channel->max_tx);
+	channel->max_other = NFP_NET_NON_Q_VECTORS;
+	channel->combined_count = min(nn->num_rx_rings, num_tx_rings);
+	channel->rx_count = nn->num_rx_rings - channel->combined_count;
+	channel->tx_count = num_tx_rings - channel->combined_count;
+	channel->other_count = NFP_NET_NON_Q_VECTORS;
+}
+
+static int nfp_net_set_num_rings(struct nfp_net *nn, unsigned int total_rx,
+				 unsigned int total_tx)
+{
+	struct nfp_net_ring_set *reconfig_rx = NULL, *reconfig_tx = NULL;
+	struct nfp_net_ring_set rx = {
+		.n_rings = total_rx,
+		.mtu = nn->netdev->mtu,
+		.dcnt = nn->rxd_cnt,
+	};
+	struct nfp_net_ring_set tx = {
+		.n_rings = total_tx,
+		.dcnt = nn->txd_cnt,
+	};
+
+	if (nn->num_rx_rings != total_rx)
+		reconfig_rx = &rx;
+	if (nn->num_stack_tx_rings != total_tx ||
+	    (nn->xdp_prog && reconfig_rx))	/* XDP TX rings track RX */
+		reconfig_tx = &tx;
+
+	/* nfp_net_check_config() will catch tx.n_rings > nn->max_tx_rings */
+	if (nn->xdp_prog)
+		tx.n_rings += total_rx;
+
+	return nfp_net_ring_reconfig(nn, &nn->xdp_prog,
+				     reconfig_rx, reconfig_tx);
+}
+
+static int nfp_net_set_channels(struct net_device *netdev,
+				struct ethtool_channels *channel)
+{
+	struct nfp_net *nn = netdev_priv(netdev);
+	unsigned int total_rx, total_tx;
+
+	/* Reject: no combined, wrong "other", or both rx- and tx-only set */
+	if (!channel->combined_count ||
+	    channel->other_count != NFP_NET_NON_Q_VECTORS ||
+	    (channel->rx_count && channel->tx_count))
+		return -EINVAL;
+	/* Combined channels count towards both directions */
+	total_rx = channel->combined_count + channel->rx_count;
+	total_tx = channel->combined_count + channel->tx_count;
+
+	if (total_rx > min(nn->max_rx_rings, nn->max_r_vecs) ||
+	    total_tx > min(nn->max_tx_rings, nn->max_r_vecs))
+		return -EINVAL;
+
+	return nfp_net_set_num_rings(nn, total_rx, total_tx);
+}
+
 static const struct ethtool_ops nfp_net_ethtool_ops = {
 	.get_drvinfo		= nfp_net_get_drvinfo,
 	.get_link		= ethtool_op_get_link,
@@ -632,6 +724,8 @@ static const struct ethtool_ops nfp_net_ethtool_ops = {
 	.get_regs		= nfp_net_get_regs,
 	.get_coalesce           = nfp_net_get_coalesce,
 	.set_coalesce           = nfp_net_set_coalesce,
+	.get_channels		= nfp_net_get_channels,
+	.set_channels		= nfp_net_set_channels,
 };
 
 void nfp_net_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
index 8acfb631a0ea..18a851eb3508 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c
@@ -111,6 +111,9 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 	const struct tc_action *a;
 	LIST_HEAD(actions);
 
+	if (!cls_bpf->exts)
+		return NN_ACT_XDP;
+
 	/* TC direct action */
 	if (cls_bpf->exts_integrated) {
 		if (tc_no_actions(cls_bpf->exts))
@@ -128,7 +131,7 @@ nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 		if (is_tcf_gact_shot(a))
 			return NN_ACT_TC_DROP;
 
-		if (is_tcf_mirred_redirect(a) &&
+		if (is_tcf_mirred_egress_redirect(a) &&
 		    tcf_mirred_ifindex(a) == nn->netdev->ifindex)
 			return NN_ACT_TC_REDIR;
 	}
@@ -233,9 +236,7 @@ static int nfp_net_bpf_stop(struct nfp_net *nn)
 	return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN);
 }
 
-int
-nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto,
-		    struct tc_cls_bpf_offload *cls_bpf)
+int nfp_net_bpf_offload(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf)
 {
 	struct nfp_bpf_result res;
 	dma_addr_t dma_addr;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
index 2800bbf65a89..d065235034d4 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c
@@ -63,9 +63,7 @@ static void nfp_netvf_get_mac_addr(struct nfp_net *nn)
 	u8 mac_addr[ETH_ALEN];
 
 	put_unaligned_be32(nn_readl(nn, NFP_NET_CFG_MACADDR + 0), &mac_addr[0]);
-	/* We can't do readw for NFP-3200 compatibility */
-	put_unaligned_be16(nn_readl(nn, NFP_NET_CFG_MACADDR + 4) >> 16,
-			   &mac_addr[4]);
+	put_unaligned_be16(nn_readw(nn, NFP_NET_CFG_MACADDR + 6), &mac_addr[4]);
 
 	if (!is_valid_ether_addr(mac_addr)) {
 		eth_hw_addr_random(nn->netdev);
@@ -86,7 +84,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 	int tx_bar_no, rx_bar_no;
 	u8 __iomem *ctrl_bar;
 	struct nfp_net *nn;
-	int is_nfp3200;
 	u32 startq;
 	int stride;
 	int err;
@@ -101,15 +98,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 		goto err_pci_disable;
 	}
 
-	switch (pdev->device) {
-	case PCI_DEVICE_NFP6000VF:
-		is_nfp3200 = 0;
-		break;
-	default:
-		err = -ENODEV;
-		goto err_pci_regions;
-	}
-
 	pci_set_master(pdev);
 
 	err = dma_set_mask_and_coherent(&pdev->dev,
@@ -149,15 +137,9 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 	} else {
 		switch (fw_ver.major) {
 		case 1 ... 4:
-			if (is_nfp3200) {
-				stride = 2;
-				tx_bar_no = NFP_NET_Q0_BAR;
-				rx_bar_no = NFP_NET_Q1_BAR;
-			} else {
-				stride = 4;
-				tx_bar_no = NFP_NET_Q0_BAR;
-				rx_bar_no = tx_bar_no;
-			}
+			stride = 4;
+			tx_bar_no = NFP_NET_Q0_BAR;
+			rx_bar_no = tx_bar_no;
 			break;
 		default:
 			dev_err(&pdev->dev, "Unsupported Firmware ABI %d.%d.%d.%d\n",
@@ -189,20 +171,10 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 		max_rx_rings = (rx_bar_sz / NFP_QCP_QUEUE_ADDR_SZ) / 2;
 	}
 
-	/* XXX Implement a workaround for THB-350 here.  Ideally, we
-	 * have a different PCI ID for A rev VFs.
-	 */
-	switch (pdev->device) {
-	case PCI_DEVICE_NFP6000VF:
-		startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ);
-		tx_bar_off = NFP_PCIE_QUEUE(startq);
-		startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ);
-		rx_bar_off = NFP_PCIE_QUEUE(startq);
-		break;
-	default:
-		err = -ENODEV;
-		goto err_ctrl_unmap;
-	}
+	startq = readl(ctrl_bar + NFP_NET_CFG_START_TXQ);
+	tx_bar_off = NFP_PCIE_QUEUE(startq);
+	startq = readl(ctrl_bar + NFP_NET_CFG_START_RXQ);
+	rx_bar_off = NFP_PCIE_QUEUE(startq);
 
 	/* Allocate and initialise the netdev */
 	nn = nfp_net_netdev_alloc(pdev, max_tx_rings, max_rx_rings);
@@ -214,7 +186,6 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev,
 	nn->fw_ver = fw_ver;
 	nn->ctrl_bar = ctrl_bar;
 	nn->is_vf = 1;
-	nn->is_nfp3200 = is_nfp3200;
 	nn->stride_tx = stride;
 	nn->stride_rx = stride;
 
diff --git a/drivers/net/ethernet/netx-eth.c b/drivers/net/ethernet/netx-eth.c
index adbc47f2d132..df4188cb43e0 100644
--- a/drivers/net/ethernet/netx-eth.c
+++ b/drivers/net/ethernet/netx-eth.c
@@ -304,7 +304,6 @@ static const struct net_device_ops netx_eth_netdev_ops = {
 	.ndo_start_xmit		= netx_eth_hard_start_xmit,
 	.ndo_tx_timeout		= netx_eth_timeout,
 	.ndo_set_rx_mode	= netx_eth_set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c
index 712d8bcb7d8c..119f6dca71f0 100644
--- a/drivers/net/ethernet/nuvoton/w90p910_ether.c
+++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c
@@ -915,7 +915,6 @@ static const struct net_device_ops w90p910_ether_netdev_ops = {
 	.ndo_set_mac_address	= w90p910_set_mac_address,
 	.ndo_do_ioctl		= w90p910_ether_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static void __init get_mac_address(struct net_device *dev)
diff --git a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c
index 9b0d7f463ff3..3913f07279d2 100644
--- a/drivers/net/ethernet/nvidia/forcedeth.c
+++ b/drivers/net/ethernet/nvidia/forcedeth.c
@@ -3008,17 +3008,12 @@ static int nv_change_mtu(struct net_device *dev, int new_mtu)
 	struct fe_priv *np = netdev_priv(dev);
 	int old_mtu;
 
-	if (new_mtu < 64 || new_mtu > np->pkt_limit)
-		return -EINVAL;
-
 	old_mtu = dev->mtu;
 	dev->mtu = new_mtu;
 
 	/* return early if the buffer sizes will not change */
 	if (old_mtu <= ETH_DATA_LEN && new_mtu <= ETH_DATA_LEN)
 		return 0;
-	if (old_mtu == new_mtu)
-		return 0;
 
 	/* synchronized against open : rtnl_lock() held by caller */
 	if (netif_running(dev)) {
@@ -5719,6 +5714,10 @@ static int nv_probe(struct pci_dev *pci_dev, const struct pci_device_id *id)
 	/* Add loopback capability to the device. */
 	dev->hw_features |= NETIF_F_LOOPBACK;
 
+	/* MTU range: 64 - 1500 or 9100 */
+	dev->min_mtu = ETH_ZLEN + ETH_FCS_LEN;
+	dev->max_mtu = np->pkt_limit;
+
 	np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG;
 	if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) ||
 	    (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) ||
diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 8e13ec84c538..dd6b0d0f7fa5 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1256,7 +1256,6 @@ static const struct net_device_ops lpc_netdev_ops = {
 	.ndo_do_ioctl		= lpc_eth_ioctl,
 	.ndo_set_mac_address	= lpc_set_mac_address,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int lpc_eth_drv_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
index 3cd87a41ac92..d461f419948e 100644
--- a/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
+++ b/drivers/net/ethernet/oki-semi/pch_gbe/pch_gbe_main.c
@@ -2260,16 +2260,10 @@ static int pch_gbe_set_mac(struct net_device *netdev, void *addr)
 static int pch_gbe_change_mtu(struct net_device *netdev, int new_mtu)
 {
 	struct pch_gbe_adapter *adapter = netdev_priv(netdev);
-	int max_frame;
+	int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
 	unsigned long old_rx_buffer_len = adapter->rx_buffer_len;
 	int err;
 
-	max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
-	if ((max_frame < ETH_ZLEN + ETH_FCS_LEN) ||
-		(max_frame > PCH_GBE_MAX_JUMBO_FRAME_SIZE)) {
-		netdev_err(netdev, "Invalid MTU setting\n");
-		return -EINVAL;
-	}
 	if (max_frame <= PCH_GBE_FRAME_SIZE_2048)
 		adapter->rx_buffer_len = PCH_GBE_FRAME_SIZE_2048;
 	else if (max_frame <= PCH_GBE_FRAME_SIZE_4096)
@@ -2633,6 +2627,11 @@ static int pch_gbe_probe(struct pci_dev *pdev,
 	netdev->features = netdev->hw_features;
 	pch_gbe_set_ethtool_ops(netdev);
 
+	/* MTU range: 46 - 10300 */
+	netdev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	netdev->max_mtu = PCH_GBE_MAX_JUMBO_FRAME_SIZE -
+			  (ETH_HLEN + ETH_FCS_LEN);
+
 	pch_gbe_mac_load_mac_addr(&adapter->hw);
 	pch_gbe_mac_reset_hw(&adapter->hw);
 
diff --git a/drivers/net/ethernet/packetengines/hamachi.c b/drivers/net/ethernet/packetengines/hamachi.c
index 91be2f02ef1c..2d04679a923a 100644
--- a/drivers/net/ethernet/packetengines/hamachi.c
+++ b/drivers/net/ethernet/packetengines/hamachi.c
@@ -568,7 +568,6 @@ static const struct net_device_ops hamachi_netdev_ops = {
 	.ndo_start_xmit		= hamachi_start_xmit,
 	.ndo_get_stats		= hamachi_get_stats,
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_tx_timeout		= hamachi_tx_timeout,
diff --git a/drivers/net/ethernet/packetengines/yellowfin.c b/drivers/net/ethernet/packetengines/yellowfin.c
index fb1d1031b091..2a2ca5fa0c69 100644
--- a/drivers/net/ethernet/packetengines/yellowfin.c
+++ b/drivers/net/ethernet/packetengines/yellowfin.c
@@ -360,7 +360,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_stop 		= yellowfin_close,
 	.ndo_start_xmit 	= yellowfin_start_xmit,
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_do_ioctl 		= netdev_ioctl,
diff --git a/drivers/net/ethernet/pasemi/pasemi_mac.c b/drivers/net/ethernet/pasemi/pasemi_mac.c
index 2f4a837f0d6a..badfa1d562a4 100644
--- a/drivers/net/ethernet/pasemi/pasemi_mac.c
+++ b/drivers/net/ethernet/pasemi/pasemi_mac.c
@@ -53,7 +53,7 @@
  * - Multiqueue RX/TX
  */
 
-#define PE_MIN_MTU	64
+#define PE_MIN_MTU	(ETH_ZLEN + ETH_FCS_LEN)	/* 60 + 4 = 64, the previous limit */
 #define PE_MAX_MTU	9000
 #define PE_DEF_MTU	ETH_DATA_LEN
 
@@ -1611,9 +1611,6 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu)
 	int running;
 	int ret = 0;
 
-	if (new_mtu < PE_MIN_MTU || new_mtu > PE_MAX_MTU)
-		return -EINVAL;
-
 	running = netif_running(dev);
 
 	if (running) {
@@ -1635,7 +1632,7 @@ static int pasemi_mac_change_mtu(struct net_device *dev, int new_mtu)
 	}
 
 	/* Setup checksum channels if large MTU and none already allocated */
-	if (new_mtu > 1500 && !mac->num_cs) {
+	if (new_mtu > PE_DEF_MTU && !mac->num_cs) {
 		pasemi_mac_setup_csrings(mac);
 		if (!mac->num_cs) {
 			ret = -ENOMEM;
@@ -1757,6 +1754,11 @@ pasemi_mac_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev->netdev_ops = &pasemi_netdev_ops;
 	dev->mtu = PE_DEF_MTU;
+
+	/* MTU range: 64 - 9000 */
+	dev->min_mtu = PE_MIN_MTU;
+	dev->max_mtu = PE_MAX_MTU;
+
 	/* 1500 MTU + ETH_HLEN + VLAN_HLEN + 2 64B cachelines */
 	mac->bufsz = dev->mtu + ETH_HLEN + ETH_FCS_LEN + LOCAL_SKB_ALIGN + 128;
 
diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig
index 32f2a45f4ab2..3cfd10503446 100644
--- a/drivers/net/ethernet/qlogic/Kconfig
+++ b/drivers/net/ethernet/qlogic/Kconfig
@@ -110,4 +110,7 @@ config QEDE
 config QED_RDMA
 	bool
 
+config QED_ISCSI
+	bool
+
 endif # NET_VENDOR_QLOGIC
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
index 2b10f1bcd151..a996801d442d 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_hw.c
@@ -987,20 +987,8 @@ int netxen_send_lro_cleanup(struct netxen_adapter *adapter)
 int netxen_nic_change_mtu(struct net_device *netdev, int mtu)
 {
 	struct netxen_adapter *adapter = netdev_priv(netdev);
-	int max_mtu;
 	int rc = 0;
 
-	if (NX_IS_REVISION_P3(adapter->ahw.revision_id))
-		max_mtu = P3_MAX_MTU;
-	else
-		max_mtu = P2_MAX_MTU;
-
-	if (mtu > max_mtu) {
-		printk(KERN_ERR "%s: mtu > %d bytes unsupported\n",
-				netdev->name, max_mtu);
-		return -EINVAL;
-	}
-
 	if (adapter->set_mtu)
 		rc = adapter->set_mtu(adapter, mtu);
 
diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
index 7a0281a36c28..561fb94c7267 100644
--- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
+++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c
@@ -1572,6 +1572,13 @@ netxen_nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			adapter->physical_port = i;
 	}
 
+	/* MTU range: 0 - 8000 (P2) or 9600 (P3) */
+	netdev->min_mtu = 0;
+	if (NX_IS_REVISION_P3(adapter->ahw.revision_id))
+		netdev->max_mtu = P3_MAX_MTU;
+	else
+		netdev->max_mtu = P2_MAX_MTU;
+
 	netxen_nic_clear_stats(adapter);
 
 	err = netxen_setup_intr(adapter);
diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile
index 967acf322c09..729e43768e99 100644
--- a/drivers/net/ethernet/qlogic/qed/Makefile
+++ b/drivers/net/ethernet/qlogic/qed/Makefile
@@ -6,3 +6,4 @@ qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \
 qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o
 qed-$(CONFIG_QED_LL2) += qed_ll2.o
 qed-$(CONFIG_QED_RDMA) += qed_roce.o
+qed-$(CONFIG_QED_ISCSI) += qed_iscsi.o qed_ooo.o
diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 653bb5735f0c..44c184ebe3b0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -35,6 +35,7 @@ extern const struct qed_common_ops qed_common_ops_pass;
 
 #define QED_WFQ_UNIT	100
 
+#define ISCSI_BDQ_ID(_port_id) (_port_id)
 #define QED_WID_SIZE            (1024)
 #define QED_PF_DEMS_SIZE        (4)
 
@@ -154,7 +155,10 @@ struct qed_qm_iids {
 	u32 tids;
 };
 
-enum QED_RESOURCES {
+/* HW / FW resources, output of features supported below, most information
+ * is received from MFW.
+ */
+enum qed_resources {
 	QED_SB,
 	QED_L2_QUEUE,
 	QED_VPORT,
@@ -166,6 +170,7 @@ enum QED_RESOURCES {
 	QED_RDMA_CNQ_RAM,
 	QED_ILT,
 	QED_LL2_QUEUE,
+	QED_CMDQS_CQS,
 	QED_RDMA_STATS_QUEUE,
 	QED_MAX_RESC,
 };
@@ -174,6 +179,7 @@ enum QED_FEATURE {
 	QED_PF_L2_QUE,
 	QED_VF,
 	QED_RDMA_CNQ,
+	QED_VF_L2_QUE,
 	QED_MAX_FEATURES,
 };
 
@@ -195,6 +201,11 @@ enum qed_dev_cap {
 	QED_DEV_CAP_ROCE,
 };
 
+enum qed_wol_support {
+	QED_WOL_SUPPORT_NONE,
+	QED_WOL_SUPPORT_PME,
+};
+
 struct qed_hw_info {
 	/* PCI personality */
 	enum qed_pci_personality	personality;
@@ -226,15 +237,9 @@ struct qed_hw_info {
 	u32				port_mode;
 	u32				hw_mode;
 	unsigned long		device_capabilities;
-};
+	u16				mtu;
 
-struct qed_hw_cid_data {
-	u32	cid;
-	bool	b_cid_allocated;
-
-	/* Additional identifiers */
-	u16	opaque_fid;
-	u8	vport_id;
+	enum qed_wol_support b_wol_support;
 };
 
 /* maximun size of read/write commands (HW limit) */
@@ -378,7 +383,9 @@ struct qed_hwfn {
 	/* Protocol related */
 	bool				using_ll2;
 	struct qed_ll2_info		*p_ll2_info;
+	struct qed_ooo_info		*p_ooo_info;
 	struct qed_rdma_info		*p_rdma_info;
+	struct qed_iscsi_info		*p_iscsi_info;
 	struct qed_pf_params		pf_params;
 
 	bool b_rdma_enabled_in_prs;
@@ -403,9 +410,6 @@ struct qed_hwfn {
 
 	struct qed_dcbx_info		*p_dcbx_info;
 
-	struct qed_hw_cid_data		*p_tx_cids;
-	struct qed_hw_cid_data		*p_rx_cids;
-
 	struct qed_dmae_info		dmae_info;
 
 	/* QM init */
@@ -538,7 +542,9 @@ struct qed_dev {
 	u8				mcp_rev;
 	u8				boot_mode;
 
-	u8				wol;
+	/* WoL related configurations */
+	u8 wol_config;
+	u8 wol_mac[ETH_ALEN];
 
 	u32				int_mode;
 	enum qed_coalescing_mode	int_coalescing_mode;
@@ -578,6 +584,8 @@ struct qed_dev {
 	/* Linux specific here */
 	struct  qede_dev		*edev;
 	struct  pci_dev			*pdev;
+	u32 flags;
+#define QED_FLAG_STORAGE_STARTED	(BIT(0))
 	int				msg_enable;
 
 	struct pci_params		pci_params;
@@ -591,6 +599,7 @@ struct qed_dev {
 	union {
 		struct qed_common_cb_ops	*common;
 		struct qed_eth_cb_ops		*eth;
+		struct qed_iscsi_cb_ops		*iscsi;
 	} protocol_ops;
 	void				*ops_cookie;
 
@@ -600,7 +609,7 @@ struct qed_dev {
 	struct qed_cb_ll2_info		*ll2;
 	u8				ll2_mac_address[ETH_ALEN];
 #endif
-
+	DECLARE_HASHTABLE(connections, 10);
 	const struct firmware		*firmware;
 
 	u32 rdma_max_sge;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index edae5fc5fccd..3b2250021c5f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -29,8 +29,10 @@
 #include "qed_hw.h"
 #include "qed_init_ops.h"
 #include "qed_int.h"
+#include "qed_iscsi.h"
 #include "qed_ll2.h"
 #include "qed_mcp.h"
+#include "qed_ooo.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
 #include "qed_sriov.h"
@@ -137,15 +139,6 @@ void qed_resc_free(struct qed_dev *cdev)
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
-		kfree(p_hwfn->p_tx_cids);
-		p_hwfn->p_tx_cids = NULL;
-		kfree(p_hwfn->p_rx_cids);
-		p_hwfn->p_rx_cids = NULL;
-	}
-
-	for_each_hwfn(cdev, i) {
-		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-
 		qed_cxt_mngr_free(p_hwfn);
 		qed_qm_info_free(p_hwfn);
 		qed_spq_free(p_hwfn);
@@ -155,6 +148,10 @@ void qed_resc_free(struct qed_dev *cdev)
 #ifdef CONFIG_QED_LL2
 		qed_ll2_free(p_hwfn, p_hwfn->p_ll2_info);
 #endif
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+			qed_iscsi_free(p_hwfn, p_hwfn->p_iscsi_info);
+			qed_ooo_free(p_hwfn, p_hwfn->p_ooo_info);
+		}
 		qed_iov_free(p_hwfn);
 		qed_dmae_info_free(p_hwfn);
 		qed_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info);
@@ -411,6 +408,8 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 int qed_resc_alloc(struct qed_dev *cdev)
 {
+	struct qed_iscsi_info *p_iscsi_info;
+	struct qed_ooo_info *p_ooo_info;
 #ifdef CONFIG_QED_LL2
 	struct qed_ll2_info *p_ll2_info;
 #endif
@@ -425,23 +424,6 @@ int qed_resc_alloc(struct qed_dev *cdev)
 	if (!cdev->fw_data)
 		return -ENOMEM;
 
-	/* Allocate Memory for the Queue->CID mapping */
-	for_each_hwfn(cdev, i) {
-		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
-		int tx_size = sizeof(struct qed_hw_cid_data) *
-				     RESC_NUM(p_hwfn, QED_L2_QUEUE);
-		int rx_size = sizeof(struct qed_hw_cid_data) *
-				     RESC_NUM(p_hwfn, QED_L2_QUEUE);
-
-		p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL);
-		if (!p_hwfn->p_tx_cids)
-			goto alloc_no_mem;
-
-		p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL);
-		if (!p_hwfn->p_rx_cids)
-			goto alloc_no_mem;
-	}
-
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 		u32 n_eqes, num_cons;
@@ -533,6 +515,16 @@ int qed_resc_alloc(struct qed_dev *cdev)
 			p_hwfn->p_ll2_info = p_ll2_info;
 		}
 #endif
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+			p_iscsi_info = qed_iscsi_alloc(p_hwfn);
+			if (!p_iscsi_info)
+				goto alloc_no_mem;
+			p_hwfn->p_iscsi_info = p_iscsi_info;
+			p_ooo_info = qed_ooo_alloc(p_hwfn);
+			if (!p_ooo_info)
+				goto alloc_no_mem;
+			p_hwfn->p_ooo_info = p_ooo_info;
+		}
 
 		/* DMA info initialization */
 		rc = qed_dmae_info_alloc(p_hwfn);
@@ -586,6 +578,10 @@ void qed_resc_setup(struct qed_dev *cdev)
 		if (p_hwfn->using_ll2)
 			qed_ll2_setup(p_hwfn, p_hwfn->p_ll2_info);
 #endif
+		if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
+			qed_iscsi_setup(p_hwfn, p_hwfn->p_iscsi_info);
+			qed_ooo_setup(p_hwfn, p_hwfn->p_ooo_info);
+		}
 	}
 }
 
@@ -1057,8 +1053,10 @@ int qed_hw_init(struct qed_dev *cdev,
 		bool allow_npar_tx_switch,
 		const u8 *bin_fw_data)
 {
-	u32 load_code, param;
-	int rc, mfw_rc, i;
+	u32 load_code, param, drv_mb_param;
+	bool b_default_mtu = true;
+	struct qed_hwfn *p_hwfn;
+	int rc = 0, mfw_rc, i;
 
 	if ((int_mode == QED_INT_MODE_MSI) && (cdev->num_hwfns > 1)) {
 		DP_NOTICE(cdev, "MSI mode is not supported for CMT devices\n");
@@ -1074,6 +1072,12 @@ int qed_hw_init(struct qed_dev *cdev,
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
+		/* If management didn't provide a default, set one of our own */
+		if (!p_hwfn->hw_info.mtu) {
+			p_hwfn->hw_info.mtu = 1500;
+			b_default_mtu = false;
+		}
+
 		if (IS_VF(cdev)) {
 			p_hwfn->b_int_enabled = 1;
 			continue;
@@ -1157,6 +1161,38 @@ int qed_hw_init(struct qed_dev *cdev,
 		p_hwfn->hw_init_done = true;
 	}
 
+	if (IS_PF(cdev)) {
+		p_hwfn = QED_LEADING_HWFN(cdev);
+		drv_mb_param = (FW_MAJOR_VERSION << 24) |
+			       (FW_MINOR_VERSION << 16) |
+			       (FW_REVISION_VERSION << 8) |
+			       (FW_ENGINEERING_VERSION);
+		rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
+				 DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER,
+				 drv_mb_param, &load_code, &param);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to update firmware version\n");
+
+		if (!b_default_mtu) {
+			rc = qed_mcp_ov_update_mtu(p_hwfn, p_hwfn->p_main_ptt,
+						   p_hwfn->hw_info.mtu);
+			if (rc)
+				DP_INFO(p_hwfn,
+					"Failed to update default mtu\n");
+		}
+
+		rc = qed_mcp_ov_update_driver_state(p_hwfn,
+						    p_hwfn->p_main_ptt,
+						  QED_OV_DRIVER_STATE_DISABLED);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to update driver state\n");
+
+		rc = qed_mcp_ov_update_eswitch(p_hwfn, p_hwfn->p_main_ptt,
+					       QED_OV_ESWITCH_VEB);
+		if (rc)
+			DP_INFO(p_hwfn, "Failed to update eswitch mode\n");
+	}
+
 	return 0;
 }
 
@@ -1324,8 +1360,24 @@ int qed_hw_reset(struct qed_dev *cdev)
 {
 	int rc = 0;
 	u32 unload_resp, unload_param;
+	u32 wol_param;
 	int i;
 
+	switch (cdev->wol_config) {
+	case QED_OV_WOL_DISABLED:
+		wol_param = DRV_MB_PARAM_UNLOAD_WOL_DISABLED;
+		break;
+	case QED_OV_WOL_ENABLED:
+		wol_param = DRV_MB_PARAM_UNLOAD_WOL_ENABLED;
+		break;
+	default:
+		DP_NOTICE(cdev,
+			  "Unknown WoL configuration %02x\n", cdev->wol_config);
+		/* Fallthrough */
+	case QED_OV_WOL_DEFAULT:
+		wol_param = DRV_MB_PARAM_UNLOAD_WOL_MCP;
+	}
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -1354,8 +1406,7 @@ int qed_hw_reset(struct qed_dev *cdev)
 
 		/* Send unload command to MCP */
 		rc = qed_mcp_cmd(p_hwfn, p_hwfn->p_main_ptt,
-				 DRV_MSG_CODE_UNLOAD_REQ,
-				 DRV_MB_PARAM_UNLOAD_WOL_MCP,
+				 DRV_MSG_CODE_UNLOAD_REQ, wol_param,
 				 &unload_resp, &unload_param);
 		if (rc) {
 			DP_NOTICE(p_hwfn, "qed_hw_reset: UNLOAD_REQ failed\n");
@@ -1421,6 +1472,7 @@ static void get_function_id(struct qed_hwfn *p_hwfn)
 static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 {
 	u32 *feat_num = p_hwfn->hw_info.feat_num;
+	struct qed_sb_cnt_info sb_cnt_info;
 	int num_features = 1;
 
 	if (IS_ENABLED(CONFIG_QED_RDMA) &&
@@ -1439,53 +1491,257 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn)
 	feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) /
 						num_features,
 					RESC_NUM(p_hwfn, QED_L2_QUEUE));
-	DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
-		   "#PF_L2_QUEUES=%d #SBS=%d num_features=%d\n",
-		   feat_num[QED_PF_L2_QUE], RESC_NUM(p_hwfn, QED_SB),
-		   num_features);
+
+	memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+	qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+	feat_num[QED_VF_L2_QUE] =
+	    min_t(u32,
+		  RESC_NUM(p_hwfn, QED_L2_QUEUE) -
+		  FEAT_NUM(p_hwfn, QED_PF_L2_QUE), sb_cnt_info.sb_iov_cnt);
+
+	DP_VERBOSE(p_hwfn,
+		   NETIF_MSG_PROBE,
+		   "#PF_L2_QUEUES=%d VF_L2_QUEUES=%d #ROCE_CNQ=%d #SBS=%d num_features=%d\n",
+		   (int)FEAT_NUM(p_hwfn, QED_PF_L2_QUE),
+		   (int)FEAT_NUM(p_hwfn, QED_VF_L2_QUE),
+		   (int)FEAT_NUM(p_hwfn, QED_RDMA_CNQ),
+		   RESC_NUM(p_hwfn, QED_SB), num_features);
 }
 
-static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
+static enum resource_id_enum qed_hw_get_mfw_res_id(enum qed_resources res_id)
+{
+	enum resource_id_enum mfw_res_id = RESOURCE_NUM_INVALID;
+
+	switch (res_id) {
+	case QED_SB:
+		mfw_res_id = RESOURCE_NUM_SB_E;
+		break;
+	case QED_L2_QUEUE:
+		mfw_res_id = RESOURCE_NUM_L2_QUEUE_E;
+		break;
+	case QED_VPORT:
+		mfw_res_id = RESOURCE_NUM_VPORT_E;
+		break;
+	case QED_RSS_ENG:
+		mfw_res_id = RESOURCE_NUM_RSS_ENGINES_E;
+		break;
+	case QED_PQ:
+		mfw_res_id = RESOURCE_NUM_PQ_E;
+		break;
+	case QED_RL:
+		mfw_res_id = RESOURCE_NUM_RL_E;
+		break;
+	case QED_MAC:
+	case QED_VLAN:
+		/* Each VFC resource can accommodate both a MAC and a VLAN */
+		mfw_res_id = RESOURCE_VFC_FILTER_E;
+		break;
+	case QED_ILT:
+		mfw_res_id = RESOURCE_ILT_E;
+		break;
+	case QED_LL2_QUEUE:
+		mfw_res_id = RESOURCE_LL2_QUEUE_E;
+		break;
+	case QED_RDMA_CNQ_RAM:
+	case QED_CMDQS_CQS:
+		/* CNQ/CMDQS are the same resource */
+		mfw_res_id = RESOURCE_CQS_E;
+		break;
+	case QED_RDMA_STATS_QUEUE:
+		mfw_res_id = RESOURCE_RDMA_STATS_QUEUE_E;
+		break;
+	default:
+		break;
+	}
+
+	return mfw_res_id;
+}
+
+static u32 qed_hw_get_dflt_resc_num(struct qed_hwfn *p_hwfn,
+				    enum qed_resources res_id)
 {
-	u8 enabled_func_idx = p_hwfn->enabled_func_idx;
-	u32 *resc_start = p_hwfn->hw_info.resc_start;
 	u8 num_funcs = p_hwfn->num_funcs_on_engine;
-	u32 *resc_num = p_hwfn->hw_info.resc_num;
 	struct qed_sb_cnt_info sb_cnt_info;
-	int i, max_vf_vlan_filters;
+	u32 dflt_resc_num = 0;
 
-	memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+	switch (res_id) {
+	case QED_SB:
+		memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
+		qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+		dflt_resc_num = sb_cnt_info.sb_cnt;
+		break;
+	case QED_L2_QUEUE:
+		dflt_resc_num = MAX_NUM_L2_QUEUES_BB / num_funcs;
+		break;
+	case QED_VPORT:
+		dflt_resc_num = MAX_NUM_VPORTS_BB / num_funcs;
+		break;
+	case QED_RSS_ENG:
+		dflt_resc_num = ETH_RSS_ENGINE_NUM_BB / num_funcs;
+		break;
+	case QED_PQ:
+		/* The granularity of the PQs is 8 */
+		dflt_resc_num = MAX_QM_TX_QUEUES_BB / num_funcs;
+		dflt_resc_num &= ~0x7;
+		break;
+	case QED_RL:
+		dflt_resc_num = MAX_QM_GLOBAL_RLS / num_funcs;
+		break;
+	case QED_MAC:
+	case QED_VLAN:
+		/* Each VFC resource can accommodate both a MAC and a VLAN */
+		dflt_resc_num = ETH_NUM_MAC_FILTERS / num_funcs;
+		break;
+	case QED_ILT:
+		dflt_resc_num = PXP_NUM_ILT_RECORDS_BB / num_funcs;
+		break;
+	case QED_LL2_QUEUE:
+		dflt_resc_num = MAX_NUM_LL2_RX_QUEUES / num_funcs;
+		break;
+	case QED_RDMA_CNQ_RAM:
+	case QED_CMDQS_CQS:
+		/* CNQ/CMDQS are the same resource */
+		dflt_resc_num = NUM_OF_CMDQS_CQS / num_funcs;
+		break;
+	case QED_RDMA_STATS_QUEUE:
+		dflt_resc_num = RDMA_NUM_STATISTIC_COUNTERS_BB / num_funcs;
+		break;
+	default:
+		break;
+	}
 
-#ifdef CONFIG_QED_SRIOV
-	max_vf_vlan_filters = QED_ETH_MAX_VF_NUM_VLAN_FILTERS;
-#else
-	max_vf_vlan_filters = 0;
-#endif
+	return dflt_resc_num;
+}
+
+static const char *qed_hw_get_resc_name(enum qed_resources res_id)
+{
+	switch (res_id) {
+	case QED_SB:
+		return "SB";
+	case QED_L2_QUEUE:
+		return "L2_QUEUE";
+	case QED_VPORT:
+		return "VPORT";
+	case QED_RSS_ENG:
+		return "RSS_ENG";
+	case QED_PQ:
+		return "PQ";
+	case QED_RL:
+		return "RL";
+	case QED_MAC:
+		return "MAC";
+	case QED_VLAN:
+		return "VLAN";
+	case QED_RDMA_CNQ_RAM:
+		return "RDMA_CNQ_RAM";
+	case QED_ILT:
+		return "ILT";
+	case QED_LL2_QUEUE:
+		return "LL2_QUEUE";
+	case QED_CMDQS_CQS:
+		return "CMDQS_CQS";
+	case QED_RDMA_STATS_QUEUE:
+		return "RDMA_STATS_QUEUE";
+	default:
+		return "UNKNOWN_RESOURCE";
+	}
+}
 
-	qed_int_get_num_sbs(p_hwfn, &sb_cnt_info);
+static int qed_hw_set_resc_info(struct qed_hwfn *p_hwfn,
+				enum qed_resources res_id)
+{
+	u32 dflt_resc_num = 0, dflt_resc_start = 0, mcp_resp, mcp_param;
+	u32 *p_resc_num, *p_resc_start;
+	struct resource_info resc_info;
+	int rc;
+
+	p_resc_num = &RESC_NUM(p_hwfn, res_id);
+	p_resc_start = &RESC_START(p_hwfn, res_id);
+
+	/* Default values assume that each function received an equal share */
+	dflt_resc_num = qed_hw_get_dflt_resc_num(p_hwfn, res_id);
+	if (!dflt_resc_num) {
+		DP_ERR(p_hwfn,
+		       "Failed to get default amount for resource %d [%s]\n",
+		       res_id, qed_hw_get_resc_name(res_id));
+		return -EINVAL;
+	}
+	dflt_resc_start = dflt_resc_num * p_hwfn->enabled_func_idx;
+
+	memset(&resc_info, 0, sizeof(resc_info));
+	resc_info.res_id = qed_hw_get_mfw_res_id(res_id);
+	if (resc_info.res_id == RESOURCE_NUM_INVALID) {
+		DP_ERR(p_hwfn,
+		       "Failed to match resource %d [%s] with the MFW resources\n",
+		       res_id, qed_hw_get_resc_name(res_id));
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_get_resc_info(p_hwfn, p_hwfn->p_main_ptt, &resc_info,
+				   &mcp_resp, &mcp_param);
+	if (rc) {
+		DP_NOTICE(p_hwfn,
+			  "MFW response failure for an allocation request for resource %d [%s]\n",
+			  res_id, qed_hw_get_resc_name(res_id));
+		return rc;
+	}
 
-	resc_num[QED_SB] = min_t(u32,
-				 (MAX_SB_PER_PATH_BB / num_funcs),
-				 sb_cnt_info.sb_cnt);
-	resc_num[QED_L2_QUEUE] = MAX_NUM_L2_QUEUES_BB / num_funcs;
-	resc_num[QED_VPORT] = MAX_NUM_VPORTS_BB / num_funcs;
-	resc_num[QED_RSS_ENG] = ETH_RSS_ENGINE_NUM_BB / num_funcs;
-	resc_num[QED_PQ] = MAX_QM_TX_QUEUES_BB / num_funcs;
-	resc_num[QED_RL] = min_t(u32, 64, resc_num[QED_VPORT]);
-	resc_num[QED_MAC] = ETH_NUM_MAC_FILTERS / num_funcs;
-	resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) /
-			     num_funcs;
-	resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs;
-	resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs;
-	resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs;
-	resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB /
-					 num_funcs;
-
-	for (i = 0; i < QED_MAX_RESC; i++)
-		resc_start[i] = resc_num[i] * enabled_func_idx;
+	/* Default driver values are applied in the following cases:
+	 * - The resource allocation MB command is not supported by the MFW
+	 * - There is an internal error in the MFW while processing the request
+	 * - The resource ID is unknown to the MFW
+	 */
+	if (mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_OK &&
+	    mcp_resp != FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED) {
+		DP_NOTICE(p_hwfn,
+			  "Resource %d [%s]: No allocation info was received [mcp_resp 0x%x]. Applying default values [num %d, start %d].\n",
+			  res_id,
+			  qed_hw_get_resc_name(res_id),
+			  mcp_resp, dflt_resc_num, dflt_resc_start);
+		*p_resc_num = dflt_resc_num;
+		*p_resc_start = dflt_resc_start;
+		goto out;
+	}
+
+	/* Special handling for status blocks; to be revised in the future */
+	if (res_id == QED_SB) {
+		resc_info.size -= 1;
+		resc_info.offset -= p_hwfn->enabled_func_idx;
+	}
+
+	*p_resc_num = resc_info.size;
+	*p_resc_start = resc_info.offset;
+
+out:
+	/* PQs have to divide by 8 [that's the HW granularity].
+	 * Reduce number so it would fit.
+	 */
+	if ((res_id == QED_PQ) && ((*p_resc_num % 8) || (*p_resc_start % 8))) {
+		DP_INFO(p_hwfn,
+			"PQs need to align by 8; Number %08x --> %08x, Start %08x --> %08x\n",
+			*p_resc_num,
+			(*p_resc_num) & ~0x7,
+			*p_resc_start, (*p_resc_start) & ~0x7);
+		*p_resc_num &= ~0x7;
+		*p_resc_start &= ~0x7;
+	}
+
+	return 0;
+}
+
+static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
+{
+	u8 res_id;
+	int rc;
+
+	for (res_id = 0; res_id < QED_MAX_RESC; res_id++) {
+		rc = qed_hw_set_resc_info(p_hwfn, res_id);
+		if (rc)
+			return rc;
+	}
 
 	/* Sanity for ILT */
-	if (RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB) {
+	if ((RESC_END(p_hwfn, QED_ILT) > PXP_NUM_ILT_RECORDS_BB)) {
 		DP_NOTICE(p_hwfn, "Can't assign ILT pages [%08x,...,%08x]\n",
 			  RESC_START(p_hwfn, QED_ILT),
 			  RESC_END(p_hwfn, QED_ILT) - 1);
@@ -1495,34 +1751,12 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn)
 	qed_hw_set_feat(p_hwfn);
 
 	DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE,
-		   "The numbers for each resource are:\n"
-		   "SB = %d start = %d\n"
-		   "L2_QUEUE = %d start = %d\n"
-		   "VPORT = %d start = %d\n"
-		   "PQ = %d start = %d\n"
-		   "RL = %d start = %d\n"
-		   "MAC = %d start = %d\n"
-		   "VLAN = %d start = %d\n"
-		   "ILT = %d start = %d\n"
-		   "LL2_QUEUE = %d start = %d\n",
-		   p_hwfn->hw_info.resc_num[QED_SB],
-		   p_hwfn->hw_info.resc_start[QED_SB],
-		   p_hwfn->hw_info.resc_num[QED_L2_QUEUE],
-		   p_hwfn->hw_info.resc_start[QED_L2_QUEUE],
-		   p_hwfn->hw_info.resc_num[QED_VPORT],
-		   p_hwfn->hw_info.resc_start[QED_VPORT],
-		   p_hwfn->hw_info.resc_num[QED_PQ],
-		   p_hwfn->hw_info.resc_start[QED_PQ],
-		   p_hwfn->hw_info.resc_num[QED_RL],
-		   p_hwfn->hw_info.resc_start[QED_RL],
-		   p_hwfn->hw_info.resc_num[QED_MAC],
-		   p_hwfn->hw_info.resc_start[QED_MAC],
-		   p_hwfn->hw_info.resc_num[QED_VLAN],
-		   p_hwfn->hw_info.resc_start[QED_VLAN],
-		   p_hwfn->hw_info.resc_num[QED_ILT],
-		   p_hwfn->hw_info.resc_start[QED_ILT],
-		   RESC_NUM(p_hwfn, QED_LL2_QUEUE),
-		   RESC_START(p_hwfn, QED_LL2_QUEUE));
+		   "The numbers for each resource are:\n");
+	for (res_id = 0; res_id < QED_MAX_RESC; res_id++)
+		DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, "%s = %d start = %d\n",
+			   qed_hw_get_resc_name(res_id),
+			   RESC_NUM(p_hwfn, res_id),
+			   RESC_START(p_hwfn, res_id));
 
 	return 0;
 }
@@ -1801,6 +2035,9 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
 
 	qed_get_num_funcs(p_hwfn, p_ptt);
 
+	if (qed_mcp_is_init(p_hwfn))
+		p_hwfn->hw_info.mtu = p_hwfn->mcp_info->func_info.mtu;
+
 	return qed_hw_get_resc(p_hwfn);
 }
 
@@ -1975,8 +2212,13 @@ int qed_hw_prepare(struct qed_dev *cdev,
 
 void qed_hw_remove(struct qed_dev *cdev)
 {
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
 	int i;
 
+	if (IS_PF(cdev))
+		qed_mcp_ov_update_driver_state(p_hwfn, p_hwfn->p_main_ptt,
+					       QED_OV_DRIVER_STATE_NOT_LOADED);
+
 	for_each_hwfn(cdev, i) {
 		struct qed_hwfn *p_hwfn = &cdev->hwfns[i];
 
@@ -2037,12 +2279,12 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 {
 	void **pp_virt_addr_tbl = p_chain->pbl.pp_virt_addr_tbl;
 	u32 page_cnt = p_chain->page_cnt, i, pbl_size;
-	u8 *p_pbl_virt = p_chain->pbl.p_virt_table;
+	u8 *p_pbl_virt = p_chain->pbl_sp.p_virt_table;
 
 	if (!pp_virt_addr_tbl)
 		return;
 
-	if (!p_chain->pbl.p_virt_table)
+	if (!p_pbl_virt)
 		goto out;
 
 	for (i = 0; i < page_cnt; i++) {
@@ -2060,7 +2302,8 @@ static void qed_chain_free_pbl(struct qed_dev *cdev, struct qed_chain *p_chain)
 	pbl_size = page_cnt * QED_CHAIN_PBL_ENTRY_SIZE;
 	dma_free_coherent(&cdev->pdev->dev,
 			  pbl_size,
-			  p_chain->pbl.p_virt_table, p_chain->pbl.p_phys_table);
+			  p_chain->pbl_sp.p_virt_table,
+			  p_chain->pbl_sp.p_phys_table);
 out:
 	vfree(p_chain->pbl.pp_virt_addr_tbl);
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 2777d5bb4380..785ab03683eb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -8526,6 +8526,41 @@ struct mdump_config_stc {
 	u32 valid_logs;
 };
 
+enum resource_id_enum {	/* type of resource_info.res_id */
+	RESOURCE_NUM_SB_E = 0,
+	RESOURCE_NUM_L2_QUEUE_E = 1,
+	RESOURCE_NUM_VPORT_E = 2,
+	RESOURCE_NUM_VMQ_E = 3,
+	RESOURCE_FACTOR_NUM_RSS_PF_E = 4,
+	RESOURCE_FACTOR_RSS_PER_VF_E = 5,
+	RESOURCE_NUM_RL_E = 6,
+	RESOURCE_NUM_PQ_E = 7,
+	RESOURCE_NUM_VF_E = 8,
+	RESOURCE_VFC_FILTER_E = 9,
+	RESOURCE_ILT_E = 10,
+	RESOURCE_CQS_E = 11,
+	RESOURCE_GFT_PROFILES_E = 12,
+	RESOURCE_NUM_TC_E = 13,
+	RESOURCE_NUM_RSS_ENGINES_E = 14,
+	RESOURCE_LL2_QUEUE_E = 15,
+	RESOURCE_RDMA_STATS_QUEUE_E = 16,
+	RESOURCE_MAX_NUM,	/* one past the last ID above, i.e. 17 */
+	RESOURCE_NUM_INVALID = 0xFFFFFFFF	/* assumed "invalid" marker */
+};
+
+/* Resource ID is to be filled by the driver in the MB request.
+ * Size, offset & flags are to be filled by the MFW in the MB response.
+ */
+struct resource_info {
+	enum resource_id_enum res_id;	/* set by the driver in the request */
+	u32 size;		/* number of allocated resources */
+	u32 offset;		/* offset of the 1st resource */
+	u32 vf_size;		/* presumably per-VF size - TODO confirm */
+	u32 vf_offset;		/* presumably per-VF offset - TODO confirm */
+	u32 flags;		/* set by the MFW; bit values below */
+#define RESOURCE_ELEMENT_STRICT (1 << 0)
+};
+
 union drv_union_data {
 	u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
 	struct mcp_mac wol_mac;
@@ -8543,9 +8578,9 @@ union drv_union_data {
 	struct drv_version_stc drv_version;
 
 	struct lan_stats_stc lan_stats;
-	u64 reserved_stats[11];
 	struct ocbb_data_stc ocbb_info;
 	struct temperature_status_stc temp_info;
+	struct resource_info resource;
 	struct bist_nvm_image_att nvm_image_att;
 	struct mdump_config_stc mdump_config;
 };
@@ -8561,9 +8596,19 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_INIT_PHY			0x22000000
 #define DRV_MSG_CODE_LINK_RESET			0x23000000
 #define DRV_MSG_CODE_SET_DCBX			0x25000000
+#define DRV_MSG_CODE_OV_UPDATE_CURR_CFG         0x26000000
+#define DRV_MSG_CODE_OV_UPDATE_BUS_NUM          0x27000000
+#define DRV_MSG_CODE_OV_UPDATE_BOOT_PROGRESS    0x28000000
+#define DRV_MSG_CODE_OV_UPDATE_STORM_FW_VER     0x29000000
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE     0x31000000
+#define DRV_MSG_CODE_BW_UPDATE_ACK              0x32000000
+#define DRV_MSG_CODE_OV_UPDATE_MTU              0x33000000
+#define DRV_MSG_CODE_OV_UPDATE_WOL              0x38000000
+#define DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE     0x39000000
 
 #define DRV_MSG_CODE_BW_UPDATE_ACK		0x32000000
 #define DRV_MSG_CODE_NIG_DRAIN			0x30000000
+#define DRV_MSG_GET_RESOURCE_ALLOC_MSG          0x34000000
 #define DRV_MSG_CODE_VF_DISABLED_DONE		0xc0000000
 #define DRV_MSG_CODE_CFG_VF_MSIX		0xc0010000
 #define DRV_MSG_CODE_NVM_GET_FILE_ATT		0x00030000
@@ -8571,6 +8616,13 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_MCP_RESET			0x00090000
 #define DRV_MSG_CODE_SET_VERSION		0x000f0000
 #define DRV_MSG_CODE_MCP_HALT                   0x00100000
+#define DRV_MSG_CODE_SET_VMAC                   0x00110000
+#define DRV_MSG_CODE_GET_VMAC                   0x00120000
+#define DRV_MSG_CODE_VMAC_TYPE_SHIFT            4
+#define DRV_MSG_CODE_VMAC_TYPE_MASK             0x30
+#define DRV_MSG_CODE_VMAC_TYPE_MAC              1
+#define DRV_MSG_CODE_VMAC_TYPE_WWNN             2
+#define DRV_MSG_CODE_VMAC_TYPE_WWPN             3
 
 #define DRV_MSG_CODE_GET_STATS                  0x00130000
 #define DRV_MSG_CODE_STATS_TYPE_LAN             1
@@ -8582,11 +8634,16 @@ struct public_drv_mb {
 
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE		0x00200000
+#define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL	0x002b0000
+#define DRV_MSG_CODE_OS_WOL			0x002e0000
 
 #define DRV_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
 	u32 drv_mb_param;
-#define DRV_MB_PARAM_UNLOAD_WOL_MCP		0x00000001
+#define DRV_MB_PARAM_UNLOAD_WOL_UNKNOWN         0x00000000
+#define DRV_MB_PARAM_UNLOAD_WOL_MCP             0x00000001
+#define DRV_MB_PARAM_UNLOAD_WOL_DISABLED        0x00000002
+#define DRV_MB_PARAM_UNLOAD_WOL_ENABLED         0x00000003
 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK		0x000000FF
 #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT		3
 
@@ -8599,13 +8656,59 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_LLDP_SEND_MASK		0x00000001
 #define DRV_MB_PARAM_LLDP_SEND_SHIFT		0
 
+#define DRV_MB_PARAM_OV_CURR_CFG_SHIFT		0
+#define DRV_MB_PARAM_OV_CURR_CFG_MASK		0x0000000F
+#define DRV_MB_PARAM_OV_CURR_CFG_NONE		0
+#define DRV_MB_PARAM_OV_CURR_CFG_OS		1
+#define DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC	2
+#define DRV_MB_PARAM_OV_CURR_CFG_OTHER		3
+
+#define DRV_MB_PARAM_OV_STORM_FW_VER_SHIFT	0
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MASK	0xFFFFFFFF
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MAJOR_MASK	0xFF000000
+#define DRV_MB_PARAM_OV_STORM_FW_VER_MINOR_MASK	0x00FF0000
+#define DRV_MB_PARAM_OV_STORM_FW_VER_BUILD_MASK	0x0000FF00
+#define DRV_MB_PARAM_OV_STORM_FW_VER_DROP_MASK	0x000000FF
+
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_SHIFT	0
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_MASK	0xF
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_UNKNOWN	0x1
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED	0x2
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_LOADING	0x3
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED	0x4
+#define DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE	0x5
+
+#define DRV_MB_PARAM_OV_MTU_SIZE_SHIFT	0
+#define DRV_MB_PARAM_OV_MTU_SIZE_MASK	0xFFFFFFFF
+
+#define DRV_MB_PARAM_WOL_MASK	(DRV_MB_PARAM_WOL_DEFAULT | \
+				 DRV_MB_PARAM_WOL_DISABLED | \
+				 DRV_MB_PARAM_WOL_ENABLED)
+#define DRV_MB_PARAM_WOL_DEFAULT	DRV_MB_PARAM_UNLOAD_WOL_MCP
+#define DRV_MB_PARAM_WOL_DISABLED	DRV_MB_PARAM_UNLOAD_WOL_DISABLED
+#define DRV_MB_PARAM_WOL_ENABLED	DRV_MB_PARAM_UNLOAD_WOL_ENABLED
+
+#define DRV_MB_PARAM_ESWITCH_MODE_MASK	(DRV_MB_PARAM_ESWITCH_MODE_NONE | \
+					 DRV_MB_PARAM_ESWITCH_MODE_VEB | \
+					 DRV_MB_PARAM_ESWITCH_MODE_VEPA)
+#define DRV_MB_PARAM_ESWITCH_MODE_NONE	0x0
+#define DRV_MB_PARAM_ESWITCH_MODE_VEB	0x1
+#define DRV_MB_PARAM_ESWITCH_MODE_VEPA	0x2
 
 #define DRV_MB_PARAM_SET_LED_MODE_OPER		0x0
 #define DRV_MB_PARAM_SET_LED_MODE_ON		0x1
 #define DRV_MB_PARAM_SET_LED_MODE_OFF		0x2
 
+	/* Resource Allocation params - Driver version support */
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK	0xFFFF0000
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT	16
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_MASK	0x0000FFFF
+#define DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT	0
+
 #define DRV_MB_PARAM_BIST_REGISTER_TEST		1
 #define DRV_MB_PARAM_BIST_CLOCK_TEST		2
+#define DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES	3
+#define DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX	4
 
 #define DRV_MB_PARAM_BIST_RC_UNKNOWN		0
 #define DRV_MB_PARAM_BIST_RC_PASSED		1
@@ -8614,6 +8717,8 @@ struct public_drv_mb {
 
 #define DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT	0
 #define DRV_MB_PARAM_BIST_TEST_INDEX_MASK	0x000000FF
+#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT	8
+#define DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_MASK		0x0000FF00
 
 	u32 fw_mb_header;
 #define FW_MSG_CODE_MASK			0xffff0000
@@ -8628,15 +8733,27 @@ struct public_drv_mb {
 #define FW_MSG_CODE_DRV_UNLOAD_PORT		0x20120000
 #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION		0x20130000
 #define FW_MSG_CODE_DRV_UNLOAD_DONE		0x21100000
+#define FW_MSG_CODE_RESOURCE_ALLOC_OK           0x34000000
+#define FW_MSG_CODE_RESOURCE_ALLOC_UNKNOWN      0x35000000
+#define FW_MSG_CODE_RESOURCE_ALLOC_DEPRECATED   0x36000000
 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE	0xb0010000
 
 #define FW_MSG_CODE_NVM_OK			0x00010000
 #define FW_MSG_CODE_OK				0x00160000
 
+#define FW_MSG_CODE_OS_WOL_SUPPORTED            0x00800000
+#define FW_MSG_CODE_OS_WOL_NOT_SUPPORTED        0x00810000
+
 #define FW_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
 	u32 fw_mb_param;
 
+	/* get pf rdma protocol command response */
+#define FW_MB_PARAM_GET_PF_RDMA_NONE		0x0
+#define FW_MB_PARAM_GET_PF_RDMA_ROCE		0x1
+#define FW_MB_PARAM_GET_PF_RDMA_IWARP		0x2
+#define FW_MB_PARAM_GET_PF_RDMA_BOTH		0x3
+
 	u32 drv_pulse_mb;
 #define DRV_PULSE_SEQ_MASK			0x00007fff
 #define DRV_PULSE_SYSTEM_TIME_MASK		0xffff0000
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 2adedc6fb6cf..bb74e1c10ffe 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -3030,6 +3030,31 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 			}
 		}
 	}
+
+	/* There's a possibility the igu_sb_cnt_iov doesn't properly reflect
+	 * the number of VF SBs [especially for first VF on engine, as we can't
+	 * differentiate between empty entries and its entries].
+	 * Since we don't really support more SBs than VFs today, prevent any
+	 * such configuration by sanitizing the number of SBs to equal the
+	 * number of VFs.
+	 */
+	if (IS_PF_SRIOV(p_hwfn)) {
+		u16 total_vfs = p_hwfn->cdev->p_iov_info->total_vfs;
+
+		if (total_vfs < p_igu_info->free_blks) {
+			DP_VERBOSE(p_hwfn,
+				   (NETIF_MSG_INTR | QED_MSG_IOV),
+				   "Limiting number of SBs for IOV - %04x --> %04x\n",
+				   p_igu_info->free_blks,
+				   p_hwfn->cdev->p_iov_info->total_vfs);
+			p_igu_info->free_blks = total_vfs;
+		} else if (total_vfs > p_igu_info->free_blks) {
+			DP_NOTICE(p_hwfn,
+				  "IGU has only %04x SBs for VFs while the device has %04x VFs\n",
+				  p_igu_info->free_blks, total_vfs);
+			return -EINVAL;
+		}
+	}
 	p_igu_info->igu_sb_cnt_iov = p_igu_info->free_blks;
 
 	DP_VERBOSE(
@@ -3163,7 +3188,12 @@ u16 qed_int_queue_id_from_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id)
 		return sb_id - p_info->igu_base_sb;
 	} else if ((sb_id >= p_info->igu_base_sb_iov) &&
 		   (sb_id < p_info->igu_base_sb_iov + p_info->igu_sb_cnt_iov)) {
-		return sb_id - p_info->igu_base_sb_iov + p_info->igu_sb_cnt;
+		/* We want the first VF queue to be adjacent to the
+		 * last PF queue. Since L2 queues can be partial to
+		 * SBs, we'll use the feature instead.
+		 */
+		return sb_id - p_info->igu_base_sb_iov +
+		       FEAT_NUM(p_hwfn, QED_PF_L2_QUE);
 	} else {
 		DP_NOTICE(p_hwfn, "SB %d not in range for function\n", sb_id);
 		return 0;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.c b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
new file mode 100644
index 000000000000..00efb1c4c57e
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.c
@@ -0,0 +1,1277 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/param.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/log2.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/stddef.h>
+#include <linux/string.h>
+#include <linux/version.h>
+#include <linux/workqueue.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/qed/qed_iscsi_if.h>
+#include "qed.h"
+#include "qed_cxt.h"
+#include "qed_dev_api.h"
+#include "qed_hsi.h"
+#include "qed_hw.h"
+#include "qed_int.h"
+#include "qed_iscsi.h"
+#include "qed_ll2.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+#include "qed_sriov.h"
+#include "qed_reg_addr.h"
+
+struct qed_iscsi_conn {	/* per-connection state behind the iSCSI ramrods */
+	struct list_head list_entry;	/* link on p_iscsi_info->free_list */
+	bool free_on_delete;	/* set by qed_iscsi_allocate_connection() */
+
+	u16 conn_id;
+	u32 icid;		/* used as SPQ cid and as ramrod fw_cid */
+	u32 fw_cid;
+
+	u8 layer_code;		/* goes into the ramrod header flags */
+	u8 offl_flags;		/* flags of the offload ramrod */
+	u8 connect_mode;
+	u32 initial_ack;
+	dma_addr_t sq_pbl_addr;
+	struct qed_chain r2tq;	/* r2tq/xhq/uhq: PBL-mode chains */
+	struct qed_chain xhq;
+	struct qed_chain uhq;
+
+	struct tcp_upload_params *tcp_upload_params_virt_addr;
+	dma_addr_t tcp_upload_params_phys_addr;	/* termination ramrod */
+	struct scsi_terminate_extra_params *queue_cnts_virt_addr;
+	dma_addr_t queue_cnts_phys_addr;	/* termination ramrod */
+	dma_addr_t syn_phy_addr;	/* option2 offload layout only */
+
+	u16 syn_ip_payload_length;	/* option2 offload layout only */
+	u8 local_mac[6];
+	u8 remote_mac[6];
+	u16 vlan_id;
+	u8 tcp_flags;
+	u8 ip_version;
+	u32 remote_ip[4];
+	u32 local_ip[4];
+	u8 ka_max_probe_cnt;
+	u8 dup_ack_theshold;
+	u32 rcv_next;
+	u32 snd_una;
+	u32 snd_next;
+	u32 snd_max;
+	u32 snd_wnd;
+	u32 rcv_wnd;
+	u32 snd_wl1;
+	u32 cwnd;
+	u32 ss_thresh;
+	u16 srtt;
+	u16 rtt_var;
+	u32 ts_time;
+	u32 ts_recent;
+	u32 ts_recent_age;
+	u32 total_rt;
+	u32 ka_timeout_delta;
+	u32 rt_timeout_delta;
+	u8 dup_ack_cnt;
+	u8 snd_wnd_probe_cnt;
+	u8 ka_probe_cnt;
+	u8 rt_cnt;
+	u32 flow_label;
+	u32 ka_timeout;
+	u32 ka_interval;
+	u32 max_rt_time;
+	u32 initial_rcv_wnd;
+	u8 ttl;
+	u8 tos_or_tc;
+	u16 remote_port;
+	u16 local_port;
+	u16 mss;
+	u8 snd_wnd_scale;
+	u8 rcv_wnd_scale;
+	u32 ts_ticks_per_second;
+	u16 da_timeout_value;
+	u8 ack_frequency;
+
+	u8 update_flag;		/* flags of the update ramrod */
+	u8 default_cq;		/* offload ramrod */
+	u32 max_seq_size;	/* update ramrod */
+	u32 max_recv_pdu_length;	/* update ramrod */
+	u32 max_send_pdu_length;	/* update ramrod */
+	u32 first_seq_length;	/* update ramrod */
+	u32 exp_stat_sn;	/* update ramrod */
+	u32 stat_sn;		/* offload ramrod */
+	u16 physical_q0;	/* PQ ids from qed_get_qm_pq() */
+	u16 physical_q1;
+	u8 abortive_dsconnect;	/* 'abortive' of the termination ramrod */
+};
+
+/* Build the ISCSI_RAMROD_CMD_ID_INIT_FUNC ramrod from the PF's iSCSI params,
+ * record the async event callback/context in p_hwfn->p_iscsi_info and post
+ * the ramrod. Returns qed_sp_init_request()'s error, else qed_spq_post()'s.
+ */
+static int
+qed_sp_iscsi_func_start(struct qed_hwfn *p_hwfn,
+			enum spq_mode comp_mode,
+			struct qed_spq_comp_cb *p_comp_addr,
+			void *event_context, iscsi_event_cb_t async_event_cb)
+{
+	struct iscsi_init_ramrod_params *p_ramrod = NULL;
+	struct scsi_init_func_queues *p_queue = NULL;
+	struct qed_iscsi_pf_params *p_params = NULL;
+	struct iscsi_spe_func_init *p_init = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	int rc = 0;
+	u16 val;
+	u8 i;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = qed_spq_get_cid(p_hwfn);
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_INIT_FUNC,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_init;
+	p_init = &p_ramrod->iscsi_init_spe;
+	p_params = &p_hwfn->pf_params.iscsi_pf_params;
+	p_queue = &p_init->q_params;
+
+	SET_FIELD(p_init->hdr.flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, ISCSI_SLOW_PATH_LAYER_CODE);
+	p_init->hdr.op_code = ISCSI_RAMROD_CMD_ID_INIT_FUNC;
+
+	val = p_params->half_way_close_timeout;
+	p_init->half_way_close_timeout = cpu_to_le16(val);
+	p_init->num_sq_pages_in_ring = p_params->num_sq_pages_in_ring;
+	p_init->num_r2tq_pages_in_ring = p_params->num_r2tq_pages_in_ring;
+	p_init->num_uhq_pages_in_ring = p_params->num_uhq_pages_in_ring;
+	p_init->func_params.log_page_size = p_params->log_page_size;
+	val = p_params->num_tasks;
+	p_init->func_params.num_tasks = cpu_to_le16(val);
+	p_init->debug_mode.flags = p_params->debug_mode;
+
+	DMA_REGPAIR_LE(p_queue->glbl_q_params_addr,
+		       p_params->glbl_q_params_addr);
+
+	val = p_params->cq_num_entries;
+	p_queue->cq_num_entries = cpu_to_le16(val);
+	val = p_params->cmdq_num_entries;
+	p_queue->cmdq_num_entries = cpu_to_le16(val);
+	p_queue->num_queues = p_params->num_queues;
+	p_queue->queue_relative_offset =
+	    (u8)p_hwfn->hw_info.resc_start[QED_CMDQS_CQS];
+	p_queue->cq_sb_pi = p_params->gl_rq_pi;
+	p_queue->cmdq_sb_pi = p_params->gl_cmd_pi;
+
+	for (i = 0; i < p_params->num_queues; i++) {
+		val = p_hwfn->sbs_info[i]->igu_sb_id;
+		p_queue->cq_cmdq_sb_num_arr[i] = cpu_to_le16(val);
+	}
+
+	p_queue->bdq_resource_id = ISCSI_BDQ_ID(p_hwfn->port_id);
+
+	DMA_REGPAIR_LE(p_queue->bdq_pbl_base_address[BDQ_ID_RQ],
+		       p_params->bdq_pbl_base_addr[BDQ_ID_RQ]);
+	p_queue->bdq_pbl_num_entries[BDQ_ID_RQ] =
+	    p_params->bdq_pbl_num_entries[BDQ_ID_RQ];
+	val = p_params->bdq_xoff_threshold[BDQ_ID_RQ];
+	p_queue->bdq_xoff_threshold[BDQ_ID_RQ] = cpu_to_le16(val);
+	val = p_params->bdq_xon_threshold[BDQ_ID_RQ];
+	p_queue->bdq_xon_threshold[BDQ_ID_RQ] = cpu_to_le16(val);
+
+	DMA_REGPAIR_LE(p_queue->bdq_pbl_base_address[BDQ_ID_IMM_DATA],
+		       p_params->bdq_pbl_base_addr[BDQ_ID_IMM_DATA]);
+	p_queue->bdq_pbl_num_entries[BDQ_ID_IMM_DATA] =
+	    p_params->bdq_pbl_num_entries[BDQ_ID_IMM_DATA];
+	val = p_params->bdq_xoff_threshold[BDQ_ID_IMM_DATA];
+	p_queue->bdq_xoff_threshold[BDQ_ID_IMM_DATA] = cpu_to_le16(val);
+	val = p_params->bdq_xon_threshold[BDQ_ID_IMM_DATA];
+	p_queue->bdq_xon_threshold[BDQ_ID_IMM_DATA] = cpu_to_le16(val);
+	val = p_params->rq_buffer_size;
+	p_queue->rq_buffer_size = cpu_to_le16(val);
+	SET_FIELD(p_queue->q_validity,
+		  SCSI_INIT_FUNC_QUEUES_RQ_VALID, 1);
+	if (p_params->is_target) {
+		if (p_queue->bdq_pbl_num_entries[BDQ_ID_IMM_DATA])
+			SET_FIELD(p_queue->q_validity,
+				  SCSI_INIT_FUNC_QUEUES_IMM_DATA_VALID, 1);
+		SET_FIELD(p_queue->q_validity,
+			  SCSI_INIT_FUNC_QUEUES_CMD_VALID, 1);
+	}
+	p_ramrod->tcp_init.two_msl_timer = cpu_to_le32(p_params->two_msl_timer);
+	val = p_params->tx_sws_timer;
+	p_ramrod->tcp_init.tx_sws_timer = cpu_to_le16(val);
+	p_ramrod->tcp_init.maxfinrt = p_params->max_fin_rt;
+
+	p_hwfn->p_iscsi_info->event_context = event_context;
+	p_hwfn->p_iscsi_info->event_cb = async_event_cb;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+/* Fill and post the OFFLOAD_CONN ramrod for p_conn. TCP state is written as
+ * tcp_offload_params, or as the option2 layout when TCP_ON_CHIP_1B is set.
+ */
+static int qed_sp_iscsi_conn_offload(struct qed_hwfn *p_hwfn,
+				     struct qed_iscsi_conn *p_conn,
+				     enum spq_mode comp_mode,
+				     struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_spe_conn_offload *p_ramrod = NULL;
+	struct tcp_offload_params_opt2 *p_tcp2 = NULL;
+	struct tcp_offload_params *p_tcp = NULL;
+	struct qed_spq_entry *p_ent = NULL;
+	struct qed_sp_init_data init_data;
+	union qed_qm_pq_params pq_params;
+	u16 pq0_id = 0, pq1_id = 0;
+	dma_addr_t r2tq_pbl_addr, xhq_pbl_addr, uhq_pbl_addr;
+	int rc = 0;
+	u32 dval;
+	u16 wval;
+	u8 i;
+	u16 *p;
+
+	/* Get SPQ entry */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_conn_offload;
+
+	/* Transmission PQ is the first of the PF */
+	memset(&pq_params, 0, sizeof(pq_params));
+	pq0_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
+	p_conn->physical_q0 = pq0_id;	/* host order; only the ramrod is LE */
+	p_ramrod->iscsi.physical_q0 = cpu_to_le16(pq0_id);
+
+	/* iSCSI Pure-ACK PQ */
+	pq_params.iscsi.q_idx = 1;
+	pq1_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ISCSI, &pq_params);
+	p_conn->physical_q1 = pq1_id;	/* host order; only the ramrod is LE */
+	p_ramrod->iscsi.physical_q1 = cpu_to_le16(pq1_id);
+
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_OFFLOAD_CONN;
+	SET_FIELD(p_ramrod->hdr.flags, ISCSI_SLOW_PATH_HDR_LAYER_CODE,
+		  p_conn->layer_code);
+
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+
+	DMA_REGPAIR_LE(p_ramrod->iscsi.sq_pbl_addr, p_conn->sq_pbl_addr);
+
+	r2tq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->r2tq);
+	DMA_REGPAIR_LE(p_ramrod->iscsi.r2tq_pbl_addr, r2tq_pbl_addr);
+
+	xhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->xhq);
+	DMA_REGPAIR_LE(p_ramrod->iscsi.xhq_pbl_addr, xhq_pbl_addr);
+
+	uhq_pbl_addr = qed_chain_get_pbl_phys(&p_conn->uhq);
+	DMA_REGPAIR_LE(p_ramrod->iscsi.uhq_pbl_addr, uhq_pbl_addr);
+
+	p_ramrod->iscsi.initial_ack = cpu_to_le32(p_conn->initial_ack);
+	p_ramrod->iscsi.flags = p_conn->offl_flags;
+	p_ramrod->iscsi.default_cq = p_conn->default_cq;
+	p_ramrod->iscsi.stat_sn = cpu_to_le32(p_conn->stat_sn);
+
+	if (!GET_FIELD(p_ramrod->iscsi.flags,
+		       ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B)) {
+		p_tcp = &p_ramrod->tcp;
+
+		p = (u16 *)p_conn->local_mac;
+		p_tcp->local_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp->local_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp->local_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p = (u16 *)p_conn->remote_mac;
+		p_tcp->remote_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp->remote_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp->remote_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p_tcp->vlan_id = cpu_to_le16(p_conn->vlan_id);
+
+		p_tcp->flags = p_conn->tcp_flags;
+		p_tcp->ip_version = p_conn->ip_version;
+		for (i = 0; i < 4; i++) {
+			dval = p_conn->remote_ip[i];
+			p_tcp->remote_ip[i] = cpu_to_le32(dval);
+			dval = p_conn->local_ip[i];
+			p_tcp->local_ip[i] = cpu_to_le32(dval);
+		}
+		p_tcp->ka_max_probe_cnt = p_conn->ka_max_probe_cnt;
+		p_tcp->dup_ack_theshold = p_conn->dup_ack_theshold;
+
+		p_tcp->rcv_next = cpu_to_le32(p_conn->rcv_next);
+		p_tcp->snd_una = cpu_to_le32(p_conn->snd_una);
+		p_tcp->snd_next = cpu_to_le32(p_conn->snd_next);
+		p_tcp->snd_max = cpu_to_le32(p_conn->snd_max);
+		p_tcp->snd_wnd = cpu_to_le32(p_conn->snd_wnd);
+		p_tcp->rcv_wnd = cpu_to_le32(p_conn->rcv_wnd);
+		p_tcp->snd_wl1 = cpu_to_le32(p_conn->snd_wl1);
+		p_tcp->cwnd = cpu_to_le32(p_conn->cwnd);
+		p_tcp->ss_thresh = cpu_to_le32(p_conn->ss_thresh);
+		p_tcp->srtt = cpu_to_le16(p_conn->srtt);
+		p_tcp->rtt_var = cpu_to_le16(p_conn->rtt_var);
+		p_tcp->ts_time = cpu_to_le32(p_conn->ts_time);
+		p_tcp->ts_recent = cpu_to_le32(p_conn->ts_recent);
+		p_tcp->ts_recent_age = cpu_to_le32(p_conn->ts_recent_age);
+		p_tcp->total_rt = cpu_to_le32(p_conn->total_rt);
+		dval = p_conn->ka_timeout_delta;
+		p_tcp->ka_timeout_delta = cpu_to_le32(dval);
+		dval = p_conn->rt_timeout_delta;
+		p_tcp->rt_timeout_delta = cpu_to_le32(dval);
+		p_tcp->dup_ack_cnt = p_conn->dup_ack_cnt;
+		p_tcp->snd_wnd_probe_cnt = p_conn->snd_wnd_probe_cnt;
+		p_tcp->ka_probe_cnt = p_conn->ka_probe_cnt;
+		p_tcp->rt_cnt = p_conn->rt_cnt;
+		p_tcp->flow_label = cpu_to_le32(p_conn->flow_label);
+		p_tcp->ka_timeout = cpu_to_le32(p_conn->ka_timeout);
+		p_tcp->ka_interval = cpu_to_le32(p_conn->ka_interval);
+		p_tcp->max_rt_time = cpu_to_le32(p_conn->max_rt_time);
+		dval = p_conn->initial_rcv_wnd;
+		p_tcp->initial_rcv_wnd = cpu_to_le32(dval);
+		p_tcp->ttl = p_conn->ttl;
+		p_tcp->tos_or_tc = p_conn->tos_or_tc;
+		p_tcp->remote_port = cpu_to_le16(p_conn->remote_port);
+		p_tcp->local_port = cpu_to_le16(p_conn->local_port);
+		p_tcp->mss = cpu_to_le16(p_conn->mss);
+		p_tcp->snd_wnd_scale = p_conn->snd_wnd_scale;
+		p_tcp->rcv_wnd_scale = p_conn->rcv_wnd_scale;
+		dval = p_conn->ts_ticks_per_second;
+		p_tcp->ts_ticks_per_second = cpu_to_le32(dval);
+		wval = p_conn->da_timeout_value;
+		p_tcp->da_timeout_value = cpu_to_le16(wval);
+		p_tcp->ack_frequency = p_conn->ack_frequency;
+		p_tcp->connect_mode = p_conn->connect_mode;
+	} else {
+		p_tcp2 =
+		    &((struct iscsi_spe_conn_offload_option2 *)p_ramrod)->tcp;
+
+		p = (u16 *)p_conn->local_mac;
+		p_tcp2->local_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp2->local_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp2->local_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p = (u16 *)p_conn->remote_mac;
+		p_tcp2->remote_mac_addr_hi = swab16(get_unaligned(p));
+		p_tcp2->remote_mac_addr_mid = swab16(get_unaligned(p + 1));
+		p_tcp2->remote_mac_addr_lo = swab16(get_unaligned(p + 2));
+
+		p_tcp2->vlan_id = cpu_to_le16(p_conn->vlan_id);
+		p_tcp2->flags = p_conn->tcp_flags;
+		p_tcp2->ip_version = p_conn->ip_version;
+		for (i = 0; i < 4; i++) {
+			dval = p_conn->remote_ip[i];
+			p_tcp2->remote_ip[i] = cpu_to_le32(dval);
+			dval = p_conn->local_ip[i];
+			p_tcp2->local_ip[i] = cpu_to_le32(dval);
+		}
+
+		p_tcp2->flow_label = cpu_to_le32(p_conn->flow_label);
+		p_tcp2->ttl = p_conn->ttl;
+		p_tcp2->tos_or_tc = p_conn->tos_or_tc;
+		p_tcp2->remote_port = cpu_to_le16(p_conn->remote_port);
+		p_tcp2->local_port = cpu_to_le16(p_conn->local_port);
+		p_tcp2->mss = cpu_to_le16(p_conn->mss);
+		p_tcp2->rcv_wnd_scale = p_conn->rcv_wnd_scale;
+		p_tcp2->connect_mode = p_conn->connect_mode;
+		wval = p_conn->syn_ip_payload_length;
+		p_tcp2->syn_ip_payload_length = cpu_to_le16(wval);
+		p_tcp2->syn_phy_addr_lo = DMA_LO_LE(p_conn->syn_phy_addr);
+		p_tcp2->syn_phy_addr_hi = DMA_HI_LE(p_conn->syn_phy_addr);
+	}
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+/* Post an ISCSI_RAMROD_CMD_ID_UPDATE_CONN ramrod built from p_conn's fields */
+static int qed_sp_iscsi_conn_update(struct qed_hwfn *p_hwfn,
+				    struct qed_iscsi_conn *p_conn,
+				    enum spq_mode comp_mode,
+				    struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_conn_update_ramrod_params *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent = NULL;
+	int rc;
+
+	/* Describe the request: connection CID, owner and completion mode */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.cid = p_conn->icid;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_UPDATE_CONN,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_conn_update;
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_UPDATE_CONN;
+	SET_FIELD(p_ramrod->hdr.flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+	/* Multi-byte ramrod fields are stored little-endian */
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+	p_ramrod->flags = p_conn->update_flag;
+	p_ramrod->max_seq_size = cpu_to_le32(p_conn->max_seq_size);
+	p_ramrod->max_recv_pdu_length =
+	    cpu_to_le32(p_conn->max_recv_pdu_length);
+	p_ramrod->max_send_pdu_length =
+	    cpu_to_le32(p_conn->max_send_pdu_length);
+	p_ramrod->first_seq_length = cpu_to_le32(p_conn->first_seq_length);
+	p_ramrod->exp_stat_sn = cpu_to_le32(p_conn->exp_stat_sn);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_terminate(struct qed_hwfn *p_hwfn,
+				       struct qed_iscsi_conn *p_conn,
+				       enum spq_mode comp_mode,
+				       struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_spe_conn_termination *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent = NULL;
+	int rc;
+
+	/* SPQ request for the termination ramrod, on this connection's CID */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.cid = p_conn->icid;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_TERMINATION_CONN,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_conn_terminate;
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_TERMINATION_CONN;
+	SET_FIELD(p_ramrod->hdr.flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+	p_ramrod->conn_id = cpu_to_le16(p_conn->conn_id);
+	p_ramrod->fw_cid = cpu_to_le32(p_conn->icid);
+	p_ramrod->abortive = p_conn->abortive_dsconnect;
+
+	DMA_REGPAIR_LE(p_ramrod->query_params_addr,
+		       p_conn->tcp_upload_params_phys_addr);
+	DMA_REGPAIR_LE(p_ramrod->queue_cnts_addr, p_conn->queue_cnts_phys_addr);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_conn_clear_sq(struct qed_hwfn *p_hwfn,
+				      struct qed_iscsi_conn *p_conn,
+				      enum spq_mode comp_mode,
+				      struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_slow_path_hdr *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent = NULL;
+	int rc;
+
+	/* SPQ request for the CLEAR_SQ ramrod, on this connection's CID */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.cid = p_conn->icid;
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_CLEAR_SQ,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_empty;	/* header-only ramrod */
+	p_ramrod->op_code = ISCSI_RAMROD_CMD_ID_CLEAR_SQ;
+	SET_FIELD(p_ramrod->flags,
+		  ISCSI_SLOW_PATH_HDR_LAYER_CODE, p_conn->layer_code);
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static int qed_sp_iscsi_func_stop(struct qed_hwfn *p_hwfn,
+				  enum spq_mode comp_mode,
+				  struct qed_spq_comp_cb *p_comp_addr)
+{
+	struct iscsi_spe_func_dstry *p_ramrod;
+	struct qed_sp_init_data init_data;
+	struct qed_spq_entry *p_ent = NULL;
+	int rc;
+
+	/* SPQ request for the DESTROY_FUNC ramrod, on the SPQ's own CID */
+	memset(&init_data, 0, sizeof(init_data));
+	init_data.comp_mode = comp_mode;
+	init_data.p_comp_data = p_comp_addr;
+	init_data.opaque_fid = p_hwfn->hw_info.opaque_fid;
+	init_data.cid = qed_spq_get_cid(p_hwfn);
+
+	rc = qed_sp_init_request(p_hwfn, &p_ent,
+				 ISCSI_RAMROD_CMD_ID_DESTROY_FUNC,
+				 PROTOCOLID_ISCSI, &init_data);
+	if (rc)
+		return rc;
+
+	p_ramrod = &p_ent->ramrod.iscsi_destroy;
+	p_ramrod->hdr.op_code = ISCSI_RAMROD_CMD_ID_DESTROY_FUNC;
+
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+static void __iomem *qed_iscsi_get_db_addr(struct qed_hwfn *p_hwfn, u32 cid)
+{
+	return (u8 __iomem *)p_hwfn->doorbells +	/* u8 *: byte offset */
+			     qed_db_addr(cid, DQ_DEMS_LEGACY);
+}
+
+static void __iomem *qed_iscsi_get_primary_bdq_prod(struct qed_hwfn *p_hwfn,
+						    u8 bdq_id)
+{
+	u8 func_id = ISCSI_BDQ_ID(p_hwfn->port_id);	/* keyed by port */
+	u8 __iomem *msdm_ram;
+
+	msdm_ram = (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_MSDM_RAM;
+	return msdm_ram + MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id);
+}
+
+static void __iomem *qed_iscsi_get_secondary_bdq_prod(struct qed_hwfn *p_hwfn,
+						      u8 bdq_id)
+{
+	u8 func_id = ISCSI_BDQ_ID(p_hwfn->port_id);	/* keyed by port */
+	u8 __iomem *tsdm_ram;
+
+	tsdm_ram = (u8 __iomem *)p_hwfn->regview + GTT_BAR0_MAP_REG_TSDM_RAM;
+	return tsdm_ram + TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id);
+}
+
+/* Zero p_conn's DMA buffers, then its chains; -ENOMEM if any is NULL */
+static int qed_iscsi_setup_connection(struct qed_hwfn *p_hwfn,
+				      struct qed_iscsi_conn *p_conn)
+{
+	if (!p_conn->queue_cnts_virt_addr)
+		return -ENOMEM;
+	memset(p_conn->queue_cnts_virt_addr, 0,
+	       sizeof(*p_conn->queue_cnts_virt_addr));
+
+	if (!p_conn->tcp_upload_params_virt_addr)
+		return -ENOMEM;
+	memset(p_conn->tcp_upload_params_virt_addr, 0,
+	       sizeof(*p_conn->tcp_upload_params_virt_addr));
+
+	/* Chains are cleared through qed_chain_pbl_zero_mem() */
+	if (!p_conn->r2tq.p_virt_addr)
+		return -ENOMEM;
+	qed_chain_pbl_zero_mem(&p_conn->r2tq);
+
+	if (!p_conn->uhq.p_virt_addr)
+		return -ENOMEM;
+	qed_chain_pbl_zero_mem(&p_conn->uhq);
+
+	if (!p_conn->xhq.p_virt_addr)
+		return -ENOMEM;
+	qed_chain_pbl_zero_mem(&p_conn->xhq);
+
+	return 0;
+}
+
+/* Hand out a connection through *p_out_conn: reuse the first entry of the
+ * free list if there is one, otherwise allocate a new connection together
+ * with its DMA buffers and r2tq/uhq/xhq chains. Returns 0 or -ENOMEM.
+ */
+static int qed_iscsi_allocate_connection(struct qed_hwfn *p_hwfn,
+					 struct qed_iscsi_conn **p_out_conn)
+{
+	u16 uhq_num_elements = 0, xhq_num_elements = 0, r2tq_num_elements = 0;
+	struct scsi_terminate_extra_params *p_q_cnts = NULL;
+	struct qed_iscsi_pf_params *p_params = NULL;
+	struct tcp_upload_params *p_tcp = NULL;
+	struct qed_iscsi_conn *p_conn = NULL;
+	struct device *dev;
+	int rc;
+
+	/* Reuse an entry from the free list when one is available */
+	spin_lock_bh(&p_hwfn->p_iscsi_info->lock);
+	if (!list_empty(&p_hwfn->p_iscsi_info->free_list))
+		p_conn = list_first_entry(&p_hwfn->p_iscsi_info->free_list,
+					  struct qed_iscsi_conn, list_entry);
+	if (p_conn)
+		list_del(&p_conn->list_entry);
+	spin_unlock_bh(&p_hwfn->p_iscsi_info->lock);
+	if (p_conn) {
+		*p_out_conn = p_conn;
+		return 0;
+	}
+
+	/* Free list is empty - allocate a new connection */
+	p_params = &p_hwfn->pf_params.iscsi_pf_params;
+	dev = &p_hwfn->cdev->pdev->dev;
+
+	p_conn = kzalloc(sizeof(*p_conn), GFP_KERNEL);
+	if (!p_conn)
+		return -ENOMEM;
+
+	/* DMA-coherent buffers; their bus addresses are handed to firmware */
+	p_q_cnts = dma_alloc_coherent(dev, sizeof(*p_q_cnts),
+				      &p_conn->queue_cnts_phys_addr,
+				      GFP_KERNEL);
+	if (!p_q_cnts)
+		goto err_free_conn;
+	p_conn->queue_cnts_virt_addr = p_q_cnts;
+
+	p_tcp = dma_alloc_coherent(dev, sizeof(*p_tcp),
+				   &p_conn->tcp_upload_params_phys_addr,
+				   GFP_KERNEL);
+	if (!p_tcp)
+		goto err_free_q_cnts;
+	p_conn->tcp_upload_params_virt_addr = p_tcp;
+
+	/* All three chains are PBL-mode with 16-bit counters */
+	r2tq_num_elements = p_params->num_r2tq_pages_in_ring *
+			    QED_CHAIN_PAGE_SIZE / 0x80;
+	rc = qed_chain_alloc(p_hwfn->cdev, QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL, QED_CHAIN_CNT_TYPE_U16,
+			     r2tq_num_elements, 0x80, &p_conn->r2tq);
+	if (rc)
+		goto err_free_tcp;
+
+	uhq_num_elements = p_params->num_uhq_pages_in_ring *
+			   QED_CHAIN_PAGE_SIZE / sizeof(struct iscsi_uhqe);
+	rc = qed_chain_alloc(p_hwfn->cdev, QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL, QED_CHAIN_CNT_TYPE_U16,
+			     uhq_num_elements, sizeof(struct iscsi_uhqe),
+			     &p_conn->uhq);
+	if (rc)
+		goto err_free_r2tq;
+
+	/* The XHQ is sized to hold as many elements as the UHQ */
+	xhq_num_elements = uhq_num_elements;
+	rc = qed_chain_alloc(p_hwfn->cdev, QED_CHAIN_USE_TO_CONSUME_PRODUCE,
+			     QED_CHAIN_MODE_PBL, QED_CHAIN_CNT_TYPE_U16,
+			     xhq_num_elements, sizeof(struct iscsi_xhqe),
+			     &p_conn->xhq);
+	if (rc)
+		goto err_free_uhq;
+
+	p_conn->free_on_delete = true;
+	*p_out_conn = p_conn;
+	return 0;
+
+	/* Unwind in reverse order of allocation */
+err_free_uhq:
+	qed_chain_free(p_hwfn->cdev, &p_conn->uhq);
+err_free_r2tq:
+	qed_chain_free(p_hwfn->cdev, &p_conn->r2tq);
+err_free_tcp:
+	dma_free_coherent(dev, sizeof(struct tcp_upload_params),
+			  p_conn->tcp_upload_params_virt_addr,
+			  p_conn->tcp_upload_params_phys_addr);
+err_free_q_cnts:
+	dma_free_coherent(dev, sizeof(struct scsi_terminate_extra_params),
+			  p_conn->queue_cnts_virt_addr,
+			  p_conn->queue_cnts_phys_addr);
+err_free_conn:
+	kfree(p_conn);
+
+	return -ENOMEM;
+}
+
+/* Acquire an iSCSI CID and bind it to a connection object.
+ *
+ * @p_in_conn is used when non-NULL; otherwise a connection is obtained from
+ * qed_iscsi_allocate_connection(). The connection is then passed through
+ * qed_iscsi_setup_connection() and has icid, conn_id and fw_cid filled in.
+ *
+ * Returns 0 and stores the connection in *p_out_conn on success. On failure
+ * the CID is released again and the error code is returned.
+ *
+ * NOTE(review): when setup fails, a connection that was obtained from
+ * qed_iscsi_allocate_connection() is neither freed nor put back on the free
+ * list by this function.
+ */
+static int qed_iscsi_acquire_connection(struct qed_hwfn *p_hwfn,
+					struct qed_iscsi_conn *p_in_conn,
+					struct qed_iscsi_conn **p_out_conn)
+{
+	struct qed_iscsi_info *p_info = p_hwfn->p_iscsi_info;
+	struct qed_iscsi_conn *p_conn = p_in_conn;
+	u32 icid;
+	int rc;
+
+	spin_lock_bh(&p_info->lock);
+	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ISCSI, &icid);
+	spin_unlock_bh(&p_info->lock);
+	if (rc)
+		return rc;
+
+	/* Only allocate when the caller did not supply a connection */
+	if (!p_conn)
+		rc = qed_iscsi_allocate_connection(p_hwfn, &p_conn);
+	if (rc)
+		goto release_cid;
+
+	rc = qed_iscsi_setup_connection(p_hwfn, p_conn);
+	if (rc)
+		goto release_cid;
+
+	p_conn->icid = icid;
+	p_conn->conn_id = (u16)icid;
+	p_conn->fw_cid = (p_hwfn->hw_info.opaque_fid << 16) | icid;
+
+	*p_out_conn = p_conn;
+
+	return 0;
+
+release_cid:
+	spin_lock_bh(&p_info->lock);
+	qed_cxt_release_cid(p_hwfn, icid);
+	spin_unlock_bh(&p_info->lock);
+
+	return rc;
+}
+
+/* Give a connection back: queue it at the tail of the free list, where a
+ * later qed_iscsi_allocate_connection() can pick it up, and release its CID.
+ * Both steps run under the iSCSI info lock.
+ */
+static void qed_iscsi_release_connection(struct qed_hwfn *p_hwfn,
+					 struct qed_iscsi_conn *p_conn)
+{
+	struct qed_iscsi_info *p_info = p_hwfn->p_iscsi_info;
+
+	spin_lock_bh(&p_info->lock);
+	list_add_tail(&p_conn->list_entry, &p_info->free_list);
+	qed_cxt_release_cid(p_hwfn, p_conn->icid);
+	spin_unlock_bh(&p_info->lock);
+}
+
+/* Allocate a zeroed qed_iscsi_info and initialise its (empty) free list of
+ * connections. Returns NULL if the allocation fails.
+ */
+struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn)
+{
+	struct qed_iscsi_info *p_info = kzalloc(sizeof(*p_info), GFP_KERNEL);
+
+	if (p_info)
+		INIT_LIST_HEAD(&p_info->free_list);
+
+	return p_info;
+}
+
+/* Initialise the spinlock embedded in @p_iscsi_info. @p_hwfn is unused. */
+void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+		     struct qed_iscsi_info *p_iscsi_info)
+{
+	spin_lock_init(&p_iscsi_info->lock);
+}
+
+/* Free @p_iscsi_info itself. Connections that may still be linked on its
+ * free_list are not touched by this function. @p_hwfn is unused.
+ */
+void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+		    struct qed_iscsi_info *p_iscsi_info)
+{
+	kfree(p_iscsi_info);
+}
+
+/* Copy this PF's tstorm_iscsi_stats_drv block with qed_memcpy_from() and
+ * store its RX byte/packet counters and the three threshold counters in
+ * @p_stats. The threshold counters are converted with le32_to_cpu().
+ */
+static void _qed_iscsi_get_tstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	u32 addr = BAR0_MAP_REG_TSDM_RAM +
+		   TSTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	struct tstorm_iscsi_stats_drv fw_stats;
+
+	memset(&fw_stats, 0, sizeof(fw_stats));
+	qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, addr, sizeof(fw_stats));
+
+	/* 64-bit counters */
+	p_stats->iscsi_rx_bytes_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_rx_bytes_cnt);
+	p_stats->iscsi_rx_packet_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_rx_packet_cnt);
+
+	/* 32-bit little-endian counters */
+	p_stats->iscsi_cmdq_threshold_cnt =
+	    le32_to_cpu(fw_stats.iscsi_cmdq_threshold_cnt);
+	p_stats->iscsi_rq_threshold_cnt =
+	    le32_to_cpu(fw_stats.iscsi_rq_threshold_cnt);
+	p_stats->iscsi_immq_threshold_cnt =
+	    le32_to_cpu(fw_stats.iscsi_immq_threshold_cnt);
+}
+
+/* Copy this PF's mstorm_iscsi_stats_drv block with qed_memcpy_from() and
+ * store its "dropped PDUs, task not valid" counter in @p_stats.
+ */
+static void _qed_iscsi_get_mstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	u32 addr = BAR0_MAP_REG_MSDM_RAM +
+		   MSTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	struct mstorm_iscsi_stats_drv fw_stats;
+
+	memset(&fw_stats, 0, sizeof(fw_stats));
+	qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, addr, sizeof(fw_stats));
+
+	p_stats->iscsi_rx_dropped_pdus_task_not_valid =
+	    HILO_64_REGPAIR(fw_stats.iscsi_rx_dropped_pdus_task_not_valid);
+}
+
+/* Copy this PF's ustorm_iscsi_stats_drv block with qed_memcpy_from() and
+ * store its RX data/R2T/total PDU counters in @p_stats.
+ */
+static void _qed_iscsi_get_ustats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	u32 addr = BAR0_MAP_REG_USDM_RAM +
+		   USTORM_ISCSI_RX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	struct ustorm_iscsi_stats_drv fw_stats;
+
+	memset(&fw_stats, 0, sizeof(fw_stats));
+	qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, addr, sizeof(fw_stats));
+
+	p_stats->iscsi_rx_data_pdu_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_rx_data_pdu_cnt);
+	p_stats->iscsi_rx_r2t_pdu_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_rx_r2t_pdu_cnt);
+	p_stats->iscsi_rx_total_pdu_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_rx_total_pdu_cnt);
+}
+
+/* Copy this PF's xstorm_iscsi_stats_drv block with qed_memcpy_from() and
+ * store its TX slow-start and fast-retransmit event counters in @p_stats.
+ */
+static void _qed_iscsi_get_xstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	u32 addr = BAR0_MAP_REG_XSDM_RAM +
+		   XSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	struct xstorm_iscsi_stats_drv fw_stats;
+
+	memset(&fw_stats, 0, sizeof(fw_stats));
+	qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, addr, sizeof(fw_stats));
+
+	p_stats->iscsi_tx_go_to_slow_start_event_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_tx_go_to_slow_start_event_cnt);
+	p_stats->iscsi_tx_fast_retransmit_event_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_tx_fast_retransmit_event_cnt);
+}
+
+/* Copy this PF's ystorm_iscsi_stats_drv block with qed_memcpy_from() and
+ * store its TX data/R2T/total PDU counters in @p_stats.
+ */
+static void _qed_iscsi_get_ystats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	u32 addr = BAR0_MAP_REG_YSDM_RAM +
+		   YSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	struct ystorm_iscsi_stats_drv fw_stats;
+
+	memset(&fw_stats, 0, sizeof(fw_stats));
+	qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, addr, sizeof(fw_stats));
+
+	p_stats->iscsi_tx_data_pdu_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_tx_data_pdu_cnt);
+	p_stats->iscsi_tx_r2t_pdu_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_tx_r2t_pdu_cnt);
+	p_stats->iscsi_tx_total_pdu_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_tx_total_pdu_cnt);
+}
+
+/* Copy this PF's pstorm_iscsi_stats_drv block with qed_memcpy_from() and
+ * store its TX byte and packet counters in @p_stats.
+ */
+static void _qed_iscsi_get_pstats(struct qed_hwfn *p_hwfn,
+				  struct qed_ptt *p_ptt,
+				  struct qed_iscsi_stats *p_stats)
+{
+	u32 addr = BAR0_MAP_REG_PSDM_RAM +
+		   PSTORM_ISCSI_TX_STATS_OFFSET(p_hwfn->rel_pf_id);
+	struct pstorm_iscsi_stats_drv fw_stats;
+
+	memset(&fw_stats, 0, sizeof(fw_stats));
+	qed_memcpy_from(p_hwfn, p_ptt, &fw_stats, addr, sizeof(fw_stats));
+
+	p_stats->iscsi_tx_bytes_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_tx_bytes_cnt);
+	p_stats->iscsi_tx_packet_cnt =
+	    HILO_64_REGPAIR(fw_stats.iscsi_tx_packet_cnt);
+}
+
+/* Collect all iSCSI statistics of @p_hwfn into @stats.
+ *
+ * @stats is zeroed first, so it is all-zero when -EAGAIN is returned because
+ * no PTT window could be acquired. Otherwise the six per-block readers are
+ * run with one PTT, which is released before returning 0.
+ */
+static int qed_iscsi_get_stats(struct qed_hwfn *p_hwfn,
+			       struct qed_iscsi_stats *stats)
+{
+	struct qed_ptt *p_ptt;
+
+	memset(stats, 0, sizeof(*stats));
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt) {
+		DP_ERR(p_hwfn, "Failed to acquire ptt\n");
+		return -EAGAIN;
+	}
+
+	/* RX side counters */
+	_qed_iscsi_get_tstats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_mstats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_ustats(p_hwfn, p_ptt, stats);
+	/* TX side counters */
+	_qed_iscsi_get_xstats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_ystats(p_hwfn, p_ptt, stats);
+	_qed_iscsi_get_pstats(p_hwfn, p_ptt, stats);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return 0;
+}
+
+/* Entry of the cdev->connections hash table; entries are added and looked
+ * up by the connection's icid.
+ */
+struct qed_hash_iscsi_con {
+	struct hlist_node node;		/* linkage in cdev->connections */
+	struct qed_iscsi_conn *con;	/* the connection this entry wraps */
+};
+
+/* Fill @info for the iSCSI protocol driver: zero it, let qed_fill_dev_info()
+ * populate the common part, then record the primary and secondary BDQ
+ * producer addresses for BDQ_ID_RQ of the leading hwfn.
+ *
+ * The return value is the one from qed_fill_dev_info(); the BDQ addresses are
+ * written regardless of it.
+ */
+static int qed_fill_iscsi_dev_info(struct qed_dev *cdev,
+				   struct qed_dev_iscsi_info *info)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	int status;
+
+	memset(info, 0, sizeof(*info));
+	status = qed_fill_dev_info(cdev, &info->common);
+
+	info->primary_dbq_rq_addr =
+	    qed_iscsi_get_primary_bdq_prod(p_hwfn, BDQ_ID_RQ);
+	info->secondary_bdq_rq_addr =
+	    qed_iscsi_get_secondary_bdq_prod(p_hwfn, BDQ_ID_RQ);
+
+	return status;
+}
+
+/* Record the protocol driver's callback table and its opaque cookie in
+ * @cdev. Nothing is validated or copied; only the two pointers are stored.
+ */
+static void qed_register_iscsi_ops(struct qed_dev *cdev,
+				   struct qed_iscsi_cb_ops *ops, void *cookie)
+{
+	cdev->protocol_ops.iscsi = ops;
+	cdev->ops_cookie = cookie;
+}
+
+/* Look up the hash entry whose connection icid equals @handle.
+ *
+ * Returns NULL when storage has not been started on @cdev or when no entry
+ * in the bucket for @handle matches.
+ */
+static struct qed_hash_iscsi_con *qed_iscsi_get_hash(struct qed_dev *cdev,
+						     u32 handle)
+{
+	struct qed_hash_iscsi_con *entry;
+
+	if (!(cdev->flags & QED_FLAG_STORAGE_STARTED))
+		return NULL;
+
+	/* Several handles may share a bucket, so compare the icid itself */
+	hash_for_each_possible(cdev->connections, entry, node, handle) {
+		if (entry->con->icid == handle)
+			return entry;
+	}
+
+	return NULL;
+}
+
+/* Stop the iSCSI function on @cdev.
+ *
+ * Returns 0 if storage was not started, -EINVAL while connections are still
+ * present in the hash table, otherwise the result of the function-stop
+ * ramrod. QED_FLAG_STORAGE_STARTED is cleared once the ramrod has been
+ * issued, whatever its result.
+ */
+static int qed_iscsi_stop(struct qed_dev *cdev)
+{
+	int status;
+
+	if (!(cdev->flags & QED_FLAG_STORAGE_STARTED)) {
+		DP_NOTICE(cdev, "iscsi already stopped\n");
+		return 0;
+	}
+
+	/* Every acquired connection must have been released first */
+	if (!hash_empty(cdev->connections)) {
+		DP_NOTICE(cdev,
+			  "Can't stop iscsi - not all connections were returned\n");
+		return -EINVAL;
+	}
+
+	status = qed_sp_iscsi_func_stop(QED_LEADING_HWFN(cdev),
+					QED_SPQ_MODE_EBLOCK, NULL);
+	cdev->flags &= ~QED_FLAG_STORAGE_STARTED;
+
+	return status;
+}
+
+/* Start the iSCSI function on @cdev and optionally report task memory.
+ *
+ * Returns 0 immediately if storage is already started. Otherwise the
+ * function-start ramrod is issued with @event_context and @async_event_cb,
+ * the started flag is set and the connection hash table is initialised.
+ *
+ * When @tasks is non-NULL it is filled with the TID size, the number of TIDs
+ * per block and the block pointers from qed_cxt_get_tid_mem_info(). If that
+ * step fails, iSCSI is stopped again and the error is returned.
+ */
+static int qed_iscsi_start(struct qed_dev *cdev,
+			   struct qed_iscsi_tid *tasks,
+			   void *event_context,
+			   iscsi_event_cb_t async_event_cb)
+{
+	struct qed_tid_mem *tid_info;
+	int rc;
+
+	if (cdev->flags & QED_FLAG_STORAGE_STARTED) {
+		DP_NOTICE(cdev, "iscsi already started;\n");
+		return 0;
+	}
+
+	rc = qed_sp_iscsi_func_start(QED_LEADING_HWFN(cdev),
+				     QED_SPQ_MODE_EBLOCK, NULL, event_context,
+				     async_event_cb);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to start iscsi\n");
+		return rc;
+	}
+
+	cdev->flags |= QED_FLAG_STORAGE_STARTED;
+	hash_init(cdev->connections);
+
+	/* Task information is only gathered when the caller asks for it */
+	if (!tasks)
+		return 0;
+
+	tid_info = kzalloc(sizeof(*tid_info), GFP_KERNEL);
+	if (!tid_info) {
+		qed_iscsi_stop(cdev);
+		return -ENOMEM;
+	}
+
+	rc = qed_cxt_get_tid_mem_info(QED_LEADING_HWFN(cdev), tid_info);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to gather task information\n");
+		qed_iscsi_stop(cdev);
+		goto free_tid_info;
+	}
+
+	/* Fill task information */
+	tasks->size = tid_info->tid_size;
+	tasks->num_tids_per_block = tid_info->num_tids_per_block;
+	memcpy(tasks->blocks, tid_info->blocks,
+	       MAX_TID_BLOCKS_ISCSI * sizeof(u8 *));
+
+free_tid_info:
+	/* The scratch buffer is dropped on success and failure alike */
+	kfree(tid_info);
+
+	return rc;
+}
+
+/* Acquire a connection on the leading hwfn and publish it in the hash table.
+ *
+ * On success *handle receives the connection icid, *fw_cid the firmware CID
+ * and, when @p_doorbell is non-NULL, *p_doorbell the doorbell address for
+ * that handle. Returns 0, -ENOMEM if the hash entry cannot be allocated, or
+ * the error from qed_iscsi_acquire_connection().
+ */
+static int qed_iscsi_acquire_conn(struct qed_dev *cdev,
+				  u32 *handle,
+				  u32 *fw_cid, void __iomem **p_doorbell)
+{
+	struct qed_hash_iscsi_con *entry;
+	int rc;
+
+	/* The hash entry wrapping the connection */
+	entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	rc = qed_iscsi_acquire_connection(QED_LEADING_HWFN(cdev), NULL,
+					  &entry->con);
+	if (rc) {
+		DP_NOTICE(cdev, "Failed to acquire Connection\n");
+		kfree(entry);
+		return rc;
+	}
+
+	/* Report the identifiers and add the connection to the hash table */
+	*handle = entry->con->icid;
+	*fw_cid = entry->con->fw_cid;
+	hash_add(cdev->connections, &entry->node, *handle);
+
+	if (!p_doorbell)
+		return 0;
+
+	*p_doorbell = qed_iscsi_get_db_addr(QED_LEADING_HWFN(cdev), *handle);
+
+	return 0;
+}
+
+/* Drop the connection identified by @handle: unlink its hash entry, hand the
+ * connection to qed_iscsi_release_connection() and free the entry.
+ * Returns -EINVAL if no connection is registered under @handle.
+ */
+static int qed_iscsi_release_conn(struct qed_dev *cdev, u32 handle)
+{
+	struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+
+	if (!entry) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	hlist_del(&entry->node);
+	qed_iscsi_release_connection(QED_LEADING_HWFN(cdev), entry->con);
+	kfree(entry);
+
+	return 0;
+}
+
+/* Copy the offload parameters in @conn_info into the connection registered
+ * under @handle, set offl_flags to 0x1 and post the connection-offload
+ * ramrod on the leading hwfn in QED_SPQ_MODE_EBLOCK.
+ *
+ * Returns -EINVAL if @handle is unknown, otherwise the result of
+ * qed_sp_iscsi_conn_offload().
+ */
+static int qed_iscsi_offload_conn(struct qed_dev *cdev,
+				  u32 handle,
+				  struct qed_iscsi_params_offload *conn_info)
+{
+	struct qed_hash_iscsi_con *hash_con;
+	struct qed_iscsi_conn *con;
+
+	hash_con = qed_iscsi_get_hash(cdev, handle);
+	if (!hash_con) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	/* Update the connection with information from the params */
+	con = hash_con->con;
+
+	/* Addressing: MAC, IP and port of both endpoints */
+	ether_addr_copy(con->local_mac, conn_info->src.mac);
+	ether_addr_copy(con->remote_mac, conn_info->dst.mac);
+	memcpy(con->local_ip, conn_info->src.ip, sizeof(con->local_ip));
+	memcpy(con->remote_ip, conn_info->dst.ip, sizeof(con->remote_ip));
+	con->local_port = conn_info->src.port;
+	con->remote_port = conn_info->dst.port;
+
+	con->layer_code = conn_info->layer_code;
+	con->sq_pbl_addr = conn_info->sq_pbl_addr;
+	con->initial_ack = conn_info->initial_ack;
+	con->vlan_id = conn_info->vlan_id;
+	con->tcp_flags = conn_info->tcp_flags;
+	con->ip_version = conn_info->ip_version;
+	con->default_cq = conn_info->default_cq;
+	con->ka_max_probe_cnt = conn_info->ka_max_probe_cnt;
+	con->dup_ack_theshold = conn_info->dup_ack_theshold;
+	con->rcv_next = conn_info->rcv_next;
+	con->snd_una = conn_info->snd_una;
+	con->snd_next = conn_info->snd_next;
+	con->snd_max = conn_info->snd_max;
+	con->snd_wnd = conn_info->snd_wnd;
+	con->rcv_wnd = conn_info->rcv_wnd;
+	con->snd_wl1 = conn_info->snd_wl1;
+	con->cwnd = conn_info->cwnd;
+	con->ss_thresh = conn_info->ss_thresh;
+	con->srtt = conn_info->srtt;
+	con->rtt_var = conn_info->rtt_var;
+	con->ts_time = conn_info->ts_time;
+	con->ts_recent = conn_info->ts_recent;
+	con->ts_recent_age = conn_info->ts_recent_age;
+	con->total_rt = conn_info->total_rt;
+	con->ka_timeout_delta = conn_info->ka_timeout_delta;
+	con->rt_timeout_delta = conn_info->rt_timeout_delta;
+	con->dup_ack_cnt = conn_info->dup_ack_cnt;
+	con->snd_wnd_probe_cnt = conn_info->snd_wnd_probe_cnt;
+	con->ka_probe_cnt = conn_info->ka_probe_cnt;
+	con->rt_cnt = conn_info->rt_cnt;
+	con->flow_label = conn_info->flow_label;
+	con->ka_timeout = conn_info->ka_timeout;
+	con->ka_interval = conn_info->ka_interval;
+	con->max_rt_time = conn_info->max_rt_time;
+	con->initial_rcv_wnd = conn_info->initial_rcv_wnd;
+	con->ttl = conn_info->ttl;
+	con->tos_or_tc = conn_info->tos_or_tc;
+	/* NOTE(review): the next two stores overwrite the port values taken
+	 * from conn_info->src.port / conn_info->dst.port above, so those
+	 * earlier values never reach the ramrod. Confirm which source the
+	 * caller actually fills in.
+	 */
+	con->remote_port = conn_info->remote_port;
+	con->local_port = conn_info->local_port;
+	con->mss = conn_info->mss;
+	con->snd_wnd_scale = conn_info->snd_wnd_scale;
+	con->rcv_wnd_scale = conn_info->rcv_wnd_scale;
+	con->ts_ticks_per_second = conn_info->ts_ticks_per_second;
+	con->da_timeout_value = conn_info->da_timeout_value;
+	con->ack_frequency = conn_info->ack_frequency;
+
+	/* Set default values on other connection fields */
+	con->offl_flags = 0x1;
+
+	return qed_sp_iscsi_conn_offload(QED_LEADING_HWFN(cdev), con,
+					 QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* Copy the update parameters in @conn_info into the connection registered
+ * under @handle and post the connection-update ramrod on the leading hwfn.
+ *
+ * Returns -EINVAL if @handle is unknown, otherwise the result of
+ * qed_sp_iscsi_conn_update().
+ */
+static int qed_iscsi_update_conn(struct qed_dev *cdev,
+				 u32 handle,
+				 struct qed_iscsi_params_update *conn_info)
+{
+	struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+	struct qed_iscsi_conn *p_conn;
+
+	if (!entry) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	p_conn = entry->con;
+
+	/* Which fields to update, followed by the values themselves */
+	p_conn->update_flag = conn_info->update_flag;
+	p_conn->max_seq_size = conn_info->max_seq_size;
+	p_conn->max_recv_pdu_length = conn_info->max_recv_pdu_length;
+	p_conn->max_send_pdu_length = conn_info->max_send_pdu_length;
+	p_conn->first_seq_length = conn_info->first_seq_length;
+	p_conn->exp_stat_sn = conn_info->exp_stat_sn;
+
+	return qed_sp_iscsi_conn_update(QED_LEADING_HWFN(cdev), p_conn,
+					QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* Post the clear-SQ ramrod for the connection registered under @handle.
+ * Returns -EINVAL if @handle is unknown, otherwise the ramrod result.
+ */
+static int qed_iscsi_clear_conn_sq(struct qed_dev *cdev, u32 handle)
+{
+	struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+
+	if (!entry) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	return qed_sp_iscsi_conn_clear_sq(QED_LEADING_HWFN(cdev), entry->con,
+					  QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* Terminate the connection registered under @handle. @abrt_conn is stored
+ * in the connection's abortive_dsconnect field before the terminate ramrod
+ * is posted. Returns -EINVAL if @handle is unknown, otherwise the ramrod
+ * result.
+ */
+static int qed_iscsi_destroy_conn(struct qed_dev *cdev,
+				  u32 handle, u8 abrt_conn)
+{
+	struct qed_hash_iscsi_con *entry = qed_iscsi_get_hash(cdev, handle);
+	struct qed_iscsi_conn *p_conn;
+
+	if (!entry) {
+		DP_NOTICE(cdev, "Failed to find connection for handle %d\n",
+			  handle);
+		return -EINVAL;
+	}
+
+	p_conn = entry->con;
+	p_conn->abortive_dsconnect = abrt_conn;
+
+	return qed_sp_iscsi_conn_terminate(QED_LEADING_HWFN(cdev), p_conn,
+					   QED_SPQ_MODE_EBLOCK, NULL);
+}
+
+/* qed_iscsi_ops entry point: gather iSCSI statistics of the leading hwfn
+ * into @stats and return the result of qed_iscsi_get_stats().
+ */
+static int qed_iscsi_stats(struct qed_dev *cdev, struct qed_iscsi_stats *stats)
+{
+	return qed_iscsi_get_stats(QED_LEADING_HWFN(cdev), stats);
+}
+
+/* Operations table returned to callers of qed_get_iscsi_ops() */
+static const struct qed_iscsi_ops qed_iscsi_ops_pass = {
+	/* Shared sub-tables */
+	.common = &qed_common_ops_pass,
+	.ll2 = &qed_ll2_ops_pass,
+	/* Device-level operations */
+	.fill_dev_info = qed_fill_iscsi_dev_info,
+	.register_ops = qed_register_iscsi_ops,
+	.start = qed_iscsi_start,
+	.stop = qed_iscsi_stop,
+	/* Per-connection operations */
+	.acquire_conn = qed_iscsi_acquire_conn,
+	.release_conn = qed_iscsi_release_conn,
+	.offload_conn = qed_iscsi_offload_conn,
+	.update_conn = qed_iscsi_update_conn,
+	.destroy_conn = qed_iscsi_destroy_conn,
+	.clear_sq = qed_iscsi_clear_conn_sq,
+	.get_stats = qed_iscsi_stats,
+};
+
+/* Return the qed iSCSI operations table. Takes no arguments; the parameter
+ * list is spelled (void) so that the definition is a real prototype rather
+ * than an old-style declaration accepting any arguments.
+ */
+const struct qed_iscsi_ops *qed_get_iscsi_ops(void)
+{
+	return &qed_iscsi_ops_pass;
+}
+EXPORT_SYMBOL(qed_get_iscsi_ops);
+
+/* Counterpart of qed_get_iscsi_ops(); currently has nothing to release. */
+void qed_put_iscsi_ops(void)
+{
+}
+EXPORT_SYMBOL(qed_put_iscsi_ops);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_iscsi.h b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
new file mode 100644
index 000000000000..67c25f3db4d5
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_iscsi.h
@@ -0,0 +1,52 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_ISCSI_H
+#define _QED_ISCSI_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/qed/tcp_common.h>
+#include <linux/qed/qed_iscsi_if.h>
+#include <linux/qed/qed_chain.h>
+#include "qed.h"
+#include "qed_hsi.h"
+#include "qed_mcp.h"
+#include "qed_sp.h"
+
+/* iSCSI bookkeeping allocated by qed_iscsi_alloc() */
+struct qed_iscsi_info {
+	spinlock_t lock; /* Connection resources. */
+	/* Connection objects available for reuse */
+	struct list_head free_list;
+	u16 max_num_outstanding_tasks;
+	/* NOTE(review): presumably the async event callback and its context
+	 * as registered at function start - confirm against the users.
+	 */
+	void *event_context;
+	iscsi_event_cb_t event_cb;
+};
+
+#ifdef CONFIG_QED_LL2
+extern const struct qed_ll2_ops qed_ll2_ops_pass;
+#endif
+
+#if IS_ENABLED(CONFIG_QED_ISCSI)
+/* Allocate the iSCSI info structure; returns NULL on failure */
+struct qed_iscsi_info *qed_iscsi_alloc(struct qed_hwfn *p_hwfn);
+
+/* Initialise an iSCSI info structure obtained from qed_iscsi_alloc() */
+void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+		     struct qed_iscsi_info *p_iscsi_info);
+
+/* Free an iSCSI info structure */
+void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+		    struct qed_iscsi_info *p_iscsi_info);
+#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+/* Stubs for builds without iSCSI: alloc yields NULL, the rest do nothing */
+static inline struct qed_iscsi_info *qed_iscsi_alloc(
+		struct qed_hwfn *p_hwfn) { return NULL; }
+static inline void qed_iscsi_setup(struct qed_hwfn *p_hwfn,
+				   struct qed_iscsi_info *p_iscsi_info) {}
+static inline void qed_iscsi_free(struct qed_hwfn *p_hwfn,
+				  struct qed_iscsi_info *p_iscsi_info) {}
+#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index ddd410a91e13..6a3727c4c0c6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
 #include <linux/bug.h>
+#include <linux/vmalloc.h>
 #include "qed.h"
 #include <linux/qed/qed_chain.h>
 #include "qed_cxt.h"
@@ -41,6 +42,124 @@
 #define QED_MAX_SGES_NUM 16
 #define CRC32_POLY 0x1edc6f41
 
+/* Dispose of a queue-cid object: hand its CID back when it is one this PF
+ * acquired for itself, then free the object.
+ */
+void qed_eth_queue_cid_release(struct qed_hwfn *p_hwfn,
+			       struct qed_queue_cid *p_cid)
+{
+	/* VFs' CIDs are 0-based in PF-view, and uninitialized on VF */
+	bool b_release_cid = !p_cid->is_vf && IS_PF(p_hwfn->cdev);
+
+	if (b_release_cid)
+		qed_cxt_release_cid(p_hwfn, p_cid->cid);
+
+	vfree(p_cid);
+}
+
+/* Build a queue-cid object describing an L2 queue.
+ *
+ * This internal variant is only meant to be called directly by PFs
+ * initializing CIDs for their VFs; other callers use qed_eth_queue_to_cid().
+ *
+ * @opaque_fid, @cid, @vf_qid and *@p_params are recorded in the new object.
+ * On a VF the absolute indices are a plain copy of the relative ones. On a
+ * PF the vport and queue ids are translated with qed_fw_vport() and
+ * qed_fw_l2_queue(); the stats id is translated only when @opaque_fid is
+ * the PF's own.
+ *
+ * Returns the new object, to be released with qed_eth_queue_cid_release(),
+ * or NULL if the allocation or one of the translations fails.
+ */
+struct qed_queue_cid *
+_qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+		      u16 opaque_fid,
+		      u32 cid,
+		      u8 vf_qid,
+		      struct qed_queue_start_common_params *p_params)
+{
+	bool b_is_same = (p_hwfn->hw_info.opaque_fid == opaque_fid);
+	struct qed_queue_cid *p_cid;
+	int rc;
+
+	/* Zeroed allocation: fields not assigned below stay 0/false */
+	p_cid = vzalloc(sizeof(*p_cid));
+	if (!p_cid)
+		return NULL;
+
+	p_cid->opaque_fid = opaque_fid;
+	p_cid->cid = cid;
+	p_cid->vf_qid = vf_qid;
+	p_cid->rel = *p_params;
+
+	/* Don't try calculating the absolute indices for VFs */
+	if (IS_VF(p_hwfn->cdev)) {
+		p_cid->abs = p_cid->rel;
+		goto out;
+	}
+
+	/* Calculate the engine-absolute indices of the resources.
+	 * This would guarantee they're valid later on.
+	 * In some cases [SBs] we already have the right values.
+	 */
+	rc = qed_fw_vport(p_hwfn, p_cid->rel.vport_id, &p_cid->abs.vport_id);
+	if (rc)
+		goto fail;
+
+	rc = qed_fw_l2_queue(p_hwfn, p_cid->rel.queue_id, &p_cid->abs.queue_id);
+	if (rc)
+		goto fail;
+
+	/* In case of a PF configuring its VF's queues, the stats-id is already
+	 * absolute [since there's a single index that's suitable per-VF].
+	 */
+	if (b_is_same) {
+		rc = qed_fw_vport(p_hwfn, p_cid->rel.stats_id,
+				  &p_cid->abs.stats_id);
+		if (rc)
+			goto fail;
+	} else {
+		p_cid->abs.stats_id = p_cid->rel.stats_id;
+	}
+
+	/* SBs relevant information was already provided as absolute */
+	p_cid->abs.sb = p_cid->rel.sb;
+	p_cid->abs.sb_idx = p_cid->rel.sb_idx;
+
+	/* This is tricky - we're actually interested in whether this is a PF
+	 * entry meant for the VF.
+	 */
+	if (!b_is_same)
+		p_cid->is_vf = true;
+out:
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_SP,
+		   "opaque_fid: %04x CID %08x vport %02x [%02x] qzone %04x [%04x] stats %02x [%02x] SB %04x PI %02x\n",
+		   p_cid->opaque_fid,
+		   p_cid->cid,
+		   p_cid->rel.vport_id,
+		   p_cid->abs.vport_id,
+		   p_cid->rel.queue_id,
+		   p_cid->abs.queue_id,
+		   p_cid->rel.stats_id,
+		   p_cid->abs.stats_id, p_cid->abs.sb, p_cid->abs.sb_idx);
+
+	return p_cid;
+
+fail:
+	vfree(p_cid);
+	return NULL;
+}
+
+/* Wrap _qed_eth_queue_to_cid() with CID management.
+ *
+ * On a PF a firmware CID is first acquired for PROTOCOLID_ETH and handed
+ * back if the queue-cid object cannot be created. On a VF no CID is
+ * acquired, since the queue configuration is done by the PF, and 0 is
+ * passed instead.
+ *
+ * Returns the new queue-cid object, or NULL on failure.
+ */
+static struct qed_queue_cid *
+qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+		     u16 opaque_fid,
+		     struct qed_queue_start_common_params *p_params)
+{
+	bool b_is_pf = IS_PF(p_hwfn->cdev);
+	struct qed_queue_cid *p_cid;
+	u32 cid = 0;
+
+	if (b_is_pf && qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &cid)) {
+		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
+		return NULL;
+	}
+
+	p_cid = _qed_eth_queue_to_cid(p_hwfn, opaque_fid, cid, 0, p_params);
+	if (!p_cid && b_is_pf)
+		qed_cxt_release_cid(p_hwfn, cid);
+
+	return p_cid;
+}
+
 int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
 			   struct qed_sp_vport_start_params *p_params)
 {
@@ -496,61 +615,26 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev,
 	return 0;
 }
 
-static int qed_sp_release_queue_cid(
-	struct qed_hwfn *p_hwfn,
-	struct qed_hw_cid_data *p_cid_data)
-{
-	if (!p_cid_data->b_cid_allocated)
-		return 0;
-
-	qed_cxt_release_cid(p_hwfn, p_cid_data->cid);
-
-	p_cid_data->b_cid_allocated = false;
-
-	return 0;
-}
-
-int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
-				u16 opaque_fid,
-				u32 cid,
-				struct qed_queue_start_common_params *p_params,
-				u8 stats_id,
-				u16 bd_max_bytes,
-				dma_addr_t bd_chain_phys_addr,
-				dma_addr_t cqe_pbl_addr,
-				u16 cqe_pbl_size, bool b_use_zone_a_prod)
+int qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+			     struct qed_queue_cid *p_cid,
+			     u16 bd_max_bytes,
+			     dma_addr_t bd_chain_phys_addr,
+			     dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size)
 {
 	struct rx_queue_start_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	struct qed_hw_cid_data *p_rx_cid;
-	u16 abs_rx_q_id = 0;
-	u8 abs_vport_id = 0;
 	int rc = -EINVAL;
 
-	/* Store information for the stop */
-	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
-	p_rx_cid->cid = cid;
-	p_rx_cid->opaque_fid = opaque_fid;
-	p_rx_cid->vport_id = p_params->vport_id;
-
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc)
-		return rc;
-
-	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id);
-	if (rc)
-		return rc;
-
 	DP_VERBOSE(p_hwfn, QED_MSG_SP,
-		   "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-		   opaque_fid,
-		   cid, p_params->queue_id, p_params->vport_id, p_params->sb);
+		   "opaque_fid=0x%x, cid=0x%x, rx_qzone=0x%x, vport_id=0x%x, sb_id=0x%x\n",
+		   p_cid->opaque_fid, p_cid->cid,
+		   p_cid->abs.queue_id, p_cid->abs.vport_id, p_cid->abs.sb);
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = cid;
-	init_data.opaque_fid = opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -561,11 +645,11 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 
 	p_ramrod = &p_ent->ramrod.rx_queue_start;
 
-	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
-	p_ramrod->sb_index = p_params->sb_idx;
-	p_ramrod->vport_id = abs_vport_id;
-	p_ramrod->stats_counter_id = stats_id;
-	p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+	p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
+	p_ramrod->sb_index = p_cid->abs.sb_idx;
+	p_ramrod->vport_id = p_cid->abs.vport_id;
+	p_ramrod->stats_counter_id = p_cid->abs.stats_id;
+	p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
 	p_ramrod->complete_cqe_flg = 0;
 	p_ramrod->complete_event_flg = 1;
 
@@ -575,85 +659,85 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
 	p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr);
 
-	if (p_params->vf_qid || b_use_zone_a_prod) {
-		p_ramrod->vf_rx_prod_index = p_params->vf_qid;
+	if (p_cid->is_vf) {
+		p_ramrod->vf_rx_prod_index = p_cid->vf_qid;
 		DP_VERBOSE(p_hwfn, QED_MSG_SP,
 			   "Queue%s is meant for VF rxq[%02x]\n",
-			   b_use_zone_a_prod ? " [legacy]" : "",
-			   p_params->vf_qid);
-		p_ramrod->vf_rx_prod_use_zone_a = b_use_zone_a_prod;
+			   !!p_cid->b_legacy_vf ? " [legacy]" : "",
+			   p_cid->vf_qid);
+		p_ramrod->vf_rx_prod_use_zone_a = !!p_cid->b_legacy_vf;
 	}
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
 static int
-qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
-			  u16 opaque_fid,
-			  struct qed_queue_start_common_params *p_params,
+qed_eth_pf_rx_queue_start(struct qed_hwfn *p_hwfn,
+			  struct qed_queue_cid *p_cid,
 			  u16 bd_max_bytes,
 			  dma_addr_t bd_chain_phys_addr,
 			  dma_addr_t cqe_pbl_addr,
 			  u16 cqe_pbl_size, void __iomem **pp_prod)
 {
-	struct qed_hw_cid_data *p_rx_cid;
 	u32 init_prod_val = 0;
-	u16 abs_l2_queue = 0;
-	u8 abs_stats_id = 0;
-	int rc;
 
-	if (IS_VF(p_hwfn->cdev)) {
-		return qed_vf_pf_rxq_start(p_hwfn,
-					   p_params->queue_id,
-					   p_params->sb,
-					   (u8)p_params->sb_idx,
-					   bd_max_bytes,
-					   bd_chain_phys_addr,
-					   cqe_pbl_addr, cqe_pbl_size, pp_prod);
-	}
-
-	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue);
-	if (rc)
-		return rc;
-
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
-	if (rc)
-		return rc;
-
-	*pp_prod = (u8 __iomem *)p_hwfn->regview +
-				 GTT_BAR0_MAP_REG_MSDM_RAM +
-				 MSTORM_ETH_PF_PRODS_OFFSET(abs_l2_queue);
+	*pp_prod = p_hwfn->regview +
+		   GTT_BAR0_MAP_REG_MSDM_RAM +
+		    MSTORM_ETH_PF_PRODS_OFFSET(p_cid->abs.queue_id);
 
 	/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
 	__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
 			  (u32 *)(&init_prod_val));
 
+	return qed_eth_rxq_start_ramrod(p_hwfn, p_cid,
+					bd_max_bytes,
+					bd_chain_phys_addr,
+					cqe_pbl_addr, cqe_pbl_size);
+}
+
+static int
+qed_eth_rx_queue_start(struct qed_hwfn *p_hwfn,
+		       u16 opaque_fid,
+		       struct qed_queue_start_common_params *p_params,
+		       u16 bd_max_bytes,
+		       dma_addr_t bd_chain_phys_addr,
+		       dma_addr_t cqe_pbl_addr,
+		       u16 cqe_pbl_size,
+		       struct qed_rxq_start_ret_params *p_ret_params)
+{
+	struct qed_queue_cid *p_cid;
+	int rc;
+
 	/* Allocate a CID for the queue */
-	p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id];
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
-		return rc;
-	}
-	p_rx_cid->b_cid_allocated = true;
+	p_cid = qed_eth_queue_to_cid(p_hwfn, opaque_fid, p_params);
+	if (!p_cid)
+		return -ENOMEM;
 
-	rc = qed_sp_eth_rxq_start_ramrod(p_hwfn,
-					 opaque_fid,
-					 p_rx_cid->cid,
-					 p_params,
-					 abs_stats_id,
+	if (IS_PF(p_hwfn->cdev)) {
+		rc = qed_eth_pf_rx_queue_start(p_hwfn, p_cid,
+					       bd_max_bytes,
+					       bd_chain_phys_addr,
+					       cqe_pbl_addr, cqe_pbl_size,
+					       &p_ret_params->p_prod);
+	} else {
+		rc = qed_vf_pf_rxq_start(p_hwfn, p_cid,
 					 bd_max_bytes,
 					 bd_chain_phys_addr,
-					 cqe_pbl_addr, cqe_pbl_size, false);
+					 cqe_pbl_addr,
+					 cqe_pbl_size, &p_ret_params->p_prod);
+	}
 
+	/* Provide the caller with a reference to use as a handle */
 	if (rc)
-		qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	else
+		p_ret_params->p_handle = (void *)p_cid;
 
 	return rc;
 }
 
 int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
-				u16 rx_queue_id,
+				void **pp_rxq_handles,
 				u8 num_rxqs,
 				u8 complete_cqe_flg,
 				u8 complete_event_flg,
@@ -663,8 +747,7 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 	struct rx_queue_update_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	struct qed_hw_cid_data *p_rx_cid;
-	u16 qid, abs_rx_q_id = 0;
+	struct qed_queue_cid *p_cid;
 	int rc = -EINVAL;
 	u8 i;
 
@@ -673,12 +756,11 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 	init_data.p_comp_data = p_comp_data;
 
 	for (i = 0; i < num_rxqs; i++) {
-		qid = rx_queue_id + i;
-		p_rx_cid = &p_hwfn->p_rx_cids[qid];
+		p_cid = ((struct qed_queue_cid **)pp_rxq_handles)[i];
 
 		/* Get SPQ entry */
-		init_data.cid = p_rx_cid->cid;
-		init_data.opaque_fid = p_rx_cid->opaque_fid;
+		init_data.cid = p_cid->cid;
+		init_data.opaque_fid = p_cid->opaque_fid;
 
 		rc = qed_sp_init_request(p_hwfn, &p_ent,
 					 ETH_RAMROD_RX_QUEUE_UPDATE,
@@ -687,10 +769,9 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 			return rc;
 
 		p_ramrod = &p_ent->ramrod.rx_queue_update;
+		p_ramrod->vport_id = p_cid->abs.vport_id;
 
-		qed_fw_vport(p_hwfn, p_rx_cid->vport_id, &p_ramrod->vport_id);
-		qed_fw_l2_queue(p_hwfn, qid, &abs_rx_q_id);
-		p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+		p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
 		p_ramrod->complete_cqe_flg = complete_cqe_flg;
 		p_ramrod->complete_event_flg = complete_event_flg;
 
@@ -702,24 +783,19 @@ int qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
-int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
-			     u16 rx_queue_id,
-			     bool eq_completion_only, bool cqe_completion)
+static int
+qed_eth_pf_rx_queue_stop(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 bool b_eq_completion_only, bool b_cqe_completion)
 {
-	struct qed_hw_cid_data *p_rx_cid = &p_hwfn->p_rx_cids[rx_queue_id];
 	struct rx_queue_stop_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	u16 abs_rx_q_id = 0;
-	int rc = -EINVAL;
-
-	if (IS_VF(p_hwfn->cdev))
-		return qed_vf_pf_rxq_stop(p_hwfn, rx_queue_id, cqe_completion);
+	int rc;
 
-	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = p_rx_cid->cid;
-	init_data.opaque_fid = p_rx_cid->opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -729,62 +805,53 @@ int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
 		return rc;
 
 	p_ramrod = &p_ent->ramrod.rx_queue_stop;
-
-	qed_fw_vport(p_hwfn, p_rx_cid->vport_id, &p_ramrod->vport_id);
-	qed_fw_l2_queue(p_hwfn, rx_queue_id, &abs_rx_q_id);
-	p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id);
+	p_ramrod->vport_id = p_cid->abs.vport_id;
+	p_ramrod->rx_queue_id = cpu_to_le16(p_cid->abs.queue_id);
 
 	/* Cleaning the queue requires the completion to arrive there.
 	 * In addition, VFs require the answer to come as eqe to PF.
 	 */
-	p_ramrod->complete_cqe_flg =
-		(!!(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) &&
-		 !eq_completion_only) || cqe_completion;
-	p_ramrod->complete_event_flg =
-		!(p_rx_cid->opaque_fid == p_hwfn->hw_info.opaque_fid) ||
-		eq_completion_only;
+	p_ramrod->complete_cqe_flg = (!p_cid->is_vf &&
+				      !b_eq_completion_only) ||
+				     b_cqe_completion;
+	p_ramrod->complete_event_flg = p_cid->is_vf || b_eq_completion_only;
 
-	rc = qed_spq_post(p_hwfn, p_ent, NULL);
-	if (rc)
-		return rc;
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+int qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+			  void *p_rxq,
+			  bool eq_completion_only, bool cqe_completion)
+{
+	struct qed_queue_cid *p_cid = (struct qed_queue_cid *)p_rxq;
+	int rc = -EINVAL;
+
+	if (IS_PF(p_hwfn->cdev))
+		rc = qed_eth_pf_rx_queue_stop(p_hwfn, p_cid,
+					      eq_completion_only,
+					      cqe_completion);
+	else
+		rc = qed_vf_pf_rxq_stop(p_hwfn, p_cid, cqe_completion);
 
-	return qed_sp_release_queue_cid(p_hwfn, p_rx_cid);
+	if (!rc)
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	return rc;
 }
 
-int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
-				u16  opaque_fid,
-				u32  cid,
-				struct qed_queue_start_common_params *p_params,
-				u8  stats_id,
-				dma_addr_t pbl_addr,
-				u16 pbl_size,
-				union qed_qm_pq_params *p_pq_params)
+int
+qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 dma_addr_t pbl_addr, u16 pbl_size, u16 pq_id)
 {
 	struct tx_queue_start_ramrod_data *p_ramrod = NULL;
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	struct qed_hw_cid_data *p_tx_cid;
-	u16 pq_id, abs_tx_q_id = 0;
 	int rc = -EINVAL;
-	u8 abs_vport_id;
-
-	/* Store information for the stop */
-	p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id];
-	p_tx_cid->cid		= cid;
-	p_tx_cid->opaque_fid	= opaque_fid;
-
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id);
-	if (rc)
-		return rc;
-
-	rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_tx_q_id);
-	if (rc)
-		return rc;
 
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = cid;
-	init_data.opaque_fid = opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -794,96 +861,92 @@ int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
 		return rc;
 
 	p_ramrod = &p_ent->ramrod.tx_queue_start;
-	p_ramrod->vport_id = abs_vport_id;
+	p_ramrod->vport_id = p_cid->abs.vport_id;
 
-	p_ramrod->sb_id = cpu_to_le16(p_params->sb);
-	p_ramrod->sb_index = p_params->sb_idx;
-	p_ramrod->stats_counter_id = stats_id;
+	p_ramrod->sb_id = cpu_to_le16(p_cid->abs.sb);
+	p_ramrod->sb_index = p_cid->abs.sb_idx;
+	p_ramrod->stats_counter_id = p_cid->abs.stats_id;
 
-	p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id);
+	p_ramrod->queue_zone_id = cpu_to_le16(p_cid->abs.queue_id);
+	p_ramrod->same_as_last_id = cpu_to_le16(p_cid->abs.queue_id);
 
 	p_ramrod->pbl_size = cpu_to_le16(pbl_size);
 	DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr);
 
-	pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params);
 	p_ramrod->qm_pq_id = cpu_to_le16(pq_id);
 
 	return qed_spq_post(p_hwfn, p_ent, NULL);
 }
 
 static int
-qed_sp_eth_tx_queue_start(struct qed_hwfn *p_hwfn,
-			  u16 opaque_fid,
-			  struct qed_queue_start_common_params *p_params,
+qed_eth_pf_tx_queue_start(struct qed_hwfn *p_hwfn,
+			  struct qed_queue_cid *p_cid,
+			  u8 tc,
 			  dma_addr_t pbl_addr,
 			  u16 pbl_size, void __iomem **pp_doorbell)
 {
-	struct qed_hw_cid_data *p_tx_cid;
 	union qed_qm_pq_params pq_params;
-	u8 abs_stats_id = 0;
 	int rc;
 
-	if (IS_VF(p_hwfn->cdev)) {
-		return qed_vf_pf_txq_start(p_hwfn,
-					   p_params->queue_id,
-					   p_params->sb,
-					   p_params->sb_idx,
-					   pbl_addr, pbl_size, pp_doorbell);
-	}
+	memset(&pq_params, 0, sizeof(pq_params));
 
-	rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id);
+	rc = qed_eth_txq_start_ramrod(p_hwfn, p_cid,
+				      pbl_addr, pbl_size,
+				      qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH,
+						    &pq_params));
 	if (rc)
 		return rc;
 
-	p_tx_cid = &p_hwfn->p_tx_cids[p_params->queue_id];
-	memset(p_tx_cid, 0, sizeof(*p_tx_cid));
-	memset(&pq_params, 0, sizeof(pq_params));
+	/* Provide the caller with the necessary return values */
+	*pp_doorbell = p_hwfn->doorbells +
+		       qed_db_addr(p_cid->cid, DQ_DEMS_LEGACY);
 
-	/* Allocate a CID for the queue */
-	rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid);
-	if (rc) {
-		DP_NOTICE(p_hwfn, "Failed to acquire cid\n");
-		return rc;
-	}
-	p_tx_cid->b_cid_allocated = true;
+	return 0;
+}
 
-	DP_VERBOSE(p_hwfn, QED_MSG_SP,
-		   "opaque_fid=0x%x, cid=0x%x, tx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n",
-		   opaque_fid, p_tx_cid->cid,
-		   p_params->queue_id, p_params->vport_id, p_params->sb);
-
-	rc = qed_sp_eth_txq_start_ramrod(p_hwfn,
-					 opaque_fid,
-					 p_tx_cid->cid,
-					 p_params,
-					 abs_stats_id,
-					 pbl_addr,
-					 pbl_size,
-					 &pq_params);
-
-	*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-				     qed_db_addr(p_tx_cid->cid, DQ_DEMS_LEGACY);
+static int
+qed_eth_tx_queue_start(struct qed_hwfn *p_hwfn,
+		       u16 opaque_fid,
+		       struct qed_queue_start_common_params *p_params,
+		       u8 tc,
+		       dma_addr_t pbl_addr,
+		       u16 pbl_size,
+		       struct qed_txq_start_ret_params *p_ret_params)
+{
+	struct qed_queue_cid *p_cid;
+	int rc;
+
+	p_cid = qed_eth_queue_to_cid(p_hwfn, opaque_fid, p_params);
+	if (!p_cid)
+		return -EINVAL;
+
+	if (IS_PF(p_hwfn->cdev))
+		rc = qed_eth_pf_tx_queue_start(p_hwfn, p_cid, tc,
+					       pbl_addr, pbl_size,
+					       &p_ret_params->p_doorbell);
+	else
+		rc = qed_vf_pf_txq_start(p_hwfn, p_cid,
+					 pbl_addr, pbl_size,
+					 &p_ret_params->p_doorbell);
 
 	if (rc)
-		qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	else
+		p_ret_params->p_handle = (void *)p_cid;
 
 	return rc;
 }
 
-int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id)
+static int
+qed_eth_pf_tx_queue_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid)
 {
-	struct qed_hw_cid_data *p_tx_cid = &p_hwfn->p_tx_cids[tx_queue_id];
 	struct qed_spq_entry *p_ent = NULL;
 	struct qed_sp_init_data init_data;
-	int rc = -EINVAL;
-
-	if (IS_VF(p_hwfn->cdev))
-		return qed_vf_pf_txq_stop(p_hwfn, tx_queue_id);
+	int rc;
 
-	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
-	init_data.cid = p_tx_cid->cid;
-	init_data.opaque_fid = p_tx_cid->opaque_fid;
+	init_data.cid = p_cid->cid;
+	init_data.opaque_fid = p_cid->opaque_fid;
 	init_data.comp_mode = QED_SPQ_MODE_EBLOCK;
 
 	rc = qed_sp_init_request(p_hwfn, &p_ent,
@@ -892,11 +955,22 @@ int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id)
 	if (rc)
 		return rc;
 
-	rc = qed_spq_post(p_hwfn, p_ent, NULL);
-	if (rc)
-		return rc;
+	return qed_spq_post(p_hwfn, p_ent, NULL);
+}
+
+int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_handle)
+{
+	struct qed_queue_cid *p_cid = (struct qed_queue_cid *)p_handle;
+	int rc;
+
+	if (IS_PF(p_hwfn->cdev))
+		rc = qed_eth_pf_tx_queue_stop(p_hwfn, p_cid);
+	else
+		rc = qed_vf_pf_txq_stop(p_hwfn, p_cid);
 
-	return qed_sp_release_queue_cid(p_hwfn, p_tx_cid);
+	if (!rc)
+		qed_eth_queue_cid_release(p_hwfn, p_cid);
+	return rc;
 }
 
 static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode)
@@ -1652,6 +1726,7 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 
 	if (IS_PF(cdev)) {
 		int max_vf_vlan_filters = 0;
+		int max_vf_mac_filters = 0;
 
 		if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) {
 			for_each_hwfn(cdev, i)
@@ -1665,11 +1740,18 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 			info->num_queues = cdev->num_hwfns;
 		}
 
-		if (IS_QED_SRIOV(cdev))
+		if (IS_QED_SRIOV(cdev)) {
 			max_vf_vlan_filters = cdev->p_iov_info->total_vfs *
 					      QED_ETH_VF_NUM_VLAN_FILTERS;
-		info->num_vlan_filters = RESC_NUM(&cdev->hwfns[0], QED_VLAN) -
+			max_vf_mac_filters = cdev->p_iov_info->total_vfs *
+					     QED_ETH_VF_NUM_MAC_FILTERS;
+		}
+		info->num_vlan_filters = RESC_NUM(QED_LEADING_HWFN(cdev),
+						  QED_VLAN) -
 					 max_vf_vlan_filters;
+		info->num_mac_filters = RESC_NUM(QED_LEADING_HWFN(cdev),
+						 QED_MAC) -
+					max_vf_mac_filters;
 
 		ether_addr_copy(info->port_mac,
 				cdev->hwfns[0].hw_info.hw_mac_addr);
@@ -1683,7 +1765,9 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev,
 		}
 
 		qed_vf_get_num_vlan_filters(&cdev->hwfns[0],
-					    &info->num_vlan_filters);
+					    (u8 *)&info->num_vlan_filters);
+		qed_vf_get_num_mac_filters(&cdev->hwfns[0],
+					   (u8 *)&info->num_mac_filters);
 		qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac);
 
 		info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi;
@@ -1870,58 +1954,53 @@ static int qed_update_vport(struct qed_dev *cdev,
 }
 
 static int qed_start_rxq(struct qed_dev *cdev,
-			 struct qed_queue_start_common_params *params,
+			 u8 rss_num,
+			 struct qed_queue_start_common_params *p_params,
 			 u16 bd_max_bytes,
 			 dma_addr_t bd_chain_phys_addr,
 			 dma_addr_t cqe_pbl_addr,
 			 u16 cqe_pbl_size,
-			 void __iomem **pp_prod)
+			 struct qed_rxq_start_ret_params *ret_params)
 {
 	struct qed_hwfn *p_hwfn;
 	int rc, hwfn_index;
 
-	hwfn_index = params->rss_id % cdev->num_hwfns;
+	hwfn_index = rss_num % cdev->num_hwfns;
 	p_hwfn = &cdev->hwfns[hwfn_index];
 
-	/* Fix queue ID in 100g mode */
-	params->queue_id /= cdev->num_hwfns;
-
-	rc = qed_sp_eth_rx_queue_start(p_hwfn,
-				       p_hwfn->hw_info.opaque_fid,
-				       params,
-				       bd_max_bytes,
-				       bd_chain_phys_addr,
-				       cqe_pbl_addr,
-				       cqe_pbl_size,
-				       pp_prod);
+	p_params->queue_id = p_params->queue_id / cdev->num_hwfns;
+	p_params->stats_id = p_params->vport_id;
 
+	rc = qed_eth_rx_queue_start(p_hwfn,
+				    p_hwfn->hw_info.opaque_fid,
+				    p_params,
+				    bd_max_bytes,
+				    bd_chain_phys_addr,
+				    cqe_pbl_addr, cqe_pbl_size, ret_params);
 	if (rc) {
-		DP_ERR(cdev, "Failed to start RXQ#%d\n", params->queue_id);
+		DP_ERR(cdev, "Failed to start RXQ#%d\n", p_params->queue_id);
 		return rc;
 	}
 
 	DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-		   "Started RX-Q %d [rss %d] on V-PORT %d and SB %d\n",
-		   params->queue_id, params->rss_id, params->vport_id,
-		   params->sb);
+		   "Started RX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+		   p_params->queue_id, rss_num, p_params->vport_id,
+		   p_params->sb);
 
 	return 0;
 }
 
-static int qed_stop_rxq(struct qed_dev *cdev,
-			struct qed_stop_rxq_params *params)
+static int qed_stop_rxq(struct qed_dev *cdev, u8 rss_id, void *handle)
 {
 	int rc, hwfn_index;
 	struct qed_hwfn *p_hwfn;
 
-	hwfn_index	= params->rss_id % cdev->num_hwfns;
-	p_hwfn		= &cdev->hwfns[hwfn_index];
+	hwfn_index = rss_id % cdev->num_hwfns;
+	p_hwfn = &cdev->hwfns[hwfn_index];
 
-	rc = qed_sp_eth_rx_queue_stop(p_hwfn,
-				      params->rx_queue_id / cdev->num_hwfns,
-				      params->eq_completion_only, false);
+	rc = qed_eth_rx_queue_stop(p_hwfn, handle, false, false);
 	if (rc) {
-		DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id);
+		DP_ERR(cdev, "Failed to stop RXQ#%02x\n", rss_id);
 		return rc;
 	}
 
@@ -1929,26 +2008,24 @@ static int qed_stop_rxq(struct qed_dev *cdev,
 }
 
 static int qed_start_txq(struct qed_dev *cdev,
+			 u8 rss_num,
 			 struct qed_queue_start_common_params *p_params,
 			 dma_addr_t pbl_addr,
 			 u16 pbl_size,
-			 void __iomem **pp_doorbell)
+			 struct qed_txq_start_ret_params *ret_params)
 {
 	struct qed_hwfn *p_hwfn;
 	int rc, hwfn_index;
 
-	hwfn_index	= p_params->rss_id % cdev->num_hwfns;
-	p_hwfn		= &cdev->hwfns[hwfn_index];
-
-	/* Fix queue ID in 100g mode */
-	p_params->queue_id /= cdev->num_hwfns;
+	hwfn_index = rss_num % cdev->num_hwfns;
+	p_hwfn = &cdev->hwfns[hwfn_index];
+	p_params->queue_id = p_params->queue_id / cdev->num_hwfns;
+	p_params->stats_id = p_params->vport_id;
 
-	rc = qed_sp_eth_tx_queue_start(p_hwfn,
-				       p_hwfn->hw_info.opaque_fid,
-				       p_params,
-				       pbl_addr,
-				       pbl_size,
-				       pp_doorbell);
+	rc = qed_eth_tx_queue_start(p_hwfn,
+				    p_hwfn->hw_info.opaque_fid,
+				    p_params, 0,
+				    pbl_addr, pbl_size, ret_params);
 
 	if (rc) {
 		DP_ERR(cdev, "Failed to start TXQ#%d\n", p_params->queue_id);
@@ -1956,8 +2033,8 @@ static int qed_start_txq(struct qed_dev *cdev,
 	}
 
 	DP_VERBOSE(cdev, (QED_MSG_SPQ | NETIF_MSG_IFUP),
-		   "Started TX-Q %d [rss %d] on V-PORT %d and SB %d\n",
-		   p_params->queue_id, p_params->rss_id, p_params->vport_id,
+		   "Started TX-Q %d [rss_num %d] on V-PORT %d and SB %d\n",
+		   p_params->queue_id, rss_num, p_params->vport_id,
 		   p_params->sb);
 
 	return 0;
@@ -1971,19 +2048,17 @@ static int qed_fastpath_stop(struct qed_dev *cdev)
 	return 0;
 }
 
-static int qed_stop_txq(struct qed_dev *cdev,
-			struct qed_stop_txq_params *params)
+static int qed_stop_txq(struct qed_dev *cdev, u8 rss_id, void *handle)
 {
 	struct qed_hwfn *p_hwfn;
 	int rc, hwfn_index;
 
-	hwfn_index	= params->rss_id % cdev->num_hwfns;
-	p_hwfn		= &cdev->hwfns[hwfn_index];
+	hwfn_index = rss_id % cdev->num_hwfns;
+	p_hwfn = &cdev->hwfns[hwfn_index];
 
-	rc = qed_sp_eth_tx_queue_stop(p_hwfn,
-				      params->tx_queue_id / cdev->num_hwfns);
+	rc = qed_eth_tx_queue_stop(p_hwfn, handle);
 	if (rc) {
-		DP_ERR(cdev, "Failed to stop TXQ#%d\n", params->tx_queue_id);
+		DP_ERR(cdev, "Failed to stop TXQ#%02x\n", rss_id);
 		return rc;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h
index e495d62fcc03..48c9bfc28140 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h
@@ -78,11 +78,34 @@ struct qed_filter_mcast {
 	unsigned char mac[QED_MAX_MC_ADDRS][ETH_ALEN];
 };
 
-int qed_sp_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
-			     u16 rx_queue_id,
-			     bool eq_completion_only, bool cqe_completion);
+/**
+ * @brief qed_eth_rx_queue_stop - This ramrod closes an Rx queue
+ *
+ * @param p_hwfn
+ * @param p_rxq			Handler of queue to close
+ * @param eq_completion_only	If true, completion arrives on
+ *				the EQe. If false, completion
+ *				arrives on the EQe for a VF's
+ *				queue, and otherwise on the
+ *				CQe.
+ * @param cqe_completion	If true, completion will be
+ *				received on the CQe.
+ * @return int
+ */
+int
+qed_eth_rx_queue_stop(struct qed_hwfn *p_hwfn,
+		      void *p_rxq,
+		      bool eq_completion_only, bool cqe_completion);
 
-int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id);
+/**
+ * @brief qed_eth_tx_queue_stop - closes a Tx queue
+ *
+ * @param p_hwfn
+ * @param p_txq - handle to Tx queue needed to be closed
+ *
+ * @return int
+ */
+int qed_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, void *p_txq);
 
 enum qed_tpa_mode {
 	QED_TPA_MODE_NONE,
@@ -196,19 +219,19 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn,
  * @note At the moment - only used by non-linux VFs.
  *
  * @param p_hwfn
- * @param rx_queue_id		RX Queue ID
- * @param num_rxqs		Allow to update multiple rx
- *				queues, from rx_queue_id to
- *				(rx_queue_id + num_rxqs)
+ * @param pp_rxq_handlers	An array of queue handlers to be updated.
+ * @param num_rxqs              number of queues to update.
  * @param complete_cqe_flg	Post completion to the CQE Ring if set
  * @param complete_event_flg	Post completion to the Event Ring if set
+ * @param comp_mode
+ * @param p_comp_data
  *
  * @return int
  */
 
 int
 qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
-			    u16 rx_queue_id,
+			    void **pp_rxq_handlers,
 			    u8 num_rxqs,
 			    u8 complete_cqe_flg,
 			    u8 complete_event_flg,
@@ -217,27 +240,79 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn,
 
 void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats);
 
-int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
-			   struct qed_sp_vport_start_params *p_params);
+void qed_reset_vport_stats(struct qed_dev *cdev);
+
+struct qed_queue_cid {
+	/* 'Relative' is a relative term ;-). Usually the indices [not counting
+	 * SBs] would be PF-relative, but there are some cases where that isn't
+	 * the case - specifically for a PF configuring its VF indices it's
+	 * possible some fields [E.g., stats-id] in 'rel' would already be abs.
+	 */
+	struct qed_queue_start_common_params rel;
+	struct qed_queue_start_common_params abs;
+	u32 cid;
+	u16 opaque_fid;
+
+	/* VFs queues are mapped differently, so we need to know the
+	 * relative queue associated with them [0-based].
+	 * Notice this is relevant on the *PF* queue-cid of its VF's queues,
+	 * and not on the VF itself.
+	 */
+	bool is_vf;
+	u8 vf_qid;
+
+	/* Legacy VFs might have Rx producer located elsewhere */
+	bool b_legacy_vf;
+};
 
-int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
-				u16 opaque_fid,
-				u32 cid,
-				struct qed_queue_start_common_params *params,
-				u8 stats_id,
-				u16 bd_max_bytes,
-				dma_addr_t bd_chain_phys_addr,
-				dma_addr_t cqe_pbl_addr,
-				u16 cqe_pbl_size, bool b_use_zone_a_prod);
-
-int qed_sp_eth_txq_start_ramrod(struct qed_hwfn  *p_hwfn,
-				u16  opaque_fid,
-				u32  cid,
-				struct qed_queue_start_common_params *p_params,
-				u8  stats_id,
-				dma_addr_t pbl_addr,
-				u16 pbl_size,
-				union qed_qm_pq_params *p_pq_params);
+void qed_eth_queue_cid_release(struct qed_hwfn *p_hwfn,
+			       struct qed_queue_cid *p_cid);
+
+struct qed_queue_cid *_qed_eth_queue_to_cid(struct qed_hwfn *p_hwfn,
+					    u16 opaque_fid,
+					    u32 cid,
+					    u8 vf_qid,
+					    struct qed_queue_start_common_params
+					    *p_params);
+
+int
+qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn,
+		       struct qed_sp_vport_start_params *p_params);
+
+/**
+ * @brief - Starts an Rx queue, when queue_cid is already prepared
+ *
+ * @param p_hwfn
+ * @param p_cid
+ * @param bd_max_bytes
+ * @param bd_chain_phys_addr
+ * @param cqe_pbl_addr
+ * @param cqe_pbl_size
+ *
+ * @return int
+ */
+int
+qed_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 u16 bd_max_bytes,
+			 dma_addr_t bd_chain_phys_addr,
+			 dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size);
+
+/**
+ * @brief - Starts a Tx queue, where queue_cid is already prepared
+ *
+ * @param p_hwfn
+ * @param p_cid
+ * @param pbl_addr
+ * @param pbl_size
+ * @param pq_id - PQ id written into the Tx queue start ramrod
+ *
+ * @return int
+ */
+int
+qed_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn,
+			 struct qed_queue_cid *p_cid,
+			 dma_addr_t pbl_addr, u16 pbl_size, u16 pq_id);
 
 u8 qed_mcast_bin_from_mac(u8 *mac);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index f95385cbbd40..8e5cb7605b0f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -36,6 +36,7 @@
 #include "qed_int.h"
 #include "qed_ll2.h"
 #include "qed_mcp.h"
+#include "qed_ooo.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
 #include "qed_roce.h"
@@ -296,25 +297,34 @@ static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 		list_del(&p_pkt->list_entry);
 		b_last_packet = list_empty(&p_tx->active_descq);
 		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
-		p_tx->cur_completing_packet = *p_pkt;
-		p_tx->cur_completing_bd_idx = 1;
-		b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used;
-		tx_frag = p_pkt->bds_set[0].tx_frag;
-		if (p_ll2_conn->gsi_enable)
-			qed_ll2b_release_tx_gsi_packet(p_hwfn,
-						       p_ll2_conn->my_id,
-						       p_pkt->cookie,
-						       tx_frag,
-						       b_last_frag,
-						       b_last_packet);
-		else
-			qed_ll2b_complete_tx_packet(p_hwfn,
-						    p_ll2_conn->my_id,
-						    p_pkt->cookie,
-						    tx_frag,
-						    b_last_frag,
-						    b_last_packet);
+		if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+			struct qed_ooo_buffer *p_buffer;
 
+			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+			qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info,
+						p_buffer);
+		} else {
+			p_tx->cur_completing_packet = *p_pkt;
+			p_tx->cur_completing_bd_idx = 1;
+			b_last_frag =
+				p_tx->cur_completing_bd_idx == p_pkt->bd_used;
+			tx_frag = p_pkt->bds_set[0].tx_frag;
+			if (p_ll2_conn->gsi_enable)
+				qed_ll2b_release_tx_gsi_packet(p_hwfn,
+							       p_ll2_conn->
+							       my_id,
+							       p_pkt->cookie,
+							       tx_frag,
+							       b_last_frag,
+							       b_last_packet);
+			else
+				qed_ll2b_complete_tx_packet(p_hwfn,
+							    p_ll2_conn->my_id,
+							    p_pkt->cookie,
+							    tx_frag,
+							    b_last_frag,
+							    b_last_packet);
+		}
 	}
 }
 
@@ -540,12 +550,457 @@ static void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
 		list_move_tail(&p_pkt->list_entry, &p_rx->free_descq);
 
-		rx_buf_addr = p_pkt->rx_buf_addr;
-		cookie = p_pkt->cookie;
+		if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+			struct qed_ooo_buffer *p_buffer;
+
+			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+			qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info,
+						p_buffer);
+		} else {
+			rx_buf_addr = p_pkt->rx_buf_addr;
+			cookie = p_pkt->cookie;
+
+			b_last = list_empty(&p_rx->active_descq);
+		}
+	}
+}
+
+#if IS_ENABLED(CONFIG_QED_ISCSI)
+static u8 qed_ll2_convert_rx_parse_to_tx_flags(u16 parse_flags)
+{
+	u8 bd_flags = 0;
+
+	if (GET_FIELD(parse_flags, PARSING_AND_ERR_FLAGS_TAG8021QEXIST))
+		SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_VLAN_INSERTION, 1);
+
+	return bd_flags;
+}
+
+static int qed_ll2_lb_rxq_handler(struct qed_hwfn *p_hwfn,
+				  struct qed_ll2_info *p_ll2_conn)
+{
+	struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue;
+	u16 packet_length = 0, parse_flags = 0, vlan = 0;
+	struct qed_ll2_rx_packet *p_pkt = NULL;
+	u32 num_ooo_add_to_peninsula = 0, cid;
+	union core_rx_cqe_union *cqe = NULL;
+	u16 cq_new_idx = 0, cq_old_idx = 0;
+	struct qed_ooo_buffer *p_buffer;
+	struct ooo_opaque *iscsi_ooo;
+	u8 placement_offset = 0;
+	u8 cqe_type;
+
+	cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons);
+	cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
+	if (cq_new_idx == cq_old_idx)
+		return 0;
+
+	while (cq_new_idx != cq_old_idx) {
+		struct core_rx_fast_path_cqe *p_cqe_fp;
+
+		cqe = qed_chain_consume(&p_rx->rcq_chain);
+		cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain);
+		cqe_type = cqe->rx_cqe_sp.type;
+
+		if (cqe_type != CORE_RX_CQE_TYPE_REGULAR) {
+			DP_NOTICE(p_hwfn,
+				  "Got a non-regular LB LL2 completion [type 0x%02x]\n",
+				  cqe_type);
+			return -EINVAL;
+		}
+		p_cqe_fp = &cqe->rx_cqe_fp;
+
+		placement_offset = p_cqe_fp->placement_offset;
+		parse_flags = le16_to_cpu(p_cqe_fp->parse_flags.flags);
+		packet_length = le16_to_cpu(p_cqe_fp->packet_length);
+		vlan = le16_to_cpu(p_cqe_fp->vlan);
+		iscsi_ooo = (struct ooo_opaque *)&p_cqe_fp->opaque_data;
+		qed_ooo_save_history_entry(p_hwfn, p_hwfn->p_ooo_info,
+					   iscsi_ooo);
+		cid = le32_to_cpu(iscsi_ooo->cid);
+
+		/* Process delete isle first */
+		if (iscsi_ooo->drop_size)
+			qed_ooo_delete_isles(p_hwfn, p_hwfn->p_ooo_info, cid,
+					     iscsi_ooo->drop_isle,
+					     iscsi_ooo->drop_size);
+
+		if (iscsi_ooo->ooo_opcode == TCP_EVENT_NOP)
+			continue;
+
+		/* Now process create/add/join isles */
+		if (list_empty(&p_rx->active_descq)) {
+			DP_NOTICE(p_hwfn,
+				  "LL2 OOO RX chain has no submitted buffers\n"
+				  );
+			return -EIO;
+		}
+
+		p_pkt = list_first_entry(&p_rx->active_descq,
+					 struct qed_ll2_rx_packet, list_entry);
+
+		if ((iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_NEW_ISLE) ||
+		    (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_RIGHT) ||
+		    (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_ISLE_LEFT) ||
+		    (iscsi_ooo->ooo_opcode == TCP_EVENT_ADD_PEN) ||
+		    (iscsi_ooo->ooo_opcode == TCP_EVENT_JOIN)) {
+			if (!p_pkt) {
+				DP_NOTICE(p_hwfn,
+					  "LL2 OOO RX packet is not valid\n");
+				return -EIO;
+			}
+			list_del(&p_pkt->list_entry);
+			p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+			p_buffer->packet_length = packet_length;
+			p_buffer->parse_flags = parse_flags;
+			p_buffer->vlan = vlan;
+			p_buffer->placement_offset = placement_offset;
+			qed_chain_consume(&p_rx->rxq_chain);
+			list_add_tail(&p_pkt->list_entry, &p_rx->free_descq);
+
+			switch (iscsi_ooo->ooo_opcode) {
+			case TCP_EVENT_ADD_NEW_ISLE:
+				qed_ooo_add_new_isle(p_hwfn,
+						     p_hwfn->p_ooo_info,
+						     cid,
+						     iscsi_ooo->ooo_isle,
+						     p_buffer);
+				break;
+			case TCP_EVENT_ADD_ISLE_RIGHT:
+				qed_ooo_add_new_buffer(p_hwfn,
+						       p_hwfn->p_ooo_info,
+						       cid,
+						       iscsi_ooo->ooo_isle,
+						       p_buffer,
+						       QED_OOO_RIGHT_BUF);
+				break;
+			case TCP_EVENT_ADD_ISLE_LEFT:
+				qed_ooo_add_new_buffer(p_hwfn,
+						       p_hwfn->p_ooo_info,
+						       cid,
+						       iscsi_ooo->ooo_isle,
+						       p_buffer,
+						       QED_OOO_LEFT_BUF);
+				break;
+			case TCP_EVENT_JOIN:
+				qed_ooo_add_new_buffer(p_hwfn,
+						       p_hwfn->p_ooo_info,
+						       cid,
+						       iscsi_ooo->ooo_isle +
+						       1,
+						       p_buffer,
+						       QED_OOO_LEFT_BUF);
+				qed_ooo_join_isles(p_hwfn,
+						   p_hwfn->p_ooo_info,
+						   cid, iscsi_ooo->ooo_isle);
+				break;
+			case TCP_EVENT_ADD_PEN:
+				num_ooo_add_to_peninsula++;
+				qed_ooo_put_ready_buffer(p_hwfn,
+							 p_hwfn->p_ooo_info,
+							 p_buffer, true);
+				break;
+			}
+		} else {
+			DP_NOTICE(p_hwfn,
+				  "Unexpected event (%d) TX OOO completion\n",
+				  iscsi_ooo->ooo_opcode);
+		}
+	}
+
+	return 0;
+}
+
+static void
+qed_ooo_submit_tx_buffers(struct qed_hwfn *p_hwfn,
+			  struct qed_ll2_info *p_ll2_conn)
+{
+	struct qed_ooo_buffer *p_buffer;
+	int rc;
+	u16 l4_hdr_offset_w;
+	dma_addr_t first_frag;
+	u16 parse_flags;
+	u8 bd_flags;
+
+	/* Submit Tx buffers here */
+	while ((p_buffer = qed_ooo_get_ready_buffer(p_hwfn,
+						    p_hwfn->p_ooo_info))) {
+		l4_hdr_offset_w = 0;
+		bd_flags = 0;
+
+		first_frag = p_buffer->rx_buffer_phys_addr +
+			     p_buffer->placement_offset;
+		parse_flags = p_buffer->parse_flags;
+		bd_flags = qed_ll2_convert_rx_parse_to_tx_flags(parse_flags);
+		SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_FORCE_VLAN_MODE, 1);
+		SET_FIELD(bd_flags, CORE_TX_BD_FLAGS_L4_PROTOCOL, 1);
+
+		rc = qed_ll2_prepare_tx_packet(p_hwfn, p_ll2_conn->my_id, 1,
+					       p_buffer->vlan, bd_flags,
+					       l4_hdr_offset_w,
+					       p_ll2_conn->tx_dest, 0,
+					       first_frag,
+					       p_buffer->packet_length,
+					       p_buffer, true);
+		if (rc) {
+			qed_ooo_put_ready_buffer(p_hwfn, p_hwfn->p_ooo_info,
+						 p_buffer, false);
+			break;
+		}
+	}
+}
+
+static void
+qed_ooo_submit_rx_buffers(struct qed_hwfn *p_hwfn,
+			  struct qed_ll2_info *p_ll2_conn)
+{
+	struct qed_ooo_buffer *p_buffer;
+	int rc;
+
+	while ((p_buffer = qed_ooo_get_free_buffer(p_hwfn,
+						   p_hwfn->p_ooo_info))) {
+		rc = qed_ll2_post_rx_buffer(p_hwfn,
+					    p_ll2_conn->my_id,
+					    p_buffer->rx_buffer_phys_addr,
+					    0, p_buffer, true);
+		if (rc) {
+			qed_ooo_put_free_buffer(p_hwfn,
+						p_hwfn->p_ooo_info, p_buffer);
+			break;
+		}
+	}
+}
+
+static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
+{
+	struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
+	int rc;
+
+	rc = qed_ll2_lb_rxq_handler(p_hwfn, p_ll2_conn);
+	if (rc)
+		return rc;
+
+	qed_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn);
+	qed_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn);
+
+	return 0;
+}
+
+static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie)
+{
+	struct qed_ll2_info *p_ll2_conn = (struct qed_ll2_info *)p_cookie;
+	struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue;
+	struct qed_ll2_tx_packet *p_pkt = NULL;
+	struct qed_ooo_buffer *p_buffer;
+	bool b_dont_submit_rx = false;
+	u16 new_idx = 0, num_bds = 0;
+	int rc;
+
+	new_idx = le16_to_cpu(*p_tx->p_fw_cons);
+	num_bds = ((s16)new_idx - (s16)p_tx->bds_idx);
+
+	if (!num_bds)
+		return 0;
+
+	while (num_bds) {
+		if (list_empty(&p_tx->active_descq))
+			return -EINVAL;
+
+		p_pkt = list_first_entry(&p_tx->active_descq,
+					 struct qed_ll2_tx_packet, list_entry);
+		if (!p_pkt)
+			return -EINVAL;
+
+		if (p_pkt->bd_used != 1) {
+			DP_NOTICE(p_hwfn,
+				  "Unexpectedly many BDs(%d) in TX OOO completion\n",
+				  p_pkt->bd_used);
+			return -EINVAL;
+		}
+
+		list_del(&p_pkt->list_entry);
+
+		num_bds--;
+		p_tx->bds_idx++;
+		qed_chain_consume(&p_tx->txq_chain);
+
+		p_buffer = (struct qed_ooo_buffer *)p_pkt->cookie;
+		list_add_tail(&p_pkt->list_entry, &p_tx->free_descq);
+
+		if (b_dont_submit_rx) {
+			qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info,
+						p_buffer);
+			continue;
+		}
+
+		rc = qed_ll2_post_rx_buffer(p_hwfn, p_ll2_conn->my_id,
+					    p_buffer->rx_buffer_phys_addr, 0,
+					    p_buffer, true);
+		if (rc != 0) {
+			qed_ooo_put_free_buffer(p_hwfn,
+						p_hwfn->p_ooo_info, p_buffer);
+			b_dont_submit_rx = true;
+		}
+	}
+
+	qed_ooo_submit_tx_buffers(p_hwfn, p_ll2_conn);
+
+	return 0;
+}
+
+static int
+qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
+			       struct qed_ll2_info *p_ll2_info,
+			       u16 rx_num_ooo_buffers, u16 mtu)
+{
+	struct qed_ooo_buffer *p_buf = NULL;
+	void *p_virt;
+	u16 buf_idx;
+	int rc = 0;
+
+	if (p_ll2_info->conn_type != QED_LL2_TYPE_ISCSI_OOO)
+		return rc;
+
+	if (!rx_num_ooo_buffers)
+		return -EINVAL;
+
+	for (buf_idx = 0; buf_idx < rx_num_ooo_buffers; buf_idx++) {
+		p_buf = kzalloc(sizeof(*p_buf), GFP_KERNEL);
+		if (!p_buf) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		p_buf->rx_buffer_size = mtu + 26 + ETH_CACHE_LINE_SIZE;
+		p_buf->rx_buffer_size = (p_buf->rx_buffer_size +
+					 ETH_CACHE_LINE_SIZE - 1) &
+					~(ETH_CACHE_LINE_SIZE - 1);
+		p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+					    p_buf->rx_buffer_size,
+					    &p_buf->rx_buffer_phys_addr,
+					    GFP_KERNEL);
+		if (!p_virt) {
+			kfree(p_buf);
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		p_buf->rx_buffer_virt_addr = p_virt;
+		qed_ooo_put_free_buffer(p_hwfn, p_hwfn->p_ooo_info, p_buf);
+	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_LL2,
+		   "Allocated [%04x] LL2 OOO buffers [each of size 0x%08x]\n",
+		   rx_num_ooo_buffers, p_buf->rx_buffer_size);
+
+out:
+	return rc;
+}
+
+static void
+qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
+				 struct qed_ll2_info *p_ll2_conn)
+{
+	if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO)
+		return;
+
+	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
+	qed_ooo_submit_rx_buffers(p_hwfn, p_ll2_conn);
+}
+
+static void qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
+					   struct qed_ll2_info *p_ll2_conn)
+{
+	struct qed_ooo_buffer *p_buffer;
+
+	if (p_ll2_conn->conn_type != QED_LL2_TYPE_ISCSI_OOO)
+		return;
+
+	qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
+	while ((p_buffer = qed_ooo_get_free_buffer(p_hwfn,
+						   p_hwfn->p_ooo_info))) {
+		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+				  p_buffer->rx_buffer_size,
+				  p_buffer->rx_buffer_virt_addr,
+				  p_buffer->rx_buffer_phys_addr);
+		kfree(p_buffer);
+	}
+}
+
+static void qed_ll2_stop_ooo(struct qed_dev *cdev)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+
+	DP_VERBOSE(cdev, QED_MSG_STORAGE, "Stopping LL2 OOO queue [%02x]\n",
+		   *handle);
+
+	qed_ll2_terminate_connection(hwfn, *handle);
+	qed_ll2_release_connection(hwfn, *handle);
+	*handle = QED_LL2_UNUSED_HANDLE;
+}
+
+static int qed_ll2_start_ooo(struct qed_dev *cdev,
+			     struct qed_ll2_params *params)
+{
+	struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev);
+	u8 *handle = &hwfn->pf_params.iscsi_pf_params.ll2_ooo_queue_id;
+	struct qed_ll2_info *ll2_info;
+	int rc;
+
+	ll2_info = kzalloc(sizeof(*ll2_info), GFP_KERNEL);
+	if (!ll2_info)
+		return -ENOMEM;
+	ll2_info->conn_type = QED_LL2_TYPE_ISCSI_OOO;
+	ll2_info->mtu = params->mtu;
+	ll2_info->rx_drop_ttl0_flg = params->drop_ttl0_packets;
+	ll2_info->rx_vlan_removal_en = params->rx_vlan_stripping;
+	ll2_info->tx_tc = OOO_LB_TC;
+	ll2_info->tx_dest = CORE_TX_DEST_LB;
+
+	rc = qed_ll2_acquire_connection(hwfn, ll2_info,
+					QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
+					handle);
+	kfree(ll2_info);
+	if (rc) {
+		DP_INFO(cdev, "Failed to acquire LL2 OOO connection\n");
+		goto out;
+	}
 
-		b_last = list_empty(&p_rx->active_descq);
+	rc = qed_ll2_establish_connection(hwfn, *handle);
+	if (rc) {
+		DP_INFO(cdev, "Failed to establist LL2 OOO connection\n");
+		goto fail;
 	}
+
+	return 0;
+
+fail:
+	qed_ll2_release_connection(hwfn, *handle);
+out:
+	*handle = QED_LL2_UNUSED_HANDLE;
+	return rc;
 }
+#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+static int qed_ll2_lb_rxq_completion(struct qed_hwfn *p_hwfn,
+				     void *p_cookie) { return -EINVAL; }
+static int qed_ll2_lb_txq_completion(struct qed_hwfn *p_hwfn,
+				     void *p_cookie) { return -EINVAL; }
+static inline int
+qed_ll2_acquire_connection_ooo(struct qed_hwfn *p_hwfn,
+			       struct qed_ll2_info *p_ll2_info,
+			       u16 rx_num_ooo_buffers, u16 mtu) { return 0; }
+static inline void
+qed_ll2_establish_connection_ooo(struct qed_hwfn *p_hwfn,
+				 struct qed_ll2_info *p_ll2_conn) { return; }
+static inline void
+qed_ll2_release_connection_ooo(struct qed_hwfn *p_hwfn,
+			       struct qed_ll2_info *p_ll2_conn) { return; }
+static inline void qed_ll2_stop_ooo(struct qed_dev *cdev) { return; }
+static inline int qed_ll2_start_ooo(struct qed_dev *cdev,
+				    struct qed_ll2_params *params)
+				    { return -EINVAL; }
+#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
 
 static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 				     struct qed_ll2_info *p_ll2_conn,
@@ -588,7 +1043,8 @@ static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg;
 	p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en;
 	p_ramrod->queue_id = p_ll2_conn->queue_id;
-	p_ramrod->main_func_queue = 1;
+	p_ramrod->main_func_queue = (conn_type == QED_LL2_TYPE_ISCSI_OOO) ? 0
+									  : 1;
 
 	if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) &&
 	    p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) {
@@ -619,6 +1075,11 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	if (!QED_LL2_TX_REGISTERED(p_ll2_conn))
 		return 0;
 
+	if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO)
+		p_ll2_conn->tx_stats_en = 0;
+	else
+		p_ll2_conn->tx_stats_en = 1;
+
 	/* Get SPQ entry */
 	memset(&init_data, 0, sizeof(init_data));
 	init_data.cid = p_ll2_conn->cid;
@@ -636,7 +1097,6 @@ static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn,
 	p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn));
 	p_ramrod->sb_index = p_tx->tx_sb_index;
 	p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu);
-	p_ll2_conn->tx_stats_en = 1;
 	p_ramrod->stats_en = p_ll2_conn->tx_stats_en;
 	p_ramrod->stats_id = p_ll2_conn->tx_stats_id;
 
@@ -860,9 +1320,19 @@ int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn,
 	if (rc)
 		goto q_allocate_fail;
 
+	rc = qed_ll2_acquire_connection_ooo(p_hwfn, p_ll2_info,
+					    rx_num_desc * 2, p_params->mtu);
+	if (rc)
+		goto q_allocate_fail;
+
 	/* Register callbacks for the Rx/Tx queues */
-	comp_rx_cb = qed_ll2_rxq_completion;
-	comp_tx_cb = qed_ll2_txq_completion;
+	if (p_params->conn_type == QED_LL2_TYPE_ISCSI_OOO) {
+		comp_rx_cb = qed_ll2_lb_rxq_completion;
+		comp_tx_cb = qed_ll2_lb_txq_completion;
+	} else {
+		comp_rx_cb = qed_ll2_rxq_completion;
+		comp_tx_cb = qed_ll2_txq_completion;
+	}
 
 	if (rx_num_desc) {
 		qed_int_register_cb(p_hwfn, comp_rx_cb,
@@ -975,6 +1445,8 @@ int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
 	if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE)
 		qed_wr(p_hwfn, p_hwfn->p_main_ptt, PRS_REG_USE_LIGHT_L2, 1);
 
+	qed_ll2_establish_connection_ooo(p_hwfn, p_ll2_conn);
+
 	return rc;
 }
 
@@ -1213,6 +1685,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 			      u16 vlan,
 			      u8 bd_flags,
 			      u16 l4_hdr_offset_w,
+			      enum qed_ll2_tx_dest e_tx_dest,
 			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
 			      dma_addr_t first_frag,
 			      u16 first_frag_len, void *cookie, u8 notify_fw)
@@ -1222,6 +1695,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 	enum core_roce_flavor_type roce_flavor;
 	struct qed_ll2_tx_queue *p_tx;
 	struct qed_chain *p_tx_chain;
+	enum core_tx_dest tx_dest;
 	unsigned long flags;
 	int rc = 0;
 
@@ -1252,6 +1726,8 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 		goto out;
 	}
 
+	tx_dest = e_tx_dest == QED_LL2_TX_DEST_NW ? CORE_TX_DEST_NW :
+						    CORE_TX_DEST_LB;
 	if (qed_roce_flavor == QED_LL2_ROCE) {
 		roce_flavor = CORE_ROCE;
 	} else if (qed_roce_flavor == QED_LL2_RROCE) {
@@ -1266,7 +1742,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 				      num_of_bds, first_frag,
 				      first_frag_len, cookie, notify_fw);
 	qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp,
-					 num_of_bds, CORE_TX_DEST_NW,
+					 num_of_bds, tx_dest,
 					 vlan, bd_flags, l4_hdr_offset_w,
 					 roce_flavor,
 					 first_frag, first_frag_len);
@@ -1341,6 +1817,9 @@ int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
 		qed_ll2_rxq_flush(p_hwfn, connection_handle);
 	}
 
+	if (p_ll2_conn->conn_type == QED_LL2_TYPE_ISCSI_OOO)
+		qed_ooo_release_all_isles(p_hwfn, p_hwfn->p_ooo_info);
+
 	return rc;
 }
 
@@ -1371,6 +1850,8 @@ void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle)
 
 	qed_cxt_release_cid(p_hwfn, p_ll2_conn->cid);
 
+	qed_ll2_release_connection_ooo(p_hwfn, p_ll2_conn);
+
 	mutex_lock(&p_ll2_conn->mutex);
 	p_ll2_conn->b_active = false;
 	mutex_unlock(&p_ll2_conn->mutex);
@@ -1517,6 +1998,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	enum qed_ll2_conn_type conn_type;
 	struct qed_ptt *p_ptt;
 	int rc, i;
+	u8 gsi_enable = 1;
 
 	/* Initialize LL2 locks & lists */
 	INIT_LIST_HEAD(&cdev->ll2->list);
@@ -1548,6 +2030,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	switch (QED_LEADING_HWFN(cdev)->hw_info.personality) {
 	case QED_PCI_ISCSI:
 		conn_type = QED_LL2_TYPE_ISCSI;
+		gsi_enable = 0;
 		break;
 	case QED_PCI_ETH_ROCE:
 		conn_type = QED_LL2_TYPE_ROCE;
@@ -1564,7 +2047,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 	ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping;
 	ll2_info.tx_tc = 0;
 	ll2_info.tx_dest = CORE_TX_DEST_NW;
-	ll2_info.gsi_enable = 1;
+	ll2_info.gsi_enable = gsi_enable;
 
 	rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info,
 					QED_LL2_RX_SIZE, QED_LL2_TX_SIZE,
@@ -1611,6 +2094,17 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params)
 		goto release_terminate;
 	}
 
+	if (cdev->hwfns[0].hw_info.personality == QED_PCI_ISCSI &&
+	    cdev->hwfns[0].pf_params.iscsi_pf_params.ooo_enable) {
+		DP_VERBOSE(cdev, QED_MSG_STORAGE, "Starting OOO LL2 queue\n");
+		rc = qed_ll2_start_ooo(cdev, params);
+		if (rc) {
+			DP_INFO(cdev,
+				"Failed to initialize the OOO LL2 queue\n");
+			goto release_terminate;
+		}
+	}
+
 	p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev));
 	if (!p_ptt) {
 		DP_INFO(cdev, "Failed to acquire PTT\n");
@@ -1660,6 +2154,10 @@ static int qed_ll2_stop(struct qed_dev *cdev)
 	qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt);
 	eth_zero_addr(cdev->ll2_mac_address);
 
+	if (cdev->hwfns[0].hw_info.personality == QED_PCI_ISCSI &&
+	    cdev->hwfns[0].pf_params.iscsi_pf_params.ooo_enable)
+		qed_ll2_stop_ooo(cdev);
+
 	rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev),
 					  cdev->ll2->handle);
 	if (rc)
@@ -1714,7 +2212,8 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
 	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev),
 				       cdev->ll2->handle,
 				       1 + skb_shinfo(skb)->nr_frags,
-				       vlan, flags, 0, 0 /* RoCE FLAVOR */,
+				       vlan, flags, 0, QED_LL2_TX_DEST_NW,
+				       0 /* RoCE FLAVOR */,
 				       mapping, skb->len, skb, 1);
 	if (rc)
 		goto err;
@@ -1730,6 +2229,7 @@ static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb)
 						       mapping))) {
 				DP_NOTICE(cdev,
 					  "Unable to map frag - dropping packet\n");
+				rc = -ENOMEM;
 				goto err;
 			}
 		} else {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
index 4e3d62a16cab..6625a3ae5a33 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h
@@ -41,6 +41,12 @@ enum qed_ll2_conn_type {
 	MAX_QED_LL2_RX_CONN_TYPE
 };
 
+enum qed_ll2_tx_dest {
+	QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */
+	QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */
+	QED_LL2_TX_DEST_MAX
+};
+
 struct qed_ll2_rx_packet {
 	struct list_head list_entry;
 	struct core_rx_bd_with_buff_len *rxq_bd;
@@ -192,6 +198,8 @@ int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn,
  * @param l4_hdr_offset_w	L4 Header Offset from start of packet
  *				(in words). This is needed if both l4_csum
  *				and ipv6_ext are set
+ * @param e_tx_dest             indicates if the packet is to be transmitted via
+ *                              loopback or to the network
  * @param first_frag
  * @param first_frag_len
  * @param cookie
@@ -206,6 +214,7 @@ int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn,
 			      u16 vlan,
 			      u8 bd_flags,
 			      u16 l4_hdr_offset_w,
+			      enum qed_ll2_tx_dest e_tx_dest,
 			      enum qed_ll2_roce_flavor_type qed_roce_flavor,
 			      dma_addr_t first_frag,
 			      u16 first_frag_len, void *cookie, u8 notify_fw);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 333c7442e48a..aeb98d8c5626 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -221,6 +221,10 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 		dev_info->fw_eng = FW_ENGINEERING_VERSION;
 		dev_info->mf_mode = cdev->mf_mode;
 		dev_info->tx_switching = true;
+
+		if (QED_LEADING_HWFN(cdev)->hw_info.b_wol_support ==
+		    QED_WOL_SUPPORT_PME)
+			dev_info->wol_support = true;
 	} else {
 		qed_vf_get_fw_version(&cdev->hwfns[0], &dev_info->fw_major,
 				      &dev_info->fw_minor, &dev_info->fw_rev,
@@ -243,6 +247,8 @@ int qed_fill_dev_info(struct qed_dev *cdev,
 				    &dev_info->mfw_rev, NULL);
 	}
 
+	dev_info->mtu = QED_LEADING_HWFN(cdev)->hw_info.mtu;
+
 	return 0;
 }
 
@@ -1430,11 +1436,106 @@ static int qed_set_led(struct qed_dev *cdev, enum qed_led_mode mode)
 	return status;
 }
 
+/* Send the requested WoL mode to the MFW; nothing to do for a VF */
+static int qed_update_wol(struct qed_dev *cdev, bool enabled)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	enum qed_ov_wol wol_mode;
+	struct qed_ptt *p_ptt;
+	int status;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	wol_mode = enabled ? QED_OV_WOL_ENABLED : QED_OV_WOL_DISABLED;
+	status = qed_mcp_ov_update_wol(p_hwfn, p_ptt, wol_mode);
+	if (!status)
+		status = qed_mcp_ov_update_current_config(p_hwfn, p_ptt,
+							  QED_OV_CLIENT_DRV);
+	qed_ptt_release(p_hwfn, p_ptt);
+	return status;
+}
+
+/* Send the driver state (active/disabled) to the MFW; no-op for a VF */
+static int qed_update_drv_state(struct qed_dev *cdev, bool active)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	enum qed_ov_driver_state drv_state;
+	struct qed_ptt *p_ptt;
+	int rc;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	drv_state = active ? QED_OV_DRIVER_STATE_ACTIVE :
+			     QED_OV_DRIVER_STATE_DISABLED;
+	rc = qed_mcp_ov_update_driver_state(p_hwfn, p_ptt, drv_state);
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
+
+/* Send a MAC address to the MFW; nothing to do for a VF */
+static int qed_update_mac(struct qed_dev *cdev, u8 *mac)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	int rc;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	/* The config-change notification is skipped if the MAC update fails */
+	rc = qed_mcp_ov_update_mac(p_hwfn, p_ptt, mac);
+	if (!rc)
+		rc = qed_mcp_ov_update_current_config(p_hwfn, p_ptt,
+						      QED_OV_CLIENT_DRV);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
+
+/* Send the MTU size to the MFW; nothing to do for a VF */
+static int qed_update_mtu(struct qed_dev *cdev, u16 mtu)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt;
+	int rc;
+
+	if (IS_VF(cdev))
+		return 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EAGAIN;
+
+	/* The config-change notification is skipped if the MTU update fails */
+	rc = qed_mcp_ov_update_mtu(p_hwfn, p_ptt, mtu);
+	if (!rc)
+		rc = qed_mcp_ov_update_current_config(p_hwfn, p_ptt,
+						      QED_OV_CLIENT_DRV);
+
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
+
 static struct qed_selftest_ops qed_selftest_ops_pass = {
 	.selftest_memory = &qed_selftest_memory,
 	.selftest_interrupt = &qed_selftest_interrupt,
 	.selftest_register = &qed_selftest_register,
 	.selftest_clock = &qed_selftest_clock,
+	.selftest_nvram = &qed_selftest_nvram,
 };
 
 const struct qed_common_ops qed_common_ops_pass = {
@@ -1464,6 +1565,10 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.get_coalesce = &qed_get_coalesce,
 	.set_coalesce = &qed_set_coalesce,
 	.set_led = &qed_set_led,
+	.update_drv_state = &qed_update_drv_state,
+	.update_mac = &qed_update_mac,
+	.update_mtu = &qed_update_mtu,
+	.update_wol = &qed_update_wol,
 };
 
 void qed_get_protocol_stats(struct qed_dev *cdev,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index bdc9ba92f6d4..6dd3ce443484 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/string.h>
+#include <linux/etherdevice.h>
 #include "qed.h"
 #include "qed_dcbx.h"
 #include "qed_hsi.h"
@@ -329,6 +330,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 				 struct qed_mcp_mb_params *p_mb_params)
 {
 	u32 union_data_addr;
+
 	int rc;
 
 	/* MCP not initialized */
@@ -374,11 +376,32 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn,
 		u32 *o_mcp_param)
 {
 	struct qed_mcp_mb_params mb_params;
+	union drv_union_data data_src;
 	int rc;
 
 	memset(&mb_params, 0, sizeof(mb_params));
+	memset(&data_src, 0, sizeof(data_src));
 	mb_params.cmd = cmd;
 	mb_params.param = param;
+
+	/* In case of UNLOAD_DONE, set the primary MAC */
+	if ((cmd == DRV_MSG_CODE_UNLOAD_DONE) &&
+	    (p_hwfn->cdev->wol_config == QED_OV_WOL_ENABLED)) {
+		u8 *p_mac = p_hwfn->cdev->wol_mac;
+
+		data_src.wol_mac.mac_upper = p_mac[0] << 8 | p_mac[1];
+		data_src.wol_mac.mac_lower = p_mac[2] << 24 | p_mac[3] << 16 |
+					     p_mac[4] << 8 | p_mac[5];
+
+		DP_VERBOSE(p_hwfn,
+			   (QED_MSG_SP | NETIF_MSG_IFDOWN),
+			   "Setting WoL MAC: %pM --> [%08x,%08x]\n",
+			   p_mac, data_src.wol_mac.mac_upper,
+			   data_src.wol_mac.mac_lower);
+
+		mb_params.p_data_src = &data_src;
+	}
+
 	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
 	if (rc)
 		return rc;
@@ -1001,28 +1024,89 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type)
 	return 0;
 }
 
+/* Old MFW has a global configuration for all PFs regarding RDMA support */
+static void
+qed_mcp_get_shmem_proto_legacy(struct qed_hwfn *p_hwfn,
+			       enum qed_pci_personality *p_proto)
+{
+	/* There wasn't ever a legacy MFW that published iwarp.
+	 * So at this point, this is either plain l2 or RoCE.
+	 */
+	if (test_bit(QED_DEV_CAP_ROCE, &p_hwfn->hw_info.device_capabilities))
+		*p_proto = QED_PCI_ETH_ROCE;
+	else
+		*p_proto = QED_PCI_ETH;
+
+	DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
+		   "According to Legacy capabilities, L2 personality is %08x\n",
+		   (u32) *p_proto);
+}
+
+static int
+qed_mcp_get_shmem_proto_mfw(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt,
+			    enum qed_pci_personality *p_proto)
+{
+	u32 resp = 0, param = 0;
+	int rc;
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt,
+			 DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL, 0, &resp, &param);
+	if (rc)
+		return rc;
+	if (resp != FW_MSG_CODE_OK) {
+		DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP,
+			   "MFW lacks support for command; Returns %08x\n",
+			   resp);
+		return -EINVAL;
+	}
+
+	switch (param) {
+	case FW_MB_PARAM_GET_PF_RDMA_NONE:
+		*p_proto = QED_PCI_ETH;
+		break;
+	case FW_MB_PARAM_GET_PF_RDMA_ROCE:
+		*p_proto = QED_PCI_ETH_ROCE;
+		break;
+	case FW_MB_PARAM_GET_PF_RDMA_BOTH:
+		DP_NOTICE(p_hwfn,
+			  "Current day drivers don't support RoCE & iWARP. Default to RoCE-only\n");
+		*p_proto = QED_PCI_ETH_ROCE;
+		break;
+	case FW_MB_PARAM_GET_PF_RDMA_IWARP:
+	default:
+		DP_NOTICE(p_hwfn,
+			  "MFW answers GET_PF_RDMA_PROTOCOL but param is %08x\n",
+			  param);
+		return -EINVAL;
+	}
+
+	DP_VERBOSE(p_hwfn,
+		   NETIF_MSG_IFUP,
+		   "According to capabilities, L2 personality is %08x [resp %08x param %08x]\n",
+		   (u32) *p_proto, resp, param);
+	return 0;
+}
+
 static int
 qed_mcp_get_shmem_proto(struct qed_hwfn *p_hwfn,
 			struct public_func *p_info,
+			struct qed_ptt *p_ptt,
 			enum qed_pci_personality *p_proto)
 {
 	int rc = 0;
 
 	switch (p_info->config & FUNC_MF_CFG_PROTOCOL_MASK) {
 	case FUNC_MF_CFG_PROTOCOL_ETHERNET:
-		if (test_bit(QED_DEV_CAP_ROCE,
-			     &p_hwfn->hw_info.device_capabilities))
-			*p_proto = QED_PCI_ETH_ROCE;
-		else
-			*p_proto = QED_PCI_ETH;
+		if (qed_mcp_get_shmem_proto_mfw(p_hwfn, p_ptt, p_proto))
+			qed_mcp_get_shmem_proto_legacy(p_hwfn, p_proto);
 		break;
 	case FUNC_MF_CFG_PROTOCOL_ISCSI:
 		*p_proto = QED_PCI_ISCSI;
 		break;
 	case FUNC_MF_CFG_PROTOCOL_ROCE:
 		DP_NOTICE(p_hwfn, "RoCE personality is not a valid value!\n");
-		rc = -EINVAL;
-		break;
+	/* Fallthrough */
 	default:
 		rc = -EINVAL;
 	}
@@ -1042,7 +1126,8 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 	info->pause_on_host = (shmem_info.config &
 			       FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0;
 
-	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) {
+	if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, p_ptt,
+				    &info->protocol)) {
 		DP_ERR(p_hwfn, "Unknown personality %08x\n",
 		       (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK));
 		return -EINVAL;
@@ -1057,6 +1142,9 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 		info->mac[3] = (u8)(shmem_info.mac_lower >> 16);
 		info->mac[4] = (u8)(shmem_info.mac_lower >> 8);
 		info->mac[5] = (u8)(shmem_info.mac_lower);
+
+		/* Store primary MAC for later possible WoL */
+		memcpy(&p_hwfn->cdev->wol_mac, info->mac, ETH_ALEN);
 	} else {
 		DP_NOTICE(p_hwfn, "MAC is 0 in shmem\n");
 	}
@@ -1068,13 +1156,30 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn,
 
 	info->ovlan = (u16)(shmem_info.ovlan_stag & FUNC_MF_CFG_OV_STAG_MASK);
 
+	info->mtu = (u16)shmem_info.mtu_size;
+
+	p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_NONE;
+	p_hwfn->cdev->wol_config = (u8)QED_OV_WOL_DEFAULT;
+	if (qed_mcp_is_init(p_hwfn)) {
+		u32 resp = 0, param = 0;
+		int rc;
+
+		rc = qed_mcp_cmd(p_hwfn, p_ptt,
+				 DRV_MSG_CODE_OS_WOL, 0, &resp, &param);
+		if (rc)
+			return rc;
+		if (resp == FW_MSG_CODE_OS_WOL_SUPPORTED)
+			p_hwfn->hw_info.b_wol_support = QED_WOL_SUPPORT_PME;
+	}
+
 	DP_VERBOSE(p_hwfn, (QED_MSG_SP | NETIF_MSG_IFUP),
-		   "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x\n",
+		   "Read configuration from shmem: pause_on_host %02x protocol %02x BW [%02x - %02x] MAC %02x:%02x:%02x:%02x:%02x:%02x wwn port %llx node %llx ovlan %04x wol %02x\n",
 		info->pause_on_host, info->protocol,
 		info->bandwidth_min, info->bandwidth_max,
 		info->mac[0], info->mac[1], info->mac[2],
 		info->mac[3], info->mac[4], info->mac[5],
-		info->wwn_port, info->wwn_node, info->ovlan);
+		info->wwn_port, info->wwn_node,
+		info->ovlan, (u8)p_hwfn->hw_info.b_wol_support);
 
 	return 0;
 }
@@ -1223,6 +1328,178 @@ int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 	return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0;
 }
 
+int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     enum qed_ov_client client)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	switch (client) {
+	case QED_OV_CLIENT_DRV:
+		drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OS;
+		break;
+	case QED_OV_CLIENT_USER:
+		drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_OTHER;
+		break;
+	case QED_OV_CLIENT_VENDOR_SPEC:
+		drv_mb_param = DRV_MB_PARAM_OV_CURR_CFG_VENDOR_SPEC;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Invalid client type %d\n", client);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_CURR_CFG,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "MCP response failure, aborting\n");
+
+	return rc;
+}
+
+int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum qed_ov_driver_state drv_state)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	switch (drv_state) {
+	case QED_OV_DRIVER_STATE_NOT_LOADED:
+		drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_NOT_LOADED;
+		break;
+	case QED_OV_DRIVER_STATE_DISABLED:
+		drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_DISABLED;
+		break;
+	case QED_OV_DRIVER_STATE_ACTIVE:
+		drv_mb_param = DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE_ACTIVE;
+		break;
+	default:
+		DP_NOTICE(p_hwfn, "Invalid driver state %d\n", drv_state);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_DRIVER_STATE,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send driver state\n");
+
+	return rc;
+}
+
+int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u16 mtu)
+{
+	u32 mtu_param = (u32)mtu << DRV_MB_PARAM_OV_MTU_SIZE_SHIFT;
+	u32 mcp_resp = 0, mcp_param = 0;
+	int ret;
+
+	/* The mailbox reply is read back but not inspected here */
+	ret = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_MTU,
+			  mtu_param, &mcp_resp, &mcp_param);
+	if (ret)
+		DP_ERR(p_hwfn, "Failed to send mtu value, rc = %d\n", ret);
+
+	return ret;
+}
+
+int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u8 *mac)
+{
+	struct qed_mcp_mb_params mb_params;
+	union drv_union_data union_data;
+	int rc;
+
+	/* Zero the union so only the MAC, not stack garbage, is handed over */
+	memset(&union_data, 0, sizeof(union_data));
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = DRV_MSG_CODE_SET_VMAC;
+	mb_params.param = DRV_MSG_CODE_VMAC_TYPE_MAC <<
+			  DRV_MSG_CODE_VMAC_TYPE_SHIFT;
+	mb_params.param |= MCP_PF_ID(p_hwfn);
+	ether_addr_copy(&union_data.raw_data[0], mac);
+	mb_params.p_data_src = &union_data;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send mac address, rc = %d\n", rc);
+	/* Store primary MAC for later possible WoL */
+	memcpy(p_hwfn->cdev->wol_mac, mac, ETH_ALEN);
+	return rc;
+}
+
+int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, enum qed_ov_wol wol)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	if (p_hwfn->hw_info.b_wol_support == QED_WOL_SUPPORT_NONE) {
+		DP_VERBOSE(p_hwfn, QED_MSG_SP,
+			   "Can't change WoL configuration when WoL isn't supported\n");
+		return -EINVAL;
+	}
+
+	switch (wol) {
+	case QED_OV_WOL_DEFAULT:
+		drv_mb_param = DRV_MB_PARAM_WOL_DEFAULT;
+		break;
+	case QED_OV_WOL_DISABLED:
+		drv_mb_param = DRV_MB_PARAM_WOL_DISABLED;
+		break;
+	case QED_OV_WOL_ENABLED:
+		drv_mb_param = DRV_MB_PARAM_WOL_ENABLED;
+		break;
+	default:
+		DP_ERR(p_hwfn, "Invalid wol state %d\n", wol);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_WOL,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send wol mode, rc = %d\n", rc);
+
+	/* Store the WoL update for a future unload */
+	p_hwfn->cdev->wol_config = (u8)wol;
+
+	return rc;
+}
+
+int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt,
+			      enum qed_ov_eswitch eswitch)
+{
+	u32 resp = 0, param = 0;
+	u32 drv_mb_param;
+	int rc;
+
+	switch (eswitch) {
+	case QED_OV_ESWITCH_NONE:
+		drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_NONE;
+		break;
+	case QED_OV_ESWITCH_VEB:
+		drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEB;
+		break;
+	case QED_OV_ESWITCH_VEPA:
+		drv_mb_param = DRV_MB_PARAM_ESWITCH_MODE_VEPA;
+		break;
+	default:
+		DP_ERR(p_hwfn, "Invalid eswitch mode %d\n", eswitch);
+		return -EINVAL;
+	}
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_OV_UPDATE_ESWITCH_MODE,
+			 drv_mb_param, &resp, &param);
+	if (rc)
+		DP_ERR(p_hwfn, "Failed to send eswitch mode, rc = %d\n", rc);
+
+	return rc;
+}
+
 int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, enum qed_led_mode mode)
 {
@@ -1271,6 +1548,52 @@ int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len)
+{
+	u32 bytes_left = len, offset = 0, bytes_to_copy, read_len = 0;
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	u32 resp = 0, resp_param = 0;
+	struct qed_ptt *p_ptt;
+	int rc = 0;
+
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EBUSY;
+
+	while (bytes_left > 0) {
+		bytes_to_copy = min_t(u32, bytes_left, MCP_DRV_NVM_BUF_LEN);
+
+		rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+					DRV_MSG_CODE_NVM_READ_NVRAM,
+					addr + offset +
+					(bytes_to_copy <<
+					 DRV_MB_PARAM_NVM_LEN_SHIFT),
+					&resp, &resp_param,
+					&read_len,
+					(u32 *)(p_buf + offset));
+
+		if (rc || (resp != FW_MSG_CODE_NVM_OK)) {
+			DP_NOTICE(cdev, "MCP command rc = %d\n", rc);
+			break;
+		}
+
+		/* Reads can be lengthy and the kernel may be non-preemptible:
+		 * sleep whenever a read makes (bytes_left % 0x1000) wrap around.
+		 */
+		if (bytes_left % 0x1000 <
+		    (bytes_left - read_len) % 0x1000)
+			usleep_range(1000, 2000);
+
+		offset += read_len;
+		bytes_left -= read_len;
+	}
+
+	cdev->mcp_nvm_resp = resp;
+	qed_ptt_release(p_hwfn, p_ptt);
+
+	return rc;
+}
+
 int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	u32 drv_mb_param = 0, rsp, param;
@@ -1312,3 +1635,101 @@ int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 
 	return rc;
 }
+
+int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
+					 struct qed_ptt *p_ptt,
+					 u32 *num_images)
+{
+	u32 drv_mb_param = 0, rsp;
+	int rc = 0;
+
+	drv_mb_param = (DRV_MB_PARAM_BIST_NVM_TEST_NUM_IMAGES <<
+			DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT);
+
+	rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_BIST_TEST,
+			 drv_mb_param, &rsp, num_images);
+	if (rc)
+		return rc;
+
+	if (((rsp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK))
+		rc = -EINVAL;
+
+	return rc;
+}
+
+int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt,
+					struct bist_nvm_image_att *p_image_att,
+					u32 image_index)
+{
+	u32 buf_size = 0, param, resp = 0, resp_param = 0;
+	int rc;
+
+	param = DRV_MB_PARAM_BIST_NVM_TEST_IMAGE_BY_INDEX <<
+		DRV_MB_PARAM_BIST_TEST_INDEX_SHIFT;
+	param |= image_index << DRV_MB_PARAM_BIST_TEST_IMAGE_INDEX_SHIFT;
+
+	rc = qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt,
+				DRV_MSG_CODE_BIST_TEST, param,
+				&resp, &resp_param,
+				&buf_size,
+				(u32 *)p_image_att);
+	if (rc)
+		return rc;
+
+	if (((resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_OK) ||
+	    (p_image_att->return_code != 1))
+		rc = -EINVAL;
+
+	return rc;
+}
+
+#define QED_RESC_ALLOC_VERSION_MAJOR    1
+#define QED_RESC_ALLOC_VERSION_MINOR    0
+#define QED_RESC_ALLOC_VERSION				     \
+	((QED_RESC_ALLOC_VERSION_MAJOR <<		     \
+	  DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT) | \
+	 (QED_RESC_ALLOC_VERSION_MINOR <<		     \
+	  DRV_MB_PARAM_RESOURCE_ALLOC_VERSION_MINOR_SHIFT))
+int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  struct resource_info *p_resc_info,
+			  u32 *p_mcp_resp, u32 *p_mcp_param)
+{
+	struct qed_mcp_mb_params mb_params;
+	union drv_union_data union_data;
+	int rc;
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	memset(&union_data, 0, sizeof(union_data));
+	mb_params.cmd = DRV_MSG_GET_RESOURCE_ALLOC_MSG;
+	mb_params.param = QED_RESC_ALLOC_VERSION;
+
+	/* Need a sufficiently large struct, as qed_mcp_cmd_and_union()
+	 * is going to memcpy from and to it.
+	 */
+	memcpy(&union_data.resource, p_resc_info, sizeof(*p_resc_info));
+
+	mb_params.p_data_src = &union_data;
+	mb_params.p_data_dst = &union_data;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	/* Copy the data back */
+	memcpy(p_resc_info, &union_data.resource, sizeof(*p_resc_info));
+	*p_mcp_resp = mb_params.mcp_resp;
+	*p_mcp_param = mb_params.mcp_param;
+
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_SP,
+		   "MFW resource_info: version 0x%x, res_id 0x%x, size 0x%x, offset 0x%x, vf_size 0x%x, vf_offset 0x%x, flags 0x%x\n",
+		   *p_mcp_param,
+		   p_resc_info->res_id,
+		   p_resc_info->size,
+		   p_resc_info->offset,
+		   p_resc_info->vf_size,
+		   p_resc_info->vf_offset, p_resc_info->flags);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index dff520ed069b..407a2c1830fb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -92,6 +92,8 @@ struct qed_mcp_function_info {
 
 #define QED_MCP_VLAN_UNSET              (0xffff)
 	u16				ovlan;
+
+	u16				mtu;
 };
 
 struct qed_mcp_nvm_common {
@@ -147,6 +149,30 @@ union qed_mcp_protocol_stats {
 	struct qed_mcp_rdma_stats rdma_stats;
 };
 
+enum qed_ov_eswitch {
+	QED_OV_ESWITCH_NONE,
+	QED_OV_ESWITCH_VEB,
+	QED_OV_ESWITCH_VEPA
+};
+
+enum qed_ov_client {
+	QED_OV_CLIENT_DRV,
+	QED_OV_CLIENT_USER,
+	QED_OV_CLIENT_VENDOR_SPEC
+};
+
+enum qed_ov_driver_state {
+	QED_OV_DRIVER_STATE_NOT_LOADED,
+	QED_OV_DRIVER_STATE_DISABLED,
+	QED_OV_DRIVER_STATE_ACTIVE
+};
+
+enum qed_ov_wol {
+	QED_OV_WOL_DEFAULT,
+	QED_OV_WOL_DISABLED,
+	QED_OV_WOL_ENABLED
+};
+
 /**
  * @brief - returns the link params of the hw function
  *
@@ -278,6 +304,69 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn,
 			 struct qed_mcp_drv_version *p_ver);
 
 /**
+ * @brief Notify MFW about the change in base device properties
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param client - qed client type
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_current_config(struct qed_hwfn *p_hwfn,
+				     struct qed_ptt *p_ptt,
+				     enum qed_ov_client client);
+
+/**
+ * @brief Notify MFW about the driver state
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param drv_state - Driver state
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_driver_state(struct qed_hwfn *p_hwfn,
+				   struct qed_ptt *p_ptt,
+				   enum qed_ov_driver_state drv_state);
+
+/**
+ * @brief Send MTU size to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param mtu - MTU size
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_mtu(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u16 mtu);
+
+/**
+ * @brief Send MAC address to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param mac - MAC address
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_mac(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u8 *mac);
+
+/**
+ * @brief Send WOL mode to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param wol - WOL mode
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_wol(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  enum qed_ov_wol wol);
+
+/**
  * @brief Set LED status
  *
  *  @param p_hwfn
@@ -291,6 +380,18 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn,
 		    enum qed_led_mode mode);
 
 /**
+ * @brief Read from nvm
+ *
+ *  @param cdev
+ *  @param addr - nvm offset
+ *  @param p_buf - nvm read buffer
+ *  @param len - buffer len
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_nvm_read(struct qed_dev *cdev, u32 addr, u8 *p_buf, u32 len);
+
+/**
  * @brief Bist register test
  *
  *  @param p_hwfn    - hw function
@@ -312,6 +413,35 @@ int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn,
 int qed_mcp_bist_clock_test(struct qed_hwfn *p_hwfn,
 			    struct qed_ptt *p_ptt);
 
+/**
+ * @brief Bist nvm test - get number of images
+ *
+ *  @param p_hwfn       - hw function
+ *  @param p_ptt        - PTT required for register access
+ *  @param num_images   - number of images if operation was
+ *			  successful. 0 if not.
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_bist_nvm_test_get_num_images(struct qed_hwfn *p_hwfn,
+					 struct qed_ptt *p_ptt,
+					 u32 *num_images);
+
+/**
+ * @brief Bist nvm test - get image attributes by index
+ *
+ *  @param p_hwfn      - hw function
+ *  @param p_ptt       - PTT required for register access
+ *  @param p_image_att - Attributes of image
+ *  @param image_index - Index of image to get information for
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_bist_nvm_test_get_image_att(struct qed_hwfn *p_hwfn,
+					struct qed_ptt *p_ptt,
+					struct bist_nvm_image_att *p_image_att,
+					u32 image_index);
+
 /* Using hwfn number (and not pf_num) is required since in CMT mode,
  * same pf_num may be used by two different hwfn
  * TODO - this shouldn't really be in .h file, but until all fields
@@ -546,4 +676,32 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
 int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u32 mask_parities);
 
+/**
+ * @brief Send eswitch mode to MFW
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param eswitch - eswitch mode
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_ov_update_eswitch(struct qed_hwfn *p_hwfn,
+			      struct qed_ptt *p_ptt,
+			      enum qed_ov_eswitch eswitch);
+
+/**
+ * @brief - Gets the MFW allocation info for the given resource
+ *
+ *  @param p_hwfn
+ *  @param p_ptt
+ *  @param p_resc_info - descriptor of requested resource
+ *  @param p_mcp_resp
+ *  @param p_mcp_param
+ *
+ * @return int - 0 - operation was successful.
+ */
+int qed_mcp_get_resc_info(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt,
+			  struct resource_info *p_resc_info,
+			  u32 *p_mcp_resp, u32 *p_mcp_param);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.c b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
new file mode 100644
index 000000000000..155abcb507fd
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.c
@@ -0,0 +1,501 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include "qed.h"
+#include "qed_iscsi.h"
+#include "qed_ll2.h"
+#include "qed_ooo.h"
+
+static struct qed_ooo_archipelago
+*qed_ooo_seek_archipelago(struct qed_hwfn *p_hwfn,
+			  struct qed_ooo_info
+			  *p_ooo_info,
+			  u32 cid)
+{
+	struct qed_ooo_archipelago *p_archipelago = NULL;
+
+	list_for_each_entry(p_archipelago,
+			    &p_ooo_info->archipelagos_list, list_entry) {
+		if (p_archipelago->cid == cid)
+			return p_archipelago;
+	}
+
+	return NULL;
+}
+
+static struct qed_ooo_isle *qed_ooo_seek_isle(struct qed_hwfn *p_hwfn,
+					      struct qed_ooo_info *p_ooo_info,
+					      u32 cid, u8 isle)
+{
+	struct qed_ooo_archipelago *p_archipelago = NULL;
+	struct qed_ooo_isle *p_isle = NULL;
+	u8 the_num_of_isle = 1;
+
+	p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+	if (!p_archipelago) {
+		DP_NOTICE(p_hwfn,
+			  "Connection %d is not found in OOO list\n", cid);
+		return NULL;
+	}
+
+	list_for_each_entry(p_isle, &p_archipelago->isles_list, list_entry) {
+		if (the_num_of_isle == isle)
+			return p_isle;
+		the_num_of_isle++;
+	}
+
+	return NULL;
+}
+
+void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
+				struct qed_ooo_info *p_ooo_info,
+				struct ooo_opaque *p_cqe)
+{
+	struct qed_ooo_history *p_history = &p_ooo_info->ooo_history;
+
+	if (p_history->head_idx == p_history->num_of_cqes)
+		p_history->head_idx = 0;
+	p_history->p_cqes[p_history->head_idx] = *p_cqe;
+	p_history->head_idx++;
+}
+
+/* Allocate OOO state for an iSCSI PF: isle/archipelago pools (entries start on
+ * their free lists) and the CQE history ring. Returns NULL on any failure.
+ */
+struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn)
+{
+	u16 max_num_archipelagos, max_num_isles;
+	struct qed_ooo_info *p_ooo_info;
+	u32 i;
+
+	if (p_hwfn->hw_info.personality != QED_PCI_ISCSI) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to allocate qed_ooo_info: unknown personality\n");
+		return NULL;
+	}
+
+	max_num_archipelagos = p_hwfn->pf_params.iscsi_pf_params.num_cons;
+	if (!max_num_archipelagos) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to allocate qed_ooo_info: unknown amount of connections\n");
+		return NULL;
+	}
+	max_num_isles = QED_MAX_NUM_ISLES + max_num_archipelagos;
+
+	p_ooo_info = kzalloc(sizeof(*p_ooo_info), GFP_KERNEL);
+	if (!p_ooo_info)
+		return NULL;
+
+	INIT_LIST_HEAD(&p_ooo_info->free_buffers_list);
+	INIT_LIST_HEAD(&p_ooo_info->ready_buffers_list);
+	INIT_LIST_HEAD(&p_ooo_info->free_isles_list);
+	INIT_LIST_HEAD(&p_ooo_info->free_archipelagos_list);
+	INIT_LIST_HEAD(&p_ooo_info->archipelagos_list);
+
+	p_ooo_info->p_isles_mem =
+		kcalloc(max_num_isles, sizeof(struct qed_ooo_isle), GFP_KERNEL);
+	if (!p_ooo_info->p_isles_mem)
+		goto no_isles_mem;
+
+	for (i = 0; i < max_num_isles; i++) {
+		INIT_LIST_HEAD(&p_ooo_info->p_isles_mem[i].buffers_list);
+		list_add_tail(&p_ooo_info->p_isles_mem[i].list_entry,
+			      &p_ooo_info->free_isles_list);
+	}
+
+	p_ooo_info->p_archipelagos_mem =
+		kcalloc(max_num_archipelagos,
+			sizeof(struct qed_ooo_archipelago), GFP_KERNEL);
+	if (!p_ooo_info->p_archipelagos_mem)
+		goto no_archipelagos_mem;
+
+	for (i = 0; i < max_num_archipelagos; i++) {
+		INIT_LIST_HEAD(&p_ooo_info->p_archipelagos_mem[i].isles_list);
+		list_add_tail(&p_ooo_info->p_archipelagos_mem[i].list_entry,
+			      &p_ooo_info->free_archipelagos_list);
+	}
+
+	p_ooo_info->ooo_history.p_cqes =
+		kcalloc(QED_MAX_NUM_OOO_HISTORY_ENTRIES,
+			sizeof(struct ooo_opaque), GFP_KERNEL);
+	if (!p_ooo_info->ooo_history.p_cqes)
+		goto no_history_mem;
+	/* Ring size; qed_ooo_save_history_entry() wraps head_idx at this */
+	p_ooo_info->ooo_history.num_of_cqes = QED_MAX_NUM_OOO_HISTORY_ENTRIES;
+
+	return p_ooo_info;
+
+no_history_mem:
+	kfree(p_ooo_info->p_archipelagos_mem);
+no_archipelagos_mem:
+	kfree(p_ooo_info->p_isles_mem);
+no_isles_mem:
+	kfree(p_ooo_info);
+	return NULL;
+}
+
+/* Return all isles of connection cid, and the buffers they hold, to the free
+ * lists and put the connection's archipelago back into the free pool.
+ */
+void qed_ooo_release_connection_isles(struct qed_hwfn *p_hwfn,
+				      struct qed_ooo_info *p_ooo_info, u32 cid)
+{
+	struct qed_ooo_archipelago *p_archipelago;
+	struct qed_ooo_buffer *p_buffer;
+	struct qed_ooo_isle *p_isle;
+	bool b_found = false;
+
+	if (list_empty(&p_ooo_info->archipelagos_list))
+		return;
+
+	list_for_each_entry(p_archipelago,
+			    &p_ooo_info->archipelagos_list, list_entry) {
+		if (p_archipelago->cid == cid) {
+			list_del(&p_archipelago->list_entry);
+			b_found = true;
+			break;
+		}
+	}
+
+	if (!b_found)
+		return;
+
+	while (!list_empty(&p_archipelago->isles_list)) {
+		p_isle = list_first_entry(&p_archipelago->isles_list,
+					  struct qed_ooo_isle, list_entry);
+
+		list_del(&p_isle->list_entry);
+
+		while (!list_empty(&p_isle->buffers_list)) {
+			p_buffer = list_first_entry(&p_isle->buffers_list,
+						    struct qed_ooo_buffer,
+						    list_entry);
+
+			list_del(&p_buffer->list_entry);
+			list_add_tail(&p_buffer->list_entry,
+				      &p_ooo_info->free_buffers_list);
+		}
+		list_add_tail(&p_isle->list_entry,
+			      &p_ooo_info->free_isles_list);
+	}
+
+	list_add_tail(&p_archipelago->list_entry,
+		      &p_ooo_info->free_archipelagos_list);
+}
+
+/* Hand every isle of every connection, with its buffers, back to the free
+ * lists; buffers left on the ready list are returned as well.
+ */
+void qed_ooo_release_all_isles(struct qed_hwfn *p_hwfn,
+			       struct qed_ooo_info *p_ooo_info)
+{
+	struct qed_ooo_archipelago *p_arch;
+	struct qed_ooo_buffer *p_buffer;
+	struct qed_ooo_isle *p_isle;
+
+	while (!list_empty(&p_ooo_info->archipelagos_list)) {
+		p_arch = list_first_entry(&p_ooo_info->archipelagos_list,
+					  struct qed_ooo_archipelago,
+					  list_entry);
+
+		list_del(&p_arch->list_entry);
+
+		while (!list_empty(&p_arch->isles_list)) {
+			p_isle = list_first_entry(&p_arch->isles_list,
+						  struct qed_ooo_isle,
+						  list_entry);
+
+			list_del(&p_isle->list_entry);
+
+			while (!list_empty(&p_isle->buffers_list)) {
+				p_buffer =
+				    list_first_entry(&p_isle->buffers_list,
+						     struct qed_ooo_buffer,
+						     list_entry);
+
+				list_del(&p_buffer->list_entry);
+				list_add_tail(&p_buffer->list_entry,
+					      &p_ooo_info->free_buffers_list);
+			}
+			list_add_tail(&p_isle->list_entry,
+				      &p_ooo_info->free_isles_list);
+		}
+		list_add_tail(&p_arch->list_entry,
+			      &p_ooo_info->free_archipelagos_list);
+	}
+	if (!list_empty(&p_ooo_info->ready_buffers_list))
+		list_splice_tail_init(&p_ooo_info->ready_buffers_list,
+				      &p_ooo_info->free_buffers_list);
+}
+
+void qed_ooo_setup(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info)
+{
+	qed_ooo_release_all_isles(p_hwfn, p_ooo_info);
+	memset(p_ooo_info->ooo_history.p_cqes, 0,
+	       p_ooo_info->ooo_history.num_of_cqes *
+	       sizeof(struct ooo_opaque));
+	p_ooo_info->ooo_history.head_idx = 0;
+}
+
+/* Release all isles, then free every buffer on the free list (DMA memory and
+ * descriptor), the isle/archipelago pools, the history and p_ooo_info itself.
+ */
+void qed_ooo_free(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info)
+{
+	struct qed_ooo_buffer *p_buffer;
+
+	qed_ooo_release_all_isles(p_hwfn, p_ooo_info);
+	while (!list_empty(&p_ooo_info->free_buffers_list)) {
+		p_buffer = list_first_entry(&p_ooo_info->free_buffers_list,
+					    struct qed_ooo_buffer, list_entry);
+
+		list_del(&p_buffer->list_entry);
+		dma_free_coherent(&p_hwfn->cdev->pdev->dev,
+				  p_buffer->rx_buffer_size,
+				  p_buffer->rx_buffer_virt_addr,
+				  p_buffer->rx_buffer_phys_addr);
+		kfree(p_buffer);
+	}
+
+	kfree(p_ooo_info->p_isles_mem);
+	kfree(p_ooo_info->p_archipelagos_mem);
+	kfree(p_ooo_info->ooo_history.p_cqes);
+	kfree(p_ooo_info);
+}
+
+void qed_ooo_put_free_buffer(struct qed_hwfn *p_hwfn,
+			     struct qed_ooo_info *p_ooo_info,
+			     struct qed_ooo_buffer *p_buffer)
+{
+	list_add_tail(&p_buffer->list_entry, &p_ooo_info->free_buffers_list);
+}
+
+/* Pop the first buffer off the free list; returns NULL if it is empty. */
+struct qed_ooo_buffer *qed_ooo_get_free_buffer(struct qed_hwfn *p_hwfn,
+					       struct qed_ooo_info *p_ooo_info)
+{
+	struct list_head *p_head = &p_ooo_info->free_buffers_list;
+	struct qed_ooo_buffer *p_first;
+
+	if (list_empty(p_head))
+		return NULL;
+
+	p_first = list_first_entry(p_head, struct qed_ooo_buffer, list_entry);
+	list_del(&p_first->list_entry);
+	return p_first;
+}
+
+void qed_ooo_put_ready_buffer(struct qed_hwfn *p_hwfn,
+			      struct qed_ooo_info *p_ooo_info,
+			      struct qed_ooo_buffer *p_buffer, u8 on_tail)
+{
+	if (on_tail)
+		list_add_tail(&p_buffer->list_entry,
+			      &p_ooo_info->ready_buffers_list);
+	else
+		list_add(&p_buffer->list_entry,
+			 &p_ooo_info->ready_buffers_list);
+}
+
+/* Pop the first buffer off the ready list; returns NULL if it is empty. */
+struct qed_ooo_buffer *qed_ooo_get_ready_buffer(struct qed_hwfn *p_hwfn,
+						struct qed_ooo_info *p_ooo_info)
+{
+	struct list_head *p_head = &p_ooo_info->ready_buffers_list;
+	struct qed_ooo_buffer *p_first;
+
+	if (list_empty(p_head))
+		return NULL;
+
+	p_first = list_first_entry(p_head, struct qed_ooo_buffer, list_entry);
+	list_del(&p_first->list_entry);
+	return p_first;
+}
+
+/* Drop drop_size consecutive isles of cid, starting at isle drop_isle. */
+void qed_ooo_delete_isles(struct qed_hwfn *p_hwfn,
+			  struct qed_ooo_info *p_ooo_info,
+			  u32 cid, u8 drop_isle, u8 drop_size)
+{
+	struct qed_ooo_archipelago *p_archipelago = NULL;
+	struct qed_ooo_isle *p_isle = NULL;
+	u8 isle_idx;
+
+	p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+	for (isle_idx = 0; isle_idx < drop_size; isle_idx++) {
+		p_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, drop_isle);
+		if (!p_isle) {
+			DP_NOTICE(p_hwfn, "Isle %d is not found(cid %d)\n",
+				  drop_isle, cid);
+			return;
+		}
+		if (list_empty(&p_isle->buffers_list))
+			DP_NOTICE(p_hwfn,
+				  "Isle %d is empty(cid %d)\n", drop_isle, cid);
+		else
+			list_splice_tail_init(&p_isle->buffers_list,
+					      &p_ooo_info->free_buffers_list);
+
+		list_del(&p_isle->list_entry);
+		p_ooo_info->cur_isles_number--;
+		list_add(&p_isle->list_entry, &p_ooo_info->free_isles_list);
+	}
+
+	if (p_archipelago && list_empty(&p_archipelago->isles_list)) {
+		list_del(&p_archipelago->list_entry);
+		list_add(&p_archipelago->list_entry,
+			 &p_ooo_info->free_archipelagos_list);
+	}
+}
+
+void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn,
+			  struct qed_ooo_info *p_ooo_info,
+			  u32 cid, u8 ooo_isle,
+			  struct qed_ooo_buffer *p_buffer)
+{
+	struct qed_ooo_archipelago *p_archipelago = NULL;
+	struct qed_ooo_isle *p_prev_isle = NULL;
+	struct qed_ooo_isle *p_isle = NULL;
+
+	if (ooo_isle > 1) {
+		p_prev_isle = qed_ooo_seek_isle(p_hwfn,
+						p_ooo_info, cid, ooo_isle - 1);
+		if (!p_prev_isle) {
+			DP_NOTICE(p_hwfn,
+				  "Isle %d is not found(cid %d)\n",
+				  ooo_isle - 1, cid);
+			return;
+		}
+	}
+	p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+	if (!p_archipelago && (ooo_isle != 1)) {
+		DP_NOTICE(p_hwfn,
+			  "Connection %d is not found in OOO list\n", cid);
+		return;
+	}
+
+	if (!list_empty(&p_ooo_info->free_isles_list)) {
+		p_isle = list_first_entry(&p_ooo_info->free_isles_list,
+					  struct qed_ooo_isle, list_entry);
+
+		list_del(&p_isle->list_entry);
+		if (!list_empty(&p_isle->buffers_list)) {
+			DP_NOTICE(p_hwfn, "Free isle is not empty\n");
+			INIT_LIST_HEAD(&p_isle->buffers_list);
+		}
+	} else {
+		DP_NOTICE(p_hwfn, "No more free isles\n");
+		return;
+	}
+
+	if (!p_archipelago &&
+	    !list_empty(&p_ooo_info->free_archipelagos_list)) {
+		p_archipelago =
+		    list_first_entry(&p_ooo_info->free_archipelagos_list,
+				     struct qed_ooo_archipelago, list_entry);
+
+		list_del(&p_archipelago->list_entry);
+		if (!list_empty(&p_archipelago->isles_list)) {
+			DP_NOTICE(p_hwfn,
+				  "Free OOO connection is not empty\n");
+			INIT_LIST_HEAD(&p_archipelago->isles_list);
+		}
+		p_archipelago->cid = cid;
+		list_add(&p_archipelago->list_entry,
+			 &p_ooo_info->archipelagos_list);
+	} else if (!p_archipelago) {
+		DP_NOTICE(p_hwfn, "No more free OOO connections\n");
+		list_add(&p_isle->list_entry,
+			 &p_ooo_info->free_isles_list);
+		list_add(&p_buffer->list_entry,
+			 &p_ooo_info->free_buffers_list);
+		return;
+	}
+
+	list_add(&p_buffer->list_entry, &p_isle->buffers_list);
+	p_ooo_info->cur_isles_number++;
+	p_ooo_info->gen_isles_number++;
+
+	if (p_ooo_info->cur_isles_number > p_ooo_info->max_isles_number)
+		p_ooo_info->max_isles_number = p_ooo_info->cur_isles_number;
+
+	if (!p_prev_isle)
+		list_add(&p_isle->list_entry, &p_archipelago->isles_list);
+	else
+		list_add(&p_isle->list_entry, &p_prev_isle->list_entry);
+}
+
+void qed_ooo_add_new_buffer(struct qed_hwfn *p_hwfn,
+			    struct qed_ooo_info *p_ooo_info,
+			    u32 cid,
+			    u8 ooo_isle,
+			    struct qed_ooo_buffer *p_buffer, u8 buffer_side)
+{
+	struct qed_ooo_isle *p_isle = NULL;
+
+	p_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid, ooo_isle);
+	if (!p_isle) {
+		DP_NOTICE(p_hwfn,
+			  "Isle %d is not found(cid %d)\n", ooo_isle, cid);
+		return;
+	}
+
+	if (buffer_side == QED_OOO_LEFT_BUF)
+		list_add(&p_buffer->list_entry, &p_isle->buffers_list);
+	else
+		list_add_tail(&p_buffer->list_entry, &p_isle->buffers_list);
+}
+
+void qed_ooo_join_isles(struct qed_hwfn *p_hwfn,
+			struct qed_ooo_info *p_ooo_info, u32 cid, u8 left_isle)
+{
+	struct qed_ooo_archipelago *p_archipelago = NULL;
+	struct qed_ooo_isle *p_right_isle = NULL;
+	struct qed_ooo_isle *p_left_isle = NULL;
+
+	p_right_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid,
+					 left_isle + 1);
+	if (!p_right_isle) {
+		DP_NOTICE(p_hwfn,
+			  "Right isle %d is not found(cid %d)\n",
+			  left_isle + 1, cid);
+		return;
+	}
+
+	p_archipelago = qed_ooo_seek_archipelago(p_hwfn, p_ooo_info, cid);
+	list_del(&p_right_isle->list_entry);
+	p_ooo_info->cur_isles_number--;
+	if (left_isle) {
+		p_left_isle = qed_ooo_seek_isle(p_hwfn, p_ooo_info, cid,
+						left_isle);
+		if (!p_left_isle) {
+			DP_NOTICE(p_hwfn,
+				  "Left isle %d is not found(cid %d)\n",
+				  left_isle, cid);
+			return;
+		}
+		list_splice_tail_init(&p_right_isle->buffers_list,
+				      &p_left_isle->buffers_list);
+	} else {
+		list_splice_tail_init(&p_right_isle->buffers_list,
+				      &p_ooo_info->ready_buffers_list);
+		if (list_empty(&p_archipelago->isles_list)) {
+			list_del(&p_archipelago->list_entry);
+			list_add(&p_archipelago->list_entry,
+				 &p_ooo_info->free_archipelagos_list);
+		}
+	}
+	list_add_tail(&p_right_isle->list_entry, &p_ooo_info->free_isles_list);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ooo.h b/drivers/net/ethernet/qlogic/qed/qed_ooo.h
new file mode 100644
index 000000000000..7a0670a9a074
--- /dev/null
+++ b/drivers/net/ethernet/qlogic/qed/qed_ooo.h
@@ -0,0 +1,176 @@
+/* QLogic qed NIC Driver
+ * Copyright (c) 2015 QLogic Corporation
+ *
+ * This software is available under the terms of the GNU General Public License
+ * (GPL) Version 2, available from the file COPYING in the main directory of
+ * this source tree.
+ */
+
+#ifndef _QED_OOO_H
+#define _QED_OOO_H
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+#include "qed.h"
+
+#define QED_MAX_NUM_ISLES	256
+#define QED_MAX_NUM_OOO_HISTORY_ENTRIES	512
+
+#define QED_OOO_LEFT_BUF	0
+#define QED_OOO_RIGHT_BUF	1
+
+struct qed_ooo_buffer {
+	struct list_head list_entry;
+	void *rx_buffer_virt_addr;
+	dma_addr_t rx_buffer_phys_addr;
+	u32 rx_buffer_size;
+	u16 packet_length;
+	u16 parse_flags;
+	u16 vlan;
+	u8 placement_offset;
+};
+
+struct qed_ooo_isle {
+	struct list_head list_entry;
+	struct list_head buffers_list;
+};
+
+struct qed_ooo_archipelago {
+	struct list_head list_entry;
+	struct list_head isles_list;
+	u32 cid;
+};
+
+struct qed_ooo_history {
+	struct ooo_opaque *p_cqes;
+	u32 head_idx;
+	u32 num_of_cqes;
+};
+
+struct qed_ooo_info {
+	struct list_head free_buffers_list;
+	struct list_head ready_buffers_list;
+	struct list_head free_isles_list;
+	struct list_head free_archipelagos_list;
+	struct list_head archipelagos_list;
+	struct qed_ooo_archipelago *p_archipelagos_mem;
+	struct qed_ooo_isle *p_isles_mem;
+	struct qed_ooo_history ooo_history;
+	u32 cur_isles_number;
+	u32 max_isles_number;
+	u32 gen_isles_number;
+};
+
+#if IS_ENABLED(CONFIG_QED_ISCSI)
+void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
+				struct qed_ooo_info *p_ooo_info,
+				struct ooo_opaque *p_cqe);
+
+struct qed_ooo_info *qed_ooo_alloc(struct qed_hwfn *p_hwfn);
+
+void qed_ooo_release_connection_isles(struct qed_hwfn *p_hwfn,
+				      struct qed_ooo_info *p_ooo_info,
+				      u32 cid);
+
+void qed_ooo_release_all_isles(struct qed_hwfn *p_hwfn,
+			       struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_setup(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_free(struct qed_hwfn *p_hwfn, struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_put_free_buffer(struct qed_hwfn *p_hwfn,
+			     struct qed_ooo_info *p_ooo_info,
+			     struct qed_ooo_buffer *p_buffer);
+
+struct qed_ooo_buffer *
+qed_ooo_get_free_buffer(struct qed_hwfn *p_hwfn,
+			struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_put_ready_buffer(struct qed_hwfn *p_hwfn,
+			      struct qed_ooo_info *p_ooo_info,
+			      struct qed_ooo_buffer *p_buffer, u8 on_tail);
+
+struct qed_ooo_buffer *
+qed_ooo_get_ready_buffer(struct qed_hwfn *p_hwfn,
+			 struct qed_ooo_info *p_ooo_info);
+
+void qed_ooo_delete_isles(struct qed_hwfn *p_hwfn,
+			  struct qed_ooo_info *p_ooo_info,
+			  u32 cid, u8 drop_isle, u8 drop_size);
+
+void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn,
+			  struct qed_ooo_info *p_ooo_info,
+			  u32 cid,
+			  u8 ooo_isle, struct qed_ooo_buffer *p_buffer);
+
+void qed_ooo_add_new_buffer(struct qed_hwfn *p_hwfn,
+			    struct qed_ooo_info *p_ooo_info,
+			    u32 cid,
+			    u8 ooo_isle,
+			    struct qed_ooo_buffer *p_buffer, u8 buffer_side);
+
+void qed_ooo_join_isles(struct qed_hwfn *p_hwfn,
+			struct qed_ooo_info *p_ooo_info, u32 cid,
+			u8 left_isle);
+#else /* IS_ENABLED(CONFIG_QED_ISCSI) */
+static inline void qed_ooo_save_history_entry(struct qed_hwfn *p_hwfn,
+					      struct qed_ooo_info *p_ooo_info,
+					      struct ooo_opaque *p_cqe) {}
+
+static inline struct qed_ooo_info *qed_ooo_alloc(
+				struct qed_hwfn *p_hwfn) { return NULL; }
+
+static inline void
+qed_ooo_release_connection_isles(struct qed_hwfn *p_hwfn,
+				 struct qed_ooo_info *p_ooo_info,
+				 u32 cid) {}
+
+static inline void qed_ooo_release_all_isles(struct qed_hwfn *p_hwfn,
+					     struct qed_ooo_info *p_ooo_info)
+					     {}
+
+static inline void qed_ooo_setup(struct qed_hwfn *p_hwfn,
+				 struct qed_ooo_info *p_ooo_info) {}
+
+static inline void qed_ooo_free(struct qed_hwfn *p_hwfn,
+				struct qed_ooo_info *p_ooo_info) {}
+
+static inline void qed_ooo_put_free_buffer(struct qed_hwfn *p_hwfn,
+					   struct qed_ooo_info *p_ooo_info,
+					   struct qed_ooo_buffer *p_buffer) {}
+
+static inline struct qed_ooo_buffer *
+qed_ooo_get_free_buffer(struct qed_hwfn *p_hwfn,
+			struct qed_ooo_info *p_ooo_info) { return NULL; }
+
+static inline void qed_ooo_put_ready_buffer(struct qed_hwfn *p_hwfn,
+					    struct qed_ooo_info *p_ooo_info,
+					    struct qed_ooo_buffer *p_buffer,
+					    u8 on_tail) {}
+
+static inline struct qed_ooo_buffer *
+qed_ooo_get_ready_buffer(struct qed_hwfn *p_hwfn,
+			 struct qed_ooo_info *p_ooo_info) { return NULL; }
+
+static inline void qed_ooo_delete_isles(struct qed_hwfn *p_hwfn,
+					struct qed_ooo_info *p_ooo_info,
+					u32 cid, u8 drop_isle, u8 drop_size) {}
+
+static inline void qed_ooo_add_new_isle(struct qed_hwfn *p_hwfn,
+					struct qed_ooo_info *p_ooo_info,
+					u32 cid, u8 ooo_isle,
+					struct qed_ooo_buffer *p_buffer) {}
+
+static inline void qed_ooo_add_new_buffer(struct qed_hwfn *p_hwfn,
+					  struct qed_ooo_info *p_ooo_info,
+					  u32 cid, u8 ooo_isle,
+					  struct qed_ooo_buffer *p_buffer,
+					  u8 buffer_side) {}
+
+static inline void qed_ooo_join_isles(struct qed_hwfn *p_hwfn,
+				      struct qed_ooo_info *p_ooo_info, u32 cid,
+				      u8 left_isle) {}
+#endif /* IS_ENABLED(CONFIG_QED_ISCSI) */
+
+#endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
index b414a0542177..97544205a8c1 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h
@@ -82,6 +82,8 @@
 	0x1c80000UL
 #define BAR0_MAP_REG_XSDM_RAM \
 	0x1e00000UL
+#define BAR0_MAP_REG_YSDM_RAM \
+	0x1e80000UL
 #define  NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF \
 	0x5011f4UL
 #define  PRS_REG_SEARCH_TCP \
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index f3a825a8f8d5..2a16547c8966 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -2658,7 +2658,6 @@ static int qed_roce_ll2_start(struct qed_dev *cdev,
 		DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n");
 		return -ENOMEM;
 	}
-	memset(roce_ll2, 0, sizeof(*roce_ll2));
 	roce_ll2->handle = QED_LL2_UNUSED_HANDLE;
 	roce_ll2->cbs = params->cbs;
 	roce_ll2->cb_cookie = params->cb_cookie;
@@ -2772,6 +2771,7 @@ static int qed_roce_ll2_tx(struct qed_dev *cdev,
 	/* Tx header */
 	rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle,
 				       1 + pkt->n_seg, 0, flags, 0,
+				       QED_LL2_TX_DEST_NW,
 				       qed_roce_flavor, pkt->header.baddr,
 				       pkt->header.len, pkt, 1);
 	if (rc) {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
index 9b7678f26909..48bfaecaf6dc 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c
@@ -1,3 +1,4 @@
+#include <linux/crc32.h>
 #include "qed.h"
 #include "qed_dev_api.h"
 #include "qed_mcp.h"
@@ -75,3 +76,103 @@ int qed_selftest_clock(struct qed_dev *cdev)
 
 	return rc;
 }
+
+/* NVRAM selftest: check the stored CRC of each image reported by the MFW. */
+int qed_selftest_nvram(struct qed_dev *cdev)
+{
+	struct qed_hwfn *p_hwfn = QED_LEADING_HWFN(cdev);
+	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
+	u32 num_images, i, j, nvm_crc, calc_crc;
+	struct bist_nvm_image_att image_att;
+	u8 *buf = NULL;
+	__be32 val;
+	int rc;
+
+	if (!p_ptt) {
+		DP_ERR(p_hwfn, "failed to acquire ptt\n");
+		return -EBUSY;
+	}
+
+	/* Acquire from MFW the amount of available images */
+	rc = qed_mcp_bist_nvm_test_get_num_images(p_hwfn, p_ptt, &num_images);
+	if (rc || !num_images) {
+		DP_ERR(p_hwfn, "Failed getting number of images\n");
+		/* The PTT is already held: leave via err0 to release it */
+		rc = -EINVAL;
+		goto err0;
+	}
+
+	/* Iterate over images and validate CRC */
+	for (i = 0; i < num_images; i++) {
+		/* Get the image attributes required for reading it */
+		rc = qed_mcp_bist_nvm_test_get_image_att(p_hwfn, p_ptt,
+							 &image_att, i);
+		if (rc) {
+			DP_ERR(p_hwfn,
+			       "Failed getting image index %d attributes\n", i);
+			goto err0;
+		}
+
+		/* After MFW crash dump is collected - the image's CRC stops
+		 * being valid.
+		 */
+		if (image_att.image_type == NVM_TYPE_MDUMP)
+			continue;
+
+		DP_VERBOSE(p_hwfn, QED_MSG_SP, "image index %d, size %x\n",
+			   i, image_att.len);
+
+		/* Allocate a buffer for holding the nvram image */
+		buf = kzalloc(image_att.len, GFP_KERNEL);
+		if (!buf) {
+			rc = -ENOMEM;
+			goto err0;
+		}
+
+		/* Read image into buffer */
+		rc = qed_mcp_nvm_read(p_hwfn->cdev, image_att.nvm_start_addr,
+				      buf, image_att.len);
+		if (rc) {
+			DP_ERR(p_hwfn,
+			       "Failed reading image index %d from nvm.\n", i);
+			goto err1;
+		}
+
+		/* Convert the buffer into big-endian format (excluding the
+		 * closing 4 bytes of CRC).
+		 */
+		for (j = 0; j < image_att.len - 4; j += 4) {
+			val = cpu_to_be32(*(u32 *)&buf[j]);
+			*(u32 *)&buf[j] = (__force u32)val;
+		}
+
+		/* Calc CRC for the "actual" image buffer, i.e. not including
+		 * the last 4 CRC bytes.
+		 */
+		nvm_crc = *(u32 *)(buf + image_att.len - 4);
+		calc_crc = crc32(0xffffffff, buf, image_att.len - 4);
+		calc_crc = (__force u32)~cpu_to_be32(calc_crc);
+		DP_VERBOSE(p_hwfn, QED_MSG_SP,
+			   "nvm crc 0x%x, calc_crc 0x%x\n", nvm_crc, calc_crc);
+
+		if (calc_crc != nvm_crc) {
+			rc = -EINVAL;
+			goto err1;
+		}
+
+		/* Done with this image; Free to prevent double release
+		 * on subsequent failure.
+		 */
+		kfree(buf);
+		buf = NULL;
+	}
+
+	qed_ptt_release(p_hwfn, p_ptt);
+	return 0;
+
+err1:
+	kfree(buf);
+err0:
+	qed_ptt_release(p_hwfn, p_ptt);
+	return rc;
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.h b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
index 50eb0b49950f..739ddb730967 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_selftest.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.h
@@ -37,4 +37,14 @@ int qed_selftest_register(struct qed_dev *cdev);
  * @return int
  */
 int qed_selftest_clock(struct qed_dev *cdev);
+
+/**
+ * @brief qed_selftest_nvram - Perform nvram test
+ *
+ * @param cdev
+ *
+ * @return int
+ */
+int qed_selftest_nvram(struct qed_dev *cdev);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h
index b2c08e4d2a9b..9c897bc68d05 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h
@@ -110,8 +110,8 @@ union qed_spq_req_comp {
 };
 
 struct qed_spq_comp_done {
-	u64	done;
-	u8	fw_return_code;
+	unsigned int	done;
+	u8		fw_return_code;
 };
 
 struct qed_spq_entry {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
index 2888eb0628f8..d0a58282f2a8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c
@@ -347,11 +347,11 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn,
 
 	/* Place EQ address in RAMROD */
 	DMA_REGPAIR_LE(p_ramrod->event_ring_pbl_addr,
-		       p_hwfn->p_eq->chain.pbl.p_phys_table);
+		       p_hwfn->p_eq->chain.pbl_sp.p_phys_table);
 	page_cnt = (u8)qed_chain_get_page_cnt(&p_hwfn->p_eq->chain);
 	p_ramrod->event_ring_num_pages = page_cnt;
 	DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr,
-		       p_hwfn->p_consq->chain.pbl.p_phys_table);
+		       p_hwfn->p_consq->chain.pbl_sp.p_phys_table);
 
 	qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config);
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index 9fbaf9429fd0..f022469bdcf8 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -24,7 +24,9 @@
 #include "qed_hsi.h"
 #include "qed_hw.h"
 #include "qed_int.h"
+#include "qed_iscsi.h"
 #include "qed_mcp.h"
+#include "qed_ooo.h"
 #include "qed_reg_addr.h"
 #include "qed_sp.h"
 #include "qed_sriov.h"
@@ -35,7 +37,11 @@
 ***************************************************************************/
 
 #define SPQ_HIGH_PRI_RESERVE_DEFAULT    (1)
-#define SPQ_BLOCK_SLEEP_LENGTH          (1000)
+
+#define SPQ_BLOCK_DELAY_MAX_ITER        (10)
+#define SPQ_BLOCK_DELAY_US              (10)
+#define SPQ_BLOCK_SLEEP_MAX_ITER        (1000)
+#define SPQ_BLOCK_SLEEP_MS              (5)
 
 /***************************************************************************
 * Blocking Imp. (BLOCK/EBLOCK mode)
@@ -48,60 +54,88 @@ static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn,
 
 	comp_done = (struct qed_spq_comp_done *)cookie;
 
-	comp_done->done			= 0x1;
-	comp_done->fw_return_code	= fw_return_code;
+	comp_done->fw_return_code = fw_return_code;
 
-	/* make update visible to waiting thread */
-	smp_wmb();
+	/* Make sure completion done is visible on waiting thread */
+	smp_store_release(&comp_done->done, 0x1);
 }
 
-static int qed_spq_block(struct qed_hwfn *p_hwfn,
-			 struct qed_spq_entry *p_ent,
-			 u8 *p_fw_ret)
+/* Poll for a ramrod completion, sleeping or busy-waiting between polls. */
+static int __qed_spq_block(struct qed_hwfn *p_hwfn,
+			   struct qed_spq_entry *p_ent,
+			   u8 *p_fw_ret, bool sleep_between_iter)
 {
-	int sleep_count = SPQ_BLOCK_SLEEP_LENGTH;
 	struct qed_spq_comp_done *comp_done;
-	int rc;
+	u32 iter_cnt;
 
 	comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie;
-	while (sleep_count) {
-		/* validate we receive completion update */
-		smp_rmb();
-		if (comp_done->done == 1) {
+	iter_cnt = sleep_between_iter ? SPQ_BLOCK_SLEEP_MAX_ITER
+				      : SPQ_BLOCK_DELAY_MAX_ITER;
+	while (iter_cnt--) {
+		/* Validate we receive completion update. Acquire semantics
+		 * order the fw_return_code read below after the 'done' read.
+		 */
+		if (smp_load_acquire(&comp_done->done) == 1) {
 			if (p_fw_ret)
 				*p_fw_ret = comp_done->fw_return_code;
 			return 0;
 		}
-		usleep_range(5000, 10000);
-		sleep_count--;
+
+		if (sleep_between_iter)
+			msleep(SPQ_BLOCK_SLEEP_MS);
+		else
+			udelay(SPQ_BLOCK_DELAY_US);
+	}
+
+	return -EBUSY;
+}
+
+static int qed_spq_block(struct qed_hwfn *p_hwfn,
+			 struct qed_spq_entry *p_ent,
+			 u8 *p_fw_ret, bool skip_quick_poll)
+{
+	struct qed_spq_comp_done *comp_done;
+	int rc;
+
+	/* A relatively short polling period w/o sleeping, to allow the FW to
+	 * complete the ramrod and thus possibly to avoid the following sleeps.
+	 */
+	if (!skip_quick_poll) {
+		rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, false);
+		if (!rc)
+			return 0;
 	}
 
+	/* Move to polling with a sleeping period between iterations */
+	rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true);
+	if (!rc)
+		return 0;
+
 	DP_INFO(p_hwfn, "Ramrod is stuck, requesting MCP drain\n");
 	rc = qed_mcp_drain(p_hwfn, p_hwfn->p_main_ptt);
-	if (rc != 0)
+	if (rc) {
 		DP_NOTICE(p_hwfn, "MCP drain failed\n");
+		goto err;
+	}
 
 	/* Retry after drain */
-	sleep_count = SPQ_BLOCK_SLEEP_LENGTH;
-	while (sleep_count) {
-		/* validate we receive completion update */
-		smp_rmb();
-		if (comp_done->done == 1) {
-			if (p_fw_ret)
-				*p_fw_ret = comp_done->fw_return_code;
-			return 0;
-		}
-		usleep_range(5000, 10000);
-		sleep_count--;
-	}
+	rc = __qed_spq_block(p_hwfn, p_ent, p_fw_ret, true);
+	if (!rc)
+		return 0;
 
+	comp_done = (struct qed_spq_comp_done *)p_ent->comp_cb.cookie;
 	if (comp_done->done == 1) {
 		if (p_fw_ret)
 			*p_fw_ret = comp_done->fw_return_code;
 		return 0;
 	}
-
-	DP_NOTICE(p_hwfn, "Ramrod is stuck, MCP drain failed\n");
+err:
+	DP_NOTICE(p_hwfn,
+		  "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
+		  le32_to_cpu(p_ent->elem.hdr.cid),
+		  p_ent->elem.hdr.cmd_id,
+		  p_ent->elem.hdr.protocol_id,
+		  le16_to_cpu(p_ent->elem.hdr.echo));
 
 	return -EBUSY;
 }
@@ -245,6 +279,28 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn,
 		return qed_sriov_eqe_event(p_hwfn,
 					   p_eqe->opcode,
 					   p_eqe->echo, &p_eqe->data);
+	case PROTOCOLID_ISCSI:
+		if (!IS_ENABLED(CONFIG_QED_ISCSI))
+			return -EINVAL;
+		if (p_eqe->opcode == ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES) {
+			u32 cid = le32_to_cpu(p_eqe->data.iscsi_info.cid);
+
+			qed_ooo_release_connection_isles(p_hwfn,
+							 p_hwfn->p_ooo_info,
+							 cid);
+			return 0;
+		}
+
+		if (p_hwfn->p_iscsi_info->event_cb) {
+			struct qed_iscsi_info *p_iscsi = p_hwfn->p_iscsi_info;
+
+			return p_iscsi->event_cb(p_iscsi->event_context,
+						 p_eqe->opcode, &p_eqe->data);
+		} else {
+			DP_NOTICE(p_hwfn,
+				  "iSCSI async completion is not set\n");
+			return -EINVAL;
+		}
 	default:
 		DP_NOTICE(p_hwfn,
 			  "Unknown Async completion for protocol: %d\n",
@@ -725,7 +781,8 @@ int qed_spq_post(struct qed_hwfn *p_hwfn,
 		 * access p_ent here to see whether it's successful or not.
 		 * Thus, after gaining the answer perform the cleanup here.
 		 */
-		rc = qed_spq_block(p_hwfn, p_ent, fw_return_code);
+		rc = qed_spq_block(p_hwfn, p_ent, fw_return_code,
+				   p_ent->queue == &p_spq->unlimited_pending);
 
 		if (p_ent->queue == &p_spq->unlimited_pending) {
 			/* This is an allocated p_ent which does not need to
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
index d2d6621fe0e5..85b09dd1787a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c
@@ -109,7 +109,8 @@ static int qed_sp_vf_stop(struct qed_hwfn *p_hwfn,
 }
 
 static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
-				  int rel_vf_id, bool b_enabled_only)
+				  int rel_vf_id,
+				  bool b_enabled_only, bool b_non_malicious)
 {
 	if (!p_hwfn->pf_iov_info) {
 		DP_NOTICE(p_hwfn->cdev, "No iov info\n");
@@ -124,6 +125,10 @@ static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn,
 	    b_enabled_only)
 		return false;
 
+	if ((p_hwfn->pf_iov_info->vfs_array[rel_vf_id].b_malicious) &&
+	    b_non_malicious)
+		return false;
+
 	return true;
 }
 
@@ -138,7 +143,8 @@ static struct qed_vf_info *qed_iov_get_vf_info(struct qed_hwfn *p_hwfn,
 		return NULL;
 	}
 
-	if (qed_iov_is_valid_vfid(p_hwfn, relative_vf_id, b_enabled_only))
+	if (qed_iov_is_valid_vfid(p_hwfn, relative_vf_id,
+				  b_enabled_only, false))
 		vf = &p_hwfn->pf_iov_info->vfs_array[relative_vf_id];
 	else
 		DP_ERR(p_hwfn, "qed_iov_get_vf_info: VF[%d] is not enabled\n",
@@ -542,7 +548,8 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn)
 	return 0;
 }
 
-static bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid)
+static bool _qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn,
+				     int vfid, bool b_fail_malicious)
 {
 	/* Check PF supports sriov */
 	if (IS_VF(p_hwfn->cdev) || !IS_QED_SRIOV(p_hwfn->cdev) ||
@@ -550,12 +557,17 @@ static bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid)
 		return false;
 
 	/* Check VF validity */
-	if (!qed_iov_is_valid_vfid(p_hwfn, vfid, true))
+	if (!qed_iov_is_valid_vfid(p_hwfn, vfid, true, b_fail_malicious))
 		return false;
 
 	return true;
 }
 
+static bool qed_iov_pf_sanity_check(struct qed_hwfn *p_hwfn, int vfid)
+{
+	return _qed_iov_pf_sanity_check(p_hwfn, vfid, true);
+}
+
 static void qed_iov_set_vf_to_disable(struct qed_dev *cdev,
 				      u16 rel_vf_id, u8 to_disable)
 {
@@ -652,6 +664,9 @@ static int qed_iov_enable_vf_access(struct qed_hwfn *p_hwfn,
 
 	qed_iov_vf_igu_reset(p_hwfn, p_ptt, vf);
 
+	/* It's possible VF was previously considered malicious */
+	vf->b_malicious = false;
+
 	rc = qed_mcp_config_vf_msix(p_hwfn, p_ptt, vf->abs_vf_id, vf->num_sbs);
 	if (rc)
 		return rc;
@@ -793,37 +808,70 @@ static void qed_iov_free_vf_igu_sbs(struct qed_hwfn *p_hwfn,
 
 static int qed_iov_init_hw_for_vf(struct qed_hwfn *p_hwfn,
 				  struct qed_ptt *p_ptt,
-				  u16 rel_vf_id, u16 num_rx_queues)
+				  struct qed_iov_vf_init_params *p_params)
 {
 	u8 num_of_vf_avaiable_chains = 0;
 	struct qed_vf_info *vf = NULL;
+	u16 qid, num_irqs;
 	int rc = 0;
 	u32 cids;
 	u8 i;
 
-	vf = qed_iov_get_vf_info(p_hwfn, rel_vf_id, false);
+	vf = qed_iov_get_vf_info(p_hwfn, p_params->rel_vf_id, false);
 	if (!vf) {
 		DP_ERR(p_hwfn, "qed_iov_init_hw_for_vf : vf is NULL\n");
 		return -EINVAL;
 	}
 
 	if (vf->b_init) {
-		DP_NOTICE(p_hwfn, "VF[%d] is already active.\n", rel_vf_id);
+		DP_NOTICE(p_hwfn, "VF[%d] is already active.\n",
+			  p_params->rel_vf_id);
 		return -EINVAL;
 	}
 
+	/* Perform sanity checking on the requested queue_id */
+	for (i = 0; i < p_params->num_queues; i++) {
+		u16 min_vf_qzone = FEAT_NUM(p_hwfn, QED_PF_L2_QUE);
+		u16 max_vf_qzone = min_vf_qzone +
+		    FEAT_NUM(p_hwfn, QED_VF_L2_QUE) - 1;
+
+		qid = p_params->req_rx_queue[i];
+		if (qid < min_vf_qzone || qid > max_vf_qzone) {
+			DP_NOTICE(p_hwfn,
+				  "Can't enable Rx qid [%04x] for VF[%d]: qids [0x%04x,...,0x%04x] available\n",
+				  qid,
+				  p_params->rel_vf_id,
+				  min_vf_qzone, max_vf_qzone);
+			return -EINVAL;
+		}
+
+		qid = p_params->req_tx_queue[i];
+		if (qid > max_vf_qzone) {
+			DP_NOTICE(p_hwfn,
+				  "Can't enable Tx qid [%04x] for VF[%d]: max qid 0x%04x\n",
+				  qid, p_params->rel_vf_id, max_vf_qzone);
+			return -EINVAL;
+		}
+
+		/* If client *really* wants, Tx qid can be shared with PF */
+		if (qid < min_vf_qzone)
+			DP_VERBOSE(p_hwfn,
+				   QED_MSG_IOV,
+				   "VF[%d] is using PF qid [0x%04x] for Txq[0x%02x]\n",
+				   p_params->rel_vf_id, qid, i);
+	}
+
 	/* Limit number of queues according to number of CIDs */
 	qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH, &cids);
 	DP_VERBOSE(p_hwfn,
 		   QED_MSG_IOV,
 		   "VF[%d] - requesting to initialize for 0x%04x queues [0x%04x CIDs available]\n",
-		   vf->relative_vf_id, num_rx_queues, (u16) cids);
-	num_rx_queues = min_t(u16, num_rx_queues, ((u16) cids));
+		   vf->relative_vf_id, p_params->num_queues, (u16)cids);
+	num_irqs = min_t(u16, p_params->num_queues, ((u16)cids));
 
 	num_of_vf_avaiable_chains = qed_iov_alloc_vf_igu_sbs(p_hwfn,
 							     p_ptt,
-							     vf,
-							     num_rx_queues);
+							     vf, num_irqs);
 	if (!num_of_vf_avaiable_chains) {
 		DP_ERR(p_hwfn, "no available igu sbs\n");
 		return -ENOMEM;
@@ -834,25 +882,22 @@ static int qed_iov_init_hw_for_vf(struct qed_hwfn *p_hwfn,
 	vf->num_txqs = num_of_vf_avaiable_chains;
 
 	for (i = 0; i < vf->num_rxqs; i++) {
-		u16 queue_id = qed_int_queue_id_from_sb_id(p_hwfn,
-							   vf->igu_sbs[i]);
+		struct qed_vf_q_info *p_queue = &vf->vf_queues[i];
 
-		if (queue_id > RESC_NUM(p_hwfn, QED_L2_QUEUE)) {
-			DP_NOTICE(p_hwfn,
-				  "VF[%d] will require utilizing of out-of-bounds queues - %04x\n",
-				  vf->relative_vf_id, queue_id);
-			return -EINVAL;
-		}
+		p_queue->fw_rx_qid = p_params->req_rx_queue[i];
+		p_queue->fw_tx_qid = p_params->req_tx_queue[i];
 
 		/* CIDs are per-VF, so no problem having them 0-based. */
-		vf->vf_queues[i].fw_rx_qid = queue_id;
-		vf->vf_queues[i].fw_tx_qid = queue_id;
-		vf->vf_queues[i].fw_cid = i;
+		p_queue->fw_cid = i;
 
 		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-			   "VF[%d] - [%d] SB %04x, Tx/Rx queue %04x CID %04x\n",
-			   vf->relative_vf_id, i, vf->igu_sbs[i], queue_id, i);
+			   "VF[%d] - Q[%d] SB %04x, qid [Rx %04x Tx %04x]  CID %04x\n",
+			   vf->relative_vf_id,
+			   i, vf->igu_sbs[i],
+			   p_queue->fw_rx_qid,
+			   p_queue->fw_tx_qid, p_queue->fw_cid);
 	}
+
 	rc = qed_iov_enable_vf_access(p_hwfn, p_ptt, vf);
 	if (!rc) {
 		vf->b_init = true;
@@ -1172,8 +1217,19 @@ static void qed_iov_vf_cleanup(struct qed_hwfn *p_hwfn,
 
 	p_vf->num_active_rxqs = 0;
 
-	for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++)
-		p_vf->vf_queues[i].rxq_active = 0;
+	for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++) {
+		struct qed_vf_q_info *p_queue = &p_vf->vf_queues[i];
+
+		if (p_queue->p_rx_cid) {
+			qed_eth_queue_cid_release(p_hwfn, p_queue->p_rx_cid);
+			p_queue->p_rx_cid = NULL;
+		}
+
+		if (p_queue->p_tx_cid) {
+			qed_eth_queue_cid_release(p_hwfn, p_queue->p_tx_cid);
+			p_queue->p_tx_cid = NULL;
+		}
+	}
 
 	memset(&p_vf->shadow_config, 0, sizeof(p_vf->shadow_config));
 	memset(&p_vf->acquire, 0, sizeof(p_vf->acquire));
@@ -1579,21 +1635,21 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn,
 
 		/* Update all the Rx queues */
 		for (i = 0; i < QED_MAX_VF_CHAINS_PER_PF; i++) {
-			u16 qid;
+			struct qed_queue_cid *p_cid;
 
-			if (!p_vf->vf_queues[i].rxq_active)
+			p_cid = p_vf->vf_queues[i].p_rx_cid;
+			if (!p_cid)
 				continue;
 
-			qid = p_vf->vf_queues[i].fw_rx_qid;
-
-			rc = qed_sp_eth_rx_queues_update(p_hwfn, qid,
+			rc = qed_sp_eth_rx_queues_update(p_hwfn,
+							 (void **)&p_cid,
 							 1, 0, 1,
 							 QED_SPQ_MODE_EBLOCK,
 							 NULL);
 			if (rc) {
 				DP_NOTICE(p_hwfn,
 					  "Failed to send Rx update fo queue[0x%04x]\n",
-					  qid);
+					  p_cid->rel.queue_id);
 				return rc;
 			}
 		}
@@ -1767,23 +1823,34 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 	struct qed_queue_start_common_params params;
 	struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
 	u8 status = PFVF_STATUS_NO_RESOURCE;
+	struct qed_vf_q_info *p_queue;
 	struct vfpf_start_rxq_tlv *req;
 	bool b_legacy_vf = false;
 	int rc;
 
-	memset(&params, 0, sizeof(params));
 	req = &mbx->req_virt->start_rxq;
 
 	if (!qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid) ||
 	    !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
 		goto out;
 
-	params.queue_id =  vf->vf_queues[req->rx_qid].fw_rx_qid;
-	params.vf_qid = req->rx_qid;
+	/* Acquire a new queue-cid */
+	p_queue = &vf->vf_queues[req->rx_qid];
+
+	memset(&params, 0, sizeof(params));
+	params.queue_id = p_queue->fw_rx_qid;
 	params.vport_id = vf->vport_id;
+	params.stats_id = vf->abs_vf_id + 0x10;
 	params.sb = req->hw_sb;
 	params.sb_idx = req->sb_index;
 
+	p_queue->p_rx_cid = _qed_eth_queue_to_cid(p_hwfn,
+						  vf->opaque_fid,
+						  p_queue->fw_cid,
+						  req->rx_qid, &params);
+	if (!p_queue->p_rx_cid)
+		goto out;
+
 	/* Legacy VFs have their Producers in a different location, which they
 	 * calculate on their own and clean the producer prior to this.
 	 */
@@ -1796,21 +1863,19 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn,
 		       MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid),
 		       0);
 	}
+	p_queue->p_rx_cid->b_legacy_vf = b_legacy_vf;
 
-	rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, vf->opaque_fid,
-					 vf->vf_queues[req->rx_qid].fw_cid,
-					 &params,
-					 vf->abs_vf_id + 0x10,
-					 req->bd_max_bytes,
-					 req->rxq_addr,
-					 req->cqe_pbl_addr, req->cqe_pbl_size,
-					 b_legacy_vf);
-
+	rc = qed_eth_rxq_start_ramrod(p_hwfn,
+				      p_queue->p_rx_cid,
+				      req->bd_max_bytes,
+				      req->rxq_addr,
+				      req->cqe_pbl_addr, req->cqe_pbl_size);
 	if (rc) {
 		status = PFVF_STATUS_FAILURE;
+		qed_eth_queue_cid_release(p_hwfn, p_queue->p_rx_cid);
+		p_queue->p_rx_cid = NULL;
 	} else {
 		status = PFVF_STATUS_SUCCESS;
-		vf->vf_queues[req->rx_qid].rxq_active = true;
 		vf->num_active_rxqs++;
 	}
 
@@ -1867,7 +1932,9 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
 	u8 status = PFVF_STATUS_NO_RESOURCE;
 	union qed_qm_pq_params pq_params;
 	struct vfpf_start_txq_tlv *req;
+	struct qed_vf_q_info *p_queue;
 	int rc;
+	u16 pq;
 
 	/* Prepare the parameters which would choose the right PQ */
 	memset(&pq_params, 0, sizeof(pq_params));
@@ -1881,24 +1948,31 @@ static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn,
 	    !qed_iov_validate_sb(p_hwfn, vf, req->hw_sb))
 		goto out;
 
-	params.queue_id =  vf->vf_queues[req->tx_qid].fw_tx_qid;
+	/* Acquire a new queue-cid */
+	p_queue = &vf->vf_queues[req->tx_qid];
+
+	params.queue_id = p_queue->fw_tx_qid;
 	params.vport_id = vf->vport_id;
+	params.stats_id = vf->abs_vf_id + 0x10;
 	params.sb = req->hw_sb;
 	params.sb_idx = req->sb_index;
 
-	rc = qed_sp_eth_txq_start_ramrod(p_hwfn,
-					 vf->opaque_fid,
-					 vf->vf_queues[req->tx_qid].fw_cid,
-					 &params,
-					 vf->abs_vf_id + 0x10,
-					 req->pbl_addr,
-					 req->pbl_size, &pq_params);
+	p_queue->p_tx_cid = _qed_eth_queue_to_cid(p_hwfn,
+						  vf->opaque_fid,
+						  p_queue->fw_cid,
+						  req->tx_qid, &params);
+	if (!p_queue->p_tx_cid)
+		goto out;
 
+	pq = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, &pq_params);
+	rc = qed_eth_txq_start_ramrod(p_hwfn, p_queue->p_tx_cid,
+				      req->pbl_addr, req->pbl_size, pq);
 	if (rc) {
 		status = PFVF_STATUS_FAILURE;
+		qed_eth_queue_cid_release(p_hwfn, p_queue->p_tx_cid);
+		p_queue->p_tx_cid = NULL;
 	} else {
 		status = PFVF_STATUS_SUCCESS;
-		vf->vf_queues[req->tx_qid].txq_active = true;
 	}
 
 out:
@@ -1909,6 +1983,7 @@ static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
 				struct qed_vf_info *vf,
 				u16 rxq_id, u8 num_rxqs, bool cqe_completion)
 {
+	struct qed_vf_q_info *p_queue;
 	int rc = 0;
 	int qid;
 
@@ -1916,16 +1991,18 @@ static int qed_iov_vf_stop_rxqs(struct qed_hwfn *p_hwfn,
 		return -EINVAL;
 
 	for (qid = rxq_id; qid < rxq_id + num_rxqs; qid++) {
-		if (vf->vf_queues[qid].rxq_active) {
-			rc = qed_sp_eth_rx_queue_stop(p_hwfn,
-						      vf->vf_queues[qid].
-						      fw_rx_qid, false,
-						      cqe_completion);
+		p_queue = &vf->vf_queues[qid];
 
-			if (rc)
-				return rc;
-		}
-		vf->vf_queues[qid].rxq_active = false;
+		if (!p_queue->p_rx_cid)
+			continue;
+
+		rc = qed_eth_rx_queue_stop(p_hwfn,
+					   p_queue->p_rx_cid,
+					   false, cqe_completion);
+		if (rc)
+			return rc;
+
+		vf->vf_queues[qid].p_rx_cid = NULL;
 		vf->num_active_rxqs--;
 	}
 
@@ -1936,22 +2013,24 @@ static int qed_iov_vf_stop_txqs(struct qed_hwfn *p_hwfn,
 				struct qed_vf_info *vf, u16 txq_id, u8 num_txqs)
 {
 	int rc = 0;
+	struct qed_vf_q_info *p_queue;
 	int qid;
 
 	if (txq_id + num_txqs > ARRAY_SIZE(vf->vf_queues))
 		return -EINVAL;
 
 	for (qid = txq_id; qid < txq_id + num_txqs; qid++) {
-		if (vf->vf_queues[qid].txq_active) {
-			rc = qed_sp_eth_tx_queue_stop(p_hwfn,
-						      vf->vf_queues[qid].
-						      fw_tx_qid);
+		p_queue = &vf->vf_queues[qid];
+		if (!p_queue->p_tx_cid)
+			continue;
 
-			if (rc)
-				return rc;
-		}
-		vf->vf_queues[qid].txq_active = false;
+		rc = qed_eth_tx_queue_stop(p_hwfn, p_queue->p_tx_cid);
+		if (rc)
+			return rc;
+
+		p_queue->p_tx_cid = NULL;
 	}
+
 	return rc;
 }
 
@@ -2006,10 +2085,11 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
 				       struct qed_ptt *p_ptt,
 				       struct qed_vf_info *vf)
 {
+	struct qed_queue_cid *handlers[QED_MAX_VF_CHAINS_PER_PF];
 	u16 length = sizeof(struct pfvf_def_resp_tlv);
 	struct qed_iov_vf_mbx *mbx = &vf->vf_mbx;
 	struct vfpf_update_rxq_tlv *req;
-	u8 status = PFVF_STATUS_SUCCESS;
+	u8 status = PFVF_STATUS_FAILURE;
 	u8 complete_event_flg;
 	u8 complete_cqe_flg;
 	u16 qid;
@@ -2020,29 +2100,36 @@ static void qed_iov_vf_mbx_update_rxqs(struct qed_hwfn *p_hwfn,
 	complete_cqe_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_CQE_FLAG);
 	complete_event_flg = !!(req->flags & VFPF_RXQ_UPD_COMPLETE_EVENT_FLAG);
 
+	/* Validate inputs */
+	if (req->num_rxqs + req->rx_qid > QED_MAX_VF_CHAINS_PER_PF ||
+	    !qed_iov_validate_rxq(p_hwfn, vf, req->rx_qid)) {
+		DP_INFO(p_hwfn, "VF[%d]: Incorrect Rxqs [%04x, %02x]\n",
+			vf->relative_vf_id, req->rx_qid, req->num_rxqs);
+		goto out;
+	}
+
 	for (i = 0; i < req->num_rxqs; i++) {
 		qid = req->rx_qid + i;
-
-		if (!vf->vf_queues[qid].rxq_active) {
-			DP_NOTICE(p_hwfn, "VF rx_qid = %d isn`t active!\n",
-				  qid);
-			status = PFVF_STATUS_FAILURE;
-			break;
+		if (!vf->vf_queues[qid].p_rx_cid) {
+			DP_INFO(p_hwfn,
+				"VF[%d] rx_qid = %d isn`t active!\n",
+				vf->relative_vf_id, qid);
+			goto out;
 		}
 
-		rc = qed_sp_eth_rx_queues_update(p_hwfn,
-						 vf->vf_queues[qid].fw_rx_qid,
-						 1,
-						 complete_cqe_flg,
-						 complete_event_flg,
-						 QED_SPQ_MODE_EBLOCK, NULL);
-
-		if (rc) {
-			status = PFVF_STATUS_FAILURE;
-			break;
-		}
+		handlers[i] = vf->vf_queues[qid].p_rx_cid;
 	}
 
+	rc = qed_sp_eth_rx_queues_update(p_hwfn, (void **)&handlers,
+					 req->num_rxqs,
+					 complete_cqe_flg,
+					 complete_event_flg,
+					 QED_SPQ_MODE_EBLOCK, NULL);
+	if (rc)
+		goto out;
+
+	status = PFVF_STATUS_SUCCESS;
+out:
 	qed_iov_prepare_resp(p_hwfn, p_ptt, vf, CHANNEL_TLV_UPDATE_RXQ,
 			     length, status);
 }
@@ -2253,7 +2340,7 @@ qed_iov_vp_update_rss_param(struct qed_hwfn *p_hwfn,
 			DP_NOTICE(p_hwfn,
 				  "rss_ind_table[%d] = %d, rxq is out of range\n",
 				  i, q_idx);
-		else if (!vf->vf_queues[q_idx].rxq_active)
+		else if (!vf->vf_queues[q_idx].p_rx_cid)
 			DP_NOTICE(p_hwfn,
 				  "rss_ind_table[%d] = %d, rxq is not active\n",
 				  i, q_idx);
@@ -2804,6 +2891,13 @@ qed_iov_execute_vf_flr_cleanup(struct qed_hwfn *p_hwfn,
 			return rc;
 		}
 
+		/* Workaround to make VF-PF channel ready, as FW
+		 * doesn't do that as a part of FLR.
+		 */
+		REG_WR(p_hwfn,
+		       GTT_BAR0_MAP_REG_USDM_RAM +
+		       USTORM_VF_PF_CHANNEL_READY_OFFSET(vfid), 1);
+
 		/* VF_STOPPED has to be set only after final cleanup
 		 * but prior to re-enabling the VF.
 		 */
@@ -2942,7 +3036,8 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
 	mbx->first_tlv = mbx->req_virt->first_tlv;
 
 	/* check if tlv type is known */
-	if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) {
+	if (qed_iov_tlv_supported(mbx->first_tlv.tl.type) &&
+	    !p_vf->b_malicious) {
 		switch (mbx->first_tlv.tl.type) {
 		case CHANNEL_TLV_ACQUIRE:
 			qed_iov_vf_mbx_acquire(p_hwfn, p_ptt, p_vf);
@@ -2984,6 +3079,15 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn,
 			qed_iov_vf_mbx_release(p_hwfn, p_ptt, p_vf);
 			break;
 		}
+	} else if (qed_iov_tlv_supported(mbx->first_tlv.tl.type)) {
+		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+			   "VF [%02x] - considered malicious; Ignoring TLV [%04x]\n",
+			   p_vf->abs_vf_id, mbx->first_tlv.tl.type);
+
+		qed_iov_prepare_resp(p_hwfn, p_ptt, p_vf,
+				     mbx->first_tlv.tl.type,
+				     sizeof(struct pfvf_def_resp_tlv),
+				     PFVF_STATUS_MALICIOUS);
 	} else {
 		/* unknown TLV - this may belong to a VF driver from the future
 		 * - a version written after this PF driver was written, which
@@ -3033,20 +3137,30 @@ static void qed_iov_pf_get_and_clear_pending_events(struct qed_hwfn *p_hwfn,
 	memset(p_pending_events, 0, sizeof(u64) * QED_VF_ARRAY_LENGTH);
 }
 
-static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn,
-			      u16 abs_vfid, struct regpair *vf_msg)
+static struct qed_vf_info *qed_sriov_get_vf_from_absid(struct qed_hwfn *p_hwfn,
+						       u16 abs_vfid)
 {
-	u8 min = (u8)p_hwfn->cdev->p_iov_info->first_vf_in_pf;
-	struct qed_vf_info *p_vf;
+	u8 min = (u8) p_hwfn->cdev->p_iov_info->first_vf_in_pf;
 
-	if (!qed_iov_pf_sanity_check(p_hwfn, (int)abs_vfid - min)) {
+	if (!_qed_iov_pf_sanity_check(p_hwfn, (int)abs_vfid - min, false)) {
 		DP_VERBOSE(p_hwfn,
 			   QED_MSG_IOV,
-			   "Got a message from VF [abs 0x%08x] that cannot be handled by PF\n",
+			   "Got indication for VF [abs 0x%08x] that cannot be handled by PF\n",
 			   abs_vfid);
-		return 0;
+		return NULL;
 	}
-	p_vf = &p_hwfn->pf_iov_info->vfs_array[(u8)abs_vfid - min];
+
+	return &p_hwfn->pf_iov_info->vfs_array[(u8) abs_vfid - min];
+}
+
+static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn,
+			      u16 abs_vfid, struct regpair *vf_msg)
+{
+	struct qed_vf_info *p_vf = qed_sriov_get_vf_from_absid(p_hwfn,
+			   abs_vfid);
+
+	if (!p_vf)
+		return 0;
 
 	/* List the physical address of the request so that handler
 	 * could later on copy the message from it.
@@ -3060,6 +3174,23 @@ static int qed_sriov_vfpf_msg(struct qed_hwfn *p_hwfn,
 	return 0;
 }
 
+static void qed_sriov_vfpf_malicious(struct qed_hwfn *p_hwfn,
+				     struct malicious_vf_eqe_data *p_data)
+{
+	struct qed_vf_info *p_vf;
+
+	p_vf = qed_sriov_get_vf_from_absid(p_hwfn, p_data->vf_id);
+
+	if (!p_vf)
+		return;
+
+	DP_INFO(p_hwfn,
+		"VF [%d] - Malicious behavior [%02x]\n",
+		p_vf->abs_vf_id, p_data->err_id);
+
+	p_vf->b_malicious = true;
+}
+
 int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
 			u8 opcode, __le16 echo, union event_ring_data *data)
 {
@@ -3067,6 +3198,9 @@ int qed_sriov_eqe_event(struct qed_hwfn *p_hwfn,
 	case COMMON_EVENT_VF_PF_CHANNEL:
 		return qed_sriov_vfpf_msg(p_hwfn, le16_to_cpu(echo),
 					  &data->vf_pf_channel.msg_addr);
+	case COMMON_EVENT_MALICIOUS_VF:
+		qed_sriov_vfpf_malicious(p_hwfn, &data->malicious_vf);
+		return 0;
 	default:
 		DP_INFO(p_hwfn->cdev, "Unknown sriov eqe event 0x%02x\n",
 			opcode);
@@ -3083,7 +3217,7 @@ u16 qed_iov_get_next_active_vf(struct qed_hwfn *p_hwfn, u16 rel_vf_id)
 		goto out;
 
 	for (i = rel_vf_id; i < p_iov->total_vfs; i++)
-		if (qed_iov_is_valid_vfid(p_hwfn, rel_vf_id, true))
+		if (qed_iov_is_valid_vfid(p_hwfn, i, true, false))
 			return i;
 
 out:
@@ -3130,6 +3264,12 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn,
 		return;
 	}
 
+	if (vf_info->b_malicious) {
+		DP_NOTICE(p_hwfn->cdev,
+			  "Can't set forced MAC to malicious VF [%d]\n", vfid);
+		return;
+	}
+
 	feature = 1 << MAC_ADDR_FORCED;
 	memcpy(vf_info->bulletin.p_virt->mac, mac, ETH_ALEN);
 
@@ -3153,6 +3293,12 @@ static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn,
 		return;
 	}
 
+	if (vf_info->b_malicious) {
+		DP_NOTICE(p_hwfn->cdev,
+			  "Can't set forced vlan to malicious VF [%d]\n", vfid);
+		return;
+	}
+
 	feature = 1 << VLAN_ADDR_FORCED;
 	vf_info->bulletin.p_virt->pvid = pvid;
 	if (pvid)
@@ -3367,7 +3513,7 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 		qed_for_each_vf(hwfn, j) {
 			int k;
 
-			if (!qed_iov_is_valid_vfid(hwfn, j, true))
+			if (!qed_iov_is_valid_vfid(hwfn, j, true, false))
 				continue;
 
 			/* Wait until VF is disabled before releasing */
@@ -3394,9 +3540,28 @@ int qed_sriov_disable(struct qed_dev *cdev, bool pci_enabled)
 	return 0;
 }
 
+static void qed_sriov_enable_qid_config(struct qed_hwfn *hwfn,
+					u16 vfid,
+					struct qed_iov_vf_init_params *params)
+{
+	u16 base, i;
+
+	/* Since we have an equal resource distribution per-VF, and we assume
+	 * PF has acquired the first QED_PF_L2_QUE queues, we start assigning
+	 * VF queue ids sequentially from there.
+	 */
+	base = FEAT_NUM(hwfn, QED_PF_L2_QUE) + vfid * params->num_queues;
+
+	params->rel_vf_id = vfid;
+	for (i = 0; i < params->num_queues; i++) {
+		params->req_rx_queue[i] = base + i;
+		params->req_tx_queue[i] = base + i;
+	}
+}
+
 static int qed_sriov_enable(struct qed_dev *cdev, int num)
 {
-	struct qed_sb_cnt_info sb_cnt_info;
+	struct qed_iov_vf_init_params params;
 	int i, j, rc;
 
 	if (num >= RESC_NUM(&cdev->hwfns[0], QED_VPORT)) {
@@ -3405,11 +3570,17 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
 		return -EINVAL;
 	}
 
+	memset(&params, 0, sizeof(params));
+
 	/* Initialize HW for VF access */
 	for_each_hwfn(cdev, j) {
 		struct qed_hwfn *hwfn = &cdev->hwfns[j];
 		struct qed_ptt *ptt = qed_ptt_acquire(hwfn);
-		int num_sbs = 0, limit = 16;
+
+		/* Make sure not to use more than 16 queues per VF */
+		params.num_queues = min_t(int,
+					  FEAT_NUM(hwfn, QED_VF_L2_QUE) / num,
+					  16);
 
 		if (!ptt) {
 			DP_ERR(hwfn, "Failed to acquire ptt\n");
@@ -3417,19 +3588,12 @@ static int qed_sriov_enable(struct qed_dev *cdev, int num)
 			goto err;
 		}
 
-		if (IS_MF_DEFAULT(hwfn))
-			limit = MAX_NUM_VFS_BB / hwfn->num_funcs_on_engine;
-
-		memset(&sb_cnt_info, 0, sizeof(sb_cnt_info));
-		qed_int_get_num_sbs(hwfn, &sb_cnt_info);
-		num_sbs = min_t(int, sb_cnt_info.sb_free_blk, limit);
-
 		for (i = 0; i < num; i++) {
-			if (!qed_iov_is_valid_vfid(hwfn, i, false))
+			if (!qed_iov_is_valid_vfid(hwfn, i, false, true))
 				continue;
 
-			rc = qed_iov_init_hw_for_vf(hwfn,
-						    ptt, i, num_sbs / num);
+			qed_sriov_enable_qid_config(hwfn, i, &params);
+			rc = qed_iov_init_hw_for_vf(hwfn, ptt, &params);
 			if (rc) {
 				DP_ERR(cdev, "Failed to enable VF[%d]\n", i);
 				qed_ptt_release(hwfn, ptt);
@@ -3477,7 +3641,7 @@ static int qed_sriov_pf_set_mac(struct qed_dev *cdev, u8 *mac, int vfid)
 		return -EINVAL;
 	}
 
-	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true)) {
+	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true, true)) {
 		DP_VERBOSE(cdev, QED_MSG_IOV,
 			   "Cannot set VF[%d] MAC (VF is not active)\n", vfid);
 		return -EINVAL;
@@ -3509,7 +3673,7 @@ static int qed_sriov_pf_set_vlan(struct qed_dev *cdev, u16 vid, int vfid)
 		return -EINVAL;
 	}
 
-	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true)) {
+	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vfid, true, true)) {
 		DP_VERBOSE(cdev, QED_MSG_IOV,
 			   "Cannot set VF[%d] MAC (VF is not active)\n", vfid);
 		return -EINVAL;
@@ -3543,7 +3707,7 @@ static int qed_get_vf_config(struct qed_dev *cdev,
 	if (IS_VF(cdev))
 		return -EINVAL;
 
-	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true)) {
+	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true, false)) {
 		DP_VERBOSE(cdev, QED_MSG_IOV,
 			   "VF index [%d] isn't active\n", vf_id);
 		return -EINVAL;
@@ -3647,7 +3811,7 @@ static int qed_set_vf_link_state(struct qed_dev *cdev,
 	if (IS_VF(cdev))
 		return -EINVAL;
 
-	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true)) {
+	if (!qed_iov_is_valid_vfid(&cdev->hwfns[0], vf_id, true, true)) {
 		DP_VERBOSE(cdev, QED_MSG_IOV,
 			   "VF index [%d] isn't active\n", vf_id);
 		return -EINVAL;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index 0dd23e409b3f..509c02b4772e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -58,6 +58,23 @@ struct qed_public_vf_info {
 	int tx_rate;
 };
 
+struct qed_iov_vf_init_params {
+	u16 rel_vf_id;
+
+	/* Number of requested Queues; Currently, don't support different
+	 * number of Rx/Tx queues.
+	 */
+
+	u16 num_queues;
+
+	/* Allow the client to choose which qzones to use for Rx/Tx,
+	 * and which queue_base to use for Tx queues on a per-queue basis.
+	 * Notice values should be relative to the PF resources.
+	 */
+	u16 req_rx_queue[QED_MAX_VF_CHAINS_PER_PF];
+	u16 req_tx_queue[QED_MAX_VF_CHAINS_PER_PF];
+};
+
 /* This struct is part of qed_dev and contains data relevant to all hwfns;
  * Initialized only if SR-IOV cpabability is exposed in PCIe config space.
  */
@@ -99,10 +116,10 @@ struct qed_iov_vf_mbx {
 
 struct qed_vf_q_info {
 	u16 fw_rx_qid;
+	struct qed_queue_cid *p_rx_cid;
 	u16 fw_tx_qid;
+	struct qed_queue_cid *p_tx_cid;
 	u8 fw_cid;
-	u8 rxq_active;
-	u8 txq_active;
 };
 
 enum vf_state {
@@ -132,6 +149,7 @@ struct qed_vf_info {
 	struct qed_iov_vf_mbx vf_mbx;
 	enum vf_state state;
 	bool b_init;
+	bool b_malicious;
 	u8 to_disable;
 
 	struct qed_bulletin bulletin;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index abf5bf11f865..60b31a8ede73 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -388,18 +388,18 @@ free_p_iov:
 #define MSTORM_QZONE_START(dev)   (TSTORM_QZONE_START +	\
 				   (TSTORM_QZONE_SIZE * NUM_OF_L2_QUEUES(dev)))
 
-int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-			u8 rx_qid,
-			u16 sb,
-			u8 sb_index,
-			u16 bd_max_bytes,
-			dma_addr_t bd_chain_phys_addr,
-			dma_addr_t cqe_pbl_addr,
-			u16 cqe_pbl_size, void __iomem **pp_prod)
+int
+qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
+		    struct qed_queue_cid *p_cid,
+		    u16 bd_max_bytes,
+		    dma_addr_t bd_chain_phys_addr,
+		    dma_addr_t cqe_pbl_addr,
+		    u16 cqe_pbl_size, void __iomem **pp_prod)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct pfvf_start_queue_resp_tlv *resp;
 	struct vfpf_start_rxq_tlv *req;
+	u8 rx_qid = p_cid->rel.queue_id;
 	int rc;
 
 	/* clear mailbox and prep first tlv */
@@ -409,21 +409,22 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 	req->cqe_pbl_addr = cqe_pbl_addr;
 	req->cqe_pbl_size = cqe_pbl_size;
 	req->rxq_addr = bd_chain_phys_addr;
-	req->hw_sb = sb;
-	req->sb_index = sb_index;
+	req->hw_sb = p_cid->rel.sb;
+	req->sb_index = p_cid->rel.sb_idx;
 	req->bd_max_bytes = bd_max_bytes;
 	req->stat_id = -1;
 
 	/* If PF is legacy, we'll need to calculate producers ourselves
 	 * as well as clean them.
 	 */
-	if (pp_prod && p_iov->b_pre_fp_hsi) {
+	if (p_iov->b_pre_fp_hsi) {
 		u8 hw_qid = p_iov->acquire_resp.resc.hw_qid[rx_qid];
 		u32 init_prod_val = 0;
 
-		*pp_prod = (u8 __iomem *)p_hwfn->regview +
-					 MSTORM_QZONE_START(p_hwfn->cdev) +
-					 hw_qid * MSTORM_QZONE_SIZE;
+		*pp_prod = (u8 __iomem *)
+		    p_hwfn->regview +
+		    MSTORM_QZONE_START(p_hwfn->cdev) +
+		    hw_qid * MSTORM_QZONE_SIZE;
 
 		/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
 		__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
@@ -444,7 +445,7 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 	}
 
 	/* Learn the address of the producer from the response */
-	if (pp_prod && !p_iov->b_pre_fp_hsi) {
+	if (!p_iov->b_pre_fp_hsi) {
 		u32 init_prod_val = 0;
 
 		*pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset;
@@ -462,7 +463,8 @@ exit:
 	return rc;
 }
 
-int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion)
+int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
+		       struct qed_queue_cid *p_cid, bool cqe_completion)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct vfpf_stop_rxqs_tlv *req;
@@ -472,7 +474,7 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion)
 	/* clear mailbox and prep first tlv */
 	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_STOP_RXQS, sizeof(*req));
 
-	req->rx_qid = rx_qid;
+	req->rx_qid = p_cid->rel.queue_id;
 	req->num_rxqs = 1;
 	req->cqe_completion = cqe_completion;
 
@@ -496,28 +498,28 @@ exit:
 	return rc;
 }
 
-int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-			u16 tx_queue_id,
-			u16 sb,
-			u8 sb_index,
-			dma_addr_t pbl_addr,
-			u16 pbl_size, void __iomem **pp_doorbell)
+int
+qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+		    struct qed_queue_cid *p_cid,
+		    dma_addr_t pbl_addr,
+		    u16 pbl_size, void __iomem **pp_doorbell)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct pfvf_start_queue_resp_tlv *resp;
 	struct vfpf_start_txq_tlv *req;
+	u16 qid = p_cid->rel.queue_id;
 	int rc;
 
 	/* clear mailbox and prep first tlv */
 	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_START_TXQ, sizeof(*req));
 
-	req->tx_qid = tx_queue_id;
+	req->tx_qid = qid;
 
 	/* Tx */
 	req->pbl_addr = pbl_addr;
 	req->pbl_size = pbl_size;
-	req->hw_sb = sb;
-	req->sb_index = sb_index;
+	req->hw_sb = p_cid->rel.sb;
+	req->sb_index = p_cid->rel.sb_idx;
 
 	/* add list termination tlv */
 	qed_add_tlv(p_hwfn, &p_iov->offset,
@@ -533,33 +535,29 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 		goto exit;
 	}
 
-	if (pp_doorbell) {
-		/* Modern PFs provide the actual offsets, while legacy
-		 * provided only the queue id.
-		 */
-		if (!p_iov->b_pre_fp_hsi) {
-			*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-						     resp->offset;
-		} else {
-			u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id];
-			u32 db_addr;
-
-			db_addr = qed_db_addr_vf(cid, DQ_DEMS_LEGACY);
-			*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
-						     db_addr;
-		}
+	/* Modern PFs provide the actual offsets, while legacy
+	 * provided only the queue id.
+	 */
+	if (!p_iov->b_pre_fp_hsi) {
+		*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + resp->offset;
+	} else {
+		u8 cid = p_iov->acquire_resp.resc.cid[qid];
 
-		DP_VERBOSE(p_hwfn, QED_MSG_IOV,
-			   "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
-			   tx_queue_id, *pp_doorbell, resp->offset);
+		*pp_doorbell = (u8 __iomem *)p_hwfn->doorbells +
+					     qed_db_addr_vf(cid,
+							    DQ_DEMS_LEGACY);
 	}
+
+	DP_VERBOSE(p_hwfn, QED_MSG_IOV,
+		   "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n",
+		   qid, *pp_doorbell, resp->offset);
 exit:
 	qed_vf_pf_req_end(p_hwfn, rc);
 
 	return rc;
 }
 
-int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
+int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid)
 {
 	struct qed_vf_iov *p_iov = p_hwfn->vf_iov_info;
 	struct vfpf_stop_txqs_tlv *req;
@@ -569,7 +567,7 @@ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
 	/* clear mailbox and prep first tlv */
 	req = qed_vf_pf_prep(p_hwfn, CHANNEL_TLV_STOP_TXQS, sizeof(*req));
 
-	req->tx_qid = tx_qid;
+	req->tx_qid = p_cid->rel.queue_id;
 	req->num_txqs = 1;
 
 	/* add list termination tlv */
@@ -1171,6 +1169,13 @@ void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn, u8 *num_vlan_filters)
 	*num_vlan_filters = p_vf->acquire_resp.resc.num_vlan_filters;
 }
 
+void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters)
+{
+	struct qed_vf_iov *p_vf = p_hwfn->vf_iov_info;
+
+	*num_mac_filters = p_vf->acquire_resp.resc.num_mac_filters;
+}
+
 bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac)
 {
 	struct qed_bulletin_content *bulletin;
@@ -1230,8 +1235,8 @@ static void qed_handle_bulletin_change(struct qed_hwfn *hwfn)
 
 	is_mac_exist = qed_vf_bulletin_get_forced_mac(hwfn, mac,
 						      &is_mac_forced);
-	if (is_mac_exist && is_mac_forced && cookie)
-		ops->force_mac(cookie, mac);
+	if (is_mac_exist && cookie)
+		ops->force_mac(cookie, mac, !!is_mac_forced);
 
 	/* Always update link configuration according to bulletin */
 	qed_link_update(hwfn);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h
index 35db7a28aa13..11eb3854e6f2 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h
@@ -40,6 +40,7 @@ enum {
 	PFVF_STATUS_NOT_SUPPORTED,
 	PFVF_STATUS_NO_RESOURCE,
 	PFVF_STATUS_FORCED,
+	PFVF_STATUS_MALICIOUS,
 };
 
 /* vf pf channel tlvs */
@@ -622,6 +623,14 @@ void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn,
 				 u8 *num_vlan_filters);
 
 /**
+ * @brief Get number of MAC filters allocated for VF by qed
+ *
+ *  @param p_hwfn
+ *  @param num_mac_filters - allocated MAC filters
+ */
+void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn, u8 *num_mac_filters);
+
+/**
  * @brief Check if VF can set a MAC address
  *
  * @param p_hwfn
@@ -657,10 +666,7 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
 /**
  * @brief VF - start the RX Queue by sending a message to the PF
  * @param p_hwfn
- * @param cid                   - zero based within the VF
- * @param rx_queue_id           - zero based within the VF
- * @param sb                    - VF status block for this queue
- * @param sb_index              - Index within the status block
+ * @param p_cid			- Only relative fields are relevant
  * @param bd_max_bytes          - maximum number of bytes per bd
  * @param bd_chain_phys_addr    - physical address of bd chain
  * @param cqe_pbl_addr          - physical address of pbl
@@ -671,9 +677,7 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn);
  * @return int
  */
 int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-			u8 rx_queue_id,
-			u16 sb,
-			u8 sb_index,
+			struct qed_queue_cid *p_cid,
 			u16 bd_max_bytes,
 			dma_addr_t bd_chain_phys_addr,
 			dma_addr_t cqe_pbl_addr,
@@ -693,24 +697,23 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
  *
  * @return int
  */
-int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-			u16 tx_queue_id,
-			u16 sb,
-			u8 sb_index,
-			dma_addr_t pbl_addr,
-			u16 pbl_size, void __iomem **pp_doorbell);
+int
+qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
+		    struct qed_queue_cid *p_cid,
+		    dma_addr_t pbl_addr,
+		    u16 pbl_size, void __iomem **pp_doorbell);
 
 /**
  * @brief VF - stop the RX queue by sending a message to the PF
  *
  * @param p_hwfn
- * @param rx_qid
+ * @param p_cid
  * @param cqe_completion
  *
  * @return int
  */
 int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
-		       u16 rx_qid, bool cqe_completion);
+		       struct qed_queue_cid *p_cid, bool cqe_completion);
 
 /**
  * @brief VF - stop the TX queue by sending a message to the PF
@@ -720,7 +723,7 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
  *
  * @return int
  */
-int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid);
+int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, struct qed_queue_cid *p_cid);
 
 /**
  * @brief VF - send a vport update command
@@ -871,6 +874,11 @@ static inline void qed_vf_get_num_vlan_filters(struct qed_hwfn *p_hwfn,
 {
 }
 
+static inline void qed_vf_get_num_mac_filters(struct qed_hwfn *p_hwfn,
+					      u8 *num_mac_filters)
+{
+}
+
 static inline bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac)
 {
 	return false;
@@ -888,9 +896,7 @@ static inline int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn)
 }
 
 static inline int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
-				      u8 rx_queue_id,
-				      u16 sb,
-				      u8 sb_index,
+				      struct qed_queue_cid *p_cid,
 				      u16 bd_max_bytes,
 				      dma_addr_t bd_chain_phys_adr,
 				      dma_addr_t cqe_pbl_addr,
@@ -900,9 +906,7 @@ static inline int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 }
 
 static inline int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
-				      u16 tx_queue_id,
-				      u16 sb,
-				      u8 sb_index,
+				      struct qed_queue_cid *p_cid,
 				      dma_addr_t pbl_addr,
 				      u16 pbl_size, void __iomem **pp_doorbell)
 {
@@ -910,12 +914,14 @@ static inline int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn,
 }
 
 static inline int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn,
-				     u16 rx_qid, bool cqe_completion)
+				     struct qed_queue_cid *p_cid,
+				     bool cqe_completion)
 {
 	return -EINVAL;
 }
 
-static inline int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid)
+static inline int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn,
+				     struct qed_queue_cid *p_cid)
 {
 	return -EINVAL;
 }
diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 974689a13337..c79dc78746fc 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -16,6 +16,7 @@
 #include <linux/bitmap.h>
 #include <linux/kernel.h>
 #include <linux/mutex.h>
+#include <linux/bpf.h>
 #include <linux/io.h>
 #include <linux/qed/common_hsi.h>
 #include <linux/qed/eth_common.h>
@@ -127,10 +128,9 @@ struct qede_dev {
 
 	const struct qed_eth_ops	*ops;
 
-	struct qed_dev_eth_info	dev_info;
+	struct qed_dev_eth_info dev_info;
 #define QEDE_MAX_RSS_CNT(edev)	((edev)->dev_info.num_queues)
-#define QEDE_MAX_TSS_CNT(edev)	((edev)->dev_info.num_queues * \
-				 (edev)->dev_info.num_tc)
+#define QEDE_MAX_TSS_CNT(edev)	((edev)->dev_info.num_queues)
 
 	struct qede_fastpath		*fp_array;
 	u8				req_num_tx;
@@ -139,17 +139,9 @@ struct qede_dev {
 	u8				fp_num_rx;
 	u16				req_queues;
 	u16				num_queues;
-	u8				num_tc;
 #define QEDE_QUEUE_CNT(edev)	((edev)->num_queues)
 #define QEDE_RSS_COUNT(edev)	((edev)->num_queues - (edev)->fp_num_tx)
-#define QEDE_TSS_COUNT(edev)	(((edev)->num_queues - (edev)->fp_num_rx) * \
-				 (edev)->num_tc)
-#define QEDE_TX_IDX(edev, txqidx)	((edev)->fp_num_rx + (txqidx) % \
-					 QEDE_TSS_COUNT(edev))
-#define QEDE_TC_IDX(edev, txqidx)	((txqidx) / QEDE_TSS_COUNT(edev))
-#define QEDE_TX_QUEUE(edev, txqidx)	\
-	(&(edev)->fp_array[QEDE_TX_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX(\
-							(edev), (txqidx))])
+#define QEDE_TSS_COUNT(edev)	((edev)->num_queues - (edev)->fp_num_rx)
 
 	struct qed_int_info		int_info;
 	unsigned char			primary_mac[ETH_ALEN];
@@ -193,7 +185,11 @@ struct qede_dev {
 	u16				vxlan_dst_port;
 	u16				geneve_dst_port;
 
+	bool wol_enabled;
+
 	struct qede_rdma_dev		rdma_info;
+
+	struct bpf_prog *xdp_prog;
 };
 
 enum QEDE_STATE {
@@ -223,39 +219,67 @@ enum qede_agg_state {
 };
 
 struct qede_agg_info {
-	struct sw_rx_data replace_buf;
-	dma_addr_t replace_buf_mapping;
-	struct sw_rx_data start_buf;
-	dma_addr_t start_buf_mapping;
-	struct eth_fast_path_rx_tpa_start_cqe start_cqe;
-	enum qede_agg_state agg_state;
+	/* rx_buf is a data buffer that can be placed / consumed from rx bd
+	 * chain. It has two purposes: We will preallocate the data buffer
+	 * for each aggregation when we open the interface and will place this
+	 * buffer on the rx-bd-ring when we receive TPA_START. We don't want
+	 * to be in a state where allocation fails, as we can't reuse the
+	 * consumer buffer in the rx-chain since FW may still be writing to it
+	 * (since header needs to be modified for TPA).
+	 * The second purpose is to keep a pointer to the bd buffer during
+	 * aggregation.
+	 */
+	struct sw_rx_data buffer;
+	dma_addr_t buffer_mapping;
+
 	struct sk_buff *skb;
-	int frag_id;
+
+	/* We need some fields from the start CQE until termination */
 	u16 vlan_tag;
+	u16 start_cqe_bd_len;
+	u8 start_cqe_placement_offset;
+
+	u8 state;
+	u8 frag_id;
+
+	u8 tunnel_type;
 };
 
 struct qede_rx_queue {
-	__le16			*hw_cons_ptr;
-	struct sw_rx_data	*sw_rx_ring;
-	u16			sw_rx_cons;
-	u16			sw_rx_prod;
-	struct qed_chain	rx_bd_ring;
-	struct qed_chain	rx_comp_ring;
-	void __iomem		*hw_rxq_prod_addr;
+	__le16 *hw_cons_ptr;
+	void __iomem *hw_rxq_prod_addr;
+
+	/* Required for the allocation of replacement buffers */
+	struct device *dev;
+
+	struct bpf_prog *xdp_prog;
+
+	u16 sw_rx_cons;
+	u16 sw_rx_prod;
+
+	u16 num_rx_buffers; /* Slowpath */
+	u8 data_direction;
+	u8 rxq_id;
+
+	u32 rx_buf_size;
+	u32 rx_buf_seg_size;
+
+	u64 rcv_pkts;
+
+	struct sw_rx_data *sw_rx_ring;
+	struct qed_chain rx_bd_ring;
+	struct qed_chain rx_comp_ring ____cacheline_aligned;
 
 	/* GRO */
-	struct qede_agg_info	tpa_info[ETH_TPA_MAX_AGGS_NUM];
+	struct qede_agg_info tpa_info[ETH_TPA_MAX_AGGS_NUM];
 
-	int			rx_buf_size;
-	unsigned int		rx_buf_seg_size;
+	u64 rx_hw_errors;
+	u64 rx_alloc_errors;
+	u64 rx_ip_frags;
 
-	u16			num_rx_buffers;
-	u16			rxq_id;
+	u64 xdp_no_pass;
 
-	u64			rcv_pkts;
-	u64			rx_hw_errors;
-	u64			rx_alloc_errors;
-	u64			rx_ip_frags;
+	void *handle;
 };
 
 union db_prod {
@@ -271,20 +295,39 @@ struct sw_tx_bd {
 };
 
 struct qede_tx_queue {
-	int			index; /* Queue index */
-	__le16			*hw_cons_ptr;
-	struct sw_tx_bd		*sw_tx_ring;
-	u16			sw_tx_cons;
-	u16			sw_tx_prod;
-	struct qed_chain	tx_pbl;
-	void __iomem		*doorbell_addr;
-	union db_prod		tx_db;
-
-	u16			num_tx_buffers;
-	u64			xmit_pkts;
-	u64			stopped_cnt;
-
-	bool			is_legacy;
+	u8 is_xdp;
+	bool is_legacy;
+	u16 sw_tx_cons;
+	u16 sw_tx_prod;
+	u16 num_tx_buffers; /* Slowpath only */
+
+	u64 xmit_pkts;
+	u64 stopped_cnt;
+
+	__le16 *hw_cons_ptr;
+
+	/* Needed for the mapping of packets */
+	struct device *dev;
+
+	void __iomem *doorbell_addr;
+	union db_prod tx_db;
+	int index; /* Slowpath only */
+#define QEDE_TXQ_XDP_TO_IDX(edev, txq)	((txq)->index - \
+					 QEDE_MAX_TSS_CNT(edev))
+#define QEDE_TXQ_IDX_TO_XDP(edev, idx)	((idx) + QEDE_MAX_TSS_CNT(edev))
+
+	/* Regular Tx requires skb + metadata for release purpose,
+	 * while XDP requires only the pages themselves.
+	 */
+	union {
+		struct sw_tx_bd *skbs;
+		struct page **pages;
+	} sw_tx_ring;
+
+	struct qed_chain tx_pbl;
+
+	/* Slowpath; should be kept at the end [unless padding is missing] */
+	void *handle;
 };
 
 #define BD_UNMAP_ADDR(bd)		HILO_U64(le32_to_cpu((bd)->addr.hi), \
@@ -301,13 +344,16 @@ struct qede_fastpath {
 	struct qede_dev	*edev;
 #define QEDE_FASTPATH_TX	BIT(0)
 #define QEDE_FASTPATH_RX	BIT(1)
+#define QEDE_FASTPATH_XDP	BIT(2)
 #define QEDE_FASTPATH_COMBINED	(QEDE_FASTPATH_TX | QEDE_FASTPATH_RX)
 	u8			type;
 	u8			id;
+	u8			xdp_xmit;
 	struct napi_struct	napi;
 	struct qed_sb_info	*sb_info;
 	struct qede_rx_queue	*rxq;
-	struct qede_tx_queue	*txqs;
+	struct qede_tx_queue	*txq;
+	struct qede_tx_queue	*xdp_tx;
 
 #define VEC_NAME_SIZE	(sizeof(((struct net_device *)0)->name) + 8)
 	char	name[VEC_NAME_SIZE];
@@ -320,6 +366,7 @@ struct qede_fastpath {
 #define XMIT_L4_CSUM		BIT(0)
 #define XMIT_LSO		BIT(1)
 #define XMIT_ENC		BIT(2)
+#define XMIT_ENC_GSO_L4_CSUM	BIT(3)
 
 #define QEDE_CSUM_ERROR			BIT(0)
 #define QEDE_CSUM_UNNECESSARY		BIT(1)
@@ -329,8 +376,13 @@ struct qede_fastpath {
 #define QEDE_SP_VXLAN_PORT_CONFIG	2
 #define QEDE_SP_GENEVE_PORT_CONFIG	3
 
-union qede_reload_args {
-	u16 mtu;
+struct qede_reload_args {
+	void (*func)(struct qede_dev *edev, struct qede_reload_args *args);
+	union {
+		netdev_features_t features;
+		struct bpf_prog *new_prog;
+		u16 mtu;
+	} u;
 };
 
 #ifdef CONFIG_DCB
@@ -339,15 +391,14 @@ void qede_set_dcbnl_ops(struct net_device *ndev);
 void qede_config_debug(uint debug, u32 *p_dp_module, u8 *p_dp_level);
 void qede_set_ethtool_ops(struct net_device *netdev);
 void qede_reload(struct qede_dev *edev,
-		 void (*func)(struct qede_dev *edev,
-			      union qede_reload_args *args),
-		 union qede_reload_args *args);
+		 struct qede_reload_args *args, bool is_locked);
 int qede_change_mtu(struct net_device *dev, int new_mtu);
 void qede_fill_by_demand_stats(struct qede_dev *edev);
+void __qede_lock(struct qede_dev *edev);
+void __qede_unlock(struct qede_dev *edev);
 bool qede_has_rx_work(struct qede_rx_queue *rxq);
 int qede_txq_has_work(struct qede_tx_queue *txq);
-void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev,
-			     u8 count);
+void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count);
 void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq);
 
 #define RX_RING_SIZE_POW	13
@@ -362,8 +413,9 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq);
 #define NUM_TX_BDS_MIN		128
 #define NUM_TX_BDS_DEF		NUM_TX_BDS_MAX
 
-#define QEDE_MIN_PKT_LEN	64
-#define QEDE_RX_HDR_SIZE	256
+#define QEDE_MIN_PKT_LEN		64
+#define QEDE_RX_HDR_SIZE		256
+#define QEDE_MAX_JUMBO_PACKET_SIZE	9600
 #define	for_each_queue(i) for (i = 0; i < edev->num_queues; i++)
 
 #endif /* _QEDE_H_ */
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 7567cc464b88..1c48f445c93b 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -16,13 +16,6 @@
 #include <linux/capability.h>
 #include "qede.h"
 
-#define QEDE_STAT_OFFSET(stat_name) (offsetof(struct qede_stats, stat_name))
-#define QEDE_STAT_STRING(stat_name) (#stat_name)
-#define _QEDE_STAT(stat_name, pf_only) \
-	 {QEDE_STAT_OFFSET(stat_name), QEDE_STAT_STRING(stat_name), pf_only}
-#define QEDE_PF_STAT(stat_name)		_QEDE_STAT(stat_name, true)
-#define QEDE_STAT(stat_name)		_QEDE_STAT(stat_name, false)
-
 #define QEDE_RQSTAT_OFFSET(stat_name) \
 	 (offsetof(struct qede_rx_queue, stat_name))
 #define QEDE_RQSTAT_STRING(stat_name) (#stat_name)
@@ -39,12 +32,10 @@ static const struct {
 	QEDE_RQSTAT(rx_hw_errors),
 	QEDE_RQSTAT(rx_alloc_errors),
 	QEDE_RQSTAT(rx_ip_frags),
+	QEDE_RQSTAT(xdp_no_pass),
 };
 
 #define QEDE_NUM_RQSTATS ARRAY_SIZE(qede_rqstats_arr)
-#define QEDE_RQSTATS_DATA(dev, sindex, rqindex) \
-	(*((u64 *)(((char *)(dev->fp_array[(rqindex)].rxq)) +\
-		    qede_rqstats_arr[(sindex)].offset)))
 #define QEDE_TQSTAT_OFFSET(stat_name) \
 	(offsetof(struct qede_tx_queue, stat_name))
 #define QEDE_TQSTAT_STRING(stat_name) (#stat_name)
@@ -59,10 +50,12 @@ static const struct {
 	QEDE_TQSTAT(stopped_cnt),
 };
 
-#define QEDE_TQSTATS_DATA(dev, sindex, tssid, tcid) \
-	(*((u64 *)(((void *)(&dev->fp_array[tssid].txqs[tcid])) +\
-		   qede_tqstats_arr[(sindex)].offset)))
-
+#define QEDE_STAT_OFFSET(stat_name) (offsetof(struct qede_stats, stat_name))
+#define QEDE_STAT_STRING(stat_name) (#stat_name)
+#define _QEDE_STAT(stat_name, pf_only) \
+	 {QEDE_STAT_OFFSET(stat_name), QEDE_STAT_STRING(stat_name), pf_only}
+#define QEDE_PF_STAT(stat_name)	_QEDE_STAT(stat_name, true)
+#define QEDE_STAT(stat_name)	_QEDE_STAT(stat_name, false)
 static const struct {
 	u64 offset;
 	char string[ETH_GSTRING_LEN];
@@ -136,10 +129,6 @@ static const struct {
 	QEDE_STAT(coalesced_bytes),
 };
 
-#define QEDE_STATS_DATA(dev, index) \
-	(*((u64 *)(((char *)(dev)) + offsetof(struct qede_dev, stats) \
-			+ qede_stats_arr[(index)].offset)))
-
 #define QEDE_NUM_STATS	ARRAY_SIZE(qede_stats_arr)
 
 enum {
@@ -157,6 +146,7 @@ enum qede_ethtool_tests {
 	QEDE_ETHTOOL_MEMORY_TEST,
 	QEDE_ETHTOOL_REGISTER_TEST,
 	QEDE_ETHTOOL_CLOCK_TEST,
+	QEDE_ETHTOOL_NVRAM_TEST,
 	QEDE_ETHTOOL_TEST_MAX
 };
 
@@ -166,41 +156,63 @@ static const char qede_tests_str_arr[QEDE_ETHTOOL_TEST_MAX][ETH_GSTRING_LEN] = {
 	"Memory (online)\t\t",
 	"Register (online)\t",
 	"Clock (online)\t\t",
+	"Nvram (online)\t\t",
 };
 
+static void qede_get_strings_stats_txq(struct qede_dev *edev,
+				       struct qede_tx_queue *txq, u8 **buf)
+{
+	int i;
+
+	for (i = 0; i < QEDE_NUM_TQSTATS; i++) {
+		if (txq->is_xdp)
+			sprintf(*buf, "%d [XDP]: %s",
+				QEDE_TXQ_XDP_TO_IDX(edev, txq),
+				qede_tqstats_arr[i].string);
+		else
+			sprintf(*buf, "%d: %s", txq->index,
+				qede_tqstats_arr[i].string);
+		*buf += ETH_GSTRING_LEN;
+	}
+}
+
+static void qede_get_strings_stats_rxq(struct qede_dev *edev,
+				       struct qede_rx_queue *rxq, u8 **buf)
+{
+	int i;
+
+	for (i = 0; i < QEDE_NUM_RQSTATS; i++) {
+		sprintf(*buf, "%d: %s", rxq->rxq_id,
+			qede_rqstats_arr[i].string);
+		*buf += ETH_GSTRING_LEN;
+	}
+}
+
 static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf)
 {
-	int i, j, k;
+	struct qede_fastpath *fp;
+	int i;
 
-	for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) {
-		int tc;
+	/* Account for queue statistics */
+	for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
+		fp = &edev->fp_array[i];
 
-		if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
-			for (j = 0; j < QEDE_NUM_RQSTATS; j++)
-				sprintf(buf + (k + j) * ETH_GSTRING_LEN,
-					"%d:   %s", i,
-					qede_rqstats_arr[j].string);
-			k += QEDE_NUM_RQSTATS;
-		}
+		if (fp->type & QEDE_FASTPATH_RX)
+			qede_get_strings_stats_rxq(edev, fp->rxq, &buf);
 
-		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-			for (tc = 0; tc < edev->num_tc; tc++) {
-				for (j = 0; j < QEDE_NUM_TQSTATS; j++)
-					sprintf(buf + (k + j) *
-						ETH_GSTRING_LEN,
-						"%d.%d: %s", i, tc,
-						qede_tqstats_arr[j].string);
-				k += QEDE_NUM_TQSTATS;
-			}
-		}
+		if (fp->type & QEDE_FASTPATH_XDP)
+			qede_get_strings_stats_txq(edev, fp->xdp_tx, &buf);
+
+		if (fp->type & QEDE_FASTPATH_TX)
+			qede_get_strings_stats_txq(edev, fp->txq, &buf);
 	}
 
-	for (i = 0, j = 0; i < QEDE_NUM_STATS; i++) {
+	/* Account for non-queue statistics */
+	for (i = 0; i < QEDE_NUM_STATS; i++) {
 		if (IS_VF(edev) && qede_stats_arr[i].pf_only)
 			continue;
-		strcpy(buf + (k + j) * ETH_GSTRING_LEN,
-		       qede_stats_arr[i].string);
-		j++;
+		strcpy(buf, qede_stats_arr[i].string);
+		buf += ETH_GSTRING_LEN;
 	}
 }
 
@@ -226,42 +238,61 @@ static void qede_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 	}
 }
 
+static void qede_get_ethtool_stats_txq(struct qede_tx_queue *txq, u64 **buf)
+{
+	int i;
+
+	for (i = 0; i < QEDE_NUM_TQSTATS; i++) {
+		**buf = *((u64 *)(((void *)txq) + qede_tqstats_arr[i].offset));
+		(*buf)++;
+	}
+}
+
+static void qede_get_ethtool_stats_rxq(struct qede_rx_queue *rxq, u64 **buf)
+{
+	int i;
+
+	for (i = 0; i < QEDE_NUM_RQSTATS; i++) {
+		**buf = *((u64 *)(((void *)rxq) + qede_rqstats_arr[i].offset));
+		(*buf)++;
+	}
+}
+
 static void qede_get_ethtool_stats(struct net_device *dev,
 				   struct ethtool_stats *stats, u64 *buf)
 {
 	struct qede_dev *edev = netdev_priv(dev);
-	int sidx, cnt = 0;
-	int qid;
+	struct qede_fastpath *fp;
+	int i;
 
 	qede_fill_by_demand_stats(edev);
 
-	mutex_lock(&edev->qede_lock);
+	/* Need to protect the access to the fastpath array */
+	__qede_lock(edev);
 
-	for (qid = 0; qid < QEDE_QUEUE_CNT(edev); qid++) {
-		int tc;
+	for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) {
+		fp = &edev->fp_array[i];
 
-		if (edev->fp_array[qid].type & QEDE_FASTPATH_RX) {
-			for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++)
-				buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid);
-		}
+		if (fp->type & QEDE_FASTPATH_RX)
+			qede_get_ethtool_stats_rxq(fp->rxq, &buf);
 
-		if (edev->fp_array[qid].type & QEDE_FASTPATH_TX) {
-			for (tc = 0; tc < edev->num_tc; tc++) {
-				for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++)
-					buf[cnt++] = QEDE_TQSTATS_DATA(edev,
-								       sidx,
-								       qid, tc);
-			}
-		}
+		if (fp->type & QEDE_FASTPATH_XDP)
+			qede_get_ethtool_stats_txq(fp->xdp_tx, &buf);
+
+		if (fp->type & QEDE_FASTPATH_TX)
+			qede_get_ethtool_stats_txq(fp->txq, &buf);
 	}
 
-	for (sidx = 0; sidx < QEDE_NUM_STATS; sidx++) {
-		if (IS_VF(edev) && qede_stats_arr[sidx].pf_only)
+	for (i = 0; i < QEDE_NUM_STATS; i++) {
+		if (IS_VF(edev) && qede_stats_arr[i].pf_only)
 			continue;
-		buf[cnt++] = QEDE_STATS_DATA(edev, sidx);
+		*buf = *((u64 *)(((void *)&edev->stats) +
+				 qede_stats_arr[i].offset));
+
+		buf++;
 	}
 
-	mutex_unlock(&edev->qede_lock);
+	__qede_unlock(edev);
 }
 
 static int qede_get_sset_count(struct net_device *dev, int stringset)
@@ -278,8 +309,18 @@ static int qede_get_sset_count(struct net_device *dev, int stringset)
 				if (qede_stats_arr[i].pf_only)
 					num_stats--;
 		}
-		return num_stats + QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS +
-		       QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS * edev->num_tc;
+
+		/* Account for the Regular Tx statistics */
+		num_stats += QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS;
+
+		/* Account for the Regular Rx statistics */
+		num_stats += QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS;
+
+		/* Account for XDP statistics [if needed] */
+		if (edev->xdp_prog)
+			num_stats += QEDE_RSS_COUNT(edev) * QEDE_NUM_TQSTATS;
+		return num_stats;
+
 	case ETH_SS_PRIV_FLAGS:
 		return QEDE_PRI_FLAG_LEN;
 	case ETH_SS_TEST:
@@ -325,7 +366,7 @@ static const struct qede_link_mode_mapping qed_lm_map[] = {
 {								\
 	int i;							\
 								\
-	for (i = 0; i < QED_LM_COUNT; i++) {			\
+	for (i = 0; i < ARRAY_SIZE(qed_lm_map); i++) {		\
 		if ((caps) & (qed_lm_map[i].qed_link_mode))	\
 			__set_bit(qed_lm_map[i].ethtool_link_mode,\
 				  lk_ksettings->link_modes.name); \
@@ -336,7 +377,7 @@ static const struct qede_link_mode_mapping qed_lm_map[] = {
 {								\
 	int i;							\
 								\
-	for (i = 0; i < QED_LM_COUNT; i++) {			\
+	for (i = 0; i < ARRAY_SIZE(qed_lm_map); i++) {		\
 		if (test_bit(qed_lm_map[i].ethtool_link_mode,	\
 			     lk_ksettings->link_modes.name))	\
 			caps |= qed_lm_map[i].qed_link_mode;	\
@@ -350,6 +391,8 @@ static int qede_get_link_ksettings(struct net_device *dev,
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qed_link_output current_link;
 
+	__qede_lock(edev);
+
 	memset(&current_link, 0, sizeof(current_link));
 	edev->ops->common->get_link(edev->cdev, &current_link);
 
@@ -369,6 +412,9 @@ static int qede_get_link_ksettings(struct net_device *dev,
 		base->speed = SPEED_UNKNOWN;
 		base->duplex = DUPLEX_UNKNOWN;
 	}
+
+	__qede_unlock(edev);
+
 	base->port = current_link.port;
 	base->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE :
 			AUTONEG_DISABLE;
@@ -488,6 +534,45 @@ static void qede_get_drvinfo(struct net_device *ndev,
 	strlcpy(info->bus_info, pci_name(edev->pdev), sizeof(info->bus_info));
 }
 
+static void qede_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+
+	if (edev->dev_info.common.wol_support) {
+		wol->supported = WAKE_MAGIC;
+		wol->wolopts = edev->wol_enabled ? WAKE_MAGIC : 0;
+	}
+}
+
+static int qede_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol)
+{
+	struct qede_dev *edev = netdev_priv(ndev);
+	bool wol_requested;
+	int rc;
+
+	if (wol->wolopts & ~WAKE_MAGIC) {
+		DP_INFO(edev,
+			"Can't support WoL options other than magic-packet\n");
+		return -EINVAL;
+	}
+
+	wol_requested = !!(wol->wolopts & WAKE_MAGIC);
+	if (wol_requested == edev->wol_enabled)
+		return 0;
+
+	/* Need to actually change configuration */
+	if (!edev->dev_info.common.wol_support) {
+		DP_INFO(edev, "Device doesn't support WoL\n");
+		return -EINVAL;
+	}
+
+	rc = edev->ops->common->update_wol(edev->cdev, wol_requested);
+	if (!rc)
+		edev->wol_enabled = wol_requested;
+
+	return rc;
+}
+
 static u32 qede_get_msglevel(struct net_device *ndev)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
@@ -638,8 +723,7 @@ static int qede_set_ringparam(struct net_device *dev,
 	edev->q_num_rx_buffers = ering->rx_pending;
 	edev->q_num_tx_buffers = ering->tx_pending;
 
-	if (netif_running(edev->ndev))
-		qede_reload(edev, NULL, NULL);
+	qede_reload(edev, NULL, false);
 
 	return 0;
 }
@@ -724,35 +808,27 @@ static int qede_get_regs_len(struct net_device *ndev)
 		return -EINVAL;
 }
 
-static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args)
+static void qede_update_mtu(struct qede_dev *edev,
+			    struct qede_reload_args *args)
 {
-	edev->ndev->mtu = args->mtu;
+	edev->ndev->mtu = args->u.mtu;
 }
 
 /* Netdevice NDOs */
-#define ETH_MAX_JUMBO_PACKET_SIZE	9600
-#define ETH_MIN_PACKET_SIZE		60
 int qede_change_mtu(struct net_device *ndev, int new_mtu)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
-	union qede_reload_args args;
-
-	if ((new_mtu > ETH_MAX_JUMBO_PACKET_SIZE) ||
-	    ((new_mtu + ETH_HLEN) < ETH_MIN_PACKET_SIZE)) {
-		DP_ERR(edev, "Can't support requested MTU size\n");
-		return -EINVAL;
-	}
+	struct qede_reload_args args;
 
 	DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
 		   "Configuring MTU size of %d\n", new_mtu);
 
-	/* Set the mtu field and re-start the interface if needed*/
-	args.mtu = new_mtu;
-
-	if (netif_running(edev->ndev))
-		qede_reload(edev, &qede_update_mtu, &args);
+	/* Set the mtu field and re-start the interface if needed */
+	args.u.mtu = new_mtu;
+	args.func = &qede_update_mtu;
+	qede_reload(edev, &args, false);
 
-	qede_update_mtu(edev, &args);
+	edev->ops->common->update_mtu(edev->cdev, new_mtu);
 
 	return 0;
 }
@@ -836,8 +912,7 @@ static int qede_set_channels(struct net_device *dev,
 		       sizeof(edev->rss_params.rss_ind_table));
 	}
 
-	if (netif_running(dev))
-		qede_reload(edev, NULL, NULL);
+	qede_reload(edev, NULL, false);
 
 	return 0;
 }
@@ -1143,7 +1218,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 
 	for_each_queue(i) {
 		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-			txq = edev->fp_array[i].txqs;
+			txq = edev->fp_array[i].txq;
 			break;
 		}
 	}
@@ -1155,7 +1230,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 
 	/* Fill the entry in the SW ring and the BDs in the FW ring */
 	idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
-	txq->sw_tx_ring[idx].skb = skb;
+	txq->sw_tx_ring.skbs[idx].skb = skb;
 	first_bd = qed_chain_produce(&txq->tx_pbl);
 	memset(first_bd, 0, sizeof(*first_bd));
 	val = 1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
@@ -1209,7 +1284,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 	dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
 			 BD_UNMAP_LEN(first_bd), DMA_TO_DEVICE);
 	txq->sw_tx_cons++;
-	txq->sw_tx_ring[idx].skb = NULL;
+	txq->sw_tx_ring.skbs[idx].skb = NULL;
 
 	return 0;
 }
@@ -1277,13 +1352,13 @@ static int qede_selftest_receive_traffic(struct qede_dev *edev)
 					break;
 				}
 
-			qede_recycle_rx_bd_ring(rxq, edev, 1);
+			qede_recycle_rx_bd_ring(rxq, 1);
 			qed_chain_recycle_consumed(&rxq->rx_comp_ring);
 			break;
 		}
 
 		DP_INFO(edev, "Not the transmitted packet\n");
-		qede_recycle_rx_bd_ring(rxq, edev, 1);
+		qede_recycle_rx_bd_ring(rxq, 1);
 		qed_chain_recycle_consumed(&rxq->rx_comp_ring);
 	}
 
@@ -1405,6 +1480,11 @@ static void qede_self_test(struct net_device *dev,
 		buf[QEDE_ETHTOOL_CLOCK_TEST] = 1;
 		etest->flags |= ETH_TEST_FL_FAILED;
 	}
+
+	if (edev->ops->common->selftest->selftest_nvram(edev->cdev)) {
+		buf[QEDE_ETHTOOL_NVRAM_TEST] = 1;
+		etest->flags |= ETH_TEST_FL_FAILED;
+	}
 }
 
 static int qede_set_tunable(struct net_device *dev,
@@ -1455,6 +1535,8 @@ static const struct ethtool_ops qede_ethtool_ops = {
 	.get_drvinfo = qede_get_drvinfo,
 	.get_regs_len = qede_get_regs_len,
 	.get_regs = qede_get_regs,
+	.get_wol = qede_get_wol,
+	.set_wol = qede_set_wol,
 	.get_msglevel = qede_get_msglevel,
 	.set_msglevel = qede_set_msglevel,
 	.nway_reset = qede_nway_reset,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 85f46dbecd5b..aecdd1c5c0ea 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -94,11 +94,26 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
 
 #define TX_TIMEOUT		(5 * HZ)
 
+/* Utilize last protocol index for XDP */
+#define XDP_PI	11
+
 static void qede_remove(struct pci_dev *pdev);
-static int qede_alloc_rx_buffer(struct qede_dev *edev,
-				struct qede_rx_queue *rxq);
+static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
 
+/* The qede lock is used to protect driver state change and driver flows that
+ * are not reentrant.
+ */
+void __qede_lock(struct qede_dev *edev)
+{
+	mutex_lock(&edev->qede_lock);
+}
+
+void __qede_unlock(struct qede_dev *edev)
+{
+	mutex_unlock(&edev->qede_lock);
+}
+
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
 			    __be16 vlan_proto)
@@ -166,15 +181,20 @@ static struct pci_driver qede_pci_driver = {
 	.id_table = qede_pci_tbl,
 	.probe = qede_probe,
 	.remove = qede_remove,
+	.shutdown = qede_shutdown,
 #ifdef CONFIG_QED_SRIOV
 	.sriov_configure = qede_sriov_configure,
 #endif
 };
 
-static void qede_force_mac(void *dev, u8 *mac)
+static void qede_force_mac(void *dev, u8 *mac, bool forced)
 {
 	struct qede_dev *edev = dev;
 
+	/* MAC hints take effect only if we haven't set one already */
+	if (is_valid_ether_addr(edev->ndev->dev_addr) && !forced)
+		return;
+
 	ether_addr_copy(edev->ndev->dev_addr, mac);
 	ether_addr_copy(edev->primary_mac, mac);
 }
@@ -284,12 +304,12 @@ static int qede_free_tx_pkt(struct qede_dev *edev,
 			    struct qede_tx_queue *txq, int *len)
 {
 	u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX;
-	struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
+	struct sk_buff *skb = txq->sw_tx_ring.skbs[idx].skb;
 	struct eth_tx_1st_bd *first_bd;
 	struct eth_tx_bd *tx_data_bd;
 	int bds_consumed = 0;
 	int nbds;
-	bool data_split = txq->sw_tx_ring[idx].flags & QEDE_TSO_SPLIT_BD;
+	bool data_split = txq->sw_tx_ring.skbs[idx].flags & QEDE_TSO_SPLIT_BD;
 	int i, split_bd_len = 0;
 
 	if (unlikely(!skb)) {
@@ -329,20 +349,19 @@ static int qede_free_tx_pkt(struct qede_dev *edev,
 
 	/* Free skb */
 	dev_kfree_skb_any(skb);
-	txq->sw_tx_ring[idx].skb = NULL;
-	txq->sw_tx_ring[idx].flags = 0;
+	txq->sw_tx_ring.skbs[idx].skb = NULL;
+	txq->sw_tx_ring.skbs[idx].flags = 0;
 
 	return 0;
 }
 
 /* Unmap the data and free skb when mapping failed during start_xmit */
-static void qede_free_failed_tx_pkt(struct qede_dev *edev,
-				    struct qede_tx_queue *txq,
+static void qede_free_failed_tx_pkt(struct qede_tx_queue *txq,
 				    struct eth_tx_1st_bd *first_bd,
 				    int nbd, bool data_split)
 {
 	u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
-	struct sk_buff *skb = txq->sw_tx_ring[idx].skb;
+	struct sk_buff *skb = txq->sw_tx_ring.skbs[idx].skb;
 	struct eth_tx_bd *tx_data_bd;
 	int i, split_bd_len = 0;
 
@@ -359,7 +378,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 		nbd--;
 	}
 
-	dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(first_bd),
+	dma_unmap_single(txq->dev, BD_UNMAP_ADDR(first_bd),
 			 BD_UNMAP_LEN(first_bd) + split_bd_len, DMA_TO_DEVICE);
 
 	/* Unmap the data of the skb frags */
@@ -367,7 +386,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 		tx_data_bd = (struct eth_tx_bd *)
 			qed_chain_produce(&txq->tx_pbl);
 		if (tx_data_bd->nbytes)
-			dma_unmap_page(&edev->pdev->dev,
+			dma_unmap_page(txq->dev,
 				       BD_UNMAP_ADDR(tx_data_bd),
 				       BD_UNMAP_LEN(tx_data_bd), DMA_TO_DEVICE);
 	}
@@ -378,12 +397,11 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev,
 
 	/* Free skb */
 	dev_kfree_skb_any(skb);
-	txq->sw_tx_ring[idx].skb = NULL;
-	txq->sw_tx_ring[idx].flags = 0;
+	txq->sw_tx_ring.skbs[idx].skb = NULL;
+	txq->sw_tx_ring.skbs[idx].flags = 0;
 }
 
-static u32 qede_xmit_type(struct qede_dev *edev,
-			  struct sk_buff *skb, int *ipv6_ext)
+static u32 qede_xmit_type(struct sk_buff *skb, int *ipv6_ext)
 {
 	u32 rc = XMIT_L4_CSUM;
 	__be16 l3_proto;
@@ -396,8 +414,19 @@ static u32 qede_xmit_type(struct qede_dev *edev,
 	    (ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6))
 		*ipv6_ext = 1;
 
-	if (skb->encapsulation)
+	if (skb->encapsulation) {
 		rc |= XMIT_ENC;
+		if (skb_is_gso(skb)) {
+			unsigned short gso_type = skb_shinfo(skb)->gso_type;
+
+			if ((gso_type & SKB_GSO_UDP_TUNNEL_CSUM) ||
+			    (gso_type & SKB_GSO_GRE_CSUM))
+				rc |= XMIT_ENC_GSO_L4_CSUM;
+
+			rc |= XMIT_LSO;
+			return rc;
+		}
+	}
 
 	if (skb_is_gso(skb))
 		rc |= XMIT_LSO;
@@ -439,18 +468,16 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb,
 	second_bd->data.bitfields2 = cpu_to_le16(bd2_bits2);
 }
 
-static int map_frag_to_bd(struct qede_dev *edev,
+static int map_frag_to_bd(struct qede_tx_queue *txq,
 			  skb_frag_t *frag, struct eth_tx_bd *bd)
 {
 	dma_addr_t mapping;
 
 	/* Map skb non-linear frag data for DMA */
-	mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0,
+	mapping = skb_frag_dma_map(txq->dev, frag, 0,
 				   skb_frag_size(frag), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
-		DP_NOTICE(edev, "Unable to map frag - dropping packet\n");
+	if (unlikely(dma_mapping_error(txq->dev, mapping)))
 		return -ENOMEM;
-	}
 
 	/* Setup the data pointer of the frag data */
 	BD_SET_UNMAP_ADDR_LEN(bd, mapping, skb_frag_size(frag));
@@ -470,8 +497,7 @@ static u16 qede_get_skb_hlen(struct sk_buff *skb, bool is_encap_pkt)
 
 /* +2 for 1st BD for headers and 2nd BD for headlen (if required) */
 #if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
-static bool qede_pkt_req_lin(struct qede_dev *edev, struct sk_buff *skb,
-			     u8 xmit_type)
+static bool qede_pkt_req_lin(struct sk_buff *skb, u8 xmit_type)
 {
 	int allowed_frags = ETH_TX_MAX_BDS_PER_NON_LSO_PACKET - 1;
 
@@ -507,6 +533,47 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
 	mmiowb();
 }
 
+static int qede_xdp_xmit(struct qede_dev *edev, struct qede_fastpath *fp,
+			 struct sw_rx_data *metadata, u16 padding, u16 length)
+{
+	struct qede_tx_queue *txq = fp->xdp_tx;
+	u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
+	struct eth_tx_1st_bd *first_bd;
+
+	if (!qed_chain_get_elem_left(&txq->tx_pbl)) {
+		txq->stopped_cnt++;
+		return -ENOMEM;
+	}
+
+	first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl);
+
+	memset(first_bd, 0, sizeof(*first_bd));
+	first_bd->data.bd_flags.bitfields =
+	    BIT(ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT);
+	first_bd->data.bitfields |=
+	    (length & ETH_TX_DATA_1ST_BD_PKT_LEN_MASK) <<
+	    ETH_TX_DATA_1ST_BD_PKT_LEN_SHIFT;
+	first_bd->data.nbds = 1;
+
+	/* We can safely ignore the offset, as it's 0 for XDP */
+	BD_SET_UNMAP_ADDR_LEN(first_bd, metadata->mapping + padding, length);
+
+	/* Synchronize the buffer back to device, as program [probably]
+	 * has changed it.
+	 */
+	dma_sync_single_for_device(&edev->pdev->dev,
+				   metadata->mapping + padding,
+				   length, PCI_DMA_TODEVICE);
+
+	txq->sw_tx_ring.pages[idx] = metadata->data;
+	txq->sw_tx_prod++;
+
+	/* Mark the fastpath for future XDP doorbell */
+	fp->xdp_xmit = 1;
+
+	return 0;
+}
+
 /* Main transmit function */
 static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 				   struct net_device *ndev)
@@ -530,15 +597,15 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 	/* Get tx-queue context and netdev index */
 	txq_index = skb_get_queue_mapping(skb);
 	WARN_ON(txq_index >= QEDE_TSS_COUNT(edev));
-	txq = QEDE_TX_QUEUE(edev, txq_index);
+	txq = edev->fp_array[edev->fp_num_rx + txq_index].txq;
 	netdev_txq = netdev_get_tx_queue(ndev, txq_index);
 
 	WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1));
 
-	xmit_type = qede_xmit_type(edev, skb, &ipv6_ext);
+	xmit_type = qede_xmit_type(skb, &ipv6_ext);
 
 #if ((MAX_SKB_FRAGS + 2) > ETH_TX_MAX_BDS_PER_NON_LSO_PACKET)
-	if (qede_pkt_req_lin(edev, skb, xmit_type)) {
+	if (qede_pkt_req_lin(skb, xmit_type)) {
 		if (skb_linearize(skb)) {
 			DP_NOTICE(edev,
 				  "SKB linearization failed - silently dropping this SKB\n");
@@ -550,7 +617,7 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 
 	/* Fill the entry in the SW ring and the BDs in the FW ring */
 	idx = txq->sw_tx_prod & NUM_TX_BDS_MAX;
-	txq->sw_tx_ring[idx].skb = skb;
+	txq->sw_tx_ring.skbs[idx].skb = skb;
 	first_bd = (struct eth_tx_1st_bd *)
 		   qed_chain_produce(&txq->tx_pbl);
 	memset(first_bd, 0, sizeof(*first_bd));
@@ -558,11 +625,11 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 		1 << ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT;
 
 	/* Map skb linear data for DMA and set in the first BD */
-	mapping = dma_map_single(&edev->pdev->dev, skb->data,
+	mapping = dma_map_single(txq->dev, skb->data,
 				 skb_headlen(skb), DMA_TO_DEVICE);
-	if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
+	if (unlikely(dma_mapping_error(txq->dev, mapping))) {
 		DP_NOTICE(edev, "SKB mapping failed\n");
-		qede_free_failed_tx_pkt(edev, txq, first_bd, 0, false);
+		qede_free_failed_tx_pkt(txq, first_bd, 0, false);
 		qede_update_tx_producer(txq);
 		return NETDEV_TX_OK;
 	}
@@ -633,6 +700,12 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 		if (unlikely(xmit_type & XMIT_ENC)) {
 			first_bd->data.bd_flags.bitfields |=
 				1 << ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT;
+
+			if (xmit_type & XMIT_ENC_GSO_L4_CSUM) {
+				u8 tmp = ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT;
+
+				first_bd->data.bd_flags.bitfields |= 1 << tmp;
+			}
 			hlen = qede_get_skb_hlen(skb, true);
 		} else {
 			first_bd->data.bd_flags.bitfields |=
@@ -664,7 +737,7 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 			/* this marks the BD as one that has no
 			 * individual mapping
 			 */
-			txq->sw_tx_ring[idx].flags |= QEDE_TSO_SPLIT_BD;
+			txq->sw_tx_ring.skbs[idx].flags |= QEDE_TSO_SPLIT_BD;
 
 			first_bd->nbytes = cpu_to_le16(hlen);
 
@@ -680,12 +753,11 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 	/* Handle fragmented skb */
 	/* special handle for frags inside 2nd and 3rd bds.. */
 	while (tx_data_bd && frag_idx < skb_shinfo(skb)->nr_frags) {
-		rc = map_frag_to_bd(edev,
+		rc = map_frag_to_bd(txq,
 				    &skb_shinfo(skb)->frags[frag_idx],
 				    tx_data_bd);
 		if (rc) {
-			qede_free_failed_tx_pkt(edev, txq, first_bd, nbd,
-						data_split);
+			qede_free_failed_tx_pkt(txq, first_bd, nbd, data_split);
 			qede_update_tx_producer(txq);
 			return NETDEV_TX_OK;
 		}
@@ -705,12 +777,11 @@ static netdev_tx_t qede_start_xmit(struct sk_buff *skb,
 
 		memset(tx_data_bd, 0, sizeof(*tx_data_bd));
 
-		rc = map_frag_to_bd(edev,
+		rc = map_frag_to_bd(txq,
 				    &skb_shinfo(skb)->frags[frag_idx],
 				    tx_data_bd);
 		if (rc) {
-			qede_free_failed_tx_pkt(edev, txq, first_bd, nbd,
-						data_split);
+			qede_free_failed_tx_pkt(txq, first_bd, nbd, data_split);
 			qede_update_tx_producer(txq);
 			return NETDEV_TX_OK;
 		}
@@ -775,6 +846,27 @@ int qede_txq_has_work(struct qede_tx_queue *txq)
 	return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl);
 }
 
+static void qede_xdp_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
+{
+	struct eth_tx_1st_bd *bd;
+	u16 hw_bd_cons;
+
+	hw_bd_cons = le16_to_cpu(*txq->hw_cons_ptr);
+	barrier();
+
+	while (hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl)) {
+		bd = (struct eth_tx_1st_bd *)qed_chain_consume(&txq->tx_pbl);
+
+		dma_unmap_single(&edev->pdev->dev, BD_UNMAP_ADDR(bd),
+				 PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(txq->sw_tx_ring.pages[txq->sw_tx_cons &
+						  NUM_TX_BDS_MAX]);
+
+		txq->sw_tx_cons++;
+		txq->xmit_pkts++;
+	}
+}
+
 static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
@@ -858,16 +950,6 @@ bool qede_has_rx_work(struct qede_rx_queue *rxq)
 	return hw_comp_cons != sw_comp_cons;
 }
 
-static bool qede_has_tx_work(struct qede_fastpath *fp)
-{
-	u8 tc;
-
-	for (tc = 0; tc < fp->edev->num_tc; tc++)
-		if (qede_txq_has_work(&fp->txqs[tc]))
-			return true;
-	return false;
-}
-
 static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
 {
 	qed_chain_consume(&rxq->rx_bd_ring);
@@ -877,8 +959,7 @@ static inline void qede_rx_bd_ring_consume(struct qede_rx_queue *rxq)
 /* This function reuses the buffer(from an offset) from
  * consumer index to producer index in the bd ring
  */
-static inline void qede_reuse_page(struct qede_dev *edev,
-				   struct qede_rx_queue *rxq,
+static inline void qede_reuse_page(struct qede_rx_queue *rxq,
 				   struct sw_rx_data *curr_cons)
 {
 	struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
@@ -900,27 +981,62 @@ static inline void qede_reuse_page(struct qede_dev *edev,
 /* In case of allocation failures reuse buffers
  * from consumer index to produce buffers for firmware
  */
-void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq,
-			     struct qede_dev *edev, u8 count)
+void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, u8 count)
 {
 	struct sw_rx_data *curr_cons;
 
 	for (; count > 0; count--) {
 		curr_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
-		qede_reuse_page(edev, rxq, curr_cons);
+		qede_reuse_page(rxq, curr_cons);
 		qede_rx_bd_ring_consume(rxq);
 	}
 }
 
-static inline int qede_realloc_rx_buffer(struct qede_dev *edev,
-					 struct qede_rx_queue *rxq,
+static int qede_alloc_rx_buffer(struct qede_rx_queue *rxq)
+{
+	struct sw_rx_data *sw_rx_data;
+	struct eth_rx_bd *rx_bd;
+	dma_addr_t mapping;
+	struct page *data;
+
+	data = alloc_pages(GFP_ATOMIC, 0);
+	if (unlikely(!data))
+		return -ENOMEM;
+
+	/* Map the entire page as it would be used
+	 * for multiple RX buffer segment size mapping.
+	 */
+	mapping = dma_map_page(rxq->dev, data, 0,
+			       PAGE_SIZE, rxq->data_direction);
+	if (unlikely(dma_mapping_error(rxq->dev, mapping))) {
+		__free_page(data);
+		return -ENOMEM;
+	}
+
+	sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
+	sw_rx_data->page_offset = 0;
+	sw_rx_data->data = data;
+	sw_rx_data->mapping = mapping;
+
+	/* Advance PROD and get BD pointer */
+	rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring);
+	WARN_ON(!rx_bd);
+	rx_bd->addr.hi = cpu_to_le32(upper_32_bits(mapping));
+	rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping));
+
+	rxq->sw_rx_prod++;
+
+	return 0;
+}
+
+static inline int qede_realloc_rx_buffer(struct qede_rx_queue *rxq,
 					 struct sw_rx_data *curr_cons)
 {
 	/* Move to the next segment in the page */
 	curr_cons->page_offset += rxq->rx_buf_seg_size;
 
 	if (curr_cons->page_offset == PAGE_SIZE) {
-		if (unlikely(qede_alloc_rx_buffer(edev, rxq))) {
+		if (unlikely(qede_alloc_rx_buffer(rxq))) {
 			/* Since we failed to allocate new buffer
 			 * current buffer can be used again.
 			 */
@@ -929,15 +1045,15 @@ static inline int qede_realloc_rx_buffer(struct qede_dev *edev,
 			return -ENOMEM;
 		}
 
-		dma_unmap_page(&edev->pdev->dev, curr_cons->mapping,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+		dma_unmap_page(rxq->dev, curr_cons->mapping,
+			       PAGE_SIZE, rxq->data_direction);
 	} else {
 		/* Increment refcount of the page as we don't want
 		 * network stack to take the ownership of the page
 		 * which can be recycled multiple times by the driver.
 		 */
 		page_ref_inc(curr_cons->data);
-		qede_reuse_page(edev, rxq, curr_cons);
+		qede_reuse_page(rxq, curr_cons);
 	}
 
 	return 0;
@@ -971,22 +1087,20 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq)
 	mmiowb();
 }
 
-static u32 qede_get_rxhash(struct qede_dev *edev,
-			   u8 bitfields,
-			   __le32 rss_hash, enum pkt_hash_types *rxhash_type)
+static void qede_get_rxhash(struct sk_buff *skb, u8 bitfields, __le32 rss_hash)
 {
+	enum pkt_hash_types hash_type = PKT_HASH_TYPE_NONE;
 	enum rss_hash_type htype;
+	u32 hash = 0;
 
 	htype = GET_FIELD(bitfields, ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE);
-
-	if ((edev->ndev->features & NETIF_F_RXHASH) && htype) {
-		*rxhash_type = ((htype == RSS_HASH_TYPE_IPV4) ||
-				(htype == RSS_HASH_TYPE_IPV6)) ?
-				PKT_HASH_TYPE_L3 : PKT_HASH_TYPE_L4;
-		return le32_to_cpu(rss_hash);
+	if (htype) {
+		hash_type = ((htype == RSS_HASH_TYPE_IPV4) ||
+			     (htype == RSS_HASH_TYPE_IPV6)) ?
+			    PKT_HASH_TYPE_L3 : PKT_HASH_TYPE_L4;
+		hash = le32_to_cpu(rss_hash);
 	}
-	*rxhash_type = PKT_HASH_TYPE_NONE;
-	return 0;
+	skb_set_hash(skb, hash, hash_type);
 }
 
 static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag)
@@ -1002,12 +1116,14 @@ static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag)
 
 static inline void qede_skb_receive(struct qede_dev *edev,
 				    struct qede_fastpath *fp,
+				    struct qede_rx_queue *rxq,
 				    struct sk_buff *skb, u16 vlan_tag)
 {
 	if (vlan_tag)
 		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
 
 	napi_gro_receive(&fp->napi, skb);
+	fp->rxq->rcv_pkts++;
 }
 
 static void qede_set_gro_params(struct qede_dev *edev,
@@ -1035,7 +1151,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
 	struct qede_agg_info *tpa_info = &rxq->tpa_info[tpa_agg_index];
 	struct sk_buff *skb = tpa_info->skb;
 
-	if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START))
+	if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
 		goto out;
 
 	/* Add one frag and update the appropriate fields in the skb */
@@ -1043,7 +1159,7 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
 			   current_bd->data, current_bd->page_offset,
 			   len_on_bd);
 
-	if (unlikely(qede_realloc_rx_buffer(edev, rxq, current_bd))) {
+	if (unlikely(qede_realloc_rx_buffer(rxq, current_bd))) {
 		/* Incr page ref count to reuse on allocation failure
 		 * so that it doesn't get freed while freeing SKB.
 		 */
@@ -1061,8 +1177,9 @@ static int qede_fill_frag_skb(struct qede_dev *edev,
 	return 0;
 
 out:
-	tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
-	qede_recycle_rx_bd_ring(rxq, edev, 1);
+	tpa_info->state = QEDE_AGG_STATE_ERROR;
+	qede_recycle_rx_bd_ring(rxq, 1);
+
 	return -ENOMEM;
 }
 
@@ -1073,12 +1190,10 @@ static void qede_tpa_start(struct qede_dev *edev,
 	struct qede_agg_info *tpa_info = &rxq->tpa_info[cqe->tpa_agg_index];
 	struct eth_rx_bd *rx_bd_cons = qed_chain_consume(&rxq->rx_bd_ring);
 	struct eth_rx_bd *rx_bd_prod = qed_chain_produce(&rxq->rx_bd_ring);
-	struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
-	dma_addr_t mapping = tpa_info->replace_buf_mapping;
+	struct sw_rx_data *replace_buf = &tpa_info->buffer;
+	dma_addr_t mapping = tpa_info->buffer_mapping;
 	struct sw_rx_data *sw_rx_data_cons;
 	struct sw_rx_data *sw_rx_data_prod;
-	enum pkt_hash_types rxhash_type;
-	u32 rxhash;
 
 	sw_rx_data_cons = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX];
 	sw_rx_data_prod = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
@@ -1099,11 +1214,11 @@ static void qede_tpa_start(struct qede_dev *edev,
 	/* move partial skb from cons to pool (don't unmap yet)
 	 * save mapping, incase we drop the packet later on.
 	 */
-	tpa_info->start_buf = *sw_rx_data_cons;
+	tpa_info->buffer = *sw_rx_data_cons;
 	mapping = HILO_U64(le32_to_cpu(rx_bd_cons->addr.hi),
 			   le32_to_cpu(rx_bd_cons->addr.lo));
 
-	tpa_info->start_buf_mapping = mapping;
+	tpa_info->buffer_mapping = mapping;
 	rxq->sw_rx_cons++;
 
 	/* set tpa state to start only if we are able to allocate skb
@@ -1114,27 +1229,27 @@ static void qede_tpa_start(struct qede_dev *edev,
 					 le16_to_cpu(cqe->len_on_first_bd));
 	if (unlikely(!tpa_info->skb)) {
 		DP_NOTICE(edev, "Failed to allocate SKB for gro\n");
-		tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
+		tpa_info->state = QEDE_AGG_STATE_ERROR;
 		goto cons_buf;
 	}
 
-	skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
-	memcpy(&tpa_info->start_cqe, cqe, sizeof(tpa_info->start_cqe));
-
 	/* Start filling in the aggregation info */
+	skb_put(tpa_info->skb, le16_to_cpu(cqe->len_on_first_bd));
 	tpa_info->frag_id = 0;
-	tpa_info->agg_state = QEDE_AGG_STATE_START;
+	tpa_info->state = QEDE_AGG_STATE_START;
 
-	rxhash = qede_get_rxhash(edev, cqe->bitfields,
-				 cqe->rss_hash, &rxhash_type);
-	skb_set_hash(tpa_info->skb, rxhash, rxhash_type);
+	/* Store some information from first CQE */
+	tpa_info->start_cqe_placement_offset = cqe->placement_offset;
+	tpa_info->start_cqe_bd_len = le16_to_cpu(cqe->len_on_first_bd);
 	if ((le16_to_cpu(cqe->pars_flags.flags) >>
 	     PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT) &
-		    PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
+	    PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK)
 		tpa_info->vlan_tag = le16_to_cpu(cqe->vlan_tag);
 	else
 		tpa_info->vlan_tag = 0;
 
+	qede_get_rxhash(tpa_info->skb, cqe->bitfields, cqe->rss_hash);
+
 	/* This is needed in order to enable forwarding support */
 	qede_set_gro_params(edev, tpa_info->skb, cqe);
 
@@ -1146,7 +1261,7 @@ cons_buf: /* We still need to handle bd_len_list to consume buffers */
 	if (unlikely(cqe->ext_bd_len_list[1])) {
 		DP_ERR(edev,
 		       "Unlikely - got a TPA aggregation with more than one ext_bd_len_list entry in the TPA start\n");
-		tpa_info->agg_state = QEDE_AGG_STATE_ERROR;
+		tpa_info->state = QEDE_AGG_STATE_ERROR;
 	}
 }
 
@@ -1197,7 +1312,7 @@ static void qede_gro_receive(struct qede_dev *edev,
 
 #ifdef CONFIG_INET
 	if (skb_shinfo(skb)->gso_size) {
-		skb_set_network_header(skb, 0);
+		skb_reset_network_header(skb);
 
 		switch (skb->protocol) {
 		case htons(ETH_P_IP):
@@ -1216,7 +1331,7 @@ static void qede_gro_receive(struct qede_dev *edev,
 
 send_skb:
 	skb_record_rx_queue(skb, fp->rxq->rxq_id);
-	qede_skb_receive(edev, fp, skb, vlan_tag);
+	qede_skb_receive(edev, fp, fp->rxq, skb, vlan_tag);
 }
 
 static inline void qede_tpa_cont(struct qede_dev *edev,
@@ -1253,7 +1368,7 @@ static void qede_tpa_end(struct qede_dev *edev,
 		DP_ERR(edev,
 		       "Strange - TPA emd with more than a single len_list entry\n");
 
-	if (unlikely(tpa_info->agg_state != QEDE_AGG_STATE_START))
+	if (unlikely(tpa_info->state != QEDE_AGG_STATE_START))
 		goto err;
 
 	/* Sanity */
@@ -1267,14 +1382,9 @@ static void qede_tpa_end(struct qede_dev *edev,
 		       le16_to_cpu(cqe->total_packet_len), skb->len);
 
 	memcpy(skb->data,
-	       page_address(tpa_info->start_buf.data) +
-		tpa_info->start_cqe.placement_offset +
-		tpa_info->start_buf.page_offset,
-	       le16_to_cpu(tpa_info->start_cqe.len_on_first_bd));
-
-	/* Recycle [mapped] start buffer for the next replacement */
-	tpa_info->replace_buf = tpa_info->start_buf;
-	tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
+	       page_address(tpa_info->buffer.data) +
+	       tpa_info->start_cqe_placement_offset +
+	       tpa_info->buffer.page_offset, tpa_info->start_cqe_bd_len);
 
 	/* Finalize the SKB */
 	skb->protocol = eth_type_trans(skb, edev->ndev);
@@ -1287,18 +1397,11 @@ static void qede_tpa_end(struct qede_dev *edev,
 
 	qede_gro_receive(edev, fp, skb, tpa_info->vlan_tag);
 
-	tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+	tpa_info->state = QEDE_AGG_STATE_NONE;
 
 	return;
 err:
-	/* The BD starting the aggregation is still mapped; Re-use it for
-	 * future aggregations [as replacement buffer]
-	 */
-	memcpy(&tpa_info->replace_buf, &tpa_info->start_buf,
-	       sizeof(struct sw_rx_data));
-	tpa_info->replace_buf_mapping = tpa_info->start_buf_mapping;
-	tpa_info->start_buf.data = NULL;
-	tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+	tpa_info->state = QEDE_AGG_STATE_NONE;
 	dev_kfree_skb_any(tpa_info->skb);
 	tpa_info->skb = NULL;
 }
@@ -1380,238 +1483,364 @@ static bool qede_pkt_is_ip_fragmented(struct eth_fast_path_rx_reg_cqe *cqe,
 	return false;
 }
 
-static int qede_rx_int(struct qede_fastpath *fp, int budget)
+/* Return true iff packet is to be passed to stack */
+static bool qede_rx_xdp(struct qede_dev *edev,
+			struct qede_fastpath *fp,
+			struct qede_rx_queue *rxq,
+			struct bpf_prog *prog,
+			struct sw_rx_data *bd,
+			struct eth_fast_path_rx_reg_cqe *cqe)
 {
-	struct qede_dev *edev = fp->edev;
-	struct qede_rx_queue *rxq = fp->rxq;
-
-	u16 hw_comp_cons, sw_comp_cons, sw_rx_index, parse_flag;
-	int rx_pkt = 0;
-	u8 csum_flag;
+	u16 len = le16_to_cpu(cqe->len_on_first_bd);
+	struct xdp_buff xdp;
+	enum xdp_action act;
 
-	hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
-	sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
+	xdp.data = page_address(bd->data) + cqe->placement_offset;
+	xdp.data_end = xdp.data + len;
 
-	/* Memory barrier to prevent the CPU from doing speculative reads of CQE
-	 * / BD in the while-loop before reading hw_comp_cons. If the CQE is
-	 * read before it is written by FW, then FW writes CQE and SB, and then
-	 * the CPU reads the hw_comp_cons, it will use an old CQE.
+	/* Queues always have a full reset currently, so for the time
+	 * being until there's atomic program replace just mark read
+	 * side for map helpers.
 	 */
-	rmb();
+	rcu_read_lock();
+	act = bpf_prog_run_xdp(prog, &xdp);
+	rcu_read_unlock();
 
-	/* Loop to complete all indicated BDs */
-	while (sw_comp_cons != hw_comp_cons) {
-		struct eth_fast_path_rx_reg_cqe *fp_cqe;
-		enum pkt_hash_types rxhash_type;
-		enum eth_rx_cqe_type cqe_type;
-		struct sw_rx_data *sw_rx_data;
-		union eth_rx_cqe *cqe;
-		struct sk_buff *skb;
-		struct page *data;
-		__le16 flags;
-		u16 len, pad;
-		u32 rx_hash;
-
-		/* Get the CQE from the completion ring */
-		cqe = (union eth_rx_cqe *)
-			qed_chain_consume(&rxq->rx_comp_ring);
-		cqe_type = cqe->fast_path_regular.type;
-
-		if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
-			edev->ops->eth_cqe_completion(
-					edev->cdev, fp->id,
-					(struct eth_slow_path_rx_cqe *)cqe);
-			goto next_cqe;
+	if (act == XDP_PASS)
+		return true;
+
+	/* Count number of packets not to be passed to stack */
+	rxq->xdp_no_pass++;
+
+	switch (act) {
+	case XDP_TX:
+		/* We need the replacement buffer before transmit. */
+		if (qede_alloc_rx_buffer(rxq)) {
+			qede_recycle_rx_bd_ring(rxq, 1);
+			return false;
 		}
 
-		if (cqe_type != ETH_RX_CQE_TYPE_REGULAR) {
-			switch (cqe_type) {
-			case ETH_RX_CQE_TYPE_TPA_START:
-				qede_tpa_start(edev, rxq,
-					       &cqe->fast_path_tpa_start);
-				goto next_cqe;
-			case ETH_RX_CQE_TYPE_TPA_CONT:
-				qede_tpa_cont(edev, rxq,
-					      &cqe->fast_path_tpa_cont);
-				goto next_cqe;
-			case ETH_RX_CQE_TYPE_TPA_END:
-				qede_tpa_end(edev, fp,
-					     &cqe->fast_path_tpa_end);
-				goto next_rx_only;
-			default:
-				break;
-			}
+		/* Now if there's a transmission problem, we'd still have to
+		 * throw current buffer, as replacement was already allocated.
+		 */
+		if (qede_xdp_xmit(edev, fp, bd, cqe->placement_offset, len)) {
+			dma_unmap_page(rxq->dev, bd->mapping,
+				       PAGE_SIZE, DMA_BIDIRECTIONAL);
+			__free_page(bd->data);
 		}
 
-		/* Get the data from the SW ring */
-		sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
-		sw_rx_data = &rxq->sw_rx_ring[sw_rx_index];
-		data = sw_rx_data->data;
-
-		fp_cqe = &cqe->fast_path_regular;
-		len =  le16_to_cpu(fp_cqe->len_on_first_bd);
-		pad = fp_cqe->placement_offset;
-		flags = cqe->fast_path_regular.pars_flags.flags;
-
-		/* If this is an error packet then drop it */
-		parse_flag = le16_to_cpu(flags);
-
-		csum_flag = qede_check_csum(parse_flag);
-		if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
-			if (qede_pkt_is_ip_fragmented(&cqe->fast_path_regular,
-						      parse_flag)) {
-				rxq->rx_ip_frags++;
-				goto alloc_skb;
-			}
+		/* Regardless, we've consumed an Rx BD */
+		qede_rx_bd_ring_consume(rxq);
+		return false;
 
-			DP_NOTICE(edev,
-				  "CQE in CONS = %u has error, flags = %x, dropping incoming packet\n",
-				  sw_comp_cons, parse_flag);
-			rxq->rx_hw_errors++;
-			qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
-			goto next_cqe;
-		}
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_ABORTED:
+	case XDP_DROP:
+		qede_recycle_rx_bd_ring(rxq, cqe->bd_num);
+	}
 
-alloc_skb:
-		skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
-		if (unlikely(!skb)) {
-			DP_NOTICE(edev,
-				  "skb allocation failed, dropping incoming packet\n");
-			qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num);
-			rxq->rx_alloc_errors++;
-			goto next_cqe;
+	return false;
+}
+
+static struct sk_buff *qede_rx_allocate_skb(struct qede_dev *edev,
+					    struct qede_rx_queue *rxq,
+					    struct sw_rx_data *bd, u16 len,
+					    u16 pad)
+{
+	unsigned int offset = bd->page_offset;
+	struct skb_frag_struct *frag;
+	struct page *page = bd->data;
+	unsigned int pull_len;
+	struct sk_buff *skb;
+	unsigned char *va;
+
+	/* Allocate a new SKB with a sufficiently large header length */
+	skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE);
+	if (unlikely(!skb))
+		return NULL;
+
+	/* Copy data into SKB - if it's small, we can simply copy it and
+	 * re-use the already allocated & mapped memory.
+	 */
+	if (len + pad <= edev->rx_copybreak) {
+		memcpy(skb_put(skb, len),
+		       page_address(page) + pad + offset, len);
+		qede_reuse_page(rxq, bd);
+		goto out;
+	}
+
+	frag = &skb_shinfo(skb)->frags[0];
+
+	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
+			page, pad + offset, len, rxq->rx_buf_seg_size);
+
+	va = skb_frag_address(frag);
+	pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE);
+
+	/* Align the pull_len to optimize memcpy */
+	memcpy(skb->data, va, ALIGN(pull_len, sizeof(long)));
+
+	/* Correct the skb & frag sizes offset after the pull */
+	skb_frag_size_sub(frag, pull_len);
+	frag->page_offset += pull_len;
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+
+	if (unlikely(qede_realloc_rx_buffer(rxq, bd))) {
+		/* Incr page ref count to reuse on allocation failure so
+		 * that it doesn't get freed while freeing SKB [as it's
+		 * already mapped there].
+		 */
+		page_ref_inc(page);
+		dev_kfree_skb_any(skb);
+		return NULL;
+	}
+
+out:
+	/* We've consumed the first BD and prepared an SKB */
+	qede_rx_bd_ring_consume(rxq);
+	return skb;
+}
+
+static int qede_rx_build_jumbo(struct qede_dev *edev,
+			       struct qede_rx_queue *rxq,
+			       struct sk_buff *skb,
+			       struct eth_fast_path_rx_reg_cqe *cqe,
+			       u16 first_bd_len)
+{
+	u16 pkt_len = le16_to_cpu(cqe->pkt_len);
+	struct sw_rx_data *bd;
+	u16 bd_cons_idx;
+	u8 num_frags;
+
+	pkt_len -= first_bd_len;
+
+	/* We've already used one BD for the SKB. Now take care of the rest */
+	for (num_frags = cqe->bd_num - 1; num_frags > 0; num_frags--) {
+		u16 cur_size = pkt_len > rxq->rx_buf_size ? rxq->rx_buf_size :
+		    pkt_len;
+
+		if (unlikely(!cur_size)) {
+			DP_ERR(edev,
+			       "Still got %d BDs for mapping jumbo, but length became 0\n",
+			       num_frags);
+			goto out;
 		}
 
-		/* Copy data into SKB */
-		if (len + pad <= edev->rx_copybreak) {
-			memcpy(skb_put(skb, len),
-			       page_address(data) + pad +
-				sw_rx_data->page_offset, len);
-			qede_reuse_page(edev, rxq, sw_rx_data);
+		/* We need a replacement buffer for each BD */
+		if (unlikely(qede_alloc_rx_buffer(rxq)))
+			goto out;
+
+		/* Now that we've allocated the replacement buffer,
+		 * we can safely consume the next BD and map it to the SKB.
+		 */
+		bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
+		bd = &rxq->sw_rx_ring[bd_cons_idx];
+		qede_rx_bd_ring_consume(rxq);
+
+		dma_unmap_page(rxq->dev, bd->mapping,
+			       PAGE_SIZE, DMA_FROM_DEVICE);
+
+		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
+				   bd->data, 0, cur_size);
+
+		skb->truesize += PAGE_SIZE;
+		skb->data_len += cur_size;
+		skb->len += cur_size;
+		pkt_len -= cur_size;
+	}
+
+	if (unlikely(pkt_len))
+		DP_ERR(edev,
+		       "Mapped all BDs of jumbo, but still have %d bytes\n",
+		       pkt_len);
+
+out:
+	return num_frags;
+}
+
+static int qede_rx_process_tpa_cqe(struct qede_dev *edev,
+				   struct qede_fastpath *fp,
+				   struct qede_rx_queue *rxq,
+				   union eth_rx_cqe *cqe,
+				   enum eth_rx_cqe_type type)
+{
+	switch (type) {
+	case ETH_RX_CQE_TYPE_TPA_START:
+		qede_tpa_start(edev, rxq, &cqe->fast_path_tpa_start);
+		return 0;
+	case ETH_RX_CQE_TYPE_TPA_CONT:
+		qede_tpa_cont(edev, rxq, &cqe->fast_path_tpa_cont);
+		return 0;
+	case ETH_RX_CQE_TYPE_TPA_END:
+		qede_tpa_end(edev, fp, &cqe->fast_path_tpa_end);
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+static int qede_rx_process_cqe(struct qede_dev *edev,
+			       struct qede_fastpath *fp,
+			       struct qede_rx_queue *rxq)
+{
+	struct bpf_prog *xdp_prog = READ_ONCE(rxq->xdp_prog);
+	struct eth_fast_path_rx_reg_cqe *fp_cqe;
+	u16 len, pad, bd_cons_idx, parse_flag;
+	enum eth_rx_cqe_type cqe_type;
+	union eth_rx_cqe *cqe;
+	struct sw_rx_data *bd;
+	struct sk_buff *skb;
+	__le16 flags;
+	u8 csum_flag;
+
+	/* Get the CQE from the completion ring */
+	cqe = (union eth_rx_cqe *)qed_chain_consume(&rxq->rx_comp_ring);
+	cqe_type = cqe->fast_path_regular.type;
+
+	/* Process an unlikely slowpath event */
+	if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) {
+		struct eth_slow_path_rx_cqe *sp_cqe;
+
+		sp_cqe = (struct eth_slow_path_rx_cqe *)cqe;
+		edev->ops->eth_cqe_completion(edev->cdev, fp->id, sp_cqe);
+		return 0;
+	}
+
+	/* Handle TPA cqes */
+	if (cqe_type != ETH_RX_CQE_TYPE_REGULAR)
+		return qede_rx_process_tpa_cqe(edev, fp, rxq, cqe, cqe_type);
+
+	/* Get the data from the SW ring; Consume it only after it's evident
+	 * we wouldn't recycle it.
+	 */
+	bd_cons_idx = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
+	bd = &rxq->sw_rx_ring[bd_cons_idx];
+
+	fp_cqe = &cqe->fast_path_regular;
+	len = le16_to_cpu(fp_cqe->len_on_first_bd);
+	pad = fp_cqe->placement_offset;
+
+	/* Run eBPF program if one is attached */
+	if (xdp_prog)
+		if (!qede_rx_xdp(edev, fp, rxq, xdp_prog, bd, fp_cqe))
+			return 1;
+
+	/* If this is an error packet then drop it */
+	flags = cqe->fast_path_regular.pars_flags.flags;
+	parse_flag = le16_to_cpu(flags);
+
+	csum_flag = qede_check_csum(parse_flag);
+	if (unlikely(csum_flag == QEDE_CSUM_ERROR)) {
+		if (qede_pkt_is_ip_fragmented(fp_cqe, parse_flag)) {
+			rxq->rx_ip_frags++;
 		} else {
-			struct skb_frag_struct *frag;
-			unsigned int pull_len;
-			unsigned char *va;
-
-			frag = &skb_shinfo(skb)->frags[0];
-
-			skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, data,
-					pad + sw_rx_data->page_offset,
-					len, rxq->rx_buf_seg_size);
-
-			va = skb_frag_address(frag);
-			pull_len = eth_get_headlen(va, QEDE_RX_HDR_SIZE);
-
-			/* Align the pull_len to optimize memcpy */
-			memcpy(skb->data, va, ALIGN(pull_len, sizeof(long)));
-
-			skb_frag_size_sub(frag, pull_len);
-			frag->page_offset += pull_len;
-			skb->data_len -= pull_len;
-			skb->tail += pull_len;
-
-			if (unlikely(qede_realloc_rx_buffer(edev, rxq,
-							    sw_rx_data))) {
-				DP_ERR(edev, "Failed to allocate rx buffer\n");
-				/* Incr page ref count to reuse on allocation
-				 * failure so that it doesn't get freed while
-				 * freeing SKB.
-				 */
-
-				page_ref_inc(sw_rx_data->data);
-				rxq->rx_alloc_errors++;
-				qede_recycle_rx_bd_ring(rxq, edev,
-							fp_cqe->bd_num);
-				dev_kfree_skb_any(skb);
-				goto next_cqe;
-			}
+			DP_NOTICE(edev,
+				  "CQE has error, flags = %x, dropping incoming packet\n",
+				  parse_flag);
+			rxq->rx_hw_errors++;
+			qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num);
+			return 0;
 		}
+	}
 
-		qede_rx_bd_ring_consume(rxq);
+	/* Basic validation passed; Need to prepare an SKB. This would also
+	 * guarantee to finally consume the first BD upon success.
+	 */
+	skb = qede_rx_allocate_skb(edev, rxq, bd, len, pad);
+	if (!skb) {
+		rxq->rx_alloc_errors++;
+		qede_recycle_rx_bd_ring(rxq, fp_cqe->bd_num);
+		return 0;
+	}
 
-		if (fp_cqe->bd_num != 1) {
-			u16 pkt_len = le16_to_cpu(fp_cqe->pkt_len);
-			u8 num_frags;
-
-			pkt_len -= len;
-
-			for (num_frags = fp_cqe->bd_num - 1; num_frags > 0;
-			     num_frags--) {
-				u16 cur_size = pkt_len > rxq->rx_buf_size ?
-						rxq->rx_buf_size : pkt_len;
-				if (unlikely(!cur_size)) {
-					DP_ERR(edev,
-					       "Still got %d BDs for mapping jumbo, but length became 0\n",
-					       num_frags);
-					qede_recycle_rx_bd_ring(rxq, edev,
-								num_frags);
-					dev_kfree_skb_any(skb);
-					goto next_cqe;
-				}
-
-				if (unlikely(qede_alloc_rx_buffer(edev, rxq))) {
-					qede_recycle_rx_bd_ring(rxq, edev,
-								num_frags);
-					dev_kfree_skb_any(skb);
-					goto next_cqe;
-				}
-
-				sw_rx_index = rxq->sw_rx_cons & NUM_RX_BDS_MAX;
-				sw_rx_data = &rxq->sw_rx_ring[sw_rx_index];
-				qede_rx_bd_ring_consume(rxq);
-
-				dma_unmap_page(&edev->pdev->dev,
-					       sw_rx_data->mapping,
-					       PAGE_SIZE, DMA_FROM_DEVICE);
-
-				skb_fill_page_desc(skb,
-						   skb_shinfo(skb)->nr_frags++,
-						   sw_rx_data->data, 0,
-						   cur_size);
-
-				skb->truesize += PAGE_SIZE;
-				skb->data_len += cur_size;
-				skb->len += cur_size;
-				pkt_len -= cur_size;
-			}
+	/* In case of Jumbo packet, several PAGE_SIZEd buffers will be pointed
+	 * by a single cqe.
+	 */
+	if (fp_cqe->bd_num > 1) {
+		u16 unmapped_frags = qede_rx_build_jumbo(edev, rxq, skb,
+							 fp_cqe, len);
 
-			if (unlikely(pkt_len))
-				DP_ERR(edev,
-				       "Mapped all BDs of jumbo, but still have %d bytes\n",
-				       pkt_len);
+		if (unlikely(unmapped_frags > 0)) {
+			qede_recycle_rx_bd_ring(rxq, unmapped_frags);
+			dev_kfree_skb_any(skb);
+			return 0;
 		}
+	}
 
-		skb->protocol = eth_type_trans(skb, edev->ndev);
+	/* The SKB contains all the data. Now prepare meta-magic */
+	skb->protocol = eth_type_trans(skb, edev->ndev);
+	qede_get_rxhash(skb, fp_cqe->bitfields, fp_cqe->rss_hash);
+	qede_set_skb_csum(skb, csum_flag);
+	skb_record_rx_queue(skb, rxq->rxq_id);
 
-		rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields,
-					  fp_cqe->rss_hash, &rxhash_type);
+	/* SKB is prepared - pass it to stack */
+	qede_skb_receive(edev, fp, rxq, skb, le16_to_cpu(fp_cqe->vlan_tag));
 
-		skb_set_hash(skb, rx_hash, rxhash_type);
+	return 1;
+}
 
-		qede_set_skb_csum(skb, csum_flag);
+static int qede_rx_int(struct qede_fastpath *fp, int budget)
+{
+	struct qede_rx_queue *rxq = fp->rxq;
+	struct qede_dev *edev = fp->edev;
+	u16 hw_comp_cons, sw_comp_cons;
+	int work_done = 0;
 
-		skb_record_rx_queue(skb, fp->rxq->rxq_id);
+	hw_comp_cons = le16_to_cpu(*rxq->hw_cons_ptr);
+	sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
 
-		qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag));
-next_rx_only:
-		rx_pkt++;
+	/* Memory barrier to prevent the CPU from doing speculative reads of CQE
+	 * / BD in the while-loop before reading hw_comp_cons. If the CQE is
+	 * read before it is written by FW, then FW writes CQE and SB, and then
+	 * the CPU reads the hw_comp_cons, it will use an old CQE.
+	 */
+	rmb();
 
-next_cqe: /* don't consume bd rx buffer */
+	/* Loop to complete all indicated BDs */
+	while ((sw_comp_cons != hw_comp_cons) && (work_done < budget)) {
+		qede_rx_process_cqe(edev, fp, rxq);
 		qed_chain_recycle_consumed(&rxq->rx_comp_ring);
 		sw_comp_cons = qed_chain_get_cons_idx(&rxq->rx_comp_ring);
-		/* CR TPA - revisit how to handle budget in TPA perhaps
-		 * increase on "end"
-		 */
-		if (rx_pkt == budget)
-			break;
-	} /* repeat while sw_comp_cons != hw_comp_cons... */
+		work_done++;
+	}
 
 	/* Update producers */
 	qede_update_rx_prod(edev, rxq);
 
-	rxq->rcv_pkts += rx_pkt;
+	return work_done;
+}
+
+static bool qede_poll_is_more_work(struct qede_fastpath *fp)
+{
+	qed_sb_update_sb_idx(fp->sb_info);
+
+	/* *_has_*_work() reads the status block, thus we need to ensure that
+	 * status block indices have been actually read (qed_sb_update_sb_idx)
+	 * prior to this check (*_has_*_work) so that we won't write the
+	 * "newer" value of the status block to HW (if there was a DMA right
+	 * after qede_has_rx_work and if there is no rmb, the memory reading
+	 * (qed_sb_update_sb_idx) may be postponed to right before *_ack_sb).
+	 * In this case there will never be another interrupt until there is
+	 * another update of the status block, while there is still unhandled
+	 * work.
+	 */
+	rmb();
+
+	if (likely(fp->type & QEDE_FASTPATH_RX))
+		if (qede_has_rx_work(fp->rxq))
+			return true;
 
-	return rx_pkt;
+	if (fp->type & QEDE_FASTPATH_XDP)
+		if (qede_txq_has_work(fp->xdp_tx))
+			return true;
+
+	if (likely(fp->type & QEDE_FASTPATH_TX))
+		if (qede_txq_has_work(fp->txq))
+			return true;
+
+	return false;
 }
 
 static int qede_poll(struct napi_struct *napi, int budget)
@@ -1620,48 +1849,35 @@ static int qede_poll(struct napi_struct *napi, int budget)
 						napi);
 	struct qede_dev *edev = fp->edev;
 	int rx_work_done = 0;
-	u8 tc;
 
-	for (tc = 0; tc < edev->num_tc; tc++)
-		if (likely(fp->type & QEDE_FASTPATH_TX) &&
-		    qede_txq_has_work(&fp->txqs[tc]))
-			qede_tx_int(edev, &fp->txqs[tc]);
+	if (likely(fp->type & QEDE_FASTPATH_TX) && qede_txq_has_work(fp->txq))
+		qede_tx_int(edev, fp->txq);
+
+	if ((fp->type & QEDE_FASTPATH_XDP) && qede_txq_has_work(fp->xdp_tx))
+		qede_xdp_tx_int(edev, fp->xdp_tx);
 
 	rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) &&
 			qede_has_rx_work(fp->rxq)) ?
 			qede_rx_int(fp, budget) : 0;
 	if (rx_work_done < budget) {
-		qed_sb_update_sb_idx(fp->sb_info);
-		/* *_has_*_work() reads the status block,
-		 * thus we need to ensure that status block indices
-		 * have been actually read (qed_sb_update_sb_idx)
-		 * prior to this check (*_has_*_work) so that
-		 * we won't write the "newer" value of the status block
-		 * to HW (if there was a DMA right after
-		 * qede_has_rx_work and if there is no rmb, the memory
-		 * reading (qed_sb_update_sb_idx) may be postponed
-		 * to right before *_ack_sb). In this case there
-		 * will never be another interrupt until there is
-		 * another update of the status block, while there
-		 * is still unhandled work.
-		 */
-		rmb();
-
-		/* Fall out from the NAPI loop if needed */
-		if (!((likely(fp->type & QEDE_FASTPATH_RX) &&
-		       qede_has_rx_work(fp->rxq)) ||
-		      (likely(fp->type & QEDE_FASTPATH_TX) &&
-		       qede_has_tx_work(fp)))) {
+		if (!qede_poll_is_more_work(fp)) {
 			napi_complete(napi);
 
 			/* Update and reenable interrupts */
-			qed_sb_ack(fp->sb_info, IGU_INT_ENABLE,
-				   1 /*update*/);
+			qed_sb_ack(fp->sb_info, IGU_INT_ENABLE, 1);
 		} else {
 			rx_work_done = budget;
 		}
 	}
 
+	if (fp->xdp_xmit) {
+		u16 xdp_prod = qed_chain_get_prod_idx(&fp->xdp_tx->tx_pbl);
+
+		fp->xdp_xmit = 0;
+		fp->xdp_tx->tx_db.data.bd_prod = cpu_to_le16(xdp_prod);
+		qede_update_tx_producer(fp->xdp_tx);
+	}
+
 	return rx_work_done;
 }
 
@@ -1912,7 +2128,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qede_vlan *vlan, *tmp;
-	int rc;
+	int rc = 0;
 
 	DP_VERBOSE(edev, NETIF_MSG_IFUP, "Adding vlan 0x%04x\n", vid);
 
@@ -1936,6 +2152,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 	}
 
 	/* If interface is down, cache this VLAN ID and return */
+	__qede_lock(edev);
 	if (edev->state != QEDE_STATE_OPEN) {
 		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
 			   "Interface is down, VLAN %d will be configured when interface is up\n",
@@ -1943,8 +2160,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 		if (vid != 0)
 			edev->non_configured_vlans++;
 		list_add(&vlan->list, &edev->vlan_list);
-
-		return 0;
+		goto out;
 	}
 
 	/* Check for the filter limit.
@@ -1960,7 +2176,7 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 			DP_ERR(edev, "Failed to configure VLAN %d\n",
 			       vlan->vid);
 			kfree(vlan);
-			return -EINVAL;
+			goto out;
 		}
 		vlan->configured = true;
 
@@ -1977,7 +2193,9 @@ static int qede_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid)
 
 	list_add(&vlan->list, &edev->vlan_list);
 
-	return 0;
+out:
+	__qede_unlock(edev);
+	return rc;
 }
 
 static void qede_del_vlan_from_list(struct qede_dev *edev,
@@ -2054,11 +2272,12 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 	struct qede_vlan *vlan = NULL;
-	int rc;
+	int rc = 0;
 
 	DP_VERBOSE(edev, NETIF_MSG_IFDOWN, "Removing vlan 0x%04x\n", vid);
 
 	/* Find whether entry exists */
+	__qede_lock(edev);
 	list_for_each_entry(vlan, &edev->vlan_list, list)
 		if (vlan->vid == vid)
 			break;
@@ -2066,7 +2285,7 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 	if (!vlan || (vlan->vid != vid)) {
 		DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN),
 			   "Vlan isn't configured\n");
-		return 0;
+		goto out;
 	}
 
 	if (edev->state != QEDE_STATE_OPEN) {
@@ -2076,7 +2295,7 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 		DP_VERBOSE(edev, NETIF_MSG_IFDOWN,
 			   "Interface is down, removing VLAN from list only\n");
 		qede_del_vlan_from_list(edev, vlan);
-		return 0;
+		goto out;
 	}
 
 	/* Remove vlan */
@@ -2085,7 +2304,7 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 					    vid);
 		if (rc) {
 			DP_ERR(edev, "Failed to remove VLAN %d\n", vid);
-			return -EINVAL;
+			goto out;
 		}
 	}
 
@@ -2096,6 +2315,8 @@ static int qede_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid)
 	 */
 	rc = qede_configure_vlan_filters(edev);
 
+out:
+	__qede_unlock(edev);
 	return rc;
 }
 
@@ -2125,7 +2346,13 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev)
 	edev->accept_any_vlan = false;
 }
 
-static int qede_set_features(struct net_device *dev, netdev_features_t features)
+static void qede_set_features_reload(struct qede_dev *edev,
+				     struct qede_reload_args *args)
+{
+	edev->ndev->features = args->u.features;
+}
+
+int qede_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct qede_dev *edev = netdev_priv(dev);
 	netdev_features_t changes = features ^ dev->features;
@@ -2139,9 +2366,23 @@ static int qede_set_features(struct net_device *dev, netdev_features_t features)
 			need_reload = edev->gro_disable;
 	}
 
-	if (need_reload && netif_running(edev->ndev)) {
-		dev->features = features;
-		qede_reload(edev, NULL, NULL);
+	if (need_reload) {
+		struct qede_reload_args args;
+
+		args.u.features = features;
+		args.func = &qede_set_features_reload;
+
+		/* Make sure that we definitely need to reload.
+		 * If an eBPF program is attached, there will be no FW
+		 * aggregations, so no need to actually reload.
+		 */
+		__qede_lock(edev);
+		if (edev->xdp_prog)
+			args.func(edev, &args);
+		else
+			qede_reload(edev, &args, true);
+		__qede_unlock(edev);
+
 		return 1;
 	}
 
@@ -2218,6 +2459,82 @@ static void qede_udp_tunnel_del(struct net_device *dev,
 	schedule_delayed_work(&edev->sp_task, 0);
 }
 
+/* 8B udp header + 8B base tunnel header + 32B option length */
+#define QEDE_MAX_TUN_HDR_LEN 48
+
+static netdev_features_t qede_features_check(struct sk_buff *skb,
+					     struct net_device *dev,
+					     netdev_features_t features)
+{
+	if (skb->encapsulation) {
+		u8 l4_proto = 0;
+
+		switch (vlan_get_protocol(skb)) {
+		case htons(ETH_P_IP):
+			l4_proto = ip_hdr(skb)->protocol;
+			break;
+		case htons(ETH_P_IPV6):
+			l4_proto = ipv6_hdr(skb)->nexthdr;
+			break;
+		default:
+			return features;
+		}
+
+		/* Disable offloads for geneve tunnels, as HW can't parse
+		 * the geneve header which has option length greater than 32B.
+		 */
+		if ((l4_proto == IPPROTO_UDP) &&
+		    ((skb_inner_mac_header(skb) -
+		      skb_transport_header(skb)) > QEDE_MAX_TUN_HDR_LEN))
+			return features & ~(NETIF_F_CSUM_MASK |
+					    NETIF_F_GSO_MASK);
+	}
+
+	return features;
+}
+
+static void qede_xdp_reload_func(struct qede_dev *edev,
+				 struct qede_reload_args *args)
+{
+	struct bpf_prog *old;
+
+	old = xchg(&edev->xdp_prog, args->u.new_prog);
+	if (old)
+		bpf_prog_put(old);
+}
+
+static int qede_xdp_set(struct qede_dev *edev, struct bpf_prog *prog)
+{
+	struct qede_reload_args args;
+
+	if (prog && prog->xdp_adjust_head) {
+		DP_ERR(edev, "Does not support bpf_xdp_adjust_head()\n");
+		return -EOPNOTSUPP;
+	}
+
+	/* If we're called, there was already a bpf reference increment */
+	args.func = &qede_xdp_reload_func;
+	args.u.new_prog = prog;
+	qede_reload(edev, &args, false);
+
+	return 0;
+}
+
+static int qede_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return qede_xdp_set(edev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = !!edev->xdp_prog;
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops qede_netdev_ops = {
 	.ndo_open = qede_open,
 	.ndo_stop = qede_close,
@@ -2242,6 +2559,8 @@ static const struct net_device_ops qede_netdev_ops = {
 #endif
 	.ndo_udp_tunnel_add = qede_udp_tunnel_add,
 	.ndo_udp_tunnel_del = qede_udp_tunnel_del,
+	.ndo_features_check = qede_features_check,
+	.ndo_xdp = qede_xdp,
 };
 
 /* -------------------------------------------------------------------------
@@ -2282,8 +2601,6 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
 	memset(&edev->stats, 0, sizeof(edev->stats));
 	memcpy(&edev->dev_info, info, sizeof(*info));
 
-	edev->num_tc = edev->dev_info.num_tc;
-
 	INIT_LIST_HEAD(&edev->vlan_list);
 
 	return edev;
@@ -2308,6 +2625,8 @@ static void qede_init_ndev(struct qede_dev *edev)
 
 	qede_set_ethtool_ops(ndev);
 
+	ndev->priv_flags |= IFF_UNICAST_FLT;
+
 	/* user-changeble features */
 	hw_features = NETIF_F_GRO | NETIF_F_SG |
 		      NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -2315,11 +2634,14 @@ static void qede_init_ndev(struct qede_dev *edev)
 
 	/* Encap features*/
 	hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL |
-		       NETIF_F_TSO_ECN;
+		       NETIF_F_TSO_ECN | NETIF_F_GSO_UDP_TUNNEL_CSUM |
+		       NETIF_F_GSO_GRE_CSUM;
 	ndev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
 				NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO_ECN |
 				NETIF_F_TSO6 | NETIF_F_GSO_GRE |
-				NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM;
+				NETIF_F_GSO_UDP_TUNNEL | NETIF_F_RXCSUM |
+				NETIF_F_GSO_UDP_TUNNEL_CSUM |
+				NETIF_F_GSO_GRE_CSUM;
 
 	ndev->vlan_features = hw_features | NETIF_F_RXHASH | NETIF_F_RXCSUM |
 			      NETIF_F_HIGHDMA;
@@ -2329,8 +2651,14 @@ static void qede_init_ndev(struct qede_dev *edev)
 
 	ndev->hw_features = hw_features;
 
+	/* MTU range: 46 - 9600 */
+	ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	ndev->max_mtu = QEDE_MAX_JUMBO_PACKET_SIZE;
+
 	/* Set network device HW mac */
 	ether_addr_copy(edev->ndev->dev_addr, edev->dev_info.common.hw_mac);
+
+	ndev->mtu = edev->dev_info.common.mtu;
 }
 
 /* This function converts from 32b param to two params of level and module
@@ -2370,7 +2698,8 @@ static void qede_free_fp_array(struct qede_dev *edev)
 
 			kfree(fp->sb_info);
 			kfree(fp->rxq);
-			kfree(fp->txqs);
+			kfree(fp->xdp_tx);
+			kfree(fp->txq);
 		}
 		kfree(edev->fp_array);
 	}
@@ -2403,7 +2732,7 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
 	for_each_queue(i) {
 		fp = &edev->fp_array[i];
 
-		fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL);
+		fp->sb_info = kzalloc(sizeof(*fp->sb_info), GFP_KERNEL);
 		if (!fp->sb_info) {
 			DP_NOTICE(edev, "sb info struct allocation failed\n");
 			goto err;
@@ -2420,21 +2749,22 @@ static int qede_alloc_fp_array(struct qede_dev *edev)
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs),
-					   GFP_KERNEL);
-			if (!fp->txqs) {
-				DP_NOTICE(edev,
-					  "TXQ array allocation failed\n");
+			fp->txq = kzalloc(sizeof(*fp->txq), GFP_KERNEL);
+			if (!fp->txq)
 				goto err;
-			}
 		}
 
 		if (fp->type & QEDE_FASTPATH_RX) {
-			fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL);
-			if (!fp->rxq) {
-				DP_NOTICE(edev,
-					  "RXQ struct allocation failed\n");
+			fp->rxq = kzalloc(sizeof(*fp->rxq), GFP_KERNEL);
+			if (!fp->rxq)
 				goto err;
+
+			if (edev->xdp_prog) {
+				fp->xdp_tx = kzalloc(sizeof(*fp->xdp_tx),
+						     GFP_KERNEL);
+				if (!fp->xdp_tx)
+					goto err;
+				fp->type |= QEDE_FASTPATH_XDP;
 			}
 		}
 	}
@@ -2451,12 +2781,11 @@ static void qede_sp_task(struct work_struct *work)
 					     sp_task.work);
 	struct qed_dev *cdev = edev->cdev;
 
-	mutex_lock(&edev->qede_lock);
+	__qede_lock(edev);
 
-	if (edev->state == QEDE_STATE_OPEN) {
-		if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
+	if (test_and_clear_bit(QEDE_SP_RX_MODE, &edev->sp_flags))
+		if (edev->state == QEDE_STATE_OPEN)
 			qede_config_rx_mode(edev->ndev);
-	}
 
 	if (test_and_clear_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags)) {
 		struct qed_tunn_params tunn_params;
@@ -2476,16 +2805,16 @@ static void qede_sp_task(struct work_struct *work)
 		qed_ops->tunn_config(cdev, &tunn_params);
 	}
 
-	mutex_unlock(&edev->qede_lock);
+	__qede_unlock(edev);
 }
 
 static void qede_update_pf_params(struct qed_dev *cdev)
 {
 	struct qed_pf_params pf_params;
 
-	/* 64 rx + 64 tx */
+	/* 64 rx + 64 tx + 64 XDP */
 	memset(&pf_params, 0, sizeof(struct qed_pf_params));
-	pf_params.eth_pf_params.num_cons = 128;
+	pf_params.eth_pf_params.num_cons = 192;
 	qed_ops->common->update_pf_params(cdev, &pf_params);
 }
 
@@ -2634,10 +2963,16 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode)
 
 	pci_set_drvdata(pdev, NULL);
 
+	/* Release edev's reference to the XDP bpf program, if one exists */
+	if (edev->xdp_prog)
+		bpf_prog_put(edev->xdp_prog);
+
 	free_netdev(ndev);
 
 	/* Use global ops since we've freed edev */
 	qed_ops->common->slowpath_stop(cdev);
+	if (system_state == SYSTEM_POWER_OFF)
+		return;
 	qed_ops->common->remove(cdev);
 
 	dev_info(&pdev->dev, "Ending qede_remove successfully\n");
@@ -2648,6 +2983,11 @@ static void qede_remove(struct pci_dev *pdev)
 	__qede_remove(pdev, QEDE_REMOVE_NORMAL);
 }
 
+static void qede_shutdown(struct pci_dev *pdev)
+{
+	__qede_remove(pdev, QEDE_REMOVE_NORMAL);
+}
+
 /* -------------------------------------------------------------------------
  * START OF LOAD / UNLOAD
  * -------------------------------------------------------------------------
@@ -2731,7 +3071,7 @@ static void qede_free_rx_buffers(struct qede_dev *edev,
 		data = rx_buf->data;
 
 		dma_unmap_page(&edev->pdev->dev,
-			       rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE);
+			       rx_buf->mapping, PAGE_SIZE, rxq->data_direction);
 
 		rx_buf->data = NULL;
 		__free_page(data);
@@ -2747,7 +3087,7 @@ static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 
 	for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
 		struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
-		struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
+		struct sw_rx_data *replace_buf = &tpa_info->buffer;
 
 		if (replace_buf->data) {
 			dma_unmap_page(&edev->pdev->dev,
@@ -2773,52 +3113,15 @@ static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 	edev->ops->common->chain_free(edev->cdev, &rxq->rx_comp_ring);
 }
 
-static int qede_alloc_rx_buffer(struct qede_dev *edev,
-				struct qede_rx_queue *rxq)
-{
-	struct sw_rx_data *sw_rx_data;
-	struct eth_rx_bd *rx_bd;
-	dma_addr_t mapping;
-	struct page *data;
-
-	data = alloc_pages(GFP_ATOMIC, 0);
-	if (unlikely(!data)) {
-		DP_NOTICE(edev, "Failed to allocate Rx data [page]\n");
-		return -ENOMEM;
-	}
-
-	/* Map the entire page as it would be used
-	 * for multiple RX buffer segment size mapping.
-	 */
-	mapping = dma_map_page(&edev->pdev->dev, data, 0,
-			       PAGE_SIZE, DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) {
-		__free_page(data);
-		DP_NOTICE(edev, "Failed to map Rx buffer\n");
-		return -ENOMEM;
-	}
-
-	sw_rx_data = &rxq->sw_rx_ring[rxq->sw_rx_prod & NUM_RX_BDS_MAX];
-	sw_rx_data->page_offset = 0;
-	sw_rx_data->data = data;
-	sw_rx_data->mapping = mapping;
-
-	/* Advance PROD and get BD pointer */
-	rx_bd = (struct eth_rx_bd *)qed_chain_produce(&rxq->rx_bd_ring);
-	WARN_ON(!rx_bd);
-	rx_bd->addr.hi = cpu_to_le32(upper_32_bits(mapping));
-	rx_bd->addr.lo = cpu_to_le32(lower_32_bits(mapping));
-
-	rxq->sw_rx_prod++;
-
-	return 0;
-}
-
 static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 {
 	dma_addr_t mapping;
 	int i;
 
+	/* Don't perform FW aggregations in case of XDP */
+	if (edev->xdp_prog)
+		edev->gro_disable = 1;
+
 	if (edev->gro_disable)
 		return 0;
 
@@ -2829,7 +3132,7 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 
 	for (i = 0; i < ETH_TPA_MAX_AGGS_NUM; i++) {
 		struct qede_agg_info *tpa_info = &rxq->tpa_info[i];
-		struct sw_rx_data *replace_buf = &tpa_info->replace_buf;
+		struct sw_rx_data *replace_buf = &tpa_info->buffer;
 
 		replace_buf->data = alloc_pages(GFP_ATOMIC, 0);
 		if (unlikely(!replace_buf->data)) {
@@ -2847,10 +3150,9 @@ static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq)
 		}
 
 		replace_buf->mapping = mapping;
-		tpa_info->replace_buf.page_offset = 0;
-
-		tpa_info->replace_buf_mapping = mapping;
-		tpa_info->agg_state = QEDE_AGG_STATE_NONE;
+		tpa_info->buffer.page_offset = 0;
+		tpa_info->buffer_mapping = mapping;
+		tpa_info->state = QEDE_AGG_STATE_NONE;
 	}
 
 	return 0;
@@ -2872,8 +3174,13 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 	if (rxq->rx_buf_size > PAGE_SIZE)
 		rxq->rx_buf_size = PAGE_SIZE;
 
-	/* Segment size to spilt a page in multiple equal parts */
-	rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size);
+	/* Segment size to split a page in multiple equal parts,
+	 * unless XDP is used in which case we'd use the entire page.
+	 */
+	if (!edev->xdp_prog)
+		rxq->rx_buf_seg_size = roundup_pow_of_two(rxq->rx_buf_size);
+	else
+		rxq->rx_buf_seg_size = PAGE_SIZE;
 
 	/* Allocate the parallel driver ring for Rx buffers */
 	size = sizeof(*rxq->sw_rx_ring) * RX_RING_SIZE;
@@ -2909,7 +3216,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 
 	/* Allocate buffers for the Rx ring */
 	for (i = 0; i < rxq->num_rx_buffers; i++) {
-		rc = qede_alloc_rx_buffer(edev, rxq);
+		rc = qede_alloc_rx_buffer(rxq);
 		if (rc) {
 			DP_ERR(edev,
 			       "Rx buffers allocation failed at index %d\n", i);
@@ -2925,7 +3232,10 @@ err:
 static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	/* Free the parallel SW ring */
-	kfree(txq->sw_tx_ring);
+	if (txq->is_xdp)
+		kfree(txq->sw_tx_ring.pages);
+	else
+		kfree(txq->sw_tx_ring.skbs);
 
 	/* Free the real RQ ring used by FW */
 	edev->ops->common->chain_free(edev->cdev, &txq->tx_pbl);
@@ -2934,17 +3244,22 @@ static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 /* This function allocates all memory needed per Tx queue */
 static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
-	int size, rc;
 	union eth_tx_bd_types *p_virt;
+	int size, rc;
 
 	txq->num_tx_buffers = edev->q_num_tx_buffers;
 
 	/* Allocate the parallel driver ring for Tx buffers */
-	size = sizeof(*txq->sw_tx_ring) * TX_RING_SIZE;
-	txq->sw_tx_ring = kzalloc(size, GFP_KERNEL);
-	if (!txq->sw_tx_ring) {
-		DP_NOTICE(edev, "Tx buffers ring allocation failed\n");
-		goto err;
+	if (txq->is_xdp) {
+		size = sizeof(*txq->sw_tx_ring.pages) * TX_RING_SIZE;
+		txq->sw_tx_ring.pages = kzalloc(size, GFP_KERNEL);
+		if (!txq->sw_tx_ring.pages)
+			goto err;
+	} else {
+		size = sizeof(*txq->sw_tx_ring.skbs) * TX_RING_SIZE;
+		txq->sw_tx_ring.skbs = kzalloc(size, GFP_KERNEL);
+		if (!txq->sw_tx_ring.skbs)
+			goto err;
 	}
 
 	rc = edev->ops->common->chain_alloc(edev->cdev,
@@ -2966,16 +3281,13 @@ err:
 /* This function frees all memory of a single fp */
 static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
-	int tc;
-
 	qede_free_mem_sb(edev, fp->sb_info);
 
 	if (fp->type & QEDE_FASTPATH_RX)
 		qede_free_mem_rxq(edev, fp->rxq);
 
 	if (fp->type & QEDE_FASTPATH_TX)
-		for (tc = 0; tc < edev->num_tc; tc++)
-			qede_free_mem_txq(edev, &fp->txqs[tc]);
+		qede_free_mem_txq(edev, fp->txq);
 }
 
 /* This function allocates all memory needed for a single fp (i.e. an entity
@@ -2983,28 +3295,31 @@ static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
  */
 static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp)
 {
-	int rc, tc;
+	int rc = 0;
 
 	rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id);
 	if (rc)
-		goto err;
+		goto out;
 
 	if (fp->type & QEDE_FASTPATH_RX) {
 		rc = qede_alloc_mem_rxq(edev, fp->rxq);
 		if (rc)
-			goto err;
+			goto out;
+	}
+
+	if (fp->type & QEDE_FASTPATH_XDP) {
+		rc = qede_alloc_mem_txq(edev, fp->xdp_tx);
+		if (rc)
+			goto out;
 	}
 
 	if (fp->type & QEDE_FASTPATH_TX) {
-		for (tc = 0; tc < edev->num_tc; tc++) {
-			rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]);
-			if (rc)
-				goto err;
-		}
+		rc = qede_alloc_mem_txq(edev, fp->txq);
+		if (rc)
+			goto out;
 	}
 
-	return 0;
-err:
+out:
 	return rc;
 }
 
@@ -3043,7 +3358,7 @@ static int qede_alloc_mem_load(struct qede_dev *edev)
 /* This function inits fp content and resets the SB, RXQ and TXQ structures */
 static void qede_init_fp(struct qede_dev *edev)
 {
-	int queue_id, rxq_index = 0, txq_index = 0, tc;
+	int queue_id, rxq_index = 0, txq_index = 0;
 	struct qede_fastpath *fp;
 
 	for_each_queue(queue_id) {
@@ -3052,25 +3367,28 @@ static void qede_init_fp(struct qede_dev *edev)
 		fp->edev = edev;
 		fp->id = queue_id;
 
-		memset((void *)&fp->napi, 0, sizeof(fp->napi));
-
-		memset((void *)fp->sb_info, 0, sizeof(*fp->sb_info));
+		if (fp->type & QEDE_FASTPATH_XDP) {
+			fp->xdp_tx->index = QEDE_TXQ_IDX_TO_XDP(edev,
+								rxq_index);
+			fp->xdp_tx->is_xdp = 1;
+		}
 
 		if (fp->type & QEDE_FASTPATH_RX) {
-			memset((void *)fp->rxq, 0, sizeof(*fp->rxq));
 			fp->rxq->rxq_id = rxq_index++;
+
+			/* Determine how to map buffers for this queue */
+			if (fp->type & QEDE_FASTPATH_XDP)
+				fp->rxq->data_direction = DMA_BIDIRECTIONAL;
+			else
+				fp->rxq->data_direction = DMA_FROM_DEVICE;
+			fp->rxq->dev = &edev->pdev->dev;
 		}
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			memset((void *)fp->txqs, 0,
-			       (edev->num_tc * sizeof(*fp->txqs)));
-			for (tc = 0; tc < edev->num_tc; tc++) {
-				fp->txqs[tc].index = txq_index +
-				    tc * QEDE_TSS_COUNT(edev);
-				if (edev->dev_info.is_legacy)
-					fp->txqs[tc].is_legacy = true;
-			}
-			txq_index++;
+			fp->txq->index = txq_index++;
+			if (edev->dev_info.is_legacy)
+				fp->txq->is_legacy = 1;
+			fp->txq->dev = &edev->pdev->dev;
 		}
 
 		snprintf(fp->name, sizeof(fp->name), "%s-fp-%d",
@@ -3238,11 +3556,18 @@ static int qede_drain_txq(struct qede_dev *edev,
 	return 0;
 }
 
+static int qede_stop_txq(struct qede_dev *edev,
+			 struct qede_tx_queue *txq, int rss_id)
+{
+	return edev->ops->q_tx_stop(edev->cdev, rss_id, txq->handle);
+}
+
 static int qede_stop_queues(struct qede_dev *edev)
 {
 	struct qed_update_vport_params vport_update_params;
 	struct qed_dev *cdev = edev->cdev;
-	int rc, tc, i;
+	struct qede_fastpath *fp;
+	int rc, i;
 
 	/* Disable the vport */
 	memset(&vport_update_params, 0, sizeof(vport_update_params));
@@ -3259,53 +3584,49 @@ static int qede_stop_queues(struct qede_dev *edev)
 
 	/* Flush Tx queues. If needed, request drain from MCP */
 	for_each_queue(i) {
-		struct qede_fastpath *fp = &edev->fp_array[i];
+		fp = &edev->fp_array[i];
 
 		if (fp->type & QEDE_FASTPATH_TX) {
-			for (tc = 0; tc < edev->num_tc; tc++) {
-				struct qede_tx_queue *txq = &fp->txqs[tc];
+			rc = qede_drain_txq(edev, fp->txq, true);
+			if (rc)
+				return rc;
+		}
 
-				rc = qede_drain_txq(edev, txq, true);
-				if (rc)
-					return rc;
-			}
+		if (fp->type & QEDE_FASTPATH_XDP) {
+			rc = qede_drain_txq(edev, fp->xdp_tx, true);
+			if (rc)
+				return rc;
 		}
 	}
 
 	/* Stop all Queues in reverse order */
 	for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) {
-		struct qed_stop_rxq_params rx_params;
+		fp = &edev->fp_array[i];
 
 		/* Stop the Tx Queue(s) */
-		if (edev->fp_array[i].type & QEDE_FASTPATH_TX) {
-			for (tc = 0; tc < edev->num_tc; tc++) {
-				struct qed_stop_txq_params tx_params;
-				u8 val;
-
-				tx_params.rss_id = i;
-				val = edev->fp_array[i].txqs[tc].index;
-				tx_params.tx_queue_id = val;
-				rc = edev->ops->q_tx_stop(cdev, &tx_params);
-				if (rc) {
-					DP_ERR(edev, "Failed to stop TXQ #%d\n",
-					       tx_params.tx_queue_id);
-					return rc;
-				}
-			}
+		if (fp->type & QEDE_FASTPATH_TX) {
+			rc = qede_stop_txq(edev, fp->txq, i);
+			if (rc)
+				return rc;
 		}
 
 		/* Stop the Rx Queue */
-		if (edev->fp_array[i].type & QEDE_FASTPATH_RX) {
-			memset(&rx_params, 0, sizeof(rx_params));
-			rx_params.rss_id = i;
-			rx_params.rx_queue_id = edev->fp_array[i].rxq->rxq_id;
-
-			rc = edev->ops->q_rx_stop(cdev, &rx_params);
+		if (fp->type & QEDE_FASTPATH_RX) {
+			rc = edev->ops->q_rx_stop(cdev, i, fp->rxq->handle);
 			if (rc) {
 				DP_ERR(edev, "Failed to stop RXQ #%d\n", i);
 				return rc;
 			}
 		}
+
+		/* Stop the XDP forwarding queue */
+		if (fp->type & QEDE_FASTPATH_XDP) {
+			rc = qede_stop_txq(edev, fp->xdp_tx, i);
+			if (rc)
+				return rc;
+
+			bpf_prog_put(fp->rxq->xdp_prog);
+		}
 	}
 
 	/* Stop the vport */
@@ -3316,9 +3637,55 @@ static int qede_stop_queues(struct qede_dev *edev)
 	return rc;
 }
 
+static int qede_start_txq(struct qede_dev *edev,
+			  struct qede_fastpath *fp,
+			  struct qede_tx_queue *txq, u8 rss_id, u16 sb_idx)
+{
+	dma_addr_t phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
+	u32 page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
+	struct qed_queue_start_common_params params;
+	struct qed_txq_start_ret_params ret_params;
+	int rc;
+
+	memset(&params, 0, sizeof(params));
+	memset(&ret_params, 0, sizeof(ret_params));
+
+	/* Let the XDP queue share the queue-zone with one of the regular txqs.
+	 * We don't really care about its coalescing.
+	 */
+	if (txq->is_xdp)
+		params.queue_id = QEDE_TXQ_XDP_TO_IDX(edev, txq);
+	else
+		params.queue_id = txq->index;
+
+	params.sb = fp->sb_info->igu_sb_id;
+	params.sb_idx = sb_idx;
+
+	rc = edev->ops->q_tx_start(edev->cdev, rss_id, &params, phys_table,
+				   page_cnt, &ret_params);
+	if (rc) {
+		DP_ERR(edev, "Start TXQ #%d failed %d\n", txq->index, rc);
+		return rc;
+	}
+
+	txq->doorbell_addr = ret_params.p_doorbell;
+	txq->handle = ret_params.p_handle;
+
+	/* Determine the FW consumer address associated */
+	txq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[sb_idx];
+
+	/* Prepare the doorbell parameters */
+	SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_DEST, DB_DEST_XCM);
+	SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD, DB_AGG_CMD_SET);
+	SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_VAL_SEL,
+		  DQ_XCM_ETH_TX_BD_PROD_CMD);
+	txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
+
+	return rc;
+}
+
 static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 {
-	int rc, tc, i;
 	int vlan_removal_en = 1;
 	struct qed_dev *cdev = edev->cdev;
 	struct qed_update_vport_params vport_update_params;
@@ -3326,6 +3693,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 	struct qed_dev_info *qed_info = &edev->dev_info.common;
 	struct qed_start_vport_params start = {0};
 	bool reset_rss_indir = false;
+	int rc, i;
 
 	if (!edev->num_queues) {
 		DP_ERR(edev,
@@ -3357,11 +3725,12 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 		u32 page_cnt;
 
 		if (fp->type & QEDE_FASTPATH_RX) {
+			struct qed_rxq_start_ret_params ret_params;
 			struct qede_rx_queue *rxq = fp->rxq;
 			__le16 *val;
 
+			memset(&ret_params, 0, sizeof(ret_params));
 			memset(&q_params, 0, sizeof(q_params));
-			q_params.rss_id = i;
 			q_params.queue_id = rxq->rxq_id;
 			q_params.vport_id = 0;
 			q_params.sb = fp->sb_info->igu_sb_id;
@@ -3371,60 +3740,44 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats)
 			    qed_chain_get_pbl_phys(&rxq->rx_comp_ring);
 			page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring);
 
-			rc = edev->ops->q_rx_start(cdev, &q_params,
+			rc = edev->ops->q_rx_start(cdev, i, &q_params,
 						   rxq->rx_buf_size,
 						   rxq->rx_bd_ring.p_phys_addr,
 						   p_phys_table,
-						   page_cnt,
-						   &rxq->hw_rxq_prod_addr);
+						   page_cnt, &ret_params);
 			if (rc) {
 				DP_ERR(edev, "Start RXQ #%d failed %d\n", i,
 				       rc);
 				return rc;
 			}
 
+			/* Use the return parameters */
+			rxq->hw_rxq_prod_addr = ret_params.p_prod;
+			rxq->handle = ret_params.p_handle;
+
 			val = &fp->sb_info->sb_virt->pi_array[RX_PI];
 			rxq->hw_cons_ptr = val;
 
 			qede_update_rx_prod(edev, rxq);
 		}
 
-		if (!(fp->type & QEDE_FASTPATH_TX))
-			continue;
-
-		for (tc = 0; tc < edev->num_tc; tc++) {
-			struct qede_tx_queue *txq = &fp->txqs[tc];
-
-			p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl);
-			page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl);
-
-			memset(&q_params, 0, sizeof(q_params));
-			q_params.rss_id = i;
-			q_params.queue_id = txq->index;
-			q_params.vport_id = 0;
-			q_params.sb = fp->sb_info->igu_sb_id;
-			q_params.sb_idx = TX_PI(tc);
+		if (fp->type & QEDE_FASTPATH_XDP) {
+			rc = qede_start_txq(edev, fp, fp->xdp_tx, i, XDP_PI);
+			if (rc)
+				return rc;
 
-			rc = edev->ops->q_tx_start(cdev, &q_params,
-						   p_phys_table, page_cnt,
-						   &txq->doorbell_addr);
-			if (rc) {
-				DP_ERR(edev, "Start TXQ #%d failed %d\n",
-				       txq->index, rc);
+			fp->rxq->xdp_prog = bpf_prog_add(edev->xdp_prog, 1);
+			if (IS_ERR(fp->rxq->xdp_prog)) {
+				rc = PTR_ERR(fp->rxq->xdp_prog);
+				fp->rxq->xdp_prog = NULL;
 				return rc;
 			}
+		}
 
-			txq->hw_cons_ptr =
-				&fp->sb_info->sb_virt->pi_array[TX_PI(tc)];
-			SET_FIELD(txq->tx_db.data.params,
-				  ETH_DB_DATA_DEST, DB_DEST_XCM);
-			SET_FIELD(txq->tx_db.data.params, ETH_DB_DATA_AGG_CMD,
-				  DB_AGG_CMD_SET);
-			SET_FIELD(txq->tx_db.data.params,
-				  ETH_DB_DATA_AGG_VAL_SEL,
-				  DQ_XCM_ETH_TX_BD_PROD_CMD);
-
-			txq->tx_db.data.agg_flags = DQ_XCM_ETH_DQ_CF_CMD;
+		if (fp->type & QEDE_FASTPATH_TX) {
+			rc = qede_start_txq(edev, fp, fp->txq, i, TX_PI(0));
+			if (rc)
+				return rc;
 		}
 	}
 
@@ -3519,15 +3872,18 @@ enum qede_unload_mode {
 	QEDE_UNLOAD_NORMAL,
 };
 
-static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode)
+static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode,
+			bool is_locked)
 {
 	struct qed_link_params link_params;
 	int rc;
 
 	DP_INFO(edev, "Starting qede unload\n");
 
+	if (!is_locked)
+		__qede_lock(edev);
+
 	qede_roce_dev_event_close(edev);
-	mutex_lock(&edev->qede_lock);
 	edev->state = QEDE_STATE_CLOSED;
 
 	/* Close OS Tx */
@@ -3559,7 +3915,8 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode)
 	qede_free_fp_array(edev);
 
 out:
-	mutex_unlock(&edev->qede_lock);
+	if (!is_locked)
+		__qede_unlock(edev);
 	DP_INFO(edev, "Ending qede unload\n");
 }
 
@@ -3568,7 +3925,8 @@ enum qede_load_mode {
 	QEDE_LOAD_RELOAD,
 };
 
-static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
+static int qede_load(struct qede_dev *edev, enum qede_load_mode mode,
+		     bool is_locked)
 {
 	struct qed_link_params link_params;
 	struct qed_link_output link_output;
@@ -3576,21 +3934,24 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 
 	DP_INFO(edev, "Starting qede load\n");
 
+	if (!is_locked)
+		__qede_lock(edev);
+
 	rc = qede_set_num_queues(edev);
 	if (rc)
-		goto err0;
+		goto out;
 
 	rc = qede_alloc_fp_array(edev);
 	if (rc)
-		goto err0;
+		goto out;
 
 	qede_init_fp(edev);
 
 	rc = qede_alloc_mem_load(edev);
 	if (rc)
 		goto err1;
-	DP_INFO(edev, "Allocated %d RSS queues on %d TC/s\n",
-		QEDE_QUEUE_CNT(edev), edev->num_tc);
+	DP_INFO(edev, "Allocated %d Rx, %d Tx queues\n",
+		QEDE_RSS_COUNT(edev), QEDE_TSS_COUNT(edev));
 
 	rc = qede_set_real_num_queues(edev);
 	if (rc)
@@ -3612,10 +3973,6 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 	/* Add primary mac and set Rx filters */
 	ether_addr_copy(edev->primary_mac, edev->ndev->dev_addr);
 
-	mutex_lock(&edev->qede_lock);
-	edev->state = QEDE_STATE_OPEN;
-	mutex_unlock(&edev->qede_lock);
-
 	/* Program un-configured VLANs */
 	qede_configure_vlan_filters(edev);
 
@@ -3630,10 +3987,12 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode)
 	qede_roce_dev_event_open(edev);
 	qede_link_update(edev, &link_output);
 
+	edev->state = QEDE_STATE_OPEN;
+
 	DP_INFO(edev, "Ending successfully qede load\n");
 
-	return 0;
 
+	goto out;
 err4:
 	qede_sync_free_irqs(edev);
 	memset(&edev->int_info.msix_cnt, 0, sizeof(struct qed_int_info));
@@ -3647,26 +4006,40 @@ err1:
 	edev->num_queues = 0;
 	edev->fp_num_tx = 0;
 	edev->fp_num_rx = 0;
-err0:
+out:
+	if (!is_locked)
+		__qede_unlock(edev);
+
 	return rc;
 }
 
+/* 'args->func' should be able to run between unload and reload assuming the
+ * interface is actually running, or on its own in case it's currently DOWN.
+ */
 void qede_reload(struct qede_dev *edev,
-		 void (*func)(struct qede_dev *, union qede_reload_args *),
-		 union qede_reload_args *args)
+		 struct qede_reload_args *args, bool is_locked)
 {
-	qede_unload(edev, QEDE_UNLOAD_NORMAL);
-	/* Call function handler to update parameters
-	 * needed for function load.
-	 */
-	if (func)
-		func(edev, args);
+	if (!is_locked)
+		__qede_lock(edev);
 
-	qede_load(edev, QEDE_LOAD_RELOAD);
+	/* Since qede_lock is held, internal state wouldn't change even
+	 * if netdev state would start transitioning. Check whether current
+	 * internal configuration indicates device is up, then reload.
+	 */
+	if (edev->state == QEDE_STATE_OPEN) {
+		qede_unload(edev, QEDE_UNLOAD_NORMAL, true);
+		if (args)
+			args->func(edev, args);
+		qede_load(edev, QEDE_LOAD_RELOAD, true);
+
+		/* Since no one is going to do it for us, re-configure */
+		qede_config_rx_mode(edev->ndev);
+	} else if (args) {
+		args->func(edev, args);
+	}
 
-	mutex_lock(&edev->qede_lock);
-	qede_config_rx_mode(edev->ndev);
-	mutex_unlock(&edev->qede_lock);
+	if (!is_locked)
+		__qede_unlock(edev);
 }
 
 /* called with rtnl_lock */
@@ -3679,13 +4052,14 @@ static int qede_open(struct net_device *ndev)
 
 	edev->ops->common->set_power_state(edev->cdev, PCI_D0);
 
-	rc = qede_load(edev, QEDE_LOAD_NORMAL);
-
+	rc = qede_load(edev, QEDE_LOAD_NORMAL, false);
 	if (rc)
 		return rc;
 
 	udp_tunnel_get_rx_info(ndev);
 
+	edev->ops->common->update_drv_state(edev->cdev, true);
+
 	return 0;
 }
 
@@ -3693,7 +4067,9 @@ static int qede_close(struct net_device *ndev)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 
-	qede_unload(edev, QEDE_UNLOAD_NORMAL);
+	qede_unload(edev, QEDE_UNLOAD_NORMAL, false);
+
+	edev->ops->common->update_drv_state(edev->cdev, false);
 
 	return 0;
 }
@@ -3755,6 +4131,8 @@ static int qede_set_mac_addr(struct net_device *ndev, void *p)
 	if (rc)
 		return rc;
 
+	edev->ops->common->update_mac(edev->cdev, addr->sa_data);
+
 	/* Add MAC filter according to the new unicast HW MAC address */
 	ether_addr_copy(edev->primary_mac, ndev->dev_addr);
 	return qede_set_ucast_rx_mac(edev, QED_FILTER_XCAST_TYPE_ADD,
@@ -3821,15 +4199,8 @@ static void qede_set_rx_mode(struct net_device *ndev)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
 
-	DP_INFO(edev, "qede_set_rx_mode called\n");
-
-	if (edev->state != QEDE_STATE_OPEN) {
-		DP_INFO(edev,
-			"qede_set_rx_mode called while interface is down\n");
-	} else {
-		set_bit(QEDE_SP_RX_MODE, &edev->sp_flags);
-		schedule_delayed_work(&edev->sp_task, 0);
-	}
+	set_bit(QEDE_SP_RX_MODE, &edev->sp_flags);
+	schedule_delayed_work(&edev->sp_task, 0);
 }
 
 /* Must be called with qede_lock held */
@@ -3877,7 +4248,7 @@ static void qede_config_rx_mode(struct net_device *ndev)
 
 	/* Check for promiscuous */
 	if ((ndev->flags & IFF_PROMISC) ||
-	    (uc_count > 15)) { /* @@@TBD resource allocation - 1 */
+	    (uc_count > edev->dev_info.num_mac_filters - 1)) {
 		accept_flags = QED_FILTER_RX_MODE_TYPE_PROMISC;
 	} else {
 		/* Add MAC filters according to the unicast secondary macs */
diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index b09a6b80d107..5c100ab86c00 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -3755,7 +3755,6 @@ static const struct net_device_ops ql3xxx_netdev_ops = {
 	.ndo_open		= ql3xxx_open,
 	.ndo_start_xmit		= ql3xxx_send,
 	.ndo_stop		= ql3xxx_close,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= ql3xxx_set_mac_address,
 	.ndo_tx_timeout		= ql3xxx_tx_timeout,
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
index 509b596cf1e8..838cc0ceafd8 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_hw.c
@@ -1024,12 +1024,6 @@ int qlcnic_change_mtu(struct net_device *netdev, int mtu)
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
 	int rc = 0;
 
-	if (mtu < P3P_MIN_MTU || mtu > P3P_MAX_MTU) {
-		dev_err(&adapter->netdev->dev, "%d bytes < mtu < %d bytes"
-			" not supported\n", P3P_MAX_MTU, P3P_MIN_MTU);
-		return -EINVAL;
-	}
-
 	rc = qlcnic_fw_cmd_set_mtu(adapter, mtu);
 
 	if (!rc)
diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
index 3ae3968b0edf..4c0cce962585 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c
@@ -2342,6 +2342,10 @@ qlcnic_setup_netdev(struct qlcnic_adapter *adapter, struct net_device *netdev,
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 	netdev->irq = adapter->msix_entries[0].vector;
 
+	/* MTU range: 68 - 9600 */
+	netdev->min_mtu = P3P_MIN_MTU;
+	netdev->max_mtu = P3P_MAX_MTU;
+
 	err = qlcnic_set_real_num_queues(adapter, adapter->drv_tx_rings,
 					 adapter->drv_sds_rings);
 	if (err)
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index fd4a8e473f11..1409412ab39d 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -4788,6 +4788,13 @@ static int qlge_probe(struct pci_dev *pdev,
 	ndev->ethtool_ops = &qlge_ethtool_ops;
 	ndev->watchdog_timeo = 10 * HZ;
 
+	/* MTU range: this driver only supports 1500 or 9000, so this only
+	 * filters out values outside the 1500-9000 range, and we rely on
+	 * qlge_change_mtu to make sure only 1500 or 9000 are allowed
+	 */
+	ndev->min_mtu = ETH_DATA_LEN;
+	ndev->max_mtu = 9000;
+
 	err = register_netdev(ndev);
 	if (err) {
 		dev_err(&pdev->dev, "net device registration failed.\n");
diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile
index 01ee144c6386..7a6687982dae 100644
--- a/drivers/net/ethernet/qualcomm/emac/Makefile
+++ b/drivers/net/ethernet/qualcomm/emac/Makefile
@@ -4,4 +4,6 @@
 
 obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o
 
-qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o
+qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o \
+		  emac-sgmii-fsm9900.o emac-sgmii-qdf2432.o \
+		  emac-sgmii-qdf2400.o
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
index da4e90db4d98..99a14df28b96 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-phy.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c
@@ -212,6 +212,7 @@ int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt)
 
 		phy_np = of_parse_phandle(np, "phy-handle", 0);
 		adpt->phydev = of_phy_find_device(phy_np);
+		of_node_put(phy_np);
 	}
 
 	if (!adpt->phydev) {
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
new file mode 100644
index 000000000000..af690e1a6e7b
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-fsm9900.c
@@ -0,0 +1,245 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. FSM9900 EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include "emac.h"
+
+/* EMAC_QSERDES register offsets */
+#define EMAC_QSERDES_COM_SYS_CLK_CTRL		0x0000
+#define EMAC_QSERDES_COM_PLL_CNTRL		0x0014
+#define EMAC_QSERDES_COM_PLL_IP_SETI		0x0018
+#define EMAC_QSERDES_COM_PLL_CP_SETI		0x0024
+#define EMAC_QSERDES_COM_PLL_IP_SETP		0x0028
+#define EMAC_QSERDES_COM_PLL_CP_SETP		0x002c
+#define EMAC_QSERDES_COM_SYSCLK_EN_SEL		0x0038
+#define EMAC_QSERDES_COM_RESETSM_CNTRL		0x0040
+#define EMAC_QSERDES_COM_PLLLOCK_CMP1		0x0044
+#define EMAC_QSERDES_COM_PLLLOCK_CMP2		0x0048
+#define EMAC_QSERDES_COM_PLLLOCK_CMP3		0x004c
+#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN		0x0050
+#define EMAC_QSERDES_COM_DEC_START1		0x0064
+#define EMAC_QSERDES_COM_DIV_FRAC_START1	0x0098
+#define EMAC_QSERDES_COM_DIV_FRAC_START2	0x009c
+#define EMAC_QSERDES_COM_DIV_FRAC_START3	0x00a0
+#define EMAC_QSERDES_COM_DEC_START2		0x00a4
+#define EMAC_QSERDES_COM_PLL_CRCTRL		0x00ac
+#define EMAC_QSERDES_COM_RESET_SM		0x00bc
+#define EMAC_QSERDES_TX_BIST_MODE_LANENO	0x0100
+#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL	0x0108
+#define EMAC_QSERDES_TX_TX_DRV_LVL		0x010c
+#define EMAC_QSERDES_TX_LANE_MODE		0x0150
+#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN	0x0170
+#define EMAC_QSERDES_RX_CDR_CONTROL		0x0200
+#define EMAC_QSERDES_RX_CDR_CONTROL2		0x0210
+#define EMAC_QSERDES_RX_RX_EQ_GAIN12		0x0230
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_SERDES_START		0x0000
+#define EMAC_SGMII_PHY_CMN_PWR_CTRL		0x0004
+#define EMAC_SGMII_PHY_RX_PWR_CTRL		0x0008
+#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x000C
+#define EMAC_SGMII_PHY_LANE_CTRL1		0x0018
+#define EMAC_SGMII_PHY_CDR_CTRL0		0x0058
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x0080
+#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x00b4
+
+#define PLL_IPSETI(x)				((x) & 0x3f)
+
+#define PLL_CPSETI(x)				((x) & 0xff)
+
+#define PLL_IPSETP(x)				((x) & 0x3f)
+
+#define PLL_CPSETP(x)				((x) & 0x1f)
+
+#define PLL_RCTRL(x)				(((x) & 0xf) << 4)
+#define PLL_CCTRL(x)				((x) & 0xf)
+
+#define LANE_MODE(x)				((x) & 0x1f)
+
+#define SYSCLK_CM				BIT(4)
+#define SYSCLK_AC_COUPLE			BIT(3)
+
+#define OCP_EN					BIT(5)
+#define PLL_DIV_FFEN				BIT(2)
+#define PLL_DIV_ORD				BIT(1)
+
+#define SYSCLK_SEL_CMOS				BIT(3)
+
+#define FRQ_TUNE_MODE				BIT(4)
+
+#define PLLLOCK_CMP_EN				BIT(0)
+
+#define DEC_START1_MUX				BIT(7)
+#define DEC_START1(x)				((x) & 0x7f)
+
+#define DIV_FRAC_START_MUX			BIT(7)
+#define DIV_FRAC_START(x)			((x) & 0x7f)
+
+#define DIV_FRAC_START3_MUX			BIT(4)
+#define DIV_FRAC_START3(x)			((x) & 0xf)
+
+#define DEC_START2_MUX				BIT(1)
+#define DEC_START2				BIT(0)
+
+#define READY					BIT(5)
+
+#define TX_EMP_POST1_LVL_MUX			BIT(5)
+#define TX_EMP_POST1_LVL(x)			((x) & 0x1f)
+
+#define TX_DRV_LVL_MUX				BIT(4)
+#define TX_DRV_LVL(x)				((x) & 0xf)
+
+#define EMP_EN_MUX				BIT(1)
+#define EMP_EN					BIT(0)
+
+#define SECONDORDERENABLE			BIT(6)
+#define FIRSTORDER_THRESH(x)			(((x) & 0x7) << 3)
+#define SECONDORDERGAIN(x)			((x) & 0x7)
+
+#define RX_EQ_GAIN2(x)				(((x) & 0xf) << 4)
+#define RX_EQ_GAIN1(x)				((x) & 0xf)
+
+#define SERDES_START				BIT(0)
+
+#define BIAS_EN					BIT(6)
+#define PLL_EN					BIT(5)
+#define SYSCLK_EN				BIT(4)
+#define CLKBUF_L_EN				BIT(3)
+#define PLL_TXCLK_EN				BIT(1)
+#define PLL_RXCLK_EN				BIT(0)
+
+#define L0_RX_SIGDET_EN				BIT(7)
+#define L0_RX_TERM_MODE(x)			(((x) & 3) << 4)
+#define L0_RX_I_EN				BIT(1)
+
+#define L0_TX_EN				BIT(5)
+#define L0_CLKBUF_EN				BIT(4)
+#define L0_TRAN_BIAS_EN				BIT(1)
+
+#define L0_RX_EQUALIZE_ENABLE			BIT(6)
+#define L0_RESET_TSYNC_EN			BIT(4)
+#define L0_DRV_LVL(x)				((x) & 0xf)
+
+#define PWRDN_B					BIT(0)
+#define CDR_MAX_CNT(x)				((x) & 0xff)
+
+#define PLLLOCK_CMP(x)				((x) & 0xff)
+
+#define SERDES_START_WAIT_TIMES			100
+
+struct emac_reg_write {
+	unsigned int offset;
+	u32 val;
+};
+
+static void emac_reg_write_all(void __iomem *base,
+			       const struct emac_reg_write *itr, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; ++itr, ++i)
+		writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write physical_coding_sublayer_programming[] = {
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
+		BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN},
+	{EMAC_SGMII_PHY_RX_PWR_CTRL,
+		L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN},
+	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
+		BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN |
+		PLL_RXCLK_EN},
+	{EMAC_SGMII_PHY_LANE_CTRL1,
+		L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)},
+};
+
+static const struct emac_reg_write sysclk_refclk_setting[] = {
+	{EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS},
+	{EMAC_QSERDES_COM_SYS_CLK_CTRL,	SYSCLK_CM | SYSCLK_AC_COUPLE},
+};
+
+static const struct emac_reg_write pll_setting[] = {
+	{EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)},
+	{EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)},
+	{EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)},
+	{EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)},
+	{EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)},
+	{EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD},
+	{EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)},
+	{EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2},
+	{EMAC_QSERDES_COM_DIV_FRAC_START1,
+		DIV_FRAC_START_MUX | DIV_FRAC_START(85)},
+	{EMAC_QSERDES_COM_DIV_FRAC_START2,
+		DIV_FRAC_START_MUX | DIV_FRAC_START(42)},
+	{EMAC_QSERDES_COM_DIV_FRAC_START3,
+		DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)},
+	{EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN},
+	{EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE},
+};
+
+static const struct emac_reg_write cdr_setting[] = {
+	{EMAC_QSERDES_RX_CDR_CONTROL,
+		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)},
+	{EMAC_QSERDES_RX_CDR_CONTROL2,
+		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)},
+};
+
+static const struct emac_reg_write tx_rx_setting[] = {
+	{EMAC_QSERDES_TX_BIST_MODE_LANENO, 0},
+	{EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)},
+	{EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN},
+	{EMAC_QSERDES_TX_TX_EMP_POST1_LVL,
+		TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)},
+	{EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)},
+	{EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)},
+};
+
+int emac_sgmii_init_fsm9900(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	unsigned int i;
+
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming,
+			   ARRAY_SIZE(physical_coding_sublayer_programming));
+	emac_reg_write_all(phy->base, sysclk_refclk_setting,
+			   ARRAY_SIZE(sysclk_refclk_setting));
+	emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting));
+	emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting));
+	emac_reg_write_all(phy->base, tx_rx_setting, ARRAY_SIZE(tx_rx_setting));
+
+	/* Power up the Ser/Des engine */
+	writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START);
+
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+		if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY)
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) {
+		netdev_err(adpt->netdev, "error: ser/des failed to start\n");
+		return -EIO;
+	}
+	/* Mask out all the SGMII interrupts */
+	writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
new file mode 100644
index 000000000000..5b8419498ef1
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2400.c
@@ -0,0 +1,217 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. QDF2400 EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include "emac.h"
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x000C
+#define EMAC_SGMII_PHY_LANE_CTRL1		0x0018
+#define EMAC_SGMII_PHY_CDR_CTRL0		0x0058
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x0080
+#define EMAC_SGMII_PHY_RESET_CTRL		0x00a8
+#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x00b4
+
+/* SGMII digital lane registers */
+#define EMAC_SGMII_LN_DRVR_CTRL0		0x000C
+#define EMAC_SGMII_LN_DRVR_TAP_EN		0x0018
+#define EMAC_SGMII_LN_TX_MARGINING		0x001C
+#define EMAC_SGMII_LN_TX_PRE			0x0020
+#define EMAC_SGMII_LN_TX_POST			0x0024
+#define EMAC_SGMII_LN_TX_BAND_MODE		0x0060
+#define EMAC_SGMII_LN_LANE_MODE			0x0064
+#define EMAC_SGMII_LN_PARALLEL_RATE		0x007C
+#define EMAC_SGMII_LN_CML_CTRL_MODE0		0x00C0
+#define EMAC_SGMII_LN_MIXER_CTRL_MODE0		0x00D8
+#define EMAC_SGMII_LN_VGA_INITVAL		0x013C
+#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0	0x0184
+#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0	0x0190
+#define EMAC_SGMII_LN_UCDR_SO_CONFIG		0x019C
+#define EMAC_SGMII_LN_RX_BAND			0x01A4
+#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0	0x01C0
+#define EMAC_SGMII_LN_RSM_CONFIG		0x01F8
+#define EMAC_SGMII_LN_SIGDET_ENABLES		0x0230
+#define EMAC_SGMII_LN_SIGDET_CNTRL		0x0234
+#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL	0x0238
+#define EMAC_SGMII_LN_RX_EN_SIGNAL		0x02AC
+#define EMAC_SGMII_LN_RX_MISC_CNTRL0		0x02B8
+#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV		0x02C8
+
+/* SGMII digital lane register values */
+#define UCDR_STEP_BY_TWO_MODE0			BIT(7)
+#define UCDR_xO_GAIN_MODE(x)			((x) & 0x7f)
+#define UCDR_ENABLE				BIT(6)
+#define UCDR_SO_SATURATION(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS4			BIT(7)
+#define SIGDET_EN_PS0_TO_PS2			BIT(6)
+
+#define TXVAL_VALID_INIT			BIT(4)
+#define KR_PCIGEN3_MODE				BIT(0)
+
+#define MAIN_EN					BIT(0)
+
+#define TX_MARGINING_MUX			BIT(6)
+#define TX_MARGINING(x)				((x) & 0x3f)
+
+#define TX_PRE_MUX				BIT(6)
+
+#define TX_POST_MUX				BIT(6)
+
+#define CML_GEAR_MODE(x)			(((x) & 7) << 3)
+#define CML2CMOS_IBOOST_MODE(x)			((x) & 7)
+
+#define MIXER_LOADB_MODE(x)			(((x) & 0xf) << 2)
+#define MIXER_DATARATE_MODE(x)			((x) & 3)
+
+#define VGA_THRESH_DFE(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS0_TO_PS2		BIT(5)
+#define SIGDET_FLT_BYP				BIT(0)
+
+#define SIGDET_LVL(x)				(((x) & 0xf) << 4)
+
+#define SIGDET_DEGLITCH_CTRL(x)			(((x) & 0xf) << 1)
+
+#define INVERT_PCS_RX_CLK			BIT(7)
+
+#define DRVR_LOGIC_CLK_EN			BIT(4)
+#define DRVR_LOGIC_CLK_DIV(x)			((x) & 0xf)
+
+#define PARALLEL_RATE_MODE0(x)			((x) & 0x3)
+
+#define BAND_MODE0(x)				((x) & 0x3)
+
+#define LANE_MODE(x)				((x) & 0x1f)
+
+#define CDR_PD_SEL_MODE0(x)			(((x) & 0x3) << 5)
+#define EN_DLL_MODE0				BIT(4)
+#define EN_IQ_DCC_MODE0				BIT(3)
+#define EN_IQCAL_MODE0				BIT(2)
+
+#define BYPASS_RSM_SAMP_CAL			BIT(1)
+#define BYPASS_RSM_DLL_CAL			BIT(0)
+
+#define L0_RX_EQUALIZE_ENABLE			BIT(6)
+
+#define PWRDN_B					BIT(0)
+
+#define CDR_MAX_CNT(x)				((x) & 0xff)
+
+#define SERDES_START_WAIT_TIMES			100
+
+struct emac_reg_write {
+	unsigned int offset;
+	u32 val;
+};
+
+static void emac_reg_write_all(void __iomem *base,
+			       const struct emac_reg_write *itr, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; ++itr, ++i)
+		writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write sgmii_laned[] = {
+	/* CDR Settings */
+	{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
+		UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
+	{EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
+	{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
+
+	/* TX/RX Settings */
+	{EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
+
+	{EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
+	{EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
+	{EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
+	{EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
+	{EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
+
+	{EMAC_SGMII_LN_CML_CTRL_MODE0,
+		CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
+	{EMAC_SGMII_LN_MIXER_CTRL_MODE0,
+		MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
+	{EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
+	{EMAC_SGMII_LN_SIGDET_ENABLES,
+		SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
+	{EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
+
+	{EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
+	{EMAC_SGMII_LN_RX_MISC_CNTRL0, INVERT_PCS_RX_CLK},
+	{EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
+		DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
+
+	{EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
+	{EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(1)},
+	{EMAC_SGMII_LN_RX_BAND, BAND_MODE0(2)},
+	{EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
+	{EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(2) |
+		EN_DLL_MODE0 | EN_IQ_DCC_MODE0 | EN_IQCAL_MODE0},
+	{EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
+};
+
+static const struct emac_reg_write physical_coding_sublayer_programming[] = {
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
+	{EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
+};
+
+int emac_sgmii_init_qdf2400(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	void __iomem *phy_regs = phy->base;
+	void __iomem *laned = phy->digital;
+	unsigned int i;
+	u32 lnstatus;
+
+	/* PCS lane-x init */
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming,
+			   ARRAY_SIZE(physical_coding_sublayer_programming));
+
+	/* SGMII lane-x init */
+	emac_reg_write_all(phy->digital, sgmii_laned, ARRAY_SIZE(sgmii_laned));
+
+	/* Power up PCS and start reset lane state machine */
+
+	writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
+	writel(1, laned + SGMII_LN_RSM_START);
+
+	/* Wait for c_ready assertion */
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+		lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
+		if (lnstatus & BIT(1))
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) {
+		netdev_err(adpt->netdev, "SGMII failed to start\n");
+		return -EIO;
+	}
+
+	/* Disable digital and SERDES loopback */
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
+	writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
+
+	/* Mask out all the SGMII interrupts */
+	writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c
new file mode 100644
index 000000000000..6170200d7479
--- /dev/null
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii-qdf2432.c
@@ -0,0 +1,210 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/* Qualcomm Technologies, Inc. QDF2432 EMAC SGMII Controller driver.
+ */
+
+#include <linux/iopoll.h>
+#include "emac.h"
+
+/* EMAC_SGMII register offsets */
+#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x000C
+#define EMAC_SGMII_PHY_LANE_CTRL1		0x0018
+#define EMAC_SGMII_PHY_CDR_CTRL0		0x0058
+#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x0080
+#define EMAC_SGMII_PHY_RESET_CTRL		0x00a8
+#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x00b4
+
+/* SGMII digital lane registers */
+#define EMAC_SGMII_LN_DRVR_CTRL0		0x000C
+#define EMAC_SGMII_LN_DRVR_TAP_EN		0x0018
+#define EMAC_SGMII_LN_TX_MARGINING		0x001C
+#define EMAC_SGMII_LN_TX_PRE			0x0020
+#define EMAC_SGMII_LN_TX_POST			0x0024
+#define EMAC_SGMII_LN_TX_BAND_MODE		0x0060
+#define EMAC_SGMII_LN_LANE_MODE			0x0064
+#define EMAC_SGMII_LN_PARALLEL_RATE		0x0078
+#define EMAC_SGMII_LN_CML_CTRL_MODE0		0x00B8
+#define EMAC_SGMII_LN_MIXER_CTRL_MODE0		0x00D0
+#define EMAC_SGMII_LN_VGA_INITVAL		0x0134
+#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0	0x017C
+#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0	0x0188
+#define EMAC_SGMII_LN_UCDR_SO_CONFIG		0x0194
+#define EMAC_SGMII_LN_RX_BAND			0x019C
+#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0	0x01B8
+#define EMAC_SGMII_LN_RSM_CONFIG		0x01F0
+#define EMAC_SGMII_LN_SIGDET_ENABLES		0x0224
+#define EMAC_SGMII_LN_SIGDET_CNTRL		0x0228
+#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL	0x022C
+#define EMAC_SGMII_LN_RX_EN_SIGNAL		0x02A0
+#define EMAC_SGMII_LN_RX_MISC_CNTRL0		0x02AC
+#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV		0x02BC
+
+/* SGMII digital lane register values */
+#define UCDR_STEP_BY_TWO_MODE0			BIT(7)
+#define UCDR_xO_GAIN_MODE(x)			((x) & 0x7f)
+#define UCDR_ENABLE				BIT(6)
+#define UCDR_SO_SATURATION(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS4			BIT(7)
+#define SIGDET_EN_PS0_TO_PS2			BIT(6)
+
+#define TXVAL_VALID_INIT			BIT(4)
+#define KR_PCIGEN3_MODE				BIT(0)
+
+#define MAIN_EN					BIT(0)
+
+#define TX_MARGINING_MUX			BIT(6)
+#define TX_MARGINING(x)				((x) & 0x3f)
+
+#define TX_PRE_MUX				BIT(6)
+
+#define TX_POST_MUX				BIT(6)
+
+#define CML_GEAR_MODE(x)			(((x) & 7) << 3)
+#define CML2CMOS_IBOOST_MODE(x)			((x) & 7)
+
+#define MIXER_LOADB_MODE(x)			(((x) & 0xf) << 2)
+#define MIXER_DATARATE_MODE(x)			((x) & 3)
+
+#define VGA_THRESH_DFE(x)			((x) & 0x3f)
+
+#define SIGDET_LP_BYP_PS0_TO_PS2		BIT(5)
+#define SIGDET_FLT_BYP				BIT(0)
+
+#define SIGDET_LVL(x)				(((x) & 0xf) << 4)
+
+#define SIGDET_DEGLITCH_CTRL(x)			(((x) & 0xf) << 1)
+
+#define DRVR_LOGIC_CLK_EN			BIT(4)
+#define DRVR_LOGIC_CLK_DIV(x)			((x) & 0xf)
+
+#define PARALLEL_RATE_MODE0(x)			((x) & 0x3)
+
+#define BAND_MODE0(x)				((x) & 0x3)
+
+#define LANE_MODE(x)				((x) & 0x1f)
+
+#define CDR_PD_SEL_MODE0(x)			(((x) & 0x3) << 5)
+#define BYPASS_RSM_SAMP_CAL			BIT(1)
+#define BYPASS_RSM_DLL_CAL			BIT(0)
+
+#define L0_RX_EQUALIZE_ENABLE			BIT(6)
+
+#define PWRDN_B					BIT(0)
+
+#define CDR_MAX_CNT(x)				((x) & 0xff)
+
+#define SERDES_START_WAIT_TIMES			100
+
+struct emac_reg_write {
+	unsigned int offset;
+	u32 val;
+};
+
+static void emac_reg_write_all(void __iomem *base,
+			       const struct emac_reg_write *itr, size_t size)
+{
+	size_t i;
+
+	for (i = 0; i < size; ++itr, ++i)
+		writel(itr->val, base + itr->offset);
+}
+
+static const struct emac_reg_write sgmii_laned[] = {
+	/* CDR Settings */
+	{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
+		UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
+	{EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
+	{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
+
+	/* TX/RX Settings */
+	{EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
+
+	{EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
+	{EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
+	{EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
+	{EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
+	{EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
+
+	{EMAC_SGMII_LN_CML_CTRL_MODE0,
+		CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
+	{EMAC_SGMII_LN_MIXER_CTRL_MODE0,
+		MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
+	{EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
+	{EMAC_SGMII_LN_SIGDET_ENABLES,
+		SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
+	{EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
+
+	{EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
+	{EMAC_SGMII_LN_RX_MISC_CNTRL0, 0},
+	{EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
+		DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
+
+	{EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
+	{EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)},
+	{EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)},
+	{EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
+	{EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)},
+	{EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
+};
+
+static const struct emac_reg_write physical_coding_sublayer_programming[] = {
+	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
+	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
+	{EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
+	{EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
+};
+
+int emac_sgmii_init_qdf2432(struct emac_adapter *adpt)
+{
+	struct emac_phy *phy = &adpt->phy;
+	void __iomem *phy_regs = phy->base;
+	void __iomem *laned = phy->digital;
+	unsigned int i;
+	u32 lnstatus;
+
+	/* PCS lane-x init */
+	emac_reg_write_all(phy->base, physical_coding_sublayer_programming,
+			   ARRAY_SIZE(physical_coding_sublayer_programming));
+
+	/* SGMII lane-x init */
+	emac_reg_write_all(phy->digital, sgmii_laned, ARRAY_SIZE(sgmii_laned));
+
+	/* Power up PCS and start reset lane state machine */
+
+	writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
+	writel(1, laned + SGMII_LN_RSM_START);
+
+	/* Wait for c_ready assertion */
+	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
+		lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
+		if (lnstatus & BIT(1))
+			break;
+		usleep_range(100, 200);
+	}
+
+	if (i == SERDES_START_WAIT_TIMES) {
+		netdev_err(adpt->netdev, "SGMII failed to start\n");
+		return -EIO;
+	}
+
+	/* Disable digital and SERDES loopback */
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
+	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
+	writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
+
+	/* Mask out all the SGMII interrupts */
+	writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
index 72fe343c7a36..bf722a9bb09d 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c
@@ -20,448 +20,33 @@
 #include "emac-mac.h"
 #include "emac-sgmii.h"
 
-/* EMAC_QSERDES register offsets */
-#define EMAC_QSERDES_COM_SYS_CLK_CTRL		0x000000
-#define EMAC_QSERDES_COM_PLL_CNTRL		0x000014
-#define EMAC_QSERDES_COM_PLL_IP_SETI		0x000018
-#define EMAC_QSERDES_COM_PLL_CP_SETI		0x000024
-#define EMAC_QSERDES_COM_PLL_IP_SETP		0x000028
-#define EMAC_QSERDES_COM_PLL_CP_SETP		0x00002c
-#define EMAC_QSERDES_COM_SYSCLK_EN_SEL		0x000038
-#define EMAC_QSERDES_COM_RESETSM_CNTRL		0x000040
-#define EMAC_QSERDES_COM_PLLLOCK_CMP1		0x000044
-#define EMAC_QSERDES_COM_PLLLOCK_CMP2		0x000048
-#define EMAC_QSERDES_COM_PLLLOCK_CMP3		0x00004c
-#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN		0x000050
-#define EMAC_QSERDES_COM_DEC_START1		0x000064
-#define EMAC_QSERDES_COM_DIV_FRAC_START1	0x000098
-#define EMAC_QSERDES_COM_DIV_FRAC_START2	0x00009c
-#define EMAC_QSERDES_COM_DIV_FRAC_START3	0x0000a0
-#define EMAC_QSERDES_COM_DEC_START2		0x0000a4
-#define EMAC_QSERDES_COM_PLL_CRCTRL		0x0000ac
-#define EMAC_QSERDES_COM_RESET_SM		0x0000bc
-#define EMAC_QSERDES_TX_BIST_MODE_LANENO	0x000100
-#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL	0x000108
-#define EMAC_QSERDES_TX_TX_DRV_LVL		0x00010c
-#define EMAC_QSERDES_TX_LANE_MODE		0x000150
-#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN	0x000170
-#define EMAC_QSERDES_RX_CDR_CONTROL		0x000200
-#define EMAC_QSERDES_RX_CDR_CONTROL2		0x000210
-#define EMAC_QSERDES_RX_RX_EQ_GAIN12		0x000230
-
 /* EMAC_SGMII register offsets */
-#define EMAC_SGMII_PHY_SERDES_START		0x000000
-#define EMAC_SGMII_PHY_CMN_PWR_CTRL		0x000004
-#define EMAC_SGMII_PHY_RX_PWR_CTRL		0x000008
-#define EMAC_SGMII_PHY_TX_PWR_CTRL		0x00000C
-#define EMAC_SGMII_PHY_LANE_CTRL1		0x000018
-#define EMAC_SGMII_PHY_AUTONEG_CFG2		0x000048
-#define EMAC_SGMII_PHY_CDR_CTRL0		0x000058
-#define EMAC_SGMII_PHY_SPEED_CFG1		0x000074
-#define EMAC_SGMII_PHY_POW_DWN_CTRL0		0x000080
-#define EMAC_SGMII_PHY_RESET_CTRL		0x0000a8
-#define EMAC_SGMII_PHY_IRQ_CMD			0x0000ac
-#define EMAC_SGMII_PHY_INTERRUPT_CLEAR		0x0000b0
-#define EMAC_SGMII_PHY_INTERRUPT_MASK		0x0000b4
-#define EMAC_SGMII_PHY_INTERRUPT_STATUS		0x0000b8
-#define EMAC_SGMII_PHY_RX_CHK_STATUS		0x0000d4
-#define EMAC_SGMII_PHY_AUTONEG0_STATUS		0x0000e0
-#define EMAC_SGMII_PHY_AUTONEG1_STATUS		0x0000e4
-
-/* EMAC_QSERDES_COM_PLL_IP_SETI */
-#define PLL_IPSETI(x)				((x) & 0x3f)
-
-/* EMAC_QSERDES_COM_PLL_CP_SETI */
-#define PLL_CPSETI(x)				((x) & 0xff)
-
-/* EMAC_QSERDES_COM_PLL_IP_SETP */
-#define PLL_IPSETP(x)				((x) & 0x3f)
-
-/* EMAC_QSERDES_COM_PLL_CP_SETP */
-#define PLL_CPSETP(x)				((x) & 0x1f)
-
-/* EMAC_QSERDES_COM_PLL_CRCTRL */
-#define PLL_RCTRL(x)				(((x) & 0xf) << 4)
-#define PLL_CCTRL(x)				((x) & 0xf)
-
-/* SGMII v2 PHY registers per lane */
-#define EMAC_SGMII_PHY_LN_OFFSET		0x0400
-
-/* SGMII v2 digital lane registers */
-#define EMAC_SGMII_LN_DRVR_CTRL0		0x00C
-#define EMAC_SGMII_LN_DRVR_TAP_EN		0x018
-#define EMAC_SGMII_LN_TX_MARGINING		0x01C
-#define EMAC_SGMII_LN_TX_PRE			0x020
-#define EMAC_SGMII_LN_TX_POST			0x024
-#define EMAC_SGMII_LN_TX_BAND_MODE		0x060
-#define EMAC_SGMII_LN_LANE_MODE			0x064
-#define EMAC_SGMII_LN_PARALLEL_RATE		0x078
-#define EMAC_SGMII_LN_CML_CTRL_MODE0		0x0B8
-#define EMAC_SGMII_LN_MIXER_CTRL_MODE0		0x0D0
-#define EMAC_SGMII_LN_VGA_INITVAL		0x134
-#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0	0x17C
-#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0	0x188
-#define EMAC_SGMII_LN_UCDR_SO_CONFIG		0x194
-#define EMAC_SGMII_LN_RX_BAND			0x19C
-#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0	0x1B8
-#define EMAC_SGMII_LN_RSM_CONFIG		0x1F0
-#define EMAC_SGMII_LN_SIGDET_ENABLES		0x224
-#define EMAC_SGMII_LN_SIGDET_CNTRL		0x228
-#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL	0x22C
-#define EMAC_SGMII_LN_RX_EN_SIGNAL		0x2A0
-#define EMAC_SGMII_LN_RX_MISC_CNTRL0		0x2AC
-#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV		0x2BC
-
-/* SGMII v2 digital lane register values */
-#define UCDR_STEP_BY_TWO_MODE0			BIT(7)
-#define UCDR_xO_GAIN_MODE(x)			((x) & 0x7f)
-#define UCDR_ENABLE				BIT(6)
-#define UCDR_SO_SATURATION(x)			((x) & 0x3f)
-#define SIGDET_LP_BYP_PS4			BIT(7)
-#define SIGDET_EN_PS0_TO_PS2			BIT(6)
-#define EN_ACCOUPLEVCM_SW_MUX			BIT(5)
-#define EN_ACCOUPLEVCM_SW			BIT(4)
-#define RX_SYNC_EN				BIT(3)
-#define RXTERM_HIGHZ_PS5			BIT(2)
-#define SIGDET_EN_PS3				BIT(1)
-#define EN_ACCOUPLE_VCM_PS3			BIT(0)
-#define UFS_MODE				BIT(5)
-#define TXVAL_VALID_INIT			BIT(4)
-#define TXVAL_VALID_MUX				BIT(3)
-#define TXVAL_VALID				BIT(2)
-#define USB3P1_MODE				BIT(1)
-#define KR_PCIGEN3_MODE				BIT(0)
-#define PRE_EN					BIT(3)
-#define POST_EN					BIT(2)
-#define MAIN_EN_MUX				BIT(1)
-#define MAIN_EN					BIT(0)
-#define TX_MARGINING_MUX			BIT(6)
-#define TX_MARGINING(x)				((x) & 0x3f)
-#define TX_PRE_MUX				BIT(6)
-#define TX_PRE(x)				((x) & 0x3f)
-#define TX_POST_MUX				BIT(6)
-#define TX_POST(x)				((x) & 0x3f)
-#define CML_GEAR_MODE(x)			(((x) & 7) << 3)
-#define CML2CMOS_IBOOST_MODE(x)			((x) & 7)
-#define MIXER_LOADB_MODE(x)			(((x) & 0xf) << 2)
-#define MIXER_DATARATE_MODE(x)			((x) & 3)
-#define VGA_THRESH_DFE(x)			((x) & 0x3f)
-#define SIGDET_LP_BYP_PS0_TO_PS2		BIT(5)
-#define SIGDET_LP_BYP_MUX			BIT(4)
-#define SIGDET_LP_BYP				BIT(3)
-#define SIGDET_EN_MUX				BIT(2)
-#define SIGDET_EN				BIT(1)
-#define SIGDET_FLT_BYP				BIT(0)
-#define SIGDET_LVL(x)				(((x) & 0xf) << 4)
-#define SIGDET_BW_CTRL(x)			((x) & 0xf)
-#define SIGDET_DEGLITCH_CTRL(x)			(((x) & 0xf) << 1)
-#define SIGDET_DEGLITCH_BYP			BIT(0)
-#define INVERT_PCS_RX_CLK			BIT(7)
-#define PWM_EN					BIT(6)
-#define RXBIAS_SEL(x)				(((x) & 0x3) << 4)
-#define EBDAC_SIGN				BIT(3)
-#define EDAC_SIGN				BIT(2)
-#define EN_AUXTAP1SIGN_INVERT			BIT(1)
-#define EN_DAC_CHOPPING				BIT(0)
-#define DRVR_LOGIC_CLK_EN			BIT(4)
-#define DRVR_LOGIC_CLK_DIV(x)			((x) & 0xf)
-#define PARALLEL_RATE_MODE2(x)			(((x) & 0x3) << 4)
-#define PARALLEL_RATE_MODE1(x)			(((x) & 0x3) << 2)
-#define PARALLEL_RATE_MODE0(x)			((x) & 0x3)
-#define BAND_MODE2(x)				(((x) & 0x3) << 4)
-#define BAND_MODE1(x)				(((x) & 0x3) << 2)
-#define BAND_MODE0(x)				((x) & 0x3)
-#define LANE_SYNC_MODE				BIT(5)
-#define LANE_MODE(x)				((x) & 0x1f)
-#define CDR_PD_SEL_MODE0(x)			(((x) & 0x3) << 5)
-#define EN_DLL_MODE0				BIT(4)
-#define EN_IQ_DCC_MODE0				BIT(3)
-#define EN_IQCAL_MODE0				BIT(2)
-#define EN_QPATH_MODE0				BIT(1)
-#define EN_EPATH_MODE0				BIT(0)
-#define FORCE_TSYNC_ACK				BIT(7)
-#define FORCE_CMN_ACK				BIT(6)
-#define FORCE_CMN_READY				BIT(5)
-#define EN_RCLK_DEGLITCH			BIT(4)
-#define BYPASS_RSM_CDR_RESET			BIT(3)
-#define BYPASS_RSM_TSYNC			BIT(2)
-#define BYPASS_RSM_SAMP_CAL			BIT(1)
-#define BYPASS_RSM_DLL_CAL			BIT(0)
-
-/* EMAC_QSERDES_COM_SYS_CLK_CTRL */
-#define SYSCLK_CM				BIT(4)
-#define SYSCLK_AC_COUPLE			BIT(3)
-
-/* EMAC_QSERDES_COM_PLL_CNTRL */
-#define OCP_EN					BIT(5)
-#define PLL_DIV_FFEN				BIT(2)
-#define PLL_DIV_ORD				BIT(1)
-
-/* EMAC_QSERDES_COM_SYSCLK_EN_SEL */
-#define SYSCLK_SEL_CMOS				BIT(3)
-
-/* EMAC_QSERDES_COM_RESETSM_CNTRL */
-#define FRQ_TUNE_MODE				BIT(4)
-
-/* EMAC_QSERDES_COM_PLLLOCK_CMP_EN */
-#define PLLLOCK_CMP_EN				BIT(0)
-
-/* EMAC_QSERDES_COM_DEC_START1 */
-#define DEC_START1_MUX				BIT(7)
-#define DEC_START1(x)				((x) & 0x7f)
-
-/* EMAC_QSERDES_COM_DIV_FRAC_START1 * EMAC_QSERDES_COM_DIV_FRAC_START2 */
-#define DIV_FRAC_START_MUX			BIT(7)
-#define DIV_FRAC_START(x)			((x) & 0x7f)
-
-/* EMAC_QSERDES_COM_DIV_FRAC_START3 */
-#define DIV_FRAC_START3_MUX			BIT(4)
-#define DIV_FRAC_START3(x)			((x) & 0xf)
-
-/* EMAC_QSERDES_COM_DEC_START2 */
-#define DEC_START2_MUX				BIT(1)
-#define DEC_START2				BIT(0)
-
-/* EMAC_QSERDES_COM_RESET_SM */
-#define READY					BIT(5)
-
-/* EMAC_QSERDES_TX_TX_EMP_POST1_LVL */
-#define TX_EMP_POST1_LVL_MUX			BIT(5)
-#define TX_EMP_POST1_LVL(x)			((x) & 0x1f)
-#define TX_EMP_POST1_LVL_BMSK			0x1f
-#define TX_EMP_POST1_LVL_SHFT			0
-
-/* EMAC_QSERDES_TX_TX_DRV_LVL */
-#define TX_DRV_LVL_MUX				BIT(4)
-#define TX_DRV_LVL(x)				((x) & 0xf)
-
-/* EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN */
-#define EMP_EN_MUX				BIT(1)
-#define EMP_EN					BIT(0)
-
-/* EMAC_QSERDES_RX_CDR_CONTROL & EMAC_QSERDES_RX_CDR_CONTROL2 */
-#define HBW_PD_EN				BIT(7)
-#define SECONDORDERENABLE			BIT(6)
-#define FIRSTORDER_THRESH(x)			(((x) & 0x7) << 3)
-#define SECONDORDERGAIN(x)			((x) & 0x7)
-
-/* EMAC_QSERDES_RX_RX_EQ_GAIN12 */
-#define RX_EQ_GAIN2(x)				(((x) & 0xf) << 4)
-#define RX_EQ_GAIN1(x)				((x) & 0xf)
-
-/* EMAC_SGMII_PHY_SERDES_START */
-#define SERDES_START				BIT(0)
-
-/* EMAC_SGMII_PHY_CMN_PWR_CTRL */
-#define BIAS_EN					BIT(6)
-#define PLL_EN					BIT(5)
-#define SYSCLK_EN				BIT(4)
-#define CLKBUF_L_EN				BIT(3)
-#define PLL_TXCLK_EN				BIT(1)
-#define PLL_RXCLK_EN				BIT(0)
-
-/* EMAC_SGMII_PHY_RX_PWR_CTRL */
-#define L0_RX_SIGDET_EN				BIT(7)
-#define L0_RX_TERM_MODE(x)			(((x) & 3) << 4)
-#define L0_RX_I_EN				BIT(1)
-
-/* EMAC_SGMII_PHY_TX_PWR_CTRL */
-#define L0_TX_EN				BIT(5)
-#define L0_CLKBUF_EN				BIT(4)
-#define L0_TRAN_BIAS_EN				BIT(1)
-
-/* EMAC_SGMII_PHY_LANE_CTRL1 */
-#define L0_RX_EQUALIZE_ENABLE			BIT(6)
-#define L0_RESET_TSYNC_EN			BIT(4)
-#define L0_DRV_LVL(x)				((x) & 0xf)
-
-/* EMAC_SGMII_PHY_AUTONEG_CFG2 */
+#define EMAC_SGMII_PHY_AUTONEG_CFG2		0x0048
+#define EMAC_SGMII_PHY_SPEED_CFG1		0x0074
+#define EMAC_SGMII_PHY_IRQ_CMD			0x00ac
+#define EMAC_SGMII_PHY_INTERRUPT_CLEAR		0x00b0
+#define EMAC_SGMII_PHY_INTERRUPT_STATUS		0x00b8
+
 #define FORCE_AN_TX_CFG				BIT(5)
 #define FORCE_AN_RX_CFG				BIT(4)
 #define AN_ENABLE				BIT(0)
 
-/* EMAC_SGMII_PHY_SPEED_CFG1 */
 #define DUPLEX_MODE				BIT(4)
 #define SPDMODE_1000				BIT(1)
 #define SPDMODE_100				BIT(0)
 #define SPDMODE_10				0
-#define SPDMODE_BMSK				3
-#define SPDMODE_SHFT				0
-
-/* EMAC_SGMII_PHY_POW_DWN_CTRL0 */
-#define PWRDN_B					BIT(0)
-#define CDR_MAX_CNT(x)				((x) & 0xff)
-
-/* EMAC_QSERDES_TX_BIST_MODE_LANENO */
-#define BIST_LANE_NUMBER(x)			(((x) & 3) << 5)
-#define BISTMODE(x)				((x) & 0x1f)
-
-/* EMAC_QSERDES_COM_PLLLOCK_CMPx */
-#define PLLLOCK_CMP(x)				((x) & 0xff)
 
-/* EMAC_SGMII_PHY_RESET_CTRL */
-#define PHY_SW_RESET				BIT(0)
-
-/* EMAC_SGMII_PHY_IRQ_CMD */
 #define IRQ_GLOBAL_CLEAR			BIT(0)
 
-/* EMAC_SGMII_PHY_INTERRUPT_MASK */
 #define DECODE_CODE_ERR				BIT(7)
 #define DECODE_DISP_ERR				BIT(6)
-#define PLL_UNLOCK				BIT(5)
-#define AN_ILLEGAL_TERM				BIT(4)
-#define SYNC_FAIL				BIT(3)
-#define AN_START				BIT(2)
-#define AN_END					BIT(1)
-#define AN_REQUEST				BIT(0)
 
 #define SGMII_PHY_IRQ_CLR_WAIT_TIME		10
 
-#define SGMII_PHY_INTERRUPT_ERR (\
-	DECODE_CODE_ERR         |\
-	DECODE_DISP_ERR)
-
-#define SGMII_ISR_AN_MASK       (\
-	AN_REQUEST              |\
-	AN_START                |\
-	AN_END                  |\
-	AN_ILLEGAL_TERM         |\
-	PLL_UNLOCK              |\
-	SYNC_FAIL)
-
-#define SGMII_ISR_MASK          (\
-	SGMII_PHY_INTERRUPT_ERR |\
-	SGMII_ISR_AN_MASK)
-
-/* SGMII TX_CONFIG */
-#define TXCFG_LINK				0x8000
-#define TXCFG_MODE_BMSK				0x1c00
-#define TXCFG_1000_FULL				0x1800
-#define TXCFG_100_FULL				0x1400
-#define TXCFG_100_HALF				0x0400
-#define TXCFG_10_FULL				0x1000
-#define TXCFG_10_HALF				0x0000
+#define SGMII_PHY_INTERRUPT_ERR		(DECODE_CODE_ERR | DECODE_DISP_ERR)
 
 #define SERDES_START_WAIT_TIMES			100
 
-struct emac_reg_write {
-	unsigned int offset;
-	u32 val;
-};
-
-static void emac_reg_write_all(void __iomem *base,
-			       const struct emac_reg_write *itr, size_t size)
-{
-	size_t i;
-
-	for (i = 0; i < size; ++itr, ++i)
-		writel(itr->val, base + itr->offset);
-}
-
-static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = {
-	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
-	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
-	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
-		BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN},
-	{EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN},
-	{EMAC_SGMII_PHY_RX_PWR_CTRL,
-		L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN},
-	{EMAC_SGMII_PHY_CMN_PWR_CTRL,
-		BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN |
-		PLL_RXCLK_EN},
-	{EMAC_SGMII_PHY_LANE_CTRL1,
-		L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)},
-};
-
-static const struct emac_reg_write sysclk_refclk_setting[] = {
-	{EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS},
-	{EMAC_QSERDES_COM_SYS_CLK_CTRL,	SYSCLK_CM | SYSCLK_AC_COUPLE},
-};
-
-static const struct emac_reg_write pll_setting[] = {
-	{EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)},
-	{EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)},
-	{EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)},
-	{EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)},
-	{EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)},
-	{EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD},
-	{EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)},
-	{EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2},
-	{EMAC_QSERDES_COM_DIV_FRAC_START1,
-		DIV_FRAC_START_MUX | DIV_FRAC_START(85)},
-	{EMAC_QSERDES_COM_DIV_FRAC_START2,
-		DIV_FRAC_START_MUX | DIV_FRAC_START(42)},
-	{EMAC_QSERDES_COM_DIV_FRAC_START3,
-		DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)},
-	{EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN},
-	{EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE},
-};
-
-static const struct emac_reg_write cdr_setting[] = {
-	{EMAC_QSERDES_RX_CDR_CONTROL,
-		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)},
-	{EMAC_QSERDES_RX_CDR_CONTROL2,
-		SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)},
-};
-
-static const struct emac_reg_write tx_rx_setting[] = {
-	{EMAC_QSERDES_TX_BIST_MODE_LANENO, 0},
-	{EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)},
-	{EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN},
-	{EMAC_QSERDES_TX_TX_EMP_POST1_LVL,
-		TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)},
-	{EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)},
-	{EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)},
-};
-
-static const struct emac_reg_write sgmii_v2_laned[] = {
-	/* CDR Settings */
-	{EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0,
-		UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)},
-	{EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(0)},
-	{EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)},
-
-	/* TX/RX Settings */
-	{EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2},
-
-	{EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE},
-	{EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN},
-	{EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)},
-	{EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX},
-	{EMAC_SGMII_LN_TX_POST, TX_POST_MUX},
-
-	{EMAC_SGMII_LN_CML_CTRL_MODE0,
-		CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)},
-	{EMAC_SGMII_LN_MIXER_CTRL_MODE0,
-		MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)},
-	{EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)},
-	{EMAC_SGMII_LN_SIGDET_ENABLES,
-		SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP},
-	{EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)},
-
-	{EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)},
-	{EMAC_SGMII_LN_RX_MISC_CNTRL0, 0},
-	{EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV,
-		DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)},
-
-	{EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)},
-	{EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)},
-	{EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)},
-	{EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)},
-	{EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)},
-	{EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL},
-};
-
-static const struct emac_reg_write physical_coding_sublayer_programming_v2[] = {
-	{EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B},
-	{EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)},
-	{EMAC_SGMII_PHY_TX_PWR_CTRL, 0},
-	{EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE},
-};
-
 static int emac_sgmii_link_init(struct emac_adapter *adpt)
 {
 	struct phy_device *phydev = adpt->phydev;
@@ -536,98 +121,6 @@ static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits)
 	return 0;
 }
 
-int emac_sgmii_init_v1(struct emac_adapter *adpt)
-{
-	struct emac_phy *phy = &adpt->phy;
-	unsigned int i;
-	int ret;
-
-	ret = emac_sgmii_link_init(adpt);
-	if (ret)
-		return ret;
-
-	emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1,
-			   ARRAY_SIZE(physical_coding_sublayer_programming_v1));
-	emac_reg_write_all(phy->base, sysclk_refclk_setting,
-			   ARRAY_SIZE(sysclk_refclk_setting));
-	emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting));
-	emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting));
-	emac_reg_write_all(phy->base, tx_rx_setting,
-			   ARRAY_SIZE(tx_rx_setting));
-
-	/* Power up the Ser/Des engine */
-	writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START);
-
-	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
-		if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY)
-			break;
-		usleep_range(100, 200);
-	}
-
-	if (i == SERDES_START_WAIT_TIMES) {
-		netdev_err(adpt->netdev, "error: ser/des failed to start\n");
-		return -EIO;
-	}
-	/* Mask out all the SGMII Interrupt */
-	writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK);
-
-	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
-
-	return 0;
-}
-
-int emac_sgmii_init_v2(struct emac_adapter *adpt)
-{
-	struct emac_phy *phy = &adpt->phy;
-	void __iomem *phy_regs = phy->base;
-	void __iomem *laned = phy->digital;
-	unsigned int i;
-	u32 lnstatus;
-	int ret;
-
-	ret = emac_sgmii_link_init(adpt);
-	if (ret)
-		return ret;
-
-	/* PCS lane-x init */
-	emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v2,
-			   ARRAY_SIZE(physical_coding_sublayer_programming_v2));
-
-	/* SGMII lane-x init */
-	emac_reg_write_all(phy->digital,
-			   sgmii_v2_laned, ARRAY_SIZE(sgmii_v2_laned));
-
-	/* Power up PCS and start reset lane state machine */
-
-	writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL);
-	writel(1, laned + SGMII_LN_RSM_START);
-
-	/* Wait for c_ready assertion */
-	for (i = 0; i < SERDES_START_WAIT_TIMES; i++) {
-		lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS);
-		if (lnstatus & BIT(1))
-			break;
-		usleep_range(100, 200);
-	}
-
-	if (i == SERDES_START_WAIT_TIMES) {
-		netdev_err(adpt->netdev, "SGMII failed to start\n");
-		return -EIO;
-	}
-
-	/* Disable digital and SERDES loopback */
-	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0);
-	writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2);
-	writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1);
-
-	/* Mask out all the SGMII Interrupt */
-	writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK);
-
-	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
-
-	return 0;
-}
-
 static void emac_sgmii_reset_prepare(struct emac_adapter *adpt)
 {
 	struct emac_phy *phy = &adpt->phy;
@@ -651,44 +144,72 @@ void emac_sgmii_reset(struct emac_adapter *adpt)
 {
 	int ret;
 
-	clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000);
 	emac_sgmii_reset_prepare(adpt);
 
+	ret = emac_sgmii_link_init(adpt);
+	if (ret) {
+		netdev_err(adpt->netdev, "unsupported link speed\n");
+		return;
+	}
+
 	ret = adpt->phy.initialize(adpt);
 	if (ret)
 		netdev_err(adpt->netdev,
 			   "could not reinitialize internal PHY (error=%i)\n",
 			   ret);
-
-	clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000);
 }
 
 static int emac_sgmii_acpi_match(struct device *dev, void *data)
 {
+#ifdef CONFIG_ACPI
 	static const struct acpi_device_id match_table[] = {
 		{
 			.id = "QCOM8071",
-			.driver_data = (kernel_ulong_t)emac_sgmii_init_v2,
 		},
 		{}
 	};
 	const struct acpi_device_id *id = acpi_match_device(match_table, dev);
 	emac_sgmii_initialize *initialize = data;
 
-	if (id)
-		*initialize = (emac_sgmii_initialize)id->driver_data;
+	if (id) {
+		acpi_handle handle = ACPI_HANDLE(dev);
+		unsigned long long hrv;
+		acpi_status status;
+
+		status = acpi_evaluate_integer(handle, "_HRV", NULL, &hrv);
+		if (status) {
+			if (status == AE_NOT_FOUND)
+				/* Older versions of the QDF2432 ACPI tables do
+				 * not have an _HRV property.
+				 */
+				hrv = 1;
+			else
+				/* Something is wrong with the tables */
+				return 0;
+		}
 
-	return !!id;
+		switch (hrv) {
+		case 1:
+			*initialize = emac_sgmii_init_qdf2432;
+			return 1;
+		case 2:
+			*initialize = emac_sgmii_init_qdf2400;
+			return 1;
+		}
+	}
+#endif
+
+	return 0;
 }
 
 static const struct of_device_id emac_sgmii_dt_match[] = {
 	{
 		.compatible = "qcom,fsm9900-emac-sgmii",
-		.data = emac_sgmii_init_v1,
+		.data = emac_sgmii_init_fsm9900,
 	},
 	{
 		.compatible = "qcom,qdf2432-emac-sgmii",
-		.data = emac_sgmii_init_v2,
+		.data = emac_sgmii_init_qdf2432,
 	},
 	{}
 };
@@ -765,6 +286,8 @@ int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt)
 	if (ret)
 		goto error;
 
+	emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR);
+
 	/* We've remapped the addresses, so we don't need the device any
 	 * more.  of_find_device_by_node() says we should release it.
 	 */
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
index ce79212ff403..80ed3dc3157a 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h
@@ -16,9 +16,11 @@
 struct emac_adapter;
 struct platform_device;
 
-int emac_sgmii_init_v1(struct emac_adapter *adpt);
-int emac_sgmii_init_v2(struct emac_adapter *adpt);
 int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt);
 void emac_sgmii_reset(struct emac_adapter *adpt);
 
+int emac_sgmii_init_fsm9900(struct emac_adapter *adpt);
+int emac_sgmii_init_qdf2432(struct emac_adapter *adpt);
+int emac_sgmii_init_qdf2400(struct emac_adapter *adpt);
+
 #endif
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 4fede4b86538..ae32f855e31b 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -239,15 +239,8 @@ static void emac_rx_mode_set(struct net_device *netdev)
 /* Change the Maximum Transfer Unit (MTU) */
 static int emac_change_mtu(struct net_device *netdev, int new_mtu)
 {
-	unsigned int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	struct emac_adapter *adpt = netdev_priv(netdev);
 
-	if ((max_frame < EMAC_MIN_ETH_FRAME_SIZE) ||
-	    (max_frame > EMAC_MAX_ETH_FRAME_SIZE)) {
-		netdev_err(adpt->netdev, "error: invalid MTU setting\n");
-		return -EINVAL;
-	}
-
 	netif_info(adpt, hw, adpt->netdev,
 		   "changing MTU from %d to %d\n", netdev->mtu,
 		   new_mtu);
@@ -680,6 +673,12 @@ static int emac_probe(struct platform_device *pdev)
 	netdev->vlan_features |= NETIF_F_SG | NETIF_F_HW_CSUM |
 				 NETIF_F_TSO | NETIF_F_TSO6;
 
+	/* MTU range: 46 - 9194 */
+	netdev->min_mtu = EMAC_MIN_ETH_FRAME_SIZE -
+			  (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+	netdev->max_mtu = EMAC_MAX_ETH_FRAME_SIZE -
+			  (ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN);
+
 	INIT_WORK(&adpt->work_thread, emac_work_thread);
 
 	/* Initialize queues */
@@ -711,6 +710,8 @@ static int emac_probe(struct platform_device *pdev)
 err_undo_napi:
 	netif_napi_del(&adpt->rx_q.napi);
 err_undo_mdiobus:
+	if (!has_acpi_companion(&pdev->dev))
+		put_device(&adpt->phydev->mdio.dev);
 	mdiobus_unregister(adpt->mii_bus);
 err_undo_clocks:
 	emac_clks_teardown(adpt);
@@ -730,6 +731,8 @@ static int emac_remove(struct platform_device *pdev)
 
 	emac_clks_teardown(adpt);
 
+	if (!has_acpi_companion(&pdev->dev))
+		put_device(&adpt->phydev->mdio.dev);
 	mdiobus_unregister(adpt->mii_bus);
 	free_netdev(netdev);
 
diff --git a/drivers/net/ethernet/qualcomm/qca_framing.h b/drivers/net/ethernet/qualcomm/qca_framing.h
index 5d965959c978..d5e795dcdf47 100644
--- a/drivers/net/ethernet/qualcomm/qca_framing.h
+++ b/drivers/net/ethernet/qualcomm/qca_framing.h
@@ -43,9 +43,9 @@
 /* Frame length is invalid */
 #define QCAFRM_INVFRAME (QCAFRM_ERR_BASE - 4)
 
-/* Min/Max Ethernet MTU */
-#define QCAFRM_ETHMINMTU 46
-#define QCAFRM_ETHMAXMTU 1500
+/* Min/Max Ethernet MTU: 46/1500 */
+#define QCAFRM_ETHMINMTU (ETH_ZLEN - ETH_HLEN)
+#define QCAFRM_ETHMAXMTU ETH_DATA_LEN
 
 /* Min/Max frame lengths */
 #define QCAFRM_ETHMINLEN (QCAFRM_ETHMINMTU + ETH_HLEN)
diff --git a/drivers/net/ethernet/qualcomm/qca_spi.c b/drivers/net/ethernet/qualcomm/qca_spi.c
index 6e2add979471..513e6c74e199 100644
--- a/drivers/net/ethernet/qualcomm/qca_spi.c
+++ b/drivers/net/ethernet/qualcomm/qca_spi.c
@@ -780,24 +780,12 @@ qcaspi_netdev_uninit(struct net_device *dev)
 		dev_kfree_skb(qca->rx_skb);
 }
 
-static int
-qcaspi_netdev_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if ((new_mtu < QCAFRM_ETHMINMTU) || (new_mtu > QCAFRM_ETHMAXMTU))
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 static const struct net_device_ops qcaspi_netdev_ops = {
 	.ndo_init = qcaspi_netdev_init,
 	.ndo_uninit = qcaspi_netdev_uninit,
 	.ndo_open = qcaspi_netdev_open,
 	.ndo_stop = qcaspi_netdev_close,
 	.ndo_start_xmit = qcaspi_netdev_xmit,
-	.ndo_change_mtu = qcaspi_netdev_change_mtu,
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_tx_timeout = qcaspi_netdev_tx_timeout,
 	.ndo_validate_addr = eth_validate_addr,
@@ -814,6 +802,10 @@ qcaspi_netdev_setup(struct net_device *dev)
 	dev->priv_flags &= ~IFF_TX_SKB_SHARING;
 	dev->tx_queue_len = 100;
 
+	/* MTU range: 46 - 1500 */
+	dev->min_mtu = QCAFRM_ETHMINMTU;
+	dev->max_mtu = QCAFRM_ETHMAXMTU;
+
 	qca = netdev_priv(dev);
 	memset(qca, 0, sizeof(struct qcaspi));
 
diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c
index 5ef5d728c250..4ff4e0491406 100644
--- a/drivers/net/ethernet/rdc/r6040.c
+++ b/drivers/net/ethernet/rdc/r6040.c
@@ -969,7 +969,6 @@ static const struct net_device_ops r6040_netdev_ops = {
 	.ndo_start_xmit		= r6040_start_xmit,
 	.ndo_get_stats		= r6040_get_stats,
 	.ndo_set_rx_mode	= r6040_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_do_ioctl		= r6040_ioctl,
diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 5297bf77211c..b7c89ebcf4a2 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -1277,10 +1277,6 @@ static int cp_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct cp_private *cp = netdev_priv(dev);
 
-	/* check for invalid MTU, according to hardware limits */
-	if (new_mtu < CP_MIN_MTU || new_mtu > CP_MAX_MTU)
-		return -EINVAL;
-
 	/* if network interface not up, no need for complexity */
 	if (!netif_running(dev)) {
 		dev->mtu = new_mtu;
@@ -2010,6 +2006,10 @@ static int cp_init_one (struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
 		NETIF_F_HIGHDMA;
 
+	/* MTU range: 60 - 4096 */
+	dev->min_mtu = CP_MIN_MTU;
+	dev->max_mtu = CP_MAX_MTU;
+
 	rc = register_netdev(dev);
 	if (rc)
 		goto err_out_iomap;
diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index da4c2d8a4173..9bc047ac883b 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -924,19 +924,10 @@ static int rtl8139_set_features(struct net_device *dev, netdev_features_t featur
 	return 0;
 }
 
-static int rtl8139_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if (new_mtu < 68 || new_mtu > MAX_ETH_DATA_SIZE)
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 static const struct net_device_ops rtl8139_netdev_ops = {
 	.ndo_open		= rtl8139_open,
 	.ndo_stop		= rtl8139_close,
 	.ndo_get_stats64	= rtl8139_get_stats64,
-	.ndo_change_mtu		= rtl8139_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= rtl8139_set_mac_address,
 	.ndo_start_xmit		= rtl8139_start_xmit,
@@ -1022,6 +1013,10 @@ static int rtl8139_init_one(struct pci_dev *pdev,
 	dev->hw_features |= NETIF_F_RXALL;
 	dev->hw_features |= NETIF_F_RXFCS;
 
+	/* MTU range: 68 - 1770 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = MAX_ETH_DATA_SIZE;
+
 	/* tp zeroed and aligned in alloc_etherdev */
 	tp = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/realtek/atp.c b/drivers/net/ethernet/realtek/atp.c
index 5cb96785fb63..570ed3bd3cbf 100644
--- a/drivers/net/ethernet/realtek/atp.c
+++ b/drivers/net/ethernet/realtek/atp.c
@@ -245,7 +245,6 @@ static const struct net_device_ops atp_netdev_ops = {
 	.ndo_start_xmit		= atp_send_packet,
 	.ndo_set_rx_mode	= set_rx_mode,
 	.ndo_tx_timeout		= tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c
index bf000d819a21..f9b97f5946f8 100644
--- a/drivers/net/ethernet/realtek/r8169.c
+++ b/drivers/net/ethernet/realtek/r8169.c
@@ -2344,6 +2344,13 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
 	}
 }
 
+static int rtl8169_nway_reset(struct net_device *dev)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	return mii_nway_restart(&tp->mii);
+}
+
 static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.get_drvinfo		= rtl8169_get_drvinfo,
 	.get_regs_len		= rtl8169_get_regs_len,
@@ -2359,6 +2366,7 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.get_sset_count		= rtl8169_get_sset_count,
 	.get_ethtool_stats	= rtl8169_get_ethtool_stats,
 	.get_ts_info		= ethtool_op_get_ts_info,
+	.nway_reset		= rtl8169_nway_reset,
 };
 
 static void rtl8169_get_mac_version(struct rtl8169_private *tp,
@@ -6673,10 +6681,6 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	if (new_mtu < ETH_ZLEN ||
-	    new_mtu > rtl_chip_infos[tp->mac_version].jumbo_max)
-		return -EINVAL;
-
 	if (new_mtu > ETH_DATA_LEN)
 		rtl_hw_jumbo_enable(tp);
 	else
@@ -8431,6 +8435,10 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->hw_features |= NETIF_F_RXALL;
 	dev->hw_features |= NETIF_F_RXFCS;
 
+	/* MTU range: 60 - hw-specific max */
+	dev->min_mtu = ETH_ZLEN;
+	dev->max_mtu = rtl_chip_infos[chipset].jumbo_max;
+
 	tp->hw_start = cfg->hw_start;
 	tp->event_slow = cfg->event_slow;
 
diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig
index 85ec447c2d18..27be51f0a421 100644
--- a/drivers/net/ethernet/renesas/Kconfig
+++ b/drivers/net/ethernet/renesas/Kconfig
@@ -37,7 +37,7 @@ config RAVB
 	select MII
 	select MDIO_BITBANG
 	select PHYLIB
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	help
 	  Renesas Ethernet AVB device driver.
 	  This driver supports the following SoCs:
diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 630536bc72f9..92d7692c840d 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1008,20 +1008,18 @@ static int ravb_phy_init(struct net_device *ndev)
 	of_node_put(pn);
 	if (!phydev) {
 		netdev_err(ndev, "failed to connect PHY\n");
-		return -ENOENT;
+		err = -ENOENT;
+		goto err_deregister_fixed_link;
 	}
 
 	/* This driver only support 10/100Mbit speeds on Gen3
 	 * at this time.
 	 */
 	if (priv->chip_id == RCAR_GEN3) {
-		int err;
-
 		err = phy_set_max_speed(phydev, SPEED_100);
 		if (err) {
 			netdev_err(ndev, "failed to limit PHY to 100Mbit/s\n");
-			phy_disconnect(phydev);
-			return err;
+			goto err_phy_disconnect;
 		}
 
 		netdev_info(ndev, "limited PHY to 100Mbit/s\n");
@@ -1033,6 +1031,14 @@ static int ravb_phy_init(struct net_device *ndev)
 	phy_attached_info(phydev);
 
 	return 0;
+
+err_phy_disconnect:
+	phy_disconnect(phydev);
+err_deregister_fixed_link:
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
+
+	return err;
 }
 
 /* PHY control start function */
@@ -1634,6 +1640,7 @@ static void ravb_set_rx_mode(struct net_device *ndev)
 /* Device close function for Ethernet AVB */
 static int ravb_close(struct net_device *ndev)
 {
+	struct device_node *np = ndev->dev.parent->of_node;
 	struct ravb_private *priv = netdev_priv(ndev);
 	struct ravb_tstamp_skb *ts_skb, *ts_skb2;
 
@@ -1663,6 +1670,8 @@ static int ravb_close(struct net_device *ndev)
 	if (ndev->phydev) {
 		phy_stop(ndev->phydev);
 		phy_disconnect(ndev->phydev);
+		if (of_phy_is_fixed_link(np))
+			of_phy_deregister_fixed_link(np);
 	}
 
 	if (priv->chip_id != RCAR_GEN2) {
@@ -1780,7 +1789,6 @@ static const struct net_device_ops ravb_netdev_ops = {
 	.ndo_do_ioctl		= ravb_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 /* MDIO bus init function */
diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 05b0dc55de77..f341c1bc7001 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -518,7 +518,7 @@ static struct sh_eth_cpu_data r7s72100_data = {
 
 	.ecsr_value	= ECSR_ICD,
 	.ecsipr_value	= ECSIPR_ICDIP,
-	.eesipr_value	= 0xff7f009f,
+	.eesipr_value	= 0xe77f009f,
 
 	.tx_check	= EESR_TC1 | EESR_FTC,
 	.eesr_err_check	= EESR_TWB1 | EESR_TWB | EESR_TABT | EESR_RABT |
@@ -2914,7 +2914,6 @@ static const struct net_device_ops sh_eth_netdev_ops = {
 	.ndo_do_ioctl		= sh_eth_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static const struct net_device_ops sh_eth_netdev_ops_tsu = {
@@ -2929,7 +2928,6 @@ static const struct net_device_ops sh_eth_netdev_ops_tsu = {
 	.ndo_do_ioctl		= sh_eth_do_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 #ifdef CONFIG_OF
diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h
index 2eb9b49569d5..ee9675db5bf9 100644
--- a/drivers/net/ethernet/rocker/rocker.h
+++ b/drivers/net/ethernet/rocker/rocker.h
@@ -72,6 +72,7 @@ struct rocker {
 	struct rocker_dma_ring_info event_ring;
 	struct notifier_block fib_nb;
 	struct rocker_world_ops *wops;
+	struct workqueue_struct *rocker_owq;
 	void *wpriv;
 };
 
diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c
index 24b746406bc7..7c450b5a1138 100644
--- a/drivers/net/ethernet/rocker/rocker_main.c
+++ b/drivers/net/ethernet/rocker/rocker_main.c
@@ -28,6 +28,7 @@
 #include <linux/if_bridge.h>
 #include <linux/bitops.h>
 #include <linux/ctype.h>
+#include <linux/workqueue.h>
 #include <net/switchdev.h>
 #include <net/rtnetlink.h>
 #include <net/netevent.h>
@@ -1953,12 +1954,6 @@ static int rocker_port_change_mtu(struct net_device *dev, int new_mtu)
 	int running = netif_running(dev);
 	int err;
 
-#define ROCKER_PORT_MIN_MTU	68
-#define ROCKER_PORT_MAX_MTU	9000
-
-	if (new_mtu < ROCKER_PORT_MIN_MTU || new_mtu > ROCKER_PORT_MAX_MTU)
-		return -EINVAL;
-
 	if (running)
 		rocker_port_stop(dev);
 
@@ -2171,28 +2166,70 @@ static const struct switchdev_ops rocker_port_switchdev_ops = {
 	.switchdev_port_obj_dump	= rocker_port_obj_dump,
 };
 
-static int rocker_router_fib_event(struct notifier_block *nb,
-				   unsigned long event, void *ptr)
+struct rocker_fib_event_work {
+	struct work_struct work;
+	struct fib_entry_notifier_info fen_info;
+	struct rocker *rocker;
+	unsigned long event;
+};
+
+static void rocker_router_fib_event_work(struct work_struct *work)
 {
-	struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
-	struct fib_entry_notifier_info *fen_info = ptr;
+	struct rocker_fib_event_work *fib_work =
+		container_of(work, struct rocker_fib_event_work, work);
+	struct rocker *rocker = fib_work->rocker;
 	int err;
 
-	switch (event) {
+	/* Protect internal structures from changes */
+	rtnl_lock();
+	switch (fib_work->event) {
 	case FIB_EVENT_ENTRY_ADD:
-		err = rocker_world_fib4_add(rocker, fen_info);
+		err = rocker_world_fib4_add(rocker, &fib_work->fen_info);
 		if (err)
 			rocker_world_fib4_abort(rocker);
-		else
+		fib_info_put(fib_work->fen_info.fi);
 		break;
 	case FIB_EVENT_ENTRY_DEL:
-		rocker_world_fib4_del(rocker, fen_info);
+		rocker_world_fib4_del(rocker, &fib_work->fen_info);
+		fib_info_put(fib_work->fen_info.fi);
 		break;
 	case FIB_EVENT_RULE_ADD: /* fall through */
 	case FIB_EVENT_RULE_DEL:
 		rocker_world_fib4_abort(rocker);
 		break;
 	}
+	rtnl_unlock();
+	kfree(fib_work);
+}
+
+/* Called with rcu_read_lock() */
+static int rocker_router_fib_event(struct notifier_block *nb,
+				   unsigned long event, void *ptr)
+{
+	struct rocker *rocker = container_of(nb, struct rocker, fib_nb);
+	struct rocker_fib_event_work *fib_work;
+
+	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+	if (WARN_ON(!fib_work))
+		return NOTIFY_BAD;
+
+	INIT_WORK(&fib_work->work, rocker_router_fib_event_work);
+	fib_work->rocker = rocker;
+	fib_work->event = event;
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_ADD: /* fall through */
+	case FIB_EVENT_ENTRY_DEL:
+		memcpy(&fib_work->fen_info, ptr, sizeof(fib_work->fen_info));
+		/* Take reference on fib_info to prevent it from being
+		 * freed while work is queued. Release it afterwards.
+		 */
+		fib_info_hold(fib_work->fen_info.fi);
+		break;
+	}
+
+	queue_work(rocker->rocker_owq, &fib_work->work);
+
 	return NOTIFY_DONE;
 }
 
@@ -2536,9 +2573,11 @@ static void rocker_port_dev_addr_init(struct rocker_port *rocker_port)
 	}
 }
 
+#define ROCKER_PORT_MIN_MTU	ETH_MIN_MTU
+#define ROCKER_PORT_MAX_MTU	9000
 static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
 {
-	const struct pci_dev *pdev = rocker->pdev;
+	struct pci_dev *pdev = rocker->pdev;
 	struct rocker_port *rocker_port;
 	struct net_device *dev;
 	int err;
@@ -2546,6 +2585,7 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
 	dev = alloc_etherdev(sizeof(struct rocker_port));
 	if (!dev)
 		return -ENOMEM;
+	SET_NETDEV_DEV(dev, &pdev->dev);
 	rocker_port = netdev_priv(dev);
 	rocker_port->dev = dev;
 	rocker_port->rocker = rocker;
@@ -2570,6 +2610,10 @@ static int rocker_probe_port(struct rocker *rocker, unsigned int port_number)
 
 	dev->features |= NETIF_F_NETNS_LOCAL | NETIF_F_SG;
 
+	/* MTU range: 68 - 9000 */
+	dev->min_mtu = ROCKER_PORT_MIN_MTU;
+	dev->max_mtu = ROCKER_PORT_MAX_MTU;
+
 	err = rocker_world_port_pre_init(rocker_port);
 	if (err) {
 		dev_err(&pdev->dev, "port world pre-init failed\n");
@@ -2753,6 +2797,21 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_request_event_irq;
 	}
 
+	rocker->rocker_owq = alloc_ordered_workqueue(rocker_driver_name,
+						     WQ_MEM_RECLAIM);
+	if (!rocker->rocker_owq) {
+		err = -ENOMEM;
+		goto err_alloc_ordered_workqueue;
+	}
+
+	/* Only FIBs pointing to our own netdevs are programmed into
+	 * the device, so no need to pass a callback.
+	 */
+	rocker->fib_nb.notifier_call = rocker_router_fib_event;
+	err = register_fib_notifier(&rocker->fib_nb, NULL);
+	if (err)
+		goto err_register_fib_notifier;
+
 	rocker->hw.id = rocker_read64(rocker, SWITCH_ID);
 
 	err = rocker_probe_ports(rocker);
@@ -2761,15 +2820,16 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto err_probe_ports;
 	}
 
-	rocker->fib_nb.notifier_call = rocker_router_fib_event;
-	register_fib_notifier(&rocker->fib_nb);
-
 	dev_info(&pdev->dev, "Rocker switch with id %*phN\n",
 		 (int)sizeof(rocker->hw.id), &rocker->hw.id);
 
 	return 0;
 
 err_probe_ports:
+	unregister_fib_notifier(&rocker->fib_nb);
+err_register_fib_notifier:
+	destroy_workqueue(rocker->rocker_owq);
+err_alloc_ordered_workqueue:
 	free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
 err_request_event_irq:
 	free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker);
@@ -2795,9 +2855,10 @@ static void rocker_remove(struct pci_dev *pdev)
 {
 	struct rocker *rocker = pci_get_drvdata(pdev);
 
+	rocker_remove_ports(rocker);
 	unregister_fib_notifier(&rocker->fib_nb);
 	rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET);
-	rocker_remove_ports(rocker);
+	destroy_workqueue(rocker->rocker_owq);
 	free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker);
 	free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_CMD), rocker);
 	rocker_dma_rings_fini(rocker);
@@ -2839,20 +2900,37 @@ static bool rocker_port_dev_check_under(const struct net_device *dev,
 	return true;
 }
 
+struct rocker_walk_data {
+	struct rocker *rocker;
+	struct rocker_port *port;
+};
+
+static int rocker_lower_dev_walk(struct net_device *lower_dev, void *_data)
+{
+	struct rocker_walk_data *data = _data;
+	int ret = 0;
+
+	if (rocker_port_dev_check_under(lower_dev, data->rocker)) {
+		data->port = netdev_priv(lower_dev);
+		ret = 1;
+	}
+
+	return ret;
+}
+
 struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev,
 					       struct rocker *rocker)
 {
-	struct net_device *lower_dev;
-	struct list_head *iter;
+	struct rocker_walk_data data;
 
 	if (rocker_port_dev_check_under(dev, rocker))
 		return netdev_priv(dev);
 
-	netdev_for_each_all_lower_dev(dev, lower_dev, iter) {
-		if (rocker_port_dev_check_under(lower_dev, rocker))
-			return netdev_priv(lower_dev);
-	}
-	return NULL;
+	data.rocker = rocker;
+	data.port = NULL;
+	netdev_walk_all_lower_dev(dev, rocker_lower_dev_walk, &data);
+
+	return data.port;
 }
 
 static int rocker_netdevice_event(struct notifier_block *unused,
diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c
index 4ca461322d60..7cd76b6b5cb9 100644
--- a/drivers/net/ethernet/rocker/rocker_ofdpa.c
+++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c
@@ -2516,6 +2516,7 @@ static void ofdpa_fini(struct rocker *rocker)
 	int bkt;
 
 	del_timer_sync(&ofdpa->fdb_cleanup_timer);
+	flush_workqueue(rocker->rocker_owq);
 
 	spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags);
 	hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry)
diff --git a/drivers/net/ethernet/samsung/Kconfig b/drivers/net/ethernet/samsung/Kconfig
index 2360d8150777..fbd5e06654c6 100644
--- a/drivers/net/ethernet/samsung/Kconfig
+++ b/drivers/net/ethernet/samsung/Kconfig
@@ -21,7 +21,7 @@ config SXGBE_ETH
 	depends on HAS_IOMEM && HAS_DMA
 	select PHYLIB
 	select CRC32
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This is the driver for the SXGBE 10G Ethernet IP block found on
 	  Samsung platforms.
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
index 5cb51b609f02..c61f260e18a4 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_common.h
@@ -384,7 +384,6 @@ struct sxgbe_tx_queue {
 	dma_addr_t *tx_skbuff_dma;
 	struct sk_buff **tx_skbuff;
 	struct timer_list txtimer;
-	spinlock_t tx_lock;	/* lock for tx queues */
 	unsigned int cur_tx;
 	unsigned int dirty_tx;
 	u32 tx_count_frames;
diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
index ea44a2456ce1..cddcff5a00a7 100644
--- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
+++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c
@@ -426,9 +426,6 @@ static int init_tx_ring(struct device *dev, u8 queue_no,
 	tx_ring->dirty_tx = 0;
 	tx_ring->cur_tx = 0;
 
-	/* initialise TX queue lock */
-	spin_lock_init(&tx_ring->tx_lock);
-
 	return 0;
 
 dmamem_err:
@@ -743,7 +740,7 @@ static void sxgbe_tx_queue_clean(struct sxgbe_tx_queue *tqueue)
 
 	dev_txq = netdev_get_tx_queue(priv->dev, queue_no);
 
-	spin_lock(&tqueue->tx_lock);
+	__netif_tx_lock(dev_txq, smp_processor_id());
 
 	priv->xstats.tx_clean++;
 	while (tqueue->dirty_tx != tqueue->cur_tx) {
@@ -781,18 +778,13 @@ static void sxgbe_tx_queue_clean(struct sxgbe_tx_queue *tqueue)
 
 	/* wake up queue */
 	if (unlikely(netif_tx_queue_stopped(dev_txq) &&
-		     sxgbe_tx_avail(tqueue, tx_rsize) > SXGBE_TX_THRESH(priv))) {
-		netif_tx_lock(priv->dev);
-		if (netif_tx_queue_stopped(dev_txq) &&
-		    sxgbe_tx_avail(tqueue, tx_rsize) > SXGBE_TX_THRESH(priv)) {
-			if (netif_msg_tx_done(priv))
-				pr_debug("%s: restart transmit\n", __func__);
-			netif_tx_wake_queue(dev_txq);
-		}
-		netif_tx_unlock(priv->dev);
+	    sxgbe_tx_avail(tqueue, tx_rsize) > SXGBE_TX_THRESH(priv))) {
+		if (netif_msg_tx_done(priv))
+			pr_debug("%s: restart transmit\n", __func__);
+		netif_tx_wake_queue(dev_txq);
 	}
 
-	spin_unlock(&tqueue->tx_lock);
+	__netif_tx_unlock(dev_txq);
 }
 
 /**
@@ -1304,9 +1296,6 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct net_device *dev)
 		      tqueue->hwts_tx_en)))
 		ctxt_desc_req = 1;
 
-	/* get the spinlock */
-	spin_lock(&tqueue->tx_lock);
-
 	if (priv->tx_path_in_lpi_mode)
 		sxgbe_disable_eee_mode(priv);
 
@@ -1316,8 +1305,6 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct net_device *dev)
 			netdev_err(dev, "%s: Tx Ring is full when %d queue is awake\n",
 				   __func__, txq_index);
 		}
-		/* release the spin lock in case of BUSY */
-		spin_unlock(&tqueue->tx_lock);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -1436,8 +1423,6 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	priv->hw->dma->enable_dma_transmission(priv->ioaddr, txq_index);
 
-	spin_unlock(&tqueue->tx_lock);
-
 	return NETDEV_TX_OK;
 }
 
@@ -1820,19 +1805,6 @@ static int sxgbe_set_features(struct net_device *dev,
  */
 static int sxgbe_change_mtu(struct net_device *dev, int new_mtu)
 {
-	/* RFC 791, page 25, "Every internet module must be able to forward
-	 * a datagram of 68 octets without further fragmentation."
-	 */
-	if (new_mtu < MIN_MTU || (new_mtu > MAX_MTU)) {
-		netdev_err(dev, "invalid MTU, MTU should be in between %d and %d\n",
-			   MIN_MTU, MAX_MTU);
-		return -EINVAL;
-	}
-
-	/* Return if the buffer sizes will not change */
-	if (dev->mtu == new_mtu)
-		return 0;
-
 	dev->mtu = new_mtu;
 
 	if (!netif_running(dev))
@@ -2144,6 +2116,10 @@ struct sxgbe_priv_data *sxgbe_drv_probe(struct device *device,
 	/* assign filtering support */
 	ndev->priv_flags |= IFF_UNICAST_FLT;
 
+	/* MTU range: 68 - 9000 */
+	ndev->min_mtu = MIN_MTU;
+	ndev->max_mtu = MAX_MTU;
+
 	priv->msg_enable = netif_msg_init(debug, default_msg_level);
 
 	/* Enable TCP segmentation offload for all DMA channels */
diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index bdac936a68bc..244c1e171017 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -745,7 +745,6 @@ static const struct net_device_ops ether3_netdev_ops = {
 	.ndo_set_rx_mode	= ether3_setmulticastlist,
 	.ndo_tx_timeout		= ether3_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
 
diff --git a/drivers/net/ethernet/seeq/sgiseeq.c b/drivers/net/ethernet/seeq/sgiseeq.c
index c2bd5378ffda..ed34196028b8 100644
--- a/drivers/net/ethernet/seeq/sgiseeq.c
+++ b/drivers/net/ethernet/seeq/sgiseeq.c
@@ -714,7 +714,6 @@ static const struct net_device_ops sgiseeq_netdev_ops = {
 	.ndo_tx_timeout		= timeout,
 	.ndo_set_rx_mode	= sgiseeq_set_multicast,
 	.ndo_set_mac_address	= sgiseeq_set_mac_address,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 };
 
diff --git a/drivers/net/ethernet/sfc/Kconfig b/drivers/net/ethernet/sfc/Kconfig
index 4dd92b7b80f4..46f7be85f5a3 100644
--- a/drivers/net/ethernet/sfc/Kconfig
+++ b/drivers/net/ethernet/sfc/Kconfig
@@ -1,20 +1,19 @@
 config SFC
-	tristate "Solarflare SFC4000/SFC9000/SFC9100-family support"
+	tristate "Solarflare SFC9000/SFC9100-family support"
 	depends on PCI
 	select MDIO
 	select CRC32
 	select I2C
 	select I2C_ALGOBIT
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports 10/40-gigabit Ethernet cards based on
-	  the Solarflare SFC4000, SFC9000-family and SFC9100-family
-	  controllers.
+	  the Solarflare SFC9000-family and SFC9100-family controllers.
 
 	  To compile this driver as a module, choose M here.  The module
 	  will be called sfc.
 config SFC_MTD
-	bool "Solarflare SFC4000/SFC9000/SFC9100-family MTD support"
+	bool "Solarflare SFC9000/SFC9100-family MTD support"
 	depends on SFC && MTD && !(SFC=y && MTD=m)
 	default y
 	---help---
diff --git a/drivers/net/ethernet/sfc/Makefile b/drivers/net/ethernet/sfc/Makefile
index ce8470fe79d5..520cfcc17785 100644
--- a/drivers/net/ethernet/sfc/Makefile
+++ b/drivers/net/ethernet/sfc/Makefile
@@ -1,7 +1,6 @@
-sfc-y			+= efx.o nic.o farch.o falcon.o siena.o ef10.o tx.o \
-			   rx.o selftest.o ethtool.o qt202x_phy.o mdio_10g.o \
-			   tenxpress.o txc43128_phy.o falcon_boards.o \
-			   mcdi.o mcdi_port.o mcdi_mon.o ptp.o
+sfc-y			+= efx.o nic.o farch.o siena.o ef10.o tx.o rx.o \
+			   selftest.o ethtool.o ptp.o tx_tso.o \
+			   mcdi.o mcdi_port.o mcdi_mon.o
 sfc-$(CONFIG_SFC_MTD)	+= mtd.o
 sfc-$(CONFIG_SFC_SRIOV)	+= sriov.o siena_sriov.o ef10_sriov.o
 
diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 00279da6a1e8..de2947ccc5ad 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2086,6 +2086,92 @@ static inline void efx_ef10_push_tx_desc(struct efx_tx_queue *tx_queue,
 			ER_DZ_TX_DESC_UPD, tx_queue->queue);
 }
 
+/* Add Firmware-Assisted TSO v2 option descriptors to a queue.
+ */
+static int efx_ef10_tx_tso_desc(struct efx_tx_queue *tx_queue,
+				struct sk_buff *skb,
+				bool *data_mapped)
+{
+	struct efx_tx_buffer *buffer;
+	struct tcphdr *tcp;
+	struct iphdr *ip;
+
+	u16 ipv4_id;
+	u32 seqnum;
+	u32 mss;
+
+	EFX_WARN_ON_ONCE_PARANOID(tx_queue->tso_version != 2);
+
+	mss = skb_shinfo(skb)->gso_size;
+
+	if (unlikely(mss < 4)) {
+		WARN_ONCE(1, "MSS of %u is too small for TSO v2\n", mss);
+		return -EINVAL;
+	}
+
+	ip = ip_hdr(skb);
+	if (ip->version == 4) {
+		/* Modify IPv4 header if needed. */
+		ip->tot_len = 0;
+		ip->check = 0;
+		ipv4_id = ip->id;
+	} else {
+		/* Modify IPv6 header if needed. */
+		struct ipv6hdr *ipv6 = ipv6_hdr(skb);
+
+		ipv6->payload_len = 0;
+		ipv4_id = 0;
+	}
+
+	tcp = tcp_hdr(skb);
+	seqnum = ntohl(tcp->seq);
+
+	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+	buffer->flags = EFX_TX_BUF_OPTION;
+	buffer->len = 0;
+	buffer->unmap_len = 0;
+	EFX_POPULATE_QWORD_5(buffer->option,
+			ESF_DZ_TX_DESC_IS_OPT, 1,
+			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
+			ESF_DZ_TX_TSO_OPTION_TYPE,
+			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A,
+			ESF_DZ_TX_TSO_IP_ID, ipv4_id,
+			ESF_DZ_TX_TSO_TCP_SEQNO, seqnum
+			);
+	++tx_queue->insert_count;
+
+	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+	buffer->flags = EFX_TX_BUF_OPTION;
+	buffer->len = 0;
+	buffer->unmap_len = 0;
+	EFX_POPULATE_QWORD_4(buffer->option,
+			ESF_DZ_TX_DESC_IS_OPT, 1,
+			ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_TSO,
+			ESF_DZ_TX_TSO_OPTION_TYPE,
+			ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B,
+			ESF_DZ_TX_TSO_TCP_MSS, mss
+			);
+	++tx_queue->insert_count;
+
+	return 0;
+}
+
+static u32 efx_ef10_tso_versions(struct efx_nic *efx)
+{
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	u32 tso_versions = 0;
+
+	if (nic_data->datapath_caps &
+	    (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN))
+		tso_versions |= BIT(1);
+	if (nic_data->datapath_caps2 &
+	    (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))
+		tso_versions |= BIT(2);
+	return tso_versions;
+}
+
 static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_INIT_TXQ_IN_LEN(EFX_MAX_DMAQ_SIZE * 8 /
@@ -2095,6 +2181,7 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 	struct efx_channel *channel = tx_queue->channel;
 	struct efx_nic *efx = tx_queue->efx;
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	bool tso_v2 = false;
 	size_t inlen;
 	dma_addr_t dma_addr;
 	efx_qword_t *txd;
@@ -2102,13 +2189,21 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 	int i;
 	BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
 
+	/* TSOv2 is a limited resource that can only be configured on a limited
+	 * number of queues. TSO without checksum offload is not really a thing,
+	 * so we only enable it for queues with checksum offload.
+	 */
+	if (csum_offload && (nic_data->datapath_caps2 &
+			(1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TSO_V2_LBN))) {
+		tso_v2 = true;
+		netif_dbg(efx, hw, efx->net_dev, "Using TSOv2 for channel %u\n",
+				channel->channel);
+	}
+
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
-	MCDI_POPULATE_DWORD_2(inbuf, INIT_TXQ_IN_FLAGS,
-			      INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
-			      INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
 
@@ -2124,10 +2219,30 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 
 	inlen = MC_CMD_INIT_TXQ_IN_LEN(entries);
 
-	rc = efx_mcdi_rpc(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
-			  NULL, 0, NULL);
-	if (rc)
-		goto fail;
+	do {
+		MCDI_POPULATE_DWORD_3(inbuf, INIT_TXQ_IN_FLAGS,
+				/* This flag was removed from mcdi_pcol.h for
+				 * the non-_EXT version of INIT_TXQ.  However,
+				 * firmware still honours it.
+				 */
+				INIT_TXQ_EXT_IN_FLAG_TSOV2_EN, tso_v2,
+				INIT_TXQ_IN_FLAG_IP_CSUM_DIS, !csum_offload,
+				INIT_TXQ_IN_FLAG_TCP_CSUM_DIS, !csum_offload);
+
+		rc = efx_mcdi_rpc_quiet(efx, MC_CMD_INIT_TXQ, inbuf, inlen,
+					NULL, 0, NULL);
+		if (rc == -ENOSPC && tso_v2) {
+			/* Retry without TSOv2 if we're short on contexts. */
+			tso_v2 = false;
+			netif_warn(efx, probe, efx->net_dev,
+				   "TSOv2 context not available to segment in hardware. TCP performance may be reduced.\n");
+		} else if (rc) {
+			efx_mcdi_display_error(efx, MC_CMD_INIT_TXQ,
+					       MC_CMD_INIT_TXQ_EXT_IN_LEN,
+					       NULL, 0, rc);
+			goto fail;
+		}
+	} while (rc);
 
 	/* A previous user of this TX queue might have set us up the
 	 * bomb by writing a descriptor to the TX push collector but
@@ -2146,8 +2261,11 @@ static void efx_ef10_tx_init(struct efx_tx_queue *tx_queue)
 			     ESF_DZ_TX_OPTION_IP_CSUM, csum_offload);
 	tx_queue->write_count = 1;
 
-	if (nic_data->datapath_caps &
-	    (1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
+	if (tso_v2) {
+		tx_queue->handle_tso = efx_ef10_tx_tso_desc;
+		tx_queue->tso_version = 2;
+	} else if (nic_data->datapath_caps &
+			(1 << MC_CMD_GET_CAPABILITIES_OUT_TX_TSO_LBN)) {
 		tx_queue->tso_version = 1;
 	}
 
@@ -2202,6 +2320,25 @@ static inline void efx_ef10_notify_tx_desc(struct efx_tx_queue *tx_queue)
 			ER_DZ_TX_DESC_UPD_DWORD, tx_queue->queue);
 }
 
+#define EFX_EF10_MAX_TX_DESCRIPTOR_LEN 0x3fff
+
+static unsigned int efx_ef10_tx_limit_len(struct efx_tx_queue *tx_queue,
+					  dma_addr_t dma_addr, unsigned int len)
+{
+	if (len > EFX_EF10_MAX_TX_DESCRIPTOR_LEN) {
+		/* If we need to break across multiple descriptors we should
+		 * stop at a page boundary. This assumes the length limit is
+		 * greater than the page size.
+		 */
+		dma_addr_t end = dma_addr + EFX_EF10_MAX_TX_DESCRIPTOR_LEN;
+
+		BUILD_BUG_ON(EFX_EF10_MAX_TX_DESCRIPTOR_LEN < EFX_PAGE_SIZE);
+		len = (end & (~(EFX_PAGE_SIZE - 1))) - dma_addr;
+	}
+
+	return len;
+}
+
 static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
 {
 	unsigned int old_write_count = tx_queue->write_count;
@@ -2245,6 +2382,86 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
 	}
 }
 
+#define RSS_MODE_HASH_ADDRS	(1 << RSS_MODE_HASH_SRC_ADDR_LBN |\
+				 1 << RSS_MODE_HASH_DST_ADDR_LBN)
+#define RSS_MODE_HASH_PORTS	(1 << RSS_MODE_HASH_SRC_PORT_LBN |\
+				 1 << RSS_MODE_HASH_DST_PORT_LBN)
+#define RSS_CONTEXT_FLAGS_DEFAULT	(1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV4_EN_LBN |\
+					 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV4_EN_LBN |\
+					 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_IPV6_EN_LBN |\
+					 1 << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TOEPLITZ_TCPV6_EN_LBN |\
+					 (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV4_RSS_MODE_LBN |\
+					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN |\
+					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV4_RSS_MODE_LBN |\
+					 (RSS_MODE_HASH_ADDRS | RSS_MODE_HASH_PORTS) << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_TCP_IPV6_RSS_MODE_LBN |\
+					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\
+					 RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN)
+
+static int efx_ef10_get_rss_flags(struct efx_nic *efx, u32 context, u32 *flags)
+{
+	/* Firmware had a bug (sfc bug 61952) where it would not actually
+	 * fill in the flags field in the response to MC_CMD_RSS_CONTEXT_GET_FLAGS.
+	 * This meant that it would always contain whatever was previously
+	 * in the MCDI buffer.  Fortunately, all firmware versions with
+	 * this bug have the same default flags value for a newly-allocated
+	 * RSS context, and the only time we want to get the flags is just
+	 * after allocating.  Moreover, the response has a 32-bit hole
+	 * where the context ID would be in the request, so we can use an
+	 * overlength buffer in the request and pre-fill the flags field
+	 * with what we believe the default to be.  Thus if the firmware
+	 * has the bug, it will leave our pre-filled value in the flags
+	 * field of the response, and we will get the right answer.
+	 *
+	 * However, this does mean that this function should NOT be used if
+	 * the RSS context flags might not be their defaults - it is ONLY
+	 * reliably correct for a newly-allocated RSS context.
+	 */
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+	MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN);
+	size_t outlen;
+	int rc;
+
+	/* Check we have a hole for the context ID */
+	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_FLAGS_IN_LEN != MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_FLAGS_OFST);
+	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_IN_RSS_CONTEXT_ID, context);
+	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS,
+		       RSS_CONTEXT_FLAGS_DEFAULT);
+	rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_FLAGS, inbuf,
+			  sizeof(inbuf), outbuf, sizeof(outbuf), &outlen);
+	if (rc == 0) {
+		if (outlen < MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_LEN)
+			rc = -EIO;
+		else
+			*flags = MCDI_DWORD(outbuf, RSS_CONTEXT_GET_FLAGS_OUT_FLAGS);
+	}
+	return rc;
+}
+
+/* Attempt to enable 4-tuple UDP hashing on the specified RSS context.
+ * If we fail, we just leave the RSS context at its default hash settings,
+ * which is safe but may slightly reduce performance.
+ * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we
+ * just need to set the UDP ports flags (for both IP versions).
+ */
+static void efx_ef10_set_rss_flags(struct efx_nic *efx, u32 context)
+{
+	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN);
+	u32 flags;
+
+	BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_SET_FLAGS_OUT_LEN != 0);
+
+	if (efx_ef10_get_rss_flags(efx, context, &flags) != 0)
+		return;
+	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_RSS_CONTEXT_ID, context);
+	flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV4_RSS_MODE_LBN;
+	flags |= RSS_MODE_HASH_PORTS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN;
+	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_SET_FLAGS_IN_FLAGS, flags);
+	if (!efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_SET_FLAGS, inbuf, sizeof(inbuf),
+			  NULL, 0, NULL))
+		/* Succeeded, so UDP 4-tuple is now enabled */
+		efx->rx_hash_udp_4tuple = true;
+}
+
 static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
 				      bool exclusive, unsigned *context_size)
 {
@@ -2290,6 +2507,10 @@ static int efx_ef10_alloc_rss_context(struct efx_nic *efx, u32 *context,
 	if (context_size)
 		*context_size = rss_spread;
 
+	if (nic_data->datapath_caps &
+	    1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
+		efx_ef10_set_rss_flags(efx, *context);
+
 	return 0;
 }
 
@@ -5385,6 +5606,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
 	.tx_init = efx_ef10_tx_init,
 	.tx_remove = efx_ef10_tx_remove,
 	.tx_write = efx_ef10_tx_write,
+	.tx_limit_len = efx_ef10_tx_limit_len,
 	.rx_push_rss_config = efx_ef10_vf_rx_push_rss_config,
 	.rx_probe = efx_ef10_rx_probe,
 	.rx_init = efx_ef10_rx_init,
@@ -5491,6 +5713,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 	.tx_init = efx_ef10_tx_init,
 	.tx_remove = efx_ef10_tx_remove,
 	.tx_write = efx_ef10_tx_write,
+	.tx_limit_len = efx_ef10_tx_limit_len,
 	.rx_push_rss_config = efx_ef10_pf_rx_push_rss_config,
 	.rx_probe = efx_ef10_rx_probe,
 	.rx_init = efx_ef10_rx_init,
@@ -5550,6 +5773,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 #endif
 	.get_mac_address = efx_ef10_get_mac_address_pf,
 	.set_mac_address = efx_ef10_set_mac_address,
+	.tso_versions = efx_ef10_tso_versions,
 
 	.revision = EFX_REV_HUNT_A0,
 	.max_dma_mask = DMA_BIT_MASK(ESF_DZ_TX_KER_BUF_ADDR_WIDTH),
diff --git a/drivers/net/ethernet/sfc/ef10_regs.h b/drivers/net/ethernet/sfc/ef10_regs.h
index 62a55dde61d5..2c4bf9476c37 100644
--- a/drivers/net/ethernet/sfc/ef10_regs.h
+++ b/drivers/net/ethernet/sfc/ef10_regs.h
@@ -1,6 +1,6 @@
 /****************************************************************************
  * Driver for Solarflare network controllers and boards
- * Copyright 2012-2013 Solarflare Communications Inc.
+ * Copyright 2012-2015 Solarflare Communications Inc.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published
@@ -147,8 +147,14 @@
 #define	ESF_DZ_RX_OVERRIDE_HOLDOFF_WIDTH 1
 #define	ESF_DZ_RX_DROP_EVENT_LBN 58
 #define	ESF_DZ_RX_DROP_EVENT_WIDTH 1
-#define	ESF_DZ_RX_EV_RSVD2_LBN 54
-#define	ESF_DZ_RX_EV_RSVD2_WIDTH 4
+#define	ESF_DD_RX_EV_RSVD2_LBN 54
+#define	ESF_DD_RX_EV_RSVD2_WIDTH 4
+#define	ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_LBN 57
+#define	ESF_EZ_RX_TCP_UDP_INNER_CHKSUM_ERR_WIDTH 1
+#define	ESF_EZ_RX_IP_INNER_CHKSUM_ERR_LBN 56
+#define	ESF_EZ_RX_IP_INNER_CHKSUM_ERR_WIDTH 1
+#define	ESF_EZ_RX_EV_RSVD2_LBN 54
+#define	ESF_EZ_RX_EV_RSVD2_WIDTH 2
 #define	ESF_DZ_RX_EV_SOFT2_LBN 52
 #define	ESF_DZ_RX_EV_SOFT2_WIDTH 2
 #define	ESF_DZ_RX_DSC_PTR_LBITS_LBN 48
@@ -192,12 +198,21 @@
 #define	ESF_DZ_RX_MAC_CLASS_WIDTH 1
 #define	ESE_DZ_MAC_CLASS_MCAST 1
 #define	ESE_DZ_MAC_CLASS_UCAST 0
-#define	ESF_DZ_RX_EV_SOFT1_LBN 32
-#define	ESF_DZ_RX_EV_SOFT1_WIDTH 3
-#define	ESF_DZ_RX_EV_RSVD1_LBN 31
-#define	ESF_DZ_RX_EV_RSVD1_WIDTH 1
-#define	ESF_DZ_RX_ABORT_LBN 30
-#define	ESF_DZ_RX_ABORT_WIDTH 1
+#define	ESF_DD_RX_EV_SOFT1_LBN 32
+#define	ESF_DD_RX_EV_SOFT1_WIDTH 3
+#define	ESF_EZ_RX_EV_SOFT1_LBN 34
+#define	ESF_EZ_RX_EV_SOFT1_WIDTH 1
+#define	ESF_EZ_RX_ENCAP_HDR_LBN 32
+#define	ESF_EZ_RX_ENCAP_HDR_WIDTH 2
+#define	ESE_EZ_ENCAP_HDR_GRE 2
+#define	ESE_EZ_ENCAP_HDR_VXLAN 1
+#define	ESE_EZ_ENCAP_HDR_NONE 0
+#define	ESF_DD_RX_EV_RSVD1_LBN 30
+#define	ESF_DD_RX_EV_RSVD1_WIDTH 2
+#define	ESF_EZ_RX_EV_RSVD1_LBN 31
+#define	ESF_EZ_RX_EV_RSVD1_WIDTH 1
+#define	ESF_EZ_RX_ABORT_LBN 30
+#define	ESF_EZ_RX_ABORT_WIDTH 1
 #define	ESF_DZ_RX_ECC_ERR_LBN 29
 #define	ESF_DZ_RX_ECC_ERR_WIDTH 1
 #define	ESF_DZ_RX_CRC1_ERR_LBN 28
@@ -235,6 +250,12 @@
 #define	ESE_DZ_TX_OPTION_DESC_TSO 7
 #define	ESE_DZ_TX_OPTION_DESC_VLAN 6
 #define	ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define	ESF_DZ_TX_OPTION_TS_AT_TXDP_LBN 8
+#define	ESF_DZ_TX_OPTION_TS_AT_TXDP_WIDTH 1
+#define	ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM_LBN 7
+#define	ESF_DZ_TX_OPTION_INNER_UDP_TCP_CSUM_WIDTH 1
+#define	ESF_DZ_TX_OPTION_INNER_IP_CSUM_LBN 6
+#define	ESF_DZ_TX_OPTION_INNER_IP_CSUM_WIDTH 1
 #define	ESF_DZ_TX_TIMESTAMP_LBN 5
 #define	ESF_DZ_TX_TIMESTAMP_WIDTH 1
 #define	ESF_DZ_TX_OPTION_CRC_MODE_LBN 2
@@ -257,14 +278,22 @@
 #define	ESF_DZ_TX_OVERRIDE_HOLDOFF_WIDTH 1
 #define	ESF_DZ_TX_DROP_EVENT_LBN 58
 #define	ESF_DZ_TX_DROP_EVENT_WIDTH 1
-#define	ESF_DZ_TX_EV_RSVD_LBN 48
-#define	ESF_DZ_TX_EV_RSVD_WIDTH 10
+#define	ESF_DD_TX_EV_RSVD_LBN 48
+#define	ESF_DD_TX_EV_RSVD_WIDTH 10
+#define	ESF_EZ_TCP_UDP_INNER_CHKSUM_ERR_LBN 57
+#define	ESF_EZ_TCP_UDP_INNER_CHKSUM_ERR_WIDTH 1
+#define	ESF_EZ_IP_INNER_CHKSUM_ERR_LBN 56
+#define	ESF_EZ_IP_INNER_CHKSUM_ERR_WIDTH 1
+#define	ESF_EZ_TX_EV_RSVD_LBN 48
+#define	ESF_EZ_TX_EV_RSVD_WIDTH 8
 #define	ESF_DZ_TX_SOFT2_LBN 32
 #define	ESF_DZ_TX_SOFT2_WIDTH 16
-#define	ESF_DZ_TX_CAN_MERGE_LBN 31
-#define	ESF_DZ_TX_CAN_MERGE_WIDTH 1
-#define	ESF_DZ_TX_SOFT1_LBN 24
-#define	ESF_DZ_TX_SOFT1_WIDTH 7
+#define	ESF_DD_TX_SOFT1_LBN 24
+#define	ESF_DD_TX_SOFT1_WIDTH 8
+#define	ESF_EZ_TX_CAN_MERGE_LBN 31
+#define	ESF_EZ_TX_CAN_MERGE_WIDTH 1
+#define	ESF_EZ_TX_SOFT1_LBN 24
+#define	ESF_EZ_TX_SOFT1_WIDTH 7
 #define	ESF_DZ_TX_QLABEL_LBN 16
 #define	ESF_DZ_TX_QLABEL_WIDTH 5
 #define	ESF_DZ_TX_DESCR_INDX_LBN 0
@@ -301,6 +330,10 @@
 #define	ESE_DZ_TX_OPTION_DESC_TSO 7
 #define	ESE_DZ_TX_OPTION_DESC_VLAN 6
 #define	ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define	ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56
+#define	ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4
+#define	ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1
+#define	ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0
 #define	ESF_DZ_TX_TSO_TCP_FLAGS_LBN 48
 #define	ESF_DZ_TX_TSO_TCP_FLAGS_WIDTH 8
 #define	ESF_DZ_TX_TSO_IP_ID_LBN 32
@@ -308,6 +341,46 @@
 #define	ESF_DZ_TX_TSO_TCP_SEQNO_LBN 0
 #define	ESF_DZ_TX_TSO_TCP_SEQNO_WIDTH 32
 
+/* TX_TSO_FATSO2A_DESC */
+#define	ESF_DZ_TX_DESC_IS_OPT_LBN 63
+#define	ESF_DZ_TX_DESC_IS_OPT_WIDTH 1
+#define	ESF_DZ_TX_OPTION_TYPE_LBN 60
+#define	ESF_DZ_TX_OPTION_TYPE_WIDTH 3
+#define	ESE_DZ_TX_OPTION_DESC_TSO 7
+#define	ESE_DZ_TX_OPTION_DESC_VLAN 6
+#define	ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define	ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56
+#define	ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4
+#define	ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B 3
+#define	ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A 2
+#define	ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1
+#define	ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0
+#define	ESF_DZ_TX_TSO_IP_ID_LBN 32
+#define	ESF_DZ_TX_TSO_IP_ID_WIDTH 16
+#define	ESF_DZ_TX_TSO_TCP_SEQNO_LBN 0
+#define	ESF_DZ_TX_TSO_TCP_SEQNO_WIDTH 32
+
+
+/* TX_TSO_FATSO2B_DESC */
+#define	ESF_DZ_TX_DESC_IS_OPT_LBN 63
+#define	ESF_DZ_TX_DESC_IS_OPT_WIDTH 1
+#define	ESF_DZ_TX_OPTION_TYPE_LBN 60
+#define	ESF_DZ_TX_OPTION_TYPE_WIDTH 3
+#define	ESE_DZ_TX_OPTION_DESC_TSO 7
+#define	ESE_DZ_TX_OPTION_DESC_VLAN 6
+#define	ESE_DZ_TX_OPTION_DESC_CRC_CSUM 0
+#define	ESF_DZ_TX_TSO_OPTION_TYPE_LBN 56
+#define	ESF_DZ_TX_TSO_OPTION_TYPE_WIDTH 4
+#define	ESE_DZ_TX_TSO_OPTION_DESC_FATSO2B 3
+#define	ESE_DZ_TX_TSO_OPTION_DESC_FATSO2A 2
+#define	ESE_DZ_TX_TSO_OPTION_DESC_ENCAP 1
+#define	ESE_DZ_TX_TSO_OPTION_DESC_NORMAL 0
+#define	ESF_DZ_TX_TSO_OUTER_IP_ID_LBN 0
+#define	ESF_DZ_TX_TSO_OUTER_IP_ID_WIDTH 16
+#define	ESF_DZ_TX_TSO_TCP_MSS_LBN 32
+#define	ESF_DZ_TX_TSO_TCP_MSS_WIDTH 16
+
+
 /*************************************************************************/
 
 /* TX_DESC_UPD_REG: Transmit descriptor update register.
diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c
index 6b89e4a7b164..5a5dcad8c49a 100644
--- a/drivers/net/ethernet/sfc/efx.c
+++ b/drivers/net/ethernet/sfc/efx.c
@@ -82,7 +82,6 @@ const char *const efx_reset_type_names[] = {
 	[RESET_TYPE_DISABLE]            = "DISABLE",
 	[RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
 	[RESET_TYPE_INT_ERROR]          = "INT_ERROR",
-	[RESET_TYPE_RX_RECOVERY]        = "RX_RECOVERY",
 	[RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
 	[RESET_TYPE_TX_SKIP]            = "TX_SKIP",
 	[RESET_TYPE_MC_FAILURE]         = "MC_FAILURE",
@@ -356,7 +355,7 @@ static int efx_probe_eventq(struct efx_channel *channel)
 	/* Build an event queue with room for one event per tx and rx buffer,
 	 * plus some extra for link state events and MCDI completions. */
 	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
-	EFX_BUG_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
+	EFX_WARN_ON_PARANOID(entries > EFX_MAX_EVQ_SIZE);
 	channel->eventq_mask = max(entries, EFX_MIN_EVQ_SIZE) - 1;
 
 	return efx_nic_probe_eventq(channel);
@@ -733,16 +732,7 @@ static void efx_stop_datapath(struct efx_nic *efx)
 	}
 
 	rc = efx->type->fini_dmaq(efx);
-	if (rc && EFX_WORKAROUND_7803(efx)) {
-		/* Schedule a reset to recover from the flush failure. The
-		 * descriptor caches reference memory we're about to free,
-		 * but falcon_reconfigure_mac_wrapper() won't reconnect
-		 * the MACs because of the pending reset.
-		 */
-		netif_err(efx, drv, efx->net_dev,
-			  "Resetting to recover from flush failure\n");
-		efx_schedule_reset(efx, RESET_TYPE_ALL);
-	} else if (rc) {
+	if (rc) {
 		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
 	} else {
 		netif_dbg(efx, drv, efx->net_dev,
@@ -1892,15 +1882,13 @@ static void efx_start_all(struct efx_nic *efx)
 		queue_delayed_work(efx->workqueue, &efx->monitor_work,
 				   efx_monitor_interval);
 
-	/* If link state detection is normally event-driven, we have
+	/* Link state detection is normally event-driven; we have
 	 * to poll now because we could have missed a change
 	 */
-	if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
-		mutex_lock(&efx->mac_lock);
-		if (efx->phy_op->poll(efx))
-			efx_link_status_changed(efx);
-		mutex_unlock(&efx->mac_lock);
-	}
+	mutex_lock(&efx->mac_lock);
+	if (efx->phy_op->poll(efx))
+		efx_link_status_changed(efx);
+	mutex_unlock(&efx->mac_lock);
 
 	efx->type->start_stats(efx);
 	efx->type->pull_stats(efx);
@@ -2113,10 +2101,9 @@ static void efx_init_napi(struct efx_nic *efx)
 
 static void efx_fini_napi_channel(struct efx_channel *channel)
 {
-	if (channel->napi_dev) {
+	if (channel->napi_dev)
 		netif_napi_del(&channel->napi_str);
-		napi_hash_del(&channel->napi_str);
-	}
+
 	channel->napi_dev = NULL;
 }
 
@@ -2266,18 +2253,6 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu)
 	rc = efx_check_disabled(efx);
 	if (rc)
 		return rc;
-	if (new_mtu > EFX_MAX_MTU) {
-		netif_err(efx, drv, efx->net_dev,
-			  "Requested MTU of %d too big (max: %d)\n",
-			  new_mtu, EFX_MAX_MTU);
-		return -EINVAL;
-	}
-	if (new_mtu < EFX_MIN_MTU) {
-		netif_err(efx, drv, efx->net_dev,
-			  "Requested MTU of %d too small (min: %d)\n",
-			  new_mtu, EFX_MIN_MTU);
-		return -EINVAL;
-	}
 
 	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
 
@@ -2481,6 +2456,8 @@ static int efx_register_netdev(struct efx_nic *efx)
 		net_dev->priv_flags |= IFF_UNICAST_FLT;
 	net_dev->ethtool_ops = &efx_ethtool_ops;
 	net_dev->gso_max_segs = EFX_TSO_MAX_SEGS;
+	net_dev->min_mtu = EFX_MIN_MTU;
+	net_dev->max_mtu = EFX_MAX_MTU;
 
 	rtnl_lock();
 
@@ -2853,12 +2830,6 @@ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type)
 
 /* PCI device ID table */
 static const struct pci_device_id efx_pci_table[] = {
-	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
-		    PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
-	 .driver_data = (unsigned long) &falcon_a1_nic_type},
-	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
-		    PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
-	 .driver_data = (unsigned long) &falcon_b0_nic_type},
 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0803),	/* SFC9020 */
 	 .driver_data = (unsigned long) &siena_a0_nic_type},
 	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE, 0x0813),	/* SFL9021 */
@@ -3211,23 +3182,6 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
 	efx = netdev_priv(net_dev);
 	efx->type = (const struct efx_nic_type *) entry->driver_data;
 	efx->fixed_features |= NETIF_F_HIGHDMA;
-	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
-			      NETIF_F_TSO | NETIF_F_RXCSUM);
-	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
-		net_dev->features |= NETIF_F_TSO6;
-	/* Mask for features that also apply to VLAN devices */
-	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
-				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
-				   NETIF_F_RXCSUM);
-
-	net_dev->hw_features = net_dev->features & ~efx->fixed_features;
-
-	/* Disable VLAN filtering by default.  It may be enforced if
-	 * the feature is fixed (i.e. VLAN filters are required to
-	 * receive VLAN tagged packets due to vPort restrictions).
-	 */
-	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
-	net_dev->features |= efx->fixed_features;
 
 	pci_set_drvdata(pci_dev, efx);
 	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
@@ -3250,6 +3204,27 @@ static int efx_pci_probe(struct pci_dev *pci_dev,
 	if (rc)
 		goto fail3;
 
+	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
+			      NETIF_F_TSO | NETIF_F_RXCSUM);
+	if (efx->type->offload_features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
+		net_dev->features |= NETIF_F_TSO6;
+	/* Check whether device supports TSO */
+	if (!efx->type->tso_versions || !efx->type->tso_versions(efx))
+		net_dev->features &= ~NETIF_F_ALL_TSO;
+	/* Mask for features that also apply to VLAN devices */
+	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
+				   NETIF_F_HIGHDMA | NETIF_F_ALL_TSO |
+				   NETIF_F_RXCSUM);
+
+	net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+
+	/* Disable VLAN filtering by default.  It may be enforced if
+	 * the feature is fixed (i.e. VLAN filters are required to
+	 * receive VLAN tagged packets due to vPort restrictions).
+	 */
+	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+	net_dev->features |= efx->fixed_features;
+
 	rc = efx_register_netdev(efx);
 	if (rc)
 		goto fail4;
diff --git a/drivers/net/ethernet/sfc/enum.h b/drivers/net/ethernet/sfc/enum.h
index c94f56271dd4..6fa824211d91 100644
--- a/drivers/net/ethernet/sfc/enum.h
+++ b/drivers/net/ethernet/sfc/enum.h
@@ -148,7 +148,6 @@ enum efx_loopback_mode {
  * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
  * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
  * @RESET_TYPE_INT_ERROR: reset due to internal error
- * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors
  * @RESET_TYPE_DMA_ERROR: DMA error
  * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
  * @RESET_TYPE_MC_FAILURE: MC reboot/assertion
@@ -166,15 +165,13 @@ enum reset_type {
 	RESET_TYPE_MAX_METHOD,
 	RESET_TYPE_TX_WATCHDOG,
 	RESET_TYPE_INT_ERROR,
-	RESET_TYPE_RX_RECOVERY,
 	RESET_TYPE_DMA_ERROR,
 	RESET_TYPE_TX_SKIP,
 	RESET_TYPE_MC_FAILURE,
 	/* RESET_TYPE_MCDI_TIMEOUT is actually a method, not just a reason, but
 	 * it doesn't fit the scope hierarchy (not well-ordered by inclusion).
 	 * We encode this by having its enum value be greater than
-	 * RESET_TYPE_MAX_METHOD. This also prevents issuing it with
-	 * efx_ioctl_reset.
+	 * RESET_TYPE_MAX_METHOD.
 	 */
 	RESET_TYPE_MCDI_TIMEOUT,
 	RESET_TYPE_MAX,
diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c
index 445ccdb6bc67..f644216eda1b 100644
--- a/drivers/net/ethernet/sfc/ethtool.c
+++ b/drivers/net/ethernet/sfc/ethtool.c
@@ -69,8 +69,10 @@ static const struct efx_sw_stat_desc efx_sw_stat_desc[] = {
 	EFX_ETHTOOL_UINT_TXQ_STAT(tso_bursts),
 	EFX_ETHTOOL_UINT_TXQ_STAT(tso_long_headers),
 	EFX_ETHTOOL_UINT_TXQ_STAT(tso_packets),
+	EFX_ETHTOOL_UINT_TXQ_STAT(tso_fallbacks),
 	EFX_ETHTOOL_UINT_TXQ_STAT(pushes),
 	EFX_ETHTOOL_UINT_TXQ_STAT(pio_packets),
+	EFX_ETHTOOL_UINT_TXQ_STAT(cb_packets),
 	EFX_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
 	EFX_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
@@ -167,9 +169,8 @@ static void efx_ethtool_get_drvinfo(struct net_device *net_dev,
 
 	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
 	strlcpy(info->version, EFX_DRIVER_VERSION, sizeof(info->version));
-	if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0)
-		efx_mcdi_print_fwver(efx, info->fw_version,
-				     sizeof(info->fw_version));
+	efx_mcdi_print_fwver(efx, info->fw_version,
+			     sizeof(info->fw_version));
 	strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
 }
 
@@ -332,12 +333,12 @@ static int efx_ethtool_fill_self_tests(struct efx_nic *efx,
 		      "core", 0, "registers", NULL);
 
 	if (efx->phy_op->run_tests != NULL) {
-		EFX_BUG_ON_PARANOID(efx->phy_op->test_name == NULL);
+		EFX_WARN_ON_PARANOID(efx->phy_op->test_name == NULL);
 
 		for (i = 0; true; ++i) {
 			const char *name;
 
-			EFX_BUG_ON_PARANOID(i >= EFX_MAX_PHY_TESTS);
+			EFX_WARN_ON_PARANOID(i >= EFX_MAX_PHY_TESTS);
 			name = efx->phy_op->test_name(efx, i);
 			if (name == NULL)
 				break;
@@ -964,35 +965,33 @@ efx_ethtool_get_rxnfc(struct net_device *net_dev,
 		return 0;
 
 	case ETHTOOL_GRXFH: {
-		unsigned min_revision = 0;
-
 		info->data = 0;
 		switch (info->flow_type) {
+		case UDP_V4_FLOW:
+			if (efx->rx_hash_udp_4tuple)
+				/* fall through */
 		case TCP_V4_FLOW:
-			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+				info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 			/* fall through */
-		case UDP_V4_FLOW:
 		case SCTP_V4_FLOW:
 		case AH_ESP_V4_FLOW:
 		case IPV4_FLOW:
 			info->data |= RXH_IP_SRC | RXH_IP_DST;
-			min_revision = EFX_REV_FALCON_B0;
 			break;
+		case UDP_V6_FLOW:
+			if (efx->rx_hash_udp_4tuple)
+				/* fall through */
 		case TCP_V6_FLOW:
-			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+				info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
 			/* fall through */
-		case UDP_V6_FLOW:
 		case SCTP_V6_FLOW:
 		case AH_ESP_V6_FLOW:
 		case IPV6_FLOW:
 			info->data |= RXH_IP_SRC | RXH_IP_DST;
-			min_revision = EFX_REV_SIENA_A0;
 			break;
 		default:
 			break;
 		}
-		if (efx_nic_rev(efx) < min_revision)
-			info->data = 0;
 		return 0;
 	}
 
@@ -1265,9 +1264,7 @@ static u32 efx_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
 
-	return ((efx_nic_rev(efx) < EFX_REV_FALCON_B0 ||
-		 efx->n_rx_channels == 1) ?
-		0 : ARRAY_SIZE(efx->rx_indir_table));
+	return (efx->n_rx_channels == 1) ? 0 : ARRAY_SIZE(efx->rx_indir_table);
 }
 
 static int efx_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
diff --git a/drivers/net/ethernet/sfc/falcon/Kconfig b/drivers/net/ethernet/sfc/falcon/Kconfig
new file mode 100644
index 000000000000..6248e96253a2
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/Kconfig
@@ -0,0 +1,21 @@
+config SFC_FALCON
+	tristate "Solarflare SFC4000 support"
+	depends on PCI
+	select MDIO
+	select CRC32
+	select I2C
+	select I2C_ALGOBIT
+	---help---
+	  This driver supports 10-gigabit Ethernet cards based on
+	  the Solarflare SFC4000 controller.
+
+	  To compile this driver as a module, choose M here.  The module
+	  will be called sfc-falcon.
+config SFC_FALCON_MTD
+	bool "Solarflare SFC4000 MTD support"
+	depends on SFC_FALCON && MTD && !(SFC_FALCON=y && MTD=m)
+	default y
+	---help---
+	  This exposes the on-board flash and/or EEPROM as MTD devices
+	  (e.g. /dev/mtd1).  This is required to update the boot
+	  configuration under Linux.
diff --git a/drivers/net/ethernet/sfc/falcon/Makefile b/drivers/net/ethernet/sfc/falcon/Makefile
new file mode 100644
index 000000000000..aa1b45979ca4
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/Makefile
@@ -0,0 +1,6 @@
+sfc-falcon-y		+= efx.o nic.o farch.o falcon.o tx.o rx.o selftest.o \
+			   ethtool.o qt202x_phy.o mdio_10g.o tenxpress.o \
+			   txc43128_phy.o falcon_boards.o
+
+sfc-falcon-$(CONFIG_SFC_FALCON_MTD)	+= mtd.o
+obj-$(CONFIG_SFC_FALCON)		+= sfc-falcon.o
diff --git a/drivers/net/ethernet/sfc/falcon/bitfield.h b/drivers/net/ethernet/sfc/falcon/bitfield.h
new file mode 100644
index 000000000000..230fd77bd311
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/bitfield.h
@@ -0,0 +1,542 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_BITFIELD_H
+#define EF4_BITFIELD_H
+
+/*
+ * Efx bitfield access
+ *
+ * Efx NICs make extensive use of bitfields up to 128 bits
+ * wide.  Since there is no native 128-bit datatype on most systems,
+ * and since 64-bit datatypes are inefficient on 32-bit systems and
+ * vice versa, we wrap accesses in a way that uses the most efficient
+ * datatype.
+ *
+ * The NICs are PCI devices and therefore little-endian.  Since most
+ * of the quantities that we deal with are DMAed to/from host memory,
+ * we define our datatypes (ef4_oword_t, ef4_qword_t and
+ * ef4_dword_t) to be little-endian.
+ */
+
+/* Lowest bit numbers and widths */
+#define EF4_DUMMY_FIELD_LBN 0
+#define EF4_DUMMY_FIELD_WIDTH 0
+#define EF4_WORD_0_LBN 0
+#define EF4_WORD_0_WIDTH 16
+#define EF4_WORD_1_LBN 16
+#define EF4_WORD_1_WIDTH 16
+#define EF4_DWORD_0_LBN 0
+#define EF4_DWORD_0_WIDTH 32
+#define EF4_DWORD_1_LBN 32
+#define EF4_DWORD_1_WIDTH 32
+#define EF4_DWORD_2_LBN 64
+#define EF4_DWORD_2_WIDTH 32
+#define EF4_DWORD_3_LBN 96
+#define EF4_DWORD_3_WIDTH 32
+#define EF4_QWORD_0_LBN 0
+#define EF4_QWORD_0_WIDTH 64
+
+/* Specified attribute (e.g. LBN) of the specified field */
+#define EF4_VAL(field, attribute) field ## _ ## attribute
+/* Low bit number of the specified field */
+#define EF4_LOW_BIT(field) EF4_VAL(field, LBN)
+/* Bit width of the specified field */
+#define EF4_WIDTH(field) EF4_VAL(field, WIDTH)
+/* High bit number of the specified field */
+#define EF4_HIGH_BIT(field) (EF4_LOW_BIT(field) + EF4_WIDTH(field) - 1)
+/* Mask equal in width to the specified field.
+ *
+ * For example, a field with width 5 would have a mask of 0x1f.
+ *
+ * The maximum width mask that can be generated is 64 bits.
+ */
+#define EF4_MASK64(width)			\
+	((width) == 64 ? ~((u64) 0) :		\
+	 (((((u64) 1) << (width))) - 1))
+
+/* Mask equal in width to the specified field.
+ *
+ * For example, a field with width 5 would have a mask of 0x1f.
+ *
+ * The maximum width mask that can be generated is 32 bits.  Use
+ * EF4_MASK64 for higher width fields.
+ */
+#define EF4_MASK32(width)			\
+	((width) == 32 ? ~((u32) 0) :		\
+	 (((((u32) 1) << (width))) - 1))
+
+/* A doubleword (i.e. 4 byte) datatype - little-endian in HW */
+typedef union ef4_dword {
+	__le32 u32[1];
+} ef4_dword_t;
+
+/* A quadword (i.e. 8 byte) datatype - little-endian in HW */
+typedef union ef4_qword {
+	__le64 u64[1];
+	__le32 u32[2];
+	ef4_dword_t dword[2];
+} ef4_qword_t;
+
+/* An octword (eight-word, i.e. 16 byte) datatype - little-endian in HW */
+typedef union ef4_oword {
+	__le64 u64[2];
+	ef4_qword_t qword[2];
+	__le32 u32[4];
+	ef4_dword_t dword[4];
+} ef4_oword_t;
+
+/* Format string and value expanders for printk */
+#define EF4_DWORD_FMT "%08x"
+#define EF4_QWORD_FMT "%08x:%08x"
+#define EF4_OWORD_FMT "%08x:%08x:%08x:%08x"
+#define EF4_DWORD_VAL(dword)				\
+	((unsigned int) le32_to_cpu((dword).u32[0]))
+#define EF4_QWORD_VAL(qword)				\
+	((unsigned int) le32_to_cpu((qword).u32[1])),	\
+	((unsigned int) le32_to_cpu((qword).u32[0]))
+#define EF4_OWORD_VAL(oword)				\
+	((unsigned int) le32_to_cpu((oword).u32[3])),	\
+	((unsigned int) le32_to_cpu((oword).u32[2])),	\
+	((unsigned int) le32_to_cpu((oword).u32[1])),	\
+	((unsigned int) le32_to_cpu((oword).u32[0]))
+
+/*
+ * Extract bit field portion [low,high] from the native-endian element
+ * which contains bits [min,max].  All four bounds are inclusive.
+ *
+ * For example, suppose "element" represents the high 32 bits of a
+ * 64-bit value, and we wish to extract the bits belonging to the bit
+ * field occupying bits 28-45 of this 64-bit value.
+ *
+ * Then EF4_EXTRACT_NATIVE ( element, 32, 63, 28, 45 ) would give
+ *
+ *   ( element ) << 4
+ *
+ * The result will contain the relevant bits filled in within the range
+ * [0,high-low], with garbage in bits [high-low+1,...).
+ */
+#define EF4_EXTRACT_NATIVE(native_element, min, max, low, high)		\
+	((low) > (max) || (high) < (min) ? 0 :				\
+	 (low) > (min) ?						\
+	 (native_element) >> ((low) - (min)) :				\
+	 (native_element) << ((min) - (low)))
+
+/*
+ * Extract bit field portion [low,high] from the 64-bit little-endian
+ * element which contains bits [min,max] (bounds are inclusive)
+ */
+#define EF4_EXTRACT64(element, min, max, low, high)			\
+	EF4_EXTRACT_NATIVE(le64_to_cpu(element), min, max, low, high)
+
+/*
+ * Extract bit field portion [low,high] from the 32-bit little-endian
+ * element which contains bits [min,max] (bounds are inclusive)
+ */
+#define EF4_EXTRACT32(element, min, max, low, high)			\
+	EF4_EXTRACT_NATIVE(le32_to_cpu(element), min, max, low, high)
+
+#define EF4_EXTRACT_OWORD64(oword, low, high)				\
+	((EF4_EXTRACT64((oword).u64[0], 0, 63, low, high) |		\
+	  EF4_EXTRACT64((oword).u64[1], 64, 127, low, high)) &		\
+	 EF4_MASK64((high) + 1 - (low)))
+
+#define EF4_EXTRACT_QWORD64(qword, low, high)				\
+	(EF4_EXTRACT64((qword).u64[0], 0, 63, low, high) &		\
+	 EF4_MASK64((high) + 1 - (low)))
+
+#define EF4_EXTRACT_OWORD32(oword, low, high)				\
+	((EF4_EXTRACT32((oword).u32[0], 0, 31, low, high) |		\
+	  EF4_EXTRACT32((oword).u32[1], 32, 63, low, high) |		\
+	  EF4_EXTRACT32((oword).u32[2], 64, 95, low, high) |		\
+	  EF4_EXTRACT32((oword).u32[3], 96, 127, low, high)) &		\
+	 EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_EXTRACT_QWORD32(qword, low, high)				\
+	((EF4_EXTRACT32((qword).u32[0], 0, 31, low, high) |		\
+	  EF4_EXTRACT32((qword).u32[1], 32, 63, low, high)) &		\
+	 EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_EXTRACT_DWORD(dword, low, high)			\
+	(EF4_EXTRACT32((dword).u32[0], 0, 31, low, high) &	\
+	 EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_OWORD_FIELD64(oword, field)				\
+	EF4_EXTRACT_OWORD64(oword, EF4_LOW_BIT(field),		\
+			    EF4_HIGH_BIT(field))
+
+#define EF4_QWORD_FIELD64(qword, field)				\
+	EF4_EXTRACT_QWORD64(qword, EF4_LOW_BIT(field),		\
+			    EF4_HIGH_BIT(field))
+
+#define EF4_OWORD_FIELD32(oword, field)				\
+	EF4_EXTRACT_OWORD32(oword, EF4_LOW_BIT(field),		\
+			    EF4_HIGH_BIT(field))
+
+#define EF4_QWORD_FIELD32(qword, field)				\
+	EF4_EXTRACT_QWORD32(qword, EF4_LOW_BIT(field),		\
+			    EF4_HIGH_BIT(field))
+
+#define EF4_DWORD_FIELD(dword, field)				\
+	EF4_EXTRACT_DWORD(dword, EF4_LOW_BIT(field),		\
+			  EF4_HIGH_BIT(field))
+
+#define EF4_OWORD_IS_ZERO64(oword)					\
+	(((oword).u64[0] | (oword).u64[1]) == (__force __le64) 0)
+
+#define EF4_QWORD_IS_ZERO64(qword)					\
+	(((qword).u64[0]) == (__force __le64) 0)
+
+#define EF4_OWORD_IS_ZERO32(oword)					     \
+	(((oword).u32[0] | (oword).u32[1] | (oword).u32[2] | (oword).u32[3]) \
+	 == (__force __le32) 0)
+
+#define EF4_QWORD_IS_ZERO32(qword)					\
+	(((qword).u32[0] | (qword).u32[1]) == (__force __le32) 0)
+
+#define EF4_DWORD_IS_ZERO(dword)					\
+	(((dword).u32[0]) == (__force __le32) 0)
+
+#define EF4_OWORD_IS_ALL_ONES64(oword)					\
+	(((oword).u64[0] & (oword).u64[1]) == ~((__force __le64) 0))
+
+#define EF4_QWORD_IS_ALL_ONES64(qword)					\
+	((qword).u64[0] == ~((__force __le64) 0))
+
+#define EF4_OWORD_IS_ALL_ONES32(oword)					\
+	(((oword).u32[0] & (oword).u32[1] & (oword).u32[2] & (oword).u32[3]) \
+	 == ~((__force __le32) 0))
+
+#define EF4_QWORD_IS_ALL_ONES32(qword)					\
+	(((qword).u32[0] & (qword).u32[1]) == ~((__force __le32) 0))
+
+#define EF4_DWORD_IS_ALL_ONES(dword)					\
+	((dword).u32[0] == ~((__force __le32) 0))
+
+#if BITS_PER_LONG == 64
+#define EF4_OWORD_FIELD		EF4_OWORD_FIELD64
+#define EF4_QWORD_FIELD		EF4_QWORD_FIELD64
+#define EF4_OWORD_IS_ZERO	EF4_OWORD_IS_ZERO64
+#define EF4_QWORD_IS_ZERO	EF4_QWORD_IS_ZERO64
+#define EF4_OWORD_IS_ALL_ONES	EF4_OWORD_IS_ALL_ONES64
+#define EF4_QWORD_IS_ALL_ONES	EF4_QWORD_IS_ALL_ONES64
+#else
+#define EF4_OWORD_FIELD		EF4_OWORD_FIELD32
+#define EF4_QWORD_FIELD		EF4_QWORD_FIELD32
+#define EF4_OWORD_IS_ZERO	EF4_OWORD_IS_ZERO32
+#define EF4_QWORD_IS_ZERO	EF4_QWORD_IS_ZERO32
+#define EF4_OWORD_IS_ALL_ONES	EF4_OWORD_IS_ALL_ONES32
+#define EF4_QWORD_IS_ALL_ONES	EF4_QWORD_IS_ALL_ONES32
+#endif
+
+/*
+ * Construct bit field portion
+ *
+ * Creates the portion of the bit field [low,high] that lies within
+ * the range [min,max].  All four bounds are inclusive.
+ */
+#define EF4_INSERT_NATIVE64(min, max, low, high, value)		\
+	(((low > max) || (high < min)) ? 0 :			\
+	 ((low > min) ?						\
+	  (((u64) (value)) << (low - min)) :		\
+	  (((u64) (value)) >> (min - low))))
+
+#define EF4_INSERT_NATIVE32(min, max, low, high, value)		\
+	(((low > max) || (high < min)) ? 0 :			\
+	 ((low > min) ?						\
+	  (((u32) (value)) << (low - min)) :		\
+	  (((u32) (value)) >> (min - low))))
+
+#define EF4_INSERT_NATIVE(min, max, low, high, value)		\
+	((((max - min) >= 32) || ((high - low) >= 32)) ?	\
+	 EF4_INSERT_NATIVE64(min, max, low, high, value) :	\
+	 EF4_INSERT_NATIVE32(min, max, low, high, value))
+
+/*
+ * Construct bit field portion
+ *
+ * Creates the portion of the named bit field that lies within the
+ * range [min,max] (bounds are inclusive).
+ */
+#define EF4_INSERT_FIELD_NATIVE(min, max, field, value)		\
+	EF4_INSERT_NATIVE(min, max, EF4_LOW_BIT(field),		\
+			  EF4_HIGH_BIT(field), value)
+
+/*
+ * Construct bit field
+ *
+ * Creates the portion of the named bit fields that lie within the
+ * range [min,max] (bounds are inclusive).
+ */
+#define EF4_INSERT_FIELDS_NATIVE(min, max,				\
+				 field1, value1,			\
+				 field2, value2,			\
+				 field3, value3,			\
+				 field4, value4,			\
+				 field5, value5,			\
+				 field6, value6,			\
+				 field7, value7,			\
+				 field8, value8,			\
+				 field9, value9,			\
+				 field10, value10)			\
+	(EF4_INSERT_FIELD_NATIVE((min), (max), field1, (value1)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field2, (value2)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field3, (value3)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field4, (value4)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field5, (value5)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field6, (value6)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field7, (value7)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field8, (value8)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field9, (value9)) |	\
+	 EF4_INSERT_FIELD_NATIVE((min), (max), field10, (value10)))
+
+#define EF4_INSERT_FIELDS64(...)				\
+	cpu_to_le64(EF4_INSERT_FIELDS_NATIVE(__VA_ARGS__))
+
+#define EF4_INSERT_FIELDS32(...)				\
+	cpu_to_le32(EF4_INSERT_FIELDS_NATIVE(__VA_ARGS__))
+
+#define EF4_POPULATE_OWORD64(oword, ...) do {				\
+	(oword).u64[0] = EF4_INSERT_FIELDS64(0, 63, __VA_ARGS__);	\
+	(oword).u64[1] = EF4_INSERT_FIELDS64(64, 127, __VA_ARGS__);	\
+	} while (0)
+
+#define EF4_POPULATE_QWORD64(qword, ...) do {				\
+	(qword).u64[0] = EF4_INSERT_FIELDS64(0, 63, __VA_ARGS__);	\
+	} while (0)
+
+#define EF4_POPULATE_OWORD32(oword, ...) do {				\
+	(oword).u32[0] = EF4_INSERT_FIELDS32(0, 31, __VA_ARGS__);	\
+	(oword).u32[1] = EF4_INSERT_FIELDS32(32, 63, __VA_ARGS__);	\
+	(oword).u32[2] = EF4_INSERT_FIELDS32(64, 95, __VA_ARGS__);	\
+	(oword).u32[3] = EF4_INSERT_FIELDS32(96, 127, __VA_ARGS__);	\
+	} while (0)
+
+#define EF4_POPULATE_QWORD32(qword, ...) do {				\
+	(qword).u32[0] = EF4_INSERT_FIELDS32(0, 31, __VA_ARGS__);	\
+	(qword).u32[1] = EF4_INSERT_FIELDS32(32, 63, __VA_ARGS__);	\
+	} while (0)
+
+#define EF4_POPULATE_DWORD(dword, ...) do {				\
+	(dword).u32[0] = EF4_INSERT_FIELDS32(0, 31, __VA_ARGS__);	\
+	} while (0)
+
+#if BITS_PER_LONG == 64
+#define EF4_POPULATE_OWORD EF4_POPULATE_OWORD64
+#define EF4_POPULATE_QWORD EF4_POPULATE_QWORD64
+#else
+#define EF4_POPULATE_OWORD EF4_POPULATE_OWORD32
+#define EF4_POPULATE_QWORD EF4_POPULATE_QWORD32
+#endif
+
+/* Populate an octword field with various numbers of arguments */
+#define EF4_POPULATE_OWORD_10 EF4_POPULATE_OWORD
+#define EF4_POPULATE_OWORD_9(oword, ...) \
+	EF4_POPULATE_OWORD_10(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_8(oword, ...) \
+	EF4_POPULATE_OWORD_9(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_7(oword, ...) \
+	EF4_POPULATE_OWORD_8(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_6(oword, ...) \
+	EF4_POPULATE_OWORD_7(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_5(oword, ...) \
+	EF4_POPULATE_OWORD_6(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_4(oword, ...) \
+	EF4_POPULATE_OWORD_5(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_3(oword, ...) \
+	EF4_POPULATE_OWORD_4(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_2(oword, ...) \
+	EF4_POPULATE_OWORD_3(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_OWORD_1(oword, ...) \
+	EF4_POPULATE_OWORD_2(oword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_ZERO_OWORD(oword) \
+	EF4_POPULATE_OWORD_1(oword, EF4_DUMMY_FIELD, 0)
+#define EF4_SET_OWORD(oword) \
+	EF4_POPULATE_OWORD_4(oword, \
+			     EF4_DWORD_0, 0xffffffff, \
+			     EF4_DWORD_1, 0xffffffff, \
+			     EF4_DWORD_2, 0xffffffff, \
+			     EF4_DWORD_3, 0xffffffff)
+
+/* Populate a quadword field with various numbers of arguments */
+#define EF4_POPULATE_QWORD_10 EF4_POPULATE_QWORD
+#define EF4_POPULATE_QWORD_9(qword, ...) \
+	EF4_POPULATE_QWORD_10(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_8(qword, ...) \
+	EF4_POPULATE_QWORD_9(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_7(qword, ...) \
+	EF4_POPULATE_QWORD_8(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_6(qword, ...) \
+	EF4_POPULATE_QWORD_7(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_5(qword, ...) \
+	EF4_POPULATE_QWORD_6(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_4(qword, ...) \
+	EF4_POPULATE_QWORD_5(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_3(qword, ...) \
+	EF4_POPULATE_QWORD_4(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_2(qword, ...) \
+	EF4_POPULATE_QWORD_3(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_QWORD_1(qword, ...) \
+	EF4_POPULATE_QWORD_2(qword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_ZERO_QWORD(qword) \
+	EF4_POPULATE_QWORD_1(qword, EF4_DUMMY_FIELD, 0)
+#define EF4_SET_QWORD(qword) \
+	EF4_POPULATE_QWORD_2(qword, \
+			     EF4_DWORD_0, 0xffffffff, \
+			     EF4_DWORD_1, 0xffffffff)
+
+/* Populate a dword field with various numbers of arguments */
+#define EF4_POPULATE_DWORD_10 EF4_POPULATE_DWORD
+#define EF4_POPULATE_DWORD_9(dword, ...) \
+	EF4_POPULATE_DWORD_10(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_8(dword, ...) \
+	EF4_POPULATE_DWORD_9(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_7(dword, ...) \
+	EF4_POPULATE_DWORD_8(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_6(dword, ...) \
+	EF4_POPULATE_DWORD_7(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_5(dword, ...) \
+	EF4_POPULATE_DWORD_6(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_4(dword, ...) \
+	EF4_POPULATE_DWORD_5(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_3(dword, ...) \
+	EF4_POPULATE_DWORD_4(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_2(dword, ...) \
+	EF4_POPULATE_DWORD_3(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_POPULATE_DWORD_1(dword, ...) \
+	EF4_POPULATE_DWORD_2(dword, EF4_DUMMY_FIELD, 0, __VA_ARGS__)
+#define EF4_ZERO_DWORD(dword) \
+	EF4_POPULATE_DWORD_1(dword, EF4_DUMMY_FIELD, 0)
+#define EF4_SET_DWORD(dword) \
+	EF4_POPULATE_DWORD_1(dword, EF4_DWORD_0, 0xffffffff)
+
+/*
+ * Modify a named field within an already-populated structure.  Used
+ * for read-modify-write operations.
+ *
+ */
+#define EF4_INVERT_OWORD(oword) do {		\
+	(oword).u64[0] = ~((oword).u64[0]);	\
+	(oword).u64[1] = ~((oword).u64[1]);	\
+	} while (0)
+
+#define EF4_AND_OWORD(oword, from, mask)			\
+	do {							\
+		(oword).u64[0] = (from).u64[0] & (mask).u64[0];	\
+		(oword).u64[1] = (from).u64[1] & (mask).u64[1];	\
+	} while (0)
+
+#define EF4_OR_OWORD(oword, from, mask)				\
+	do {							\
+		(oword).u64[0] = (from).u64[0] | (mask).u64[0];	\
+		(oword).u64[1] = (from).u64[1] | (mask).u64[1];	\
+	} while (0)
+
+#define EF4_INSERT64(min, max, low, high, value)			\
+	cpu_to_le64(EF4_INSERT_NATIVE(min, max, low, high, value))
+
+#define EF4_INSERT32(min, max, low, high, value)			\
+	cpu_to_le32(EF4_INSERT_NATIVE(min, max, low, high, value))
+
+#define EF4_INPLACE_MASK64(min, max, low, high)				\
+	EF4_INSERT64(min, max, low, high, EF4_MASK64((high) + 1 - (low)))
+
+#define EF4_INPLACE_MASK32(min, max, low, high)				\
+	EF4_INSERT32(min, max, low, high, EF4_MASK32((high) + 1 - (low)))
+
+#define EF4_SET_OWORD64(oword, low, high, value) do {			\
+	(oword).u64[0] = (((oword).u64[0]				\
+			   & ~EF4_INPLACE_MASK64(0,  63, low, high))	\
+			  | EF4_INSERT64(0,  63, low, high, value));	\
+	(oword).u64[1] = (((oword).u64[1]				\
+			   & ~EF4_INPLACE_MASK64(64, 127, low, high))	\
+			  | EF4_INSERT64(64, 127, low, high, value));	\
+	} while (0)
+
+#define EF4_SET_QWORD64(qword, low, high, value) do {			\
+	(qword).u64[0] = (((qword).u64[0]				\
+			   & ~EF4_INPLACE_MASK64(0, 63, low, high))	\
+			  | EF4_INSERT64(0, 63, low, high, value));	\
+	} while (0)
+
+#define EF4_SET_OWORD32(oword, low, high, value) do {			\
+	(oword).u32[0] = (((oword).u32[0]				\
+			   & ~EF4_INPLACE_MASK32(0, 31, low, high))	\
+			  | EF4_INSERT32(0, 31, low, high, value));	\
+	(oword).u32[1] = (((oword).u32[1]				\
+			   & ~EF4_INPLACE_MASK32(32, 63, low, high))	\
+			  | EF4_INSERT32(32, 63, low, high, value));	\
+	(oword).u32[2] = (((oword).u32[2]				\
+			   & ~EF4_INPLACE_MASK32(64, 95, low, high))	\
+			  | EF4_INSERT32(64, 95, low, high, value));	\
+	(oword).u32[3] = (((oword).u32[3]				\
+			   & ~EF4_INPLACE_MASK32(96, 127, low, high))	\
+			  | EF4_INSERT32(96, 127, low, high, value));	\
+	} while (0)
+
+#define EF4_SET_QWORD32(qword, low, high, value) do {			\
+	(qword).u32[0] = (((qword).u32[0]				\
+			   & ~EF4_INPLACE_MASK32(0, 31, low, high))	\
+			  | EF4_INSERT32(0, 31, low, high, value));	\
+	(qword).u32[1] = (((qword).u32[1]				\
+			   & ~EF4_INPLACE_MASK32(32, 63, low, high))	\
+			  | EF4_INSERT32(32, 63, low, high, value));	\
+	} while (0)
+
+#define EF4_SET_DWORD32(dword, low, high, value) do {			\
+	(dword).u32[0] = (((dword).u32[0]				\
+			   & ~EF4_INPLACE_MASK32(0, 31, low, high))	\
+			  | EF4_INSERT32(0, 31, low, high, value));	\
+	} while (0)
+
+#define EF4_SET_OWORD_FIELD64(oword, field, value)			\
+	EF4_SET_OWORD64(oword, EF4_LOW_BIT(field),			\
+			 EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_QWORD_FIELD64(qword, field, value)			\
+	EF4_SET_QWORD64(qword, EF4_LOW_BIT(field),			\
+			 EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_OWORD_FIELD32(oword, field, value)			\
+	EF4_SET_OWORD32(oword, EF4_LOW_BIT(field),			\
+			 EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_QWORD_FIELD32(qword, field, value)			\
+	EF4_SET_QWORD32(qword, EF4_LOW_BIT(field),			\
+			 EF4_HIGH_BIT(field), value)
+
+#define EF4_SET_DWORD_FIELD(dword, field, value)			\
+	EF4_SET_DWORD32(dword, EF4_LOW_BIT(field),			\
+			 EF4_HIGH_BIT(field), value)
+
+
+
+#if BITS_PER_LONG == 64
+#define EF4_SET_OWORD_FIELD EF4_SET_OWORD_FIELD64
+#define EF4_SET_QWORD_FIELD EF4_SET_QWORD_FIELD64
+#else
+#define EF4_SET_OWORD_FIELD EF4_SET_OWORD_FIELD32
+#define EF4_SET_QWORD_FIELD EF4_SET_QWORD_FIELD32
+#endif
+
+/* Used to avoid compiler warnings about shift range exceeding width
+ * of the data types when dma_addr_t is only 32 bits wide.
+ */
+#define DMA_ADDR_T_WIDTH	(8 * sizeof(dma_addr_t))
+#define EF4_DMA_TYPE_WIDTH(width) \
+	(((width) < DMA_ADDR_T_WIDTH) ? (width) : DMA_ADDR_T_WIDTH)
+
+
+/* Static initialiser */
+#define EF4_OWORD32(a, b, c, d)				\
+	{ .u32 = { cpu_to_le32(a), cpu_to_le32(b),	\
+		   cpu_to_le32(c), cpu_to_le32(d) } }
+
+#endif /* EF4_BITFIELD_H */
diff --git a/drivers/net/ethernet/sfc/falcon/efx.c b/drivers/net/ethernet/sfc/falcon/efx.c
new file mode 100644
index 000000000000..5c5cb3c4c12e
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/efx.c
@@ -0,0 +1,3350 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/delay.h>
+#include <linux/notifier.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <linux/ethtool.h>
+#include <linux/topology.h>
+#include <linux/gfp.h>
+#include <linux/aer.h>
+#include <linux/interrupt.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "selftest.h"
+
+#include "workarounds.h"
+
+/**************************************************************************
+ *
+ * Type name strings
+ *
+ **************************************************************************
+ */
+
+/* Loopback mode names, indexed by LOOPBACK_* value (see LOOPBACK_MODE()) */
+const unsigned int ef4_loopback_mode_max = LOOPBACK_MAX;
+const char *const ef4_loopback_mode_names[] = {
+	[LOOPBACK_NONE]		= "NONE",
+	[LOOPBACK_DATA]		= "DATAPATH",
+	[LOOPBACK_GMAC]		= "GMAC",
+	[LOOPBACK_XGMII]	= "XGMII",
+	[LOOPBACK_XGXS]		= "XGXS",
+	[LOOPBACK_XAUI]		= "XAUI",
+	[LOOPBACK_GMII]		= "GMII",
+	[LOOPBACK_SGMII]	= "SGMII",
+	[LOOPBACK_XGBR]		= "XGBR",
+	[LOOPBACK_XFI]		= "XFI",
+	[LOOPBACK_XAUI_FAR]	= "XAUI_FAR",
+	[LOOPBACK_GMII_FAR]	= "GMII_FAR",
+	[LOOPBACK_SGMII_FAR]	= "SGMII_FAR",
+	[LOOPBACK_XFI_FAR]	= "XFI_FAR",
+	[LOOPBACK_GPHY]		= "GPHY",
+	[LOOPBACK_PHYXS]	= "PHYXS",
+	[LOOPBACK_PCS]		= "PCS",
+	[LOOPBACK_PMAPMD]	= "PMA/PMD",
+	[LOOPBACK_XPORT]	= "XPORT",
+	[LOOPBACK_XGMII_WS]	= "XGMII_WS",
+	[LOOPBACK_XAUI_WS]	= "XAUI_WS",
+	[LOOPBACK_XAUI_WS_FAR]  = "XAUI_WS_FAR",
+	[LOOPBACK_XAUI_WS_NEAR] = "XAUI_WS_NEAR",
+	[LOOPBACK_GMII_WS]	= "GMII_WS",
+	[LOOPBACK_XFI_WS]	= "XFI_WS",
+	[LOOPBACK_XFI_WS_FAR]	= "XFI_WS_FAR",
+	[LOOPBACK_PHYXS_WS]	= "PHYXS_WS",
+};
+
+const unsigned int ef4_reset_type_max = RESET_TYPE_MAX;
+const char *const ef4_reset_type_names[] = {	/* indexed by RESET_TYPE_* */
+	[RESET_TYPE_INVISIBLE]          = "INVISIBLE",
+	[RESET_TYPE_ALL]                = "ALL",
+	[RESET_TYPE_RECOVER_OR_ALL]     = "RECOVER_OR_ALL",
+	[RESET_TYPE_WORLD]              = "WORLD",
+	[RESET_TYPE_RECOVER_OR_DISABLE] = "RECOVER_OR_DISABLE",
+	[RESET_TYPE_DATAPATH]           = "DATAPATH",
+	[RESET_TYPE_DISABLE]            = "DISABLE",
+	[RESET_TYPE_TX_WATCHDOG]        = "TX_WATCHDOG",
+	[RESET_TYPE_INT_ERROR]          = "INT_ERROR",
+	[RESET_TYPE_RX_RECOVERY]        = "RX_RECOVERY",
+	[RESET_TYPE_DMA_ERROR]          = "DMA_ERROR",
+	[RESET_TYPE_TX_SKIP]            = "TX_SKIP",
+};
+
+/* Reset workqueue. If any NIC has a hardware failure then a reset will be
+ * queued onto this work queue. This is not a per-NIC work queue, because
+ * ef4_reset_work() acquires the RTNL lock, so resets are naturally serialised.
+ */
+static struct workqueue_struct *reset_workqueue;
+
+/* How often and how many times to poll for a reset while waiting for a
+ * BIST that another function started to complete.
+ */
+#define BIST_WAIT_DELAY_MS	100	/* delay between polls */
+#define BIST_WAIT_DELAY_COUNT	100	/* number of polls */
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ *************************************************************************/
+
+/*
+ * Use separate channels for TX and RX events
+ *
+ * Set this to 1 to use separate channels for TX and RX. It allows us
+ * to control interrupt affinity separately for TX and RX.
+ *
+ * This is only used in MSI-X interrupt mode (read-only parameter, 0444)
+ */
+bool ef4_separate_tx_channels;
+module_param(ef4_separate_tx_channels, bool, 0444);
+MODULE_PARM_DESC(ef4_separate_tx_channels,
+		 "Use separate channels for TX and RX");
+
+/* This is the weight assigned to each of the (per-channel) virtual
+ * NAPI devices.
+ */
+static int napi_weight = 64;
+
+/* This is the time (in jiffies) between invocations of the hardware
+ * monitor.
+ * On Falcon-based NICs, this will:
+ * - Check the on-board hardware monitor;
+ * - Poll the link state and reconfigure the hardware as necessary.
+ * On Siena-based NICs for power systems with EEH support, this will give EEH a
+ * chance to start.
+ */
+static unsigned int ef4_monitor_interval = 1 * HZ;
+
+/* Initial interrupt moderation settings.  They can be modified after
+ * module load with ethtool.
+ *
+ * The default for RX should strike a balance between increasing the
+ * round-trip latency and reducing overhead.
+ */
+static unsigned int rx_irq_mod_usec = 60;
+
+/* Initial interrupt moderation settings.  They can be modified after
+ * module load with ethtool.
+ *
+ * This default is chosen to ensure that a 10G link does not go idle
+ * while a TX queue is stopped after it has become full.  A queue is
+ * restarted when it drops below half full.  The time this takes (assuming
+ * worst case 3 descriptors per packet and 1024 descriptors) is
+ *   512 / 3 * 1.2 = 205 usec.
+ */
+static unsigned int tx_irq_mod_usec = 150;
+
+/* This is the first interrupt mode to try out of:
+ * 0 => MSI-X (the default, as this static is zero-initialised)
+ * 1 => MSI
+ * 2 => legacy
+ */
+static unsigned int interrupt_mode;
+
+/* This is the requested number of CPUs to use for Receive-Side Scaling (RSS),
+ * i.e. the number of CPUs among which we may distribute simultaneous
+ * interrupt handling.
+ *
+ * Cards without MSI-X will only target one CPU via legacy or MSI interrupt.
+ * The default (0) means to assign an interrupt to each core.
+ */
+static unsigned int rss_cpus;
+module_param(rss_cpus, uint, 0444);
+MODULE_PARM_DESC(rss_cpus, "Number of CPUs to use for Receive-Side Scaling");
+
+static bool phy_flash_cfg;
+module_param(phy_flash_cfg, bool, 0644);
+MODULE_PARM_DESC(phy_flash_cfg, "Set PHYs into reflash mode initially");
+
+static unsigned irq_adapt_low_thresh = 8000;
+module_param(irq_adapt_low_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_low_thresh,
+		 "Threshold score for reducing IRQ moderation");
+
+static unsigned irq_adapt_high_thresh = 16000;
+module_param(irq_adapt_high_thresh, uint, 0644);
+MODULE_PARM_DESC(irq_adapt_high_thresh,
+		 "Threshold score for increasing IRQ moderation");
+
+static unsigned debug = (NETIF_MSG_DRV | NETIF_MSG_PROBE |
+			 NETIF_MSG_LINK | NETIF_MSG_IFDOWN |
+			 NETIF_MSG_IFUP | NETIF_MSG_RX_ERR |
+			 NETIF_MSG_TX_ERR | NETIF_MSG_HW);
+module_param(debug, uint, 0);	/* perm 0: no sysfs entry */
+MODULE_PARM_DESC(debug, "Bitmapped debugging message enable value");
+
+/**************************************************************************
+ *
+ * Utility functions and prototypes
+ *
+ *************************************************************************/
+
+static int ef4_soft_enable_interrupts(struct ef4_nic *efx);
+static void ef4_soft_disable_interrupts(struct ef4_nic *efx);
+static void ef4_remove_channel(struct ef4_channel *channel);
+static void ef4_remove_channels(struct ef4_nic *efx);
+static const struct ef4_channel_type ef4_default_channel_type;
+static void ef4_remove_port(struct ef4_nic *efx);
+static void ef4_init_napi_channel(struct ef4_channel *channel);
+static void ef4_fini_napi(struct ef4_nic *efx);
+static void ef4_fini_napi_channel(struct ef4_channel *channel);
+static void ef4_fini_struct(struct ef4_nic *efx);
+static void ef4_start_all(struct ef4_nic *efx);
+static void ef4_stop_all(struct ef4_nic *efx);
+
+#define EF4_ASSERT_RESET_SERIALISED(efx)		\
+	do {	/* RTNL must be held in these states */	\
+		if (((efx)->state == STATE_READY) ||	\
+		    ((efx)->state == STATE_RECOVERY) ||	\
+		    ((efx)->state == STATE_DISABLED))	\
+			ASSERT_RTNL();			\
+	} while (0)
+
+static int ef4_check_disabled(struct ef4_nic *efx)
+{
+	/* Only the disabled and recovery states make the device unusable */
+	if (efx->state != STATE_DISABLED && efx->state != STATE_RECOVERY)
+		return 0;
+	netif_err(efx, drv, efx->net_dev,
+		  "device is disabled due to earlier errors\n");
+	return -EIO;
+}
+
+/**************************************************************************
+ *
+ * Event queue processing
+ *
+ *************************************************************************/
+
+/* Process channel's event queue
+ *
+ * Passes @budget to ef4_nic_process_eventq() and returns what it reports
+ * as spent (0 if the channel is not enabled).  The caller must
+ * guarantee that this is never called concurrently on the same
+ * channel, though different channels may be processed concurrently.
+ */
+static int ef4_process_channel(struct ef4_channel *channel, int budget)
+{
+	struct ef4_tx_queue *tx_queue;
+	int spent;
+
+	if (unlikely(!channel->enabled))
+		return 0;
+
+	ef4_for_each_channel_tx_queue(tx_queue, channel) {
+		tx_queue->pkts_compl = 0;
+		tx_queue->bytes_compl = 0;
+	}
+
+	spent = ef4_nic_process_eventq(channel, budget);
+	if (spent && ef4_channel_has_rx_queue(channel)) {
+		struct ef4_rx_queue *rx_queue =
+			ef4_channel_get_rx_queue(channel);
+
+		ef4_rx_flush_packet(channel);
+		ef4_fast_push_rx_descriptors(rx_queue, true);
+	}
+
+	/* Update BQL with the completions counted during this pass */
+	ef4_for_each_channel_tx_queue(tx_queue, channel) {
+		if (tx_queue->bytes_compl) {
+			netdev_tx_completed_queue(tx_queue->core_txq,
+				tx_queue->pkts_compl, tx_queue->bytes_compl);
+		}
+	}
+
+	return spent;
+}
+
+/* Adapt a channel's IRQ moderation by one step according to its score,
+ * then reset irq_count and irq_mod_score.  Called from ef4_poll(), the
+ * NAPI poll handler: NAPI guarantees serialisation of polls of the same
+ * device, which provides the guarantee required by ef4_process_channel().
+ */
+static void ef4_update_irq_mod(struct ef4_nic *efx, struct ef4_channel *channel)
+{
+	int step = efx->irq_mod_step_us;
+
+	if (channel->irq_mod_score < irq_adapt_low_thresh) {
+		if (channel->irq_moderation_us > step) {
+			channel->irq_moderation_us -= step;
+			efx->type->push_irq_moderation(channel);
+		}
+	} else if (channel->irq_mod_score > irq_adapt_high_thresh) {
+		if (channel->irq_moderation_us <
+		    efx->irq_rx_moderation_us) {
+			channel->irq_moderation_us += step;
+			efx->type->push_irq_moderation(channel);
+		}
+	}
+
+	channel->irq_count = 0;
+	channel->irq_mod_score = 0;
+}
+
+static int ef4_poll(struct napi_struct *napi, int budget)
+{
+	struct ef4_channel *channel =
+		container_of(napi, struct ef4_channel, napi_str);
+	struct ef4_nic *efx = channel->efx;
+	int spent;
+
+	if (!ef4_channel_lock_napi(channel))
+		return budget;	/* lock not taken: report budget as fully used */
+
+	netif_vdbg(efx, intr, efx->net_dev,
+		   "channel %d NAPI poll executing on CPU %d\n",
+		   channel->channel, raw_smp_processor_id());
+
+	spent = ef4_process_channel(channel, budget);
+
+	if (spent < budget) {
+		/* Adaptive moderation is re-evaluated on every 1000th such poll */
+		if (ef4_channel_has_rx_queue(channel) &&
+		    efx->irq_rx_adaptive &&
+		    unlikely(++channel->irq_count == 1000)) {
+			ef4_update_irq_mod(efx, channel);
+		}
+
+		ef4_filter_rfs_expire(channel);
+
+		/* There is no race here; although napi_disable() will
+		 * only wait for napi_complete(), this isn't a problem
+		 * since ef4_nic_eventq_read_ack() will have no effect if
+		 * interrupts have already been disabled.
+		 */
+		napi_complete(napi);
+		ef4_nic_eventq_read_ack(channel);
+	}
+
+	ef4_channel_unlock_napi(channel);
+	return spent;
+}
+
+/* Create event queue
+ * Event queue memory allocations are done only once.  If the channel
+ * is reset, the memory buffer will be reused; this guards against
+ * errors during channel reset and also simplifies interrupt handling.
+ */
+static int ef4_probe_eventq(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+	unsigned long entries;
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "chan %d create event queue\n", channel->channel);
+
+	/* Room for one event per tx and rx buffer plus 128 extra (link state
+	 * events, MCDI completions), rounded up to a power of two. */
+	entries = roundup_pow_of_two(efx->rxq_entries + efx->txq_entries + 128);
+	EF4_BUG_ON_PARANOID(entries > EF4_MAX_EVQ_SIZE);
+	channel->eventq_mask = max(entries, EF4_MIN_EVQ_SIZE) - 1;
+
+	return ef4_nic_probe_eventq(channel);
+}
+
+/* Prepare channel's event queue; returns ef4_nic_init_eventq()'s result */
+static int ef4_init_eventq(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+	int rc;
+
+	EF4_WARN_ON_PARANOID(channel->eventq_init);
+
+	netif_dbg(efx, drv, efx->net_dev,
+		  "chan %d init event queue\n", channel->channel);
+
+	rc = ef4_nic_init_eventq(channel);
+	if (rc == 0) {	/* on success only: push moderation, reset read pointer */
+		efx->type->push_irq_moderation(channel);
+		channel->eventq_read_ptr = 0;
+		channel->eventq_init = true;
+	}
+	return rc;
+}
+
+/* Enable event queue processing and NAPI, then acknowledge the queue */
+void ef4_start_eventq(struct ef4_channel *channel)
+{
+	netif_dbg(channel->efx, ifup, channel->efx->net_dev,
+		  "chan %d start event queue\n", channel->channel);
+
+	/* Make sure the NAPI handler sees the enabled flag set */
+	channel->enabled = true;
+	smp_wmb();
+
+	ef4_channel_enable(channel);
+	napi_enable(&channel->napi_str);
+	ef4_nic_eventq_read_ack(channel);
+}
+
+/* Disable event queue processing and NAPI (no-op if already disabled) */
+void ef4_stop_eventq(struct ef4_channel *channel)
+{
+	if (!channel->enabled)
+		return;
+
+	napi_disable(&channel->napi_str);
+	while (!ef4_channel_disable(channel))	/* retry until it succeeds */
+		usleep_range(1000, 20000);
+	channel->enabled = false;
+}
+
+static void ef4_fini_eventq(struct ef4_channel *channel)
+{
+	if (!channel->eventq_init)
+		return;	/* not initialised, or already finalised */
+
+	netif_dbg(channel->efx, drv, channel->efx->net_dev,
+		  "chan %d fini event queue\n", channel->channel);
+
+	ef4_nic_fini_eventq(channel);
+	channel->eventq_init = false;
+}
+
+static void ef4_remove_eventq(struct ef4_channel *channel)
+{
+	netif_dbg(channel->efx, drv, channel->efx->net_dev,
+		  "chan %d remove event queue\n", channel->channel);
+	/* Counterpart of ef4_probe_eventq() */
+	ef4_nic_remove_eventq(channel);
+}
+
+/**************************************************************************
+ *
+ * Channel handling
+ *
+ *************************************************************************/
+
+/* Allocate and initialise channel @i (NULL on failure); @old_channel unused */
+static struct ef4_channel *
+ef4_alloc_channel(struct ef4_nic *efx, int i, struct ef4_channel *old_channel)
+{
+	struct ef4_channel *channel;
+	struct ef4_rx_queue *rx_queue;
+	struct ef4_tx_queue *tx_queue;
+	int j;
+
+	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
+	if (!channel)
+		return NULL;
+
+	channel->efx = efx;
+	channel->channel = i;
+	channel->type = &ef4_default_channel_type;
+
+	for (j = 0; j < EF4_TXQ_TYPES; j++) {
+		tx_queue = &channel->tx_queue[j];
+		tx_queue->efx = efx;
+		tx_queue->queue = i * EF4_TXQ_TYPES + j;
+		tx_queue->channel = channel;
+	}
+
+	rx_queue = &channel->rx_queue;
+	rx_queue->efx = efx;
+	setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
+		    (unsigned long)rx_queue);
+
+	return channel;
+}
+
+/* Allocate a channel as a copy of @old_channel's parameters (but not
+ * its resources: NAPI, event queue and ring state are reset); NULL on
+ * allocation failure. */
+static struct ef4_channel *
+ef4_copy_channel(const struct ef4_channel *old_channel)
+{
+	struct ef4_channel *channel;
+	struct ef4_rx_queue *rx_queue;
+	struct ef4_tx_queue *tx_queue;
+	int j;
+
+	channel = kmalloc(sizeof(*channel), GFP_KERNEL);
+	if (!channel)
+		return NULL;
+
+	*channel = *old_channel;
+
+	channel->napi_dev = NULL;
+	INIT_HLIST_NODE(&channel->napi_str.napi_hash_node);
+	channel->napi_str.napi_id = 0;
+	channel->napi_str.state = 0;
+	memset(&channel->eventq, 0, sizeof(channel->eventq));
+
+	for (j = 0; j < EF4_TXQ_TYPES; j++) {
+		tx_queue = &channel->tx_queue[j];
+		if (tx_queue->channel)	/* re-point only queues that had an owner */
+			tx_queue->channel = channel;
+		tx_queue->buffer = NULL;
+		memset(&tx_queue->txd, 0, sizeof(tx_queue->txd));
+	}
+
+	rx_queue = &channel->rx_queue;
+	rx_queue->buffer = NULL;
+	memset(&rx_queue->rxd, 0, sizeof(rx_queue->rxd));
+	setup_timer(&rx_queue->slow_fill, ef4_rx_slow_fill,
+		    (unsigned long)rx_queue);
+
+	return channel;
+}
+
+static int ef4_probe_channel(struct ef4_channel *channel)
+{
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_rx_queue *rx_queue;
+	int rc;
+
+	netif_dbg(channel->efx, probe, channel->efx->net_dev,
+		  "creating channel %d\n", channel->channel);
+	/* Probe order: type hook, event queue, TX queues, then RX queues */
+	rc = channel->type->pre_probe(channel);
+	if (rc)
+		goto fail;
+
+	rc = ef4_probe_eventq(channel);
+	if (rc)
+		goto fail;
+
+	ef4_for_each_channel_tx_queue(tx_queue, channel) {
+		rc = ef4_probe_tx_queue(tx_queue);
+		if (rc)
+			goto fail;
+	}
+
+	ef4_for_each_channel_rx_queue(rx_queue, channel) {
+		rc = ef4_probe_rx_queue(rx_queue);
+		if (rc)
+			goto fail;
+	}
+
+	return 0;
+
+fail:
+	ef4_remove_channel(channel);	/* tear down the whole channel */
+	return rc;
+}
+
+static void
+ef4_get_channel_name(struct ef4_channel *channel, char *buf, size_t len)
+{
+	struct ef4_nic *efx = channel->efx;
+	int number = channel->channel;
+	const char *type = "";
+
+	/* With a non-zero TX offset, label channels as -rx or -tx */
+	if (efx->tx_channel_offset != 0) {
+		if (channel->channel < efx->tx_channel_offset) {
+			type = "-rx";
+		} else {
+			type = "-tx";
+			number -= efx->tx_channel_offset;
+		}
+	}
+	snprintf(buf, len, "%s%s-%d", efx->name, type, number);
+}
+
+static void ef4_set_channel_names(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)	/* names go in msi_context[] */
+		channel->type->get_name(channel,
+					efx->msi_context[channel->channel].name,
+					sizeof(efx->msi_context[0].name));
+}
+
+static int ef4_probe_channels(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+	int rc;
+
+	/* Restart special buffer allocation from the start of the table */
+	efx->next_buffer_table = 0;
+
+	/* Probe channels in reverse, so that any 'extra' channels
+	 * use the start of the buffer table. This allows the traffic
+	 * channels to be resized without moving them or wasting the
+	 * entries before them.
+	 */
+	ef4_for_each_channel_rev(channel, efx) {
+		rc = ef4_probe_channel(channel);
+		if (rc) {
+			netif_err(efx, probe, efx->net_dev,
+				  "failed to create channel %d\n",
+				  channel->channel);
+			goto fail;
+		}
+	}
+	ef4_set_channel_names(efx);
+
+	return 0;
+
+fail:
+	ef4_remove_channels(efx);	/* removes every channel, probed or not */
+	return rc;
+}
+
+/* Channels are shut down and reinitialised whilst the NIC is running
+ * to propagate configuration changes (MTU, checksum offload), or
+ * to clear hardware error conditions.
+ */
+static void ef4_start_datapath(struct ef4_nic *efx)
+{
+	netdev_features_t old_features = efx->net_dev->features;
+	bool old_rx_scatter = efx->rx_scatter;
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_rx_queue *rx_queue;
+	struct ef4_channel *channel;
+	size_t rx_buf_len;
+
+	/* Calculate the rx buffer allocation parameters required to
+	 * support the current MTU, including padding for header
+	 * alignment and overruns.
+	 */
+	efx->rx_dma_len = (efx->rx_prefix_size +
+			   EF4_MAX_FRAME_LEN(efx->net_dev->mtu) +
+			   efx->type->rx_buffer_padding);
+	rx_buf_len = (sizeof(struct ef4_rx_page_state) +
+		      efx->rx_ip_align + efx->rx_dma_len);
+	if (rx_buf_len <= PAGE_SIZE) {
+		efx->rx_scatter = efx->type->always_rx_scatter;
+		efx->rx_buffer_order = 0;
+	} else if (efx->type->can_rx_scatter) {
+		BUILD_BUG_ON(EF4_RX_USR_BUF_SIZE % L1_CACHE_BYTES);
+		BUILD_BUG_ON(sizeof(struct ef4_rx_page_state) +
+			     2 * ALIGN(NET_IP_ALIGN + EF4_RX_USR_BUF_SIZE,
+				       EF4_RX_BUF_ALIGNMENT) >
+			     PAGE_SIZE);
+		efx->rx_scatter = true;
+		efx->rx_dma_len = EF4_RX_USR_BUF_SIZE;
+		efx->rx_buffer_order = 0;
+	} else {
+		efx->rx_scatter = false;
+		efx->rx_buffer_order = get_order(rx_buf_len);
+	}
+
+	ef4_rx_config_page_split(efx);
+	if (efx->rx_buffer_order)
+		netif_dbg(efx, drv, efx->net_dev,
+			  "RX buf len=%u; page order=%u batch=%u\n",
+			  efx->rx_dma_len, efx->rx_buffer_order,
+			  efx->rx_pages_per_batch);
+	else
+		netif_dbg(efx, drv, efx->net_dev,
+			  "RX buf len=%u step=%u bpp=%u; page batch=%u\n",
+			  efx->rx_dma_len, efx->rx_page_buf_step,
+			  efx->rx_bufs_per_page, efx->rx_pages_per_batch);
+
+	/* Restore previously fixed features in hw_features and remove
+	 * features which are fixed now
+	 */
+	efx->net_dev->hw_features |= efx->net_dev->features;
+	efx->net_dev->hw_features &= ~efx->fixed_features;
+	efx->net_dev->features |= efx->fixed_features;
+	if (efx->net_dev->features != old_features)
+		netdev_features_change(efx->net_dev);
+
+	/* RX filters may also have scatter-enabled flags */
+	if (efx->rx_scatter != old_rx_scatter)
+		efx->type->filter_update_rx_scatter(efx);
+
+	/* We must keep at least one descriptor in a TX ring empty.
+	 * We could avoid this when the queue size does not exactly
+	 * match the hardware ring size, but it's not that important.
+	 * Therefore we stop the queue when one more skb might fill
+	 * the ring completely.  We wake it when half way back to
+	 * empty.
+	 */
+	efx->txq_stop_thresh = efx->txq_entries - ef4_tx_max_skb_descs(efx);
+	efx->txq_wake_thresh = efx->txq_stop_thresh / 2;
+
+	/* Initialise the channels, counting each queue in active_queues */
+	ef4_for_each_channel(channel, efx) {
+		ef4_for_each_channel_tx_queue(tx_queue, channel) {
+			ef4_init_tx_queue(tx_queue);
+			atomic_inc(&efx->active_queues);
+		}
+
+		ef4_for_each_channel_rx_queue(rx_queue, channel) {
+			ef4_init_rx_queue(rx_queue);
+			atomic_inc(&efx->active_queues);
+			ef4_stop_eventq(channel);
+			ef4_fast_push_rx_descriptors(rx_queue, false);
+			ef4_start_eventq(channel);
+		}
+
+		WARN_ON(channel->rx_pkt_n_frags);
+	}
+
+	if (netif_device_present(efx->net_dev))
+		netif_tx_wake_all_queues(efx->net_dev);
+}
+
+static void ef4_stop_datapath(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_rx_queue *rx_queue;
+	int rc;
+
+	EF4_ASSERT_RESET_SERIALISED(efx);
+	BUG_ON(efx->port_enabled);
+
+	/* Stop RX refill */
+	ef4_for_each_channel(channel, efx) {
+		ef4_for_each_channel_rx_queue(rx_queue, channel)
+			rx_queue->refill_enabled = false;
+	}
+
+	ef4_for_each_channel(channel, efx) {
+		/* RX packet processing is pipelined, so wait for the
+		 * NAPI handler to complete.  At least event queue 0
+		 * might be kept active by non-data events, so don't
+		 * use napi_synchronize() but actually disable NAPI
+		 * temporarily.
+		 */
+		if (ef4_channel_has_rx_queue(channel)) {
+			ef4_stop_eventq(channel);
+			ef4_start_eventq(channel);
+		}
+	}
+
+	rc = efx->type->fini_dmaq(efx);
+	if (rc && EF4_WORKAROUND_7803(efx)) {
+		/* Schedule a reset to recover from the flush failure. The
+		 * descriptor caches reference memory we're about to free,
+		 * but falcon_reconfigure_mac_wrapper() won't reconnect
+		 * the MACs because of the pending reset.
+		 */
+		netif_err(efx, drv, efx->net_dev,
+			  "Resetting to recover from flush failure\n");
+		ef4_schedule_reset(efx, RESET_TYPE_ALL);
+	} else if (rc) {
+		netif_err(efx, drv, efx->net_dev, "failed to flush queues\n");
+	} else {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "successfully flushed all queues\n");
+	}
+	/* Whatever the flush result, finalise every RX and TX queue */
+	ef4_for_each_channel(channel, efx) {
+		ef4_for_each_channel_rx_queue(rx_queue, channel)
+			ef4_fini_rx_queue(rx_queue);
+		ef4_for_each_possible_channel_tx_queue(tx_queue, channel)
+			ef4_fini_tx_queue(tx_queue);
+	}
+}
+
+static void ef4_remove_channel(struct ef4_channel *channel)
+{
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_rx_queue *rx_queue;
+
+	netif_dbg(channel->efx, drv, channel->efx->net_dev,
+		  "destroy chan %d\n", channel->channel);
+	/* RX queues, all possible TX queues, event queue, then the type hook */
+	ef4_for_each_channel_rx_queue(rx_queue, channel)
+		ef4_remove_rx_queue(rx_queue);
+	ef4_for_each_possible_channel_tx_queue(tx_queue, channel)
+		ef4_remove_tx_queue(tx_queue);
+	ef4_remove_eventq(channel);
+	channel->type->post_remove(channel);
+}
+
+static void ef4_remove_channels(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)
+		ef4_remove_channel(channel);	/* queues, eventq, type hook */
+}
+
+int
+ef4_realloc_channels(struct ef4_nic *efx, u32 rxq_entries, u32 txq_entries)
+{
+	struct ef4_channel *other_channel[EF4_MAX_CHANNELS], *channel;
+	u32 old_rxq_entries, old_txq_entries;
+	unsigned i, next_buffer_table = 0;
+	int rc, rc2;
+
+	rc = ef4_check_disabled(efx);
+	if (rc)
+		return rc;
+
+	/* Channels whose type has no copy op are kept as-is, so new
+	 * allocations must start after their buffer table entries.
+	 */
+	ef4_for_each_channel(channel, efx) {
+		struct ef4_rx_queue *rx_queue;
+		struct ef4_tx_queue *tx_queue;
+
+		if (channel->type->copy)
+			continue;
+		next_buffer_table = max(next_buffer_table,
+					channel->eventq.index +
+					channel->eventq.entries);
+		ef4_for_each_channel_rx_queue(rx_queue, channel)
+			next_buffer_table = max(next_buffer_table,
+						rx_queue->rxd.index +
+						rx_queue->rxd.entries);
+		ef4_for_each_channel_tx_queue(tx_queue, channel)
+			next_buffer_table = max(next_buffer_table,
+						tx_queue->txd.index +
+						tx_queue->txd.entries);
+	}
+
+	ef4_device_detach_sync(efx);
+	ef4_stop_all(efx);
+	ef4_soft_disable_interrupts(efx);
+
+	/* Clone channels (where possible); others are reused unchanged */
+	memset(other_channel, 0, sizeof(other_channel));
+	for (i = 0; i < efx->n_channels; i++) {
+		channel = efx->channel[i];
+		if (channel->type->copy)
+			channel = channel->type->copy(channel);
+		if (!channel) {
+			rc = -ENOMEM;
+			goto out;
+		}
+		other_channel[i] = channel;
+	}
+
+	/* Swap entry counts and channel pointers */
+	old_rxq_entries = efx->rxq_entries;
+	old_txq_entries = efx->txq_entries;
+	efx->rxq_entries = rxq_entries;
+	efx->txq_entries = txq_entries;
+	for (i = 0; i < efx->n_channels; i++) {
+		channel = efx->channel[i];
+		efx->channel[i] = other_channel[i];
+		other_channel[i] = channel;
+	}
+
+	/* Restart buffer table allocation */
+	efx->next_buffer_table = next_buffer_table;
+
+	for (i = 0; i < efx->n_channels; i++) {
+		channel = efx->channel[i];
+		if (!channel->type->copy)
+			continue;
+		rc = ef4_probe_channel(channel);
+		if (rc)
+			goto rollback;
+		ef4_init_napi_channel(efx->channel[i]);
+	}
+
+out:
+	/* Destroy unused channel structures */
+	for (i = 0; i < efx->n_channels; i++) {
+		channel = other_channel[i];
+		if (channel && channel->type->copy) {
+			ef4_fini_napi_channel(channel);
+			ef4_remove_channel(channel);
+			kfree(channel);
+		}
+	}
+
+	rc2 = ef4_soft_enable_interrupts(efx);
+	if (rc2) {
+		rc = rc ? rc : rc2;
+		netif_err(efx, drv, efx->net_dev,
+			  "unable to restart interrupts on channel reallocation\n");
+		ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+	} else {
+		ef4_start_all(efx);
+		netif_device_attach(efx->net_dev);
+	}
+	return rc;
+
+rollback:
+	/* Swap back to the old entry counts and channel pointers */
+	efx->rxq_entries = old_rxq_entries;
+	efx->txq_entries = old_txq_entries;
+	for (i = 0; i < efx->n_channels; i++) {
+		channel = efx->channel[i];
+		efx->channel[i] = other_channel[i];
+		other_channel[i] = channel;
+	}
+	goto out;
+}
+
+void ef4_schedule_slow_fill(struct ef4_rx_queue *rx_queue)
+{
+	mod_timer(&rx_queue->slow_fill, jiffies + msecs_to_jiffies(100));	/* in 100ms */
+}
+
+static const struct ef4_channel_type ef4_default_channel_type = {
+	.pre_probe		= ef4_channel_dummy_op_int,	/* no-op, returns 0 */
+	.post_remove		= ef4_channel_dummy_op_void,	/* no-op */
+	.get_name		= ef4_get_channel_name,
+	.copy			= ef4_copy_channel,	/* used by ef4_realloc_channels() */
+	.keep_eventq		= false,
+};
+
+int ef4_channel_dummy_op_int(struct ef4_channel *channel)
+{
+	return 0;	/* no-op hook: always reports success */
+}
+
+void ef4_channel_dummy_op_void(struct ef4_channel *channel)
+{	/* no-op hook: intentionally empty */
+}
+
+/**************************************************************************
+ *
+ * Port handling
+ *
+ **************************************************************************/
+
+/* This ensures that the kernel is kept informed (via
+ * netif_carrier_on/off) of the link status, and also maintains the
+ * link status's stop on the port's TX queue.
+ */
+void ef4_link_status_changed(struct ef4_nic *efx)
+{
+	struct ef4_link_state *link_state = &efx->link_state;
+
+	/* SFC Bug 5356: A net_dev notifier is registered, so we must ensure
+	 * that no events are triggered between unregister_netdev() and the
+	 * driver unloading. A more general condition is that NETDEV_CHANGE
+	 * can only be generated between NETDEV_UP and NETDEV_DOWN */
+	if (!netif_running(efx->net_dev))
+		return;
+
+	if (link_state->up != netif_carrier_ok(efx->net_dev)) {
+		efx->n_link_state_changes++;
+
+		if (link_state->up)
+			netif_carrier_on(efx->net_dev);
+		else
+			netif_carrier_off(efx->net_dev);
+	}
+
+	/* Status message for kernel log (even if carrier state is unchanged) */
+	if (link_state->up)
+		netif_info(efx, link, efx->net_dev,
+			   "link up at %uMbps %s-duplex (MTU %d)\n",
+			   link_state->speed, link_state->fd ? "full" : "half",
+			   efx->net_dev->mtu);
+	else
+		netif_info(efx, link, efx->net_dev, "link down\n");
+}
+
+void ef4_link_set_advertising(struct ef4_nic *efx, u32 advertising)
+{
+	efx->link_advertising = advertising;
+	if (!advertising)
+		return;	/* nothing advertised: leave wanted_fc alone */
+	if (advertising & ADVERTISED_Pause)
+		efx->wanted_fc |= (EF4_FC_TX | EF4_FC_RX);
+	else
+		efx->wanted_fc &= ~(EF4_FC_TX | EF4_FC_RX);
+	if (advertising & ADVERTISED_Asym_Pause)
+		efx->wanted_fc ^= EF4_FC_TX;
+}
+
+void ef4_link_set_wanted_fc(struct ef4_nic *efx, u8 wanted_fc)
+{
+	efx->wanted_fc = wanted_fc;
+	if (!efx->link_advertising)
+		return;	/* no advertising mask set: leave it empty */
+	if (wanted_fc & EF4_FC_RX)
+		efx->link_advertising |= (ADVERTISED_Pause |
+					  ADVERTISED_Asym_Pause);
+	else
+		efx->link_advertising &= ~(ADVERTISED_Pause |
+					   ADVERTISED_Asym_Pause);
+	if (wanted_fc & EF4_FC_TX)
+		efx->link_advertising ^= ADVERTISED_Asym_Pause;
+}
+
+static void ef4_fini_port(struct ef4_nic *efx);
+
+/* We assume that efx->type->reconfigure_mac will always try to sync RX
+ * filters, so take filter_sem for reading to guard the table against freeing.
+ */
+void ef4_mac_reconfigure(struct ef4_nic *efx)
+{
+	down_read(&efx->filter_sem);
+	efx->type->reconfigure_mac(efx);
+	up_read(&efx->filter_sem);
+}
+
+/* Push loopback/power/transmit disable settings to the PHY, and reconfigure
+ * the MAC appropriately. All other PHY configuration changes are pushed
+ * through phy_op->set_settings(), and pushed asynchronously to the MAC
+ * through ef4_monitor().
+ *
+ * Callers must hold the mac_lock.  On error, phy_mode is restored.
+ */
+int __ef4_reconfigure_port(struct ef4_nic *efx)
+{
+	enum ef4_phy_mode phy_mode;
+	int rc;
+
+	WARN_ON(!mutex_is_locked(&efx->mac_lock));
+
+	/* Disable PHY transmit in mac level loopbacks */
+	phy_mode = efx->phy_mode;
+	if (LOOPBACK_INTERNAL(efx))
+		efx->phy_mode |= PHY_MODE_TX_DISABLED;
+	else
+		efx->phy_mode &= ~PHY_MODE_TX_DISABLED;
+
+	rc = efx->type->reconfigure_port(efx);
+
+	if (rc)
+		efx->phy_mode = phy_mode;
+
+	return rc;
+}
+
+/* Reinitialise the MAC to pick up new PHY settings, even if the port is
+ * disabled.  Takes mac_lock around __ef4_reconfigure_port(). */
+int ef4_reconfigure_port(struct ef4_nic *efx)
+{
+	int rc;
+
+	EF4_ASSERT_RESET_SERIALISED(efx);
+
+	mutex_lock(&efx->mac_lock);
+	rc = __ef4_reconfigure_port(efx);
+	mutex_unlock(&efx->mac_lock);
+
+	return rc;
+}
+
+/* Asynchronous work item for changing MAC promiscuity and multicast
+ * hash.  Avoid a drain/rx_ingress enable by reconfiguring the current
+ * MAC directly (under mac_lock, and only while the port is enabled). */
+static void ef4_mac_work(struct work_struct *data)
+{
+	struct ef4_nic *efx = container_of(data, struct ef4_nic, mac_work);
+
+	mutex_lock(&efx->mac_lock);
+	if (efx->port_enabled)
+		ef4_mac_reconfigure(efx);
+	mutex_unlock(&efx->mac_lock);
+}
+
+static int ef4_probe_port(struct ef4_nic *efx)
+{
+	int rc;
+
+	netif_dbg(efx, probe, efx->net_dev, "create port\n");
+
+	if (phy_flash_cfg)	/* module parameter: start PHYs in reflash mode */
+		efx->phy_mode = PHY_MODE_SPECIAL;
+
+	/* Connect up MAC/PHY operations table */
+	rc = efx->type->probe_port(efx);
+	if (rc)
+		return rc;
+
+	/* Initialise MAC address to permanent address */
+	ether_addr_copy(efx->net_dev->dev_addr, efx->net_dev->perm_addr);
+
+	return 0;
+}
+
+static int ef4_init_port(struct ef4_nic *efx)
+{
+	int rc;
+
+	netif_dbg(efx, drv, efx->net_dev, "init port\n");
+
+	mutex_lock(&efx->mac_lock);
+
+	rc = efx->phy_op->init(efx);
+	if (rc)
+		goto fail1;
+
+	efx->port_initialized = true;
+
+	/* Reconfigure the MAC before creating dma queues (required for
+	 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */
+	ef4_mac_reconfigure(efx);
+
+	/* Push flow control advertising to the PHY; -EPERM is not fatal */
+	rc = efx->phy_op->reconfigure(efx);
+	if (rc && rc != -EPERM)
+		goto fail2;
+
+	mutex_unlock(&efx->mac_lock);
+	return 0;
+
+fail2:
+	efx->phy_op->fini(efx);	/* NOTE(review): port_initialized stays true */
+fail1:
+	mutex_unlock(&efx->mac_lock);
+	return rc;
+}
+
+static void ef4_start_port(struct ef4_nic *efx)
+{
+	netif_dbg(efx, ifup, efx->net_dev, "start port\n");
+	BUG_ON(efx->port_enabled);	/* port must not already be started */
+
+	mutex_lock(&efx->mac_lock);
+	efx->port_enabled = true;
+
+	/* Ensure MAC ingress/egress is enabled */
+	ef4_mac_reconfigure(efx);
+
+	mutex_unlock(&efx->mac_lock);
+}
+
+/* Cancel work for MAC reconfiguration, periodic hardware monitoring
+ * and the async self-test, wait for them to finish and prevent them
+ * being scheduled again.  This doesn't cover online resets, which
+ * should only be cancelled when removing the device.
+ */
+static void ef4_stop_port(struct ef4_nic *efx)
+{
+	netif_dbg(efx, ifdown, efx->net_dev, "stop port\n");
+
+	EF4_ASSERT_RESET_SERIALISED(efx);
+
+	mutex_lock(&efx->mac_lock);
+	efx->port_enabled = false;
+	mutex_unlock(&efx->mac_lock);
+
+	/* Serialise against ef4_set_multicast_list() */
+	netif_addr_lock_bh(efx->net_dev);
+	netif_addr_unlock_bh(efx->net_dev);
+
+	cancel_delayed_work_sync(&efx->monitor_work);
+	ef4_selftest_async_cancel(efx);
+	cancel_work_sync(&efx->mac_work);
+}
+
+static void ef4_fini_port(struct ef4_nic *efx)
+{
+	netif_dbg(efx, drv, efx->net_dev, "shut down port\n");
+
+	if (!efx->port_initialized)
+		return;
+
+	efx->phy_op->fini(efx);
+	efx->port_initialized = false;
+
+	efx->link_state.up = false;
+	ef4_link_status_changed(efx);
+}
+
+static void ef4_remove_port(struct ef4_nic *efx)
+{
+	netif_dbg(efx, drv, efx->net_dev, "destroying port\n");
+
+	efx->type->remove_port(efx);
+}
+
+/**************************************************************************
+ *
+ * NIC handling
+ *
+ **************************************************************************/
+
+static LIST_HEAD(ef4_primary_list);
+static LIST_HEAD(ef4_unassociated_list);
+
+static bool ef4_same_controller(struct ef4_nic *left, struct ef4_nic *right)
+{
+	return left->type == right->type &&
+		left->vpd_sn && right->vpd_sn &&
+		!strcmp(left->vpd_sn, right->vpd_sn);
+}
+
+static void ef4_associate(struct ef4_nic *efx)
+{
+	struct ef4_nic *other, *next;
+
+	if (efx->primary == efx) {
+		/* Adding primary function; look for secondaries */
+
+		netif_dbg(efx, probe, efx->net_dev, "adding to primary list\n");
+		list_add_tail(&efx->node, &ef4_primary_list);
+
+		list_for_each_entry_safe(other, next, &ef4_unassociated_list,
+					 node) {
+			if (ef4_same_controller(efx, other)) {
+				list_del(&other->node);
+				netif_dbg(other, probe, other->net_dev,
+					  "moving to secondary list of %s %s\n",
+					  pci_name(efx->pci_dev),
+					  efx->net_dev->name);
+				list_add_tail(&other->node,
+					      &efx->secondary_list);
+				other->primary = efx;
+			}
+		}
+	} else {
+		/* Adding secondary function; look for primary */
+
+		list_for_each_entry(other, &ef4_primary_list, node) {
+			if (ef4_same_controller(efx, other)) {
+				netif_dbg(efx, probe, efx->net_dev,
+					  "adding to secondary list of %s %s\n",
+					  pci_name(other->pci_dev),
+					  other->net_dev->name);
+				list_add_tail(&efx->node,
+					      &other->secondary_list);
+				efx->primary = other;
+				return;
+			}
+		}
+
+		netif_dbg(efx, probe, efx->net_dev,
+			  "adding to unassociated list\n");
+		list_add_tail(&efx->node, &ef4_unassociated_list);
+	}
+}
+
+static void ef4_dissociate(struct ef4_nic *efx)
+{
+	struct ef4_nic *other, *next;
+
+	list_del(&efx->node);
+	efx->primary = NULL;
+
+	list_for_each_entry_safe(other, next, &efx->secondary_list, node) {
+		list_del(&other->node);
+		netif_dbg(other, probe, other->net_dev,
+			  "moving to unassociated list\n");
+		list_add_tail(&other->node, &ef4_unassociated_list);
+		other->primary = NULL;
+	}
+}
+
+/* This configures the PCI device to enable I/O and DMA. */
+static int ef4_init_io(struct ef4_nic *efx)
+{
+	struct pci_dev *pci_dev = efx->pci_dev;
+	dma_addr_t dma_mask = efx->type->max_dma_mask;
+	unsigned int mem_map_size = efx->type->mem_map_size(efx);
+	int rc, bar;
+
+	netif_dbg(efx, probe, efx->net_dev, "initialising I/O\n");
+
+	bar = efx->type->mem_bar;
+
+	rc = pci_enable_device(pci_dev);
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev,
+			  "failed to enable PCI device\n");
+		goto fail1;
+	}
+
+	pci_set_master(pci_dev);
+
+	/* Set the PCI DMA mask.  Try all possibilities from our
+	 * genuine mask down to 32 bits, because some architectures
+	 * (e.g. x86_64 with iommu_sac_force set) will allow 40 bit
+	 * masks even though they reject 46 bit masks.
+	 */
+	while (dma_mask > 0x7fffffffUL) {
+		rc = dma_set_mask_and_coherent(&pci_dev->dev, dma_mask);
+		if (rc == 0)
+			break;
+		dma_mask >>= 1;
+	}
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev,
+			  "could not find a suitable DMA mask\n");
+		goto fail2;
+	}
+	netif_dbg(efx, probe, efx->net_dev,
+		  "using DMA mask %llx\n", (unsigned long long) dma_mask);
+
+	efx->membase_phys = pci_resource_start(efx->pci_dev, bar);
+	rc = pci_request_region(pci_dev, bar, "sfc");
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev,
+			  "request for memory BAR failed\n");
+		rc = -EIO;
+		goto fail3;
+	}
+	efx->membase = ioremap_nocache(efx->membase_phys, mem_map_size);
+	if (!efx->membase) {
+		netif_err(efx, probe, efx->net_dev,
+			  "could not map memory BAR at %llx+%x\n",
+			  (unsigned long long)efx->membase_phys, mem_map_size);
+		rc = -ENOMEM;
+		goto fail4;
+	}
+	netif_dbg(efx, probe, efx->net_dev,
+		  "memory BAR at %llx+%x (virtual %p)\n",
+		  (unsigned long long)efx->membase_phys, mem_map_size,
+		  efx->membase);
+
+	return 0;
+
+ fail4:
+	pci_release_region(efx->pci_dev, bar);
+ fail3:
+	efx->membase_phys = 0;
+ fail2:
+	pci_disable_device(efx->pci_dev);
+ fail1:
+	return rc;
+}
+
+static void ef4_fini_io(struct ef4_nic *efx)
+{
+	int bar;
+
+	netif_dbg(efx, drv, efx->net_dev, "shutting down I/O\n");
+
+	if (efx->membase) {
+		iounmap(efx->membase);
+		efx->membase = NULL;
+	}
+
+	if (efx->membase_phys) {
+		bar = efx->type->mem_bar;
+		pci_release_region(efx->pci_dev, bar);
+		efx->membase_phys = 0;
+	}
+
+	/* Don't disable bus-mastering if VFs are assigned */
+	if (!pci_vfs_assigned(efx->pci_dev))
+		pci_disable_device(efx->pci_dev);
+}
+
+void ef4_set_default_rx_indir_table(struct ef4_nic *efx)
+{
+	size_t i;
+
+	for (i = 0; i < ARRAY_SIZE(efx->rx_indir_table); i++)
+		efx->rx_indir_table[i] =
+			ethtool_rxfh_indir_default(i, efx->rss_spread);
+}
+
+static unsigned int ef4_wanted_parallelism(struct ef4_nic *efx)
+{
+	cpumask_var_t thread_mask;
+	unsigned int count;
+	int cpu;
+
+	if (rss_cpus) {
+		count = rss_cpus;
+	} else {
+		if (unlikely(!zalloc_cpumask_var(&thread_mask, GFP_KERNEL))) {
+			netif_warn(efx, probe, efx->net_dev,
+				   "RSS disabled due to allocation failure\n");
+			return 1;
+		}
+
+		count = 0;
+		for_each_online_cpu(cpu) {
+			if (!cpumask_test_cpu(cpu, thread_mask)) {
+				++count;
+				cpumask_or(thread_mask, thread_mask,
+					   topology_sibling_cpumask(cpu));
+			}
+		}
+
+		free_cpumask_var(thread_mask);
+	}
+
+	return count;
+}
+
+/* Probe the number and type of interrupts we are able to obtain, and
+ * the resulting numbers of channels and RX queues.
+ */
+static int ef4_probe_interrupts(struct ef4_nic *efx)
+{
+	unsigned int extra_channels = 0;
+	unsigned int i, j;
+	int rc;
+
+	for (i = 0; i < EF4_MAX_EXTRA_CHANNELS; i++)
+		if (efx->extra_channel_type[i])
+			++extra_channels;
+
+	if (efx->interrupt_mode == EF4_INT_MODE_MSIX) {
+		struct msix_entry xentries[EF4_MAX_CHANNELS];
+		unsigned int n_channels;
+
+		n_channels = ef4_wanted_parallelism(efx);
+		if (ef4_separate_tx_channels)
+			n_channels *= 2;
+		n_channels += extra_channels;
+		n_channels = min(n_channels, efx->max_channels);
+
+		for (i = 0; i < n_channels; i++)
+			xentries[i].entry = i;
+		rc = pci_enable_msix_range(efx->pci_dev,
+					   xentries, 1, n_channels);
+		if (rc < 0) {
+			/* Fall back to single channel MSI */
+			efx->interrupt_mode = EF4_INT_MODE_MSI;
+			netif_err(efx, drv, efx->net_dev,
+				  "could not enable MSI-X\n");
+		} else if (rc < n_channels) {
+			netif_err(efx, drv, efx->net_dev,
+				  "WARNING: Insufficient MSI-X vectors"
+				  " available (%d < %u).\n", rc, n_channels);
+			netif_err(efx, drv, efx->net_dev,
+				  "WARNING: Performance may be reduced.\n");
+			n_channels = rc;
+		}
+
+		if (rc > 0) {
+			efx->n_channels = n_channels;
+			if (n_channels > extra_channels)
+				n_channels -= extra_channels;
+			if (ef4_separate_tx_channels) {
+				efx->n_tx_channels = min(max(n_channels / 2,
+							     1U),
+							 efx->max_tx_channels);
+				efx->n_rx_channels = max(n_channels -
+							 efx->n_tx_channels,
+							 1U);
+			} else {
+				efx->n_tx_channels = min(n_channels,
+							 efx->max_tx_channels);
+				efx->n_rx_channels = n_channels;
+			}
+			for (i = 0; i < efx->n_channels; i++)
+				ef4_get_channel(efx, i)->irq =
+					xentries[i].vector;
+		}
+	}
+
+	/* Try single interrupt MSI */
+	if (efx->interrupt_mode == EF4_INT_MODE_MSI) {
+		efx->n_channels = 1;
+		efx->n_rx_channels = 1;
+		efx->n_tx_channels = 1;
+		rc = pci_enable_msi(efx->pci_dev);
+		if (rc == 0) {
+			ef4_get_channel(efx, 0)->irq = efx->pci_dev->irq;
+		} else {
+			netif_err(efx, drv, efx->net_dev,
+				  "could not enable MSI\n");
+			efx->interrupt_mode = EF4_INT_MODE_LEGACY;
+		}
+	}
+
+	/* Assume legacy interrupts */
+	if (efx->interrupt_mode == EF4_INT_MODE_LEGACY) {
+		efx->n_channels = 1 + (ef4_separate_tx_channels ? 1 : 0);
+		efx->n_rx_channels = 1;
+		efx->n_tx_channels = 1;
+		efx->legacy_irq = efx->pci_dev->irq;
+	}
+
+	/* Assign extra channels if possible */
+	j = efx->n_channels;
+	for (i = 0; i < EF4_MAX_EXTRA_CHANNELS; i++) {
+		if (!efx->extra_channel_type[i])
+			continue;
+		if (efx->interrupt_mode != EF4_INT_MODE_MSIX ||
+		    efx->n_channels <= extra_channels) {
+			efx->extra_channel_type[i]->handle_no_channel(efx);
+		} else {
+			--j;
+			ef4_get_channel(efx, j)->type =
+				efx->extra_channel_type[i];
+		}
+	}
+
+	efx->rss_spread = efx->n_rx_channels;
+
+	return 0;
+}
+
+static int ef4_soft_enable_interrupts(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel, *end_channel;
+	int rc;
+
+	BUG_ON(efx->state == STATE_DISABLED);
+
+	efx->irq_soft_enabled = true;
+	smp_wmb();
+
+	ef4_for_each_channel(channel, efx) {
+		if (!channel->type->keep_eventq) {
+			rc = ef4_init_eventq(channel);
+			if (rc)
+				goto fail;
+		}
+		ef4_start_eventq(channel);
+	}
+
+	return 0;
+fail:
+	end_channel = channel;
+	ef4_for_each_channel(channel, efx) {
+		if (channel == end_channel)
+			break;
+		ef4_stop_eventq(channel);
+		if (!channel->type->keep_eventq)
+			ef4_fini_eventq(channel);
+	}
+
+	return rc;
+}
+
+static void ef4_soft_disable_interrupts(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	if (efx->state == STATE_DISABLED)
+		return;
+
+	efx->irq_soft_enabled = false;
+	smp_wmb();
+
+	if (efx->legacy_irq)
+		synchronize_irq(efx->legacy_irq);
+
+	ef4_for_each_channel(channel, efx) {
+		if (channel->irq)
+			synchronize_irq(channel->irq);
+
+		ef4_stop_eventq(channel);
+		if (!channel->type->keep_eventq)
+			ef4_fini_eventq(channel);
+	}
+}
+
+static int ef4_enable_interrupts(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel, *end_channel;
+	int rc;
+
+	BUG_ON(efx->state == STATE_DISABLED);
+
+	if (efx->eeh_disabled_legacy_irq) {
+		enable_irq(efx->legacy_irq);
+		efx->eeh_disabled_legacy_irq = false;
+	}
+
+	efx->type->irq_enable_master(efx);
+
+	ef4_for_each_channel(channel, efx) {
+		if (channel->type->keep_eventq) {
+			rc = ef4_init_eventq(channel);
+			if (rc)
+				goto fail;
+		}
+	}
+
+	rc = ef4_soft_enable_interrupts(efx);
+	if (rc)
+		goto fail;
+
+	return 0;
+
+fail:
+	end_channel = channel;
+	ef4_for_each_channel(channel, efx) {
+		if (channel == end_channel)
+			break;
+		if (channel->type->keep_eventq)
+			ef4_fini_eventq(channel);
+	}
+
+	efx->type->irq_disable_non_ev(efx);
+
+	return rc;
+}
+
+static void ef4_disable_interrupts(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	ef4_soft_disable_interrupts(efx);
+
+	ef4_for_each_channel(channel, efx) {
+		if (channel->type->keep_eventq)
+			ef4_fini_eventq(channel);
+	}
+
+	efx->type->irq_disable_non_ev(efx);
+}
+
+static void ef4_remove_interrupts(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	/* Remove MSI/MSI-X interrupts */
+	ef4_for_each_channel(channel, efx)
+		channel->irq = 0;
+	pci_disable_msi(efx->pci_dev);
+	pci_disable_msix(efx->pci_dev);
+
+	/* Remove legacy interrupt */
+	efx->legacy_irq = 0;
+}
+
+static void ef4_set_channels(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+	struct ef4_tx_queue *tx_queue;
+
+	efx->tx_channel_offset =
+		ef4_separate_tx_channels ?
+		efx->n_channels - efx->n_tx_channels : 0;
+
+	/* We need to mark which channels really have RX and TX
+	 * queues, and adjust the TX queue numbers if we have separate
+	 * RX-only and TX-only channels.
+	 */
+	ef4_for_each_channel(channel, efx) {
+		if (channel->channel < efx->n_rx_channels)
+			channel->rx_queue.core_index = channel->channel;
+		else
+			channel->rx_queue.core_index = -1;
+
+		ef4_for_each_channel_tx_queue(tx_queue, channel)
+			tx_queue->queue -= (efx->tx_channel_offset *
+					    EF4_TXQ_TYPES);
+	}
+}
+
+static int ef4_probe_nic(struct ef4_nic *efx)
+{
+	int rc;
+
+	netif_dbg(efx, probe, efx->net_dev, "creating NIC\n");
+
+	/* Carry out hardware-type specific initialisation */
+	rc = efx->type->probe(efx);
+	if (rc)
+		return rc;
+
+	do {
+		if (!efx->max_channels || !efx->max_tx_channels) {
+			netif_err(efx, drv, efx->net_dev,
+				  "Insufficient resources to allocate"
+				  " any channels\n");
+			rc = -ENOSPC;
+			goto fail1;
+		}
+
+		/* Determine the number of channels and queues by trying
+		 * to hook in MSI-X interrupts.
+		 */
+		rc = ef4_probe_interrupts(efx);
+		if (rc)
+			goto fail1;
+
+		ef4_set_channels(efx);
+
+		/* dimension_resources can fail with EAGAIN */
+		rc = efx->type->dimension_resources(efx);
+		if (rc != 0 && rc != -EAGAIN)
+			goto fail2;
+
+		if (rc == -EAGAIN)
+			/* try again with new max_channels */
+			ef4_remove_interrupts(efx);
+
+	} while (rc == -EAGAIN);
+
+	if (efx->n_channels > 1)
+		netdev_rss_key_fill(&efx->rx_hash_key,
+				    sizeof(efx->rx_hash_key));
+	ef4_set_default_rx_indir_table(efx);
+
+	netif_set_real_num_tx_queues(efx->net_dev, efx->n_tx_channels);
+	netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels);
+
+	/* Initialise the interrupt moderation settings */
+	efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000);
+	ef4_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true,
+				true);
+
+	return 0;
+
+fail2:
+	ef4_remove_interrupts(efx);
+fail1:
+	efx->type->remove(efx);
+	return rc;
+}
+
+static void ef4_remove_nic(struct ef4_nic *efx)
+{
+	netif_dbg(efx, drv, efx->net_dev, "destroying NIC\n");
+
+	ef4_remove_interrupts(efx);
+	efx->type->remove(efx);
+}
+
+static int ef4_probe_filters(struct ef4_nic *efx)
+{
+	int rc;
+
+	spin_lock_init(&efx->filter_lock);
+	init_rwsem(&efx->filter_sem);
+	mutex_lock(&efx->mac_lock);
+	down_write(&efx->filter_sem);
+	rc = efx->type->filter_table_probe(efx);
+	if (rc)
+		goto out_unlock;
+
+#ifdef CONFIG_RFS_ACCEL
+	if (efx->type->offload_features & NETIF_F_NTUPLE) {
+		struct ef4_channel *channel;
+		int i, success = 1;
+
+		ef4_for_each_channel(channel, efx) {
+			channel->rps_flow_id =
+				kcalloc(efx->type->max_rx_ip_filters,
+					sizeof(*channel->rps_flow_id),
+					GFP_KERNEL);
+			if (!channel->rps_flow_id)
+				success = 0;
+			else
+				for (i = 0;
+				     i < efx->type->max_rx_ip_filters;
+				     ++i)
+					channel->rps_flow_id[i] =
+						RPS_FLOW_ID_INVALID;
+		}
+
+		if (!success) {
+			ef4_for_each_channel(channel, efx)
+				kfree(channel->rps_flow_id);
+			efx->type->filter_table_remove(efx);
+			rc = -ENOMEM;
+			goto out_unlock;
+		}
+
+		efx->rps_expire_index = efx->rps_expire_channel = 0;
+	}
+#endif
+out_unlock:
+	up_write(&efx->filter_sem);
+	mutex_unlock(&efx->mac_lock);
+	return rc;
+}
+
+static void ef4_remove_filters(struct ef4_nic *efx)
+{
+#ifdef CONFIG_RFS_ACCEL
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)
+		kfree(channel->rps_flow_id);
+#endif
+	down_write(&efx->filter_sem);
+	efx->type->filter_table_remove(efx);
+	up_write(&efx->filter_sem);
+}
+
+static void ef4_restore_filters(struct ef4_nic *efx)
+{
+	down_read(&efx->filter_sem);
+	efx->type->filter_table_restore(efx);
+	up_read(&efx->filter_sem);
+}
+
+/**************************************************************************
+ *
+ * NIC startup/shutdown
+ *
+ *************************************************************************/
+
+static int ef4_probe_all(struct ef4_nic *efx)
+{
+	int rc;
+
+	rc = ef4_probe_nic(efx);
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev, "failed to create NIC\n");
+		goto fail1;
+	}
+
+	rc = ef4_probe_port(efx);
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev, "failed to create port\n");
+		goto fail2;
+	}
+
+	BUILD_BUG_ON(EF4_DEFAULT_DMAQ_SIZE < EF4_RXQ_MIN_ENT);
+	if (WARN_ON(EF4_DEFAULT_DMAQ_SIZE < EF4_TXQ_MIN_ENT(efx))) {
+		rc = -EINVAL;
+		goto fail3;
+	}
+	efx->rxq_entries = efx->txq_entries = EF4_DEFAULT_DMAQ_SIZE;
+
+	rc = ef4_probe_filters(efx);
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev,
+			  "failed to create filter tables\n");
+		goto fail4;
+	}
+
+	rc = ef4_probe_channels(efx);
+	if (rc)
+		goto fail5;
+
+	return 0;
+
+ fail5:
+	ef4_remove_filters(efx);
+ fail4:
+ fail3:
+	ef4_remove_port(efx);
+ fail2:
+	ef4_remove_nic(efx);
+ fail1:
+	return rc;
+}
+
+/* If the interface is supposed to be running but is not, start
+ * the hardware and software data path, regular activity for the port
+ * (MAC statistics, link polling, etc.) and schedule the port to be
+ * reconfigured.  Interrupts must already be enabled.  This function
+ * is safe to call multiple times, so long as the NIC is not disabled.
+ * Requires the RTNL lock.
+ */
+static void ef4_start_all(struct ef4_nic *efx)
+{
+	EF4_ASSERT_RESET_SERIALISED(efx);
+	BUG_ON(efx->state == STATE_DISABLED);
+
+	/* Check that it is appropriate to restart the interface. All
+	 * of these flags are safe to read under just the rtnl lock */
+	if (efx->port_enabled || !netif_running(efx->net_dev) ||
+	    efx->reset_pending)
+		return;
+
+	ef4_start_port(efx);
+	ef4_start_datapath(efx);
+
+	/* Start the hardware monitor if there is one */
+	if (efx->type->monitor != NULL)
+		queue_delayed_work(efx->workqueue, &efx->monitor_work,
+				   ef4_monitor_interval);
+
+	efx->type->start_stats(efx);
+	efx->type->pull_stats(efx);
+	spin_lock_bh(&efx->stats_lock);
+	efx->type->update_stats(efx, NULL, NULL);
+	spin_unlock_bh(&efx->stats_lock);
+}
+
+/* Quiesce the hardware and software data path, and regular activity
+ * for the port without bringing the link down.  Safe to call multiple
+ * times with the NIC in almost any state, but interrupts should be
+ * enabled.  Requires the RTNL lock.
+ */
+static void ef4_stop_all(struct ef4_nic *efx)
+{
+	EF4_ASSERT_RESET_SERIALISED(efx);
+
+	/* port_enabled can be read safely under the rtnl lock */
+	if (!efx->port_enabled)
+		return;
+
+	/* update stats before we go down so we can accurately count
+	 * rx_nodesc_drops
+	 */
+	efx->type->pull_stats(efx);
+	spin_lock_bh(&efx->stats_lock);
+	efx->type->update_stats(efx, NULL, NULL);
+	spin_unlock_bh(&efx->stats_lock);
+	efx->type->stop_stats(efx);
+	ef4_stop_port(efx);
+
+	/* Stop the kernel transmit interface.  This is only valid if
+	 * the device is stopped or detached; otherwise the watchdog
+	 * may fire immediately.
+	 */
+	WARN_ON(netif_running(efx->net_dev) &&
+		netif_device_present(efx->net_dev));
+	netif_tx_disable(efx->net_dev);
+
+	ef4_stop_datapath(efx);
+}
+
+static void ef4_remove_all(struct ef4_nic *efx)
+{
+	ef4_remove_channels(efx);
+	ef4_remove_filters(efx);
+	ef4_remove_port(efx);
+	ef4_remove_nic(efx);
+}
+
+/**************************************************************************
+ *
+ * Interrupt moderation
+ *
+ **************************************************************************/
+unsigned int ef4_usecs_to_ticks(struct ef4_nic *efx, unsigned int usecs)
+{
+	if (usecs == 0)
+		return 0;
+	if (usecs * 1000 < efx->timer_quantum_ns)
+		return 1; /* never round down to 0 */
+	return usecs * 1000 / efx->timer_quantum_ns;
+}
+
+unsigned int ef4_ticks_to_usecs(struct ef4_nic *efx, unsigned int ticks)
+{
+	/* We must round up when converting ticks to microseconds
+	 * because we round down when converting the other way.
+	 */
+	return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000);
+}
+
+/* Set interrupt moderation parameters */
+int ef4_init_irq_moderation(struct ef4_nic *efx, unsigned int tx_usecs,
+			    unsigned int rx_usecs, bool rx_adaptive,
+			    bool rx_may_override_tx)
+{
+	struct ef4_channel *channel;
+	unsigned int timer_max_us;
+
+	EF4_ASSERT_RESET_SERIALISED(efx);
+
+	timer_max_us = efx->timer_max_ns / 1000;
+
+	if (tx_usecs > timer_max_us || rx_usecs > timer_max_us)
+		return -EINVAL;
+
+	if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 &&
+	    !rx_may_override_tx) {
+		netif_err(efx, drv, efx->net_dev, "Channels are shared. "
+			  "RX and TX IRQ moderation must be equal\n");
+		return -EINVAL;
+	}
+
+	efx->irq_rx_adaptive = rx_adaptive;
+	efx->irq_rx_moderation_us = rx_usecs;
+	ef4_for_each_channel(channel, efx) {
+		if (ef4_channel_has_rx_queue(channel))
+			channel->irq_moderation_us = rx_usecs;
+		else if (ef4_channel_has_tx_queues(channel))
+			channel->irq_moderation_us = tx_usecs;
+	}
+
+	return 0;
+}
+
+void ef4_get_irq_moderation(struct ef4_nic *efx, unsigned int *tx_usecs,
+			    unsigned int *rx_usecs, bool *rx_adaptive)
+{
+	*rx_adaptive = efx->irq_rx_adaptive;
+	*rx_usecs = efx->irq_rx_moderation_us;
+
+	/* If channels are shared between RX and TX, so is IRQ
+	 * moderation.  Otherwise, IRQ moderation is the same for all
+	 * TX channels and is not adaptive.
+	 */
+	if (efx->tx_channel_offset == 0) {
+		*tx_usecs = *rx_usecs;
+	} else {
+		struct ef4_channel *tx_channel;
+
+		tx_channel = efx->channel[efx->tx_channel_offset];
+		*tx_usecs = tx_channel->irq_moderation_us;
+	}
+}
+
+/**************************************************************************
+ *
+ * Hardware monitor
+ *
+ **************************************************************************/
+
+/* Run periodically off the general workqueue */
+static void ef4_monitor(struct work_struct *data)
+{
+	struct ef4_nic *efx = container_of(data, struct ef4_nic,
+					   monitor_work.work);
+
+	netif_vdbg(efx, timer, efx->net_dev,
+		   "hardware monitor executing on CPU %d\n",
+		   raw_smp_processor_id());
+	BUG_ON(efx->type->monitor == NULL);
+
+	/* If the mac_lock is already held then it is likely a port
+	 * reconfiguration is already in progress, which will likely do
+	 * most of the work of monitor() anyway. */
+	if (mutex_trylock(&efx->mac_lock)) {
+		if (efx->port_enabled)
+			efx->type->monitor(efx);
+		mutex_unlock(&efx->mac_lock);
+	}
+
+	queue_delayed_work(efx->workqueue, &efx->monitor_work,
+			   ef4_monitor_interval);
+}
+
+/**************************************************************************
+ *
+ * ioctls
+ *
+ *************************************************************************/
+
+/* Net device ioctl
+ * Context: process, rtnl_lock() held.
+ */
+static int ef4_ioctl(struct net_device *net_dev, struct ifreq *ifr, int cmd)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct mii_ioctl_data *data = if_mii(ifr);
+
+	/* Convert phy_id from older PRTAD/DEVAD format */
+	if ((cmd == SIOCGMIIREG || cmd == SIOCSMIIREG) &&
+	    (data->phy_id & 0xfc00) == 0x0400)
+		data->phy_id ^= MDIO_PHY_ID_C45 | 0x0400;
+
+	return mdio_mii_ioctl(&efx->mdio, data, cmd);
+}
+
+/**************************************************************************
+ *
+ * NAPI interface
+ *
+ **************************************************************************/
+
+static void ef4_init_napi_channel(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+
+	channel->napi_dev = efx->net_dev;
+	netif_napi_add(channel->napi_dev, &channel->napi_str,
+		       ef4_poll, napi_weight);
+	ef4_channel_busy_poll_init(channel);
+}
+
+static void ef4_init_napi(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)
+		ef4_init_napi_channel(channel);
+}
+
+static void ef4_fini_napi_channel(struct ef4_channel *channel)
+{
+	if (channel->napi_dev)
+		netif_napi_del(&channel->napi_str);
+
+	channel->napi_dev = NULL;
+}
+
+static void ef4_fini_napi(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)
+		ef4_fini_napi_channel(channel);
+}
+
+/**************************************************************************
+ *
+ * Kernel netpoll interface
+ *
+ *************************************************************************/
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+
+/* Although in the common case interrupts will be disabled, this is not
+ * guaranteed. However, all our work happens inside the NAPI callback,
+ * so no locking is required.
+ */
+static void ef4_netpoll(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)
+		ef4_schedule_channel(channel);
+}
+
+#endif
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+static int ef4_busy_poll(struct napi_struct *napi)
+{
+	struct ef4_channel *channel =
+		container_of(napi, struct ef4_channel, napi_str);
+	struct ef4_nic *efx = channel->efx;
+	int budget = 4;
+	int old_rx_packets, rx_packets;
+
+	if (!netif_running(efx->net_dev))
+		return LL_FLUSH_FAILED;
+
+	if (!ef4_channel_try_lock_poll(channel))
+		return LL_FLUSH_BUSY;
+
+	old_rx_packets = channel->rx_queue.rx_packets;
+	ef4_process_channel(channel, budget);
+
+	rx_packets = channel->rx_queue.rx_packets - old_rx_packets;
+
+	/* There is no race condition with NAPI here.
+	 * NAPI will automatically be rescheduled if it yielded during busy
+	 * polling, because it was not able to take the lock and thus returned
+	 * the full budget.
+	 */
+	ef4_channel_unlock_poll(channel);
+
+	return rx_packets;
+}
+#endif
+
+/**************************************************************************
+ *
+ * Kernel net device interface
+ *
+ *************************************************************************/
+
+/* Context: process, rtnl_lock() held. */
+int ef4_net_open(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int rc;
+
+	netif_dbg(efx, ifup, efx->net_dev, "opening device on CPU %d\n",
+		  raw_smp_processor_id());
+
+	rc = ef4_check_disabled(efx);
+	if (rc)
+		return rc;
+	if (efx->phy_mode & PHY_MODE_SPECIAL)
+		return -EBUSY;
+
+	/* Notify the kernel of the link state polled during driver load,
+	 * before the monitor starts running */
+	ef4_link_status_changed(efx);
+
+	ef4_start_all(efx);
+	ef4_selftest_async_start(efx);
+	return 0;
+}
+
+/* Context: process, rtnl_lock() held.
+ * Note that the kernel will ignore our return code; this method
+ * should really be a void.
+ */
+int ef4_net_stop(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	netif_dbg(efx, ifdown, efx->net_dev, "closing on CPU %d\n",
+		  raw_smp_processor_id());
+
+	/* Stop the device and flush all the channels */
+	ef4_stop_all(efx);
+
+	return 0;
+}
+
+/* Context: process, dev_base_lock or RTNL held, non-blocking. */
+static struct rtnl_link_stats64 *ef4_net_stats(struct net_device *net_dev,
+					       struct rtnl_link_stats64 *stats)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	spin_lock_bh(&efx->stats_lock);
+	efx->type->update_stats(efx, NULL, stats);
+	spin_unlock_bh(&efx->stats_lock);
+
+	return stats;
+}
+
+/* Context: netif_tx_lock held, BHs disabled. */
+static void ef4_watchdog(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	netif_err(efx, tx_err, efx->net_dev,
+		  "TX stuck with port_enabled=%d: resetting channels\n",
+		  efx->port_enabled);
+
+	ef4_schedule_reset(efx, RESET_TYPE_TX_WATCHDOG);
+}
+
+
+/* Context: process, rtnl_lock() held. */
+static int ef4_change_mtu(struct net_device *net_dev, int new_mtu)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int rc;
+
+	rc = ef4_check_disabled(efx);
+	if (rc)
+		return rc;
+
+	netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu);
+
+	ef4_device_detach_sync(efx);
+	ef4_stop_all(efx);
+
+	mutex_lock(&efx->mac_lock);
+	net_dev->mtu = new_mtu;
+	ef4_mac_reconfigure(efx);
+	mutex_unlock(&efx->mac_lock);
+
+	ef4_start_all(efx);
+	netif_device_attach(efx->net_dev);
+	return 0;
+}
+
+static int ef4_set_mac_address(struct net_device *net_dev, void *data)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct sockaddr *addr = data;
+	u8 *new_addr = addr->sa_data;
+	u8 old_addr[6];
+	int rc;
+
+	if (!is_valid_ether_addr(new_addr)) {
+		netif_err(efx, drv, efx->net_dev,
+			  "invalid ethernet MAC address requested: %pM\n",
+			  new_addr);
+		return -EADDRNOTAVAIL;
+	}
+
+	/* save old address */
+	ether_addr_copy(old_addr, net_dev->dev_addr);
+	ether_addr_copy(net_dev->dev_addr, new_addr);
+	if (efx->type->set_mac_address) {
+		rc = efx->type->set_mac_address(efx);
+		if (rc) {
+			ether_addr_copy(net_dev->dev_addr, old_addr);
+			return rc;
+		}
+	}
+
+	/* Reconfigure the MAC */
+	mutex_lock(&efx->mac_lock);
+	ef4_mac_reconfigure(efx);
+	mutex_unlock(&efx->mac_lock);
+
+	return 0;
+}
+
+/* Context: netif_addr_lock held, BHs disabled. */
+static void ef4_set_rx_mode(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	if (efx->port_enabled)
+		queue_work(efx->workqueue, &efx->mac_work);
+	/* Otherwise ef4_start_port() will do this */
+}
+
+static int ef4_set_features(struct net_device *net_dev, netdev_features_t data)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int rc;
+
+	/* If disabling RX n-tuple filtering, clear existing filters */
+	if (net_dev->features & ~data & NETIF_F_NTUPLE) {
+		rc = efx->type->filter_clear_rx(efx, EF4_FILTER_PRI_MANUAL);
+		if (rc)
+			return rc;
+	}
+
+	/* If Rx VLAN filter is changed, update filters via mac_reconfigure */
+	if ((net_dev->features ^ data) & NETIF_F_HW_VLAN_CTAG_FILTER) {
+		/* ef4_set_rx_mode() will schedule MAC work to update filters
+		 * when the new features are finally set in net_dev.
+		 */
+		ef4_set_rx_mode(net_dev);
+	}
+
+	return 0;
+}
+
+static const struct net_device_ops ef4_netdev_ops = {
+	.ndo_open		= ef4_net_open,
+	.ndo_stop		= ef4_net_stop,
+	.ndo_get_stats64	= ef4_net_stats,
+	.ndo_tx_timeout		= ef4_watchdog,
+	.ndo_start_xmit		= ef4_hard_start_xmit,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_do_ioctl		= ef4_ioctl,
+	.ndo_change_mtu		= ef4_change_mtu,
+	.ndo_set_mac_address	= ef4_set_mac_address,
+	.ndo_set_rx_mode	= ef4_set_rx_mode,
+	.ndo_set_features	= ef4_set_features,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+	.ndo_poll_controller = ef4_netpoll,
+#endif
+	.ndo_setup_tc		= ef4_setup_tc,
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	.ndo_busy_poll		= ef4_busy_poll,
+#endif
+#ifdef CONFIG_RFS_ACCEL
+	.ndo_rx_flow_steer	= ef4_filter_rfs,
+#endif
+};
+
+static void ef4_update_name(struct ef4_nic *efx)
+{
+	strcpy(efx->name, efx->net_dev->name);
+	ef4_mtd_rename(efx);
+	ef4_set_channel_names(efx);
+}
+
+static int ef4_netdev_event(struct notifier_block *this,
+			    unsigned long event, void *ptr)
+{
+	struct net_device *net_dev = netdev_notifier_info_to_dev(ptr);
+
+	if ((net_dev->netdev_ops == &ef4_netdev_ops) &&
+	    event == NETDEV_CHANGENAME)
+		ef4_update_name(netdev_priv(net_dev));
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block ef4_netdev_notifier = {
+	.notifier_call = ef4_netdev_event,
+};
+
+static ssize_t
+show_phy_type(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	return sprintf(buf, "%d\n", efx->phy_type);
+}
+static DEVICE_ATTR(phy_type, 0444, show_phy_type, NULL);
+
+static int ef4_register_netdev(struct ef4_nic *efx)
+{
+	struct net_device *net_dev = efx->net_dev;
+	struct ef4_channel *channel;
+	int rc;
+
+	net_dev->watchdog_timeo = 5 * HZ;
+	net_dev->irq = efx->pci_dev->irq;
+	net_dev->netdev_ops = &ef4_netdev_ops;
+	net_dev->ethtool_ops = &ef4_ethtool_ops;
+	net_dev->gso_max_segs = EF4_TSO_MAX_SEGS;
+	net_dev->min_mtu = EF4_MIN_MTU;
+	net_dev->max_mtu = EF4_MAX_MTU;
+
+	rtnl_lock();
+
+	/* Enable resets to be scheduled and check whether any were
+	 * already requested.  If so, the NIC is probably hosed so we
+	 * abort.
+	 */
+	efx->state = STATE_READY;
+	smp_mb(); /* ensure we change state before checking reset_pending */
+	if (efx->reset_pending) {
+		netif_err(efx, probe, efx->net_dev,
+			  "aborting probe due to scheduled reset\n");
+		rc = -EIO;
+		goto fail_locked;
+	}
+
+	rc = dev_alloc_name(net_dev, net_dev->name);
+	if (rc < 0)
+		goto fail_locked;
+	ef4_update_name(efx);
+
+	/* Always start with carrier off; PHY events will detect the link */
+	netif_carrier_off(net_dev);
+
+	rc = register_netdevice(net_dev);
+	if (rc)
+		goto fail_locked;
+
+	ef4_for_each_channel(channel, efx) {
+		struct ef4_tx_queue *tx_queue;
+		ef4_for_each_channel_tx_queue(tx_queue, channel)
+			ef4_init_tx_queue_core_txq(tx_queue);
+	}
+
+	ef4_associate(efx);
+
+	rtnl_unlock();
+
+	rc = device_create_file(&efx->pci_dev->dev, &dev_attr_phy_type);
+	if (rc) {
+		netif_err(efx, drv, efx->net_dev,
+			  "failed to init net dev attributes\n");
+		goto fail_registered;
+	}
+	return 0;
+
+fail_registered:
+	rtnl_lock();
+	ef4_dissociate(efx);
+	unregister_netdevice(net_dev);
+fail_locked:
+	efx->state = STATE_UNINIT;
+	rtnl_unlock();
+	netif_err(efx, drv, efx->net_dev, "could not register net dev\n");
+	return rc;
+}
+
+static void ef4_unregister_netdev(struct ef4_nic *efx)
+{
+	if (!efx->net_dev)
+		return;
+
+	BUG_ON(netdev_priv(efx->net_dev) != efx);
+
+	if (ef4_dev_registered(efx)) {
+		strlcpy(efx->name, pci_name(efx->pci_dev), sizeof(efx->name));
+		device_remove_file(&efx->pci_dev->dev, &dev_attr_phy_type);
+		unregister_netdev(efx->net_dev);
+	}
+}
+
+/**************************************************************************
+ *
+ * Device reset and suspend
+ *
+ **************************************************************************/
+
+/* Tears down the entire software state and most of the hardware state
+ * before reset.  */
+void ef4_reset_down(struct ef4_nic *efx, enum reset_type method)
+{
+	EF4_ASSERT_RESET_SERIALISED(efx);
+
+	ef4_stop_all(efx);
+	ef4_disable_interrupts(efx);
+
+	mutex_lock(&efx->mac_lock);
+	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+	    method != RESET_TYPE_DATAPATH)
+		efx->phy_op->fini(efx);
+	efx->type->fini(efx);
+}
+
+/* This function will always ensure that the locks acquired in
+ * ef4_reset_down() are released. A failure return code indicates
+ * that we were unable to reinitialise the hardware, and the
+ * driver should be disabled. If ok is false, then the rx and tx
+ * engines are not restarted, pending a RESET_DISABLE. */
+int ef4_reset_up(struct ef4_nic *efx, enum reset_type method, bool ok)
+{
+	int rc;
+
+	EF4_ASSERT_RESET_SERIALISED(efx);
+
+	/* Ensure that SRAM is initialised even if we're disabling the device */
+	rc = efx->type->init(efx);
+	if (rc) {
+		netif_err(efx, drv, efx->net_dev, "failed to initialise NIC\n");
+		goto fail;
+	}
+
+	if (!ok)
+		goto fail;
+
+	if (efx->port_initialized && method != RESET_TYPE_INVISIBLE &&
+	    method != RESET_TYPE_DATAPATH) {
+		rc = efx->phy_op->init(efx);
+		if (rc)
+			goto fail;
+		rc = efx->phy_op->reconfigure(efx);
+		if (rc && rc != -EPERM)
+			netif_err(efx, drv, efx->net_dev,
+				  "could not restore PHY settings\n");
+	}
+
+	rc = ef4_enable_interrupts(efx);
+	if (rc)
+		goto fail;
+
+	down_read(&efx->filter_sem);
+	ef4_restore_filters(efx);
+	up_read(&efx->filter_sem);
+
+	mutex_unlock(&efx->mac_lock);
+
+	ef4_start_all(efx);
+
+	return 0;
+
+fail:
+	efx->port_initialized = false;
+
+	mutex_unlock(&efx->mac_lock);
+
+	return rc;
+}
+
+/* Reset the NIC using the specified method.  Note that the reset may
+ * fail, in which case the card will be left in an unusable state.
+ *
+ * Caller must hold the rtnl_lock.
+ */
+int ef4_reset(struct ef4_nic *efx, enum reset_type method)
+{
+	int rc, rc2;
+	bool disabled;
+
+	netif_info(efx, drv, efx->net_dev, "resetting (%s)\n",
+		   RESET_TYPE(method));
+
+	ef4_device_detach_sync(efx);
+	ef4_reset_down(efx, method);
+
+	rc = efx->type->reset(efx, method);
+	if (rc) {
+		netif_err(efx, drv, efx->net_dev, "failed to reset hardware\n");
+		goto out;
+	}
+
+	/* Clear flags for the scopes we covered.  We assume the NIC and
+	 * driver are now quiescent so that there is no race here.
+	 */
+	if (method < RESET_TYPE_MAX_METHOD)
+		efx->reset_pending &= -(1 << (method + 1));
+	else /* it doesn't fit into the well-ordered scope hierarchy */
+		__clear_bit(method, &efx->reset_pending);
+
+	/* Reinitialise bus-mastering, which may have been turned off before
+	 * the reset was scheduled. This is still appropriate, even in the
+	 * RESET_TYPE_DISABLE since this driver generally assumes the hardware
+	 * can respond to requests. */
+	pci_set_master(efx->pci_dev);
+
+out:
+	/* Leave device stopped if necessary */
+	disabled = rc ||
+		method == RESET_TYPE_DISABLE ||
+		method == RESET_TYPE_RECOVER_OR_DISABLE;
+	rc2 = ef4_reset_up(efx, method, !disabled);
+	if (rc2) {
+		disabled = true;
+		if (!rc)
+			rc = rc2;
+	}
+
+	if (disabled) {
+		dev_close(efx->net_dev);
+		netif_err(efx, drv, efx->net_dev, "has been disabled\n");
+		efx->state = STATE_DISABLED;
+	} else {
+		netif_dbg(efx, drv, efx->net_dev, "reset complete\n");
+		netif_device_attach(efx->net_dev);
+	}
+	return rc;
+}
+
+/* Try recovery mechanisms.
+ * For now only EEH is supported.
+ * Returns 0 if the recovery mechanisms are unsuccessful.
+ * Returns a non-zero value otherwise.
+ */
+int ef4_try_recovery(struct ef4_nic *efx)
+{
+#ifdef CONFIG_EEH
+	/* A PCI error can occur and not be seen by EEH because nothing
+	 * happens on the PCI bus. In this case the driver may fail and
+	 * schedule a 'recover or reset', leading to this recovery handler.
+	 * Manually call the eeh failure check function.
+	 */
+	struct eeh_dev *eehdev = pci_dev_to_eeh_dev(efx->pci_dev);
+	if (eeh_dev_check_failure(eehdev)) {
+		/* The EEH mechanisms will handle the error and reset the
+		 * device if necessary.
+		 */
+		return 1;
+	}
+#endif
+	return 0;
+}
+
+/* The worker thread exists so that code that cannot sleep can
+ * schedule a reset for later.
+ */
+static void ef4_reset_work(struct work_struct *data)
+{
+	struct ef4_nic *efx = container_of(data, struct ef4_nic, reset_work);
+	unsigned long pending;
+	enum reset_type method;
+
+	pending = ACCESS_ONCE(efx->reset_pending);
+	method = fls(pending) - 1;
+
+	if ((method == RESET_TYPE_RECOVER_OR_DISABLE ||
+	     method == RESET_TYPE_RECOVER_OR_ALL) &&
+	    ef4_try_recovery(efx))
+		return;
+
+	if (!pending)
+		return;
+
+	rtnl_lock();
+
+	/* We checked the state in ef4_schedule_reset() but it may
+	 * have changed by now.  Now that we have the RTNL lock,
+	 * it cannot change again.
+	 */
+	if (efx->state == STATE_READY)
+		(void)ef4_reset(efx, method);
+
+	rtnl_unlock();
+}
+
+void ef4_schedule_reset(struct ef4_nic *efx, enum reset_type type)
+{
+	enum reset_type method;
+
+	if (efx->state == STATE_RECOVERY) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "recovering: skip scheduling %s reset\n",
+			  RESET_TYPE(type));
+		return;
+	}
+
+	switch (type) {
+	case RESET_TYPE_INVISIBLE:
+	case RESET_TYPE_ALL:
+	case RESET_TYPE_RECOVER_OR_ALL:
+	case RESET_TYPE_WORLD:
+	case RESET_TYPE_DISABLE:
+	case RESET_TYPE_RECOVER_OR_DISABLE:
+	case RESET_TYPE_DATAPATH:
+		method = type;
+		netif_dbg(efx, drv, efx->net_dev, "scheduling %s reset\n",
+			  RESET_TYPE(method));
+		break;
+	default:
+		method = efx->type->map_reset_reason(type);
+		netif_dbg(efx, drv, efx->net_dev,
+			  "scheduling %s reset for %s\n",
+			  RESET_TYPE(method), RESET_TYPE(type));
+		break;
+	}
+
+	set_bit(method, &efx->reset_pending);
+	smp_mb(); /* ensure we change reset_pending before checking state */
+
+	/* If we're not READY then just leave the flags set as the cue
+	 * to abort probing or reschedule the reset later.
+	 */
+	if (ACCESS_ONCE(efx->state) != STATE_READY)
+		return;
+
+	queue_work(reset_workqueue, &efx->reset_work);
+}
+
+/**************************************************************************
+ *
+ * List of NICs we support
+ *
+ **************************************************************************/
+
+/* PCI device ID table */
+static const struct pci_device_id ef4_pci_table[] = {
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
+		    PCI_DEVICE_ID_SOLARFLARE_SFC4000A_0),
+	 .driver_data = (unsigned long) &falcon_a1_nic_type},
+	{PCI_DEVICE(PCI_VENDOR_ID_SOLARFLARE,
+		    PCI_DEVICE_ID_SOLARFLARE_SFC4000B),
+	 .driver_data = (unsigned long) &falcon_b0_nic_type},
+	{0}			/* end of list */
+};
+
+/**************************************************************************
+ *
+ * Dummy PHY/MAC operations
+ *
+ * Can be used for some unimplemented operations
+ * Needed so all function pointers are valid and do not have to be tested
+ * before use
+ *
+ **************************************************************************/
+int ef4_port_dummy_op_int(struct ef4_nic *efx)
+{
+	return 0;
+}
+void ef4_port_dummy_op_void(struct ef4_nic *efx) {}
+
+static bool ef4_port_dummy_op_poll(struct ef4_nic *efx)
+{
+	return false;
+}
+
+static const struct ef4_phy_operations ef4_dummy_phy_operations = {
+	.init		 = ef4_port_dummy_op_int,
+	.reconfigure	 = ef4_port_dummy_op_int,
+	.poll		 = ef4_port_dummy_op_poll,
+	.fini		 = ef4_port_dummy_op_void,
+};
+
+/**************************************************************************
+ *
+ * Data housekeeping
+ *
+ **************************************************************************/
+
+/* This fills in the invariants in a struct ef4_nic (including all
+ * sub-structures).  It does not zero the structure first.
+ */
+static int ef4_init_struct(struct ef4_nic *efx,
+			   struct pci_dev *pci_dev, struct net_device *net_dev)
+{
+	int i;
+
+	/* Initialise common structures */
+	INIT_LIST_HEAD(&efx->node);
+	INIT_LIST_HEAD(&efx->secondary_list);
+	spin_lock_init(&efx->biu_lock);
+#ifdef CONFIG_SFC_FALCON_MTD
+	INIT_LIST_HEAD(&efx->mtd_list);
+#endif
+	INIT_WORK(&efx->reset_work, ef4_reset_work);
+	INIT_DELAYED_WORK(&efx->monitor_work, ef4_monitor);
+	INIT_DELAYED_WORK(&efx->selftest_work, ef4_selftest_async_work);
+	efx->pci_dev = pci_dev;
+	efx->msg_enable = debug;
+	efx->state = STATE_UNINIT;
+	strlcpy(efx->name, pci_name(pci_dev), sizeof(efx->name));
+
+	efx->net_dev = net_dev;
+	efx->rx_prefix_size = efx->type->rx_prefix_size;
+	efx->rx_ip_align =
+		NET_IP_ALIGN ? (efx->rx_prefix_size + NET_IP_ALIGN) % 4 : 0;
+	efx->rx_packet_hash_offset =
+		efx->type->rx_hash_offset - efx->type->rx_prefix_size;
+	efx->rx_packet_ts_offset =
+		efx->type->rx_ts_offset - efx->type->rx_prefix_size;
+	spin_lock_init(&efx->stats_lock);
+	mutex_init(&efx->mac_lock);
+	efx->phy_op = &ef4_dummy_phy_operations;
+	efx->mdio.dev = net_dev;
+	INIT_WORK(&efx->mac_work, ef4_mac_work);
+	init_waitqueue_head(&efx->flush_wq);
+
+	for (i = 0; i < EF4_MAX_CHANNELS; i++) {
+		efx->channel[i] = ef4_alloc_channel(efx, i, NULL);
+		if (!efx->channel[i])
+			goto fail;
+		efx->msi_context[i].efx = efx;
+		efx->msi_context[i].index = i;
+	}
+
+	/* Higher numbered interrupt modes are less capable! */
+	efx->interrupt_mode = max(efx->type->max_interrupt_mode,
+				  interrupt_mode);
+
+	/* Would be good to use the net_dev name, but we're too early */
+	snprintf(efx->workqueue_name, sizeof(efx->workqueue_name), "sfc%s",
+		 pci_name(pci_dev));
+	efx->workqueue = create_singlethread_workqueue(efx->workqueue_name);
+	if (!efx->workqueue)
+		goto fail;
+
+	return 0;
+
+fail:
+	ef4_fini_struct(efx);
+	return -ENOMEM;
+}
+
+static void ef4_fini_struct(struct ef4_nic *efx)
+{
+	int i;
+
+	for (i = 0; i < EF4_MAX_CHANNELS; i++)
+		kfree(efx->channel[i]);
+
+	kfree(efx->vpd_sn);
+
+	if (efx->workqueue) {
+		destroy_workqueue(efx->workqueue);
+		efx->workqueue = NULL;
+	}
+}
+
+void ef4_update_sw_stats(struct ef4_nic *efx, u64 *stats)
+{
+	u64 n_rx_nodesc_trunc = 0;
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)
+		n_rx_nodesc_trunc += channel->n_rx_nodesc_trunc;
+	stats[GENERIC_STAT_rx_nodesc_trunc] = n_rx_nodesc_trunc;
+	stats[GENERIC_STAT_rx_noskb_drops] = atomic_read(&efx->n_rx_noskb_drops);
+}
+
+/**************************************************************************
+ *
+ * PCI interface
+ *
+ **************************************************************************/
+
+/* Main body of final NIC shutdown code
+ * This is called only at module unload (or hotplug removal).
+ */
+static void ef4_pci_remove_main(struct ef4_nic *efx)
+{
+	/* Flush reset_work. It can no longer be scheduled since we
+	 * are not READY.
+	 */
+	BUG_ON(efx->state == STATE_READY);
+	cancel_work_sync(&efx->reset_work);
+
+	ef4_disable_interrupts(efx);
+	ef4_nic_fini_interrupt(efx);
+	ef4_fini_port(efx);
+	efx->type->fini(efx);
+	ef4_fini_napi(efx);
+	ef4_remove_all(efx);
+}
+
+/* Final NIC shutdown
+ * This is called only at module unload (or hotplug removal).  A PF can call
+ * this on its VFs to ensure they are unbound first.
+ */
+static void ef4_pci_remove(struct pci_dev *pci_dev)
+{
+	struct ef4_nic *efx;
+
+	efx = pci_get_drvdata(pci_dev);
+	if (!efx)
+		return;
+
+	/* Mark the NIC as fini, then stop the interface */
+	rtnl_lock();
+	ef4_dissociate(efx);
+	dev_close(efx->net_dev);
+	ef4_disable_interrupts(efx);
+	efx->state = STATE_UNINIT;
+	rtnl_unlock();
+
+	ef4_unregister_netdev(efx);
+
+	ef4_mtd_remove(efx);
+
+	ef4_pci_remove_main(efx);
+
+	ef4_fini_io(efx);
+	netif_dbg(efx, drv, efx->net_dev, "shutdown successful\n");
+
+	ef4_fini_struct(efx);
+	free_netdev(efx->net_dev);
+
+	pci_disable_pcie_error_reporting(pci_dev);
+}
+
+/* NIC VPD information
+ * Called during probe to display the part number of the installed
+ * NIC and to save its serial number in efx->vpd_sn.  VPD is potentially
+ * very large but these should always appear within the first 512 bytes.
+ */
+#define SFC_VPD_LEN 512
+static void ef4_probe_vpd_strings(struct ef4_nic *efx)
+{
+	struct pci_dev *dev = efx->pci_dev;
+	char vpd_data[SFC_VPD_LEN];
+	ssize_t vpd_size;
+	int ro_start, ro_size, i, j;
+
+	/* Get the vpd data from the device */
+	vpd_size = pci_read_vpd(dev, 0, sizeof(vpd_data), vpd_data);
+	if (vpd_size <= 0) {
+		netif_err(efx, drv, efx->net_dev, "Unable to read VPD\n");
+		return;
+	}
+
+	/* Get the Read only section */
+	ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
+	if (ro_start < 0) {
+		netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
+		return;
+	}
+
+	ro_size = pci_vpd_lrdt_size(&vpd_data[ro_start]);
+	j = ro_size;
+	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
+	if (i + j > vpd_size)
+		j = vpd_size - i;
+
+	/* Get the Part number */
+	i = pci_vpd_find_info_keyword(vpd_data, i, j, "PN");
+	if (i < 0) {
+		netif_err(efx, drv, efx->net_dev, "Part number not found\n");
+		return;
+	}
+
+	j = pci_vpd_info_field_size(&vpd_data[i]);
+	i += PCI_VPD_INFO_FLD_HDR_SIZE;
+	if (i + j > vpd_size) {
+		netif_err(efx, drv, efx->net_dev, "Incomplete part number\n");
+		return;
+	}
+
+	netif_info(efx, drv, efx->net_dev,
+		   "Part Number : %.*s\n", j, &vpd_data[i]);
+
+	i = ro_start + PCI_VPD_LRDT_TAG_SIZE;
+	j = min_t(int, ro_size, vpd_size - i);	/* clamp as for PN search */
+	i = pci_vpd_find_info_keyword(vpd_data, i, j, "SN");
+	if (i < 0) {
+		netif_err(efx, drv, efx->net_dev, "Serial number not found\n");
+		return;
+	}
+
+	j = pci_vpd_info_field_size(&vpd_data[i]);
+	i += PCI_VPD_INFO_FLD_HDR_SIZE;
+	if (i + j > vpd_size) {
+		netif_err(efx, drv, efx->net_dev, "Incomplete serial number\n");
+		return;
+	}
+
+	efx->vpd_sn = kmalloc(j + 1, GFP_KERNEL);
+	if (!efx->vpd_sn)
+		return;
+	/* vpd_data is not NUL-terminated: bound the read by precision */
+	snprintf(efx->vpd_sn, j + 1, "%.*s", j, &vpd_data[i]);
+}
+
+
+/* Main body of NIC initialisation
+ * This is called at module load (or hotplug insertion, theoretically).
+ */
+static int ef4_pci_probe_main(struct ef4_nic *efx)
+{
+	int rc;
+
+	/* Do start-of-day initialisation */
+	rc = ef4_probe_all(efx);
+	if (rc)
+		goto fail1;
+
+	ef4_init_napi(efx);
+
+	rc = efx->type->init(efx);
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev,
+			  "failed to initialise NIC\n");
+		goto fail3;
+	}
+
+	rc = ef4_init_port(efx);
+	if (rc) {
+		netif_err(efx, probe, efx->net_dev,
+			  "failed to initialise port\n");
+		goto fail4;
+	}
+
+	rc = ef4_nic_init_interrupt(efx);
+	if (rc)
+		goto fail5;
+	rc = ef4_enable_interrupts(efx);
+	if (rc)
+		goto fail6;
+
+	return 0;
+
+ fail6:
+	ef4_nic_fini_interrupt(efx);
+ fail5:
+	ef4_fini_port(efx);
+ fail4:
+	efx->type->fini(efx);
+ fail3:
+	ef4_fini_napi(efx);
+	ef4_remove_all(efx);
+ fail1:
+	return rc;
+}
+
+/* NIC initialisation
+ *
+ * This is called at module load (or hotplug insertion,
+ * theoretically).  It sets up PCI mappings, resets the NIC,
+ * sets up and registers the network devices with the kernel and hooks
+ * the interrupt service routine.  It does not prepare the device for
+ * transmission; this is left to the first time one of the network
+ * interfaces is brought up (i.e. ef4_net_open).
+ */
+static int ef4_pci_probe(struct pci_dev *pci_dev,
+			 const struct pci_device_id *entry)
+{
+	struct net_device *net_dev;
+	struct ef4_nic *efx;
+	int rc;
+
+	/* Allocate and initialise a struct net_device and struct ef4_nic */
+	net_dev = alloc_etherdev_mqs(sizeof(*efx), EF4_MAX_CORE_TX_QUEUES,
+				     EF4_MAX_RX_QUEUES);
+	if (!net_dev)
+		return -ENOMEM;
+	efx = netdev_priv(net_dev);
+	efx->type = (const struct ef4_nic_type *) entry->driver_data;
+	efx->fixed_features |= NETIF_F_HIGHDMA;
+
+	pci_set_drvdata(pci_dev, efx);
+	SET_NETDEV_DEV(net_dev, &pci_dev->dev);
+	rc = ef4_init_struct(efx, pci_dev, net_dev);
+	if (rc)
+		goto fail1;
+
+	netif_info(efx, probe, efx->net_dev,
+		   "Solarflare NIC detected\n");
+
+	ef4_probe_vpd_strings(efx);
+
+	/* Set up basic I/O (BAR mappings etc) */
+	rc = ef4_init_io(efx);
+	if (rc)
+		goto fail2;
+
+	rc = ef4_pci_probe_main(efx);
+	if (rc)
+		goto fail3;
+
+	net_dev->features |= (efx->type->offload_features | NETIF_F_SG |
+			      NETIF_F_RXCSUM);
+	/* Mask for features that also apply to VLAN devices */
+	net_dev->vlan_features |= (NETIF_F_HW_CSUM | NETIF_F_SG |
+				   NETIF_F_HIGHDMA | NETIF_F_RXCSUM);
+
+	net_dev->hw_features = net_dev->features & ~efx->fixed_features;
+
+	/* Disable VLAN filtering by default.  It may be enforced if
+	 * the feature is fixed (i.e. VLAN filters are required to
+	 * receive VLAN tagged packets due to vPort restrictions).
+	 */
+	net_dev->features &= ~NETIF_F_HW_VLAN_CTAG_FILTER;
+	net_dev->features |= efx->fixed_features;
+
+	rc = ef4_register_netdev(efx);
+	if (rc)
+		goto fail4;
+
+	netif_dbg(efx, probe, efx->net_dev, "initialisation successful\n");
+
+	/* Try to create MTDs, but allow this to fail */
+	rtnl_lock();
+	rc = ef4_mtd_probe(efx);
+	rtnl_unlock();
+	if (rc && rc != -EPERM)
+		netif_warn(efx, probe, efx->net_dev,
+			   "failed to create MTDs (%d)\n", rc);
+
+	rc = pci_enable_pcie_error_reporting(pci_dev);
+	if (rc && rc != -EINVAL)
+		netif_notice(efx, probe, efx->net_dev,
+			     "PCIE error reporting unavailable (%d).\n",
+			     rc);
+
+	return 0;
+
+ fail4:
+	ef4_pci_remove_main(efx);
+ fail3:
+	ef4_fini_io(efx);
+ fail2:
+	ef4_fini_struct(efx);
+ fail1:
+	WARN_ON(rc > 0);
+	netif_dbg(efx, drv, efx->net_dev, "initialisation failed. rc=%d\n", rc);
+	free_netdev(net_dev);
+	return rc;
+}
+
+static int ef4_pm_freeze(struct device *dev)
+{
+	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+
+	rtnl_lock();
+
+	if (efx->state != STATE_DISABLED) {
+		efx->state = STATE_UNINIT;
+
+		ef4_device_detach_sync(efx);
+
+		ef4_stop_all(efx);
+		ef4_disable_interrupts(efx);
+	}
+
+	rtnl_unlock();
+
+	return 0;
+}
+
+static int ef4_pm_thaw(struct device *dev)
+{
+	int rc;
+	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+
+	rtnl_lock();
+
+	if (efx->state != STATE_DISABLED) {
+		rc = ef4_enable_interrupts(efx);
+		if (rc)
+			goto fail;
+
+		mutex_lock(&efx->mac_lock);
+		efx->phy_op->reconfigure(efx);
+		mutex_unlock(&efx->mac_lock);
+
+		ef4_start_all(efx);
+
+		netif_device_attach(efx->net_dev);
+
+		efx->state = STATE_READY;
+
+		efx->type->resume_wol(efx);
+	}
+
+	rtnl_unlock();
+
+	/* Reschedule any quenched resets scheduled during ef4_pm_freeze() */
+	queue_work(reset_workqueue, &efx->reset_work);
+
+	return 0;
+
+fail:
+	rtnl_unlock();
+
+	return rc;
+}
+
+static int ef4_pm_poweroff(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct ef4_nic *efx = pci_get_drvdata(pci_dev);
+
+	efx->type->fini(efx);
+
+	efx->reset_pending = 0;
+
+	pci_save_state(pci_dev);
+	return pci_set_power_state(pci_dev, PCI_D3hot);
+}
+
+/* Used for both resume and restore */
+static int ef4_pm_resume(struct device *dev)
+{
+	struct pci_dev *pci_dev = to_pci_dev(dev);
+	struct ef4_nic *efx = pci_get_drvdata(pci_dev);
+	int rc;
+
+	rc = pci_set_power_state(pci_dev, PCI_D0);
+	if (rc)
+		return rc;
+	pci_restore_state(pci_dev);
+	rc = pci_enable_device(pci_dev);
+	if (rc)
+		return rc;
+	pci_set_master(efx->pci_dev);
+	rc = efx->type->reset(efx, RESET_TYPE_ALL);
+	if (rc)
+		return rc;
+	rc = efx->type->init(efx);
+	if (rc)
+		return rc;
+	rc = ef4_pm_thaw(dev);
+	return rc;
+}
+
+static int ef4_pm_suspend(struct device *dev)
+{
+	int rc;
+
+	ef4_pm_freeze(dev);
+	rc = ef4_pm_poweroff(dev);
+	if (rc)
+		ef4_pm_resume(dev);
+	return rc;
+}
+
+static const struct dev_pm_ops ef4_pm_ops = {
+	.suspend	= ef4_pm_suspend,
+	.resume		= ef4_pm_resume,
+	.freeze		= ef4_pm_freeze,
+	.thaw		= ef4_pm_thaw,
+	.poweroff	= ef4_pm_poweroff,
+	.restore	= ef4_pm_resume,
+};
+
+/* A PCI error affecting this device was detected.
+ * At this point MMIO and DMA may be disabled.
+ * Stop the software path and request a slot reset.
+ */
+static pci_ers_result_t ef4_io_error_detected(struct pci_dev *pdev,
+					      enum pci_channel_state state)
+{
+	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
+	struct ef4_nic *efx = pci_get_drvdata(pdev);
+
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	rtnl_lock();
+
+	if (efx->state != STATE_DISABLED) {
+		efx->state = STATE_RECOVERY;
+		efx->reset_pending = 0;
+
+		ef4_device_detach_sync(efx);
+
+		ef4_stop_all(efx);
+		ef4_disable_interrupts(efx);
+
+		status = PCI_ERS_RESULT_NEED_RESET;
+	} else {
+		/* If the interface is disabled we don't want to do anything
+		 * with it.
+		 */
+		status = PCI_ERS_RESULT_RECOVERED;
+	}
+
+	rtnl_unlock();
+
+	pci_disable_device(pdev);
+
+	return status;
+}
+
+/* Fake a successful reset, which will be performed later in ef4_io_resume. */
+static pci_ers_result_t ef4_io_slot_reset(struct pci_dev *pdev)
+{
+	struct ef4_nic *efx = pci_get_drvdata(pdev);
+	pci_ers_result_t status = PCI_ERS_RESULT_RECOVERED;
+	int rc;
+
+	if (pci_enable_device(pdev)) {
+		netif_err(efx, hw, efx->net_dev,
+			  "Cannot re-enable PCI device after reset.\n");
+		status =  PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	rc = pci_cleanup_aer_uncorrect_error_status(pdev);
+	if (rc) {
+		netif_err(efx, hw, efx->net_dev,
+		"pci_cleanup_aer_uncorrect_error_status failed (%d)\n", rc);
+		/* Non-fatal error. Continue. */
+	}
+
+	return status;
+}
+
+/* Perform the actual reset and resume I/O operations. */
+static void ef4_io_resume(struct pci_dev *pdev)
+{
+	struct ef4_nic *efx = pci_get_drvdata(pdev);
+	int rc;
+
+	rtnl_lock();
+
+	if (efx->state == STATE_DISABLED)
+		goto out;
+
+	rc = ef4_reset(efx, RESET_TYPE_ALL);
+	if (rc) {
+		netif_err(efx, hw, efx->net_dev,
+			  "ef4_reset failed after PCI error (%d)\n", rc);
+	} else {
+		efx->state = STATE_READY;
+		netif_dbg(efx, hw, efx->net_dev,
+			  "Done resetting and resuming IO after PCI error.\n");
+	}
+
+out:
+	rtnl_unlock();
+}
+
+/* For simplicity and reliability, we always require a slot reset and try to
+ * reset the hardware when a pci error affecting the device is detected.
+ * We leave both the link_reset and mmio_enabled callbacks unimplemented:
+ * with our request for slot reset the mmio_enabled callback will never be
+ * called, and the link_reset callback is not used by AER or EEH mechanisms.
+ */
+static const struct pci_error_handlers ef4_err_handlers = {
+	.error_detected = ef4_io_error_detected,
+	.slot_reset	= ef4_io_slot_reset,
+	.resume		= ef4_io_resume,
+};
+
+static struct pci_driver ef4_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= ef4_pci_table,
+	.probe		= ef4_pci_probe,
+	.remove		= ef4_pci_remove,
+	.driver.pm	= &ef4_pm_ops,
+	.err_handler	= &ef4_err_handlers,
+};
+
+/**************************************************************************
+ *
+ * Kernel module interface
+ *
+ *************************************************************************/
+
+module_param(interrupt_mode, uint, 0444);
+MODULE_PARM_DESC(interrupt_mode,
+		 "Interrupt mode (0=>MSIX 1=>MSI 2=>legacy)");
+
+static int __init ef4_init_module(void)
+{
+	int rc;
+
+	printk(KERN_INFO "Solarflare Falcon driver v" EF4_DRIVER_VERSION "\n");
+
+	rc = register_netdevice_notifier(&ef4_netdev_notifier);
+	if (rc)
+		goto err_notifier;
+
+	reset_workqueue = create_singlethread_workqueue("sfc_reset");
+	if (!reset_workqueue) {
+		rc = -ENOMEM;
+		goto err_reset;
+	}
+
+	rc = pci_register_driver(&ef4_pci_driver);
+	if (rc < 0)
+		goto err_pci;
+
+	return 0;
+
+ err_pci:
+	destroy_workqueue(reset_workqueue);
+ err_reset:
+	unregister_netdevice_notifier(&ef4_netdev_notifier);
+ err_notifier:
+	return rc;
+}
+
+static void __exit ef4_exit_module(void)
+{
+	printk(KERN_INFO "Solarflare Falcon driver unloading\n");
+
+	pci_unregister_driver(&ef4_pci_driver);
+	destroy_workqueue(reset_workqueue);
+	unregister_netdevice_notifier(&ef4_netdev_notifier);
+
+}
+
+module_init(ef4_init_module);
+module_exit(ef4_exit_module);
+
+MODULE_AUTHOR("Solarflare Communications and "
+	      "Michael Brown <mbrown@fensystems.co.uk>");
+MODULE_DESCRIPTION("Solarflare Falcon network driver");
+MODULE_LICENSE("GPL");
+MODULE_DEVICE_TABLE(pci, ef4_pci_table);
diff --git a/drivers/net/ethernet/sfc/falcon/efx.h b/drivers/net/ethernet/sfc/falcon/efx.h
new file mode 100644
index 000000000000..c89456fa148c
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/efx.h
@@ -0,0 +1,277 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_EFX_H
+#define EF4_EFX_H
+
+#include "net_driver.h"
+#include "filter.h"
+
+/* All controllers use BAR 0 for I/O space and BAR 2(&3) for memory */
+/* All VFs use BAR 0/1 for memory */
+#define EF4_MEM_BAR 2
+#define EF4_MEM_VF_BAR 0
+
+int ef4_net_open(struct net_device *net_dev);
+int ef4_net_stop(struct net_device *net_dev);
+
+/* TX */
+int ef4_probe_tx_queue(struct ef4_tx_queue *tx_queue);
+void ef4_remove_tx_queue(struct ef4_tx_queue *tx_queue);
+void ef4_init_tx_queue(struct ef4_tx_queue *tx_queue);
+void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue);
+void ef4_fini_tx_queue(struct ef4_tx_queue *tx_queue);
+netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
+				struct net_device *net_dev);
+netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb);
+void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index);
+int ef4_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
+		 struct tc_to_netdev *tc);
+unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx);
+extern bool ef4_separate_tx_channels;
+
+/* RX */
+void ef4_set_default_rx_indir_table(struct ef4_nic *efx);
+void ef4_rx_config_page_split(struct ef4_nic *efx);
+int ef4_probe_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue);
+void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic);
+void ef4_rx_slow_fill(unsigned long context);
+void __ef4_rx_packet(struct ef4_channel *channel);
+void ef4_rx_packet(struct ef4_rx_queue *rx_queue, unsigned int index,
+		   unsigned int n_frags, unsigned int len, u16 flags);
+static inline void ef4_rx_flush_packet(struct ef4_channel *channel)
+{
+	if (channel->rx_pkt_n_frags)
+		__ef4_rx_packet(channel);
+}
+void ef4_schedule_slow_fill(struct ef4_rx_queue *rx_queue);
+
+#define EF4_MAX_DMAQ_SIZE 4096UL
+#define EF4_DEFAULT_DMAQ_SIZE 1024UL
+#define EF4_MIN_DMAQ_SIZE 512UL
+
+#define EF4_MAX_EVQ_SIZE 16384UL
+#define EF4_MIN_EVQ_SIZE 512UL
+
+/* Maximum number of TCP segments we support for soft-TSO */
+#define EF4_TSO_MAX_SEGS	100
+
+/* The smallest [rt]xq_entries that the driver supports.  RX minimum
+ * is a bit arbitrary.  For TX, we must have space for at least 2
+ * TSO skbs.
+ */
+#define EF4_RXQ_MIN_ENT		128U
+#define EF4_TXQ_MIN_ENT(efx)	(2 * ef4_tx_max_skb_descs(efx))
+
+static inline bool ef4_rss_enabled(struct ef4_nic *efx)
+{
+	return efx->rss_spread > 1;
+}
+
+/* Filters */
+
+void ef4_mac_reconfigure(struct ef4_nic *efx);
+
+/**
+ * ef4_filter_insert_filter - add or replace a filter
+ * @efx: NIC in which to insert the filter
+ * @spec: Specification for the filter
+ * @replace_equal: Flag for whether the specified filter may replace an
+ *	existing filter with equal priority
+ *
+ * On success, return the filter ID.
+ * On failure, return a negative error code.
+ *
+ * If existing filters have equal match values to the new filter spec,
+ * then the new filter might replace them or the function might fail,
+ * as follows.
+ *
+ * 1. If the existing filters have lower priority, or @replace_equal
+ *    is set and they have equal priority, replace them.
+ *
+ * 2. If the existing filters have higher priority, return -%EPERM.
+ *
+ * 3. If !ef4_filter_is_mc_recipient(@spec), or the NIC does not
+ *    support delivery to multiple recipients, return -%EEXIST.
+ *
+ * This implies that filters for multiple multicast recipients must
+ * all be inserted with the same priority and @replace_equal = %false.
+ */
+static inline s32 ef4_filter_insert_filter(struct ef4_nic *efx,
+					   struct ef4_filter_spec *spec,
+					   bool replace_equal)
+{
+	return efx->type->filter_insert(efx, spec, replace_equal);
+}
+
+/**
+ * ef4_filter_remove_id_safe - remove a filter by ID, carefully
+ * @efx: NIC from which to remove the filter
+ * @priority: Priority of filter, as passed to @ef4_filter_insert_filter
+ * @filter_id: ID of filter, as returned by @ef4_filter_insert_filter
+ *
+ * This function will range-check @filter_id, so it is safe to call
+ * with a value passed from userland.
+ */
+static inline int ef4_filter_remove_id_safe(struct ef4_nic *efx,
+					    enum ef4_filter_priority priority,
+					    u32 filter_id)
+{
+	return efx->type->filter_remove_safe(efx, priority, filter_id);
+}
+
+/**
+ * ef4_filter_get_filter_safe - retrieve a filter by ID, carefully
+ * @efx: NIC from which to retrieve the filter
+ * @priority: Priority of filter, as passed to @ef4_filter_insert_filter
+ * @filter_id: ID of filter, as returned by @ef4_filter_insert_filter
+ * @spec: Buffer in which to store filter specification
+ *
+ * This function will range-check @filter_id, so it is safe to call
+ * with a value passed from userland.
+ */
+static inline int
+ef4_filter_get_filter_safe(struct ef4_nic *efx,
+			   enum ef4_filter_priority priority,
+			   u32 filter_id, struct ef4_filter_spec *spec)
+{
+	return efx->type->filter_get_safe(efx, priority, filter_id, spec);
+}
+
+static inline u32 ef4_filter_count_rx_used(struct ef4_nic *efx,
+					   enum ef4_filter_priority priority)
+{
+	return efx->type->filter_count_rx_used(efx, priority);
+}
+static inline u32 ef4_filter_get_rx_id_limit(struct ef4_nic *efx)
+{
+	return efx->type->filter_get_rx_id_limit(efx);
+}
+static inline s32 ef4_filter_get_rx_ids(struct ef4_nic *efx,
+					enum ef4_filter_priority priority,
+					u32 *buf, u32 size)
+{
+	return efx->type->filter_get_rx_ids(efx, priority, buf, size);
+}
+#ifdef CONFIG_RFS_ACCEL
+int ef4_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+		   u16 rxq_index, u32 flow_id);
+bool __ef4_filter_rfs_expire(struct ef4_nic *efx, unsigned quota);
+static inline void ef4_filter_rfs_expire(struct ef4_channel *channel)
+{
+	if (channel->rfs_filters_added >= 60 &&
+	    __ef4_filter_rfs_expire(channel->efx, 100))
+		channel->rfs_filters_added -= 60;
+}
+#define ef4_filter_rfs_enabled() 1
+#else
+static inline void ef4_filter_rfs_expire(struct ef4_channel *channel) {}
+#define ef4_filter_rfs_enabled() 0
+#endif
+bool ef4_filter_is_mc_recipient(const struct ef4_filter_spec *spec);
+
+/* Channels */
+int ef4_channel_dummy_op_int(struct ef4_channel *channel);
+void ef4_channel_dummy_op_void(struct ef4_channel *channel);
+int ef4_realloc_channels(struct ef4_nic *efx, u32 rxq_entries, u32 txq_entries);
+
+/* Ports */
+int ef4_reconfigure_port(struct ef4_nic *efx);
+int __ef4_reconfigure_port(struct ef4_nic *efx);
+
+/* Ethtool support */
+extern const struct ethtool_ops ef4_ethtool_ops;
+
+/* Reset handling */
+int ef4_reset(struct ef4_nic *efx, enum reset_type method);
+void ef4_reset_down(struct ef4_nic *efx, enum reset_type method);
+int ef4_reset_up(struct ef4_nic *efx, enum reset_type method, bool ok);
+int ef4_try_recovery(struct ef4_nic *efx);
+
+/* Global */
+void ef4_schedule_reset(struct ef4_nic *efx, enum reset_type type);
+unsigned int ef4_usecs_to_ticks(struct ef4_nic *efx, unsigned int usecs);
+unsigned int ef4_ticks_to_usecs(struct ef4_nic *efx, unsigned int ticks);
+int ef4_init_irq_moderation(struct ef4_nic *efx, unsigned int tx_usecs,
+			    unsigned int rx_usecs, bool rx_adaptive,
+			    bool rx_may_override_tx);
+void ef4_get_irq_moderation(struct ef4_nic *efx, unsigned int *tx_usecs,
+			    unsigned int *rx_usecs, bool *rx_adaptive);
+void ef4_stop_eventq(struct ef4_channel *channel);
+void ef4_start_eventq(struct ef4_channel *channel);
+
+/* Dummy PHY ops for PHY drivers */
+int ef4_port_dummy_op_int(struct ef4_nic *efx);
+void ef4_port_dummy_op_void(struct ef4_nic *efx);
+
+/* Update the generic software stats in the passed stats array */
+void ef4_update_sw_stats(struct ef4_nic *efx, u64 *stats);
+
+/* MTD */
+#ifdef CONFIG_SFC_FALCON_MTD
+int ef4_mtd_add(struct ef4_nic *efx, struct ef4_mtd_partition *parts,
+		size_t n_parts, size_t sizeof_part);
+static inline int ef4_mtd_probe(struct ef4_nic *efx)
+{
+	return efx->type->mtd_probe(efx);
+}
+void ef4_mtd_rename(struct ef4_nic *efx);
+void ef4_mtd_remove(struct ef4_nic *efx);
+#else
+static inline int ef4_mtd_probe(struct ef4_nic *efx) { return 0; }
+static inline void ef4_mtd_rename(struct ef4_nic *efx) {}
+static inline void ef4_mtd_remove(struct ef4_nic *efx) {}
+#endif
+
+static inline void ef4_schedule_channel(struct ef4_channel *channel)
+{
+	netif_vdbg(channel->efx, intr, channel->efx->net_dev,
+		   "channel %d scheduling NAPI poll on CPU%d\n",
+		   channel->channel, raw_smp_processor_id());
+
+	napi_schedule(&channel->napi_str);
+}
+
+static inline void ef4_schedule_channel_irq(struct ef4_channel *channel)
+{
+	channel->event_test_cpu = raw_smp_processor_id();
+	ef4_schedule_channel(channel);
+}
+
+void ef4_link_status_changed(struct ef4_nic *efx);
+void ef4_link_set_advertising(struct ef4_nic *efx, u32);
+void ef4_link_set_wanted_fc(struct ef4_nic *efx, u8);
+
+static inline void ef4_device_detach_sync(struct ef4_nic *efx)
+{
+	struct net_device *dev = efx->net_dev;
+
+	/* Lock/freeze all TX queues so that we can be sure the
+	 * TX scheduler is stopped when we're done and before
+	 * netif_device_present() becomes false.
+	 */
+	netif_tx_lock_bh(dev);
+	netif_device_detach(dev);
+	netif_tx_unlock_bh(dev);
+}
+
+static inline bool ef4_rwsem_assert_write_locked(struct rw_semaphore *sem)
+{
+	/* If the read-trylock succeeds, @sem was not write-locked: warn, undo */
+	if (!WARN_ON(down_read_trylock(sem)))
+		return true;
+	up_read(sem);
+	return false;
+}
+
+#endif /* EF4_EFX_H */
diff --git a/drivers/net/ethernet/sfc/falcon/enum.h b/drivers/net/ethernet/sfc/falcon/enum.h
new file mode 100644
index 000000000000..30a1136fc909
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/enum.h
@@ -0,0 +1,171 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2007-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_ENUM_H
+#define EF4_ENUM_H
+
+/**
+ * enum ef4_loopback_mode - loopback modes
+ * @LOOPBACK_NONE: no loopback
+ * @LOOPBACK_DATA: data path loopback
+ * @LOOPBACK_GMAC: loopback within GMAC
+ * @LOOPBACK_XGMII: loopback after XMAC
+ * @LOOPBACK_XGXS: loopback within BPX after XGXS
+ * @LOOPBACK_XAUI: loopback within BPX before XAUI serdes
+ * @LOOPBACK_GMII: loopback within BPX after GMAC
+ * @LOOPBACK_SGMII: loopback within BPX within SGMII
+ * @LOOPBACK_XGBR: loopback within BPX within XGBR
+ * @LOOPBACK_XFI: loopback within BPX before XFI serdes
+ * @LOOPBACK_XAUI_FAR: loopback within BPX after XAUI serdes
+ * @LOOPBACK_GMII_FAR: loopback within BPX before SGMII
+ * @LOOPBACK_SGMII_FAR: loopback within BPX after SGMII
+ * @LOOPBACK_XFI_FAR: loopback after XFI serdes
+ * @LOOPBACK_GPHY: loopback within 1G PHY at unspecified level
+ * @LOOPBACK_PHYXS: loopback within 10G PHY at PHYXS level
+ * @LOOPBACK_PCS: loopback within 10G PHY at PCS level
+ * @LOOPBACK_PMAPMD: loopback within 10G PHY at PMAPMD level
+ * @LOOPBACK_XPORT: cross port loopback
+ * @LOOPBACK_XGMII_WS: wireside loopback excluding XMAC
+ * @LOOPBACK_XAUI_WS: wireside loopback within BPX within XAUI serdes
+ * @LOOPBACK_XAUI_WS_FAR: wireside loopback within BPX including XAUI serdes
+ * @LOOPBACK_XAUI_WS_NEAR: wireside loopback within BPX excluding XAUI serdes
+ * @LOOPBACK_GMII_WS: wireside loopback excluding GMAC
+ * @LOOPBACK_XFI_WS: wireside loopback excluding XFI serdes
+ * @LOOPBACK_XFI_WS_FAR: wireside loopback including XFI serdes
+ * @LOOPBACK_PHYXS_WS: wireside loopback within 10G PHY at PHYXS level
+ */
+/* Please keep up-to-date w.r.t the following two #defines */
+enum ef4_loopback_mode {
+	LOOPBACK_NONE = 0,
+	LOOPBACK_DATA = 1,
+	LOOPBACK_GMAC = 2,
+	LOOPBACK_XGMII = 3,
+	LOOPBACK_XGXS = 4,
+	LOOPBACK_XAUI = 5,
+	LOOPBACK_GMII = 6,
+	LOOPBACK_SGMII = 7,
+	LOOPBACK_XGBR = 8,
+	LOOPBACK_XFI = 9,
+	LOOPBACK_XAUI_FAR = 10,
+	LOOPBACK_GMII_FAR = 11,
+	LOOPBACK_SGMII_FAR = 12,
+	LOOPBACK_XFI_FAR = 13,
+	LOOPBACK_GPHY = 14,
+	LOOPBACK_PHYXS = 15,
+	LOOPBACK_PCS = 16,
+	LOOPBACK_PMAPMD = 17,
+	LOOPBACK_XPORT = 18,
+	LOOPBACK_XGMII_WS = 19,
+	LOOPBACK_XAUI_WS = 20,
+	LOOPBACK_XAUI_WS_FAR = 21,
+	LOOPBACK_XAUI_WS_NEAR = 22,
+	LOOPBACK_GMII_WS = 23,
+	LOOPBACK_XFI_WS = 24,
+	LOOPBACK_XFI_WS_FAR = 25,
+	LOOPBACK_PHYXS_WS = 26,
+	LOOPBACK_MAX
+};
+#define LOOPBACK_TEST_MAX LOOPBACK_PMAPMD
+
+/* These loopbacks occur within the controller.  Each mode appears once;
+ * keep this list in step with enum ef4_loopback_mode above. */
+#define LOOPBACKS_INTERNAL ((1 << LOOPBACK_DATA) |		\
+			    (1 << LOOPBACK_GMAC) |		\
+			    (1 << LOOPBACK_XGMII)|		\
+			    (1 << LOOPBACK_XGXS) |		\
+			    (1 << LOOPBACK_XAUI) |		\
+			    (1 << LOOPBACK_GMII) |		\
+			    (1 << LOOPBACK_SGMII) |		\
+			    (1 << LOOPBACK_XGBR) |		\
+			    (1 << LOOPBACK_XFI) |		\
+			    (1 << LOOPBACK_XAUI_FAR) |		\
+			    (1 << LOOPBACK_GMII_FAR) |		\
+			    (1 << LOOPBACK_SGMII_FAR) |		\
+			    (1 << LOOPBACK_XFI_FAR) |		\
+			    (1 << LOOPBACK_XGMII_WS) |		\
+			    (1 << LOOPBACK_XAUI_WS) |		\
+			    (1 << LOOPBACK_XAUI_WS_FAR) |	\
+			    (1 << LOOPBACK_XAUI_WS_NEAR) |	\
+			    (1 << LOOPBACK_GMII_WS) |		\
+			    (1 << LOOPBACK_XFI_WS) |		\
+			    (1 << LOOPBACK_XFI_WS_FAR))
+
+#define LOOPBACKS_WS ((1 << LOOPBACK_XGMII_WS) |		\
+		      (1 << LOOPBACK_XAUI_WS) |			\
+		      (1 << LOOPBACK_XAUI_WS_FAR) |		\
+		      (1 << LOOPBACK_XAUI_WS_NEAR) |		\
+		      (1 << LOOPBACK_GMII_WS) |			\
+		      (1 << LOOPBACK_XFI_WS) |			\
+		      (1 << LOOPBACK_XFI_WS_FAR) |		\
+		      (1 << LOOPBACK_PHYXS_WS))
+
+#define LOOPBACKS_EXTERNAL(_efx)					\
+	((_efx)->loopback_modes & ~LOOPBACKS_INTERNAL &			\
+	 ~(1 << LOOPBACK_NONE))
+
+#define LOOPBACK_MASK(_efx)			\
+	(1 << (_efx)->loopback_mode)
+
+#define LOOPBACK_INTERNAL(_efx)				\
+	(!!(LOOPBACKS_INTERNAL & LOOPBACK_MASK(_efx)))
+
+#define LOOPBACK_EXTERNAL(_efx)				\
+	(!!(LOOPBACK_MASK(_efx) & LOOPBACKS_EXTERNAL(_efx)))
+
+#define LOOPBACK_CHANGED(_from, _to, _mask)				\
+	(!!((LOOPBACK_MASK(_from) ^ LOOPBACK_MASK(_to)) & (_mask)))
+
+#define LOOPBACK_OUT_OF(_from, _to, _mask)				\
+	((LOOPBACK_MASK(_from) & (_mask)) && !(LOOPBACK_MASK(_to) & (_mask)))
+
+/*****************************************************************************/
+
+/**
+ * enum reset_type - reset types
+ *
+ * %RESET_TYPE_INVISIBLE, %RESET_TYPE_ALL, %RESET_TYPE_WORLD and
+ * %RESET_TYPE_DISABLE specify the method/scope of the reset.  The
+ * other values specify reasons, which ef4_schedule_reset() will choose
+ * a method for.
+ *
+ * Reset methods are numbered in order of increasing scope.
+ *
+ * @RESET_TYPE_INVISIBLE: Reset datapath and MAC
+ * @RESET_TYPE_RECOVER_OR_ALL: Try to recover. Apply RESET_TYPE_ALL
+ * if unsuccessful.
+ * @RESET_TYPE_ALL: Reset datapath, MAC and PHY
+ * @RESET_TYPE_WORLD: Reset as much as possible
+ * @RESET_TYPE_RECOVER_OR_DISABLE: Try to recover. Apply RESET_TYPE_DISABLE if
+ * unsuccessful.
+ * @RESET_TYPE_DATAPATH: Reset datapath only.
+ * @RESET_TYPE_DISABLE: Reset datapath, MAC and PHY; leave NIC disabled
+ * @RESET_TYPE_TX_WATCHDOG: reset due to TX watchdog
+ * @RESET_TYPE_INT_ERROR: reset due to internal error
+ * @RESET_TYPE_RX_RECOVERY: reset to recover from RX datapath errors
+ * @RESET_TYPE_DMA_ERROR: DMA error
+ * @RESET_TYPE_TX_SKIP: hardware completed empty tx descriptors
+ */
+enum reset_type {
+	RESET_TYPE_INVISIBLE,
+	RESET_TYPE_RECOVER_OR_ALL,
+	RESET_TYPE_ALL,
+	RESET_TYPE_WORLD,
+	RESET_TYPE_RECOVER_OR_DISABLE,
+	RESET_TYPE_DATAPATH,
+	RESET_TYPE_DISABLE,
+	RESET_TYPE_MAX_METHOD,
+	RESET_TYPE_TX_WATCHDOG,
+	RESET_TYPE_INT_ERROR,
+	RESET_TYPE_RX_RECOVERY,
+	RESET_TYPE_DMA_ERROR,
+	RESET_TYPE_TX_SKIP,
+	RESET_TYPE_MAX,
+};
+
+#endif /* EF4_ENUM_H */
diff --git a/drivers/net/ethernet/sfc/falcon/ethtool.c b/drivers/net/ethernet/sfc/falcon/ethtool.c
new file mode 100644
index 000000000000..8e1929b01a32
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/ethtool.c
@@ -0,0 +1,1343 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/ethtool.h>
+#include <linux/rtnetlink.h>
+#include <linux/in.h>
+#include "net_driver.h"
+#include "workarounds.h"
+#include "selftest.h"
+#include "efx.h"
+#include "filter.h"
+#include "nic.h"
+
+struct ef4_sw_stat_desc {
+	const char *name;
+	enum {
+		EF4_ETHTOOL_STAT_SOURCE_nic,
+		EF4_ETHTOOL_STAT_SOURCE_channel,
+		EF4_ETHTOOL_STAT_SOURCE_tx_queue
+	} source;
+	unsigned offset;
+	u64(*get_stat) (void *field); /* Reader function */
+};
+
+/* Initialiser for a struct ef4_sw_stat_desc with type-checking */
+#define EF4_ETHTOOL_STAT(stat_name, source_name, field, field_type, \
+				get_stat_function) {			\
+	.name = #stat_name,						\
+	.source = EF4_ETHTOOL_STAT_SOURCE_##source_name,		\
+	.offset = ((((field_type *) 0) ==				\
+		      &((struct ef4_##source_name *)0)->field) ?	\
+		    offsetof(struct ef4_##source_name, field) :		\
+		    offsetof(struct ef4_##source_name, field)),		\
+	.get_stat = get_stat_function,					\
+}
+
+static u64 ef4_get_uint_stat(void *field)
+{
+	return *(unsigned int *)field;
+}
+
+static u64 ef4_get_atomic_stat(void *field)
+{
+	return atomic_read((atomic_t *) field);
+}
+
+#define EF4_ETHTOOL_ATOMIC_NIC_ERROR_STAT(field)		\
+	EF4_ETHTOOL_STAT(field, nic, field,			\
+			 atomic_t, ef4_get_atomic_stat)
+
+#define EF4_ETHTOOL_UINT_CHANNEL_STAT(field)			\
+	EF4_ETHTOOL_STAT(field, channel, n_##field,		\
+			 unsigned int, ef4_get_uint_stat)
+
+#define EF4_ETHTOOL_UINT_TXQ_STAT(field)			\
+	EF4_ETHTOOL_STAT(tx_##field, tx_queue, field,		\
+			 unsigned int, ef4_get_uint_stat)
+
+static const struct ef4_sw_stat_desc ef4_sw_stat_desc[] = {
+	EF4_ETHTOOL_UINT_TXQ_STAT(merge_events),
+	EF4_ETHTOOL_UINT_TXQ_STAT(pushes),
+	EF4_ETHTOOL_UINT_TXQ_STAT(cb_packets),
+	EF4_ETHTOOL_ATOMIC_NIC_ERROR_STAT(rx_reset),
+	EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_tobe_disc),
+	EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_ip_hdr_chksum_err),
+	EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_tcp_udp_chksum_err),
+	EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_mcast_mismatch),
+	EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_frm_trunc),
+	EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_events),
+	EF4_ETHTOOL_UINT_CHANNEL_STAT(rx_merge_packets),
+};
+
+#define EF4_ETHTOOL_SW_STAT_COUNT ARRAY_SIZE(ef4_sw_stat_desc)
+
+#define EF4_ETHTOOL_EEPROM_MAGIC 0xEFAB
+
+/**************************************************************************
+ *
+ * Ethtool operations
+ *
+ **************************************************************************
+ */
+
+/* ethtool "identify" hook: identify the device by flashing its LEDs.
+ * ETHTOOL_ID_ACTIVE returns 1 without touching the LED (the inline note
+ * below gives the blink rate); every other state is translated to an LED
+ * mode and handed to the NIC type's set_id_led() operation.
+ */
+static int ef4_ethtool_phys_id(struct net_device *net_dev,
+			       enum ethtool_phys_id_state state)
+{
+	struct ef4_nic *nic = netdev_priv(net_dev);
+	enum ef4_led_mode led;
+
+	if (state == ETHTOOL_ID_ACTIVE)
+		return 1;	/* cycle on/off once per second */
+
+	if (state == ETHTOOL_ID_ON)
+		led = EF4_LED_ON;
+	else if (state == ETHTOOL_ID_OFF)
+		led = EF4_LED_OFF;
+	else
+		led = EF4_LED_DEFAULT;	/* ETHTOOL_ID_INACTIVE and the rest */
+
+	nic->type->set_id_led(nic, led);
+	return 0;
+}
+
+/* This must be called with rtnl_lock held. */
+static int ef4_ethtool_get_settings(struct net_device *net_dev,
+				    struct ethtool_cmd *ecmd)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct ef4_link_state *link_state = &efx->link_state;
+
+	mutex_lock(&efx->mac_lock);
+	efx->phy_op->get_settings(efx, ecmd);
+	mutex_unlock(&efx->mac_lock);
+
+	/* Both MACs support pause frames (bidirectional and respond-only) */
+	ecmd->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause;
+
+	if (LOOPBACK_INTERNAL(efx)) {
+		ethtool_cmd_speed_set(ecmd, link_state->speed);
+		ecmd->duplex = link_state->fd ? DUPLEX_FULL : DUPLEX_HALF;
+	}
+
+	return 0;
+}
+
+/* This must be called with rtnl_lock held. */
+static int ef4_ethtool_set_settings(struct net_device *net_dev,
+				    struct ethtool_cmd *ecmd)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int rc;
+
+	/* GMAC does not support 1000Mbps HD */
+	if ((ethtool_cmd_speed(ecmd) == SPEED_1000) &&
+	    (ecmd->duplex != DUPLEX_FULL)) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "rejecting unsupported 1000Mbps HD setting\n");
+		return -EINVAL;
+	}
+
+	mutex_lock(&efx->mac_lock);
+	rc = efx->phy_op->set_settings(efx, ecmd);
+	mutex_unlock(&efx->mac_lock);
+	return rc;
+}
+
+static void ef4_ethtool_get_drvinfo(struct net_device *net_dev,
+				    struct ethtool_drvinfo *info)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver));
+	strlcpy(info->version, EF4_DRIVER_VERSION, sizeof(info->version));
+	strlcpy(info->bus_info, pci_name(efx->pci_dev), sizeof(info->bus_info));
+}
+
+static int ef4_ethtool_get_regs_len(struct net_device *net_dev)
+{
+	return ef4_nic_get_regs_len(netdev_priv(net_dev));
+}
+
+static void ef4_ethtool_get_regs(struct net_device *net_dev,
+				 struct ethtool_regs *regs, void *buf)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	regs->version = efx->type->revision;
+	ef4_nic_get_regs(efx, buf);
+}
+
+static u32 ef4_ethtool_get_msglevel(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	return efx->msg_enable;
+}
+
+static void ef4_ethtool_set_msglevel(struct net_device *net_dev, u32 msg_enable)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	efx->msg_enable = msg_enable;
+}
+
+/**
+ * ef4_fill_test - fill in an individual self-test entry
+ * @test_index:		Index of the test
+ * @strings:		Ethtool strings, or %NULL
+ * @data:		Ethtool test results, or %NULL
+ * @test:		Pointer to test result (used only if data != %NULL)
+ * @unit_format:	Unit name format (e.g. "chan\%d")
+ * @unit_id:		Unit id (e.g. 0 for "chan0")
+ * @test_format:	Test name format (e.g. "loopback.\%s.tx_sent")
+ * @test_id:		Test id (e.g. "PHYXS" for "loopback.PHYXS.tx_sent")
+ *
+ * Writes *@test to @data[] and/or the formatted test name to @strings.
+ */
+static void ef4_fill_test(unsigned int test_index, u8 *strings, u64 *data,
+			  int *test, const char *unit_format, int unit_id,
+			  const char *test_format, const char *test_id)
+{
+	char unit_str[ETH_GSTRING_LEN], test_str[ETH_GSTRING_LEN];
+
+	/* Fill data value, if applicable */
+	if (data)
+		data[test_index] = *test;
+
+	/* Fill string, if applicable; every copy is bounded by its buffer */
+	if (strings) {
+		if (strchr(unit_format, '%'))
+			snprintf(unit_str, sizeof(unit_str),
+				 unit_format, unit_id);
+		else
+			strlcpy(unit_str, unit_format, sizeof(unit_str));
+		snprintf(test_str, sizeof(test_str), test_format, test_id);
+		snprintf(strings + test_index * ETH_GSTRING_LEN,
+			 ETH_GSTRING_LEN,
+			 "%-6s %-24s", unit_str, test_str);
+	}
+}
+
+#define EF4_CHANNEL_NAME(_channel) "chan%d", _channel->channel
+#define EF4_TX_QUEUE_NAME(_tx_queue) "txq%d", _tx_queue->queue
+#define EF4_RX_QUEUE_NAME(_rx_queue) "rxq%d", _rx_queue->queue
+#define EF4_LOOPBACK_NAME(_mode, _counter)			\
+	"loopback.%s." _counter, STRING_TABLE_LOOKUP(_mode, ef4_loopback_mode)
+
+/**
+ * ef4_fill_loopback_test - fill in a block of loopback self-test entries
+ * @efx:		Efx NIC
+ * @lb_tests:		Efx loopback self-test results structure
+ * @mode:		Loopback test mode
+ * @test_index:		Starting index of the test
+ * @strings:		Ethtool strings, or %NULL
+ * @data:		Ethtool test results, or %NULL
+ *
+ * Fill in a block of loopback self-test entries.  Return new test
+ * index.
+ */
+static int ef4_fill_loopback_test(struct ef4_nic *efx,
+				  struct ef4_loopback_self_tests *lb_tests,
+				  enum ef4_loopback_mode mode,
+				  unsigned int test_index,
+				  u8 *strings, u64 *data)
+{
+	struct ef4_channel *channel =
+		ef4_get_channel(efx, efx->tx_channel_offset);
+	struct ef4_tx_queue *tx_queue;
+
+	ef4_for_each_channel_tx_queue(tx_queue, channel) {
+		ef4_fill_test(test_index++, strings, data,
+			      &lb_tests->tx_sent[tx_queue->queue],
+			      EF4_TX_QUEUE_NAME(tx_queue),
+			      EF4_LOOPBACK_NAME(mode, "tx_sent"));
+		ef4_fill_test(test_index++, strings, data,
+			      &lb_tests->tx_done[tx_queue->queue],
+			      EF4_TX_QUEUE_NAME(tx_queue),
+			      EF4_LOOPBACK_NAME(mode, "tx_done"));
+	}
+	ef4_fill_test(test_index++, strings, data,
+		      &lb_tests->rx_good,
+		      "rx", 0,
+		      EF4_LOOPBACK_NAME(mode, "rx_good"));
+	ef4_fill_test(test_index++, strings, data,
+		      &lb_tests->rx_bad,
+		      "rx", 0,
+		      EF4_LOOPBACK_NAME(mode, "rx_bad"));
+
+	return test_index;
+}
+
+/**
+ * ef4_ethtool_fill_self_tests - get self-test details
+ * @efx:		Efx NIC
+ * @tests:		Efx self-test results structure, or %NULL
+ * @strings:		Ethtool strings, or %NULL
+ * @data:		Ethtool test results, or %NULL
+ *
+ * Get self-test number of strings, strings, and/or test results.
+ * Return number of strings (== number of test results).
+ *
+ * The reason for merging these three functions is to make sure that
+ * they can never be inconsistent.
+ */
+static int ef4_ethtool_fill_self_tests(struct ef4_nic *efx,
+				       struct ef4_self_tests *tests,
+				       u8 *strings, u64 *data)
+{
+	struct ef4_channel *channel;
+	unsigned int n = 0, i;
+	enum ef4_loopback_mode mode;
+
+	ef4_fill_test(n++, strings, data, &tests->phy_alive,
+		      "phy", 0, "alive", NULL);
+	ef4_fill_test(n++, strings, data, &tests->nvram,
+		      "core", 0, "nvram", NULL);
+	ef4_fill_test(n++, strings, data, &tests->interrupt,
+		      "core", 0, "interrupt", NULL);
+
+	/* Event queues */
+	ef4_for_each_channel(channel, efx) {
+		ef4_fill_test(n++, strings, data,
+			      &tests->eventq_dma[channel->channel],
+			      EF4_CHANNEL_NAME(channel),
+			      "eventq.dma", NULL);
+		ef4_fill_test(n++, strings, data,
+			      &tests->eventq_int[channel->channel],
+			      EF4_CHANNEL_NAME(channel),
+			      "eventq.int", NULL);
+	}
+
+	ef4_fill_test(n++, strings, data, &tests->memory,
+		      "core", 0, "memory", NULL);
+	ef4_fill_test(n++, strings, data, &tests->registers,
+		      "core", 0, "registers", NULL);
+
+	if (efx->phy_op->run_tests != NULL) {
+		EF4_BUG_ON_PARANOID(efx->phy_op->test_name == NULL);
+
+		for (i = 0; true; ++i) {
+			const char *name;
+
+			EF4_BUG_ON_PARANOID(i >= EF4_MAX_PHY_TESTS);
+			name = efx->phy_op->test_name(efx, i);
+			if (name == NULL)
+				break;
+
+			ef4_fill_test(n++, strings, data, &tests->phy_ext[i],
+				      "phy", 0, name, NULL);
+		}
+	}
+
+	/* Loopback tests */
+	for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
+		if (!(efx->loopback_modes & (1 << mode)))
+			continue;
+		n = ef4_fill_loopback_test(efx,
+					   &tests->loopback[mode], mode, n,
+					   strings, data);
+	}
+
+	return n;
+}
+
+static size_t ef4_describe_per_queue_stats(struct ef4_nic *efx, u8 *strings)
+{
+	size_t n_stats = 0;
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx) {
+		if (ef4_channel_has_tx_queues(channel)) {
+			n_stats++;
+			if (strings != NULL) {
+				snprintf(strings, ETH_GSTRING_LEN,
+					 "tx-%u.tx_packets",
+					 channel->tx_queue[0].queue /
+					 EF4_TXQ_TYPES);
+
+				strings += ETH_GSTRING_LEN;
+			}
+		}
+	}
+	ef4_for_each_channel(channel, efx) {
+		if (ef4_channel_has_rx_queue(channel)) {
+			n_stats++;
+			if (strings != NULL) {
+				snprintf(strings, ETH_GSTRING_LEN,
+					 "rx-%d.rx_packets", channel->channel);
+				strings += ETH_GSTRING_LEN;
+			}
+		}
+	}
+	return n_stats;
+}
+
+static int ef4_ethtool_get_sset_count(struct net_device *net_dev,
+				      int string_set)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	switch (string_set) {
+	case ETH_SS_STATS:
+		return efx->type->describe_stats(efx, NULL) +
+		       EF4_ETHTOOL_SW_STAT_COUNT +
+		       ef4_describe_per_queue_stats(efx, NULL);
+	case ETH_SS_TEST:
+		return ef4_ethtool_fill_self_tests(efx, NULL, NULL, NULL);
+	default:
+		return -EINVAL;
+	}
+}
+
+static void ef4_ethtool_get_strings(struct net_device *net_dev,
+				    u32 string_set, u8 *strings)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int i;
+
+	switch (string_set) {
+	case ETH_SS_STATS:
+		strings += (efx->type->describe_stats(efx, strings) *
+			    ETH_GSTRING_LEN);
+		for (i = 0; i < EF4_ETHTOOL_SW_STAT_COUNT; i++)
+			strlcpy(strings + i * ETH_GSTRING_LEN,
+				ef4_sw_stat_desc[i].name, ETH_GSTRING_LEN);
+		strings += EF4_ETHTOOL_SW_STAT_COUNT * ETH_GSTRING_LEN;
+		strings += (ef4_describe_per_queue_stats(efx, strings) *
+			    ETH_GSTRING_LEN);
+		break;
+	case ETH_SS_TEST:
+		ef4_ethtool_fill_self_tests(efx, NULL, strings, NULL);
+		break;
+	default:
+		/* No other string sets */
+		break;
+	}
+}
+
+static void ef4_ethtool_get_stats(struct net_device *net_dev,
+				  struct ethtool_stats *stats,
+				  u64 *data)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	const struct ef4_sw_stat_desc *stat;
+	struct ef4_channel *channel;
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_rx_queue *rx_queue;
+	int i;
+
+	spin_lock_bh(&efx->stats_lock);
+
+	/* Get NIC statistics */
+	data += efx->type->update_stats(efx, data, NULL);
+
+	/* Get software statistics */
+	for (i = 0; i < EF4_ETHTOOL_SW_STAT_COUNT; i++) {
+		stat = &ef4_sw_stat_desc[i];
+		switch (stat->source) {
+		case EF4_ETHTOOL_STAT_SOURCE_nic:
+			data[i] = stat->get_stat((void *)efx + stat->offset);
+			break;
+		case EF4_ETHTOOL_STAT_SOURCE_channel:
+			data[i] = 0;
+			ef4_for_each_channel(channel, efx)
+				data[i] += stat->get_stat((void *)channel +
+							  stat->offset);
+			break;
+		case EF4_ETHTOOL_STAT_SOURCE_tx_queue:
+			data[i] = 0;
+			ef4_for_each_channel(channel, efx) {
+				ef4_for_each_channel_tx_queue(tx_queue, channel)
+					data[i] +=
+						stat->get_stat((void *)tx_queue
+							       + stat->offset);
+			}
+			break;
+		}
+	}
+	data += EF4_ETHTOOL_SW_STAT_COUNT;
+
+	spin_unlock_bh(&efx->stats_lock);
+
+	ef4_for_each_channel(channel, efx) {
+		if (ef4_channel_has_tx_queues(channel)) {
+			*data = 0;
+			ef4_for_each_channel_tx_queue(tx_queue, channel) {
+				*data += tx_queue->tx_packets;
+			}
+			data++;
+		}
+	}
+	ef4_for_each_channel(channel, efx) {
+		if (ef4_channel_has_rx_queue(channel)) {
+			*data = 0;
+			ef4_for_each_channel_rx_queue(rx_queue, channel) {
+				*data += rx_queue->rx_packets;
+			}
+			data++;
+		}
+	}
+}
+
+static void ef4_ethtool_self_test(struct net_device *net_dev,
+				  struct ethtool_test *test, u64 *data)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct ef4_self_tests *ef4_tests;
+	bool already_up;
+	int rc = -ENOMEM;
+
+	ef4_tests = kzalloc(sizeof(*ef4_tests), GFP_KERNEL);
+	if (!ef4_tests)
+		goto fail;
+
+	if (efx->state != STATE_READY) {
+		rc = -EBUSY;
+		goto out;
+	}
+
+	netif_info(efx, drv, efx->net_dev, "starting %sline testing\n",
+		   (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
+
+	/* We need rx buffers and interrupts. */
+	already_up = (efx->net_dev->flags & IFF_UP);
+	if (!already_up) {
+		rc = dev_open(efx->net_dev);
+		if (rc) {
+			netif_err(efx, drv, efx->net_dev,
+				  "failed opening device.\n");
+			goto out;
+		}
+	}
+
+	rc = ef4_selftest(efx, ef4_tests, test->flags);
+
+	if (!already_up)
+		dev_close(efx->net_dev);
+
+	netif_info(efx, drv, efx->net_dev, "%s %sline self-tests\n",
+		   rc == 0 ? "passed" : "failed",
+		   (test->flags & ETH_TEST_FL_OFFLINE) ? "off" : "on");
+
+out:
+	ef4_ethtool_fill_self_tests(efx, ef4_tests, NULL, data);
+	kfree(ef4_tests);
+fail:
+	if (rc)
+		test->flags |= ETH_TEST_FL_FAILED;
+}
+
+/* Restart autonegotiation */
+static int ef4_ethtool_nway_reset(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	return mdio45_nway_restart(&efx->mdio);
+}
+
+/*
+ * Each channel has a single IRQ and moderation timer, started by any
+ * completion (or other event).  Unless the module parameter
+ * separate_tx_channels is set, IRQs and moderation are therefore
+ * shared between RX and TX completions.  In this case, when RX IRQ
+ * moderation is explicitly changed then TX IRQ moderation is
+ * automatically changed too, but otherwise we fail if the two values
+ * are requested to be different.
+ *
+ * The hardware does not support a limit on the number of completions
+ * before an IRQ, so we do not use the max_frames fields.  We should
+ * report and require that max_frames == (usecs != 0), but this would
+ * invalidate existing user documentation.
+ *
+ * The hardware does not have distinct settings for interrupt
+ * moderation while the previous IRQ is being handled, so we should
+ * not use the 'irq' fields.  However, an earlier developer
+ * misunderstood the meaning of the 'irq' fields and the driver did
+ * not support the standard fields.  To avoid invalidating existing
+ * user documentation, we report and accept changes through either the
+ * standard or 'irq' fields.  If both are changed at the same time, we
+ * prefer the standard field.
+ *
+ * We implement adaptive IRQ moderation, but use a different algorithm
+ * from that assumed in the definition of struct ethtool_coalesce.
+ * Therefore we do not use any of the adaptive moderation parameters
+ * in it.
+ */
+
+static int ef4_ethtool_get_coalesce(struct net_device *net_dev,
+				    struct ethtool_coalesce *coalesce)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	unsigned int tx_usecs, rx_usecs;
+	bool rx_adaptive;
+
+	ef4_get_irq_moderation(efx, &tx_usecs, &rx_usecs, &rx_adaptive);
+
+	coalesce->tx_coalesce_usecs = tx_usecs;
+	coalesce->tx_coalesce_usecs_irq = tx_usecs;
+	coalesce->rx_coalesce_usecs = rx_usecs;
+	coalesce->rx_coalesce_usecs_irq = rx_usecs;
+	coalesce->use_adaptive_rx_coalesce = rx_adaptive;
+
+	return 0;
+}
+
+static int ef4_ethtool_set_coalesce(struct net_device *net_dev,
+				    struct ethtool_coalesce *coalesce)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct ef4_channel *channel;
+	unsigned int tx_usecs, rx_usecs;
+	bool adaptive, rx_may_override_tx;
+	int rc;
+
+	if (coalesce->use_adaptive_tx_coalesce)
+		return -EINVAL;
+
+	ef4_get_irq_moderation(efx, &tx_usecs, &rx_usecs, &adaptive);
+
+	if (coalesce->rx_coalesce_usecs != rx_usecs)
+		rx_usecs = coalesce->rx_coalesce_usecs;
+	else
+		rx_usecs = coalesce->rx_coalesce_usecs_irq;
+
+	adaptive = coalesce->use_adaptive_rx_coalesce;
+
+	/* If channels are shared, TX IRQ moderation can be quietly
+	 * overridden unless it is changed from its old value.
+	 */
+	rx_may_override_tx = (coalesce->tx_coalesce_usecs == tx_usecs &&
+			      coalesce->tx_coalesce_usecs_irq == tx_usecs);
+	if (coalesce->tx_coalesce_usecs != tx_usecs)
+		tx_usecs = coalesce->tx_coalesce_usecs;
+	else
+		tx_usecs = coalesce->tx_coalesce_usecs_irq;
+
+	rc = ef4_init_irq_moderation(efx, tx_usecs, rx_usecs, adaptive,
+				     rx_may_override_tx);
+	if (rc != 0)
+		return rc;
+
+	ef4_for_each_channel(channel, efx)
+		efx->type->push_irq_moderation(channel);
+
+	return 0;
+}
+
+static void ef4_ethtool_get_ringparam(struct net_device *net_dev,
+				      struct ethtool_ringparam *ring)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	ring->rx_max_pending = EF4_MAX_DMAQ_SIZE;
+	ring->tx_max_pending = EF4_MAX_DMAQ_SIZE;
+	ring->rx_pending = efx->rxq_entries;
+	ring->tx_pending = efx->txq_entries;
+}
+
+static int ef4_ethtool_set_ringparam(struct net_device *net_dev,
+				     struct ethtool_ringparam *ring)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	u32 txq_entries;
+
+	if (ring->rx_mini_pending || ring->rx_jumbo_pending ||
+	    ring->rx_pending > EF4_MAX_DMAQ_SIZE ||
+	    ring->tx_pending > EF4_MAX_DMAQ_SIZE)
+		return -EINVAL;
+
+	if (ring->rx_pending < EF4_RXQ_MIN_ENT) {
+		netif_err(efx, drv, efx->net_dev,
+			  "RX queues cannot be smaller than %u\n",
+			  EF4_RXQ_MIN_ENT);
+		return -EINVAL;
+	}
+
+	txq_entries = max(ring->tx_pending, EF4_TXQ_MIN_ENT(efx));
+	if (txq_entries != ring->tx_pending)
+		netif_warn(efx, drv, efx->net_dev,
+			   "increasing TX queue size to minimum of %u\n",
+			   txq_entries);
+
+	return ef4_realloc_channels(efx, ring->rx_pending, txq_entries);
+}
+
+static int ef4_ethtool_set_pauseparam(struct net_device *net_dev,
+				      struct ethtool_pauseparam *pause)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	u8 wanted_fc, old_fc;
+	u32 old_adv;
+	int rc = 0;
+
+	mutex_lock(&efx->mac_lock);
+
+	wanted_fc = ((pause->rx_pause ? EF4_FC_RX : 0) |
+		     (pause->tx_pause ? EF4_FC_TX : 0) |
+		     (pause->autoneg ? EF4_FC_AUTO : 0));
+
+	if ((wanted_fc & EF4_FC_TX) && !(wanted_fc & EF4_FC_RX)) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Flow control unsupported: tx ON rx OFF\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if ((wanted_fc & EF4_FC_AUTO) && !efx->link_advertising) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "Autonegotiation is disabled\n");
+		rc = -EINVAL;
+		goto out;
+	}
+
+	/* Hook for Falcon bug 11482 workaround */
+	if (efx->type->prepare_enable_fc_tx &&
+	    (wanted_fc & EF4_FC_TX) && !(efx->wanted_fc & EF4_FC_TX))
+		efx->type->prepare_enable_fc_tx(efx);
+
+	old_adv = efx->link_advertising;
+	old_fc = efx->wanted_fc;
+	ef4_link_set_wanted_fc(efx, wanted_fc);
+	if (efx->link_advertising != old_adv ||
+	    ((efx->wanted_fc ^ old_fc) & EF4_FC_AUTO)) {
+		rc = efx->phy_op->reconfigure(efx);
+		if (rc) {
+			netif_err(efx, drv, efx->net_dev,
+				  "Unable to advertise requested flow control setting\n");
+			goto out;
+		}
+	}
+
+	/* Reconfigure the MAC. The PHY *may* generate a link state change event
+	 * if the user just changed the advertised capabilities, but there's no
+	 * harm doing this twice
+	 */
+	ef4_mac_reconfigure(efx);
+
+out:
+	mutex_unlock(&efx->mac_lock);
+
+	return rc;
+}
+
+static void ef4_ethtool_get_pauseparam(struct net_device *net_dev,
+				       struct ethtool_pauseparam *pause)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	pause->rx_pause = !!(efx->wanted_fc & EF4_FC_RX);
+	pause->tx_pause = !!(efx->wanted_fc & EF4_FC_TX);
+	pause->autoneg = !!(efx->wanted_fc & EF4_FC_AUTO);
+}
+
+/* Hand the ethtool get_wol request to the NIC-type specific hook */
+static void ef4_ethtool_get_wol(struct net_device *net_dev,
+				struct ethtool_wolinfo *wol)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	efx->type->get_wol(efx, wol);
+}
+
+static int ef4_ethtool_set_wol(struct net_device *net_dev,
+			       struct ethtool_wolinfo *wol)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	return efx->type->set_wol(efx, wol->wolopts);
+}
+
+static int ef4_ethtool_reset(struct net_device *net_dev, u32 *flags)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int rc;
+
+	rc = efx->type->map_reset_flags(flags);
+	if (rc < 0)
+		return rc;
+
+	return ef4_reset(efx, rc);
+}
+
+/* MAC address mask including only I/G bit */
+static const u8 mac_addr_ig_mask[ETH_ALEN] __aligned(2) = {0x01, 0, 0, 0, 0, 0};
+
+#define IP4_ADDR_FULL_MASK	((__force __be32)~0)
+#define IP_PROTO_FULL_MASK	0xFF
+#define PORT_FULL_MASK		((__force __be16)~0)
+#define ETHER_TYPE_FULL_MASK	((__force __be16)~0)
+
+static inline void ip6_fill_mask(__be32 *mask)
+{
+	mask[0] = mask[1] = mask[2] = mask[3] = ~(__be32)0;
+}
+
+static int ef4_ethtool_get_class_rule(struct ef4_nic *efx,
+				      struct ethtool_rx_flow_spec *rule)
+{
+	struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
+	struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
+	struct ethtool_usrip4_spec *uip_entry = &rule->h_u.usr_ip4_spec;
+	struct ethtool_usrip4_spec *uip_mask = &rule->m_u.usr_ip4_spec;
+	struct ethtool_tcpip6_spec *ip6_entry = &rule->h_u.tcp_ip6_spec;
+	struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
+	struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
+	struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
+	struct ethhdr *mac_entry = &rule->h_u.ether_spec;
+	struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+	struct ef4_filter_spec spec;
+	int rc;
+
+	rc = ef4_filter_get_filter_safe(efx, EF4_FILTER_PRI_MANUAL,
+					rule->location, &spec);
+	if (rc)
+		return rc;
+
+	if (spec.dmaq_id == EF4_FILTER_RX_DMAQ_ID_DROP)
+		rule->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		rule->ring_cookie = spec.dmaq_id;
+
+	if ((spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE) &&
+	    spec.ether_type == htons(ETH_P_IP) &&
+	    (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) &&
+	    (spec.ip_proto == IPPROTO_TCP || spec.ip_proto == IPPROTO_UDP) &&
+	    !(spec.match_flags &
+	      ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+		EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+		EF4_FILTER_MATCH_IP_PROTO |
+		EF4_FILTER_MATCH_LOC_PORT | EF4_FILTER_MATCH_REM_PORT))) {
+		rule->flow_type = ((spec.ip_proto == IPPROTO_TCP) ?
+				   TCP_V4_FLOW : UDP_V4_FLOW);
+		if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+			ip_entry->ip4dst = spec.loc_host[0];
+			ip_mask->ip4dst = IP4_ADDR_FULL_MASK;
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+			ip_entry->ip4src = spec.rem_host[0];
+			ip_mask->ip4src = IP4_ADDR_FULL_MASK;
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_LOC_PORT) {
+			ip_entry->pdst = spec.loc_port;
+			ip_mask->pdst = PORT_FULL_MASK;
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_REM_PORT) {
+			ip_entry->psrc = spec.rem_port;
+			ip_mask->psrc = PORT_FULL_MASK;
+		}
+	} else if ((spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE) &&
+	    spec.ether_type == htons(ETH_P_IPV6) &&
+	    (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) &&
+	    (spec.ip_proto == IPPROTO_TCP || spec.ip_proto == IPPROTO_UDP) &&
+	    !(spec.match_flags &
+	      ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+		EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+		EF4_FILTER_MATCH_IP_PROTO |
+		EF4_FILTER_MATCH_LOC_PORT | EF4_FILTER_MATCH_REM_PORT))) {
+		rule->flow_type = ((spec.ip_proto == IPPROTO_TCP) ?
+				   TCP_V6_FLOW : UDP_V6_FLOW);
+		if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+			memcpy(ip6_entry->ip6dst, spec.loc_host,
+			       sizeof(ip6_entry->ip6dst));
+			ip6_fill_mask(ip6_mask->ip6dst);
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+			memcpy(ip6_entry->ip6src, spec.rem_host,
+			       sizeof(ip6_entry->ip6src));
+			ip6_fill_mask(ip6_mask->ip6src);
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_LOC_PORT) {
+			ip6_entry->pdst = spec.loc_port;
+			ip6_mask->pdst = PORT_FULL_MASK;
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_REM_PORT) {
+			ip6_entry->psrc = spec.rem_port;
+			ip6_mask->psrc = PORT_FULL_MASK;
+		}
+	} else if (!(spec.match_flags &
+		     ~(EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_LOC_MAC_IG |
+		       EF4_FILTER_MATCH_REM_MAC | EF4_FILTER_MATCH_ETHER_TYPE |
+		       EF4_FILTER_MATCH_OUTER_VID))) {
+		rule->flow_type = ETHER_FLOW;
+		if (spec.match_flags &
+		    (EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_LOC_MAC_IG)) {
+			ether_addr_copy(mac_entry->h_dest, spec.loc_mac);
+			if (spec.match_flags & EF4_FILTER_MATCH_LOC_MAC)
+				eth_broadcast_addr(mac_mask->h_dest);
+			else
+				ether_addr_copy(mac_mask->h_dest,
+						mac_addr_ig_mask);
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_REM_MAC) {
+			ether_addr_copy(mac_entry->h_source, spec.rem_mac);
+			eth_broadcast_addr(mac_mask->h_source);
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE) {
+			mac_entry->h_proto = spec.ether_type;
+			mac_mask->h_proto = ETHER_TYPE_FULL_MASK;
+		}
+	} else if (spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE &&
+		   spec.ether_type == htons(ETH_P_IP) &&
+		   !(spec.match_flags &
+		     ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+		       EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+		       EF4_FILTER_MATCH_IP_PROTO))) {
+		rule->flow_type = IPV4_USER_FLOW;
+		uip_entry->ip_ver = ETH_RX_NFC_IP4;
+		if (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) {
+			uip_mask->proto = IP_PROTO_FULL_MASK;
+			uip_entry->proto = spec.ip_proto;
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+			uip_entry->ip4dst = spec.loc_host[0];
+			uip_mask->ip4dst = IP4_ADDR_FULL_MASK;
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+			uip_entry->ip4src = spec.rem_host[0];
+			uip_mask->ip4src = IP4_ADDR_FULL_MASK;
+		}
+	} else if (spec.match_flags & EF4_FILTER_MATCH_ETHER_TYPE &&
+		   spec.ether_type == htons(ETH_P_IPV6) &&
+		   !(spec.match_flags &
+		     ~(EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_OUTER_VID |
+		       EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_REM_HOST |
+		       EF4_FILTER_MATCH_IP_PROTO))) {
+		rule->flow_type = IPV6_USER_FLOW;
+		if (spec.match_flags & EF4_FILTER_MATCH_IP_PROTO) {
+			uip6_mask->l4_proto = IP_PROTO_FULL_MASK;
+			uip6_entry->l4_proto = spec.ip_proto;
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_LOC_HOST) {
+			memcpy(uip6_entry->ip6dst, spec.loc_host,
+			       sizeof(uip6_entry->ip6dst));
+			ip6_fill_mask(uip6_mask->ip6dst);
+		}
+		if (spec.match_flags & EF4_FILTER_MATCH_REM_HOST) {
+			memcpy(uip6_entry->ip6src, spec.rem_host,
+			       sizeof(uip6_entry->ip6src));
+			ip6_fill_mask(uip6_mask->ip6src);
+		}
+	} else {
+		/* The above should handle all filters that we insert */
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	if (spec.match_flags & EF4_FILTER_MATCH_OUTER_VID) {
+		rule->flow_type |= FLOW_EXT;
+		rule->h_ext.vlan_tci = spec.outer_vid;
+		rule->m_ext.vlan_tci = htons(0xfff);
+	}
+
+	return rc;
+}
+
+static int ef4_ethtool_get_rxnfc(struct net_device *net_dev,
+				 struct ethtool_rxnfc *info, u32 *rule_locs)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	switch (info->cmd) {
+	case ETHTOOL_GRXRINGS:
+		info->data = efx->n_rx_channels;
+		return 0;
+
+	case ETHTOOL_GRXFH: {
+		unsigned int min_revision = 0;
+
+		info->data = 0;
+		switch (info->flow_type) {
+		case TCP_V4_FLOW:
+			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+			/* fall through - also sets the IP address bits */
+		case UDP_V4_FLOW:
+		case SCTP_V4_FLOW:
+		case AH_ESP_V4_FLOW:
+		case IPV4_FLOW:
+			info->data |= RXH_IP_SRC | RXH_IP_DST;
+			min_revision = EF4_REV_FALCON_B0;
+			break;
+		default:
+			break;
+		}
+		if (ef4_nic_rev(efx) < min_revision)
+			info->data = 0;
+		return 0;
+	}
+
+	case ETHTOOL_GRXCLSRLCNT:
+		info->data = ef4_filter_get_rx_id_limit(efx);
+		if (info->data == 0)
+			return -EOPNOTSUPP;
+		info->data |= RX_CLS_LOC_SPECIAL;
+		info->rule_cnt =
+			ef4_filter_count_rx_used(efx, EF4_FILTER_PRI_MANUAL);
+		return 0;
+
+	case ETHTOOL_GRXCLSRULE:
+		if (ef4_filter_get_rx_id_limit(efx) == 0)
+			return -EOPNOTSUPP;
+		return ef4_ethtool_get_class_rule(efx, &info->fs);
+
+	case ETHTOOL_GRXCLSRLALL: {
+		s32 rc;
+		info->data = ef4_filter_get_rx_id_limit(efx);
+		if (info->data == 0)
+			return -EOPNOTSUPP;
+		rc = ef4_filter_get_rx_ids(efx, EF4_FILTER_PRI_MANUAL,
+					   rule_locs, info->rule_cnt);
+		if (rc < 0)
+			return rc;
+		info->rule_cnt = rc;
+		return 0;
+	}
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static inline bool ip6_mask_is_full(__be32 mask[4])
+{
+	return !~(mask[0] & mask[1] & mask[2] & mask[3]);
+}
+
+static inline bool ip6_mask_is_empty(__be32 mask[4])
+{
+	return !(mask[0] | mask[1] | mask[2] | mask[3]);
+}
+
+static int ef4_ethtool_set_class_rule(struct ef4_nic *efx,
+				      struct ethtool_rx_flow_spec *rule)
+{
+	struct ethtool_tcpip4_spec *ip_entry = &rule->h_u.tcp_ip4_spec;
+	struct ethtool_tcpip4_spec *ip_mask = &rule->m_u.tcp_ip4_spec;
+	struct ethtool_usrip4_spec *uip_entry = &rule->h_u.usr_ip4_spec;
+	struct ethtool_usrip4_spec *uip_mask = &rule->m_u.usr_ip4_spec;
+	struct ethtool_tcpip6_spec *ip6_entry = &rule->h_u.tcp_ip6_spec;
+	struct ethtool_tcpip6_spec *ip6_mask = &rule->m_u.tcp_ip6_spec;
+	struct ethtool_usrip6_spec *uip6_entry = &rule->h_u.usr_ip6_spec;
+	struct ethtool_usrip6_spec *uip6_mask = &rule->m_u.usr_ip6_spec;
+	struct ethhdr *mac_entry = &rule->h_u.ether_spec;
+	struct ethhdr *mac_mask = &rule->m_u.ether_spec;
+	struct ef4_filter_spec spec;
+	int rc;
+
+	/* Check that user wants us to choose the location */
+	if (rule->location != RX_CLS_LOC_ANY)
+		return -EINVAL;
+
+	/* Range-check ring_cookie */
+	if (rule->ring_cookie >= efx->n_rx_channels &&
+	    rule->ring_cookie != RX_CLS_FLOW_DISC)
+		return -EINVAL;
+
+	/* Check for unsupported extensions */
+	if ((rule->flow_type & FLOW_EXT) &&
+	    (rule->m_ext.vlan_etype || rule->m_ext.data[0] ||
+	     rule->m_ext.data[1]))
+		return -EINVAL;
+
+	ef4_filter_init_rx(&spec, EF4_FILTER_PRI_MANUAL,
+			   efx->rx_scatter ? EF4_FILTER_FLAG_RX_SCATTER : 0,
+			   (rule->ring_cookie == RX_CLS_FLOW_DISC) ?
+			   EF4_FILTER_RX_DMAQ_ID_DROP : rule->ring_cookie);
+
+	switch (rule->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		spec.match_flags = (EF4_FILTER_MATCH_ETHER_TYPE |
+				    EF4_FILTER_MATCH_IP_PROTO);
+		spec.ether_type = htons(ETH_P_IP);
+		spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V4_FLOW ?
+				 IPPROTO_TCP : IPPROTO_UDP);
+		if (ip_mask->ip4dst) {
+			if (ip_mask->ip4dst != IP4_ADDR_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+			spec.loc_host[0] = ip_entry->ip4dst;
+		}
+		if (ip_mask->ip4src) {
+			if (ip_mask->ip4src != IP4_ADDR_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+			spec.rem_host[0] = ip_entry->ip4src;
+		}
+		if (ip_mask->pdst) {
+			if (ip_mask->pdst != PORT_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_LOC_PORT;
+			spec.loc_port = ip_entry->pdst;
+		}
+		if (ip_mask->psrc) {
+			if (ip_mask->psrc != PORT_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_REM_PORT;
+			spec.rem_port = ip_entry->psrc;
+		}
+		if (ip_mask->tos)
+			return -EINVAL;
+		break;
+
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		spec.match_flags = (EF4_FILTER_MATCH_ETHER_TYPE |
+				    EF4_FILTER_MATCH_IP_PROTO);
+		spec.ether_type = htons(ETH_P_IPV6);
+		spec.ip_proto = ((rule->flow_type & ~FLOW_EXT) == TCP_V6_FLOW ?
+				 IPPROTO_TCP : IPPROTO_UDP);
+		if (!ip6_mask_is_empty(ip6_mask->ip6dst)) {
+			if (!ip6_mask_is_full(ip6_mask->ip6dst))
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+			memcpy(spec.loc_host, ip6_entry->ip6dst, sizeof(spec.loc_host));
+		}
+		if (!ip6_mask_is_empty(ip6_mask->ip6src)) {
+			if (!ip6_mask_is_full(ip6_mask->ip6src))
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+			memcpy(spec.rem_host, ip6_entry->ip6src, sizeof(spec.rem_host));
+		}
+		if (ip6_mask->pdst) {
+			if (ip6_mask->pdst != PORT_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_LOC_PORT;
+			spec.loc_port = ip6_entry->pdst;
+		}
+		if (ip6_mask->psrc) {
+			if (ip6_mask->psrc != PORT_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_REM_PORT;
+			spec.rem_port = ip6_entry->psrc;
+		}
+		if (ip6_mask->tclass)
+			return -EINVAL;
+		break;
+
+	case IPV4_USER_FLOW:
+		if (uip_mask->l4_4_bytes || uip_mask->tos || uip_mask->ip_ver ||
+		    uip_entry->ip_ver != ETH_RX_NFC_IP4)
+			return -EINVAL;
+		spec.match_flags = EF4_FILTER_MATCH_ETHER_TYPE;
+		spec.ether_type = htons(ETH_P_IP);
+		if (uip_mask->ip4dst) {
+			if (uip_mask->ip4dst != IP4_ADDR_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+			spec.loc_host[0] = uip_entry->ip4dst;
+		}
+		if (uip_mask->ip4src) {
+			if (uip_mask->ip4src != IP4_ADDR_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+			spec.rem_host[0] = uip_entry->ip4src;
+		}
+		if (uip_mask->proto) {
+			if (uip_mask->proto != IP_PROTO_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_IP_PROTO;
+			spec.ip_proto = uip_entry->proto;
+		}
+		break;
+
+	case IPV6_USER_FLOW:
+		if (uip6_mask->l4_4_bytes || uip6_mask->tclass)
+			return -EINVAL;
+		spec.match_flags = EF4_FILTER_MATCH_ETHER_TYPE;
+		spec.ether_type = htons(ETH_P_IPV6);
+		if (!ip6_mask_is_empty(uip6_mask->ip6dst)) {
+			if (!ip6_mask_is_full(uip6_mask->ip6dst))
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_LOC_HOST;
+			memcpy(spec.loc_host, uip6_entry->ip6dst, sizeof(spec.loc_host));
+		}
+		if (!ip6_mask_is_empty(uip6_mask->ip6src)) {
+			if (!ip6_mask_is_full(uip6_mask->ip6src))
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_REM_HOST;
+			memcpy(spec.rem_host, uip6_entry->ip6src, sizeof(spec.rem_host));
+		}
+		if (uip6_mask->l4_proto) {
+			if (uip6_mask->l4_proto != IP_PROTO_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_IP_PROTO;
+			spec.ip_proto = uip6_entry->l4_proto;
+		}
+		break;
+
+	case ETHER_FLOW:
+		if (!is_zero_ether_addr(mac_mask->h_dest)) {
+			if (ether_addr_equal(mac_mask->h_dest,
+					     mac_addr_ig_mask))
+				spec.match_flags |= EF4_FILTER_MATCH_LOC_MAC_IG;
+			else if (is_broadcast_ether_addr(mac_mask->h_dest))
+				spec.match_flags |= EF4_FILTER_MATCH_LOC_MAC;
+			else
+				return -EINVAL;
+			ether_addr_copy(spec.loc_mac, mac_entry->h_dest);
+		}
+		if (!is_zero_ether_addr(mac_mask->h_source)) {
+			if (!is_broadcast_ether_addr(mac_mask->h_source))
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_REM_MAC;
+			ether_addr_copy(spec.rem_mac, mac_entry->h_source);
+		}
+		if (mac_mask->h_proto) {
+			if (mac_mask->h_proto != ETHER_TYPE_FULL_MASK)
+				return -EINVAL;
+			spec.match_flags |= EF4_FILTER_MATCH_ETHER_TYPE;
+			spec.ether_type = mac_entry->h_proto;
+		}
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if ((rule->flow_type & FLOW_EXT) && rule->m_ext.vlan_tci) {
+		if (rule->m_ext.vlan_tci != htons(0xfff))
+			return -EINVAL;
+		spec.match_flags |= EF4_FILTER_MATCH_OUTER_VID;
+		spec.outer_vid = rule->h_ext.vlan_tci;
+	}
+
+	rc = ef4_filter_insert_filter(efx, &spec, true);
+	if (rc < 0)
+		return rc;
+
+	rule->location = rc;
+	return 0;
+}
+
+static int ef4_ethtool_set_rxnfc(struct net_device *net_dev,
+				 struct ethtool_rxnfc *info)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	if (ef4_filter_get_rx_id_limit(efx) == 0)
+		return -EOPNOTSUPP;
+
+	switch (info->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		return ef4_ethtool_set_class_rule(efx, &info->fs);
+
+	case ETHTOOL_SRXCLSRLDEL:
+		return ef4_filter_remove_id_safe(efx, EF4_FILTER_PRI_MANUAL,
+						 info->fs.location);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static u32 ef4_ethtool_get_rxfh_indir_size(struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	return ((ef4_nic_rev(efx) < EF4_REV_FALCON_B0 ||
+		 efx->n_rx_channels == 1) ?
+		0 : ARRAY_SIZE(efx->rx_indir_table));
+}
+
+static int ef4_ethtool_get_rxfh(struct net_device *net_dev, u32 *indir, u8 *key,
+				u8 *hfunc)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	if (hfunc)
+		*hfunc = ETH_RSS_HASH_TOP;
+	if (indir)
+		memcpy(indir, efx->rx_indir_table, sizeof(efx->rx_indir_table));
+	return 0;
+}
+
+static int ef4_ethtool_set_rxfh(struct net_device *net_dev, const u32 *indir,
+				const u8 *key, const u8 hfunc)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+
+	/* We do not allow change in unsupported parameters */
+	if (key ||
+	    (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP))
+		return -EOPNOTSUPP;
+	if (!indir)
+		return 0;
+
+	return efx->type->rx_push_rss_config(efx, true, indir);
+}
+
+static int ef4_ethtool_get_module_eeprom(struct net_device *net_dev,
+					 struct ethtool_eeprom *ee,
+					 u8 *data)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int ret;
+
+	if (!efx->phy_op || !efx->phy_op->get_module_eeprom)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&efx->mac_lock);
+	ret = efx->phy_op->get_module_eeprom(efx, ee, data);
+	mutex_unlock(&efx->mac_lock);
+
+	return ret;
+}
+
+static int ef4_ethtool_get_module_info(struct net_device *net_dev,
+				       struct ethtool_modinfo *modinfo)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	int ret;
+
+	if (!efx->phy_op || !efx->phy_op->get_module_info)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&efx->mac_lock);
+	ret = efx->phy_op->get_module_info(efx, modinfo);
+	mutex_unlock(&efx->mac_lock);
+
+	return ret;
+}
+
+const struct ethtool_ops ef4_ethtool_ops = {
+	.get_settings		= ef4_ethtool_get_settings,
+	.set_settings		= ef4_ethtool_set_settings,
+	.get_drvinfo		= ef4_ethtool_get_drvinfo,
+	.get_regs_len		= ef4_ethtool_get_regs_len,
+	.get_regs		= ef4_ethtool_get_regs,
+	.get_msglevel		= ef4_ethtool_get_msglevel,
+	.set_msglevel		= ef4_ethtool_set_msglevel,
+	.nway_reset		= ef4_ethtool_nway_reset,
+	.get_link		= ethtool_op_get_link,
+	.get_coalesce		= ef4_ethtool_get_coalesce,
+	.set_coalesce		= ef4_ethtool_set_coalesce,
+	.get_ringparam		= ef4_ethtool_get_ringparam,
+	.set_ringparam		= ef4_ethtool_set_ringparam,
+	.get_pauseparam		= ef4_ethtool_get_pauseparam,
+	.set_pauseparam		= ef4_ethtool_set_pauseparam,
+	.get_sset_count		= ef4_ethtool_get_sset_count,
+	.self_test		= ef4_ethtool_self_test,
+	.get_strings		= ef4_ethtool_get_strings,
+	.set_phys_id		= ef4_ethtool_phys_id,
+	.get_ethtool_stats	= ef4_ethtool_get_stats,
+	.get_wol		= ef4_ethtool_get_wol,
+	.set_wol		= ef4_ethtool_set_wol,
+	.reset			= ef4_ethtool_reset,
+	.get_rxnfc		= ef4_ethtool_get_rxnfc,
+	.set_rxnfc		= ef4_ethtool_set_rxnfc,
+	.get_rxfh_indir_size	= ef4_ethtool_get_rxfh_indir_size,
+	.get_rxfh		= ef4_ethtool_get_rxfh,
+	.set_rxfh		= ef4_ethtool_set_rxfh,
+	.get_module_info	= ef4_ethtool_get_module_info,
+	.get_module_eeprom	= ef4_ethtool_get_module_eeprom,
+};
diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c
index 1a7092602aec..c6ff0cc5ef18 100644
--- a/drivers/net/ethernet/sfc/falcon.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon.c
@@ -145,7 +145,7 @@
 #define GENERIC_SW_STAT(ext_name)				\
 	[GENERIC_STAT_ ## ext_name] = { #ext_name, 0, 0 }
 
-static const struct efx_hw_stat_desc falcon_stat_desc[FALCON_STAT_COUNT] = {
+static const struct ef4_hw_stat_desc falcon_stat_desc[FALCON_STAT_COUNT] = {
 	FALCON_DMA_STAT(tx_bytes, XgTxOctets),
 	FALCON_DMA_STAT(tx_packets, XgTxPkts),
 	FALCON_DMA_STAT(tx_pause, XgTxPausePkts),
@@ -273,34 +273,34 @@ struct falcon_nvconfig_board_v3 {
 #define SPI_DEV_TYPE_BLOCK_SIZE_LBN 24
 #define SPI_DEV_TYPE_BLOCK_SIZE_WIDTH 5
 #define SPI_DEV_TYPE_FIELD(type, field)					\
-	(((type) >> EFX_LOW_BIT(field)) & EFX_MASK32(EFX_WIDTH(field)))
+	(((type) >> EF4_LOW_BIT(field)) & EF4_MASK32(EF4_WIDTH(field)))
 
 #define FALCON_NVCONFIG_OFFSET 0x300
 
 #define FALCON_NVCONFIG_BOARD_MAGIC_NUM 0xFA1C
 struct falcon_nvconfig {
-	efx_oword_t ee_vpd_cfg_reg;			/* 0x300 */
+	ef4_oword_t ee_vpd_cfg_reg;			/* 0x300 */
 	u8 mac_address[2][8];			/* 0x310 */
-	efx_oword_t pcie_sd_ctl0123_reg;		/* 0x320 */
-	efx_oword_t pcie_sd_ctl45_reg;			/* 0x330 */
-	efx_oword_t pcie_pcs_ctl_stat_reg;		/* 0x340 */
-	efx_oword_t hw_init_reg;			/* 0x350 */
-	efx_oword_t nic_stat_reg;			/* 0x360 */
-	efx_oword_t glb_ctl_reg;			/* 0x370 */
-	efx_oword_t srm_cfg_reg;			/* 0x380 */
-	efx_oword_t spare_reg;				/* 0x390 */
+	ef4_oword_t pcie_sd_ctl0123_reg;		/* 0x320 */
+	ef4_oword_t pcie_sd_ctl45_reg;			/* 0x330 */
+	ef4_oword_t pcie_pcs_ctl_stat_reg;		/* 0x340 */
+	ef4_oword_t hw_init_reg;			/* 0x350 */
+	ef4_oword_t nic_stat_reg;			/* 0x360 */
+	ef4_oword_t glb_ctl_reg;			/* 0x370 */
+	ef4_oword_t srm_cfg_reg;			/* 0x380 */
+	ef4_oword_t spare_reg;				/* 0x390 */
 	__le16 board_magic_num;			/* 0x3A0 */
 	__le16 board_struct_ver;
 	__le16 board_checksum;
 	struct falcon_nvconfig_board_v2 board_v2;
-	efx_oword_t ee_base_page_reg;			/* 0x3B0 */
+	ef4_oword_t ee_base_page_reg;			/* 0x3B0 */
 	struct falcon_nvconfig_board_v3 board_v3;	/* 0x3C0 */
 } __packed;
 
 /*************************************************************************/
 
-static int falcon_reset_hw(struct efx_nic *efx, enum reset_type method);
-static void falcon_reconfigure_mac_wrapper(struct efx_nic *efx);
+static int falcon_reset_hw(struct ef4_nic *efx, enum reset_type method);
+static void falcon_reconfigure_mac_wrapper(struct ef4_nic *efx);
 
 static const unsigned int
 /* "Large" EEPROM device: Atmel AT25640 or similar
@@ -326,40 +326,40 @@ default_flash_type = ((17 << SPI_DEV_TYPE_SIZE_LBN)
  */
 static void falcon_setsda(void *data, int state)
 {
-	struct efx_nic *efx = (struct efx_nic *)data;
-	efx_oword_t reg;
+	struct ef4_nic *efx = (struct ef4_nic *)data;
+	ef4_oword_t reg;
 
-	efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_GPIO3_OEN, !state);
-	efx_writeo(efx, &reg, FR_AB_GPIO_CTL);
+	ef4_reado(efx, &reg, FR_AB_GPIO_CTL);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_GPIO3_OEN, !state);
+	ef4_writeo(efx, &reg, FR_AB_GPIO_CTL);
 }
 
 static void falcon_setscl(void *data, int state)
 {
-	struct efx_nic *efx = (struct efx_nic *)data;
-	efx_oword_t reg;
+	struct ef4_nic *efx = (struct ef4_nic *)data;
+	ef4_oword_t reg;
 
-	efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_GPIO0_OEN, !state);
-	efx_writeo(efx, &reg, FR_AB_GPIO_CTL);
+	ef4_reado(efx, &reg, FR_AB_GPIO_CTL);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_GPIO0_OEN, !state);
+	ef4_writeo(efx, &reg, FR_AB_GPIO_CTL);
 }
 
 static int falcon_getsda(void *data)
 {
-	struct efx_nic *efx = (struct efx_nic *)data;
-	efx_oword_t reg;
+	struct ef4_nic *efx = (struct ef4_nic *)data;
+	ef4_oword_t reg;
 
-	efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-	return EFX_OWORD_FIELD(reg, FRF_AB_GPIO3_IN);
+	ef4_reado(efx, &reg, FR_AB_GPIO_CTL);
+	return EF4_OWORD_FIELD(reg, FRF_AB_GPIO3_IN);
 }
 
 static int falcon_getscl(void *data)
 {
-	struct efx_nic *efx = (struct efx_nic *)data;
-	efx_oword_t reg;
+	struct ef4_nic *efx = (struct ef4_nic *)data;
+	ef4_oword_t reg;
 
-	efx_reado(efx, &reg, FR_AB_GPIO_CTL);
-	return EFX_OWORD_FIELD(reg, FRF_AB_GPIO0_IN);
+	ef4_reado(efx, &reg, FR_AB_GPIO_CTL);
+	return EF4_OWORD_FIELD(reg, FRF_AB_GPIO0_IN);
 }
 
 static const struct i2c_algo_bit_data falcon_i2c_bit_operations = {
@@ -372,35 +372,35 @@ static const struct i2c_algo_bit_data falcon_i2c_bit_operations = {
 	.timeout	= DIV_ROUND_UP(HZ, 20),
 };
 
-static void falcon_push_irq_moderation(struct efx_channel *channel)
+static void falcon_push_irq_moderation(struct ef4_channel *channel)
 {
-	efx_dword_t timer_cmd;
-	struct efx_nic *efx = channel->efx;
+	ef4_dword_t timer_cmd;
+	struct ef4_nic *efx = channel->efx;
 
 	/* Set timer register */
 	if (channel->irq_moderation_us) {
 		unsigned int ticks;
 
-		ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us);
-		EFX_POPULATE_DWORD_2(timer_cmd,
+		ticks = ef4_usecs_to_ticks(efx, channel->irq_moderation_us);
+		EF4_POPULATE_DWORD_2(timer_cmd,
 				     FRF_AB_TC_TIMER_MODE,
 				     FFE_BB_TIMER_MODE_INT_HLDOFF,
 				     FRF_AB_TC_TIMER_VAL,
 				     ticks - 1);
 	} else {
-		EFX_POPULATE_DWORD_2(timer_cmd,
+		EF4_POPULATE_DWORD_2(timer_cmd,
 				     FRF_AB_TC_TIMER_MODE,
 				     FFE_BB_TIMER_MODE_DIS,
 				     FRF_AB_TC_TIMER_VAL, 0);
 	}
 	BUILD_BUG_ON(FR_AA_TIMER_COMMAND_KER != FR_BZ_TIMER_COMMAND_P0);
-	efx_writed_page_locked(efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
+	ef4_writed_page_locked(efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0,
 			       channel->channel);
 }
 
-static void falcon_deconfigure_mac_wrapper(struct efx_nic *efx);
+static void falcon_deconfigure_mac_wrapper(struct ef4_nic *efx);
 
-static void falcon_prepare_flush(struct efx_nic *efx)
+static void falcon_prepare_flush(struct ef4_nic *efx)
 {
 	falcon_deconfigure_mac_wrapper(efx);
 
@@ -420,26 +420,26 @@ static void falcon_prepare_flush(struct efx_nic *efx)
  *
  * NB most hardware supports MSI interrupts
  */
-static inline void falcon_irq_ack_a1(struct efx_nic *efx)
+static inline void falcon_irq_ack_a1(struct ef4_nic *efx)
 {
-	efx_dword_t reg;
+	ef4_dword_t reg;
 
-	EFX_POPULATE_DWORD_1(reg, FRF_AA_INT_ACK_KER_FIELD, 0xb7eb7e);
-	efx_writed(efx, &reg, FR_AA_INT_ACK_KER);
-	efx_readd(efx, &reg, FR_AA_WORK_AROUND_BROKEN_PCI_READS);
+	EF4_POPULATE_DWORD_1(reg, FRF_AA_INT_ACK_KER_FIELD, 0xb7eb7e);
+	ef4_writed(efx, &reg, FR_AA_INT_ACK_KER);
+	ef4_readd(efx, &reg, FR_AA_WORK_AROUND_BROKEN_PCI_READS);
 }
 
 static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id)
 {
-	struct efx_nic *efx = dev_id;
-	efx_oword_t *int_ker = efx->irq_status.addr;
+	struct ef4_nic *efx = dev_id;
+	ef4_oword_t *int_ker = efx->irq_status.addr;
 	int syserr;
 	int queues;
 
 	/* Check to see if this is our interrupt.  If it isn't, we
 	 * exit without having touched the hardware.
 	 */
-	if (unlikely(EFX_OWORD_IS_ZERO(*int_ker))) {
+	if (unlikely(EF4_OWORD_IS_ZERO(*int_ker))) {
 		netif_vdbg(efx, intr, efx->net_dev,
 			   "IRQ %d on CPU %d not for me\n", irq,
 			   raw_smp_processor_id());
@@ -447,30 +447,30 @@ static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id)
 	}
 	efx->last_irq_cpu = raw_smp_processor_id();
 	netif_vdbg(efx, intr, efx->net_dev,
-		   "IRQ %d on CPU %d status " EFX_OWORD_FMT "\n",
-		   irq, raw_smp_processor_id(), EFX_OWORD_VAL(*int_ker));
+		   "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n",
+		   irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker));
 
 	if (!likely(ACCESS_ONCE(efx->irq_soft_enabled)))
 		return IRQ_HANDLED;
 
 	/* Check to see if we have a serious error condition */
-	syserr = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
+	syserr = EF4_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
 	if (unlikely(syserr))
-		return efx_farch_fatal_interrupt(efx);
+		return ef4_farch_fatal_interrupt(efx);
 
 	/* Determine interrupting queues, clear interrupt status
 	 * register and acknowledge the device interrupt.
 	 */
-	BUILD_BUG_ON(FSF_AZ_NET_IVEC_INT_Q_WIDTH > EFX_MAX_CHANNELS);
-	queues = EFX_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_INT_Q);
-	EFX_ZERO_OWORD(*int_ker);
+	BUILD_BUG_ON(FSF_AZ_NET_IVEC_INT_Q_WIDTH > EF4_MAX_CHANNELS);
+	queues = EF4_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_INT_Q);
+	EF4_ZERO_OWORD(*int_ker);
 	wmb(); /* Ensure the vector is cleared before interrupt ack */
 	falcon_irq_ack_a1(efx);
 
 	if (queues & 1)
-		efx_schedule_channel_irq(efx_get_channel(efx, 0));
+		ef4_schedule_channel_irq(ef4_get_channel(efx, 0));
 	if (queues & 2)
-		efx_schedule_channel_irq(efx_get_channel(efx, 1));
+		ef4_schedule_channel_irq(ef4_get_channel(efx, 1));
 	return IRQ_HANDLED;
 }
 
@@ -480,7 +480,7 @@ static irqreturn_t falcon_legacy_interrupt_a1(int irq, void *dev_id)
  *
  **************************************************************************
  */
-static int dummy_rx_push_rss_config(struct efx_nic *efx, bool user,
+static int dummy_rx_push_rss_config(struct ef4_nic *efx, bool user,
 				    const u32 *rx_indir_table)
 {
 	(void) efx;
@@ -489,19 +489,19 @@ static int dummy_rx_push_rss_config(struct efx_nic *efx, bool user,
 	return -ENOSYS;
 }
 
-static int falcon_b0_rx_push_rss_config(struct efx_nic *efx, bool user,
+static int falcon_b0_rx_push_rss_config(struct ef4_nic *efx, bool user,
 					const u32 *rx_indir_table)
 {
-	efx_oword_t temp;
+	ef4_oword_t temp;
 
 	(void) user;
 	/* Set hash key for IPv4 */
 	memcpy(&temp, efx->rx_hash_key, sizeof(temp));
-	efx_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
+	ef4_writeo(efx, &temp, FR_BZ_RX_RSS_TKEY);
 
 	memcpy(efx->rx_indir_table, rx_indir_table,
 	       sizeof(efx->rx_indir_table));
-	efx_farch_rx_push_indir_table(efx);
+	ef4_farch_rx_push_indir_table(efx);
 	return 0;
 }
 
@@ -512,17 +512,17 @@ static int falcon_b0_rx_push_rss_config(struct efx_nic *efx, bool user,
  **************************************************************************
  */
 
-#define FALCON_SPI_MAX_LEN sizeof(efx_oword_t)
+#define FALCON_SPI_MAX_LEN sizeof(ef4_oword_t)
 
-static int falcon_spi_poll(struct efx_nic *efx)
+static int falcon_spi_poll(struct ef4_nic *efx)
 {
-	efx_oword_t reg;
-	efx_reado(efx, &reg, FR_AB_EE_SPI_HCMD);
-	return EFX_OWORD_FIELD(reg, FRF_AB_EE_SPI_HCMD_CMD_EN) ? -EBUSY : 0;
+	ef4_oword_t reg;
+	ef4_reado(efx, &reg, FR_AB_EE_SPI_HCMD);
+	return EF4_OWORD_FIELD(reg, FRF_AB_EE_SPI_HCMD_CMD_EN) ? -EBUSY : 0;
 }
 
 /* Wait for SPI command completion */
-static int falcon_spi_wait(struct efx_nic *efx)
+static int falcon_spi_wait(struct ef4_nic *efx)
 {
 	/* Most commands will finish quickly, so we start polling at
 	 * very short intervals.  Sometimes the command may have to
@@ -550,13 +550,13 @@ static int falcon_spi_wait(struct efx_nic *efx)
 }
 
 static int
-falcon_spi_cmd(struct efx_nic *efx, const struct falcon_spi_device *spi,
+falcon_spi_cmd(struct ef4_nic *efx, const struct falcon_spi_device *spi,
 	       unsigned int command, int address,
 	       const void *in, void *out, size_t len)
 {
 	bool addressed = (address >= 0);
 	bool reading = (out != NULL);
-	efx_oword_t reg;
+	ef4_oword_t reg;
 	int rc;
 
 	/* Input validation */
@@ -570,18 +570,18 @@ falcon_spi_cmd(struct efx_nic *efx, const struct falcon_spi_device *spi,
 
 	/* Program address register, if we have an address */
 	if (addressed) {
-		EFX_POPULATE_OWORD_1(reg, FRF_AB_EE_SPI_HADR_ADR, address);
-		efx_writeo(efx, &reg, FR_AB_EE_SPI_HADR);
+		EF4_POPULATE_OWORD_1(reg, FRF_AB_EE_SPI_HADR_ADR, address);
+		ef4_writeo(efx, &reg, FR_AB_EE_SPI_HADR);
 	}
 
 	/* Program data register, if we have data */
 	if (in != NULL) {
 		memcpy(&reg, in, len);
-		efx_writeo(efx, &reg, FR_AB_EE_SPI_HDATA);
+		ef4_writeo(efx, &reg, FR_AB_EE_SPI_HDATA);
 	}
 
 	/* Issue read/write command */
-	EFX_POPULATE_OWORD_7(reg,
+	EF4_POPULATE_OWORD_7(reg,
 			     FRF_AB_EE_SPI_HCMD_CMD_EN, 1,
 			     FRF_AB_EE_SPI_HCMD_SF_SEL, spi->device_id,
 			     FRF_AB_EE_SPI_HCMD_DABCNT, len,
@@ -590,7 +590,7 @@ falcon_spi_cmd(struct efx_nic *efx, const struct falcon_spi_device *spi,
 			     FRF_AB_EE_SPI_HCMD_ADBCNT,
 			     (addressed ? spi->addr_len : 0),
 			     FRF_AB_EE_SPI_HCMD_ENC, command);
-	efx_writeo(efx, &reg, FR_AB_EE_SPI_HCMD);
+	ef4_writeo(efx, &reg, FR_AB_EE_SPI_HCMD);
 
 	/* Wait for read/write to complete */
 	rc = falcon_spi_wait(efx);
@@ -599,7 +599,7 @@ falcon_spi_cmd(struct efx_nic *efx, const struct falcon_spi_device *spi,
 
 	/* Read data */
 	if (out != NULL) {
-		efx_reado(efx, &reg, FR_AB_EE_SPI_HDATA);
+		ef4_reado(efx, &reg, FR_AB_EE_SPI_HDATA);
 		memcpy(out, &reg, len);
 	}
 
@@ -614,7 +614,7 @@ falcon_spi_munge_command(const struct falcon_spi_device *spi,
 }
 
 static int
-falcon_spi_read(struct efx_nic *efx, const struct falcon_spi_device *spi,
+falcon_spi_read(struct ef4_nic *efx, const struct falcon_spi_device *spi,
 		loff_t start, size_t len, size_t *retlen, u8 *buffer)
 {
 	size_t block_len, pos = 0;
@@ -644,10 +644,10 @@ falcon_spi_read(struct efx_nic *efx, const struct falcon_spi_device *spi,
 	return rc;
 }
 
-#ifdef CONFIG_SFC_MTD
+#ifdef CONFIG_SFC_FALCON_MTD
 
 struct falcon_mtd_partition {
-	struct efx_mtd_partition common;
+	struct ef4_mtd_partition common;
 	const struct falcon_spi_device *spi;
 	size_t offset;
 };
@@ -664,7 +664,7 @@ falcon_spi_write_limit(const struct falcon_spi_device *spi, size_t start)
 
 /* Wait up to 10 ms for buffered write completion */
 static int
-falcon_spi_wait_write(struct efx_nic *efx, const struct falcon_spi_device *spi)
+falcon_spi_wait_write(struct ef4_nic *efx, const struct falcon_spi_device *spi)
 {
 	unsigned long timeout = jiffies + 1 + DIV_ROUND_UP(HZ, 100);
 	u8 status;
@@ -689,7 +689,7 @@ falcon_spi_wait_write(struct efx_nic *efx, const struct falcon_spi_device *spi)
 }
 
 static int
-falcon_spi_write(struct efx_nic *efx, const struct falcon_spi_device *spi,
+falcon_spi_write(struct ef4_nic *efx, const struct falcon_spi_device *spi,
 		 loff_t start, size_t len, size_t *retlen, const u8 *buffer)
 {
 	u8 verify_buffer[FALCON_SPI_MAX_LEN];
@@ -741,7 +741,7 @@ static int
 falcon_spi_slow_wait(struct falcon_mtd_partition *part, bool uninterruptible)
 {
 	const struct falcon_spi_device *spi = part->spi;
-	struct efx_nic *efx = part->common.mtd.priv;
+	struct ef4_nic *efx = part->common.mtd.priv;
 	u8 status;
 	int rc, i;
 
@@ -765,7 +765,7 @@ falcon_spi_slow_wait(struct falcon_mtd_partition *part, bool uninterruptible)
 }
 
 static int
-falcon_spi_unlock(struct efx_nic *efx, const struct falcon_spi_device *spi)
+falcon_spi_unlock(struct ef4_nic *efx, const struct falcon_spi_device *spi)
 {
 	const u8 unlock_mask = (SPI_STATUS_BP2 | SPI_STATUS_BP1 |
 				SPI_STATUS_BP0);
@@ -805,7 +805,7 @@ static int
 falcon_spi_erase(struct falcon_mtd_partition *part, loff_t start, size_t len)
 {
 	const struct falcon_spi_device *spi = part->spi;
-	struct efx_nic *efx = part->common.mtd.priv;
+	struct ef4_nic *efx = part->common.mtd.priv;
 	unsigned pos, block_len;
 	u8 empty[FALCON_SPI_VERIFY_BUF_LEN];
 	u8 buffer[FALCON_SPI_VERIFY_BUF_LEN];
@@ -849,9 +849,9 @@ falcon_spi_erase(struct falcon_mtd_partition *part, loff_t start, size_t len)
 	return rc;
 }
 
-static void falcon_mtd_rename(struct efx_mtd_partition *part)
+static void falcon_mtd_rename(struct ef4_mtd_partition *part)
 {
-	struct efx_nic *efx = part->mtd.priv;
+	struct ef4_nic *efx = part->mtd.priv;
 
 	snprintf(part->name, sizeof(part->name), "%s %s",
 		 efx->name, part->type_name);
@@ -861,7 +861,7 @@ static int falcon_mtd_read(struct mtd_info *mtd, loff_t start,
 			   size_t len, size_t *retlen, u8 *buffer)
 {
 	struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-	struct efx_nic *efx = mtd->priv;
+	struct ef4_nic *efx = mtd->priv;
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	int rc;
 
@@ -877,7 +877,7 @@ static int falcon_mtd_read(struct mtd_info *mtd, loff_t start,
 static int falcon_mtd_erase(struct mtd_info *mtd, loff_t start, size_t len)
 {
 	struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-	struct efx_nic *efx = mtd->priv;
+	struct ef4_nic *efx = mtd->priv;
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	int rc;
 
@@ -893,7 +893,7 @@ static int falcon_mtd_write(struct mtd_info *mtd, loff_t start,
 			    size_t len, size_t *retlen, const u8 *buffer)
 {
 	struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-	struct efx_nic *efx = mtd->priv;
+	struct ef4_nic *efx = mtd->priv;
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	int rc;
 
@@ -909,7 +909,7 @@ static int falcon_mtd_write(struct mtd_info *mtd, loff_t start,
 static int falcon_mtd_sync(struct mtd_info *mtd)
 {
 	struct falcon_mtd_partition *part = to_falcon_mtd_partition(mtd);
-	struct efx_nic *efx = mtd->priv;
+	struct ef4_nic *efx = mtd->priv;
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	int rc;
 
@@ -919,7 +919,7 @@ static int falcon_mtd_sync(struct mtd_info *mtd)
 	return rc;
 }
 
-static int falcon_mtd_probe(struct efx_nic *efx)
+static int falcon_mtd_probe(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	struct falcon_mtd_partition *parts;
@@ -963,13 +963,13 @@ static int falcon_mtd_probe(struct efx_nic *efx)
 		n_parts++;
 	}
 
-	rc = efx_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
+	rc = ef4_mtd_add(efx, &parts[0].common, n_parts, sizeof(*parts));
 	if (rc)
 		kfree(parts);
 	return rc;
 }
 
-#endif /* CONFIG_SFC_MTD */
+#endif /* CONFIG_SFC_FALCON_MTD */
 
 /**************************************************************************
  *
@@ -979,27 +979,27 @@ static int falcon_mtd_probe(struct efx_nic *efx)
  */
 
 /* Configure the XAUI driver that is an output from Falcon */
-static void falcon_setup_xaui(struct efx_nic *efx)
+static void falcon_setup_xaui(struct ef4_nic *efx)
 {
-	efx_oword_t sdctl, txdrv;
+	ef4_oword_t sdctl, txdrv;
 
 	/* Move the XAUI into low power, unless there is no PHY, in
 	 * which case the XAUI will have to drive a cable. */
 	if (efx->phy_type == PHY_TYPE_NONE)
 		return;
 
-	efx_reado(efx, &sdctl, FR_AB_XX_SD_CTL);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
-	EFX_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
-	efx_writeo(efx, &sdctl, FR_AB_XX_SD_CTL);
-
-	EFX_POPULATE_OWORD_8(txdrv,
+	ef4_reado(efx, &sdctl, FR_AB_XX_SD_CTL);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVD, FFE_AB_XX_SD_CTL_DRV_DEF);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVC, FFE_AB_XX_SD_CTL_DRV_DEF);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVB, FFE_AB_XX_SD_CTL_DRV_DEF);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_HIDRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
+	EF4_SET_OWORD_FIELD(sdctl, FRF_AB_XX_LODRVA, FFE_AB_XX_SD_CTL_DRV_DEF);
+	ef4_writeo(efx, &sdctl, FR_AB_XX_SD_CTL);
+
+	EF4_POPULATE_OWORD_8(txdrv,
 			     FRF_AB_XX_DEQD, FFE_AB_XX_TXDRV_DEQ_DEF,
 			     FRF_AB_XX_DEQC, FFE_AB_XX_TXDRV_DEQ_DEF,
 			     FRF_AB_XX_DEQB, FFE_AB_XX_TXDRV_DEQ_DEF,
@@ -1008,27 +1008,27 @@ static void falcon_setup_xaui(struct efx_nic *efx)
 			     FRF_AB_XX_DTXC, FFE_AB_XX_TXDRV_DTX_DEF,
 			     FRF_AB_XX_DTXB, FFE_AB_XX_TXDRV_DTX_DEF,
 			     FRF_AB_XX_DTXA, FFE_AB_XX_TXDRV_DTX_DEF);
-	efx_writeo(efx, &txdrv, FR_AB_XX_TXDRV_CTL);
+	ef4_writeo(efx, &txdrv, FR_AB_XX_TXDRV_CTL);
 }
 
-int falcon_reset_xaui(struct efx_nic *efx)
+int falcon_reset_xaui(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t reg;
+	ef4_oword_t reg;
 	int count;
 
 	/* Don't fetch MAC statistics over an XMAC reset */
 	WARN_ON(nic_data->stats_disable_count == 0);
 
 	/* Start reset sequence */
-	EFX_POPULATE_OWORD_1(reg, FRF_AB_XX_RST_XX_EN, 1);
-	efx_writeo(efx, &reg, FR_AB_XX_PWR_RST);
+	EF4_POPULATE_OWORD_1(reg, FRF_AB_XX_RST_XX_EN, 1);
+	ef4_writeo(efx, &reg, FR_AB_XX_PWR_RST);
 
 	/* Wait up to 10 ms for completion, then reinitialise */
 	for (count = 0; count < 1000; count++) {
-		efx_reado(efx, &reg, FR_AB_XX_PWR_RST);
-		if (EFX_OWORD_FIELD(reg, FRF_AB_XX_RST_XX_EN) == 0 &&
-		    EFX_OWORD_FIELD(reg, FRF_AB_XX_SD_RST_ACT) == 0) {
+		ef4_reado(efx, &reg, FR_AB_XX_PWR_RST);
+		if (EF4_OWORD_FIELD(reg, FRF_AB_XX_RST_XX_EN) == 0 &&
+		    EF4_OWORD_FIELD(reg, FRF_AB_XX_SD_RST_ACT) == 0) {
 			falcon_setup_xaui(efx);
 			return 0;
 		}
@@ -1039,12 +1039,12 @@ int falcon_reset_xaui(struct efx_nic *efx)
 	return -ETIMEDOUT;
 }
 
-static void falcon_ack_status_intr(struct efx_nic *efx)
+static void falcon_ack_status_intr(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t reg;
+	ef4_oword_t reg;
 
-	if ((efx_nic_rev(efx) != EFX_REV_FALCON_B0) || LOOPBACK_INTERNAL(efx))
+	if ((ef4_nic_rev(efx) != EF4_REV_FALCON_B0) || LOOPBACK_INTERNAL(efx))
 		return;
 
 	/* We expect xgmii faults if the wireside link is down */
@@ -1056,33 +1056,33 @@ static void falcon_ack_status_intr(struct efx_nic *efx)
 	if (nic_data->xmac_poll_required)
 		return;
 
-	efx_reado(efx, &reg, FR_AB_XM_MGT_INT_MSK);
+	ef4_reado(efx, &reg, FR_AB_XM_MGT_INT_MSK);
 }
 
-static bool falcon_xgxs_link_ok(struct efx_nic *efx)
+static bool falcon_xgxs_link_ok(struct ef4_nic *efx)
 {
-	efx_oword_t reg;
+	ef4_oword_t reg;
 	bool align_done, link_ok = false;
 	int sync_status;
 
 	/* Read link status */
-	efx_reado(efx, &reg, FR_AB_XX_CORE_STAT);
+	ef4_reado(efx, &reg, FR_AB_XX_CORE_STAT);
 
-	align_done = EFX_OWORD_FIELD(reg, FRF_AB_XX_ALIGN_DONE);
-	sync_status = EFX_OWORD_FIELD(reg, FRF_AB_XX_SYNC_STAT);
+	align_done = EF4_OWORD_FIELD(reg, FRF_AB_XX_ALIGN_DONE);
+	sync_status = EF4_OWORD_FIELD(reg, FRF_AB_XX_SYNC_STAT);
 	if (align_done && (sync_status == FFE_AB_XX_STAT_ALL_LANES))
 		link_ok = true;
 
 	/* Clear link status ready for next read */
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_COMMA_DET, FFE_AB_XX_STAT_ALL_LANES);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_CHAR_ERR, FFE_AB_XX_STAT_ALL_LANES);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_DISPERR, FFE_AB_XX_STAT_ALL_LANES);
-	efx_writeo(efx, &reg, FR_AB_XX_CORE_STAT);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_COMMA_DET, FFE_AB_XX_STAT_ALL_LANES);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_CHAR_ERR, FFE_AB_XX_STAT_ALL_LANES);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_DISPERR, FFE_AB_XX_STAT_ALL_LANES);
+	ef4_writeo(efx, &reg, FR_AB_XX_CORE_STAT);
 
 	return link_ok;
 }
 
-static bool falcon_xmac_link_ok(struct efx_nic *efx)
+static bool falcon_xmac_link_ok(struct ef4_nic *efx)
 {
 	/*
 	 * Check MAC's XGXS link status except when using XGMII loopback
@@ -1094,66 +1094,66 @@ static bool falcon_xmac_link_ok(struct efx_nic *efx)
 		falcon_xgxs_link_ok(efx)) &&
 		(!(efx->mdio.mmds & (1 << MDIO_MMD_PHYXS)) ||
 		 LOOPBACK_INTERNAL(efx) ||
-		 efx_mdio_phyxgxs_lane_sync(efx));
+		 ef4_mdio_phyxgxs_lane_sync(efx));
 }
 
-static void falcon_reconfigure_xmac_core(struct efx_nic *efx)
+static void falcon_reconfigure_xmac_core(struct ef4_nic *efx)
 {
 	unsigned int max_frame_len;
-	efx_oword_t reg;
-	bool rx_fc = !!(efx->link_state.fc & EFX_FC_RX);
-	bool tx_fc = !!(efx->link_state.fc & EFX_FC_TX);
+	ef4_oword_t reg;
+	bool rx_fc = !!(efx->link_state.fc & EF4_FC_RX);
+	bool tx_fc = !!(efx->link_state.fc & EF4_FC_TX);
 
 	/* Configure MAC  - cut-thru mode is hard wired on */
-	EFX_POPULATE_OWORD_3(reg,
+	EF4_POPULATE_OWORD_3(reg,
 			     FRF_AB_XM_RX_JUMBO_MODE, 1,
 			     FRF_AB_XM_TX_STAT_EN, 1,
 			     FRF_AB_XM_RX_STAT_EN, 1);
-	efx_writeo(efx, &reg, FR_AB_XM_GLB_CFG);
+	ef4_writeo(efx, &reg, FR_AB_XM_GLB_CFG);
 
 	/* Configure TX */
-	EFX_POPULATE_OWORD_6(reg,
+	EF4_POPULATE_OWORD_6(reg,
 			     FRF_AB_XM_TXEN, 1,
 			     FRF_AB_XM_TX_PRMBL, 1,
 			     FRF_AB_XM_AUTO_PAD, 1,
 			     FRF_AB_XM_TXCRC, 1,
 			     FRF_AB_XM_FCNTL, tx_fc,
 			     FRF_AB_XM_IPG, 0x3);
-	efx_writeo(efx, &reg, FR_AB_XM_TX_CFG);
+	ef4_writeo(efx, &reg, FR_AB_XM_TX_CFG);
 
 	/* Configure RX */
-	EFX_POPULATE_OWORD_5(reg,
+	EF4_POPULATE_OWORD_5(reg,
 			     FRF_AB_XM_RXEN, 1,
 			     FRF_AB_XM_AUTO_DEPAD, 0,
 			     FRF_AB_XM_ACPT_ALL_MCAST, 1,
 			     FRF_AB_XM_ACPT_ALL_UCAST, !efx->unicast_filter,
 			     FRF_AB_XM_PASS_CRC_ERR, 1);
-	efx_writeo(efx, &reg, FR_AB_XM_RX_CFG);
+	ef4_writeo(efx, &reg, FR_AB_XM_RX_CFG);
 
 	/* Set frame length */
-	max_frame_len = EFX_MAX_FRAME_LEN(efx->net_dev->mtu);
-	EFX_POPULATE_OWORD_1(reg, FRF_AB_XM_MAX_RX_FRM_SIZE, max_frame_len);
-	efx_writeo(efx, &reg, FR_AB_XM_RX_PARAM);
-	EFX_POPULATE_OWORD_2(reg,
+	max_frame_len = EF4_MAX_FRAME_LEN(efx->net_dev->mtu);
+	EF4_POPULATE_OWORD_1(reg, FRF_AB_XM_MAX_RX_FRM_SIZE, max_frame_len);
+	ef4_writeo(efx, &reg, FR_AB_XM_RX_PARAM);
+	EF4_POPULATE_OWORD_2(reg,
 			     FRF_AB_XM_MAX_TX_FRM_SIZE, max_frame_len,
 			     FRF_AB_XM_TX_JUMBO_MODE, 1);
-	efx_writeo(efx, &reg, FR_AB_XM_TX_PARAM);
+	ef4_writeo(efx, &reg, FR_AB_XM_TX_PARAM);
 
-	EFX_POPULATE_OWORD_2(reg,
+	EF4_POPULATE_OWORD_2(reg,
 			     FRF_AB_XM_PAUSE_TIME, 0xfffe, /* MAX PAUSE TIME */
 			     FRF_AB_XM_DIS_FCNTL, !rx_fc);
-	efx_writeo(efx, &reg, FR_AB_XM_FC);
+	ef4_writeo(efx, &reg, FR_AB_XM_FC);
 
 	/* Set MAC address */
 	memcpy(&reg, &efx->net_dev->dev_addr[0], 4);
-	efx_writeo(efx, &reg, FR_AB_XM_ADR_LO);
+	ef4_writeo(efx, &reg, FR_AB_XM_ADR_LO);
 	memcpy(&reg, &efx->net_dev->dev_addr[4], 2);
-	efx_writeo(efx, &reg, FR_AB_XM_ADR_HI);
+	ef4_writeo(efx, &reg, FR_AB_XM_ADR_HI);
 }
 
-static void falcon_reconfigure_xgxs_core(struct efx_nic *efx)
+static void falcon_reconfigure_xgxs_core(struct ef4_nic *efx)
 {
-	efx_oword_t reg;
+	ef4_oword_t reg;
 	bool xgxs_loopback = (efx->loopback_mode == LOOPBACK_XGXS);
 	bool xaui_loopback = (efx->loopback_mode == LOOPBACK_XAUI);
 	bool xgmii_loopback = (efx->loopback_mode == LOOPBACK_XGMII);
@@ -1161,12 +1161,12 @@ static void falcon_reconfigure_xgxs_core(struct efx_nic *efx)
 
 	/* XGXS block is flaky and will need to be reset if moving
 	 * into our out of XGMII, XGXS or XAUI loopbacks. */
-	efx_reado(efx, &reg, FR_AB_XX_CORE_STAT);
-	old_xgxs_loopback = EFX_OWORD_FIELD(reg, FRF_AB_XX_XGXS_LB_EN);
-	old_xgmii_loopback = EFX_OWORD_FIELD(reg, FRF_AB_XX_XGMII_LB_EN);
+	ef4_reado(efx, &reg, FR_AB_XX_CORE_STAT);
+	old_xgxs_loopback = EF4_OWORD_FIELD(reg, FRF_AB_XX_XGXS_LB_EN);
+	old_xgmii_loopback = EF4_OWORD_FIELD(reg, FRF_AB_XX_XGMII_LB_EN);
 
-	efx_reado(efx, &reg, FR_AB_XX_SD_CTL);
-	old_xaui_loopback = EFX_OWORD_FIELD(reg, FRF_AB_XX_LPBKA);
+	ef4_reado(efx, &reg, FR_AB_XX_SD_CTL);
+	old_xaui_loopback = EF4_OWORD_FIELD(reg, FRF_AB_XX_LPBKA);
 
 	/* The PHY driver may have turned XAUI off */
 	if ((xgxs_loopback != old_xgxs_loopback) ||
@@ -1174,30 +1174,30 @@ static void falcon_reconfigure_xgxs_core(struct efx_nic *efx)
 	    (xgmii_loopback != old_xgmii_loopback))
 		falcon_reset_xaui(efx);
 
-	efx_reado(efx, &reg, FR_AB_XX_CORE_STAT);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_FORCE_SIG,
+	ef4_reado(efx, &reg, FR_AB_XX_CORE_STAT);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_FORCE_SIG,
 			    (xgxs_loopback || xaui_loopback) ?
 			    FFE_AB_XX_FORCE_SIG_ALL_LANES : 0);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_XGXS_LB_EN, xgxs_loopback);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_XGMII_LB_EN, xgmii_loopback);
-	efx_writeo(efx, &reg, FR_AB_XX_CORE_STAT);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_XGXS_LB_EN, xgxs_loopback);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_XGMII_LB_EN, xgmii_loopback);
+	ef4_writeo(efx, &reg, FR_AB_XX_CORE_STAT);
 
-	efx_reado(efx, &reg, FR_AB_XX_SD_CTL);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKD, xaui_loopback);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKC, xaui_loopback);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKB, xaui_loopback);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKA, xaui_loopback);
-	efx_writeo(efx, &reg, FR_AB_XX_SD_CTL);
+	ef4_reado(efx, &reg, FR_AB_XX_SD_CTL);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKD, xaui_loopback);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKC, xaui_loopback);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKB, xaui_loopback);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_XX_LPBKA, xaui_loopback);
+	ef4_writeo(efx, &reg, FR_AB_XX_SD_CTL);
 }
 
 
 /* Try to bring up the Falcon side of the Falcon-Phy XAUI link */
-static bool falcon_xmac_link_ok_retry(struct efx_nic *efx, int tries)
+static bool falcon_xmac_link_ok_retry(struct ef4_nic *efx, int tries)
 {
 	bool mac_up = falcon_xmac_link_ok(efx);
 
 	if (LOOPBACK_MASK(efx) & LOOPBACKS_EXTERNAL(efx) & LOOPBACKS_WS ||
-	    efx_phy_mode_disabled(efx->phy_mode))
+	    ef4_phy_mode_disabled(efx->phy_mode))
 		/* XAUI link is expected to be down */
 		return mac_up;
 
@@ -1217,16 +1217,16 @@ static bool falcon_xmac_link_ok_retry(struct efx_nic *efx, int tries)
 	return mac_up;
 }
 
-static bool falcon_xmac_check_fault(struct efx_nic *efx)
+static bool falcon_xmac_check_fault(struct ef4_nic *efx)
 {
 	return !falcon_xmac_link_ok_retry(efx, 5);
 }
 
-static int falcon_reconfigure_xmac(struct efx_nic *efx)
+static int falcon_reconfigure_xmac(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 
-	efx_farch_filter_sync_rx_mode(efx);
+	ef4_farch_filter_sync_rx_mode(efx);
 
 	falcon_reconfigure_xgxs_core(efx);
 	falcon_reconfigure_xmac_core(efx);
@@ -1239,7 +1239,7 @@ static int falcon_reconfigure_xmac(struct efx_nic *efx)
 	return 0;
 }
 
-static void falcon_poll_xmac(struct efx_nic *efx)
+static void falcon_poll_xmac(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 
@@ -1258,32 +1258,32 @@ static void falcon_poll_xmac(struct efx_nic *efx)
  **************************************************************************
  */
 
-static void falcon_push_multicast_hash(struct efx_nic *efx)
+static void falcon_push_multicast_hash(struct ef4_nic *efx)
 {
-	union efx_multicast_hash *mc_hash = &efx->multicast_hash;
+	union ef4_multicast_hash *mc_hash = &efx->multicast_hash;
 
 	WARN_ON(!mutex_is_locked(&efx->mac_lock));
 
-	efx_writeo(efx, &mc_hash->oword[0], FR_AB_MAC_MC_HASH_REG0);
-	efx_writeo(efx, &mc_hash->oword[1], FR_AB_MAC_MC_HASH_REG1);
+	ef4_writeo(efx, &mc_hash->oword[0], FR_AB_MAC_MC_HASH_REG0);
+	ef4_writeo(efx, &mc_hash->oword[1], FR_AB_MAC_MC_HASH_REG1);
 }
 
-static void falcon_reset_macs(struct efx_nic *efx)
+static void falcon_reset_macs(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t reg, mac_ctrl;
+	ef4_oword_t reg, mac_ctrl;
 	int count;
 
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0) {
+	if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0) {
 		/* It's not safe to use GLB_CTL_REG to reset the
 		 * macs, so instead use the internal MAC resets
 		 */
-		EFX_POPULATE_OWORD_1(reg, FRF_AB_XM_CORE_RST, 1);
-		efx_writeo(efx, &reg, FR_AB_XM_GLB_CFG);
+		EF4_POPULATE_OWORD_1(reg, FRF_AB_XM_CORE_RST, 1);
+		ef4_writeo(efx, &reg, FR_AB_XM_GLB_CFG);
 
 		for (count = 0; count < 10000; count++) {
-			efx_reado(efx, &reg, FR_AB_XM_GLB_CFG);
-			if (EFX_OWORD_FIELD(reg, FRF_AB_XM_CORE_RST) ==
+			ef4_reado(efx, &reg, FR_AB_XM_GLB_CFG);
+			if (EF4_OWORD_FIELD(reg, FRF_AB_XM_CORE_RST) ==
 			    0)
 				return;
 			udelay(10);
@@ -1296,22 +1296,22 @@ static void falcon_reset_macs(struct efx_nic *efx)
 	/* Mac stats will fail whist the TX fifo is draining */
 	WARN_ON(nic_data->stats_disable_count == 0);
 
-	efx_reado(efx, &mac_ctrl, FR_AB_MAC_CTRL);
-	EFX_SET_OWORD_FIELD(mac_ctrl, FRF_BB_TXFIFO_DRAIN_EN, 1);
-	efx_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+	ef4_reado(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+	EF4_SET_OWORD_FIELD(mac_ctrl, FRF_BB_TXFIFO_DRAIN_EN, 1);
+	ef4_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
 
-	efx_reado(efx, &reg, FR_AB_GLB_CTL);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_RST_XGTX, 1);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_RST_XGRX, 1);
-	EFX_SET_OWORD_FIELD(reg, FRF_AB_RST_EM, 1);
-	efx_writeo(efx, &reg, FR_AB_GLB_CTL);
+	ef4_reado(efx, &reg, FR_AB_GLB_CTL);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_RST_XGTX, 1);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_RST_XGRX, 1);
+	EF4_SET_OWORD_FIELD(reg, FRF_AB_RST_EM, 1);
+	ef4_writeo(efx, &reg, FR_AB_GLB_CTL);
 
 	count = 0;
 	while (1) {
-		efx_reado(efx, &reg, FR_AB_GLB_CTL);
-		if (!EFX_OWORD_FIELD(reg, FRF_AB_RST_XGTX) &&
-		    !EFX_OWORD_FIELD(reg, FRF_AB_RST_XGRX) &&
-		    !EFX_OWORD_FIELD(reg, FRF_AB_RST_EM)) {
+		ef4_reado(efx, &reg, FR_AB_GLB_CTL);
+		if (!EF4_OWORD_FIELD(reg, FRF_AB_RST_XGTX) &&
+		    !EF4_OWORD_FIELD(reg, FRF_AB_RST_XGRX) &&
+		    !EF4_OWORD_FIELD(reg, FRF_AB_RST_EM)) {
 			netif_dbg(efx, hw, efx->net_dev,
 				  "Completed MAC reset after %d loops\n",
 				  count);
@@ -1327,47 +1327,47 @@ static void falcon_reset_macs(struct efx_nic *efx)
 
 	/* Ensure the correct MAC is selected before statistics
 	 * are re-enabled by the caller */
-	efx_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
+	ef4_writeo(efx, &mac_ctrl, FR_AB_MAC_CTRL);
 
 	falcon_setup_xaui(efx);
 }
 
-static void falcon_drain_tx_fifo(struct efx_nic *efx)
+static void falcon_drain_tx_fifo(struct ef4_nic *efx)
 {
-	efx_oword_t reg;
+	ef4_oword_t reg;
 
-	if ((efx_nic_rev(efx) < EFX_REV_FALCON_B0) ||
+	if ((ef4_nic_rev(efx) < EF4_REV_FALCON_B0) ||
 	    (efx->loopback_mode != LOOPBACK_NONE))
 		return;
 
-	efx_reado(efx, &reg, FR_AB_MAC_CTRL);
+	ef4_reado(efx, &reg, FR_AB_MAC_CTRL);
 	/* There is no point in draining more than once */
-	if (EFX_OWORD_FIELD(reg, FRF_BB_TXFIFO_DRAIN_EN))
+	if (EF4_OWORD_FIELD(reg, FRF_BB_TXFIFO_DRAIN_EN))
 		return;
 
 	falcon_reset_macs(efx);
 }
 
-static void falcon_deconfigure_mac_wrapper(struct efx_nic *efx)
+static void falcon_deconfigure_mac_wrapper(struct ef4_nic *efx)
 {
-	efx_oword_t reg;
+	ef4_oword_t reg;
 
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0)
+	if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0)
 		return;
 
 	/* Isolate the MAC -> RX */
-	efx_reado(efx, &reg, FR_AZ_RX_CFG);
-	EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 0);
-	efx_writeo(efx, &reg, FR_AZ_RX_CFG);
+	ef4_reado(efx, &reg, FR_AZ_RX_CFG);
+	EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 0);
+	ef4_writeo(efx, &reg, FR_AZ_RX_CFG);
 
 	/* Isolate TX -> MAC */
 	falcon_drain_tx_fifo(efx);
 }
 
-static void falcon_reconfigure_mac_wrapper(struct efx_nic *efx)
+static void falcon_reconfigure_mac_wrapper(struct ef4_nic *efx)
 {
-	struct efx_link_state *link_state = &efx->link_state;
-	efx_oword_t reg;
+	struct ef4_link_state *link_state = &efx->link_state;
+	ef4_oword_t reg;
 	int link_speed, isolate;
 
 	isolate = !!ACCESS_ONCE(efx->reset_pending);
@@ -1383,7 +1383,7 @@ static void falcon_reconfigure_mac_wrapper(struct efx_nic *efx)
 	 * as advertised.  Disable to ensure packets are not
 	 * indefinitely held and TX queue can be flushed at any point
 	 * while the link is down. */
-	EFX_POPULATE_OWORD_5(reg,
+	EF4_POPULATE_OWORD_5(reg,
 			     FRF_AB_MAC_XOFF_VAL, 0xffff /* max pause time */,
 			     FRF_AB_MAC_BCAD_ACPT, 1,
 			     FRF_AB_MAC_UC_PROM, !efx->unicast_filter,
@@ -1391,30 +1391,30 @@ static void falcon_reconfigure_mac_wrapper(struct efx_nic *efx)
 			     FRF_AB_MAC_SPEED, link_speed);
 	/* On B0, MAC backpressure can be disabled and packets get
 	 * discarded. */
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-		EFX_SET_OWORD_FIELD(reg, FRF_BB_TXFIFO_DRAIN_EN,
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+		EF4_SET_OWORD_FIELD(reg, FRF_BB_TXFIFO_DRAIN_EN,
 				    !link_state->up || isolate);
 	}
 
-	efx_writeo(efx, &reg, FR_AB_MAC_CTRL);
+	ef4_writeo(efx, &reg, FR_AB_MAC_CTRL);
 
 	/* Restore the multicast hash registers. */
 	falcon_push_multicast_hash(efx);
 
-	efx_reado(efx, &reg, FR_AZ_RX_CFG);
+	ef4_reado(efx, &reg, FR_AZ_RX_CFG);
 	/* Enable XOFF signal from RX FIFO (we enabled it during NIC
 	 * initialisation but it may read back as 0) */
-	EFX_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
+	EF4_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
 	/* Unisolate the MAC -> RX */
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, !isolate);
-	efx_writeo(efx, &reg, FR_AZ_RX_CFG);
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0)
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, !isolate);
+	ef4_writeo(efx, &reg, FR_AZ_RX_CFG);
 }
 
-static void falcon_stats_request(struct efx_nic *efx)
+static void falcon_stats_request(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t reg;
+	ef4_oword_t reg;
 
 	WARN_ON(nic_data->stats_pending);
 	WARN_ON(nic_data->stats_disable_count);
@@ -1424,16 +1424,16 @@ static void falcon_stats_request(struct efx_nic *efx)
 	wmb(); /* ensure done flag is clear */
 
 	/* Initiate DMA transfer of stats */
-	EFX_POPULATE_OWORD_2(reg,
+	EF4_POPULATE_OWORD_2(reg,
 			     FRF_AB_MAC_STAT_DMA_CMD, 1,
 			     FRF_AB_MAC_STAT_DMA_ADR,
 			     efx->stats_buffer.dma_addr);
-	efx_writeo(efx, &reg, FR_AB_MAC_STAT_DMA);
+	ef4_writeo(efx, &reg, FR_AB_MAC_STAT_DMA);
 
 	mod_timer(&nic_data->stats_timer, round_jiffies_up(jiffies + HZ / 2));
 }
 
-static void falcon_stats_complete(struct efx_nic *efx)
+static void falcon_stats_complete(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 
@@ -1443,7 +1443,7 @@ static void falcon_stats_complete(struct efx_nic *efx)
 	nic_data->stats_pending = false;
 	if (FALCON_XMAC_STATS_DMA_FLAG(efx)) {
 		rmb(); /* read the done flag before the stats */
-		efx_nic_update_stats(falcon_stat_desc, FALCON_STAT_COUNT,
+		ef4_nic_update_stats(falcon_stat_desc, FALCON_STAT_COUNT,
 				     falcon_stat_mask, nic_data->stats,
 				     efx->stats_buffer.addr, true);
 	} else {
@@ -1454,7 +1454,7 @@ static void falcon_stats_complete(struct efx_nic *efx)
 
 static void falcon_stats_timer_func(unsigned long context)
 {
-	struct efx_nic *efx = (struct efx_nic *)context;
+	struct ef4_nic *efx = (struct ef4_nic *)context;
 	struct falcon_nic_data *nic_data = efx->nic_data;
 
 	spin_lock(&efx->stats_lock);
@@ -1466,9 +1466,9 @@ static void falcon_stats_timer_func(unsigned long context)
 	spin_unlock(&efx->stats_lock);
 }
 
-static bool falcon_loopback_link_poll(struct efx_nic *efx)
+static bool falcon_loopback_link_poll(struct ef4_nic *efx)
 {
-	struct efx_link_state old_state = efx->link_state;
+	struct ef4_link_state old_state = efx->link_state;
 
 	WARN_ON(!mutex_is_locked(&efx->mac_lock));
 	WARN_ON(!LOOPBACK_INTERNAL(efx));
@@ -1478,14 +1478,14 @@ static bool falcon_loopback_link_poll(struct efx_nic *efx)
 	efx->link_state.up = true;
 	efx->link_state.speed = 10000;
 
-	return !efx_link_state_equal(&efx->link_state, &old_state);
+	return !ef4_link_state_equal(&efx->link_state, &old_state);
 }
 
-static int falcon_reconfigure_port(struct efx_nic *efx)
+static int falcon_reconfigure_port(struct ef4_nic *efx)
 {
 	int rc;
 
-	WARN_ON(efx_nic_rev(efx) > EFX_REV_FALCON_B0);
+	WARN_ON(ef4_nic_rev(efx) > EF4_REV_FALCON_B0);
 
 	/* Poll the PHY link state *before* reconfiguring it. This means we
 	 * will pick up the correct speed (in loopback) to select the correct
@@ -1508,7 +1508,7 @@ static int falcon_reconfigure_port(struct efx_nic *efx)
 	falcon_start_nic_stats(efx);
 
 	/* Synchronise efx->link_state with the kernel */
-	efx_link_status_changed(efx);
+	ef4_link_status_changed(efx);
 
 	return 0;
 }
@@ -1520,13 +1520,13 @@ static int falcon_reconfigure_port(struct efx_nic *efx)
  * flow control on this end.
  */
 
-static void falcon_a1_prepare_enable_fc_tx(struct efx_nic *efx)
+static void falcon_a1_prepare_enable_fc_tx(struct ef4_nic *efx)
 {
 	/* Schedule a reset to recover */
-	efx_schedule_reset(efx, RESET_TYPE_INVISIBLE);
+	ef4_schedule_reset(efx, RESET_TYPE_INVISIBLE);
 }
 
-static void falcon_b0_prepare_enable_fc_tx(struct efx_nic *efx)
+static void falcon_b0_prepare_enable_fc_tx(struct ef4_nic *efx)
 {
 	/* Recover by resetting the EM block */
 	falcon_stop_nic_stats(efx);
@@ -1543,21 +1543,21 @@ static void falcon_b0_prepare_enable_fc_tx(struct efx_nic *efx)
  */
 
 /* Wait for GMII access to complete */
-static int falcon_gmii_wait(struct efx_nic *efx)
+static int falcon_gmii_wait(struct ef4_nic *efx)
 {
-	efx_oword_t md_stat;
+	ef4_oword_t md_stat;
 	int count;
 
 	/* wait up to 50ms - taken max from datasheet */
 	for (count = 0; count < 5000; count++) {
-		efx_reado(efx, &md_stat, FR_AB_MD_STAT);
-		if (EFX_OWORD_FIELD(md_stat, FRF_AB_MD_BSY) == 0) {
-			if (EFX_OWORD_FIELD(md_stat, FRF_AB_MD_LNFL) != 0 ||
-			    EFX_OWORD_FIELD(md_stat, FRF_AB_MD_BSERR) != 0) {
+		ef4_reado(efx, &md_stat, FR_AB_MD_STAT);
+		if (EF4_OWORD_FIELD(md_stat, FRF_AB_MD_BSY) == 0) {
+			if (EF4_OWORD_FIELD(md_stat, FRF_AB_MD_LNFL) != 0 ||
+			    EF4_OWORD_FIELD(md_stat, FRF_AB_MD_BSERR) != 0) {
 				netif_err(efx, hw, efx->net_dev,
 					  "error from GMII access "
-					  EFX_OWORD_FMT"\n",
-					  EFX_OWORD_VAL(md_stat));
+					  EF4_OWORD_FMT"\n",
+					  EF4_OWORD_VAL(md_stat));
 				return -EIO;
 			}
 			return 0;
@@ -1572,9 +1572,9 @@ static int falcon_gmii_wait(struct efx_nic *efx)
 static int falcon_mdio_write(struct net_device *net_dev,
 			     int prtad, int devad, u16 addr, u16 value)
 {
-	struct efx_nic *efx = netdev_priv(net_dev);
+	struct ef4_nic *efx = netdev_priv(net_dev);
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t reg;
+	ef4_oword_t reg;
 	int rc;
 
 	netif_vdbg(efx, hw, efx->net_dev,
@@ -1589,30 +1589,30 @@ static int falcon_mdio_write(struct net_device *net_dev,
 		goto out;
 
 	/* Write the address/ID register */
-	EFX_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
-	efx_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
+	EF4_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
+	ef4_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
 
-	EFX_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
+	EF4_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
 			     FRF_AB_MD_DEV_ADR, devad);
-	efx_writeo(efx, &reg, FR_AB_MD_ID);
+	ef4_writeo(efx, &reg, FR_AB_MD_ID);
 
 	/* Write data */
-	EFX_POPULATE_OWORD_1(reg, FRF_AB_MD_TXD, value);
-	efx_writeo(efx, &reg, FR_AB_MD_TXD);
+	EF4_POPULATE_OWORD_1(reg, FRF_AB_MD_TXD, value);
+	ef4_writeo(efx, &reg, FR_AB_MD_TXD);
 
-	EFX_POPULATE_OWORD_2(reg,
+	EF4_POPULATE_OWORD_2(reg,
 			     FRF_AB_MD_WRC, 1,
 			     FRF_AB_MD_GC, 0);
-	efx_writeo(efx, &reg, FR_AB_MD_CS);
+	ef4_writeo(efx, &reg, FR_AB_MD_CS);
 
 	/* Wait for data to be written */
 	rc = falcon_gmii_wait(efx);
 	if (rc) {
 		/* Abort the write operation */
-		EFX_POPULATE_OWORD_2(reg,
+		EF4_POPULATE_OWORD_2(reg,
 				     FRF_AB_MD_WRC, 0,
 				     FRF_AB_MD_GC, 1);
-		efx_writeo(efx, &reg, FR_AB_MD_CS);
+		ef4_writeo(efx, &reg, FR_AB_MD_CS);
 		udelay(10);
 	}
 
@@ -1625,9 +1625,9 @@ out:
 static int falcon_mdio_read(struct net_device *net_dev,
 			    int prtad, int devad, u16 addr)
 {
-	struct efx_nic *efx = netdev_priv(net_dev);
+	struct ef4_nic *efx = netdev_priv(net_dev);
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t reg;
+	ef4_oword_t reg;
 	int rc;
 
 	mutex_lock(&nic_data->mdio_lock);
@@ -1637,31 +1637,31 @@ static int falcon_mdio_read(struct net_device *net_dev,
 	if (rc)
 		goto out;
 
-	EFX_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
-	efx_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
+	EF4_POPULATE_OWORD_1(reg, FRF_AB_MD_PHY_ADR, addr);
+	ef4_writeo(efx, &reg, FR_AB_MD_PHY_ADR);
 
-	EFX_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
+	EF4_POPULATE_OWORD_2(reg, FRF_AB_MD_PRT_ADR, prtad,
 			     FRF_AB_MD_DEV_ADR, devad);
-	efx_writeo(efx, &reg, FR_AB_MD_ID);
+	ef4_writeo(efx, &reg, FR_AB_MD_ID);
 
 	/* Request data to be read */
-	EFX_POPULATE_OWORD_2(reg, FRF_AB_MD_RDC, 1, FRF_AB_MD_GC, 0);
-	efx_writeo(efx, &reg, FR_AB_MD_CS);
+	EF4_POPULATE_OWORD_2(reg, FRF_AB_MD_RDC, 1, FRF_AB_MD_GC, 0);
+	ef4_writeo(efx, &reg, FR_AB_MD_CS);
 
 	/* Wait for data to become available */
 	rc = falcon_gmii_wait(efx);
 	if (rc == 0) {
-		efx_reado(efx, &reg, FR_AB_MD_RXD);
-		rc = EFX_OWORD_FIELD(reg, FRF_AB_MD_RXD);
+		ef4_reado(efx, &reg, FR_AB_MD_RXD);
+		rc = EF4_OWORD_FIELD(reg, FRF_AB_MD_RXD);
 		netif_vdbg(efx, hw, efx->net_dev,
 			   "read from MDIO %d register %d.%d, got %04x\n",
 			   prtad, devad, addr, rc);
 	} else {
 		/* Abort the read operation */
-		EFX_POPULATE_OWORD_2(reg,
+		EF4_POPULATE_OWORD_2(reg,
 				     FRF_AB_MD_RIC, 0,
 				     FRF_AB_MD_GC, 1);
-		efx_writeo(efx, &reg, FR_AB_MD_CS);
+		ef4_writeo(efx, &reg, FR_AB_MD_CS);
 
 		netif_dbg(efx, hw, efx->net_dev,
 			  "read from MDIO %d register %d.%d, got error %d\n",
@@ -1674,7 +1674,7 @@ out:
 }
 
 /* This call is responsible for hooking in the MAC and PHY operations */
-static int falcon_probe_port(struct efx_nic *efx)
+static int falcon_probe_port(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	int rc;
@@ -1709,15 +1709,15 @@ static int falcon_probe_port(struct efx_nic *efx)
 	efx->link_state.fd = true;
 
 	/* Hardware flow ctrl. FalconA RX FIFO too small for pause generation */
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)
-		efx->wanted_fc = EFX_FC_RX | EFX_FC_TX;
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0)
+		efx->wanted_fc = EF4_FC_RX | EF4_FC_TX;
 	else
-		efx->wanted_fc = EFX_FC_RX;
+		efx->wanted_fc = EF4_FC_RX;
 	if (efx->mdio.mmds & MDIO_DEVS_AN)
-		efx->wanted_fc |= EFX_FC_AUTO;
+		efx->wanted_fc |= EF4_FC_AUTO;
 
 	/* Allocate buffer for stats */
-	rc = efx_nic_alloc_buffer(efx, &efx->stats_buffer,
+	rc = ef4_nic_alloc_buffer(efx, &efx->stats_buffer,
 				  FALCON_MAC_STATS_SIZE, GFP_KERNEL);
 	if (rc)
 		return rc;
@@ -1730,40 +1730,40 @@ static int falcon_probe_port(struct efx_nic *efx)
 	return 0;
 }
 
-static void falcon_remove_port(struct efx_nic *efx)
+static void falcon_remove_port(struct ef4_nic *efx)
 {
 	efx->phy_op->remove(efx);
-	efx_nic_free_buffer(efx, &efx->stats_buffer);
+	ef4_nic_free_buffer(efx, &efx->stats_buffer);
 }
 
 /* Global events are basically PHY events */
 static bool
-falcon_handle_global_event(struct efx_channel *channel, efx_qword_t *event)
+falcon_handle_global_event(struct ef4_channel *channel, ef4_qword_t *event)
 {
-	struct efx_nic *efx = channel->efx;
+	struct ef4_nic *efx = channel->efx;
 	struct falcon_nic_data *nic_data = efx->nic_data;
 
-	if (EFX_QWORD_FIELD(*event, FSF_AB_GLB_EV_G_PHY0_INTR) ||
-	    EFX_QWORD_FIELD(*event, FSF_AB_GLB_EV_XG_PHY0_INTR) ||
-	    EFX_QWORD_FIELD(*event, FSF_AB_GLB_EV_XFP_PHY0_INTR))
+	if (EF4_QWORD_FIELD(*event, FSF_AB_GLB_EV_G_PHY0_INTR) ||
+	    EF4_QWORD_FIELD(*event, FSF_AB_GLB_EV_XG_PHY0_INTR) ||
+	    EF4_QWORD_FIELD(*event, FSF_AB_GLB_EV_XFP_PHY0_INTR))
 		/* Ignored */
 		return true;
 
-	if ((efx_nic_rev(efx) == EFX_REV_FALCON_B0) &&
-	    EFX_QWORD_FIELD(*event, FSF_BB_GLB_EV_XG_MGT_INTR)) {
+	if ((ef4_nic_rev(efx) == EF4_REV_FALCON_B0) &&
+	    EF4_QWORD_FIELD(*event, FSF_BB_GLB_EV_XG_MGT_INTR)) {
 		nic_data->xmac_poll_required = true;
 		return true;
 	}
 
-	if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1 ?
-	    EFX_QWORD_FIELD(*event, FSF_AA_GLB_EV_RX_RECOVERY) :
-	    EFX_QWORD_FIELD(*event, FSF_BB_GLB_EV_RX_RECOVERY)) {
+	if (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1 ?
+	    EF4_QWORD_FIELD(*event, FSF_AA_GLB_EV_RX_RECOVERY) :
+	    EF4_QWORD_FIELD(*event, FSF_BB_GLB_EV_RX_RECOVERY)) {
 		netif_err(efx, rx_err, efx->net_dev,
 			  "channel %d seen global RX_RESET event. Resetting.\n",
 			  channel->channel);
 
 		atomic_inc(&efx->rx_reset);
-		efx_schedule_reset(efx, EFX_WORKAROUND_6555(efx) ?
+		ef4_schedule_reset(efx, EF4_WORKAROUND_6555(efx) ?
 				   RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
 		return true;
 	}
@@ -1778,7 +1778,7 @@ falcon_handle_global_event(struct efx_channel *channel, efx_qword_t *event)
  **************************************************************************/
 
 static int
-falcon_read_nvram(struct efx_nic *efx, struct falcon_nvconfig *nvconfig_out)
+falcon_read_nvram(struct ef4_nic *efx, struct falcon_nvconfig *nvconfig_out)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	struct falcon_nvconfig *nvconfig;
@@ -1849,52 +1849,52 @@ falcon_read_nvram(struct efx_nic *efx, struct falcon_nvconfig *nvconfig_out)
 	return rc;
 }
 
-static int falcon_test_nvram(struct efx_nic *efx)
+static int falcon_test_nvram(struct ef4_nic *efx)
 {
 	return falcon_read_nvram(efx, NULL);
 }
 
-static const struct efx_farch_register_test falcon_b0_register_tests[] = {
+static const struct ef4_farch_register_test falcon_b0_register_tests[] = {
 	{ FR_AZ_ADR_REGION,
-	  EFX_OWORD32(0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF) },
+	  EF4_OWORD32(0x0003FFFF, 0x0003FFFF, 0x0003FFFF, 0x0003FFFF) },
 	{ FR_AZ_RX_CFG,
-	  EFX_OWORD32(0xFFFFFFFE, 0x00017FFF, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0xFFFFFFFE, 0x00017FFF, 0x00000000, 0x00000000) },
 	{ FR_AZ_TX_CFG,
-	  EFX_OWORD32(0x7FFF0037, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x7FFF0037, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AZ_TX_RESERVED,
-	  EFX_OWORD32(0xFFFEFE80, 0x1FFFFFFF, 0x020000FE, 0x007FFFFF) },
+	  EF4_OWORD32(0xFFFEFE80, 0x1FFFFFFF, 0x020000FE, 0x007FFFFF) },
 	{ FR_AB_MAC_CTRL,
-	  EFX_OWORD32(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AZ_SRM_TX_DC_CFG,
-	  EFX_OWORD32(0x001FFFFF, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x001FFFFF, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AZ_RX_DC_CFG,
-	  EFX_OWORD32(0x0000000F, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x0000000F, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AZ_RX_DC_PF_WM,
-	  EFX_OWORD32(0x000003FF, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x000003FF, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_BZ_DP_CTRL,
-	  EFX_OWORD32(0x00000FFF, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x00000FFF, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_GM_CFG2,
-	  EFX_OWORD32(0x00007337, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x00007337, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_GMF_CFG0,
-	  EFX_OWORD32(0x00001F1F, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x00001F1F, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_XM_GLB_CFG,
-	  EFX_OWORD32(0x00000C68, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x00000C68, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_XM_TX_CFG,
-	  EFX_OWORD32(0x00080164, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x00080164, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_XM_RX_CFG,
-	  EFX_OWORD32(0x07100A0C, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x07100A0C, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_XM_RX_PARAM,
-	  EFX_OWORD32(0x00001FF8, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x00001FF8, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_XM_FC,
-	  EFX_OWORD32(0xFFFF0001, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0xFFFF0001, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_XM_ADR_LO,
-	  EFX_OWORD32(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000) },
 	{ FR_AB_XX_SD_CTL,
-	  EFX_OWORD32(0x0003FF0F, 0x00000000, 0x00000000, 0x00000000) },
+	  EF4_OWORD32(0x0003FF0F, 0x00000000, 0x00000000, 0x00000000) },
 };
 
 static int
-falcon_b0_test_chip(struct efx_nic *efx, struct efx_self_tests *tests)
+falcon_b0_test_chip(struct ef4_nic *efx, struct ef4_self_tests *tests)
 {
 	enum reset_type reset_method = RESET_TYPE_INVISIBLE;
 	int rc, rc2;
@@ -1908,18 +1908,18 @@ falcon_b0_test_chip(struct efx_nic *efx, struct efx_self_tests *tests)
 		else
 			efx->loopback_mode = __ffs(efx->loopback_modes);
 	}
-	__efx_reconfigure_port(efx);
+	__ef4_reconfigure_port(efx);
 	mutex_unlock(&efx->mac_lock);
 
-	efx_reset_down(efx, reset_method);
+	ef4_reset_down(efx, reset_method);
 
 	tests->registers =
-		efx_farch_test_registers(efx, falcon_b0_register_tests,
+		ef4_farch_test_registers(efx, falcon_b0_register_tests,
 					 ARRAY_SIZE(falcon_b0_register_tests))
 		? -1 : 1;
 
 	rc = falcon_reset_hw(efx, reset_method);
-	rc2 = efx_reset_up(efx, reset_method, rc == 0);
+	rc2 = ef4_reset_up(efx, reset_method, rc == 0);
 	return rc ? rc : rc2;
 }
 
@@ -1974,10 +1974,10 @@ static int falcon_map_reset_flags(u32 *flags)
 
 /* Resets NIC to known state.  This routine must be called in process
  * context and is allowed to sleep. */
-static int __falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
+static int __falcon_reset_hw(struct ef4_nic *efx, enum reset_type method)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t glb_ctl_reg_ker;
+	ef4_oword_t glb_ctl_reg_ker;
 	int rc;
 
 	netif_dbg(efx, hw, efx->net_dev, "performing %s hardware reset\n",
@@ -1992,7 +1992,7 @@ static int __falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
 				  "function prior to hardware reset\n");
 			goto fail1;
 		}
-		if (efx_nic_is_dual_func(efx)) {
+		if (ef4_nic_is_dual_func(efx)) {
 			rc = pci_save_state(nic_data->pci_dev2);
 			if (rc) {
 				netif_err(efx, drv, efx->net_dev,
@@ -2003,12 +2003,12 @@ static int __falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
 			}
 		}
 
-		EFX_POPULATE_OWORD_2(glb_ctl_reg_ker,
+		EF4_POPULATE_OWORD_2(glb_ctl_reg_ker,
 				     FRF_AB_EXT_PHY_RST_DUR,
 				     FFE_AB_EXT_PHY_RST_DUR_10240US,
 				     FRF_AB_SWRST, 1);
 	} else {
-		EFX_POPULATE_OWORD_7(glb_ctl_reg_ker,
+		EF4_POPULATE_OWORD_7(glb_ctl_reg_ker,
 				     /* exclude PHY from "invisible" reset */
 				     FRF_AB_EXT_PHY_RST_CTL,
 				     method == RESET_TYPE_INVISIBLE,
@@ -2021,14 +2021,14 @@ static int __falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
 				     FFE_AB_EXT_PHY_RST_DUR_10240US,
 				     FRF_AB_SWRST, 1);
 	}
-	efx_writeo(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
+	ef4_writeo(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
 
 	netif_dbg(efx, hw, efx->net_dev, "waiting for hardware reset\n");
 	schedule_timeout_uninterruptible(HZ / 20);
 
 	/* Restore PCI configuration if needed */
 	if (method == RESET_TYPE_WORLD) {
-		if (efx_nic_is_dual_func(efx))
+		if (ef4_nic_is_dual_func(efx))
 			pci_restore_state(nic_data->pci_dev2);
 		pci_restore_state(efx->pci_dev);
 		netif_dbg(efx, drv, efx->net_dev,
@@ -2036,8 +2036,8 @@ static int __falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
 	}
 
 	/* Assert that reset complete */
-	efx_reado(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
-	if (EFX_OWORD_FIELD(glb_ctl_reg_ker, FRF_AB_SWRST) != 0) {
+	ef4_reado(efx, &glb_ctl_reg_ker, FR_AB_GLB_CTL);
+	if (EF4_OWORD_FIELD(glb_ctl_reg_ker, FRF_AB_SWRST) != 0) {
 		rc = -ETIMEDOUT;
 		netif_err(efx, hw, efx->net_dev,
 			  "timed out waiting for hardware reset\n");
@@ -2055,7 +2055,7 @@ fail3:
 	return rc;
 }
 
-static int falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
+static int falcon_reset_hw(struct ef4_nic *efx, enum reset_type method)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	int rc;
@@ -2067,7 +2067,7 @@ static int falcon_reset_hw(struct efx_nic *efx, enum reset_type method)
 	return rc;
 }
 
-static void falcon_monitor(struct efx_nic *efx)
+static void falcon_monitor(struct ef4_nic *efx)
 {
 	bool link_changed;
 	int rc;
@@ -2080,7 +2080,7 @@ static void falcon_monitor(struct efx_nic *efx)
 			  "Board sensor %s; shutting down PHY\n",
 			  (rc == -ERANGE) ? "reported fault" : "failed");
 		efx->phy_mode |= PHY_MODE_LOW_POWER;
-		rc = __efx_reconfigure_port(efx);
+		rc = __ef4_reconfigure_port(efx);
 		WARN_ON(rc);
 	}
 
@@ -2099,7 +2099,7 @@ static void falcon_monitor(struct efx_nic *efx)
 
 		falcon_start_nic_stats(efx);
 
-		efx_link_status_changed(efx);
+		ef4_link_status_changed(efx);
 	}
 
 	falcon_poll_xmac(efx);
@@ -2108,22 +2108,22 @@ static void falcon_monitor(struct efx_nic *efx)
 /* Zeroes out the SRAM contents.  This routine must be called in
  * process context and is allowed to sleep.
  */
-static int falcon_reset_sram(struct efx_nic *efx)
+static int falcon_reset_sram(struct ef4_nic *efx)
 {
-	efx_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker;
+	ef4_oword_t srm_cfg_reg_ker, gpio_cfg_reg_ker;
 	int count;
 
 	/* Set the SRAM wake/sleep GPIO appropriately. */
-	efx_reado(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
-	EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OEN, 1);
-	EFX_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OUT, 1);
-	efx_writeo(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
+	ef4_reado(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
+	EF4_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OEN, 1);
+	EF4_SET_OWORD_FIELD(gpio_cfg_reg_ker, FRF_AB_GPIO1_OUT, 1);
+	ef4_writeo(efx, &gpio_cfg_reg_ker, FR_AB_GPIO_CTL);
 
 	/* Initiate SRAM reset */
-	EFX_POPULATE_OWORD_2(srm_cfg_reg_ker,
+	EF4_POPULATE_OWORD_2(srm_cfg_reg_ker,
 			     FRF_AZ_SRM_INIT_EN, 1,
 			     FRF_AZ_SRM_NB_SZ, 0);
-	efx_writeo(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
+	ef4_writeo(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
 
 	/* Wait for SRAM reset to complete */
 	count = 0;
@@ -2135,8 +2135,8 @@ static int falcon_reset_sram(struct efx_nic *efx)
 		schedule_timeout_uninterruptible(HZ / 50);
 
 		/* Check for reset complete */
-		efx_reado(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
-		if (!EFX_OWORD_FIELD(srm_cfg_reg_ker, FRF_AZ_SRM_INIT_EN)) {
+		ef4_reado(efx, &srm_cfg_reg_ker, FR_AZ_SRM_CFG);
+		if (!EF4_OWORD_FIELD(srm_cfg_reg_ker, FRF_AZ_SRM_INIT_EN)) {
 			netif_dbg(efx, hw, efx->net_dev,
 				  "SRAM reset complete\n");
 
@@ -2148,7 +2148,7 @@ static int falcon_reset_sram(struct efx_nic *efx)
 	return -ETIMEDOUT;
 }
 
-static void falcon_spi_device_init(struct efx_nic *efx,
+static void falcon_spi_device_init(struct ef4_nic *efx,
 				  struct falcon_spi_device *spi_device,
 				  unsigned int device_id, u32 device_type)
 {
@@ -2174,7 +2174,7 @@ static void falcon_spi_device_init(struct efx_nic *efx,
 }
 
 /* Extract non-volatile configuration */
-static int falcon_probe_nvconfig(struct efx_nic *efx)
+static int falcon_probe_nvconfig(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	struct falcon_nvconfig *nvconfig;
@@ -2215,7 +2215,7 @@ out:
 	return rc;
 }
 
-static int falcon_dimension_resources(struct efx_nic *efx)
+static int falcon_dimension_resources(struct ef4_nic *efx)
 {
 	efx->rx_dc_base = 0x20000;
 	efx->tx_dc_base = 0x26000;
@@ -2223,18 +2223,18 @@ static int falcon_dimension_resources(struct efx_nic *efx)
 }
 
 /* Probe all SPI devices on the NIC */
-static void falcon_probe_spi_devices(struct efx_nic *efx)
+static void falcon_probe_spi_devices(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
-	efx_oword_t nic_stat, gpio_ctl, ee_vpd_cfg;
+	ef4_oword_t nic_stat, gpio_ctl, ee_vpd_cfg;
 	int boot_dev;
 
-	efx_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
-	efx_reado(efx, &nic_stat, FR_AB_NIC_STAT);
-	efx_reado(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
+	ef4_reado(efx, &gpio_ctl, FR_AB_GPIO_CTL);
+	ef4_reado(efx, &nic_stat, FR_AB_NIC_STAT);
+	ef4_reado(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
 
-	if (EFX_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO3_PWRUP_VALUE)) {
-		boot_dev = (EFX_OWORD_FIELD(nic_stat, FRF_AB_SF_PRST) ?
+	if (EF4_OWORD_FIELD(gpio_ctl, FRF_AB_GPIO3_PWRUP_VALUE)) {
+		boot_dev = (EF4_OWORD_FIELD(nic_stat, FRF_AB_SF_PRST) ?
 			    FFE_AB_SPI_DEVICE_FLASH : FFE_AB_SPI_DEVICE_EEPROM);
 		netif_dbg(efx, probe, efx->net_dev, "Booted from %s\n",
 			  boot_dev == FFE_AB_SPI_DEVICE_FLASH ?
@@ -2246,12 +2246,12 @@ static void falcon_probe_spi_devices(struct efx_nic *efx)
 		netif_dbg(efx, probe, efx->net_dev,
 			  "Booted from internal ASIC settings;"
 			  " setting SPI config\n");
-		EFX_POPULATE_OWORD_3(ee_vpd_cfg, FRF_AB_EE_VPD_EN, 0,
+		EF4_POPULATE_OWORD_3(ee_vpd_cfg, FRF_AB_EE_VPD_EN, 0,
 				     /* 125 MHz / 7 ~= 20 MHz */
 				     FRF_AB_EE_SF_CLOCK_DIV, 7,
 				     /* 125 MHz / 63 ~= 2 MHz */
 				     FRF_AB_EE_EE_CLOCK_DIV, 63);
-		efx_writeo(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
+		ef4_writeo(efx, &ee_vpd_cfg, FR_AB_EE_VPD_CFG0);
 	}
 
 	mutex_init(&nic_data->spi_lock);
@@ -2266,12 +2266,12 @@ static void falcon_probe_spi_devices(struct efx_nic *efx)
 				       large_eeprom_type);
 }
 
-static unsigned int falcon_a1_mem_map_size(struct efx_nic *efx)
+static unsigned int falcon_a1_mem_map_size(struct ef4_nic *efx)
 {
 	return 0x20000;
 }
 
-static unsigned int falcon_b0_mem_map_size(struct efx_nic *efx)
+static unsigned int falcon_b0_mem_map_size(struct ef4_nic *efx)
 {
 	/* Map everything up to and including the RSS indirection table.
 	 * The PCI core takes care of mapping the MSI-X tables.
@@ -2280,7 +2280,7 @@ static unsigned int falcon_b0_mem_map_size(struct efx_nic *efx)
 		FR_BZ_RX_INDIRECTION_TBL_STEP * FR_BZ_RX_INDIRECTION_TBL_ROWS;
 }
 
-static int falcon_probe_nic(struct efx_nic *efx)
+static int falcon_probe_nic(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data;
 	struct falcon_board *board;
@@ -2296,14 +2296,14 @@ static int falcon_probe_nic(struct efx_nic *efx)
 
 	rc = -ENODEV;
 
-	if (efx_farch_fpga_ver(efx) != 0) {
+	if (ef4_farch_fpga_ver(efx) != 0) {
 		netif_err(efx, probe, efx->net_dev,
 			  "Falcon FPGA not supported\n");
 		goto fail1;
 	}
 
-	if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) {
-		efx_oword_t nic_stat;
+	if (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1) {
+		ef4_oword_t nic_stat;
 		struct pci_dev *dev;
 		u8 pci_rev = efx->pci_dev->revision;
 
@@ -2312,13 +2312,13 @@ static int falcon_probe_nic(struct efx_nic *efx)
 				  "Falcon rev A0 not supported\n");
 			goto fail1;
 		}
-		efx_reado(efx, &nic_stat, FR_AB_NIC_STAT);
-		if (EFX_OWORD_FIELD(nic_stat, FRF_AB_STRAP_10G) == 0) {
+		ef4_reado(efx, &nic_stat, FR_AB_NIC_STAT);
+		if (EF4_OWORD_FIELD(nic_stat, FRF_AB_STRAP_10G) == 0) {
 			netif_err(efx, probe, efx->net_dev,
 				  "Falcon rev A1 1G not supported\n");
 			goto fail1;
 		}
-		if (EFX_OWORD_FIELD(nic_stat, FRF_AA_STRAP_PCIE) == 0) {
+		if (EF4_OWORD_FIELD(nic_stat, FRF_AA_STRAP_PCIE) == 0) {
 			netif_err(efx, probe, efx->net_dev,
 				  "Falcon rev A1 PCI-X not supported\n");
 			goto fail1;
@@ -2350,7 +2350,7 @@ static int falcon_probe_nic(struct efx_nic *efx)
 	}
 
 	/* Allocate memory for INT_KER */
-	rc = efx_nic_alloc_buffer(efx, &efx->irq_status, sizeof(efx_oword_t),
+	rc = ef4_nic_alloc_buffer(efx, &efx->irq_status, sizeof(ef4_oword_t),
 				  GFP_KERNEL);
 	if (rc)
 		goto fail4;
@@ -2372,8 +2372,8 @@ static int falcon_probe_nic(struct efx_nic *efx)
 		goto fail5;
 	}
 
-	efx->max_channels = (efx_nic_rev(efx) <= EFX_REV_FALCON_A1 ? 4 :
-			     EFX_MAX_CHANNELS);
+	efx->max_channels = (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1 ? 4 :
+			     EF4_MAX_CHANNELS);
 	efx->max_tx_channels = efx->max_channels;
 	efx->timer_quantum_ns = 4968; /* 621 cycles */
 	efx->timer_max_ns = efx->type->timer_period_max *
@@ -2409,7 +2409,7 @@ static int falcon_probe_nic(struct efx_nic *efx)
 	i2c_del_adapter(&board->i2c_adap);
 	memset(&board->i2c_adap, 0, sizeof(board->i2c_adap));
  fail5:
-	efx_nic_free_buffer(efx, &efx->irq_status);
+	ef4_nic_free_buffer(efx, &efx->irq_status);
  fail4:
  fail3:
 	if (nic_data->pci_dev2) {
@@ -2422,66 +2422,66 @@ static int falcon_probe_nic(struct efx_nic *efx)
 	return rc;
 }
 
-static void falcon_init_rx_cfg(struct efx_nic *efx)
+static void falcon_init_rx_cfg(struct ef4_nic *efx)
 {
 	/* RX control FIFO thresholds (32 entries) */
 	const unsigned ctrl_xon_thr = 20;
 	const unsigned ctrl_xoff_thr = 25;
-	efx_oword_t reg;
+	ef4_oword_t reg;
 
-	efx_reado(efx, &reg, FR_AZ_RX_CFG);
-	if (efx_nic_rev(efx) <= EFX_REV_FALCON_A1) {
+	ef4_reado(efx, &reg, FR_AZ_RX_CFG);
+	if (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1) {
 		/* Data FIFO size is 5.5K.  The RX DMA engine only
 		 * supports scattering for user-mode queues, but will
 		 * split DMA writes at intervals of RX_USR_BUF_SIZE
 		 * (32-byte units) even for kernel-mode queues.  We
 		 * set it to be so large that that never happens.
 		 */
-		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0);
-		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE,
+		EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_DESC_PUSH_EN, 0);
+		EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_USR_BUF_SIZE,
 				    (3 * 4096) >> 5);
-		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8);
-		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8);
-		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr);
-		EFX_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_TX_TH, ctrl_xoff_thr);
+		EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_MAC_TH, 512 >> 8);
+		EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_MAC_TH, 2048 >> 8);
+		EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XON_TX_TH, ctrl_xon_thr);
+		EF4_SET_OWORD_FIELD(reg, FRF_AA_RX_XOFF_TX_TH, ctrl_xoff_thr);
 	} else {
 		/* Data FIFO size is 80K; register fields moved */
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE,
-				    EFX_RX_USR_BUF_SIZE >> 5);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_DESC_PUSH_EN, 0);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_USR_BUF_SIZE,
+				    EF4_RX_USR_BUF_SIZE >> 5);
 		/* Send XON and XOFF at ~3 * max MTU away from empty/full */
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_TX_TH, ctrl_xon_thr);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_TX_TH, ctrl_xoff_thr);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 1);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_MAC_TH, 27648 >> 8);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_MAC_TH, 54272 >> 8);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XON_TX_TH, ctrl_xon_thr);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_XOFF_TX_TH, ctrl_xoff_thr);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_INGR_EN, 1);
 
 		/* Enable hash insertion. This is broken for the
 		 * 'Falcon' hash so also select Toeplitz TCP/IPv4 and
 		 * IPv4 hashes. */
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_INSRT_HDR, 1);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_ALG, 1);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_RX_IP_HASH, 1);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_INSRT_HDR, 1);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_HASH_ALG, 1);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_RX_IP_HASH, 1);
 	}
 	/* Always enable XOFF signal from RX FIFO.  We enable
 	 * or disable transmission of pause frames at the MAC. */
-	EFX_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
-	efx_writeo(efx, &reg, FR_AZ_RX_CFG);
+	EF4_SET_OWORD_FIELD(reg, FRF_AZ_RX_XOFF_MAC_EN, 1);
+	ef4_writeo(efx, &reg, FR_AZ_RX_CFG);
 }
 
 /* This call performs hardware-specific global initialisation, such as
  * defining the descriptor cache sizes and number of RSS channels.
  * It does not set up any buffers, descriptor rings or event queues.
  */
-static int falcon_init_nic(struct efx_nic *efx)
+static int falcon_init_nic(struct ef4_nic *efx)
 {
-	efx_oword_t temp;
+	ef4_oword_t temp;
 	int rc;
 
 	/* Use on-chip SRAM */
-	efx_reado(efx, &temp, FR_AB_NIC_STAT);
-	EFX_SET_OWORD_FIELD(temp, FRF_AB_ONCHIP_SRAM, 1);
-	efx_writeo(efx, &temp, FR_AB_NIC_STAT);
+	ef4_reado(efx, &temp, FR_AB_NIC_STAT);
+	EF4_SET_OWORD_FIELD(temp, FRF_AB_ONCHIP_SRAM, 1);
+	ef4_writeo(efx, &temp, FR_AB_NIC_STAT);
 
 	rc = falcon_reset_sram(efx);
 	if (rc)
@@ -2490,55 +2490,55 @@ static int falcon_init_nic(struct efx_nic *efx)
 	/* Clear the parity enables on the TX data fifos as
 	 * they produce false parity errors because of timing issues
 	 */
-	if (EFX_WORKAROUND_5129(efx)) {
-		efx_reado(efx, &temp, FR_AZ_CSR_SPARE);
-		EFX_SET_OWORD_FIELD(temp, FRF_AB_MEM_PERR_EN_TX_DATA, 0);
-		efx_writeo(efx, &temp, FR_AZ_CSR_SPARE);
+	if (EF4_WORKAROUND_5129(efx)) {
+		ef4_reado(efx, &temp, FR_AZ_CSR_SPARE);
+		EF4_SET_OWORD_FIELD(temp, FRF_AB_MEM_PERR_EN_TX_DATA, 0);
+		ef4_writeo(efx, &temp, FR_AZ_CSR_SPARE);
 	}
 
-	if (EFX_WORKAROUND_7244(efx)) {
-		efx_reado(efx, &temp, FR_BZ_RX_FILTER_CTL);
-		EFX_SET_OWORD_FIELD(temp, FRF_BZ_UDP_FULL_SRCH_LIMIT, 8);
-		EFX_SET_OWORD_FIELD(temp, FRF_BZ_UDP_WILD_SRCH_LIMIT, 8);
-		EFX_SET_OWORD_FIELD(temp, FRF_BZ_TCP_FULL_SRCH_LIMIT, 8);
-		EFX_SET_OWORD_FIELD(temp, FRF_BZ_TCP_WILD_SRCH_LIMIT, 8);
-		efx_writeo(efx, &temp, FR_BZ_RX_FILTER_CTL);
+	if (EF4_WORKAROUND_7244(efx)) {
+		ef4_reado(efx, &temp, FR_BZ_RX_FILTER_CTL);
+		EF4_SET_OWORD_FIELD(temp, FRF_BZ_UDP_FULL_SRCH_LIMIT, 8);
+		EF4_SET_OWORD_FIELD(temp, FRF_BZ_UDP_WILD_SRCH_LIMIT, 8);
+		EF4_SET_OWORD_FIELD(temp, FRF_BZ_TCP_FULL_SRCH_LIMIT, 8);
+		EF4_SET_OWORD_FIELD(temp, FRF_BZ_TCP_WILD_SRCH_LIMIT, 8);
+		ef4_writeo(efx, &temp, FR_BZ_RX_FILTER_CTL);
 	}
 
 	/* XXX This is documented only for Falcon A0/A1 */
 	/* Setup RX.  Wait for descriptor is broken and must
 	 * be disabled.  RXDP recovery shouldn't be needed, but is.
 	 */
-	efx_reado(efx, &temp, FR_AA_RX_SELF_RST);
-	EFX_SET_OWORD_FIELD(temp, FRF_AA_RX_NODESC_WAIT_DIS, 1);
-	EFX_SET_OWORD_FIELD(temp, FRF_AA_RX_SELF_RST_EN, 1);
-	if (EFX_WORKAROUND_5583(efx))
-		EFX_SET_OWORD_FIELD(temp, FRF_AA_RX_ISCSI_DIS, 1);
-	efx_writeo(efx, &temp, FR_AA_RX_SELF_RST);
+	ef4_reado(efx, &temp, FR_AA_RX_SELF_RST);
+	EF4_SET_OWORD_FIELD(temp, FRF_AA_RX_NODESC_WAIT_DIS, 1);
+	EF4_SET_OWORD_FIELD(temp, FRF_AA_RX_SELF_RST_EN, 1);
+	if (EF4_WORKAROUND_5583(efx))
+		EF4_SET_OWORD_FIELD(temp, FRF_AA_RX_ISCSI_DIS, 1);
+	ef4_writeo(efx, &temp, FR_AA_RX_SELF_RST);
 
 	/* Do not enable TX_NO_EOP_DISC_EN, since it limits packets to 16
 	 * descriptors (which is bad).
 	 */
-	efx_reado(efx, &temp, FR_AZ_TX_CFG);
-	EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_NO_EOP_DISC_EN, 0);
-	efx_writeo(efx, &temp, FR_AZ_TX_CFG);
+	ef4_reado(efx, &temp, FR_AZ_TX_CFG);
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_NO_EOP_DISC_EN, 0);
+	ef4_writeo(efx, &temp, FR_AZ_TX_CFG);
 
 	falcon_init_rx_cfg(efx);
 
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
 		falcon_b0_rx_push_rss_config(efx, false, efx->rx_indir_table);
 
 		/* Set destination of both TX and RX Flush events */
-		EFX_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0);
-		efx_writeo(efx, &temp, FR_BZ_DP_CTRL);
+		EF4_POPULATE_OWORD_1(temp, FRF_BZ_FLS_EVQ_ID, 0);
+		ef4_writeo(efx, &temp, FR_BZ_DP_CTRL);
 	}
 
-	efx_farch_init_common(efx);
+	ef4_farch_init_common(efx);
 
 	return 0;
 }
 
-static void falcon_remove_nic(struct efx_nic *efx)
+static void falcon_remove_nic(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	struct falcon_board *board = falcon_board(efx);
@@ -2549,7 +2549,7 @@ static void falcon_remove_nic(struct efx_nic *efx)
 	i2c_del_adapter(&board->i2c_adap);
 	memset(&board->i2c_adap, 0, sizeof(board->i2c_adap));
 
-	efx_nic_free_buffer(efx, &efx->irq_status);
+	ef4_nic_free_buffer(efx, &efx->irq_status);
 
 	__falcon_reset_hw(efx, RESET_TYPE_ALL);
 
@@ -2564,40 +2564,40 @@ static void falcon_remove_nic(struct efx_nic *efx)
 	efx->nic_data = NULL;
 }
 
-static size_t falcon_describe_nic_stats(struct efx_nic *efx, u8 *names)
+static size_t falcon_describe_nic_stats(struct ef4_nic *efx, u8 *names)
 {
-	return efx_nic_describe_stats(falcon_stat_desc, FALCON_STAT_COUNT,
+	return ef4_nic_describe_stats(falcon_stat_desc, FALCON_STAT_COUNT,
 				      falcon_stat_mask, names);
 }
 
-static size_t falcon_update_nic_stats(struct efx_nic *efx, u64 *full_stats,
+static size_t falcon_update_nic_stats(struct ef4_nic *efx, u64 *full_stats,
 				      struct rtnl_link_stats64 *core_stats)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	u64 *stats = nic_data->stats;
-	efx_oword_t cnt;
+	ef4_oword_t cnt;
 
 	if (!nic_data->stats_disable_count) {
-		efx_reado(efx, &cnt, FR_AZ_RX_NODESC_DROP);
+		ef4_reado(efx, &cnt, FR_AZ_RX_NODESC_DROP);
 		stats[FALCON_STAT_rx_nodesc_drop_cnt] +=
-			EFX_OWORD_FIELD(cnt, FRF_AB_RX_NODESC_DROP_CNT);
+			EF4_OWORD_FIELD(cnt, FRF_AB_RX_NODESC_DROP_CNT);
 
 		if (nic_data->stats_pending &&
 		    FALCON_XMAC_STATS_DMA_FLAG(efx)) {
 			nic_data->stats_pending = false;
 			rmb(); /* read the done flag before the stats */
-			efx_nic_update_stats(
+			ef4_nic_update_stats(
 				falcon_stat_desc, FALCON_STAT_COUNT,
 				falcon_stat_mask,
 				stats, efx->stats_buffer.addr, true);
 		}
 
 		/* Update derived statistic */
-		efx_update_diff_stat(&stats[FALCON_STAT_rx_bad_bytes],
+		ef4_update_diff_stat(&stats[FALCON_STAT_rx_bad_bytes],
 				     stats[FALCON_STAT_rx_bytes] -
 				     stats[FALCON_STAT_rx_good_bytes] -
 				     stats[FALCON_STAT_rx_control] * 64);
-		efx_update_sw_stats(efx, stats);
+		ef4_update_sw_stats(efx, stats);
 	}
 
 	if (full_stats)
@@ -2628,7 +2628,7 @@ static size_t falcon_update_nic_stats(struct efx_nic *efx, u64 *full_stats,
 	return FALCON_STAT_COUNT;
 }
 
-void falcon_start_nic_stats(struct efx_nic *efx)
+void falcon_start_nic_stats(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 
@@ -2641,12 +2641,12 @@ void falcon_start_nic_stats(struct efx_nic *efx)
 /* We don't acutally pull stats on falcon. Wait 10ms so that
  * they arrive when we call this just after start_stats
  */
-static void falcon_pull_nic_stats(struct efx_nic *efx)
+static void falcon_pull_nic_stats(struct ef4_nic *efx)
 {
 	msleep(10);
 }
 
-void falcon_stop_nic_stats(struct efx_nic *efx)
+void falcon_stop_nic_stats(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	int i;
@@ -2672,7 +2672,7 @@ void falcon_stop_nic_stats(struct efx_nic *efx)
 	spin_unlock_bh(&efx->stats_lock);
 }
 
-static void falcon_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
+static void falcon_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
 {
 	falcon_board(efx)->type->set_id_led(efx, mode);
 }
@@ -2684,14 +2684,14 @@ static void falcon_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
  **************************************************************************
  */
 
-static void falcon_get_wol(struct efx_nic *efx, struct ethtool_wolinfo *wol)
+static void falcon_get_wol(struct ef4_nic *efx, struct ethtool_wolinfo *wol)
 {
 	wol->supported = 0;
 	wol->wolopts = 0;
 	memset(&wol->sopass, 0, sizeof(wol->sopass));
 }
 
-static int falcon_set_wol(struct efx_nic *efx, u32 type)
+static int falcon_set_wol(struct ef4_nic *efx, u32 type)
 {
 	if (type != 0)
 		return -EINVAL;
@@ -2705,9 +2705,8 @@ static int falcon_set_wol(struct efx_nic *efx, u32 type)
  **************************************************************************
  */
 
-const struct efx_nic_type falcon_a1_nic_type = {
-	.is_vf = false,
-	.mem_bar = EFX_MEM_BAR,
+const struct ef4_nic_type falcon_a1_nic_type = {
+	.mem_bar = EF4_MEM_BAR,
 	.mem_map_size = falcon_a1_mem_map_size,
 	.probe = falcon_probe_nic,
 	.remove = falcon_remove_nic,
@@ -2721,11 +2720,11 @@ const struct efx_nic_type falcon_a1_nic_type = {
 	.probe_port = falcon_probe_port,
 	.remove_port = falcon_remove_port,
 	.handle_global_event = falcon_handle_global_event,
-	.fini_dmaq = efx_farch_fini_dmaq,
+	.fini_dmaq = ef4_farch_fini_dmaq,
 	.prepare_flush = falcon_prepare_flush,
-	.finish_flush = efx_port_dummy_op_void,
-	.prepare_flr = efx_port_dummy_op_void,
-	.finish_flr = efx_farch_finish_flr,
+	.finish_flush = ef4_port_dummy_op_void,
+	.prepare_flr = ef4_port_dummy_op_void,
+	.finish_flr = ef4_farch_finish_flr,
 	.describe_stats = falcon_describe_nic_stats,
 	.update_stats = falcon_update_nic_stats,
 	.start_stats = falcon_start_nic_stats,
@@ -2739,47 +2738,48 @@ const struct efx_nic_type falcon_a1_nic_type = {
 	.check_mac_fault = falcon_xmac_check_fault,
 	.get_wol = falcon_get_wol,
 	.set_wol = falcon_set_wol,
-	.resume_wol = efx_port_dummy_op_void,
+	.resume_wol = ef4_port_dummy_op_void,
 	.test_nvram = falcon_test_nvram,
-	.irq_enable_master = efx_farch_irq_enable_master,
-	.irq_test_generate = efx_farch_irq_test_generate,
-	.irq_disable_non_ev = efx_farch_irq_disable_master,
-	.irq_handle_msi = efx_farch_msi_interrupt,
+	.irq_enable_master = ef4_farch_irq_enable_master,
+	.irq_test_generate = ef4_farch_irq_test_generate,
+	.irq_disable_non_ev = ef4_farch_irq_disable_master,
+	.irq_handle_msi = ef4_farch_msi_interrupt,
 	.irq_handle_legacy = falcon_legacy_interrupt_a1,
-	.tx_probe = efx_farch_tx_probe,
-	.tx_init = efx_farch_tx_init,
-	.tx_remove = efx_farch_tx_remove,
-	.tx_write = efx_farch_tx_write,
+	.tx_probe = ef4_farch_tx_probe,
+	.tx_init = ef4_farch_tx_init,
+	.tx_remove = ef4_farch_tx_remove,
+	.tx_write = ef4_farch_tx_write,
+	.tx_limit_len = ef4_farch_tx_limit_len,
 	.rx_push_rss_config = dummy_rx_push_rss_config,
-	.rx_probe = efx_farch_rx_probe,
-	.rx_init = efx_farch_rx_init,
-	.rx_remove = efx_farch_rx_remove,
-	.rx_write = efx_farch_rx_write,
-	.rx_defer_refill = efx_farch_rx_defer_refill,
-	.ev_probe = efx_farch_ev_probe,
-	.ev_init = efx_farch_ev_init,
-	.ev_fini = efx_farch_ev_fini,
-	.ev_remove = efx_farch_ev_remove,
-	.ev_process = efx_farch_ev_process,
-	.ev_read_ack = efx_farch_ev_read_ack,
-	.ev_test_generate = efx_farch_ev_test_generate,
+	.rx_probe = ef4_farch_rx_probe,
+	.rx_init = ef4_farch_rx_init,
+	.rx_remove = ef4_farch_rx_remove,
+	.rx_write = ef4_farch_rx_write,
+	.rx_defer_refill = ef4_farch_rx_defer_refill,
+	.ev_probe = ef4_farch_ev_probe,
+	.ev_init = ef4_farch_ev_init,
+	.ev_fini = ef4_farch_ev_fini,
+	.ev_remove = ef4_farch_ev_remove,
+	.ev_process = ef4_farch_ev_process,
+	.ev_read_ack = ef4_farch_ev_read_ack,
+	.ev_test_generate = ef4_farch_ev_test_generate,
 
 	/* We don't expose the filter table on Falcon A1 as it is not
 	 * mapped into function 0, but these implementations still
 	 * work with a degenerate case of all tables set to size 0.
 	 */
-	.filter_table_probe = efx_farch_filter_table_probe,
-	.filter_table_restore = efx_farch_filter_table_restore,
-	.filter_table_remove = efx_farch_filter_table_remove,
-	.filter_insert = efx_farch_filter_insert,
-	.filter_remove_safe = efx_farch_filter_remove_safe,
-	.filter_get_safe = efx_farch_filter_get_safe,
-	.filter_clear_rx = efx_farch_filter_clear_rx,
-	.filter_count_rx_used = efx_farch_filter_count_rx_used,
-	.filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
-	.filter_get_rx_ids = efx_farch_filter_get_rx_ids,
-
-#ifdef CONFIG_SFC_MTD
+	.filter_table_probe = ef4_farch_filter_table_probe,
+	.filter_table_restore = ef4_farch_filter_table_restore,
+	.filter_table_remove = ef4_farch_filter_table_remove,
+	.filter_insert = ef4_farch_filter_insert,
+	.filter_remove_safe = ef4_farch_filter_remove_safe,
+	.filter_get_safe = ef4_farch_filter_get_safe,
+	.filter_clear_rx = ef4_farch_filter_clear_rx,
+	.filter_count_rx_used = ef4_farch_filter_count_rx_used,
+	.filter_get_rx_id_limit = ef4_farch_filter_get_rx_id_limit,
+	.filter_get_rx_ids = ef4_farch_filter_get_rx_ids,
+
+#ifdef CONFIG_SFC_FALCON_MTD
 	.mtd_probe = falcon_mtd_probe,
 	.mtd_rename = falcon_mtd_rename,
 	.mtd_read = falcon_mtd_read,
@@ -2788,7 +2788,7 @@ const struct efx_nic_type falcon_a1_nic_type = {
 	.mtd_sync = falcon_mtd_sync,
 #endif
 
-	.revision = EFX_REV_FALCON_A1,
+	.revision = EF4_REV_FALCON_A1,
 	.txd_ptr_tbl_base = FR_AA_TX_DESC_PTR_TBL_KER,
 	.rxd_ptr_tbl_base = FR_AA_RX_DESC_PTR_TBL_KER,
 	.buf_tbl_base = FR_AA_BUF_FULL_TBL_KER,
@@ -2797,21 +2797,19 @@ const struct efx_nic_type falcon_a1_nic_type = {
 	.max_dma_mask = DMA_BIT_MASK(FSF_AZ_TX_KER_BUF_ADDR_WIDTH),
 	.rx_buffer_padding = 0x24,
 	.can_rx_scatter = false,
-	.max_interrupt_mode = EFX_INT_MODE_MSI,
+	.max_interrupt_mode = EF4_INT_MODE_MSI,
 	.timer_period_max =  1 << FRF_AB_TC_TIMER_VAL_WIDTH,
 	.offload_features = NETIF_F_IP_CSUM,
-	.mcdi_max_ver = -1,
 };
 
-const struct efx_nic_type falcon_b0_nic_type = {
-	.is_vf = false,
-	.mem_bar = EFX_MEM_BAR,
+const struct ef4_nic_type falcon_b0_nic_type = {
+	.mem_bar = EF4_MEM_BAR,
 	.mem_map_size = falcon_b0_mem_map_size,
 	.probe = falcon_probe_nic,
 	.remove = falcon_remove_nic,
 	.init = falcon_init_nic,
 	.dimension_resources = falcon_dimension_resources,
-	.fini = efx_port_dummy_op_void,
+	.fini = ef4_port_dummy_op_void,
 	.monitor = falcon_monitor,
 	.map_reset_reason = falcon_map_reset_reason,
 	.map_reset_flags = falcon_map_reset_flags,
@@ -2819,11 +2817,11 @@ const struct efx_nic_type falcon_b0_nic_type = {
 	.probe_port = falcon_probe_port,
 	.remove_port = falcon_remove_port,
 	.handle_global_event = falcon_handle_global_event,
-	.fini_dmaq = efx_farch_fini_dmaq,
+	.fini_dmaq = ef4_farch_fini_dmaq,
 	.prepare_flush = falcon_prepare_flush,
-	.finish_flush = efx_port_dummy_op_void,
-	.prepare_flr = efx_port_dummy_op_void,
-	.finish_flr = efx_farch_finish_flr,
+	.finish_flush = ef4_port_dummy_op_void,
+	.prepare_flr = ef4_port_dummy_op_void,
+	.finish_flr = ef4_farch_finish_flr,
 	.describe_stats = falcon_describe_nic_stats,
 	.update_stats = falcon_update_nic_stats,
 	.start_stats = falcon_start_nic_stats,
@@ -2837,47 +2835,48 @@ const struct efx_nic_type falcon_b0_nic_type = {
 	.check_mac_fault = falcon_xmac_check_fault,
 	.get_wol = falcon_get_wol,
 	.set_wol = falcon_set_wol,
-	.resume_wol = efx_port_dummy_op_void,
+	.resume_wol = ef4_port_dummy_op_void,
 	.test_chip = falcon_b0_test_chip,
 	.test_nvram = falcon_test_nvram,
-	.irq_enable_master = efx_farch_irq_enable_master,
-	.irq_test_generate = efx_farch_irq_test_generate,
-	.irq_disable_non_ev = efx_farch_irq_disable_master,
-	.irq_handle_msi = efx_farch_msi_interrupt,
-	.irq_handle_legacy = efx_farch_legacy_interrupt,
-	.tx_probe = efx_farch_tx_probe,
-	.tx_init = efx_farch_tx_init,
-	.tx_remove = efx_farch_tx_remove,
-	.tx_write = efx_farch_tx_write,
+	.irq_enable_master = ef4_farch_irq_enable_master,
+	.irq_test_generate = ef4_farch_irq_test_generate,
+	.irq_disable_non_ev = ef4_farch_irq_disable_master,
+	.irq_handle_msi = ef4_farch_msi_interrupt,
+	.irq_handle_legacy = ef4_farch_legacy_interrupt,
+	.tx_probe = ef4_farch_tx_probe,
+	.tx_init = ef4_farch_tx_init,
+	.tx_remove = ef4_farch_tx_remove,
+	.tx_write = ef4_farch_tx_write,
+	.tx_limit_len = ef4_farch_tx_limit_len,
 	.rx_push_rss_config = falcon_b0_rx_push_rss_config,
-	.rx_probe = efx_farch_rx_probe,
-	.rx_init = efx_farch_rx_init,
-	.rx_remove = efx_farch_rx_remove,
-	.rx_write = efx_farch_rx_write,
-	.rx_defer_refill = efx_farch_rx_defer_refill,
-	.ev_probe = efx_farch_ev_probe,
-	.ev_init = efx_farch_ev_init,
-	.ev_fini = efx_farch_ev_fini,
-	.ev_remove = efx_farch_ev_remove,
-	.ev_process = efx_farch_ev_process,
-	.ev_read_ack = efx_farch_ev_read_ack,
-	.ev_test_generate = efx_farch_ev_test_generate,
-	.filter_table_probe = efx_farch_filter_table_probe,
-	.filter_table_restore = efx_farch_filter_table_restore,
-	.filter_table_remove = efx_farch_filter_table_remove,
-	.filter_update_rx_scatter = efx_farch_filter_update_rx_scatter,
-	.filter_insert = efx_farch_filter_insert,
-	.filter_remove_safe = efx_farch_filter_remove_safe,
-	.filter_get_safe = efx_farch_filter_get_safe,
-	.filter_clear_rx = efx_farch_filter_clear_rx,
-	.filter_count_rx_used = efx_farch_filter_count_rx_used,
-	.filter_get_rx_id_limit = efx_farch_filter_get_rx_id_limit,
-	.filter_get_rx_ids = efx_farch_filter_get_rx_ids,
+	.rx_probe = ef4_farch_rx_probe,
+	.rx_init = ef4_farch_rx_init,
+	.rx_remove = ef4_farch_rx_remove,
+	.rx_write = ef4_farch_rx_write,
+	.rx_defer_refill = ef4_farch_rx_defer_refill,
+	.ev_probe = ef4_farch_ev_probe,
+	.ev_init = ef4_farch_ev_init,
+	.ev_fini = ef4_farch_ev_fini,
+	.ev_remove = ef4_farch_ev_remove,
+	.ev_process = ef4_farch_ev_process,
+	.ev_read_ack = ef4_farch_ev_read_ack,
+	.ev_test_generate = ef4_farch_ev_test_generate,
+	.filter_table_probe = ef4_farch_filter_table_probe,
+	.filter_table_restore = ef4_farch_filter_table_restore,
+	.filter_table_remove = ef4_farch_filter_table_remove,
+	.filter_update_rx_scatter = ef4_farch_filter_update_rx_scatter,
+	.filter_insert = ef4_farch_filter_insert,
+	.filter_remove_safe = ef4_farch_filter_remove_safe,
+	.filter_get_safe = ef4_farch_filter_get_safe,
+	.filter_clear_rx = ef4_farch_filter_clear_rx,
+	.filter_count_rx_used = ef4_farch_filter_count_rx_used,
+	.filter_get_rx_id_limit = ef4_farch_filter_get_rx_id_limit,
+	.filter_get_rx_ids = ef4_farch_filter_get_rx_ids,
 #ifdef CONFIG_RFS_ACCEL
-	.filter_rfs_insert = efx_farch_filter_rfs_insert,
-	.filter_rfs_expire_one = efx_farch_filter_rfs_expire_one,
+	.filter_rfs_insert = ef4_farch_filter_rfs_insert,
+	.filter_rfs_expire_one = ef4_farch_filter_rfs_expire_one,
 #endif
-#ifdef CONFIG_SFC_MTD
+#ifdef CONFIG_SFC_FALCON_MTD
 	.mtd_probe = falcon_mtd_probe,
 	.mtd_rename = falcon_mtd_rename,
 	.mtd_read = falcon_mtd_read,
@@ -2886,7 +2885,7 @@ const struct efx_nic_type falcon_b0_nic_type = {
 	.mtd_sync = falcon_mtd_sync,
 #endif
 
-	.revision = EFX_REV_FALCON_B0,
+	.revision = EF4_REV_FALCON_B0,
 	.txd_ptr_tbl_base = FR_BZ_TX_DESC_PTR_TBL,
 	.rxd_ptr_tbl_base = FR_BZ_RX_DESC_PTR_TBL,
 	.buf_tbl_base = FR_BZ_BUF_FULL_TBL,
@@ -2897,9 +2896,8 @@ const struct efx_nic_type falcon_b0_nic_type = {
 	.rx_hash_offset = FS_BZ_RX_PREFIX_HASH_OFST,
 	.rx_buffer_padding = 0,
 	.can_rx_scatter = true,
-	.max_interrupt_mode = EFX_INT_MODE_MSIX,
+	.max_interrupt_mode = EF4_INT_MODE_MSIX,
 	.timer_period_max =  1 << FRF_AB_TC_TIMER_VAL_WIDTH,
 	.offload_features = NETIF_F_IP_CSUM | NETIF_F_RXHASH | NETIF_F_NTUPLE,
-	.mcdi_max_ver = -1,
 	.max_rx_ip_filters = FR_BZ_RX_FILTER_TBL0_ROWS,
 };
diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon/falcon_boards.c
index f6883b2b5da3..dec83a217093 100644
--- a/drivers/net/ethernet/sfc/falcon_boards.c
+++ b/drivers/net/ethernet/sfc/falcon/falcon_boards.c
@@ -66,7 +66,7 @@
 
 #if IS_ENABLED(CONFIG_SENSORS_LM87)
 
-static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values)
+static int ef4_poke_lm87(struct i2c_client *client, const u8 *reg_values)
 {
 	while (*reg_values) {
 		u8 reg = *reg_values++;
@@ -87,7 +87,7 @@ static const u8 falcon_lm87_common_regs[] = {
 	0
 };
 
-static int efx_init_lm87(struct efx_nic *efx, const struct i2c_board_info *info,
+static int ef4_init_lm87(struct ef4_nic *efx, const struct i2c_board_info *info,
 			 const u8 *reg_values)
 {
 	struct falcon_board *board = falcon_board(efx);
@@ -101,10 +101,10 @@ static int efx_init_lm87(struct efx_nic *efx, const struct i2c_board_info *info,
 	i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
 	i2c_smbus_read_byte_data(client, LM87_REG_ALARMS2);
 
-	rc = efx_poke_lm87(client, reg_values);
+	rc = ef4_poke_lm87(client, reg_values);
 	if (rc)
 		goto err;
-	rc = efx_poke_lm87(client, falcon_lm87_common_regs);
+	rc = ef4_poke_lm87(client, falcon_lm87_common_regs);
 	if (rc)
 		goto err;
 
@@ -116,12 +116,12 @@ err:
 	return rc;
 }
 
-static void efx_fini_lm87(struct efx_nic *efx)
+static void ef4_fini_lm87(struct ef4_nic *efx)
 {
 	i2c_unregister_device(falcon_board(efx)->hwmon_client);
 }
 
-static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
+static int ef4_check_lm87(struct ef4_nic *efx, unsigned mask)
 {
 	struct i2c_client *client = falcon_board(efx)->hwmon_client;
 	bool temp_crit, elec_fault, is_failure;
@@ -129,7 +129,7 @@ static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
 	s32 reg;
 
 	/* If link is up then do not monitor temperature */
-	if (EFX_WORKAROUND_7884(efx) && efx->link_state.up)
+	if (EF4_WORKAROUND_7884(efx) && efx->link_state.up)
 		return 0;
 
 	reg = i2c_smbus_read_byte_data(client, LM87_REG_ALARMS1);
@@ -179,15 +179,15 @@ static int efx_check_lm87(struct efx_nic *efx, unsigned mask)
 #else /* !CONFIG_SENSORS_LM87 */
 
 static inline int
-efx_init_lm87(struct efx_nic *efx, const struct i2c_board_info *info,
+ef4_init_lm87(struct ef4_nic *efx, const struct i2c_board_info *info,
 	      const u8 *reg_values)
 {
 	return 0;
 }
-static inline void efx_fini_lm87(struct efx_nic *efx)
+static inline void ef4_fini_lm87(struct ef4_nic *efx)
 {
 }
-static inline int efx_check_lm87(struct efx_nic *efx, unsigned mask)
+static inline int ef4_check_lm87(struct ef4_nic *efx, unsigned mask)
 {
 	return 0;
 }
@@ -255,7 +255,7 @@ static inline int efx_check_lm87(struct efx_nic *efx, unsigned mask)
 #define MAX664X_REG_RSL		0x02
 #define MAX664X_REG_WLHO	0x0B
 
-static void sfe4001_poweroff(struct efx_nic *efx)
+static void sfe4001_poweroff(struct ef4_nic *efx)
 {
 	struct i2c_client *ioexp_client = falcon_board(efx)->ioexp_client;
 	struct i2c_client *hwmon_client = falcon_board(efx)->hwmon_client;
@@ -269,7 +269,7 @@ static void sfe4001_poweroff(struct efx_nic *efx)
 	i2c_smbus_read_byte_data(hwmon_client, MAX664X_REG_RSL);
 }
 
-static int sfe4001_poweron(struct efx_nic *efx)
+static int sfe4001_poweron(struct ef4_nic *efx)
 {
 	struct i2c_client *ioexp_client = falcon_board(efx)->ioexp_client;
 	struct i2c_client *hwmon_client = falcon_board(efx)->hwmon_client;
@@ -360,7 +360,7 @@ fail_on:
 static ssize_t show_phy_flash_cfg(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
 	return sprintf(buf, "%d\n", !!(efx->phy_mode & PHY_MODE_SPECIAL));
 }
 
@@ -368,8 +368,8 @@ static ssize_t set_phy_flash_cfg(struct device *dev,
 				 struct device_attribute *attr,
 				 const char *buf, size_t count)
 {
-	struct efx_nic *efx = pci_get_drvdata(to_pci_dev(dev));
-	enum efx_phy_mode old_mode, new_mode;
+	struct ef4_nic *efx = pci_get_drvdata(to_pci_dev(dev));
+	enum ef4_phy_mode old_mode, new_mode;
 	int err;
 
 	rtnl_lock();
@@ -390,7 +390,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev,
 			falcon_stop_nic_stats(efx);
 		err = sfe4001_poweron(efx);
 		if (!err)
-			err = efx_reconfigure_port(efx);
+			err = ef4_reconfigure_port(efx);
 		if (!(new_mode & PHY_MODE_SPECIAL))
 			falcon_start_nic_stats(efx);
 	}
@@ -401,7 +401,7 @@ static ssize_t set_phy_flash_cfg(struct device *dev,
 
 static DEVICE_ATTR(phy_flash_cfg, 0644, show_phy_flash_cfg, set_phy_flash_cfg);
 
-static void sfe4001_fini(struct efx_nic *efx)
+static void sfe4001_fini(struct ef4_nic *efx)
 {
 	struct falcon_board *board = falcon_board(efx);
 
@@ -413,13 +413,13 @@ static void sfe4001_fini(struct efx_nic *efx)
 	i2c_unregister_device(board->hwmon_client);
 }
 
-static int sfe4001_check_hw(struct efx_nic *efx)
+static int sfe4001_check_hw(struct ef4_nic *efx)
 {
 	struct falcon_nic_data *nic_data = efx->nic_data;
 	s32 status;
 
 	/* If XAUI link is up then do not monitor */
-	if (EFX_WORKAROUND_7884(efx) && !nic_data->xmac_poll_required)
+	if (EF4_WORKAROUND_7884(efx) && !nic_data->xmac_poll_required)
 		return 0;
 
 	/* Check the powered status of the PHY. Lack of power implies that
@@ -450,7 +450,7 @@ static const struct i2c_board_info sfe4001_hwmon_info = {
  * be turned on before the PHY can be used.
  * Context: Process context, rtnl lock held
  */
-static int sfe4001_init(struct efx_nic *efx)
+static int sfe4001_init(struct ef4_nic *efx)
 {
 	struct falcon_board *board = falcon_board(efx);
 	int rc;
@@ -537,7 +537,7 @@ static const struct i2c_board_info sfe4002_hwmon_info = {
 #define SFE4002_RX_LED    (0)	/* Green */
 #define SFE4002_TX_LED    (1)	/* Amber */
 
-static void sfe4002_init_phy(struct efx_nic *efx)
+static void sfe4002_init_phy(struct ef4_nic *efx)
 {
 	/* Set the TX and RX LEDs to reflect status and activity, and the
 	 * fault LED off */
@@ -548,14 +548,14 @@ static void sfe4002_init_phy(struct efx_nic *efx)
 	falcon_qt202x_set_led(efx, SFE4002_FAULT_LED, QUAKE_LED_OFF);
 }
 
-static void sfe4002_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
+static void sfe4002_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
 {
 	falcon_qt202x_set_led(
 		efx, SFE4002_FAULT_LED,
-		(mode == EFX_LED_ON) ? QUAKE_LED_ON : QUAKE_LED_OFF);
+		(mode == EF4_LED_ON) ? QUAKE_LED_ON : QUAKE_LED_OFF);
 }
 
-static int sfe4002_check_hw(struct efx_nic *efx)
+static int sfe4002_check_hw(struct ef4_nic *efx)
 {
 	struct falcon_board *board = falcon_board(efx);
 
@@ -565,12 +565,12 @@ static int sfe4002_check_hw(struct efx_nic *efx)
 		(board->major == 0 && board->minor == 0) ?
 		~LM87_ALARM_TEMP_EXT1 : ~0;
 
-	return efx_check_lm87(efx, alarm_mask);
+	return ef4_check_lm87(efx, alarm_mask);
 }
 
-static int sfe4002_init(struct efx_nic *efx)
+static int sfe4002_init(struct ef4_nic *efx)
 {
-	return efx_init_lm87(efx, &sfe4002_hwmon_info, sfe4002_lm87_regs);
+	return ef4_init_lm87(efx, &sfe4002_hwmon_info, sfe4002_lm87_regs);
 }
 
 /*****************************************************************************
@@ -599,7 +599,7 @@ static const struct i2c_board_info sfn4112f_hwmon_info = {
 #define SFN4112F_ACT_LED	0
 #define SFN4112F_LINK_LED	1
 
-static void sfn4112f_init_phy(struct efx_nic *efx)
+static void sfn4112f_init_phy(struct ef4_nic *efx)
 {
 	falcon_qt202x_set_led(efx, SFN4112F_ACT_LED,
 			      QUAKE_LED_RXLINK | QUAKE_LED_LINK_ACT);
@@ -607,15 +607,15 @@ static void sfn4112f_init_phy(struct efx_nic *efx)
 			      QUAKE_LED_RXLINK | QUAKE_LED_LINK_STAT);
 }
 
-static void sfn4112f_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
+static void sfn4112f_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
 {
 	int reg;
 
 	switch (mode) {
-	case EFX_LED_OFF:
+	case EF4_LED_OFF:
 		reg = QUAKE_LED_OFF;
 		break;
-	case EFX_LED_ON:
+	case EF4_LED_ON:
 		reg = QUAKE_LED_ON;
 		break;
 	default:
@@ -626,15 +626,15 @@ static void sfn4112f_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
 	falcon_qt202x_set_led(efx, SFN4112F_LINK_LED, reg);
 }
 
-static int sfn4112f_check_hw(struct efx_nic *efx)
+static int sfn4112f_check_hw(struct ef4_nic *efx)
 {
 	/* Mask out unused sensors */
-	return efx_check_lm87(efx, ~0x48);
+	return ef4_check_lm87(efx, ~0x48);
 }
 
-static int sfn4112f_init(struct efx_nic *efx)
+static int sfn4112f_init(struct ef4_nic *efx)
 {
-	return efx_init_lm87(efx, &sfn4112f_hwmon_info, sfn4112f_lm87_regs);
+	return ef4_init_lm87(efx, &sfn4112f_hwmon_info, sfn4112f_lm87_regs);
 }
 
 /*****************************************************************************
@@ -663,7 +663,7 @@ static const struct i2c_board_info sfe4003_hwmon_info = {
 #define SFE4003_LED_ON		1
 #define SFE4003_LED_OFF		0
 
-static void sfe4003_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
+static void sfe4003_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
 {
 	struct falcon_board *board = falcon_board(efx);
 
@@ -673,10 +673,10 @@ static void sfe4003_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
 
 	falcon_txc_set_gpio_val(
 		efx, SFE4003_RED_LED_GPIO,
-		(mode == EFX_LED_ON) ? SFE4003_LED_ON : SFE4003_LED_OFF);
+		(mode == EF4_LED_ON) ? SFE4003_LED_ON : SFE4003_LED_OFF);
 }
 
-static void sfe4003_init_phy(struct efx_nic *efx)
+static void sfe4003_init_phy(struct ef4_nic *efx)
 {
 	struct falcon_board *board = falcon_board(efx);
 
@@ -688,7 +688,7 @@ static void sfe4003_init_phy(struct efx_nic *efx)
 	falcon_txc_set_gpio_val(efx, SFE4003_RED_LED_GPIO, SFE4003_LED_OFF);
 }
 
-static int sfe4003_check_hw(struct efx_nic *efx)
+static int sfe4003_check_hw(struct ef4_nic *efx)
 {
 	struct falcon_board *board = falcon_board(efx);
 
@@ -698,19 +698,19 @@ static int sfe4003_check_hw(struct efx_nic *efx)
 		(board->major == 0 && board->minor <= 2) ?
 		~LM87_ALARM_TEMP_EXT1 : ~0;
 
-	return efx_check_lm87(efx, alarm_mask);
+	return ef4_check_lm87(efx, alarm_mask);
 }
 
-static int sfe4003_init(struct efx_nic *efx)
+static int sfe4003_init(struct ef4_nic *efx)
 {
-	return efx_init_lm87(efx, &sfe4003_hwmon_info, sfe4003_lm87_regs);
+	return ef4_init_lm87(efx, &sfe4003_hwmon_info, sfe4003_lm87_regs);
 }
 
 static const struct falcon_board_type board_types[] = {
 	{
 		.id		= FALCON_BOARD_SFE4001,
 		.init		= sfe4001_init,
-		.init_phy	= efx_port_dummy_op_void,
+		.init_phy	= ef4_port_dummy_op_void,
 		.fini		= sfe4001_fini,
 		.set_id_led	= tenxpress_set_id_led,
 		.monitor	= sfe4001_check_hw,
@@ -719,7 +719,7 @@ static const struct falcon_board_type board_types[] = {
 		.id		= FALCON_BOARD_SFE4002,
 		.init		= sfe4002_init,
 		.init_phy	= sfe4002_init_phy,
-		.fini		= efx_fini_lm87,
+		.fini		= ef4_fini_lm87,
 		.set_id_led	= sfe4002_set_id_led,
 		.monitor	= sfe4002_check_hw,
 	},
@@ -727,7 +727,7 @@ static const struct falcon_board_type board_types[] = {
 		.id		= FALCON_BOARD_SFE4003,
 		.init		= sfe4003_init,
 		.init_phy	= sfe4003_init_phy,
-		.fini		= efx_fini_lm87,
+		.fini		= ef4_fini_lm87,
 		.set_id_led	= sfe4003_set_id_led,
 		.monitor	= sfe4003_check_hw,
 	},
@@ -735,13 +735,13 @@ static const struct falcon_board_type board_types[] = {
 		.id		= FALCON_BOARD_SFN4112F,
 		.init		= sfn4112f_init,
 		.init_phy	= sfn4112f_init_phy,
-		.fini		= efx_fini_lm87,
+		.fini		= ef4_fini_lm87,
 		.set_id_led	= sfn4112f_set_id_led,
 		.monitor	= sfn4112f_check_hw,
 	},
 };
 
-int falcon_probe_board(struct efx_nic *efx, u16 revision_info)
+int falcon_probe_board(struct ef4_nic *efx, u16 revision_info)
 {
 	struct falcon_board *board = falcon_board(efx);
 	u8 type_id = FALCON_BOARD_TYPE(revision_info);
diff --git a/drivers/net/ethernet/sfc/falcon/farch.c b/drivers/net/ethernet/sfc/falcon/farch.c
new file mode 100644
index 000000000000..05916c710d8c
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/farch.c
@@ -0,0 +1,2892 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/crc32.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "nic.h"
+#include "farch_regs.h"
+#include "io.h"
+#include "workarounds.h"
+
+/* Falcon-architecture (SFC4000) support */
+
+/**************************************************************************
+ *
+ * Configurable values
+ *
+ **************************************************************************
+ */
+
+/* This is set to 16 for a good reason.  In summary, if larger than
+ * 16, the descriptor cache holds more than a default socket
+ * buffer's worth of packets (for UDP we can only have at most one
+ * socket buffer's worth outstanding).  This combined with the fact
+ * that we only get 1 TX event per descriptor cache means the NIC
+ * goes idle.
+ */
+#define TX_DC_ENTRIES 16
+#define TX_DC_ENTRIES_ORDER 1 /* NOTE(review): presumably 16 == 8 << 1; confirm */
+
+#define RX_DC_ENTRIES 64
+#define RX_DC_ENTRIES_ORDER 3 /* NOTE(review): presumably 64 == 8 << 3; confirm */
+
+/* If EF4_MAX_INT_ERRORS internal errors occur within
+ * EF4_INT_ERROR_EXPIRE seconds, we consider the NIC broken and
+ * disable it.
+ */
+#define EF4_INT_ERROR_EXPIRE 3600
+#define EF4_MAX_INT_ERRORS 5
+
+/* Depth of RX flush request fifo */
+#define EF4_RX_FLUSH_COUNT 4
+
+/* Driver generated events: magic value is (event code << 8) | event data */
+#define _EF4_CHANNEL_MAGIC_TEST		0x000101 /* data: channel->channel */
+#define _EF4_CHANNEL_MAGIC_FILL		0x000102 /* data: RX queue index */
+#define _EF4_CHANNEL_MAGIC_RX_DRAIN	0x000103 /* data: RX queue index */
+#define _EF4_CHANNEL_MAGIC_TX_DRAIN	0x000104 /* data: tx_queue->queue */
+
+#define _EF4_CHANNEL_MAGIC(_code, _data)	((_code) << 8 | (_data))
+#define _EF4_CHANNEL_MAGIC_CODE(_magic)		((_magic) >> 8)
+
+#define EF4_CHANNEL_MAGIC_TEST(_channel)				\
+	_EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_TEST, (_channel)->channel)
+#define EF4_CHANNEL_MAGIC_FILL(_rx_queue)				\
+	_EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_FILL,			\
+			   ef4_rx_queue_index(_rx_queue))
+#define EF4_CHANNEL_MAGIC_RX_DRAIN(_rx_queue)				\
+	_EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_RX_DRAIN,			\
+			   ef4_rx_queue_index(_rx_queue))
+#define EF4_CHANNEL_MAGIC_TX_DRAIN(_tx_queue)				\
+	_EF4_CHANNEL_MAGIC(_EF4_CHANNEL_MAGIC_TX_DRAIN,			\
+			   (_tx_queue)->queue)
+
+static void ef4_farch_magic_event(struct ef4_channel *channel, u32 magic);
+
+/**************************************************************************
+ *
+ * Hardware access
+ *
+ **************************************************************************/
+
+static inline void ef4_write_buf_tbl(struct ef4_nic *efx, ef4_qword_t *value,
+				     unsigned int index)
+{	/* Write *value to entry @index of the table at type->buf_tbl_base */
+	ef4_sram_writeq(efx, efx->membase + efx->type->buf_tbl_base,
+			value, index);
+}
+
+static bool ef4_masked_compare_oword(const ef4_oword_t *a, const ef4_oword_t *b,
+				     const ef4_oword_t *mask)
+{	/* Returns true if a and b DIFFER in any bit that is set in mask */
+	return ((a->u64[0] ^ b->u64[0]) & mask->u64[0]) ||
+		((a->u64[1] ^ b->u64[1]) & mask->u64[1]);
+}
+
+int ef4_farch_test_registers(struct ef4_nic *efx,
+			     const struct ef4_farch_register_test *regs,
+			     size_t n_regs)
+{	/* Register self-test: 0 if every masked bit toggles, else -EIO */
+	unsigned address = 0;	/* function scope so the fail path can report it */
+	int i, j;
+	ef4_oword_t mask, imask, original, reg, buf;
+
+	for (i = 0; i < n_regs; ++i) {
+		address = regs[i].address;
+		mask = imask = regs[i].mask;
+		EF4_INVERT_OWORD(imask); /* NOTE(review): imask is not read again here */
+
+		ef4_reado(efx, &original, address); /* saved; written back below */
+
+		/* bit sweep on and off */
+		for (j = 0; j < 128; j++) {
+			if (!EF4_EXTRACT_OWORD32(mask, j, j))
+				continue; /* bit j is not set in mask: skip it */
+
+			/* Test this testable bit can be set in isolation */
+			EF4_AND_OWORD(reg, original, mask);
+			EF4_SET_OWORD32(reg, j, j, 1);
+
+			ef4_writeo(efx, &reg, address);
+			ef4_reado(efx, &buf, address);
+
+			if (ef4_masked_compare_oword(&reg, &buf, &mask))
+				goto fail; /* read-back differs within mask */
+
+			/* Test this testable bit can be cleared in isolation */
+			EF4_OR_OWORD(reg, original, mask);
+			EF4_SET_OWORD32(reg, j, j, 0);
+
+			ef4_writeo(efx, &reg, address);
+			ef4_reado(efx, &buf, address);
+
+			if (ef4_masked_compare_oword(&reg, &buf, &mask))
+				goto fail; /* read-back differs within mask */
+		}
+
+		ef4_writeo(efx, &original, address); /* not reached on failure */
+	}
+
+	return 0;
+
+fail:
+	netif_err(efx, hw, efx->net_dev,
+		  "wrote "EF4_OWORD_FMT" read "EF4_OWORD_FMT
+		  " at address 0x%x mask "EF4_OWORD_FMT"\n", EF4_OWORD_VAL(reg),
+		  EF4_OWORD_VAL(buf), address, EF4_OWORD_VAL(mask));
+	return -EIO;
+}
+
+/**************************************************************************
+ *
+ * Special buffer handling
+ * Special buffers are used for event queues and the TX and RX
+ * descriptor rings.
+ *
+ *************************************************************************/
+
+/*
+ * Initialise a special buffer
+ *
+ * Writes one buffer table entry per EF4_BUF_SIZE chunk of a buffer
+ * previously allocated via ef4_alloc_special_buffer(), starting at
+ * buffer->index, so it can be used for event queues, descriptor rings etc.
+ */
+static void
+ef4_init_special_buffer(struct ef4_nic *efx, struct ef4_special_buffer *buffer)
+{
+	ef4_qword_t buf_desc;
+	unsigned int index;
+	dma_addr_t dma_addr;
+	int i;
+
+	EF4_BUG_ON_PARANOID(!buffer->buf.addr);
+
+	/* Write buffer descriptors to NIC */
+	for (i = 0; i < buffer->entries; i++) {
+		index = buffer->index + i;
+		dma_addr = buffer->buf.dma_addr + (i * EF4_BUF_SIZE);
+		netif_dbg(efx, probe, efx->net_dev,
+			  "mapping special buffer %u at %llx\n",
+			  index, (unsigned long long)dma_addr);
+		EF4_POPULATE_QWORD_3(buf_desc,
+				     FRF_AZ_BUF_ADR_REGION, 0,
+				     FRF_AZ_BUF_ADR_FBUF, dma_addr >> 12, /* 4096-byte units */
+				     FRF_AZ_BUF_OWNER_ID_FBUF, 0);
+		ef4_write_buf_tbl(efx, &buf_desc, index);
+	}
+}
+
+/* Unmap a special buffer by clearing its buffer table entries (if any) */
+static void
+ef4_fini_special_buffer(struct ef4_nic *efx, struct ef4_special_buffer *buffer)
+{
+	ef4_oword_t buf_tbl_upd;
+	unsigned int start = buffer->index;
+	unsigned int end = (buffer->index + buffer->entries - 1);
+
+	if (!buffer->entries)
+		return;	/* 'end' is start - 1 here and is deliberately not used */
+
+	netif_dbg(efx, hw, efx->net_dev, "unmapping special buffers %u-%u\n",
+		  start, end);
+
+	EF4_POPULATE_OWORD_4(buf_tbl_upd,
+			     FRF_AZ_BUF_UPD_CMD, 0,
+			     FRF_AZ_BUF_CLR_CMD, 1,
+			     FRF_AZ_BUF_CLR_END_ID, end,
+			     FRF_AZ_BUF_CLR_START_ID, start);
+	ef4_writeo(efx, &buf_tbl_upd, FR_AZ_BUF_TBL_UPD);
+}
+
+/*
+ * Allocate a new special buffer
+ *
+ * Rounds @len up to a multiple of EF4_BUF_SIZE, allocates memory for a new
+ * buffer, clears it and allocates a new buffer ID range.  It does not write
+ * into the buffer table.  Returns 0 on success or -ENOMEM.  This call will
+ * allocate 4KB buffers, since 8KB buffers can't be used for event queues
+ * and descriptor rings.
+ */
+static int ef4_alloc_special_buffer(struct ef4_nic *efx,
+				    struct ef4_special_buffer *buffer,
+				    unsigned int len)
+{
+	len = ALIGN(len, EF4_BUF_SIZE);
+
+	if (ef4_nic_alloc_buffer(efx, &buffer->buf, len, GFP_KERNEL))
+		return -ENOMEM;	/* any allocator failure is reported as -ENOMEM */
+	buffer->entries = len / EF4_BUF_SIZE;
+	BUG_ON(buffer->buf.dma_addr & (EF4_BUF_SIZE - 1)); /* must be aligned */
+
+	/* Select new buffer ID */
+	buffer->index = efx->next_buffer_table;
+	efx->next_buffer_table += buffer->entries; /* next range starts here */
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "allocating special buffers %d-%d at %llx+%x "
+		  "(virt %p phys %llx)\n", buffer->index,
+		  buffer->index + buffer->entries - 1,
+		  (u64)buffer->buf.dma_addr, len,
+		  buffer->buf.addr, (u64)virt_to_phys(buffer->buf.addr));
+
+	return 0;
+}
+
+static void
+ef4_free_special_buffer(struct ef4_nic *efx, struct ef4_special_buffer *buffer)
+{	/* Free the memory behind a special buffer and zero its entry count */
+	if (!buffer->buf.addr)
+		return;	/* no memory attached: nothing to free */
+
+	netif_dbg(efx, hw, efx->net_dev,
+		  "deallocating special buffers %d-%d at %llx+%x "
+		  "(virt %p phys %llx)\n", buffer->index,
+		  buffer->index + buffer->entries - 1,
+		  (u64)buffer->buf.dma_addr, buffer->buf.len,
+		  buffer->buf.addr, (u64)virt_to_phys(buffer->buf.addr));
+
+	ef4_nic_free_buffer(efx, &buffer->buf);
+	buffer->entries = 0; /* ef4_fini_special_buffer() returns early on 0 */
+}
+
+/**************************************************************************
+ *
+ * TX path
+ *
+ **************************************************************************/
+
+/* Write the TX descriptor ring's write pointer (TX_DESC_WPTR) to the NIC */
+static inline void ef4_farch_notify_tx_desc(struct ef4_tx_queue *tx_queue)
+{
+	unsigned write_ptr;
+	ef4_dword_t reg;
+
+	write_ptr = tx_queue->write_count & tx_queue->ptr_mask; /* ring index */
+	EF4_POPULATE_DWORD_1(reg, FRF_AZ_TX_DESC_WPTR_DWORD, write_ptr);
+	ef4_writed_page(tx_queue->efx, &reg,
+			FR_AZ_TX_DESC_UPD_DWORD_P0, tx_queue->queue);
+}
+
+/* Write the ring write pointer together with a copy of descriptor *txd */
+static inline void ef4_farch_push_tx_desc(struct ef4_tx_queue *tx_queue,
+					  const ef4_qword_t *txd)
+{
+	unsigned write_ptr;
+	ef4_oword_t reg;
+
+	BUILD_BUG_ON(FRF_AZ_TX_DESC_LBN != 0);
+	BUILD_BUG_ON(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0); /* same offset */
+
+	write_ptr = tx_queue->write_count & tx_queue->ptr_mask; /* ring index */
+	EF4_POPULATE_OWORD_2(reg, FRF_AZ_TX_DESC_PUSH_CMD, true,
+			     FRF_AZ_TX_DESC_WPTR, write_ptr);
+	reg.qword[0] = *txd; /* first qword of reg is replaced by the descriptor */
+	ef4_writeo_page(tx_queue->efx, &reg,
+			FR_BZ_TX_DESC_UPD_P0, tx_queue->queue);
+}
+
+
+/* For each entry inserted into the software descriptor ring, create a
+ * descriptor in the hardware TX descriptor ring (in host memory), and
+ * write a doorbell.
+ */
+void ef4_farch_tx_write(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_tx_buffer *buffer;
+	ef4_qword_t *txd;
+	unsigned write_ptr;
+	unsigned old_write_count = tx_queue->write_count; /* first new entry */
+
+	tx_queue->xmit_more_available = false;
+	if (unlikely(tx_queue->write_count == tx_queue->insert_count))
+		return;	/* nothing inserted since the last write */
+
+	do {
+		write_ptr = tx_queue->write_count & tx_queue->ptr_mask;
+		buffer = &tx_queue->buffer[write_ptr];
+		txd = ef4_tx_desc(tx_queue, write_ptr);
+		++tx_queue->write_count;
+
+		EF4_BUG_ON_PARANOID(buffer->flags & EF4_TX_BUF_OPTION);
+
+		/* Create TX descriptor ring entry */
+		BUILD_BUG_ON(EF4_TX_BUF_CONT != 1); /* flag bit is used as the field value */
+		EF4_POPULATE_QWORD_4(*txd,
+				     FSF_AZ_TX_KER_CONT,
+				     buffer->flags & EF4_TX_BUF_CONT,
+				     FSF_AZ_TX_KER_BYTE_COUNT, buffer->len,
+				     FSF_AZ_TX_KER_BUF_REGION, 0,
+				     FSF_AZ_TX_KER_BUF_ADDR, buffer->dma_addr);
+	} while (tx_queue->write_count != tx_queue->insert_count);
+
+	wmb(); /* Ensure descriptors are written before they are fetched */
+
+	if (ef4_nic_may_push_tx_desc(tx_queue, old_write_count)) {
+		txd = ef4_tx_desc(tx_queue,
+				  old_write_count & tx_queue->ptr_mask);
+		ef4_farch_push_tx_desc(tx_queue, txd); /* pointer + first new descriptor */
+		++tx_queue->pushes;
+	} else {
+		ef4_farch_notify_tx_desc(tx_queue); /* write pointer only */
+	}
+}
+
+unsigned int ef4_farch_tx_limit_len(struct ef4_tx_queue *tx_queue,
+				    dma_addr_t dma_addr, unsigned int len)
+{
+	/* Bytes left before dma_addr reaches the next EF4_PAGE_SIZE boundary */
+	unsigned int to_boundary = (~dma_addr & (EF4_PAGE_SIZE - 1)) + 1;
+	unsigned int misalign = dma_addr & 0xf;
+
+	len = min(to_boundary, len);
+	/* Workaround 5391: cap transfers that start off a 16-byte boundary */
+	if (EF4_WORKAROUND_5391(tx_queue->efx) && misalign)
+		len = min_t(unsigned int, len, 512 - misalign);
+	return len;
+}
+
+
+/* Allocate the special buffer that backs a TX descriptor ring.
+ * Returns whatever ef4_alloc_special_buffer() returns.
+ */
+int ef4_farch_tx_probe(struct ef4_tx_queue *tx_queue)
+{
+	unsigned ring_entries = tx_queue->ptr_mask + 1;
+
+	return ef4_alloc_special_buffer(tx_queue->efx, &tx_queue->txd,
+					ring_entries * sizeof(ef4_qword_t));
+}
+
+/* Program the hardware with this TX queue's descriptor ring and options */
+void ef4_farch_tx_init(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	ef4_oword_t reg;
+
+	/* Pin TX descriptor ring */
+	ef4_init_special_buffer(efx, &tx_queue->txd);
+
+	/* Build this queue's descriptor pointer table entry */
+	EF4_POPULATE_OWORD_10(reg,
+			      FRF_AZ_TX_DESCQ_EN, 1,
+			      FRF_AZ_TX_ISCSI_DDIG_EN, 0,
+			      FRF_AZ_TX_ISCSI_HDIG_EN, 0,
+			      FRF_AZ_TX_DESCQ_BUF_BASE_ID, tx_queue->txd.index,
+			      FRF_AZ_TX_DESCQ_EVQ_ID,
+			      tx_queue->channel->channel,
+			      FRF_AZ_TX_DESCQ_OWNER_ID, 0,
+			      FRF_AZ_TX_DESCQ_LABEL, tx_queue->queue,
+			      FRF_AZ_TX_DESCQ_SIZE,
+			      __ffs(tx_queue->txd.entries),
+			      FRF_AZ_TX_DESCQ_TYPE, 0,
+			      FRF_BZ_TX_NON_IP_DROP_DIS, 1);
+
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+		bool no_offload = !(tx_queue->queue & EF4_TXQ_TYPE_OFFLOAD);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, no_offload);
+		EF4_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS, no_offload);
+	}
+
+	ef4_writeo_table(efx, &reg, efx->type->txd_ptr_tbl_base,
+			 tx_queue->queue);
+
+	if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0) {
+		/* Before B0: one checksum-disable bit per queue, 128 bits */
+		BUILD_BUG_ON(EF4_MAX_TX_QUEUES > 128);
+
+		ef4_reado(efx, &reg, FR_AA_TX_CHKSM_CFG);
+		if (!(tx_queue->queue & EF4_TXQ_TYPE_OFFLOAD))
+			__set_bit_le(tx_queue->queue, &reg);
+		else
+			__clear_bit_le(tx_queue->queue, &reg);
+		ef4_writeo(efx, &reg, FR_AA_TX_CHKSM_CFG);
+	}
+
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+		EF4_POPULATE_OWORD_1(reg,
+				     FRF_BZ_TX_PACE,
+				     (tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI) ?
+				     FFE_BZ_TX_PACE_OFF :
+				     FFE_BZ_TX_PACE_RESERVED);
+		ef4_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL,
+				 tx_queue->queue);
+	}
+}
+
+static void ef4_farch_flush_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+	ef4_oword_t flush_cmd;
+
+	/* Warn if a flush is already outstanding, then mark this one */
+	WARN_ON(atomic_read(&tx_queue->flush_outstanding));
+	atomic_set(&tx_queue->flush_outstanding, 1);
+
+	EF4_POPULATE_OWORD_2(flush_cmd,
+			     FRF_AZ_TX_FLUSH_DESCQ_CMD, 1,
+			     FRF_AZ_TX_FLUSH_DESCQ, tx_queue->queue);
+	ef4_writeo(tx_queue->efx, &flush_cmd, FR_AZ_TX_FLUSH_DESCQ);
+}
+
+void ef4_farch_tx_fini(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	ef4_oword_t blank_entry;
+
+	/* Detach the ring: overwrite its pointer table entry with zeroes */
+	EF4_ZERO_OWORD(blank_entry);
+	ef4_writeo_table(efx, &blank_entry, efx->type->txd_ptr_tbl_base,
+			 tx_queue->queue);
+
+	/* Release the pinning done by ef4_farch_tx_init() */
+	ef4_fini_special_buffer(efx, &tx_queue->txd);
+}
+
+/* Free the special buffer allocated by ef4_farch_tx_probe() for the TX ring */
+void ef4_farch_tx_remove(struct ef4_tx_queue *tx_queue)
+{
+	ef4_free_special_buffer(tx_queue->efx, &tx_queue->txd);
+}
+
+/**************************************************************************
+ *
+ * RX path
+ *
+ **************************************************************************/
+
+/* Fill in the hardware RX descriptor at ring position @index */
+static inline void
+ef4_farch_build_rx_desc(struct ef4_rx_queue *rx_queue, unsigned index)
+{
+	ef4_qword_t *desc = ef4_rx_desc(rx_queue, index);
+	struct ef4_rx_buffer *buf = ef4_rx_buffer(rx_queue, index);
+
+	/* The size given to the NIC is the buffer length less the NIC
+	 * type's rx_buffer_padding */
+	EF4_POPULATE_QWORD_3(*desc,
+			     FSF_AZ_RX_KER_BUF_SIZE,
+			     buf->len -
+			     rx_queue->efx->type->rx_buffer_padding,
+			     FSF_AZ_RX_KER_BUF_REGION, 0,
+			     FSF_AZ_RX_KER_BUF_ADDR, buf->dma_addr);
+}
+
+/* Build descriptors for every buffer added since the last notification,
+ * then write the ring-masked added_count to the queue's RX_DESC_WPTR
+ * register.
+ */
+void ef4_farch_rx_write(struct ef4_rx_queue *rx_queue)
+{
+	ef4_dword_t doorbell;
+	unsigned wptr;
+
+	for (; rx_queue->notified_count != rx_queue->added_count;
+	     ++rx_queue->notified_count)
+		ef4_farch_build_rx_desc(
+			rx_queue,
+			rx_queue->notified_count & rx_queue->ptr_mask);
+
+	/* Order the descriptor writes above before the doorbell below */
+	wmb();
+	wptr = rx_queue->added_count & rx_queue->ptr_mask;
+	EF4_POPULATE_DWORD_1(doorbell, FRF_AZ_RX_DESC_WPTR_DWORD, wptr);
+	ef4_writed_page(rx_queue->efx, &doorbell, FR_AZ_RX_DESC_UPD_DWORD_P0,
+			ef4_rx_queue_index(rx_queue));
+}
+
+/* Allocate the special buffer backing an RX descriptor ring */
+int ef4_farch_rx_probe(struct ef4_rx_queue *rx_queue)
+{
+	/* One qword-sized descriptor per ring entry (ptr_mask + 1 entries) */
+	unsigned ring_entries = rx_queue->ptr_mask + 1;
+
+	return ef4_alloc_special_buffer(rx_queue->efx, &rx_queue->rxd,
+					ring_entries * sizeof(ef4_qword_t));
+}
+
+/* Program the hardware with this RX queue's descriptor ring and options */
+void ef4_farch_rx_init(struct ef4_rx_queue *rx_queue)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	bool b0_or_later = ef4_nic_rev(efx) >= EF4_REV_FALCON_B0;
+	ef4_oword_t table_entry;
+	bool jumbo;
+
+	/* The JUMBO flag differs by revision.  For kernel-mode queues in
+	 * Falcon A1 it lets DMA continue after a PCIe page boundary (and
+	 * scattering is not possible); in Falcon B0 and Siena it enables
+	 * scatter.  Hence: always set before B0, else follow rx_scatter.
+	 */
+	jumbo = !b0_or_later || efx->rx_scatter;
+
+	netif_dbg(efx, hw, efx->net_dev,
+		  "RX queue %d ring in special buffers %d-%d\n",
+		  ef4_rx_queue_index(rx_queue), rx_queue->rxd.index,
+		  rx_queue->rxd.index + rx_queue->rxd.entries - 1);
+
+	rx_queue->scatter_n = 0;
+
+	/* Pin RX descriptor ring */
+	ef4_init_special_buffer(efx, &rx_queue->rxd);
+
+	/* Push RX descriptor ring to card */
+	EF4_POPULATE_OWORD_10(table_entry,
+			      FRF_AZ_RX_ISCSI_DDIG_EN, b0_or_later,
+			      FRF_AZ_RX_ISCSI_HDIG_EN, b0_or_later,
+			      FRF_AZ_RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index,
+			      FRF_AZ_RX_DESCQ_EVQ_ID,
+			      ef4_rx_queue_channel(rx_queue)->channel,
+			      FRF_AZ_RX_DESCQ_OWNER_ID, 0,
+			      FRF_AZ_RX_DESCQ_LABEL,
+			      ef4_rx_queue_index(rx_queue),
+			      FRF_AZ_RX_DESCQ_SIZE,
+			      __ffs(rx_queue->rxd.entries),
+			      FRF_AZ_RX_DESCQ_TYPE, 0 /* kernel queue */ ,
+			      FRF_AZ_RX_DESCQ_JUMBO, jumbo,
+			      FRF_AZ_RX_DESCQ_EN, 1);
+	ef4_writeo_table(efx, &table_entry, efx->type->rxd_ptr_tbl_base,
+			 ef4_rx_queue_index(rx_queue));
+}
+
+/* Issue the hardware flush command for one RX descriptor queue */
+static void ef4_farch_flush_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+	ef4_oword_t flush_cmd;
+
+	EF4_POPULATE_OWORD_2(flush_cmd,
+			     FRF_AZ_RX_FLUSH_DESCQ_CMD, 1,
+			     FRF_AZ_RX_FLUSH_DESCQ,
+			     ef4_rx_queue_index(rx_queue));
+	ef4_writeo(rx_queue->efx, &flush_cmd, FR_AZ_RX_FLUSH_DESCQ);
+}
+
+void ef4_farch_rx_fini(struct ef4_rx_queue *rx_queue)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	ef4_oword_t blank_entry;
+
+	/* Detach the ring: overwrite its pointer table entry with zeroes */
+	EF4_ZERO_OWORD(blank_entry);
+	ef4_writeo_table(efx, &blank_entry, efx->type->rxd_ptr_tbl_base,
+			 ef4_rx_queue_index(rx_queue));
+
+	/* Release the pinning done by ef4_farch_rx_init() */
+	ef4_fini_special_buffer(efx, &rx_queue->rxd);
+}
+
+/* Free the special buffer allocated by ef4_farch_rx_probe() for the RX ring */
+void ef4_farch_rx_remove(struct ef4_rx_queue *rx_queue)
+{
+	ef4_free_special_buffer(rx_queue->efx, &rx_queue->rxd);
+}
+
+/**************************************************************************
+ *
+ * Flush handling
+ *
+ **************************************************************************/
+
+/* Wait condition used on efx->flush_wq: true when no queues remain active, or
+ * when an RX flush is pending and fewer than EF4_RX_FLUSH_COUNT are in flight.
+ */
+static bool ef4_farch_flush_wake(struct ef4_nic *efx)
+{
+	/* Ensure that all updates are visible to ef4_farch_do_flush() */
+	smp_mb();
+	if (atomic_read(&efx->active_queues) == 0)
+		return true;
+	return atomic_read(&efx->rxq_flush_outstanding) < EF4_RX_FLUSH_COUNT &&
+	       atomic_read(&efx->rxq_flush_pending) > 0;
+}
+
+/* Poll the TX descriptor pointer table; return true only if no TX queue has
+ * DESCQ_FLUSH or DESCQ_EN set.  Queues that finished without a completion
+ * event being seen are sent a TX_DRAIN magic event. */
+static bool ef4_check_tx_flush_complete(struct ef4_nic *efx)
+{
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_channel *channel;
+	ef4_oword_t entry;
+	bool all_done = true;
+
+	ef4_for_each_channel(channel, efx) {
+		ef4_for_each_channel_tx_queue(tx_queue, channel) {
+			/* Same table that ef4_farch_tx_init() and _fini()
+			 * write; its base depends on the NIC type. */
+			ef4_reado_table(efx, &entry,
+					efx->type->txd_ptr_tbl_base,
+					tx_queue->queue);
+			if (EF4_OWORD_FIELD(entry, FRF_AZ_TX_DESCQ_FLUSH) ||
+			    EF4_OWORD_FIELD(entry, FRF_AZ_TX_DESCQ_EN)) {
+				netif_dbg(efx, hw, efx->net_dev,
+					  "flush did not complete on TXQ %d\n",
+					  tx_queue->queue);
+				all_done = false;
+			} else if (atomic_cmpxchg(&tx_queue->flush_outstanding,
+						  1, 0)) {
+				/* Flushed, but no completion event was seen.
+				 * active_queues already counts this queue, so
+				 * just send the drain event. */
+				netif_dbg(efx, hw, efx->net_dev,
+					  "flush complete on TXQ %d, so drain "
+					  "the queue\n", tx_queue->queue);
+				ef4_farch_magic_event(channel,
+						      EF4_CHANNEL_MAGIC_TX_DRAIN(
+							      tx_queue));
+			}
+		}
+	}
+
+	return all_done;
+}
+
+/* Flush every TX queue, then flush RX queues in batches, waiting until all
+ * DRAIN events have arrived (efx->active_queues reaches zero) or 5s pass.
+ * Returns -ETIMEDOUT if queues stay active and the TX check fails, else 0. */
+static int ef4_farch_do_flush(struct ef4_nic *efx)
+{
+	unsigned remaining = msecs_to_jiffies(5000); /* 5s for all flushes and drains */
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_rx_queue *rx_queue;
+	struct ef4_channel *channel;
+	int rc = 0;
+
+	ef4_for_each_channel(channel, efx) {
+		ef4_for_each_channel_tx_queue(tx_queue, channel) {
+			ef4_farch_flush_tx_queue(tx_queue);
+		}
+		ef4_for_each_channel_rx_queue(rx_queue, channel) {
+			rx_queue->flush_pending = true;
+			atomic_inc(&efx->rxq_flush_pending);
+		}
+	}
+
+	while (remaining && atomic_read(&efx->active_queues) > 0) {
+		/* The hardware supports four concurrent rx flushes, each of
+		 * which may need to be retried if there is an outstanding
+		 * descriptor fetch
+		 */
+		ef4_for_each_channel(channel, efx) {
+			ef4_for_each_channel_rx_queue(rx_queue, channel) {
+				if (atomic_read(&efx->rxq_flush_outstanding) >=
+				    EF4_RX_FLUSH_COUNT)
+					break;
+
+				if (!rx_queue->flush_pending)
+					continue;
+				rx_queue->flush_pending = false;
+				atomic_dec(&efx->rxq_flush_pending);
+				atomic_inc(&efx->rxq_flush_outstanding);
+				ef4_farch_flush_rx_queue(rx_queue);
+			}
+		}
+
+		remaining = wait_event_timeout(efx->flush_wq,
+					       ef4_farch_flush_wake(efx),
+					       remaining);
+	}
+
+	if (atomic_read(&efx->active_queues) &&
+	    !ef4_check_tx_flush_complete(efx)) {
+		netif_err(efx, hw, efx->net_dev, "failed to flush %d queues "
+			  "(rx %d+%d)\n", atomic_read(&efx->active_queues),
+			  atomic_read(&efx->rxq_flush_outstanding),
+			  atomic_read(&efx->rxq_flush_pending));
+		rc = -ETIMEDOUT;
+
+		atomic_set(&efx->active_queues, 0);
+		atomic_set(&efx->rxq_flush_pending, 0);
+		atomic_set(&efx->rxq_flush_outstanding, 0);
+	}
+
+	return rc;
+}
+
+/* Flush the DMA queues if bus mastering is on, then detach all rings */
+int ef4_farch_fini_dmaq(struct ef4_nic *efx)
+{
+	struct ef4_tx_queue *tx_queue;
+	struct ef4_rx_queue *rx_queue;
+	struct ef4_channel *channel;
+	int rc = 0;
+
+	/* Do not attempt to write to the NIC during EEH recovery */
+	if (efx->state == STATE_RECOVERY)
+		return rc;
+	/* Only perform flush if DMA is enabled */
+	if (efx->pci_dev->is_busmaster) {
+		efx->type->prepare_flush(efx);
+		rc = ef4_farch_do_flush(efx);
+		efx->type->finish_flush(efx);
+	}
+
+	ef4_for_each_channel(channel, efx) {
+		ef4_for_each_channel_rx_queue(rx_queue, channel)
+			ef4_farch_rx_fini(rx_queue);
+		ef4_for_each_channel_tx_queue(tx_queue, channel)
+			ef4_farch_tx_fini(tx_queue);
+	}
+	return rc;
+}
+
+/* Reset queue and flush accounting after FLR
+ *
+ * One possible cause of FLR recovery is failing DMA (e.g. bus mastering was
+ * disabled), in which case no (RXQ) flush completion events are received.
+ * efx->rxq_flush_outstanding then stays at its limit of 4 after the FLR, and
+ * efx->active_queues stays non-zero (no flush completion events arrived and
+ * ef4_check_tx_flush_complete() was not reached).
+ *
+ * Left alone, the next call to ef4_realloc_channels() would flush no RX
+ * queues, because efx->rxq_flush_outstanding is already at the batch limit,
+ * and efx->active_queues would keep growing for newly initialised queues
+ * without ever having returned to zero.  Every later attempt to restart the
+ * queues would then time out.  So zero all three counters here.
+ */
+void ef4_farch_finish_flr(struct ef4_nic *efx)
+{
+	atomic_set(&efx->rxq_flush_pending, 0);
+	atomic_set(&efx->rxq_flush_outstanding, 0);
+	atomic_set(&efx->active_queues, 0);
+}
+
+
+/**************************************************************************
+ *
+ * Event queue processing
+ * Event queues are processed by per-channel tasklets.
+ *
+ **************************************************************************/
+
+/* Update a channel's event queue's read pointer (RPTR) register
+ *
+ * Writes eventq_read_ptr, masked with eventq_mask, to this channel's slot
+ * in the table at efx->type->evq_rptr_tbl_base.
+ */
+void ef4_farch_ev_read_ack(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+	ef4_dword_t rptr;
+
+	EF4_POPULATE_DWORD_1(rptr, FRF_AZ_EVQ_RPTR,
+			     channel->eventq_read_ptr & channel->eventq_mask);
+
+	/* For Falcon A1, EVQ_RPTR_KER is documented as having a step size
+	 * of 4 bytes, but it is really 16 bytes just like later revisions.
+	 */
+	ef4_writed(efx, &rptr,
+		   efx->type->evq_rptr_tbl_base +
+		   FR_BZ_EVQ_RPTR_STEP * channel->channel);
+}
+
+/* Have the NIC insert *event (low 64 bits of DRV_EV) into event queue evq */
+void ef4_farch_generate_event(struct ef4_nic *efx, unsigned int evq,
+			      ef4_qword_t *event)
+{
+	ef4_oword_t drv_ev;
+
+	BUILD_BUG_ON(FRF_AZ_DRV_EV_DATA_LBN != 0);
+	BUILD_BUG_ON(FRF_AZ_DRV_EV_DATA_WIDTH != 64);
+	drv_ev.u32[0] = event->u32[0];
+	drv_ev.u32[1] = event->u32[1];
+	drv_ev.u32[2] = 0;
+	drv_ev.u32[3] = 0;
+	EF4_SET_OWORD_FIELD(drv_ev, FRF_AZ_DRV_EV_QID, evq);
+	ef4_writeo(efx, &drv_ev, FR_AZ_DRV_EV);
+}
+
+static void ef4_farch_magic_event(struct ef4_channel *channel, u32 magic)
+{
+	ef4_qword_t drv_gen;	/* DRV_GEN_EV event carrying @magic */
+
+	EF4_POPULATE_QWORD_2(drv_gen,
+			     FSF_AZ_EV_CODE, FSE_AZ_EV_CODE_DRV_GEN_EV,
+			     FSF_AZ_DRV_GEN_EV_MAGIC, magic);
+	ef4_farch_generate_event(channel->efx, channel->channel, &drv_gen);
+}
+
+/* Handle a TX event.
+ *
+ * TX completions are batched: the event means "everything up to this
+ * descriptor index is done".  Returns the ring distance from read_count to
+ * that index for a completion, 0 for any other TX event or pending reset.
+ */
+static int
+ef4_farch_handle_tx_event(struct ef4_channel *channel, ef4_qword_t *event)
+{
+	struct ef4_nic *efx = channel->efx;
+	struct ef4_tx_queue *tx_queue;
+	unsigned int done_ptr, label;
+	int completed = 0;
+
+	if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+		return 0;
+
+	if (likely(EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_COMP))) {
+		/* Transmit completion */
+		done_ptr = EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_DESC_PTR);
+		label = EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+		tx_queue = ef4_channel_get_tx_queue(channel,
+						    label % EF4_TXQ_TYPES);
+		completed = (done_ptr - tx_queue->read_count) &
+			    tx_queue->ptr_mask;
+		ef4_xmit_done(tx_queue, done_ptr);
+	} else if (EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_WQ_FF_FULL)) {
+		/* Rewrite the FIFO write pointer */
+		label = EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_Q_LABEL);
+		tx_queue = ef4_channel_get_tx_queue(channel,
+						    label % EF4_TXQ_TYPES);
+
+		netif_tx_lock(efx->net_dev);
+		ef4_farch_notify_tx_desc(tx_queue);
+		netif_tx_unlock(efx->net_dev);
+	} else if (EF4_QWORD_FIELD(*event, FSF_AZ_TX_EV_PKT_ERR)) {
+		ef4_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+	} else {
+		netif_err(efx, tx_err, efx->net_dev,
+			  "channel %d unexpected TX event "
+			  EF4_QWORD_FMT"\n", channel->channel,
+			  EF4_QWORD_VAL(*event));
+	}
+
+	return completed;
+}
+
+/* Classify an RX event whose RX_EV_PKT_OK bit is clear.
+ *
+ * Bumps at most one channel counter: truncated, to-be-discarded, or (outside
+ * loopback self-test) a checksum failure.  Returns EF4_RX_PKT_DISCARD
+ * for CRC, truncation, dribble-nibble, discard or pause errors, else 0.
+ */
+static u16 ef4_farch_handle_rx_not_ok(struct ef4_rx_queue *rx_queue,
+				      const ef4_qword_t *event)
+{
+	struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
+	struct ef4_nic *efx = rx_queue->efx;
+	bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err;
+	bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
+	bool rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc;
+	bool rx_ev_other_err, rx_ev_pause_frm;
+
+	rx_ev_tobe_disc = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_TOBE_DISC);
+	rx_ev_buf_owner_id_err = EF4_QWORD_FIELD(*event,
+						 FSF_AZ_RX_EV_BUF_OWNER_ID_ERR);
+	rx_ev_ip_hdr_chksum_err = EF4_QWORD_FIELD(*event,
+						  FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR);
+	rx_ev_tcp_udp_chksum_err = EF4_QWORD_FIELD(*event,
+						   FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR);
+	rx_ev_eth_crc_err = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_ETH_CRC_ERR);
+	rx_ev_frm_trunc = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_FRM_TRUNC);
+	rx_ev_drib_nib = ((ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) ?
+			  0 : EF4_QWORD_FIELD(*event, FSF_AA_RX_EV_DRIB_NIB));
+	rx_ev_pause_frm = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR);
+
+	/* Every error apart from tobe_disc and pause_frm (DEBUG report only) */
+	rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err |
+			   rx_ev_buf_owner_id_err | rx_ev_eth_crc_err |
+			   rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err);
+
+	/* Count errors that are not in MAC stats.  Ignore expected
+	 * checksum errors during self-test. */
+	if (rx_ev_frm_trunc)
+		++channel->n_rx_frm_trunc;
+	else if (rx_ev_tobe_disc)
+		++channel->n_rx_tobe_disc;
+	else if (!efx->loopback_selftest) {
+		if (rx_ev_ip_hdr_chksum_err)
+			++channel->n_rx_ip_hdr_chksum_err;
+		else if (rx_ev_tcp_udp_chksum_err)
+			++channel->n_rx_tcp_udp_chksum_err;
+	}
+
+	/* TOBE_DISC is expected on unicast mismatches; don't print out an
+	 * error message.  FRM_TRUNC indicates RXDP dropped the packet due
+	 * to a FIFO overflow.
+	 */
+#ifdef DEBUG
+	if (rx_ev_other_err && net_ratelimit()) {
+		netif_dbg(efx, rx_err, efx->net_dev,
+			  " RX queue %d unexpected RX event "
+			  EF4_QWORD_FMT "%s%s%s%s%s%s%s%s\n",
+			  ef4_rx_queue_index(rx_queue), EF4_QWORD_VAL(*event),
+			  rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
+			  rx_ev_ip_hdr_chksum_err ?
+			  " [IP_HDR_CHKSUM_ERR]" : "",
+			  rx_ev_tcp_udp_chksum_err ?
+			  " [TCP_UDP_CHKSUM_ERR]" : "",
+			  rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "",
+			  rx_ev_frm_trunc ? " [FRM_TRUNC]" : "",
+			  rx_ev_drib_nib ? " [DRIB_NIB]" : "",
+			  rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
+			  rx_ev_pause_frm ? " [PAUSE]" : "");
+	}
+#endif
+
+	/* The frame must be discarded if any of these are true. */
+	return (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_drib_nib |
+		rx_ev_tobe_disc | rx_ev_pause_frm) ?
+		EF4_RX_PKT_DISCARD : 0;
+}
+
+/* Handle an RX event whose descriptor index is not the one expected.
+ * Returns true if the index matches the last fragment already accumulated
+ * (treated as a partial packet discard).  Otherwise logs the gap,
+ * schedules a reset and returns false. */
+static bool
+ef4_farch_handle_rx_bad_index(struct ef4_rx_queue *rx_queue, unsigned index)
+{
+	struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
+	struct ef4_nic *efx = rx_queue->efx;
+	unsigned want, gap;
+
+	if (!rx_queue->scatter_n ||
+	    index != ((rx_queue->removed_count + rx_queue->scatter_n - 1) &
+		      rx_queue->ptr_mask)) {
+		want = rx_queue->removed_count & rx_queue->ptr_mask;
+		gap = (index - want) & rx_queue->ptr_mask;
+		netif_info(efx, rx_err, efx->net_dev,
+			   "dropped %d events (index=%d expected=%d)\n",
+			   gap, index, want);
+
+		ef4_schedule_reset(efx, EF4_WORKAROUND_5676(efx) ?
+				   RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+		return false;
+	}
+
+	++channel->n_rx_nodesc_trunc;
+	return true;
+}
+
+/* Handle a packet received event
+ *
+ * Each event reports one RX descriptor.  Fragments are counted in
+ * rx_queue->scatter_n until an event without JUMBO_CONT arrives; the whole
+ * packet is then passed to ef4_rx_packet().  Packets not marked OK, and
+ * multicast packets that miss the hash filter, may be flagged for discard.
+ */
+static void
+ef4_farch_handle_rx_event(struct ef4_channel *channel, const ef4_qword_t *event)
+{
+	unsigned int rx_ev_desc_ptr, rx_ev_byte_cnt;
+	unsigned int rx_ev_hdr_type, rx_ev_mcast_pkt;
+	unsigned expected_ptr;
+	bool rx_ev_pkt_ok, rx_ev_sop, rx_ev_cont;
+	u16 flags;
+	struct ef4_rx_queue *rx_queue;
+	struct ef4_nic *efx = channel->efx;
+
+	if (unlikely(ACCESS_ONCE(efx->reset_pending)))
+		return;
+
+	rx_ev_cont = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_JUMBO_CONT);
+	rx_ev_sop = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_SOP);
+	WARN_ON(EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_Q_LABEL) !=
+		channel->channel);
+
+	rx_queue = ef4_channel_get_rx_queue(channel);
+
+	rx_ev_desc_ptr = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_DESC_PTR);
+	expected_ptr = ((rx_queue->removed_count + rx_queue->scatter_n) &
+			rx_queue->ptr_mask);
+
+	/* Unexpected descriptor index, or SOP out of step with scatter_n */
+	if (unlikely(rx_ev_desc_ptr != expected_ptr) ||
+	    unlikely(rx_ev_sop != (rx_queue->scatter_n == 0))) {
+		if (rx_ev_desc_ptr != expected_ptr &&
+		    !ef4_farch_handle_rx_bad_index(rx_queue, rx_ev_desc_ptr))
+			return;
+
+		/* Discard all pending fragments */
+		if (rx_queue->scatter_n) {
+			ef4_rx_packet(
+				rx_queue,
+				rx_queue->removed_count & rx_queue->ptr_mask,
+				rx_queue->scatter_n, 0, EF4_RX_PKT_DISCARD);
+			rx_queue->removed_count += rx_queue->scatter_n;
+			rx_queue->scatter_n = 0;
+		}
+
+		/* Return if there is no new fragment */
+		if (rx_ev_desc_ptr != expected_ptr)
+			return;
+
+		/* Discard new fragment if not SOP */
+		if (!rx_ev_sop) {
+			ef4_rx_packet(
+				rx_queue,
+				rx_queue->removed_count & rx_queue->ptr_mask,
+				1, 0, EF4_RX_PKT_DISCARD);
+			++rx_queue->removed_count;
+			return;
+		}
+	}
+
+	++rx_queue->scatter_n;	/* count this event's fragment */
+	if (rx_ev_cont)
+		return;		/* JUMBO_CONT set: wait for further events */
+
+	rx_ev_byte_cnt = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_BYTE_CNT);
+	rx_ev_pkt_ok = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_PKT_OK);
+	rx_ev_hdr_type = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_HDR_TYPE);
+
+	if (likely(rx_ev_pkt_ok)) {
+		/* If packet is marked as OK then we can rely on the
+		 * hardware checksum and classification.
+		 */
+		flags = 0;
+		switch (rx_ev_hdr_type) {
+		case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP:
+			flags |= EF4_RX_PKT_TCP;
+			/* fall through */
+		case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP:
+			flags |= EF4_RX_PKT_CSUMMED;
+			/* fall through */
+		case FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER:
+		case FSE_AZ_RX_EV_HDR_TYPE_OTHER:
+			break;
+		}
+	} else {
+		flags = ef4_farch_handle_rx_not_ok(rx_queue, event);
+	}
+
+	/* Detect multicast packets that didn't match the filter */
+	rx_ev_mcast_pkt = EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_PKT);
+	if (rx_ev_mcast_pkt) {
+		unsigned int rx_ev_mcast_hash_match =
+			EF4_QWORD_FIELD(*event, FSF_AZ_RX_EV_MCAST_HASH_MATCH);
+
+		if (unlikely(!rx_ev_mcast_hash_match)) {
+			++channel->n_rx_mcast_mismatch;
+			flags |= EF4_RX_PKT_DISCARD;
+		}
+	}
+
+	channel->irq_mod_score += 2;
+
+	/* Hand over all scatter_n fragments as one packet, then advance */
+	ef4_rx_packet(rx_queue,
+		      rx_queue->removed_count & rx_queue->ptr_mask,
+		      rx_queue->scatter_n, rx_ev_byte_cnt, flags);
+	rx_queue->removed_count += rx_queue->scatter_n;
+	rx_queue->scatter_n = 0;
+}
+
+/* If this flush done event corresponds to a &struct ef4_tx_queue, then
+ * send an %EF4_CHANNEL_MAGIC_TX_DRAIN event to drain the event queue
+ * of all transmit completions.
+ */
+static void
+ef4_farch_handle_tx_flush_done(struct ef4_nic *efx, ef4_qword_t *event)
+{
+	struct ef4_tx_queue *txq;
+	int qid = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+
+	/* Ignore queue IDs that do not name one of our TX queues */
+	if (qid >= EF4_TXQ_TYPES * efx->n_tx_channels)
+		return;
+
+	txq = ef4_get_tx_queue(efx, qid / EF4_TXQ_TYPES, qid % EF4_TXQ_TYPES);
+	/* Drain only if this clears a flush that was still outstanding */
+	if (atomic_cmpxchg(&txq->flush_outstanding, 1, 0))
+		ef4_farch_magic_event(txq->channel,
+				      EF4_CHANNEL_MAGIC_TX_DRAIN(txq));
+}
+
+/* If this flush done event corresponds to a &struct ef4_rx_queue: on success
+ * send an %EF4_CHANNEL_MAGIC_RX_DRAIN, on failure mark the queue
+ * flush-pending again.  Either way one outstanding RX flush is retired.
+ */
+static void
+ef4_farch_handle_rx_flush_done(struct ef4_nic *efx, ef4_qword_t *event)
+{
+	struct ef4_rx_queue *rx_queue;
+	struct ef4_channel *channel;
+	bool flush_failed;
+	int qid;
+
+	qid = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_DESCQ_ID);
+	flush_failed = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL);
+	if (qid >= efx->n_channels)
+		return;
+	channel = ef4_get_channel(efx, qid);
+	if (!ef4_channel_has_rx_queue(channel))
+		return;
+	rx_queue = ef4_channel_get_rx_queue(channel);
+
+	if (!flush_failed) {
+		ef4_farch_magic_event(ef4_rx_queue_channel(rx_queue),
+				      EF4_CHANNEL_MAGIC_RX_DRAIN(rx_queue));
+	} else {
+		netif_info(efx, hw, efx->net_dev,
+			   "RXQ %d flush retry\n", qid);
+		rx_queue->flush_pending = true;
+		atomic_inc(&efx->rxq_flush_pending);
+	}
+	atomic_dec(&efx->rxq_flush_outstanding);
+	if (ef4_farch_flush_wake(efx))
+		wake_up(&efx->flush_wq);
+}
+
+/* A queue has drained: drop active_queues and wake the flush waiter if due */
+static void ef4_farch_handle_drain_event(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+
+	WARN_ON(atomic_read(&efx->active_queues) == 0);
+	atomic_dec(&efx->active_queues);
+	if (ef4_farch_flush_wake(efx))
+		wake_up(&efx->flush_wq);
+}
+
+/* Dispatch a driver-generated ("magic") event: channel test event,
+ * deferred RX refill request, or an RX/TX drain notification. */
+static void ef4_farch_handle_generated_event(struct ef4_channel *channel,
+					     ef4_qword_t *event)
+{
+	unsigned magic = EF4_QWORD_FIELD(*event, FSF_AZ_DRV_GEN_EV_MAGIC);
+	unsigned code = _EF4_CHANNEL_MAGIC_CODE(magic);
+	struct ef4_nic *efx = channel->efx;
+	struct ef4_rx_queue *rxq = NULL;
+
+	if (ef4_channel_has_rx_queue(channel))
+		rxq = ef4_channel_get_rx_queue(channel);
+
+	if (magic == EF4_CHANNEL_MAGIC_TEST(channel)) {
+		channel->event_test_cpu = raw_smp_processor_id();
+	} else if (rxq && magic == EF4_CHANNEL_MAGIC_FILL(rxq)) {
+		/* The queue must be empty, so we won't receive any rx
+		 * events, so ef4_process_channel() won't refill the
+		 * queue. Refill it here */
+		ef4_fast_push_rx_descriptors(rxq, true);
+	} else if ((rxq && magic == EF4_CHANNEL_MAGIC_RX_DRAIN(rxq)) ||
+		   code == _EF4_CHANNEL_MAGIC_TX_DRAIN) {
+		ef4_farch_handle_drain_event(channel);
+	} else {
+		netif_dbg(efx, hw, efx->net_dev, "channel %d received "
+			  "generated event "EF4_QWORD_FMT"\n",
+			  channel->channel, EF4_QWORD_VAL(*event));
+	}
+}
+
+/* Handle a DRIVER event: flush completions, logged notices, error resets */
+static void
+ef4_farch_handle_driver_event(struct ef4_channel *channel, ef4_qword_t *event)
+{
+	struct ef4_nic *efx = channel->efx;
+	unsigned int subcode, subdata;
+
+	subcode = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBCODE);
+	subdata = EF4_QWORD_FIELD(*event, FSF_AZ_DRIVER_EV_SUBDATA);
+
+	switch (subcode) {
+	case FSE_AZ_TX_DESCQ_FLS_DONE_EV:
+		netif_vdbg(efx, hw, efx->net_dev, "channel %d TXQ %d flushed\n",
+			   channel->channel, subdata);
+		ef4_farch_handle_tx_flush_done(efx, event);
+		break;
+	case FSE_AZ_RX_DESCQ_FLS_DONE_EV:
+		netif_vdbg(efx, hw, efx->net_dev, "channel %d RXQ %d flushed\n",
+			   channel->channel, subdata);
+		ef4_farch_handle_rx_flush_done(efx, event);
+		break;
+	case FSE_AZ_EVQ_INIT_DONE_EV:
+		netif_dbg(efx, hw, efx->net_dev,
+			  "channel %d EVQ %d initialised\n",
+			  channel->channel, subdata);
+		break;
+	case FSE_AZ_SRM_UPD_DONE_EV:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d SRAM update done\n", channel->channel);
+		break;
+	case FSE_AZ_WAKE_UP_EV:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d RXQ %d wakeup event\n",
+			   channel->channel, subdata);
+		break;
+	case FSE_AZ_TIMER_EV:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d RX queue %d timer expired\n",
+			   channel->channel, subdata);
+		break;
+	case FSE_AA_RX_RECOVER_EV:
+		netif_err(efx, rx_err, efx->net_dev,
+			  "channel %d seen DRIVER RX_RESET event. "
+			  "Resetting.\n", channel->channel);
+		atomic_inc(&efx->rx_reset);
+		ef4_schedule_reset(efx,
+				   EF4_WORKAROUND_6555(efx) ?
+				   RESET_TYPE_RX_RECOVERY :
+				   RESET_TYPE_DISABLE);
+		break;
+	case FSE_BZ_RX_DSC_ERROR_EV:
+		netif_err(efx, rx_err, efx->net_dev,
+			  "RX DMA Q %d reports descriptor fetch error."
+			  " RX Q %d is disabled.\n", subdata,
+			  subdata);
+		ef4_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+		break;
+	case FSE_BZ_TX_DSC_ERROR_EV:
+		netif_err(efx, tx_err, efx->net_dev,
+			  "TX DMA Q %d reports descriptor fetch error."
+			  " TX Q %d is disabled.\n", subdata,
+			  subdata);
+		ef4_schedule_reset(efx, RESET_TYPE_DMA_ERROR);
+		break;
+	default:
+		netif_vdbg(efx, hw, efx->net_dev,
+			   "channel %d unknown driver event code %d "
+			   "data %04x\n", channel->channel, subcode,
+			   subdata);
+		break;
+	}
+}
+
+/* Process a channel's event queue.  Stops when no event is present, after
+ * @budget RX events, or once TX completions exceed efx->txq_entries (which
+ * is reported as the whole @budget).  Otherwise returns RX events handled. */
+int ef4_farch_ev_process(struct ef4_channel *channel, int budget)
+{
+	struct ef4_nic *efx = channel->efx;
+	ef4_qword_t event, *p_event;
+	int ev_code, tx_packets = 0, spent = 0;
+	unsigned int read_ptr;
+
+	if (budget <= 0)
+		return spent;
+
+	read_ptr = channel->eventq_read_ptr;
+
+	for (;;) {
+		p_event = ef4_event(channel, read_ptr);
+		event = *p_event;
+
+		if (!ef4_event_present(&event))
+			break;	/* End of events */
+
+		netif_vdbg(channel->efx, intr, channel->efx->net_dev,
+			   "channel %d event is "EF4_QWORD_FMT"\n",
+			   channel->channel, EF4_QWORD_VAL(event));
+
+		/* Clear this event by marking it all ones */
+		EF4_SET_QWORD(*p_event);
+
+		++read_ptr;
+
+		ev_code = EF4_QWORD_FIELD(event, FSF_AZ_EV_CODE);
+
+		switch (ev_code) {
+		case FSE_AZ_EV_CODE_RX_EV:
+			ef4_farch_handle_rx_event(channel, &event);
+			if (++spent == budget)
+				goto out;
+			break;
+		case FSE_AZ_EV_CODE_TX_EV:
+			tx_packets += ef4_farch_handle_tx_event(channel,
+								&event);
+			if (tx_packets > efx->txq_entries) {
+				spent = budget;
+				goto out;
+			}
+			break;
+		case FSE_AZ_EV_CODE_DRV_GEN_EV:
+			ef4_farch_handle_generated_event(channel, &event);
+			break;
+		case FSE_AZ_EV_CODE_DRIVER_EV:
+			ef4_farch_handle_driver_event(channel, &event);
+			break;
+		case FSE_AZ_EV_CODE_GLOBAL_EV:
+			if (efx->type->handle_global_event &&
+			    efx->type->handle_global_event(channel, &event))
+				break;
+			/* else fall through */
+		default:
+			netif_err(channel->efx, hw, channel->efx->net_dev,
+				  "channel %d unknown event type %d (data "
+				  EF4_QWORD_FMT ")\n", channel->channel,
+				  ev_code, EF4_QWORD_VAL(event));
+		}
+	}
+
+out:
+	channel->eventq_read_ptr = read_ptr;
+	return spent;
+}
+
+/* Allocate buffer table entries for a channel's event queue; the queue
+ * has eventq_mask + 1 entries of one qword each.
+ */
+int ef4_farch_ev_probe(struct ef4_channel *channel)
+{
+	unsigned n_events = channel->eventq_mask + 1;
+
+	return ef4_alloc_special_buffer(channel->efx, &channel->eventq,
+					n_events * sizeof(ef4_qword_t));
+}
+
+/* Set up a channel's event queue in hardware.  Always returns 0. */
+int ef4_farch_ev_init(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+	ef4_oword_t evq_ptr;
+
+	netif_dbg(efx, hw, efx->net_dev,
+		  "channel %d event queue in special buffers %d-%d\n",
+		  channel->channel, channel->eventq.index,
+		  channel->eventq.index + channel->eventq.entries - 1);
+
+	/* Pin event queue buffer */
+	ef4_init_special_buffer(efx, &channel->eventq);
+
+	/* All-ones means "empty event", so start with every slot empty */
+	memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len);
+
+	/* Enable the queue in the event queue pointer table */
+	EF4_POPULATE_OWORD_3(evq_ptr, FRF_AZ_EVQ_EN, 1,
+			     FRF_AZ_EVQ_SIZE, __ffs(channel->eventq.entries),
+			     FRF_AZ_EVQ_BUF_BASE_ID, channel->eventq.index);
+	ef4_writeo_table(efx, &evq_ptr, efx->type->evq_ptr_tbl_base,
+			 channel->channel);
+
+	return 0;
+}
+
+void ef4_farch_ev_fini(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+	ef4_oword_t cleared;
+
+	/* Zero this channel's event queue pointer table entry */
+	EF4_ZERO_OWORD(cleared);
+	ef4_writeo_table(efx, &cleared, efx->type->evq_ptr_tbl_base,
+			 channel->channel);
+
+	/* Unpin the event queue's special buffer */
+	ef4_fini_special_buffer(efx, &channel->eventq);
+}
+
+/* Free the special buffer backing the event queue (see ef4_farch_ev_probe()) */
+void ef4_farch_ev_remove(struct ef4_channel *channel)
+{
+	ef4_free_special_buffer(channel->efx, &channel->eventq);
+}
+
+/* Post this channel's TEST magic event through ef4_farch_magic_event() */
+void ef4_farch_ev_test_generate(struct ef4_channel *channel)
+{
+	ef4_farch_magic_event(channel, EF4_CHANNEL_MAGIC_TEST(channel));
+}
+
+void ef4_farch_rx_defer_refill(struct ef4_rx_queue *rx_queue)
+{
+	ef4_farch_magic_event(ef4_rx_queue_channel(rx_queue), /* its channel */
+			      EF4_CHANNEL_MAGIC_FILL(rx_queue)); /* FILL magic */
+}
+
+/**************************************************************************
+ *
+ * Hardware interrupts
+ * The hardware interrupt handler does very little work; all the event
+ * queue processing is carried out by per-channel tasklets.
+ *
+ **************************************************************************/
+
+/* Program FR_AZ_INT_EN_KER with the enable and force-interrupt settings */
+static inline void ef4_farch_interrupts(struct ef4_nic *efx,
+				      bool enabled, bool force)
+{
+	ef4_oword_t int_en;
+
+	EF4_POPULATE_OWORD_3(int_en,
+			     FRF_AZ_KER_INT_LEVE_SEL, efx->irq_level,
+			     FRF_AZ_KER_INT_KER, force,
+			     FRF_AZ_DRV_INT_EN_KER, enabled);
+	ef4_writeo(efx, &int_en, FR_AZ_INT_EN_KER);
+}
+
+void ef4_farch_irq_enable_master(struct ef4_nic *efx)
+{
+	EF4_ZERO_OWORD(*((ef4_oword_t *) efx->irq_status.addr));
+	wmb(); /* Ensure interrupt vector is clear before interrupts enabled */
+	/* Write the enable register with enabled = true, force = false */
+	ef4_farch_interrupts(efx, true, false);
+}
+
+void ef4_farch_irq_disable_master(struct ef4_nic *efx)
+{
+	/* Write the enable register with enabled = false, force = false */
+	ef4_farch_interrupts(efx, false, false);
+}
+
+/* Generate a test interrupt by writing the enable register with force set.
+ * Interrupt must already have been enabled, otherwise nasty things
+ * may happen.  Always returns 0.
+ */
+int ef4_farch_irq_test_generate(struct ef4_nic *efx)
+{
+	ef4_farch_interrupts(efx, true, true);
+	return 0;
+}
+
+/* Process a fatal interrupt: log the cause, disable bus mastering ASAP and
+ * schedule a reset, or a disable after repeated errors.  Returns IRQ_HANDLED.
+ */
+irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx)
+{
+	struct falcon_nic_data *nic_data = efx->nic_data;
+	ef4_oword_t *int_ker = efx->irq_status.addr;
+	ef4_oword_t fatal_intr;
+	int error, mem_perr;
+
+	ef4_reado(efx, &fatal_intr, FR_AZ_FATAL_INTR_KER);
+	error = EF4_OWORD_FIELD(fatal_intr, FRF_AZ_FATAL_INTR);
+
+	netif_err(efx, hw, efx->net_dev, "SYSTEM ERROR "EF4_OWORD_FMT" status "
+		  EF4_OWORD_FMT ": %s\n", EF4_OWORD_VAL(*int_ker),
+		  EF4_OWORD_VAL(fatal_intr),
+		  error ? "disabling bus mastering" : "no recognised error");
+
+	/* If this is a memory parity error dump which blocks are offending */
+	mem_perr = (EF4_OWORD_FIELD(fatal_intr, FRF_AZ_MEM_PERR_INT_KER) ||
+		    EF4_OWORD_FIELD(fatal_intr, FRF_AZ_SRM_PERR_INT_KER));
+	if (mem_perr) {
+		ef4_oword_t reg;
+		ef4_reado(efx, &reg, FR_AZ_MEM_STAT);
+		netif_err(efx, hw, efx->net_dev,
+			  "SYSTEM ERROR: memory parity error "EF4_OWORD_FMT"\n",
+			  EF4_OWORD_VAL(reg));
+	}
+
+	/* Disable both devices */
+	pci_clear_master(efx->pci_dev);
+	if (ef4_nic_is_dual_func(efx))
+		pci_clear_master(nic_data->pci_dev2);
+	ef4_farch_irq_disable_master(efx);
+
+	/* Restart the error count when it is zero or the window has expired */
+	if (efx->int_error_count == 0 ||
+	    time_after(jiffies, efx->int_error_expire)) {
+		efx->int_error_count = 0;
+		efx->int_error_expire =
+			jiffies + EF4_INT_ERROR_EXPIRE * HZ;
+	}
+	if (++efx->int_error_count < EF4_MAX_INT_ERRORS) {
+		netif_err(efx, hw, efx->net_dev,
+			  "SYSTEM ERROR - reset scheduled\n");
+		ef4_schedule_reset(efx, RESET_TYPE_INT_ERROR);
+	} else {
+		/* Literals are concatenated: keep the space after "seen." */
+		netif_err(efx, hw, efx->net_dev,
+			  "SYSTEM ERROR - max number of errors seen. "
+			  "NIC will be disabled\n");
+		ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+	}
+	return IRQ_HANDLED;
+}
+
+/* Handle a legacy interrupt
+ * Acknowledges the interrupt and schedules event queue processing.
+ */
+irqreturn_t ef4_farch_legacy_interrupt(int irq, void *dev_id)
+{
+	struct ef4_nic *efx = dev_id;
+	bool soft_enabled = ACCESS_ONCE(efx->irq_soft_enabled);
+	ef4_oword_t *int_ker = efx->irq_status.addr;
+	irqreturn_t result = IRQ_NONE;
+	struct ef4_channel *channel;
+	ef4_dword_t reg;
+	u32 queues;
+	int syserr;
+
+	/* Read the ISR which also ACKs the interrupts */
+	ef4_readd(efx, &reg, FR_BZ_INT_ISR0);
+	queues = EF4_EXTRACT_DWORD(reg, 0, 31); /* consumed bit by bit below */
+
+	/* Legacy interrupts are disabled too late by the EEH kernel
+	 * code. Disable them earlier.
+	 * If an EEH error occurred, the read will have returned all ones.
+	 */
+	if (EF4_DWORD_IS_ALL_ONES(reg) && ef4_try_recovery(efx) &&
+	    !efx->eeh_disabled_legacy_irq) {
+		disable_irq_nosync(efx->legacy_irq);
+		efx->eeh_disabled_legacy_irq = true;
+	}
+
+	/* Handle non-event-queue sources (reported on bit irq_level) */
+	if (queues & (1U << efx->irq_level) && soft_enabled) {
+		syserr = EF4_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
+		if (unlikely(syserr))
+			return ef4_farch_fatal_interrupt(efx);
+		efx->last_irq_cpu = raw_smp_processor_id();
+	}
+
+	if (queues != 0) {
+		efx->irq_zero_count = 0;
+
+		/* Schedule processing of any interrupting queues */
+		if (likely(soft_enabled)) {
+			ef4_for_each_channel(channel, efx) {
+				if (queues & 1)
+					ef4_schedule_channel_irq(channel);
+				queues >>= 1;
+			}
+		}
+		result = IRQ_HANDLED;
+
+	} else {
+		ef4_qword_t *event;
+
+		/* Legacy ISR read can return zero once (SF bug 15783) */
+
+		/* We can't return IRQ_HANDLED more than once on seeing ISR=0
+		 * because this might be a shared interrupt. */
+		if (efx->irq_zero_count++ == 0)
+			result = IRQ_HANDLED;
+
+		/* Ensure we schedule or rearm all event queues */
+		if (likely(soft_enabled)) {
+			ef4_for_each_channel(channel, efx) {
+				event = ef4_event(channel,
+						  channel->eventq_read_ptr);
+				if (ef4_event_present(event))
+					ef4_schedule_channel_irq(channel);
+				else
+					ef4_farch_ev_read_ack(channel);
+			}
+		}
+	}
+
+	if (result == IRQ_HANDLED)
+		netif_vdbg(efx, intr, efx->net_dev,
+			   "IRQ %d on CPU %d status " EF4_DWORD_FMT "\n",
+			   irq, raw_smp_processor_id(), EF4_DWORD_VAL(reg));
+
+	return result; /* IRQ_NONE unless one of the branches above claimed it */
+}
+
+/* Handle an MSI interrupt
+ *
+ * Schedules event queue processing for the channel identified by the
+ * ef4_msi_context passed as dev_id.  No interrupt acknowledgement cycle
+ * is necessary.  Also, we never need to check that the interrupt is for
+ * us, since MSI interrupts cannot be shared.  Always returns IRQ_HANDLED.
+ */
+irqreturn_t ef4_farch_msi_interrupt(int irq, void *dev_id)
+{
+	struct ef4_msi_context *msi = dev_id;
+	struct ef4_nic *efx = msi->efx;
+	ef4_oword_t *int_ker = efx->irq_status.addr;
+	int fatal;
+
+	netif_vdbg(efx, intr, efx->net_dev,
+		   "IRQ %d on CPU %d status " EF4_OWORD_FMT "\n",
+		   irq, raw_smp_processor_id(), EF4_OWORD_VAL(*int_ker));
+
+	if (unlikely(!ACCESS_ONCE(efx->irq_soft_enabled)))
+		return IRQ_HANDLED;
+
+	/* The vector at irq_level also reports non-event-queue sources */
+	if (msi->index == efx->irq_level) {
+		fatal = EF4_OWORD_FIELD(*int_ker, FSF_AZ_NET_IVEC_FATAL_INT);
+		if (unlikely(fatal))
+			return ef4_farch_fatal_interrupt(efx);
+		efx->last_irq_cpu = raw_smp_processor_id();
+	}
+
+	/* Schedule processing of the channel */
+	ef4_schedule_channel_irq(efx->channel[msi->index]);
+
+	return IRQ_HANDLED;
+}
+
+/* Write the driver's RSS indirection table to the NIC.
+ * Each row maps a packet hash value to the RX queue that receives it.
+ */
+void ef4_farch_rx_push_indir_table(struct ef4_nic *efx)
+{
+	ef4_dword_t row;
+	size_t idx;
+
+	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
+		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
+
+	BUG_ON(ef4_nic_rev(efx) < EF4_REV_FALCON_B0);
+
+	for (idx = 0; idx < FR_BZ_RX_INDIRECTION_TBL_ROWS; idx++) {
+		EF4_POPULATE_DWORD_1(row, FRF_BZ_IT_QUEUE,
+				     efx->rx_indir_table[idx]);
+		ef4_writed(efx, &row,
+			   FR_BZ_RX_INDIRECTION_TBL +
+			   FR_BZ_RX_INDIRECTION_TBL_STEP * idx);
+	}
+}
+
+/* Looks at available SRAM resources and works out where the descriptor
+ * caches should live.
+ *
+ * SRAM is split up as follows:
+ * 0                          buftbl entries for channels
+ * efx->vf_buftbl_base        buftbl entries for SR-IOV
+ * efx->rx_dc_base            RX descriptor caches
+ * efx->tx_dc_base            TX descriptor caches
+ *
+ * Only rx_dc_base and tx_dc_base are computed here, working downwards from
+ * sram_lim_qw.
+ */
+void ef4_farch_dimension_resources(struct ef4_nic *efx, unsigned sram_lim_qw)
+{
+	unsigned vi_count;
+
+	/* Reserve TX_DC_ENTRIES and RX_DC_ENTRIES per VI, where the VI count
+	 * is the larger of the channel count and the number of TX queues.
+	 */
+	vi_count = max(efx->n_channels, efx->n_tx_channels * EF4_TXQ_TYPES);
+
+	/* TX caches end at sram_lim_qw; RX caches sit directly below them */
+	efx->tx_dc_base = sram_lim_qw - vi_count * TX_DC_ENTRIES;
+	efx->rx_dc_base = efx->tx_dc_base - vi_count * RX_DC_ENTRIES;
+}
+
+u32 ef4_farch_fpga_ver(struct ef4_nic *efx)
+{
+	ef4_oword_t build_reg;	/* raw FR_AZ_ALTERA_BUILD contents */
+	ef4_reado(efx, &build_reg, FR_AZ_ALTERA_BUILD);
+	return EF4_OWORD_FIELD(build_reg, FRF_AZ_ALTERA_BUILD_VER);
+}
+
+void ef4_farch_init_common(struct ef4_nic *efx)
+{
+	ef4_oword_t temp;
+
+	/* Set positions of descriptor caches in SRAM. */
+	EF4_POPULATE_OWORD_1(temp, FRF_AZ_SRM_TX_DC_BASE_ADR, efx->tx_dc_base);
+	ef4_writeo(efx, &temp, FR_AZ_SRM_TX_DC_CFG);
+	EF4_POPULATE_OWORD_1(temp, FRF_AZ_SRM_RX_DC_BASE_ADR, efx->rx_dc_base);
+	ef4_writeo(efx, &temp, FR_AZ_SRM_RX_DC_CFG);
+
+	/* Set TX descriptor cache size (programmed as an order: 8 << order). */
+	BUILD_BUG_ON(TX_DC_ENTRIES != (8 << TX_DC_ENTRIES_ORDER));
+	EF4_POPULATE_OWORD_1(temp, FRF_AZ_TX_DC_SIZE, TX_DC_ENTRIES_ORDER);
+	ef4_writeo(efx, &temp, FR_AZ_TX_DC_CFG);
+
+	/* Set RX descriptor cache size.  Set low watermark to size-8, as
+	 * this allows most efficient prefetching.
+	 */
+	BUILD_BUG_ON(RX_DC_ENTRIES != (8 << RX_DC_ENTRIES_ORDER));
+	EF4_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_SIZE, RX_DC_ENTRIES_ORDER);
+	ef4_writeo(efx, &temp, FR_AZ_RX_DC_CFG);
+	EF4_POPULATE_OWORD_1(temp, FRF_AZ_RX_DC_PF_LWM, RX_DC_ENTRIES - 8);
+	ef4_writeo(efx, &temp, FR_AZ_RX_DC_PF_WM);
+
+	/* Program INT_KER address (DMA address of the irq_status buffer) */
+	EF4_POPULATE_OWORD_2(temp,
+			     FRF_AZ_NORM_INT_VEC_DIS_KER,
+			     EF4_INT_MODE_USE_MSI(efx),
+			     FRF_AZ_INT_ADR_KER, efx->irq_status.dma_addr);
+	ef4_writeo(efx, &temp, FR_AZ_INT_ADR_KER);
+
+	/* Use a valid MSI-X vector */
+	efx->irq_level = 0;
+
+	/* Enable all the genuinely fatal interrupts.  (They are still
+	 * masked by the overall interrupt mask, controlled by
+	 * ef4_farch_interrupts()).
+	 *
+	 * Note: All other fatal interrupts are enabled
+	 */
+	EF4_POPULATE_OWORD_3(temp,
+			     FRF_AZ_ILL_ADR_INT_KER_EN, 1,
+			     FRF_AZ_RBUF_OWN_INT_KER_EN, 1,
+			     FRF_AZ_TBUF_OWN_INT_KER_EN, 1);
+	EF4_INVERT_OWORD(temp);
+	ef4_writeo(efx, &temp, FR_AZ_FATAL_INTR_KER);
+
+	/* Disable the ugly timer-based TX DMA backoff and allow TX DMA to be
+	 * controlled by the RX FIFO fill level. Set arbitration to one pkt/Q.
+	 */
+	ef4_reado(efx, &temp, FR_AZ_TX_RESERVED);
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER, 0xfe);
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_RX_SPACER_EN, 1);
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_ONE_PKT_PER_Q, 1);
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_PUSH_EN, 1);
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_DIS_NON_IP_EV, 1);
+	/* Enable SW_EV to inherit in char driver - assume harmless here */
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_SOFT_EVT_EN, 1);
+	/* Prefetch threshold 2 => fetch when descriptor cache half empty */
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_THRESHOLD, 2);
+	/* Disable hardware watchdog which can misfire */
+	EF4_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_WD_TMR, 0x3fffff);
+	/* Squash TX of packets of 16 bytes or less */
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0)
+		EF4_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
+	ef4_writeo(efx, &temp, FR_AZ_TX_RESERVED);
+
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+		EF4_POPULATE_OWORD_4(temp,
+				     /* Default values */
+				     FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
+				     FRF_BZ_TX_PACE_SB_AF, 0xb,
+				     FRF_BZ_TX_PACE_FB_BASE, 0,
+				     /* Allow large pace values in the
+				      * fast bin. */
+				     FRF_BZ_TX_PACE_BIN_TH,
+				     FFE_BZ_TX_PACE_RESERVED);
+		ef4_writeo(efx, &temp, FR_BZ_TX_PACE);
+	}
+}
+
+/**************************************************************************
+ *
+ * Filter tables
+ *
+ **************************************************************************
+ */
+
+/* "Fudge factors" - difference between programmed value and actual depth.
+ * Due to pipelined implementation we need to program H/W with a value that
+ * is larger than the hop limit we want.
+ */
+#define EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD 3	/* added to *_WILD limits */
+#define EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL 1	/* added to *_FULL limits */
+
+/* Hard maximum search limit.  Hardware will time-out beyond 200-something.
+ * We also need to avoid infinite loops in ef4_farch_filter_search() when the
+ * table is full.
+ */
+#define EF4_FARCH_FILTER_CTL_SRCH_MAX 200
+
+/* Don't try very hard to find space for performance hints (priority <=
+ * EF4_FILTER_PRI_HINT), as this is counter-productive. */
+#define EF4_FARCH_FILTER_CTL_SRCH_HINT_MAX 5
+
+enum ef4_farch_filter_type {
+	EF4_FARCH_FILTER_TCP_FULL = 0,	/* 0-3: RX_IP table (type >> 2 == 0) */
+	EF4_FARCH_FILTER_TCP_WILD,
+	EF4_FARCH_FILTER_UDP_FULL,
+	EF4_FARCH_FILTER_UDP_WILD,
+	EF4_FARCH_FILTER_MAC_FULL = 4,	/* 4-5: MAC tables (type >> 2 == 1) */
+	EF4_FARCH_FILTER_MAC_WILD,
+	EF4_FARCH_FILTER_UC_DEF = 8,	/* 8-9: RX_DEF table (type >> 2 == 2) */
+	EF4_FARCH_FILTER_MC_DEF,
+	EF4_FARCH_FILTER_TYPE_COUNT,		/* size of per-type arrays */
+};
+
+enum ef4_farch_filter_table_id {
+	EF4_FARCH_FILTER_TABLE_RX_IP = 0,	/* TCP/UDP filter types */
+	EF4_FARCH_FILTER_TABLE_RX_MAC,		/* MAC filter types, RX */
+	EF4_FARCH_FILTER_TABLE_RX_DEF,		/* UC/MC default filters */
+	EF4_FARCH_FILTER_TABLE_TX_MAC,		/* MAC filter types, TX flag */
+	EF4_FARCH_FILTER_TABLE_COUNT,		/* count; also "invalid" id */
+};
+
+enum ef4_farch_filter_index {
+	EF4_FARCH_FILTER_INDEX_UC_DEF,	/* RX_DEF slot: unicast default */
+	EF4_FARCH_FILTER_INDEX_MC_DEF,	/* RX_DEF slot: multicast default */
+	EF4_FARCH_FILTER_SIZE_RX_DEF,	/* count of the slots above */
+};
+
+struct ef4_farch_filter_spec {
+	u8	type:4;		/* enum ef4_farch_filter_type */
+	u8	priority:4;	/* copied from ef4_filter_spec.priority */
+	u8	flags;		/* EF4_FILTER_FLAG_* bits */
+	u16	dmaq_id;	/* RX or TX queue id written into the filter */
+	u32	data[3];	/* match words; layout depends on type */
+};
+
+struct ef4_farch_filter_table {
+	enum ef4_farch_filter_table_id id;
+	u32		offset;		/* address of table relative to BAR */
+	unsigned	size;		/* number of entries */
+	unsigned	step;		/* step between entries */
+	unsigned	used;		/* number currently used */
+	unsigned long	*used_bitmap;	/* bit i set => spec[i] is occupied */
+	struct ef4_farch_filter_spec *spec;	/* indexed by filter index */
+	unsigned	search_limit[EF4_FARCH_FILTER_TYPE_COUNT]; /* per type */
+};
+
+struct ef4_farch_filter_state {
+	struct ef4_farch_filter_table table[EF4_FARCH_FILTER_TABLE_COUNT];
+};	/* table[] is indexed by enum ef4_farch_filter_table_id */
+
+static void
+ef4_farch_filter_table_clear_entry(struct ef4_nic *efx,
+				   struct ef4_farch_filter_table *table,
+				   unsigned int filter_idx);
+
+/* The filter hash function is LFSR polynomial x^16 + x^3 + 1 of a 32-bit
+ * key derived from the n-tuple.  The initial LFSR state is 0xffff. */
+static u16 ef4_farch_filter_hash(u32 key)
+{
+	u16 lfsr;
+
+	/* Fold in the upper half of the key (first 16 rounds) */
+	lfsr = 0x1fff ^ (key >> 16);
+	lfsr ^= (lfsr >> 3) ^ (lfsr >> 6);
+	lfsr ^= lfsr >> 9;
+	/* Fold in the lower half of the key (last 16 rounds) */
+	lfsr ^= (lfsr << 13) ^ key;
+	lfsr ^= (lfsr >> 3) ^ (lfsr >> 6);
+	return lfsr ^ (lfsr >> 9);
+}
+
+/* Step between successive probes when a hash collision occurs.  The value
+ * is always odd (2 * key - 1, truncated to 16 bits). */
+static u16 ef4_farch_filter_increment(u32 key)
+{
+	return (key << 1) - 1;
+}
+
+static enum ef4_farch_filter_table_id
+ef4_farch_filter_spec_table_id(const struct ef4_farch_filter_spec *spec)
+{
+	BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+		     (EF4_FARCH_FILTER_TCP_FULL >> 2)); /* type >> 2 is RX id */
+	BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+		     (EF4_FARCH_FILTER_TCP_WILD >> 2));
+	BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+		     (EF4_FARCH_FILTER_UDP_FULL >> 2));
+	BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_IP !=
+		     (EF4_FARCH_FILTER_UDP_WILD >> 2));
+	BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_MAC !=
+		     (EF4_FARCH_FILTER_MAC_FULL >> 2));
+	BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_RX_MAC !=
+		     (EF4_FARCH_FILTER_MAC_WILD >> 2));
+	BUILD_BUG_ON(EF4_FARCH_FILTER_TABLE_TX_MAC !=
+		     EF4_FARCH_FILTER_TABLE_RX_MAC + 2); /* TX flag adds 2 */
+	return (spec->type >> 2) + ((spec->flags & EF4_FILTER_FLAG_TX) ? 2 : 0);
+}
+
+static void ef4_farch_filter_push_rx_config(struct ef4_nic *efx)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	struct ef4_farch_filter_table *table;
+	ef4_oword_t filter_ctl;
+
+	ef4_reado(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL); /* read-modify-write */
+
+	table = &state->table[EF4_FARCH_FILTER_TABLE_RX_IP];
+	EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_FULL_SRCH_LIMIT,
+			    table->search_limit[EF4_FARCH_FILTER_TCP_FULL] +
+			    EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+	EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_TCP_WILD_SRCH_LIMIT,
+			    table->search_limit[EF4_FARCH_FILTER_TCP_WILD] +
+			    EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+	EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_FULL_SRCH_LIMIT,
+			    table->search_limit[EF4_FARCH_FILTER_UDP_FULL] +
+			    EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+	EF4_SET_OWORD_FIELD(filter_ctl, FRF_BZ_UDP_WILD_SRCH_LIMIT,
+			    table->search_limit[EF4_FARCH_FILTER_UDP_WILD] +
+			    EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+
+	table = &state->table[EF4_FARCH_FILTER_TABLE_RX_MAC];
+	if (table->size) { /* MAC limits only when the RX MAC table exists */
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT,
+			table->search_limit[EF4_FARCH_FILTER_MAC_FULL] +
+			EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT,
+			table->search_limit[EF4_FARCH_FILTER_MAC_WILD] +
+			EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+	}
+
+	table = &state->table[EF4_FARCH_FILTER_TABLE_RX_DEF];
+	if (table->size) { /* default (no-match) filters are present */
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_CZ_UNICAST_NOMATCH_Q_ID,
+			table->spec[EF4_FARCH_FILTER_INDEX_UC_DEF].dmaq_id);
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED,
+			!!(table->spec[EF4_FARCH_FILTER_INDEX_UC_DEF].flags &
+			   EF4_FILTER_FLAG_RX_RSS));
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_CZ_MULTICAST_NOMATCH_Q_ID,
+			table->spec[EF4_FARCH_FILTER_INDEX_MC_DEF].dmaq_id);
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED,
+			!!(table->spec[EF4_FARCH_FILTER_INDEX_MC_DEF].flags &
+			   EF4_FILTER_FLAG_RX_RSS));
+
+		/* There is a single bit to enable RX scatter for all
+		 * unmatched packets.  Only set it if scatter is
+		 * enabled in both filter specs.
+		 */
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+			!!(table->spec[EF4_FARCH_FILTER_INDEX_UC_DEF].flags &
+			   table->spec[EF4_FARCH_FILTER_INDEX_MC_DEF].flags &
+			   EF4_FILTER_FLAG_RX_SCATTER));
+	} else if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+		/* We don't expose 'default' filters because unmatched
+		 * packets always go to the queue number found in the
+		 * RSS table.  But we still need to set the RX scatter
+		 * bit here.
+		 */
+		EF4_SET_OWORD_FIELD(
+			filter_ctl, FRF_BZ_SCATTER_ENBL_NO_MATCH_Q,
+			efx->rx_scatter);
+	}
+
+	ef4_writeo(efx, &filter_ctl, FR_BZ_RX_FILTER_CTL);
+}
+
+/* Program the TX MAC filter search ranges into FR_AZ_TX_CFG */
+static void ef4_farch_filter_push_tx_limits(struct ef4_nic *efx)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	struct ef4_farch_filter_table *tx_mac =
+		&state->table[EF4_FARCH_FILTER_TABLE_TX_MAC];
+	ef4_oword_t cfg;
+
+	ef4_reado(efx, &cfg, FR_AZ_TX_CFG);
+	if (tx_mac->size) {
+		EF4_SET_OWORD_FIELD(
+			cfg, FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE,
+			tx_mac->search_limit[EF4_FARCH_FILTER_MAC_FULL] +
+			EF4_FARCH_FILTER_CTL_SRCH_FUDGE_FULL);
+		EF4_SET_OWORD_FIELD(
+			cfg, FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE,
+			tx_mac->search_limit[EF4_FARCH_FILTER_MAC_WILD] +
+			EF4_FARCH_FILTER_CTL_SRCH_FUDGE_WILD);
+	}
+
+	ef4_writeo(efx, &cfg, FR_AZ_TX_CFG);
+}
+
+static int
+ef4_farch_filter_from_gen_spec(struct ef4_farch_filter_spec *spec,
+			       const struct ef4_filter_spec *gen_spec)
+{
+	bool is_full = false;	/* set by the "full match" case labels */
+
+	if ((gen_spec->flags & EF4_FILTER_FLAG_RX_RSS) &&
+	    gen_spec->rss_context != EF4_FILTER_RSS_CONTEXT_DEFAULT)
+		return -EINVAL;	/* only the default RSS context is handled */
+
+	spec->priority = gen_spec->priority;
+	spec->flags = gen_spec->flags;
+	spec->dmaq_id = gen_spec->dmaq_id;
+
+	switch (gen_spec->match_flags) {
+	case (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+	      EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT |
+	      EF4_FILTER_MATCH_REM_HOST | EF4_FILTER_MATCH_REM_PORT):
+		is_full = true;
+		/* fall through */
+	case (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+	      EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT): {
+		__be32 rhost, host1, host2;
+		__be16 rport, port1, port2;
+
+		EF4_BUG_ON_PARANOID(!(gen_spec->flags & EF4_FILTER_FLAG_RX));
+
+		if (gen_spec->ether_type != htons(ETH_P_IP))
+			return -EPROTONOSUPPORT;	/* IPv4 only */
+		if (gen_spec->loc_port == 0 ||
+		    (is_full && gen_spec->rem_port == 0))
+			return -EADDRNOTAVAIL;	/* ports must be non-zero */
+		switch (gen_spec->ip_proto) {
+		case IPPROTO_TCP:
+			spec->type = (is_full ? EF4_FARCH_FILTER_TCP_FULL :
+				      EF4_FARCH_FILTER_TCP_WILD);
+			break;
+		case IPPROTO_UDP:
+			spec->type = (is_full ? EF4_FARCH_FILTER_UDP_FULL :
+				      EF4_FARCH_FILTER_UDP_WILD);
+			break;
+		default:
+			return -EPROTONOSUPPORT;
+		}
+
+		/* Filter is constructed in terms of source and destination,
+		 * with the odd wrinkle that the ports are swapped in a UDP
+		 * wildcard filter.  We need to convert from local and remote
+		 * (= zero for wildcard) addresses.
+		 */
+		rhost = is_full ? gen_spec->rem_host[0] : 0;
+		rport = is_full ? gen_spec->rem_port : 0;
+		host1 = rhost;
+		host2 = gen_spec->loc_host[0];
+		if (!is_full && gen_spec->ip_proto == IPPROTO_UDP) {
+			port1 = gen_spec->loc_port;
+			port2 = rport;
+		} else {
+			port1 = rport;
+			port2 = gen_spec->loc_port;
+		}
+		spec->data[0] = ntohl(host1) << 16 | ntohs(port1);
+		spec->data[1] = ntohs(port2) << 16 | ntohl(host1) >> 16;
+		spec->data[2] = ntohl(host2);
+
+		break;
+	}
+
+	case EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_OUTER_VID:
+		is_full = true;
+		/* fall through */
+	case EF4_FILTER_MATCH_LOC_MAC:
+		spec->type = (is_full ? EF4_FARCH_FILTER_MAC_FULL :
+			      EF4_FARCH_FILTER_MAC_WILD);
+		spec->data[0] = is_full ? ntohs(gen_spec->outer_vid) : 0;
+		spec->data[1] = (gen_spec->loc_mac[2] << 24 |
+				 gen_spec->loc_mac[3] << 16 |
+				 gen_spec->loc_mac[4] << 8 |
+				 gen_spec->loc_mac[5]);
+		spec->data[2] = (gen_spec->loc_mac[0] << 8 |
+				 gen_spec->loc_mac[1]);
+		break;
+
+	case EF4_FILTER_MATCH_LOC_MAC_IG:
+		spec->type = (is_multicast_ether_addr(gen_spec->loc_mac) ?
+			      EF4_FARCH_FILTER_MC_DEF :
+			      EF4_FARCH_FILTER_UC_DEF);
+		memset(spec->data, 0, sizeof(spec->data)); /* ensure equality */
+		break;
+
+	default:
+		return -EPROTONOSUPPORT; /* unsupported match_flags combination */
+	}
+
+	return 0;
+}
+
+static void
+ef4_farch_filter_to_gen_spec(struct ef4_filter_spec *gen_spec,
+			     const struct ef4_farch_filter_spec *spec)
+{
+	bool is_full = false;	/* set by the *_FULL case labels */
+
+	/* *gen_spec should be completely initialised, to be consistent
+	 * with ef4_filter_init_{rx,tx}() and in case we want to copy
+	 * it back to userland.
+	 */
+	memset(gen_spec, 0, sizeof(*gen_spec));
+
+	gen_spec->priority = spec->priority;
+	gen_spec->flags = spec->flags;
+	gen_spec->dmaq_id = spec->dmaq_id;
+
+	switch (spec->type) {
+	case EF4_FARCH_FILTER_TCP_FULL:
+	case EF4_FARCH_FILTER_UDP_FULL:
+		is_full = true;
+		/* fall through */
+	case EF4_FARCH_FILTER_TCP_WILD:
+	case EF4_FARCH_FILTER_UDP_WILD: {
+		__be32 host1, host2;
+		__be16 port1, port2;
+
+		gen_spec->match_flags =
+			EF4_FILTER_MATCH_ETHER_TYPE |
+			EF4_FILTER_MATCH_IP_PROTO |
+			EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT;
+		if (is_full)
+			gen_spec->match_flags |= (EF4_FILTER_MATCH_REM_HOST |
+						  EF4_FILTER_MATCH_REM_PORT);
+		gen_spec->ether_type = htons(ETH_P_IP);
+		gen_spec->ip_proto =
+			(spec->type == EF4_FARCH_FILTER_TCP_FULL ||
+			 spec->type == EF4_FARCH_FILTER_TCP_WILD) ?
+			IPPROTO_TCP : IPPROTO_UDP;
+
+		host1 = htonl(spec->data[0] >> 16 | spec->data[1] << 16);
+		port1 = htons(spec->data[0]);
+		host2 = htonl(spec->data[2]);
+		port2 = htons(spec->data[1] >> 16);
+		if (spec->flags & EF4_FILTER_FLAG_TX) {
+			gen_spec->loc_host[0] = host1;
+			gen_spec->rem_host[0] = host2;
+		} else {
+			gen_spec->loc_host[0] = host2;
+			gen_spec->rem_host[0] = host1;
+		}
+		if (!!(gen_spec->flags & EF4_FILTER_FLAG_TX) ^
+		    (!is_full && gen_spec->ip_proto == IPPROTO_UDP)) {
+			gen_spec->loc_port = port1;
+			gen_spec->rem_port = port2;
+		} else {
+			gen_spec->loc_port = port2;
+			gen_spec->rem_port = port1;
+		}
+
+		break;
+	}
+
+	case EF4_FARCH_FILTER_MAC_FULL:
+		is_full = true;
+		/* fall through */
+	case EF4_FARCH_FILTER_MAC_WILD:
+		gen_spec->match_flags = EF4_FILTER_MATCH_LOC_MAC;
+		if (is_full)
+			gen_spec->match_flags |= EF4_FILTER_MATCH_OUTER_VID;
+		gen_spec->loc_mac[0] = spec->data[2] >> 8;
+		gen_spec->loc_mac[1] = spec->data[2];
+		gen_spec->loc_mac[2] = spec->data[1] >> 24;
+		gen_spec->loc_mac[3] = spec->data[1] >> 16;
+		gen_spec->loc_mac[4] = spec->data[1] >> 8;
+		gen_spec->loc_mac[5] = spec->data[1];
+		gen_spec->outer_vid = htons(spec->data[0]);
+		break;
+
+	case EF4_FARCH_FILTER_UC_DEF:
+	case EF4_FARCH_FILTER_MC_DEF:
+		gen_spec->match_flags = EF4_FILTER_MATCH_LOC_MAC_IG;
+		gen_spec->loc_mac[0] = spec->type == EF4_FARCH_FILTER_MC_DEF;
+		break;
+
+	default:
+		WARN_ON(1);	/* unrecognised spec->type */
+		break;
+	}
+}
+
+static void
+ef4_farch_filter_init_rx_auto(struct ef4_nic *efx,
+			      struct ef4_farch_filter_spec *spec)
+{
+	/* If there's only one channel then disable RSS for non VF
+	 * traffic, thereby allowing VFs to use RSS when the PF can't.
+	 */
+	spec->priority = EF4_FILTER_PRI_AUTO;
+	spec->flags = (EF4_FILTER_FLAG_RX |
+		       (ef4_rss_enabled(efx) ? EF4_FILTER_FLAG_RX_RSS : 0) |
+		       (efx->rx_scatter ? EF4_FILTER_FLAG_RX_SCATTER : 0));
+	spec->dmaq_id = 0;	/* automatic filters always target queue 0 */
+}
+
+/* Build the filter entry in *filter; return its n-tuple key for hashing. */
+static u32 ef4_farch_filter_build(ef4_oword_t *filter,
+				  struct ef4_farch_filter_spec *spec)
+{
+	u32 data3;	/* fourth key word, XORed in with data[0..2] */
+
+	switch (ef4_farch_filter_spec_table_id(spec)) {
+	case EF4_FARCH_FILTER_TABLE_RX_IP: {
+		bool is_udp = (spec->type == EF4_FARCH_FILTER_UDP_FULL ||
+			       spec->type == EF4_FARCH_FILTER_UDP_WILD);
+		EF4_POPULATE_OWORD_7(
+			*filter,
+			FRF_BZ_RSS_EN,
+			!!(spec->flags & EF4_FILTER_FLAG_RX_RSS),
+			FRF_BZ_SCATTER_EN,
+			!!(spec->flags & EF4_FILTER_FLAG_RX_SCATTER),
+			FRF_BZ_TCP_UDP, is_udp,
+			FRF_BZ_RXQ_ID, spec->dmaq_id,
+			EF4_DWORD_2, spec->data[2],
+			EF4_DWORD_1, spec->data[1],
+			EF4_DWORD_0, spec->data[0]);
+		data3 = is_udp;
+		break;
+	}
+
+	case EF4_FARCH_FILTER_TABLE_RX_MAC: {
+		bool is_wild = spec->type == EF4_FARCH_FILTER_MAC_WILD;
+		EF4_POPULATE_OWORD_7(
+			*filter,
+			FRF_CZ_RMFT_RSS_EN,
+			!!(spec->flags & EF4_FILTER_FLAG_RX_RSS),
+			FRF_CZ_RMFT_SCATTER_EN,
+			!!(spec->flags & EF4_FILTER_FLAG_RX_SCATTER),
+			FRF_CZ_RMFT_RXQ_ID, spec->dmaq_id,
+			FRF_CZ_RMFT_WILDCARD_MATCH, is_wild,
+			FRF_CZ_RMFT_DEST_MAC_HI, spec->data[2],
+			FRF_CZ_RMFT_DEST_MAC_LO, spec->data[1],
+			FRF_CZ_RMFT_VLAN_ID, spec->data[0]);
+		data3 = is_wild;
+		break;
+	}
+
+	case EF4_FARCH_FILTER_TABLE_TX_MAC: {
+		bool is_wild = spec->type == EF4_FARCH_FILTER_MAC_WILD;
+		EF4_POPULATE_OWORD_5(*filter,
+				     FRF_CZ_TMFT_TXQ_ID, spec->dmaq_id,
+				     FRF_CZ_TMFT_WILDCARD_MATCH, is_wild,
+				     FRF_CZ_TMFT_SRC_MAC_HI, spec->data[2],
+				     FRF_CZ_TMFT_SRC_MAC_LO, spec->data[1],
+				     FRF_CZ_TMFT_VLAN_ID, spec->data[0]);
+		data3 = is_wild | spec->dmaq_id << 1;
+		break;
+	}
+
+	default:
+		BUG();	/* no entry format is defined here for other tables */
+	}
+
+	return spec->data[0] ^ spec->data[1] ^ spec->data[2] ^ data3;
+}
+
+/* Two specs match if type and data agree and, for TX filters, the queue too */
+static bool ef4_farch_filter_equal(const struct ef4_farch_filter_spec *left,
+				   const struct ef4_farch_filter_spec *right)
+{
+	if (left->type != right->type)
+		return false;
+	if (memcmp(left->data, right->data, sizeof(left->data)) != 0)
+		return false;
+	/* The queue id only distinguishes filters that carry the TX flag */
+	if (!(left->flags & EF4_FILTER_FLAG_TX))
+		return true;
+	return left->dmaq_id == right->dmaq_id;
+}
+
+/*
+ * Construct/deconstruct external filter IDs.  At least the RX filter
+ * IDs must be ordered by matching priority, for RX NFC semantics.
+ *
+ * Deconstruction needs to be robust against invalid IDs so that
+ * ef4_filter_remove_id_safe() and ef4_filter_get_filter_safe() can
+ * accept user-provided IDs.
+ */
+
+#define EF4_FARCH_FILTER_MATCH_PRI_COUNT	5	/* priorities 0-4 below */
+
+static const u8 ef4_farch_filter_type_match_pri[EF4_FARCH_FILTER_TYPE_COUNT] = {
+	[EF4_FARCH_FILTER_TCP_FULL]	= 0,	/* indexed by filter type */
+	[EF4_FARCH_FILTER_UDP_FULL]	= 0,
+	[EF4_FARCH_FILTER_TCP_WILD]	= 1,
+	[EF4_FARCH_FILTER_UDP_WILD]	= 1,
+	[EF4_FARCH_FILTER_MAC_FULL]	= 2,
+	[EF4_FARCH_FILTER_MAC_WILD]	= 3,
+	[EF4_FARCH_FILTER_UC_DEF]	= 4,
+	[EF4_FARCH_FILTER_MC_DEF]	= 4,
+};
+
+static const enum ef4_farch_filter_table_id ef4_farch_filter_range_table[] = {
+	EF4_FARCH_FILTER_TABLE_RX_IP,	/* RX match pri 0 */
+	EF4_FARCH_FILTER_TABLE_RX_IP,	/* RX match pri 1 */
+	EF4_FARCH_FILTER_TABLE_RX_MAC,	/* RX match pri 2 */
+	EF4_FARCH_FILTER_TABLE_RX_MAC,	/* RX match pri 3 */
+	EF4_FARCH_FILTER_TABLE_RX_DEF,	/* RX match pri 4 */
+	EF4_FARCH_FILTER_TABLE_TX_MAC,	/* TX match pri 0 */
+	EF4_FARCH_FILTER_TABLE_TX_MAC,	/* TX match pri 1 */
+};
+/* NOTE(review): make_id() gives TX MAC filters range 7-8, beyond entry 6 */
+#define EF4_FARCH_FILTER_INDEX_WIDTH 13	/* low bits of an ID hold the index */
+#define EF4_FARCH_FILTER_INDEX_MASK ((1 << EF4_FARCH_FILTER_INDEX_WIDTH) - 1)
+
+static inline u32
+ef4_farch_filter_make_id(const struct ef4_farch_filter_spec *spec,
+			 unsigned int index)
+{
+	unsigned int pri = ef4_farch_filter_type_match_pri[spec->type];
+	bool is_rx = spec->flags & EF4_FILTER_FLAG_RX;
+	/* Non-RX ranges are numbered after all of the RX match priorities */
+	unsigned int range =
+		is_rx ? pri : pri + EF4_FARCH_FILTER_MATCH_PRI_COUNT;
+
+	return (range << EF4_FARCH_FILTER_INDEX_WIDTH) | index;
+}
+
+static inline enum ef4_farch_filter_table_id
+ef4_farch_filter_id_table_id(u32 id)
+{
+	unsigned int range = id >> EF4_FARCH_FILTER_INDEX_WIDTH;
+
+	/* IDs may be user-provided: map out-of-range values to "invalid" */
+	if (range >= ARRAY_SIZE(ef4_farch_filter_range_table))
+		return EF4_FARCH_FILTER_TABLE_COUNT; /* invalid */
+	return ef4_farch_filter_range_table[range];
+}
+
+static inline unsigned int ef4_farch_filter_id_index(u32 id)
+{
+	return id & EF4_FARCH_FILTER_INDEX_MASK; /* low INDEX_WIDTH bits */
+}
+
+u32 ef4_farch_filter_get_rx_id_limit(struct ef4_nic *efx)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	const struct ef4_farch_filter_table *table;
+	unsigned int range;
+
+	/* Scan RX ranges from the highest down; first non-empty table wins */
+	for (range = EF4_FARCH_FILTER_MATCH_PRI_COUNT; range-- > 0; ) {
+		table = &state->table[ef4_farch_filter_range_table[range]];
+		if (table->size != 0)
+			return (range << EF4_FARCH_FILTER_INDEX_WIDTH) |
+				table->size;
+	}
+	return 0;
+}
+
+s32 ef4_farch_filter_insert(struct ef4_nic *efx,
+			    struct ef4_filter_spec *gen_spec,
+			    bool replace_equal)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	struct ef4_farch_filter_table *table;
+	struct ef4_farch_filter_spec spec;
+	ef4_oword_t filter;
+	int rep_index, ins_index;
+	unsigned int depth = 0;
+	int rc;
+
+	rc = ef4_farch_filter_from_gen_spec(&spec, gen_spec);
+	if (rc)
+		return rc;
+
+	table = &state->table[ef4_farch_filter_spec_table_id(&spec)];
+	if (table->size == 0)
+		return -EINVAL;
+
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "%s: type %d search_limit=%d", __func__, spec.type,
+		   table->search_limit[spec.type]);
+
+	if (table->id == EF4_FARCH_FILTER_TABLE_RX_DEF) {
+		/* One filter spec per type, so the type alone fixes the index */
+		BUILD_BUG_ON(EF4_FARCH_FILTER_INDEX_UC_DEF != 0);
+		BUILD_BUG_ON(EF4_FARCH_FILTER_INDEX_MC_DEF !=
+			     EF4_FARCH_FILTER_MC_DEF - EF4_FARCH_FILTER_UC_DEF);
+		rep_index = spec.type - EF4_FARCH_FILTER_UC_DEF;
+		ins_index = rep_index;
+
+		spin_lock_bh(&efx->filter_lock);
+	} else {
+		/* Search concurrently for
+		 * (1) a filter to be replaced (rep_index): any filter
+		 *     with the same match values, up to the current
+		 *     search depth for this type, and
+		 * (2) the insertion point (ins_index): (1) or any
+		 *     free slot before it or up to the maximum search
+		 *     depth for this priority
+		 * We fail if we cannot find (2).
+		 *
+		 * We can stop once either
+		 * (a) we find (1), in which case we have definitely
+		 *     found (2) as well; or
+		 * (b) we have searched exhaustively for (1), and have
+		 *     either found (2) or searched exhaustively for it
+		 */
+		u32 key = ef4_farch_filter_build(&filter, &spec);
+		unsigned int hash = ef4_farch_filter_hash(key);
+		unsigned int incr = ef4_farch_filter_increment(key);
+		unsigned int max_rep_depth = table->search_limit[spec.type];
+		unsigned int max_ins_depth =
+			spec.priority <= EF4_FILTER_PRI_HINT ?
+			EF4_FARCH_FILTER_CTL_SRCH_HINT_MAX :
+			EF4_FARCH_FILTER_CTL_SRCH_MAX;
+		unsigned int i = hash & (table->size - 1);
+
+		ins_index = -1;	/* no insertion point found yet */
+		depth = 1;	/* slots probed so far, counting the first */
+
+		spin_lock_bh(&efx->filter_lock);
+
+		for (;;) {
+			if (!test_bit(i, table->used_bitmap)) {
+				if (ins_index < 0)
+					ins_index = i;
+			} else if (ef4_farch_filter_equal(&spec,
+							  &table->spec[i])) {
+				/* Case (a) */
+				if (ins_index < 0)
+					ins_index = i;
+				rep_index = i;
+				break;
+			}
+
+			if (depth >= max_rep_depth &&
+			    (ins_index >= 0 || depth >= max_ins_depth)) {
+				/* Case (b) */
+				if (ins_index < 0) {
+					rc = -EBUSY;
+					goto out;
+				}
+				rep_index = -1;
+				break;
+			}
+
+			i = (i + incr) & (table->size - 1);
+			++depth;
+		}
+	}
+
+	/* If we found a filter to be replaced, check whether we
+	 * should do so
+	 */
+	if (rep_index >= 0) {
+		struct ef4_farch_filter_spec *saved_spec =
+			&table->spec[rep_index];
+
+		if (spec.priority == saved_spec->priority && !replace_equal) {
+			rc = -EEXIST;
+			goto out;
+		}
+		if (spec.priority < saved_spec->priority) {
+			rc = -EPERM;
+			goto out;
+		}
+		if (saved_spec->priority == EF4_FILTER_PRI_AUTO ||
+		    saved_spec->flags & EF4_FILTER_FLAG_RX_OVER_AUTO)
+			spec.flags |= EF4_FILTER_FLAG_RX_OVER_AUTO;
+	}
+
+	/* Insert the filter; only a newly occupied slot bumps the used count */
+	if (ins_index != rep_index) {
+		__set_bit(ins_index, table->used_bitmap);
+		++table->used;
+	}
+	table->spec[ins_index] = spec;
+
+	if (table->id == EF4_FARCH_FILTER_TABLE_RX_DEF) {
+		ef4_farch_filter_push_rx_config(efx);
+	} else {
+		if (table->search_limit[spec.type] < depth) {
+			table->search_limit[spec.type] = depth;
+			if (spec.flags & EF4_FILTER_FLAG_TX)
+				ef4_farch_filter_push_tx_limits(efx);
+			else
+				ef4_farch_filter_push_rx_config(efx);
+		}
+
+		ef4_writeo(efx, &filter,
+			   table->offset + table->step * ins_index);
+
+		/* If we were able to replace a filter by inserting
+		 * at a lower depth, clear the replaced filter
+		 */
+		if (ins_index != rep_index && rep_index >= 0)
+			ef4_farch_filter_table_clear_entry(efx, table,
+							   rep_index);
+	}
+
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "%s: filter type %d index %d rxq %u set",
+		   __func__, spec.type, ins_index, spec.dmaq_id);
+	rc = ef4_farch_filter_make_id(&spec, ins_index); /* success */
+
+out:
+	spin_unlock_bh(&efx->filter_lock);
+	return rc;	/* new filter ID, or a negative error code */
+}
+
+static void
+ef4_farch_filter_table_clear_entry(struct ef4_nic *efx,
+				   struct ef4_farch_filter_table *table,
+				   unsigned int filter_idx)
+{
+	static ef4_oword_t filter;	/* static and never written: all-zero */
+
+	EF4_WARN_ON_PARANOID(!test_bit(filter_idx, table->used_bitmap));
+	BUG_ON(table->offset == 0); /* can't clear MAC default filters */
+
+	__clear_bit(filter_idx, table->used_bitmap);
+	--table->used;
+	memset(&table->spec[filter_idx], 0, sizeof(table->spec[0]));
+
+	ef4_writeo(efx, &filter, table->offset + table->step * filter_idx);
+
+	/* If this filter required a greater search depth than
+	 * any other, the search limit for its type can now be
+	 * decreased.  However, it is hard to determine that
+	 * unless the table has become completely empty - in
+	 * which case, all its search limits can be set to 0.
+	 */
+	if (unlikely(table->used == 0)) {
+		memset(table->search_limit, 0, sizeof(table->search_limit));
+		if (table->id == EF4_FARCH_FILTER_TABLE_TX_MAC)
+			ef4_farch_filter_push_tx_limits(efx);
+		else
+			ef4_farch_filter_push_rx_config(efx);
+	}
+}
+
+/* Remove the filter at filter_idx if it is in use at the given priority */
+static int ef4_farch_filter_remove(struct ef4_nic *efx,
+				   struct ef4_farch_filter_table *table,
+				   unsigned int filter_idx,
+				   enum ef4_filter_priority priority)
+{
+	struct ef4_farch_filter_spec *victim = &table->spec[filter_idx];
+
+	if (!test_bit(filter_idx, table->used_bitmap) ||
+	    victim->priority != priority)
+		return -ENOENT;
+	if (!(victim->flags & EF4_FILTER_FLAG_RX_OVER_AUTO)) {
+		ef4_farch_filter_table_clear_entry(efx, table, filter_idx);
+		return 0;
+	}
+	/* Flagged as over an automatic filter: reinitialise, don't clear */
+	ef4_farch_filter_init_rx_auto(efx, victim);
+	ef4_farch_filter_push_rx_config(efx);
+	return 0;
+}
+
+/* Remove the filter named by filter_id if it is in use at this priority */
+int ef4_farch_filter_remove_safe(struct ef4_nic *efx,
+				 enum ef4_filter_priority priority,
+				 u32 filter_id)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	enum ef4_farch_filter_table_id table_id;
+	struct ef4_farch_filter_table *table;
+	unsigned int filter_idx;
+	int rc;
+
+	/* Decode and bounds-check both parts of the ID before locking */
+	table_id = ef4_farch_filter_id_table_id(filter_id);
+	if ((unsigned int)table_id >= EF4_FARCH_FILTER_TABLE_COUNT)
+		return -ENOENT;
+	table = &state->table[table_id];
+
+	filter_idx = ef4_farch_filter_id_index(filter_id);
+	if (filter_idx >= table->size)
+		return -ENOENT;
+
+	spin_lock_bh(&efx->filter_lock);
+	rc = ef4_farch_filter_remove(efx, table, filter_idx, priority);
+	spin_unlock_bh(&efx->filter_lock);
+
+	return rc;
+}
+
+/* Look up the filter named by filter_id and, if it is in use at the
+ * given priority, convert it into spec_buf.  Returns 0 or -ENOENT.
+ */
+int ef4_farch_filter_get_safe(struct ef4_nic *efx,
+			      enum ef4_filter_priority priority,
+			      u32 filter_id, struct ef4_filter_spec *spec_buf)
+{
+	enum ef4_farch_filter_table_id table_id =
+		ef4_farch_filter_id_table_id(filter_id);
+	unsigned int filter_idx = ef4_farch_filter_id_index(filter_id);
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	struct ef4_farch_filter_table *table;
+	struct ef4_farch_filter_spec *stored;
+	int rc = -ENOENT;
+
+	/* Reject IDs whose range or index part is out of bounds */
+	if ((unsigned int)table_id >= EF4_FARCH_FILTER_TABLE_COUNT)
+		return -ENOENT;
+	table = &state->table[table_id];
+	if (filter_idx >= table->size)
+		return -ENOENT;
+	stored = &table->spec[filter_idx];
+
+	spin_lock_bh(&efx->filter_lock);
+
+	if (test_bit(filter_idx, table->used_bitmap) &&
+	    stored->priority == priority) {
+		ef4_farch_filter_to_gen_spec(spec_buf, stored);
+		rc = 0;
+	}
+
+	spin_unlock_bh(&efx->filter_lock);
+
+	return rc;
+}
+
+/* Remove every non-automatic filter of the given priority from a table */
+static void
+ef4_farch_filter_table_clear(struct ef4_nic *efx,
+			     enum ef4_farch_filter_table_id table_id,
+			     enum ef4_filter_priority priority)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	struct ef4_farch_filter_table *table = &state->table[table_id];
+	unsigned int idx;
+
+	spin_lock_bh(&efx->filter_lock);
+	for (idx = 0; idx < table->size; idx++) {
+		if (table->spec[idx].priority != EF4_FILTER_PRI_AUTO)
+			ef4_farch_filter_remove(efx, table, idx, priority);
+	}
+	spin_unlock_bh(&efx->filter_lock);
+}
+
+int ef4_farch_filter_clear_rx(struct ef4_nic *efx,
+			       enum ef4_filter_priority priority)
+{
+	ef4_farch_filter_table_clear(efx, EF4_FARCH_FILTER_TABLE_RX_IP,
+				     priority);
+	ef4_farch_filter_table_clear(efx, EF4_FARCH_FILTER_TABLE_RX_MAC,
+				     priority);
+	ef4_farch_filter_table_clear(efx, EF4_FARCH_FILTER_TABLE_RX_DEF,
+				     priority);
+	return 0;	/* the per-table clears return nothing to propagate */
+}
+
+/* Count the in-use RX filters whose priority matches the one given */
+u32 ef4_farch_filter_count_rx_used(struct ef4_nic *efx,
+				   enum ef4_filter_priority priority)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	enum ef4_farch_filter_table_id table_id;
+	u32 n_used = 0;
+
+	spin_lock_bh(&efx->filter_lock);
+	for (table_id = EF4_FARCH_FILTER_TABLE_RX_IP;
+	     table_id <= EF4_FARCH_FILTER_TABLE_RX_DEF;
+	     table_id++) {
+		struct ef4_farch_filter_table *table = &state->table[table_id];
+		unsigned int idx;
+
+		for (idx = 0; idx < table->size; idx++) {
+			if (test_bit(idx, table->used_bitmap) &&
+			    table->spec[idx].priority == priority)
+				n_used++;
+		}
+	}
+
+	spin_unlock_bh(&efx->filter_lock);
+
+	return n_used;
+}
+
+/* Collect IDs of RX filters at this priority; -EMSGSIZE if buf too small */
+s32 ef4_farch_filter_get_rx_ids(struct ef4_nic *efx,
+				enum ef4_filter_priority priority,
+				u32 *buf, u32 size)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	enum ef4_farch_filter_table_id table_id;
+	struct ef4_farch_filter_table *table;
+	unsigned int idx;
+	s32 n_ids = 0;
+
+	spin_lock_bh(&efx->filter_lock);
+
+	for (table_id = EF4_FARCH_FILTER_TABLE_RX_IP;
+	     table_id <= EF4_FARCH_FILTER_TABLE_RX_DEF;
+	     table_id++) {
+		table = &state->table[table_id];
+		for (idx = 0; idx < table->size; idx++) {
+			if (!test_bit(idx, table->used_bitmap) ||
+			    table->spec[idx].priority != priority)
+				continue;
+			if (n_ids == size) {
+				n_ids = -EMSGSIZE;
+				goto out;
+			}
+			buf[n_ids++] = ef4_farch_filter_make_id(
+				&table->spec[idx], idx);
+		}
+	}
+out:
+	spin_unlock_bh(&efx->filter_lock);
+	return n_ids;
+}
+
+/* Restore filter state after reset */
+void ef4_farch_filter_table_restore(struct ef4_nic *efx)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	unsigned int table_id, idx;
+
+	spin_lock_bh(&efx->filter_lock);
+
+	/* Rewrite every in-use entry of each table that has registers */
+	for (table_id = 0; table_id < EF4_FARCH_FILTER_TABLE_COUNT; table_id++) {
+		struct ef4_farch_filter_table *table = &state->table[table_id];
+		ef4_oword_t filter;
+
+		/* Tables with no step are not written entry by entry */
+		if (table->step == 0)
+			continue;
+
+		for (idx = 0; idx < table->size; idx++) {
+			if (test_bit(idx, table->used_bitmap)) {
+				ef4_farch_filter_build(&filter,
+						       &table->spec[idx]);
+				ef4_writeo(efx, &filter,
+					   table->offset + table->step * idx);
+			}
+		}
+	}
+
+	ef4_farch_filter_push_rx_config(efx);
+	ef4_farch_filter_push_tx_limits(efx);
+
+	spin_unlock_bh(&efx->filter_lock);
+}
+
+void ef4_farch_filter_table_remove(struct ef4_nic *efx)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	unsigned int i;
+
+	for (i = 0; i < EF4_FARCH_FILTER_TABLE_COUNT; i++) {
+		kfree(state->table[i].used_bitmap);	/* from kcalloc() */
+		vfree(state->table[i].spec);		/* from vzalloc() */
+	}
+	kfree(state);
+}
+
+/* Allocate and initialise the filter state; returns 0 or -ENOMEM */
+int ef4_farch_filter_table_probe(struct ef4_nic *efx)
+{
+	struct ef4_farch_filter_state *state;
+	struct ef4_farch_filter_table *table;
+	unsigned int n;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+	efx->filter_state = state;
+
+	/* Only Falcon B0 and later get a populated RX IP table */
+	if (ef4_nic_rev(efx) >= EF4_REV_FALCON_B0) {
+		table = &state->table[EF4_FARCH_FILTER_TABLE_RX_IP];
+		table->id = EF4_FARCH_FILTER_TABLE_RX_IP;
+		table->offset = FR_BZ_RX_FILTER_TBL0;
+		table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
+		table->step = FR_BZ_RX_FILTER_TBL0_STEP;
+	}
+
+	/* Allocate a bitmap and spec array for every non-empty table */
+	for (n = 0; n < EF4_FARCH_FILTER_TABLE_COUNT; n++) {
+		table = &state->table[n];
+		if (!table->size)
+			continue;
+		table->used_bitmap = kcalloc(BITS_TO_LONGS(table->size),
+					     sizeof(unsigned long),
+					     GFP_KERNEL);
+		if (!table->used_bitmap)
+			goto fail;
+		table->spec = vzalloc(table->size * sizeof(*table->spec));
+		if (!table->spec)
+			goto fail;
+	}
+
+	table = &state->table[EF4_FARCH_FILTER_TABLE_RX_DEF];
+	if (table->size) {
+		/* RX default filters must always exist */
+		for (n = 0; n < EF4_FARCH_FILTER_SIZE_RX_DEF; n++) {
+			struct ef4_farch_filter_spec *spec = &table->spec[n];
+
+			spec->type = EF4_FARCH_FILTER_UC_DEF + n;
+			ef4_farch_filter_init_rx_auto(efx, spec);
+			__set_bit(n, table->used_bitmap);
+		}
+	}
+
+	ef4_farch_filter_push_rx_config(efx);
+	return 0;
+
+fail:
+	ef4_farch_filter_table_remove(efx);
+	return -ENOMEM;
+}
+
+/* Update scatter enable flags for filters pointing to our own RX queues */
+void ef4_farch_filter_update_rx_scatter(struct ef4_nic *efx)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	enum ef4_farch_filter_table_id table_id;
+	struct ef4_farch_filter_table *table;
+	struct ef4_farch_filter_spec *spec;
+	ef4_oword_t filter;
+	unsigned int idx;
+
+	spin_lock_bh(&efx->filter_lock);
+
+	for (table_id = EF4_FARCH_FILTER_TABLE_RX_IP;
+	     table_id <= EF4_FARCH_FILTER_TABLE_RX_DEF;
+	     table_id++) {
+		table = &state->table[table_id];
+
+		for (idx = 0; idx < table->size; idx++) {
+			spec = &table->spec[idx];
+			/* Skip unused slots and queues beyond our channels */
+			if (!test_bit(idx, table->used_bitmap) ||
+			    spec->dmaq_id >= efx->n_rx_channels)
+				continue;
+
+			if (efx->rx_scatter)
+				spec->flags |= EF4_FILTER_FLAG_RX_SCATTER;
+			else
+				spec->flags &= ~EF4_FILTER_FLAG_RX_SCATTER;
+
+			/* Pushed by ef4_farch_filter_push_rx_config() */
+			if (table_id == EF4_FARCH_FILTER_TABLE_RX_DEF)
+				continue;
+
+			ef4_farch_filter_build(&filter, spec);
+			ef4_writeo(efx, &filter,
+				   table->offset + table->step * idx);
+		}
+	}
+
+	ef4_farch_filter_push_rx_config(efx);
+
+	spin_unlock_bh(&efx->filter_lock);
+}
+
+#ifdef CONFIG_RFS_ACCEL
+
+s32 ef4_farch_filter_rfs_insert(struct ef4_nic *efx,
+				struct ef4_filter_spec *gen_spec)
+{
+	return ef4_farch_filter_insert(efx, gen_spec, true); /* replace_equal */
+}
+
+/* Clear the hint filter at index if rps_may_expire_flow() permits it */
+bool ef4_farch_filter_rfs_expire_one(struct ef4_nic *efx, u32 flow_id,
+				     unsigned int index)
+{
+	struct ef4_farch_filter_state *state = efx->filter_state;
+	struct ef4_farch_filter_table *table =
+		&state->table[EF4_FARCH_FILTER_TABLE_RX_IP];
+
+	if (!test_bit(index, table->used_bitmap) ||
+	    table->spec[index].priority != EF4_FILTER_PRI_HINT ||
+	    !rps_may_expire_flow(efx->net_dev, table->spec[index].dmaq_id,
+				 flow_id, index))
+		return false;
+
+	ef4_farch_filter_table_clear_entry(efx, table, index);
+	return true;
+}
+
+#endif /* CONFIG_RFS_ACCEL */
+
+/* Recompute the unicast filter flag and multicast hash from net_dev state */
+void ef4_farch_filter_sync_rx_mode(struct ef4_nic *efx)
+{
+	union ef4_multicast_hash *mc_hash = &efx->multicast_hash;
+	struct net_device *net_dev = efx->net_dev;
+	struct netdev_hw_addr *ha;
+
+	if (!ef4_dev_registered(efx))
+		return;
+
+	netif_addr_lock_bh(net_dev);
+
+	efx->unicast_filter = !(net_dev->flags & IFF_PROMISC);
+
+	if (!(net_dev->flags & (IFF_PROMISC | IFF_ALLMULTI))) {
+		/* Build multicast hash table */
+		memset(mc_hash, 0x00, sizeof(*mc_hash));
+		netdev_for_each_mc_addr(ha, net_dev) {
+			u32 crc = ether_crc_le(ETH_ALEN, ha->addr);
+			int bit = crc & (EF4_MCAST_HASH_ENTRIES - 1);
+
+			__set_bit_le(bit, mc_hash);
+		}
+
+		/* Broadcast packets go through the multicast hash filter.
+		 * ether_crc_le() of the broadcast address is 0xbe2612ff
+		 * so we always add bit 0xff to the mask.
+		 */
+		__set_bit_le(0xff, mc_hash);
+	} else {
+		memset(mc_hash, 0xff, sizeof(*mc_hash));
+	}
+
+	netif_addr_unlock_bh(net_dev);
+}
diff --git a/drivers/net/ethernet/sfc/falcon/farch_regs.h b/drivers/net/ethernet/sfc/falcon/farch_regs.h
new file mode 100644
index 000000000000..8095f273d574
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/farch_regs.h
@@ -0,0 +1,2932 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_FARCH_REGS_H
+#define EF4_FARCH_REGS_H
+
+/*
+ * Falcon hardware architecture definitions have a name prefix following
+ * the format:
+ *
+ *     F<type>_<min-rev><max-rev>_
+ *
+ * The following <type> strings are used:
+ *
+ *             MMIO register  MC register  Host memory structure
+ * -------------------------------------------------------------
+ * Address     R              MCR
+ * Bitfield    RF             MCRF         SF
+ * Enumerator  FE             MCFE         SE
+ *
+ * <min-rev> is the first revision to which the definition applies:
+ *
+ *     A: Falcon A1 (SFC4000AB)
+ *     B: Falcon B0 (SFC4000BA)
+ *     C: Siena A0 (SFL9021AA)
+ *
+ * If the definition has been changed or removed in later revisions
+ * then <max-rev> is the last revision to which the definition applies;
+ * otherwise it is "Z".
+ */
+
+/**************************************************************************
+ *
+ * Falcon/Siena registers and descriptors
+ *
+ **************************************************************************
+ */
+
+/* ADR_REGION_REG: Address region register */
+#define	FR_AZ_ADR_REGION 0x00000000
+#define	FRF_AZ_ADR_REGION3_LBN 96
+#define	FRF_AZ_ADR_REGION3_WIDTH 18
+#define	FRF_AZ_ADR_REGION2_LBN 64
+#define	FRF_AZ_ADR_REGION2_WIDTH 18
+#define	FRF_AZ_ADR_REGION1_LBN 32
+#define	FRF_AZ_ADR_REGION1_WIDTH 18
+#define	FRF_AZ_ADR_REGION0_LBN 0
+#define	FRF_AZ_ADR_REGION0_WIDTH 18
+
+/* INT_EN_REG_KER: Kernel driver Interrupt enable register */
+#define	FR_AZ_INT_EN_KER 0x00000010
+#define	FRF_AZ_KER_INT_LEVE_SEL_LBN 8
+#define	FRF_AZ_KER_INT_LEVE_SEL_WIDTH 6
+#define	FRF_AZ_KER_INT_CHAR_LBN 4
+#define	FRF_AZ_KER_INT_CHAR_WIDTH 1
+#define	FRF_AZ_KER_INT_KER_LBN 3
+#define	FRF_AZ_KER_INT_KER_WIDTH 1
+#define	FRF_AZ_DRV_INT_EN_KER_LBN 0
+#define	FRF_AZ_DRV_INT_EN_KER_WIDTH 1
+
+/* INT_EN_REG_CHAR: Char Driver interrupt enable register */
+#define	FR_BZ_INT_EN_CHAR 0x00000020
+#define	FRF_BZ_CHAR_INT_LEVE_SEL_LBN 8
+#define	FRF_BZ_CHAR_INT_LEVE_SEL_WIDTH 6
+#define	FRF_BZ_CHAR_INT_CHAR_LBN 4
+#define	FRF_BZ_CHAR_INT_CHAR_WIDTH 1
+#define	FRF_BZ_CHAR_INT_KER_LBN 3
+#define	FRF_BZ_CHAR_INT_KER_WIDTH 1
+#define	FRF_BZ_DRV_INT_EN_CHAR_LBN 0
+#define	FRF_BZ_DRV_INT_EN_CHAR_WIDTH 1
+
+/* INT_ADR_REG_KER: Interrupt host address for Kernel driver */
+#define	FR_AZ_INT_ADR_KER 0x00000030
+#define	FRF_AZ_NORM_INT_VEC_DIS_KER_LBN 64
+#define	FRF_AZ_NORM_INT_VEC_DIS_KER_WIDTH 1
+#define	FRF_AZ_INT_ADR_KER_LBN 0
+#define	FRF_AZ_INT_ADR_KER_WIDTH 64
+
+/* INT_ADR_REG_CHAR: Interrupt host address for Char driver */
+#define	FR_BZ_INT_ADR_CHAR 0x00000040
+#define	FRF_BZ_NORM_INT_VEC_DIS_CHAR_LBN 64
+#define	FRF_BZ_NORM_INT_VEC_DIS_CHAR_WIDTH 1
+#define	FRF_BZ_INT_ADR_CHAR_LBN 0
+#define	FRF_BZ_INT_ADR_CHAR_WIDTH 64
+
+/* INT_ACK_KER: Kernel interrupt acknowledge register */
+#define	FR_AA_INT_ACK_KER 0x00000050
+#define	FRF_AA_INT_ACK_KER_FIELD_LBN 0
+#define	FRF_AA_INT_ACK_KER_FIELD_WIDTH 32
+
+/* INT_ISR0_REG: Function 0 Interrupt Acknowledge Status register */
+#define	FR_BZ_INT_ISR0 0x00000090
+#define	FRF_BZ_INT_ISR_REG_LBN 0
+#define	FRF_BZ_INT_ISR_REG_WIDTH 64
+
+/* HW_INIT_REG: Hardware initialization register */
+#define	FR_AZ_HW_INIT 0x000000c0
+#define	FRF_BB_BDMRD_CPLF_FULL_LBN 124
+#define	FRF_BB_BDMRD_CPLF_FULL_WIDTH 1
+#define	FRF_BB_PCIE_CPL_TIMEOUT_CTRL_LBN 121
+#define	FRF_BB_PCIE_CPL_TIMEOUT_CTRL_WIDTH 3
+#define	FRF_CZ_TX_MRG_TAGS_LBN 120
+#define	FRF_CZ_TX_MRG_TAGS_WIDTH 1
+#define	FRF_AB_TRGT_MASK_ALL_LBN 100
+#define	FRF_AB_TRGT_MASK_ALL_WIDTH 1
+#define	FRF_AZ_DOORBELL_DROP_LBN 92
+#define	FRF_AZ_DOORBELL_DROP_WIDTH 8
+#define	FRF_AB_TX_RREQ_MASK_EN_LBN 76
+#define	FRF_AB_TX_RREQ_MASK_EN_WIDTH 1
+#define	FRF_AB_PE_EIDLE_DIS_LBN 75
+#define	FRF_AB_PE_EIDLE_DIS_WIDTH 1
+#define	FRF_AA_FC_BLOCKING_EN_LBN 45
+#define	FRF_AA_FC_BLOCKING_EN_WIDTH 1
+#define	FRF_BZ_B2B_REQ_EN_LBN 45
+#define	FRF_BZ_B2B_REQ_EN_WIDTH 1
+#define	FRF_AA_B2B_REQ_EN_LBN 44
+#define	FRF_AA_B2B_REQ_EN_WIDTH 1
+#define	FRF_BB_FC_BLOCKING_EN_LBN 44
+#define	FRF_BB_FC_BLOCKING_EN_WIDTH 1
+#define	FRF_AZ_POST_WR_MASK_LBN 40
+#define	FRF_AZ_POST_WR_MASK_WIDTH 4
+#define	FRF_AZ_TLP_TC_LBN 34
+#define	FRF_AZ_TLP_TC_WIDTH 3
+#define	FRF_AZ_TLP_ATTR_LBN 32
+#define	FRF_AZ_TLP_ATTR_WIDTH 2
+#define	FRF_AB_INTB_VEC_LBN 24
+#define	FRF_AB_INTB_VEC_WIDTH 5
+#define	FRF_AB_INTA_VEC_LBN 16
+#define	FRF_AB_INTA_VEC_WIDTH 5
+#define	FRF_AZ_WD_TIMER_LBN 8
+#define	FRF_AZ_WD_TIMER_WIDTH 8
+#define	FRF_AZ_US_DISABLE_LBN 5
+#define	FRF_AZ_US_DISABLE_WIDTH 1
+#define	FRF_AZ_TLP_EP_LBN 4
+#define	FRF_AZ_TLP_EP_WIDTH 1
+#define	FRF_AZ_ATTR_SEL_LBN 3
+#define	FRF_AZ_ATTR_SEL_WIDTH 1
+#define	FRF_AZ_TD_SEL_LBN 1
+#define	FRF_AZ_TD_SEL_WIDTH 1
+#define	FRF_AZ_TLP_TD_LBN 0
+#define	FRF_AZ_TLP_TD_WIDTH 1
+
+/* EE_SPI_HCMD_REG: SPI host command register */
+#define	FR_AB_EE_SPI_HCMD 0x00000100
+#define	FRF_AB_EE_SPI_HCMD_CMD_EN_LBN 31
+#define	FRF_AB_EE_SPI_HCMD_CMD_EN_WIDTH 1
+#define	FRF_AB_EE_WR_TIMER_ACTIVE_LBN 28
+#define	FRF_AB_EE_WR_TIMER_ACTIVE_WIDTH 1
+#define	FRF_AB_EE_SPI_HCMD_SF_SEL_LBN 24
+#define	FRF_AB_EE_SPI_HCMD_SF_SEL_WIDTH 1
+#define	FRF_AB_EE_SPI_HCMD_DABCNT_LBN 16
+#define	FRF_AB_EE_SPI_HCMD_DABCNT_WIDTH 5
+#define	FRF_AB_EE_SPI_HCMD_READ_LBN 15
+#define	FRF_AB_EE_SPI_HCMD_READ_WIDTH 1
+#define	FRF_AB_EE_SPI_HCMD_DUBCNT_LBN 12
+#define	FRF_AB_EE_SPI_HCMD_DUBCNT_WIDTH 2
+#define	FRF_AB_EE_SPI_HCMD_ADBCNT_LBN 8
+#define	FRF_AB_EE_SPI_HCMD_ADBCNT_WIDTH 2
+#define	FRF_AB_EE_SPI_HCMD_ENC_LBN 0
+#define	FRF_AB_EE_SPI_HCMD_ENC_WIDTH 8
+
+/* USR_EV_CFG: User Level Event Configuration register */
+#define	FR_CZ_USR_EV_CFG 0x00000100
+#define	FRF_CZ_USREV_DIS_LBN 16
+#define	FRF_CZ_USREV_DIS_WIDTH 1
+#define	FRF_CZ_DFLT_EVQ_LBN 0
+#define	FRF_CZ_DFLT_EVQ_WIDTH 10
+
+/* EE_SPI_HADR_REG: SPI host address register */
+#define	FR_AB_EE_SPI_HADR 0x00000110
+#define	FRF_AB_EE_SPI_HADR_DUBYTE_LBN 24
+#define	FRF_AB_EE_SPI_HADR_DUBYTE_WIDTH 8
+#define	FRF_AB_EE_SPI_HADR_ADR_LBN 0
+#define	FRF_AB_EE_SPI_HADR_ADR_WIDTH 24
+
+/* EE_SPI_HDATA_REG: SPI host data register */
+#define	FR_AB_EE_SPI_HDATA 0x00000120
+#define	FRF_AB_EE_SPI_HDATA3_LBN 96
+#define	FRF_AB_EE_SPI_HDATA3_WIDTH 32
+#define	FRF_AB_EE_SPI_HDATA2_LBN 64
+#define	FRF_AB_EE_SPI_HDATA2_WIDTH 32
+#define	FRF_AB_EE_SPI_HDATA1_LBN 32
+#define	FRF_AB_EE_SPI_HDATA1_WIDTH 32
+#define	FRF_AB_EE_SPI_HDATA0_LBN 0
+#define	FRF_AB_EE_SPI_HDATA0_WIDTH 32
+
+/* EE_BASE_PAGE_REG: Expansion ROM base mirror register */
+#define	FR_AB_EE_BASE_PAGE 0x00000130
+#define	FRF_AB_EE_EXPROM_MASK_LBN 16
+#define	FRF_AB_EE_EXPROM_MASK_WIDTH 13
+#define	FRF_AB_EE_EXP_ROM_WINDOW_BASE_LBN 0
+#define	FRF_AB_EE_EXP_ROM_WINDOW_BASE_WIDTH 13
+
+/* EE_VPD_CFG0_REG: SPI/VPD configuration register 0 */
+#define	FR_AB_EE_VPD_CFG0 0x00000140
+#define	FRF_AB_EE_SF_FASTRD_EN_LBN 127
+#define	FRF_AB_EE_SF_FASTRD_EN_WIDTH 1
+#define	FRF_AB_EE_SF_CLOCK_DIV_LBN 120
+#define	FRF_AB_EE_SF_CLOCK_DIV_WIDTH 7
+#define	FRF_AB_EE_VPD_WIP_POLL_LBN 119
+#define	FRF_AB_EE_VPD_WIP_POLL_WIDTH 1
+#define	FRF_AB_EE_EE_CLOCK_DIV_LBN 112
+#define	FRF_AB_EE_EE_CLOCK_DIV_WIDTH 7
+#define	FRF_AB_EE_EE_WR_TMR_VALUE_LBN 96
+#define	FRF_AB_EE_EE_WR_TMR_VALUE_WIDTH 16
+#define	FRF_AB_EE_VPDW_LENGTH_LBN 80
+#define	FRF_AB_EE_VPDW_LENGTH_WIDTH 15
+#define	FRF_AB_EE_VPDW_BASE_LBN 64
+#define	FRF_AB_EE_VPDW_BASE_WIDTH 15
+#define	FRF_AB_EE_VPD_WR_CMD_EN_LBN 56
+#define	FRF_AB_EE_VPD_WR_CMD_EN_WIDTH 8
+#define	FRF_AB_EE_VPD_BASE_LBN 32
+#define	FRF_AB_EE_VPD_BASE_WIDTH 24
+#define	FRF_AB_EE_VPD_LENGTH_LBN 16
+#define	FRF_AB_EE_VPD_LENGTH_WIDTH 15
+#define	FRF_AB_EE_VPD_AD_SIZE_LBN 8
+#define	FRF_AB_EE_VPD_AD_SIZE_WIDTH 5
+#define	FRF_AB_EE_VPD_ACCESS_ON_LBN 5
+#define	FRF_AB_EE_VPD_ACCESS_ON_WIDTH 1
+#define	FRF_AB_EE_VPD_ACCESS_BLOCK_LBN 4
+#define	FRF_AB_EE_VPD_ACCESS_BLOCK_WIDTH 1
+#define	FRF_AB_EE_VPD_DEV_SF_SEL_LBN 2
+#define	FRF_AB_EE_VPD_DEV_SF_SEL_WIDTH 1
+#define	FRF_AB_EE_VPD_EN_AD9_MODE_LBN 1
+#define	FRF_AB_EE_VPD_EN_AD9_MODE_WIDTH 1
+#define	FRF_AB_EE_VPD_EN_LBN 0
+#define	FRF_AB_EE_VPD_EN_WIDTH 1
+
+/* EE_VPD_SW_CNTL_REG: VPD access SW control register */
+#define	FR_AB_EE_VPD_SW_CNTL 0x00000150
+#define	FRF_AB_EE_VPD_CYCLE_PENDING_LBN 31
+#define	FRF_AB_EE_VPD_CYCLE_PENDING_WIDTH 1
+#define	FRF_AB_EE_VPD_CYC_WRITE_LBN 28
+#define	FRF_AB_EE_VPD_CYC_WRITE_WIDTH 1
+#define	FRF_AB_EE_VPD_CYC_ADR_LBN 0
+#define	FRF_AB_EE_VPD_CYC_ADR_WIDTH 15
+
+/* EE_VPD_SW_DATA_REG: VPD access SW data register */
+#define	FR_AB_EE_VPD_SW_DATA 0x00000160
+#define	FRF_AB_EE_VPD_CYC_DAT_LBN 0
+#define	FRF_AB_EE_VPD_CYC_DAT_WIDTH 32
+
+/* PBMX_DBG_IADDR_REG: Capture Module address register */
+#define	FR_CZ_PBMX_DBG_IADDR 0x000001f0
+#define	FRF_CZ_PBMX_DBG_IADDR_LBN 0
+#define	FRF_CZ_PBMX_DBG_IADDR_WIDTH 32
+
+/* PCIE_CORE_INDIRECT_REG: Indirect Access to PCIE Core registers */
+#define	FR_BB_PCIE_CORE_INDIRECT 0x000001f0
+#define	FRF_BB_PCIE_CORE_TARGET_DATA_LBN 32
+#define	FRF_BB_PCIE_CORE_TARGET_DATA_WIDTH 32
+#define	FRF_BB_PCIE_CORE_INDIRECT_ACCESS_DIR_LBN 15
+#define	FRF_BB_PCIE_CORE_INDIRECT_ACCESS_DIR_WIDTH 1
+#define	FRF_BB_PCIE_CORE_TARGET_REG_ADRS_LBN 0
+#define	FRF_BB_PCIE_CORE_TARGET_REG_ADRS_WIDTH 12
+
+/* PBMX_DBG_IDATA_REG: Capture Module data register */
+#define	FR_CZ_PBMX_DBG_IDATA 0x000001f8
+#define	FRF_CZ_PBMX_DBG_IDATA_LBN 0
+#define	FRF_CZ_PBMX_DBG_IDATA_WIDTH 64
+
+/* NIC_STAT_REG: NIC status register */
+#define	FR_AB_NIC_STAT 0x00000200
+#define	FRF_BB_AER_DIS_LBN 34
+#define	FRF_BB_AER_DIS_WIDTH 1
+#define	FRF_BB_EE_STRAP_EN_LBN 31
+#define	FRF_BB_EE_STRAP_EN_WIDTH 1
+#define	FRF_BB_EE_STRAP_LBN 24
+#define	FRF_BB_EE_STRAP_WIDTH 4
+#define	FRF_BB_REVISION_ID_LBN 17
+#define	FRF_BB_REVISION_ID_WIDTH 7
+#define	FRF_AB_ONCHIP_SRAM_LBN 16
+#define	FRF_AB_ONCHIP_SRAM_WIDTH 1
+#define	FRF_AB_SF_PRST_LBN 9
+#define	FRF_AB_SF_PRST_WIDTH 1
+#define	FRF_AB_EE_PRST_LBN 8
+#define	FRF_AB_EE_PRST_WIDTH 1
+#define	FRF_AB_ATE_MODE_LBN 3
+#define	FRF_AB_ATE_MODE_WIDTH 1
+#define	FRF_AB_STRAP_PINS_LBN 0
+#define	FRF_AB_STRAP_PINS_WIDTH 3
+
+/* GPIO_CTL_REG: GPIO control register */
+#define	FR_AB_GPIO_CTL 0x00000210
+#define	FRF_AB_GPIO_OUT3_LBN 112
+#define	FRF_AB_GPIO_OUT3_WIDTH 16
+#define	FRF_AB_GPIO_IN3_LBN 104
+#define	FRF_AB_GPIO_IN3_WIDTH 8
+#define	FRF_AB_GPIO_PWRUP_VALUE3_LBN 96
+#define	FRF_AB_GPIO_PWRUP_VALUE3_WIDTH 8
+#define	FRF_AB_GPIO_OUT2_LBN 80
+#define	FRF_AB_GPIO_OUT2_WIDTH 16
+#define	FRF_AB_GPIO_IN2_LBN 72
+#define	FRF_AB_GPIO_IN2_WIDTH 8
+#define	FRF_AB_GPIO_PWRUP_VALUE2_LBN 64
+#define	FRF_AB_GPIO_PWRUP_VALUE2_WIDTH 8
+#define	FRF_AB_GPIO15_OEN_LBN 63
+#define	FRF_AB_GPIO15_OEN_WIDTH 1
+#define	FRF_AB_GPIO14_OEN_LBN 62
+#define	FRF_AB_GPIO14_OEN_WIDTH 1
+#define	FRF_AB_GPIO13_OEN_LBN 61
+#define	FRF_AB_GPIO13_OEN_WIDTH 1
+#define	FRF_AB_GPIO12_OEN_LBN 60
+#define	FRF_AB_GPIO12_OEN_WIDTH 1
+#define	FRF_AB_GPIO11_OEN_LBN 59
+#define	FRF_AB_GPIO11_OEN_WIDTH 1
+#define	FRF_AB_GPIO10_OEN_LBN 58
+#define	FRF_AB_GPIO10_OEN_WIDTH 1
+#define	FRF_AB_GPIO9_OEN_LBN 57
+#define	FRF_AB_GPIO9_OEN_WIDTH 1
+#define	FRF_AB_GPIO8_OEN_LBN 56
+#define	FRF_AB_GPIO8_OEN_WIDTH 1
+#define	FRF_AB_GPIO15_OUT_LBN 55
+#define	FRF_AB_GPIO15_OUT_WIDTH 1
+#define	FRF_AB_GPIO14_OUT_LBN 54
+#define	FRF_AB_GPIO14_OUT_WIDTH 1
+#define	FRF_AB_GPIO13_OUT_LBN 53
+#define	FRF_AB_GPIO13_OUT_WIDTH 1
+#define	FRF_AB_GPIO12_OUT_LBN 52
+#define	FRF_AB_GPIO12_OUT_WIDTH 1
+#define	FRF_AB_GPIO11_OUT_LBN 51
+#define	FRF_AB_GPIO11_OUT_WIDTH 1
+#define	FRF_AB_GPIO10_OUT_LBN 50
+#define	FRF_AB_GPIO10_OUT_WIDTH 1
+#define	FRF_AB_GPIO9_OUT_LBN 49
+#define	FRF_AB_GPIO9_OUT_WIDTH 1
+#define	FRF_AB_GPIO8_OUT_LBN 48
+#define	FRF_AB_GPIO8_OUT_WIDTH 1
+#define	FRF_AB_GPIO15_IN_LBN 47
+#define	FRF_AB_GPIO15_IN_WIDTH 1
+#define	FRF_AB_GPIO14_IN_LBN 46
+#define	FRF_AB_GPIO14_IN_WIDTH 1
+#define	FRF_AB_GPIO13_IN_LBN 45
+#define	FRF_AB_GPIO13_IN_WIDTH 1
+#define	FRF_AB_GPIO12_IN_LBN 44
+#define	FRF_AB_GPIO12_IN_WIDTH 1
+#define	FRF_AB_GPIO11_IN_LBN 43
+#define	FRF_AB_GPIO11_IN_WIDTH 1
+#define	FRF_AB_GPIO10_IN_LBN 42
+#define	FRF_AB_GPIO10_IN_WIDTH 1
+#define	FRF_AB_GPIO9_IN_LBN 41
+#define	FRF_AB_GPIO9_IN_WIDTH 1
+#define	FRF_AB_GPIO8_IN_LBN 40
+#define	FRF_AB_GPIO8_IN_WIDTH 1
+#define	FRF_AB_GPIO15_PWRUP_VALUE_LBN 39
+#define	FRF_AB_GPIO15_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO14_PWRUP_VALUE_LBN 38
+#define	FRF_AB_GPIO14_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO13_PWRUP_VALUE_LBN 37
+#define	FRF_AB_GPIO13_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO12_PWRUP_VALUE_LBN 36
+#define	FRF_AB_GPIO12_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO11_PWRUP_VALUE_LBN 35
+#define	FRF_AB_GPIO11_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO10_PWRUP_VALUE_LBN 34
+#define	FRF_AB_GPIO10_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO9_PWRUP_VALUE_LBN 33
+#define	FRF_AB_GPIO9_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO8_PWRUP_VALUE_LBN 32
+#define	FRF_AB_GPIO8_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_CLK156_OUT_EN_LBN 31
+#define	FRF_AB_CLK156_OUT_EN_WIDTH 1
+#define	FRF_AB_USE_NIC_CLK_LBN 30
+#define	FRF_AB_USE_NIC_CLK_WIDTH 1
+#define	FRF_AB_GPIO5_OEN_LBN 29
+#define	FRF_AB_GPIO5_OEN_WIDTH 1
+#define	FRF_AB_GPIO4_OEN_LBN 28
+#define	FRF_AB_GPIO4_OEN_WIDTH 1
+#define	FRF_AB_GPIO3_OEN_LBN 27
+#define	FRF_AB_GPIO3_OEN_WIDTH 1
+#define	FRF_AB_GPIO2_OEN_LBN 26
+#define	FRF_AB_GPIO2_OEN_WIDTH 1
+#define	FRF_AB_GPIO1_OEN_LBN 25
+#define	FRF_AB_GPIO1_OEN_WIDTH 1
+#define	FRF_AB_GPIO0_OEN_LBN 24
+#define	FRF_AB_GPIO0_OEN_WIDTH 1
+#define	FRF_AB_GPIO7_OUT_LBN 23
+#define	FRF_AB_GPIO7_OUT_WIDTH 1
+#define	FRF_AB_GPIO6_OUT_LBN 22
+#define	FRF_AB_GPIO6_OUT_WIDTH 1
+#define	FRF_AB_GPIO5_OUT_LBN 21
+#define	FRF_AB_GPIO5_OUT_WIDTH 1
+#define	FRF_AB_GPIO4_OUT_LBN 20
+#define	FRF_AB_GPIO4_OUT_WIDTH 1
+#define	FRF_AB_GPIO3_OUT_LBN 19
+#define	FRF_AB_GPIO3_OUT_WIDTH 1
+#define	FRF_AB_GPIO2_OUT_LBN 18
+#define	FRF_AB_GPIO2_OUT_WIDTH 1
+#define	FRF_AB_GPIO1_OUT_LBN 17
+#define	FRF_AB_GPIO1_OUT_WIDTH 1
+#define	FRF_AB_GPIO0_OUT_LBN 16
+#define	FRF_AB_GPIO0_OUT_WIDTH 1
+#define	FRF_AB_GPIO7_IN_LBN 15
+#define	FRF_AB_GPIO7_IN_WIDTH 1
+#define	FRF_AB_GPIO6_IN_LBN 14
+#define	FRF_AB_GPIO6_IN_WIDTH 1
+#define	FRF_AB_GPIO5_IN_LBN 13
+#define	FRF_AB_GPIO5_IN_WIDTH 1
+#define	FRF_AB_GPIO4_IN_LBN 12
+#define	FRF_AB_GPIO4_IN_WIDTH 1
+#define	FRF_AB_GPIO3_IN_LBN 11
+#define	FRF_AB_GPIO3_IN_WIDTH 1
+#define	FRF_AB_GPIO2_IN_LBN 10
+#define	FRF_AB_GPIO2_IN_WIDTH 1
+#define	FRF_AB_GPIO1_IN_LBN 9
+#define	FRF_AB_GPIO1_IN_WIDTH 1
+#define	FRF_AB_GPIO0_IN_LBN 8
+#define	FRF_AB_GPIO0_IN_WIDTH 1
+#define	FRF_AB_GPIO7_PWRUP_VALUE_LBN 7
+#define	FRF_AB_GPIO7_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO6_PWRUP_VALUE_LBN 6
+#define	FRF_AB_GPIO6_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO5_PWRUP_VALUE_LBN 5
+#define	FRF_AB_GPIO5_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO4_PWRUP_VALUE_LBN 4
+#define	FRF_AB_GPIO4_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO3_PWRUP_VALUE_LBN 3
+#define	FRF_AB_GPIO3_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO2_PWRUP_VALUE_LBN 2
+#define	FRF_AB_GPIO2_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO1_PWRUP_VALUE_LBN 1
+#define	FRF_AB_GPIO1_PWRUP_VALUE_WIDTH 1
+#define	FRF_AB_GPIO0_PWRUP_VALUE_LBN 0
+#define	FRF_AB_GPIO0_PWRUP_VALUE_WIDTH 1
+
+/* GLB_CTL_REG: Global control register */
+#define	FR_AB_GLB_CTL 0x00000220
+#define	FRF_AB_EXT_PHY_RST_CTL_LBN 63
+#define	FRF_AB_EXT_PHY_RST_CTL_WIDTH 1
+#define	FRF_AB_XAUI_SD_RST_CTL_LBN 62
+#define	FRF_AB_XAUI_SD_RST_CTL_WIDTH 1
+#define	FRF_AB_PCIE_SD_RST_CTL_LBN 61
+#define	FRF_AB_PCIE_SD_RST_CTL_WIDTH 1
+#define	FRF_AA_PCIX_RST_CTL_LBN 60
+#define	FRF_AA_PCIX_RST_CTL_WIDTH 1
+#define	FRF_BB_BIU_RST_CTL_LBN 60
+#define	FRF_BB_BIU_RST_CTL_WIDTH 1
+#define	FRF_AB_PCIE_STKY_RST_CTL_LBN 59
+#define	FRF_AB_PCIE_STKY_RST_CTL_WIDTH 1
+#define	FRF_AB_PCIE_NSTKY_RST_CTL_LBN 58
+#define	FRF_AB_PCIE_NSTKY_RST_CTL_WIDTH 1
+#define	FRF_AB_PCIE_CORE_RST_CTL_LBN 57
+#define	FRF_AB_PCIE_CORE_RST_CTL_WIDTH 1
+#define	FRF_AB_XGRX_RST_CTL_LBN 56
+#define	FRF_AB_XGRX_RST_CTL_WIDTH 1
+#define	FRF_AB_XGTX_RST_CTL_LBN 55
+#define	FRF_AB_XGTX_RST_CTL_WIDTH 1
+#define	FRF_AB_EM_RST_CTL_LBN 54
+#define	FRF_AB_EM_RST_CTL_WIDTH 1
+#define	FRF_AB_EV_RST_CTL_LBN 53
+#define	FRF_AB_EV_RST_CTL_WIDTH 1
+#define	FRF_AB_SR_RST_CTL_LBN 52
+#define	FRF_AB_SR_RST_CTL_WIDTH 1
+#define	FRF_AB_RX_RST_CTL_LBN 51
+#define	FRF_AB_RX_RST_CTL_WIDTH 1
+#define	FRF_AB_TX_RST_CTL_LBN 50
+#define	FRF_AB_TX_RST_CTL_WIDTH 1
+#define	FRF_AB_EE_RST_CTL_LBN 49
+#define	FRF_AB_EE_RST_CTL_WIDTH 1
+#define	FRF_AB_CS_RST_CTL_LBN 48
+#define	FRF_AB_CS_RST_CTL_WIDTH 1
+#define	FRF_AB_HOT_RST_CTL_LBN 40
+#define	FRF_AB_HOT_RST_CTL_WIDTH 2
+#define	FRF_AB_RST_EXT_PHY_LBN 31
+#define	FRF_AB_RST_EXT_PHY_WIDTH 1
+#define	FRF_AB_RST_XAUI_SD_LBN 30
+#define	FRF_AB_RST_XAUI_SD_WIDTH 1
+#define	FRF_AB_RST_PCIE_SD_LBN 29
+#define	FRF_AB_RST_PCIE_SD_WIDTH 1
+#define	FRF_AA_RST_PCIX_LBN 28
+#define	FRF_AA_RST_PCIX_WIDTH 1
+#define	FRF_BB_RST_BIU_LBN 28
+#define	FRF_BB_RST_BIU_WIDTH 1
+#define	FRF_AB_RST_PCIE_STKY_LBN 27
+#define	FRF_AB_RST_PCIE_STKY_WIDTH 1
+#define	FRF_AB_RST_PCIE_NSTKY_LBN 26
+#define	FRF_AB_RST_PCIE_NSTKY_WIDTH 1
+#define	FRF_AB_RST_PCIE_CORE_LBN 25
+#define	FRF_AB_RST_PCIE_CORE_WIDTH 1
+#define	FRF_AB_RST_XGRX_LBN 24
+#define	FRF_AB_RST_XGRX_WIDTH 1
+#define	FRF_AB_RST_XGTX_LBN 23
+#define	FRF_AB_RST_XGTX_WIDTH 1
+#define	FRF_AB_RST_EM_LBN 22
+#define	FRF_AB_RST_EM_WIDTH 1
+#define	FRF_AB_RST_EV_LBN 21
+#define	FRF_AB_RST_EV_WIDTH 1
+#define	FRF_AB_RST_SR_LBN 20
+#define	FRF_AB_RST_SR_WIDTH 1
+#define	FRF_AB_RST_RX_LBN 19
+#define	FRF_AB_RST_RX_WIDTH 1
+#define	FRF_AB_RST_TX_LBN 18
+#define	FRF_AB_RST_TX_WIDTH 1
+#define	FRF_AB_RST_SF_LBN 17
+#define	FRF_AB_RST_SF_WIDTH 1
+#define	FRF_AB_RST_CS_LBN 16
+#define	FRF_AB_RST_CS_WIDTH 1
+#define	FRF_AB_INT_RST_DUR_LBN 4
+#define	FRF_AB_INT_RST_DUR_WIDTH 3
+#define	FRF_AB_EXT_PHY_RST_DUR_LBN 1
+#define	FRF_AB_EXT_PHY_RST_DUR_WIDTH 3
+#define	FFE_AB_EXT_PHY_RST_DUR_10240US 7
+#define	FFE_AB_EXT_PHY_RST_DUR_5120US 6
+#define	FFE_AB_EXT_PHY_RST_DUR_2560US 5
+#define	FFE_AB_EXT_PHY_RST_DUR_1280US 4
+#define	FFE_AB_EXT_PHY_RST_DUR_640US 3
+#define	FFE_AB_EXT_PHY_RST_DUR_320US 2
+#define	FFE_AB_EXT_PHY_RST_DUR_160US 1
+#define	FFE_AB_EXT_PHY_RST_DUR_80US 0
+#define	FRF_AB_SWRST_LBN 0
+#define	FRF_AB_SWRST_WIDTH 1
+
+/* FATAL_INTR_REG_KER: Fatal interrupt register for Kernel */
+#define	FR_AZ_FATAL_INTR_KER 0x00000230
+#define	FRF_CZ_SRAM_PERR_INT_P_KER_EN_LBN 44
+#define	FRF_CZ_SRAM_PERR_INT_P_KER_EN_WIDTH 1
+#define	FRF_AB_PCI_BUSERR_INT_KER_EN_LBN 43
+#define	FRF_AB_PCI_BUSERR_INT_KER_EN_WIDTH 1
+#define	FRF_CZ_MBU_PERR_INT_KER_EN_LBN 43
+#define	FRF_CZ_MBU_PERR_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_SRAM_OOB_INT_KER_EN_LBN 42
+#define	FRF_AZ_SRAM_OOB_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_BUFID_OOB_INT_KER_EN_LBN 41
+#define	FRF_AZ_BUFID_OOB_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_MEM_PERR_INT_KER_EN_LBN 40
+#define	FRF_AZ_MEM_PERR_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_RBUF_OWN_INT_KER_EN_LBN 39
+#define	FRF_AZ_RBUF_OWN_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_TBUF_OWN_INT_KER_EN_LBN 38
+#define	FRF_AZ_TBUF_OWN_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_RDESCQ_OWN_INT_KER_EN_LBN 37
+#define	FRF_AZ_RDESCQ_OWN_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_TDESCQ_OWN_INT_KER_EN_LBN 36
+#define	FRF_AZ_TDESCQ_OWN_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_EVQ_OWN_INT_KER_EN_LBN 35
+#define	FRF_AZ_EVQ_OWN_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_EVF_OFLO_INT_KER_EN_LBN 34
+#define	FRF_AZ_EVF_OFLO_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_ILL_ADR_INT_KER_EN_LBN 33
+#define	FRF_AZ_ILL_ADR_INT_KER_EN_WIDTH 1
+#define	FRF_AZ_SRM_PERR_INT_KER_EN_LBN 32
+#define	FRF_AZ_SRM_PERR_INT_KER_EN_WIDTH 1
+#define	FRF_CZ_SRAM_PERR_INT_P_KER_LBN 12
+#define	FRF_CZ_SRAM_PERR_INT_P_KER_WIDTH 1
+#define	FRF_AB_PCI_BUSERR_INT_KER_LBN 11
+#define	FRF_AB_PCI_BUSERR_INT_KER_WIDTH 1
+#define	FRF_CZ_MBU_PERR_INT_KER_LBN 11
+#define	FRF_CZ_MBU_PERR_INT_KER_WIDTH 1
+#define	FRF_AZ_SRAM_OOB_INT_KER_LBN 10
+#define	FRF_AZ_SRAM_OOB_INT_KER_WIDTH 1
+#define	FRF_AZ_BUFID_DC_OOB_INT_KER_LBN 9
+#define	FRF_AZ_BUFID_DC_OOB_INT_KER_WIDTH 1
+#define	FRF_AZ_MEM_PERR_INT_KER_LBN 8
+#define	FRF_AZ_MEM_PERR_INT_KER_WIDTH 1
+#define	FRF_AZ_RBUF_OWN_INT_KER_LBN 7
+#define	FRF_AZ_RBUF_OWN_INT_KER_WIDTH 1
+#define	FRF_AZ_TBUF_OWN_INT_KER_LBN 6
+#define	FRF_AZ_TBUF_OWN_INT_KER_WIDTH 1
+#define	FRF_AZ_RDESCQ_OWN_INT_KER_LBN 5
+#define	FRF_AZ_RDESCQ_OWN_INT_KER_WIDTH 1
+#define	FRF_AZ_TDESCQ_OWN_INT_KER_LBN 4
+#define	FRF_AZ_TDESCQ_OWN_INT_KER_WIDTH 1
+#define	FRF_AZ_EVQ_OWN_INT_KER_LBN 3
+#define	FRF_AZ_EVQ_OWN_INT_KER_WIDTH 1
+#define	FRF_AZ_EVF_OFLO_INT_KER_LBN 2
+#define	FRF_AZ_EVF_OFLO_INT_KER_WIDTH 1
+#define	FRF_AZ_ILL_ADR_INT_KER_LBN 1
+#define	FRF_AZ_ILL_ADR_INT_KER_WIDTH 1
+#define	FRF_AZ_SRM_PERR_INT_KER_LBN 0
+#define	FRF_AZ_SRM_PERR_INT_KER_WIDTH 1
+
+/* FATAL_INTR_REG_CHAR: Fatal interrupt register for Char */
+#define	FR_BZ_FATAL_INTR_CHAR 0x00000240
+#define	FRF_CZ_SRAM_PERR_INT_P_CHAR_EN_LBN 44
+#define	FRF_CZ_SRAM_PERR_INT_P_CHAR_EN_WIDTH 1
+#define	FRF_BB_PCI_BUSERR_INT_CHAR_EN_LBN 43
+#define	FRF_BB_PCI_BUSERR_INT_CHAR_EN_WIDTH 1
+#define	FRF_CZ_MBU_PERR_INT_CHAR_EN_LBN 43
+#define	FRF_CZ_MBU_PERR_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_SRAM_OOB_INT_CHAR_EN_LBN 42
+#define	FRF_BZ_SRAM_OOB_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_BUFID_OOB_INT_CHAR_EN_LBN 41
+#define	FRF_BZ_BUFID_OOB_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_MEM_PERR_INT_CHAR_EN_LBN 40
+#define	FRF_BZ_MEM_PERR_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_RBUF_OWN_INT_CHAR_EN_LBN 39
+#define	FRF_BZ_RBUF_OWN_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_TBUF_OWN_INT_CHAR_EN_LBN 38
+#define	FRF_BZ_TBUF_OWN_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_RDESCQ_OWN_INT_CHAR_EN_LBN 37
+#define	FRF_BZ_RDESCQ_OWN_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_TDESCQ_OWN_INT_CHAR_EN_LBN 36
+#define	FRF_BZ_TDESCQ_OWN_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_EVQ_OWN_INT_CHAR_EN_LBN 35
+#define	FRF_BZ_EVQ_OWN_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_EVF_OFLO_INT_CHAR_EN_LBN 34
+#define	FRF_BZ_EVF_OFLO_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_ILL_ADR_INT_CHAR_EN_LBN 33
+#define	FRF_BZ_ILL_ADR_INT_CHAR_EN_WIDTH 1
+#define	FRF_BZ_SRM_PERR_INT_CHAR_EN_LBN 32
+#define	FRF_BZ_SRM_PERR_INT_CHAR_EN_WIDTH 1
+#define	FRF_CZ_SRAM_PERR_INT_P_CHAR_LBN 12
+#define	FRF_CZ_SRAM_PERR_INT_P_CHAR_WIDTH 1
+#define	FRF_BB_PCI_BUSERR_INT_CHAR_LBN 11
+#define	FRF_BB_PCI_BUSERR_INT_CHAR_WIDTH 1
+#define	FRF_CZ_MBU_PERR_INT_CHAR_LBN 11
+#define	FRF_CZ_MBU_PERR_INT_CHAR_WIDTH 1
+#define	FRF_BZ_SRAM_OOB_INT_CHAR_LBN 10
+#define	FRF_BZ_SRAM_OOB_INT_CHAR_WIDTH 1
+#define	FRF_BZ_BUFID_DC_OOB_INT_CHAR_LBN 9
+#define	FRF_BZ_BUFID_DC_OOB_INT_CHAR_WIDTH 1
+#define	FRF_BZ_MEM_PERR_INT_CHAR_LBN 8
+#define	FRF_BZ_MEM_PERR_INT_CHAR_WIDTH 1
+#define	FRF_BZ_RBUF_OWN_INT_CHAR_LBN 7
+#define	FRF_BZ_RBUF_OWN_INT_CHAR_WIDTH 1
+#define	FRF_BZ_TBUF_OWN_INT_CHAR_LBN 6
+#define	FRF_BZ_TBUF_OWN_INT_CHAR_WIDTH 1
+#define	FRF_BZ_RDESCQ_OWN_INT_CHAR_LBN 5
+#define	FRF_BZ_RDESCQ_OWN_INT_CHAR_WIDTH 1
+#define	FRF_BZ_TDESCQ_OWN_INT_CHAR_LBN 4
+#define	FRF_BZ_TDESCQ_OWN_INT_CHAR_WIDTH 1
+#define	FRF_BZ_EVQ_OWN_INT_CHAR_LBN 3
+#define	FRF_BZ_EVQ_OWN_INT_CHAR_WIDTH 1
+#define	FRF_BZ_EVF_OFLO_INT_CHAR_LBN 2
+#define	FRF_BZ_EVF_OFLO_INT_CHAR_WIDTH 1
+#define	FRF_BZ_ILL_ADR_INT_CHAR_LBN 1
+#define	FRF_BZ_ILL_ADR_INT_CHAR_WIDTH 1
+#define	FRF_BZ_SRM_PERR_INT_CHAR_LBN 0
+#define	FRF_BZ_SRM_PERR_INT_CHAR_WIDTH 1
+
+/* DP_CTRL_REG: Datapath control register */
+#define	FR_BZ_DP_CTRL 0x00000250
+#define	FRF_BZ_FLS_EVQ_ID_LBN 0
+#define	FRF_BZ_FLS_EVQ_ID_WIDTH 12
+
+/* MEM_STAT_REG: Memory status register */
+#define	FR_AZ_MEM_STAT 0x00000260
+#define	FRF_AB_MEM_PERR_VEC_LBN 53
+#define	FRF_AB_MEM_PERR_VEC_WIDTH 38
+#define	FRF_AB_MBIST_CORR_LBN 38
+#define	FRF_AB_MBIST_CORR_WIDTH 15
+#define	FRF_AB_MBIST_ERR_LBN 0
+#define	FRF_AB_MBIST_ERR_WIDTH 40
+#define	FRF_CZ_MEM_PERR_VEC_LBN 0
+#define	FRF_CZ_MEM_PERR_VEC_WIDTH 35
+
+/* CS_DEBUG_REG: Debug register */
+#define	FR_AZ_CS_DEBUG 0x00000270
+#define	FRF_AB_GLB_DEBUG2_SEL_LBN 50
+#define	FRF_AB_GLB_DEBUG2_SEL_WIDTH 3
+#define	FRF_AB_DEBUG_BLK_SEL2_LBN 47
+#define	FRF_AB_DEBUG_BLK_SEL2_WIDTH 3
+#define	FRF_AB_DEBUG_BLK_SEL1_LBN 44
+#define	FRF_AB_DEBUG_BLK_SEL1_WIDTH 3
+#define	FRF_AB_DEBUG_BLK_SEL0_LBN 41
+#define	FRF_AB_DEBUG_BLK_SEL0_WIDTH 3
+#define	FRF_CZ_CS_PORT_NUM_LBN 40
+#define	FRF_CZ_CS_PORT_NUM_WIDTH 2
+#define	FRF_AB_MISC_DEBUG_ADDR_LBN 36
+#define	FRF_AB_MISC_DEBUG_ADDR_WIDTH 5
+#define	FRF_AB_SERDES_DEBUG_ADDR_LBN 31
+#define	FRF_AB_SERDES_DEBUG_ADDR_WIDTH 5
+#define	FRF_CZ_CS_PORT_FPE_LBN 1
+#define	FRF_CZ_CS_PORT_FPE_WIDTH 35
+#define	FRF_AB_EM_DEBUG_ADDR_LBN 26
+#define	FRF_AB_EM_DEBUG_ADDR_WIDTH 5
+#define	FRF_AB_SR_DEBUG_ADDR_LBN 21
+#define	FRF_AB_SR_DEBUG_ADDR_WIDTH 5
+#define	FRF_AB_EV_DEBUG_ADDR_LBN 16
+#define	FRF_AB_EV_DEBUG_ADDR_WIDTH 5
+#define	FRF_AB_RX_DEBUG_ADDR_LBN 11
+#define	FRF_AB_RX_DEBUG_ADDR_WIDTH 5
+#define	FRF_AB_TX_DEBUG_ADDR_LBN 6
+#define	FRF_AB_TX_DEBUG_ADDR_WIDTH 5
+#define	FRF_AB_CS_BIU_DEBUG_ADDR_LBN 1
+#define	FRF_AB_CS_BIU_DEBUG_ADDR_WIDTH 5
+#define	FRF_AZ_CS_DEBUG_EN_LBN 0
+#define	FRF_AZ_CS_DEBUG_EN_WIDTH 1
+
+/* DRIVER_REG: Driver scratch register [0-7] */
+#define	FR_AZ_DRIVER 0x00000280
+#define	FR_AZ_DRIVER_STEP 16
+#define	FR_AZ_DRIVER_ROWS 8
+#define	FRF_AZ_DRIVER_DW0_LBN 0
+#define	FRF_AZ_DRIVER_DW0_WIDTH 32
+
+/* ALTERA_BUILD_REG: Altera build register */
+#define	FR_AZ_ALTERA_BUILD 0x00000300
+#define	FRF_AZ_ALTERA_BUILD_VER_LBN 0
+#define	FRF_AZ_ALTERA_BUILD_VER_WIDTH 32
+
+/* CSR_SPARE_REG: Spare register */
+#define	FR_AZ_CSR_SPARE 0x00000310
+#define	FRF_AB_MEM_PERR_EN_LBN 64
+#define	FRF_AB_MEM_PERR_EN_WIDTH 38
+#define	FRF_CZ_MEM_PERR_EN_LBN 64
+#define	FRF_CZ_MEM_PERR_EN_WIDTH 35
+#define	FRF_AB_MEM_PERR_EN_TX_DATA_LBN 72
+#define	FRF_AB_MEM_PERR_EN_TX_DATA_WIDTH 2
+#define	FRF_AZ_CSR_SPARE_BITS_LBN 0
+#define	FRF_AZ_CSR_SPARE_BITS_WIDTH 32
+
+/* PCIE_SD_CTL0123_REG: PCIE SerDes control register 0 to 3 */
+#define	FR_AB_PCIE_SD_CTL0123 0x00000320
+#define	FRF_AB_PCIE_TESTSIG_H_LBN 96
+#define	FRF_AB_PCIE_TESTSIG_H_WIDTH 19
+#define	FRF_AB_PCIE_TESTSIG_L_LBN 64
+#define	FRF_AB_PCIE_TESTSIG_L_WIDTH 19
+#define	FRF_AB_PCIE_OFFSET_LBN 56
+#define	FRF_AB_PCIE_OFFSET_WIDTH 8
+#define	FRF_AB_PCIE_OFFSETEN_H_LBN 55
+#define	FRF_AB_PCIE_OFFSETEN_H_WIDTH 1
+#define	FRF_AB_PCIE_OFFSETEN_L_LBN 54
+#define	FRF_AB_PCIE_OFFSETEN_L_WIDTH 1
+#define	FRF_AB_PCIE_HIVMODE_H_LBN 53
+#define	FRF_AB_PCIE_HIVMODE_H_WIDTH 1
+#define	FRF_AB_PCIE_HIVMODE_L_LBN 52
+#define	FRF_AB_PCIE_HIVMODE_L_WIDTH 1
+#define	FRF_AB_PCIE_PARRESET_H_LBN 51
+#define	FRF_AB_PCIE_PARRESET_H_WIDTH 1
+#define	FRF_AB_PCIE_PARRESET_L_LBN 50
+#define	FRF_AB_PCIE_PARRESET_L_WIDTH 1
+#define	FRF_AB_PCIE_LPBKWDRV_H_LBN 49
+#define	FRF_AB_PCIE_LPBKWDRV_H_WIDTH 1
+#define	FRF_AB_PCIE_LPBKWDRV_L_LBN 48
+#define	FRF_AB_PCIE_LPBKWDRV_L_WIDTH 1
+#define	FRF_AB_PCIE_LPBK_LBN 40
+#define	FRF_AB_PCIE_LPBK_WIDTH 8
+#define	FRF_AB_PCIE_PARLPBK_LBN 32
+#define	FRF_AB_PCIE_PARLPBK_WIDTH 8
+#define	FRF_AB_PCIE_RXTERMADJ_H_LBN 30
+#define	FRF_AB_PCIE_RXTERMADJ_H_WIDTH 2
+#define	FRF_AB_PCIE_RXTERMADJ_L_LBN 28
+#define	FRF_AB_PCIE_RXTERMADJ_L_WIDTH 2
+#define	FFE_AB_PCIE_RXTERMADJ_MIN15PCNT 3
+#define	FFE_AB_PCIE_RXTERMADJ_PL10PCNT 2
+#define	FFE_AB_PCIE_RXTERMADJ_MIN17PCNT 1
+#define	FFE_AB_PCIE_RXTERMADJ_NOMNL 0
+#define	FRF_AB_PCIE_TXTERMADJ_H_LBN 26
+#define	FRF_AB_PCIE_TXTERMADJ_H_WIDTH 2
+#define	FRF_AB_PCIE_TXTERMADJ_L_LBN 24
+#define	FRF_AB_PCIE_TXTERMADJ_L_WIDTH 2
+#define	FFE_AB_PCIE_TXTERMADJ_MIN15PCNT 3
+#define	FFE_AB_PCIE_TXTERMADJ_PL10PCNT 2
+#define	FFE_AB_PCIE_TXTERMADJ_MIN17PCNT 1
+#define	FFE_AB_PCIE_TXTERMADJ_NOMNL 0
+#define	FRF_AB_PCIE_RXEQCTL_H_LBN 18
+#define	FRF_AB_PCIE_RXEQCTL_H_WIDTH 2
+#define	FRF_AB_PCIE_RXEQCTL_L_LBN 16
+#define	FRF_AB_PCIE_RXEQCTL_L_WIDTH 2
+#define	FFE_AB_PCIE_RXEQCTL_OFF_ALT 3
+#define	FFE_AB_PCIE_RXEQCTL_OFF 2
+#define	FFE_AB_PCIE_RXEQCTL_MIN 1
+#define	FFE_AB_PCIE_RXEQCTL_MAX 0
+#define	FRF_AB_PCIE_HIDRV_LBN 8
+#define	FRF_AB_PCIE_HIDRV_WIDTH 8
+#define	FRF_AB_PCIE_LODRV_LBN 0
+#define	FRF_AB_PCIE_LODRV_WIDTH 8
+
+/* PCIE_SD_CTL45_REG: PCIE SerDes control register 4 and 5 */
+#define	FR_AB_PCIE_SD_CTL45 0x00000330
+#define	FRF_AB_PCIE_DTX7_LBN 60
+#define	FRF_AB_PCIE_DTX7_WIDTH 4
+#define	FRF_AB_PCIE_DTX6_LBN 56
+#define	FRF_AB_PCIE_DTX6_WIDTH 4
+#define	FRF_AB_PCIE_DTX5_LBN 52
+#define	FRF_AB_PCIE_DTX5_WIDTH 4
+#define	FRF_AB_PCIE_DTX4_LBN 48
+#define	FRF_AB_PCIE_DTX4_WIDTH 4
+#define	FRF_AB_PCIE_DTX3_LBN 44
+#define	FRF_AB_PCIE_DTX3_WIDTH 4
+#define	FRF_AB_PCIE_DTX2_LBN 40
+#define	FRF_AB_PCIE_DTX2_WIDTH 4
+#define	FRF_AB_PCIE_DTX1_LBN 36
+#define	FRF_AB_PCIE_DTX1_WIDTH 4
+#define	FRF_AB_PCIE_DTX0_LBN 32
+#define	FRF_AB_PCIE_DTX0_WIDTH 4
+#define	FRF_AB_PCIE_DEQ7_LBN 28
+#define	FRF_AB_PCIE_DEQ7_WIDTH 4
+#define	FRF_AB_PCIE_DEQ6_LBN 24
+#define	FRF_AB_PCIE_DEQ6_WIDTH 4
+#define	FRF_AB_PCIE_DEQ5_LBN 20
+#define	FRF_AB_PCIE_DEQ5_WIDTH 4
+#define	FRF_AB_PCIE_DEQ4_LBN 16
+#define	FRF_AB_PCIE_DEQ4_WIDTH 4
+#define	FRF_AB_PCIE_DEQ3_LBN 12
+#define	FRF_AB_PCIE_DEQ3_WIDTH 4
+#define	FRF_AB_PCIE_DEQ2_LBN 8
+#define	FRF_AB_PCIE_DEQ2_WIDTH 4
+#define	FRF_AB_PCIE_DEQ1_LBN 4
+#define	FRF_AB_PCIE_DEQ1_WIDTH 4
+#define	FRF_AB_PCIE_DEQ0_LBN 0
+#define	FRF_AB_PCIE_DEQ0_WIDTH 4
+
+/* PCIE_PCS_CTL_STAT_REG: PCIE PCS control and status register */
+#define	FR_AB_PCIE_PCS_CTL_STAT 0x00000340
+#define	FRF_AB_PCIE_PRBSERRCOUNT0_H_LBN 52
+#define	FRF_AB_PCIE_PRBSERRCOUNT0_H_WIDTH 4
+#define	FRF_AB_PCIE_PRBSERRCOUNT0_L_LBN 48
+#define	FRF_AB_PCIE_PRBSERRCOUNT0_L_WIDTH 4
+#define	FRF_AB_PCIE_PRBSERR_LBN 40
+#define	FRF_AB_PCIE_PRBSERR_WIDTH 8
+#define	FRF_AB_PCIE_PRBSERRH0_LBN 32
+#define	FRF_AB_PCIE_PRBSERRH0_WIDTH 8
+#define	FRF_AB_PCIE_FASTINIT_H_LBN 15
+#define	FRF_AB_PCIE_FASTINIT_H_WIDTH 1
+#define	FRF_AB_PCIE_FASTINIT_L_LBN 14
+#define	FRF_AB_PCIE_FASTINIT_L_WIDTH 1
+#define	FRF_AB_PCIE_CTCDISABLE_H_LBN 13
+#define	FRF_AB_PCIE_CTCDISABLE_H_WIDTH 1
+#define	FRF_AB_PCIE_CTCDISABLE_L_LBN 12
+#define	FRF_AB_PCIE_CTCDISABLE_L_WIDTH 1
+#define	FRF_AB_PCIE_PRBSSYNC_H_LBN 11
+#define	FRF_AB_PCIE_PRBSSYNC_H_WIDTH 1
+#define	FRF_AB_PCIE_PRBSSYNC_L_LBN 10
+#define	FRF_AB_PCIE_PRBSSYNC_L_WIDTH 1
+#define	FRF_AB_PCIE_PRBSERRACK_H_LBN 9
+#define	FRF_AB_PCIE_PRBSERRACK_H_WIDTH 1
+#define	FRF_AB_PCIE_PRBSERRACK_L_LBN 8
+#define	FRF_AB_PCIE_PRBSERRACK_L_WIDTH 1
+#define	FRF_AB_PCIE_PRBSSEL_LBN 0
+#define	FRF_AB_PCIE_PRBSSEL_WIDTH 8
+
+/* DEBUG_DATA_OUT_REG: Live Debug and Debug 2 out ports */
+#define	FR_BB_DEBUG_DATA_OUT 0x00000350
+#define	FRF_BB_DEBUG2_PORT_LBN 25
+#define	FRF_BB_DEBUG2_PORT_WIDTH 15
+#define	FRF_BB_DEBUG1_PORT_LBN 0
+#define	FRF_BB_DEBUG1_PORT_WIDTH 25
+
+/* EVQ_RPTR_REGP0: Event queue read pointer register */
+#define	FR_BZ_EVQ_RPTR_P0 0x00000400
+#define	FR_BZ_EVQ_RPTR_P0_STEP 8192
+#define	FR_BZ_EVQ_RPTR_P0_ROWS 1024
+/* EVQ_RPTR_REG_KER: Event queue read pointer register */
+#define	FR_AA_EVQ_RPTR_KER 0x00011b00
+#define	FR_AA_EVQ_RPTR_KER_STEP 4
+#define	FR_AA_EVQ_RPTR_KER_ROWS 4
+/* EVQ_RPTR_REG: Event queue read pointer register */
+#define	FR_BZ_EVQ_RPTR 0x00fa0000
+#define	FR_BZ_EVQ_RPTR_STEP 16
+#define	FR_BB_EVQ_RPTR_ROWS 4096
+#define	FR_CZ_EVQ_RPTR_ROWS 1024
+/* EVQ_RPTR_REGP123: Event queue read pointer register */
+#define	FR_BB_EVQ_RPTR_P123 0x01000400
+#define	FR_BB_EVQ_RPTR_P123_STEP 8192
+#define	FR_BB_EVQ_RPTR_P123_ROWS 3072
+#define	FRF_AZ_EVQ_RPTR_VLD_LBN 15
+#define	FRF_AZ_EVQ_RPTR_VLD_WIDTH 1
+#define	FRF_AZ_EVQ_RPTR_LBN 0
+#define	FRF_AZ_EVQ_RPTR_WIDTH 15
+
+/* TIMER_COMMAND_REGP0: Timer Command Registers */
+#define	FR_BZ_TIMER_COMMAND_P0 0x00000420
+#define	FR_BZ_TIMER_COMMAND_P0_STEP 8192
+#define	FR_BZ_TIMER_COMMAND_P0_ROWS 1024
+/* TIMER_COMMAND_REG_KER: Timer Command Registers */
+#define	FR_AA_TIMER_COMMAND_KER 0x00000420
+#define	FR_AA_TIMER_COMMAND_KER_STEP 8192
+#define	FR_AA_TIMER_COMMAND_KER_ROWS 4
+/* TIMER_COMMAND_REGP123: Timer Command Registers */
+#define	FR_BB_TIMER_COMMAND_P123 0x01000420
+#define	FR_BB_TIMER_COMMAND_P123_STEP 8192
+#define	FR_BB_TIMER_COMMAND_P123_ROWS 3072
+#define	FRF_CZ_TC_TIMER_MODE_LBN 14
+#define	FRF_CZ_TC_TIMER_MODE_WIDTH 2
+#define	FRF_AB_TC_TIMER_MODE_LBN 12
+#define	FRF_AB_TC_TIMER_MODE_WIDTH 2
+#define	FRF_CZ_TC_TIMER_VAL_LBN 0
+#define	FRF_CZ_TC_TIMER_VAL_WIDTH 14
+#define	FRF_AB_TC_TIMER_VAL_LBN 0
+#define	FRF_AB_TC_TIMER_VAL_WIDTH 12
+
+/* DRV_EV_REG: Driver generated event register */
+#define	FR_AZ_DRV_EV 0x00000440
+#define	FRF_AZ_DRV_EV_QID_LBN 64
+#define	FRF_AZ_DRV_EV_QID_WIDTH 12
+#define	FRF_AZ_DRV_EV_DATA_LBN 0
+#define	FRF_AZ_DRV_EV_DATA_WIDTH 64
+
+/* EVQ_CTL_REG: Event queue control register */
+#define	FR_AZ_EVQ_CTL 0x00000450
+#define	FRF_CZ_RX_EVQ_WAKEUP_MASK_LBN 15
+#define	FRF_CZ_RX_EVQ_WAKEUP_MASK_WIDTH 10
+#define	FRF_BB_RX_EVQ_WAKEUP_MASK_LBN 15
+#define	FRF_BB_RX_EVQ_WAKEUP_MASK_WIDTH 6
+#define	FRF_AZ_EVQ_OWNERR_CTL_LBN 14
+#define	FRF_AZ_EVQ_OWNERR_CTL_WIDTH 1
+#define	FRF_AZ_EVQ_FIFO_AF_TH_LBN 7
+#define	FRF_AZ_EVQ_FIFO_AF_TH_WIDTH 7
+#define	FRF_AZ_EVQ_FIFO_NOTAF_TH_LBN 0
+#define	FRF_AZ_EVQ_FIFO_NOTAF_TH_WIDTH 7
+
+/* EVQ_CNT1_REG: Event counter 1 register */
+#define	FR_AZ_EVQ_CNT1 0x00000460
+#define	FRF_AZ_EVQ_CNT_PRE_FIFO_LBN 120
+#define	FRF_AZ_EVQ_CNT_PRE_FIFO_WIDTH 7
+#define	FRF_AZ_EVQ_CNT_TOBIU_LBN 100
+#define	FRF_AZ_EVQ_CNT_TOBIU_WIDTH 20
+#define	FRF_AZ_EVQ_TX_REQ_CNT_LBN 80
+#define	FRF_AZ_EVQ_TX_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_RX_REQ_CNT_LBN 60
+#define	FRF_AZ_EVQ_RX_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_EM_REQ_CNT_LBN 40
+#define	FRF_AZ_EVQ_EM_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_CSR_REQ_CNT_LBN 20
+#define	FRF_AZ_EVQ_CSR_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_ERR_REQ_CNT_LBN 0
+#define	FRF_AZ_EVQ_ERR_REQ_CNT_WIDTH 20
+
+/* EVQ_CNT2_REG: Event counter 2 register */
+#define	FR_AZ_EVQ_CNT2 0x00000470
+#define	FRF_AZ_EVQ_UPD_REQ_CNT_LBN 104
+#define	FRF_AZ_EVQ_UPD_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_CLR_REQ_CNT_LBN 84
+#define	FRF_AZ_EVQ_CLR_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_RDY_CNT_LBN 80
+#define	FRF_AZ_EVQ_RDY_CNT_WIDTH 4
+#define	FRF_AZ_EVQ_WU_REQ_CNT_LBN 60
+#define	FRF_AZ_EVQ_WU_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_WET_REQ_CNT_LBN 40
+#define	FRF_AZ_EVQ_WET_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_INIT_REQ_CNT_LBN 20
+#define	FRF_AZ_EVQ_INIT_REQ_CNT_WIDTH 20
+#define	FRF_AZ_EVQ_TM_REQ_CNT_LBN 0
+#define	FRF_AZ_EVQ_TM_REQ_CNT_WIDTH 20
+
+/* USR_EV_REG: Event mailbox register */
+#define	FR_CZ_USR_EV 0x00000540
+#define	FR_CZ_USR_EV_STEP 8192
+#define	FR_CZ_USR_EV_ROWS 1024
+#define	FRF_CZ_USR_EV_DATA_LBN 0
+#define	FRF_CZ_USR_EV_DATA_WIDTH 32
+
+/* BUF_TBL_CFG_REG: Buffer table configuration register */
+#define	FR_AZ_BUF_TBL_CFG 0x00000600
+#define	FRF_AZ_BUF_TBL_MODE_LBN 3
+#define	FRF_AZ_BUF_TBL_MODE_WIDTH 1
+
+/* SRM_RX_DC_CFG_REG: SRAM receive descriptor cache configuration register */
+#define	FR_AZ_SRM_RX_DC_CFG 0x00000610
+#define	FRF_AZ_SRM_CLK_TMP_EN_LBN 21
+#define	FRF_AZ_SRM_CLK_TMP_EN_WIDTH 1
+#define	FRF_AZ_SRM_RX_DC_BASE_ADR_LBN 0
+#define	FRF_AZ_SRM_RX_DC_BASE_ADR_WIDTH 21
+
+/* SRM_TX_DC_CFG_REG: SRAM transmit descriptor cache configuration register */
+#define	FR_AZ_SRM_TX_DC_CFG 0x00000620
+#define	FRF_AZ_SRM_TX_DC_BASE_ADR_LBN 0
+#define	FRF_AZ_SRM_TX_DC_BASE_ADR_WIDTH 21
+
+/* SRM_CFG_REG: SRAM configuration register */
+#define	FR_AZ_SRM_CFG 0x00000630
+#define	FRF_AZ_SRM_OOB_ADR_INTEN_LBN 5
+#define	FRF_AZ_SRM_OOB_ADR_INTEN_WIDTH 1
+#define	FRF_AZ_SRM_OOB_BUF_INTEN_LBN 4
+#define	FRF_AZ_SRM_OOB_BUF_INTEN_WIDTH 1
+#define	FRF_AZ_SRM_INIT_EN_LBN 3
+#define	FRF_AZ_SRM_INIT_EN_WIDTH 1
+#define	FRF_AZ_SRM_NUM_BANK_LBN 2
+#define	FRF_AZ_SRM_NUM_BANK_WIDTH 1
+#define	FRF_AZ_SRM_BANK_SIZE_LBN 0
+#define	FRF_AZ_SRM_BANK_SIZE_WIDTH 2
+
+/* BUF_TBL_UPD_REG: Buffer table update register */
+#define	FR_AZ_BUF_TBL_UPD 0x00000650
+#define	FRF_AZ_BUF_UPD_CMD_LBN 63
+#define	FRF_AZ_BUF_UPD_CMD_WIDTH 1
+#define	FRF_AZ_BUF_CLR_CMD_LBN 62
+#define	FRF_AZ_BUF_CLR_CMD_WIDTH 1
+#define	FRF_AZ_BUF_CLR_END_ID_LBN 32
+#define	FRF_AZ_BUF_CLR_END_ID_WIDTH 20
+#define	FRF_AZ_BUF_CLR_START_ID_LBN 0
+#define	FRF_AZ_BUF_CLR_START_ID_WIDTH 20
+
+/* SRM_UPD_EVQ_REG: SRAM update event queue register */
+#define	FR_AZ_SRM_UPD_EVQ 0x00000660
+#define	FRF_AZ_SRM_UPD_EVQ_ID_LBN 0
+#define	FRF_AZ_SRM_UPD_EVQ_ID_WIDTH 12
+
+/* SRAM_PARITY_REG: SRAM parity register. */
+#define	FR_AZ_SRAM_PARITY 0x00000670
+#define	FRF_CZ_BYPASS_ECC_LBN 3
+#define	FRF_CZ_BYPASS_ECC_WIDTH 1
+#define	FRF_CZ_SEC_INT_LBN 2
+#define	FRF_CZ_SEC_INT_WIDTH 1
+#define	FRF_CZ_FORCE_SRAM_DOUBLE_ERR_LBN 1
+#define	FRF_CZ_FORCE_SRAM_DOUBLE_ERR_WIDTH 1
+#define	FRF_AB_FORCE_SRAM_PERR_LBN 0
+#define	FRF_AB_FORCE_SRAM_PERR_WIDTH 1
+#define	FRF_CZ_FORCE_SRAM_SINGLE_ERR_LBN 0
+#define	FRF_CZ_FORCE_SRAM_SINGLE_ERR_WIDTH 1
+
+/* RX_CFG_REG: Receive configuration register */
+#define	FR_AZ_RX_CFG 0x00000800
+#define	FRF_CZ_RX_MIN_KBUF_SIZE_LBN 72
+#define	FRF_CZ_RX_MIN_KBUF_SIZE_WIDTH 14
+#define	FRF_CZ_RX_HDR_SPLIT_EN_LBN 71
+#define	FRF_CZ_RX_HDR_SPLIT_EN_WIDTH 1
+#define	FRF_CZ_RX_HDR_SPLIT_PLD_BUF_SIZE_LBN 62
+#define	FRF_CZ_RX_HDR_SPLIT_PLD_BUF_SIZE_WIDTH 9
+#define	FRF_CZ_RX_HDR_SPLIT_HDR_BUF_SIZE_LBN 53
+#define	FRF_CZ_RX_HDR_SPLIT_HDR_BUF_SIZE_WIDTH 9
+#define	FRF_CZ_RX_PRE_RFF_IPG_LBN 49
+#define	FRF_CZ_RX_PRE_RFF_IPG_WIDTH 4
+#define	FRF_BZ_RX_TCP_SUP_LBN 48
+#define	FRF_BZ_RX_TCP_SUP_WIDTH 1
+#define	FRF_BZ_RX_INGR_EN_LBN 47
+#define	FRF_BZ_RX_INGR_EN_WIDTH 1
+#define	FRF_BZ_RX_IP_HASH_LBN 46
+#define	FRF_BZ_RX_IP_HASH_WIDTH 1
+#define	FRF_BZ_RX_HASH_ALG_LBN 45
+#define	FRF_BZ_RX_HASH_ALG_WIDTH 1
+#define	FRF_BZ_RX_HASH_INSRT_HDR_LBN 44
+#define	FRF_BZ_RX_HASH_INSRT_HDR_WIDTH 1
+#define	FRF_BZ_RX_DESC_PUSH_EN_LBN 43
+#define	FRF_BZ_RX_DESC_PUSH_EN_WIDTH 1
+#define	FRF_BZ_RX_RDW_PATCH_EN_LBN 42
+#define	FRF_BZ_RX_RDW_PATCH_EN_WIDTH 1
+#define	FRF_BB_RX_PCI_BURST_SIZE_LBN 39
+#define	FRF_BB_RX_PCI_BURST_SIZE_WIDTH 3
+#define	FRF_BZ_RX_OWNERR_CTL_LBN 38
+#define	FRF_BZ_RX_OWNERR_CTL_WIDTH 1
+#define	FRF_BZ_RX_XON_TX_TH_LBN 33
+#define	FRF_BZ_RX_XON_TX_TH_WIDTH 5
+#define	FRF_AA_RX_DESC_PUSH_EN_LBN 35
+#define	FRF_AA_RX_DESC_PUSH_EN_WIDTH 1
+#define	FRF_AA_RX_RDW_PATCH_EN_LBN 34
+#define	FRF_AA_RX_RDW_PATCH_EN_WIDTH 1
+#define	FRF_AA_RX_PCI_BURST_SIZE_LBN 31
+#define	FRF_AA_RX_PCI_BURST_SIZE_WIDTH 3
+#define	FRF_BZ_RX_XOFF_TX_TH_LBN 28
+#define	FRF_BZ_RX_XOFF_TX_TH_WIDTH 5
+#define	FRF_AA_RX_OWNERR_CTL_LBN 30
+#define	FRF_AA_RX_OWNERR_CTL_WIDTH 1
+#define	FRF_AA_RX_XON_TX_TH_LBN 25
+#define	FRF_AA_RX_XON_TX_TH_WIDTH 5
+#define	FRF_BZ_RX_USR_BUF_SIZE_LBN 19
+#define	FRF_BZ_RX_USR_BUF_SIZE_WIDTH 9
+#define	FRF_AA_RX_XOFF_TX_TH_LBN 20
+#define	FRF_AA_RX_XOFF_TX_TH_WIDTH 5
+#define	FRF_AA_RX_USR_BUF_SIZE_LBN 11
+#define	FRF_AA_RX_USR_BUF_SIZE_WIDTH 9
+#define	FRF_BZ_RX_XON_MAC_TH_LBN 10
+#define	FRF_BZ_RX_XON_MAC_TH_WIDTH 9
+#define	FRF_AA_RX_XON_MAC_TH_LBN 6
+#define	FRF_AA_RX_XON_MAC_TH_WIDTH 5
+#define	FRF_BZ_RX_XOFF_MAC_TH_LBN 1
+#define	FRF_BZ_RX_XOFF_MAC_TH_WIDTH 9
+#define	FRF_AA_RX_XOFF_MAC_TH_LBN 1
+#define	FRF_AA_RX_XOFF_MAC_TH_WIDTH 5
+#define	FRF_AZ_RX_XOFF_MAC_EN_LBN 0
+#define	FRF_AZ_RX_XOFF_MAC_EN_WIDTH 1
+
+/* RX_FILTER_CTL_REG: Receive filter control registers */
+#define	FR_BZ_RX_FILTER_CTL 0x00000810
+#define	FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT_LBN 94
+#define	FRF_CZ_ETHERNET_WILDCARD_SEARCH_LIMIT_WIDTH 8
+#define	FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT_LBN 86
+#define	FRF_CZ_ETHERNET_FULL_SEARCH_LIMIT_WIDTH 8
+#define	FRF_CZ_RX_FILTER_ALL_VLAN_ETHERTYPES_LBN 85
+#define	FRF_CZ_RX_FILTER_ALL_VLAN_ETHERTYPES_WIDTH 1
+#define	FRF_CZ_RX_VLAN_MATCH_ETHERTYPE_LBN 69
+#define	FRF_CZ_RX_VLAN_MATCH_ETHERTYPE_WIDTH 16
+#define	FRF_CZ_MULTICAST_NOMATCH_Q_ID_LBN 57
+#define	FRF_CZ_MULTICAST_NOMATCH_Q_ID_WIDTH 12
+#define	FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED_LBN 56
+#define	FRF_CZ_MULTICAST_NOMATCH_RSS_ENABLED_WIDTH 1
+#define	FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE_LBN 55
+#define	FRF_CZ_MULTICAST_NOMATCH_IP_OVERRIDE_WIDTH 1
+#define	FRF_CZ_UNICAST_NOMATCH_Q_ID_LBN 43
+#define	FRF_CZ_UNICAST_NOMATCH_Q_ID_WIDTH 12
+#define	FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED_LBN 42
+#define	FRF_CZ_UNICAST_NOMATCH_RSS_ENABLED_WIDTH 1
+#define	FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE_LBN 41
+#define	FRF_CZ_UNICAST_NOMATCH_IP_OVERRIDE_WIDTH 1
+#define	FRF_BZ_SCATTER_ENBL_NO_MATCH_Q_LBN 40
+#define	FRF_BZ_SCATTER_ENBL_NO_MATCH_Q_WIDTH 1
+#define	FRF_BZ_UDP_FULL_SRCH_LIMIT_LBN 32
+#define	FRF_BZ_UDP_FULL_SRCH_LIMIT_WIDTH 8
+#define	FRF_BZ_NUM_KER_LBN 24
+#define	FRF_BZ_NUM_KER_WIDTH 2
+#define	FRF_BZ_UDP_WILD_SRCH_LIMIT_LBN 16
+#define	FRF_BZ_UDP_WILD_SRCH_LIMIT_WIDTH 8
+#define	FRF_BZ_TCP_WILD_SRCH_LIMIT_LBN 8
+#define	FRF_BZ_TCP_WILD_SRCH_LIMIT_WIDTH 8
+#define	FRF_BZ_TCP_FULL_SRCH_LIMIT_LBN 0
+#define	FRF_BZ_TCP_FULL_SRCH_LIMIT_WIDTH 8
+
+/* RX_FLUSH_DESCQ_REG: Receive flush descriptor queue register */
+#define	FR_AZ_RX_FLUSH_DESCQ 0x00000820
+#define	FRF_AZ_RX_FLUSH_DESCQ_CMD_LBN 24
+#define	FRF_AZ_RX_FLUSH_DESCQ_CMD_WIDTH 1
+#define	FRF_AZ_RX_FLUSH_DESCQ_LBN 0
+#define	FRF_AZ_RX_FLUSH_DESCQ_WIDTH 12
+
+/* RX_DESC_UPD_REGP0: Receive descriptor update register. */
+#define	FR_BZ_RX_DESC_UPD_P0 0x00000830
+#define	FR_BZ_RX_DESC_UPD_P0_STEP 8192
+#define	FR_BZ_RX_DESC_UPD_P0_ROWS 1024
+/* RX_DESC_UPD_REG_KER: Receive descriptor update register. */
+#define	FR_AA_RX_DESC_UPD_KER 0x00000830
+#define	FR_AA_RX_DESC_UPD_KER_STEP 8192
+#define	FR_AA_RX_DESC_UPD_KER_ROWS 4
+/* RX_DESC_UPD_REGP123: Receive descriptor update register. */
+#define	FR_BB_RX_DESC_UPD_P123 0x01000830
+#define	FR_BB_RX_DESC_UPD_P123_STEP 8192
+#define	FR_BB_RX_DESC_UPD_P123_ROWS 3072
+#define	FRF_AZ_RX_DESC_WPTR_LBN 96
+#define	FRF_AZ_RX_DESC_WPTR_WIDTH 12
+#define	FRF_AZ_RX_DESC_PUSH_CMD_LBN 95
+#define	FRF_AZ_RX_DESC_PUSH_CMD_WIDTH 1
+#define	FRF_AZ_RX_DESC_LBN 0
+#define	FRF_AZ_RX_DESC_WIDTH 64
+
+/* RX_DC_CFG_REG: Receive descriptor cache configuration register */
+#define	FR_AZ_RX_DC_CFG 0x00000840
+#define	FRF_AB_RX_MAX_PF_LBN 2
+#define	FRF_AB_RX_MAX_PF_WIDTH 2
+#define	FRF_AZ_RX_DC_SIZE_LBN 0
+#define	FRF_AZ_RX_DC_SIZE_WIDTH 2
+#define	FFE_AZ_RX_DC_SIZE_64 3
+#define	FFE_AZ_RX_DC_SIZE_32 2
+#define	FFE_AZ_RX_DC_SIZE_16 1
+#define	FFE_AZ_RX_DC_SIZE_8 0
+
+/* RX_DC_PF_WM_REG: Receive descriptor cache pre-fetch watermark register */
+#define	FR_AZ_RX_DC_PF_WM 0x00000850
+#define	FRF_AZ_RX_DC_PF_HWM_LBN 6
+#define	FRF_AZ_RX_DC_PF_HWM_WIDTH 6
+#define	FRF_AZ_RX_DC_PF_LWM_LBN 0
+#define	FRF_AZ_RX_DC_PF_LWM_WIDTH 6
+
+/* RX_RSS_TKEY_REG: RSS Toeplitz hash key */
+#define	FR_BZ_RX_RSS_TKEY 0x00000860
+#define	FRF_BZ_RX_RSS_TKEY_HI_LBN 64
+#define	FRF_BZ_RX_RSS_TKEY_HI_WIDTH 64
+#define	FRF_BZ_RX_RSS_TKEY_LO_LBN 0
+#define	FRF_BZ_RX_RSS_TKEY_LO_WIDTH 64
+
+/* RX_NODESC_DROP_REG: Receive dropped packet counter register */
+#define	FR_AZ_RX_NODESC_DROP 0x00000880
+#define	FRF_CZ_RX_NODESC_DROP_CNT_LBN 0
+#define	FRF_CZ_RX_NODESC_DROP_CNT_WIDTH 32
+#define	FRF_AB_RX_NODESC_DROP_CNT_LBN 0
+#define	FRF_AB_RX_NODESC_DROP_CNT_WIDTH 16
+
+/* RX_SELF_RST_REG: Receive self reset register */
+#define	FR_AA_RX_SELF_RST 0x00000890
+#define	FRF_AA_RX_ISCSI_DIS_LBN 17
+#define	FRF_AA_RX_ISCSI_DIS_WIDTH 1
+#define	FRF_AA_RX_SW_RST_REG_LBN 16
+#define	FRF_AA_RX_SW_RST_REG_WIDTH 1
+#define FRF_AA_RX_NODESC_WAIT_DIS_LBN 9
+#define FRF_AA_RX_NODESC_WAIT_DIS_WIDTH 1
+#define	FRF_AA_RX_SELF_RST_EN_LBN 8
+#define	FRF_AA_RX_SELF_RST_EN_WIDTH 1
+#define	FRF_AA_RX_MAX_PF_LAT_LBN 4
+#define	FRF_AA_RX_MAX_PF_LAT_WIDTH 4
+#define	FRF_AA_RX_MAX_LU_LAT_LBN 0
+#define	FRF_AA_RX_MAX_LU_LAT_WIDTH 4
+
+/* RX_DEBUG_REG: undocumented register */
+#define	FR_AZ_RX_DEBUG 0x000008a0
+#define	FRF_AZ_RX_DEBUG_LBN 0
+#define	FRF_AZ_RX_DEBUG_WIDTH 64
+
+/* RX_PUSH_DROP_REG: Receive descriptor push dropped counter register */
+#define	FR_AZ_RX_PUSH_DROP 0x000008b0
+#define	FRF_AZ_RX_PUSH_DROP_CNT_LBN 0
+#define	FRF_AZ_RX_PUSH_DROP_CNT_WIDTH 32
+
+/* RX_RSS_IPV6_REG1: IPv6 RSS Toeplitz hash key low bytes */
+#define	FR_CZ_RX_RSS_IPV6_REG1 0x000008d0
+#define	FRF_CZ_RX_RSS_IPV6_TKEY_LO_LBN 0
+#define	FRF_CZ_RX_RSS_IPV6_TKEY_LO_WIDTH 128
+
+/* RX_RSS_IPV6_REG2: IPv6 RSS Toeplitz hash key middle bytes */
+#define	FR_CZ_RX_RSS_IPV6_REG2 0x000008e0
+#define	FRF_CZ_RX_RSS_IPV6_TKEY_MID_LBN 0
+#define	FRF_CZ_RX_RSS_IPV6_TKEY_MID_WIDTH 128
+
+/* RX_RSS_IPV6_REG3: IPv6 RSS Toeplitz hash key upper bytes and IPv6 RSS settings */
+#define	FR_CZ_RX_RSS_IPV6_REG3 0x000008f0
+#define	FRF_CZ_RX_RSS_IPV6_THASH_ENABLE_LBN 66
+#define	FRF_CZ_RX_RSS_IPV6_THASH_ENABLE_WIDTH 1
+#define	FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE_LBN 65
+#define	FRF_CZ_RX_RSS_IPV6_IP_THASH_ENABLE_WIDTH 1
+#define	FRF_CZ_RX_RSS_IPV6_TCP_SUPPRESS_LBN 64
+#define	FRF_CZ_RX_RSS_IPV6_TCP_SUPPRESS_WIDTH 1
+#define	FRF_CZ_RX_RSS_IPV6_TKEY_HI_LBN 0
+#define	FRF_CZ_RX_RSS_IPV6_TKEY_HI_WIDTH 64
+
+/* TX_FLUSH_DESCQ_REG: Transmit flush descriptor queue register */
+#define	FR_AZ_TX_FLUSH_DESCQ 0x00000a00
+#define	FRF_AZ_TX_FLUSH_DESCQ_CMD_LBN 12
+#define	FRF_AZ_TX_FLUSH_DESCQ_CMD_WIDTH 1
+#define	FRF_AZ_TX_FLUSH_DESCQ_LBN 0
+#define	FRF_AZ_TX_FLUSH_DESCQ_WIDTH 12
+
+/* TX_DESC_UPD_REGP0: Transmit descriptor update register. */
+#define	FR_BZ_TX_DESC_UPD_P0 0x00000a10
+#define	FR_BZ_TX_DESC_UPD_P0_STEP 8192
+#define	FR_BZ_TX_DESC_UPD_P0_ROWS 1024
+/* TX_DESC_UPD_REG_KER: Transmit descriptor update register. */
+#define	FR_AA_TX_DESC_UPD_KER 0x00000a10
+#define	FR_AA_TX_DESC_UPD_KER_STEP 8192
+#define	FR_AA_TX_DESC_UPD_KER_ROWS 8
+/* TX_DESC_UPD_REGP123: Transmit descriptor update register. */
+#define	FR_BB_TX_DESC_UPD_P123 0x01000a10
+#define	FR_BB_TX_DESC_UPD_P123_STEP 8192
+#define	FR_BB_TX_DESC_UPD_P123_ROWS 3072
+#define	FRF_AZ_TX_DESC_WPTR_LBN 96
+#define	FRF_AZ_TX_DESC_WPTR_WIDTH 12
+#define	FRF_AZ_TX_DESC_PUSH_CMD_LBN 95
+#define	FRF_AZ_TX_DESC_PUSH_CMD_WIDTH 1
+#define	FRF_AZ_TX_DESC_LBN 0
+#define	FRF_AZ_TX_DESC_WIDTH 95
+
+/* TX_DC_CFG_REG: Transmit descriptor cache configuration register */
+#define	FR_AZ_TX_DC_CFG 0x00000a20
+#define	FRF_AZ_TX_DC_SIZE_LBN 0
+#define	FRF_AZ_TX_DC_SIZE_WIDTH 2
+#define	FFE_AZ_TX_DC_SIZE_32 2
+#define	FFE_AZ_TX_DC_SIZE_16 1
+#define	FFE_AZ_TX_DC_SIZE_8 0
+
+/* TX_CHKSM_CFG_REG: Transmit checksum configuration register */
+#define	FR_AA_TX_CHKSM_CFG 0x00000a30
+#define	FRF_AA_TX_Q_CHKSM_DIS_96_127_LBN 96
+#define	FRF_AA_TX_Q_CHKSM_DIS_96_127_WIDTH 32
+#define	FRF_AA_TX_Q_CHKSM_DIS_64_95_LBN 64
+#define	FRF_AA_TX_Q_CHKSM_DIS_64_95_WIDTH 32
+#define	FRF_AA_TX_Q_CHKSM_DIS_32_63_LBN 32
+#define	FRF_AA_TX_Q_CHKSM_DIS_32_63_WIDTH 32
+#define	FRF_AA_TX_Q_CHKSM_DIS_0_31_LBN 0
+#define	FRF_AA_TX_Q_CHKSM_DIS_0_31_WIDTH 32
+
+/* TX_CFG_REG: Transmit configuration register */
+#define	FR_AZ_TX_CFG 0x00000a50
+#define	FRF_CZ_TX_CONT_LOOKUP_THRESH_RANGE_LBN 114
+#define	FRF_CZ_TX_CONT_LOOKUP_THRESH_RANGE_WIDTH 8
+#define	FRF_CZ_TX_FILTER_TEST_MODE_BIT_LBN 113
+#define	FRF_CZ_TX_FILTER_TEST_MODE_BIT_WIDTH 1
+#define	FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE_LBN 105
+#define	FRF_CZ_TX_ETH_FILTER_WILD_SEARCH_RANGE_WIDTH 8
+#define	FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE_LBN 97
+#define	FRF_CZ_TX_ETH_FILTER_FULL_SEARCH_RANGE_WIDTH 8
+#define	FRF_CZ_TX_UDPIP_FILTER_WILD_SEARCH_RANGE_LBN 89
+#define	FRF_CZ_TX_UDPIP_FILTER_WILD_SEARCH_RANGE_WIDTH 8
+#define	FRF_CZ_TX_UDPIP_FILTER_FULL_SEARCH_RANGE_LBN 81
+#define	FRF_CZ_TX_UDPIP_FILTER_FULL_SEARCH_RANGE_WIDTH 8
+#define	FRF_CZ_TX_TCPIP_FILTER_WILD_SEARCH_RANGE_LBN 73
+#define	FRF_CZ_TX_TCPIP_FILTER_WILD_SEARCH_RANGE_WIDTH 8
+#define	FRF_CZ_TX_TCPIP_FILTER_FULL_SEARCH_RANGE_LBN 65
+#define	FRF_CZ_TX_TCPIP_FILTER_FULL_SEARCH_RANGE_WIDTH 8
+#define	FRF_CZ_TX_FILTER_ALL_VLAN_ETHERTYPES_BIT_LBN 64
+#define	FRF_CZ_TX_FILTER_ALL_VLAN_ETHERTYPES_BIT_WIDTH 1
+#define	FRF_CZ_TX_VLAN_MATCH_ETHERTYPE_RANGE_LBN 48
+#define	FRF_CZ_TX_VLAN_MATCH_ETHERTYPE_RANGE_WIDTH 16
+#define	FRF_CZ_TX_FILTER_EN_BIT_LBN 47
+#define	FRF_CZ_TX_FILTER_EN_BIT_WIDTH 1
+#define	FRF_AZ_TX_IP_ID_P0_OFS_LBN 16
+#define	FRF_AZ_TX_IP_ID_P0_OFS_WIDTH 15
+#define	FRF_AZ_TX_NO_EOP_DISC_EN_LBN 5
+#define	FRF_AZ_TX_NO_EOP_DISC_EN_WIDTH 1
+#define	FRF_AZ_TX_P1_PRI_EN_LBN 4
+#define	FRF_AZ_TX_P1_PRI_EN_WIDTH 1
+#define	FRF_AZ_TX_OWNERR_CTL_LBN 2
+#define	FRF_AZ_TX_OWNERR_CTL_WIDTH 1
+#define	FRF_AA_TX_NON_IP_DROP_DIS_LBN 1
+#define	FRF_AA_TX_NON_IP_DROP_DIS_WIDTH 1
+#define	FRF_AZ_TX_IP_ID_REP_EN_LBN 0
+#define	FRF_AZ_TX_IP_ID_REP_EN_WIDTH 1
+
+/* TX_PUSH_DROP_REG: Transmit push dropped register */
+#define	FR_AZ_TX_PUSH_DROP 0x00000a60
+#define	FRF_AZ_TX_PUSH_DROP_CNT_LBN 0
+#define	FRF_AZ_TX_PUSH_DROP_CNT_WIDTH 32
+
+/* TX_RESERVED_REG: Transmit configuration register */
+#define	FR_AZ_TX_RESERVED 0x00000a80
+#define	FRF_AZ_TX_EVT_CNT_LBN 121
+#define	FRF_AZ_TX_EVT_CNT_WIDTH 7
+#define	FRF_AZ_TX_PREF_AGE_CNT_LBN 119
+#define	FRF_AZ_TX_PREF_AGE_CNT_WIDTH 2
+#define	FRF_AZ_TX_RD_COMP_TMR_LBN 96
+#define	FRF_AZ_TX_RD_COMP_TMR_WIDTH 23
+#define	FRF_AZ_TX_PUSH_EN_LBN 89
+#define	FRF_AZ_TX_PUSH_EN_WIDTH 1
+#define	FRF_AZ_TX_PUSH_CHK_DIS_LBN 88
+#define	FRF_AZ_TX_PUSH_CHK_DIS_WIDTH 1
+#define	FRF_AZ_TX_D_FF_FULL_P0_LBN 85
+#define	FRF_AZ_TX_D_FF_FULL_P0_WIDTH 1
+#define	FRF_AZ_TX_DMAR_ST_P0_LBN 81
+#define	FRF_AZ_TX_DMAR_ST_P0_WIDTH 1
+#define	FRF_AZ_TX_DMAQ_ST_LBN 78
+#define	FRF_AZ_TX_DMAQ_ST_WIDTH 1
+#define	FRF_AZ_TX_RX_SPACER_LBN 64
+#define	FRF_AZ_TX_RX_SPACER_WIDTH 8
+#define	FRF_AZ_TX_DROP_ABORT_EN_LBN 60
+#define	FRF_AZ_TX_DROP_ABORT_EN_WIDTH 1
+#define	FRF_AZ_TX_SOFT_EVT_EN_LBN 59
+#define	FRF_AZ_TX_SOFT_EVT_EN_WIDTH 1
+#define	FRF_AZ_TX_PS_EVT_DIS_LBN 58
+#define	FRF_AZ_TX_PS_EVT_DIS_WIDTH 1
+#define	FRF_AZ_TX_RX_SPACER_EN_LBN 57
+#define	FRF_AZ_TX_RX_SPACER_EN_WIDTH 1
+#define	FRF_AZ_TX_XP_TIMER_LBN 52
+#define	FRF_AZ_TX_XP_TIMER_WIDTH 5
+#define	FRF_AZ_TX_PREF_SPACER_LBN 44
+#define	FRF_AZ_TX_PREF_SPACER_WIDTH 8
+#define	FRF_AZ_TX_PREF_WD_TMR_LBN 22
+#define	FRF_AZ_TX_PREF_WD_TMR_WIDTH 22
+#define	FRF_AZ_TX_ONLY1TAG_LBN 21
+#define	FRF_AZ_TX_ONLY1TAG_WIDTH 1
+#define	FRF_AZ_TX_PREF_THRESHOLD_LBN 19
+#define	FRF_AZ_TX_PREF_THRESHOLD_WIDTH 2
+#define	FRF_AZ_TX_ONE_PKT_PER_Q_LBN 18
+#define	FRF_AZ_TX_ONE_PKT_PER_Q_WIDTH 1
+#define	FRF_AZ_TX_DIS_NON_IP_EV_LBN 17
+#define	FRF_AZ_TX_DIS_NON_IP_EV_WIDTH 1
+#define	FRF_AA_TX_DMA_FF_THR_LBN 16
+#define	FRF_AA_TX_DMA_FF_THR_WIDTH 1
+#define	FRF_AZ_TX_DMA_SPACER_LBN 8
+#define	FRF_AZ_TX_DMA_SPACER_WIDTH 8
+#define	FRF_AA_TX_TCP_DIS_LBN 7
+#define	FRF_AA_TX_TCP_DIS_WIDTH 1
+#define	FRF_BZ_TX_FLUSH_MIN_LEN_EN_LBN 7
+#define	FRF_BZ_TX_FLUSH_MIN_LEN_EN_WIDTH 1
+#define	FRF_AA_TX_IP_DIS_LBN 6
+#define	FRF_AA_TX_IP_DIS_WIDTH 1
+#define	FRF_AZ_TX_MAX_CPL_LBN 2
+#define	FRF_AZ_TX_MAX_CPL_WIDTH 2
+#define	FFE_AZ_TX_MAX_CPL_16 3
+#define	FFE_AZ_TX_MAX_CPL_8 2
+#define	FFE_AZ_TX_MAX_CPL_4 1
+#define	FFE_AZ_TX_MAX_CPL_NOLIMIT 0
+#define	FRF_AZ_TX_MAX_PREF_LBN 0
+#define	FRF_AZ_TX_MAX_PREF_WIDTH 2
+#define	FFE_AZ_TX_MAX_PREF_32 3
+#define	FFE_AZ_TX_MAX_PREF_16 2
+#define	FFE_AZ_TX_MAX_PREF_8 1
+#define	FFE_AZ_TX_MAX_PREF_OFF 0
+
+/* TX_PACE_REG: Transmit pace control register */
+#define	FR_BZ_TX_PACE 0x00000a90
+#define	FRF_BZ_TX_PACE_SB_NOT_AF_LBN 19
+#define	FRF_BZ_TX_PACE_SB_NOT_AF_WIDTH 10
+#define	FRF_BZ_TX_PACE_SB_AF_LBN 9
+#define	FRF_BZ_TX_PACE_SB_AF_WIDTH 10
+#define	FRF_BZ_TX_PACE_FB_BASE_LBN 5
+#define	FRF_BZ_TX_PACE_FB_BASE_WIDTH 4
+#define	FRF_BZ_TX_PACE_BIN_TH_LBN 0
+#define	FRF_BZ_TX_PACE_BIN_TH_WIDTH 5
+
+/* TX_PACE_DROP_QID_REG: PACE Drop QID Counter */
+#define	FR_BZ_TX_PACE_DROP_QID 0x00000aa0
+#define	FRF_BZ_TX_PACE_QID_DRP_CNT_LBN 0
+#define	FRF_BZ_TX_PACE_QID_DRP_CNT_WIDTH 16
+
+/* TX_VLAN_REG: Transmit VLAN tag register */
+#define	FR_BB_TX_VLAN 0x00000ae0
+#define	FRF_BB_TX_VLAN_EN_LBN 127
+#define	FRF_BB_TX_VLAN_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN7_PORT1_EN_LBN 125
+#define	FRF_BB_TX_VLAN7_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN7_PORT0_EN_LBN 124
+#define	FRF_BB_TX_VLAN7_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN7_LBN 112
+#define	FRF_BB_TX_VLAN7_WIDTH 12
+#define	FRF_BB_TX_VLAN6_PORT1_EN_LBN 109
+#define	FRF_BB_TX_VLAN6_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN6_PORT0_EN_LBN 108
+#define	FRF_BB_TX_VLAN6_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN6_LBN 96
+#define	FRF_BB_TX_VLAN6_WIDTH 12
+#define	FRF_BB_TX_VLAN5_PORT1_EN_LBN 93
+#define	FRF_BB_TX_VLAN5_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN5_PORT0_EN_LBN 92
+#define	FRF_BB_TX_VLAN5_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN5_LBN 80
+#define	FRF_BB_TX_VLAN5_WIDTH 12
+#define	FRF_BB_TX_VLAN4_PORT1_EN_LBN 77
+#define	FRF_BB_TX_VLAN4_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN4_PORT0_EN_LBN 76
+#define	FRF_BB_TX_VLAN4_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN4_LBN 64
+#define	FRF_BB_TX_VLAN4_WIDTH 12
+#define	FRF_BB_TX_VLAN3_PORT1_EN_LBN 61
+#define	FRF_BB_TX_VLAN3_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN3_PORT0_EN_LBN 60
+#define	FRF_BB_TX_VLAN3_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN3_LBN 48
+#define	FRF_BB_TX_VLAN3_WIDTH 12
+#define	FRF_BB_TX_VLAN2_PORT1_EN_LBN 45
+#define	FRF_BB_TX_VLAN2_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN2_PORT0_EN_LBN 44
+#define	FRF_BB_TX_VLAN2_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN2_LBN 32
+#define	FRF_BB_TX_VLAN2_WIDTH 12
+#define	FRF_BB_TX_VLAN1_PORT1_EN_LBN 29
+#define	FRF_BB_TX_VLAN1_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN1_PORT0_EN_LBN 28
+#define	FRF_BB_TX_VLAN1_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN1_LBN 16
+#define	FRF_BB_TX_VLAN1_WIDTH 12
+#define	FRF_BB_TX_VLAN0_PORT1_EN_LBN 13
+#define	FRF_BB_TX_VLAN0_PORT1_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN0_PORT0_EN_LBN 12
+#define	FRF_BB_TX_VLAN0_PORT0_EN_WIDTH 1
+#define	FRF_BB_TX_VLAN0_LBN 0
+#define	FRF_BB_TX_VLAN0_WIDTH 12
+
+/* TX_IPFIL_PORTEN_REG: Transmit filter control register */
+#define	FR_BZ_TX_IPFIL_PORTEN 0x00000af0
+#define	FRF_BZ_TX_MADR0_FIL_EN_LBN 64
+#define	FRF_BZ_TX_MADR0_FIL_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL31_PORT_EN_LBN 62
+#define	FRF_BB_TX_IPFIL31_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL30_PORT_EN_LBN 60
+#define	FRF_BB_TX_IPFIL30_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL29_PORT_EN_LBN 58
+#define	FRF_BB_TX_IPFIL29_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL28_PORT_EN_LBN 56
+#define	FRF_BB_TX_IPFIL28_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL27_PORT_EN_LBN 54
+#define	FRF_BB_TX_IPFIL27_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL26_PORT_EN_LBN 52
+#define	FRF_BB_TX_IPFIL26_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL25_PORT_EN_LBN 50
+#define	FRF_BB_TX_IPFIL25_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL24_PORT_EN_LBN 48
+#define	FRF_BB_TX_IPFIL24_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL23_PORT_EN_LBN 46
+#define	FRF_BB_TX_IPFIL23_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL22_PORT_EN_LBN 44
+#define	FRF_BB_TX_IPFIL22_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL21_PORT_EN_LBN 42
+#define	FRF_BB_TX_IPFIL21_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL20_PORT_EN_LBN 40
+#define	FRF_BB_TX_IPFIL20_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL19_PORT_EN_LBN 38
+#define	FRF_BB_TX_IPFIL19_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL18_PORT_EN_LBN 36
+#define	FRF_BB_TX_IPFIL18_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL17_PORT_EN_LBN 34
+#define	FRF_BB_TX_IPFIL17_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL16_PORT_EN_LBN 32
+#define	FRF_BB_TX_IPFIL16_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL15_PORT_EN_LBN 30
+#define	FRF_BB_TX_IPFIL15_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL14_PORT_EN_LBN 28
+#define	FRF_BB_TX_IPFIL14_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL13_PORT_EN_LBN 26
+#define	FRF_BB_TX_IPFIL13_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL12_PORT_EN_LBN 24
+#define	FRF_BB_TX_IPFIL12_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL11_PORT_EN_LBN 22
+#define	FRF_BB_TX_IPFIL11_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL10_PORT_EN_LBN 20
+#define	FRF_BB_TX_IPFIL10_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL9_PORT_EN_LBN 18
+#define	FRF_BB_TX_IPFIL9_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL8_PORT_EN_LBN 16
+#define	FRF_BB_TX_IPFIL8_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL7_PORT_EN_LBN 14
+#define	FRF_BB_TX_IPFIL7_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL6_PORT_EN_LBN 12
+#define	FRF_BB_TX_IPFIL6_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL5_PORT_EN_LBN 10
+#define	FRF_BB_TX_IPFIL5_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL4_PORT_EN_LBN 8
+#define	FRF_BB_TX_IPFIL4_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL3_PORT_EN_LBN 6
+#define	FRF_BB_TX_IPFIL3_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL2_PORT_EN_LBN 4
+#define	FRF_BB_TX_IPFIL2_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL1_PORT_EN_LBN 2
+#define	FRF_BB_TX_IPFIL1_PORT_EN_WIDTH 1
+#define	FRF_BB_TX_IPFIL0_PORT_EN_LBN 0
+#define	FRF_BB_TX_IPFIL0_PORT_EN_WIDTH 1
+
+/* TX_IPFIL_TBL: Transmit IP source address filter table */
+#define	FR_BB_TX_IPFIL_TBL 0x00000b00
+#define	FR_BB_TX_IPFIL_TBL_STEP 16
+#define	FR_BB_TX_IPFIL_TBL_ROWS 16
+#define	FRF_BB_TX_IPFIL_MASK_1_LBN 96
+#define	FRF_BB_TX_IPFIL_MASK_1_WIDTH 32
+#define	FRF_BB_TX_IP_SRC_ADR_1_LBN 64
+#define	FRF_BB_TX_IP_SRC_ADR_1_WIDTH 32
+#define	FRF_BB_TX_IPFIL_MASK_0_LBN 32
+#define	FRF_BB_TX_IPFIL_MASK_0_WIDTH 32
+#define	FRF_BB_TX_IP_SRC_ADR_0_LBN 0
+#define	FRF_BB_TX_IP_SRC_ADR_0_WIDTH 32
+
+/* MD_TXD_REG: PHY management transmit data register */
+#define	FR_AB_MD_TXD 0x00000c00
+#define	FRF_AB_MD_TXD_LBN 0
+#define	FRF_AB_MD_TXD_WIDTH 16
+
+/* MD_RXD_REG: PHY management receive data register */
+#define	FR_AB_MD_RXD 0x00000c10
+#define	FRF_AB_MD_RXD_LBN 0
+#define	FRF_AB_MD_RXD_WIDTH 16
+
+/* MD_CS_REG: PHY management configuration & status register */
+#define	FR_AB_MD_CS 0x00000c20
+#define	FRF_AB_MD_RD_EN_CMD_LBN 15
+#define	FRF_AB_MD_RD_EN_CMD_WIDTH 1
+#define	FRF_AB_MD_WR_EN_CMD_LBN 14
+#define	FRF_AB_MD_WR_EN_CMD_WIDTH 1
+#define	FRF_AB_MD_ADDR_CMD_LBN 13
+#define	FRF_AB_MD_ADDR_CMD_WIDTH 1
+#define	FRF_AB_MD_PT_LBN 7
+#define	FRF_AB_MD_PT_WIDTH 3
+#define	FRF_AB_MD_PL_LBN 6
+#define	FRF_AB_MD_PL_WIDTH 1
+#define	FRF_AB_MD_INT_CLR_LBN 5
+#define	FRF_AB_MD_INT_CLR_WIDTH 1
+#define	FRF_AB_MD_GC_LBN 4
+#define	FRF_AB_MD_GC_WIDTH 1
+#define	FRF_AB_MD_PRSP_LBN 3
+#define	FRF_AB_MD_PRSP_WIDTH 1
+#define	FRF_AB_MD_RIC_LBN 2
+#define	FRF_AB_MD_RIC_WIDTH 1
+#define	FRF_AB_MD_RDC_LBN 1
+#define	FRF_AB_MD_RDC_WIDTH 1
+#define	FRF_AB_MD_WRC_LBN 0
+#define	FRF_AB_MD_WRC_WIDTH 1
+
+/* MD_PHY_ADR_REG: PHY management PHY address register */
+#define	FR_AB_MD_PHY_ADR 0x00000c30
+#define	FRF_AB_MD_PHY_ADR_LBN 0
+#define	FRF_AB_MD_PHY_ADR_WIDTH 16
+
+/* MD_ID_REG: PHY management ID register */
+#define	FR_AB_MD_ID 0x00000c40
+#define	FRF_AB_MD_PRT_ADR_LBN 11
+#define	FRF_AB_MD_PRT_ADR_WIDTH 5
+#define	FRF_AB_MD_DEV_ADR_LBN 6
+#define	FRF_AB_MD_DEV_ADR_WIDTH 5
+
+/* MD_STAT_REG: PHY management status & mask register */
+#define	FR_AB_MD_STAT 0x00000c50
+#define	FRF_AB_MD_PINT_LBN 4
+#define	FRF_AB_MD_PINT_WIDTH 1
+#define	FRF_AB_MD_DONE_LBN 3
+#define	FRF_AB_MD_DONE_WIDTH 1
+#define	FRF_AB_MD_BSERR_LBN 2
+#define	FRF_AB_MD_BSERR_WIDTH 1
+#define	FRF_AB_MD_LNFL_LBN 1
+#define	FRF_AB_MD_LNFL_WIDTH 1
+#define	FRF_AB_MD_BSY_LBN 0
+#define	FRF_AB_MD_BSY_WIDTH 1
+
+/* MAC_STAT_DMA_REG: Port MAC statistical counter DMA register */
+#define	FR_AB_MAC_STAT_DMA 0x00000c60
+#define	FRF_AB_MAC_STAT_DMA_CMD_LBN 48
+#define	FRF_AB_MAC_STAT_DMA_CMD_WIDTH 1
+#define	FRF_AB_MAC_STAT_DMA_ADR_LBN 0
+#define	FRF_AB_MAC_STAT_DMA_ADR_WIDTH 48
+
+/* MAC_CTRL_REG: Port MAC control register */
+#define	FR_AB_MAC_CTRL 0x00000c80
+#define	FRF_AB_MAC_XOFF_VAL_LBN 16
+#define	FRF_AB_MAC_XOFF_VAL_WIDTH 16
+#define	FRF_BB_TXFIFO_DRAIN_EN_LBN 7
+#define	FRF_BB_TXFIFO_DRAIN_EN_WIDTH 1
+#define	FRF_AB_MAC_XG_DISTXCRC_LBN 5
+#define	FRF_AB_MAC_XG_DISTXCRC_WIDTH 1
+#define	FRF_AB_MAC_BCAD_ACPT_LBN 4
+#define	FRF_AB_MAC_BCAD_ACPT_WIDTH 1
+#define	FRF_AB_MAC_UC_PROM_LBN 3
+#define	FRF_AB_MAC_UC_PROM_WIDTH 1
+#define	FRF_AB_MAC_LINK_STATUS_LBN 2
+#define	FRF_AB_MAC_LINK_STATUS_WIDTH 1
+#define	FRF_AB_MAC_SPEED_LBN 0
+#define	FRF_AB_MAC_SPEED_WIDTH 2
+#define	FFE_AB_MAC_SPEED_10G 3
+#define	FFE_AB_MAC_SPEED_1G 2
+#define	FFE_AB_MAC_SPEED_100M 1
+#define	FFE_AB_MAC_SPEED_10M 0
+
+/* GEN_MODE_REG: General Purpose mode register (external interrupt mask) */
+#define	FR_BB_GEN_MODE 0x00000c90
+#define	FRF_BB_XFP_PHY_INT_POL_SEL_LBN 3
+#define	FRF_BB_XFP_PHY_INT_POL_SEL_WIDTH 1
+#define	FRF_BB_XG_PHY_INT_POL_SEL_LBN 2
+#define	FRF_BB_XG_PHY_INT_POL_SEL_WIDTH 1
+#define	FRF_BB_XFP_PHY_INT_MASK_LBN 1
+#define	FRF_BB_XFP_PHY_INT_MASK_WIDTH 1
+#define	FRF_BB_XG_PHY_INT_MASK_LBN 0
+#define	FRF_BB_XG_PHY_INT_MASK_WIDTH 1
+
+/* MAC_MC_HASH_REG0: Multicast address hash table */
+#define	FR_AB_MAC_MC_HASH_REG0 0x00000ca0
+#define	FRF_AB_MAC_MCAST_HASH0_LBN 0
+#define	FRF_AB_MAC_MCAST_HASH0_WIDTH 128
+
+/* MAC_MC_HASH_REG1: Multicast address hash table */
+#define	FR_AB_MAC_MC_HASH_REG1 0x00000cb0
+#define	FRF_AB_MAC_MCAST_HASH1_LBN 0
+#define	FRF_AB_MAC_MCAST_HASH1_WIDTH 128
+
+/* GM_CFG1_REG: GMAC configuration register 1 */
+#define	FR_AB_GM_CFG1 0x00000e00
+#define	FRF_AB_GM_SW_RST_LBN 31
+#define	FRF_AB_GM_SW_RST_WIDTH 1
+#define	FRF_AB_GM_SIM_RST_LBN 30
+#define	FRF_AB_GM_SIM_RST_WIDTH 1
+#define	FRF_AB_GM_RST_RX_MAC_CTL_LBN 19
+#define	FRF_AB_GM_RST_RX_MAC_CTL_WIDTH 1
+#define	FRF_AB_GM_RST_TX_MAC_CTL_LBN 18
+#define	FRF_AB_GM_RST_TX_MAC_CTL_WIDTH 1
+#define	FRF_AB_GM_RST_RX_FUNC_LBN 17
+#define	FRF_AB_GM_RST_RX_FUNC_WIDTH 1
+#define	FRF_AB_GM_RST_TX_FUNC_LBN 16
+#define	FRF_AB_GM_RST_TX_FUNC_WIDTH 1
+#define	FRF_AB_GM_LOOP_LBN 8
+#define	FRF_AB_GM_LOOP_WIDTH 1
+#define	FRF_AB_GM_RX_FC_EN_LBN 5
+#define	FRF_AB_GM_RX_FC_EN_WIDTH 1
+#define	FRF_AB_GM_TX_FC_EN_LBN 4
+#define	FRF_AB_GM_TX_FC_EN_WIDTH 1
+#define	FRF_AB_GM_SYNC_RXEN_LBN 3
+#define	FRF_AB_GM_SYNC_RXEN_WIDTH 1
+#define	FRF_AB_GM_RX_EN_LBN 2
+#define	FRF_AB_GM_RX_EN_WIDTH 1
+#define	FRF_AB_GM_SYNC_TXEN_LBN 1
+#define	FRF_AB_GM_SYNC_TXEN_WIDTH 1
+#define	FRF_AB_GM_TX_EN_LBN 0
+#define	FRF_AB_GM_TX_EN_WIDTH 1
+
+/* GM_CFG2_REG: GMAC configuration register 2 */
+#define	FR_AB_GM_CFG2 0x00000e10
+#define	FRF_AB_GM_PAMBL_LEN_LBN 12
+#define	FRF_AB_GM_PAMBL_LEN_WIDTH 4
+#define	FRF_AB_GM_IF_MODE_LBN 8
+#define	FRF_AB_GM_IF_MODE_WIDTH 2
+#define	FFE_AB_IF_MODE_BYTE_MODE 2
+#define	FFE_AB_IF_MODE_NIBBLE_MODE 1
+#define	FRF_AB_GM_HUGE_FRM_EN_LBN 5
+#define	FRF_AB_GM_HUGE_FRM_EN_WIDTH 1
+#define	FRF_AB_GM_LEN_CHK_LBN 4
+#define	FRF_AB_GM_LEN_CHK_WIDTH 1
+#define	FRF_AB_GM_PAD_CRC_EN_LBN 2
+#define	FRF_AB_GM_PAD_CRC_EN_WIDTH 1
+#define	FRF_AB_GM_CRC_EN_LBN 1
+#define	FRF_AB_GM_CRC_EN_WIDTH 1
+#define	FRF_AB_GM_FD_LBN 0
+#define	FRF_AB_GM_FD_WIDTH 1
+
+/* GM_IPG_REG: GMAC IPG register */
+#define	FR_AB_GM_IPG 0x00000e20
+#define	FRF_AB_GM_NONB2B_IPG1_LBN 24
+#define	FRF_AB_GM_NONB2B_IPG1_WIDTH 7
+#define	FRF_AB_GM_NONB2B_IPG2_LBN 16
+#define	FRF_AB_GM_NONB2B_IPG2_WIDTH 7
+#define	FRF_AB_GM_MIN_IPG_ENF_LBN 8
+#define	FRF_AB_GM_MIN_IPG_ENF_WIDTH 8
+#define	FRF_AB_GM_B2B_IPG_LBN 0
+#define	FRF_AB_GM_B2B_IPG_WIDTH 7
+
+/* GM_HD_REG: GMAC half duplex register */
+#define	FR_AB_GM_HD 0x00000e30
+#define	FRF_AB_GM_ALT_BOFF_VAL_LBN 20
+#define	FRF_AB_GM_ALT_BOFF_VAL_WIDTH 4
+#define	FRF_AB_GM_ALT_BOFF_EN_LBN 19
+#define	FRF_AB_GM_ALT_BOFF_EN_WIDTH 1
+#define	FRF_AB_GM_BP_NO_BOFF_LBN 18
+#define	FRF_AB_GM_BP_NO_BOFF_WIDTH 1
+#define	FRF_AB_GM_DIS_BOFF_LBN 17
+#define	FRF_AB_GM_DIS_BOFF_WIDTH 1
+#define	FRF_AB_GM_EXDEF_TX_EN_LBN 16
+#define	FRF_AB_GM_EXDEF_TX_EN_WIDTH 1
+#define	FRF_AB_GM_RTRY_LIMIT_LBN 12
+#define	FRF_AB_GM_RTRY_LIMIT_WIDTH 4
+#define	FRF_AB_GM_COL_WIN_LBN 0
+#define	FRF_AB_GM_COL_WIN_WIDTH 10
+
+/* GM_MAX_FLEN_REG: GMAC maximum frame length register */
+#define	FR_AB_GM_MAX_FLEN 0x00000e40
+#define	FRF_AB_GM_MAX_FLEN_LBN 0
+#define	FRF_AB_GM_MAX_FLEN_WIDTH 16
+
+/* GM_TEST_REG: GMAC test register */
+#define	FR_AB_GM_TEST 0x00000e70
+#define	FRF_AB_GM_MAX_BOFF_LBN 3
+#define	FRF_AB_GM_MAX_BOFF_WIDTH 1
+#define	FRF_AB_GM_REG_TX_FLOW_EN_LBN 2
+#define	FRF_AB_GM_REG_TX_FLOW_EN_WIDTH 1
+#define	FRF_AB_GM_TEST_PAUSE_LBN 1
+#define	FRF_AB_GM_TEST_PAUSE_WIDTH 1
+#define	FRF_AB_GM_SHORT_SLOT_LBN 0
+#define	FRF_AB_GM_SHORT_SLOT_WIDTH 1
+
+/* GM_ADR1_REG: GMAC station address register 1 */
+#define	FR_AB_GM_ADR1 0x00000f00
+#define	FRF_AB_GM_ADR_B0_LBN 24
+#define	FRF_AB_GM_ADR_B0_WIDTH 8
+#define	FRF_AB_GM_ADR_B1_LBN 16
+#define	FRF_AB_GM_ADR_B1_WIDTH 8
+#define	FRF_AB_GM_ADR_B2_LBN 8
+#define	FRF_AB_GM_ADR_B2_WIDTH 8
+#define	FRF_AB_GM_ADR_B3_LBN 0
+#define	FRF_AB_GM_ADR_B3_WIDTH 8
+
+/* GM_ADR2_REG: GMAC station address register 2 */
+#define	FR_AB_GM_ADR2 0x00000f10
+#define	FRF_AB_GM_ADR_B4_LBN 24
+#define	FRF_AB_GM_ADR_B4_WIDTH 8
+#define	FRF_AB_GM_ADR_B5_LBN 16
+#define	FRF_AB_GM_ADR_B5_WIDTH 8
+
+/* GMF_CFG0_REG: GMAC FIFO configuration register 0 */
+#define	FR_AB_GMF_CFG0 0x00000f20
+#define	FRF_AB_GMF_FTFENRPLY_LBN 20
+#define	FRF_AB_GMF_FTFENRPLY_WIDTH 1
+#define	FRF_AB_GMF_STFENRPLY_LBN 19
+#define	FRF_AB_GMF_STFENRPLY_WIDTH 1
+#define	FRF_AB_GMF_FRFENRPLY_LBN 18
+#define	FRF_AB_GMF_FRFENRPLY_WIDTH 1
+#define	FRF_AB_GMF_SRFENRPLY_LBN 17
+#define	FRF_AB_GMF_SRFENRPLY_WIDTH 1
+#define	FRF_AB_GMF_WTMENRPLY_LBN 16
+#define	FRF_AB_GMF_WTMENRPLY_WIDTH 1
+#define	FRF_AB_GMF_FTFENREQ_LBN 12
+#define	FRF_AB_GMF_FTFENREQ_WIDTH 1
+#define	FRF_AB_GMF_STFENREQ_LBN 11
+#define	FRF_AB_GMF_STFENREQ_WIDTH 1
+#define	FRF_AB_GMF_FRFENREQ_LBN 10
+#define	FRF_AB_GMF_FRFENREQ_WIDTH 1
+#define	FRF_AB_GMF_SRFENREQ_LBN 9
+#define	FRF_AB_GMF_SRFENREQ_WIDTH 1
+#define	FRF_AB_GMF_WTMENREQ_LBN 8
+#define	FRF_AB_GMF_WTMENREQ_WIDTH 1
+#define	FRF_AB_GMF_HSTRSTFT_LBN 4
+#define	FRF_AB_GMF_HSTRSTFT_WIDTH 1
+#define	FRF_AB_GMF_HSTRSTST_LBN 3
+#define	FRF_AB_GMF_HSTRSTST_WIDTH 1
+#define	FRF_AB_GMF_HSTRSTFR_LBN 2
+#define	FRF_AB_GMF_HSTRSTFR_WIDTH 1
+#define	FRF_AB_GMF_HSTRSTSR_LBN 1
+#define	FRF_AB_GMF_HSTRSTSR_WIDTH 1
+#define	FRF_AB_GMF_HSTRSTWT_LBN 0
+#define	FRF_AB_GMF_HSTRSTWT_WIDTH 1
+
+/* GMF_CFG1_REG: GMAC FIFO configuration register 1 */
+#define	FR_AB_GMF_CFG1 0x00000f30
+#define	FRF_AB_GMF_CFGFRTH_LBN 16
+#define	FRF_AB_GMF_CFGFRTH_WIDTH 5
+#define	FRF_AB_GMF_CFGXOFFRTX_LBN 0
+#define	FRF_AB_GMF_CFGXOFFRTX_WIDTH 16
+
+/* GMF_CFG2_REG: GMAC FIFO configuration register 2 */
+#define	FR_AB_GMF_CFG2 0x00000f40
+#define	FRF_AB_GMF_CFGHWM_LBN 16
+#define	FRF_AB_GMF_CFGHWM_WIDTH 6
+#define	FRF_AB_GMF_CFGLWM_LBN 0
+#define	FRF_AB_GMF_CFGLWM_WIDTH 6
+
+/* GMF_CFG3_REG: GMAC FIFO configuration register 3 */
+#define	FR_AB_GMF_CFG3 0x00000f50
+#define	FRF_AB_GMF_CFGHWMFT_LBN 16
+#define	FRF_AB_GMF_CFGHWMFT_WIDTH 6
+#define	FRF_AB_GMF_CFGFTTH_LBN 0
+#define	FRF_AB_GMF_CFGFTTH_WIDTH 6
+
+/* GMF_CFG4_REG: GMAC FIFO configuration register 4 */
+#define	FR_AB_GMF_CFG4 0x00000f60
+#define	FRF_AB_GMF_HSTFLTRFRM_LBN 0
+#define	FRF_AB_GMF_HSTFLTRFRM_WIDTH 18
+
+/* GMF_CFG5_REG: GMAC FIFO configuration register 5 */
+#define	FR_AB_GMF_CFG5 0x00000f70
+#define	FRF_AB_GMF_CFGHDPLX_LBN 22
+#define	FRF_AB_GMF_CFGHDPLX_WIDTH 1
+#define	FRF_AB_GMF_SRFULL_LBN 21
+#define	FRF_AB_GMF_SRFULL_WIDTH 1
+#define	FRF_AB_GMF_HSTSRFULLCLR_LBN 20
+#define	FRF_AB_GMF_HSTSRFULLCLR_WIDTH 1
+#define	FRF_AB_GMF_CFGBYTMODE_LBN 19
+#define	FRF_AB_GMF_CFGBYTMODE_WIDTH 1
+#define	FRF_AB_GMF_HSTDRPLT64_LBN 18
+#define	FRF_AB_GMF_HSTDRPLT64_WIDTH 1
+#define	FRF_AB_GMF_HSTFLTRFRMDC_LBN 0
+#define	FRF_AB_GMF_HSTFLTRFRMDC_WIDTH 18
+
+/* TX_SRC_MAC_TBL: Transmit MAC source address filter table */
+#define	FR_BB_TX_SRC_MAC_TBL 0x00001000
+#define	FR_BB_TX_SRC_MAC_TBL_STEP 16
+#define	FR_BB_TX_SRC_MAC_TBL_ROWS 16
+#define	FRF_BB_TX_SRC_MAC_ADR_1_LBN 64
+#define	FRF_BB_TX_SRC_MAC_ADR_1_WIDTH 48
+#define	FRF_BB_TX_SRC_MAC_ADR_0_LBN 0
+#define	FRF_BB_TX_SRC_MAC_ADR_0_WIDTH 48
+
+/* TX_SRC_MAC_CTL_REG: Transmit MAC source address filter control */
+#define	FR_BB_TX_SRC_MAC_CTL 0x00001100
+#define	FRF_BB_TX_SRC_DROP_CTR_LBN 16
+#define	FRF_BB_TX_SRC_DROP_CTR_WIDTH 16
+#define	FRF_BB_TX_SRC_FLTR_EN_LBN 15
+#define	FRF_BB_TX_SRC_FLTR_EN_WIDTH 1
+#define	FRF_BB_TX_DROP_CTR_CLR_LBN 12
+#define	FRF_BB_TX_DROP_CTR_CLR_WIDTH 1
+#define	FRF_BB_TX_MAC_QID_SEL_LBN 0
+#define	FRF_BB_TX_MAC_QID_SEL_WIDTH 3
+
+/* XM_ADR_LO_REG: XGMAC address register low */
+#define	FR_AB_XM_ADR_LO 0x00001200
+#define	FRF_AB_XM_ADR_LO_LBN 0
+#define	FRF_AB_XM_ADR_LO_WIDTH 32
+
+/* XM_ADR_HI_REG: XGMAC address register high */
+#define	FR_AB_XM_ADR_HI 0x00001210
+#define	FRF_AB_XM_ADR_HI_LBN 0
+#define	FRF_AB_XM_ADR_HI_WIDTH 16
+
+/* XM_GLB_CFG_REG: XGMAC global configuration */
+#define	FR_AB_XM_GLB_CFG 0x00001220
+#define	FRF_AB_XM_RMTFLT_GEN_LBN 17
+#define	FRF_AB_XM_RMTFLT_GEN_WIDTH 1
+#define	FRF_AB_XM_DEBUG_MODE_LBN 16
+#define	FRF_AB_XM_DEBUG_MODE_WIDTH 1
+#define	FRF_AB_XM_RX_STAT_EN_LBN 11
+#define	FRF_AB_XM_RX_STAT_EN_WIDTH 1
+#define	FRF_AB_XM_TX_STAT_EN_LBN 10
+#define	FRF_AB_XM_TX_STAT_EN_WIDTH 1
+#define	FRF_AB_XM_RX_JUMBO_MODE_LBN 6
+#define	FRF_AB_XM_RX_JUMBO_MODE_WIDTH 1
+#define	FRF_AB_XM_WAN_MODE_LBN 5
+#define	FRF_AB_XM_WAN_MODE_WIDTH 1
+#define	FRF_AB_XM_INTCLR_MODE_LBN 3
+#define	FRF_AB_XM_INTCLR_MODE_WIDTH 1
+#define	FRF_AB_XM_CORE_RST_LBN 0
+#define	FRF_AB_XM_CORE_RST_WIDTH 1
+
+/* XM_TX_CFG_REG: XGMAC transmit configuration */
+#define	FR_AB_XM_TX_CFG 0x00001230
+#define	FRF_AB_XM_TX_PROG_LBN 24
+#define	FRF_AB_XM_TX_PROG_WIDTH 1
+#define	FRF_AB_XM_IPG_LBN 16
+#define	FRF_AB_XM_IPG_WIDTH 4
+#define	FRF_AB_XM_FCNTL_LBN 10
+#define	FRF_AB_XM_FCNTL_WIDTH 1
+#define	FRF_AB_XM_TXCRC_LBN 8
+#define	FRF_AB_XM_TXCRC_WIDTH 1
+#define	FRF_AB_XM_EDRC_LBN 6
+#define	FRF_AB_XM_EDRC_WIDTH 1
+#define	FRF_AB_XM_AUTO_PAD_LBN 5
+#define	FRF_AB_XM_AUTO_PAD_WIDTH 1
+#define	FRF_AB_XM_TX_PRMBL_LBN 2
+#define	FRF_AB_XM_TX_PRMBL_WIDTH 1
+#define	FRF_AB_XM_TXEN_LBN 1
+#define	FRF_AB_XM_TXEN_WIDTH 1
+#define	FRF_AB_XM_TX_RST_LBN 0
+#define	FRF_AB_XM_TX_RST_WIDTH 1
+
+/* XM_RX_CFG_REG: XGMAC receive configuration */
+#define	FR_AB_XM_RX_CFG 0x00001240
+#define	FRF_AB_XM_PASS_LENERR_LBN 26
+#define	FRF_AB_XM_PASS_LENERR_WIDTH 1
+#define	FRF_AB_XM_PASS_CRC_ERR_LBN 25
+#define	FRF_AB_XM_PASS_CRC_ERR_WIDTH 1
+#define	FRF_AB_XM_PASS_PRMBLE_ERR_LBN 24
+#define	FRF_AB_XM_PASS_PRMBLE_ERR_WIDTH 1
+#define	FRF_AB_XM_REJ_BCAST_LBN 20
+#define	FRF_AB_XM_REJ_BCAST_WIDTH 1
+#define	FRF_AB_XM_ACPT_ALL_MCAST_LBN 11
+#define	FRF_AB_XM_ACPT_ALL_MCAST_WIDTH 1
+#define	FRF_AB_XM_ACPT_ALL_UCAST_LBN 9
+#define	FRF_AB_XM_ACPT_ALL_UCAST_WIDTH 1
+#define	FRF_AB_XM_AUTO_DEPAD_LBN 8
+#define	FRF_AB_XM_AUTO_DEPAD_WIDTH 1
+#define	FRF_AB_XM_RXCRC_LBN 3
+#define	FRF_AB_XM_RXCRC_WIDTH 1
+#define	FRF_AB_XM_RX_PRMBL_LBN 2
+#define	FRF_AB_XM_RX_PRMBL_WIDTH 1
+#define	FRF_AB_XM_RXEN_LBN 1
+#define	FRF_AB_XM_RXEN_WIDTH 1
+#define	FRF_AB_XM_RX_RST_LBN 0
+#define	FRF_AB_XM_RX_RST_WIDTH 1
+
+/* XM_MGT_INT_MASK: XGMAC management interrupt mask (XM_MSK_* bits) */
+#define	FR_AB_XM_MGT_INT_MASK 0x00001250
+#define	FRF_AB_XM_MSK_STA_INTR_LBN 16
+#define	FRF_AB_XM_MSK_STA_INTR_WIDTH 1
+#define	FRF_AB_XM_MSK_STAT_CNTR_HF_LBN 9
+#define	FRF_AB_XM_MSK_STAT_CNTR_HF_WIDTH 1
+#define	FRF_AB_XM_MSK_STAT_CNTR_OF_LBN 8
+#define	FRF_AB_XM_MSK_STAT_CNTR_OF_WIDTH 1
+#define	FRF_AB_XM_MSK_PRMBLE_ERR_LBN 2
+#define	FRF_AB_XM_MSK_PRMBLE_ERR_WIDTH 1
+#define	FRF_AB_XM_MSK_RMTFLT_LBN 1
+#define	FRF_AB_XM_MSK_RMTFLT_WIDTH 1
+#define	FRF_AB_XM_MSK_LCLFLT_LBN 0
+#define	FRF_AB_XM_MSK_LCLFLT_WIDTH 1
+
+/* XM_FC_REG: XGMAC flow control register */
+#define	FR_AB_XM_FC 0x00001270
+#define	FRF_AB_XM_PAUSE_TIME_LBN 16
+#define	FRF_AB_XM_PAUSE_TIME_WIDTH 16
+#define	FRF_AB_XM_RX_MAC_STAT_LBN 11
+#define	FRF_AB_XM_RX_MAC_STAT_WIDTH 1
+#define	FRF_AB_XM_TX_MAC_STAT_LBN 10
+#define	FRF_AB_XM_TX_MAC_STAT_WIDTH 1
+#define	FRF_AB_XM_MCNTL_PASS_LBN 8
+#define	FRF_AB_XM_MCNTL_PASS_WIDTH 2
+#define	FRF_AB_XM_REJ_CNTL_UCAST_LBN 6
+#define	FRF_AB_XM_REJ_CNTL_UCAST_WIDTH 1
+#define	FRF_AB_XM_REJ_CNTL_MCAST_LBN 5
+#define	FRF_AB_XM_REJ_CNTL_MCAST_WIDTH 1
+#define	FRF_AB_XM_ZPAUSE_LBN 2
+#define	FRF_AB_XM_ZPAUSE_WIDTH 1
+#define	FRF_AB_XM_XMIT_PAUSE_LBN 1
+#define	FRF_AB_XM_XMIT_PAUSE_WIDTH 1
+#define	FRF_AB_XM_DIS_FCNTL_LBN 0
+#define	FRF_AB_XM_DIS_FCNTL_WIDTH 1
+
+/* XM_PAUSE_TIME_REG: XGMAC pause time register */
+#define	FR_AB_XM_PAUSE_TIME 0x00001290
+#define	FRF_AB_XM_TX_PAUSE_CNT_LBN 16
+#define	FRF_AB_XM_TX_PAUSE_CNT_WIDTH 16
+#define	FRF_AB_XM_RX_PAUSE_CNT_LBN 0
+#define	FRF_AB_XM_RX_PAUSE_CNT_WIDTH 16
+
+/* XM_TX_PARAM_REG: XGMAC transmit parameter register */
+#define	FR_AB_XM_TX_PARAM 0x000012d0
+#define	FRF_AB_XM_TX_JUMBO_MODE_LBN 31
+#define	FRF_AB_XM_TX_JUMBO_MODE_WIDTH 1
+#define	FRF_AB_XM_MAX_TX_FRM_SIZE_HI_LBN 19
+#define	FRF_AB_XM_MAX_TX_FRM_SIZE_HI_WIDTH 11
+#define	FRF_AB_XM_MAX_TX_FRM_SIZE_LO_LBN 16
+#define	FRF_AB_XM_MAX_TX_FRM_SIZE_LO_WIDTH 3
+#define	FRF_AB_XM_PAD_CHAR_LBN 0
+#define	FRF_AB_XM_PAD_CHAR_WIDTH 8
+
+/* XM_RX_PARAM_REG: XGMAC receive parameter register */
+#define	FR_AB_XM_RX_PARAM 0x000012e0
+#define	FRF_AB_XM_MAX_RX_FRM_SIZE_HI_LBN 3
+#define	FRF_AB_XM_MAX_RX_FRM_SIZE_HI_WIDTH 11
+#define	FRF_AB_XM_MAX_RX_FRM_SIZE_LO_LBN 0
+#define	FRF_AB_XM_MAX_RX_FRM_SIZE_LO_WIDTH 3
+
+/* XM_MGT_INT_MSK_REG: XGMAC management interrupt mask register */
+#define	FR_AB_XM_MGT_INT_MSK 0x000012f0
+#define	FRF_AB_XM_STAT_CNTR_OF_LBN 9
+#define	FRF_AB_XM_STAT_CNTR_OF_WIDTH 1
+#define	FRF_AB_XM_STAT_CNTR_HF_LBN 8
+#define	FRF_AB_XM_STAT_CNTR_HF_WIDTH 1
+#define	FRF_AB_XM_PRMBLE_ERR_LBN 2
+#define	FRF_AB_XM_PRMBLE_ERR_WIDTH 1
+#define	FRF_AB_XM_RMTFLT_LBN 1
+#define	FRF_AB_XM_RMTFLT_WIDTH 1
+#define	FRF_AB_XM_LCLFLT_LBN 0
+#define	FRF_AB_XM_LCLFLT_WIDTH 1
+
+/* XX_PWR_RST_REG: XGXS/XAUI powerdown/reset register */
+#define	FR_AB_XX_PWR_RST 0x00001300
+#define	FRF_AB_XX_PWRDND_SIG_LBN 31
+#define	FRF_AB_XX_PWRDND_SIG_WIDTH 1
+#define	FRF_AB_XX_PWRDNC_SIG_LBN 30
+#define	FRF_AB_XX_PWRDNC_SIG_WIDTH 1
+#define	FRF_AB_XX_PWRDNB_SIG_LBN 29
+#define	FRF_AB_XX_PWRDNB_SIG_WIDTH 1
+#define	FRF_AB_XX_PWRDNA_SIG_LBN 28
+#define	FRF_AB_XX_PWRDNA_SIG_WIDTH 1
+#define	FRF_AB_XX_SIM_MODE_LBN 27
+#define	FRF_AB_XX_SIM_MODE_WIDTH 1
+#define	FRF_AB_XX_RSTPLLCD_SIG_LBN 25
+#define	FRF_AB_XX_RSTPLLCD_SIG_WIDTH 1
+#define	FRF_AB_XX_RSTPLLAB_SIG_LBN 24
+#define	FRF_AB_XX_RSTPLLAB_SIG_WIDTH 1
+#define	FRF_AB_XX_RESETD_SIG_LBN 23
+#define	FRF_AB_XX_RESETD_SIG_WIDTH 1
+#define	FRF_AB_XX_RESETC_SIG_LBN 22
+#define	FRF_AB_XX_RESETC_SIG_WIDTH 1
+#define	FRF_AB_XX_RESETB_SIG_LBN 21
+#define	FRF_AB_XX_RESETB_SIG_WIDTH 1
+#define	FRF_AB_XX_RESETA_SIG_LBN 20
+#define	FRF_AB_XX_RESETA_SIG_WIDTH 1
+#define	FRF_AB_XX_RSTXGXSRX_SIG_LBN 18
+#define	FRF_AB_XX_RSTXGXSRX_SIG_WIDTH 1
+#define	FRF_AB_XX_RSTXGXSTX_SIG_LBN 17
+#define	FRF_AB_XX_RSTXGXSTX_SIG_WIDTH 1
+#define	FRF_AB_XX_SD_RST_ACT_LBN 16
+#define	FRF_AB_XX_SD_RST_ACT_WIDTH 1
+#define	FRF_AB_XX_PWRDND_EN_LBN 15
+#define	FRF_AB_XX_PWRDND_EN_WIDTH 1
+#define	FRF_AB_XX_PWRDNC_EN_LBN 14
+#define	FRF_AB_XX_PWRDNC_EN_WIDTH 1
+#define	FRF_AB_XX_PWRDNB_EN_LBN 13
+#define	FRF_AB_XX_PWRDNB_EN_WIDTH 1
+#define	FRF_AB_XX_PWRDNA_EN_LBN 12
+#define	FRF_AB_XX_PWRDNA_EN_WIDTH 1
+#define	FRF_AB_XX_RSTPLLCD_EN_LBN 9
+#define	FRF_AB_XX_RSTPLLCD_EN_WIDTH 1
+#define	FRF_AB_XX_RSTPLLAB_EN_LBN 8
+#define	FRF_AB_XX_RSTPLLAB_EN_WIDTH 1
+#define	FRF_AB_XX_RESETD_EN_LBN 7
+#define	FRF_AB_XX_RESETD_EN_WIDTH 1
+#define	FRF_AB_XX_RESETC_EN_LBN 6
+#define	FRF_AB_XX_RESETC_EN_WIDTH 1
+#define	FRF_AB_XX_RESETB_EN_LBN 5
+#define	FRF_AB_XX_RESETB_EN_WIDTH 1
+#define	FRF_AB_XX_RESETA_EN_LBN 4
+#define	FRF_AB_XX_RESETA_EN_WIDTH 1
+#define	FRF_AB_XX_RSTXGXSRX_EN_LBN 2
+#define	FRF_AB_XX_RSTXGXSRX_EN_WIDTH 1
+#define	FRF_AB_XX_RSTXGXSTX_EN_LBN 1
+#define	FRF_AB_XX_RSTXGXSTX_EN_WIDTH 1
+#define	FRF_AB_XX_RST_XX_EN_LBN 0
+#define	FRF_AB_XX_RST_XX_EN_WIDTH 1
+
+/* XX_SD_CTL_REG: XGXS/XAUI powerdown/reset control register */
+#define	FR_AB_XX_SD_CTL 0x00001310
+#define	FRF_AB_XX_TERMADJ1_LBN 17
+#define	FRF_AB_XX_TERMADJ1_WIDTH 1
+#define	FRF_AB_XX_TERMADJ0_LBN 16
+#define	FRF_AB_XX_TERMADJ0_WIDTH 1
+#define	FRF_AB_XX_HIDRVD_LBN 15
+#define	FRF_AB_XX_HIDRVD_WIDTH 1
+#define	FRF_AB_XX_LODRVD_LBN 14
+#define	FRF_AB_XX_LODRVD_WIDTH 1
+#define	FRF_AB_XX_HIDRVC_LBN 13
+#define	FRF_AB_XX_HIDRVC_WIDTH 1
+#define	FRF_AB_XX_LODRVC_LBN 12
+#define	FRF_AB_XX_LODRVC_WIDTH 1
+#define	FRF_AB_XX_HIDRVB_LBN 11
+#define	FRF_AB_XX_HIDRVB_WIDTH 1
+#define	FRF_AB_XX_LODRVB_LBN 10
+#define	FRF_AB_XX_LODRVB_WIDTH 1
+#define	FRF_AB_XX_HIDRVA_LBN 9
+#define	FRF_AB_XX_HIDRVA_WIDTH 1
+#define	FRF_AB_XX_LODRVA_LBN 8
+#define	FRF_AB_XX_LODRVA_WIDTH 1
+#define	FRF_AB_XX_LPBKD_LBN 3
+#define	FRF_AB_XX_LPBKD_WIDTH 1
+#define	FRF_AB_XX_LPBKC_LBN 2
+#define	FRF_AB_XX_LPBKC_WIDTH 1
+#define	FRF_AB_XX_LPBKB_LBN 1
+#define	FRF_AB_XX_LPBKB_WIDTH 1
+#define	FRF_AB_XX_LPBKA_LBN 0
+#define	FRF_AB_XX_LPBKA_WIDTH 1
+
+/* XX_TXDRV_CTL_REG: XAUI SerDes transmit drive control register */
+#define	FR_AB_XX_TXDRV_CTL 0x00001320
+#define	FRF_AB_XX_DEQD_LBN 28
+#define	FRF_AB_XX_DEQD_WIDTH 4
+#define	FRF_AB_XX_DEQC_LBN 24
+#define	FRF_AB_XX_DEQC_WIDTH 4
+#define	FRF_AB_XX_DEQB_LBN 20
+#define	FRF_AB_XX_DEQB_WIDTH 4
+#define	FRF_AB_XX_DEQA_LBN 16
+#define	FRF_AB_XX_DEQA_WIDTH 4
+#define	FRF_AB_XX_DTXD_LBN 12
+#define	FRF_AB_XX_DTXD_WIDTH 4
+#define	FRF_AB_XX_DTXC_LBN 8
+#define	FRF_AB_XX_DTXC_WIDTH 4
+#define	FRF_AB_XX_DTXB_LBN 4
+#define	FRF_AB_XX_DTXB_WIDTH 4
+#define	FRF_AB_XX_DTXA_LBN 0
+#define	FRF_AB_XX_DTXA_WIDTH 4
+
+/* XX_PRBS_CTL_REG: XGXS/XAUI per-channel PRBS control register */
+#define	FR_AB_XX_PRBS_CTL 0x00001330
+#define	FRF_AB_XX_CH3_RX_PRBS_SEL_LBN 30
+#define	FRF_AB_XX_CH3_RX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH3_RX_PRBS_INV_LBN 29
+#define	FRF_AB_XX_CH3_RX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH3_RX_PRBS_CHKEN_LBN 28
+#define	FRF_AB_XX_CH3_RX_PRBS_CHKEN_WIDTH 1
+#define	FRF_AB_XX_CH2_RX_PRBS_SEL_LBN 26
+#define	FRF_AB_XX_CH2_RX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH2_RX_PRBS_INV_LBN 25
+#define	FRF_AB_XX_CH2_RX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH2_RX_PRBS_CHKEN_LBN 24
+#define	FRF_AB_XX_CH2_RX_PRBS_CHKEN_WIDTH 1
+#define	FRF_AB_XX_CH1_RX_PRBS_SEL_LBN 22
+#define	FRF_AB_XX_CH1_RX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH1_RX_PRBS_INV_LBN 21
+#define	FRF_AB_XX_CH1_RX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH1_RX_PRBS_CHKEN_LBN 20
+#define	FRF_AB_XX_CH1_RX_PRBS_CHKEN_WIDTH 1
+#define	FRF_AB_XX_CH0_RX_PRBS_SEL_LBN 18
+#define	FRF_AB_XX_CH0_RX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH0_RX_PRBS_INV_LBN 17
+#define	FRF_AB_XX_CH0_RX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH0_RX_PRBS_CHKEN_LBN 16
+#define	FRF_AB_XX_CH0_RX_PRBS_CHKEN_WIDTH 1
+#define	FRF_AB_XX_CH3_TX_PRBS_SEL_LBN 14
+#define	FRF_AB_XX_CH3_TX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH3_TX_PRBS_INV_LBN 13
+#define	FRF_AB_XX_CH3_TX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH3_TX_PRBS_CHKEN_LBN 12
+#define	FRF_AB_XX_CH3_TX_PRBS_CHKEN_WIDTH 1
+#define	FRF_AB_XX_CH2_TX_PRBS_SEL_LBN 10
+#define	FRF_AB_XX_CH2_TX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH2_TX_PRBS_INV_LBN 9
+#define	FRF_AB_XX_CH2_TX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH2_TX_PRBS_CHKEN_LBN 8
+#define	FRF_AB_XX_CH2_TX_PRBS_CHKEN_WIDTH 1
+#define	FRF_AB_XX_CH1_TX_PRBS_SEL_LBN 6
+#define	FRF_AB_XX_CH1_TX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH1_TX_PRBS_INV_LBN 5
+#define	FRF_AB_XX_CH1_TX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH1_TX_PRBS_CHKEN_LBN 4
+#define	FRF_AB_XX_CH1_TX_PRBS_CHKEN_WIDTH 1
+#define	FRF_AB_XX_CH0_TX_PRBS_SEL_LBN 2
+#define	FRF_AB_XX_CH0_TX_PRBS_SEL_WIDTH 2
+#define	FRF_AB_XX_CH0_TX_PRBS_INV_LBN 1
+#define	FRF_AB_XX_CH0_TX_PRBS_INV_WIDTH 1
+#define	FRF_AB_XX_CH0_TX_PRBS_CHKEN_LBN 0
+#define	FRF_AB_XX_CH0_TX_PRBS_CHKEN_WIDTH 1
+
+/* XX_PRBS_CHK_REG: XGXS/XAUI per-channel PRBS check register */
+#define	FR_AB_XX_PRBS_CHK 0x00001340
+#define	FRF_AB_XX_REV_LB_EN_LBN 16
+#define	FRF_AB_XX_REV_LB_EN_WIDTH 1
+#define	FRF_AB_XX_CH3_DEG_DET_LBN 15
+#define	FRF_AB_XX_CH3_DEG_DET_WIDTH 1
+#define	FRF_AB_XX_CH3_LFSR_LOCK_IND_LBN 14
+#define	FRF_AB_XX_CH3_LFSR_LOCK_IND_WIDTH 1
+#define	FRF_AB_XX_CH3_PRBS_FRUN_LBN 13
+#define	FRF_AB_XX_CH3_PRBS_FRUN_WIDTH 1
+#define	FRF_AB_XX_CH3_ERR_CHK_LBN 12
+#define	FRF_AB_XX_CH3_ERR_CHK_WIDTH 1
+#define	FRF_AB_XX_CH2_DEG_DET_LBN 11
+#define	FRF_AB_XX_CH2_DEG_DET_WIDTH 1
+#define	FRF_AB_XX_CH2_LFSR_LOCK_IND_LBN 10
+#define	FRF_AB_XX_CH2_LFSR_LOCK_IND_WIDTH 1
+#define	FRF_AB_XX_CH2_PRBS_FRUN_LBN 9
+#define	FRF_AB_XX_CH2_PRBS_FRUN_WIDTH 1
+#define	FRF_AB_XX_CH2_ERR_CHK_LBN 8
+#define	FRF_AB_XX_CH2_ERR_CHK_WIDTH 1
+#define	FRF_AB_XX_CH1_DEG_DET_LBN 7
+#define	FRF_AB_XX_CH1_DEG_DET_WIDTH 1
+#define	FRF_AB_XX_CH1_LFSR_LOCK_IND_LBN 6
+#define	FRF_AB_XX_CH1_LFSR_LOCK_IND_WIDTH 1
+#define	FRF_AB_XX_CH1_PRBS_FRUN_LBN 5
+#define	FRF_AB_XX_CH1_PRBS_FRUN_WIDTH 1
+#define	FRF_AB_XX_CH1_ERR_CHK_LBN 4
+#define	FRF_AB_XX_CH1_ERR_CHK_WIDTH 1
+#define	FRF_AB_XX_CH0_DEG_DET_LBN 3
+#define	FRF_AB_XX_CH0_DEG_DET_WIDTH 1
+#define	FRF_AB_XX_CH0_LFSR_LOCK_IND_LBN 2
+#define	FRF_AB_XX_CH0_LFSR_LOCK_IND_WIDTH 1
+#define	FRF_AB_XX_CH0_PRBS_FRUN_LBN 1
+#define	FRF_AB_XX_CH0_PRBS_FRUN_WIDTH 1
+#define	FRF_AB_XX_CH0_ERR_CHK_LBN 0
+#define	FRF_AB_XX_CH0_ERR_CHK_WIDTH 1
+
+/* XX_PRBS_ERR_REG: 8-bit PRBS error count field for each of CH0-CH3 (full description still to be written) */
+#define	FR_AB_XX_PRBS_ERR 0x00001350
+#define	FRF_AB_XX_CH3_PRBS_ERR_CNT_LBN 24
+#define	FRF_AB_XX_CH3_PRBS_ERR_CNT_WIDTH 8
+#define	FRF_AB_XX_CH2_PRBS_ERR_CNT_LBN 16
+#define	FRF_AB_XX_CH2_PRBS_ERR_CNT_WIDTH 8
+#define	FRF_AB_XX_CH1_PRBS_ERR_CNT_LBN 8
+#define	FRF_AB_XX_CH1_PRBS_ERR_CNT_WIDTH 8
+#define	FRF_AB_XX_CH0_PRBS_ERR_CNT_LBN 0
+#define	FRF_AB_XX_CH0_PRBS_ERR_CNT_WIDTH 8
+
+/* XX_CORE_STAT_REG: XAUI XGXS core status register */
+#define	FR_AB_XX_CORE_STAT 0x00001360
+#define	FRF_AB_XX_FORCE_SIG3_LBN 31
+#define	FRF_AB_XX_FORCE_SIG3_WIDTH 1
+#define	FRF_AB_XX_FORCE_SIG3_VAL_LBN 30
+#define	FRF_AB_XX_FORCE_SIG3_VAL_WIDTH 1
+#define	FRF_AB_XX_FORCE_SIG2_LBN 29
+#define	FRF_AB_XX_FORCE_SIG2_WIDTH 1
+#define	FRF_AB_XX_FORCE_SIG2_VAL_LBN 28
+#define	FRF_AB_XX_FORCE_SIG2_VAL_WIDTH 1
+#define	FRF_AB_XX_FORCE_SIG1_LBN 27
+#define	FRF_AB_XX_FORCE_SIG1_WIDTH 1
+#define	FRF_AB_XX_FORCE_SIG1_VAL_LBN 26
+#define	FRF_AB_XX_FORCE_SIG1_VAL_WIDTH 1
+#define	FRF_AB_XX_FORCE_SIG0_LBN 25
+#define	FRF_AB_XX_FORCE_SIG0_WIDTH 1
+#define	FRF_AB_XX_FORCE_SIG0_VAL_LBN 24
+#define	FRF_AB_XX_FORCE_SIG0_VAL_WIDTH 1
+#define	FRF_AB_XX_XGXS_LB_EN_LBN 23
+#define	FRF_AB_XX_XGXS_LB_EN_WIDTH 1
+#define	FRF_AB_XX_XGMII_LB_EN_LBN 22
+#define	FRF_AB_XX_XGMII_LB_EN_WIDTH 1
+#define	FRF_AB_XX_MATCH_FAULT_LBN 21
+#define	FRF_AB_XX_MATCH_FAULT_WIDTH 1
+#define	FRF_AB_XX_ALIGN_DONE_LBN 20
+#define	FRF_AB_XX_ALIGN_DONE_WIDTH 1
+#define	FRF_AB_XX_SYNC_STAT3_LBN 19
+#define	FRF_AB_XX_SYNC_STAT3_WIDTH 1
+#define	FRF_AB_XX_SYNC_STAT2_LBN 18
+#define	FRF_AB_XX_SYNC_STAT2_WIDTH 1
+#define	FRF_AB_XX_SYNC_STAT1_LBN 17
+#define	FRF_AB_XX_SYNC_STAT1_WIDTH 1
+#define	FRF_AB_XX_SYNC_STAT0_LBN 16
+#define	FRF_AB_XX_SYNC_STAT0_WIDTH 1
+#define	FRF_AB_XX_COMMA_DET_CH3_LBN 15
+#define	FRF_AB_XX_COMMA_DET_CH3_WIDTH 1
+#define	FRF_AB_XX_COMMA_DET_CH2_LBN 14
+#define	FRF_AB_XX_COMMA_DET_CH2_WIDTH 1
+#define	FRF_AB_XX_COMMA_DET_CH1_LBN 13
+#define	FRF_AB_XX_COMMA_DET_CH1_WIDTH 1
+#define	FRF_AB_XX_COMMA_DET_CH0_LBN 12
+#define	FRF_AB_XX_COMMA_DET_CH0_WIDTH 1
+#define	FRF_AB_XX_CGRP_ALIGN_CH3_LBN 11
+#define	FRF_AB_XX_CGRP_ALIGN_CH3_WIDTH 1
+#define	FRF_AB_XX_CGRP_ALIGN_CH2_LBN 10
+#define	FRF_AB_XX_CGRP_ALIGN_CH2_WIDTH 1
+#define	FRF_AB_XX_CGRP_ALIGN_CH1_LBN 9
+#define	FRF_AB_XX_CGRP_ALIGN_CH1_WIDTH 1
+#define	FRF_AB_XX_CGRP_ALIGN_CH0_LBN 8
+#define	FRF_AB_XX_CGRP_ALIGN_CH0_WIDTH 1
+#define	FRF_AB_XX_CHAR_ERR_CH3_LBN 7
+#define	FRF_AB_XX_CHAR_ERR_CH3_WIDTH 1
+#define	FRF_AB_XX_CHAR_ERR_CH2_LBN 6
+#define	FRF_AB_XX_CHAR_ERR_CH2_WIDTH 1
+#define	FRF_AB_XX_CHAR_ERR_CH1_LBN 5
+#define	FRF_AB_XX_CHAR_ERR_CH1_WIDTH 1
+#define	FRF_AB_XX_CHAR_ERR_CH0_LBN 4
+#define	FRF_AB_XX_CHAR_ERR_CH0_WIDTH 1
+#define	FRF_AB_XX_DISPERR_CH3_LBN 3
+#define	FRF_AB_XX_DISPERR_CH3_WIDTH 1
+#define	FRF_AB_XX_DISPERR_CH2_LBN 2
+#define	FRF_AB_XX_DISPERR_CH2_WIDTH 1
+#define	FRF_AB_XX_DISPERR_CH1_LBN 1
+#define	FRF_AB_XX_DISPERR_CH1_WIDTH 1
+#define	FRF_AB_XX_DISPERR_CH0_LBN 0
+#define	FRF_AB_XX_DISPERR_CH0_WIDTH 1
+
+/* RX_DESC_PTR_TBL_KER: Receive descriptor pointer table */
+#define	FR_AA_RX_DESC_PTR_TBL_KER 0x00011800
+#define	FR_AA_RX_DESC_PTR_TBL_KER_STEP 16
+#define	FR_AA_RX_DESC_PTR_TBL_KER_ROWS 4
+/* RX_DESC_PTR_TBL: Receive descriptor pointer table */
+#define	FR_BZ_RX_DESC_PTR_TBL 0x00f40000
+#define	FR_BZ_RX_DESC_PTR_TBL_STEP 16
+#define	FR_BB_RX_DESC_PTR_TBL_ROWS 4096
+#define	FR_CZ_RX_DESC_PTR_TBL_ROWS 1024
+#define	FRF_CZ_RX_HDR_SPLIT_LBN 90
+#define	FRF_CZ_RX_HDR_SPLIT_WIDTH 1
+#define	FRF_AA_RX_RESET_LBN 89
+#define	FRF_AA_RX_RESET_WIDTH 1
+#define	FRF_AZ_RX_ISCSI_DDIG_EN_LBN 88
+#define	FRF_AZ_RX_ISCSI_DDIG_EN_WIDTH 1
+#define	FRF_AZ_RX_ISCSI_HDIG_EN_LBN 87
+#define	FRF_AZ_RX_ISCSI_HDIG_EN_WIDTH 1
+#define	FRF_AZ_RX_DESC_PREF_ACT_LBN 86
+#define	FRF_AZ_RX_DESC_PREF_ACT_WIDTH 1
+#define	FRF_AZ_RX_DC_HW_RPTR_LBN 80
+#define	FRF_AZ_RX_DC_HW_RPTR_WIDTH 6
+#define	FRF_AZ_RX_DESCQ_HW_RPTR_LBN 68
+#define	FRF_AZ_RX_DESCQ_HW_RPTR_WIDTH 12
+#define	FRF_AZ_RX_DESCQ_SW_WPTR_LBN 56
+#define	FRF_AZ_RX_DESCQ_SW_WPTR_WIDTH 12
+#define	FRF_AZ_RX_DESCQ_BUF_BASE_ID_LBN 36
+#define	FRF_AZ_RX_DESCQ_BUF_BASE_ID_WIDTH 20
+#define	FRF_AZ_RX_DESCQ_EVQ_ID_LBN 24
+#define	FRF_AZ_RX_DESCQ_EVQ_ID_WIDTH 12
+#define	FRF_AZ_RX_DESCQ_OWNER_ID_LBN 10
+#define	FRF_AZ_RX_DESCQ_OWNER_ID_WIDTH 14
+#define	FRF_AZ_RX_DESCQ_LABEL_LBN 5
+#define	FRF_AZ_RX_DESCQ_LABEL_WIDTH 5
+#define	FRF_AZ_RX_DESCQ_SIZE_LBN 3
+#define	FRF_AZ_RX_DESCQ_SIZE_WIDTH 2
+#define	FFE_AZ_RX_DESCQ_SIZE_4K 3
+#define	FFE_AZ_RX_DESCQ_SIZE_2K 2
+#define	FFE_AZ_RX_DESCQ_SIZE_1K 1
+#define	FFE_AZ_RX_DESCQ_SIZE_512 0
+#define	FRF_AZ_RX_DESCQ_TYPE_LBN 2
+#define	FRF_AZ_RX_DESCQ_TYPE_WIDTH 1
+#define	FRF_AZ_RX_DESCQ_JUMBO_LBN 1
+#define	FRF_AZ_RX_DESCQ_JUMBO_WIDTH 1
+#define	FRF_AZ_RX_DESCQ_EN_LBN 0
+#define	FRF_AZ_RX_DESCQ_EN_WIDTH 1
+
+/* TX_DESC_PTR_TBL_KER: Transmit descriptor pointer table */
+#define	FR_AA_TX_DESC_PTR_TBL_KER 0x00011900
+#define	FR_AA_TX_DESC_PTR_TBL_KER_STEP 16
+#define	FR_AA_TX_DESC_PTR_TBL_KER_ROWS 8
+/* TX_DESC_PTR_TBL: Transmit descriptor pointer table */
+#define	FR_BZ_TX_DESC_PTR_TBL 0x00f50000
+#define	FR_BZ_TX_DESC_PTR_TBL_STEP 16
+#define	FR_BB_TX_DESC_PTR_TBL_ROWS 4096
+#define	FR_CZ_TX_DESC_PTR_TBL_ROWS 1024
+#define	FRF_CZ_TX_DPT_Q_MASK_WIDTH_LBN 94
+#define	FRF_CZ_TX_DPT_Q_MASK_WIDTH_WIDTH 2
+#define	FRF_CZ_TX_DPT_ETH_FILT_EN_LBN 93
+#define	FRF_CZ_TX_DPT_ETH_FILT_EN_WIDTH 1
+#define	FRF_CZ_TX_DPT_IP_FILT_EN_LBN 92
+#define	FRF_CZ_TX_DPT_IP_FILT_EN_WIDTH 1
+#define	FRF_BZ_TX_NON_IP_DROP_DIS_LBN 91
+#define	FRF_BZ_TX_NON_IP_DROP_DIS_WIDTH 1
+#define	FRF_BZ_TX_IP_CHKSM_DIS_LBN 90
+#define	FRF_BZ_TX_IP_CHKSM_DIS_WIDTH 1
+#define	FRF_BZ_TX_TCP_CHKSM_DIS_LBN 89
+#define	FRF_BZ_TX_TCP_CHKSM_DIS_WIDTH 1
+#define	FRF_AZ_TX_DESCQ_EN_LBN 88
+#define	FRF_AZ_TX_DESCQ_EN_WIDTH 1
+#define	FRF_AZ_TX_ISCSI_DDIG_EN_LBN 87
+#define	FRF_AZ_TX_ISCSI_DDIG_EN_WIDTH 1
+#define	FRF_AZ_TX_ISCSI_HDIG_EN_LBN 86
+#define	FRF_AZ_TX_ISCSI_HDIG_EN_WIDTH 1
+#define	FRF_AZ_TX_DC_HW_RPTR_LBN 80
+#define	FRF_AZ_TX_DC_HW_RPTR_WIDTH 6
+#define	FRF_AZ_TX_DESCQ_HW_RPTR_LBN 68
+#define	FRF_AZ_TX_DESCQ_HW_RPTR_WIDTH 12
+#define	FRF_AZ_TX_DESCQ_SW_WPTR_LBN 56
+#define	FRF_AZ_TX_DESCQ_SW_WPTR_WIDTH 12
+#define	FRF_AZ_TX_DESCQ_BUF_BASE_ID_LBN 36
+#define	FRF_AZ_TX_DESCQ_BUF_BASE_ID_WIDTH 20
+#define	FRF_AZ_TX_DESCQ_EVQ_ID_LBN 24
+#define	FRF_AZ_TX_DESCQ_EVQ_ID_WIDTH 12
+#define	FRF_AZ_TX_DESCQ_OWNER_ID_LBN 10
+#define	FRF_AZ_TX_DESCQ_OWNER_ID_WIDTH 14
+#define	FRF_AZ_TX_DESCQ_LABEL_LBN 5
+#define	FRF_AZ_TX_DESCQ_LABEL_WIDTH 5
+#define	FRF_AZ_TX_DESCQ_SIZE_LBN 3
+#define	FRF_AZ_TX_DESCQ_SIZE_WIDTH 2
+#define	FFE_AZ_TX_DESCQ_SIZE_4K 3
+#define	FFE_AZ_TX_DESCQ_SIZE_2K 2
+#define	FFE_AZ_TX_DESCQ_SIZE_1K 1
+#define	FFE_AZ_TX_DESCQ_SIZE_512 0
+#define	FRF_AZ_TX_DESCQ_TYPE_LBN 1
+#define	FRF_AZ_TX_DESCQ_TYPE_WIDTH 2
+#define	FRF_AZ_TX_DESCQ_FLUSH_LBN 0
+#define	FRF_AZ_TX_DESCQ_FLUSH_WIDTH 1
+
+/* EVQ_PTR_TBL_KER: Event queue pointer table */
+#define	FR_AA_EVQ_PTR_TBL_KER 0x00011a00
+#define	FR_AA_EVQ_PTR_TBL_KER_STEP 16
+#define	FR_AA_EVQ_PTR_TBL_KER_ROWS 4
+/* EVQ_PTR_TBL: Event queue pointer table */
+#define	FR_BZ_EVQ_PTR_TBL 0x00f60000
+#define	FR_BZ_EVQ_PTR_TBL_STEP 16
+#define	FR_CZ_EVQ_PTR_TBL_ROWS 1024
+#define	FR_BB_EVQ_PTR_TBL_ROWS 4096
+#define	FRF_BZ_EVQ_RPTR_IGN_LBN 40
+#define	FRF_BZ_EVQ_RPTR_IGN_WIDTH 1
+#define	FRF_AB_EVQ_WKUP_OR_INT_EN_LBN 39
+#define	FRF_AB_EVQ_WKUP_OR_INT_EN_WIDTH 1
+#define	FRF_CZ_EVQ_DOS_PROTECT_EN_LBN 39
+#define	FRF_CZ_EVQ_DOS_PROTECT_EN_WIDTH 1
+#define	FRF_AZ_EVQ_NXT_WPTR_LBN 24
+#define	FRF_AZ_EVQ_NXT_WPTR_WIDTH 15
+#define	FRF_AZ_EVQ_EN_LBN 23
+#define	FRF_AZ_EVQ_EN_WIDTH 1
+#define	FRF_AZ_EVQ_SIZE_LBN 20
+#define	FRF_AZ_EVQ_SIZE_WIDTH 3
+#define	FFE_AZ_EVQ_SIZE_32K 6
+#define	FFE_AZ_EVQ_SIZE_16K 5
+#define	FFE_AZ_EVQ_SIZE_8K 4
+#define	FFE_AZ_EVQ_SIZE_4K 3
+#define	FFE_AZ_EVQ_SIZE_2K 2
+#define	FFE_AZ_EVQ_SIZE_1K 1
+#define	FFE_AZ_EVQ_SIZE_512 0
+#define	FRF_AZ_EVQ_BUF_BASE_ID_LBN 0
+#define	FRF_AZ_EVQ_BUF_BASE_ID_WIDTH 20
+
+/* BUF_HALF_TBL_KER: Buffer table in half buffer table mode, direct access by driver */
+#define	FR_AA_BUF_HALF_TBL_KER 0x00018000
+#define	FR_AA_BUF_HALF_TBL_KER_STEP 8
+#define	FR_AA_BUF_HALF_TBL_KER_ROWS 4096
+/* BUF_HALF_TBL: Buffer table in half buffer table mode, direct access by driver */
+#define	FR_BZ_BUF_HALF_TBL 0x00800000
+#define	FR_BZ_BUF_HALF_TBL_STEP 8
+#define	FR_CZ_BUF_HALF_TBL_ROWS 147456
+#define	FR_BB_BUF_HALF_TBL_ROWS 524288
+#define	FRF_AZ_BUF_ADR_HBUF_ODD_LBN 44
+#define	FRF_AZ_BUF_ADR_HBUF_ODD_WIDTH 20
+#define	FRF_AZ_BUF_OWNER_ID_HBUF_ODD_LBN 32
+#define	FRF_AZ_BUF_OWNER_ID_HBUF_ODD_WIDTH 12
+#define	FRF_AZ_BUF_ADR_HBUF_EVEN_LBN 12
+#define	FRF_AZ_BUF_ADR_HBUF_EVEN_WIDTH 20
+#define	FRF_AZ_BUF_OWNER_ID_HBUF_EVEN_LBN 0
+#define	FRF_AZ_BUF_OWNER_ID_HBUF_EVEN_WIDTH 12
+
+/* BUF_FULL_TBL_KER: Buffer table in full buffer table mode, direct access by driver */
+#define	FR_AA_BUF_FULL_TBL_KER 0x00018000
+#define	FR_AA_BUF_FULL_TBL_KER_STEP 8
+#define	FR_AA_BUF_FULL_TBL_KER_ROWS 4096
+/* BUF_FULL_TBL: Buffer table in full buffer table mode, direct access by driver */
+#define	FR_BZ_BUF_FULL_TBL 0x00800000
+#define	FR_BZ_BUF_FULL_TBL_STEP 8
+#define	FR_CZ_BUF_FULL_TBL_ROWS 147456
+#define	FR_BB_BUF_FULL_TBL_ROWS 917504
+#define	FRF_AZ_BUF_FULL_UNUSED_LBN 51
+#define	FRF_AZ_BUF_FULL_UNUSED_WIDTH 13
+#define	FRF_AZ_IP_DAT_BUF_SIZE_LBN 50
+#define	FRF_AZ_IP_DAT_BUF_SIZE_WIDTH 1
+#define	FRF_AZ_BUF_ADR_REGION_LBN 48
+#define	FRF_AZ_BUF_ADR_REGION_WIDTH 2
+#define	FFE_AZ_BUF_ADR_REGN3 3
+#define	FFE_AZ_BUF_ADR_REGN2 2
+#define	FFE_AZ_BUF_ADR_REGN1 1
+#define	FFE_AZ_BUF_ADR_REGN0 0
+#define	FRF_AZ_BUF_ADR_FBUF_LBN 14
+#define	FRF_AZ_BUF_ADR_FBUF_WIDTH 34
+#define	FRF_AZ_BUF_OWNER_ID_FBUF_LBN 0
+#define	FRF_AZ_BUF_OWNER_ID_FBUF_WIDTH 14
+
+/* RX_FILTER_TBL0: TCP/IPv4 Receive filter table */
+#define	FR_BZ_RX_FILTER_TBL0 0x00f00000
+#define	FR_BZ_RX_FILTER_TBL0_STEP 32
+#define	FR_BZ_RX_FILTER_TBL0_ROWS 8192
+/* RX_FILTER_TBL1: TCP/IPv4 Receive filter table */
+#define	FR_BB_RX_FILTER_TBL1 0x00f00010
+#define	FR_BB_RX_FILTER_TBL1_STEP 32
+#define	FR_BB_RX_FILTER_TBL1_ROWS 8192
+#define	FRF_BZ_RSS_EN_LBN 110
+#define	FRF_BZ_RSS_EN_WIDTH 1
+#define	FRF_BZ_SCATTER_EN_LBN 109
+#define	FRF_BZ_SCATTER_EN_WIDTH 1
+#define	FRF_BZ_TCP_UDP_LBN 108
+#define	FRF_BZ_TCP_UDP_WIDTH 1
+#define	FRF_BZ_RXQ_ID_LBN 96
+#define	FRF_BZ_RXQ_ID_WIDTH 12
+#define	FRF_BZ_DEST_IP_LBN 64
+#define	FRF_BZ_DEST_IP_WIDTH 32
+#define	FRF_BZ_DEST_PORT_TCP_LBN 48
+#define	FRF_BZ_DEST_PORT_TCP_WIDTH 16
+#define	FRF_BZ_SRC_IP_LBN 16
+#define	FRF_BZ_SRC_IP_WIDTH 32
+#define	FRF_BZ_SRC_TCP_DEST_UDP_LBN 0
+#define	FRF_BZ_SRC_TCP_DEST_UDP_WIDTH 16
+
+/* RX_MAC_FILTER_TBL0: Receive Ethernet filter table */
+#define	FR_CZ_RX_MAC_FILTER_TBL0 0x00f00010
+#define	FR_CZ_RX_MAC_FILTER_TBL0_STEP 32
+#define	FR_CZ_RX_MAC_FILTER_TBL0_ROWS 512
+#define	FRF_CZ_RMFT_RSS_EN_LBN 75
+#define	FRF_CZ_RMFT_RSS_EN_WIDTH 1
+#define	FRF_CZ_RMFT_SCATTER_EN_LBN 74
+#define	FRF_CZ_RMFT_SCATTER_EN_WIDTH 1
+#define	FRF_CZ_RMFT_IP_OVERRIDE_LBN 73
+#define	FRF_CZ_RMFT_IP_OVERRIDE_WIDTH 1
+#define	FRF_CZ_RMFT_RXQ_ID_LBN 61
+#define	FRF_CZ_RMFT_RXQ_ID_WIDTH 12
+#define	FRF_CZ_RMFT_WILDCARD_MATCH_LBN 60
+#define	FRF_CZ_RMFT_WILDCARD_MATCH_WIDTH 1
+#define	FRF_CZ_RMFT_DEST_MAC_LBN 12
+#define	FRF_CZ_RMFT_DEST_MAC_WIDTH 48
+#define	FRF_CZ_RMFT_VLAN_ID_LBN 0
+#define	FRF_CZ_RMFT_VLAN_ID_WIDTH 12
+
+/* TIMER_TBL: Timer table */
+#define	FR_BZ_TIMER_TBL 0x00f70000
+#define	FR_BZ_TIMER_TBL_STEP 16
+#define	FR_CZ_TIMER_TBL_ROWS 1024
+#define	FR_BB_TIMER_TBL_ROWS 4096
+#define	FRF_CZ_TIMER_Q_EN_LBN 33
+#define	FRF_CZ_TIMER_Q_EN_WIDTH 1
+#define	FRF_CZ_INT_ARMD_LBN 32
+#define	FRF_CZ_INT_ARMD_WIDTH 1
+#define	FRF_CZ_INT_PEND_LBN 31
+#define	FRF_CZ_INT_PEND_WIDTH 1
+#define	FRF_CZ_HOST_NOTIFY_MODE_LBN 30
+#define	FRF_CZ_HOST_NOTIFY_MODE_WIDTH 1
+#define	FRF_CZ_RELOAD_TIMER_VAL_LBN 16
+#define	FRF_CZ_RELOAD_TIMER_VAL_WIDTH 14
+#define	FRF_CZ_TIMER_MODE_LBN 14
+#define	FRF_CZ_TIMER_MODE_WIDTH 2
+#define	FFE_CZ_TIMER_MODE_INT_HLDOFF 3
+#define	FFE_CZ_TIMER_MODE_TRIG_START 2
+#define	FFE_CZ_TIMER_MODE_IMMED_START 1
+#define	FFE_CZ_TIMER_MODE_DIS 0
+#define	FRF_BB_TIMER_MODE_LBN 12
+#define	FRF_BB_TIMER_MODE_WIDTH 2
+#define	FFE_BB_TIMER_MODE_INT_HLDOFF 2 /* NOTE(review): same encoding as TRIG_START below; CZ uses 3 - confirm intended */
+#define	FFE_BB_TIMER_MODE_TRIG_START 2
+#define	FFE_BB_TIMER_MODE_IMMED_START 1
+#define	FFE_BB_TIMER_MODE_DIS 0
+#define	FRF_CZ_TIMER_VAL_LBN 0
+#define	FRF_CZ_TIMER_VAL_WIDTH 14
+#define	FRF_BB_TIMER_VAL_LBN 0
+#define	FRF_BB_TIMER_VAL_WIDTH 12
+
+/* TX_PACE_TBL: Transmit pacing table */
+#define	FR_BZ_TX_PACE_TBL 0x00f80000
+#define	FR_BZ_TX_PACE_TBL_STEP 16
+#define	FR_CZ_TX_PACE_TBL_ROWS 1024
+#define	FR_BB_TX_PACE_TBL_ROWS 4096
+#define	FRF_BZ_TX_PACE_LBN 0
+#define	FRF_BZ_TX_PACE_WIDTH 5
+
+/* RX_INDIRECTION_TBL: RX Indirection Table */
+#define	FR_BZ_RX_INDIRECTION_TBL 0x00fb0000
+#define	FR_BZ_RX_INDIRECTION_TBL_STEP 16
+#define	FR_BZ_RX_INDIRECTION_TBL_ROWS 128
+#define	FRF_BZ_IT_QUEUE_LBN 0
+#define	FRF_BZ_IT_QUEUE_WIDTH 6
+
+/* TX_FILTER_TBL0: TCP/IPv4 Transmit filter table */
+#define	FR_CZ_TX_FILTER_TBL0 0x00fc0000
+#define	FR_CZ_TX_FILTER_TBL0_STEP 16
+#define	FR_CZ_TX_FILTER_TBL0_ROWS 8192
+#define	FRF_CZ_TIFT_TCP_UDP_LBN 108
+#define	FRF_CZ_TIFT_TCP_UDP_WIDTH 1
+#define	FRF_CZ_TIFT_TXQ_ID_LBN 96
+#define	FRF_CZ_TIFT_TXQ_ID_WIDTH 12
+#define	FRF_CZ_TIFT_DEST_IP_LBN 64
+#define	FRF_CZ_TIFT_DEST_IP_WIDTH 32
+#define	FRF_CZ_TIFT_DEST_PORT_TCP_LBN 48
+#define	FRF_CZ_TIFT_DEST_PORT_TCP_WIDTH 16
+#define	FRF_CZ_TIFT_SRC_IP_LBN 16
+#define	FRF_CZ_TIFT_SRC_IP_WIDTH 32
+#define	FRF_CZ_TIFT_SRC_TCP_DEST_UDP_LBN 0
+#define	FRF_CZ_TIFT_SRC_TCP_DEST_UDP_WIDTH 16
+
+/* TX_MAC_FILTER_TBL0: Transmit Ethernet filter table */
+#define	FR_CZ_TX_MAC_FILTER_TBL0 0x00fe0000
+#define	FR_CZ_TX_MAC_FILTER_TBL0_STEP 16
+#define	FR_CZ_TX_MAC_FILTER_TBL0_ROWS 512
+#define	FRF_CZ_TMFT_TXQ_ID_LBN 61
+#define	FRF_CZ_TMFT_TXQ_ID_WIDTH 12
+#define	FRF_CZ_TMFT_WILDCARD_MATCH_LBN 60
+#define	FRF_CZ_TMFT_WILDCARD_MATCH_WIDTH 1
+#define	FRF_CZ_TMFT_SRC_MAC_LBN 12
+#define	FRF_CZ_TMFT_SRC_MAC_WIDTH 48
+#define	FRF_CZ_TMFT_VLAN_ID_LBN 0
+#define	FRF_CZ_TMFT_VLAN_ID_WIDTH 12
+
+/* MC_TREG_SMEM: MC Shared Memory */
+#define	FR_CZ_MC_TREG_SMEM 0x00ff0000
+#define	FR_CZ_MC_TREG_SMEM_STEP 4
+#define	FR_CZ_MC_TREG_SMEM_ROWS 512
+#define	FRF_CZ_MC_TREG_SMEM_ROW_LBN 0
+#define	FRF_CZ_MC_TREG_SMEM_ROW_WIDTH 32
+
+/* MSIX_VECTOR_TABLE: MSIX Vector Table */
+#define	FR_BB_MSIX_VECTOR_TABLE 0x00ff0000
+#define	FR_BZ_MSIX_VECTOR_TABLE_STEP 16
+#define	FR_BB_MSIX_VECTOR_TABLE_ROWS 64
+/* MSIX_VECTOR_TABLE: MSIX Vector Table */
+#define	FR_CZ_MSIX_VECTOR_TABLE 0x00000000
+/* Row step is FR_BZ_MSIX_VECTOR_TABLE_STEP (16), shared with the BB table */
+#define	FR_CZ_MSIX_VECTOR_TABLE_ROWS 1024
+#define	FRF_BZ_MSIX_VECTOR_RESERVED_LBN 97
+#define	FRF_BZ_MSIX_VECTOR_RESERVED_WIDTH 31
+#define	FRF_BZ_MSIX_VECTOR_MASK_LBN 96
+#define	FRF_BZ_MSIX_VECTOR_MASK_WIDTH 1
+#define	FRF_BZ_MSIX_MESSAGE_DATA_LBN 64
+#define	FRF_BZ_MSIX_MESSAGE_DATA_WIDTH 32
+#define	FRF_BZ_MSIX_MESSAGE_ADDRESS_HI_LBN 32
+#define	FRF_BZ_MSIX_MESSAGE_ADDRESS_HI_WIDTH 32
+#define	FRF_BZ_MSIX_MESSAGE_ADDRESS_LO_LBN 0
+#define	FRF_BZ_MSIX_MESSAGE_ADDRESS_LO_WIDTH 32
+
+/* MSIX_PBA_TABLE: MSIX Pending Bit Array */
+#define	FR_BB_MSIX_PBA_TABLE 0x00ff2000
+#define	FR_BZ_MSIX_PBA_TABLE_STEP 4
+#define	FR_BB_MSIX_PBA_TABLE_ROWS 2
+/* MSIX_PBA_TABLE: MSIX Pending Bit Array */
+#define	FR_CZ_MSIX_PBA_TABLE 0x00008000
+/* Row step is FR_BZ_MSIX_PBA_TABLE_STEP (4), shared with the BB table */
+#define	FR_CZ_MSIX_PBA_TABLE_ROWS 32
+#define	FRF_BZ_MSIX_PBA_PEND_DWORD_LBN 0
+#define	FRF_BZ_MSIX_PBA_PEND_DWORD_WIDTH 32
+
+/* SRM_DBG_REG: SRAM debug access */
+#define	FR_BZ_SRM_DBG 0x03000000
+#define	FR_BZ_SRM_DBG_STEP 8
+#define	FR_CZ_SRM_DBG_ROWS 262144
+#define	FR_BB_SRM_DBG_ROWS 2097152
+#define	FRF_BZ_SRM_DBG_LBN 0
+#define	FRF_BZ_SRM_DBG_WIDTH 64
+
+/* TB_MSIX_PBA_TABLE: MSIX Pending Bit Array */
+#define	FR_CZ_TB_MSIX_PBA_TABLE 0x00008000
+#define	FR_CZ_TB_MSIX_PBA_TABLE_STEP 4
+#define	FR_CZ_TB_MSIX_PBA_TABLE_ROWS 1024
+#define	FRF_CZ_TB_MSIX_PBA_PEND_DWORD_LBN 0
+#define	FRF_CZ_TB_MSIX_PBA_PEND_DWORD_WIDTH 32
+
+/* DRIVER_EV */
+#define	FSF_AZ_DRIVER_EV_SUBCODE_LBN 56
+#define	FSF_AZ_DRIVER_EV_SUBCODE_WIDTH 4
+#define	FSE_BZ_TX_DSC_ERROR_EV 15
+#define	FSE_BZ_RX_DSC_ERROR_EV 14
+#define	FSE_AA_RX_RECOVER_EV 11
+#define	FSE_AZ_TIMER_EV 10
+#define	FSE_AZ_TX_PKT_NON_TCP_UDP 9
+#define	FSE_AZ_WAKE_UP_EV 6
+#define	FSE_AZ_SRM_UPD_DONE_EV 5
+#define	FSE_AB_EVQ_NOT_EN_EV 3
+#define	FSE_AZ_EVQ_INIT_DONE_EV 2
+#define	FSE_AZ_RX_DESCQ_FLS_DONE_EV 1
+#define	FSE_AZ_TX_DESCQ_FLS_DONE_EV 0
+#define	FSF_AZ_DRIVER_EV_SUBDATA_LBN 0
+#define	FSF_AZ_DRIVER_EV_SUBDATA_WIDTH 14
+
+/* EVENT_ENTRY */
+#define	FSF_AZ_EV_CODE_LBN 60
+#define	FSF_AZ_EV_CODE_WIDTH 4
+#define	FSE_CZ_EV_CODE_MCDI_EV 12
+#define	FSE_CZ_EV_CODE_USER_EV 8
+#define	FSE_AZ_EV_CODE_DRV_GEN_EV 7
+#define	FSE_AZ_EV_CODE_GLOBAL_EV 6
+#define	FSE_AZ_EV_CODE_DRIVER_EV 5
+#define	FSE_AZ_EV_CODE_TX_EV 2
+#define	FSE_AZ_EV_CODE_RX_EV 0
+#define	FSF_AZ_EV_DATA_LBN 0
+#define	FSF_AZ_EV_DATA_WIDTH 60
+
+/* GLOBAL_EV */
+#define	FSF_BB_GLB_EV_RX_RECOVERY_LBN 12
+#define	FSF_BB_GLB_EV_RX_RECOVERY_WIDTH 1
+#define	FSF_AA_GLB_EV_RX_RECOVERY_LBN 11
+#define	FSF_AA_GLB_EV_RX_RECOVERY_WIDTH 1
+#define	FSF_BB_GLB_EV_XG_MGT_INTR_LBN 11
+#define	FSF_BB_GLB_EV_XG_MGT_INTR_WIDTH 1
+#define	FSF_AB_GLB_EV_XFP_PHY0_INTR_LBN 10
+#define	FSF_AB_GLB_EV_XFP_PHY0_INTR_WIDTH 1
+#define	FSF_AB_GLB_EV_XG_PHY0_INTR_LBN 9
+#define	FSF_AB_GLB_EV_XG_PHY0_INTR_WIDTH 1
+#define	FSF_AB_GLB_EV_G_PHY0_INTR_LBN 7
+#define	FSF_AB_GLB_EV_G_PHY0_INTR_WIDTH 1
+
+/* LEGACY_INT_VEC */
+#define	FSF_AZ_NET_IVEC_FATAL_INT_LBN 64
+#define	FSF_AZ_NET_IVEC_FATAL_INT_WIDTH 1
+#define	FSF_AZ_NET_IVEC_INT_Q_LBN 40
+#define	FSF_AZ_NET_IVEC_INT_Q_WIDTH 4
+#define	FSF_AZ_NET_IVEC_INT_FLAG_LBN 32
+#define	FSF_AZ_NET_IVEC_INT_FLAG_WIDTH 1
+#define	FSF_AZ_NET_IVEC_EVQ_FIFO_HF_LBN 1
+#define	FSF_AZ_NET_IVEC_EVQ_FIFO_HF_WIDTH 1
+#define	FSF_AZ_NET_IVEC_EVQ_FIFO_AF_LBN 0
+#define	FSF_AZ_NET_IVEC_EVQ_FIFO_AF_WIDTH 1
+
+/* MC_XGMAC_FLTR_RULE_DEF */
+#define	FSF_CZ_MC_XFRC_MODE_LBN 416
+#define	FSF_CZ_MC_XFRC_MODE_WIDTH 1
+#define	FSE_CZ_MC_XFRC_MODE_LAYERED 1
+#define	FSE_CZ_MC_XFRC_MODE_SIMPLE 0
+#define	FSF_CZ_MC_XFRC_HASH_LBN 384
+#define	FSF_CZ_MC_XFRC_HASH_WIDTH 32
+#define	FSF_CZ_MC_XFRC_LAYER4_BYTE_MASK_LBN 256
+#define	FSF_CZ_MC_XFRC_LAYER4_BYTE_MASK_WIDTH 128
+#define	FSF_CZ_MC_XFRC_LAYER3_BYTE_MASK_LBN 128
+#define	FSF_CZ_MC_XFRC_LAYER3_BYTE_MASK_WIDTH 128
+#define	FSF_CZ_MC_XFRC_LAYER2_OR_SIMPLE_BYTE_MASK_LBN 0
+#define	FSF_CZ_MC_XFRC_LAYER2_OR_SIMPLE_BYTE_MASK_WIDTH 128
+
+/* RX_EV */
+#define	FSF_CZ_RX_EV_PKT_NOT_PARSED_LBN 58
+#define	FSF_CZ_RX_EV_PKT_NOT_PARSED_WIDTH 1
+#define	FSF_CZ_RX_EV_IPV6_PKT_LBN 57
+#define	FSF_CZ_RX_EV_IPV6_PKT_WIDTH 1
+#define	FSF_AZ_RX_EV_PKT_OK_LBN 56
+#define	FSF_AZ_RX_EV_PKT_OK_WIDTH 1
+#define	FSF_AZ_RX_EV_PAUSE_FRM_ERR_LBN 55
+#define	FSF_AZ_RX_EV_PAUSE_FRM_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_BUF_OWNER_ID_ERR_LBN 54
+#define	FSF_AZ_RX_EV_BUF_OWNER_ID_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_IP_FRAG_ERR_LBN 53
+#define	FSF_AZ_RX_EV_IP_FRAG_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR_LBN 52
+#define	FSF_AZ_RX_EV_IP_HDR_CHKSUM_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR_LBN 51
+#define	FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_ETH_CRC_ERR_LBN 50
+#define	FSF_AZ_RX_EV_ETH_CRC_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_FRM_TRUNC_LBN 49
+#define	FSF_AZ_RX_EV_FRM_TRUNC_WIDTH 1
+#define	FSF_AA_RX_EV_DRIB_NIB_LBN 49
+#define	FSF_AA_RX_EV_DRIB_NIB_WIDTH 1
+#define	FSF_AZ_RX_EV_TOBE_DISC_LBN 47
+#define	FSF_AZ_RX_EV_TOBE_DISC_WIDTH 1
+#define	FSF_AZ_RX_EV_PKT_TYPE_LBN 44
+#define	FSF_AZ_RX_EV_PKT_TYPE_WIDTH 3
+#define	FSE_AZ_RX_EV_PKT_TYPE_VLAN_JUMBO 5
+#define	FSE_AZ_RX_EV_PKT_TYPE_VLAN_LLC 4
+#define	FSE_AZ_RX_EV_PKT_TYPE_VLAN 3
+#define	FSE_AZ_RX_EV_PKT_TYPE_JUMBO 2
+#define	FSE_AZ_RX_EV_PKT_TYPE_LLC 1
+#define	FSE_AZ_RX_EV_PKT_TYPE_ETH 0
+#define	FSF_AZ_RX_EV_HDR_TYPE_LBN 42
+#define	FSF_AZ_RX_EV_HDR_TYPE_WIDTH 2
+#define	FSE_AZ_RX_EV_HDR_TYPE_OTHER 3
+#define	FSE_AB_RX_EV_HDR_TYPE_IPV4_OTHER 2
+#define	FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_OTHER 2
+#define	FSE_AB_RX_EV_HDR_TYPE_IPV4_UDP 1
+#define	FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_UDP 1
+#define	FSE_AB_RX_EV_HDR_TYPE_IPV4_TCP 0
+#define	FSE_CZ_RX_EV_HDR_TYPE_IPV4V6_TCP 0
+#define	FSF_AZ_RX_EV_DESC_Q_EMPTY_LBN 41
+#define	FSF_AZ_RX_EV_DESC_Q_EMPTY_WIDTH 1
+#define	FSF_AZ_RX_EV_MCAST_HASH_MATCH_LBN 40
+#define	FSF_AZ_RX_EV_MCAST_HASH_MATCH_WIDTH 1
+#define	FSF_AZ_RX_EV_MCAST_PKT_LBN 39
+#define	FSF_AZ_RX_EV_MCAST_PKT_WIDTH 1
+#define	FSF_AA_RX_EV_RECOVERY_FLAG_LBN 37
+#define	FSF_AA_RX_EV_RECOVERY_FLAG_WIDTH 1
+#define	FSF_AZ_RX_EV_Q_LABEL_LBN 32
+#define	FSF_AZ_RX_EV_Q_LABEL_WIDTH 5
+#define	FSF_AZ_RX_EV_JUMBO_CONT_LBN 31
+#define	FSF_AZ_RX_EV_JUMBO_CONT_WIDTH 1
+#define	FSF_AZ_RX_EV_PORT_LBN 30
+#define	FSF_AZ_RX_EV_PORT_WIDTH 1
+#define	FSF_AZ_RX_EV_BYTE_CNT_LBN 16
+#define	FSF_AZ_RX_EV_BYTE_CNT_WIDTH 14
+#define	FSF_AZ_RX_EV_SOP_LBN 15
+#define	FSF_AZ_RX_EV_SOP_WIDTH 1
+#define	FSF_AZ_RX_EV_ISCSI_PKT_OK_LBN 14
+#define	FSF_AZ_RX_EV_ISCSI_PKT_OK_WIDTH 1
+#define	FSF_AZ_RX_EV_ISCSI_DDIG_ERR_LBN 13
+#define	FSF_AZ_RX_EV_ISCSI_DDIG_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_ISCSI_HDIG_ERR_LBN 12
+#define	FSF_AZ_RX_EV_ISCSI_HDIG_ERR_WIDTH 1
+#define	FSF_AZ_RX_EV_DESC_PTR_LBN 0
+#define	FSF_AZ_RX_EV_DESC_PTR_WIDTH 12
+
+/* RX_KER_DESC */
+#define	FSF_AZ_RX_KER_BUF_SIZE_LBN 48
+#define	FSF_AZ_RX_KER_BUF_SIZE_WIDTH 14
+#define	FSF_AZ_RX_KER_BUF_REGION_LBN 46
+#define	FSF_AZ_RX_KER_BUF_REGION_WIDTH 2
+#define	FSF_AZ_RX_KER_BUF_ADDR_LBN 0
+#define	FSF_AZ_RX_KER_BUF_ADDR_WIDTH 46
+
+/* RX_USER_DESC */
+#define	FSF_AZ_RX_USER_2BYTE_OFFSET_LBN 20
+#define	FSF_AZ_RX_USER_2BYTE_OFFSET_WIDTH 12
+#define	FSF_AZ_RX_USER_BUF_ID_LBN 0
+#define	FSF_AZ_RX_USER_BUF_ID_WIDTH 20
+
+/* TX_EV */
+#define	FSF_AZ_TX_EV_PKT_ERR_LBN 38
+#define	FSF_AZ_TX_EV_PKT_ERR_WIDTH 1
+#define	FSF_AZ_TX_EV_PKT_TOO_BIG_LBN 37
+#define	FSF_AZ_TX_EV_PKT_TOO_BIG_WIDTH 1
+#define	FSF_AZ_TX_EV_Q_LABEL_LBN 32
+#define	FSF_AZ_TX_EV_Q_LABEL_WIDTH 5
+#define	FSF_AZ_TX_EV_PORT_LBN 16
+#define	FSF_AZ_TX_EV_PORT_WIDTH 1
+#define	FSF_AZ_TX_EV_WQ_FF_FULL_LBN 15
+#define	FSF_AZ_TX_EV_WQ_FF_FULL_WIDTH 1
+#define	FSF_AZ_TX_EV_BUF_OWNER_ID_ERR_LBN 14
+#define	FSF_AZ_TX_EV_BUF_OWNER_ID_ERR_WIDTH 1
+#define	FSF_AZ_TX_EV_COMP_LBN 12
+#define	FSF_AZ_TX_EV_COMP_WIDTH 1
+#define	FSF_AZ_TX_EV_DESC_PTR_LBN 0
+#define	FSF_AZ_TX_EV_DESC_PTR_WIDTH 12
+
+/* TX_KER_DESC */
+#define	FSF_AZ_TX_KER_CONT_LBN 62
+#define	FSF_AZ_TX_KER_CONT_WIDTH 1
+#define	FSF_AZ_TX_KER_BYTE_COUNT_LBN 48
+#define	FSF_AZ_TX_KER_BYTE_COUNT_WIDTH 14
+#define	FSF_AZ_TX_KER_BUF_REGION_LBN 46
+#define	FSF_AZ_TX_KER_BUF_REGION_WIDTH 2
+#define	FSF_AZ_TX_KER_BUF_ADDR_LBN 0
+#define	FSF_AZ_TX_KER_BUF_ADDR_WIDTH 46
+
+/* TX_USER_DESC */
+#define	FSF_AZ_TX_USER_SW_EV_EN_LBN 48
+#define	FSF_AZ_TX_USER_SW_EV_EN_WIDTH 1
+#define	FSF_AZ_TX_USER_CONT_LBN 46
+#define	FSF_AZ_TX_USER_CONT_WIDTH 1
+#define	FSF_AZ_TX_USER_BYTE_CNT_LBN 33
+#define	FSF_AZ_TX_USER_BYTE_CNT_WIDTH 13
+#define	FSF_AZ_TX_USER_BUF_ID_LBN 13
+#define	FSF_AZ_TX_USER_BUF_ID_WIDTH 20
+#define	FSF_AZ_TX_USER_BYTE_OFS_LBN 0
+#define	FSF_AZ_TX_USER_BYTE_OFS_WIDTH 13
+
+/* USER_EV */
+#define	FSF_CZ_USER_QID_LBN 32
+#define	FSF_CZ_USER_QID_WIDTH 10
+#define	FSF_CZ_USER_EV_REG_VALUE_LBN 0
+#define	FSF_CZ_USER_EV_REG_VALUE_WIDTH 32
+
+/**************************************************************************
+ *
+ * Falcon B0 PCIe core indirect registers
+ *
+ **************************************************************************
+ */
+
+#define FPCR_BB_PCIE_DEVICE_CTRL_STAT 0x68
+
+#define FPCR_BB_PCIE_LINK_CTRL_STAT 0x70
+
+#define FPCR_BB_ACK_RPL_TIMER 0x700
+#define FPCRF_BB_ACK_TL_LBN 0
+#define FPCRF_BB_ACK_TL_WIDTH 16
+#define FPCRF_BB_RPL_TL_LBN 16
+#define FPCRF_BB_RPL_TL_WIDTH 16
+
+#define FPCR_BB_ACK_FREQ 0x70C
+#define FPCRF_BB_ACK_FREQ_LBN 0
+#define FPCRF_BB_ACK_FREQ_WIDTH 7
+
+/**************************************************************************
+ *
+ * Pseudo-registers and fields
+ *
+ **************************************************************************
+ */
+
+/* Interrupt acknowledge work-around register (A0/A1 only) */
+#define FR_AA_WORK_AROUND_BROKEN_PCI_READS 0x0070
+
+/* EE_SPI_HCMD_REG: SPI host command register */
+/* Values for the EE_SPI_HCMD_SF_SEL register field */
+#define FFE_AB_SPI_DEVICE_EEPROM 0
+#define FFE_AB_SPI_DEVICE_FLASH 1
+
+/* NIC_STAT_REG: NIC status register */
+#define FRF_AB_STRAP_10G_LBN 2
+#define FRF_AB_STRAP_10G_WIDTH 1
+#define FRF_AA_STRAP_PCIE_LBN 0
+#define FRF_AA_STRAP_PCIE_WIDTH 1
+
+/* FATAL_INTR_REG_KER: Fatal interrupt register for Kernel */
+#define FRF_AZ_FATAL_INTR_LBN 0
+#define FRF_AZ_FATAL_INTR_WIDTH 12
+
+/* SRM_CFG_REG: SRAM configuration register */
+/* We treat the number of SRAM banks and bank size as a single field */
+#define	FRF_AZ_SRM_NB_SZ_LBN FRF_AZ_SRM_BANK_SIZE_LBN
+#define	FRF_AZ_SRM_NB_SZ_WIDTH \
+	(FRF_AZ_SRM_BANK_SIZE_WIDTH + FRF_AZ_SRM_NUM_BANK_WIDTH)
+#define FFE_AB_SRM_NB1_SZ2M 0
+#define FFE_AB_SRM_NB1_SZ4M 1
+#define FFE_AB_SRM_NB1_SZ8M 2
+#define FFE_AB_SRM_NB_SZ_DEF 3
+#define FFE_AB_SRM_NB2_SZ4M 4
+#define FFE_AB_SRM_NB2_SZ8M 5
+#define FFE_AB_SRM_NB2_SZ16M 6
+#define FFE_AB_SRM_NB_SZ_RES 7
+
+/* RX_DESC_UPD_REGP0: Receive descriptor update register. */
+/* We write just the last dword of these registers */
+#define	FR_AZ_RX_DESC_UPD_DWORD_P0 \
+	(BUILD_BUG_ON_ZERO(FR_AA_RX_DESC_UPD_KER != FR_BZ_RX_DESC_UPD_P0) + \
+	 FR_BZ_RX_DESC_UPD_P0 + 3 * 4)
+#define	FRF_AZ_RX_DESC_WPTR_DWORD_LBN (FRF_AZ_RX_DESC_WPTR_LBN - 3 * 32)
+#define	FRF_AZ_RX_DESC_WPTR_DWORD_WIDTH FRF_AZ_RX_DESC_WPTR_WIDTH
+
+/* TX_DESC_UPD_REGP0: Transmit descriptor update register. */
+#define FR_AZ_TX_DESC_UPD_DWORD_P0 \
+	(BUILD_BUG_ON_ZERO(FR_AA_TX_DESC_UPD_KER != FR_BZ_TX_DESC_UPD_P0) + \
+	 FR_BZ_TX_DESC_UPD_P0 + 3 * 4)
+#define	FRF_AZ_TX_DESC_WPTR_DWORD_LBN (FRF_AZ_TX_DESC_WPTR_LBN - 3 * 32)
+#define	FRF_AZ_TX_DESC_WPTR_DWORD_WIDTH FRF_AZ_TX_DESC_WPTR_WIDTH
+
+/* GMF_CFG4_REG: GMAC FIFO configuration register 4 */
+#define FRF_AB_GMF_HSTFLTRFRM_PAUSE_LBN 12
+#define FRF_AB_GMF_HSTFLTRFRM_PAUSE_WIDTH 1
+
+/* GMF_CFG5_REG: GMAC FIFO configuration register 5 */
+#define FRF_AB_GMF_HSTFLTRFRMDC_PAUSE_LBN 12
+#define FRF_AB_GMF_HSTFLTRFRMDC_PAUSE_WIDTH 1
+
+/* XM_TX_PARAM_REG: XGMAC transmit parameter register */
+#define	FRF_AB_XM_MAX_TX_FRM_SIZE_LBN FRF_AB_XM_MAX_TX_FRM_SIZE_LO_LBN
+#define	FRF_AB_XM_MAX_TX_FRM_SIZE_WIDTH (FRF_AB_XM_MAX_TX_FRM_SIZE_HI_WIDTH + \
+					 FRF_AB_XM_MAX_TX_FRM_SIZE_LO_WIDTH)
+
+/* XM_RX_PARAM_REG: XGMAC receive parameter register */
+#define	FRF_AB_XM_MAX_RX_FRM_SIZE_LBN FRF_AB_XM_MAX_RX_FRM_SIZE_LO_LBN
+#define	FRF_AB_XM_MAX_RX_FRM_SIZE_WIDTH (FRF_AB_XM_MAX_RX_FRM_SIZE_HI_WIDTH + \
+					 FRF_AB_XM_MAX_RX_FRM_SIZE_LO_WIDTH)
+
+/* XX_TXDRV_CTL_REG: XAUI SerDes transmit drive control register */
+/* Default values */
+#define FFE_AB_XX_TXDRV_DEQ_DEF 0xe /* deq=.6 */
+#define FFE_AB_XX_TXDRV_DTX_DEF 0x5 /* 1.25 */
+#define FFE_AB_XX_SD_CTL_DRV_DEF 0  /* 20mA */
+
+/* XX_CORE_STAT_REG: XAUI XGXS core status register */
+/* XGXS all-lanes status fields */
+#define	FRF_AB_XX_SYNC_STAT_LBN FRF_AB_XX_SYNC_STAT0_LBN
+#define	FRF_AB_XX_SYNC_STAT_WIDTH 4
+#define	FRF_AB_XX_COMMA_DET_LBN FRF_AB_XX_COMMA_DET_CH0_LBN
+#define	FRF_AB_XX_COMMA_DET_WIDTH 4
+#define	FRF_AB_XX_CHAR_ERR_LBN FRF_AB_XX_CHAR_ERR_CH0_LBN
+#define	FRF_AB_XX_CHAR_ERR_WIDTH 4
+#define	FRF_AB_XX_DISPERR_LBN FRF_AB_XX_DISPERR_CH0_LBN
+#define	FRF_AB_XX_DISPERR_WIDTH 4
+#define	FFE_AB_XX_STAT_ALL_LANES 0xf
+#define	FRF_AB_XX_FORCE_SIG_LBN FRF_AB_XX_FORCE_SIG0_VAL_LBN
+#define	FRF_AB_XX_FORCE_SIG_WIDTH 8
+#define	FFE_AB_XX_FORCE_SIG_ALL_LANES 0xff
+
+/* RX_MAC_FILTER_TBL0 */
+/* RMFT_DEST_MAC is wider than 32 bits */
+#define FRF_CZ_RMFT_DEST_MAC_LO_LBN FRF_CZ_RMFT_DEST_MAC_LBN
+#define FRF_CZ_RMFT_DEST_MAC_LO_WIDTH 32
+#define FRF_CZ_RMFT_DEST_MAC_HI_LBN (FRF_CZ_RMFT_DEST_MAC_LBN + 32)
+#define FRF_CZ_RMFT_DEST_MAC_HI_WIDTH (FRF_CZ_RMFT_DEST_MAC_WIDTH - 32)
+
+/* TX_MAC_FILTER_TBL0 */
+/* TMFT_SRC_MAC is wider than 32 bits */
+#define FRF_CZ_TMFT_SRC_MAC_LO_LBN FRF_CZ_TMFT_SRC_MAC_LBN
+#define FRF_CZ_TMFT_SRC_MAC_LO_WIDTH 32
+#define FRF_CZ_TMFT_SRC_MAC_HI_LBN (FRF_CZ_TMFT_SRC_MAC_LBN + 32)
+#define FRF_CZ_TMFT_SRC_MAC_HI_WIDTH (FRF_CZ_TMFT_SRC_MAC_WIDTH - 32)
+
+/* TX_PACE_TBL */
+/* Values >20 are documented as reserved, but will result in a queue going
+ * into the fast bin with a pace value of zero. */
+#define FFE_BZ_TX_PACE_OFF 0
+#define FFE_BZ_TX_PACE_RESERVED 21
+
+/* DRIVER_EV */
+/* Sub-fields of an RX flush completion event */
+#define FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL_LBN 12
+#define FSF_AZ_DRIVER_EV_RX_FLUSH_FAIL_WIDTH 1
+#define FSF_AZ_DRIVER_EV_RX_DESCQ_ID_LBN 0
+#define FSF_AZ_DRIVER_EV_RX_DESCQ_ID_WIDTH 12
+
+/* EVENT_ENTRY */
+/* Magic number field for event test */
+#define FSF_AZ_DRV_GEN_EV_MAGIC_LBN 0
+#define FSF_AZ_DRV_GEN_EV_MAGIC_WIDTH 32
+
+/* RX packet prefix */
+#define FS_BZ_RX_PREFIX_HASH_OFST 12
+#define FS_BZ_RX_PREFIX_SIZE 16
+
+#endif /* EF4_FARCH_REGS_H */
diff --git a/drivers/net/ethernet/sfc/falcon/filter.h b/drivers/net/ethernet/sfc/falcon/filter.h
new file mode 100644
index 000000000000..647f6b2725c5
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/filter.h
@@ -0,0 +1,272 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_FILTER_H
+#define EF4_FILTER_H
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <asm/byteorder.h>
+
+/**
+ * enum ef4_filter_match_flags - Flags for hardware filter match type
+ * @EF4_FILTER_MATCH_REM_HOST: Match by remote IP host address
+ * @EF4_FILTER_MATCH_LOC_HOST: Match by local IP host address
+ * @EF4_FILTER_MATCH_REM_MAC: Match by remote MAC address
+ * @EF4_FILTER_MATCH_REM_PORT: Match by remote TCP/UDP port
+ * @EF4_FILTER_MATCH_LOC_MAC: Match by local MAC address
+ * @EF4_FILTER_MATCH_LOC_PORT: Match by local TCP/UDP port
+ * @EF4_FILTER_MATCH_ETHER_TYPE: Match by Ether-type
+ * @EF4_FILTER_MATCH_INNER_VID: Match by inner VLAN ID
+ * @EF4_FILTER_MATCH_OUTER_VID: Match by outer VLAN ID
+ * @EF4_FILTER_MATCH_IP_PROTO: Match by IP transport protocol
+ * @EF4_FILTER_MATCH_LOC_MAC_IG: Match by local MAC address I/G bit.
+ *	Used for RX default unicast and multicast/broadcast filters.
+ *
+ * Only some combinations are supported, depending on NIC type:
+ *
+ * - Falcon supports RX filters matching by {TCP,UDP}/IPv4 4-tuple or
+ *   local 2-tuple (only implemented for Falcon B0)
+ *
+ * - Siena supports RX and TX filters matching by {TCP,UDP}/IPv4 4-tuple
+ *   or local 2-tuple, or local MAC with or without outer VID, and RX
+ *   default filters
+ *
+ * - Huntington supports filter matching controlled by firmware, potentially
+ *   using {TCP,UDP}/IPv{4,6} 4-tuple or local 2-tuple, local MAC or I/G bit,
+ *   with or without outer and inner VID
+ */
+enum ef4_filter_match_flags {	/* ef4_filter_spec.match_flags: 12 bits */
+	EF4_FILTER_MATCH_REM_HOST =	0x0001,
+	EF4_FILTER_MATCH_LOC_HOST =	0x0002,
+	EF4_FILTER_MATCH_REM_MAC =	0x0004,
+	EF4_FILTER_MATCH_REM_PORT =	0x0008,
+	EF4_FILTER_MATCH_LOC_MAC =	0x0010,
+	EF4_FILTER_MATCH_LOC_PORT =	0x0020,
+	EF4_FILTER_MATCH_ETHER_TYPE =	0x0040,
+	EF4_FILTER_MATCH_INNER_VID =	0x0080,
+	EF4_FILTER_MATCH_OUTER_VID =	0x0100,
+	EF4_FILTER_MATCH_IP_PROTO =	0x0200,
+	EF4_FILTER_MATCH_LOC_MAC_IG =	0x0400,
+};
+
+/**
+ * enum ef4_filter_priority - priority of a hardware filter specification
+ * @EF4_FILTER_PRI_HINT: Performance hint
+ * @EF4_FILTER_PRI_AUTO: Automatic filter based on device address list
+ *	or hardware requirements.  This may only be used by the filter
+ *	implementation for each NIC type.
+ * @EF4_FILTER_PRI_MANUAL: Manually configured filter
+ * @EF4_FILTER_PRI_REQUIRED: Required for correct behaviour (user-level
+ *	networking and SR-IOV)
+ */
+enum ef4_filter_priority {	/* ef4_filter_spec.priority: 2 bits */
+	EF4_FILTER_PRI_HINT = 0,
+	EF4_FILTER_PRI_AUTO,
+	EF4_FILTER_PRI_MANUAL,
+	EF4_FILTER_PRI_REQUIRED, /* 3: largest value a 2-bit field can hold */
+};
+
+/**
+ * enum ef4_filter_flags - flags for hardware filter specifications
+ * @EF4_FILTER_FLAG_RX_RSS: Use RSS to spread across multiple queues.
+ *	By default, matching packets will be delivered only to the
+ *	specified queue. If this flag is set, they will be delivered
+ *	to a range of queues offset from the specified queue number
+ *	according to the indirection table.
+ * @EF4_FILTER_FLAG_RX_SCATTER: Enable DMA scatter on the receiving
+ *	queue.
+ * @EF4_FILTER_FLAG_RX_OVER_AUTO: Indicates a filter that is
+ *	overriding an automatic filter (priority
+ *	%EF4_FILTER_PRI_AUTO).  This may only be set by the filter
+ *	implementation for each NIC type.  A removal request will restore
+ *	the automatic filter in its place.
+ * @EF4_FILTER_FLAG_RX: Filter is for RX
+ * @EF4_FILTER_FLAG_TX: Filter is for TX
+ */
+enum ef4_filter_flags {	/* ef4_filter_spec.flags: 6 bits */
+	EF4_FILTER_FLAG_RX_RSS = 0x01,
+	EF4_FILTER_FLAG_RX_SCATTER = 0x02,
+	EF4_FILTER_FLAG_RX_OVER_AUTO = 0x04,
+	EF4_FILTER_FLAG_RX = 0x08,
+	EF4_FILTER_FLAG_TX = 0x10,
+};
+
+/**
+ * struct ef4_filter_spec - specification for a hardware filter
+ * @match_flags: Match type flags, from &enum ef4_filter_match_flags
+ * @priority: Priority of the filter, from &enum ef4_filter_priority
+ * @flags: Miscellaneous flags, from &enum ef4_filter_flags
+ * @rss_context: RSS context to use, if %EF4_FILTER_FLAG_RX_RSS is set
+ * @dmaq_id: Source/target queue index, or %EF4_FILTER_RX_DMAQ_ID_DROP for
+ *	an RX drop filter
+ * @outer_vid: Outer VLAN ID to match, if %EF4_FILTER_MATCH_OUTER_VID is set
+ * @inner_vid: Inner VLAN ID to match, if %EF4_FILTER_MATCH_INNER_VID is set
+ * @loc_mac: Local MAC address to match, if %EF4_FILTER_MATCH_LOC_MAC or
+ *	%EF4_FILTER_MATCH_LOC_MAC_IG is set
+ * @rem_mac: Remote MAC address to match, if %EF4_FILTER_MATCH_REM_MAC is set
+ * @ether_type: Ether-type to match, if %EF4_FILTER_MATCH_ETHER_TYPE is set
+ * @ip_proto: IP transport protocol to match, if %EF4_FILTER_MATCH_IP_PROTO
+ *	is set
+ * @loc_host: Local IP host to match, if %EF4_FILTER_MATCH_LOC_HOST is set
+ * @rem_host: Remote IP host to match, if %EF4_FILTER_MATCH_REM_HOST is set
+ * @loc_port: Local TCP/UDP port to match, if %EF4_FILTER_MATCH_LOC_PORT is set
+ * @rem_port: Remote TCP/UDP port to match, if %EF4_FILTER_MATCH_REM_PORT is set
+ *
+ * The ef4_filter_init_rx() or ef4_filter_init_tx() function *must* be
+ * used to initialise the structure.  The ef4_filter_set_*() functions
+ * may then be used to set @rss_context, @match_flags and related
+ * fields.
+ *
+ * The @priority field is used by software to determine whether a new
+ * filter may replace an old one.  The hardware priority of a filter
+ * depends on which fields are matched.
+ */
+struct ef4_filter_spec {
+	u32	match_flags:12;
+	u32	priority:2;
+	u32	flags:6;
+	u32	dmaq_id:12;
+	u32	rss_context;
+	__be16	outer_vid __aligned(4); /* allow jhash2() of match values */
+	__be16	inner_vid;
+	u8	loc_mac[ETH_ALEN];
+	u8	rem_mac[ETH_ALEN];
+	__be16	ether_type;
+	u8	ip_proto;	/* presumably 1 pad byte follows (4-aligned __be32) */
+	__be32	loc_host[4];
+	__be32	rem_host[4];
+	__be16	loc_port;
+	__be16	rem_port;
+	/* total 64 bytes */
+};
+
+enum {
+	EF4_FILTER_RSS_CONTEXT_DEFAULT = 0xffffffff, /* rss_context default */
+	EF4_FILTER_RX_DMAQ_ID_DROP = 0xfff /* dmaq_id of an RX drop filter */
+};
+
+static inline void ef4_filter_init_rx(struct ef4_filter_spec *spec,
+				      enum ef4_filter_priority priority,
+				      enum ef4_filter_flags flags,
+				      unsigned rxq_id)
+{
+	memset(spec, 0, sizeof(*spec));	/* no match flags or values set */
+	spec->priority = priority;
+	spec->flags = EF4_FILTER_FLAG_RX | flags; /* RX flag forced on */
+	spec->rss_context = EF4_FILTER_RSS_CONTEXT_DEFAULT;
+	spec->dmaq_id = rxq_id;	/* stored in a 12-bit field */
+}
+
+static inline void ef4_filter_init_tx(struct ef4_filter_spec *spec,
+				      unsigned txq_id)
+{
+	memset(spec, 0, sizeof(*spec));	/* also leaves rss_context at 0 */
+	spec->priority = EF4_FILTER_PRI_REQUIRED; /* not caller-selectable */
+	spec->flags = EF4_FILTER_FLAG_TX;
+	spec->dmaq_id = txq_id;	/* stored in a 12-bit field */
+}
+
+/**
+ * ef4_filter_set_ipv4_local - match on IPv4 protocol, local host and port
+ * @spec: Specification to update
+ * @proto: Transport layer protocol number
+ * @host: Local host address (network byte order)
+ * @port: Local port (network byte order)
+ * Return: always 0
+ */
+static inline int ef4_filter_set_ipv4_local(struct ef4_filter_spec *spec,
+					    u8 proto, __be32 host, __be16 port)
+{
+	spec->ether_type = htons(ETH_P_IP);
+	spec->ip_proto = proto;
+	spec->loc_host[0] = host;
+	spec->loc_port = port;
+	spec->match_flags |=
+		EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT |
+		EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO;
+	return 0;
+}
+
+/**
+ * ef4_filter_set_ipv4_full - match on IPv4 protocol and both host/port pairs
+ * @spec: Specification to update
+ * @proto: Transport layer protocol number
+ * @lhost: Local host address (network byte order)
+ * @lport: Local port (network byte order)
+ * @rhost: Remote host address (network byte order)
+ * @rport: Remote port (network byte order)
+ *
+ * Adds the Ether-type, IP protocol, local and remote host/port match flags.
+ * Only element 0 of each host array is written.  Always returns 0.
+ */
+static inline int
+ef4_filter_set_ipv4_full(struct ef4_filter_spec *spec, u8 proto,
+			 __be32 lhost, __be16 lport,
+			 __be32 rhost, __be16 rport)
+{
+	/* The local half is exactly what ef4_filter_set_ipv4_local() sets up */
+	ef4_filter_set_ipv4_local(spec, proto, lhost, lport);
+
+	spec->match_flags |=
+		EF4_FILTER_MATCH_REM_HOST | EF4_FILTER_MATCH_REM_PORT;
+	spec->rem_host[0] = rhost;
+	spec->rem_port = rport;
+	return 0;
+}
+
+enum {
+	EF4_FILTER_VID_UNSPEC = 0xffff, /* "no VID" for ef4_filter_set_eth_local() */
+};
+
+/**
+ * ef4_filter_set_eth_local - match on local Ethernet address and/or VID
+ * @spec: Specification to update
+ * @vid: Outer VLAN ID to match, or %EF4_FILTER_VID_UNSPEC
+ * @addr: Local Ethernet MAC address, or %NULL
+ * Return: 0 on success, -EINVAL if neither @vid nor @addr is specified
+ */
+static inline int ef4_filter_set_eth_local(struct ef4_filter_spec *spec,
+					   u16 vid, const u8 *addr)
+{
+	if (vid == EF4_FILTER_VID_UNSPEC && !addr)
+		return -EINVAL;
+	if (addr) {
+		ether_addr_copy(spec->loc_mac, addr);
+		spec->match_flags |= EF4_FILTER_MATCH_LOC_MAC;
+	}
+	if (vid != EF4_FILTER_VID_UNSPEC) {
+		spec->outer_vid = htons(vid);
+		spec->match_flags |= EF4_FILTER_MATCH_OUTER_VID;
+	}
+	return 0;
+}
+
+/**
+ * ef4_filter_set_uc_def - specify matching otherwise-unmatched unicast
+ * @spec: Specification to update; loc_mac[] is not written here
+ */
+static inline int ef4_filter_set_uc_def(struct ef4_filter_spec *spec)
+{
+	spec->match_flags |= EF4_FILTER_MATCH_LOC_MAC_IG;
+	return 0;	/* cannot fail */
+}
+
+/**
+ * ef4_filter_set_mc_def - specify matching otherwise-unmatched multicast
+ * @spec: Specification to update; sets the match flag and loc_mac[0]
+ */
+static inline int ef4_filter_set_mc_def(struct ef4_filter_spec *spec)
+{
+	spec->match_flags |= EF4_FILTER_MATCH_LOC_MAC_IG;
+	spec->loc_mac[0] = 1;	/* presumably the I/G (group) bit */
+	return 0;	/* cannot fail */
+}
+
+#endif /* EF4_FILTER_H */
diff --git a/drivers/net/ethernet/sfc/falcon/io.h b/drivers/net/ethernet/sfc/falcon/io.h
new file mode 100644
index 000000000000..7085ee1d5e2b
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/io.h
@@ -0,0 +1,290 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_IO_H
+#define EF4_IO_H
+
+#include <linux/io.h>
+#include <linux/spinlock.h>
+
+/**************************************************************************
+ *
+ * NIC register I/O
+ *
+ **************************************************************************
+ *
+ * Notes on locking strategy for the Falcon architecture:
+ *
+ * Many CSRs are very wide and cannot be read or written atomically.
+ * Writes from the host are buffered by the Bus Interface Unit (BIU)
+ * up to 128 bits.  Whenever the host writes part of such a register,
+ * the BIU collects the written value and does not write to the
+ * underlying register until all 4 dwords have been written.  A
+ * similar buffering scheme applies to host access to the NIC's 64-bit
+ * SRAM.
+ *
+ * Writes to different CSRs and 64-bit SRAM words must be serialised,
+ * since interleaved access can result in lost writes.  We use
+ * ef4_nic::biu_lock for this.
+ *
+ * We also serialise reads from 128-bit CSRs and SRAM with the same
+ * spinlock.  This may not be necessary, but it doesn't really matter
+ * as there are no such reads on the fast path.
+ *
+ * The DMA descriptor pointers (RX_DESC_UPD and TX_DESC_UPD) are
+ * 128-bit but are special-cased in the BIU to avoid the need for
+ * locking in the host:
+ *
+ * - They are write-only.
+ * - The semantics of writing to these registers are such that
+ *   replacing the low 96 bits with zero does not affect functionality.
+ * - If the host writes to the last dword address of such a register
+ *   (i.e. the high 32 bits) the underlying register will always be
+ *   written.  If the collector and the current write together do not
+ *   provide values for all 128 bits of the register, the low 96 bits
+ *   will be written as zero.
+ * - If the host writes to the address of any other part of such a
+ *   register while the collector already holds values for some other
+ *   register, the write is discarded and the collector maintains its
+ *   current state.
+ *
+ * The EF10 architecture exposes very few registers to the host and
+ * most of them are only 32 bits wide.  The only exceptions are the MC
+ * doorbell register pair, which has its own latching, and
+ * TX_DESC_UPD, which works in a similar way to the Falcon
+ * architecture.
+ */
+
+#if BITS_PER_LONG == 64
+#define EF4_USE_QWORD_IO 1	/* selects the 64-bit MMIO paths in this file */
+#endif /* BITS_PER_LONG == 64 */
+
+#ifdef EF4_USE_QWORD_IO	/* raw 64-bit MMIO at efx->membase + reg */
+static inline void _ef4_writeq(struct ef4_nic *efx, __le64 value,
+				  unsigned int reg)
+{
+	__raw_writeq((__force u64)value, efx->membase + reg);
+}
+static inline __le64 _ef4_readq(struct ef4_nic *efx, unsigned int reg)
+{
+	return (__force __le64)__raw_readq(efx->membase + reg);
+}
+#endif /* EF4_USE_QWORD_IO */
+/* Raw 32-bit accessors: @reg is a byte offset from efx->membase */
+static inline void _ef4_writed(struct ef4_nic *efx, __le32 value,
+				  unsigned int reg)
+{
+	__raw_writel((__force u32)value, efx->membase + reg);
+}
+static inline __le32 _ef4_readd(struct ef4_nic *efx, unsigned int reg)
+{
+	return (__force __le32)__raw_readl(efx->membase + reg);
+}
+
+/* Write a normal 128-bit CSR in ascending order with biu_lock held. */
+static inline void ef4_writeo(struct ef4_nic *efx, const ef4_oword_t *value,
+			      unsigned int reg)
+{
+	unsigned long flags __attribute__ ((unused));
+
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "writing register %x with " EF4_OWORD_FMT "\n", reg,
+		   EF4_OWORD_VAL(*value));
+
+	spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef EF4_USE_QWORD_IO	/* two qword writes */
+	_ef4_writeq(efx, value->u64[0], reg + 0);
+	_ef4_writeq(efx, value->u64[1], reg + 8);
+#else /* !EF4_USE_QWORD_IO: four dword writes */
+	_ef4_writed(efx, value->u32[0], reg + 0);
+	_ef4_writed(efx, value->u32[1], reg + 4);
+	_ef4_writed(efx, value->u32[2], reg + 8);
+	_ef4_writed(efx, value->u32[3], reg + 12);
+#endif /* EF4_USE_QWORD_IO */
+	mmiowb();	/* order the MMIO writes ahead of the unlock */
+	spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+
+/* Write 64-bit SRAM word @index via @membase, with biu_lock held. */
+static inline void ef4_sram_writeq(struct ef4_nic *efx, void __iomem *membase,
+				   const ef4_qword_t *value, unsigned int index)
+{
+	unsigned int addr = index * sizeof(*value); /* index -> byte offset */
+	unsigned long flags __attribute__ ((unused));
+
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "writing SRAM address %x with " EF4_QWORD_FMT "\n",
+		   addr, EF4_QWORD_VAL(*value));
+
+	spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef EF4_USE_QWORD_IO	/* one qword write */
+	__raw_writeq((__force u64)value->u64[0], membase + addr);
+#else /* !EF4_USE_QWORD_IO: low dword, then high dword */
+	__raw_writel((__force u32)value->u32[0], membase + addr);
+	__raw_writel((__force u32)value->u32[1], membase + addr + 4);
+#endif /* EF4_USE_QWORD_IO */
+	mmiowb();	/* order the MMIO writes ahead of the unlock */
+	spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+
+/* Write a 32-bit CSR or the last dword of a special 128-bit CSR */
+static inline void ef4_writed(struct ef4_nic *efx, const ef4_dword_t *value,
+			      unsigned int reg)
+{
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "writing register %x with "EF4_DWORD_FMT"\n",
+		   reg, EF4_DWORD_VAL(*value));
+
+	/* No lock required; just one dword goes out via _ef4_writed() */
+	_ef4_writed(efx, value->u32[0], reg);
+}
+
+/* Read a 128-bit CSR as four dwords in ascending order, under biu_lock. */
+static inline void ef4_reado(struct ef4_nic *efx, ef4_oword_t *value,
+			     unsigned int reg)
+{
+	unsigned long flags __attribute__ ((unused));
+	unsigned int dword;
+
+	spin_lock_irqsave(&efx->biu_lock, flags);
+	/* Always dword reads, even when EF4_USE_QWORD_IO is defined */
+	for (dword = 0; dword < 4; dword++)
+		value->u32[dword] = _ef4_readd(efx, reg + 4 * dword);
+	spin_unlock_irqrestore(&efx->biu_lock, flags);
+
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "read from register %x, got " EF4_OWORD_FMT "\n", reg,
+		   EF4_OWORD_VAL(*value));
+}
+
+/* Read 64-bit SRAM word @index via @membase, with biu_lock held. */
+static inline void ef4_sram_readq(struct ef4_nic *efx, void __iomem *membase,
+				  ef4_qword_t *value, unsigned int index)
+{
+	unsigned int addr = index * sizeof(*value); /* index -> byte offset */
+	unsigned long flags __attribute__ ((unused));
+
+	spin_lock_irqsave(&efx->biu_lock, flags);
+#ifdef EF4_USE_QWORD_IO	/* one qword read */
+	value->u64[0] = (__force __le64)__raw_readq(membase + addr);
+#else /* !EF4_USE_QWORD_IO: low dword, then high dword */
+	value->u32[0] = (__force __le32)__raw_readl(membase + addr);
+	value->u32[1] = (__force __le32)__raw_readl(membase + addr + 4);
+#endif /* EF4_USE_QWORD_IO */
+	spin_unlock_irqrestore(&efx->biu_lock, flags);
+
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "read from SRAM address %x, got "EF4_QWORD_FMT"\n",
+		   addr, EF4_QWORD_VAL(*value));
+}
+
+/* Read a 32-bit CSR or SRAM dword into @value; takes no lock */
+static inline void ef4_readd(struct ef4_nic *efx, ef4_dword_t *value,
+				unsigned int reg)
+{
+	value->u32[0] = _ef4_readd(efx, reg);
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "read from register %x, got "EF4_DWORD_FMT"\n",
+		   reg, EF4_DWORD_VAL(*value));
+}
+
+/* Write entry @index of a table of 128-bit CSRs starting at @reg */
+static inline void
+ef4_writeo_table(struct ef4_nic *efx, const ef4_oword_t *value,
+		 unsigned int reg, unsigned int index)
+{
+	ef4_writeo(efx, value, reg + index * sizeof(ef4_oword_t));
+}
+
+/* Read entry @index of a table of 128-bit CSRs starting at @reg */
+static inline void ef4_reado_table(struct ef4_nic *efx, ef4_oword_t *value,
+				     unsigned int reg, unsigned int index)
+{
+	ef4_reado(efx, value, reg + index * sizeof(ef4_oword_t));
+}
+
+/* Page size (0x2000 = 8 KiB) used as step between per-VI registers */
+#define EF4_VI_PAGE_SIZE 0x2000
+
+/* Calculate offset to page-mapped register: page * page size + reg */
+#define EF4_PAGED_REG(page, reg) \
+	((page) * EF4_VI_PAGE_SIZE + (reg))
+
+/* Write the whole of RX_DESC_UPD or TX_DESC_UPD; biu_lock is not taken */
+static inline void _ef4_writeo_page(struct ef4_nic *efx, ef4_oword_t *value,
+				    unsigned int reg, unsigned int page)
+{
+	reg = EF4_PAGED_REG(page, reg);	/* now the offset within @page */
+
+	netif_vdbg(efx, hw, efx->net_dev,
+		   "writing register %x with " EF4_OWORD_FMT "\n", reg,
+		   EF4_OWORD_VAL(*value));
+
+#ifdef EF4_USE_QWORD_IO	/* two qword writes */
+	_ef4_writeq(efx, value->u64[0], reg + 0);
+	_ef4_writeq(efx, value->u64[1], reg + 8);
+#else /* !EF4_USE_QWORD_IO: four dword writes */
+	_ef4_writed(efx, value->u32[0], reg + 0);
+	_ef4_writed(efx, value->u32[1], reg + 4);
+	_ef4_writed(efx, value->u32[2], reg + 8);
+	_ef4_writed(efx, value->u32[3], reg + 12);
+#endif /* EF4_USE_QWORD_IO */
+}
+#define ef4_writeo_page(efx, value, reg, page)				\
+	_ef4_writeo_page(efx, value,					\
+			 reg +						\
+			 BUILD_BUG_ON_ZERO((reg) != 0x830 && (reg) != 0xa10), \
+			 page)
+
+/* Write a page-mapped 32-bit CSR (EVQ_RPTR, EVQ_TMR (EF10), or the
+ * high bits of RX_DESC_UPD or TX_DESC_UPD).  The ef4_writed_page() macro
+ * breaks the build for any @reg other than the offsets it lists. */
+static inline void
+_ef4_writed_page(struct ef4_nic *efx, const ef4_dword_t *value,
+		 unsigned int reg, unsigned int page)
+{
+	ef4_writed(efx, value, EF4_PAGED_REG(page, reg));
+}
+#define ef4_writed_page(efx, value, reg, page)				\
+	_ef4_writed_page(efx, value,					\
+			 reg +						\
+			 BUILD_BUG_ON_ZERO((reg) != 0x400 &&		\
+					   (reg) != 0x420 &&		\
+					   (reg) != 0x830 &&		\
+					   (reg) != 0x83c &&		\
+					   (reg) != 0xa18 &&		\
+					   (reg) != 0xa1c),		\
+			 page)
+
+/* Write TIMER_COMMAND, a page-mapped 32-bit CSR.  A BIU bug makes writes
+ * to TIMER_COMMAND[0] invalidate the collector register, so page 0 is
+ * written with biu_lock held; other pages are written without locking.
+ */
+static inline void _ef4_writed_page_locked(struct ef4_nic *efx,
+					   const ef4_dword_t *value,
+					   unsigned int reg,
+					   unsigned int page)
+{
+	unsigned long flags __attribute__ ((unused));
+
+	if (page != 0) {
+		ef4_writed(efx, value, EF4_PAGED_REG(page, reg));
+		return;
+	}
+	spin_lock_irqsave(&efx->biu_lock, flags);
+	ef4_writed(efx, value, EF4_PAGED_REG(page, reg));
+	spin_unlock_irqrestore(&efx->biu_lock, flags);
+}
+#define ef4_writed_page_locked(efx, value, reg, page)			\
+	_ef4_writed_page_locked(efx, value,				\
+				reg + BUILD_BUG_ON_ZERO((reg) != 0x420), \
+				page)
+
+#endif /* EF4_IO_H */
diff --git a/drivers/net/ethernet/sfc/mdio_10g.c b/drivers/net/ethernet/sfc/falcon/mdio_10g.c
index 8ff954c59efa..e7d7c09296aa 100644
--- a/drivers/net/ethernet/sfc/mdio_10g.c
+++ b/drivers/net/ethernet/sfc/falcon/mdio_10g.c
@@ -16,7 +16,7 @@
 #include "mdio_10g.h"
 #include "workarounds.h"
 
-unsigned efx_mdio_id_oui(u32 id)
+unsigned ef4_mdio_id_oui(u32 id)
 {
 	unsigned oui = 0;
 	int i;
@@ -31,19 +31,19 @@ unsigned efx_mdio_id_oui(u32 id)
 	return oui;
 }
 
-int efx_mdio_reset_mmd(struct efx_nic *port, int mmd,
+int ef4_mdio_reset_mmd(struct ef4_nic *port, int mmd,
 			    int spins, int spintime)
 {
 	u32 ctrl;
 
 	/* Catch callers passing values in the wrong units (or just silly) */
-	EFX_BUG_ON_PARANOID(spins * spintime >= 5000);
+	EF4_BUG_ON_PARANOID(spins * spintime >= 5000);
 
-	efx_mdio_write(port, mmd, MDIO_CTRL1, MDIO_CTRL1_RESET);
+	ef4_mdio_write(port, mmd, MDIO_CTRL1, MDIO_CTRL1_RESET);
 	/* Wait for the reset bit to clear. */
 	do {
 		msleep(spintime);
-		ctrl = efx_mdio_read(port, mmd, MDIO_CTRL1);
+		ctrl = ef4_mdio_read(port, mmd, MDIO_CTRL1);
 		spins--;
 
 	} while (spins && (ctrl & MDIO_CTRL1_RESET));
@@ -51,13 +51,13 @@ int efx_mdio_reset_mmd(struct efx_nic *port, int mmd,
 	return spins ? spins : -ETIMEDOUT;
 }
 
-static int efx_mdio_check_mmd(struct efx_nic *efx, int mmd)
+static int ef4_mdio_check_mmd(struct ef4_nic *efx, int mmd)
 {
 	int status;
 
 	if (mmd != MDIO_MMD_AN) {
 		/* Read MMD STATUS2 to check it is responding. */
-		status = efx_mdio_read(efx, mmd, MDIO_STAT2);
+		status = ef4_mdio_read(efx, mmd, MDIO_STAT2);
 		if ((status & MDIO_STAT2_DEVPRST) != MDIO_STAT2_DEVPRST_VAL) {
 			netif_err(efx, hw, efx->net_dev,
 				  "PHY MMD %d not responding.\n", mmd);
@@ -72,7 +72,7 @@ static int efx_mdio_check_mmd(struct efx_nic *efx, int mmd)
 #define MDIO45_RESET_TIME	1000 /* ms */
 #define MDIO45_RESET_ITERS	100
 
-int efx_mdio_wait_reset_mmds(struct efx_nic *efx, unsigned int mmd_mask)
+int ef4_mdio_wait_reset_mmds(struct ef4_nic *efx, unsigned int mmd_mask)
 {
 	const int spintime = MDIO45_RESET_TIME / MDIO45_RESET_ITERS;
 	int tries = MDIO45_RESET_ITERS;
@@ -86,7 +86,7 @@ int efx_mdio_wait_reset_mmds(struct efx_nic *efx, unsigned int mmd_mask)
 		in_reset = 0;
 		while (mask) {
 			if (mask & 1) {
-				stat = efx_mdio_read(efx, mmd, MDIO_CTRL1);
+				stat = ef4_mdio_read(efx, mmd, MDIO_CTRL1);
 				if (stat < 0) {
 					netif_err(efx, hw, efx->net_dev,
 						  "failed to read status of"
@@ -113,7 +113,7 @@ int efx_mdio_wait_reset_mmds(struct efx_nic *efx, unsigned int mmd_mask)
 	return rc;
 }
 
-int efx_mdio_check_mmds(struct efx_nic *efx, unsigned int mmd_mask)
+int ef4_mdio_check_mmds(struct ef4_nic *efx, unsigned int mmd_mask)
 {
 	int mmd = 0, probe_mmd, devs1, devs2;
 	u32 devices;
@@ -125,8 +125,8 @@ int efx_mdio_check_mmds(struct efx_nic *efx, unsigned int mmd_mask)
 	    __ffs(mmd_mask);
 
 	/* Check all the expected MMDs are present */
-	devs1 = efx_mdio_read(efx, probe_mmd, MDIO_DEVS1);
-	devs2 = efx_mdio_read(efx, probe_mmd, MDIO_DEVS2);
+	devs1 = ef4_mdio_read(efx, probe_mmd, MDIO_DEVS1);
+	devs2 = ef4_mdio_read(efx, probe_mmd, MDIO_DEVS2);
 	if (devs1 < 0 || devs2 < 0) {
 		netif_err(efx, hw, efx->net_dev,
 			  "failed to read devices present\n");
@@ -143,7 +143,7 @@ int efx_mdio_check_mmds(struct efx_nic *efx, unsigned int mmd_mask)
 
 	/* Check all required MMDs are responding and happy. */
 	while (mmd_mask) {
-		if ((mmd_mask & 1) && efx_mdio_check_mmd(efx, mmd))
+		if ((mmd_mask & 1) && ef4_mdio_check_mmd(efx, mmd))
 			return -EIO;
 		mmd_mask = mmd_mask >> 1;
 		mmd++;
@@ -152,7 +152,7 @@ int efx_mdio_check_mmds(struct efx_nic *efx, unsigned int mmd_mask)
 	return 0;
 }
 
-bool efx_mdio_links_ok(struct efx_nic *efx, unsigned int mmd_mask)
+bool ef4_mdio_links_ok(struct ef4_nic *efx, unsigned int mmd_mask)
 {
 	/* If the port is in loopback, then we should only consider a subset
 	 * of mmd's */
@@ -160,7 +160,7 @@ bool efx_mdio_links_ok(struct efx_nic *efx, unsigned int mmd_mask)
 		return true;
 	else if (LOOPBACK_MASK(efx) & LOOPBACKS_WS)
 		return false;
-	else if (efx_phy_mode_disabled(efx->phy_mode))
+	else if (ef4_phy_mode_disabled(efx->phy_mode))
 		return false;
 	else if (efx->loopback_mode == LOOPBACK_PHYXS)
 		mmd_mask &= ~(MDIO_DEVS_PHYXS |
@@ -178,59 +178,59 @@ bool efx_mdio_links_ok(struct efx_nic *efx, unsigned int mmd_mask)
 	return mdio45_links_ok(&efx->mdio, mmd_mask);
 }
 
-void efx_mdio_transmit_disable(struct efx_nic *efx)
+void ef4_mdio_transmit_disable(struct ef4_nic *efx)
 {
-	efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD,
+	ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD,
 			  MDIO_PMA_TXDIS, MDIO_PMD_TXDIS_GLOBAL,
 			  efx->phy_mode & PHY_MODE_TX_DISABLED);
 }
 
-void efx_mdio_phy_reconfigure(struct efx_nic *efx)
+void ef4_mdio_phy_reconfigure(struct ef4_nic *efx)
 {
-	efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD,
+	ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD,
 			  MDIO_CTRL1, MDIO_PMA_CTRL1_LOOPBACK,
 			  efx->loopback_mode == LOOPBACK_PMAPMD);
-	efx_mdio_set_flag(efx, MDIO_MMD_PCS,
+	ef4_mdio_set_flag(efx, MDIO_MMD_PCS,
 			  MDIO_CTRL1, MDIO_PCS_CTRL1_LOOPBACK,
 			  efx->loopback_mode == LOOPBACK_PCS);
-	efx_mdio_set_flag(efx, MDIO_MMD_PHYXS,
+	ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS,
 			  MDIO_CTRL1, MDIO_PHYXS_CTRL1_LOOPBACK,
 			  efx->loopback_mode == LOOPBACK_PHYXS_WS);
 }
 
-static void efx_mdio_set_mmd_lpower(struct efx_nic *efx,
+static void ef4_mdio_set_mmd_lpower(struct ef4_nic *efx,
 				    int lpower, int mmd)
 {
-	int stat = efx_mdio_read(efx, mmd, MDIO_STAT1);
+	int stat = ef4_mdio_read(efx, mmd, MDIO_STAT1);
 
 	netif_vdbg(efx, drv, efx->net_dev, "Setting low power mode for MMD %d to %d\n",
 		  mmd, lpower);
 
 	if (stat & MDIO_STAT1_LPOWERABLE) {
-		efx_mdio_set_flag(efx, mmd, MDIO_CTRL1,
+		ef4_mdio_set_flag(efx, mmd, MDIO_CTRL1,
 				  MDIO_CTRL1_LPOWER, lpower);
 	}
 }
 
-void efx_mdio_set_mmds_lpower(struct efx_nic *efx,
+void ef4_mdio_set_mmds_lpower(struct ef4_nic *efx,
 			      int low_power, unsigned int mmd_mask)
 {
 	int mmd = 0;
 	mmd_mask &= ~MDIO_DEVS_AN;
 	while (mmd_mask) {
 		if (mmd_mask & 1)
-			efx_mdio_set_mmd_lpower(efx, low_power, mmd);
+			ef4_mdio_set_mmd_lpower(efx, low_power, mmd);
 		mmd_mask = (mmd_mask >> 1);
 		mmd++;
 	}
 }
 
 /**
- * efx_mdio_set_settings - Set (some of) the PHY settings over MDIO.
+ * ef4_mdio_set_settings - Set (some of) the PHY settings over MDIO.
  * @efx:		Efx NIC
  * @ecmd:		New settings
  */
-int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+int ef4_mdio_set_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
 {
 	struct ethtool_cmd prev = { .cmd = ETHTOOL_GSET };
 
@@ -252,16 +252,16 @@ int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
 	    (ecmd->advertising | SUPPORTED_Autoneg) & ~prev.supported)
 		return -EINVAL;
 
-	efx_link_set_advertising(efx, ecmd->advertising | ADVERTISED_Autoneg);
-	efx_mdio_an_reconfigure(efx);
+	ef4_link_set_advertising(efx, ecmd->advertising | ADVERTISED_Autoneg);
+	ef4_mdio_an_reconfigure(efx);
 	return 0;
 }
 
 /**
- * efx_mdio_an_reconfigure - Push advertising flags and restart autonegotiation
+ * ef4_mdio_an_reconfigure - Push advertising flags and restart autonegotiation
  * @efx:		Efx NIC
  */
-void efx_mdio_an_reconfigure(struct efx_nic *efx)
+void ef4_mdio_an_reconfigure(struct ef4_nic *efx)
 {
 	int reg;
 
@@ -273,32 +273,32 @@ void efx_mdio_an_reconfigure(struct efx_nic *efx)
 		reg |= ADVERTISE_PAUSE_CAP;
 	if (efx->link_advertising & ADVERTISED_Asym_Pause)
 		reg |= ADVERTISE_PAUSE_ASYM;
-	efx_mdio_write(efx, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg);
+	ef4_mdio_write(efx, MDIO_MMD_AN, MDIO_AN_ADVERTISE, reg);
 
 	/* Set up the (extended) next page */
 	efx->phy_op->set_npage_adv(efx, efx->link_advertising);
 
 	/* Enable and restart AN */
-	reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_CTRL1);
+	reg = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_CTRL1);
 	reg |= MDIO_AN_CTRL1_ENABLE | MDIO_AN_CTRL1_RESTART | MDIO_AN_CTRL1_XNP;
-	efx_mdio_write(efx, MDIO_MMD_AN, MDIO_CTRL1, reg);
+	ef4_mdio_write(efx, MDIO_MMD_AN, MDIO_CTRL1, reg);
 }
 
-u8 efx_mdio_get_pause(struct efx_nic *efx)
+u8 ef4_mdio_get_pause(struct ef4_nic *efx)
 {
-	BUILD_BUG_ON(EFX_FC_AUTO & (EFX_FC_RX | EFX_FC_TX));
+	BUILD_BUG_ON(EF4_FC_AUTO & (EF4_FC_RX | EF4_FC_TX));
 
-	if (!(efx->wanted_fc & EFX_FC_AUTO))
+	if (!(efx->wanted_fc & EF4_FC_AUTO))
 		return efx->wanted_fc;
 
 	WARN_ON(!(efx->mdio.mmds & MDIO_DEVS_AN));
 
 	return mii_resolve_flowctrl_fdx(
 		mii_advertise_flowctrl(efx->wanted_fc),
-		efx_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_LPA));
+		ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_LPA));
 }
 
-int efx_mdio_test_alive(struct efx_nic *efx)
+int ef4_mdio_test_alive(struct ef4_nic *efx)
 {
 	int rc;
 	int devad = __ffs(efx->mdio.mmds);
@@ -306,8 +306,8 @@ int efx_mdio_test_alive(struct efx_nic *efx)
 
 	mutex_lock(&efx->mac_lock);
 
-	physid1 = efx_mdio_read(efx, devad, MDIO_DEVID1);
-	physid2 = efx_mdio_read(efx, devad, MDIO_DEVID2);
+	physid1 = ef4_mdio_read(efx, devad, MDIO_DEVID1);
+	physid2 = ef4_mdio_read(efx, devad, MDIO_DEVID2);
 
 	if ((physid1 == 0x0000) || (physid1 == 0xffff) ||
 	    (physid2 == 0x0000) || (physid2 == 0xffff)) {
@@ -315,7 +315,7 @@ int efx_mdio_test_alive(struct efx_nic *efx)
 			  "no MDIO PHY present with ID %d\n", efx->mdio.prtad);
 		rc = -EINVAL;
 	} else {
-		rc = efx_mdio_check_mmds(efx, efx->mdio.mmds);
+		rc = ef4_mdio_check_mmds(efx, efx->mdio.mmds);
 	}
 
 	mutex_unlock(&efx->mac_lock);
diff --git a/drivers/net/ethernet/sfc/mdio_10g.h b/drivers/net/ethernet/sfc/falcon/mdio_10g.h
index 4a2dc4c281b7..885cf7a834a6 100644
--- a/drivers/net/ethernet/sfc/mdio_10g.h
+++ b/drivers/net/ethernet/sfc/falcon/mdio_10g.h
@@ -7,8 +7,8 @@
  * by the Free Software Foundation, incorporated herein by reference.
  */
 
-#ifndef EFX_MDIO_10G_H
-#define EFX_MDIO_10G_H
+#ifndef EF4_MDIO_10G_H
+#define EF4_MDIO_10G_H
 
 #include <linux/mdio.h>
 
@@ -18,35 +18,35 @@
 
 #include "efx.h"
 
-static inline unsigned efx_mdio_id_rev(u32 id) { return id & 0xf; }
-static inline unsigned efx_mdio_id_model(u32 id) { return (id >> 4) & 0x3f; }
-unsigned efx_mdio_id_oui(u32 id);
+static inline unsigned ef4_mdio_id_rev(u32 id) { return id & 0xf; }
+static inline unsigned ef4_mdio_id_model(u32 id) { return (id >> 4) & 0x3f; }
+unsigned ef4_mdio_id_oui(u32 id);
 
-static inline int efx_mdio_read(struct efx_nic *efx, int devad, int addr)
+static inline int ef4_mdio_read(struct ef4_nic *efx, int devad, int addr)
 {
 	return efx->mdio.mdio_read(efx->net_dev, efx->mdio.prtad, devad, addr);
 }
 
 static inline void
-efx_mdio_write(struct efx_nic *efx, int devad, int addr, int value)
+ef4_mdio_write(struct ef4_nic *efx, int devad, int addr, int value)
 {
 	efx->mdio.mdio_write(efx->net_dev, efx->mdio.prtad, devad, addr, value);
 }
 
-static inline u32 efx_mdio_read_id(struct efx_nic *efx, int mmd)
+static inline u32 ef4_mdio_read_id(struct ef4_nic *efx, int mmd)
 {
-	u16 id_low = efx_mdio_read(efx, mmd, MDIO_DEVID2);
-	u16 id_hi = efx_mdio_read(efx, mmd, MDIO_DEVID1);
+	u16 id_low = ef4_mdio_read(efx, mmd, MDIO_DEVID2);
+	u16 id_hi = ef4_mdio_read(efx, mmd, MDIO_DEVID1);
 	return (id_hi << 16) | (id_low);
 }
 
-static inline bool efx_mdio_phyxgxs_lane_sync(struct efx_nic *efx)
+static inline bool ef4_mdio_phyxgxs_lane_sync(struct ef4_nic *efx)
 {
 	int i, lane_status;
 	bool sync;
 
 	for (i = 0; i < 2; ++i)
-		lane_status = efx_mdio_read(efx, MDIO_MMD_PHYXS,
+		lane_status = ef4_mdio_read(efx, MDIO_MMD_PHYXS,
 					    MDIO_PHYXS_LNSTAT);
 
 	sync = !!(lane_status & MDIO_PHYXS_LNSTAT_ALIGN);
@@ -56,7 +56,7 @@ static inline bool efx_mdio_phyxgxs_lane_sync(struct efx_nic *efx)
 	return sync;
 }
 
-const char *efx_mdio_mmd_name(int mmd);
+const char *ef4_mdio_mmd_name(int mmd);
 
 /*
  * Reset a specific MMD and wait for reset to clear.
@@ -64,47 +64,47 @@ const char *efx_mdio_mmd_name(int mmd);
  *
  * This function will sleep
  */
-int efx_mdio_reset_mmd(struct efx_nic *efx, int mmd, int spins, int spintime);
+int ef4_mdio_reset_mmd(struct ef4_nic *efx, int mmd, int spins, int spintime);
 
-/* As efx_mdio_check_mmd but for multiple MMDs */
-int efx_mdio_check_mmds(struct efx_nic *efx, unsigned int mmd_mask);
+/* As ef4_mdio_check_mmd but for multiple MMDs */
+int ef4_mdio_check_mmds(struct ef4_nic *efx, unsigned int mmd_mask);
 
 /* Check the link status of specified mmds in bit mask */
-bool efx_mdio_links_ok(struct efx_nic *efx, unsigned int mmd_mask);
+bool ef4_mdio_links_ok(struct ef4_nic *efx, unsigned int mmd_mask);
 
 /* Generic transmit disable support though PMAPMD */
-void efx_mdio_transmit_disable(struct efx_nic *efx);
+void ef4_mdio_transmit_disable(struct ef4_nic *efx);
 
 /* Generic part of reconfigure: set/clear loopback bits */
-void efx_mdio_phy_reconfigure(struct efx_nic *efx);
+void ef4_mdio_phy_reconfigure(struct ef4_nic *efx);
 
 /* Set the power state of the specified MMDs */
-void efx_mdio_set_mmds_lpower(struct efx_nic *efx, int low_power,
+void ef4_mdio_set_mmds_lpower(struct ef4_nic *efx, int low_power,
 			      unsigned int mmd_mask);
 
 /* Set (some of) the PHY settings over MDIO */
-int efx_mdio_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd);
+int ef4_mdio_set_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd);
 
 /* Push advertising flags and restart autonegotiation */
-void efx_mdio_an_reconfigure(struct efx_nic *efx);
+void ef4_mdio_an_reconfigure(struct ef4_nic *efx);
 
 /* Get pause parameters from AN if available (otherwise return
  * requested pause parameters)
  */
-u8 efx_mdio_get_pause(struct efx_nic *efx);
+u8 ef4_mdio_get_pause(struct ef4_nic *efx);
 
 /* Wait for specified MMDs to exit reset within a timeout */
-int efx_mdio_wait_reset_mmds(struct efx_nic *efx, unsigned int mmd_mask);
+int ef4_mdio_wait_reset_mmds(struct ef4_nic *efx, unsigned int mmd_mask);
 
 /* Set or clear flag, debouncing */
 static inline void
-efx_mdio_set_flag(struct efx_nic *efx, int devad, int addr,
+ef4_mdio_set_flag(struct ef4_nic *efx, int devad, int addr,
 		  int mask, bool state)
 {
 	mdio_set_flag(&efx->mdio, efx->mdio.prtad, devad, addr, mask, state);
 }
 
 /* Liveness self-test for MDIO PHYs */
-int efx_mdio_test_alive(struct efx_nic *efx);
+int ef4_mdio_test_alive(struct ef4_nic *efx);
 
-#endif /* EFX_MDIO_10G_H */
+#endif /* EF4_MDIO_10G_H */
diff --git a/drivers/net/ethernet/sfc/falcon/mtd.c b/drivers/net/ethernet/sfc/falcon/mtd.c
new file mode 100644
index 000000000000..cde593cb1052
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/mtd.c
@@ -0,0 +1,133 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/module.h>
+#include <linux/mtd/mtd.h>
+#include <linux/slab.h>
+#include <linux/rtnetlink.h>
+
+#include "net_driver.h"
+#include "efx.h"
+/* Map a pointer to the embedded mtd member back to its ef4_mtd_partition */
+#define to_ef4_mtd_partition(_mtd)				\
+	container_of(_mtd, struct ef4_mtd_partition, mtd)
+
+/* MTD interface */
+
+static int ef4_mtd_erase(struct mtd_info *mtd, struct erase_info *erase)
+{
+	struct ef4_nic *efx = mtd->priv;
+	int rc = efx->type->mtd_erase(mtd, erase->addr, erase->len);
+
+	/* Record the outcome in @erase before running the erase callback */
+	if (rc) {
+		erase->state = MTD_ERASE_FAILED;
+		erase->fail_addr = MTD_FAIL_ADDR_UNKNOWN;
+	} else {
+		erase->state = MTD_ERASE_DONE;
+	}
+	mtd_erase_callback(erase);
+	return rc;
+}
+
+static void ef4_mtd_sync(struct mtd_info *mtd)
+{
+	struct ef4_mtd_partition *part = to_ef4_mtd_partition(mtd);
+	struct ef4_nic *efx = mtd->priv;
+	int rc = efx->type->mtd_sync(mtd);
+
+	/* void return: a failing sync hook can only be reported in the log */
+	if (!rc)
+		return;
+	pr_err("%s: %s sync failed (%d)\n", part->name, part->dev_type_name, rc);
+}
+
+static void ef4_mtd_remove_partition(struct ef4_mtd_partition *part)
+{
+	int rc;
+
+	/* Unregister the MTD device, retrying once a second for as long
+	 * as the call returns -EBUSY, then unlink the partition.
+	 */
+	while ((rc = mtd_device_unregister(&part->mtd)) == -EBUSY)
+		ssleep(1);
+
+	WARN_ON(rc);
+	list_del(&part->node);
+}
+
+int ef4_mtd_add(struct ef4_nic *efx, struct ef4_mtd_partition *parts,
+		size_t n_parts, size_t sizeof_part)
+{
+	struct ef4_mtd_partition *part;
+	size_t i;
+	/* Register @n_parts partitions from @parts, spaced @sizeof_part apart */
+	for (i = 0; i < n_parts; i++) {
+		part = (struct ef4_mtd_partition *)((char *)parts +
+						    i * sizeof_part);
+		/* Fill in the mtd_info; read/write hooks come from efx->type */
+		part->mtd.writesize = 1;
+
+		part->mtd.owner = THIS_MODULE;
+		part->mtd.priv = efx;
+		part->mtd.name = part->name;
+		part->mtd._erase = ef4_mtd_erase;
+		part->mtd._read = efx->type->mtd_read;
+		part->mtd._write = efx->type->mtd_write;
+		part->mtd._sync = ef4_mtd_sync;
+
+		efx->type->mtd_rename(part);
+
+		if (mtd_device_register(&part->mtd, NULL, 0))
+			goto fail;
+
+		/* Add to list in order - ef4_mtd_remove() depends on this */
+		list_add_tail(&part->node, &efx->mtd_list);
+	}
+
+	return 0;
+
+fail:
+	while (i--) {	/* unwind entries i-1..0; entry i was never listed */
+		part = (struct ef4_mtd_partition *)((char *)parts +
+						    i * sizeof_part);
+		ef4_mtd_remove_partition(part);
+	}
+	/* Failure is unlikely here, but probably means we're out of memory */
+	return -ENOMEM;
+}
+
+void ef4_mtd_remove(struct ef4_nic *efx)
+{
+	struct ef4_mtd_partition *parts, *part, *next;
+	/* Unregister every partition on efx->mtd_list, then free them */
+	WARN_ON(ef4_dev_registered(efx));
+
+	if (list_empty(&efx->mtd_list))
+		return;
+	/* Save the first entry now: the loop below unlinks every entry */
+	parts = list_first_entry(&efx->mtd_list, struct ef4_mtd_partition,
+				 node);
+
+	list_for_each_entry_safe(part, next, &efx->mtd_list, node)
+		ef4_mtd_remove_partition(part);
+	/* Single kfree(): the first entry is used as the allocation base */
+	kfree(parts);
+}
+
+void ef4_mtd_rename(struct ef4_nic *efx)
+{
+	struct ef4_mtd_partition *part;
+
+	ASSERT_RTNL();
+	/* Invoke the NIC type's mtd_rename hook on each listed partition */
+	list_for_each_entry(part, &efx->mtd_list, node)
+		efx->type->mtd_rename(part);
+}
diff --git a/drivers/net/ethernet/sfc/falcon/net_driver.h b/drivers/net/ethernet/sfc/falcon/net_driver.h
new file mode 100644
index 000000000000..210b28f7d2a1
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/net_driver.h
@@ -0,0 +1,1464 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+/* Common definitions for all Efx net driver code */
+
+#ifndef EF4_NET_DRIVER_H
+#define EF4_NET_DRIVER_H
+
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include <linux/timer.h>
+#include <linux/mdio.h>
+#include <linux/list.h>
+#include <linux/pci.h>
+#include <linux/device.h>
+#include <linux/highmem.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/vmalloc.h>
+#include <linux/i2c.h>
+#include <linux/mtd/mtd.h>
+#include <net/busy_poll.h>
+
+#include "enum.h"
+#include "bitfield.h"
+#include "filter.h"
+
+/**************************************************************************
+ *
+ * Build definitions
+ *
+ **************************************************************************/
+
+#define EF4_DRIVER_VERSION	"4.1"
+
+#ifdef DEBUG
+#define EF4_BUG_ON_PARANOID(x) BUG_ON(x)
+#define EF4_WARN_ON_PARANOID(x) WARN_ON(x)
+#else
+#define EF4_BUG_ON_PARANOID(x) do {} while (0)
+#define EF4_WARN_ON_PARANOID(x) do {} while (0)
+#endif
+
+/**************************************************************************
+ *
+ * Efx data structures
+ *
+ **************************************************************************/
+
+#define EF4_MAX_CHANNELS 32U
+#define EF4_MAX_RX_QUEUES EF4_MAX_CHANNELS
+#define EF4_EXTRA_CHANNEL_IOV	0
+#define EF4_EXTRA_CHANNEL_PTP	1
+#define EF4_MAX_EXTRA_CHANNELS	2U
+
+/* Checksum generation is a per-queue option in hardware, so each
+ * queue visible to the networking core is backed by two hardware TX
+ * queues. */
+#define EF4_MAX_TX_TC		2
+#define EF4_MAX_CORE_TX_QUEUES	(EF4_MAX_TX_TC * EF4_MAX_CHANNELS)
+#define EF4_TXQ_TYPE_OFFLOAD	1	/* flag */
+#define EF4_TXQ_TYPE_HIGHPRI	2	/* flag */
+#define EF4_TXQ_TYPES		4
+#define EF4_MAX_TX_QUEUES	(EF4_TXQ_TYPES * EF4_MAX_CHANNELS)
+
+/* Maximum possible MTU the driver supports */
+#define EF4_MAX_MTU (9 * 1024)
+
+/* Minimum MTU, from RFC791 (IP) */
+#define EF4_MIN_MTU 68
+
+/* Size of an RX scatter buffer.  Small enough to pack 2 into a 4K page,
+ * and should be a multiple of the cache line size.
+ */
+#define EF4_RX_USR_BUF_SIZE	(2048 - 256)
+
+/* If possible, we should ensure cache line alignment at start and end
+ * of every buffer.  Otherwise, we just need to ensure 4-byte
+ * alignment of the network header.
+ */
+#if NET_IP_ALIGN == 0
+#define EF4_RX_BUF_ALIGNMENT	L1_CACHE_BYTES
+#else
+#define EF4_RX_BUF_ALIGNMENT	4
+#endif
+
+struct ef4_self_tests;
+
+/**
+ * struct ef4_buffer - A general-purpose DMA buffer
+ * @addr: host base address of the buffer
+ * @dma_addr: DMA base address of the buffer
+ * @len: Buffer length, in bytes
+ *
+ * The NIC uses these buffers for its interrupt status registers and
+ * MAC stats dumps.
+ */
+struct ef4_buffer {
+	void *addr;
+	dma_addr_t dma_addr;
+	unsigned int len;
+};
+
+/**
+ * struct ef4_special_buffer - DMA buffer entered into buffer table
+ * @buf: Standard &struct ef4_buffer
+ * @index: Buffer index within controller's buffer table
+ * @entries: Number of buffer table entries
+ *
+ * The NIC has a buffer table that maps buffers of size %EF4_BUF_SIZE.
+ * Event and descriptor rings are addressed via one or more buffer
+ * table entries (and so can be physically non-contiguous, although we
+ * currently do not take advantage of that).  On Falcon and Siena we
+ * have to take care of allocating and initialising the entries
+ * ourselves.  On later hardware this is managed by the firmware and
+ * @index and @entries are left as 0.
+ */
+struct ef4_special_buffer {
+	struct ef4_buffer buf;
+	unsigned int index;
+	unsigned int entries;
+};
+
+/**
+ * struct ef4_tx_buffer - buffer state for a TX descriptor
+ * @skb: When @flags & %EF4_TX_BUF_SKB, the associated socket buffer to be
+ *	freed when descriptor completes
+ * @option: When @flags & %EF4_TX_BUF_OPTION, a NIC-specific option descriptor.
+ * @dma_addr: DMA address of the fragment.
+ * @flags: Flags for allocation and DMA mapping type
+ * @len: Length of this fragment.
+ *	This field is zero when the queue slot is empty.
+ * @unmap_len: Length of this fragment to unmap
+ * @dma_offset: Offset of @dma_addr from the address of the backing DMA mapping.
+ * Only valid if @unmap_len != 0.
+ */
+struct ef4_tx_buffer {
+	const struct sk_buff *skb;
+	union {
+		ef4_qword_t option;
+		dma_addr_t dma_addr;
+	};
+	unsigned short flags;
+	unsigned short len;
+	unsigned short unmap_len;
+	unsigned short dma_offset;
+};
+#define EF4_TX_BUF_CONT		1	/* not last descriptor of packet */
+#define EF4_TX_BUF_SKB		2	/* buffer is last part of skb */
+#define EF4_TX_BUF_MAP_SINGLE	8	/* buffer was mapped with dma_map_single() */
+#define EF4_TX_BUF_OPTION	0x10	/* empty buffer for option descriptor */
+
+/**
+ * struct ef4_tx_queue - An Efx TX queue
+ *
+ * This is a ring buffer of TX fragments.
+ * Since the TX completion path always executes on the same
+ * CPU and the xmit path can operate on different CPUs,
+ * performance is increased by ensuring that the completion
+ * path and the xmit path operate on different cache lines.
+ * This is particularly important if the xmit path is always
+ * executing on one CPU which is different from the completion
+ * path.  There is also a cache line for members which are
+ * read but not written on the fast path.
+ *
+ * @efx: The associated Efx NIC
+ * @queue: DMA queue number
+ * @channel: The associated channel
+ * @core_txq: The networking core TX queue structure
+ * @buffer: The software buffer ring
+ * @cb_page: Array of pages of copy buffers.  Carved up according to
+ *	%EF4_TX_CB_ORDER into %EF4_TX_CB_SIZE-sized chunks.
+ * @txd: The hardware descriptor ring
+ * @ptr_mask: The size of the ring minus 1.
+ * @initialised: Has hardware queue been initialised?
+ * @tx_min_size: Minimum transmit size for this queue. Depends on HW.
+ * @read_count: Current read pointer.
+ *	This is the number of buffers that have been removed from both rings.
+ * @old_write_count: The value of @write_count when last checked.
+ *	This is here for performance reasons.  The xmit path will
+ *	only get the up-to-date value of @write_count if this
+ *	variable indicates that the queue is empty.  This is to
+ *	avoid cache-line ping-pong between the xmit path and the
+ *	completion path.
+ * @merge_events: Number of TX merged completion events
+ * @insert_count: Current insert pointer
+ *	This is the number of buffers that have been added to the
+ *	software ring.
+ * @write_count: Current write pointer
+ *	This is the number of buffers that have been added to the
+ *	hardware ring.
+ * @old_read_count: The value of read_count when last checked.
+ *	This is here for performance reasons.  The xmit path will
+ *	only get the up-to-date value of read_count if this
+ *	variable indicates that the queue is full.  This is to
+ *	avoid cache-line ping-pong between the xmit path and the
+ *	completion path.
+ * @pushes: Number of times the TX push feature has been used
+ * @xmit_more_available: Are any packets waiting to be pushed to the NIC
+ * @cb_packets: Number of times the TX copybreak feature has been used
+ * @empty_read_count: If the completion path has seen the queue as empty
+ *	and the transmission path has not yet checked this, the value of
+ *	@read_count bitwise-added to %EF4_EMPTY_COUNT_VALID; otherwise 0.
+ */
+struct ef4_tx_queue {
+	/* Members which don't change on the fast path */
+	struct ef4_nic *efx ____cacheline_aligned_in_smp;
+	unsigned queue;
+	struct ef4_channel *channel;
+	struct netdev_queue *core_txq;
+	struct ef4_tx_buffer *buffer;
+	struct ef4_buffer *cb_page;
+	struct ef4_special_buffer txd;
+	unsigned int ptr_mask;
+	bool initialised;
+	unsigned int tx_min_size;
+
+	/* Function pointers used in the fast path. */
+	int (*handle_tso)(struct ef4_tx_queue*, struct sk_buff*, bool *);
+
+	/* Members used mainly on the completion path */
+	unsigned int read_count ____cacheline_aligned_in_smp;
+	unsigned int old_write_count;
+	unsigned int merge_events;
+	unsigned int bytes_compl;
+	unsigned int pkts_compl;
+
+	/* Members used only on the xmit path */
+	unsigned int insert_count ____cacheline_aligned_in_smp;
+	unsigned int write_count;
+	unsigned int old_read_count;
+	unsigned int pushes;
+	bool xmit_more_available;
+	unsigned int cb_packets;
+	/* Statistics to supplement MAC stats */
+	unsigned long tx_packets;
+
+	/* Members shared between paths and sometimes updated */
+	unsigned int empty_read_count ____cacheline_aligned_in_smp;
+#define EF4_EMPTY_COUNT_VALID 0x80000000
+	atomic_t flush_outstanding;
+};
+/* Copy-buffer chunk size: 2^EF4_TX_CB_ORDER bytes less NET_IP_ALIGN */
+#define EF4_TX_CB_ORDER	7
+#define EF4_TX_CB_SIZE	((1 << EF4_TX_CB_ORDER) - NET_IP_ALIGN)
+
+/**
+ * struct ef4_rx_buffer - An Efx RX data buffer
+ * @dma_addr: DMA base address of the buffer
+ * @page: The associated page buffer.
+ *	Will be %NULL if the buffer slot is currently free.
+ * @page_offset: If pending: offset in @page of DMA base address.
+ *	If completed: offset in @page of Ethernet header.
+ * @len: If pending: length for DMA descriptor.
+ *	If completed: received length, excluding hash prefix.
+ * @flags: Flags for buffer and packet state.  These are only set on the
+ *	first buffer of a scattered packet.
+ */
+struct ef4_rx_buffer {
+	dma_addr_t dma_addr;
+	struct page *page;
+	u16 page_offset;
+	u16 len;
+	u16 flags;
+};
+#define EF4_RX_BUF_LAST_IN_PAGE	0x0001
+#define EF4_RX_PKT_CSUMMED	0x0002
+#define EF4_RX_PKT_DISCARD	0x0004
+#define EF4_RX_PKT_TCP		0x0040
+#define EF4_RX_PKT_PREFIX_LEN	0x0080	/* length is in prefix only */
+
+/**
+ * struct ef4_rx_page_state - Page-based rx buffer state
+ *
+ * Inserted at the start of every page allocated for receive buffers.
+ * Used to facilitate sharing dma mappings between recycled rx buffers
+ * and those passed up to the kernel.
+ *
+ * @dma_addr: The dma address of this page.
+ */
+struct ef4_rx_page_state {
+	dma_addr_t dma_addr;
+
+	unsigned int __pad[0] ____cacheline_aligned;
+};
+
+/**
+ * struct ef4_rx_queue - An Efx RX queue
+ * @efx: The associated Efx NIC
+ * @core_index:  Index of network core RX queue.  Will be >= 0 iff this
+ *	is associated with a real RX queue.
+ * @buffer: The software buffer ring
+ * @rxd: The hardware descriptor ring
+ * @ptr_mask: The size of the ring minus 1.
+ * @refill_enabled: Enable refill whenever fill level is low
+ * @flush_pending: Set when a RX flush is pending. Has the same lifetime as
+ *	@rxq_flush_pending.
+ * @added_count: Number of buffers added to the receive queue.
+ * @notified_count: Number of buffers given to NIC (<= @added_count).
+ * @removed_count: Number of buffers removed from the receive queue.
+ * @scatter_n: Used by NIC specific receive code.
+ * @scatter_len: Used by NIC specific receive code.
+ * @page_ring: The ring to store DMA mapped pages for reuse.
+ * @page_add: Counter to calculate the write pointer for the recycle ring.
+ * @page_remove: Counter to calculate the read pointer for the recycle ring.
+ * @page_recycle_count: The number of pages that have been recycled.
+ * @page_recycle_failed: The number of pages that couldn't be recycled because
+ *      the kernel still held a reference to them.
+ * @page_recycle_full: The number of pages that were released because the
+ *      recycle ring was full.
+ * @page_ptr_mask: The number of pages in the RX recycle ring minus 1.
+ * @max_fill: RX descriptor maximum fill level (<= ring size)
+ * @fast_fill_trigger: RX descriptor fill level that will trigger a fast fill
+ *	(<= @max_fill)
+ * @min_fill: RX descriptor minimum non-zero fill level.
+ *	This records the minimum fill level observed when a ring
+ *	refill was triggered.
+ * @recycle_count: RX buffer recycle counter.
+ * @slow_fill: Timer used to defer ef4_nic_generate_fill_event().
+ */
+struct ef4_rx_queue {
+	struct ef4_nic *efx;
+	int core_index;
+	struct ef4_rx_buffer *buffer;
+	struct ef4_special_buffer rxd;
+	unsigned int ptr_mask;
+	bool refill_enabled;
+	bool flush_pending;
+
+	unsigned int added_count;
+	unsigned int notified_count;
+	unsigned int removed_count;
+	unsigned int scatter_n;
+	unsigned int scatter_len;
+	struct page **page_ring;
+	unsigned int page_add;
+	unsigned int page_remove;
+	unsigned int page_recycle_count;
+	unsigned int page_recycle_failed;
+	unsigned int page_recycle_full;
+	unsigned int page_ptr_mask;
+	unsigned int max_fill;
+	unsigned int fast_fill_trigger;
+	unsigned int min_fill;
+	unsigned int min_overfill;
+	unsigned int recycle_count;
+	struct timer_list slow_fill;
+	unsigned int slow_fill_count;
+	/* Statistics to supplement MAC stats */
+	unsigned long rx_packets;
+};
+
+/**
+ * struct ef4_channel - An Efx channel
+ *
+ * A channel comprises an event queue, at least one TX queue, at least
+ * one RX queue, and an associated tasklet for processing the event
+ * queue.
+ *
+ * @efx: Associated Efx NIC
+ * @channel: Channel instance number
+ * @type: Channel type definition
+ * @eventq_init: Event queue initialised flag
+ * @enabled: Channel enabled indicator
+ * @irq: IRQ number (MSI and MSI-X only)
+ * @irq_moderation_us: IRQ moderation value (in microseconds)
+ * @napi_dev: Net device used with NAPI
+ * @napi_str: NAPI control structure
+ * @busy_poll_state: State word arbitrating NAPI vs busy polling; only
+ *	present with CONFIG_NET_RX_BUSY_POLL, updated with atomic operations
+ * @eventq: Event queue buffer
+ * @eventq_mask: Event queue pointer mask
+ * @eventq_read_ptr: Event queue read pointer
+ * @event_test_cpu: Last CPU to handle interrupt or test event for this channel
+ * @irq_count: Number of IRQs since last adaptive moderation decision
+ * @irq_mod_score: IRQ moderation score
+ * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS,
+ *      indexed by filter ID
+ * @n_rx_tobe_disc: Count of RX_TOBE_DISC errors
+ * @n_rx_ip_hdr_chksum_err: Count of RX IP header checksum errors
+ * @n_rx_tcp_udp_chksum_err: Count of RX TCP and UDP checksum errors
+ * @n_rx_mcast_mismatch: Count of unmatched multicast frames
+ * @n_rx_frm_trunc: Count of RX_FRM_TRUNC errors
+ * @n_rx_overlength: Count of RX_OVERLENGTH errors
+ * @n_skbuff_leaks: Count of skbuffs leaked due to RX overrun
+ * @n_rx_nodesc_trunc: Number of RX packets truncated and then dropped due to
+ *	lack of descriptors
+ * @n_rx_merge_events: Number of RX merged completion events
+ * @n_rx_merge_packets: Number of RX packets completed by merged events
+ * @rx_pkt_n_frags: Number of fragments in next packet to be delivered by
+ *	__ef4_rx_packet(), or zero if there is none
+ * @rx_pkt_index: Ring index of first buffer for next packet to be delivered
+ *	by __ef4_rx_packet(), if @rx_pkt_n_frags != 0
+ * @rx_queue: RX queue for this channel
+ * @tx_queue: TX queues for this channel
+ */
+struct ef4_channel {
+	struct ef4_nic *efx;
+	int channel;
+	const struct ef4_channel_type *type;
+	bool eventq_init;
+	bool enabled;
+	int irq;
+	unsigned int irq_moderation_us;
+	struct net_device *napi_dev;
+	struct napi_struct napi_str;
+#ifdef CONFIG_NET_RX_BUSY_POLL
+	unsigned long busy_poll_state;
+#endif
+	struct ef4_special_buffer eventq;
+	unsigned int eventq_mask;
+	unsigned int eventq_read_ptr;
+	int event_test_cpu;
+
+	unsigned int irq_count;
+	unsigned int irq_mod_score;
+#ifdef CONFIG_RFS_ACCEL
+	unsigned int rfs_filters_added;
+#define RPS_FLOW_ID_INVALID 0xFFFFFFFF
+	u32 *rps_flow_id;
+#endif
+
+	unsigned n_rx_tobe_disc;
+	unsigned n_rx_ip_hdr_chksum_err;
+	unsigned n_rx_tcp_udp_chksum_err;
+	unsigned n_rx_mcast_mismatch;
+	unsigned n_rx_frm_trunc;
+	unsigned n_rx_overlength;
+	unsigned n_skbuff_leaks;
+	unsigned int n_rx_nodesc_trunc;
+	unsigned int n_rx_merge_events;
+	unsigned int n_rx_merge_packets;
+
+	unsigned int rx_pkt_n_frags;
+	unsigned int rx_pkt_index;
+
+	struct ef4_rx_queue rx_queue;
+	struct ef4_tx_queue tx_queue[EF4_TXQ_TYPES];
+};
+
+#ifdef CONFIG_NET_RX_BUSY_POLL
+enum ef4_channel_busy_poll_state {
+	EF4_CHANNEL_STATE_IDLE = 0,
+	EF4_CHANNEL_STATE_NAPI = BIT(0),
+	EF4_CHANNEL_STATE_NAPI_REQ_BIT = 1,
+	EF4_CHANNEL_STATE_NAPI_REQ = BIT(1),
+	EF4_CHANNEL_STATE_POLL_BIT = 2,
+	EF4_CHANNEL_STATE_POLL = BIT(2),
+	EF4_CHANNEL_STATE_DISABLE_BIT = 3,
+};
+
+static inline void ef4_channel_busy_poll_init(struct ef4_channel *channel)
+{
+	WRITE_ONCE(channel->busy_poll_state, EF4_CHANNEL_STATE_IDLE);
+}
+
+/* Called from the device poll routine to get ownership of a channel. */
+static inline bool ef4_channel_lock_napi(struct ef4_channel *channel)
+{
+	unsigned long prev, old = READ_ONCE(channel->busy_poll_state);
+
+	while (1) {
+		switch (old) {
+		case EF4_CHANNEL_STATE_POLL:
+			/* Ensure ef4_channel_try_lock_poll() wont starve us */
+			set_bit(EF4_CHANNEL_STATE_NAPI_REQ_BIT,
+				&channel->busy_poll_state);
+			/* fallthrough */
+		case EF4_CHANNEL_STATE_POLL | EF4_CHANNEL_STATE_NAPI_REQ:
+			return false;
+		default:
+			break;
+		}
+		prev = cmpxchg(&channel->busy_poll_state, old,
+			       EF4_CHANNEL_STATE_NAPI);
+		if (unlikely(prev != old)) {
+			/* This is likely to mean we've just entered polling
+			 * state. Go back round to set the REQ bit.
+			 */
+			old = prev;
+			continue;
+		}
+		return true;
+	}
+}
+
+static inline void ef4_channel_unlock_napi(struct ef4_channel *channel)
+{
+	/* Make sure write has completed from ef4_channel_lock_napi() */
+	smp_wmb();
+	WRITE_ONCE(channel->busy_poll_state, EF4_CHANNEL_STATE_IDLE);
+}
+
+/* Called from ef4_busy_poll(). */
+static inline bool ef4_channel_try_lock_poll(struct ef4_channel *channel)
+{
+	return cmpxchg(&channel->busy_poll_state, EF4_CHANNEL_STATE_IDLE,
+			EF4_CHANNEL_STATE_POLL) == EF4_CHANNEL_STATE_IDLE;
+}
+
+static inline void ef4_channel_unlock_poll(struct ef4_channel *channel)
+{
+	clear_bit_unlock(EF4_CHANNEL_STATE_POLL_BIT, &channel->busy_poll_state);
+}
+
+static inline bool ef4_channel_busy_polling(struct ef4_channel *channel)
+{
+	return test_bit(EF4_CHANNEL_STATE_POLL_BIT, &channel->busy_poll_state);
+}
+
+static inline void ef4_channel_enable(struct ef4_channel *channel)
+{
+	clear_bit_unlock(EF4_CHANNEL_STATE_DISABLE_BIT,
+			 &channel->busy_poll_state);
+}
+
+/* Stop further polling or napi access.
+ * Returns false if the channel is currently busy polling.
+ */
+static inline bool ef4_channel_disable(struct ef4_channel *channel)
+{
+	set_bit(EF4_CHANNEL_STATE_DISABLE_BIT, &channel->busy_poll_state);
+	/* Implicit barrier in ef4_channel_busy_polling() */
+	return !ef4_channel_busy_polling(channel);
+}
+
+#else /* CONFIG_NET_RX_BUSY_POLL */
+
+static inline void ef4_channel_busy_poll_init(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_lock_napi(struct ef4_channel *channel)
+{
+	return true;
+}
+
+static inline void ef4_channel_unlock_napi(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_try_lock_poll(struct ef4_channel *channel)
+{
+	return false;
+}
+
+static inline void ef4_channel_unlock_poll(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_busy_polling(struct ef4_channel *channel)
+{
+	return false;
+}
+
+static inline void ef4_channel_enable(struct ef4_channel *channel)
+{
+}
+
+static inline bool ef4_channel_disable(struct ef4_channel *channel)
+{
+	return true;
+}
+#endif /* CONFIG_NET_RX_BUSY_POLL */
+
+/**
+ * struct ef4_msi_context - Context for each MSI
+ * @efx: The associated NIC
+ * @index: Index of the channel/IRQ
+ * @name: Name of the channel/IRQ
+ *
+ * Unlike &struct ef4_channel, this is never reallocated and is always
+ * safe for the IRQ handler to access.
+ */
+struct ef4_msi_context {
+	struct ef4_nic *efx;
+	unsigned int index;
+	char name[IFNAMSIZ + 6];
+};
+
+/**
+ * struct ef4_channel_type - distinguishes traffic and extra channels
+ * @handle_no_channel: Handle failure to allocate an extra channel
+ * @pre_probe: Set up extra state prior to initialisation
+ * @post_remove: Tear down extra state after finalisation, if allocated.
+ *	May be called on channels that have not been probed.
+ * @get_name: Generate the channel's name (used for its IRQ handler)
+ * @copy: Copy the channel state prior to reallocation.  May be %NULL if
+ *	reallocation is not supported.
+ * @receive_skb: Handle an skb ready to be passed to netif_receive_skb()
+ * @keep_eventq: Flag for whether event queue should be kept initialised
+ *	while the device is stopped
+ */
+struct ef4_channel_type {
+	void (*handle_no_channel)(struct ef4_nic *);
+	int (*pre_probe)(struct ef4_channel *);
+	void (*post_remove)(struct ef4_channel *);
+	void (*get_name)(struct ef4_channel *, char *buf, size_t len);
+	struct ef4_channel *(*copy)(const struct ef4_channel *);
+	bool (*receive_skb)(struct ef4_channel *, struct sk_buff *);
+	bool keep_eventq;
+};
+
+enum ef4_led_mode {
+	EF4_LED_OFF	= 0,
+	EF4_LED_ON	= 1,
+	EF4_LED_DEFAULT	= 2
+};
+/* Name for @val from member##_names[], or "(invalid)" if >= member##_max */
+#define STRING_TABLE_LOOKUP(val, member) \
+	(((val) < member ## _max) ? member ## _names[val] : "(invalid)")
+
+extern const char *const ef4_loopback_mode_names[];
+extern const unsigned int ef4_loopback_mode_max;
+#define LOOPBACK_MODE(efx) \
+	STRING_TABLE_LOOKUP((efx)->loopback_mode, ef4_loopback_mode)
+
+extern const char *const ef4_reset_type_names[];
+extern const unsigned int ef4_reset_type_max;
+#define RESET_TYPE(type) \
+	STRING_TABLE_LOOKUP(type, ef4_reset_type)
+
+enum ef4_int_mode {
+	/* If altering these values, be careful to update the macro below */
+	EF4_INT_MODE_MSIX = 0,
+	EF4_INT_MODE_MSI = 1,
+	EF4_INT_MODE_LEGACY = 2,
+	EF4_INT_MODE_MAX	/* Insert any new items before this */
+};
+#define EF4_INT_MODE_USE_MSI(x) (((x)->interrupt_mode) <= EF4_INT_MODE_MSI)
+
+enum nic_state {
+	STATE_UNINIT = 0,	/* device being probed/removed or is frozen */
+	STATE_READY = 1,	/* hardware ready and netdev registered */
+	STATE_DISABLED = 2,	/* device disabled due to hardware errors */
+	STATE_RECOVERY = 3,	/* device recovering from PCI error */
+};
+
+/* Forward declaration */
+struct ef4_nic;
+
+/* Pseudo bit-mask flow control field */
+#define EF4_FC_RX	FLOW_CTRL_RX
+#define EF4_FC_TX	FLOW_CTRL_TX
+#define EF4_FC_AUTO	4
+
+/**
+ * struct ef4_link_state - Current state of the link
+ * @up: Link is up
+ * @fd: Link is full-duplex
+ * @fc: Actual flow control flags
+ * @speed: Link speed (Mbps)
+ */
+struct ef4_link_state {
+	bool up;
+	bool fd;
+	u8 fc;
+	unsigned int speed;
+};
+/* True iff @left and @right agree on up, fd, fc and speed */
+static inline bool ef4_link_state_equal(const struct ef4_link_state *left,
+					const struct ef4_link_state *right)
+{
+	return !(left->up != right->up || left->fd != right->fd ||
+		 left->fc != right->fc || left->speed != right->speed);
+}
+
+/**
+ * struct ef4_phy_operations - Efx PHY operations table
+ * @probe: Probe PHY and initialise efx->mdio.mode_support, efx->mdio.mmds,
+ *	efx->loopback_modes.
+ * @init: Initialise PHY
+ * @fini: Shut down PHY
+ * @reconfigure: Reconfigure PHY (e.g. for new link parameters)
+ * @poll: Update @link_state and report whether it changed.
+ *	Serialised by the mac_lock.
+ * @get_settings: Get ethtool settings. Serialised by the mac_lock.
+ * @set_settings: Set ethtool settings. Serialised by the mac_lock.
+ * @set_npage_adv: Set abilities advertised in (Extended) Next Page
+ *	(only needed where AN bit is set in mmds)
+ * @test_alive: Test that PHY is 'alive' (online)
+ * @test_name: Get the name of a PHY-specific test/result
+ * @run_tests: Run tests and record results as appropriate (offline).
+ *	Flags are the ethtool tests flags.
+ */
+struct ef4_phy_operations {
+	int (*probe) (struct ef4_nic *efx);
+	int (*init) (struct ef4_nic *efx);
+	void (*fini) (struct ef4_nic *efx);
+	void (*remove) (struct ef4_nic *efx);
+	int (*reconfigure) (struct ef4_nic *efx);
+	bool (*poll) (struct ef4_nic *efx);
+	void (*get_settings) (struct ef4_nic *efx,
+			      struct ethtool_cmd *ecmd);
+	int (*set_settings) (struct ef4_nic *efx,
+			     struct ethtool_cmd *ecmd);
+	void (*set_npage_adv) (struct ef4_nic *efx, u32);
+	int (*test_alive) (struct ef4_nic *efx);
+	const char *(*test_name) (struct ef4_nic *efx, unsigned int index);
+	int (*run_tests) (struct ef4_nic *efx, int *results, unsigned flags);
+	int (*get_module_eeprom) (struct ef4_nic *efx,
+			       struct ethtool_eeprom *ee,
+			       u8 *data);
+	int (*get_module_info) (struct ef4_nic *efx,
+				struct ethtool_modinfo *modinfo);
+};
+
+/**
+ * enum ef4_phy_mode - PHY operating mode flags
+ * @PHY_MODE_NORMAL: on and should pass traffic
+ * @PHY_MODE_TX_DISABLED: on with TX disabled
+ * @PHY_MODE_LOW_POWER: set to low power through MDIO
+ * @PHY_MODE_OFF: switched off through external control
+ * @PHY_MODE_SPECIAL: on but will not pass traffic
+ */
+enum ef4_phy_mode {
+	PHY_MODE_NORMAL		= 0,
+	PHY_MODE_TX_DISABLED	= 1,
+	PHY_MODE_LOW_POWER	= 2,
+	PHY_MODE_OFF		= 4,
+	PHY_MODE_SPECIAL	= 8,
+};
+/* True if @mode has any flag set other than %PHY_MODE_TX_DISABLED */
+static inline bool ef4_phy_mode_disabled(enum ef4_phy_mode mode)
+{
+	return (mode & ~PHY_MODE_TX_DISABLED) != 0;
+}
+
+/**
+ * struct ef4_hw_stat_desc - Description of a hardware statistic
+ * @name: Name of the statistic as visible through ethtool, or %NULL if
+ *	it should not be exposed
+ * @dma_width: Width in bits (0 for non-DMA statistics)
+ * @offset: Offset within stats (ignored for non-DMA statistics)
+ */
+struct ef4_hw_stat_desc {
+	const char *name;
+	u16 dma_width;
+	u16 offset;
+};
+
+/* Number of bits used in a multicast filter hash address */
+#define EF4_MCAST_HASH_BITS 8
+
+/* Number of (single-bit) entries in a multicast filter hash: 2^HASH_BITS */
+#define EF4_MCAST_HASH_ENTRIES (1 << EF4_MCAST_HASH_BITS)
+
+/* An Efx multicast filter hash: a bitmap with byte-wise and oword-wise views */
+union ef4_multicast_hash {
+	u8 byte[EF4_MCAST_HASH_ENTRIES / 8];	/* 8 entries per byte */
+	ef4_oword_t oword[EF4_MCAST_HASH_ENTRIES / sizeof(ef4_oword_t) / 8];
+};
+
+/**
+ * struct ef4_nic - an Efx NIC
+ * @name: Device name (net device name or bus id before net device registered)
+ * @pci_dev: The PCI device
+ * @node: List node for maintaining primary/secondary function lists
+ * @primary: &struct ef4_nic instance for the primary function of this
+ *	controller.  May be the same structure, and may be %NULL if no
+ *	primary function is bound.  Serialised by rtnl_lock.
+ * @secondary_list: List of &struct ef4_nic instances for the secondary PCI
+ *	functions of the controller, if this is for the primary function.
+ *	Serialised by rtnl_lock.
+ * @type: Controller type attributes
+ * @legacy_irq: IRQ number
+ * @workqueue: Workqueue for port reconfigures and the HW monitor.
+ *	Work items do not hold and must not acquire RTNL.
+ * @workqueue_name: Name of workqueue
+ * @reset_work: Scheduled reset workitem
+ * @membase_phys: Memory BAR value as physical address
+ * @membase: Memory BAR value
+ * @interrupt_mode: Interrupt mode
+ * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds
+ * @timer_max_ns: Interrupt timer maximum value, in nanoseconds
+ * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues
+ * @irq_mod_step_us: Step size for IRQ moderation for RX event queues
+ * @irq_rx_moderation_us: IRQ moderation time for RX event queues
+ * @msg_enable: Log message enable flags
+ * @state: Device state number (%STATE_*). Serialised by the rtnl_lock.
+ * @reset_pending: Bitmask for pending resets
+ * @port_num: Port number, as returned by ef4_port_num()
+ * @tx_channel_offset: Index in @channel of the first channel with TX queues
+ * @channel: Channels
+ * @msi_context: Context for each MSI
+ * @extra_channel_type: Types of extra (non-traffic) channels that
+ *	should be allocated for this NIC
+ * @rxq_entries: Size of receive queues requested by user.
+ * @txq_entries: Size of transmit queues requested by user.
+ * @txq_stop_thresh: TX queue fill level at or above which we stop it.
+ * @txq_wake_thresh: TX queue fill level at or below which we wake it.
+ * @tx_dc_base: Base qword address in SRAM of TX queue descriptor caches
+ * @rx_dc_base: Base qword address in SRAM of RX queue descriptor caches
+ * @sram_lim_qw: Qword address limit of SRAM
+ * @next_buffer_table: First available buffer table id
+ * @n_channels: Number of channels in use
+ * @n_rx_channels: Number of channels used for RX (= number of RX queues)
+ * @n_tx_channels: Number of channels used for TX
+ * @rx_ip_align: RX DMA address offset to have IP header aligned
+ *	in accordance with NET_IP_ALIGN
+ * @rx_dma_len: Current maximum RX DMA length
+ * @rx_buffer_order: Order (log2) of number of pages for each RX buffer
+ * @rx_buffer_truesize: Amortised allocation size of an RX buffer,
+ *	for use in sk_buff::truesize
+ * @rx_prefix_size: Size of RX prefix before packet data
+ * @rx_packet_hash_offset: Offset of RX flow hash from start of packet data
+ *	(valid only if @rx_prefix_size != 0; always negative)
+ * @rx_packet_len_offset: Offset of RX packet length from start of packet data
+ *	(valid only for NICs that set %EF4_RX_PKT_PREFIX_LEN; always negative)
+ * @rx_packet_ts_offset: Offset of timestamp from start of packet data
+ *	(valid only if channel->sync_timestamps_enabled; always negative)
+ * @rx_hash_key: Toeplitz hash key for RSS
+ * @rx_indir_table: Indirection table for RSS
+ * @rx_scatter: Scatter mode enabled for receives
+ * @int_error_count: Number of internal errors seen recently
+ * @int_error_expire: Time at which error count will be expired
+ * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
+ *	acknowledge but do nothing else.
+ * @irq_status: Interrupt status buffer
+ * @irq_zero_count: Number of legacy IRQs seen with queue flags == 0
+ * @irq_level: IRQ level/index for IRQs not triggered by an event queue
+ * @selftest_work: Work item for asynchronous self-test
+ * @mtd_list: List of MTDs attached to the NIC
+ * @nic_data: Hardware dependent state
+ * @mac_lock: MAC access lock. Protects @port_enabled, @phy_mode,
+ *	ef4_monitor() and ef4_reconfigure_port()
+ * @port_enabled: Port enabled indicator.
+ *	Serialises ef4_stop_all(), ef4_start_all(), ef4_monitor() and
+ *	ef4_mac_work() with kernel interfaces. Safe to read under any
+ *	one of the rtnl_lock, mac_lock, or netif_tx_lock, but all three must
+ *	be held to modify it.
+ * @port_initialized: Port initialized?
+ * @net_dev: Operating system network device. Consider holding the rtnl lock
+ * @fixed_features: Features which cannot be turned off
+ * @stats_buffer: DMA buffer for statistics
+ * @phy_type: PHY type
+ * @phy_op: PHY interface
+ * @phy_data: PHY private data (including PHY-specific stats)
+ * @mdio: PHY MDIO interface
+ * @phy_mode: PHY operating mode. Serialised by @mac_lock.
+ * @link_advertising: Autonegotiation advertising flags
+ * @link_state: Current state of the link
+ * @n_link_state_changes: Number of times the link has changed state
+ * @unicast_filter: Flag for Falcon-arch simple unicast filter.
+ *	Protected by @mac_lock.
+ * @multicast_hash: Multicast hash table for Falcon-arch.
+ *	Protected by @mac_lock.
+ * @wanted_fc: Wanted flow control flags
+ * @fc_disable: When non-zero flow control is disabled. Typically used to
+ *	ensure that network back pressure doesn't delay dma queue flushes.
+ *	Serialised by the rtnl lock.
+ * @mac_work: Work item for changing MAC promiscuity and multicast hash
+ * @loopback_mode: Loopback status
+ * @loopback_modes: Supported loopback mode bitmask
+ * @loopback_selftest: Offline self-test private state
+ * @filter_sem: Filter table rw_semaphore, for freeing the table
+ * @filter_lock: Filter table lock, for mere content changes
+ * @filter_state: Architecture-dependent filter table state
+ * @rps_expire_channel: Next channel to check for expiry
+ * @rps_expire_index: Next index to check for expiry in
+ *	@rps_expire_channel's @rps_flow_id
+ * @active_queues: Count of RX and TX queues that haven't been flushed and drained.
+ * @rxq_flush_pending: Count of number of receive queues that need to be flushed.
+ *	Decremented when the ef4_flush_rx_queue() is called.
+ * @rxq_flush_outstanding: Count of number of RX flushes started but not yet
+ *	completed (either success or failure). Not used when MCDI is used to
+ *	flush receive queues.
+ * @flush_wq: wait queue used by ef4_nic_flush_queues() to wait for flush completions.
+ * @vpd_sn: Serial number read from VPD
+ * @monitor_work: Hardware monitor workitem
+ * @biu_lock: BIU (bus interface unit) lock
+ * @last_irq_cpu: Last CPU to handle a possible test interrupt.  This
+ *	field is used by ef4_test_interrupts() to verify that an
+ *	interrupt has occurred.
+ * @stats_lock: Statistics update lock. Must be held when calling
+ *	ef4_nic_type::{update,start,stop}_stats.
+ * @n_rx_noskb_drops: Count of RX packets dropped due to failure to allocate an skb
+ *
+ * This is stored in the private area of the &struct net_device.
+ */
+struct ef4_nic {
+	/* The following fields should be written very rarely */
+
+	char name[IFNAMSIZ];
+	struct list_head node;
+	struct ef4_nic *primary;
+	struct list_head secondary_list;
+	struct pci_dev *pci_dev;
+	unsigned int port_num;
+	const struct ef4_nic_type *type;
+	int legacy_irq;
+	bool eeh_disabled_legacy_irq;
+	struct workqueue_struct *workqueue;
+	char workqueue_name[16];
+	struct work_struct reset_work;
+	resource_size_t membase_phys;
+	void __iomem *membase;
+
+	enum ef4_int_mode interrupt_mode;
+	unsigned int timer_quantum_ns;
+	unsigned int timer_max_ns;
+	bool irq_rx_adaptive;
+	unsigned int irq_mod_step_us;
+	unsigned int irq_rx_moderation_us;
+	u32 msg_enable;
+
+	enum nic_state state;
+	unsigned long reset_pending;
+
+	struct ef4_channel *channel[EF4_MAX_CHANNELS];
+	struct ef4_msi_context msi_context[EF4_MAX_CHANNELS];
+	const struct ef4_channel_type *
+	extra_channel_type[EF4_MAX_EXTRA_CHANNELS];
+
+	unsigned rxq_entries;
+	unsigned txq_entries;
+	unsigned int txq_stop_thresh;
+	unsigned int txq_wake_thresh;
+
+	unsigned tx_dc_base;
+	unsigned rx_dc_base;
+	unsigned sram_lim_qw;
+	unsigned next_buffer_table;
+
+	unsigned int max_channels;
+	unsigned int max_tx_channels;
+	unsigned n_channels;
+	unsigned n_rx_channels;
+	unsigned rss_spread;
+	unsigned tx_channel_offset;
+	unsigned n_tx_channels;
+	unsigned int rx_ip_align;
+	unsigned int rx_dma_len;
+	unsigned int rx_buffer_order;
+	unsigned int rx_buffer_truesize;
+	unsigned int rx_page_buf_step;
+	unsigned int rx_bufs_per_page;
+	unsigned int rx_pages_per_batch;
+	unsigned int rx_prefix_size;
+	int rx_packet_hash_offset;
+	int rx_packet_len_offset;
+	int rx_packet_ts_offset;
+	u8 rx_hash_key[40];
+	u32 rx_indir_table[128];
+	bool rx_scatter;
+
+	unsigned int_error_count;
+	unsigned long int_error_expire;
+
+	bool irq_soft_enabled;
+	struct ef4_buffer irq_status;
+	unsigned irq_zero_count;
+	unsigned irq_level;
+	struct delayed_work selftest_work;
+
+#ifdef CONFIG_SFC_FALCON_MTD
+	struct list_head mtd_list;
+#endif
+
+	void *nic_data;
+
+	struct mutex mac_lock;
+	struct work_struct mac_work;
+	bool port_enabled;
+
+	bool mc_bist_for_other_fn;
+	bool port_initialized;
+	struct net_device *net_dev;
+
+	netdev_features_t fixed_features;
+
+	struct ef4_buffer stats_buffer;
+	u64 rx_nodesc_drops_total;
+	u64 rx_nodesc_drops_while_down;
+	bool rx_nodesc_drops_prev_state;
+
+	unsigned int phy_type;
+	const struct ef4_phy_operations *phy_op;
+	void *phy_data;
+	struct mdio_if_info mdio;
+	enum ef4_phy_mode phy_mode;
+
+	u32 link_advertising;
+	struct ef4_link_state link_state;
+	unsigned int n_link_state_changes;
+
+	bool unicast_filter;
+	union ef4_multicast_hash multicast_hash;
+	u8 wanted_fc;
+	unsigned fc_disable;
+
+	atomic_t rx_reset;
+	enum ef4_loopback_mode loopback_mode;
+	u64 loopback_modes;
+
+	void *loopback_selftest;
+
+	struct rw_semaphore filter_sem;
+	spinlock_t filter_lock;
+	void *filter_state;
+#ifdef CONFIG_RFS_ACCEL
+	unsigned int rps_expire_channel;
+	unsigned int rps_expire_index;
+#endif
+
+	atomic_t active_queues;
+	atomic_t rxq_flush_pending;
+	atomic_t rxq_flush_outstanding;
+	wait_queue_head_t flush_wq;
+
+	char *vpd_sn;
+
+	/* The following fields may be written more often */
+
+	struct delayed_work monitor_work ____cacheline_aligned_in_smp;
+	spinlock_t biu_lock;
+	int last_irq_cpu;
+	spinlock_t stats_lock;
+	atomic_t n_rx_noskb_drops;
+};
+
+static inline int ef4_dev_registered(struct ef4_nic *efx)
+{
+	return efx->net_dev->reg_state == NETREG_REGISTERED; /* yields 1 or 0 */
+}
+
+static inline unsigned int ef4_port_num(struct ef4_nic *efx)
+{
+	return efx->port_num;	/* plain read; no lock is taken here */
+}
+
+struct ef4_mtd_partition {
+	struct list_head node;	/* presumably on ef4_nic::mtd_list - TODO confirm */
+	struct mtd_info mtd;
+	const char *dev_type_name;
+	const char *type_name;
+	char name[IFNAMSIZ + 20];	/* net device name plus 20 spare bytes */
+};
+
+/**
+ * struct ef4_nic_type - Efx device type definition
+ * @mem_bar: Get the memory BAR
+ * @mem_map_size: Get memory BAR mapped size
+ * @probe: Probe the controller
+ * @remove: Free resources allocated by probe()
+ * @init: Initialise the controller
+ * @dimension_resources: Dimension controller resources (buffer table,
+ *	and VIs once the available interrupt resources are clear)
+ * @fini: Shut down the controller
+ * @monitor: Periodic function for polling link state and hardware monitor
+ * @map_reset_reason: Map ethtool reset reason to a reset method
+ * @map_reset_flags: Map ethtool reset flags to a reset method, if possible
+ * @reset: Reset the controller hardware and possibly the PHY.  This will
+ *	be called while the controller is uninitialised.
+ * @probe_port: Probe the MAC and PHY
+ * @remove_port: Free resources allocated by probe_port()
+ * @handle_global_event: Handle a "global" event (may be %NULL)
+ * @fini_dmaq: Flush and finalise DMA queues (RX and TX queues)
+ * @prepare_flush: Prepare the hardware for flushing the DMA queues
+ *	(for Falcon architecture)
+ * @finish_flush: Clean up after flushing the DMA queues (Falcon architecture)
+ * @prepare_flr: Prepare for an FLR
+ * @finish_flr: Clean up after an FLR
+ * @describe_stats: Describe statistics for ethtool
+ * @update_stats: Update statistics not provided by event handling.
+ *	Either argument may be %NULL.
+ * @start_stats: Start the regular fetching of statistics
+ * @pull_stats: Pull stats from the NIC and wait until they arrive.
+ * @stop_stats: Stop the regular fetching of statistics
+ * @set_id_led: Set state of identifying LED or revert to automatic function
+ * @push_irq_moderation: Apply interrupt moderation value
+ * @reconfigure_port: Push loopback/power/txdis changes to the MAC and PHY
+ * @prepare_enable_fc_tx: Prepare MAC to enable pause frame TX (may be %NULL)
+ * @reconfigure_mac: Push MAC address, MTU, flow control and filter settings
+ *	to the hardware.  Serialised by the mac_lock.
+ * @check_mac_fault: Check MAC fault state. True if fault present.
+ * @get_wol: Get WoL configuration from driver state
+ * @set_wol: Push WoL configuration to the NIC
+ * @resume_wol: Synchronise WoL state between driver and MC (e.g. after resume)
+ * @test_chip: Test registers.  May use ef4_farch_test_registers(), and is
+ *	expected to reset the NIC.
+ * @test_nvram: Test validity of NVRAM contents
+ * @irq_enable_master: Enable IRQs on the NIC.  Each event queue must
+ *	be separately enabled after this.
+ * @irq_test_generate: Generate a test IRQ
+ * @irq_disable_non_ev: Disable non-event IRQs on the NIC.  Each event
+ *	queue must be separately disabled before this.
+ * @irq_handle_msi: Handle MSI for a channel.  The @dev_id argument is
+ *	a pointer to the &struct ef4_msi_context for the channel.
+ * @irq_handle_legacy: Handle legacy interrupt.  The @dev_id argument
+ *	is a pointer to the &struct ef4_nic.
+ * @tx_probe: Allocate resources for TX queue
+ * @tx_init: Initialise TX queue on the NIC
+ * @tx_remove: Free resources for TX queue
+ * @tx_write: Write TX descriptors and doorbell
+ * @tx_limit_len: Presumably limits a TX DMA fragment length - TODO confirm
+ * @rx_push_rss_config: Write RSS hash key and indirection table to the NIC
+ * @rx_probe: Allocate resources for RX queue
+ * @rx_init: Initialise RX queue on the NIC
+ * @rx_remove: Free resources for RX queue
+ * @rx_write: Write RX descriptors and doorbell
+ * @rx_defer_refill: Generate a refill reminder event
+ * @ev_probe: Allocate resources for event queue
+ * @ev_init: Initialise event queue on the NIC
+ * @ev_fini: Deinitialise event queue on the NIC
+ * @ev_remove: Free resources for event queue
+ * @ev_process: Process events for a queue, up to the given NAPI quota
+ * @ev_read_ack: Acknowledge read events on a queue, rearming its IRQ
+ * @ev_test_generate: Generate a test event
+ * @filter_table_probe: Probe filter capabilities and set up filter software state
+ * @filter_table_restore: Restore filters removed from hardware
+ * @filter_table_remove: Remove filters from hardware and tear down software state
+ * @filter_update_rx_scatter: Update filters after change to rx scatter setting
+ * @filter_insert: add or replace a filter
+ * @filter_remove_safe: remove a filter by ID, carefully
+ * @filter_get_safe: retrieve a filter by ID, carefully
+ * @filter_clear_rx: Remove all RX filters whose priority is less than or
+ *	equal to the given priority and is not %EF4_FILTER_PRI_AUTO
+ * @filter_count_rx_used: Get the number of filters in use at a given priority
+ * @filter_get_rx_id_limit: Get maximum value of a filter id, plus 1
+ * @filter_get_rx_ids: Get list of RX filters at a given priority
+ * @filter_rfs_insert: Add or replace a filter for RFS.  This must be
+ *	atomic.  The hardware change may be asynchronous but should
+ *	not be delayed for long.  It may fail if this can't be done
+ *	atomically.
+ * @filter_rfs_expire_one: Consider expiring a filter inserted for RFS.
+ *	This must check whether the specified table entry is used by RFS
+ *	and that rps_may_expire_flow() returns true for it.
+ * @mtd_probe: Probe and add MTD partitions associated with this net device,
+ *	 using ef4_mtd_add()
+ * @mtd_rename: Set an MTD partition name using the net device name
+ * @mtd_read: Read from an MTD partition
+ * @mtd_erase: Erase part of an MTD partition
+ * @mtd_write: Write to an MTD partition
+ * @mtd_sync: Wait for write-back to complete on MTD partition.  This
+ *	also notifies the driver that a writer has finished using this
+ *	partition.
+ * @get_mac_address: Presumably reads the MAC address - TODO confirm
+ * @set_mac_address: Set the MAC address of the device
+ * @revision: Hardware architecture revision
+ * @txd_ptr_tbl_base: TX descriptor ring base address
+ * @rxd_ptr_tbl_base: RX descriptor ring base address
+ * @buf_tbl_base: Buffer table base address
+ * @evq_ptr_tbl_base: Event queue pointer table base address
+ * @evq_rptr_tbl_base: Event queue read-pointer table base address
+ * @max_dma_mask: Maximum possible DMA mask
+ * @rx_prefix_size: Size of RX prefix before packet data
+ * @rx_hash_offset: Offset of RX flow hash within prefix
+ * @rx_ts_offset: Offset of timestamp within prefix
+ * @rx_buffer_padding: Size of padding at end of RX packet
+ * @can_rx_scatter: NIC is able to scatter packets to multiple buffers
+ * @always_rx_scatter: NIC will always scatter packets to multiple buffers
+ * @max_interrupt_mode: Highest capability &enum ef4_int_mode value supported
+ * @timer_period_max: Maximum period of interrupt timer (in ticks)
+ * @offload_features: net_device flags for protocol offloads done in hardware
+ * @max_rx_ip_filters: Presumably the RX IP filter capacity - TODO confirm
+ */
+struct ef4_nic_type {
+	unsigned int mem_bar;
+	unsigned int (*mem_map_size)(struct ef4_nic *efx);
+	int (*probe)(struct ef4_nic *efx);
+	void (*remove)(struct ef4_nic *efx);
+	int (*init)(struct ef4_nic *efx);
+	int (*dimension_resources)(struct ef4_nic *efx);
+	void (*fini)(struct ef4_nic *efx);
+	void (*monitor)(struct ef4_nic *efx);
+	enum reset_type (*map_reset_reason)(enum reset_type reason);
+	int (*map_reset_flags)(u32 *flags);
+	int (*reset)(struct ef4_nic *efx, enum reset_type method);
+	int (*probe_port)(struct ef4_nic *efx);
+	void (*remove_port)(struct ef4_nic *efx);
+	bool (*handle_global_event)(struct ef4_channel *channel, ef4_qword_t *);
+	int (*fini_dmaq)(struct ef4_nic *efx);
+	void (*prepare_flush)(struct ef4_nic *efx);
+	void (*finish_flush)(struct ef4_nic *efx);
+	void (*prepare_flr)(struct ef4_nic *efx);
+	void (*finish_flr)(struct ef4_nic *efx);
+	size_t (*describe_stats)(struct ef4_nic *efx, u8 *names);
+	size_t (*update_stats)(struct ef4_nic *efx, u64 *full_stats,
+			       struct rtnl_link_stats64 *core_stats);
+	void (*start_stats)(struct ef4_nic *efx);
+	void (*pull_stats)(struct ef4_nic *efx);
+	void (*stop_stats)(struct ef4_nic *efx);
+	void (*set_id_led)(struct ef4_nic *efx, enum ef4_led_mode mode);
+	void (*push_irq_moderation)(struct ef4_channel *channel);
+	int (*reconfigure_port)(struct ef4_nic *efx);
+	void (*prepare_enable_fc_tx)(struct ef4_nic *efx);
+	int (*reconfigure_mac)(struct ef4_nic *efx);
+	bool (*check_mac_fault)(struct ef4_nic *efx);
+	void (*get_wol)(struct ef4_nic *efx, struct ethtool_wolinfo *wol);
+	int (*set_wol)(struct ef4_nic *efx, u32 type);
+	void (*resume_wol)(struct ef4_nic *efx);
+	int (*test_chip)(struct ef4_nic *efx, struct ef4_self_tests *tests);
+	int (*test_nvram)(struct ef4_nic *efx);
+	void (*irq_enable_master)(struct ef4_nic *efx);
+	int (*irq_test_generate)(struct ef4_nic *efx);
+	void (*irq_disable_non_ev)(struct ef4_nic *efx);
+	irqreturn_t (*irq_handle_msi)(int irq, void *dev_id);
+	irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id);
+	int (*tx_probe)(struct ef4_tx_queue *tx_queue);
+	void (*tx_init)(struct ef4_tx_queue *tx_queue);
+	void (*tx_remove)(struct ef4_tx_queue *tx_queue);
+	void (*tx_write)(struct ef4_tx_queue *tx_queue);
+	unsigned int (*tx_limit_len)(struct ef4_tx_queue *tx_queue,
+				     dma_addr_t dma_addr, unsigned int len);
+	int (*rx_push_rss_config)(struct ef4_nic *efx, bool user,
+				  const u32 *rx_indir_table);
+	int (*rx_probe)(struct ef4_rx_queue *rx_queue);
+	void (*rx_init)(struct ef4_rx_queue *rx_queue);
+	void (*rx_remove)(struct ef4_rx_queue *rx_queue);
+	void (*rx_write)(struct ef4_rx_queue *rx_queue);
+	void (*rx_defer_refill)(struct ef4_rx_queue *rx_queue);
+	int (*ev_probe)(struct ef4_channel *channel);
+	int (*ev_init)(struct ef4_channel *channel);
+	void (*ev_fini)(struct ef4_channel *channel);
+	void (*ev_remove)(struct ef4_channel *channel);
+	int (*ev_process)(struct ef4_channel *channel, int quota);
+	void (*ev_read_ack)(struct ef4_channel *channel);
+	void (*ev_test_generate)(struct ef4_channel *channel);
+	int (*filter_table_probe)(struct ef4_nic *efx);
+	void (*filter_table_restore)(struct ef4_nic *efx);
+	void (*filter_table_remove)(struct ef4_nic *efx);
+	void (*filter_update_rx_scatter)(struct ef4_nic *efx);
+	s32 (*filter_insert)(struct ef4_nic *efx,
+			     struct ef4_filter_spec *spec, bool replace);
+	int (*filter_remove_safe)(struct ef4_nic *efx,
+				  enum ef4_filter_priority priority,
+				  u32 filter_id);
+	int (*filter_get_safe)(struct ef4_nic *efx,
+			       enum ef4_filter_priority priority,
+			       u32 filter_id, struct ef4_filter_spec *);
+	int (*filter_clear_rx)(struct ef4_nic *efx,
+			       enum ef4_filter_priority priority);
+	u32 (*filter_count_rx_used)(struct ef4_nic *efx,
+				    enum ef4_filter_priority priority);
+	u32 (*filter_get_rx_id_limit)(struct ef4_nic *efx);
+	s32 (*filter_get_rx_ids)(struct ef4_nic *efx,
+				 enum ef4_filter_priority priority,
+				 u32 *buf, u32 size);
+#ifdef CONFIG_RFS_ACCEL
+	s32 (*filter_rfs_insert)(struct ef4_nic *efx,
+				 struct ef4_filter_spec *spec);
+	bool (*filter_rfs_expire_one)(struct ef4_nic *efx, u32 flow_id,
+				      unsigned int index);
+#endif
+#ifdef CONFIG_SFC_FALCON_MTD
+	int (*mtd_probe)(struct ef4_nic *efx);
+	void (*mtd_rename)(struct ef4_mtd_partition *part);
+	int (*mtd_read)(struct mtd_info *mtd, loff_t start, size_t len,
+			size_t *retlen, u8 *buffer);
+	int (*mtd_erase)(struct mtd_info *mtd, loff_t start, size_t len);
+	int (*mtd_write)(struct mtd_info *mtd, loff_t start, size_t len,
+			 size_t *retlen, const u8 *buffer);
+	int (*mtd_sync)(struct mtd_info *mtd);
+#endif
+	int (*get_mac_address)(struct ef4_nic *efx, unsigned char *perm_addr);
+	int (*set_mac_address)(struct ef4_nic *efx);
+
+	int revision;
+	unsigned int txd_ptr_tbl_base;
+	unsigned int rxd_ptr_tbl_base;
+	unsigned int buf_tbl_base;
+	unsigned int evq_ptr_tbl_base;
+	unsigned int evq_rptr_tbl_base;
+	u64 max_dma_mask;
+	unsigned int rx_prefix_size;
+	unsigned int rx_hash_offset;
+	unsigned int rx_ts_offset;
+	unsigned int rx_buffer_padding;
+	bool can_rx_scatter;
+	bool always_rx_scatter;
+	unsigned int max_interrupt_mode;
+	unsigned int timer_period_max;
+	netdev_features_t offload_features;
+	unsigned int max_rx_ip_filters;
+};
+
+/**************************************************************************
+ *
+ * Prototypes and inline functions
+ *
+ *************************************************************************/
+
+static inline struct ef4_channel *
+ef4_get_channel(struct ef4_nic *efx, unsigned index)
+{
+	EF4_BUG_ON_PARANOID(index >= efx->n_channels);
+	return efx->channel[index];	/* caller keeps index < n_channels */
+}
+
+/* Iterate over all used channels; _channel may be any lvalue expression */
+#define ef4_for_each_channel(_channel, _efx)				\
+	for ((_channel) = (_efx)->channel[0];				\
+	     (_channel);						\
+	     (_channel) = ((_channel)->channel + 1 < (_efx)->n_channels) ? \
+		     (_efx)->channel[(_channel)->channel + 1] : NULL)
+
+/* Iterate over all used channels in reverse; _channel may be any lvalue */
+#define ef4_for_each_channel_rev(_channel, _efx)			\
+	for ((_channel) = (_efx)->channel[(_efx)->n_channels - 1];	\
+	     (_channel);						\
+	     (_channel) = (_channel)->channel ?				\
+		     (_efx)->channel[(_channel)->channel - 1] : NULL)
+
+static inline struct ef4_tx_queue *
+ef4_get_tx_queue(struct ef4_nic *efx, unsigned index, unsigned type)
+{
+	EF4_BUG_ON_PARANOID(index >= efx->n_tx_channels ||
+			    type >= EF4_TXQ_TYPES);
+	return efx->channel[index + efx->tx_channel_offset]->tx_queue + type;
+}
+
+static inline bool ef4_channel_has_tx_queues(struct ef4_channel *channel)
+{
+	return channel->channel - channel->efx->tx_channel_offset <
+		channel->efx->n_tx_channels;	/* in TX channel range */
+}
+
+static inline struct ef4_tx_queue *
+ef4_channel_get_tx_queue(struct ef4_channel *channel, unsigned type)
+{
+	EF4_BUG_ON_PARANOID(!ef4_channel_has_tx_queues(channel) ||
+			    type >= EF4_TXQ_TYPES);
+	return channel->tx_queue + type;
+}
+
+static inline bool ef4_tx_queue_used(struct ef4_tx_queue *tx_queue)
+{
+	return tx_queue->efx->net_dev->num_tc >= 2 ||
+		!(tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI);
+}
+
+/* Iterate over a channel's TX queues, stopping at the first unused one */
+#define ef4_for_each_channel_tx_queue(_tx_queue, _channel)		\
+	if (!ef4_channel_has_tx_queues(_channel))			\
+		;							\
+	else								\
+		for ((_tx_queue) = (_channel)->tx_queue;		\
+		     (_tx_queue) < (_channel)->tx_queue + EF4_TXQ_TYPES && \
+			     ef4_tx_queue_used(_tx_queue);		\
+		     (_tx_queue)++)
+
+/* Iterate over all EF4_TXQ_TYPES possible TX queues belonging to a channel */
+#define ef4_for_each_possible_channel_tx_queue(_tx_queue, _channel)	\
+	if (!ef4_channel_has_tx_queues(_channel))			\
+		;							\
+	else								\
+		for ((_tx_queue) = (_channel)->tx_queue;		\
+		     (_tx_queue) < (_channel)->tx_queue + EF4_TXQ_TYPES; \
+		     (_tx_queue)++)
+
+static inline bool ef4_channel_has_rx_queue(struct ef4_channel *channel)
+{
+	return channel->rx_queue.core_index >= 0;	/* < 0: no RX queue */
+}
+
+static inline struct ef4_rx_queue *
+ef4_channel_get_rx_queue(struct ef4_channel *channel)
+{
+	EF4_BUG_ON_PARANOID(!ef4_channel_has_rx_queue(channel));
+	return &channel->rx_queue;	/* embedded in the channel itself */
+}
+
+/* Iterate over the (at most one) RX queue belonging to a channel */
+#define ef4_for_each_channel_rx_queue(_rx_queue, _channel)		\
+	if (!ef4_channel_has_rx_queue(_channel))			\
+		;							\
+	else								\
+		for ((_rx_queue) = &(_channel)->rx_queue;		\
+		     (_rx_queue);					\
+		     (_rx_queue) = NULL)
+
+static inline struct ef4_channel *
+ef4_rx_queue_channel(struct ef4_rx_queue *rx_queue)
+{	/* @rx_queue must be the rx_queue member of a struct ef4_channel */
+	return container_of(rx_queue, struct ef4_channel, rx_queue);
+}
+
+static inline int ef4_rx_queue_index(struct ef4_rx_queue *rx_queue)
+{	/* The RX queue index is the index of the channel that contains it */
+	return ef4_rx_queue_channel(rx_queue)->channel;
+}
+
+/* Look up the receive buffer at position @index of the RX descriptor
+ * queue; @index is not range-checked here.
+ */
+static inline struct ef4_rx_buffer *ef4_rx_buffer(struct ef4_rx_queue *rx_queue,
+						  unsigned int index)
+{
+	return rx_queue->buffer + index;
+}
+
+/**
+ * EF4_MAX_FRAME_LEN - calculate maximum frame length
+ *
+ * This calculates the maximum frame length that will be used for a
+ * given MTU.  The frame length will be equal to the MTU plus a
+ * constant amount of header space and padding.  This is the quantity
+ * that the net driver will program into the MAC as the maximum frame
+ * length.
+ *
+ * The 10G MAC requires 8-byte alignment on the frame
+ * length, so we round up to the next multiple of 8.
+ *
+ * Re-clocking by the XGXS on RX can reduce an IPG to 32 bits (half an
+ * XGMII cycle).  If the frame length reaches the maximum value in the
+ * same cycle, the XMAC can miss the IPG altogether.  We work around
+ * this by adding a further 16 bytes (%EF4_FRAME_PAD).
+ */
+#define EF4_FRAME_PAD	16
+#define EF4_MAX_FRAME_LEN(mtu) \
+	(ALIGN(((mtu) + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN + EF4_FRAME_PAD), 8))
+
+/* Get all supported features.
+ * If a feature is not fixed, it is present in hw_features.
+ * If a feature is fixed, it is not present in hw_features, but is
+ * always present in features.
+ */
+static inline netdev_features_t ef4_supported_features(const struct ef4_nic *efx)
+{
+	netdev_features_t supported = efx->net_dev->hw_features;
+
+	return supported | efx->net_dev->features;
+}
+
+/* Current TX queue insert index: insert_count wrapped by ptr_mask. */
+static inline unsigned int
+ef4_tx_queue_get_insert_index(const struct ef4_tx_queue *tx_queue)
+{
+	return tx_queue->ptr_mask & tx_queue->insert_count;
+}
+
+/* TX buffer at the current insert index (no in-use check). */
+static inline struct ef4_tx_buffer *
+__ef4_tx_queue_get_insert_buffer(const struct ef4_tx_queue *tx_queue)
+{
+	return tx_queue->buffer + ef4_tx_queue_get_insert_index(tx_queue);
+}
+
+/* Get the TX buffer at the insert index, asserting it's not currently in use. */
+static inline struct ef4_tx_buffer *
+ef4_tx_queue_get_insert_buffer(const struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_tx_buffer *buffer =
+		__ef4_tx_queue_get_insert_buffer(tx_queue);
+
+	EF4_BUG_ON_PARANOID(buffer->len);	/* a free slot has all three zero */
+	EF4_BUG_ON_PARANOID(buffer->flags);
+	EF4_BUG_ON_PARANOID(buffer->unmap_len);
+
+	return buffer;
+}
+
+#endif /* EF4_NET_DRIVER_H */
diff --git a/drivers/net/ethernet/sfc/falcon/nic.c b/drivers/net/ethernet/sfc/falcon/nic.c
new file mode 100644
index 000000000000..a8ecb33390da
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/nic.c
@@ -0,0 +1,527 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/bitops.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/cpu_rmap.h>
+#include "net_driver.h"
+#include "bitfield.h"
+#include "efx.h"
+#include "nic.h"
+#include "farch_regs.h"
+#include "io.h"
+#include "workarounds.h"
+
+/**************************************************************************
+ *
+ * Generic buffer handling
+ * These buffers are used for interrupt status, MAC stats, etc.
+ *
+ **************************************************************************/
+
+int ef4_nic_alloc_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer,
+			 unsigned int len, gfp_t gfp_flags)
+{	/* Allocate @len bytes of coherent DMA memory; returns 0 or -ENOMEM. */
+	buffer->addr = dma_zalloc_coherent(&efx->pci_dev->dev, len,
+					   &buffer->dma_addr, gfp_flags);
+	if (!buffer->addr)
+		return -ENOMEM;	/* buffer->len is left untouched on failure */
+	buffer->len = len;	/* needed later by ef4_nic_free_buffer() */
+	return 0;
+}
+
+void ef4_nic_free_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer)
+{	/* Free the DMA memory of @buffer; does nothing if addr is NULL. */
+	if (buffer->addr) {
+		dma_free_coherent(&efx->pci_dev->dev, buffer->len,
+				  buffer->addr, buffer->dma_addr);
+		buffer->addr = NULL;	/* a second call becomes a no-op */
+	}
+}
+
+/* Check whether an event is present in the eventq at the current
+ * read pointer (channel->eventq_read_ptr).  Only useful for self-test.
+ */
+bool ef4_nic_event_present(struct ef4_channel *channel)
+{
+	return ef4_event_present(ef4_event(channel, channel->eventq_read_ptr));
+}
+
+void ef4_nic_event_test_start(struct ef4_channel *channel)
+{	/* Reset the recorded test CPU, then have the NIC type generate a test event. */
+	channel->event_test_cpu = -1;	/* presumably "no test event seen yet" - confirm in handler */
+	smp_wmb();	/* write barrier between the reset above and the generate call */
+	channel->efx->type->ev_test_generate(channel);
+}
+
+int ef4_nic_irq_test_start(struct ef4_nic *efx)
+{	/* Reset the recorded IRQ CPU, then have the NIC type generate a test IRQ. */
+	efx->last_irq_cpu = -1;	/* presumably "no interrupt seen yet" - confirm in handler */
+	smp_wmb();	/* write barrier between the reset above and the generate call */
+	return efx->type->irq_test_generate(efx);	/* result is passed straight through */
+}
+
+/* Hook interrupt handler(s)
+ * Legacy mode hooks one shared IRQ; MSI/MSI-X hooks one IRQ per channel.
+ */
+int ef4_nic_init_interrupt(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+	unsigned int n_irqs;	/* number of channel IRQs hooked so far, for unwinding */
+	int rc;
+
+	if (!EF4_INT_MODE_USE_MSI(efx)) {
+		rc = request_irq(efx->legacy_irq,
+				 efx->type->irq_handle_legacy, IRQF_SHARED,
+				 efx->name, efx);
+		if (rc) {
+			netif_err(efx, drv, efx->net_dev,
+				  "failed to hook legacy IRQ %d\n",
+				  efx->pci_dev->irq);
+			goto fail1;
+		}
+		return 0;	/* legacy mode needs nothing further */
+	}
+
+#ifdef CONFIG_RFS_ACCEL
+	if (efx->interrupt_mode == EF4_INT_MODE_MSIX) {
+		efx->net_dev->rx_cpu_rmap =
+			alloc_irq_cpu_rmap(efx->n_rx_channels);
+		if (!efx->net_dev->rx_cpu_rmap) {
+			rc = -ENOMEM;
+			goto fail1;	/* nothing hooked yet, nothing to undo */
+		}
+	}
+#endif
+
+	/* Hook MSI or MSI-X interrupt, one per channel */
+	n_irqs = 0;
+	ef4_for_each_channel(channel, efx) {
+		rc = request_irq(channel->irq, efx->type->irq_handle_msi,
+				 IRQF_PROBE_SHARED, /* Not shared */
+				 efx->msi_context[channel->channel].name,
+				 &efx->msi_context[channel->channel]);
+		if (rc) {
+			netif_err(efx, drv, efx->net_dev,
+				  "failed to hook IRQ %d\n", channel->irq);
+			goto fail2;
+		}
+		++n_irqs;
+
+#ifdef CONFIG_RFS_ACCEL
+		if (efx->interrupt_mode == EF4_INT_MODE_MSIX &&
+		    channel->channel < efx->n_rx_channels) {
+			rc = irq_cpu_rmap_add(efx->net_dev->rx_cpu_rmap,
+					      channel->irq);
+			if (rc)
+				goto fail2;	/* this channel's IRQ is already counted in n_irqs */
+		}
+#endif
+	}
+
+	return 0;
+
+ fail2:
+#ifdef CONFIG_RFS_ACCEL
+	free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+	efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+	ef4_for_each_channel(channel, efx) {
+		if (n_irqs-- == 0)	/* stop once every hooked IRQ has been freed */
+			break;
+		free_irq(channel->irq, &efx->msi_context[channel->channel]);
+	}
+ fail1:
+	return rc;	/* negative error code from the failing step */
+}
+
+void ef4_nic_fini_interrupt(struct ef4_nic *efx)
+{	/* Undo ef4_nic_init_interrupt(): drop the rmap and free the IRQ(s). */
+	struct ef4_channel *channel;
+
+#ifdef CONFIG_RFS_ACCEL
+	free_irq_cpu_rmap(efx->net_dev->rx_cpu_rmap);
+	efx->net_dev->rx_cpu_rmap = NULL;
+#endif
+
+	if (EF4_INT_MODE_USE_MSI(efx)) {
+		/* Free the per-channel MSI/MSI-X interrupts */
+		ef4_for_each_channel(channel, efx)
+			free_irq(channel->irq,
+				 &efx->msi_context[channel->channel]);
+	} else {
+		/* Free the single legacy interrupt */
+		free_irq(efx->legacy_irq, efx);
+	}
+}
+
+/* Register dump: each entry below is valid for a range of NIC revisions */
+
+#define REGISTER_REVISION_FA	1
+#define REGISTER_REVISION_FB	2
+#define REGISTER_REVISION_FC	3
+#define REGISTER_REVISION_FZ	3	/* last Falcon arch revision */
+#define REGISTER_REVISION_ED	4
+#define REGISTER_REVISION_EZ	4	/* latest EF10 revision */
+
+struct ef4_nic_reg {
+	u32 offset:24;	/* register offset, as passed to ef4_reado() */
+	u32 min_revision:3, max_revision:3;	/* inclusive bounds on efx->type->revision */
+};
+
+#define REGISTER(name, arch, min_rev, max_rev) {			\
+	arch ## R_ ## min_rev ## max_rev ## _ ## name,			\
+	REGISTER_REVISION_ ## arch ## min_rev,				\
+	REGISTER_REVISION_ ## arch ## max_rev				\
+}
+#define REGISTER_AA(name) REGISTER(name, F, A, A)	/* uses FR_AA_<name> */
+#define REGISTER_AB(name) REGISTER(name, F, A, B)	/* uses FR_AB_<name> */
+#define REGISTER_AZ(name) REGISTER(name, F, A, Z)	/* uses FR_AZ_<name> */
+#define REGISTER_BB(name) REGISTER(name, F, B, B)	/* uses FR_BB_<name> */
+#define REGISTER_BZ(name) REGISTER(name, F, B, Z)	/* uses FR_BZ_<name> */
+#define REGISTER_CZ(name) REGISTER(name, F, C, Z)	/* uses FR_CZ_<name> */
+
+static const struct ef4_nic_reg ef4_nic_regs[] = {	/* single registers in the dump */
+	REGISTER_AZ(ADR_REGION),
+	REGISTER_AZ(INT_EN_KER),
+	REGISTER_BZ(INT_EN_CHAR),
+	REGISTER_AZ(INT_ADR_KER),
+	REGISTER_BZ(INT_ADR_CHAR),
+	/* INT_ACK_KER is WO */
+	/* INT_ISR0 is RC */
+	REGISTER_AZ(HW_INIT),
+	REGISTER_CZ(USR_EV_CFG),
+	REGISTER_AB(EE_SPI_HCMD),
+	REGISTER_AB(EE_SPI_HADR),
+	REGISTER_AB(EE_SPI_HDATA),
+	REGISTER_AB(EE_BASE_PAGE),
+	REGISTER_AB(EE_VPD_CFG0),
+	/* EE_VPD_SW_CNTL and EE_VPD_SW_DATA are not used */
+	/* PMBX_DBG_IADDR and PMBX_DBG_IDATA are indirect */
+	/* PCIE_CORE_INDIRECT is indirect */
+	REGISTER_AB(NIC_STAT),
+	REGISTER_AB(GPIO_CTL),
+	REGISTER_AB(GLB_CTL),
+	/* FATAL_INTR_KER and FATAL_INTR_CHAR are partly RC */
+	REGISTER_BZ(DP_CTRL),
+	REGISTER_AZ(MEM_STAT),
+	REGISTER_AZ(CS_DEBUG),
+	REGISTER_AZ(ALTERA_BUILD),
+	REGISTER_AZ(CSR_SPARE),
+	REGISTER_AB(PCIE_SD_CTL0123),
+	REGISTER_AB(PCIE_SD_CTL45),
+	REGISTER_AB(PCIE_PCS_CTL_STAT),
+	/* DEBUG_DATA_OUT is not used */
+	/* DRV_EV is WO */
+	REGISTER_AZ(EVQ_CTL),
+	REGISTER_AZ(EVQ_CNT1),
+	REGISTER_AZ(EVQ_CNT2),
+	REGISTER_AZ(BUF_TBL_CFG),
+	REGISTER_AZ(SRM_RX_DC_CFG),
+	REGISTER_AZ(SRM_TX_DC_CFG),
+	REGISTER_AZ(SRM_CFG),
+	/* BUF_TBL_UPD is WO */
+	REGISTER_AZ(SRM_UPD_EVQ),
+	REGISTER_AZ(SRAM_PARITY),
+	REGISTER_AZ(RX_CFG),
+	REGISTER_BZ(RX_FILTER_CTL),
+	/* RX_FLUSH_DESCQ is WO */
+	REGISTER_AZ(RX_DC_CFG),
+	REGISTER_AZ(RX_DC_PF_WM),
+	REGISTER_BZ(RX_RSS_TKEY),
+	/* RX_NODESC_DROP is RC */
+	REGISTER_AA(RX_SELF_RST),
+	/* RX_DEBUG, RX_PUSH_DROP are not used */
+	REGISTER_CZ(RX_RSS_IPV6_REG1),
+	REGISTER_CZ(RX_RSS_IPV6_REG2),
+	REGISTER_CZ(RX_RSS_IPV6_REG3),
+	/* TX_FLUSH_DESCQ is WO */
+	REGISTER_AZ(TX_DC_CFG),
+	REGISTER_AA(TX_CHKSM_CFG),
+	REGISTER_AZ(TX_CFG),
+	/* TX_PUSH_DROP is not used */
+	REGISTER_AZ(TX_RESERVED),
+	REGISTER_BZ(TX_PACE),
+	/* TX_PACE_DROP_QID is RC */
+	REGISTER_BB(TX_VLAN),
+	REGISTER_BZ(TX_IPFIL_PORTEN),
+	REGISTER_AB(MD_TXD),
+	REGISTER_AB(MD_RXD),
+	REGISTER_AB(MD_CS),
+	REGISTER_AB(MD_PHY_ADR),
+	REGISTER_AB(MD_ID),
+	/* MD_STAT is RC */
+	REGISTER_AB(MAC_STAT_DMA),
+	REGISTER_AB(MAC_CTRL),
+	REGISTER_BB(GEN_MODE),
+	REGISTER_AB(MAC_MC_HASH_REG0),
+	REGISTER_AB(MAC_MC_HASH_REG1),
+	REGISTER_AB(GM_CFG1),
+	REGISTER_AB(GM_CFG2),
+	/* GM_IPG and GM_HD are not used */
+	REGISTER_AB(GM_MAX_FLEN),
+	/* GM_TEST is not used */
+	REGISTER_AB(GM_ADR1),
+	REGISTER_AB(GM_ADR2),
+	REGISTER_AB(GMF_CFG0),
+	REGISTER_AB(GMF_CFG1),
+	REGISTER_AB(GMF_CFG2),
+	REGISTER_AB(GMF_CFG3),
+	REGISTER_AB(GMF_CFG4),
+	REGISTER_AB(GMF_CFG5),
+	REGISTER_BB(TX_SRC_MAC_CTL),
+	REGISTER_AB(XM_ADR_LO),
+	REGISTER_AB(XM_ADR_HI),
+	REGISTER_AB(XM_GLB_CFG),
+	REGISTER_AB(XM_TX_CFG),
+	REGISTER_AB(XM_RX_CFG),
+	REGISTER_AB(XM_MGT_INT_MASK),
+	REGISTER_AB(XM_FC),
+	REGISTER_AB(XM_PAUSE_TIME),
+	REGISTER_AB(XM_TX_PARAM),
+	REGISTER_AB(XM_RX_PARAM),
+	/* XM_MGT_INT_MSK (note no 'A') is RC */
+	REGISTER_AB(XX_PWR_RST),
+	REGISTER_AB(XX_SD_CTL),
+	REGISTER_AB(XX_TXDRV_CTL),
+	/* XX_PRBS_CTL, XX_PRBS_CHK and XX_PRBS_ERR are not used */
+	/* XX_CORE_STAT is partly RC */
+};
+
+struct ef4_nic_reg_table {
+	u32 offset:24;	/* offset of the first row */
+	u32 min_revision:3, max_revision:3;	/* inclusive bounds on efx->type->revision */
+	u32 step:6, rows:21;	/* row step (4, 8, 16 or 32 handled by the dump) and row count */
+};
+
+#define REGISTER_TABLE_DIMENSIONS(_, offset, arch, min_rev, max_rev, step, rows) { \
+	offset,								\
+	REGISTER_REVISION_ ## arch ## min_rev,				\
+	REGISTER_REVISION_ ## arch ## max_rev,				\
+	step, rows							\
+}
+#define REGISTER_TABLE(name, arch, min_rev, max_rev)			\
+	REGISTER_TABLE_DIMENSIONS(					\
+		name, arch ## R_ ## min_rev ## max_rev ## _ ## name,	\
+		arch, min_rev, max_rev,					\
+		arch ## R_ ## min_rev ## max_rev ## _ ## name ## _STEP,	\
+		arch ## R_ ## min_rev ## max_rev ## _ ## name ## _ROWS)
+#define REGISTER_TABLE_AA(name) REGISTER_TABLE(name, F, A, A)
+#define REGISTER_TABLE_AZ(name) REGISTER_TABLE(name, F, A, Z)
+#define REGISTER_TABLE_BB(name) REGISTER_TABLE(name, F, B, B)
+#define REGISTER_TABLE_BZ(name) REGISTER_TABLE(name, F, B, Z)
+#define REGISTER_TABLE_BB_CZ(name)					\
+	REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, B, B,	\
+				  FR_BZ_ ## name ## _STEP,		\
+				  FR_BB_ ## name ## _ROWS),		\
+	REGISTER_TABLE_DIMENSIONS(name, FR_BZ_ ## name, F, C, Z,	\
+				  FR_BZ_ ## name ## _STEP,		\
+				  FR_CZ_ ## name ## _ROWS)
+#define REGISTER_TABLE_CZ(name) REGISTER_TABLE(name, F, C, Z)	/* note: _BB_CZ above expands to TWO entries */
+
+static const struct ef4_nic_reg_table ef4_nic_reg_tables[] = {	/* register tables in the dump */
+	/* DRIVER is not used */
+	/* EVQ_RPTR, TIMER_COMMAND, USR_EV and {RX,TX}_DESC_UPD are WO */
+	REGISTER_TABLE_BB(TX_IPFIL_TBL),
+	REGISTER_TABLE_BB(TX_SRC_MAC_TBL),
+	REGISTER_TABLE_AA(RX_DESC_PTR_TBL_KER),
+	REGISTER_TABLE_BB_CZ(RX_DESC_PTR_TBL),
+	REGISTER_TABLE_AA(TX_DESC_PTR_TBL_KER),
+	REGISTER_TABLE_BB_CZ(TX_DESC_PTR_TBL),
+	REGISTER_TABLE_AA(EVQ_PTR_TBL_KER),
+	REGISTER_TABLE_BB_CZ(EVQ_PTR_TBL),
+	/* We can't reasonably read all of the buffer table (up to 8MB!).
+	 * However this driver will only use a few entries.  Reading
+	 * 1K entries allows for some expansion of queue count and
+	 * size before we need to change the version. */
+	REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL_KER, FR_AA_BUF_FULL_TBL_KER,
+				  F, A, A, 8, 1024),
+	REGISTER_TABLE_DIMENSIONS(BUF_FULL_TBL, FR_BZ_BUF_FULL_TBL,
+				  F, B, Z, 8, 1024),
+	REGISTER_TABLE_CZ(RX_MAC_FILTER_TBL0),
+	REGISTER_TABLE_BB_CZ(TIMER_TBL),
+	REGISTER_TABLE_BB_CZ(TX_PACE_TBL),
+	REGISTER_TABLE_BZ(RX_INDIRECTION_TBL),
+	/* TX_FILTER_TBL0 is huge and not used by this driver */
+	REGISTER_TABLE_CZ(TX_MAC_FILTER_TBL0),
+	REGISTER_TABLE_CZ(MC_TREG_SMEM),
+	/* MSIX_PBA_TABLE is not mapped */
+	/* SRM_DBG is not mapped (and is redundant with BUF_FULL_TBL) */
+	REGISTER_TABLE_BZ(RX_FILTER_TBL0),
+};
+
+size_t ef4_nic_get_regs_len(struct ef4_nic *efx)
+{	/* Return the byte size of the dump for this NIC's revision. */
+	const struct ef4_nic_reg *reg;
+	const struct ef4_nic_reg_table *table;
+	size_t len = 0;
+
+	for (reg = ef4_nic_regs;
+	     reg < ef4_nic_regs + ARRAY_SIZE(ef4_nic_regs);
+	     reg++)
+		if (efx->type->revision >= reg->min_revision &&
+		    efx->type->revision <= reg->max_revision)
+			len += sizeof(ef4_oword_t);	/* one oword per applicable register */
+
+	for (table = ef4_nic_reg_tables;
+	     table < ef4_nic_reg_tables + ARRAY_SIZE(ef4_nic_reg_tables);
+	     table++)
+		if (efx->type->revision >= table->min_revision &&
+		    efx->type->revision <= table->max_revision)
+			len += table->rows * min_t(size_t, table->step, 16);	/* at most 16 bytes per row */
+
+	return len;
+}
+
+void ef4_nic_get_regs(struct ef4_nic *efx, void *buf)
+{	/* Dump the applicable registers, then the applicable tables, into @buf. */
+	const struct ef4_nic_reg *reg;
+	const struct ef4_nic_reg_table *table;
+
+	for (reg = ef4_nic_regs;
+	     reg < ef4_nic_regs + ARRAY_SIZE(ef4_nic_regs);
+	     reg++) {
+		if (efx->type->revision >= reg->min_revision &&
+		    efx->type->revision <= reg->max_revision) {
+			ef4_reado(efx, (ef4_oword_t *)buf, reg->offset);
+			buf += sizeof(ef4_oword_t);	/* same stride as ef4_nic_get_regs_len() */
+		}
+	}
+
+	for (table = ef4_nic_reg_tables;
+	     table < ef4_nic_reg_tables + ARRAY_SIZE(ef4_nic_reg_tables);
+	     table++) {
+		size_t size, i;
+
+		if (!(efx->type->revision >= table->min_revision &&
+		      efx->type->revision <= table->max_revision))
+			continue;	/* table does not exist on this revision */
+
+		size = min_t(size_t, table->step, 16);	/* bytes stored per row */
+
+		for (i = 0; i < table->rows; i++) {
+			switch (table->step) {
+			case 4: /* 32-bit SRAM */
+				ef4_readd(efx, buf, table->offset + 4 * i);
+				break;
+			case 8: /* 64-bit SRAM */
+				ef4_sram_readq(efx,
+					       efx->membase + table->offset,
+					       buf, i);
+				break;
+			case 16: /* 128-bit-readable register */
+				ef4_reado_table(efx, buf, table->offset, i);
+				break;
+			case 32: /* 128-bit register, interleaved */
+				ef4_reado_table(efx, buf, table->offset, 2 * i);
+				break;
+			default:
+				WARN_ON(1);
+				return;	/* unsupported step: the rest of the dump is not written */
+			}
+			buf += size;
+		}
+	}
+}
+
+/**
+ * ef4_nic_describe_stats - Describe supported statistics for ethtool
+ * @desc: Array of &struct ef4_hw_stat_desc describing the statistics
+ * @count: Length of the @desc array
+ * @mask: Bitmask of which elements of @desc are enabled
+ * @names: Buffer to copy names to, or %NULL.  The names are copied
+ *	starting at intervals of %ETH_GSTRING_LEN bytes.
+ *
+ * Returns the number of visible statistics, i.e. the number of set
+ * bits in the first @count bits of @mask for which a name is defined.
+ */
+size_t ef4_nic_describe_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+			      const unsigned long *mask, u8 *names)
+{
+	size_t visible = 0;
+	size_t index;
+
+	for_each_set_bit(index, mask, count) {
+		if (desc[index].name) {	/* enabled stats without a name are skipped */
+			if (names) {	/* with NULL @names the stats are only counted */
+				strlcpy(names, desc[index].name,
+					ETH_GSTRING_LEN);
+				names += ETH_GSTRING_LEN;	/* next fixed-size name slot */
+			}
+			++visible;
+		}
+	}
+
+	return visible;
+}
+
+/**
+ * ef4_nic_update_stats - Convert statistics DMA buffer to array of u64
+ * @desc: Array of &struct ef4_hw_stat_desc describing the DMA buffer
+ *	layout.  DMA widths of 0, 16, 32 and 64 are supported; where
+ *	the width is specified as 0 the corresponding element of
+ *	@stats is not updated.
+ * @count: Length of the @desc array
+ * @mask: Bitmask of which elements of @desc are enabled
+ * @stats: Buffer to update with the converted statistics.  The length
+ *	of this array must be at least @count.
+ * @dma_buf: DMA buffer containing hardware statistics
+ * @accumulate: If set, the converted values will be added rather than
+ *	directly stored to the corresponding elements of @stats
+ */
+void ef4_nic_update_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+			  const unsigned long *mask,
+			  u64 *stats, const void *dma_buf, bool accumulate)
+{
+	size_t index;
+
+	for_each_set_bit(index, mask, count) {
+		if (desc[index].dma_width) {	/* width 0: leave stats[index] alone */
+			const void *addr = dma_buf + desc[index].offset;
+			u64 val;
+
+			switch (desc[index].dma_width) {	/* values are read as little-endian */
+			case 16:
+				val = le16_to_cpup((__le16 *)addr);
+				break;
+			case 32:
+				val = le32_to_cpup((__le32 *)addr);
+				break;
+			case 64:
+				val = le64_to_cpup((__le64 *)addr);
+				break;
+			default:
+				WARN_ON(1);
+				val = 0;	/* unsupported width is treated as zero */
+				break;
+			}
+
+			if (accumulate)
+				stats[index] += val;
+			else
+				stats[index] = val;
+		}
+	}
+}
+
+void ef4_nic_fix_nodesc_drop_stat(struct ef4_nic *efx, u64 *rx_nodesc_drops)
+{	/* Subtract from *rx_nodesc_drops the drops accumulated while down. */
+	/* if down, or this is the first update after coming up */
+	if (!(efx->net_dev->flags & IFF_UP) || !efx->rx_nodesc_drops_prev_state)
+		efx->rx_nodesc_drops_while_down +=
+			*rx_nodesc_drops - efx->rx_nodesc_drops_total;	/* growth since the previous call */
+	efx->rx_nodesc_drops_total = *rx_nodesc_drops;	/* remember the unadjusted total */
+	efx->rx_nodesc_drops_prev_state = !!(efx->net_dev->flags & IFF_UP);
+	*rx_nodesc_drops -= efx->rx_nodesc_drops_while_down;	/* value reported to the caller */
+}
diff --git a/drivers/net/ethernet/sfc/falcon/nic.h b/drivers/net/ethernet/sfc/falcon/nic.h
new file mode 100644
index 000000000000..a4c4592f6023
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/nic.h
@@ -0,0 +1,513 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_NIC_H
+#define EF4_NIC_H
+
+#include <linux/net_tstamp.h>
+#include <linux/i2c-algo-bit.h>
+#include "net_driver.h"
+#include "efx.h"
+
+enum {	/* Falcon revisions, in increasing order; compared against ef4_nic_rev() */
+	EF4_REV_FALCON_A0 = 0,
+	EF4_REV_FALCON_A1 = 1,
+	EF4_REV_FALCON_B0 = 2,
+};
+
+static inline int ef4_nic_rev(struct ef4_nic *efx)
+{
+	return efx->type->revision;	/* revision recorded in the NIC type descriptor */
+}
+
+u32 ef4_farch_fpga_ver(struct ef4_nic *efx);
+
+/* NIC has two interlinked PCI functions for the same port (revisions before B0). */
+static inline bool ef4_nic_is_dual_func(struct ef4_nic *efx)
+{
+	return ef4_nic_rev(efx) < EF4_REV_FALCON_B0;
+}
+
+/* Return a pointer to the event queue entry at @index (wrapped by eventq_mask) */
+static inline ef4_qword_t *ef4_event(struct ef4_channel *channel,
+				     unsigned int index)
+{
+	return ((ef4_qword_t *) (channel->eventq.buf.addr)) +
+		(index & channel->eventq_mask);
+}
+
+/* See if an event is present
+ *
+ * We check both the high and low dword of the event for all ones.  We
+ * wrote all ones when we cleared the event, and no valid event can
+ * have all ones in either its high or low dwords.  This approach is
+ * robust against reordering.
+ *
+ * Note that using a single 64-bit comparison is incorrect; even
+ * though the CPU read will be atomic, the DMA write may not be.
+ */
+static inline int ef4_event_present(ef4_qword_t *event)
+{
+	return !(EF4_DWORD_IS_ALL_ONES(event->dword[0]) |	/* bitwise OR: both dwords are always tested */
+		  EF4_DWORD_IS_ALL_ONES(event->dword[1]));
+}
+
+/* Returns a pointer to the specified transmit descriptor in the
+ * descriptor ring of the specified TX queue.  @index is used unmasked.
+ */
+static inline ef4_qword_t *
+ef4_tx_desc(struct ef4_tx_queue *tx_queue, unsigned int index)
+{
+	return ((ef4_qword_t *) (tx_queue->txd.buf.addr)) + index;
+}
+
+/* Get the partner of a TX queue, seen as part of the same net core queue */
+static inline struct ef4_tx_queue *ef4_tx_queue_partner(struct ef4_tx_queue *tx_queue)
+{
+	/* The partner is EF4_TXQ_TYPE_OFFLOAD entries below this queue
+	 * when the queue's offload bit is set, and above it otherwise. */
+	return (tx_queue->queue & EF4_TXQ_TYPE_OFFLOAD) ?
+		tx_queue - EF4_TXQ_TYPE_OFFLOAD : tx_queue + EF4_TXQ_TYPE_OFFLOAD;
+}
+
+/* Report whether this TX queue would be empty for the given write_count.
+ * May return false negative.
+ */
+static inline bool __ef4_nic_tx_is_empty(struct ef4_tx_queue *tx_queue,
+					 unsigned int write_count)
+{
+	unsigned int empty_read_count = ACCESS_ONCE(tx_queue->empty_read_count);	/* single snapshot */
+
+	if (empty_read_count == 0)	/* presumably "no empty state recorded" - confirm at the writer */
+		return false;
+
+	return ((empty_read_count ^ write_count) & ~EF4_EMPTY_COUNT_VALID) == 0;	/* equal apart from the VALID bit(s) */
+}
+
+/* Decide whether to push a TX descriptor to the NIC vs merely writing
+ * the doorbell.  This can reduce latency when we are adding a single
+ * descriptor to an empty queue, but is otherwise pointless.  Further,
+ * Falcon and Siena have hardware bugs (SF bug 33851) that may be
+ * triggered if we don't check this.
+ * We use the write_count used for the last doorbell push, to get the
+ * NIC's view of the tx queue.
+ */
+static inline bool ef4_nic_may_push_tx_desc(struct ef4_tx_queue *tx_queue,
+					    unsigned int write_count)
+{
+	bool was_empty = __ef4_nic_tx_is_empty(tx_queue, write_count);
+
+	tx_queue->empty_read_count = 0;	/* side effect: cleared on every call */
+	return was_empty && tx_queue->write_count - write_count == 1;	/* exactly one descriptor added */
+}
+
+/* Returns a pointer to descriptor @index in the RX descriptor ring (index unmasked) */
+static inline ef4_qword_t *
+ef4_rx_desc(struct ef4_rx_queue *rx_queue, unsigned int index)
+{
+	return ((ef4_qword_t *) (rx_queue->rxd.buf.addr)) + index;
+}
+
+enum {	/* PHY type identifiers; values are explicit and non-contiguous (no 5 or 7) */
+	PHY_TYPE_NONE = 0,
+	PHY_TYPE_TXC43128 = 1,
+	PHY_TYPE_88E1111 = 2,
+	PHY_TYPE_SFX7101 = 3,
+	PHY_TYPE_QT2022C2 = 4,
+	PHY_TYPE_PM8358 = 6,
+	PHY_TYPE_SFT9001A = 8,
+	PHY_TYPE_QT2025C = 9,
+	PHY_TYPE_SFT9001B = 10,
+};
+
+#define FALCON_XMAC_LOOPBACKS			\
+	((1 << LOOPBACK_XGMII) |		\
+	 (1 << LOOPBACK_XGXS) |			\
+	 (1 << LOOPBACK_XAUI))
+
+/* Alignment of PCIe DMA boundaries (4KB) */
+#define EF4_PAGE_SIZE	4096
+/* Size and alignment of buffer table entries (same) */
+#define EF4_BUF_SIZE	EF4_PAGE_SIZE
+
+/* NIC-generic software stats; these occupy the first statistic indices */
+enum {
+	GENERIC_STAT_rx_noskb_drops,
+	GENERIC_STAT_rx_nodesc_trunc,
+	GENERIC_STAT_COUNT	/* number of generic stats; first FALCON_STAT_* index */
+};
+
+/**
+ * struct falcon_board_type - per-board-type identity and operations
+ * @id: Board type id, as found in NVRAM
+ * @init: Allocate resources and initialise peripheral hardware
+ * @init_phy: Perform board-specific PHY initialisation
+ * @fini: Shut down hardware and release resources
+ * @set_id_led: Set the identifying LED state, or revert to automatic function
+ * @monitor: Board-specific health check
+ */
+struct falcon_board_type {
+	u8 id;
+	int (*init) (struct ef4_nic *efx);
+	void (*init_phy) (struct ef4_nic *efx);
+	void (*fini) (struct ef4_nic *efx);
+	void (*set_id_led) (struct ef4_nic *efx, enum ef4_led_mode mode);
+	int (*monitor) (struct ef4_nic *efx);
+};
+
+/**
+ * struct falcon_board - board information
+ * @type: Type of board (operations and NVRAM id)
+ * @major: Major revision ('A', 'B', ...)
+ * @minor: Minor revision (0, 1, ...)
+ * @i2c_adap: I2C adapter for on-board peripherals
+ * @i2c_data: Data for bit-banging algorithm
+ * @hwmon_client: I2C client for hardware monitor
+ * @ioexp_client: I2C client for power/port control
+ */
+struct falcon_board {
+	const struct falcon_board_type *type;
+	int major;
+	int minor;
+	struct i2c_adapter i2c_adap;	/* embedded in this structure, not a pointer */
+	struct i2c_algo_bit_data i2c_data;	/* embedded in this structure, not a pointer */
+	struct i2c_client *hwmon_client, *ioexp_client;
+};
+
+/**
+ * struct falcon_spi_device - a Falcon SPI (Serial Peripheral Interface) device
+ * @device_id:		Controller's id for the device
+ * @size:		Size (in bytes); zero means the device is not present
+ * @addr_len:		Number of address bytes in read/write commands
+ * @munge_address:	Flag whether addresses should be munged.
+ *	Some devices with 9-bit addresses (e.g. AT25040A EEPROM)
+ *	use bit 3 of the command byte as address bit A8, rather
+ *	than having a two-byte address.  If this flag is set, then
+ *	commands should be munged in this way.
+ * @erase_command:	Erase command (or 0 if sector erase not needed).
+ * @erase_size:		Erase sector size (in bytes)
+ *	Erase commands affect sectors with this size and alignment.
+ *	This must be a power of two.
+ * @block_size:		Write block size (in bytes).
+ *	Write commands are limited to blocks with this size and alignment.
+ */
+struct falcon_spi_device {
+	int device_id;
+	unsigned int size;	/* tested by falcon_spi_present() */
+	unsigned int addr_len;
+	unsigned int munge_address:1;	/* single-bit flag */
+	u8 erase_command;
+	unsigned int erase_size;
+	unsigned int block_size;
+};
+
+static inline bool falcon_spi_present(const struct falcon_spi_device *spi)
+{
+	return spi->size != 0;	/* a device counts as present when its size is non-zero */
+}
+
+enum {	/* Falcon statistic indices, numbered on from the generic stats */
+	FALCON_STAT_tx_bytes = GENERIC_STAT_COUNT,
+	FALCON_STAT_tx_packets,
+	FALCON_STAT_tx_pause,
+	FALCON_STAT_tx_control,
+	FALCON_STAT_tx_unicast,
+	FALCON_STAT_tx_multicast,
+	FALCON_STAT_tx_broadcast,
+	FALCON_STAT_tx_lt64,
+	FALCON_STAT_tx_64,
+	FALCON_STAT_tx_65_to_127,
+	FALCON_STAT_tx_128_to_255,
+	FALCON_STAT_tx_256_to_511,
+	FALCON_STAT_tx_512_to_1023,
+	FALCON_STAT_tx_1024_to_15xx,
+	FALCON_STAT_tx_15xx_to_jumbo,
+	FALCON_STAT_tx_gtjumbo,
+	FALCON_STAT_tx_non_tcpudp,
+	FALCON_STAT_tx_mac_src_error,
+	FALCON_STAT_tx_ip_src_error,
+	FALCON_STAT_rx_bytes,
+	FALCON_STAT_rx_good_bytes,
+	FALCON_STAT_rx_bad_bytes,
+	FALCON_STAT_rx_packets,
+	FALCON_STAT_rx_good,
+	FALCON_STAT_rx_bad,
+	FALCON_STAT_rx_pause,
+	FALCON_STAT_rx_control,
+	FALCON_STAT_rx_unicast,
+	FALCON_STAT_rx_multicast,
+	FALCON_STAT_rx_broadcast,
+	FALCON_STAT_rx_lt64,
+	FALCON_STAT_rx_64,
+	FALCON_STAT_rx_65_to_127,
+	FALCON_STAT_rx_128_to_255,
+	FALCON_STAT_rx_256_to_511,
+	FALCON_STAT_rx_512_to_1023,
+	FALCON_STAT_rx_1024_to_15xx,
+	FALCON_STAT_rx_15xx_to_jumbo,
+	FALCON_STAT_rx_gtjumbo,
+	FALCON_STAT_rx_bad_lt64,
+	FALCON_STAT_rx_bad_gtjumbo,
+	FALCON_STAT_rx_overflow,
+	FALCON_STAT_rx_symbol_error,
+	FALCON_STAT_rx_align_error,
+	FALCON_STAT_rx_length_error,
+	FALCON_STAT_rx_internal_error,
+	FALCON_STAT_rx_nodesc_drop_cnt,
+	FALCON_STAT_COUNT	/* total count, generic stats included; sizes falcon_nic_data.stats[] */
+};
+
+/**
+ * struct falcon_nic_data - Falcon NIC state
+ * @pci_dev2: Secondary function of Falcon A
+ * @board: Board state and functions
+ * @stats: Hardware statistics
+ * @stats_disable_count: Nest count for disabling statistics fetches
+ * @stats_pending: Whether a DMA of MAC statistics is pending
+ * @stats_timer: A timer for regularly fetching MAC statistics.
+ * @spi_flash: SPI flash device
+ * @spi_eeprom: SPI EEPROM device
+ * @spi_lock: SPI bus lock
+ * @mdio_lock: MDIO bus lock
+ * @xmac_poll_required: XMAC link state needs polling
+ */
+struct falcon_nic_data {
+	struct pci_dev *pci_dev2;
+	struct falcon_board board;	/* returned by the falcon_board() accessor */
+	u64 stats[FALCON_STAT_COUNT];	/* sized to cover GENERIC_STAT_* and FALCON_STAT_* indices */
+	unsigned int stats_disable_count;
+	bool stats_pending;
+	struct timer_list stats_timer;
+	struct falcon_spi_device spi_flash;
+	struct falcon_spi_device spi_eeprom;
+	struct mutex spi_lock;
+	struct mutex mdio_lock;
+	bool xmac_poll_required;
+};
+
+static inline struct falcon_board *falcon_board(struct ef4_nic *efx)
+{
+	struct falcon_nic_data *data = efx->nic_data;	/* nic_data is treated as falcon_nic_data here */
+	return &data->board;	/* pointer into nic_data, not a copy */
+}
+
+struct ethtool_ts_info;
+
+extern const struct ef4_nic_type falcon_a1_nic_type;
+extern const struct ef4_nic_type falcon_b0_nic_type;
+
+/**************************************************************************
+ *
+ * Externs
+ *
+ **************************************************************************
+ */
+
+int falcon_probe_board(struct ef4_nic *efx, u16 revision_info);
+
+/* TX data path: thin wrappers that dispatch to the NIC type's tx_* hooks */
+static inline int ef4_nic_probe_tx(struct ef4_tx_queue *tx_queue)
+{
+	return tx_queue->efx->type->tx_probe(tx_queue);	/* hook's result is returned */
+}
+static inline void ef4_nic_init_tx(struct ef4_tx_queue *tx_queue)
+{
+	tx_queue->efx->type->tx_init(tx_queue);
+}
+static inline void ef4_nic_remove_tx(struct ef4_tx_queue *tx_queue)
+{
+	tx_queue->efx->type->tx_remove(tx_queue);
+}
+static inline void ef4_nic_push_buffers(struct ef4_tx_queue *tx_queue)
+{
+	tx_queue->efx->type->tx_write(tx_queue);	/* note: maps to the tx_write hook */
+}
+
+/* RX data path: thin wrappers that dispatch to the NIC type's rx_* hooks */
+static inline int ef4_nic_probe_rx(struct ef4_rx_queue *rx_queue)
+{
+	return rx_queue->efx->type->rx_probe(rx_queue);	/* hook's result is returned */
+}
+static inline void ef4_nic_init_rx(struct ef4_rx_queue *rx_queue)
+{
+	rx_queue->efx->type->rx_init(rx_queue);
+}
+static inline void ef4_nic_remove_rx(struct ef4_rx_queue *rx_queue)
+{
+	rx_queue->efx->type->rx_remove(rx_queue);
+}
+static inline void ef4_nic_notify_rx_desc(struct ef4_rx_queue *rx_queue)
+{
+	rx_queue->efx->type->rx_write(rx_queue);	/* note: maps to the rx_write hook */
+}
+static inline void ef4_nic_generate_fill_event(struct ef4_rx_queue *rx_queue)
+{
+	rx_queue->efx->type->rx_defer_refill(rx_queue);	/* note: maps to rx_defer_refill */
+}
+
+/* Event data path: thin wrappers that dispatch to the NIC type's ev_* hooks */
+static inline int ef4_nic_probe_eventq(struct ef4_channel *channel)
+{
+	return channel->efx->type->ev_probe(channel);	/* hook's result is returned */
+}
+static inline int ef4_nic_init_eventq(struct ef4_channel *channel)
+{
+	return channel->efx->type->ev_init(channel);	/* hook's result is returned */
+}
+static inline void ef4_nic_fini_eventq(struct ef4_channel *channel)
+{
+	channel->efx->type->ev_fini(channel);
+}
+static inline void ef4_nic_remove_eventq(struct ef4_channel *channel)
+{
+	channel->efx->type->ev_remove(channel);
+}
+static inline int
+ef4_nic_process_eventq(struct ef4_channel *channel, int quota)
+{
+	return channel->efx->type->ev_process(channel, quota);	/* @quota is passed through */
+}
+static inline void ef4_nic_eventq_read_ack(struct ef4_channel *channel)
+{
+	channel->efx->type->ev_read_ack(channel);
+}
+void ef4_nic_event_test_start(struct ef4_channel *channel);
+
+/* queue operations */
+int ef4_farch_tx_probe(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_init(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_fini(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_remove(struct ef4_tx_queue *tx_queue);
+void ef4_farch_tx_write(struct ef4_tx_queue *tx_queue);
+unsigned int ef4_farch_tx_limit_len(struct ef4_tx_queue *tx_queue,
+				    dma_addr_t dma_addr, unsigned int len);
+int ef4_farch_rx_probe(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_init(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_fini(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_remove(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_write(struct ef4_rx_queue *rx_queue);
+void ef4_farch_rx_defer_refill(struct ef4_rx_queue *rx_queue);
+int ef4_farch_ev_probe(struct ef4_channel *channel);
+int ef4_farch_ev_init(struct ef4_channel *channel);
+void ef4_farch_ev_fini(struct ef4_channel *channel);
+void ef4_farch_ev_remove(struct ef4_channel *channel);
+int ef4_farch_ev_process(struct ef4_channel *channel, int quota);
+void ef4_farch_ev_read_ack(struct ef4_channel *channel);
+void ef4_farch_ev_test_generate(struct ef4_channel *channel);
+
+/* filter operations */
+int ef4_farch_filter_table_probe(struct ef4_nic *efx);
+void ef4_farch_filter_table_restore(struct ef4_nic *efx);
+void ef4_farch_filter_table_remove(struct ef4_nic *efx);
+void ef4_farch_filter_update_rx_scatter(struct ef4_nic *efx);
+s32 ef4_farch_filter_insert(struct ef4_nic *efx, struct ef4_filter_spec *spec,
+			    bool replace);
+int ef4_farch_filter_remove_safe(struct ef4_nic *efx,
+				 enum ef4_filter_priority priority,
+				 u32 filter_id);
+int ef4_farch_filter_get_safe(struct ef4_nic *efx,
+			      enum ef4_filter_priority priority, u32 filter_id,
+			      struct ef4_filter_spec *);
+int ef4_farch_filter_clear_rx(struct ef4_nic *efx,
+			      enum ef4_filter_priority priority);
+u32 ef4_farch_filter_count_rx_used(struct ef4_nic *efx,
+				   enum ef4_filter_priority priority);
+u32 ef4_farch_filter_get_rx_id_limit(struct ef4_nic *efx);
+s32 ef4_farch_filter_get_rx_ids(struct ef4_nic *efx,
+				enum ef4_filter_priority priority, u32 *buf,
+				u32 size);
+#ifdef CONFIG_RFS_ACCEL
+s32 ef4_farch_filter_rfs_insert(struct ef4_nic *efx,
+				struct ef4_filter_spec *spec);
+bool ef4_farch_filter_rfs_expire_one(struct ef4_nic *efx, u32 flow_id,
+				     unsigned int index);
+#endif
+void ef4_farch_filter_sync_rx_mode(struct ef4_nic *efx);
+
+bool ef4_nic_event_present(struct ef4_channel *channel);
+
+/* Some statistics are computed as A - B where A and B each increase
+ * linearly with some hardware counter(s) and the counters are read
+ * asynchronously.  If the counters contributing to B are always read
+ * after those contributing to A, the computed value may be lower than
+ * the true value by some variable amount, and may decrease between
+ * subsequent computations.
+ *
+ * We should never allow statistics to decrease or to exceed the true
+ * value.  Since the computed value will never be greater than the
+ * true value, we can achieve this by only storing the computed value
+ * when it increases.
+ */
+static inline void ef4_update_diff_stat(u64 *stat, u64 diff)
+{
+	if ((s64)(diff - *stat) > 0)	/* difference is compared as a signed 64-bit value */
+		*stat = diff;	/* stored only when @diff is ahead of *stat */
+}
+
+/* Interrupts */
+int ef4_nic_init_interrupt(struct ef4_nic *efx);
+int ef4_nic_irq_test_start(struct ef4_nic *efx);
+void ef4_nic_fini_interrupt(struct ef4_nic *efx);
+void ef4_farch_irq_enable_master(struct ef4_nic *efx);
+int ef4_farch_irq_test_generate(struct ef4_nic *efx);
+void ef4_farch_irq_disable_master(struct ef4_nic *efx);
+irqreturn_t ef4_farch_msi_interrupt(int irq, void *dev_id);
+irqreturn_t ef4_farch_legacy_interrupt(int irq, void *dev_id);
+irqreturn_t ef4_farch_fatal_interrupt(struct ef4_nic *efx);
+
+static inline int ef4_nic_event_test_irq_cpu(struct ef4_channel *channel)
+{
+	return ACCESS_ONCE(channel->event_test_cpu);	/* -1 until changed after ef4_nic_event_test_start() */
+}
+static inline int ef4_nic_irq_test_irq_cpu(struct ef4_nic *efx)
+{
+	return ACCESS_ONCE(efx->last_irq_cpu);	/* -1 until changed after ef4_nic_irq_test_start() */
+}
+
+/* Global Resources */
+int ef4_nic_flush_queues(struct ef4_nic *efx);
+int ef4_farch_fini_dmaq(struct ef4_nic *efx);
+void ef4_farch_finish_flr(struct ef4_nic *efx);
+void falcon_start_nic_stats(struct ef4_nic *efx);
+void falcon_stop_nic_stats(struct ef4_nic *efx);
+int falcon_reset_xaui(struct ef4_nic *efx);
+void ef4_farch_dimension_resources(struct ef4_nic *efx, unsigned sram_lim_qw);
+void ef4_farch_init_common(struct ef4_nic *efx);
+void ef4_farch_rx_push_indir_table(struct ef4_nic *efx);
+
+int ef4_nic_alloc_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer,
+			 unsigned int len, gfp_t gfp_flags);
+void ef4_nic_free_buffer(struct ef4_nic *efx, struct ef4_buffer *buffer);
+
+/* Tests: one entry per register handed to ef4_farch_test_registers() */
+struct ef4_farch_register_test {
+	unsigned address;	/* presumably the register address - confirm in farch.c */
+	ef4_oword_t mask;	/* presumably the bits exercised by the test - confirm in farch.c */
+};
+int ef4_farch_test_registers(struct ef4_nic *efx,
+			     const struct ef4_farch_register_test *regs,
+			     size_t n_regs);
+
+size_t ef4_nic_get_regs_len(struct ef4_nic *efx);
+void ef4_nic_get_regs(struct ef4_nic *efx, void *buf);
+
+size_t ef4_nic_describe_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+			      const unsigned long *mask, u8 *names);
+void ef4_nic_update_stats(const struct ef4_hw_stat_desc *desc, size_t count,
+			  const unsigned long *mask, u64 *stats,
+			  const void *dma_buf, bool accumulate);
+void ef4_nic_fix_nodesc_drop_stat(struct ef4_nic *efx, u64 *stat);
+
+#define EF4_MAX_FLUSH_TIME 5000
+
+void ef4_farch_generate_event(struct ef4_nic *efx, unsigned int evq,
+			      ef4_qword_t *event);
+
+#endif /* EF4_NIC_H */
diff --git a/drivers/net/ethernet/sfc/phy.h b/drivers/net/ethernet/sfc/falcon/phy.h
index 803bf445c08e..362141cee313 100644
--- a/drivers/net/ethernet/sfc/phy.h
+++ b/drivers/net/ethernet/sfc/falcon/phy.h
@@ -7,20 +7,20 @@
  * by the Free Software Foundation, incorporated herein by reference.
  */
 
-#ifndef EFX_PHY_H
-#define EFX_PHY_H
+#ifndef EF4_PHY_H
+#define EF4_PHY_H
 
 /****************************************************************************
  * 10Xpress (SFX7101) PHY
  */
-extern const struct efx_phy_operations falcon_sfx7101_phy_ops;
+extern const struct ef4_phy_operations falcon_sfx7101_phy_ops;
 
-void tenxpress_set_id_led(struct efx_nic *efx, enum efx_led_mode mode);
+void tenxpress_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode);
 
 /****************************************************************************
  * AMCC/Quake QT202x PHYs
  */
-extern const struct efx_phy_operations falcon_qt202x_phy_ops;
+extern const struct ef4_phy_operations falcon_qt202x_phy_ops;
 
 /* These PHYs provide various H/W control states for LEDs */
 #define QUAKE_LED_LINK_INVAL	(0)
@@ -34,17 +34,17 @@ extern const struct efx_phy_operations falcon_qt202x_phy_ops;
 #define QUAKE_LED_TXLINK	(0)
 #define QUAKE_LED_RXLINK	(8)
 
-void falcon_qt202x_set_led(struct efx_nic *p, int led, int state);
+void falcon_qt202x_set_led(struct ef4_nic *p, int led, int state);
 
 /****************************************************************************
 * Transwitch CX4 retimer
 */
-extern const struct efx_phy_operations falcon_txc_phy_ops;
+extern const struct ef4_phy_operations falcon_txc_phy_ops;
 
 #define TXC_GPIO_DIR_INPUT	0
 #define TXC_GPIO_DIR_OUTPUT	1
 
-void falcon_txc_set_gpio_dir(struct efx_nic *efx, int pin, int dir);
-void falcon_txc_set_gpio_val(struct efx_nic *efx, int pin, int val);
+void falcon_txc_set_gpio_dir(struct ef4_nic *efx, int pin, int dir);
+void falcon_txc_set_gpio_val(struct ef4_nic *efx, int pin, int val);
 
 #endif
diff --git a/drivers/net/ethernet/sfc/qt202x_phy.c b/drivers/net/ethernet/sfc/falcon/qt202x_phy.c
index efa3612affca..d29331652548 100644
--- a/drivers/net/ethernet/sfc/qt202x_phy.c
+++ b/drivers/net/ethernet/sfc/falcon/qt202x_phy.c
@@ -50,14 +50,14 @@
 #define PCS_VEND1_REG		0xc000
 #define PCS_VEND1_LBTXD_LBN	5
 
-void falcon_qt202x_set_led(struct efx_nic *p, int led, int mode)
+void falcon_qt202x_set_led(struct ef4_nic *p, int led, int mode)
 {
 	int addr = MDIO_QUAKE_LED0_REG + led;
-	efx_mdio_write(p, MDIO_MMD_PMAPMD, addr, mode);
+	ef4_mdio_write(p, MDIO_MMD_PMAPMD, addr, mode);
 }
 
 struct qt202x_phy_data {
-	enum efx_phy_mode phy_mode;
+	enum ef4_phy_mode phy_mode;
 	bool bug17190_in_bad_state;
 	unsigned long bug17190_timer;
 	u32 firmware_ver;
@@ -73,7 +73,7 @@ struct qt202x_phy_data {
 
 #define BUG17190_INTERVAL (2 * HZ)
 
-static int qt2025c_wait_heartbeat(struct efx_nic *efx)
+static int qt2025c_wait_heartbeat(struct ef4_nic *efx)
 {
 	unsigned long timeout = jiffies + QT2025C_MAX_HEARTB_TIME;
 	int reg, old_counter = 0;
@@ -81,7 +81,7 @@ static int qt2025c_wait_heartbeat(struct efx_nic *efx)
 	/* Wait for firmware heartbeat to start */
 	for (;;) {
 		int counter;
-		reg = efx_mdio_read(efx, MDIO_MMD_PCS, PCS_FW_HEARTBEAT_REG);
+		reg = ef4_mdio_read(efx, MDIO_MMD_PCS, PCS_FW_HEARTBEAT_REG);
 		if (reg < 0)
 			return reg;
 		counter = ((reg >> PCS_FW_HEARTB_LBN) &
@@ -105,14 +105,14 @@ static int qt2025c_wait_heartbeat(struct efx_nic *efx)
 	return 0;
 }
 
-static int qt2025c_wait_fw_status_good(struct efx_nic *efx)
+static int qt2025c_wait_fw_status_good(struct ef4_nic *efx)
 {
 	unsigned long timeout = jiffies + QT2025C_MAX_FWSTART_TIME;
 	int reg;
 
 	/* Wait for firmware status to look good */
 	for (;;) {
-		reg = efx_mdio_read(efx, MDIO_MMD_PCS, PCS_UC8051_STATUS_REG);
+		reg = ef4_mdio_read(efx, MDIO_MMD_PCS, PCS_UC8051_STATUS_REG);
 		if (reg < 0)
 			return reg;
 		if ((reg &
@@ -127,15 +127,15 @@ static int qt2025c_wait_fw_status_good(struct efx_nic *efx)
 	return 0;
 }
 
-static void qt2025c_restart_firmware(struct efx_nic *efx)
+static void qt2025c_restart_firmware(struct ef4_nic *efx)
 {
 	/* Restart microcontroller execution of firmware from RAM */
-	efx_mdio_write(efx, 3, 0xe854, 0x00c0);
-	efx_mdio_write(efx, 3, 0xe854, 0x0040);
+	ef4_mdio_write(efx, 3, 0xe854, 0x00c0);
+	ef4_mdio_write(efx, 3, 0xe854, 0x0040);
 	msleep(50);
 }
 
-static int qt2025c_wait_reset(struct efx_nic *efx)
+static int qt2025c_wait_reset(struct ef4_nic *efx)
 {
 	int rc;
 
@@ -160,14 +160,14 @@ static int qt2025c_wait_reset(struct efx_nic *efx)
 	return rc;
 }
 
-static void qt2025c_firmware_id(struct efx_nic *efx)
+static void qt2025c_firmware_id(struct ef4_nic *efx)
 {
 	struct qt202x_phy_data *phy_data = efx->phy_data;
 	u8 firmware_id[9];
 	size_t i;
 
 	for (i = 0; i < sizeof(firmware_id); i++)
-		firmware_id[i] = efx_mdio_read(efx, MDIO_MMD_PCS,
+		firmware_id[i] = ef4_mdio_read(efx, MDIO_MMD_PCS,
 					       PCS_FW_PRODUCT_CODE_1 + i);
 	netif_info(efx, probe, efx->net_dev,
 		   "QT2025C firmware %xr%d v%d.%d.%d.%d [20%02d-%02d-%02d]\n",
@@ -180,7 +180,7 @@ static void qt2025c_firmware_id(struct efx_nic *efx)
 				 (firmware_id[4] << 8) | firmware_id[5];
 }
 
-static void qt2025c_bug17190_workaround(struct efx_nic *efx)
+static void qt2025c_bug17190_workaround(struct ef4_nic *efx)
 {
 	struct qt202x_phy_data *phy_data = efx->phy_data;
 
@@ -191,7 +191,7 @@ static void qt2025c_bug17190_workaround(struct efx_nic *efx)
 	 * recover it.
 	 */
 	if (efx->link_state.up ||
-	    !efx_mdio_links_ok(efx, MDIO_DEVS_PMAPMD | MDIO_DEVS_PHYXS)) {
+	    !ef4_mdio_links_ok(efx, MDIO_DEVS_PMAPMD | MDIO_DEVS_PHYXS)) {
 		phy_data->bug17190_in_bad_state = false;
 		return;
 	}
@@ -204,16 +204,16 @@ static void qt2025c_bug17190_workaround(struct efx_nic *efx)
 
 	if (time_after_eq(jiffies, phy_data->bug17190_timer)) {
 		netif_dbg(efx, hw, efx->net_dev, "bashing QT2025C PMA/PMD\n");
-		efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+		ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
 				  MDIO_PMA_CTRL1_LOOPBACK, true);
 		msleep(100);
-		efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
+		ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, MDIO_CTRL1,
 				  MDIO_PMA_CTRL1_LOOPBACK, false);
 		phy_data->bug17190_timer = jiffies + BUG17190_INTERVAL;
 	}
 }
 
-static int qt2025c_select_phy_mode(struct efx_nic *efx)
+static int qt2025c_select_phy_mode(struct ef4_nic *efx)
 {
 	struct qt202x_phy_data *phy_data = efx->phy_data;
 	struct falcon_board *board = falcon_board(efx);
@@ -233,7 +233,7 @@ static int qt2025c_select_phy_mode(struct efx_nic *efx)
 	phy_op_mode = (efx->loopback_mode == LOOPBACK_NONE) ? 0x0038 : 0x0020;
 
 	/* Only change mode if really necessary */
-	reg = efx_mdio_read(efx, 1, 0xc319);
+	reg = ef4_mdio_read(efx, 1, 0xc319);
 	if ((reg & 0x0038) == phy_op_mode)
 		return 0;
 	netif_dbg(efx, hw, efx->net_dev, "Switching PHY to mode 0x%04x\n",
@@ -243,52 +243,52 @@ static int qt2025c_select_phy_mode(struct efx_nic *efx)
 	 * EEPROM (including the differences between board revisions), except
 	 * that the operating mode is changed, and the PHY is prevented from
 	 * unnecessarily reloading the main firmware image again. */
-	efx_mdio_write(efx, 1, 0xc300, 0x0000);
+	ef4_mdio_write(efx, 1, 0xc300, 0x0000);
 	/* (Note: this portion of the boot EEPROM sequence, which bit-bashes 9
 	 * STOPs onto the firmware/module I2C bus to reset it, varies across
 	 * board revisions, as the bus is connected to different GPIO/LED
 	 * outputs on the PHY.) */
 	if (board->major == 0 && board->minor < 2) {
-		efx_mdio_write(efx, 1, 0xc303, 0x4498);
+		ef4_mdio_write(efx, 1, 0xc303, 0x4498);
 		for (i = 0; i < 9; i++) {
-			efx_mdio_write(efx, 1, 0xc303, 0x4488);
-			efx_mdio_write(efx, 1, 0xc303, 0x4480);
-			efx_mdio_write(efx, 1, 0xc303, 0x4490);
-			efx_mdio_write(efx, 1, 0xc303, 0x4498);
+			ef4_mdio_write(efx, 1, 0xc303, 0x4488);
+			ef4_mdio_write(efx, 1, 0xc303, 0x4480);
+			ef4_mdio_write(efx, 1, 0xc303, 0x4490);
+			ef4_mdio_write(efx, 1, 0xc303, 0x4498);
 		}
 	} else {
-		efx_mdio_write(efx, 1, 0xc303, 0x0920);
-		efx_mdio_write(efx, 1, 0xd008, 0x0004);
+		ef4_mdio_write(efx, 1, 0xc303, 0x0920);
+		ef4_mdio_write(efx, 1, 0xd008, 0x0004);
 		for (i = 0; i < 9; i++) {
-			efx_mdio_write(efx, 1, 0xc303, 0x0900);
-			efx_mdio_write(efx, 1, 0xd008, 0x0005);
-			efx_mdio_write(efx, 1, 0xc303, 0x0920);
-			efx_mdio_write(efx, 1, 0xd008, 0x0004);
+			ef4_mdio_write(efx, 1, 0xc303, 0x0900);
+			ef4_mdio_write(efx, 1, 0xd008, 0x0005);
+			ef4_mdio_write(efx, 1, 0xc303, 0x0920);
+			ef4_mdio_write(efx, 1, 0xd008, 0x0004);
 		}
-		efx_mdio_write(efx, 1, 0xc303, 0x4900);
+		ef4_mdio_write(efx, 1, 0xc303, 0x4900);
 	}
-	efx_mdio_write(efx, 1, 0xc303, 0x4900);
-	efx_mdio_write(efx, 1, 0xc302, 0x0004);
-	efx_mdio_write(efx, 1, 0xc316, 0x0013);
-	efx_mdio_write(efx, 1, 0xc318, 0x0054);
-	efx_mdio_write(efx, 1, 0xc319, phy_op_mode);
-	efx_mdio_write(efx, 1, 0xc31a, 0x0098);
-	efx_mdio_write(efx, 3, 0x0026, 0x0e00);
-	efx_mdio_write(efx, 3, 0x0027, 0x0013);
-	efx_mdio_write(efx, 3, 0x0028, 0xa528);
-	efx_mdio_write(efx, 1, 0xd006, 0x000a);
-	efx_mdio_write(efx, 1, 0xd007, 0x0009);
-	efx_mdio_write(efx, 1, 0xd008, 0x0004);
+	ef4_mdio_write(efx, 1, 0xc303, 0x4900);
+	ef4_mdio_write(efx, 1, 0xc302, 0x0004);
+	ef4_mdio_write(efx, 1, 0xc316, 0x0013);
+	ef4_mdio_write(efx, 1, 0xc318, 0x0054);
+	ef4_mdio_write(efx, 1, 0xc319, phy_op_mode);
+	ef4_mdio_write(efx, 1, 0xc31a, 0x0098);
+	ef4_mdio_write(efx, 3, 0x0026, 0x0e00);
+	ef4_mdio_write(efx, 3, 0x0027, 0x0013);
+	ef4_mdio_write(efx, 3, 0x0028, 0xa528);
+	ef4_mdio_write(efx, 1, 0xd006, 0x000a);
+	ef4_mdio_write(efx, 1, 0xd007, 0x0009);
+	ef4_mdio_write(efx, 1, 0xd008, 0x0004);
 	/* This additional write is not present in the boot EEPROM.  It
 	 * prevents the PHY's internal boot ROM doing another pointless (and
 	 * slow) reload of the firmware image (the microcontroller's code
 	 * memory is not affected by the microcontroller reset). */
-	efx_mdio_write(efx, 1, 0xc317, 0x00ff);
+	ef4_mdio_write(efx, 1, 0xc317, 0x00ff);
 	/* PMA/PMD loopback sets RXIN to inverse polarity and the firmware
 	 * restart doesn't reset it. We need to do that ourselves. */
-	efx_mdio_set_flag(efx, 1, PMA_PMD_MODE_REG,
+	ef4_mdio_set_flag(efx, 1, PMA_PMD_MODE_REG,
 			  1 << PMA_PMD_RXIN_SEL_LBN, false);
-	efx_mdio_write(efx, 1, 0xc300, 0x0002);
+	ef4_mdio_write(efx, 1, 0xc300, 0x0002);
 	msleep(20);
 
 	/* Restart microcontroller execution of firmware from RAM */
@@ -306,7 +306,7 @@ static int qt2025c_select_phy_mode(struct efx_nic *efx)
 	return 0;
 }
 
-static int qt202x_reset_phy(struct efx_nic *efx)
+static int qt202x_reset_phy(struct ef4_nic *efx)
 {
 	int rc;
 
@@ -319,7 +319,7 @@ static int qt202x_reset_phy(struct efx_nic *efx)
 	} else {
 		/* Reset the PHYXS MMD. This is documented as doing
 		 * a complete soft reset. */
-		rc = efx_mdio_reset_mmd(efx, MDIO_MMD_PHYXS,
+		rc = ef4_mdio_reset_mmd(efx, MDIO_MMD_PHYXS,
 					QT2022C2_MAX_RESET_TIME /
 					QT2022C2_RESET_WAIT,
 					QT2022C2_RESET_WAIT);
@@ -339,7 +339,7 @@ static int qt202x_reset_phy(struct efx_nic *efx)
 	return rc;
 }
 
-static int qt202x_phy_probe(struct efx_nic *efx)
+static int qt202x_phy_probe(struct ef4_nic *efx)
 {
 	struct qt202x_phy_data *phy_data;
 
@@ -357,7 +357,7 @@ static int qt202x_phy_probe(struct efx_nic *efx)
 	return 0;
 }
 
-static int qt202x_phy_init(struct efx_nic *efx)
+static int qt202x_phy_init(struct ef4_nic *efx)
 {
 	u32 devid;
 	int rc;
@@ -368,11 +368,11 @@ static int qt202x_phy_init(struct efx_nic *efx)
 		return rc;
 	}
 
-	devid = efx_mdio_read_id(efx, MDIO_MMD_PHYXS);
+	devid = ef4_mdio_read_id(efx, MDIO_MMD_PHYXS);
 	netif_info(efx, probe, efx->net_dev,
 		   "PHY ID reg %x (OUI %06x model %02x revision %x)\n",
-		   devid, efx_mdio_id_oui(devid), efx_mdio_id_model(devid),
-		   efx_mdio_id_rev(devid));
+		   devid, ef4_mdio_id_oui(devid), ef4_mdio_id_model(devid),
+		   ef4_mdio_id_rev(devid));
 
 	if (efx->phy_type == PHY_TYPE_QT2025C)
 		qt2025c_firmware_id(efx);
@@ -380,12 +380,12 @@ static int qt202x_phy_init(struct efx_nic *efx)
 	return 0;
 }
 
-static int qt202x_link_ok(struct efx_nic *efx)
+static int qt202x_link_ok(struct ef4_nic *efx)
 {
-	return efx_mdio_links_ok(efx, QT202X_REQUIRED_DEVS);
+	return ef4_mdio_links_ok(efx, QT202X_REQUIRED_DEVS);
 }
 
-static bool qt202x_phy_poll(struct efx_nic *efx)
+static bool qt202x_phy_poll(struct ef4_nic *efx)
 {
 	bool was_up = efx->link_state.up;
 
@@ -400,7 +400,7 @@ static bool qt202x_phy_poll(struct efx_nic *efx)
 	return efx->link_state.up != was_up;
 }
 
-static int qt202x_phy_reconfigure(struct efx_nic *efx)
+static int qt202x_phy_reconfigure(struct ef4_nic *efx)
 {
 	struct qt202x_phy_data *phy_data = efx->phy_data;
 
@@ -427,29 +427,29 @@ static int qt202x_phy_reconfigure(struct efx_nic *efx)
 		    (phy_data->phy_mode & PHY_MODE_TX_DISABLED))
 			qt202x_reset_phy(efx);
 
-		efx_mdio_transmit_disable(efx);
+		ef4_mdio_transmit_disable(efx);
 	}
 
-	efx_mdio_phy_reconfigure(efx);
+	ef4_mdio_phy_reconfigure(efx);
 
 	phy_data->phy_mode = efx->phy_mode;
 
 	return 0;
 }
 
-static void qt202x_phy_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+static void qt202x_phy_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
 {
 	mdio45_ethtool_gset(&efx->mdio, ecmd);
 }
 
-static void qt202x_phy_remove(struct efx_nic *efx)
+static void qt202x_phy_remove(struct ef4_nic *efx)
 {
 	/* Free the context block */
 	kfree(efx->phy_data);
 	efx->phy_data = NULL;
 }
 
-static int qt202x_phy_get_module_info(struct efx_nic *efx,
+static int qt202x_phy_get_module_info(struct ef4_nic *efx,
 				      struct ethtool_modinfo *modinfo)
 {
 	modinfo->type = ETH_MODULE_SFF_8079;
@@ -457,10 +457,10 @@ static int qt202x_phy_get_module_info(struct efx_nic *efx,
 	return 0;
 }
 
-static int qt202x_phy_get_module_eeprom(struct efx_nic *efx,
+static int qt202x_phy_get_module_eeprom(struct ef4_nic *efx,
 					struct ethtool_eeprom *ee, u8 *data)
 {
-	int mmd, reg_base, rc, i;		
+	int mmd, reg_base, rc, i;
 
 	if (efx->phy_type == PHY_TYPE_QT2025C) {
 		mmd = MDIO_MMD_PCS;
@@ -471,7 +471,7 @@ static int qt202x_phy_get_module_eeprom(struct efx_nic *efx,
 	}
 
 	for (i = 0; i < ee->len; i++) {
-		rc = efx_mdio_read(efx, mmd, reg_base + ee->offset + i);
+		rc = ef4_mdio_read(efx, mmd, reg_base + ee->offset + i);
 		if (rc < 0)
 			return rc;
 		data[i] = rc;
@@ -480,16 +480,16 @@ static int qt202x_phy_get_module_eeprom(struct efx_nic *efx,
 	return 0;
 }
 
-const struct efx_phy_operations falcon_qt202x_phy_ops = {
+const struct ef4_phy_operations falcon_qt202x_phy_ops = {
 	.probe		 = qt202x_phy_probe,
 	.init		 = qt202x_phy_init,
 	.reconfigure	 = qt202x_phy_reconfigure,
 	.poll		 = qt202x_phy_poll,
-	.fini		 = efx_port_dummy_op_void,
+	.fini		 = ef4_port_dummy_op_void,
 	.remove		 = qt202x_phy_remove,
 	.get_settings	 = qt202x_phy_get_settings,
-	.set_settings	 = efx_mdio_set_settings,
-	.test_alive	 = efx_mdio_test_alive,
+	.set_settings	 = ef4_mdio_set_settings,
+	.test_alive	 = ef4_mdio_test_alive,
 	.get_module_eeprom = qt202x_phy_get_module_eeprom,
 	.get_module_info = qt202x_phy_get_module_info,
 };
diff --git a/drivers/net/ethernet/sfc/falcon/rx.c b/drivers/net/ethernet/sfc/falcon/rx.c
new file mode 100644
index 000000000000..250458cbdb4d
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/rx.c
@@ -0,0 +1,974 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/socket.h>
+#include <linux/in.h>
+#include <linux/slab.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/prefetch.h>
+#include <linux/moduleparam.h>
+#include <linux/iommu.h>
+#include <net/ip.h>
+#include <net/checksum.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "filter.h"
+#include "nic.h"
+#include "selftest.h"
+#include "workarounds.h"
+
+/* Preferred number of descriptors to fill at once */
+#define EF4_RX_PREFERRED_BATCH 8U
+
+/* Number of RX buffers to recycle pages for.  When creating the RX page recycle
+ * ring, this number is divided by the number of buffers per page to calculate
+ * the number of pages to store in the RX page recycle ring.
+ */
+#define EF4_RECYCLE_RING_SIZE_IOMMU 4096
+#define EF4_RECYCLE_RING_SIZE_NOIOMMU (2 * EF4_RX_PREFERRED_BATCH)
+
+/* Size of buffer allocated for skb header area. */
+#define EF4_SKB_HEADERS  128u
+
+/* This is the percentage fill level below which new RX descriptors
+ * will be added to the RX descriptor ring.
+ */
+static unsigned int rx_refill_threshold;
+
+/* Each packet can consume up to ceil(max_frame_len / buffer_size) buffers */
+#define EF4_RX_MAX_FRAGS DIV_ROUND_UP(EF4_MAX_FRAME_LEN(EF4_MAX_MTU), \
+				      EF4_RX_USR_BUF_SIZE)
+
+/*
+ * RX maximum head room required.
+ *
+ * This must be at least 1 to prevent overflow, plus one packet-worth
+ * to allow pipelined receives.
+ */
+#define EF4_RXD_HEAD_ROOM (1 + EF4_RX_MAX_FRAGS)
+
+static inline u8 *ef4_rx_buf_va(struct ef4_rx_buffer *buf)
+{
+	return page_address(buf->page) + buf->page_offset;
+}
+
+static inline u32 ef4_rx_buf_hash(struct ef4_nic *efx, const u8 *eh)
+{
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+	return __le32_to_cpup((const __le32 *)(eh + efx->rx_packet_hash_offset));
+#else
+	const u8 *data = eh + efx->rx_packet_hash_offset;
+	return (u32)data[0]	  |
+	       (u32)data[1] << 8  |
+	       (u32)data[2] << 16 |
+	       (u32)data[3] << 24;
+#endif
+}
+
+static inline struct ef4_rx_buffer *
+ef4_rx_buf_next(struct ef4_rx_queue *rx_queue, struct ef4_rx_buffer *rx_buf)
+{
+	if (unlikely(rx_buf == ef4_rx_buffer(rx_queue, rx_queue->ptr_mask)))
+		return ef4_rx_buffer(rx_queue, 0);
+	else
+		return rx_buf + 1;
+}
+
+static inline void ef4_sync_rx_buffer(struct ef4_nic *efx,
+				      struct ef4_rx_buffer *rx_buf,
+				      unsigned int len)
+{
+	dma_sync_single_for_cpu(&efx->pci_dev->dev, rx_buf->dma_addr, len,
+				DMA_FROM_DEVICE);
+}
+
+void ef4_rx_config_page_split(struct ef4_nic *efx)
+{
+	efx->rx_page_buf_step = ALIGN(efx->rx_dma_len + efx->rx_ip_align,
+				      EF4_RX_BUF_ALIGNMENT);
+	efx->rx_bufs_per_page = efx->rx_buffer_order ? 1 :
+		((PAGE_SIZE - sizeof(struct ef4_rx_page_state)) /
+		 efx->rx_page_buf_step);
+	efx->rx_buffer_truesize = (PAGE_SIZE << efx->rx_buffer_order) /
+		efx->rx_bufs_per_page;
+	efx->rx_pages_per_batch = DIV_ROUND_UP(EF4_RX_PREFERRED_BATCH,
+					       efx->rx_bufs_per_page);
+}
+
+/* Check the RX page recycle ring for a page that can be reused. */
+static struct page *ef4_reuse_page(struct ef4_rx_queue *rx_queue)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	struct page *page;
+	struct ef4_rx_page_state *state;
+	unsigned index;
+
+	index = rx_queue->page_remove & rx_queue->page_ptr_mask;
+	page = rx_queue->page_ring[index];
+	if (page == NULL)
+		return NULL;
+
+	rx_queue->page_ring[index] = NULL;
+	/* page_remove cannot exceed page_add. */
+	if (rx_queue->page_remove != rx_queue->page_add)
+		++rx_queue->page_remove;
+
+	/* If page_count is 1 then we hold the only reference to this page. */
+	if (page_count(page) == 1) {
+		++rx_queue->page_recycle_count;
+		return page;
+	} else {
+		state = page_address(page);
+		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+			       PAGE_SIZE << efx->rx_buffer_order,
+			       DMA_FROM_DEVICE);
+		put_page(page);
+		++rx_queue->page_recycle_failed;
+	}
+
+	return NULL;
+}
+
+/**
+ * ef4_init_rx_buffers - create a batch of page-based RX buffers
+ * @rx_queue:		Efx RX queue
+ * @atomic:		Allocate with GFP_ATOMIC rather than GFP_KERNEL
+ *
+ * This allocates a batch of pages, maps them for DMA, and populates
+ * a struct ef4_rx_buffer for each buffer. Returns a negative error code
+ * or 0 on success. If a single page can be used for multiple buffers,
+ * then the page will either be inserted fully, or not at all.
+ */
+static int ef4_init_rx_buffers(struct ef4_rx_queue *rx_queue, bool atomic)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	struct ef4_rx_buffer *rx_buf;
+	struct page *page;
+	unsigned int page_offset;
+	struct ef4_rx_page_state *state;
+	dma_addr_t dma_addr;
+	unsigned index, count;
+
+	count = 0;
+	do {
+		page = ef4_reuse_page(rx_queue);
+		if (page == NULL) {
+			page = alloc_pages(__GFP_COLD | __GFP_COMP |
+					   (atomic ? GFP_ATOMIC : GFP_KERNEL),
+					   efx->rx_buffer_order);
+			if (unlikely(page == NULL))
+				return -ENOMEM;
+			dma_addr =
+				dma_map_page(&efx->pci_dev->dev, page, 0,
+					     PAGE_SIZE << efx->rx_buffer_order,
+					     DMA_FROM_DEVICE);
+			if (unlikely(dma_mapping_error(&efx->pci_dev->dev,
+						       dma_addr))) {
+				__free_pages(page, efx->rx_buffer_order);
+				return -EIO;
+			}
+			state = page_address(page);
+			state->dma_addr = dma_addr;
+		} else {
+			state = page_address(page);
+			dma_addr = state->dma_addr;
+		}
+
+		dma_addr += sizeof(struct ef4_rx_page_state);
+		page_offset = sizeof(struct ef4_rx_page_state);
+
+		do {
+			index = rx_queue->added_count & rx_queue->ptr_mask;
+			rx_buf = ef4_rx_buffer(rx_queue, index);
+			rx_buf->dma_addr = dma_addr + efx->rx_ip_align;
+			rx_buf->page = page;
+			rx_buf->page_offset = page_offset + efx->rx_ip_align;
+			rx_buf->len = efx->rx_dma_len;
+			rx_buf->flags = 0;
+			++rx_queue->added_count;
+			get_page(page);
+			dma_addr += efx->rx_page_buf_step;
+			page_offset += efx->rx_page_buf_step;
+		} while (page_offset + efx->rx_page_buf_step <= PAGE_SIZE);
+
+		rx_buf->flags = EF4_RX_BUF_LAST_IN_PAGE;
+	} while (++count < efx->rx_pages_per_batch);
+
+	return 0;
+}
+
+/* Unmap a DMA-mapped page.  This function is only called for the final RX
+ * buffer in a page.
+ */
+static void ef4_unmap_rx_buffer(struct ef4_nic *efx,
+				struct ef4_rx_buffer *rx_buf)
+{
+	struct page *page = rx_buf->page;
+
+	if (page) {
+		struct ef4_rx_page_state *state = page_address(page);
+		dma_unmap_page(&efx->pci_dev->dev,
+			       state->dma_addr,
+			       PAGE_SIZE << efx->rx_buffer_order,
+			       DMA_FROM_DEVICE);
+	}
+}
+
+static void ef4_free_rx_buffers(struct ef4_rx_queue *rx_queue,
+				struct ef4_rx_buffer *rx_buf,
+				unsigned int num_bufs)
+{
+	do {
+		if (rx_buf->page) {
+			put_page(rx_buf->page);
+			rx_buf->page = NULL;
+		}
+		rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
+	} while (--num_bufs);
+}
+
+/* Attempt to recycle the page if there is an RX recycle ring; the page can
+ * only be added if this is the final RX buffer, to prevent pages being used in
+ * the descriptor ring and appearing in the recycle ring simultaneously.
+ */
+static void ef4_recycle_rx_page(struct ef4_channel *channel,
+				struct ef4_rx_buffer *rx_buf)
+{
+	struct page *page = rx_buf->page;
+	struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);
+	struct ef4_nic *efx = rx_queue->efx;
+	unsigned index;
+
+	/* Only recycle the page after processing the final buffer. */
+	if (!(rx_buf->flags & EF4_RX_BUF_LAST_IN_PAGE))
+		return;
+
+	index = rx_queue->page_add & rx_queue->page_ptr_mask;
+	if (rx_queue->page_ring[index] == NULL) {
+		unsigned read_index = rx_queue->page_remove &
+			rx_queue->page_ptr_mask;
+
+		/* The next slot in the recycle ring is available, but
+		 * increment page_remove if the read pointer currently
+		 * points here.
+		 */
+		if (read_index == index)
+			++rx_queue->page_remove;
+		rx_queue->page_ring[index] = page;
+		++rx_queue->page_add;
+		return;
+	}
+	++rx_queue->page_recycle_full;
+	ef4_unmap_rx_buffer(efx, rx_buf);
+	put_page(rx_buf->page);
+}
+
+static void ef4_fini_rx_buffer(struct ef4_rx_queue *rx_queue,
+			       struct ef4_rx_buffer *rx_buf)
+{
+	/* Release the page reference we hold for the buffer. */
+	if (rx_buf->page)
+		put_page(rx_buf->page);
+
+	/* If this is the last buffer in a page, unmap and free it. */
+	if (rx_buf->flags & EF4_RX_BUF_LAST_IN_PAGE) {
+		ef4_unmap_rx_buffer(rx_queue->efx, rx_buf);
+		ef4_free_rx_buffers(rx_queue, rx_buf, 1);
+	}
+	rx_buf->page = NULL;
+}
+
+/* Recycle the pages that are used by buffers that have just been received. */
+static void ef4_recycle_rx_pages(struct ef4_channel *channel,
+				 struct ef4_rx_buffer *rx_buf,
+				 unsigned int n_frags)
+{
+	struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);
+
+	do {
+		ef4_recycle_rx_page(channel, rx_buf);
+		rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
+	} while (--n_frags);
+}
+
+static void ef4_discard_rx_packet(struct ef4_channel *channel,
+				  struct ef4_rx_buffer *rx_buf,
+				  unsigned int n_frags)
+{
+	struct ef4_rx_queue *rx_queue = ef4_channel_get_rx_queue(channel);
+
+	ef4_recycle_rx_pages(channel, rx_buf, n_frags);
+
+	ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
+}
+
+/**
+ * ef4_fast_push_rx_descriptors - push new RX descriptors quickly
+ * @rx_queue:		RX descriptor queue
+ * @atomic:		Passed on to ef4_init_rx_buffers() for page allocation
+ *
+ * This will aim to fill the RX descriptor queue up to @rx_queue->max_fill.
+ * If allocation fails leaving the queue empty, a slow fill is scheduled.
+ *
+ * The caller must provide serialisation (none is used here). In practice,
+ * this means this function must run from the NAPI handler, or be called
+ * when NAPI is disabled.
+ */
+void ef4_fast_push_rx_descriptors(struct ef4_rx_queue *rx_queue, bool atomic)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	unsigned int fill_level, batch_size;
+	int space, rc = 0;
+
+	if (!rx_queue->refill_enabled)
+		return;
+
+	/* Calculate current fill level, and exit if we don't need to fill */
+	fill_level = (rx_queue->added_count - rx_queue->removed_count);
+	EF4_BUG_ON_PARANOID(fill_level > rx_queue->efx->rxq_entries);
+	if (fill_level >= rx_queue->fast_fill_trigger)
+		goto out;
+
+	/* Record minimum fill level */
+	if (unlikely(fill_level < rx_queue->min_fill)) {
+		if (fill_level)
+			rx_queue->min_fill = fill_level;
+	}
+
+	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+	space = rx_queue->max_fill - fill_level;
+	EF4_BUG_ON_PARANOID(space < batch_size);
+
+	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+		   "RX queue %d fast-filling descriptor ring from"
+		   " level %d to level %d\n",
+		   ef4_rx_queue_index(rx_queue), fill_level,
+		   rx_queue->max_fill);
+
+
+	do {
+		rc = ef4_init_rx_buffers(rx_queue, atomic);
+		if (unlikely(rc)) {
+			/* Ensure that we don't leave the rx queue empty */
+			if (rx_queue->added_count == rx_queue->removed_count)
+				ef4_schedule_slow_fill(rx_queue);
+			goto out;
+		}
+	} while ((space -= batch_size) >= batch_size);
+
+	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
+		   "RX queue %d fast-filled descriptor ring "
+		   "to level %d\n", ef4_rx_queue_index(rx_queue),
+		   rx_queue->added_count - rx_queue->removed_count);
+
+ out:
+	if (rx_queue->notified_count != rx_queue->added_count)
+		ef4_nic_notify_rx_desc(rx_queue);
+}
+
+void ef4_rx_slow_fill(unsigned long context)
+{
+	struct ef4_rx_queue *rx_queue = (struct ef4_rx_queue *)context;
+
+	/* Post an event to cause NAPI to run and refill the queue */
+	ef4_nic_generate_fill_event(rx_queue);
+	++rx_queue->slow_fill_count;
+}
+
+static void ef4_rx_packet__check_len(struct ef4_rx_queue *rx_queue,
+				     struct ef4_rx_buffer *rx_buf,
+				     int len)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;
+
+	if (likely(len <= max_len))
+		return;
+
+	/* The packet must be discarded, but a reset is only scheduled
+	 * if it overran the buffer itself and workaround 8071 applies
+	 */
+	rx_buf->flags |= EF4_RX_PKT_DISCARD;
+
+	if ((len > rx_buf->len) && EF4_WORKAROUND_8071(efx)) {
+		if (net_ratelimit())
+			netif_err(efx, rx_err, efx->net_dev,
+				  " RX queue %d seriously overlength "
+				  "RX event (0x%x > 0x%x+0x%x). Leaking\n",
+				  ef4_rx_queue_index(rx_queue), len, max_len,
+				  efx->type->rx_buffer_padding);
+		ef4_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
+	} else {
+		if (net_ratelimit())
+			netif_err(efx, rx_err, efx->net_dev,
+				  " RX queue %d overlength RX event "
+				  "(0x%x > 0x%x)\n",
+				  ef4_rx_queue_index(rx_queue), len, max_len);
+	}
+
+	ef4_rx_queue_channel(rx_queue)->n_rx_overlength++;
+}
+
+/* Pass a received packet up through GRO.  GRO can handle pages
+ * regardless of checksum state and skbs with a good checksum.
+ */
+static void
+ef4_rx_packet_gro(struct ef4_channel *channel, struct ef4_rx_buffer *rx_buf,
+		  unsigned int n_frags, u8 *eh)
+{
+	struct napi_struct *napi = &channel->napi_str;
+	gro_result_t gro_result;
+	struct ef4_nic *efx = channel->efx;
+	struct sk_buff *skb;
+
+	skb = napi_get_frags(napi);
+	if (unlikely(!skb)) {
+		struct ef4_rx_queue *rx_queue;
+
+		rx_queue = ef4_channel_get_rx_queue(channel);
+		ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
+		return;
+	}
+
+	if (efx->net_dev->features & NETIF_F_RXHASH)
+		skb_set_hash(skb, ef4_rx_buf_hash(efx, eh),
+			     PKT_HASH_TYPE_L3);
+	skb->ip_summed = ((rx_buf->flags & EF4_RX_PKT_CSUMMED) ?
+			  CHECKSUM_UNNECESSARY : CHECKSUM_NONE);
+
+	for (;;) {
+		skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+				   rx_buf->page, rx_buf->page_offset,
+				   rx_buf->len);
+		rx_buf->page = NULL;
+		skb->len += rx_buf->len;
+		if (skb_shinfo(skb)->nr_frags == n_frags)
+			break;
+
+		rx_buf = ef4_rx_buf_next(&channel->rx_queue, rx_buf);
+	}
+
+	skb->data_len = skb->len;
+	skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+	skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+	gro_result = napi_gro_frags(napi);
+	if (gro_result != GRO_DROP)
+		channel->irq_mod_score += 2;
+}
+
+/* Allocate and construct an SKB around page fragments */
+static struct sk_buff *ef4_rx_mk_skb(struct ef4_channel *channel,
+				     struct ef4_rx_buffer *rx_buf,
+				     unsigned int n_frags,
+				     u8 *eh, int hdr_len)
+{
+	struct ef4_nic *efx = channel->efx;
+	struct sk_buff *skb;
+
+	/* Allocate an SKB to store the headers */
+	skb = netdev_alloc_skb(efx->net_dev,
+			       efx->rx_ip_align + efx->rx_prefix_size +
+			       hdr_len);
+	if (unlikely(skb == NULL)) {
+		atomic_inc(&efx->n_rx_noskb_drops);
+		return NULL;
+	}
+
+	EF4_BUG_ON_PARANOID(rx_buf->len < hdr_len);
+
+	memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
+	       efx->rx_prefix_size + hdr_len);
+	skb_reserve(skb, efx->rx_ip_align + efx->rx_prefix_size);
+	__skb_put(skb, hdr_len);
+
+	/* Append the remaining page(s) onto the frag list */
+	if (rx_buf->len > hdr_len) {
+		rx_buf->page_offset += hdr_len;
+		rx_buf->len -= hdr_len;
+
+		for (;;) {
+			skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+					   rx_buf->page, rx_buf->page_offset,
+					   rx_buf->len);
+			rx_buf->page = NULL;
+			skb->len += rx_buf->len;
+			skb->data_len += rx_buf->len;
+			if (skb_shinfo(skb)->nr_frags == n_frags)
+				break;
+
+			rx_buf = ef4_rx_buf_next(&channel->rx_queue, rx_buf);
+		}
+	} else {
+		__free_pages(rx_buf->page, efx->rx_buffer_order);
+		rx_buf->page = NULL;
+		n_frags = 0;
+	}
+
+	skb->truesize += n_frags * efx->rx_buffer_truesize;
+
+	/* Move past the ethernet header */
+	skb->protocol = eth_type_trans(skb, efx->net_dev);
+
+	skb_mark_napi_id(skb, &channel->napi_str);
+
+	return skb;
+}
+
+void ef4_rx_packet(struct ef4_rx_queue *rx_queue, unsigned int index,
+		   unsigned int n_frags, unsigned int len, u16 flags)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	struct ef4_channel *channel = ef4_rx_queue_channel(rx_queue);
+	struct ef4_rx_buffer *rx_buf;
+
+	rx_queue->rx_packets++;
+
+	rx_buf = ef4_rx_buffer(rx_queue, index);
+	rx_buf->flags |= flags;
+
+	/* Validate the number of fragments and completed length */
+	if (n_frags == 1) {
+		if (!(flags & EF4_RX_PKT_PREFIX_LEN))
+			ef4_rx_packet__check_len(rx_queue, rx_buf, len);
+	} else if (unlikely(n_frags > EF4_RX_MAX_FRAGS) ||
+		   unlikely(len <= (n_frags - 1) * efx->rx_dma_len) ||
+		   unlikely(len > n_frags * efx->rx_dma_len) ||
+		   unlikely(!efx->rx_scatter)) {
+		/* If this isn't an explicit discard request, either
+		 * the hardware or the driver is broken.
+		 */
+		WARN_ON(!(len == 0 && rx_buf->flags & EF4_RX_PKT_DISCARD));
+		rx_buf->flags |= EF4_RX_PKT_DISCARD;
+	}
+
+	netif_vdbg(efx, rx_status, efx->net_dev,
+		   "RX queue %d received ids %x-%x len %d %s%s\n",
+		   ef4_rx_queue_index(rx_queue), index,
+		   (index + n_frags - 1) & rx_queue->ptr_mask, len,
+		   (rx_buf->flags & EF4_RX_PKT_CSUMMED) ? " [SUMMED]" : "",
+		   (rx_buf->flags & EF4_RX_PKT_DISCARD) ? " [DISCARD]" : "");
+
+	/* Discard packet, if instructed to do so.  Process the
+	 * previous receive first.
+	 */
+	if (unlikely(rx_buf->flags & EF4_RX_PKT_DISCARD)) {
+		ef4_rx_flush_packet(channel);
+		ef4_discard_rx_packet(channel, rx_buf, n_frags);
+		return;
+	}
+
+	if (n_frags == 1 && !(flags & EF4_RX_PKT_PREFIX_LEN))
+		rx_buf->len = len;
+
+	/* Release and/or sync the DMA mapping - assumes all RX buffers
+	 * consumed in-order per RX queue.
+	 */
+	ef4_sync_rx_buffer(efx, rx_buf, rx_buf->len);
+
+	/* Prefetch nice and early so data will (hopefully) be in cache by
+	 * the time we look at it.
+	 */
+	prefetch(ef4_rx_buf_va(rx_buf));
+
+	rx_buf->page_offset += efx->rx_prefix_size;
+	rx_buf->len -= efx->rx_prefix_size;
+
+	if (n_frags > 1) {
+		/* Release/sync DMA mapping for additional fragments.
+		 * Fix length for last fragment.
+		 */
+		unsigned int tail_frags = n_frags - 1;
+
+		for (;;) {
+			rx_buf = ef4_rx_buf_next(rx_queue, rx_buf);
+			if (--tail_frags == 0)
+				break;
+			ef4_sync_rx_buffer(efx, rx_buf, efx->rx_dma_len);
+		}
+		rx_buf->len = len - (n_frags - 1) * efx->rx_dma_len;
+		ef4_sync_rx_buffer(efx, rx_buf, rx_buf->len);
+	}
+
+	/* All fragments have been DMA-synced, so recycle pages. */
+	rx_buf = ef4_rx_buffer(rx_queue, index);
+	ef4_recycle_rx_pages(channel, rx_buf, n_frags);
+
+	/* Pipeline receives so that we give time for packet headers to be
+	 * prefetched into cache.
+	 */
+	ef4_rx_flush_packet(channel);
+	channel->rx_pkt_n_frags = n_frags;
+	channel->rx_pkt_index = index;
+}
+
+static void ef4_rx_deliver(struct ef4_channel *channel, u8 *eh,
+			   struct ef4_rx_buffer *rx_buf,
+			   unsigned int n_frags)
+{
+	struct sk_buff *skb;
+	u16 hdr_len = min_t(u16, rx_buf->len, EF4_SKB_HEADERS);
+
+	skb = ef4_rx_mk_skb(channel, rx_buf, n_frags, eh, hdr_len);
+	if (unlikely(skb == NULL)) {
+		struct ef4_rx_queue *rx_queue;
+
+		rx_queue = ef4_channel_get_rx_queue(channel);
+		ef4_free_rx_buffers(rx_queue, rx_buf, n_frags);
+		return;
+	}
+	skb_record_rx_queue(skb, channel->rx_queue.core_index);
+
+	/* Set the SKB flags */
+	skb_checksum_none_assert(skb);
+	if (likely(rx_buf->flags & EF4_RX_PKT_CSUMMED))
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+
+	if (channel->type->receive_skb)
+		if (channel->type->receive_skb(channel, skb))
+			return;
+
+	/* Pass the packet up */
+	netif_receive_skb(skb);
+}
+
+/* Handle a received packet.  Second half: Touches packet payload. */
+void __ef4_rx_packet(struct ef4_channel *channel)
+{
+	struct ef4_nic *efx = channel->efx;
+	struct ef4_rx_buffer *rx_buf =
+		ef4_rx_buffer(&channel->rx_queue, channel->rx_pkt_index);
+	u8 *eh = ef4_rx_buf_va(rx_buf);
+
+	/* Read length from the prefix if necessary.  This already
+	 * excludes the length of the prefix itself.
+	 */
+	if (rx_buf->flags & EF4_RX_PKT_PREFIX_LEN)
+		rx_buf->len = le16_to_cpup((__le16 *)
+					   (eh + efx->rx_packet_len_offset));
+
+	/* If we're in loopback test, then pass the packet directly to the
+	 * loopback layer, and free the rx_buf here
+	 */
+	if (unlikely(efx->loopback_selftest)) {
+		struct ef4_rx_queue *rx_queue;
+
+		ef4_loopback_rx_packet(efx, eh, rx_buf->len);
+		rx_queue = ef4_channel_get_rx_queue(channel);
+		ef4_free_rx_buffers(rx_queue, rx_buf,
+				    channel->rx_pkt_n_frags);
+		goto out;
+	}
+
+	if (unlikely(!(efx->net_dev->features & NETIF_F_RXCSUM)))
+		rx_buf->flags &= ~EF4_RX_PKT_CSUMMED;
+
+	if ((rx_buf->flags & EF4_RX_PKT_TCP) && !channel->type->receive_skb &&
+	    !ef4_channel_busy_polling(channel))
+		ef4_rx_packet_gro(channel, rx_buf, channel->rx_pkt_n_frags, eh);
+	else
+		ef4_rx_deliver(channel, eh, rx_buf, channel->rx_pkt_n_frags);
+out:
+	channel->rx_pkt_n_frags = 0;
+}
+
+int ef4_probe_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	unsigned int entries;
+	int rc;
+
+	/* Create the smallest power-of-two aligned ring */
+	entries = max(roundup_pow_of_two(efx->rxq_entries), EF4_MIN_DMAQ_SIZE);
+	EF4_BUG_ON_PARANOID(entries > EF4_MAX_DMAQ_SIZE);
+	rx_queue->ptr_mask = entries - 1;
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "creating RX queue %d size %#x mask %#x\n",
+		  ef4_rx_queue_index(rx_queue), efx->rxq_entries,
+		  rx_queue->ptr_mask);
+
+	/* Allocate RX buffers */
+	rx_queue->buffer = kcalloc(entries, sizeof(*rx_queue->buffer),
+				   GFP_KERNEL);
+	if (!rx_queue->buffer)
+		return -ENOMEM;
+
+	rc = ef4_nic_probe_rx(rx_queue);
+	if (rc) {
+		kfree(rx_queue->buffer);
+		rx_queue->buffer = NULL;
+	}
+
+	return rc;
+}
+
+static void ef4_init_rx_recycle_ring(struct ef4_nic *efx,
+				     struct ef4_rx_queue *rx_queue)
+{
+	unsigned int bufs_in_recycle_ring, page_ring_size;
+
+	/* Set the RX recycle ring size */
+#ifdef CONFIG_PPC64
+	bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
+#else
+	if (iommu_present(&pci_bus_type))
+		bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_IOMMU;
+	else
+		bufs_in_recycle_ring = EF4_RECYCLE_RING_SIZE_NOIOMMU;
+#endif /* CONFIG_PPC64 */
+	/* NOTE(review): kcalloc() below is unchecked; page_ring may be NULL */
+	page_ring_size = roundup_pow_of_two(bufs_in_recycle_ring /
+					    efx->rx_bufs_per_page);
+	rx_queue->page_ring = kcalloc(page_ring_size,
+				      sizeof(*rx_queue->page_ring), GFP_KERNEL);
+	rx_queue->page_ptr_mask = page_ring_size - 1;
+}
+
+void ef4_init_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+	struct ef4_nic *efx = rx_queue->efx;
+	unsigned int max_fill, trigger, max_trigger;
+
+	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+		  "initialising RX queue %d\n", ef4_rx_queue_index(rx_queue));
+
+	/* Initialise ptr fields */
+	rx_queue->added_count = 0;
+	rx_queue->notified_count = 0;
+	rx_queue->removed_count = 0;
+	rx_queue->min_fill = -1U;
+	ef4_init_rx_recycle_ring(efx, rx_queue);
+
+	rx_queue->page_remove = 0;
+	rx_queue->page_add = rx_queue->page_ptr_mask + 1;
+	rx_queue->page_recycle_count = 0;
+	rx_queue->page_recycle_failed = 0;
+	rx_queue->page_recycle_full = 0;
+
+	/* Initialise limit fields */
+	max_fill = efx->rxq_entries - EF4_RXD_HEAD_ROOM;
+	max_trigger =
+		max_fill - efx->rx_pages_per_batch * efx->rx_bufs_per_page;
+	if (rx_refill_threshold != 0) {
+		trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
+		if (trigger > max_trigger)
+			trigger = max_trigger;
+	} else {
+		trigger = max_trigger;
+	}
+
+	rx_queue->max_fill = max_fill;
+	rx_queue->fast_fill_trigger = trigger;
+	rx_queue->refill_enabled = true;
+
+	/* Set up RX descriptor ring */
+	ef4_nic_init_rx(rx_queue);
+}
+
+void ef4_fini_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+	int i;
+	struct ef4_nic *efx = rx_queue->efx;
+	struct ef4_rx_buffer *rx_buf;
+
+	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+		  "shutting down RX queue %d\n", ef4_rx_queue_index(rx_queue));
+	del_timer_sync(&rx_queue->slow_fill);
+
+	/* Release RX buffers from the current read ptr to the write ptr */
+	if (rx_queue->buffer) {
+		for (i = rx_queue->removed_count; i < rx_queue->added_count;
+		     i++) {
+			unsigned index = i & rx_queue->ptr_mask;
+			rx_buf = ef4_rx_buffer(rx_queue, index);
+			ef4_fini_rx_buffer(rx_queue, rx_buf);
+		}
+	}
+
+	/* Unmap and release the pages in the recycle ring. Remove the ring.
+	 * The ring allocation is unchecked, so page_ring may be NULL here. */
+	for (i = 0; rx_queue->page_ring && i <= rx_queue->page_ptr_mask; i++) {
+		struct page *page = rx_queue->page_ring[i];
+		struct ef4_rx_page_state *state;
+
+		if (page == NULL)
+			continue;
+
+		state = page_address(page);
+		dma_unmap_page(&efx->pci_dev->dev, state->dma_addr,
+			       PAGE_SIZE << efx->rx_buffer_order,
+			       DMA_FROM_DEVICE);
+		put_page(page);
+	}
+	kfree(rx_queue->page_ring);
+	rx_queue->page_ring = NULL;
+}
+
+void ef4_remove_rx_queue(struct ef4_rx_queue *rx_queue)
+{
+	netif_dbg(rx_queue->efx, drv, rx_queue->efx->net_dev,
+		  "destroying RX queue %d\n", ef4_rx_queue_index(rx_queue));
+
+	ef4_nic_remove_rx(rx_queue);
+
+	kfree(rx_queue->buffer);
+	rx_queue->buffer = NULL;
+}
+
+
+module_param(rx_refill_threshold, uint, 0444);
+MODULE_PARM_DESC(rx_refill_threshold,
+		 "RX descriptor ring refill threshold (%)");
+
+#ifdef CONFIG_RFS_ACCEL
+
+int ef4_filter_rfs(struct net_device *net_dev, const struct sk_buff *skb,
+		   u16 rxq_index, u32 flow_id)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct ef4_channel *channel;
+	struct ef4_filter_spec spec;
+	struct flow_keys fk;
+	int rc;
+
+	if (flow_id == RPS_FLOW_ID_INVALID)
+		return -EINVAL;
+
+	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+		return -EPROTONOSUPPORT;
+
+	if (fk.basic.n_proto != htons(ETH_P_IP) && fk.basic.n_proto != htons(ETH_P_IPV6))
+		return -EPROTONOSUPPORT;
+	if (fk.control.flags & FLOW_DIS_IS_FRAGMENT)
+		return -EPROTONOSUPPORT;
+
+	ef4_filter_init_rx(&spec, EF4_FILTER_PRI_HINT,
+			   efx->rx_scatter ? EF4_FILTER_FLAG_RX_SCATTER : 0,
+			   rxq_index);
+	spec.match_flags =
+		EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_IP_PROTO |
+		EF4_FILTER_MATCH_LOC_HOST | EF4_FILTER_MATCH_LOC_PORT |
+		EF4_FILTER_MATCH_REM_HOST | EF4_FILTER_MATCH_REM_PORT;
+	spec.ether_type = fk.basic.n_proto;
+	spec.ip_proto = fk.basic.ip_proto;
+
+	if (fk.basic.n_proto == htons(ETH_P_IP)) {
+		spec.rem_host[0] = fk.addrs.v4addrs.src;
+		spec.loc_host[0] = fk.addrs.v4addrs.dst;
+	} else {
+		memcpy(spec.rem_host, &fk.addrs.v6addrs.src, sizeof(struct in6_addr));
+		memcpy(spec.loc_host, &fk.addrs.v6addrs.dst, sizeof(struct in6_addr));
+	}
+
+	spec.rem_port = fk.ports.src;
+	spec.loc_port = fk.ports.dst;
+
+	rc = efx->type->filter_rfs_insert(efx, &spec);
+	if (rc < 0)
+		return rc;
+
+	/* Remember this so we can check whether to expire the filter later */
+	channel = ef4_get_channel(efx, rxq_index);
+	channel->rps_flow_id[rc] = flow_id;
+	++channel->rfs_filters_added;
+
+	if (spec.ether_type == htons(ETH_P_IP))
+		netif_info(efx, rx_status, efx->net_dev,
+			   "steering %s %pI4:%u:%pI4:%u to queue %u [flow %u filter %d]\n",
+			   (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+			   spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
+			   ntohs(spec.loc_port), rxq_index, flow_id, rc);
+	else
+		netif_info(efx, rx_status, efx->net_dev,
+			   "steering %s [%pI6]:%u:[%pI6]:%u to queue %u [flow %u filter %d]\n",
+			   (spec.ip_proto == IPPROTO_TCP) ? "TCP" : "UDP",
+			   spec.rem_host, ntohs(spec.rem_port), spec.loc_host,
+			   ntohs(spec.loc_port), rxq_index, flow_id, rc);
+
+	return rc;
+}
+
+bool __ef4_filter_rfs_expire(struct ef4_nic *efx, unsigned int quota)
+{
+	bool (*expire_one)(struct ef4_nic *efx, u32 flow_id, unsigned int index);
+	unsigned int channel_idx, index, size;
+	u32 flow_id;
+
+	if (!spin_trylock_bh(&efx->filter_lock))
+		return false;
+
+	expire_one = efx->type->filter_rfs_expire_one;
+	channel_idx = efx->rps_expire_channel;
+	index = efx->rps_expire_index;
+	size = efx->type->max_rx_ip_filters;
+	while (quota--) {
+		struct ef4_channel *channel = ef4_get_channel(efx, channel_idx);
+		flow_id = channel->rps_flow_id[index];
+
+		if (flow_id != RPS_FLOW_ID_INVALID &&
+		    expire_one(efx, flow_id, index)) {
+			netif_info(efx, rx_status, efx->net_dev,
+				   "expired filter %d [queue %u flow %u]\n",
+				   index, channel_idx, flow_id);
+			channel->rps_flow_id[index] = RPS_FLOW_ID_INVALID;
+		}
+		if (++index == size) {
+			if (++channel_idx == efx->n_channels)
+				channel_idx = 0;
+			index = 0;
+		}
+	}
+	efx->rps_expire_channel = channel_idx;
+	efx->rps_expire_index = index;
+
+	spin_unlock_bh(&efx->filter_lock);
+	return true;
+}
+
+#endif /* CONFIG_RFS_ACCEL */
+
+/**
+ * ef4_filter_is_mc_recipient - test whether spec is a multicast recipient
+ * @spec: Specification to test
+ *
+ * Return: %true if the specification is a non-drop RX filter that
+ * matches a local MAC address I/G bit value of 1 or matches a local
+ * IPv4 or IPv6 address value in the respective multicast address
+ * range.  Otherwise %false.
+ */
+bool ef4_filter_is_mc_recipient(const struct ef4_filter_spec *spec)
+{
+	if (!(spec->flags & EF4_FILTER_FLAG_RX) ||
+	    spec->dmaq_id == EF4_FILTER_RX_DMAQ_ID_DROP)
+		return false;
+
+	if (spec->match_flags &
+	    (EF4_FILTER_MATCH_LOC_MAC | EF4_FILTER_MATCH_LOC_MAC_IG) &&
+	    is_multicast_ether_addr(spec->loc_mac))
+		return true;
+
+	if ((spec->match_flags &
+	     (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_LOC_HOST)) ==
+	    (EF4_FILTER_MATCH_ETHER_TYPE | EF4_FILTER_MATCH_LOC_HOST)) {
+		if (spec->ether_type == htons(ETH_P_IP) &&
+		    ipv4_is_multicast(spec->loc_host[0]))
+			return true;
+		if (spec->ether_type == htons(ETH_P_IPV6) &&
+		    ((const u8 *)spec->loc_host)[0] == 0xff)
+			return true;
+	}
+
+	return false;
+}
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.c b/drivers/net/ethernet/sfc/falcon/selftest.c
new file mode 100644
index 000000000000..92bc34c91547
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/selftest.c
@@ -0,0 +1,808 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/kernel_stat.h>
+#include <linux/pci.h>
+#include <linux/ethtool.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/rtnetlink.h>
+#include <linux/slab.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "nic.h"
+#include "selftest.h"
+#include "workarounds.h"
+
+/* IRQ latency can be enormous because:
+ * - All IRQs may be disabled on a CPU for a *long* time by e.g. a
+ *   slow serial console or an old IDE driver doing error recovery
+ * - The PREEMPT_RT patches mostly deal with this, but also allow a
+ *   tasklet or normal task to be given higher priority than our IRQ
+ *   threads
+ * Try to avoid blaming the hardware for this.
+ */
+#define IRQ_TIMEOUT HZ
+
+/*
+ * Loopback test packet structure
+ *
+ * The self-test should stress every RSS vector, and unfortunately
+ * Falcon only performs RSS on TCP/UDP packets.
+ */
+struct ef4_loopback_payload {
+	struct ethhdr header;
+	struct iphdr ip;
+	struct udphdr udp;
+	__be16 iteration;
+	char msg[64];
+} __packed;
+
+/* Loopback test source MAC address */
+static const u8 payload_source[ETH_ALEN] __aligned(2) = {
+	0x00, 0x0f, 0x53, 0x1b, 0x1b, 0x1b,
+};
+
+static const char payload_msg[] =
+	"Hello world! This is an Efx loopback test in progress!";
+
+/* Interrupt mode names */
+static const unsigned int ef4_interrupt_mode_max = EF4_INT_MODE_MAX;
+static const char *const ef4_interrupt_mode_names[] = {
+	[EF4_INT_MODE_MSIX]   = "MSI-X",
+	[EF4_INT_MODE_MSI]    = "MSI",
+	[EF4_INT_MODE_LEGACY] = "legacy",
+};
+#define INT_MODE(efx) \
+	STRING_TABLE_LOOKUP(efx->interrupt_mode, ef4_interrupt_mode)
+
+/**
+ * struct ef4_loopback_state - persistent state during a loopback selftest
+ * @flush:		Drop all packets in ef4_loopback_rx_packet
+ * @packet_count:	Number of packets being used in this test
+ * @skbs:		An array of skbs transmitted
+ * @offload_csum:	Checksums are being offloaded
+ * @rx_good:		RX good packet count
+ * @rx_bad:		RX bad packet count
+ * @payload:		Payload used in tests
+ */
+struct ef4_loopback_state {
+	bool flush;
+	int packet_count;
+	struct sk_buff **skbs;
+	bool offload_csum;
+	atomic_t rx_good;
+	atomic_t rx_bad;
+	struct ef4_loopback_payload payload;
+};
+
+/* How long to wait for all the packets to arrive (in ms) */
+#define LOOPBACK_TIMEOUT_MS 1000
+
+/**************************************************************************
+ *
+ * MII, NVRAM and register tests
+ *
+ **************************************************************************/
+
+static int ef4_test_phy_alive(struct ef4_nic *efx, struct ef4_self_tests *tests)
+{
+	int rc = 0;
+
+	if (efx->phy_op->test_alive) {
+		rc = efx->phy_op->test_alive(efx);
+		tests->phy_alive = rc ? -1 : 1;
+	}
+
+	return rc;
+}
+
+static int ef4_test_nvram(struct ef4_nic *efx, struct ef4_self_tests *tests)
+{
+	int rc = 0;
+
+	if (efx->type->test_nvram) {
+		rc = efx->type->test_nvram(efx);
+		if (rc == -EPERM)
+			rc = 0;
+		else
+			tests->nvram = rc ? -1 : 1;
+	}
+
+	return rc;
+}
+
+/**************************************************************************
+ *
+ * Interrupt and event queue testing
+ *
+ **************************************************************************/
+
+/* Test generation and receipt of interrupts */
+static int ef4_test_interrupts(struct ef4_nic *efx,
+			       struct ef4_self_tests *tests)
+{
+	unsigned long timeout, wait;
+	int cpu;
+	int rc;
+
+	netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n");
+	tests->interrupt = -1;
+
+	rc = ef4_nic_irq_test_start(efx);
+	if (rc == -ENOTSUPP) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "direct interrupt testing not supported\n");
+		tests->interrupt = 0;
+		return 0;
+	}
+
+	timeout = jiffies + IRQ_TIMEOUT;
+	wait = 1;
+
+	/* Wait for arrival of test interrupt. */
+	netif_dbg(efx, drv, efx->net_dev, "waiting for test interrupt\n");
+	do {
+		schedule_timeout_uninterruptible(wait);
+		cpu = ef4_nic_irq_test_irq_cpu(efx);
+		if (cpu >= 0)
+			goto success;
+		wait *= 2;
+	} while (time_before(jiffies, timeout));
+
+	netif_err(efx, drv, efx->net_dev, "timed out waiting for interrupt\n");
+	return -ETIMEDOUT;
+
+ success:
+	netif_dbg(efx, drv, efx->net_dev, "%s test interrupt seen on CPU%d\n",
+		  INT_MODE(efx), cpu);
+	tests->interrupt = 1;
+	return 0;
+}
+
+/* Test generation and receipt of interrupting events */
+static int ef4_test_eventq_irq(struct ef4_nic *efx,
+			       struct ef4_self_tests *tests)
+{
+	struct ef4_channel *channel;
+	unsigned int read_ptr[EF4_MAX_CHANNELS];
+	unsigned long napi_ran = 0, dma_pend = 0, int_pend = 0;
+	unsigned long timeout, wait;
+
+	BUILD_BUG_ON(EF4_MAX_CHANNELS > BITS_PER_LONG);
+
+	ef4_for_each_channel(channel, efx) {
+		read_ptr[channel->channel] = channel->eventq_read_ptr;
+		set_bit(channel->channel, &dma_pend);
+		set_bit(channel->channel, &int_pend);
+		ef4_nic_event_test_start(channel);
+	}
+
+	timeout = jiffies + IRQ_TIMEOUT;
+	wait = 1;
+
+	/* Wait for arrival of interrupts.  NAPI processing may or may
+	 * not complete in time, but we can cope in any case.
+	 */
+	do {
+		schedule_timeout_uninterruptible(wait);
+
+		ef4_for_each_channel(channel, efx) {
+			ef4_stop_eventq(channel);
+			if (channel->eventq_read_ptr !=
+			    read_ptr[channel->channel]) {
+				set_bit(channel->channel, &napi_ran);
+				clear_bit(channel->channel, &dma_pend);
+				clear_bit(channel->channel, &int_pend);
+			} else {
+				if (ef4_nic_event_present(channel))
+					clear_bit(channel->channel, &dma_pend);
+				if (ef4_nic_event_test_irq_cpu(channel) >= 0)
+					clear_bit(channel->channel, &int_pend);
+			}
+			ef4_start_eventq(channel);
+		}
+
+		wait *= 2;
+	} while ((dma_pend || int_pend) && time_before(jiffies, timeout));
+
+	ef4_for_each_channel(channel, efx) {
+		bool dma_seen = !test_bit(channel->channel, &dma_pend);
+		bool int_seen = !test_bit(channel->channel, &int_pend);
+
+		tests->eventq_dma[channel->channel] = dma_seen ? 1 : -1;
+		tests->eventq_int[channel->channel] = int_seen ? 1 : -1;
+
+		if (dma_seen && int_seen) {
+			netif_dbg(efx, drv, efx->net_dev,
+				  "channel %d event queue passed (with%s NAPI)\n",
+				  channel->channel,
+				  test_bit(channel->channel, &napi_ran) ?
+				  "" : "out");
+		} else {
+			/* Report failure and whether either interrupt or DMA
+			 * worked
+			 */
+			netif_err(efx, drv, efx->net_dev,
+				  "channel %d timed out waiting for event queue\n",
+				  channel->channel);
+			if (int_seen)
+				netif_err(efx, drv, efx->net_dev,
+					  "channel %d saw interrupt "
+					  "during event queue test\n",
+					  channel->channel);
+			if (dma_seen)
+				netif_err(efx, drv, efx->net_dev,
+					  "channel %d event was generated, but "
+					  "failed to trigger an interrupt\n",
+					  channel->channel);
+		}
+	}
+
+	return (dma_pend || int_pend) ? -ETIMEDOUT : 0;
+}
+
+static int ef4_test_phy(struct ef4_nic *efx, struct ef4_self_tests *tests,
+			unsigned flags)
+{
+	int rc;
+
+	if (!efx->phy_op->run_tests)
+		return 0;
+
+	mutex_lock(&efx->mac_lock);
+	rc = efx->phy_op->run_tests(efx, tests->phy_ext, flags);
+	mutex_unlock(&efx->mac_lock);
+	if (rc == -EPERM)
+		rc = 0;
+	else
+		netif_info(efx, drv, efx->net_dev,
+			   "%s phy selftest\n", rc ? "Failed" : "Passed");
+
+	return rc;
+}
+
+/**************************************************************************
+ *
+ * Loopback testing
+ * NB Only one loopback test can be executing concurrently.
+ *
+ **************************************************************************/
+
+/* Loopback test RX callback: called for each packet received during
+ * loopback testing.  Compares the @pkt_len bytes at @buf_ptr with the
+ * expected payload and bumps the rx_good or rx_bad counter. */
+void ef4_loopback_rx_packet(struct ef4_nic *efx,
+			    const char *buf_ptr, int pkt_len)
+{
+	struct ef4_loopback_state *state = efx->loopback_selftest;
+	struct ef4_loopback_payload *received;
+	struct ef4_loopback_payload *payload;
+
+	BUG_ON(!buf_ptr);
+
+	/* If we are just flushing, then drop the packet */
+	if ((state == NULL) || state->flush)
+		return;
+
+	payload = &state->payload;
+	received = (struct ef4_loopback_payload *) buf_ptr;
+
+	/* Check that header exists */
+	if (pkt_len < sizeof(received->header)) {
+		netif_err(efx, drv, efx->net_dev,
+			  "saw runt RX packet (length %d) in %s loopback "
+			  "test\n", pkt_len, LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check that the ethernet header matches */
+	if (memcmp(&received->header, &payload->header, ETH_HLEN) != 0) {
+		netif_err(efx, drv, efx->net_dev,
+			  "saw non-loopback RX packet in %s loopback test\n",
+			  LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check packet length */
+	if (pkt_len != sizeof(*payload)) {
+		netif_err(efx, drv, efx->net_dev,
+			  "saw incorrect RX packet length %d (wanted %d) in "
+			  "%s loopback test\n", pkt_len, (int)sizeof(*payload),
+			  LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Length is now validated, so the IP header may be touched.  Copy in
+	 * the fields not expected to match, then check the rest matches. */
+	received->ip.saddr = payload->ip.saddr;
+	if (state->offload_csum)
+		received->ip.check = payload->ip.check;
+	if (memcmp(&received->ip, &payload->ip, sizeof(payload->ip)) != 0) {
+		netif_err(efx, drv, efx->net_dev,
+			  "saw corrupted IP header in %s loopback test\n",
+			  LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check that msg and padding matches */
+	if (memcmp(&received->msg, &payload->msg, sizeof(received->msg)) != 0) {
+		netif_err(efx, drv, efx->net_dev,
+			  "saw corrupted RX packet in %s loopback test\n",
+			  LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Check that iteration matches */
+	if (received->iteration != payload->iteration) {
+		netif_err(efx, drv, efx->net_dev,
+			  "saw RX packet from iteration %d (wanted %d) in "
+			  "%s loopback test\n", ntohs(received->iteration),
+			  ntohs(payload->iteration), LOOPBACK_MODE(efx));
+		goto err;
+	}
+
+	/* Increase correct RX count */
+	netif_vdbg(efx, drv, efx->net_dev,
+		   "got loopback RX in %s loopback test\n", LOOPBACK_MODE(efx));
+
+	atomic_inc(&state->rx_good);
+	return;
+
+ err:
+#ifdef DEBUG
+	if (atomic_read(&state->rx_bad) == 0) {
+		netif_err(efx, drv, efx->net_dev, "received packet:\n");
+		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+			       buf_ptr, pkt_len, 0);
+		netif_err(efx, drv, efx->net_dev, "expected packet:\n");
+		print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 0x10, 1,
+			       &state->payload, sizeof(state->payload), 0);
+	}
+#endif
+	atomic_inc(&state->rx_bad);
+}
+
+/* Initialise the ef4_loopback_state for a new iteration */
+static void ef4_iterate_state(struct ef4_nic *efx)
+{
+	struct ef4_loopback_state *state = efx->loopback_selftest;
+	struct net_device *net_dev = efx->net_dev;
+	struct ef4_loopback_payload *payload = &state->payload;
+
+	/* Initialise the layerII header */
+	ether_addr_copy((u8 *)&payload->header.h_dest, net_dev->dev_addr);
+	ether_addr_copy((u8 *)&payload->header.h_source, payload_source);
+	payload->header.h_proto = htons(ETH_P_IP);
+
+	/* saddr set later and used as incrementing count */
+	payload->ip.daddr = htonl(INADDR_LOOPBACK);
+	payload->ip.ihl = 5;
+	payload->ip.check = (__force __sum16) htons(0xdead);
+	payload->ip.tot_len = htons(sizeof(*payload) - sizeof(struct ethhdr));
+	payload->ip.version = IPVERSION;
+	payload->ip.protocol = IPPROTO_UDP;
+
+	/* Initialise udp header */
+	payload->udp.source = 0;
+	payload->udp.len = htons(sizeof(*payload) - sizeof(struct ethhdr) -
+				 sizeof(struct iphdr));
+	payload->udp.check = 0;	/* checksum ignored */
+
+	/* Fill out payload */
+	payload->iteration = htons(ntohs(payload->iteration) + 1);
+	memcpy(&payload->msg, payload_msg, sizeof(payload_msg));
+
+	/* Fill out remaining state members */
+	atomic_set(&state->rx_good, 0);
+	atomic_set(&state->rx_bad, 0);
+	smp_wmb();
+}
+
+static int ef4_begin_loopback(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	struct ef4_loopback_state *state = efx->loopback_selftest;
+	struct ef4_loopback_payload *payload;
+	struct sk_buff *skb;
+	int i;
+	netdev_tx_t rc;
+
+	/* Transmit N copies of buffer */
+	for (i = 0; i < state->packet_count; i++) {
+		/* Allocate an skb, holding an extra reference for
+		 * transmit completion counting */
+		skb = alloc_skb(sizeof(state->payload), GFP_KERNEL);
+		if (!skb)
+			return -ENOMEM;
+		state->skbs[i] = skb;
+		skb_get(skb);
+
+		/* Copy the payload in, incrementing the source address to
+		 * exercise the rss vectors */
+		payload = ((struct ef4_loopback_payload *)
+			   skb_put(skb, sizeof(state->payload)));
+		memcpy(payload, &state->payload, sizeof(state->payload));
+		payload->ip.saddr = htonl(INADDR_LOOPBACK | (i << 2));
+
+		/* Ensure everything we've written is visible to the
+		 * interrupt handler. */
+		smp_wmb();
+
+		netif_tx_lock_bh(efx->net_dev);
+		rc = ef4_enqueue_skb(tx_queue, skb);
+		netif_tx_unlock_bh(efx->net_dev);
+
+		if (rc != NETDEV_TX_OK) {
+			netif_err(efx, drv, efx->net_dev,
+				  "TX queue %d could not transmit packet %d of "
+				  "%d in %s loopback test\n", tx_queue->queue,
+				  i + 1, state->packet_count,
+				  LOOPBACK_MODE(efx));
+
+			/* Defer cleaning up the other skbs for the caller */
+			kfree_skb(skb);
+			return -EPIPE;
+		}
+	}
+
+	return 0;
+}
+
+static int ef4_poll_loopback(struct ef4_nic *efx)
+{
+	struct ef4_loopback_state *state = efx->loopback_selftest;
+
+	return atomic_read(&state->rx_good) == state->packet_count;
+}
+
+static int ef4_end_loopback(struct ef4_tx_queue *tx_queue,
+			    struct ef4_loopback_self_tests *lb_tests)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	struct ef4_loopback_state *state = efx->loopback_selftest;
+	struct sk_buff *skb;
+	int tx_done = 0, rx_good, rx_bad;
+	int i, rc = 0;
+
+	netif_tx_lock_bh(efx->net_dev);
+
+	/* Count the number of tx completions, and decrement the refcnt. Any
+	 * skbs not already completed will be free'd when the queue is flushed */
+	for (i = 0; i < state->packet_count; i++) {
+		skb = state->skbs[i];
+		if (skb && !skb_shared(skb))
+			++tx_done;
+		dev_kfree_skb(skb);
+	}
+
+	netif_tx_unlock_bh(efx->net_dev);
+
+	/* Check TX completion and received packet counts */
+	rx_good = atomic_read(&state->rx_good);
+	rx_bad = atomic_read(&state->rx_bad);
+	if (tx_done != state->packet_count) {
+		/* Don't free the skbs; they will be picked up on TX
+		 * overflow or channel teardown.
+		 */
+		netif_err(efx, drv, efx->net_dev,
+			  "TX queue %d saw only %d out of an expected %d "
+			  "TX completion events in %s loopback test\n",
+			  tx_queue->queue, tx_done, state->packet_count,
+			  LOOPBACK_MODE(efx));
+		rc = -ETIMEDOUT;
+		/* Allow to fall through so we see the RX errors as well */
+	}
+
+	/* We may always be up to a flush away from our desired packet total */
+	if (rx_good != state->packet_count) {
+		netif_dbg(efx, drv, efx->net_dev,
+			  "TX queue %d saw only %d out of an expected %d "
+			  "received packets in %s loopback test\n",
+			  tx_queue->queue, rx_good, state->packet_count,
+			  LOOPBACK_MODE(efx));
+		rc = -ETIMEDOUT;
+		/* Fall through */
+	}
+
+	/* Update loopback test structure */
+	lb_tests->tx_sent[tx_queue->queue] += state->packet_count;
+	lb_tests->tx_done[tx_queue->queue] += tx_done;
+	lb_tests->rx_good += rx_good;
+	lb_tests->rx_bad += rx_bad;
+
+	return rc;
+}
+
+static int
+ef4_test_loopback(struct ef4_tx_queue *tx_queue,
+		  struct ef4_loopback_self_tests *lb_tests)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	struct ef4_loopback_state *state = efx->loopback_selftest;
+	int i, begin_rc, end_rc;
+
+	for (i = 0; i < 3; i++) {
+		/* Determine how many packets to send */
+		state->packet_count = efx->txq_entries / 3;
+		state->packet_count = min(1 << (i << 2), state->packet_count);
+		state->skbs = kcalloc(state->packet_count,
+				      sizeof(state->skbs[0]), GFP_KERNEL);
+		if (!state->skbs)
+			return -ENOMEM;
+		state->flush = false;
+
+		netif_dbg(efx, drv, efx->net_dev,
+			  "TX queue %d testing %s loopback with %d packets\n",
+			  tx_queue->queue, LOOPBACK_MODE(efx),
+			  state->packet_count);
+
+		ef4_iterate_state(efx);
+		begin_rc = ef4_begin_loopback(tx_queue);
+
+		/* This will normally complete very quickly, but be
+		 * prepared to wait much longer. */
+		msleep(1);
+		if (!ef4_poll_loopback(efx)) {
+			msleep(LOOPBACK_TIMEOUT_MS);
+			ef4_poll_loopback(efx);
+		}
+
+		end_rc = ef4_end_loopback(tx_queue, lb_tests);
+		kfree(state->skbs);
+
+		if (begin_rc || end_rc) {
+			/* Wait a while to ensure there are no packets
+			 * floating around after a failure. */
+			schedule_timeout_uninterruptible(HZ / 10);
+			return begin_rc ? begin_rc : end_rc;
+		}
+	}
+
+	netif_dbg(efx, drv, efx->net_dev,
+		  "TX queue %d passed %s loopback test with a burst length "
+		  "of %d packets\n", tx_queue->queue, LOOPBACK_MODE(efx),
+		  state->packet_count);
+
+	return 0;
+}
+
+/* Wait for link up. On Falcon, we would prefer to rely on ef4_monitor, but
+ * any contention on the mac lock (via e.g. ef4_mac_mcast_work) causes it
+ * to delay and retry. Therefore, it's safer to just poll directly. Wait
+ * for link up and any faults to dissipate. */
+static int ef4_wait_for_link(struct ef4_nic *efx)
+{
+	struct ef4_link_state *link_state = &efx->link_state;
+	int count, link_up_count = 0;
+	bool link_up;
+
+	for (count = 0; count < 40; count++) {
+		schedule_timeout_uninterruptible(HZ / 10);
+
+		if (efx->type->monitor != NULL) {
+			mutex_lock(&efx->mac_lock);
+			efx->type->monitor(efx);
+			mutex_unlock(&efx->mac_lock);
+		}
+
+		mutex_lock(&efx->mac_lock);
+		link_up = link_state->up;
+		if (link_up)
+			link_up = !efx->type->check_mac_fault(efx);
+		mutex_unlock(&efx->mac_lock);
+
+		if (link_up) {
+			if (++link_up_count == 2)
+				return 0;
+		} else {
+			link_up_count = 0;
+		}
+	}
+
+	return -ETIMEDOUT;
+}
+
+static int ef4_test_loopbacks(struct ef4_nic *efx, struct ef4_self_tests *tests,
+			      unsigned int loopback_modes)
+{
+	enum ef4_loopback_mode mode;
+	struct ef4_loopback_state *state;
+	struct ef4_channel *channel =
+		ef4_get_channel(efx, efx->tx_channel_offset);
+	struct ef4_tx_queue *tx_queue;
+	int rc = 0;
+
+	/* Set the port loopback_selftest member. From this point on
+	 * all received packets will be dropped. Mark the state as
+	 * "flushing" so all inflight packets are dropped */
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state == NULL)
+		return -ENOMEM;
+	BUG_ON(efx->loopback_selftest);
+	state->flush = true;
+	efx->loopback_selftest = state;
+
+	/* Test all supported loopback modes */
+	for (mode = LOOPBACK_NONE; mode <= LOOPBACK_TEST_MAX; mode++) {
+		if (!(loopback_modes & (1 << mode)))
+			continue;
+
+		/* Move the port into the specified loopback mode. */
+		state->flush = true;
+		mutex_lock(&efx->mac_lock);
+		efx->loopback_mode = mode;
+		rc = __ef4_reconfigure_port(efx);
+		mutex_unlock(&efx->mac_lock);
+		if (rc) {
+			netif_err(efx, drv, efx->net_dev,
+				  "unable to move into %s loopback\n",
+				  LOOPBACK_MODE(efx));
+			goto out;
+		}
+
+		rc = ef4_wait_for_link(efx);
+		if (rc) {
+			netif_err(efx, drv, efx->net_dev,
+				  "loopback %s never came up\n",
+				  LOOPBACK_MODE(efx));
+			goto out;
+		}
+
+		/* Test all enabled types of TX queue */
+		ef4_for_each_channel_tx_queue(tx_queue, channel) {
+			state->offload_csum = (tx_queue->queue &
+					       EF4_TXQ_TYPE_OFFLOAD);
+			rc = ef4_test_loopback(tx_queue,
+					       &tests->loopback[mode]);
+			if (rc)
+				goto out;
+		}
+	}
+
+ out:
+	/* Stay in flush mode. The caller will remove the loopback setting */
+	state->flush = true;
+	efx->loopback_selftest = NULL;
+	wmb();
+	kfree(state);
+
+	if (rc == -EPERM)
+		rc = 0;
+
+	return rc;
+}
+
+/**************************************************************************
+ *
+ * Entry point
+ *
+ *************************************************************************/
+
+int ef4_selftest(struct ef4_nic *efx, struct ef4_self_tests *tests,
+		 unsigned flags)
+{
+	enum ef4_loopback_mode loopback_mode = efx->loopback_mode;
+	int phy_mode = efx->phy_mode;
+	int rc_test = 0, rc_reset, rc;
+
+	ef4_selftest_async_cancel(efx);
+
+	/* Online (i.e. non-disruptive) testing
+	 * This checks interrupt generation, event delivery and PHY presence. */
+
+	rc = ef4_test_phy_alive(efx, tests);
+	if (rc && !rc_test)
+		rc_test = rc;
+
+	rc = ef4_test_nvram(efx, tests);
+	if (rc && !rc_test)
+		rc_test = rc;
+
+	rc = ef4_test_interrupts(efx, tests);
+	if (rc && !rc_test)
+		rc_test = rc;
+
+	rc = ef4_test_eventq_irq(efx, tests);
+	if (rc && !rc_test)
+		rc_test = rc;
+
+	if (rc_test)
+		return rc_test; /* first online failure; stop here */
+
+	if (!(flags & ETH_TEST_FL_OFFLINE))
+		return ef4_test_phy(efx, tests, flags);
+
+	/* Offline (i.e. disruptive) testing
+	 * This checks MAC and PHY loopback on the specified port. */
+
+	/* Detach the device so the kernel doesn't transmit during the
+	 * loopback test and the watchdog timeout doesn't fire.
+	 */
+	ef4_device_detach_sync(efx);
+
+	if (efx->type->test_chip) {
+		rc_reset = efx->type->test_chip(efx, tests);
+		if (rc_reset) {
+			netif_err(efx, hw, efx->net_dev,
+				  "Unable to recover from chip test\n");
+			ef4_schedule_reset(efx, RESET_TYPE_DISABLE);
+			return rc_reset; /* net_dev stays detached */
+		}
+
+		if ((tests->memory < 0 || tests->registers < 0) && !rc_test)
+			rc_test = -EIO;
+	}
+
+	/* Ensure that the phy is powered and out of loopback
+	 * for the bist and loopback tests */
+	mutex_lock(&efx->mac_lock);
+	efx->phy_mode &= ~PHY_MODE_LOW_POWER;
+	efx->loopback_mode = LOOPBACK_NONE;
+	__ef4_reconfigure_port(efx); /* return value not checked */
+	mutex_unlock(&efx->mac_lock);
+
+	rc = ef4_test_phy(efx, tests, flags);
+	if (rc && !rc_test)
+		rc_test = rc;
+
+	rc = ef4_test_loopbacks(efx, tests, efx->loopback_modes);
+	if (rc && !rc_test)
+		rc_test = rc;
+
+	/* restore the PHY to the previous state */
+	mutex_lock(&efx->mac_lock);
+	efx->phy_mode = phy_mode;
+	efx->loopback_mode = loopback_mode;
+	__ef4_reconfigure_port(efx); /* return value not checked */
+	mutex_unlock(&efx->mac_lock);
+
+	netif_device_attach(efx->net_dev);
+
+	return rc_test;
+}
+
+void ef4_selftest_async_start(struct ef4_nic *efx)
+{
+	struct ef4_channel *channel;
+
+	ef4_for_each_channel(channel, efx)
+		ef4_nic_event_test_start(channel);
+	schedule_delayed_work(&efx->selftest_work, IRQ_TIMEOUT);
+}
+
+void ef4_selftest_async_cancel(struct ef4_nic *efx)
+{
+	cancel_delayed_work_sync(&efx->selftest_work);
+}
+
+void ef4_selftest_async_work(struct work_struct *data)
+{
+	struct ef4_nic *efx = container_of(data, struct ef4_nic,
+					   selftest_work.work);
+	struct ef4_channel *channel;
+	int cpu;
+
+	ef4_for_each_channel(channel, efx) {
+		cpu = ef4_nic_event_test_irq_cpu(channel);
+		if (cpu < 0) /* negative: no interrupt was seen */
+			netif_err(efx, ifup, efx->net_dev,
+				  "channel %d failed to trigger an interrupt\n",
+				  channel->channel);
+		else
+			netif_dbg(efx, ifup, efx->net_dev,
+				  "channel %d triggered interrupt on CPU %d\n",
+				  channel->channel, cpu);
+	}
+}
diff --git a/drivers/net/ethernet/sfc/falcon/selftest.h b/drivers/net/ethernet/sfc/falcon/selftest.h
new file mode 100644
index 000000000000..be52a49c006a
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/selftest.h
@@ -0,0 +1,55 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2012 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_SELFTEST_H
+#define EF4_SELFTEST_H
+
+#include "net_driver.h"
+
+/*
+ * Self tests
+ */
+
+struct ef4_loopback_self_tests {
+	int tx_sent[EF4_TXQ_TYPES];
+	int tx_done[EF4_TXQ_TYPES];
+	int rx_good;
+	int rx_bad;
+};
+
+#define EF4_MAX_PHY_TESTS 20
+
+/* Efx self test results
+ * For fields which are not counters, 1 indicates success and -1
+ * indicates failure; 0 indicates test could not be run.
+ */
+struct ef4_self_tests {
+	/* online tests */
+	int phy_alive;
+	int nvram;
+	int interrupt;
+	int eventq_dma[EF4_MAX_CHANNELS];
+	int eventq_int[EF4_MAX_CHANNELS];
+	/* offline tests */
+	int memory;
+	int registers;
+	int phy_ext[EF4_MAX_PHY_TESTS];
+	struct ef4_loopback_self_tests loopback[LOOPBACK_TEST_MAX + 1];
+};
+
+void ef4_loopback_rx_packet(struct ef4_nic *efx, const char *buf_ptr,
+			    int pkt_len);
+int ef4_selftest(struct ef4_nic *efx, struct ef4_self_tests *tests,
+		 unsigned flags);
+void ef4_selftest_async_start(struct ef4_nic *efx);
+void ef4_selftest_async_cancel(struct ef4_nic *efx);
+void ef4_selftest_async_work(struct work_struct *data);
+
+#endif /* EF4_SELFTEST_H */
diff --git a/drivers/net/ethernet/sfc/tenxpress.c b/drivers/net/ethernet/sfc/falcon/tenxpress.c
index 2c90e6b31575..acc548a1c4d6 100644
--- a/drivers/net/ethernet/sfc/tenxpress.c
+++ b/drivers/net/ethernet/sfc/falcon/tenxpress.c
@@ -143,27 +143,27 @@
 #define LNPGA_PDOWN_WAIT	(HZ / 5)
 
 struct tenxpress_phy_data {
-	enum efx_loopback_mode loopback_mode;
-	enum efx_phy_mode phy_mode;
+	enum ef4_loopback_mode loopback_mode;
+	enum ef4_phy_mode phy_mode;
 	int bad_lp_tries;
 };
 
-static int tenxpress_init(struct efx_nic *efx)
+static int tenxpress_init(struct ef4_nic *efx)
 {
 	/* Enable 312.5 MHz clock */
-	efx_mdio_write(efx, MDIO_MMD_PCS, PCS_TEST_SELECT_REG,
+	ef4_mdio_write(efx, MDIO_MMD_PCS, PCS_TEST_SELECT_REG,
 		       1 << CLK312_EN_LBN);
 
 	/* Set the LEDs up as: Green = Link, Amber = Link/Act, Red = Off */
-	efx_mdio_set_flag(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_CTRL_REG,
+	ef4_mdio_set_flag(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_CTRL_REG,
 			  1 << PMA_PMA_LED_ACTIVITY_LBN, true);
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG,
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG,
 		       SFX7101_PMA_PMD_LED_DEFAULT);
 
 	return 0;
 }
 
-static int tenxpress_phy_probe(struct efx_nic *efx)
+static int tenxpress_phy_probe(struct ef4_nic *efx)
 {
 	struct tenxpress_phy_data *phy_data;
 
@@ -185,18 +185,18 @@ static int tenxpress_phy_probe(struct efx_nic *efx)
 	return 0;
 }
 
-static int tenxpress_phy_init(struct efx_nic *efx)
+static int tenxpress_phy_init(struct ef4_nic *efx)
 {
 	int rc;
 
 	falcon_board(efx)->type->init_phy(efx);
 
 	if (!(efx->phy_mode & PHY_MODE_SPECIAL)) {
-		rc = efx_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
+		rc = ef4_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
 		if (rc < 0)
 			return rc;
 
-		rc = efx_mdio_check_mmds(efx, TENXPRESS_REQUIRED_DEVS);
+		rc = ef4_mdio_check_mmds(efx, TENXPRESS_REQUIRED_DEVS);
 		if (rc < 0)
 			return rc;
 	}
@@ -206,8 +206,8 @@ static int tenxpress_phy_init(struct efx_nic *efx)
 		return rc;
 
 	/* Reinitialise flow control settings */
-	efx_link_set_wanted_fc(efx, efx->wanted_fc);
-	efx_mdio_an_reconfigure(efx);
+	ef4_link_set_wanted_fc(efx, efx->wanted_fc);
+	ef4_mdio_an_reconfigure(efx);
 
 	schedule_timeout_uninterruptible(HZ / 5); /* 200ms */
 
@@ -220,7 +220,7 @@ static int tenxpress_phy_init(struct efx_nic *efx)
 /* Perform a "special software reset" on the PHY. The caller is
  * responsible for saving and restoring the PHY hardware registers
  * properly, and masking/unmasking LASI */
-static int tenxpress_special_reset(struct efx_nic *efx)
+static int tenxpress_special_reset(struct ef4_nic *efx)
 {
 	int rc, reg;
 
@@ -230,14 +230,14 @@ static int tenxpress_special_reset(struct efx_nic *efx)
 	falcon_stop_nic_stats(efx);
 
 	/* Initiate reset */
-	reg = efx_mdio_read(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG);
+	reg = ef4_mdio_read(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG);
 	reg |= (1 << PMA_PMD_EXT_SSR_LBN);
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg);
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg);
 
 	mdelay(200);
 
 	/* Wait for the blocks to come out of reset */
-	rc = efx_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
+	rc = ef4_mdio_wait_reset_mmds(efx, TENXPRESS_REQUIRED_DEVS);
 	if (rc < 0)
 		goto out;
 
@@ -253,7 +253,7 @@ out:
 	return rc;
 }
 
-static void sfx7101_check_bad_lp(struct efx_nic *efx, bool link_ok)
+static void sfx7101_check_bad_lp(struct ef4_nic *efx, bool link_ok)
 {
 	struct tenxpress_phy_data *pd = efx->phy_data;
 	bool bad_lp;
@@ -263,7 +263,7 @@ static void sfx7101_check_bad_lp(struct efx_nic *efx, bool link_ok)
 		bad_lp = false;
 	} else {
 		/* Check that AN has started but not completed. */
-		reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_STAT1);
+		reg = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_STAT1);
 		if (!(reg & MDIO_AN_STAT1_LPABLE))
 			return; /* LP status is unknown */
 		bad_lp = !(reg & MDIO_AN_STAT1_COMPLETE);
@@ -278,7 +278,7 @@ static void sfx7101_check_bad_lp(struct efx_nic *efx, bool link_ok)
 	/* Use the RX (red) LED as an error indicator once we've seen AN
 	 * failure several times in a row, and also log a message. */
 	if (!bad_lp || pd->bad_lp_tries == MAX_BAD_LP_TRIES) {
-		reg = efx_mdio_read(efx, MDIO_MMD_PMAPMD,
+		reg = ef4_mdio_read(efx, MDIO_MMD_PMAPMD,
 				    PMA_PMD_LED_OVERR_REG);
 		reg &= ~(PMA_PMD_LED_MASK << PMA_PMD_LED_RX_LBN);
 		if (!bad_lp) {
@@ -291,35 +291,35 @@ static void sfx7101_check_bad_lp(struct efx_nic *efx, bool link_ok)
 				  " supports 10GBASE-T ONLY, so no link can"
 				  " be established\n");
 		}
-		efx_mdio_write(efx, MDIO_MMD_PMAPMD,
+		ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
 			       PMA_PMD_LED_OVERR_REG, reg);
 		pd->bad_lp_tries = bad_lp;
 	}
 }
 
-static bool sfx7101_link_ok(struct efx_nic *efx)
+static bool sfx7101_link_ok(struct ef4_nic *efx)
 {
-	return efx_mdio_links_ok(efx,
+	return ef4_mdio_links_ok(efx,
 				 MDIO_DEVS_PMAPMD |
 				 MDIO_DEVS_PCS |
 				 MDIO_DEVS_PHYXS);
 }
 
-static void tenxpress_ext_loopback(struct efx_nic *efx)
+static void tenxpress_ext_loopback(struct ef4_nic *efx)
 {
-	efx_mdio_set_flag(efx, MDIO_MMD_PHYXS, PHYXS_TEST1,
+	ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS, PHYXS_TEST1,
 			  1 << LOOPBACK_NEAR_LBN,
 			  efx->loopback_mode == LOOPBACK_PHYXS);
 }
 
-static void tenxpress_low_power(struct efx_nic *efx)
+static void tenxpress_low_power(struct ef4_nic *efx)
 {
-	efx_mdio_set_mmds_lpower(
+	ef4_mdio_set_mmds_lpower(
 		efx, !!(efx->phy_mode & PHY_MODE_LOW_POWER),
 		TENXPRESS_REQUIRED_DEVS);
 }
 
-static int tenxpress_phy_reconfigure(struct efx_nic *efx)
+static int tenxpress_phy_reconfigure(struct ef4_nic *efx)
 {
 	struct tenxpress_phy_data *phy_data = efx->phy_data;
 	bool phy_mode_change, loop_reset;
@@ -340,10 +340,10 @@ static int tenxpress_phy_reconfigure(struct efx_nic *efx)
 	}
 
 	tenxpress_low_power(efx);
-	efx_mdio_transmit_disable(efx);
-	efx_mdio_phy_reconfigure(efx);
+	ef4_mdio_transmit_disable(efx);
+	ef4_mdio_phy_reconfigure(efx);
 	tenxpress_ext_loopback(efx);
-	efx_mdio_an_reconfigure(efx);
+	ef4_mdio_an_reconfigure(efx);
 
 	phy_data->loopback_mode = efx->loopback_mode;
 	phy_data->phy_mode = efx->phy_mode;
@@ -352,30 +352,30 @@ static int tenxpress_phy_reconfigure(struct efx_nic *efx)
 }
 
 static void
-tenxpress_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd);
+tenxpress_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd);
 
 /* Poll for link state changes */
-static bool tenxpress_phy_poll(struct efx_nic *efx)
+static bool tenxpress_phy_poll(struct ef4_nic *efx)
 {
-	struct efx_link_state old_state = efx->link_state;
+	struct ef4_link_state old_state = efx->link_state;
 
 	efx->link_state.up = sfx7101_link_ok(efx);
 	efx->link_state.speed = 10000;
 	efx->link_state.fd = true;
-	efx->link_state.fc = efx_mdio_get_pause(efx);
+	efx->link_state.fc = ef4_mdio_get_pause(efx);
 
 	sfx7101_check_bad_lp(efx, efx->link_state.up);
 
-	return !efx_link_state_equal(&efx->link_state, &old_state);
+	return !ef4_link_state_equal(&efx->link_state, &old_state);
 }
 
-static void sfx7101_phy_fini(struct efx_nic *efx)
+static void sfx7101_phy_fini(struct ef4_nic *efx)
 {
 	int reg;
 
 	/* Power down the LNPGA */
 	reg = (1 << PMA_PMD_LNPGA_POWERDOWN_LBN);
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg);
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_XCONTROL_REG, reg);
 
 	/* Waiting here ensures that the board fini, which can turn
 	 * off the power to the PHY, won't get run until the LNPGA
@@ -383,7 +383,7 @@ static void sfx7101_phy_fini(struct efx_nic *efx)
 	schedule_timeout_uninterruptible(LNPGA_PDOWN_WAIT); /* 200 ms */
 }
 
-static void tenxpress_phy_remove(struct efx_nic *efx)
+static void tenxpress_phy_remove(struct ef4_nic *efx)
 {
 	kfree(efx->phy_data);
 	efx->phy_data = NULL;
@@ -391,17 +391,17 @@ static void tenxpress_phy_remove(struct efx_nic *efx)
 
 
 /* Override the RX, TX and link LEDs */
-void tenxpress_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
+void tenxpress_set_id_led(struct ef4_nic *efx, enum ef4_led_mode mode)
 {
 	int reg;
 
 	switch (mode) {
-	case EFX_LED_OFF:
+	case EF4_LED_OFF:
 		reg = (PMA_PMD_LED_OFF << PMA_PMD_LED_TX_LBN) |
 			(PMA_PMD_LED_OFF << PMA_PMD_LED_RX_LBN) |
 			(PMA_PMD_LED_OFF << PMA_PMD_LED_LINK_LBN);
 		break;
-	case EFX_LED_ON:
+	case EF4_LED_ON:
 		reg = (PMA_PMD_LED_ON << PMA_PMD_LED_TX_LBN) |
 			(PMA_PMD_LED_ON << PMA_PMD_LED_RX_LBN) |
 			(PMA_PMD_LED_ON << PMA_PMD_LED_LINK_LBN);
@@ -411,14 +411,14 @@ void tenxpress_set_id_led(struct efx_nic *efx, enum efx_led_mode mode)
 		break;
 	}
 
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG, reg);
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD, PMA_PMD_LED_OVERR_REG, reg);
 }
 
 static const char *const sfx7101_test_names[] = {
 	"bist"
 };
 
-static const char *sfx7101_test_name(struct efx_nic *efx, unsigned int index)
+static const char *sfx7101_test_name(struct ef4_nic *efx, unsigned int index)
 {
 	if (index < ARRAY_SIZE(sfx7101_test_names))
 		return sfx7101_test_names[index];
@@ -426,7 +426,7 @@ static const char *sfx7101_test_name(struct efx_nic *efx, unsigned int index)
 }
 
 static int
-sfx7101_run_tests(struct efx_nic *efx, int *results, unsigned flags)
+sfx7101_run_tests(struct ef4_nic *efx, int *results, unsigned flags)
 {
 	int rc;
 
@@ -437,21 +437,21 @@ sfx7101_run_tests(struct efx_nic *efx, int *results, unsigned flags)
 	rc = tenxpress_special_reset(efx);
 	results[0] = rc ? -1 : 1;
 
-	efx_mdio_an_reconfigure(efx);
+	ef4_mdio_an_reconfigure(efx);
 
 	return rc;
 }
 
 static void
-tenxpress_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+tenxpress_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
 {
 	u32 adv = 0, lpa = 0;
 	int reg;
 
-	reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL);
+	reg = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL);
 	if (reg & MDIO_AN_10GBT_CTRL_ADV10G)
 		adv |= ADVERTISED_10000baseT_Full;
-	reg = efx_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
+	reg = ef4_mdio_read(efx, MDIO_MMD_AN, MDIO_AN_10GBT_STAT);
 	if (reg & MDIO_AN_10GBT_STAT_LP10G)
 		lpa |= ADVERTISED_10000baseT_Full;
 
@@ -463,22 +463,22 @@ tenxpress_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
 		ethtool_cmd_speed_set(ecmd, SPEED_10000);
 }
 
-static int tenxpress_set_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+static int tenxpress_set_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
 {
 	if (!ecmd->autoneg)
 		return -EINVAL;
 
-	return efx_mdio_set_settings(efx, ecmd);
+	return ef4_mdio_set_settings(efx, ecmd);
 }
 
-static void sfx7101_set_npage_adv(struct efx_nic *efx, u32 advertising)
+static void sfx7101_set_npage_adv(struct ef4_nic *efx, u32 advertising)
 {
-	efx_mdio_set_flag(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
+	ef4_mdio_set_flag(efx, MDIO_MMD_AN, MDIO_AN_10GBT_CTRL,
 			  MDIO_AN_10GBT_CTRL_ADV10G,
 			  advertising & ADVERTISED_10000baseT_Full);
 }
 
-const struct efx_phy_operations falcon_sfx7101_phy_ops = {
+const struct ef4_phy_operations falcon_sfx7101_phy_ops = {
 	.probe		  = tenxpress_phy_probe,
 	.init             = tenxpress_phy_init,
 	.reconfigure      = tenxpress_phy_reconfigure,
@@ -488,7 +488,7 @@ const struct efx_phy_operations falcon_sfx7101_phy_ops = {
 	.get_settings	  = tenxpress_get_settings,
 	.set_settings	  = tenxpress_set_settings,
 	.set_npage_adv    = sfx7101_set_npage_adv,
-	.test_alive	  = efx_mdio_test_alive,
+	.test_alive	  = ef4_mdio_test_alive,
 	.test_name	  = sfx7101_test_name,
 	.run_tests	  = sfx7101_run_tests,
 };
diff --git a/drivers/net/ethernet/sfc/falcon/tx.c b/drivers/net/ethernet/sfc/falcon/tx.c
new file mode 100644
index 000000000000..104fb15a73f2
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/tx.c
@@ -0,0 +1,649 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/pci.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include <linux/cache.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "io.h"
+#include "nic.h"
+#include "tx.h"
+#include "workarounds.h"
+
+static inline u8 *ef4_tx_get_copy_buffer(struct ef4_tx_queue *tx_queue,
+					 struct ef4_tx_buffer *buffer)
+{
+	unsigned int index = ef4_tx_queue_get_insert_index(tx_queue);
+	struct ef4_buffer *page_buf =
+		&tx_queue->cb_page[index >> (PAGE_SHIFT - EF4_TX_CB_ORDER)];
+	unsigned int offset =
+		((index << EF4_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1);
+
+	if (unlikely(!page_buf->addr) && /* page allocated on first use */
+	    ef4_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
+				 GFP_ATOMIC))
+		return NULL; /* allocation failed */
+	buffer->dma_addr = page_buf->dma_addr + offset;
+	buffer->unmap_len = 0; /* nothing to unmap on completion */
+	return (u8 *)page_buf->addr + offset;
+}
+
+u8 *ef4_tx_get_copy_buffer_limited(struct ef4_tx_queue *tx_queue,
+				   struct ef4_tx_buffer *buffer, size_t len)
+{
+	if (len > EF4_TX_CB_SIZE)
+		return NULL;
+	return ef4_tx_get_copy_buffer(tx_queue, buffer);
+}
+
+static void ef4_dequeue_buffer(struct ef4_tx_queue *tx_queue,
+			       struct ef4_tx_buffer *buffer,
+			       unsigned int *pkts_compl,
+			       unsigned int *bytes_compl)
+{
+	if (buffer->unmap_len) { /* set on a mapping's last descriptor */
+		struct device *dma_dev = &tx_queue->efx->pci_dev->dev;
+		dma_addr_t unmap_addr = buffer->dma_addr - buffer->dma_offset;
+		if (buffer->flags & EF4_TX_BUF_MAP_SINGLE)
+			dma_unmap_single(dma_dev, unmap_addr, buffer->unmap_len,
+					 DMA_TO_DEVICE);
+		else
+			dma_unmap_page(dma_dev, unmap_addr, buffer->unmap_len,
+				       DMA_TO_DEVICE);
+		buffer->unmap_len = 0;
+	}
+
+	if (buffer->flags & EF4_TX_BUF_SKB) {
+		(*pkts_compl)++; /* counters must be non-NULL here */
+		(*bytes_compl) += buffer->skb->len;
+		dev_consume_skb_any((struct sk_buff *)buffer->skb);
+		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
+			   "TX queue %d transmission id %x complete\n",
+			   tx_queue->queue, tx_queue->read_count);
+	}
+
+	buffer->len = 0;
+	buffer->flags = 0;
+}
+
+unsigned int ef4_tx_max_skb_descs(struct ef4_nic *efx)
+{
+	/* This is probably too much since we don't have any TSO support;
+	 * it's a left-over from when we had Software TSO.  But it's safer
+	 * to leave it as-is than try to determine a new bound.
+	 */
+	/* Header and payload descriptor for each output segment, plus
+	 * one for every input fragment boundary within a segment
+	 */
+	unsigned int max_descs = EF4_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
+
+	/* Possibly one more per segment for the alignment workaround,
+	 * or for option descriptors
+	 */
+	if (EF4_WORKAROUND_5391(efx))
+		max_descs += EF4_TSO_MAX_SEGS;
+
+	/* Possibly more for PCIe page boundaries within input fragments */
+	if (PAGE_SIZE > EF4_PAGE_SIZE)
+		max_descs += max_t(unsigned int, MAX_SKB_FRAGS,
+				   DIV_ROUND_UP(GSO_MAX_SIZE, EF4_PAGE_SIZE));
+
+	return max_descs;
+}
+
+static void ef4_tx_maybe_stop_queue(struct ef4_tx_queue *txq1)
+{
+	/* We need to consider both queues that the net core sees as one */
+	struct ef4_tx_queue *txq2 = ef4_tx_queue_partner(txq1);
+	struct ef4_nic *efx = txq1->efx;
+	unsigned int fill_level;
+
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	if (likely(fill_level < efx->txq_stop_thresh))
+		return;
+
+	/* We used the stale old_read_count above, which gives us a
+	 * pessimistic estimate of the fill level (which may even
+	 * validly be >= efx->txq_entries).  Now try again using
+	 * read_count (more likely to be a cache miss).
+	 *
+	 * If we read read_count and then conditionally stop the
+	 * queue, it is possible for the completion path to race with
+	 * us and complete all outstanding descriptors in the middle,
+	 * after which there will be no more completions to wake it.
+	 * Therefore we stop the queue first, then read read_count
+	 * (with a memory barrier to ensure the ordering), then
+	 * restart the queue if the fill level turns out to be low
+	 * enough.
+	 */
+	netif_tx_stop_queue(txq1->core_txq);
+	smp_mb();
+	txq1->old_read_count = ACCESS_ONCE(txq1->read_count);
+	txq2->old_read_count = ACCESS_ONCE(txq2->read_count);
+
+	fill_level = max(txq1->insert_count - txq1->old_read_count,
+			 txq2->insert_count - txq2->old_read_count);
+	EF4_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
+	if (likely(fill_level < efx->txq_stop_thresh)) {
+		smp_mb();
+		if (likely(!efx->loopback_selftest))
+			netif_tx_start_queue(txq1->core_txq);
+	}
+}
+
+static int ef4_enqueue_skb_copy(struct ef4_tx_queue *tx_queue,
+				struct sk_buff *skb)
+{
+	unsigned int min_len = tx_queue->tx_min_size;
+	unsigned int copy_len = skb->len;
+	struct ef4_tx_buffer *buffer;
+	u8 *copy_buffer;
+	int rc;
+
+	EF4_BUG_ON_PARANOID(copy_len > EF4_TX_CB_SIZE);
+
+	buffer = ef4_tx_queue_get_insert_buffer(tx_queue);
+
+	copy_buffer = ef4_tx_get_copy_buffer(tx_queue, buffer);
+	if (unlikely(!copy_buffer))
+		return -ENOMEM;
+
+	rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
+	EF4_WARN_ON_PARANOID(rc);
+	if (unlikely(copy_len < min_len)) { /* pad short frames with zeros */
+		memset(copy_buffer + copy_len, 0, min_len - copy_len);
+		buffer->len = min_len;
+	} else {
+		buffer->len = copy_len;
+	}
+
+	buffer->skb = skb;
+	buffer->flags = EF4_TX_BUF_SKB;
+
+	++tx_queue->insert_count;
+	return rc; /* NOTE(review): buffer already holds skb if rc != 0 */
+}
+
+static struct ef4_tx_buffer *ef4_tx_map_chunk(struct ef4_tx_queue *tx_queue,
+					      dma_addr_t dma_addr,
+					      size_t len)
+{
+	const struct ef4_nic_type *nic_type = tx_queue->efx->type;
+	struct ef4_tx_buffer *buffer;
+	unsigned int dma_len;
+
+	/* Map the fragment taking account of NIC-dependent DMA limits. */
+	do {
+		buffer = ef4_tx_queue_get_insert_buffer(tx_queue);
+		dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
+
+		buffer->len = dma_len;
+		buffer->dma_addr = dma_addr;
+		buffer->flags = EF4_TX_BUF_CONT;
+		len -= dma_len;
+		dma_addr += dma_len;
+		++tx_queue->insert_count;
+	} while (len);
+
+	return buffer;
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue.
+ */
+static int ef4_tx_map_data(struct ef4_tx_queue *tx_queue, struct sk_buff *skb)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	struct device *dma_dev = &efx->pci_dev->dev;
+	unsigned int frag_index, nr_frags;
+	dma_addr_t dma_addr, unmap_addr;
+	unsigned short dma_flags;
+	size_t len, unmap_len;
+
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	frag_index = 0;
+
+	/* Map header data. */
+	len = skb_headlen(skb);
+	dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+	dma_flags = EF4_TX_BUF_MAP_SINGLE;
+	unmap_len = len;
+	unmap_addr = dma_addr;
+
+	if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+		return -EIO;
+
+	/* Add descriptors for each fragment. */
+	do {
+		struct ef4_tx_buffer *buffer;
+		skb_frag_t *fragment;
+
+		buffer = ef4_tx_map_chunk(tx_queue, dma_addr, len);
+
+		/* The final descriptor for a fragment is responsible for
+		 * unmapping the whole fragment.
+		 */
+		buffer->flags = EF4_TX_BUF_CONT | dma_flags;
+		buffer->unmap_len = unmap_len;
+		buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+		if (frag_index >= nr_frags) {
+			/* Store SKB details with the final buffer for
+			 * the completion.
+			 */
+			buffer->skb = skb;
+			buffer->flags = EF4_TX_BUF_SKB | dma_flags;
+			return 0;
+		}
+
+		/* Move on to the next fragment. */
+		fragment = &skb_shinfo(skb)->frags[frag_index++];
+		len = skb_frag_size(fragment);
+		dma_addr = skb_frag_dma_map(dma_dev, fragment,
+				0, len, DMA_TO_DEVICE);
+		dma_flags = 0; /* page mapping: dma_unmap_page() */
+		unmap_len = len;
+		unmap_addr = dma_addr;
+
+		if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+			return -EIO; /* caller unwinds queued buffers */
+	} while (1);
+}
+
+/* Remove buffers put into a tx_queue.  None of the buffers must have
+ * an skb attached: ef4_dequeue_buffer() is given NULL completion counters.
+ */
+static void ef4_enqueue_unwind(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_tx_buffer *buffer;
+
+	/* Work backwards until we hit the original insert pointer value */
+	while (tx_queue->insert_count != tx_queue->write_count) {
+		--tx_queue->insert_count;
+		buffer = __ef4_tx_queue_get_insert_buffer(tx_queue);
+		ef4_dequeue_buffer(tx_queue, buffer, NULL, NULL);
+	}
+}
+
+/*
+ * Add a socket buffer to a TX queue
+ *
+ * This maps all fragments of a socket buffer for DMA and adds them to
+ * the TX queue.  The queue's insert pointer will be incremented by
+ * the number of fragments in the socket buffer.
+ *
+ * If any DMA mapping fails, any mapped fragments will be unmapped,
+ * the queue's insert pointer will be restored to its original value.
+ *
+ * This function is split out from ef4_hard_start_xmit to allow the
+ * loopback test to direct packets via specific TX queues.
+ *
+ * Returns NETDEV_TX_OK in all cases; on failure the skb is freed (dropped).
+ * You must hold netif_tx_lock() to call this function.
+ */
+netdev_tx_t ef4_enqueue_skb(struct ef4_tx_queue *tx_queue, struct sk_buff *skb)
+{
+	bool data_mapped = false;
+	unsigned int skb_len;
+
+	skb_len = skb->len;
+	EF4_WARN_ON_PARANOID(skb_is_gso(skb));
+
+	if (skb_len < tx_queue->tx_min_size ||
+			(skb->data_len && skb_len <= EF4_TX_CB_SIZE)) {
+		/* Pad short packets or coalesce short fragmented packets. */
+		if (ef4_enqueue_skb_copy(tx_queue, skb))
+			goto err;
+		tx_queue->cb_packets++;
+		data_mapped = true;
+	}
+
+	/* Map for DMA and create descriptors if we haven't done so already. */
+	if (!data_mapped && (ef4_tx_map_data(tx_queue, skb)))
+		goto err;
+
+	/* Update BQL */
+	netdev_tx_sent_queue(tx_queue->core_txq, skb_len);
+
+	/* Pass off to hardware */
+	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
+		struct ef4_tx_queue *txq2 = ef4_tx_queue_partner(tx_queue);
+
+		/* There could be packets left on the partner queue if those
+		 * SKBs had skb->xmit_more set. If we do not push those they
+		 * could be left for a long time and cause a netdev watchdog.
+		 */
+		if (txq2->xmit_more_available)
+			ef4_nic_push_buffers(txq2);
+
+		ef4_nic_push_buffers(tx_queue);
+	} else {
+		tx_queue->xmit_more_available = skb->xmit_more;
+	}
+
+	tx_queue->tx_packets++;
+
+	ef4_tx_maybe_stop_queue(tx_queue);
+
+	return NETDEV_TX_OK;
+
+
+err:
+	ef4_enqueue_unwind(tx_queue);
+	dev_kfree_skb_any(skb);
+	return NETDEV_TX_OK;
+}
+
+/* Remove packets from the TX queue
+ *
+ * This removes packets from the TX queue, up to and including the
+ * specified index.
+ */
+static void ef4_dequeue_buffers(struct ef4_tx_queue *tx_queue,
+				unsigned int index,
+				unsigned int *pkts_compl,
+				unsigned int *bytes_compl)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	unsigned int stop_index, read_ptr;
+
+	stop_index = (index + 1) & tx_queue->ptr_mask;
+	read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+
+	while (read_ptr != stop_index) {
+		struct ef4_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
+
+		if (!(buffer->flags & EF4_TX_BUF_OPTION) &&
+		    unlikely(buffer->len == 0)) {
+			netif_err(efx, tx_err, efx->net_dev,
+				  "TX queue %d spurious TX completion id %x\n",
+				  tx_queue->queue, read_ptr);
+			ef4_schedule_reset(efx, RESET_TYPE_TX_SKIP);
+			return;
+		}
+
+		ef4_dequeue_buffer(tx_queue, buffer, pkts_compl, bytes_compl);
+
+		++tx_queue->read_count;
+		read_ptr = tx_queue->read_count & tx_queue->ptr_mask;
+	}
+}
+
+/* Initiate a packet transmission.  We use one channel per CPU
+ * (sharing when we have more CPUs than channels).  On Falcon, the TX
+ * completion events will be directed back to the CPU that transmitted
+ * the packet, which should be cache-efficient.
+ *
+ * Context: non-blocking.
+ * Note that returning anything other than NETDEV_TX_OK will cause the
+ * OS to free the skb.
+ */
+netdev_tx_t ef4_hard_start_xmit(struct sk_buff *skb,
+				struct net_device *net_dev)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct ef4_tx_queue *tx_queue;
+	unsigned index, type;
+
+	EF4_WARN_ON_PARANOID(!netif_device_present(net_dev));
+
+	index = skb_get_queue_mapping(skb);
+	type = skb->ip_summed == CHECKSUM_PARTIAL ? EF4_TXQ_TYPE_OFFLOAD : 0;
+	if (index >= efx->n_tx_channels) {
+		index -= efx->n_tx_channels;
+		type |= EF4_TXQ_TYPE_HIGHPRI;
+	}
+	tx_queue = ef4_get_tx_queue(efx, index, type);
+
+	return ef4_enqueue_skb(tx_queue, skb);
+}
+
+void ef4_init_tx_queue_core_txq(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+
+	/* Must be inverse of queue lookup in ef4_hard_start_xmit() */
+	tx_queue->core_txq =
+		netdev_get_tx_queue(efx->net_dev,
+				    tx_queue->queue / EF4_TXQ_TYPES +
+				    ((tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI) ?
+				     efx->n_tx_channels : 0));
+}
+
+int ef4_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
+		 struct tc_to_netdev *ntc)
+{
+	struct ef4_nic *efx = netdev_priv(net_dev);
+	struct ef4_channel *channel;
+	struct ef4_tx_queue *tx_queue;
+	unsigned tc, num_tc;
+	int rc;
+	/* mqprio only: change the device's traffic-class count to ntc->tc */
+	if (ntc->type != TC_SETUP_MQPRIO)
+		return -EINVAL;
+
+	num_tc = ntc->tc;
+
+	if (ef4_nic_rev(efx) < EF4_REV_FALCON_B0 || num_tc > EF4_MAX_TX_TC)
+		return -EINVAL;	/* unsupported rev or class count */
+
+	if (num_tc == net_dev->num_tc)
+		return 0;	/* class count is already as requested */
+	/* Each class gets a contiguous block of n_tx_channels core queues */
+	for (tc = 0; tc < num_tc; tc++) {
+		net_dev->tc_to_txq[tc].offset = tc * efx->n_tx_channels;
+		net_dev->tc_to_txq[tc].count = efx->n_tx_channels;
+	}
+
+	if (num_tc > net_dev->num_tc) {
+		/* Initialise high-priority queues as necessary */
+		ef4_for_each_channel(channel, efx) {
+			ef4_for_each_possible_channel_tx_queue(tx_queue,
+							       channel) {
+				if (!(tx_queue->queue & EF4_TXQ_TYPE_HIGHPRI))
+					continue;
+				if (!tx_queue->buffer) {
+					rc = ef4_probe_tx_queue(tx_queue);
+					if (rc)
+						return rc;
+				}
+				if (!tx_queue->initialised)
+					ef4_init_tx_queue(tx_queue);
+				ef4_init_tx_queue_core_txq(tx_queue);
+			}
+		}
+	} else {
+		/* Reduce number of classes before number of queues */
+		net_dev->num_tc = num_tc;
+	}
+	/* num_tc == 0 still leaves one set of n_tx_channels queues */
+	rc = netif_set_real_num_tx_queues(net_dev,
+					  max_t(int, num_tc, 1) *
+					  efx->n_tx_channels);
+	if (rc)
+		return rc;
+
+	/* Do not destroy high-priority queues when they become
+	 * unused.  We would have to flush them first, and it is
+	 * fairly difficult to flush a subset of TX queues.  Leave
+	 * it to ef4_fini_channels().
+	 */
+
+	net_dev->num_tc = num_tc;
+	return 0;
+}
+
+void ef4_xmit_done(struct ef4_tx_queue *tx_queue, unsigned int index)
+{
+	unsigned fill_level;
+	struct ef4_nic *efx = tx_queue->efx;
+	struct ef4_tx_queue *txq2;
+	unsigned int pkts_compl = 0, bytes_compl = 0;
+	/* Process TX completions up to ring index @index for this queue */
+	EF4_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
+
+	ef4_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
+	tx_queue->pkts_compl += pkts_compl;
+	tx_queue->bytes_compl += bytes_compl;
+
+	if (pkts_compl > 1)
+		++tx_queue->merge_events;	/* several packets at once */
+
+	/* See if we need to restart the netif queue.  This memory
+	 * barrier ensures that we write read_count (inside
+	 * ef4_dequeue_buffers()) before reading the queue status.
+	 */
+	smp_mb();
+	if (unlikely(netif_tx_queue_stopped(tx_queue->core_txq)) &&
+	    likely(efx->port_enabled) &&
+	    likely(netif_device_present(efx->net_dev))) {
+		txq2 = ef4_tx_queue_partner(tx_queue);
+		fill_level = max(tx_queue->insert_count - tx_queue->read_count,
+				 txq2->insert_count - txq2->read_count);
+		if (fill_level <= efx->txq_wake_thresh)
+			netif_tx_wake_queue(tx_queue->core_txq);
+	}
+
+	/* Check whether the hardware queue is now empty */
+	if ((int)(tx_queue->read_count - tx_queue->old_write_count) >= 0) {
+		tx_queue->old_write_count = ACCESS_ONCE(tx_queue->write_count);
+		if (tx_queue->read_count == tx_queue->old_write_count) {
+			smp_mb();
+			tx_queue->empty_read_count =
+				tx_queue->read_count | EF4_EMPTY_COUNT_VALID;
+		}
+	}
+}
+
+/* Number of cb_page[] entries needed for the current ring size */
+static unsigned int ef4_tx_cb_page_count(struct ef4_tx_queue *tx_queue)
+{
+	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EF4_TX_CB_ORDER);
+}
+
+/* Allocate the software ring, the cb_page array and the hardware ring for
+ * @tx_queue.  Returns 0 on success or a negative errno, in which case the
+ * software allocations made here have been freed again.
+ */
+int ef4_probe_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	unsigned int n_slots;
+	int rc = -ENOMEM;
+
+	/* Ring size: txq_entries rounded up to a power of two, with a floor */
+	n_slots = max(roundup_pow_of_two(efx->txq_entries), EF4_MIN_DMAQ_SIZE);
+	EF4_BUG_ON_PARANOID(n_slots > EF4_MAX_DMAQ_SIZE);
+	tx_queue->ptr_mask = n_slots - 1;
+
+	netif_dbg(efx, probe, efx->net_dev,
+		  "creating TX queue %d size %#x mask %#x\n",
+		  tx_queue->queue, efx->txq_entries, tx_queue->ptr_mask);
+
+	/* Software ring: one zeroed entry per ring slot */
+	tx_queue->buffer = kcalloc(n_slots, sizeof(tx_queue->buffer[0]),
+				   GFP_KERNEL);
+	if (!tx_queue->buffer)
+		return rc;
+
+	tx_queue->cb_page = kcalloc(ef4_tx_cb_page_count(tx_queue),
+				    sizeof(*tx_queue->cb_page), GFP_KERNEL);
+	if (!tx_queue->cb_page)
+		goto err_free_buffer;
+
+	/* Hardware ring */
+	rc = ef4_nic_probe_tx(tx_queue);
+	if (!rc)
+		return 0;
+
+	kfree(tx_queue->cb_page);
+	tx_queue->cb_page = NULL;
+err_free_buffer:
+	kfree(tx_queue->buffer);
+	tx_queue->buffer = NULL;
+	return rc;
+}
+
+void ef4_init_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+
+	netif_dbg(efx, drv, efx->net_dev,
+		  "initialising TX queue %d\n", tx_queue->queue);
+
+	/* Reset all ring counters; empty_read_count is 0 with the valid flag */
+	tx_queue->read_count = 0;
+	tx_queue->old_read_count = 0;
+	tx_queue->insert_count = 0;
+	tx_queue->write_count = 0;
+	tx_queue->old_write_count = 0;
+	tx_queue->empty_read_count = EF4_EMPTY_COUNT_VALID;
+	tx_queue->xmit_more_available = false;
+
+	/* Hardware subject to workaround 15592 needs TX writes above 32 */
+	tx_queue->tx_min_size = EF4_WORKAROUND_15592(efx) ? 33 : 0;
+
+	/* Set up the TX descriptor ring, then flag the queue as initialised */
+	ef4_nic_init_tx(tx_queue);
+	tx_queue->initialised = true;
+}
+
+void ef4_fini_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+	unsigned int slot, pkts_compl, bytes_compl;
+
+	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+		  "shutting down TX queue %d\n", tx_queue->queue);
+
+	if (!tx_queue->buffer)
+		return;
+
+	/* Release every buffer between read_count and write_count */
+	for (; tx_queue->read_count != tx_queue->write_count;
+	     ++tx_queue->read_count) {
+		pkts_compl = bytes_compl = 0;
+		slot = tx_queue->read_count & tx_queue->ptr_mask;
+		ef4_dequeue_buffer(tx_queue, &tx_queue->buffer[slot],
+				   &pkts_compl, &bytes_compl);
+	}
+	tx_queue->xmit_more_available = false;
+	netdev_tx_reset_queue(tx_queue->core_txq);
+}
+
+void ef4_remove_tx_queue(struct ef4_tx_queue *tx_queue)
+{
+	struct ef4_nic *efx = tx_queue->efx;
+	unsigned int i, n;	/* unsigned, like ef4_tx_cb_page_count() */
+
+	if (!tx_queue->buffer)
+		return;	/* no software ring: nothing to tear down */
+
+	netif_dbg(efx, drv, efx->net_dev,
+		  "destroying TX queue %d\n", tx_queue->queue);
+	ef4_nic_remove_tx(tx_queue);
+	/* Release each cb_page entry, then the array that held them */
+	if (tx_queue->cb_page) {
+		for (i = 0, n = ef4_tx_cb_page_count(tx_queue); i < n; i++)
+			ef4_nic_free_buffer(efx, &tx_queue->cb_page[i]);
+		kfree(tx_queue->cb_page);
+		tx_queue->cb_page = NULL;
+	}
+
+	kfree(tx_queue->buffer);
+	tx_queue->buffer = NULL;
+}
diff --git a/drivers/net/ethernet/sfc/falcon/tx.h b/drivers/net/ethernet/sfc/falcon/tx.h
new file mode 100644
index 000000000000..a607eb0087a8
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/tx.h
@@ -0,0 +1,27 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2015 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_TX_H
+#define EF4_TX_H
+
+#include <linux/types.h>
+
+/* Driver internal tx-path related declarations. */
+
+unsigned int ef4_tx_limit_len(struct ef4_tx_queue *tx_queue,
+			      dma_addr_t dma_addr, unsigned int len);
+
+u8 *ef4_tx_get_copy_buffer_limited(struct ef4_tx_queue *tx_queue,
+				   struct ef4_tx_buffer *buffer, size_t len);
+
+int ef4_enqueue_skb_tso(struct ef4_tx_queue *tx_queue, struct sk_buff *skb,
+			bool *data_mapped);
+
+#endif /* EF4_TX_H */
diff --git a/drivers/net/ethernet/sfc/txc43128_phy.c b/drivers/net/ethernet/sfc/falcon/txc43128_phy.c
index 194f67d9f3bf..18421f5e880f 100644
--- a/drivers/net/ethernet/sfc/txc43128_phy.c
+++ b/drivers/net/ethernet/sfc/falcon/txc43128_phy.c
@@ -158,8 +158,8 @@
 
 struct txc43128_data {
 	unsigned long bug10934_timer;
-	enum efx_phy_mode phy_mode;
-	enum efx_loopback_mode loopback_mode;
+	enum ef4_phy_mode phy_mode;
+	enum ef4_loopback_mode loopback_mode;
 };
 
 /* The PHY sometimes needs a reset to bring the link back up.  So long as
@@ -168,32 +168,32 @@ struct txc43128_data {
 #define BUG10934_RESET_INTERVAL (5 * HZ)
 
 /* Perform a reset that doesn't clear configuration changes */
-static void txc_reset_logic(struct efx_nic *efx);
+static void txc_reset_logic(struct ef4_nic *efx);
 
 /* Set the output value of a gpio */
-void falcon_txc_set_gpio_val(struct efx_nic *efx, int pin, int on)
+void falcon_txc_set_gpio_val(struct ef4_nic *efx, int pin, int on)
 {
-	efx_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_OUTPUT, 1 << pin, on);
+	ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_OUTPUT, 1 << pin, on);
 }
 
 /* Set up the GPIO direction register */
-void falcon_txc_set_gpio_dir(struct efx_nic *efx, int pin, int dir)
+void falcon_txc_set_gpio_dir(struct ef4_nic *efx, int pin, int dir)
 {
-	efx_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_DIR, 1 << pin, dir);
+	ef4_mdio_set_flag(efx, MDIO_MMD_PHYXS, TXC_GPIO_DIR, 1 << pin, dir);
 }
 
 /* Reset the PMA/PMD MMD. The documentation is explicit that this does a
  * global reset (it's less clear what reset of other MMDs does).*/
-static int txc_reset_phy(struct efx_nic *efx)
+static int txc_reset_phy(struct ef4_nic *efx)
 {
-	int rc = efx_mdio_reset_mmd(efx, MDIO_MMD_PMAPMD,
+	int rc = ef4_mdio_reset_mmd(efx, MDIO_MMD_PMAPMD,
 				    TXC_MAX_RESET_TIME / TXC_RESET_WAIT,
 				    TXC_RESET_WAIT);
 	if (rc < 0)
 		goto fail;
 
 	/* Check that all the MMDs we expect are present and responding. */
-	rc = efx_mdio_check_mmds(efx, TXC_REQUIRED_DEVS);
+	rc = ef4_mdio_check_mmds(efx, TXC_REQUIRED_DEVS);
 	if (rc < 0)
 		goto fail;
 
@@ -205,28 +205,28 @@ fail:
 }
 
 /* Run a single BIST on one MMD */
-static int txc_bist_one(struct efx_nic *efx, int mmd, int test)
+static int txc_bist_one(struct ef4_nic *efx, int mmd, int test)
 {
 	int ctrl, bctl;
 	int lane;
 	int rc = 0;
 
 	/* Set PMA to test into loopback using Mt Diablo reg as per app note */
-	ctrl = efx_mdio_read(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL);
+	ctrl = ef4_mdio_read(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL);
 	ctrl |= (1 << TXC_MTDIABLO_CTRL_PMA_LOOP_LBN);
-	efx_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
+	ef4_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
 
 	/* The BIST app. note lists these  as 3 distinct steps. */
 	/* Set the BIST type */
 	bctl = (test << TXC_BIST_CTRL_TYPE_LBN);
-	efx_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
+	ef4_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
 
 	/* Set the BSTEN bit in the BIST Control register to enable */
 	bctl |= (1 << TXC_BIST_CTRL_ENAB_LBN);
-	efx_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
+	ef4_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
 
 	/* Set the BSTRT bit in the BIST Control register */
-	efx_mdio_write(efx, mmd, TXC_BIST_CTL,
+	ef4_mdio_write(efx, mmd, TXC_BIST_CTL,
 		       bctl | (1 << TXC_BIST_CTRL_STRT_LBN));
 
 	/* Wait. */
@@ -234,22 +234,22 @@ static int txc_bist_one(struct efx_nic *efx, int mmd, int test)
 
 	/* Set the BSTOP bit in the BIST Control register */
 	bctl |= (1 << TXC_BIST_CTRL_STOP_LBN);
-	efx_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
+	ef4_mdio_write(efx, mmd, TXC_BIST_CTL, bctl);
 
 	/* The STOP bit should go off when things have stopped */
 	while (bctl & (1 << TXC_BIST_CTRL_STOP_LBN))
-		bctl = efx_mdio_read(efx, mmd, TXC_BIST_CTL);
+		bctl = ef4_mdio_read(efx, mmd, TXC_BIST_CTL);
 
 	/* Check all the error counts are 0 and all the frame counts are
 	   non-zero */
 	for (lane = 0; lane < 4; lane++) {
-		int count = efx_mdio_read(efx, mmd, TXC_BIST_RX0ERRCNT + lane);
+		int count = ef4_mdio_read(efx, mmd, TXC_BIST_RX0ERRCNT + lane);
 		if (count != 0) {
 			netif_err(efx, hw, efx->net_dev, TXCNAME": BIST error. "
 				  "Lane %d had %d errs\n", lane, count);
 			rc = -EIO;
 		}
-		count = efx_mdio_read(efx, mmd, TXC_BIST_RX0FRMCNT + lane);
+		count = ef4_mdio_read(efx, mmd, TXC_BIST_RX0FRMCNT + lane);
 		if (count == 0) {
 			netif_err(efx, hw, efx->net_dev, TXCNAME": BIST error. "
 				  "Lane %d got 0 frames\n", lane);
@@ -261,23 +261,23 @@ static int txc_bist_one(struct efx_nic *efx, int mmd, int test)
 		netif_info(efx, hw, efx->net_dev, TXCNAME": BIST pass\n");
 
 	/* Disable BIST */
-	efx_mdio_write(efx, mmd, TXC_BIST_CTL, 0);
+	ef4_mdio_write(efx, mmd, TXC_BIST_CTL, 0);
 
 	/* Turn off loopback */
 	ctrl &= ~(1 << TXC_MTDIABLO_CTRL_PMA_LOOP_LBN);
-	efx_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
+	ef4_mdio_write(efx, MDIO_MMD_PCS, TXC_MTDIABLO_CTRL, ctrl);
 
 	return rc;
 }
 
-static int txc_bist(struct efx_nic *efx)
+static int txc_bist(struct ef4_nic *efx)
 {
 	return txc_bist_one(efx, MDIO_MMD_PCS, TXC_BIST_CTRL_TYPE_TSD);
 }
 
 /* Push the non-configurable defaults into the PHY. This must be
  * done after every full reset */
-static void txc_apply_defaults(struct efx_nic *efx)
+static void txc_apply_defaults(struct ef4_nic *efx)
 {
 	int mctrl;
 
@@ -287,33 +287,33 @@ static void txc_apply_defaults(struct efx_nic *efx)
 	 * saves a picowatt or two */
 
 	/* Turn off preemphasis */
-	efx_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE0, TXC_ATXPRE_NONE);
-	efx_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE1, TXC_ATXPRE_NONE);
+	ef4_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE0, TXC_ATXPRE_NONE);
+	ef4_mdio_write(efx, MDIO_MMD_PHYXS, TXC_ALRGS_ATXPRE1, TXC_ATXPRE_NONE);
 
 	/* Turn down the amplitude */
-	efx_mdio_write(efx, MDIO_MMD_PHYXS,
+	ef4_mdio_write(efx, MDIO_MMD_PHYXS,
 		       TXC_ALRGS_ATXAMP0, TXC_ATXAMP_0820_BOTH);
-	efx_mdio_write(efx, MDIO_MMD_PHYXS,
+	ef4_mdio_write(efx, MDIO_MMD_PHYXS,
 		       TXC_ALRGS_ATXAMP1, TXC_ATXAMP_0820_BOTH);
 
 	/* Set the line side amplitude and preemphasis to the databook
 	 * defaults as an erratum causes them to be 0 on at least some
 	 * PHY rev.s */
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD,
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
 		       TXC_ALRGS_ATXPRE0, TXC_ATXPRE_DEFAULT);
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD,
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
 		       TXC_ALRGS_ATXPRE1, TXC_ATXPRE_DEFAULT);
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD,
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
 		       TXC_ALRGS_ATXAMP0, TXC_ATXAMP_DEFAULT);
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD,
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD,
 		       TXC_ALRGS_ATXAMP1, TXC_ATXAMP_DEFAULT);
 
 	/* Set up the LEDs  */
-	mctrl = efx_mdio_read(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL);
+	mctrl = ef4_mdio_read(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL);
 
 	/* Set the Green and Red LEDs to their default modes */
 	mctrl &= ~((1 << TXC_MCTL_TXLED_LBN) | (1 << TXC_MCTL_RXLED_LBN));
-	efx_mdio_write(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL, mctrl);
+	ef4_mdio_write(efx, MDIO_MMD_PHYXS, TXC_MRGS_CTL, mctrl);
 
 	/* Databook recommends doing this after configuration changes */
 	txc_reset_logic(efx);
@@ -321,7 +321,7 @@ static void txc_apply_defaults(struct efx_nic *efx)
 	falcon_board(efx)->type->init_phy(efx);
 }
 
-static int txc43128_phy_probe(struct efx_nic *efx)
+static int txc43128_phy_probe(struct ef4_nic *efx)
 {
 	struct txc43128_data *phy_data;
 
@@ -341,7 +341,7 @@ static int txc43128_phy_probe(struct efx_nic *efx)
 }
 
 /* Initialisation entry point for this PHY driver */
-static int txc43128_phy_init(struct efx_nic *efx)
+static int txc43128_phy_init(struct ef4_nic *efx)
 {
 	int rc;
 
@@ -359,28 +359,28 @@ static int txc43128_phy_init(struct efx_nic *efx)
 }
 
 /* Set the lane power down state in the global registers */
-static void txc_glrgs_lane_power(struct efx_nic *efx, int mmd)
+static void txc_glrgs_lane_power(struct ef4_nic *efx, int mmd)
 {
 	int pd = (1 << TXC_GLCMD_L01PD_LBN) | (1 << TXC_GLCMD_L23PD_LBN);
-	int ctl = efx_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
+	int ctl = ef4_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
 
 	if (!(efx->phy_mode & PHY_MODE_LOW_POWER))
 		ctl &= ~pd;
 	else
 		ctl |= pd;
 
-	efx_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, ctl);
+	ef4_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, ctl);
 }
 
 /* Set the lane power down state in the analog control registers */
-static void txc_analog_lane_power(struct efx_nic *efx, int mmd)
+static void txc_analog_lane_power(struct ef4_nic *efx, int mmd)
 {
 	int txpd = (1 << TXC_ATXCTL_TXPD3_LBN) | (1 << TXC_ATXCTL_TXPD2_LBN)
 		| (1 << TXC_ATXCTL_TXPD1_LBN) | (1 << TXC_ATXCTL_TXPD0_LBN);
 	int rxpd = (1 << TXC_ARXCTL_RXPD3_LBN) | (1 << TXC_ARXCTL_RXPD2_LBN)
 		| (1 << TXC_ARXCTL_RXPD1_LBN) | (1 << TXC_ARXCTL_RXPD0_LBN);
-	int txctl = efx_mdio_read(efx, mmd, TXC_ALRGS_ATXCTL);
-	int rxctl = efx_mdio_read(efx, mmd, TXC_ALRGS_ARXCTL);
+	int txctl = ef4_mdio_read(efx, mmd, TXC_ALRGS_ATXCTL);
+	int rxctl = ef4_mdio_read(efx, mmd, TXC_ALRGS_ARXCTL);
 
 	if (!(efx->phy_mode & PHY_MODE_LOW_POWER)) {
 		txctl &= ~txpd;
@@ -390,14 +390,14 @@ static void txc_analog_lane_power(struct efx_nic *efx, int mmd)
 		rxctl |= rxpd;
 	}
 
-	efx_mdio_write(efx, mmd, TXC_ALRGS_ATXCTL, txctl);
-	efx_mdio_write(efx, mmd, TXC_ALRGS_ARXCTL, rxctl);
+	ef4_mdio_write(efx, mmd, TXC_ALRGS_ATXCTL, txctl);
+	ef4_mdio_write(efx, mmd, TXC_ALRGS_ARXCTL, rxctl);
 }
 
-static void txc_set_power(struct efx_nic *efx)
+static void txc_set_power(struct ef4_nic *efx)
 {
 	/* According to the data book, all the MMDs can do low power */
-	efx_mdio_set_mmds_lpower(efx,
+	ef4_mdio_set_mmds_lpower(efx,
 				 !!(efx->phy_mode & PHY_MODE_LOW_POWER),
 				 TXC_REQUIRED_DEVS);
 
@@ -411,15 +411,15 @@ static void txc_set_power(struct efx_nic *efx)
 	txc_analog_lane_power(efx, MDIO_MMD_PHYXS);
 }
 
-static void txc_reset_logic_mmd(struct efx_nic *efx, int mmd)
+static void txc_reset_logic_mmd(struct ef4_nic *efx, int mmd)
 {
-	int val = efx_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
+	int val = ef4_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
 	int tries = 50;
 
 	val |= (1 << TXC_GLCMD_LMTSWRST_LBN);
-	efx_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, val);
+	ef4_mdio_write(efx, mmd, TXC_GLRGS_GLCMD, val);
 	while (--tries) {
-		val = efx_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
+		val = ef4_mdio_read(efx, mmd, TXC_GLRGS_GLCMD);
 		if (!(val & (1 << TXC_GLCMD_LMTSWRST_LBN)))
 			break;
 		udelay(1);
@@ -431,7 +431,7 @@ static void txc_reset_logic_mmd(struct efx_nic *efx, int mmd)
 
 /* Perform a logic reset. This preserves the configuration registers
  * and is needed for some configuration changes to take effect */
-static void txc_reset_logic(struct efx_nic *efx)
+static void txc_reset_logic(struct ef4_nic *efx)
 {
 	/* The data sheet claims we can do the logic reset on either the
 	 * PCS or the PHYXS and the result is a reset of both host- and
@@ -439,15 +439,15 @@ static void txc_reset_logic(struct efx_nic *efx)
 	txc_reset_logic_mmd(efx, MDIO_MMD_PCS);
 }
 
-static bool txc43128_phy_read_link(struct efx_nic *efx)
+static bool txc43128_phy_read_link(struct ef4_nic *efx)
 {
-	return efx_mdio_links_ok(efx, TXC_REQUIRED_DEVS);
+	return ef4_mdio_links_ok(efx, TXC_REQUIRED_DEVS);
 }
 
-static int txc43128_phy_reconfigure(struct efx_nic *efx)
+static int txc43128_phy_reconfigure(struct ef4_nic *efx)
 {
 	struct txc43128_data *phy_data = efx->phy_data;
-	enum efx_phy_mode mode_change = efx->phy_mode ^ phy_data->phy_mode;
+	enum ef4_phy_mode mode_change = efx->phy_mode ^ phy_data->phy_mode;
 	bool loop_change = LOOPBACK_CHANGED(phy_data, efx, TXC_LOOPBACKS);
 
 	if (efx->phy_mode & mode_change & PHY_MODE_TX_DISABLED) {
@@ -457,8 +457,8 @@ static int txc43128_phy_reconfigure(struct efx_nic *efx)
 		mode_change &= ~PHY_MODE_TX_DISABLED;
 	}
 
-	efx_mdio_transmit_disable(efx);
-	efx_mdio_phy_reconfigure(efx);
+	ef4_mdio_transmit_disable(efx);
+	ef4_mdio_phy_reconfigure(efx);
 	if (mode_change & PHY_MODE_LOW_POWER)
 		txc_set_power(efx);
 
@@ -475,13 +475,13 @@ static int txc43128_phy_reconfigure(struct efx_nic *efx)
 	return 0;
 }
 
-static void txc43128_phy_fini(struct efx_nic *efx)
+static void txc43128_phy_fini(struct ef4_nic *efx)
 {
 	/* Disable link events */
-	efx_mdio_write(efx, MDIO_MMD_PMAPMD, MDIO_PMA_LASI_CTRL, 0);
+	ef4_mdio_write(efx, MDIO_MMD_PMAPMD, MDIO_PMA_LASI_CTRL, 0);
 }
 
-static void txc43128_phy_remove(struct efx_nic *efx)
+static void txc43128_phy_remove(struct ef4_nic *efx)
 {
 	kfree(efx->phy_data);
 	efx->phy_data = NULL;
@@ -489,7 +489,7 @@ static void txc43128_phy_remove(struct efx_nic *efx)
 
 /* Periodic callback: this exists mainly to poll link status as we
  * don't use LASI interrupts */
-static bool txc43128_phy_poll(struct efx_nic *efx)
+static bool txc43128_phy_poll(struct ef4_nic *efx)
 {
 	struct txc43128_data *data = efx->phy_data;
 	bool was_up = efx->link_state.up;
@@ -516,14 +516,14 @@ static const char *const txc43128_test_names[] = {
 	"bist"
 };
 
-static const char *txc43128_test_name(struct efx_nic *efx, unsigned int index)
+static const char *txc43128_test_name(struct ef4_nic *efx, unsigned int index)
 {
 	if (index < ARRAY_SIZE(txc43128_test_names))
 		return txc43128_test_names[index];
 	return NULL;
 }
 
-static int txc43128_run_tests(struct efx_nic *efx, int *results, unsigned flags)
+static int txc43128_run_tests(struct ef4_nic *efx, int *results, unsigned flags)
 {
 	int rc;
 
@@ -540,12 +540,12 @@ static int txc43128_run_tests(struct efx_nic *efx, int *results, unsigned flags)
 	return rc;
 }
 
-static void txc43128_get_settings(struct efx_nic *efx, struct ethtool_cmd *ecmd)
+static void txc43128_get_settings(struct ef4_nic *efx, struct ethtool_cmd *ecmd)
 {
 	mdio45_ethtool_gset(&efx->mdio, ecmd);
 }
 
-const struct efx_phy_operations falcon_txc_phy_ops = {
+const struct ef4_phy_operations falcon_txc_phy_ops = {
 	.probe		= txc43128_phy_probe,
 	.init		= txc43128_phy_init,
 	.reconfigure	= txc43128_phy_reconfigure,
@@ -553,8 +553,8 @@ const struct efx_phy_operations falcon_txc_phy_ops = {
 	.fini		= txc43128_phy_fini,
 	.remove		= txc43128_phy_remove,
 	.get_settings	= txc43128_get_settings,
-	.set_settings	= efx_mdio_set_settings,
-	.test_alive	= efx_mdio_test_alive,
+	.set_settings	= ef4_mdio_set_settings,
+	.test_alive	= ef4_mdio_test_alive,
 	.run_tests	= txc43128_run_tests,
 	.test_name	= txc43128_test_name,
 };
diff --git a/drivers/net/ethernet/sfc/falcon/workarounds.h b/drivers/net/ethernet/sfc/falcon/workarounds.h
new file mode 100644
index 000000000000..6af800bc9633
--- /dev/null
+++ b/drivers/net/ethernet/sfc/falcon/workarounds.h
@@ -0,0 +1,44 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2006-2013 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EF4_WORKAROUNDS_H
+#define EF4_WORKAROUNDS_H
+
+/*
+ * Hardware workarounds.
+ * Bug numbers are from Solarflare's Bugzilla.
+ */
+
+#define EF4_WORKAROUND_FALCON_A(efx) (ef4_nic_rev(efx) <= EF4_REV_FALCON_A1)
+#define EF4_WORKAROUND_FALCON_AB(efx) (ef4_nic_rev(efx) <= EF4_REV_FALCON_B0)
+#define EF4_WORKAROUND_10G(efx) 1
+
+/* Bit-bashed I2C reads cause performance drop */
+#define EF4_WORKAROUND_7884 EF4_WORKAROUND_10G
+/* Truncated IPv4 packets can confuse the TX packet parser */
+#define EF4_WORKAROUND_15592 EF4_WORKAROUND_FALCON_AB
+
+/* Spurious parity errors in TSORT buffers */
+#define EF4_WORKAROUND_5129 EF4_WORKAROUND_FALCON_A
+/* Unaligned read request >512 bytes after aligning may break TSORT */
+#define EF4_WORKAROUND_5391 EF4_WORKAROUND_FALCON_A
+/* iSCSI parsing errors */
+#define EF4_WORKAROUND_5583 EF4_WORKAROUND_FALCON_A
+/* RX events go missing */
+#define EF4_WORKAROUND_5676 EF4_WORKAROUND_FALCON_A
+/* RX_RESET on A1 */
+#define EF4_WORKAROUND_6555 EF4_WORKAROUND_FALCON_A
+/* Increase filter depth to avoid RX_RESET */
+#define EF4_WORKAROUND_7244 EF4_WORKAROUND_FALCON_A
+/* Flushes may never complete */
+#define EF4_WORKAROUND_7803 EF4_WORKAROUND_FALCON_AB
+/* Leak overlength packets rather than free */
+#define EF4_WORKAROUND_8071 EF4_WORKAROUND_FALCON_A
+
+#endif /* EF4_WORKAROUNDS_H */
diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c
index 4762ec444cb8..e4ca2161af70 100644
--- a/drivers/net/ethernet/sfc/farch.c
+++ b/drivers/net/ethernet/sfc/farch.c
@@ -25,7 +25,7 @@
 #include "io.h"
 #include "workarounds.h"
 
-/* Falcon-architecture (SFC4000 and SFC9000-family) support */
+/* Falcon-architecture (SFC9000-family) support */
 
 /**************************************************************************
  *
@@ -177,7 +177,7 @@ efx_init_special_buffer(struct efx_nic *efx, struct efx_special_buffer *buffer)
 	dma_addr_t dma_addr;
 	int i;
 
-	EFX_BUG_ON_PARANOID(!buffer->buf.addr);
+	EFX_WARN_ON_PARANOID(!buffer->buf.addr);
 
 	/* Write buffer descriptors to NIC */
 	for (i = 0; i < buffer->entries; i++) {
@@ -332,7 +332,7 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
 		txd = efx_tx_desc(tx_queue, write_ptr);
 		++tx_queue->write_count;
 
-		EFX_BUG_ON_PARANOID(buffer->flags & EFX_TX_BUF_OPTION);
+		EFX_WARN_ON_ONCE_PARANOID(buffer->flags & EFX_TX_BUF_OPTION);
 
 		/* Create TX descriptor ring entry */
 		BUILD_BUG_ON(EFX_TX_BUF_CONT != 1);
@@ -356,6 +356,18 @@ void efx_farch_tx_write(struct efx_tx_queue *tx_queue)
 	}
 }
 
+unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
+				    dma_addr_t dma_addr, unsigned int len)
+{
+	/* Bytes from dma_addr up to the next EFX_PAGE_SIZE (4K) boundary;
+	 * a descriptor must not cross one, so clamp len to that distance.
+	 */
+	unsigned int to_boundary = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;
+
+	return min(to_boundary, len);
+}
+
+
 /* Allocate hardware resources for a TX queue */
 int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
 {
@@ -369,6 +381,7 @@ int efx_farch_tx_probe(struct efx_tx_queue *tx_queue)
 
 void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
 {
+	int csum = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
 	struct efx_nic *efx = tx_queue->efx;
 	efx_oword_t reg;
 
@@ -390,37 +403,18 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue)
 			      FRF_AZ_TX_DESCQ_TYPE, 0,
 			      FRF_BZ_TX_NON_IP_DROP_DIS, 1);
 
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-		int csum = tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD;
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, !csum);
-		EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS,
-				    !csum);
-	}
+	EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_IP_CHKSM_DIS, !csum);
+	EFX_SET_OWORD_FIELD(reg, FRF_BZ_TX_TCP_CHKSM_DIS, !csum);
 
 	efx_writeo_table(efx, &reg, efx->type->txd_ptr_tbl_base,
 			 tx_queue->queue);
 
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0) {
-		/* Only 128 bits in this register */
-		BUILD_BUG_ON(EFX_MAX_TX_QUEUES > 128);
-
-		efx_reado(efx, &reg, FR_AA_TX_CHKSM_CFG);
-		if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD)
-			__clear_bit_le(tx_queue->queue, &reg);
-		else
-			__set_bit_le(tx_queue->queue, &reg);
-		efx_writeo(efx, &reg, FR_AA_TX_CHKSM_CFG);
-	}
-
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-		EFX_POPULATE_OWORD_1(reg,
-				     FRF_BZ_TX_PACE,
-				     (tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ?
-				     FFE_BZ_TX_PACE_OFF :
-				     FFE_BZ_TX_PACE_RESERVED);
-		efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL,
-				 tx_queue->queue);
-	}
+	EFX_POPULATE_OWORD_1(reg,
+			     FRF_BZ_TX_PACE,
+			     (tx_queue->queue & EFX_TXQ_TYPE_HIGHPRI) ?
+			     FFE_BZ_TX_PACE_OFF :
+			     FFE_BZ_TX_PACE_RESERVED);
+	efx_writeo_table(efx, &reg, FR_BZ_TX_PACE_TBL, tx_queue->queue);
 }
 
 static void efx_farch_flush_tx_queue(struct efx_tx_queue *tx_queue)
@@ -517,16 +511,10 @@ void efx_farch_rx_init(struct efx_rx_queue *rx_queue)
 {
 	efx_oword_t rx_desc_ptr;
 	struct efx_nic *efx = rx_queue->efx;
-	bool is_b0 = efx_nic_rev(efx) >= EFX_REV_FALCON_B0;
-	bool iscsi_digest_en = is_b0;
 	bool jumbo_en;
 
-	/* For kernel-mode queues in Falcon A1, the JUMBO flag enables
-	 * DMA to continue after a PCIe page boundary (and scattering
-	 * is not possible).  In Falcon B0 and Siena, it enables
-	 * scatter.
-	 */
-	jumbo_en = !is_b0 || efx->rx_scatter;
+	/* For kernel-mode queues in Siena, the JUMBO flag enables scatter. */
+	jumbo_en = efx->rx_scatter;
 
 	netif_dbg(efx, hw, efx->net_dev,
 		  "RX queue %d ring in special buffers %d-%d\n",
@@ -540,8 +528,8 @@ void efx_farch_rx_init(struct efx_rx_queue *rx_queue)
 
 	/* Push RX descriptor ring to card */
 	EFX_POPULATE_OWORD_10(rx_desc_ptr,
-			      FRF_AZ_RX_ISCSI_DDIG_EN, iscsi_digest_en,
-			      FRF_AZ_RX_ISCSI_HDIG_EN, iscsi_digest_en,
+			      FRF_AZ_RX_ISCSI_DDIG_EN, true,
+			      FRF_AZ_RX_ISCSI_HDIG_EN, true,
 			      FRF_AZ_RX_DESCQ_BUF_BASE_ID, rx_queue->rxd.index,
 			      FRF_AZ_RX_DESCQ_EVQ_ID,
 			      efx_rx_queue_channel(rx_queue)->channel,
@@ -880,7 +868,7 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
 	struct efx_nic *efx = rx_queue->efx;
 	bool rx_ev_buf_owner_id_err, rx_ev_ip_hdr_chksum_err;
 	bool rx_ev_tcp_udp_chksum_err, rx_ev_eth_crc_err;
-	bool rx_ev_frm_trunc, rx_ev_drib_nib, rx_ev_tobe_disc;
+	bool rx_ev_frm_trunc, rx_ev_tobe_disc;
 	bool rx_ev_other_err, rx_ev_pause_frm;
 	bool rx_ev_hdr_type, rx_ev_mcast_pkt;
 	unsigned rx_ev_pkt_type;
@@ -897,12 +885,10 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
 						   FSF_AZ_RX_EV_TCP_UDP_CHKSUM_ERR);
 	rx_ev_eth_crc_err = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_ETH_CRC_ERR);
 	rx_ev_frm_trunc = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_FRM_TRUNC);
-	rx_ev_drib_nib = ((efx_nic_rev(efx) >= EFX_REV_FALCON_B0) ?
-			  0 : EFX_QWORD_FIELD(*event, FSF_AA_RX_EV_DRIB_NIB));
 	rx_ev_pause_frm = EFX_QWORD_FIELD(*event, FSF_AZ_RX_EV_PAUSE_FRM_ERR);
 
 	/* Every error apart from tobe_disc and pause_frm */
-	rx_ev_other_err = (rx_ev_drib_nib | rx_ev_tcp_udp_chksum_err |
+	rx_ev_other_err = (rx_ev_tcp_udp_chksum_err |
 			   rx_ev_buf_owner_id_err | rx_ev_eth_crc_err |
 			   rx_ev_frm_trunc | rx_ev_ip_hdr_chksum_err);
 
@@ -927,7 +913,7 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
 	if (rx_ev_other_err && net_ratelimit()) {
 		netif_dbg(efx, rx_err, efx->net_dev,
 			  " RX queue %d unexpected RX event "
-			  EFX_QWORD_FMT "%s%s%s%s%s%s%s%s\n",
+			  EFX_QWORD_FMT "%s%s%s%s%s%s%s\n",
 			  efx_rx_queue_index(rx_queue), EFX_QWORD_VAL(*event),
 			  rx_ev_buf_owner_id_err ? " [OWNER_ID_ERR]" : "",
 			  rx_ev_ip_hdr_chksum_err ?
@@ -936,14 +922,13 @@ static u16 efx_farch_handle_rx_not_ok(struct efx_rx_queue *rx_queue,
 			  " [TCP_UDP_CHKSUM_ERR]" : "",
 			  rx_ev_eth_crc_err ? " [ETH_CRC_ERR]" : "",
 			  rx_ev_frm_trunc ? " [FRM_TRUNC]" : "",
-			  rx_ev_drib_nib ? " [DRIB_NIB]" : "",
 			  rx_ev_tobe_disc ? " [TOBE_DISC]" : "",
 			  rx_ev_pause_frm ? " [PAUSE]" : "");
 	}
 #endif
 
 	/* The frame must be discarded if any of these are true. */
-	return (rx_ev_eth_crc_err | rx_ev_frm_trunc | rx_ev_drib_nib |
+	return (rx_ev_eth_crc_err | rx_ev_frm_trunc |
 		rx_ev_tobe_disc | rx_ev_pause_frm) ?
 		EFX_RX_PKT_DISCARD : 0;
 }
@@ -972,8 +957,7 @@ efx_farch_handle_rx_bad_index(struct efx_rx_queue *rx_queue, unsigned index)
 		   "dropped %d events (index=%d expected=%d)\n",
 		   dropped, index, expected);
 
-	efx_schedule_reset(efx, EFX_WORKAROUND_5676(efx) ?
-			   RESET_TYPE_RX_RECOVERY : RESET_TYPE_DISABLE);
+	efx_schedule_reset(efx, RESET_TYPE_DISABLE);
 	return false;
 }
 
@@ -1239,10 +1223,7 @@ efx_farch_handle_driver_event(struct efx_channel *channel, efx_qword_t *event)
 			  "channel %d seen DRIVER RX_RESET event. "
 			"Resetting.\n", channel->channel);
 		atomic_inc(&efx->rx_reset);
-		efx_schedule_reset(efx,
-				   EFX_WORKAROUND_6555(efx) ?
-				   RESET_TYPE_RX_RECOVERY :
-				   RESET_TYPE_DISABLE);
+		efx_schedule_reset(efx, RESET_TYPE_DISABLE);
 		break;
 	case FSE_BZ_RX_DSC_ERROR_EV:
 		if (ev_sub_data < EFX_VI_BASE) {
@@ -1379,13 +1360,11 @@ int efx_farch_ev_init(struct efx_channel *channel)
 		  channel->channel, channel->eventq.index,
 		  channel->eventq.index + channel->eventq.entries - 1);
 
-	if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
-		EFX_POPULATE_OWORD_3(reg,
-				     FRF_CZ_TIMER_Q_EN, 1,
-				     FRF_CZ_HOST_NOTIFY_MODE, 0,
-				     FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
-		efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
-	}
+	EFX_POPULATE_OWORD_3(reg,
+			     FRF_CZ_TIMER_Q_EN, 1,
+			     FRF_CZ_HOST_NOTIFY_MODE, 0,
+			     FRF_CZ_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS);
+	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
 
 	/* Pin event queue buffer */
 	efx_init_special_buffer(efx, &channel->eventq);
@@ -1413,8 +1392,7 @@ void efx_farch_ev_fini(struct efx_channel *channel)
 	EFX_ZERO_OWORD(reg);
 	efx_writeo_table(efx, &reg, efx->type->evq_ptr_tbl_base,
 			 channel->channel);
-	if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0)
-		efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
+	efx_writeo_table(efx, &reg, FR_BZ_TIMER_TBL, channel->channel);
 
 	/* Unpin event queue */
 	efx_fini_special_buffer(efx, &channel->eventq);
@@ -1488,7 +1466,6 @@ int efx_farch_irq_test_generate(struct efx_nic *efx)
  */
 irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx)
 {
-	struct falcon_nic_data *nic_data = efx->nic_data;
 	efx_oword_t *int_ker = efx->irq_status.addr;
 	efx_oword_t fatal_intr;
 	int error, mem_perr;
@@ -1514,8 +1491,6 @@ irqreturn_t efx_farch_fatal_interrupt(struct efx_nic *efx)
 
 	/* Disable both devices */
 	pci_clear_master(efx->pci_dev);
-	if (efx_nic_is_dual_func(efx))
-		pci_clear_master(nic_data->pci_dev2);
 	efx_farch_irq_disable_master(efx);
 
 	/* Count errors and reset or disable the NIC accordingly */
@@ -1662,8 +1637,6 @@ void efx_farch_rx_push_indir_table(struct efx_nic *efx)
 	size_t i = 0;
 	efx_dword_t dword;
 
-	BUG_ON(efx_nic_rev(efx) < EFX_REV_FALCON_B0);
-
 	BUILD_BUG_ON(ARRAY_SIZE(efx->rx_indir_table) !=
 		     FR_BZ_RX_INDIRECTION_TBL_ROWS);
 
@@ -1791,8 +1764,7 @@ void efx_farch_init_common(struct efx_nic *efx)
 			     FRF_AZ_ILL_ADR_INT_KER_EN, 1,
 			     FRF_AZ_RBUF_OWN_INT_KER_EN, 1,
 			     FRF_AZ_TBUF_OWN_INT_KER_EN, 1);
-	if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0)
-		EFX_SET_OWORD_FIELD(temp, FRF_CZ_SRAM_PERR_INT_P_KER_EN, 1);
+	EFX_SET_OWORD_FIELD(temp, FRF_CZ_SRAM_PERR_INT_P_KER_EN, 1);
 	EFX_INVERT_OWORD(temp);
 	efx_writeo(efx, &temp, FR_AZ_FATAL_INTR_KER);
 
@@ -1812,22 +1784,18 @@ void efx_farch_init_common(struct efx_nic *efx)
 	/* Disable hardware watchdog which can misfire */
 	EFX_SET_OWORD_FIELD(temp, FRF_AZ_TX_PREF_WD_TMR, 0x3fffff);
 	/* Squash TX of packets of 16 bytes or less */
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0)
-		EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
+	EFX_SET_OWORD_FIELD(temp, FRF_BZ_TX_FLUSH_MIN_LEN_EN, 1);
 	efx_writeo(efx, &temp, FR_AZ_TX_RESERVED);
 
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-		EFX_POPULATE_OWORD_4(temp,
-				     /* Default values */
-				     FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
-				     FRF_BZ_TX_PACE_SB_AF, 0xb,
-				     FRF_BZ_TX_PACE_FB_BASE, 0,
-				     /* Allow large pace values in the
-				      * fast bin. */
-				     FRF_BZ_TX_PACE_BIN_TH,
-				     FFE_BZ_TX_PACE_RESERVED);
-		efx_writeo(efx, &temp, FR_BZ_TX_PACE);
-	}
+	EFX_POPULATE_OWORD_4(temp,
+			     /* Default values */
+			     FRF_BZ_TX_PACE_SB_NOT_AF, 0x15,
+			     FRF_BZ_TX_PACE_SB_AF, 0xb,
+			     FRF_BZ_TX_PACE_FB_BASE, 0,
+			     /* Allow large pace values in the fast bin. */
+			     FRF_BZ_TX_PACE_BIN_TH,
+			     FFE_BZ_TX_PACE_RESERVED);
+	efx_writeo(efx, &temp, FR_BZ_TX_PACE);
 }
 
 /**************************************************************************
@@ -2011,7 +1979,7 @@ static void efx_farch_filter_push_rx_config(struct efx_nic *efx)
 			!!(table->spec[EFX_FARCH_FILTER_INDEX_UC_DEF].flags &
 			   table->spec[EFX_FARCH_FILTER_INDEX_MC_DEF].flags &
 			   EFX_FILTER_FLAG_RX_SCATTER));
-	} else if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
+	} else {
 		/* We don't expose 'default' filters because unmatched
 		 * packets always go to the queue number found in the
 		 * RSS table.  But we still need to set the RX scatter
@@ -2073,7 +2041,7 @@ efx_farch_filter_from_gen_spec(struct efx_farch_filter_spec *spec,
 		__be32 rhost, host1, host2;
 		__be16 rport, port1, port2;
 
-		EFX_BUG_ON_PARANOID(!(gen_spec->flags & EFX_FILTER_FLAG_RX));
+		EFX_WARN_ON_PARANOID(!(gen_spec->flags & EFX_FILTER_FLAG_RX));
 
 		if (gen_spec->ether_type != htons(ETH_P_IP))
 			return -EPROTONOSUPPORT;
@@ -2819,31 +2787,27 @@ int efx_farch_filter_table_probe(struct efx_nic *efx)
 		return -ENOMEM;
 	efx->filter_state = state;
 
-	if (efx_nic_rev(efx) >= EFX_REV_FALCON_B0) {
-		table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
-		table->id = EFX_FARCH_FILTER_TABLE_RX_IP;
-		table->offset = FR_BZ_RX_FILTER_TBL0;
-		table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
-		table->step = FR_BZ_RX_FILTER_TBL0_STEP;
-	}
+	table = &state->table[EFX_FARCH_FILTER_TABLE_RX_IP];
+	table->id = EFX_FARCH_FILTER_TABLE_RX_IP;
+	table->offset = FR_BZ_RX_FILTER_TBL0;
+	table->size = FR_BZ_RX_FILTER_TBL0_ROWS;
+	table->step = FR_BZ_RX_FILTER_TBL0_STEP;
 
-	if (efx_nic_rev(efx) >= EFX_REV_SIENA_A0) {
-		table = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
-		table->id = EFX_FARCH_FILTER_TABLE_RX_MAC;
-		table->offset = FR_CZ_RX_MAC_FILTER_TBL0;
-		table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS;
-		table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP;
-
-		table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
-		table->id = EFX_FARCH_FILTER_TABLE_RX_DEF;
-		table->size = EFX_FARCH_FILTER_SIZE_RX_DEF;
-
-		table = &state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
-		table->id = EFX_FARCH_FILTER_TABLE_TX_MAC;
-		table->offset = FR_CZ_TX_MAC_FILTER_TBL0;
-		table->size = FR_CZ_TX_MAC_FILTER_TBL0_ROWS;
-		table->step = FR_CZ_TX_MAC_FILTER_TBL0_STEP;
-	}
+	table = &state->table[EFX_FARCH_FILTER_TABLE_RX_MAC];
+	table->id = EFX_FARCH_FILTER_TABLE_RX_MAC;
+	table->offset = FR_CZ_RX_MAC_FILTER_TBL0;
+	table->size = FR_CZ_RX_MAC_FILTER_TBL0_ROWS;
+	table->step = FR_CZ_RX_MAC_FILTER_TBL0_STEP;
+
+	table = &state->table[EFX_FARCH_FILTER_TABLE_RX_DEF];
+	table->id = EFX_FARCH_FILTER_TABLE_RX_DEF;
+	table->size = EFX_FARCH_FILTER_SIZE_RX_DEF;
+
+	table = &state->table[EFX_FARCH_FILTER_TABLE_TX_MAC];
+	table->id = EFX_FARCH_FILTER_TABLE_TX_MAC;
+	table->offset = FR_CZ_TX_MAC_FILTER_TBL0;
+	table->size = FR_CZ_TX_MAC_FILTER_TBL0_ROWS;
+	table->step = FR_CZ_TX_MAC_FILTER_TBL0_STEP;
 
 	for (table_id = 0; table_id < EFX_FARCH_FILTER_TABLE_COUNT; table_id++) {
 		table = &state->table[table_id];
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 241520943ada..995651341b94 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -15,7 +15,6 @@
 #include "io.h"
 #include "farch_regs.h"
 #include "mcdi_pcol.h"
-#include "phy.h"
 
 /**************************************************************************
  *
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index c9aeb0701c9a..4472107ca8c1 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -129,14 +129,14 @@ struct efx_mcdi_data {
 
 static inline struct efx_mcdi_iface *efx_mcdi(struct efx_nic *efx)
 {
-	EFX_BUG_ON_PARANOID(!efx->mcdi);
+	EFX_WARN_ON_PARANOID(!efx->mcdi);
 	return &efx->mcdi->iface;
 }
 
 #ifdef CONFIG_SFC_MCDI_MON
 static inline struct efx_mcdi_mon *efx_mcdi_mon(struct efx_nic *efx)
 {
-	EFX_BUG_ON_PARANOID(!efx->mcdi);
+	EFX_WARN_ON_PARANOID(!efx->mcdi);
 	return &efx->mcdi->hwmon;
 }
 #endif
diff --git a/drivers/net/ethernet/sfc/mcdi_mon.c b/drivers/net/ethernet/sfc/mcdi_mon.c
index bc27d5b580f5..f97da05952c7 100644
--- a/drivers/net/ethernet/sfc/mcdi_mon.c
+++ b/drivers/net/ethernet/sfc/mcdi_mon.c
@@ -121,9 +121,9 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev)
 	}
 	if (!name)
 		name = "No sensor name available";
-	EFX_BUG_ON_PARANOID(state >= ARRAY_SIZE(sensor_status_names));
+	EFX_WARN_ON_PARANOID(state >= ARRAY_SIZE(sensor_status_names));
 	state_txt = sensor_status_names[state];
-	EFX_BUG_ON_PARANOID(hwmon_type >= EFX_HWMON_TYPES_COUNT);
+	EFX_WARN_ON_PARANOID(hwmon_type >= EFX_HWMON_TYPES_COUNT);
 	unit = efx_hwmon_unit[hwmon_type];
 	if (!unit)
 		unit = "";
diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h
index ccceafc15896..35cc3d4fa5f6 100644
--- a/drivers/net/ethernet/sfc/mcdi_pcol.h
+++ b/drivers/net/ethernet/sfc/mcdi_pcol.h
@@ -276,6 +276,9 @@
 /* The clock whose frequency you've attempted to set set
  * doesn't exist on this NIC */
 #define MC_CMD_ERR_NO_CLOCK 0x1015
+/* Returned by MC_CMD_TESTASSERT if the action that should
+ * have caused an assertion failed to do so.  */
+#define MC_CMD_ERR_UNREACHABLE 0x1016
 
 #define MC_CMD_ERR_CODE_OFST 0
 
@@ -933,6 +936,8 @@
 #define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_SKIP_BOOT_ICORE_SYNC_WIDTH 1
 #define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_FORCE_STANDALONE_LBN 5
 #define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_FORCE_STANDALONE_WIDTH 1
+#define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_DISABLE_XIP_LBN 6
+#define        MC_CMD_COPYCODE_IN_BOOT_MAGIC_DISABLE_XIP_WIDTH 1
 /* Destination address */
 #define       MC_CMD_COPYCODE_IN_DEST_ADDR_OFST 4
 #define       MC_CMD_COPYCODE_IN_NUMWORDS_OFST 8
@@ -1659,6 +1664,8 @@
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_CAPABILITIES_OFST 8
 #define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_REPORT_SYNC_STATUS_LBN 0
 #define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_REPORT_SYNC_STATUS_WIDTH 1
+#define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_RX_TSTAMP_OOB_LBN 1
+#define        MC_CMD_PTP_OUT_GET_ATTRIBUTES_RX_TSTAMP_OOB_WIDTH 1
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED0_OFST 12
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED1_OFST 16
 #define       MC_CMD_PTP_OUT_GET_ATTRIBUTES_RESERVED2_OFST 20
@@ -2211,6 +2218,10 @@
 #define          MC_CMD_FW_HIGH_TX_RATE 0x3
 /* enum: Reserved value */
 #define          MC_CMD_FW_PACKED_STREAM_HASH_MODE_1 0x4
+/* enum: Prefer to use firmware with additional "rules engine" filtering
+ * support
+ */
+#define          MC_CMD_FW_RULES_ENGINE 0x5
 /* enum: Only this option is allowed for non-admin functions */
 #define          MC_CMD_FW_DONT_CARE  0xffffffff
 
@@ -3654,12 +3665,27 @@
 
 #define MC_CMD_0x38_PRIVILEGE_CTG SRIOV_CTG_ADMIN
 
-/* MC_CMD_NVRAM_UPDATE_START_IN msgrequest */
+/* MC_CMD_NVRAM_UPDATE_START_IN msgrequest: Legacy NVRAM_UPDATE_START request.
+ * Use NVRAM_UPDATE_START_V2_IN in new code
+ */
 #define    MC_CMD_NVRAM_UPDATE_START_IN_LEN 4
 #define       MC_CMD_NVRAM_UPDATE_START_IN_TYPE_OFST 0
 /*            Enum values, see field(s): */
 /*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
 
+/* MC_CMD_NVRAM_UPDATE_START_V2_IN msgrequest: Extended NVRAM_UPDATE_START
+ * request with additional flags indicating version of command in use. See
+ * MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT for details of extended functionality. Use
+ * paired up with NVRAM_UPDATE_FINISH_V2_IN.
+ */
+#define    MC_CMD_NVRAM_UPDATE_START_V2_IN_LEN 8
+#define       MC_CMD_NVRAM_UPDATE_START_V2_IN_TYPE_OFST 0
+/*            Enum values, see field(s): */
+/*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
+#define       MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAGS_OFST 4
+#define        MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT_LBN 0
+#define        MC_CMD_NVRAM_UPDATE_START_V2_IN_FLAG_REPORT_VERIFY_RESULT_WIDTH 1
+
 /* MC_CMD_NVRAM_UPDATE_START_OUT msgresponse */
 #define    MC_CMD_NVRAM_UPDATE_START_OUT_LEN 0
 
@@ -3784,16 +3810,81 @@
 
 #define MC_CMD_0x3c_PRIVILEGE_CTG SRIOV_CTG_ADMIN
 
-/* MC_CMD_NVRAM_UPDATE_FINISH_IN msgrequest */
+/* MC_CMD_NVRAM_UPDATE_FINISH_IN msgrequest: Legacy NVRAM_UPDATE_FINISH
+ * request. Use NVRAM_UPDATE_FINISH_V2_IN in new code
+ */
 #define    MC_CMD_NVRAM_UPDATE_FINISH_IN_LEN 8
 #define       MC_CMD_NVRAM_UPDATE_FINISH_IN_TYPE_OFST 0
 /*            Enum values, see field(s): */
 /*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
 #define       MC_CMD_NVRAM_UPDATE_FINISH_IN_REBOOT_OFST 4
 
-/* MC_CMD_NVRAM_UPDATE_FINISH_OUT msgresponse */
+/* MC_CMD_NVRAM_UPDATE_FINISH_V2_IN msgrequest: Extended NVRAM_UPDATE_FINISH
+ * request with additional flags indicating version of NVRAM_UPDATE commands in
+ * use. See MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT for details of extended
+ * functionality. Use paired up with NVRAM_UPDATE_START_V2_IN.
+ */
+#define    MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_LEN 12
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_TYPE_OFST 0
+/*            Enum values, see field(s): */
+/*               MC_CMD_NVRAM_TYPES/MC_CMD_NVRAM_TYPES_OUT/TYPES */
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_REBOOT_OFST 4
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAGS_OFST 8
+#define        MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT_LBN 0
+#define        MC_CMD_NVRAM_UPDATE_FINISH_V2_IN_FLAG_REPORT_VERIFY_RESULT_WIDTH 1
+
+/* MC_CMD_NVRAM_UPDATE_FINISH_OUT msgresponse: Legacy NVRAM_UPDATE_FINISH
+ * response. Use NVRAM_UPDATE_FINISH_V2_OUT in new code
+ */
 #define    MC_CMD_NVRAM_UPDATE_FINISH_OUT_LEN 0
 
+/* MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT msgresponse:
+ *
+ * Extended NVRAM_UPDATE_FINISH response that communicates the result of secure
+ * firmware validation where applicable back to the host.
+ *
+ * Medford only: For signed firmware images, such as those for medford, the MC
+ * firmware verifies the signature before marking the firmware image as valid.
+ * This process takes a few seconds to complete, so it is likely to take longer
+ * than the MCDI timeout. Hence signature verification is initiated when
+ * MC_CMD_NVRAM_UPDATE_FINISH_V2_IN is received by the firmware, however, the
+ * MCDI command returns immediately with error code EAGAIN. Subsequent
+ * NVRAM_UPDATE_FINISH_V2_IN requests also return EAGAIN if the verification is
+ * in progress. Once the verification has completed, this response payload
+ * includes the results of the signature verification. Note that the nvram lock
+ * in firmware is only released after the verification has completed and the
+ * host has read back the result code from firmware.
+ */
+#define    MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_LEN 4
+/* Result of nvram update completion processing */
+#define       MC_CMD_NVRAM_UPDATE_FINISH_V2_OUT_RESULT_CODE_OFST 0
+/* enum: Verify succeeded without any errors. */
+#define          MC_CMD_NVRAM_VERIFY_RC_SUCCESS 0x1
+/* enum: CMS format verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_CMS_CHECK_FAILED 0x2
+/* enum: Invalid CMS format in image metadata. */
+#define          MC_CMD_NVRAM_VERIFY_RC_INVALID_CMS_FORMAT 0x3
+/* enum: Message digest verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_MESSAGE_DIGEST_CHECK_FAILED 0x4
+/* enum: Error in message digest calculated over the reflash-header, payload
+ * and reflash-trailer.
+ */
+#define          MC_CMD_NVRAM_VERIFY_RC_BAD_MESSAGE_DIGEST 0x5
+/* enum: Signature verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHECK_FAILED 0x6
+/* enum: There are no valid signatures in the image. */
+#define          MC_CMD_NVRAM_VERIFY_RC_NO_VALID_SIGNATURES 0x7
+/* enum: Trusted approvers verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_TRUSTED_APPROVERS_CHECK_FAILED 0x8
+/* enum: The Trusted approver's list is empty. */
+#define          MC_CMD_NVRAM_VERIFY_RC_NO_TRUSTED_APPROVERS 0x9
+/* enum: Signature chain verification failed due to an internal error. */
+#define          MC_CMD_NVRAM_VERIFY_RC_SIGNATURE_CHAIN_CHECK_FAILED 0xa
+/* enum: The signers of the signatures in the image are not listed in the
+ * Trusted approver's list.
+ */
+#define          MC_CMD_NVRAM_VERIFY_RC_NO_SIGNATURE_MATCH 0xb
+
 
 /***********************************/
 /* MC_CMD_REBOOT
@@ -4356,6 +4447,28 @@
 /* MC_CMD_TESTASSERT_OUT msgresponse */
 #define    MC_CMD_TESTASSERT_OUT_LEN 0
 
+/* MC_CMD_TESTASSERT_V2_IN msgrequest */
+#define    MC_CMD_TESTASSERT_V2_IN_LEN 4
+/* How to provoke the assertion */
+#define       MC_CMD_TESTASSERT_V2_IN_TYPE_OFST 0
+/* enum: Assert using the FAIL_ASSERTION_WITH_USEFUL_VALUES macro. Unless
+ * you're testing firmware, this is what you want.
+ */
+#define          MC_CMD_TESTASSERT_V2_IN_FAIL_ASSERTION_WITH_USEFUL_VALUES  0x0
+/* enum: Assert using assert(0); */
+#define          MC_CMD_TESTASSERT_V2_IN_ASSERT_FALSE  0x1
+/* enum: Deliberately trigger a watchdog */
+#define          MC_CMD_TESTASSERT_V2_IN_WATCHDOG  0x2
+/* enum: Deliberately trigger a trap by loading from an invalid address */
+#define          MC_CMD_TESTASSERT_V2_IN_LOAD_TRAP  0x3
+/* enum: Deliberately trigger a trap by storing to an invalid address */
+#define          MC_CMD_TESTASSERT_V2_IN_STORE_TRAP  0x4
+/* enum: Jump to an invalid address */
+#define          MC_CMD_TESTASSERT_V2_IN_JUMP_TRAP  0x5
+
+/* MC_CMD_TESTASSERT_V2_OUT msgresponse */
+#define    MC_CMD_TESTASSERT_V2_OUT_LEN 0
+
 
 /***********************************/
 /* MC_CMD_WORKAROUND
@@ -4421,6 +4534,7 @@
  * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the
  * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1
  * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80.
+ * Anything else: currently undefined. Locks required: None. Return code: 0.
  */
 #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b
 
@@ -5362,12 +5476,14 @@
 #define          NVRAM_PARTITION_TYPE_EXPANSION_UEFI       0xd00
 /* enum: Spare partition 0 */
 #define          NVRAM_PARTITION_TYPE_SPARE_0              0x1000
-/* enum: Spare partition 1 */
-#define          NVRAM_PARTITION_TYPE_SPARE_1              0x1100
+/* enum: Used for XIP code of shmbooted images */
+#define          NVRAM_PARTITION_TYPE_XIP_SCRATCH          0x1100
 /* enum: Spare partition 2 */
 #define          NVRAM_PARTITION_TYPE_SPARE_2              0x1200
-/* enum: Spare partition 3 */
-#define          NVRAM_PARTITION_TYPE_SPARE_3              0x1300
+/* enum: Manufacturing partition. Used during manufacture to pass information
+ * between XJTAG and Manftest.
+ */
+#define          NVRAM_PARTITION_TYPE_MANUFACTURING        0x1300
 /* enum: Spare partition 4 */
 #define          NVRAM_PARTITION_TYPE_SPARE_4              0x1400
 /* enum: Spare partition 5 */
@@ -5402,6 +5518,14 @@
 #define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM     0x40
 /* enum: Network Access Control */
 #define          LICENSED_APP_ID_NETWORK_ACCESS_CONTROL  0x80
+/* enum: TCP Direct */
+#define          LICENSED_APP_ID_TCP_DIRECT              0x100
+/* enum: Low Latency */
+#define          LICENSED_APP_ID_LOW_LATENCY             0x200
+/* enum: SolarCapture Tap */
+#define          LICENSED_APP_ID_SOLARCAPTURE_TAP        0x400
+/* enum: Capture SolarSystem 40G */
+#define          LICENSED_APP_ID_CAPTURE_SOLARSYSTEM_40G 0x800
 #define       LICENSED_APP_ID_ID_LBN 0
 #define       LICENSED_APP_ID_ID_WIDTH 32
 
@@ -5458,6 +5582,14 @@
 #define        LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_WIDTH 1
 #define        LICENSED_V3_APPS_NETWORK_ACCESS_CONTROL_LBN 7
 #define        LICENSED_V3_APPS_NETWORK_ACCESS_CONTROL_WIDTH 1
+#define        LICENSED_V3_APPS_TCP_DIRECT_LBN 8
+#define        LICENSED_V3_APPS_TCP_DIRECT_WIDTH 1
+#define        LICENSED_V3_APPS_LOW_LATENCY_LBN 9
+#define        LICENSED_V3_APPS_LOW_LATENCY_WIDTH 1
+#define        LICENSED_V3_APPS_SOLARCAPTURE_TAP_LBN 10
+#define        LICENSED_V3_APPS_SOLARCAPTURE_TAP_WIDTH 1
+#define        LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_40G_LBN 11
+#define        LICENSED_V3_APPS_CAPTURE_SOLARSYSTEM_40G_WIDTH 1
 #define       LICENSED_V3_APPS_MASK_LBN 0
 #define       LICENSED_V3_APPS_MASK_WIDTH 64
 
@@ -5988,6 +6120,8 @@
 #define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_INNER_TCP_CSUM_EN_WIDTH 1
 #define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_TSOV2_EN_LBN 12
 #define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_TSOV2_EN_WIDTH 1
+#define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_CTPIO_LBN 13
+#define        MC_CMD_INIT_TXQ_EXT_IN_FLAG_CTPIO_WIDTH 1
 /* Owner ID to use if in buffer mode (zero if physical) */
 #define       MC_CMD_INIT_TXQ_EXT_IN_OWNER_ID_OFST 20
 /* The port ID associated with the v-adaptor which should contain this DMAQ. */
@@ -7728,6 +7862,8 @@
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
@@ -7763,6 +7899,8 @@
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* Hardware capabilities of NIC */
@@ -7913,6 +8051,8 @@
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine RX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* enum: RX PD firmware parsing but not filtering network overlay tunnel
@@ -7948,6 +8088,8 @@
  * tests (Medford development only)
  */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_LAYER2_PERF  0x7
+/* enum: Rules engine TX PD production firmware */
+#define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_RULES_ENGINE  0x8
 /* enum: RX PD firmware for GUE parsing prototype (Medford development only) */
 #define          MC_CMD_GET_CAPABILITIES_V2_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE  0xe
 /* Hardware capabilities of NIC */
@@ -7980,6 +8122,8 @@
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11
 #define        MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V2_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
@@ -8247,6 +8391,8 @@
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11
 #define        MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_LBN 12
+#define        MC_CMD_GET_CAPABILITIES_V3_OUT_NVRAM_UPDATE_REPORT_VERIFY_RESULT_WIDTH 1
 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present
  * on older firmware (check the length).
  */
@@ -8304,7 +8450,7 @@
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2
 /* On chips later than Medford the amount of address space assigned to each VI
  * is configurable. This is a global setting that the driver must query to
- * discover the VI to address mapping. Cut-through PIO (CTPIO) in not available
+ * discover the VI to address mapping. Cut-through PIO (CTPIO) is not available
  * with 8k VI windows.
  */
 #define       MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72
@@ -10283,6 +10429,8 @@
  * more data is returned.
  */
 #define          MC_CMD_PCIE_TUNE_IN_POLL_EYE_PLOT  0x6
+/* enum: Enable the SERDES BIST and set it to generate a 200MHz square wave */
+#define          MC_CMD_PCIE_TUNE_IN_BIST_SQUARE_WAVE  0x7
 /* Align the arguments to 32 bits */
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_OFST 1
 #define       MC_CMD_PCIE_TUNE_IN_PCIE_TUNE_RSVD_LEN 3
@@ -10468,6 +10616,12 @@
 #define       MC_CMD_PCIE_TUNE_POLL_EYE_PLOT_OUT_SAMPLES_MINNUM 0
 #define       MC_CMD_PCIE_TUNE_POLL_EYE_PLOT_OUT_SAMPLES_MAXNUM 126
 
+/* MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_IN msgrequest */
+#define    MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_IN_LEN 0
+
+/* MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_OUT msgresponse */
+#define    MC_CMD_PCIE_TUNE_BIST_SQUARE_WAVE_OUT_LEN 0
+
 
 /***********************************/
 /* MC_CMD_LICENSING
@@ -10783,29 +10937,45 @@
 #define MC_CMD_0xd4_PRIVILEGE_CTG SRIOV_CTG_GENERAL
 
 /* MC_CMD_LICENSED_V3_VALIDATE_APP_IN msgrequest */
-#define    MC_CMD_LICENSED_V3_VALIDATE_APP_IN_LEN 72
+#define    MC_CMD_LICENSED_V3_VALIDATE_APP_IN_LEN 56
+/* challenge for validation (384 bits) */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_LEN 48
 /* application ID expressed as a single bit mask */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_OFST 48
 #define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LEN 8
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LO_OFST 0
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_HI_OFST 4
-/* challenge for validation */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_OFST 8
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_CHALLENGE_LEN 64
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_LO_OFST 48
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_IN_APP_ID_HI_OFST 52
 
 /* MC_CMD_LICENSED_V3_VALIDATE_APP_OUT msgresponse */
-#define    MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_LEN 72
+#define    MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_LEN 116
+/* validation response to challenge in the form of ECDSA signature consisting
+ * of two 384-bit integers, r and s, in big-endian order. The signature signs a
+ * SHA-384 digest of a message constructed from the concatenation of the input
+ * message and the remaining fields of this output message, e.g. challenge[48
+ * bytes] ... expiry_time[4 bytes] ...
+ */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_LEN 96
 /* application expiry time */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_TIME_OFST 0
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_TIME_OFST 96
 /* application expiry units */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 4
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNITS_OFST 100
 /* enum: expiry units are accounting units */
 #define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_ACC  0x0
 /* enum: expiry units are calendar days */
 #define          MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_EXPIRY_UNIT_DAYS  0x1
-/* validation response to challenge */
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_OFST 8
-#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_RESPONSE_LEN 64
+/* base MAC address of the NIC stored in NVRAM (note that this is a constant
+ * value for a given NIC regardless of which function is calling; effectively
+ * this is PF0 base MAC address)
+ */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_BASE_MACADDR_OFST 104
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_BASE_MACADDR_LEN 6
+/* MAC address of v-adaptor associated with the client. If no such v-adaptor
+ * exists, then the field is filled with 0xFF.
+ */
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_VADAPTOR_MACADDR_OFST 110
+#define       MC_CMD_LICENSED_V3_VALIDATE_APP_OUT_VADAPTOR_MACADDR_LEN 6
 
 
 /***********************************/
@@ -10835,6 +11005,70 @@
 
 
 /***********************************/
+/* MC_CMD_LICENSING_V3_TEMPORARY
+ * Perform operations to support installation of a single temporary license in
+ * the adapter, in addition to those found in the licensing partition. See
+ * SF-116124-SW for an overview of how this could be used. The license is
+ * stored in MC persistent data and so will survive a MC reboot, but will be
+ * erased when the adapter is power cycled
+ */
+#define MC_CMD_LICENSING_V3_TEMPORARY 0xd6
+
+#define MC_CMD_0xd6_PRIVILEGE_CTG SRIOV_CTG_GENERAL
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_LEN 4
+/* operation code */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_OP_OFST 0
+/* enum: install a new license, overwriting any existing temporary license.
+ * This is an asynchronous operation owing to the time taken to validate an
+ * ECDSA license
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_SET  0x0
+/* enum: clear the license immediately rather than waiting for the next power
+ * cycle
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_CLEAR  0x1
+/* enum: get the status of the asynchronous MC_CMD_LICENSING_V3_TEMPORARY_SET
+ * operation
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS  0x2
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN_SET msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LEN 164
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_OP_OFST 0
+/* ECDSA license and signature */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LICENSE_OFST 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_SET_LICENSE_LEN 160
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR_LEN 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_CLEAR_OP_OFST 0
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS msgrequest */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS_LEN 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_IN_STATUS_OP_OFST 0
+
+/* MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS msgresponse */
+#define    MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LEN 12
+/* status code */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_STATUS_OFST 0
+/* enum: finished validating and installing license */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_OK  0x0
+/* enum: license validation and installation in progress */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_IN_PROGRESS  0x1
+/* enum: licensing error. More specific error messages are not provided to
+ * avoid exposing details of the licensing system to the client
+ */
+#define          MC_CMD_LICENSING_V3_TEMPORARY_STATUS_ERROR  0x2
+/* bitmask of licensed features */
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_OFST 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LEN 8
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_LO_OFST 4
+#define       MC_CMD_LICENSING_V3_TEMPORARY_OUT_STATUS_LICENSED_FEATURES_HI_OFST 8
+
+
+/***********************************/
 /* MC_CMD_SET_PORT_SNIFF_CONFIG
  * Configure RX port sniffing for the physical port associated with the calling
  * function. Only a privileged function may change the port sniffing
@@ -11705,6 +11939,66 @@
 /* MC_CMD_RX_BALANCING_OUT msgresponse */
 #define    MC_CMD_RX_BALANCING_OUT_LEN 0
 
+
+/***********************************/
+/* MC_CMD_NVRAM_PRIVATE_APPEND
+ * Append a single TLV to the MC_USAGE_TLV partition. Returns MC_CMD_ERR_EEXIST
+ * if the tag is already present.
+ */
+#define MC_CMD_NVRAM_PRIVATE_APPEND 0x11c
+
+#define MC_CMD_0x11c_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_NVRAM_PRIVATE_APPEND_IN msgrequest */
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENMIN 9
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENMAX 252
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_IN_LEN(num) (8+1*(num))
+/* The tag to be appended */
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_TAG_OFST 0
+/* The length of the data */
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_LENGTH_OFST 4
+/* The data to be contained in the TLV structure */
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_OFST 8
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_LEN 1
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_MINNUM 1
+#define       MC_CMD_NVRAM_PRIVATE_APPEND_IN_DATA_BUFFER_MAXNUM 244
+
+/* MC_CMD_NVRAM_PRIVATE_APPEND_OUT msgresponse */
+#define    MC_CMD_NVRAM_PRIVATE_APPEND_OUT_LEN 0
+
+
+/***********************************/
+/* MC_CMD_XPM_VERIFY_CONTENTS
+ * Verify that the contents of the XPM memory are correct (Medford only). This
+ * is used during manufacture to check that the XPM memory has been programmed
+ * correctly at ATE.
+ */
+#define MC_CMD_XPM_VERIFY_CONTENTS 0x11b
+
+#define MC_CMD_0x11b_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_XPM_VERIFY_CONTENTS_IN msgrequest */
+#define    MC_CMD_XPM_VERIFY_CONTENTS_IN_LEN 4
+/* Data type to be checked */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_IN_DATA_TYPE_OFST 0
+
+/* MC_CMD_XPM_VERIFY_CONTENTS_OUT msgresponse */
+#define    MC_CMD_XPM_VERIFY_CONTENTS_OUT_LENMIN 12
+#define    MC_CMD_XPM_VERIFY_CONTENTS_OUT_LENMAX 252
+#define    MC_CMD_XPM_VERIFY_CONTENTS_OUT_LEN(num) (12+1*(num))
+/* Number of sectors found (test builds only) */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_NUM_SECTORS_OFST 0
+/* Number of bytes found (test builds only) */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_NUM_BYTES_OFST 4
+/* Length of signature */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIG_LENGTH_OFST 8
+/* Signature */
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_OFST 12
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_LEN 1
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_MINNUM 0
+#define       MC_CMD_XPM_VERIFY_CONTENTS_OUT_SIGNATURE_MAXNUM 240
+
+
 /***********************************/
 /* MC_CMD_SET_EVQ_TMR
  * Update the timer load, timer reload and timer mode values for a given EVQ.
@@ -11798,4 +12092,151 @@
  */
 #define       MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32
 
+
+/***********************************/
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP
+ * When we use the TX_vFIFO_ULL mode, we can allocate common pools using the
+ * unused switch buffers.
+ */
+#define MC_CMD_ALLOCATE_TX_VFIFO_CP 0x11d
+
+#define MC_CMD_0x11d_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP_IN msgrequest */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_LEN 20
+/* Desired instance. Must be set to a specific instance, which is a function
+ * local queue index.
+ */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INSTANCE_OFST 0
+/* Will the common pool be used as TX_vFIFO_ULL (1) */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_MODE_OFST 4
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_ENABLED       0x1 /* enum */
+/* enum: Using this interface without TX_vFIFO_ULL is not supported for now */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_DISABLED      0x0
+/* Number of buffers to reserve for the common pool */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_SIZE_OFST 8
+/* TX datapath to which the Common Pool is connected. */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_INGRESS_OFST 12
+/* enum: Extracts information from function */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1
+/* Network port or RX Engine to which the common pool connects. */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_EGRESS_OFST 16
+/* enum: Extracts information from function */
+/*               MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_USE_FUNCTION_VALUE          -0x1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT0          0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT1          0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT2          0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_PORT3          0x3 /* enum */
+/* enum: To enable Switch loopback with Rx engine 0 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE0     0x4
+/* enum: To enable Switch loopback with Rx engine 1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_CP_IN_RX_ENGINE1     0x5
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT msgresponse */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_LEN 4
+/* ID of the common pool allocated */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_CP_OUT_CP_ID_OFST 0
+
+
+/***********************************/
+/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO
+ * When we use the TX_vFIFO_ULL mode, we can allocate vFIFOs using the
+ * previously allocated common pools.
+ */
+#define MC_CMD_ALLOCATE_TX_VFIFO_VFIFO 0x11e
+
+#define MC_CMD_0x11e_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN msgrequest */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LEN 20
+/* Common pool previously allocated to which the new vFIFO will be associated
+ */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_CP_OFST 0
+/* Port or RX engine to associate the vFIFO egress */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_EGRESS_OFST 4
+/* enum: Extracts information from common pool */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_USE_CP_VALUE   -0x1
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT0          0x0 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT1          0x1 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT2          0x2 /* enum */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PORT3          0x3 /* enum */
+/* enum: To enable Switch loopback with Rx engine 0 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE0     0x4
+/* enum: To enable Switch loopback with Rx engine 1 */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_RX_ENGINE1     0x5
+/* Minimum number of buffers that the pool must have */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_SIZE_OFST 8
+/* enum: Do not check the space available */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_NO_MINIMUM     0x0
+/* Will the vFIFO be used as TX_vFIFO_ULL */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_MODE_OFST 12
+/* Network priority of the vFIFO,if applicable */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_PRIORITY_OFST 16
+/* enum: Search for the lowest unused priority */
+#define          MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_IN_LOWEST_AVAILABLE  -0x1
+
+/* MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT msgresponse */
+#define    MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_LEN 8
+/* Short vFIFO ID */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_VID_OFST 0
+/* Network priority of the vFIFO */
+#define       MC_CMD_ALLOCATE_TX_VFIFO_VFIFO_OUT_PRIORITY_OFST 4
+
+
+/***********************************/
+/* MC_CMD_TEARDOWN_TX_VFIFO_VF
+ * This interface clears the configuration of the given vFIFO and leaves it
+ * ready to be re-used.
+ */
+#define MC_CMD_TEARDOWN_TX_VFIFO_VF 0x11f
+
+#define MC_CMD_0x11f_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_TEARDOWN_TX_VFIFO_VF_IN msgrequest */
+#define    MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_LEN 4
+/* Short vFIFO ID */
+#define       MC_CMD_TEARDOWN_TX_VFIFO_VF_IN_VFIFO_OFST 0
+
+/* MC_CMD_TEARDOWN_TX_VFIFO_VF_OUT msgresponse */
+#define    MC_CMD_TEARDOWN_TX_VFIFO_VF_OUT_LEN 0
+
+
+/***********************************/
+/* MC_CMD_DEALLOCATE_TX_VFIFO_CP
+ * This interface clears the configuration of the given common pool and leaves
+ * it ready to be re-used.
+ */
+#define MC_CMD_DEALLOCATE_TX_VFIFO_CP 0x121
+
+#define MC_CMD_0x121_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN msgrequest */
+#define    MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_LEN 4
+/* Common pool ID given when pool allocated */
+#define       MC_CMD_DEALLOCATE_TX_VFIFO_CP_IN_POOL_ID_OFST 0
+
+/* MC_CMD_DEALLOCATE_TX_VFIFO_CP_OUT msgresponse */
+#define    MC_CMD_DEALLOCATE_TX_VFIFO_CP_OUT_LEN 0
+
+
+/***********************************/
+/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS
+ * This interface allows the host to find out how many common pool buffers are
+ * not yet assigned.
+ */
+#define MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS 0x124
+
+#define MC_CMD_0x124_PRIVILEGE_CTG SRIOV_CTG_ADMIN
+
+/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN msgrequest */
+#define    MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_IN_LEN 0
+
+/* MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT msgresponse */
+#define    MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_LEN 8
+/* Available buffers for the ENG to NET vFIFOs. */
+#define       MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_NET_OFST 0
+/* Available buffers for the ENG to ENG and NET to ENG vFIFOs. */
+#define       MC_CMD_SWITCH_GET_UNASSIGNED_BUFFERS_OUT_ENG_OFST 4
+
+
 #endif /* MCDI_PCOL_H */
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index 2a9228a6e4a0..9dcd396784ae 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -13,7 +13,6 @@
 
 #include <linux/slab.h>
 #include "efx.h"
-#include "phy.h"
 #include "mcdi.h"
 #include "mcdi_pcol.h"
 #include "nic.h"
@@ -841,7 +840,7 @@ void efx_mcdi_process_link_change(struct efx_nic *efx, efx_qword_t *ev)
 	u32 flags, fcntl, speed, lpa;
 
 	speed = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_SPEED);
-	EFX_BUG_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed));
+	EFX_WARN_ON_PARANOID(speed >= ARRAY_SIZE(efx_mcdi_event_link_speed));
 	speed = efx_mcdi_event_link_speed[speed];
 
 	flags = EFX_QWORD_FIELD(*ev, MCDI_EVENT_LINKCHANGE_LINK_FLAGS);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index 99d8c82124bb..8692e829b40f 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -41,13 +41,13 @@
  *
  **************************************************************************/
 
-#define EFX_DRIVER_VERSION	"4.0"
+#define EFX_DRIVER_VERSION	"4.1"
 
 #ifdef DEBUG
-#define EFX_BUG_ON_PARANOID(x) BUG_ON(x)
+#define EFX_WARN_ON_ONCE_PARANOID(x) WARN_ON_ONCE(x)
 #define EFX_WARN_ON_PARANOID(x) WARN_ON(x)
 #else
-#define EFX_BUG_ON_PARANOID(x) do {} while (0)
+#define EFX_WARN_ON_ONCE_PARANOID(x) do {} while (0)
 #define EFX_WARN_ON_PARANOID(x) do {} while (0)
 #endif
 
@@ -139,8 +139,6 @@ struct efx_special_buffer {
  * struct efx_tx_buffer - buffer state for a TX descriptor
  * @skb: When @flags & %EFX_TX_BUF_SKB, the associated socket buffer to be
  *	freed when descriptor completes
- * @heap_buf: When @flags & %EFX_TX_BUF_HEAP, the associated heap buffer to be
- *	freed when descriptor completes.
  * @option: When @flags & %EFX_TX_BUF_OPTION, a NIC-specific option descriptor.
  * @dma_addr: DMA address of the fragment.
  * @flags: Flags for allocation and DMA mapping type
@@ -151,10 +149,7 @@ struct efx_special_buffer {
  * Only valid if @unmap_len != 0.
  */
 struct efx_tx_buffer {
-	union {
-		const struct sk_buff *skb;
-		void *heap_buf;
-	};
+	const struct sk_buff *skb;
 	union {
 		efx_qword_t option;
 		dma_addr_t dma_addr;
@@ -166,7 +161,6 @@ struct efx_tx_buffer {
 };
 #define EFX_TX_BUF_CONT		1	/* not last descriptor of packet */
 #define EFX_TX_BUF_SKB		2	/* buffer is last part of skb */
-#define EFX_TX_BUF_HEAP		4	/* buffer was allocated with kmalloc() */
 #define EFX_TX_BUF_MAP_SINGLE	8	/* buffer was mapped with dma_map_single() */
 #define EFX_TX_BUF_OPTION	0x10	/* empty buffer for option descriptor */
 
@@ -189,13 +183,16 @@ struct efx_tx_buffer {
  * @channel: The associated channel
  * @core_txq: The networking core TX queue structure
  * @buffer: The software buffer ring
- * @tsoh_page: Array of pages of TSO header buffers
+ * @cb_page: Array of pages of copy buffers.  Carved up according to
+ *	%EFX_TX_CB_ORDER into %EFX_TX_CB_SIZE-sized chunks.
  * @txd: The hardware descriptor ring
  * @ptr_mask: The size of the ring minus 1.
  * @piobuf: PIO buffer region for this TX queue (shared with its partner).
  *	Size of the region is efx_piobuf_size.
  * @piobuf_offset: Buffer offset to be specified in PIO descriptors
  * @initialised: Has hardware queue been initialised?
+ * @handle_tso: TSO xmit preparation handler.  Sets up the TSO metadata and
+ *	may also map tx data, depending on the nature of the TSO implementation.
  * @read_count: Current read pointer.
  *	This is the number of buffers that have been removed from both rings.
  * @old_write_count: The value of @write_count when last checked.
@@ -221,9 +218,11 @@ struct efx_tx_buffer {
  * @tso_long_headers: Number of packets with headers too long for standard
  *	blocks
  * @tso_packets: Number of packets via the TSO xmit path
+ * @tso_fallbacks: Number of times TSO fallback used
  * @pushes: Number of times the TX push feature has been used
  * @pio_packets: Number of times the TX PIO feature has been used
  * @xmit_more_available: Are any packets waiting to be pushed to the NIC
+ * @cb_packets: Number of times the TX copybreak feature has been used
  * @empty_read_count: If the completion path has seen the queue as empty
  *	and the transmission path has not yet checked this, the value of
  *	@read_count bitwise-added to %EFX_EMPTY_COUNT_VALID; otherwise 0.
@@ -236,13 +235,16 @@ struct efx_tx_queue {
 	struct efx_channel *channel;
 	struct netdev_queue *core_txq;
 	struct efx_tx_buffer *buffer;
-	struct efx_buffer *tsoh_page;
+	struct efx_buffer *cb_page;
 	struct efx_special_buffer txd;
 	unsigned int ptr_mask;
 	void __iomem *piobuf;
 	unsigned int piobuf_offset;
 	bool initialised;
 
+	/* Function pointers used in the fast path. */
+	int (*handle_tso)(struct efx_tx_queue*, struct sk_buff*, bool *);
+
 	/* Members used mainly on the completion path */
 	unsigned int read_count ____cacheline_aligned_in_smp;
 	unsigned int old_write_count;
@@ -257,9 +259,11 @@ struct efx_tx_queue {
 	unsigned int tso_bursts;
 	unsigned int tso_long_headers;
 	unsigned int tso_packets;
+	unsigned int tso_fallbacks;
 	unsigned int pushes;
 	unsigned int pio_packets;
 	bool xmit_more_available;
+	unsigned int cb_packets;
 	/* Statistics to supplement MAC stats */
 	unsigned long tx_packets;
 
@@ -269,6 +273,9 @@ struct efx_tx_queue {
 	atomic_t flush_outstanding;
 };
 
+#define EFX_TX_CB_ORDER	7	/* log2 of per-descriptor copy buffer stride */
+#define EFX_TX_CB_SIZE	((1 << EFX_TX_CB_ORDER) - NET_IP_ALIGN)
+
 /**
  * struct efx_rx_buffer - An Efx RX data buffer
  * @dma_addr: DMA base address of the buffer
@@ -853,6 +860,7 @@ struct vfdi_status;
  * @rx_hash_key: Toeplitz hash key for RSS
  * @rx_indir_table: Indirection table for RSS
  * @rx_scatter: Scatter mode enabled for receives
+ * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
  * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -990,6 +998,7 @@ struct efx_nic {
 	u8 rx_hash_key[40];
 	u32 rx_indir_table[128];
 	bool rx_scatter;
+	bool rx_hash_udp_4tuple;
 
 	unsigned int_error_count;
 	unsigned long int_error_expire;
@@ -1210,6 +1219,8 @@ struct efx_mtd_partition {
  *	and tx_type will already have been validated but this operation
  *	must validate and update rx_filter.
  * @set_mac_address: Set the MAC address of the device
+ * @tso_versions: Returns mask of firmware-assisted TSO versions supported.
+ *	If %NULL, then device does not support any TSO version.
  * @revision: Hardware architecture revision
  * @txd_ptr_tbl_base: TX descriptor ring base address
  * @rxd_ptr_tbl_base: RX descriptor ring base address
@@ -1286,6 +1297,8 @@ struct efx_nic_type {
 	void (*tx_init)(struct efx_tx_queue *tx_queue);
 	void (*tx_remove)(struct efx_tx_queue *tx_queue);
 	void (*tx_write)(struct efx_tx_queue *tx_queue);
+	unsigned int (*tx_limit_len)(struct efx_tx_queue *tx_queue,
+				     dma_addr_t dma_addr, unsigned int len);
 	int (*rx_push_rss_config)(struct efx_nic *efx, bool user,
 				  const u32 *rx_indir_table);
 	int (*rx_probe)(struct efx_rx_queue *rx_queue);
@@ -1364,6 +1377,7 @@ struct efx_nic_type {
 	void (*vswitching_remove)(struct efx_nic *efx);
 	int (*get_mac_address)(struct efx_nic *efx, unsigned char *perm_addr);
 	int (*set_mac_address)(struct efx_nic *efx);
+	u32 (*tso_versions)(struct efx_nic *efx);
 
 	int revision;
 	unsigned int txd_ptr_tbl_base;
@@ -1395,7 +1409,7 @@ struct efx_nic_type {
 static inline struct efx_channel *
 efx_get_channel(struct efx_nic *efx, unsigned index)
 {
-	EFX_BUG_ON_PARANOID(index >= efx->n_channels);
+	EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_channels);
 	return efx->channel[index];
 }
 
@@ -1416,8 +1430,8 @@ efx_get_channel(struct efx_nic *efx, unsigned index)
 static inline struct efx_tx_queue *
 efx_get_tx_queue(struct efx_nic *efx, unsigned index, unsigned type)
 {
-	EFX_BUG_ON_PARANOID(index >= efx->n_tx_channels ||
-			    type >= EFX_TXQ_TYPES);
+	EFX_WARN_ON_ONCE_PARANOID(index >= efx->n_tx_channels ||
+				  type >= EFX_TXQ_TYPES);
 	return &efx->channel[efx->tx_channel_offset + index]->tx_queue[type];
 }
 
@@ -1430,8 +1444,8 @@ static inline bool efx_channel_has_tx_queues(struct efx_channel *channel)
 static inline struct efx_tx_queue *
 efx_channel_get_tx_queue(struct efx_channel *channel, unsigned type)
 {
-	EFX_BUG_ON_PARANOID(!efx_channel_has_tx_queues(channel) ||
-			    type >= EFX_TXQ_TYPES);
+	EFX_WARN_ON_ONCE_PARANOID(!efx_channel_has_tx_queues(channel) ||
+				  type >= EFX_TXQ_TYPES);
 	return &channel->tx_queue[type];
 }
 
@@ -1468,7 +1482,7 @@ static inline bool efx_channel_has_rx_queue(struct efx_channel *channel)
 static inline struct efx_rx_queue *
 efx_channel_get_rx_queue(struct efx_channel *channel)
 {
-	EFX_BUG_ON_PARANOID(!efx_channel_has_rx_queue(channel));
+	EFX_WARN_ON_ONCE_PARANOID(!efx_channel_has_rx_queue(channel));
 	return &channel->rx_queue;
 }
 
@@ -1543,4 +1557,32 @@ static inline netdev_features_t efx_supported_features(const struct efx_nic *efx
 	return net_dev->features | net_dev->hw_features;
 }
 
+/* Get the current TX queue insert index: insert_count masked by ptr_mask. */
+static inline unsigned int
+efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue)
+{
+	return tx_queue->insert_count & tx_queue->ptr_mask;
+}
+
+/* Get the TX buffer at the current insert index, without any in-use checks. */
+static inline struct efx_tx_buffer *
+__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+	return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)];
+}
+
+/* Get a TX buffer; under DEBUG, warn if len, flags or unmap_len is set. */
+static inline struct efx_tx_buffer *
+efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+{
+	struct efx_tx_buffer *buffer =
+		__efx_tx_queue_get_insert_buffer(tx_queue);
+
+	EFX_WARN_ON_ONCE_PARANOID(buffer->len);
+	EFX_WARN_ON_ONCE_PARANOID(buffer->flags);
+	EFX_WARN_ON_ONCE_PARANOID(buffer->unmap_len);
+
+	return buffer;
+}
+
+
 #endif /* EFX_NET_DRIVER_H */
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 73bee7ea332a..223774635cba 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -18,11 +18,8 @@
 #include "mcdi.h"
 
 enum {
-	EFX_REV_FALCON_A0 = 0,
-	EFX_REV_FALCON_A1 = 1,
-	EFX_REV_FALCON_B0 = 2,
-	EFX_REV_SIENA_A0 = 3,
-	EFX_REV_HUNT_A0 = 4,
+	EFX_REV_SIENA_A0 = 0,
+	EFX_REV_HUNT_A0 = 1,
 };
 
 static inline int efx_nic_rev(struct efx_nic *efx)
@@ -32,12 +29,6 @@ static inline int efx_nic_rev(struct efx_nic *efx)
 
 u32 efx_farch_fpga_ver(struct efx_nic *efx);
 
-/* NIC has two interlinked PCI functions for the same port. */
-static inline bool efx_nic_is_dual_func(struct efx_nic *efx)
-{
-	return efx_nic_rev(efx) < EFX_REV_FALCON_B0;
-}
-
 /* Read the current event from the event queue */
 static inline efx_qword_t *efx_event(struct efx_channel *channel,
 				     unsigned int index)
@@ -144,11 +135,6 @@ enum {
 	PHY_TYPE_SFT9001B = 10,
 };
 
-#define FALCON_XMAC_LOOPBACKS			\
-	((1 << LOOPBACK_XGMII) |		\
-	 (1 << LOOPBACK_XGXS) |			\
-	 (1 << LOOPBACK_XAUI))
-
 /* Alignment of PCIe DMA boundaries (4KB) */
 #define EFX_PAGE_SIZE	4096
 /* Size and alignment of buffer table entries (same) */
@@ -161,160 +147,6 @@ enum {
 	GENERIC_STAT_COUNT
 };
 
-/**
- * struct falcon_board_type - board operations and type information
- * @id: Board type id, as found in NVRAM
- * @init: Allocate resources and initialise peripheral hardware
- * @init_phy: Do board-specific PHY initialisation
- * @fini: Shut down hardware and free resources
- * @set_id_led: Set state of identifying LED or revert to automatic function
- * @monitor: Board-specific health check function
- */
-struct falcon_board_type {
-	u8 id;
-	int (*init) (struct efx_nic *nic);
-	void (*init_phy) (struct efx_nic *efx);
-	void (*fini) (struct efx_nic *nic);
-	void (*set_id_led) (struct efx_nic *efx, enum efx_led_mode mode);
-	int (*monitor) (struct efx_nic *nic);
-};
-
-/**
- * struct falcon_board - board information
- * @type: Type of board
- * @major: Major rev. ('A', 'B' ...)
- * @minor: Minor rev. (0, 1, ...)
- * @i2c_adap: I2C adapter for on-board peripherals
- * @i2c_data: Data for bit-banging algorithm
- * @hwmon_client: I2C client for hardware monitor
- * @ioexp_client: I2C client for power/port control
- */
-struct falcon_board {
-	const struct falcon_board_type *type;
-	int major;
-	int minor;
-	struct i2c_adapter i2c_adap;
-	struct i2c_algo_bit_data i2c_data;
-	struct i2c_client *hwmon_client, *ioexp_client;
-};
-
-/**
- * struct falcon_spi_device - a Falcon SPI (Serial Peripheral Interface) device
- * @device_id:		Controller's id for the device
- * @size:		Size (in bytes)
- * @addr_len:		Number of address bytes in read/write commands
- * @munge_address:	Flag whether addresses should be munged.
- *	Some devices with 9-bit addresses (e.g. AT25040A EEPROM)
- *	use bit 3 of the command byte as address bit A8, rather
- *	than having a two-byte address.  If this flag is set, then
- *	commands should be munged in this way.
- * @erase_command:	Erase command (or 0 if sector erase not needed).
- * @erase_size:		Erase sector size (in bytes)
- *	Erase commands affect sectors with this size and alignment.
- *	This must be a power of two.
- * @block_size:		Write block size (in bytes).
- *	Write commands are limited to blocks with this size and alignment.
- */
-struct falcon_spi_device {
-	int device_id;
-	unsigned int size;
-	unsigned int addr_len;
-	unsigned int munge_address:1;
-	u8 erase_command;
-	unsigned int erase_size;
-	unsigned int block_size;
-};
-
-static inline bool falcon_spi_present(const struct falcon_spi_device *spi)
-{
-	return spi->size != 0;
-}
-
-enum {
-	FALCON_STAT_tx_bytes = GENERIC_STAT_COUNT,
-	FALCON_STAT_tx_packets,
-	FALCON_STAT_tx_pause,
-	FALCON_STAT_tx_control,
-	FALCON_STAT_tx_unicast,
-	FALCON_STAT_tx_multicast,
-	FALCON_STAT_tx_broadcast,
-	FALCON_STAT_tx_lt64,
-	FALCON_STAT_tx_64,
-	FALCON_STAT_tx_65_to_127,
-	FALCON_STAT_tx_128_to_255,
-	FALCON_STAT_tx_256_to_511,
-	FALCON_STAT_tx_512_to_1023,
-	FALCON_STAT_tx_1024_to_15xx,
-	FALCON_STAT_tx_15xx_to_jumbo,
-	FALCON_STAT_tx_gtjumbo,
-	FALCON_STAT_tx_non_tcpudp,
-	FALCON_STAT_tx_mac_src_error,
-	FALCON_STAT_tx_ip_src_error,
-	FALCON_STAT_rx_bytes,
-	FALCON_STAT_rx_good_bytes,
-	FALCON_STAT_rx_bad_bytes,
-	FALCON_STAT_rx_packets,
-	FALCON_STAT_rx_good,
-	FALCON_STAT_rx_bad,
-	FALCON_STAT_rx_pause,
-	FALCON_STAT_rx_control,
-	FALCON_STAT_rx_unicast,
-	FALCON_STAT_rx_multicast,
-	FALCON_STAT_rx_broadcast,
-	FALCON_STAT_rx_lt64,
-	FALCON_STAT_rx_64,
-	FALCON_STAT_rx_65_to_127,
-	FALCON_STAT_rx_128_to_255,
-	FALCON_STAT_rx_256_to_511,
-	FALCON_STAT_rx_512_to_1023,
-	FALCON_STAT_rx_1024_to_15xx,
-	FALCON_STAT_rx_15xx_to_jumbo,
-	FALCON_STAT_rx_gtjumbo,
-	FALCON_STAT_rx_bad_lt64,
-	FALCON_STAT_rx_bad_gtjumbo,
-	FALCON_STAT_rx_overflow,
-	FALCON_STAT_rx_symbol_error,
-	FALCON_STAT_rx_align_error,
-	FALCON_STAT_rx_length_error,
-	FALCON_STAT_rx_internal_error,
-	FALCON_STAT_rx_nodesc_drop_cnt,
-	FALCON_STAT_COUNT
-};
-
-/**
- * struct falcon_nic_data - Falcon NIC state
- * @pci_dev2: Secondary function of Falcon A
- * @board: Board state and functions
- * @stats: Hardware statistics
- * @stats_disable_count: Nest count for disabling statistics fetches
- * @stats_pending: Is there a pending DMA of MAC statistics.
- * @stats_timer: A timer for regularly fetching MAC statistics.
- * @spi_flash: SPI flash device
- * @spi_eeprom: SPI EEPROM device
- * @spi_lock: SPI bus lock
- * @mdio_lock: MDIO bus lock
- * @xmac_poll_required: XMAC link state needs polling
- */
-struct falcon_nic_data {
-	struct pci_dev *pci_dev2;
-	struct falcon_board board;
-	u64 stats[FALCON_STAT_COUNT];
-	unsigned int stats_disable_count;
-	bool stats_pending;
-	struct timer_list stats_timer;
-	struct falcon_spi_device spi_flash;
-	struct falcon_spi_device spi_eeprom;
-	struct mutex spi_lock;
-	struct mutex mdio_lock;
-	bool xmac_poll_required;
-};
-
-static inline struct falcon_board *falcon_board(struct efx_nic *efx)
-{
-	struct falcon_nic_data *data = efx->nic_data;
-	return &data->board;
-}
-
 enum {
 	SIENA_STAT_tx_bytes = GENERIC_STAT_COUNT,
 	SIENA_STAT_tx_good_bytes,
@@ -681,6 +513,8 @@ void efx_farch_tx_init(struct efx_tx_queue *tx_queue);
 void efx_farch_tx_fini(struct efx_tx_queue *tx_queue);
 void efx_farch_tx_remove(struct efx_tx_queue *tx_queue);
 void efx_farch_tx_write(struct efx_tx_queue *tx_queue);
+unsigned int efx_farch_tx_limit_len(struct efx_tx_queue *tx_queue,
+				    dma_addr_t dma_addr, unsigned int len);
 int efx_farch_rx_probe(struct efx_rx_queue *rx_queue);
 void efx_farch_rx_init(struct efx_rx_queue *rx_queue);
 void efx_farch_rx_fini(struct efx_rx_queue *rx_queue);
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 77a5364f7a10..60cdb97f58e2 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -835,7 +835,7 @@ static int efx_ptp_synchronize(struct efx_nic *efx, unsigned int num_readings)
 	ACCESS_ONCE(*start) = 0;
 	rc = efx_mcdi_rpc_start(efx, MC_CMD_PTP, synch_buf,
 				MC_CMD_PTP_IN_SYNCHRONIZE_LEN);
-	EFX_BUG_ON_PARANOID(rc);
+	EFX_WARN_ON_ONCE_PARANOID(rc);
 
 	/* Wait for start from MCDI (or timeout) */
 	timeout = jiffies + msecs_to_jiffies(MAX_SYNCHRONISE_WAIT_MS);
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 02b0b5272c14..5f4ad4f3518f 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -335,7 +335,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
 
 	/* Calculate current fill level, and exit if we don't need to fill */
 	fill_level = (rx_queue->added_count - rx_queue->removed_count);
-	EFX_BUG_ON_PARANOID(fill_level > rx_queue->efx->rxq_entries);
+	EFX_WARN_ON_ONCE_PARANOID(fill_level > rx_queue->efx->rxq_entries);
 	if (fill_level >= rx_queue->fast_fill_trigger)
 		goto out;
 
@@ -347,7 +347,7 @@ void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue, bool atomic)
 
 	batch_size = efx->rx_pages_per_batch * efx->rx_bufs_per_page;
 	space = rx_queue->max_fill - fill_level;
-	EFX_BUG_ON_PARANOID(space < batch_size);
+	EFX_WARN_ON_ONCE_PARANOID(space < batch_size);
 
 	netif_vdbg(rx_queue->efx, rx_status, rx_queue->efx->net_dev,
 		   "RX queue %d fast-filling descriptor ring from"
@@ -400,21 +400,10 @@ static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
 	 */
 	rx_buf->flags |= EFX_RX_PKT_DISCARD;
 
-	if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) {
-		if (net_ratelimit())
-			netif_err(efx, rx_err, efx->net_dev,
-				  " RX queue %d seriously overlength "
-				  "RX event (0x%x > 0x%x+0x%x). Leaking\n",
-				  efx_rx_queue_index(rx_queue), len, max_len,
-				  efx->type->rx_buffer_padding);
-		efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
-	} else {
-		if (net_ratelimit())
-			netif_err(efx, rx_err, efx->net_dev,
-				  " RX queue %d overlength RX event "
-				  "(0x%x > 0x%x)\n",
-				  efx_rx_queue_index(rx_queue), len, max_len);
-	}
+	if (net_ratelimit())
+		netif_err(efx, rx_err, efx->net_dev,
+			  "RX queue %d overlength RX event (%#x > %#x)\n",
+			  efx_rx_queue_index(rx_queue), len, max_len);
 
 	efx_rx_queue_channel(rx_queue)->n_rx_overlength++;
 }
@@ -486,7 +475,7 @@ static struct sk_buff *efx_rx_mk_skb(struct efx_channel *channel,
 		return NULL;
 	}
 
-	EFX_BUG_ON_PARANOID(rx_buf->len < hdr_len);
+	EFX_WARN_ON_ONCE_PARANOID(rx_buf->len < hdr_len);
 
 	memcpy(skb->data + efx->rx_ip_align, eh - efx->rx_prefix_size,
 	       efx->rx_prefix_size + hdr_len);
@@ -693,7 +682,7 @@ int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
 
 	/* Create the smallest power-of-two aligned ring */
 	entries = max(roundup_pow_of_two(efx->rxq_entries), EFX_MIN_DMAQ_SIZE);
-	EFX_BUG_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
 	rx_queue->ptr_mask = entries - 1;
 
 	netif_dbg(efx, probe, efx->net_dev,
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index 04ed1b4c7cd9..a3901bc96586 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -20,7 +20,6 @@
 #include "nic.h"
 #include "farch_regs.h"
 #include "io.h"
-#include "phy.h"
 #include "workarounds.h"
 #include "mcdi.h"
 #include "mcdi_pcol.h"
@@ -718,7 +717,7 @@ static void siena_mcdi_request(struct efx_nic *efx,
 	unsigned int i;
 	unsigned int inlen_dw = DIV_ROUND_UP(sdu_len, 4);
 
-	EFX_BUG_ON_PARANOID(hdr_len != 4);
+	EFX_WARN_ON_PARANOID(hdr_len != 4);
 
 	efx_writed(efx, hdr, pdu);
 
@@ -977,6 +976,7 @@ const struct efx_nic_type siena_a0_nic_type = {
 	.tx_init = efx_farch_tx_init,
 	.tx_remove = efx_farch_tx_remove,
 	.tx_write = efx_farch_tx_write,
+	.tx_limit_len = efx_farch_tx_limit_len,
 	.rx_push_rss_config = siena_rx_push_rss_config,
 	.rx_probe = efx_farch_rx_probe,
 	.rx_init = efx_farch_rx_init,
diff --git a/drivers/net/ethernet/sfc/tx.c b/drivers/net/ethernet/sfc/tx.c
index 233778911557..3c0151424d12 100644
--- a/drivers/net/ethernet/sfc/tx.c
+++ b/drivers/net/ethernet/sfc/tx.c
@@ -22,6 +22,7 @@
 #include "efx.h"
 #include "io.h"
 #include "nic.h"
+#include "tx.h"
 #include "workarounds.h"
 #include "ef10_regs.h"
 
@@ -33,29 +34,30 @@ unsigned int efx_piobuf_size __read_mostly = EFX_PIOBUF_SIZE_DEF;
 
 #endif /* EFX_USE_PIO */
 
-static inline unsigned int
-efx_tx_queue_get_insert_index(const struct efx_tx_queue *tx_queue)
+static inline u8 *efx_tx_get_copy_buffer(struct efx_tx_queue *tx_queue,
+					 struct efx_tx_buffer *buffer)
 {
-	return tx_queue->insert_count & tx_queue->ptr_mask;
-}
+	unsigned int index = efx_tx_queue_get_insert_index(tx_queue);
+	struct efx_buffer *page_buf =
+		&tx_queue->cb_page[index >> (PAGE_SHIFT - EFX_TX_CB_ORDER)];
+	unsigned int offset =
+		((index << EFX_TX_CB_ORDER) + NET_IP_ALIGN) & (PAGE_SIZE - 1);
 
-static inline struct efx_tx_buffer *
-__efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
-{
-	return &tx_queue->buffer[efx_tx_queue_get_insert_index(tx_queue)];
+	if (unlikely(!page_buf->addr) &&
+	    efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
+				 GFP_ATOMIC))
+		return NULL;
+	buffer->dma_addr = page_buf->dma_addr + offset;
+	buffer->unmap_len = 0;
+	return (u8 *)page_buf->addr + offset;
 }
 
-static inline struct efx_tx_buffer *
-efx_tx_queue_get_insert_buffer(const struct efx_tx_queue *tx_queue)
+u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
+				   struct efx_tx_buffer *buffer, size_t len)
 {
-	struct efx_tx_buffer *buffer =
-		__efx_tx_queue_get_insert_buffer(tx_queue);
-
-	EFX_BUG_ON_PARANOID(buffer->len);
-	EFX_BUG_ON_PARANOID(buffer->flags);
-	EFX_BUG_ON_PARANOID(buffer->unmap_len);
-
-	return buffer;
+	if (len > EFX_TX_CB_SIZE)
+		return NULL;
+	return efx_tx_get_copy_buffer(tx_queue, buffer);
 }
 
 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
@@ -82,35 +84,12 @@ static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
 		netif_vdbg(tx_queue->efx, tx_done, tx_queue->efx->net_dev,
 			   "TX queue %d transmission id %x complete\n",
 			   tx_queue->queue, tx_queue->read_count);
-	} else if (buffer->flags & EFX_TX_BUF_HEAP) {
-		kfree(buffer->heap_buf);
 	}
 
 	buffer->len = 0;
 	buffer->flags = 0;
 }
 
-static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
-			       struct sk_buff *skb);
-
-static inline unsigned
-efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
-{
-	/* Depending on the NIC revision, we can use descriptor
-	 * lengths up to 8K or 8K-1.  However, since PCI Express
-	 * devices must split read requests at 4K boundaries, there is
-	 * little benefit from using descriptors that cross those
-	 * boundaries and we keep things simple by not doing so.
-	 */
-	unsigned len = (~dma_addr & (EFX_PAGE_SIZE - 1)) + 1;
-
-	/* Work around hardware bug for unaligned buffers. */
-	if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf))
-		len = min_t(unsigned, len, 512 - (dma_addr & 0xf));
-
-	return len;
-}
-
 unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
 {
 	/* Header and payload descriptor for each output segment, plus
@@ -118,10 +97,8 @@ unsigned int efx_tx_max_skb_descs(struct efx_nic *efx)
 	 */
 	unsigned int max_descs = EFX_TSO_MAX_SEGS * 2 + MAX_SKB_FRAGS;
 
-	/* Possibly one more per segment for the alignment workaround,
-	 * or for option descriptors
-	 */
-	if (EFX_WORKAROUND_5391(efx) || efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+	/* Possibly one more per segment for option descriptors */
+	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
 		max_descs += EFX_TSO_MAX_SEGS;
 
 	/* Possibly more for PCIe page boundaries within input fragments */
@@ -165,7 +142,7 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
 
 	fill_level = max(txq1->insert_count - txq1->old_read_count,
 			 txq2->insert_count - txq2->old_read_count);
-	EFX_BUG_ON_PARANOID(fill_level >= efx->txq_entries);
+	EFX_WARN_ON_ONCE_PARANOID(fill_level >= efx->txq_entries);
 	if (likely(fill_level < efx->txq_stop_thresh)) {
 		smp_mb();
 		if (likely(!efx->loopback_selftest))
@@ -173,6 +150,33 @@ static void efx_tx_maybe_stop_queue(struct efx_tx_queue *txq1)
 	}
 }
 
+static int efx_enqueue_skb_copy(struct efx_tx_queue *tx_queue,
+				struct sk_buff *skb)
+{
+	unsigned int copy_len = skb->len;
+	struct efx_tx_buffer *buffer;
+	u8 *copy_buffer;
+	int rc;
+
+	EFX_WARN_ON_ONCE_PARANOID(copy_len > EFX_TX_CB_SIZE);
+
+	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+
+	copy_buffer = efx_tx_get_copy_buffer(tx_queue, buffer);
+	if (unlikely(!copy_buffer))
+		return -ENOMEM;
+
+	rc = skb_copy_bits(skb, 0, copy_buffer, copy_len);
+	EFX_WARN_ON_PARANOID(rc);
+	buffer->len = copy_len;
+
+	buffer->skb = skb;
+	buffer->flags = EFX_TX_BUF_SKB;
+
+	++tx_queue->insert_count;
+	return rc;
+}
+
 #ifdef EFX_USE_PIO
 
 struct efx_short_copy_buffer {
@@ -264,11 +268,11 @@ static void efx_skb_copy_bits_to_pio(struct efx_nic *efx, struct sk_buff *skb,
 		kunmap_atomic(vaddr);
 	}
 
-	EFX_BUG_ON_PARANOID(skb_shinfo(skb)->frag_list);
+	EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->frag_list);
 }
 
-static struct efx_tx_buffer *
-efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+static int efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue,
+			       struct sk_buff *skb)
 {
 	struct efx_tx_buffer *buffer =
 		efx_tx_queue_get_insert_buffer(tx_queue);
@@ -292,7 +296,7 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 		efx_flush_copy_buffer(tx_queue->efx, piobuf, &copy_buf);
 	} else {
 		/* Pad the write to the size of a cache line.
-		 * We can do this because we know the skb_shared_info sruct is
+		 * We can do this because we know the skb_shared_info struct is
 		 * after the source, and the destination buffer is big enough.
 		 */
 		BUILD_BUG_ON(L1_CACHE_BYTES >
@@ -301,6 +305,9 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 				 ALIGN(skb->len, L1_CACHE_BYTES) >> 3);
 	}
 
+	buffer->skb = skb;
+	buffer->flags = EFX_TX_BUF_SKB | EFX_TX_BUF_OPTION;
+
 	EFX_POPULATE_QWORD_5(buffer->option,
 			     ESF_DZ_TX_DESC_IS_OPT, 1,
 			     ESF_DZ_TX_OPTION_TYPE, ESE_DZ_TX_OPTION_DESC_PIO,
@@ -308,127 +315,227 @@ efx_enqueue_skb_pio(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
 			     ESF_DZ_TX_PIO_BYTE_CNT, skb->len,
 			     ESF_DZ_TX_PIO_BUF_ADDR,
 			     tx_queue->piobuf_offset);
-	++tx_queue->pio_packets;
 	++tx_queue->insert_count;
-	return buffer;
+	return 0;
 }
 #endif /* EFX_USE_PIO */
 
-/*
- * Add a socket buffer to a TX queue
- *
- * This maps all fragments of a socket buffer for DMA and adds them to
- * the TX queue.  The queue's insert pointer will be incremented by
- * the number of fragments in the socket buffer.
- *
- * If any DMA mapping fails, any mapped fragments will be unmapped,
- * the queue's insert pointer will be restored to its original value.
- *
- * This function is split out from efx_hard_start_xmit to allow the
- * loopback test to direct packets via specific TX queues.
- *
- * Returns NETDEV_TX_OK.
- * You must hold netif_tx_lock() to call this function.
+static struct efx_tx_buffer *efx_tx_map_chunk(struct efx_tx_queue *tx_queue,
+					      dma_addr_t dma_addr,
+					      size_t len)
+{
+	const struct efx_nic_type *nic_type = tx_queue->efx->type;
+	struct efx_tx_buffer *buffer;
+	unsigned int dma_len;
+
+	/* Map the fragment taking account of NIC-dependent DMA limits. */
+	do {
+		buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+		dma_len = nic_type->tx_limit_len(tx_queue, dma_addr, len);
+
+		buffer->len = dma_len;
+		buffer->dma_addr = dma_addr;
+		buffer->flags = EFX_TX_BUF_CONT;
+		len -= dma_len;
+		dma_addr += dma_len;
+		++tx_queue->insert_count;
+	} while (len);
+
+	return buffer;
+}
+
+/* Map all data from an SKB for DMA and create descriptors on the queue.
  */
-netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+static int efx_tx_map_data(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+			   unsigned int segment_count)
 {
 	struct efx_nic *efx = tx_queue->efx;
 	struct device *dma_dev = &efx->pci_dev->dev;
-	struct efx_tx_buffer *buffer;
-	unsigned int old_insert_count = tx_queue->insert_count;
-	skb_frag_t *fragment;
-	unsigned int len, unmap_len = 0;
-	dma_addr_t dma_addr, unmap_addr = 0;
-	unsigned int dma_len;
+	unsigned int frag_index, nr_frags;
+	dma_addr_t dma_addr, unmap_addr;
 	unsigned short dma_flags;
-	int i = 0;
+	size_t len, unmap_len;
 
-	if (skb_shinfo(skb)->gso_size)
-		return efx_enqueue_skb_tso(tx_queue, skb);
+	nr_frags = skb_shinfo(skb)->nr_frags;
+	frag_index = 0;
 
-	/* Get size of the initial fragment */
+	/* Map header data. */
 	len = skb_headlen(skb);
+	dma_addr = dma_map_single(dma_dev, skb->data, len, DMA_TO_DEVICE);
+	dma_flags = EFX_TX_BUF_MAP_SINGLE;
+	unmap_len = len;
+	unmap_addr = dma_addr;
 
-	/* Pad if necessary */
-	if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
-		EFX_BUG_ON_PARANOID(skb->data_len);
-		len = 32 + 1;
-		if (skb_pad(skb, len - skb->len))
-			return NETDEV_TX_OK;
-	}
+	if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+		return -EIO;
 
-	/* Consider using PIO for short packets */
-#ifdef EFX_USE_PIO
-	if (skb->len <= efx_piobuf_size && !skb->xmit_more &&
-	    efx_nic_may_tx_pio(tx_queue)) {
-		buffer = efx_enqueue_skb_pio(tx_queue, skb);
-		dma_flags = EFX_TX_BUF_OPTION;
-		goto finish_packet;
+	if (segment_count) {
+		/* For TSO we need to put the header into a separate
+		 * descriptor. Map this separately if necessary.
+		 */
+		size_t header_len = skb_transport_header(skb) - skb->data +
+				(tcp_hdr(skb)->doff << 2u);
+
+		if (header_len != len) {
+			tx_queue->tso_long_headers++;
+			efx_tx_map_chunk(tx_queue, dma_addr, header_len);
+			len -= header_len;
+			dma_addr += header_len;
+		}
 	}
-#endif
 
-	/* Map for DMA.  Use dma_map_single rather than dma_map_page
-	 * since this is more efficient on machines with sparse
-	 * memory.
-	 */
-	dma_flags = EFX_TX_BUF_MAP_SINGLE;
-	dma_addr = dma_map_single(dma_dev, skb->data, len, PCI_DMA_TODEVICE);
+	/* Add descriptors for each fragment. */
+	do {
+		struct efx_tx_buffer *buffer;
+		skb_frag_t *fragment;
 
-	/* Process all fragments */
-	while (1) {
-		if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
-			goto dma_err;
+		buffer = efx_tx_map_chunk(tx_queue, dma_addr, len);
+
+		/* The final descriptor for a fragment is responsible for
+		 * unmapping the whole fragment.
+		 */
+		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
+		buffer->unmap_len = unmap_len;
+		buffer->dma_offset = buffer->dma_addr - unmap_addr;
+
+		if (frag_index >= nr_frags) {
+			/* Store SKB details with the final buffer for
+			 * the completion.
+			 */
+			buffer->skb = skb;
+			buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+			return 0;
+		}
 
-		/* Store fields for marking in the per-fragment final
-		 * descriptor */
+		/* Move on to the next fragment. */
+		fragment = &skb_shinfo(skb)->frags[frag_index++];
+		len = skb_frag_size(fragment);
+		dma_addr = skb_frag_dma_map(dma_dev, fragment,
+				0, len, DMA_TO_DEVICE);
+		dma_flags = 0;
 		unmap_len = len;
 		unmap_addr = dma_addr;
 
-		/* Add to TX queue, splitting across DMA boundaries */
-		do {
-			buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+		if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+			return -EIO;
+	} while (1);
+}
+
+/* Remove buffers put into a tx_queue.  None of the buffers must have
+ * an skb attached.
+ */
+static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
+{
+	struct efx_tx_buffer *buffer;
+
+	/* Work backwards until we hit the original insert pointer value */
+	while (tx_queue->insert_count != tx_queue->write_count) {
+		--tx_queue->insert_count;
+		buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
+		efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
+	}
+}
+
+/*
+ * Fall back to software TSO.
+ *
+ * This is used if we are unable to send a GSO packet through hardware TSO.
+ * This should only ever happen due to per-queue restrictions - unsupported
+ * packets should first be filtered by the feature flags.
+ *
+ * Returns 0 on success, error code otherwise.
+ */
+static int efx_tx_tso_fallback(struct efx_tx_queue *tx_queue,
+			       struct sk_buff *skb)
+{
+	struct sk_buff *segments, *next;
 
-			dma_len = efx_max_tx_len(efx, dma_addr);
-			if (likely(dma_len >= len))
-				dma_len = len;
+	segments = skb_gso_segment(skb, 0);
+	if (IS_ERR(segments))
+		return PTR_ERR(segments);
 
-			/* Fill out per descriptor fields */
-			buffer->len = dma_len;
-			buffer->dma_addr = dma_addr;
-			buffer->flags = EFX_TX_BUF_CONT;
-			len -= dma_len;
-			dma_addr += dma_len;
-			++tx_queue->insert_count;
-		} while (len);
+	dev_kfree_skb_any(skb);
+	skb = segments;
 
-		/* Transfer ownership of the unmapping to the final buffer */
-		buffer->flags = EFX_TX_BUF_CONT | dma_flags;
-		buffer->unmap_len = unmap_len;
-		buffer->dma_offset = buffer->dma_addr - unmap_addr;
-		unmap_len = 0;
+	while (skb) {
+		next = skb->next;
+		skb->next = NULL;
 
-		/* Get address and size of next fragment */
-		if (i >= skb_shinfo(skb)->nr_frags)
-			break;
-		fragment = &skb_shinfo(skb)->frags[i];
-		len = skb_frag_size(fragment);
-		i++;
-		/* Map for DMA */
-		dma_flags = 0;
-		dma_addr = skb_frag_dma_map(dma_dev, fragment, 0, len,
-					    DMA_TO_DEVICE);
+		if (next)
+			skb->xmit_more = true;
+		efx_enqueue_skb(tx_queue, skb);
+		skb = next;
 	}
 
-	/* Transfer ownership of the skb to the final buffer */
+	return 0;
+}
+
+/*
+ * Add a socket buffer to a TX queue
+ *
+ * This maps all fragments of a socket buffer for DMA and adds them to
+ * the TX queue.  The queue's insert pointer will be incremented by
+ * the number of fragments in the socket buffer.
+ *
+ * If any DMA mapping fails, any mapped fragments will be unmapped,
+ * the queue's insert pointer will be restored to its original value.
+ *
+ * This function is split out from efx_hard_start_xmit to allow the
+ * loopback test to direct packets via specific TX queues.
+ *
+ * Returns NETDEV_TX_OK.
+ * You must hold netif_tx_lock() to call this function.
+ */
+netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
+{
+	bool data_mapped = false;
+	unsigned int segments;
+	unsigned int skb_len;
+	int rc;
+
+	skb_len = skb->len;
+	segments = skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 0;
+	if (segments == 1)
+		segments = 0; /* Don't use TSO for a single segment. */
+
+	/* Handle TSO first - it's *possible* (although unlikely) that we might
+	 * be passed a packet to segment that's smaller than the copybreak/PIO
+	 * size limit.
+	 */
+	if (segments) {
+		EFX_WARN_ON_ONCE_PARANOID(!tx_queue->handle_tso);
+		rc = tx_queue->handle_tso(tx_queue, skb, &data_mapped);
+		if (rc == -EINVAL) {
+			rc = efx_tx_tso_fallback(tx_queue, skb);
+			tx_queue->tso_fallbacks++;
+			if (rc == 0)
+				return 0;
+		}
+		if (rc)
+			goto err;
 #ifdef EFX_USE_PIO
-finish_packet:
+	} else if (skb_len <= efx_piobuf_size && !skb->xmit_more &&
+		   efx_nic_may_tx_pio(tx_queue)) {
+		/* Use PIO for short packets with an empty queue. */
+		if (efx_enqueue_skb_pio(tx_queue, skb))
+			goto err;
+		tx_queue->pio_packets++;
+		data_mapped = true;
 #endif
-	buffer->skb = skb;
-	buffer->flags = EFX_TX_BUF_SKB | dma_flags;
+	} else if (skb->data_len && skb_len <= EFX_TX_CB_SIZE) {
+		/* Coalesce short fragmented packets into a copy buffer. */
+		if (efx_enqueue_skb_copy(tx_queue, skb))
+			goto err;
+		tx_queue->cb_packets++;
+		data_mapped = true;
+	}
 
-	netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
+	/* Map for DMA and create descriptors if we haven't done so already. */
+	if (!data_mapped && (efx_tx_map_data(tx_queue, skb, segments)))
+		goto err;
 
-	efx_tx_maybe_stop_queue(tx_queue);
+	/* Update BQL */
+	netdev_tx_sent_queue(tx_queue->core_txq, skb_len);
 
 	/* Pass off to hardware */
 	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
@@ -446,37 +553,22 @@ finish_packet:
 		tx_queue->xmit_more_available = skb->xmit_more;
 	}
 
-	tx_queue->tx_packets++;
+	if (segments) {
+		tx_queue->tso_bursts++;
+		tx_queue->tso_packets += segments;
+		tx_queue->tx_packets  += segments;
+	} else {
+		tx_queue->tx_packets++;
+	}
+
+	efx_tx_maybe_stop_queue(tx_queue);
 
 	return NETDEV_TX_OK;
 
- dma_err:
-	netif_err(efx, tx_err, efx->net_dev,
-		  " TX queue %d could not map skb with %d bytes %d "
-		  "fragments for DMA\n", tx_queue->queue, skb->len,
-		  skb_shinfo(skb)->nr_frags + 1);
 
-	/* Mark the packet as transmitted, and free the SKB ourselves */
+err:
+	efx_enqueue_unwind(tx_queue);
 	dev_kfree_skb_any(skb);
-
-	/* Work backwards until we hit the original insert pointer value */
-	while (tx_queue->insert_count != old_insert_count) {
-		unsigned int pkts_compl = 0, bytes_compl = 0;
-		--tx_queue->insert_count;
-		buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
-		efx_dequeue_buffer(tx_queue, buffer, &pkts_compl, &bytes_compl);
-	}
-
-	/* Free the fragment we were mid-way through pushing */
-	if (unmap_len) {
-		if (dma_flags & EFX_TX_BUF_MAP_SINGLE)
-			dma_unmap_single(dma_dev, unmap_addr, unmap_len,
-					 DMA_TO_DEVICE);
-		else
-			dma_unmap_page(dma_dev, unmap_addr, unmap_len,
-				       DMA_TO_DEVICE);
-	}
-
 	return NETDEV_TX_OK;
 }
 
@@ -576,7 +668,7 @@ int efx_setup_tc(struct net_device *net_dev, u32 handle, __be16 proto,
 
 	num_tc = ntc->tc;
 
-	if (efx_nic_rev(efx) < EFX_REV_FALCON_B0 || num_tc > EFX_MAX_TX_TC)
+	if (num_tc > EFX_MAX_TX_TC)
 		return -EINVAL;
 
 	if (num_tc == net_dev->num_tc)
@@ -632,7 +724,7 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	struct efx_tx_queue *txq2;
 	unsigned int pkts_compl = 0, bytes_compl = 0;
 
-	EFX_BUG_ON_PARANOID(index > tx_queue->ptr_mask);
+	EFX_WARN_ON_ONCE_PARANOID(index > tx_queue->ptr_mask);
 
 	efx_dequeue_buffers(tx_queue, index, &pkts_compl, &bytes_compl);
 	tx_queue->pkts_compl += pkts_compl;
@@ -667,19 +759,9 @@ void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
 	}
 }
 
-/* Size of page-based TSO header buffers.  Larger blocks must be
- * allocated from the heap.
- */
-#define TSOH_STD_SIZE	128
-#define TSOH_PER_PAGE	(PAGE_SIZE / TSOH_STD_SIZE)
-
-/* At most half the descriptors in the queue at any time will refer to
- * a TSO header buffer, since they must always be followed by a
- * payload descriptor referring to an skb.
- */
-static unsigned int efx_tsoh_page_count(struct efx_tx_queue *tx_queue)
+static unsigned int efx_tx_cb_page_count(struct efx_tx_queue *tx_queue)
 {
-	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, 2 * TSOH_PER_PAGE);
+	return DIV_ROUND_UP(tx_queue->ptr_mask + 1, PAGE_SIZE >> EFX_TX_CB_ORDER);
 }
 
 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
@@ -690,7 +772,7 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 
 	/* Create the smallest power-of-two aligned ring */
 	entries = max(roundup_pow_of_two(efx->txq_entries), EFX_MIN_DMAQ_SIZE);
-	EFX_BUG_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
+	EFX_WARN_ON_PARANOID(entries > EFX_MAX_DMAQ_SIZE);
 	tx_queue->ptr_mask = entries - 1;
 
 	netif_dbg(efx, probe, efx->net_dev,
@@ -703,14 +785,11 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 	if (!tx_queue->buffer)
 		return -ENOMEM;
 
-	if (tx_queue->queue & EFX_TXQ_TYPE_OFFLOAD) {
-		tx_queue->tsoh_page =
-			kcalloc(efx_tsoh_page_count(tx_queue),
-				sizeof(tx_queue->tsoh_page[0]), GFP_KERNEL);
-		if (!tx_queue->tsoh_page) {
-			rc = -ENOMEM;
-			goto fail1;
-		}
+	tx_queue->cb_page = kcalloc(efx_tx_cb_page_count(tx_queue),
+				    sizeof(tx_queue->cb_page[0]), GFP_KERNEL);
+	if (!tx_queue->cb_page) {
+		rc = -ENOMEM;
+		goto fail1;
 	}
 
 	/* Allocate hardware ring */
@@ -721,8 +800,8 @@ int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
 	return 0;
 
 fail2:
-	kfree(tx_queue->tsoh_page);
-	tx_queue->tsoh_page = NULL;
+	kfree(tx_queue->cb_page);
+	tx_queue->cb_page = NULL;
 fail1:
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
@@ -731,7 +810,9 @@ fail1:
 
 void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 {
-	netif_dbg(tx_queue->efx, drv, tx_queue->efx->net_dev,
+	struct efx_nic *efx = tx_queue->efx;
+
+	netif_dbg(efx, drv, efx->net_dev,
 		  "initialising TX queue %d\n", tx_queue->queue);
 
 	tx_queue->insert_count = 0;
@@ -742,6 +823,11 @@ void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
 	tx_queue->empty_read_count = 0 | EFX_EMPTY_COUNT_VALID;
 	tx_queue->xmit_more_available = false;
 
+	/* Set up default function pointers. These may get replaced by
+	 * efx_nic_init_tx() based on NIC/queue capabilities.
+	 */
+	tx_queue->handle_tso = efx_enqueue_skb_tso;
+
 	/* Set up TX descriptor ring */
 	efx_nic_init_tx(tx_queue);
 
@@ -781,589 +867,14 @@ void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
 		  "destroying TX queue %d\n", tx_queue->queue);
 	efx_nic_remove_tx(tx_queue);
 
-	if (tx_queue->tsoh_page) {
-		for (i = 0; i < efx_tsoh_page_count(tx_queue); i++)
+	if (tx_queue->cb_page) {
+		for (i = 0; i < efx_tx_cb_page_count(tx_queue); i++)
 			efx_nic_free_buffer(tx_queue->efx,
-					    &tx_queue->tsoh_page[i]);
-		kfree(tx_queue->tsoh_page);
-		tx_queue->tsoh_page = NULL;
+					    &tx_queue->cb_page[i]);
+		kfree(tx_queue->cb_page);
+		tx_queue->cb_page = NULL;
 	}
 
 	kfree(tx_queue->buffer);
 	tx_queue->buffer = NULL;
 }
-
-
-/* Efx TCP segmentation acceleration.
- *
- * Why?  Because by doing it here in the driver we can go significantly
- * faster than the GSO.
- *
- * Requires TX checksum offload support.
- */
-
-#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
-
-/**
- * struct tso_state - TSO state for an SKB
- * @out_len: Remaining length in current segment
- * @seqnum: Current sequence number
- * @ipv4_id: Current IPv4 ID, host endian
- * @packet_space: Remaining space in current packet
- * @dma_addr: DMA address of current position
- * @in_len: Remaining length in current SKB fragment
- * @unmap_len: Length of SKB fragment
- * @unmap_addr: DMA address of SKB fragment
- * @dma_flags: TX buffer flags for DMA mapping - %EFX_TX_BUF_MAP_SINGLE or 0
- * @protocol: Network protocol (after any VLAN header)
- * @ip_off: Offset of IP header
- * @tcp_off: Offset of TCP header
- * @header_len: Number of bytes of header
- * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
- * @header_dma_addr: Header DMA address, when using option descriptors
- * @header_unmap_len: Header DMA mapped length, or 0 if not using option
- *	descriptors
- *
- * The state used during segmentation.  It is put into this data structure
- * just to make it easy to pass into inline functions.
- */
-struct tso_state {
-	/* Output position */
-	unsigned out_len;
-	unsigned seqnum;
-	u16 ipv4_id;
-	unsigned packet_space;
-
-	/* Input position */
-	dma_addr_t dma_addr;
-	unsigned in_len;
-	unsigned unmap_len;
-	dma_addr_t unmap_addr;
-	unsigned short dma_flags;
-
-	__be16 protocol;
-	unsigned int ip_off;
-	unsigned int tcp_off;
-	unsigned header_len;
-	unsigned int ip_base_len;
-	dma_addr_t header_dma_addr;
-	unsigned int header_unmap_len;
-};
-
-
-/*
- * Verify that our various assumptions about sk_buffs and the conditions
- * under which TSO will be attempted hold true.  Return the protocol number.
- */
-static __be16 efx_tso_check_protocol(struct sk_buff *skb)
-{
-	__be16 protocol = skb->protocol;
-
-	EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
-			    protocol);
-	if (protocol == htons(ETH_P_8021Q)) {
-		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		protocol = veh->h_vlan_encapsulated_proto;
-	}
-
-	if (protocol == htons(ETH_P_IP)) {
-		EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
-	} else {
-		EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IPV6));
-		EFX_BUG_ON_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
-	}
-	EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
-			     + (tcp_hdr(skb)->doff << 2u)) >
-			    skb_headlen(skb));
-
-	return protocol;
-}
-
-static u8 *efx_tsoh_get_buffer(struct efx_tx_queue *tx_queue,
-			       struct efx_tx_buffer *buffer, unsigned int len)
-{
-	u8 *result;
-
-	EFX_BUG_ON_PARANOID(buffer->len);
-	EFX_BUG_ON_PARANOID(buffer->flags);
-	EFX_BUG_ON_PARANOID(buffer->unmap_len);
-
-	if (likely(len <= TSOH_STD_SIZE - NET_IP_ALIGN)) {
-		unsigned index =
-			(tx_queue->insert_count & tx_queue->ptr_mask) / 2;
-		struct efx_buffer *page_buf =
-			&tx_queue->tsoh_page[index / TSOH_PER_PAGE];
-		unsigned offset =
-			TSOH_STD_SIZE * (index % TSOH_PER_PAGE) + NET_IP_ALIGN;
-
-		if (unlikely(!page_buf->addr) &&
-		    efx_nic_alloc_buffer(tx_queue->efx, page_buf, PAGE_SIZE,
-					 GFP_ATOMIC))
-			return NULL;
-
-		result = (u8 *)page_buf->addr + offset;
-		buffer->dma_addr = page_buf->dma_addr + offset;
-		buffer->flags = EFX_TX_BUF_CONT;
-	} else {
-		tx_queue->tso_long_headers++;
-
-		buffer->heap_buf = kmalloc(NET_IP_ALIGN + len, GFP_ATOMIC);
-		if (unlikely(!buffer->heap_buf))
-			return NULL;
-		result = (u8 *)buffer->heap_buf + NET_IP_ALIGN;
-		buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_HEAP;
-	}
-
-	buffer->len = len;
-
-	return result;
-}
-
-/**
- * efx_tx_queue_insert - push descriptors onto the TX queue
- * @tx_queue:		Efx TX queue
- * @dma_addr:		DMA address of fragment
- * @len:		Length of fragment
- * @final_buffer:	The final buffer inserted into the queue
- *
- * Push descriptors onto the TX queue.
- */
-static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
-				dma_addr_t dma_addr, unsigned len,
-				struct efx_tx_buffer **final_buffer)
-{
-	struct efx_tx_buffer *buffer;
-	struct efx_nic *efx = tx_queue->efx;
-	unsigned dma_len;
-
-	EFX_BUG_ON_PARANOID(len <= 0);
-
-	while (1) {
-		buffer = efx_tx_queue_get_insert_buffer(tx_queue);
-		++tx_queue->insert_count;
-
-		EFX_BUG_ON_PARANOID(tx_queue->insert_count -
-				    tx_queue->read_count >=
-				    efx->txq_entries);
-
-		buffer->dma_addr = dma_addr;
-
-		dma_len = efx_max_tx_len(efx, dma_addr);
-
-		/* If there is enough space to send then do so */
-		if (dma_len >= len)
-			break;
-
-		buffer->len = dma_len;
-		buffer->flags = EFX_TX_BUF_CONT;
-		dma_addr += dma_len;
-		len -= dma_len;
-	}
-
-	EFX_BUG_ON_PARANOID(!len);
-	buffer->len = len;
-	*final_buffer = buffer;
-}
-
-
-/*
- * Put a TSO header into the TX queue.
- *
- * This is special-cased because we know that it is small enough to fit in
- * a single fragment, and we know it doesn't cross a page boundary.  It
- * also allows us to not worry about end-of-packet etc.
- */
-static int efx_tso_put_header(struct efx_tx_queue *tx_queue,
-			      struct efx_tx_buffer *buffer, u8 *header)
-{
-	if (unlikely(buffer->flags & EFX_TX_BUF_HEAP)) {
-		buffer->dma_addr = dma_map_single(&tx_queue->efx->pci_dev->dev,
-						  header, buffer->len,
-						  DMA_TO_DEVICE);
-		if (unlikely(dma_mapping_error(&tx_queue->efx->pci_dev->dev,
-					       buffer->dma_addr))) {
-			kfree(buffer->heap_buf);
-			buffer->len = 0;
-			buffer->flags = 0;
-			return -ENOMEM;
-		}
-		buffer->unmap_len = buffer->len;
-		buffer->dma_offset = 0;
-		buffer->flags |= EFX_TX_BUF_MAP_SINGLE;
-	}
-
-	++tx_queue->insert_count;
-	return 0;
-}
-
-
-/* Remove buffers put into a tx_queue.  None of the buffers must have
- * an skb attached.
- */
-static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue,
-			       unsigned int insert_count)
-{
-	struct efx_tx_buffer *buffer;
-
-	/* Work backwards until we hit the original insert pointer value */
-	while (tx_queue->insert_count != insert_count) {
-		--tx_queue->insert_count;
-		buffer = __efx_tx_queue_get_insert_buffer(tx_queue);
-		efx_dequeue_buffer(tx_queue, buffer, NULL, NULL);
-	}
-}
-
-
-/* Parse the SKB header and initialise state. */
-static int tso_start(struct tso_state *st, struct efx_nic *efx,
-		     struct efx_tx_queue *tx_queue,
-		     const struct sk_buff *skb)
-{
-	struct device *dma_dev = &efx->pci_dev->dev;
-	unsigned int header_len, in_len;
-	bool use_opt_desc = false;
-	dma_addr_t dma_addr;
-
-	if (tx_queue->tso_version == 1)
-		use_opt_desc = true;
-
-	st->ip_off = skb_network_header(skb) - skb->data;
-	st->tcp_off = skb_transport_header(skb) - skb->data;
-	header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
-	in_len = skb_headlen(skb) - header_len;
-	st->header_len = header_len;
-	st->in_len = in_len;
-	if (st->protocol == htons(ETH_P_IP)) {
-		st->ip_base_len = st->header_len - st->ip_off;
-		st->ipv4_id = ntohs(ip_hdr(skb)->id);
-	} else {
-		st->ip_base_len = st->header_len - st->tcp_off;
-		st->ipv4_id = 0;
-	}
-	st->seqnum = ntohl(tcp_hdr(skb)->seq);
-
-	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
-	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
-	EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
-
-	st->out_len = skb->len - header_len;
-
-	if (!use_opt_desc) {
-		st->header_unmap_len = 0;
-
-		if (likely(in_len == 0)) {
-			st->dma_flags = 0;
-			st->unmap_len = 0;
-			return 0;
-		}
-
-		dma_addr = dma_map_single(dma_dev, skb->data + header_len,
-					  in_len, DMA_TO_DEVICE);
-		st->dma_flags = EFX_TX_BUF_MAP_SINGLE;
-		st->dma_addr = dma_addr;
-		st->unmap_addr = dma_addr;
-		st->unmap_len = in_len;
-	} else {
-		dma_addr = dma_map_single(dma_dev, skb->data,
-					  skb_headlen(skb), DMA_TO_DEVICE);
-		st->header_dma_addr = dma_addr;
-		st->header_unmap_len = skb_headlen(skb);
-		st->dma_flags = 0;
-		st->dma_addr = dma_addr + header_len;
-		st->unmap_len = 0;
-	}
-
-	return unlikely(dma_mapping_error(dma_dev, dma_addr)) ? -ENOMEM : 0;
-}
-
-static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
-			    skb_frag_t *frag)
-{
-	st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
-					  skb_frag_size(frag), DMA_TO_DEVICE);
-	if (likely(!dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr))) {
-		st->dma_flags = 0;
-		st->unmap_len = skb_frag_size(frag);
-		st->in_len = skb_frag_size(frag);
-		st->dma_addr = st->unmap_addr;
-		return 0;
-	}
-	return -ENOMEM;
-}
-
-
-/**
- * tso_fill_packet_with_fragment - form descriptors for the current fragment
- * @tx_queue:		Efx TX queue
- * @skb:		Socket buffer
- * @st:			TSO state
- *
- * Form descriptors for the current fragment, until we reach the end
- * of fragment or end-of-packet.
- */
-static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
-					  const struct sk_buff *skb,
-					  struct tso_state *st)
-{
-	struct efx_tx_buffer *buffer;
-	int n;
-
-	if (st->in_len == 0)
-		return;
-	if (st->packet_space == 0)
-		return;
-
-	EFX_BUG_ON_PARANOID(st->in_len <= 0);
-	EFX_BUG_ON_PARANOID(st->packet_space <= 0);
-
-	n = min(st->in_len, st->packet_space);
-
-	st->packet_space -= n;
-	st->out_len -= n;
-	st->in_len -= n;
-
-	efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
-
-	if (st->out_len == 0) {
-		/* Transfer ownership of the skb */
-		buffer->skb = skb;
-		buffer->flags = EFX_TX_BUF_SKB;
-	} else if (st->packet_space != 0) {
-		buffer->flags = EFX_TX_BUF_CONT;
-	}
-
-	if (st->in_len == 0) {
-		/* Transfer ownership of the DMA mapping */
-		buffer->unmap_len = st->unmap_len;
-		buffer->dma_offset = buffer->unmap_len - buffer->len;
-		buffer->flags |= st->dma_flags;
-		st->unmap_len = 0;
-	}
-
-	st->dma_addr += n;
-}
-
-
-/**
- * tso_start_new_packet - generate a new header and prepare for the new packet
- * @tx_queue:		Efx TX queue
- * @skb:		Socket buffer
- * @st:			TSO state
- *
- * Generate a new header and prepare for the new packet.  Return 0 on
- * success, or -%ENOMEM if failed to alloc header.
- */
-static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
-				const struct sk_buff *skb,
-				struct tso_state *st)
-{
-	struct efx_tx_buffer *buffer =
-		efx_tx_queue_get_insert_buffer(tx_queue);
-	bool is_last = st->out_len <= skb_shinfo(skb)->gso_size;
-	u8 tcp_flags_clear;
-
-	if (!is_last) {
-		st->packet_space = skb_shinfo(skb)->gso_size;
-		tcp_flags_clear = 0x09; /* mask out FIN and PSH */
-	} else {
-		st->packet_space = st->out_len;
-		tcp_flags_clear = 0x00;
-	}
-
-	if (!st->header_unmap_len) {
-		/* Allocate and insert a DMA-mapped header buffer. */
-		struct tcphdr *tsoh_th;
-		unsigned ip_length;
-		u8 *header;
-		int rc;
-
-		header = efx_tsoh_get_buffer(tx_queue, buffer, st->header_len);
-		if (!header)
-			return -ENOMEM;
-
-		tsoh_th = (struct tcphdr *)(header + st->tcp_off);
-
-		/* Copy and update the headers. */
-		memcpy(header, skb->data, st->header_len);
-
-		tsoh_th->seq = htonl(st->seqnum);
-		((u8 *)tsoh_th)[13] &= ~tcp_flags_clear;
-
-		ip_length = st->ip_base_len + st->packet_space;
-
-		if (st->protocol == htons(ETH_P_IP)) {
-			struct iphdr *tsoh_iph =
-				(struct iphdr *)(header + st->ip_off);
-
-			tsoh_iph->tot_len = htons(ip_length);
-			tsoh_iph->id = htons(st->ipv4_id);
-		} else {
-			struct ipv6hdr *tsoh_iph =
-				(struct ipv6hdr *)(header + st->ip_off);
-
-			tsoh_iph->payload_len = htons(ip_length);
-		}
-
-		rc = efx_tso_put_header(tx_queue, buffer, header);
-		if (unlikely(rc))
-			return rc;
-	} else {
-		/* Send the original headers with a TSO option descriptor
-		 * in front
-		 */
-		u8 tcp_flags = ((u8 *)tcp_hdr(skb))[13] & ~tcp_flags_clear;
-
-		buffer->flags = EFX_TX_BUF_OPTION;
-		buffer->len = 0;
-		buffer->unmap_len = 0;
-		EFX_POPULATE_QWORD_5(buffer->option,
-				     ESF_DZ_TX_DESC_IS_OPT, 1,
-				     ESF_DZ_TX_OPTION_TYPE,
-				     ESE_DZ_TX_OPTION_DESC_TSO,
-				     ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags,
-				     ESF_DZ_TX_TSO_IP_ID, st->ipv4_id,
-				     ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum);
-		++tx_queue->insert_count;
-
-		/* We mapped the headers in tso_start().  Unmap them
-		 * when the last segment is completed.
-		 */
-		buffer = efx_tx_queue_get_insert_buffer(tx_queue);
-		buffer->dma_addr = st->header_dma_addr;
-		buffer->len = st->header_len;
-		if (is_last) {
-			buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE;
-			buffer->unmap_len = st->header_unmap_len;
-			buffer->dma_offset = 0;
-			/* Ensure we only unmap them once in case of a
-			 * later DMA mapping error and rollback
-			 */
-			st->header_unmap_len = 0;
-		} else {
-			buffer->flags = EFX_TX_BUF_CONT;
-			buffer->unmap_len = 0;
-		}
-		++tx_queue->insert_count;
-	}
-
-	st->seqnum += skb_shinfo(skb)->gso_size;
-
-	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
-	++st->ipv4_id;
-
-	++tx_queue->tso_packets;
-
-	++tx_queue->tx_packets;
-
-	return 0;
-}
-
-
-/**
- * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
- * @tx_queue:		Efx TX queue
- * @skb:		Socket buffer
- *
- * Context: You must hold netif_tx_lock() to call this function.
- *
- * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
- * @skb was not enqueued.  In all cases @skb is consumed.  Return
- * %NETDEV_TX_OK.
- */
-static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
-			       struct sk_buff *skb)
-{
-	struct efx_nic *efx = tx_queue->efx;
-	unsigned int old_insert_count = tx_queue->insert_count;
-	int frag_i, rc;
-	struct tso_state state;
-
-	/* Find the packet protocol and sanity-check it */
-	state.protocol = efx_tso_check_protocol(skb);
-
-	rc = tso_start(&state, efx, tx_queue, skb);
-	if (rc)
-		goto mem_err;
-
-	if (likely(state.in_len == 0)) {
-		/* Grab the first payload fragment. */
-		EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
-		frag_i = 0;
-		rc = tso_get_fragment(&state, efx,
-				      skb_shinfo(skb)->frags + frag_i);
-		if (rc)
-			goto mem_err;
-	} else {
-		/* Payload starts in the header area. */
-		frag_i = -1;
-	}
-
-	if (tso_start_new_packet(tx_queue, skb, &state) < 0)
-		goto mem_err;
-
-	while (1) {
-		tso_fill_packet_with_fragment(tx_queue, skb, &state);
-
-		/* Move onto the next fragment? */
-		if (state.in_len == 0) {
-			if (++frag_i >= skb_shinfo(skb)->nr_frags)
-				/* End of payload reached. */
-				break;
-			rc = tso_get_fragment(&state, efx,
-					      skb_shinfo(skb)->frags + frag_i);
-			if (rc)
-				goto mem_err;
-		}
-
-		/* Start at new packet? */
-		if (state.packet_space == 0 &&
-		    tso_start_new_packet(tx_queue, skb, &state) < 0)
-			goto mem_err;
-	}
-
-	netdev_tx_sent_queue(tx_queue->core_txq, skb->len);
-
-	efx_tx_maybe_stop_queue(tx_queue);
-
-	/* Pass off to hardware */
-	if (!skb->xmit_more || netif_xmit_stopped(tx_queue->core_txq)) {
-		struct efx_tx_queue *txq2 = efx_tx_queue_partner(tx_queue);
-
-		/* There could be packets left on the partner queue if those
-		 * SKBs had skb->xmit_more set. If we do not push those they
-		 * could be left for a long time and cause a netdev watchdog.
-		 */
-		if (txq2->xmit_more_available)
-			efx_nic_push_buffers(txq2);
-
-		efx_nic_push_buffers(tx_queue);
-	} else {
-		tx_queue->xmit_more_available = skb->xmit_more;
-	}
-
-	tx_queue->tso_bursts++;
-	return NETDEV_TX_OK;
-
- mem_err:
-	netif_err(efx, tx_err, efx->net_dev,
-		  "Out of memory for TSO headers, or DMA mapping error\n");
-	dev_kfree_skb_any(skb);
-
-	/* Free the DMA mapping we were in the process of writing out */
-	if (state.unmap_len) {
-		if (state.dma_flags & EFX_TX_BUF_MAP_SINGLE)
-			dma_unmap_single(&efx->pci_dev->dev, state.unmap_addr,
-					 state.unmap_len, DMA_TO_DEVICE);
-		else
-			dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr,
-				       state.unmap_len, DMA_TO_DEVICE);
-	}
-
-	/* Free the header DMA mapping, if using option descriptors */
-	if (state.header_unmap_len)
-		dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr,
-				 state.header_unmap_len, DMA_TO_DEVICE);
-
-	efx_enqueue_unwind(tx_queue, old_insert_count);
-	return NETDEV_TX_OK;
-}
diff --git a/drivers/net/ethernet/sfc/tx.h b/drivers/net/ethernet/sfc/tx.h
new file mode 100644
index 000000000000..1cccc97ec676
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx.h
@@ -0,0 +1,27 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2006-2015 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#ifndef EFX_TX_H
+#define EFX_TX_H
+
+#include <linux/types.h>
+
+/* Driver internal tx-path related declarations. */
+
+unsigned int efx_tx_limit_len(struct efx_tx_queue *tx_queue,
+			      dma_addr_t dma_addr, unsigned int len);
+
+u8 *efx_tx_get_copy_buffer_limited(struct efx_tx_queue *tx_queue,
+				   struct efx_tx_buffer *buffer, size_t len);
+
+int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue, struct sk_buff *skb,
+			bool *data_mapped);
+
+#endif /* EFX_TX_H */
diff --git a/drivers/net/ethernet/sfc/tx_tso.c b/drivers/net/ethernet/sfc/tx_tso.c
new file mode 100644
index 000000000000..e0cbda9ae859
--- /dev/null
+++ b/drivers/net/ethernet/sfc/tx_tso.c
@@ -0,0 +1,451 @@
+/****************************************************************************
+ * Driver for Solarflare network controllers and boards
+ * Copyright 2005-2006 Fen Systems Ltd.
+ * Copyright 2005-2015 Solarflare Communications Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation, incorporated herein by reference.
+ */
+
+#include <linux/pci.h>
+#include <linux/tcp.h>
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/ipv6.h>
+#include <linux/slab.h>
+#include <net/ipv6.h>
+#include <linux/if_ether.h>
+#include <linux/highmem.h>
+#include <linux/moduleparam.h>
+#include <linux/cache.h>
+#include "net_driver.h"
+#include "efx.h"
+#include "io.h"
+#include "nic.h"
+#include "tx.h"
+#include "workarounds.h"
+#include "ef10_regs.h"
+
+/* Efx legacy TCP segmentation acceleration.
+ *
+ * Utilises firmware support to go faster than GSO (but not as fast as TSOv2).
+ *
+ * Requires TX checksum offload support.
+ */
+
+#define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))	/* p1 - p2 in bytes */
+
+/**
+ * struct tso_state - TSO state for an SKB
+ * @out_len: Remaining payload length of the SKB not yet queued
+ * @seqnum: Current sequence number
+ * @ipv4_id: Current IPv4 ID, host endian
+ * @packet_space: Remaining space in current packet
+ * @dma_addr: DMA address of current position
+ * @in_len: Remaining length in current SKB fragment (or linear area)
+ * @unmap_len: Length of mapped SKB fragment still owned here, else 0
+ * @unmap_addr: DMA address of SKB fragment
+ * @protocol: Network protocol (after any VLAN header)
+ * @ip_off: Offset of IP header from skb->data
+ * @tcp_off: Offset of TCP header from skb->data
+ * @header_len: Number of bytes of header
+ * @ip_base_len: IPv4 tot_len or IPv6 payload_len, before TCP payload
+ * @header_dma_addr: Header DMA address
+ * @header_unmap_len: Header DMA mapped length, 0 once a buffer owns it
+ *
+ * The state used during segmentation.  It is put into this data structure
+ * just to make it easy to pass into inline functions.
+ */
+struct tso_state {
+	/* Output position */
+	unsigned int out_len;
+	unsigned int seqnum;
+	u16 ipv4_id;
+	unsigned int packet_space;
+
+	/* Input position */
+	dma_addr_t dma_addr;
+	unsigned int in_len;
+	unsigned int unmap_len;
+	dma_addr_t unmap_addr;
+
+	__be16 protocol;
+	unsigned int ip_off;
+	unsigned int tcp_off;
+	unsigned int header_len;
+	unsigned int ip_base_len;
+	dma_addr_t header_dma_addr;
+	unsigned int header_unmap_len;
+};
+
+/* Prefetch the buffer and txd ring entries at the current insert index. */
+static inline void prefetch_ptr(struct efx_tx_queue *tx_queue)
+{
+	unsigned int idx = efx_tx_queue_get_insert_index(tx_queue);
+	efx_qword_t *ring = (efx_qword_t *)tx_queue->txd.buf.addr;
+	char *sw_buf = (char *)&tx_queue->buffer[idx];
+	char *hw_desc = (char *)&ring[idx];
+
+	prefetch(sw_buf);
+	prefetch(sw_buf + 0x80);
+	prefetch(hw_desc);
+	prefetch(hw_desc + 0x80);
+}
+
+/**
+ * efx_tx_queue_insert - push descriptors onto the TX queue
+ * @tx_queue:		Efx TX queue
+ * @dma_addr:		DMA address of fragment
+ * @len:		Length of fragment
+ * @final_buffer:	Set to the last buffer inserted into the queue
+ *
+ * Split the fragment into as many buffers as tx_limit_len() requires.
+ */
+static void efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
+				dma_addr_t dma_addr, unsigned int len,
+				struct efx_tx_buffer **final_buffer)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	struct efx_tx_buffer *buffer;
+	unsigned int chunk;
+
+	EFX_WARN_ON_ONCE_PARANOID(len <= 0);
+
+	for (;;) {
+		buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+		++tx_queue->insert_count;
+
+		EFX_WARN_ON_ONCE_PARANOID(tx_queue->insert_count -
+					  tx_queue->read_count >=
+					  efx->txq_entries);
+
+		buffer->dma_addr = dma_addr;
+		chunk = efx->type->tx_limit_len(tx_queue, dma_addr, len);
+
+		/* Everything that is left fits: this is the last buffer. */
+		if (chunk >= len)
+			break;
+
+		/* Emit a continuation buffer and advance past it. */
+		buffer->len = chunk;
+		buffer->flags = EFX_TX_BUF_CONT;
+		dma_addr += chunk;
+		len -= chunk;
+	}
+
+	EFX_WARN_ON_ONCE_PARANOID(!len);
+	buffer->len = len;
+	*final_buffer = buffer;
+}
+
+/*
+ * Sanity-check the assumptions TSO makes about the sk_buff: Ethernet
+ * (optionally 802.1Q-tagged) carrying TCP over IPv4 or IPv6, with the TCP
+ * header inside skb_headlen().  Return the network protocol number.
+ */
+static __be16 efx_tso_check_protocol(struct sk_buff *skb)
+{
+	struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+	__be16 proto = skb->protocol;
+
+	EFX_WARN_ON_ONCE_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
+				  proto);
+	/* Look through a VLAN tag to the encapsulated protocol */
+	if (proto == htons(ETH_P_8021Q))
+		proto = veh->h_vlan_encapsulated_proto;
+
+	if (proto != htons(ETH_P_IP)) {
+		EFX_WARN_ON_ONCE_PARANOID(proto != htons(ETH_P_IPV6));
+		EFX_WARN_ON_ONCE_PARANOID(ipv6_hdr(skb)->nexthdr != NEXTHDR_TCP);
+	} else {
+		EFX_WARN_ON_ONCE_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
+	}
+	EFX_WARN_ON_ONCE_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data) +
+				   (tcp_hdr(skb)->doff << 2u)) >
+				  skb_headlen(skb));
+
+	return proto;
+}
+
+/* Parse the SKB header, DMA-map it and initialise state; 0 or -ENOMEM. */
+static int tso_start(struct tso_state *st, struct efx_nic *efx,
+		     struct efx_tx_queue *tx_queue,
+		     const struct sk_buff *skb)
+{
+	struct device *dma_dev = &efx->pci_dev->dev;
+	unsigned int header_len;
+	dma_addr_t dma_addr;
+
+	st->ip_off = skb_network_header(skb) - skb->data;
+	st->tcp_off = skb_transport_header(skb) - skb->data;
+	header_len = st->tcp_off + (tcp_hdr(skb)->doff << 2u);
+	st->header_len = header_len;
+	st->in_len = skb_headlen(skb) - header_len;
+	if (st->protocol == htons(ETH_P_IP)) {
+		st->ip_base_len = st->header_len - st->ip_off;
+		st->ipv4_id = ntohs(ip_hdr(skb)->id);
+	} else {
+		st->ip_base_len = st->header_len - st->tcp_off;
+		st->ipv4_id = 0;
+	}
+	st->seqnum = ntohl(tcp_hdr(skb)->seq);
+	st->out_len = skb->len - header_len;	/* total payload to send */
+
+	EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->urg);
+	EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->syn);
+	EFX_WARN_ON_ONCE_PARANOID(tcp_hdr(skb)->rst);
+
+	dma_addr = dma_map_single(dma_dev, skb->data,
+				  skb_headlen(skb), DMA_TO_DEVICE);
+	st->header_dma_addr = dma_addr;
+	st->dma_addr = dma_addr + header_len;
+	st->unmap_len = 0;
+	st->header_unmap_len = 0;	/* never unmap a failed mapping */
+	if (unlikely(dma_mapping_error(dma_dev, dma_addr)))
+		return -ENOMEM;
+	st->header_unmap_len = skb_headlen(skb);
+	return 0;
+}
+
+/* DMA-map @frag and make it the current input; 0 or -ENOMEM on failure. */
+static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
+			    skb_frag_t *frag)
+{
+	st->unmap_addr = skb_frag_dma_map(&efx->pci_dev->dev, frag, 0,
+					  skb_frag_size(frag), DMA_TO_DEVICE);
+	if (unlikely(dma_mapping_error(&efx->pci_dev->dev, st->unmap_addr)))
+		return -ENOMEM;
+
+	st->dma_addr = st->unmap_addr;
+	st->unmap_len = skb_frag_size(frag);
+	st->in_len = skb_frag_size(frag);
+	return 0;
+}
+
+/**
+ * tso_fill_packet_with_fragment - form descriptors for the current fragment
+ * @tx_queue:		Efx TX queue
+ * @skb:		Socket buffer
+ * @st:			TSO state
+ *
+ * Form descriptors for the current fragment, until we reach the end
+ * of fragment or end-of-packet.  Does nothing if either is already reached.
+ */
+static void tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
+					  const struct sk_buff *skb,
+					  struct tso_state *st)
+{
+	struct efx_tx_buffer *buffer;
+	int n;
+
+	if (st->in_len == 0)
+		return;
+	if (st->packet_space == 0)
+		return;
+
+	EFX_WARN_ON_ONCE_PARANOID(st->in_len <= 0);
+	EFX_WARN_ON_ONCE_PARANOID(st->packet_space <= 0);
+	/* Queue as much as both the fragment and the packet allow */
+	n = min(st->in_len, st->packet_space);
+
+	st->packet_space -= n;
+	st->out_len -= n;
+	st->in_len -= n;
+
+	efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
+
+	if (st->out_len == 0) {
+		/* Last payload byte queued: this buffer now owns the skb */
+		buffer->skb = skb;
+		buffer->flags = EFX_TX_BUF_SKB;
+	} else if (st->packet_space != 0) {
+		buffer->flags = EFX_TX_BUF_CONT; /* packet has room left */
+	}
+
+	if (st->in_len == 0) {
+		/* Fragment fully queued: buffer now owns its DMA mapping */
+		buffer->unmap_len = st->unmap_len;
+		buffer->dma_offset = buffer->unmap_len - buffer->len;
+		st->unmap_len = 0;
+	}
+
+	st->dma_addr += n;
+}
+
+
+#define TCP_FLAGS_OFFSET 13	/* flags byte within the TCP header */
+
+/**
+ * tso_start_new_packet - generate a new header and prepare for the new packet
+ * @tx_queue:		Efx TX queue
+ * @skb:		Socket buffer
+ * @st:			TSO state
+ *
+ * Queue a TSO option descriptor and a header descriptor for the next
+ * packet.  Return 0 on success, or -%EINVAL if no header mapping remains.
+ */
+static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
+				const struct sk_buff *skb,
+				struct tso_state *st)
+{
+	struct efx_tx_buffer *buffer =
+		efx_tx_queue_get_insert_buffer(tx_queue);
+	bool is_last = st->out_len <= skb_shinfo(skb)->gso_size;
+	u8 tcp_flags_mask, tcp_flags;
+
+	if (!is_last) {
+		st->packet_space = skb_shinfo(skb)->gso_size;
+		tcp_flags_mask = 0x09; /* mask out FIN and PSH */
+	} else {
+		st->packet_space = st->out_len;
+		tcp_flags_mask = 0x00;	/* last packet keeps all flags */
+	}
+
+	if (WARN_ON(!st->header_unmap_len))
+		return -EINVAL;
+	/* Send the original headers with a TSO option descriptor
+	 * in front
+	 */
+	tcp_flags = ((u8 *)tcp_hdr(skb))[TCP_FLAGS_OFFSET] & ~tcp_flags_mask;
+
+	buffer->flags = EFX_TX_BUF_OPTION;
+	buffer->len = 0;
+	buffer->unmap_len = 0;
+	EFX_POPULATE_QWORD_5(buffer->option,
+			     ESF_DZ_TX_DESC_IS_OPT, 1,
+			     ESF_DZ_TX_OPTION_TYPE,
+			     ESE_DZ_TX_OPTION_DESC_TSO,
+			     ESF_DZ_TX_TSO_TCP_FLAGS, tcp_flags,
+			     ESF_DZ_TX_TSO_IP_ID, st->ipv4_id,
+			     ESF_DZ_TX_TSO_TCP_SEQNO, st->seqnum);
+	++tx_queue->insert_count;
+
+	/* We mapped the headers in tso_start().  Unmap them
+	 * when the last segment is completed.
+	 */
+	buffer = efx_tx_queue_get_insert_buffer(tx_queue);
+	buffer->dma_addr = st->header_dma_addr;
+	buffer->len = st->header_len;
+	if (is_last) {
+		buffer->flags = EFX_TX_BUF_CONT | EFX_TX_BUF_MAP_SINGLE;
+		buffer->unmap_len = st->header_unmap_len;
+		buffer->dma_offset = 0;
+		/* Ensure we only unmap them once in case of a
+		 * later DMA mapping error and rollback
+		 */
+		st->header_unmap_len = 0;
+	} else {
+		buffer->flags = EFX_TX_BUF_CONT;
+		buffer->unmap_len = 0;
+	}
+	++tx_queue->insert_count;
+
+	st->seqnum += skb_shinfo(skb)->gso_size;
+
+	/* Linux leaves suitable gaps in the IP ID space for us to fill. */
+	++st->ipv4_id;
+
+	return 0;
+}
+
+/**
+ * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
+ * @tx_queue:		Efx TX queue
+ * @skb:		Socket buffer
+ * @data_mapped:        Did we map the data? Always set to true
+ *                      by this on success.
+ *
+ * Context: You must hold netif_tx_lock() to call this function.
+ *
+ * Add socket buffer @skb to @tx_queue, doing TSO or return != 0 if
+ * @skb was not enqueued.  @skb is consumed unless return value is
+ * -%EINVAL.  NOTE(review): no failure path here frees @skb; verify caller.
+ */
+int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
+			struct sk_buff *skb,
+			bool *data_mapped)
+{
+	struct efx_nic *efx = tx_queue->efx;
+	int frag_i, rc;
+	struct tso_state state;
+
+	if (tx_queue->tso_version != 1)
+		return -EINVAL;	/* only TSO version 1 is handled here */
+
+	prefetch(skb->data);
+
+	/* Find the packet protocol and sanity-check it */
+	state.protocol = efx_tso_check_protocol(skb);
+
+	EFX_WARN_ON_ONCE_PARANOID(tx_queue->write_count != tx_queue->insert_count);
+
+	rc = tso_start(&state, efx, tx_queue, skb);
+	if (rc)
+		goto fail;
+
+	if (likely(state.in_len == 0)) {
+		/* Grab the first payload fragment. */
+		EFX_WARN_ON_ONCE_PARANOID(skb_shinfo(skb)->nr_frags < 1);
+		frag_i = 0;
+		rc = tso_get_fragment(&state, efx,
+				      skb_shinfo(skb)->frags + frag_i);
+		if (rc)
+			goto fail;
+	} else {
+		/* Payload starts in the header area. */
+		frag_i = -1;
+	}
+
+	rc = tso_start_new_packet(tx_queue, skb, &state);
+	if (rc)
+		goto fail;
+
+	prefetch_ptr(tx_queue);
+
+	while (1) {
+		tso_fill_packet_with_fragment(tx_queue, skb, &state);
+
+		/* Move onto the next fragment? */
+		if (state.in_len == 0) {
+			if (++frag_i >= skb_shinfo(skb)->nr_frags)
+				/* End of payload reached. */
+				break;
+			rc = tso_get_fragment(&state, efx,
+					      skb_shinfo(skb)->frags + frag_i);
+			if (rc)
+				goto fail;
+		}
+
+		/* Start at new packet? */
+		if (state.packet_space == 0) {
+			rc = tso_start_new_packet(tx_queue, skb, &state);
+			if (rc)
+				goto fail;
+		}
+	}
+
+	*data_mapped = true;
+
+	return 0;
+
+fail:
+	if (rc == -ENOMEM)
+		netif_err(efx, tx_err, efx->net_dev,
+			  "Out of memory for TSO headers, or DMA mapping error\n");
+	else
+		netif_err(efx, tx_err, efx->net_dev, "TSO failed, rc = %d\n", rc);
+
+	/* Free the DMA mapping we were in the process of writing out */
+	if (state.unmap_len) {
+		dma_unmap_page(&efx->pci_dev->dev, state.unmap_addr,
+			       state.unmap_len, DMA_TO_DEVICE);
+	}
+
+	/* Free the header DMA mapping, unless a queued buffer now owns it */
+	if (state.header_unmap_len)
+		dma_unmap_single(&efx->pci_dev->dev, state.header_dma_addr,
+				 state.header_unmap_len, DMA_TO_DEVICE);
+
+	return rc;
+}
diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h
index 351cd14cb9f9..103f827a1623 100644
--- a/drivers/net/ethernet/sfc/workarounds.h
+++ b/drivers/net/ethernet/sfc/workarounds.h
@@ -15,35 +15,14 @@
  * Bug numbers are from Solarflare's Bugzilla.
  */
 
-#define EFX_WORKAROUND_FALCON_A(efx) (efx_nic_rev(efx) <= EFX_REV_FALCON_A1)
-#define EFX_WORKAROUND_FALCON_AB(efx) (efx_nic_rev(efx) <= EFX_REV_FALCON_B0)
 #define EFX_WORKAROUND_SIENA(efx) (efx_nic_rev(efx) == EFX_REV_SIENA_A0)
 #define EFX_WORKAROUND_10G(efx) 1
 
 /* Bit-bashed I2C reads cause performance drop */
 #define EFX_WORKAROUND_7884 EFX_WORKAROUND_10G
-/* Truncated IPv4 packets can confuse the TX packet parser */
-#define EFX_WORKAROUND_15592 EFX_WORKAROUND_FALCON_AB
 /* Legacy interrupt storm when interrupt fifo fills */
 #define EFX_WORKAROUND_17213 EFX_WORKAROUND_SIENA
 
-/* Spurious parity errors in TSORT buffers */
-#define EFX_WORKAROUND_5129 EFX_WORKAROUND_FALCON_A
-/* Unaligned read request >512 bytes after aligning may break TSORT */
-#define EFX_WORKAROUND_5391 EFX_WORKAROUND_FALCON_A
-/* iSCSI parsing errors */
-#define EFX_WORKAROUND_5583 EFX_WORKAROUND_FALCON_A
-/* RX events go missing */
-#define EFX_WORKAROUND_5676 EFX_WORKAROUND_FALCON_A
-/* RX_RESET on A1 */
-#define EFX_WORKAROUND_6555 EFX_WORKAROUND_FALCON_A
-/* Increase filter depth to avoid RX_RESET */
-#define EFX_WORKAROUND_7244 EFX_WORKAROUND_FALCON_A
-/* Flushes may never complete */
-#define EFX_WORKAROUND_7803 EFX_WORKAROUND_FALCON_AB
-/* Leak overlength packets rather than free */
-#define EFX_WORKAROUND_8071 EFX_WORKAROUND_FALCON_A
-
 /* Lockup when writing event block registers at gen2/gen3 */
 #define EFX_EF10_WORKAROUND_35388(efx)					\
 	(((struct efx_ef10_nic_data *)efx->nic_data)->workaround_35388)
diff --git a/drivers/net/ethernet/sgi/ioc3-eth.c b/drivers/net/ethernet/sgi/ioc3-eth.c
index 7a254da85dd7..42051ab98cf0 100644
--- a/drivers/net/ethernet/sgi/ioc3-eth.c
+++ b/drivers/net/ethernet/sgi/ioc3-eth.c
@@ -1225,7 +1225,6 @@ static const struct net_device_ops ioc3_netdev_ops = {
 	.ndo_do_ioctl		= ioc3_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= ioc3_set_mac_address,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int ioc3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
diff --git a/drivers/net/ethernet/sgi/meth.c b/drivers/net/ethernet/sgi/meth.c
index aaa80f13859b..69d2d30e5ef1 100644
--- a/drivers/net/ethernet/sgi/meth.c
+++ b/drivers/net/ethernet/sgi/meth.c
@@ -815,7 +815,6 @@ static const struct net_device_ops meth_netdev_ops = {
 	.ndo_start_xmit		= meth_tx,
 	.ndo_do_ioctl		= meth_ioctl,
 	.ndo_tx_timeout		= meth_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_set_rx_mode    	= meth_set_rx_mode,
diff --git a/drivers/net/ethernet/silan/sc92031.c b/drivers/net/ethernet/silan/sc92031.c
index 7426f8b21252..6c2e2b311c16 100644
--- a/drivers/net/ethernet/silan/sc92031.c
+++ b/drivers/net/ethernet/silan/sc92031.c
@@ -1386,7 +1386,6 @@ static const struct net_device_ops sc92031_netdev_ops = {
 	.ndo_open		= sc92031_open,
 	.ndo_stop		= sc92031_stop,
 	.ndo_set_rx_mode	= sc92031_set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_tx_timeout		= sc92031_tx_timeout,
diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c
index 27be6c869315..210e35d079dd 100644
--- a/drivers/net/ethernet/sis/sis190.c
+++ b/drivers/net/ethernet/sis/sis190.c
@@ -1833,7 +1833,6 @@ static const struct net_device_ops sis190_netdev_ops = {
 	.ndo_start_xmit		= sis190_start_xmit,
 	.ndo_tx_timeout		= sis190_tx_timeout,
 	.ndo_set_rx_mode	= sis190_set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= sis190_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c
index 6f85276376e8..39fca6c0b68d 100644
--- a/drivers/net/ethernet/sis/sis900.c
+++ b/drivers/net/ethernet/sis/sis900.c
@@ -400,7 +400,6 @@ static const struct net_device_ops sis900_netdev_ops = {
 	.ndo_start_xmit		= sis900_start_xmit,
 	.ndo_set_config		= sis900_set_config,
 	.ndo_set_rx_mode	= set_rx_mode,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_do_ioctl		= mii_ioctl,
diff --git a/drivers/net/ethernet/smsc/epic100.c b/drivers/net/ethernet/smsc/epic100.c
index 7186b89269ad..fe9760ffab51 100644
--- a/drivers/net/ethernet/smsc/epic100.c
+++ b/drivers/net/ethernet/smsc/epic100.c
@@ -313,7 +313,6 @@ static const struct net_device_ops epic_netdev_ops = {
 	.ndo_get_stats		= epic_get_stats,
 	.ndo_set_rx_mode	= set_rx_mode,
 	.ndo_do_ioctl 		= netdev_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/smsc/smc911x.c b/drivers/net/ethernet/smsc/smc911x.c
index cb49c9654f0a..4f19c6166182 100644
--- a/drivers/net/ethernet/smsc/smc911x.c
+++ b/drivers/net/ethernet/smsc/smc911x.c
@@ -1753,7 +1753,6 @@ static const struct net_device_ops smc911x_netdev_ops = {
 	.ndo_start_xmit		= smc911x_hard_start_xmit,
 	.ndo_tx_timeout		= smc911x_timeout,
 	.ndo_set_rx_mode	= smc911x_set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/smsc/smc9194.c b/drivers/net/ethernet/smsc/smc9194.c
index d496888b85d3..c8d84679ede7 100644
--- a/drivers/net/ethernet/smsc/smc9194.c
+++ b/drivers/net/ethernet/smsc/smc9194.c
@@ -809,7 +809,6 @@ static const struct net_device_ops smc_netdev_ops = {
 	.ndo_start_xmit    	= smc_wait_to_send_packet,
 	.ndo_tx_timeout	    	= smc_timeout,
 	.ndo_set_rx_mode	= smc_set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/smsc/smc91c92_cs.c b/drivers/net/ethernet/smsc/smc91c92_cs.c
index db3c696d7002..f1c75e291e55 100644
--- a/drivers/net/ethernet/smsc/smc91c92_cs.c
+++ b/drivers/net/ethernet/smsc/smc91c92_cs.c
@@ -294,7 +294,6 @@ static const struct net_device_ops smc_netdev_ops = {
 	.ndo_set_config 	= s9k_config,
 	.ndo_set_rx_mode	= set_rx_mode,
 	.ndo_do_ioctl		= smc_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c
index 73212590d04a..65077c77082a 100644
--- a/drivers/net/ethernet/smsc/smc91x.c
+++ b/drivers/net/ethernet/smsc/smc91x.c
@@ -602,7 +602,8 @@ static void smc_hardware_send_pkt(unsigned long data)
 	SMC_PUSH_DATA(lp, buf, len & ~1);
 
 	/* Send final ctl word with the last byte if there is one */
-	SMC_outw(((len & 1) ? (0x2000 | buf[len-1]) : 0), ioaddr, DATA_REG(lp));
+	SMC_outw(lp, ((len & 1) ? (0x2000 | buf[len - 1]) : 0), ioaddr,
+		 DATA_REG(lp));
 
 	/*
 	 * If THROTTLE_TX_PKTS is set, we stop the queue here. This will
@@ -1762,7 +1763,6 @@ static const struct net_device_ops smc_netdev_ops = {
 	.ndo_start_xmit		= smc_hard_start_xmit,
 	.ndo_tx_timeout		= smc_timeout,
 	.ndo_set_rx_mode	= smc_set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2326,6 +2326,8 @@ static int smc_drv_probe(struct platform_device *pdev)
 		if (!device_property_read_u32(&pdev->dev, "reg-shift",
 					      &val))
 			lp->io_shift = val;
+		lp->cfg.pxa_u16_align4 =
+			device_property_read_bool(&pdev->dev, "pxa-u16-align4");
 	}
 #endif
 
diff --git a/drivers/net/ethernet/smsc/smc91x.h b/drivers/net/ethernet/smsc/smc91x.h
index ea8465467469..08b17adf0a65 100644
--- a/drivers/net/ethernet/smsc/smc91x.h
+++ b/drivers/net/ethernet/smsc/smc91x.h
@@ -86,11 +86,11 @@
 
 #define SMC_inl(a, r)		readl((a) + (r))
 #define SMC_outb(v, a, r)	writeb(v, (a) + (r))
-#define SMC_outw(v, a, r)						\
+#define SMC_outw(lp, v, a, r)						\
 	do {								\
 		unsigned int __v = v, __smc_r = r;			\
 		if (SMC_16BIT(lp))					\
-			__SMC_outw(__v, a, __smc_r);			\
+			__SMC_outw(lp, __v, a, __smc_r);		\
 		else if (SMC_8BIT(lp))					\
 			SMC_outw_b(__v, a, __smc_r);			\
 		else							\
@@ -107,10 +107,10 @@
 #define SMC_IRQ_FLAGS		(-1)	/* from resource */
 
 /* We actually can't write halfwords properly if not word aligned */
-static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
+static inline void _SMC_outw_align4(u16 val, void __iomem *ioaddr, int reg,
+				    bool use_align4_workaround)
 {
-	if ((machine_is_mainstone() || machine_is_stargate2() ||
-	     machine_is_pxa_idp()) && reg & 2) {
+	if (use_align4_workaround) {
 		unsigned int v = val << 16;
 		v |= readl(ioaddr + (reg & ~2)) & 0xffff;
 		writel(v, ioaddr + (reg & ~2));
@@ -119,6 +119,12 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 	}
 }
 
+#define __SMC_outw(lp, v, a, r)						\
+	_SMC_outw_align4((v), (a), (r),					\
+			 IS_BUILTIN(CONFIG_ARCH_PXA) && ((r) & 2) &&	\
+			 (lp)->cfg.pxa_u16_align4)
+
+
 #elif	defined(CONFIG_SH_SH4202_MICRODEV)
 
 #define SMC_CAN_USE_8BIT	0
@@ -129,7 +135,7 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 #define SMC_inw(a, r)		inw((a) + (r) - 0xa0000000)
 #define SMC_inl(a, r)		inl((a) + (r) - 0xa0000000)
 #define SMC_outb(v, a, r)	outb(v, (a) + (r) - 0xa0000000)
-#define SMC_outw(v, a, r)	outw(v, (a) + (r) - 0xa0000000)
+#define SMC_outw(lp, v, a, r)	outw(v, (a) + (r) - 0xa0000000)
 #define SMC_outl(v, a, r)	outl(v, (a) + (r) - 0xa0000000)
 #define SMC_insl(a, r, p, l)	insl((a) + (r) - 0xa0000000, p, l)
 #define SMC_outsl(a, r, p, l)	outsl((a) + (r) - 0xa0000000, p, l)
@@ -147,7 +153,7 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 #define SMC_inb(a, r)		inb(((u32)a) + (r))
 #define SMC_inw(a, r)		inw(((u32)a) + (r))
 #define SMC_outb(v, a, r)	outb(v, ((u32)a) + (r))
-#define SMC_outw(v, a, r)	outw(v, ((u32)a) + (r))
+#define SMC_outw(lp, v, a, r)	outw(v, ((u32)a) + (r))
 #define SMC_insw(a, r, p, l)	insw(((u32)a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)	outsw(((u32)a) + (r), p, l)
 
@@ -175,7 +181,7 @@ static inline void __SMC_outw(u16 val, void __iomem *ioaddr, int reg)
 #define SMC_inw(a, r)           readw((a) + (r))
 #define SMC_inl(a, r)           readl((a) + (r))
 #define SMC_outb(v, a, r)       writeb(v, (a) + (r))
-#define SMC_outw(v, a, r)       writew(v, (a) + (r))
+#define SMC_outw(lp, v, a, r)   writew(v, (a) + (r))
 #define SMC_outl(v, a, r)       writel(v, (a) + (r))
 #define SMC_insw(a, r, p, l)    readsw((a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)   writesw((a) + (r), p, l)
@@ -207,7 +213,7 @@ static inline void mcf_outsw(void *a, unsigned char *p, int l)
 }
 
 #define SMC_inw(a, r)		_swapw(readw((a) + (r)))
-#define SMC_outw(v, a, r)	writew(_swapw(v), (a) + (r))
+#define SMC_outw(lp, v, a, r)	writew(_swapw(v), (a) + (r))
 #define SMC_insw(a, r, p, l)	mcf_insw(a + r, p, l)
 #define SMC_outsw(a, r, p, l)	mcf_outsw(a + r, p, l)
 
@@ -241,7 +247,7 @@ static inline void mcf_outsw(void *a, unsigned char *p, int l)
 #define SMC_inw(a, r)		ioread16((a) + (r))
 #define SMC_inl(a, r)		ioread32((a) + (r))
 #define SMC_outb(v, a, r)	iowrite8(v, (a) + (r))
-#define SMC_outw(v, a, r)	iowrite16(v, (a) + (r))
+#define SMC_outw(lp, v, a, r)	iowrite16(v, (a) + (r))
 #define SMC_outl(v, a, r)	iowrite32(v, (a) + (r))
 #define SMC_insw(a, r, p, l)	ioread16_rep((a) + (r), p, l)
 #define SMC_outsw(a, r, p, l)	iowrite16_rep((a) + (r), p, l)
@@ -303,6 +309,8 @@ struct smc_local {
 
 	/* the low address lines on some platforms aren't connected... */
 	int	io_shift;
+	/* on some platforms a u16 write must be 4-bytes aligned */
+	bool	half_word_align4;
 
 	struct smc91x_platdata cfg;
 };
@@ -457,7 +465,7 @@ smc_pxa_dma_insw(void __iomem *ioaddr, struct smc_local *lp, int reg, int dma,
 
 #if ! SMC_CAN_USE_16BIT
 
-#define SMC_outw(x, ioaddr, reg)	SMC_outw_b(x, ioaddr, reg)
+#define SMC_outw(lp, x, ioaddr, reg)	SMC_outw_b(x, ioaddr, reg)
 #define SMC_inw(ioaddr, reg)		SMC_inw_b(ioaddr, reg)
 #define SMC_insw(a, r, p, l)		BUG()
 #define SMC_outsw(a, r, p, l)		BUG()
@@ -909,7 +917,7 @@ static const char * chip_ids[ 16 ] =  {
 		else if (SMC_8BIT(lp))				\
 			SMC_outb(x, ioaddr, PN_REG(lp));		\
 		else							\
-			SMC_outw(x, ioaddr, PN_REG(lp));		\
+			SMC_outw(lp, x, ioaddr, PN_REG(lp));		\
 	} while (0)
 
 #define SMC_GET_AR(lp)						\
@@ -937,7 +945,7 @@ static const char * chip_ids[ 16 ] =  {
 			int __mask;					\
 			local_irq_save(__flags);			\
 			__mask = SMC_inw(ioaddr, INT_REG(lp)) & ~0xff; \
-			SMC_outw(__mask | (x), ioaddr, INT_REG(lp));	\
+			SMC_outw(lp, __mask | (x), ioaddr, INT_REG(lp)); \
 			local_irq_restore(__flags);			\
 		}							\
 	} while (0)
@@ -951,7 +959,7 @@ static const char * chip_ids[ 16 ] =  {
 		if (SMC_8BIT(lp))					\
 			SMC_outb(x, ioaddr, IM_REG(lp));		\
 		else							\
-			SMC_outw((x) << 8, ioaddr, INT_REG(lp));	\
+			SMC_outw(lp, (x) << 8, ioaddr, INT_REG(lp));	\
 	} while (0)
 
 #define SMC_CURRENT_BANK(lp)	SMC_inw(ioaddr, BANK_SELECT)
@@ -961,22 +969,22 @@ static const char * chip_ids[ 16 ] =  {
 		if (SMC_MUST_ALIGN_WRITE(lp))				\
 			SMC_outl((x)<<16, ioaddr, 12<<SMC_IO_SHIFT);	\
 		else							\
-			SMC_outw(x, ioaddr, BANK_SELECT);		\
+			SMC_outw(lp, x, ioaddr, BANK_SELECT);		\
 	} while (0)
 
 #define SMC_GET_BASE(lp)		SMC_inw(ioaddr, BASE_REG(lp))
 
-#define SMC_SET_BASE(lp, x)		SMC_outw(x, ioaddr, BASE_REG(lp))
+#define SMC_SET_BASE(lp, x)	SMC_outw(lp, x, ioaddr, BASE_REG(lp))
 
 #define SMC_GET_CONFIG(lp)	SMC_inw(ioaddr, CONFIG_REG(lp))
 
-#define SMC_SET_CONFIG(lp, x)	SMC_outw(x, ioaddr, CONFIG_REG(lp))
+#define SMC_SET_CONFIG(lp, x)	SMC_outw(lp, x, ioaddr, CONFIG_REG(lp))
 
 #define SMC_GET_COUNTER(lp)	SMC_inw(ioaddr, COUNTER_REG(lp))
 
 #define SMC_GET_CTL(lp)		SMC_inw(ioaddr, CTL_REG(lp))
 
-#define SMC_SET_CTL(lp, x)		SMC_outw(x, ioaddr, CTL_REG(lp))
+#define SMC_SET_CTL(lp, x)	SMC_outw(lp, x, ioaddr, CTL_REG(lp))
 
 #define SMC_GET_MII(lp)		SMC_inw(ioaddr, MII_REG(lp))
 
@@ -987,20 +995,20 @@ static const char * chip_ids[ 16 ] =  {
 		if (SMC_MUST_ALIGN_WRITE(lp))				\
 			SMC_outl((x)<<16, ioaddr, SMC_REG(lp, 8, 1));	\
 		else							\
-			SMC_outw(x, ioaddr, GP_REG(lp));		\
+			SMC_outw(lp, x, ioaddr, GP_REG(lp));		\
 	} while (0)
 
-#define SMC_SET_MII(lp, x)		SMC_outw(x, ioaddr, MII_REG(lp))
+#define SMC_SET_MII(lp, x)	SMC_outw(lp, x, ioaddr, MII_REG(lp))
 
 #define SMC_GET_MIR(lp)		SMC_inw(ioaddr, MIR_REG(lp))
 
-#define SMC_SET_MIR(lp, x)		SMC_outw(x, ioaddr, MIR_REG(lp))
+#define SMC_SET_MIR(lp, x)	SMC_outw(lp, x, ioaddr, MIR_REG(lp))
 
 #define SMC_GET_MMU_CMD(lp)	SMC_inw(ioaddr, MMU_CMD_REG(lp))
 
-#define SMC_SET_MMU_CMD(lp, x)	SMC_outw(x, ioaddr, MMU_CMD_REG(lp))
+#define SMC_SET_MMU_CMD(lp, x)	SMC_outw(lp, x, ioaddr, MMU_CMD_REG(lp))
 
-#define SMC_GET_FIFO(lp)		SMC_inw(ioaddr, FIFO_REG(lp))
+#define SMC_GET_FIFO(lp)	SMC_inw(ioaddr, FIFO_REG(lp))
 
 #define SMC_GET_PTR(lp)		SMC_inw(ioaddr, PTR_REG(lp))
 
@@ -1009,14 +1017,14 @@ static const char * chip_ids[ 16 ] =  {
 		if (SMC_MUST_ALIGN_WRITE(lp))				\
 			SMC_outl((x)<<16, ioaddr, SMC_REG(lp, 4, 2));	\
 		else							\
-			SMC_outw(x, ioaddr, PTR_REG(lp));		\
+			SMC_outw(lp, x, ioaddr, PTR_REG(lp));		\
 	} while (0)
 
 #define SMC_GET_EPH_STATUS(lp)	SMC_inw(ioaddr, EPH_STATUS_REG(lp))
 
 #define SMC_GET_RCR(lp)		SMC_inw(ioaddr, RCR_REG(lp))
 
-#define SMC_SET_RCR(lp, x)		SMC_outw(x, ioaddr, RCR_REG(lp))
+#define SMC_SET_RCR(lp, x)		SMC_outw(lp, x, ioaddr, RCR_REG(lp))
 
 #define SMC_GET_REV(lp)		SMC_inw(ioaddr, REV_REG(lp))
 
@@ -1027,12 +1035,12 @@ static const char * chip_ids[ 16 ] =  {
 		if (SMC_MUST_ALIGN_WRITE(lp))				\
 			SMC_outl((x)<<16, ioaddr, SMC_REG(lp, 8, 0));	\
 		else							\
-			SMC_outw(x, ioaddr, RPC_REG(lp));		\
+			SMC_outw(lp, x, ioaddr, RPC_REG(lp));		\
 	} while (0)
 
 #define SMC_GET_TCR(lp)		SMC_inw(ioaddr, TCR_REG(lp))
 
-#define SMC_SET_TCR(lp, x)		SMC_outw(x, ioaddr, TCR_REG(lp))
+#define SMC_SET_TCR(lp, x)	SMC_outw(lp, x, ioaddr, TCR_REG(lp))
 
 #ifndef SMC_GET_MAC_ADDR
 #define SMC_GET_MAC_ADDR(lp, addr)					\
@@ -1049,18 +1057,18 @@ static const char * chip_ids[ 16 ] =  {
 
 #define SMC_SET_MAC_ADDR(lp, addr)					\
 	do {								\
-		SMC_outw(addr[0]|(addr[1] << 8), ioaddr, ADDR0_REG(lp)); \
-		SMC_outw(addr[2]|(addr[3] << 8), ioaddr, ADDR1_REG(lp)); \
-		SMC_outw(addr[4]|(addr[5] << 8), ioaddr, ADDR2_REG(lp)); \
+		SMC_outw(lp, addr[0] | (addr[1] << 8), ioaddr, ADDR0_REG(lp)); \
+		SMC_outw(lp, addr[2] | (addr[3] << 8), ioaddr, ADDR1_REG(lp)); \
+		SMC_outw(lp, addr[4] | (addr[5] << 8), ioaddr, ADDR2_REG(lp)); \
 	} while (0)
 
 #define SMC_SET_MCAST(lp, x)						\
 	do {								\
 		const unsigned char *mt = (x);				\
-		SMC_outw(mt[0] | (mt[1] << 8), ioaddr, MCAST_REG1(lp)); \
-		SMC_outw(mt[2] | (mt[3] << 8), ioaddr, MCAST_REG2(lp)); \
-		SMC_outw(mt[4] | (mt[5] << 8), ioaddr, MCAST_REG3(lp)); \
-		SMC_outw(mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4(lp)); \
+		SMC_outw(lp, mt[0] | (mt[1] << 8), ioaddr, MCAST_REG1(lp)); \
+		SMC_outw(lp, mt[2] | (mt[3] << 8), ioaddr, MCAST_REG2(lp)); \
+		SMC_outw(lp, mt[4] | (mt[5] << 8), ioaddr, MCAST_REG3(lp)); \
+		SMC_outw(lp, mt[6] | (mt[7] << 8), ioaddr, MCAST_REG4(lp)); \
 	} while (0)
 
 #define SMC_PUT_PKT_HDR(lp, status, length)				\
@@ -1069,8 +1077,8 @@ static const char * chip_ids[ 16 ] =  {
 			SMC_outl((status) | (length)<<16, ioaddr,	\
 				 DATA_REG(lp));			\
 		else {							\
-			SMC_outw(status, ioaddr, DATA_REG(lp));	\
-			SMC_outw(length, ioaddr, DATA_REG(lp));	\
+			SMC_outw(lp, status, ioaddr, DATA_REG(lp));	\
+			SMC_outw(lp, length, ioaddr, DATA_REG(lp));	\
 		}							\
 	} while (0)
 
diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c
index e9b8579e6241..fa5ca0992be6 100644
--- a/drivers/net/ethernet/smsc/smsc911x.c
+++ b/drivers/net/ethernet/smsc/smsc911x.c
@@ -438,9 +438,16 @@ static int smsc911x_request_resources(struct platform_device *pdev)
 	ret = regulator_bulk_get(&pdev->dev,
 			ARRAY_SIZE(pdata->supplies),
 			pdata->supplies);
-	if (ret)
+	if (ret) {
+		/*
+		 * Retry on deferrals, else just report the error
+		 * and try to continue.
+		 */
+		if (ret == -EPROBE_DEFER)
+			return ret;
 		netdev_err(ndev, "couldn't get regulators %d\n",
 				ret);
+	}
 
 	/* Request optional RESET GPIO */
 	pdata->reset_gpiod = devm_gpiod_get_optional(&pdev->dev,
@@ -1956,11 +1963,6 @@ static void smsc911x_ethtool_getdrvinfo(struct net_device *dev,
 		sizeof(info->bus_info));
 }
 
-static int smsc911x_ethtool_nwayreset(struct net_device *dev)
-{
-	return phy_start_aneg(dev->phydev);
-}
-
 static u32 smsc911x_ethtool_getmsglevel(struct net_device *dev)
 {
 	struct smsc911x_data *pdata = netdev_priv(dev);
@@ -2132,7 +2134,7 @@ static int smsc911x_ethtool_set_eeprom(struct net_device *dev,
 static const struct ethtool_ops smsc911x_ethtool_ops = {
 	.get_link = ethtool_op_get_link,
 	.get_drvinfo = smsc911x_ethtool_getdrvinfo,
-	.nway_reset = smsc911x_ethtool_nwayreset,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_msglevel = smsc911x_ethtool_getmsglevel,
 	.set_msglevel = smsc911x_ethtool_setmsglevel,
 	.get_regs_len = smsc911x_ethtool_getregslen,
@@ -2152,7 +2154,6 @@ static const struct net_device_ops smsc911x_netdev_ops = {
 	.ndo_get_stats		= smsc911x_get_stats,
 	.ndo_set_rx_mode	= smsc911x_set_multicast_list,
 	.ndo_do_ioctl		= smsc911x_do_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address 	= smsc911x_set_mac_address,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2584,6 +2585,9 @@ static int smsc911x_suspend(struct device *dev)
 		PMT_CTRL_PM_MODE_D1_ | PMT_CTRL_WOL_EN_ |
 		PMT_CTRL_ED_EN_ | PMT_CTRL_PME_EN_);
 
+	pm_runtime_disable(dev);
+	pm_runtime_set_suspended(dev);
+
 	return 0;
 }
 
@@ -2593,6 +2597,9 @@ static int smsc911x_resume(struct device *dev)
 	struct smsc911x_data *pdata = netdev_priv(ndev);
 	unsigned int to = 100;
 
+	pm_runtime_enable(dev);
+	pm_runtime_resume(dev);
+
 	/* Note 3.11 from the datasheet:
 	 * 	"When the LAN9220 is in a power saving state, a write of any
 	 * 	 data to the BYTE_TEST register will wake-up the device."
diff --git a/drivers/net/ethernet/smsc/smsc9420.c b/drivers/net/ethernet/smsc/smsc9420.c
index b7bfed4bc96b..3174aebb322f 100644
--- a/drivers/net/ethernet/smsc/smsc9420.c
+++ b/drivers/net/ethernet/smsc/smsc9420.c
@@ -254,14 +254,6 @@ static void smsc9420_ethtool_set_msglevel(struct net_device *netdev, u32 data)
 	pd->msg_enable = data;
 }
 
-static int smsc9420_ethtool_nway_reset(struct net_device *netdev)
-{
-	if (!netdev->phydev)
-		return -ENODEV;
-
-	return phy_start_aneg(netdev->phydev);
-}
-
 static int smsc9420_ethtool_getregslen(struct net_device *dev)
 {
 	/* all smsc9420 registers plus all phy registers */
@@ -417,7 +409,7 @@ static const struct ethtool_ops smsc9420_ethtool_ops = {
 	.get_drvinfo = smsc9420_ethtool_get_drvinfo,
 	.get_msglevel = smsc9420_ethtool_get_msglevel,
 	.set_msglevel = smsc9420_ethtool_set_msglevel,
-	.nway_reset = smsc9420_ethtool_nway_reset,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_link = ethtool_op_get_link,
 	.get_eeprom_len = smsc9420_ethtool_get_eeprom_len,
 	.get_eeprom = smsc9420_ethtool_get_eeprom,
diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index 4b78168a5f3c..ab66248a4b78 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -4,7 +4,7 @@ config STMMAC_ETH
 	select MII
 	select PHYLIB
 	select CRC32
-	select PTP_1588_CLOCK
+	imply PTP_1588_CLOCK
 	select RESET_CONTROLLER
 	---help---
 	  This is the driver for the Ethernet IPs are built around a
@@ -69,6 +69,17 @@ config DWMAC_MESON
 	  the stmmac device driver. This driver is used for Meson6,
 	  Meson8, Meson8b and GXBB SoCs.
 
+config DWMAC_OXNAS
+	tristate "Oxford Semiconductor OXNAS dwmac support"
+	default ARCH_OXNAS
+	depends on OF && COMMON_CLK && (ARCH_OXNAS || COMPILE_TEST)
+	select MFD_SYSCON
+	help
+	  Support for Ethernet controller on Oxford Semiconductor OXNAS SoCs.
+
+	  This selects the Oxford Semiconductor OXNAS SoC glue layer support for
+	  the stmmac device driver. This driver is used for OX820.
+
 config DWMAC_ROCKCHIP
 	tristate "Rockchip dwmac support"
 	default ARCH_ROCKCHIP
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 5d6ece5919b3..8f83a86ba13c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -10,6 +10,7 @@ obj-$(CONFIG_STMMAC_PLATFORM)	+= stmmac-platform.o
 obj-$(CONFIG_DWMAC_IPQ806X)	+= dwmac-ipq806x.o
 obj-$(CONFIG_DWMAC_LPC18XX)	+= dwmac-lpc18xx.o
 obj-$(CONFIG_DWMAC_MESON)	+= dwmac-meson.o dwmac-meson8b.o
+obj-$(CONFIG_DWMAC_OXNAS)	+= dwmac-oxnas.o
 obj-$(CONFIG_DWMAC_ROCKCHIP)	+= dwmac-rk.o
 obj-$(CONFIG_DWMAC_SOCFPGA)	+= dwmac-altr-socfpga.o
 obj-$(CONFIG_DWMAC_STI)		+= dwmac-sti.o
diff --git a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
index b3e669af3005..026e8e9cb942 100644
--- a/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/chain_mode.c
@@ -34,7 +34,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 	unsigned int entry = priv->cur_tx;
 	struct dma_desc *desc = priv->dma_tx + entry;
 	unsigned int nopaged_len = skb_headlen(skb);
-	unsigned int bmax;
+	unsigned int bmax, des2;
 	unsigned int i = 1, len;
 
 	if (priv->plat->enh_desc)
@@ -44,11 +44,12 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 
 	len = nopaged_len - bmax;
 
-	desc->des2 = dma_map_single(priv->device, skb->data,
-				    bmax, DMA_TO_DEVICE);
-	if (dma_mapping_error(priv->device, desc->des2))
+	des2 = dma_map_single(priv->device, skb->data,
+			      bmax, DMA_TO_DEVICE);
+	desc->des2 = cpu_to_le32(des2);
+	if (dma_mapping_error(priv->device, des2))
 		return -1;
-	priv->tx_skbuff_dma[entry].buf = desc->des2;
+	priv->tx_skbuff_dma[entry].buf = des2;
 	priv->tx_skbuff_dma[entry].len = bmax;
 	/* do not close the descriptor and do not set own bit */
 	priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum, STMMAC_CHAIN_MODE,
@@ -60,12 +61,13 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		desc = priv->dma_tx + entry;
 
 		if (len > bmax) {
-			desc->des2 = dma_map_single(priv->device,
-						    (skb->data + bmax * i),
-						    bmax, DMA_TO_DEVICE);
-			if (dma_mapping_error(priv->device, desc->des2))
+			des2 = dma_map_single(priv->device,
+					      (skb->data + bmax * i),
+					      bmax, DMA_TO_DEVICE);
+			desc->des2 = cpu_to_le32(des2);
+			if (dma_mapping_error(priv->device, des2))
 				return -1;
-			priv->tx_skbuff_dma[entry].buf = desc->des2;
+			priv->tx_skbuff_dma[entry].buf = des2;
 			priv->tx_skbuff_dma[entry].len = bmax;
 			priv->hw->desc->prepare_tx_desc(desc, 0, bmax, csum,
 							STMMAC_CHAIN_MODE, 1,
@@ -73,12 +75,13 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 			len -= bmax;
 			i++;
 		} else {
-			desc->des2 = dma_map_single(priv->device,
-						    (skb->data + bmax * i), len,
-						    DMA_TO_DEVICE);
-			if (dma_mapping_error(priv->device, desc->des2))
+			des2 = dma_map_single(priv->device,
+					      (skb->data + bmax * i), len,
+					      DMA_TO_DEVICE);
+			desc->des2 = cpu_to_le32(des2);
+			if (dma_mapping_error(priv->device, des2))
 				return -1;
-			priv->tx_skbuff_dma[entry].buf = desc->des2;
+			priv->tx_skbuff_dma[entry].buf = des2;
 			priv->tx_skbuff_dma[entry].len = len;
 			/* last descriptor can be set now */
 			priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
@@ -119,19 +122,19 @@ static void stmmac_init_dma_chain(void *des, dma_addr_t phy_addr,
 		struct dma_extended_desc *p = (struct dma_extended_desc *)des;
 		for (i = 0; i < (size - 1); i++) {
 			dma_phy += sizeof(struct dma_extended_desc);
-			p->basic.des3 = (unsigned int)dma_phy;
+			p->basic.des3 = cpu_to_le32((unsigned int)dma_phy);
 			p++;
 		}
-		p->basic.des3 = (unsigned int)phy_addr;
+		p->basic.des3 = cpu_to_le32((unsigned int)phy_addr);
 
 	} else {
 		struct dma_desc *p = (struct dma_desc *)des;
 		for (i = 0; i < (size - 1); i++) {
 			dma_phy += sizeof(struct dma_desc);
-			p->des3 = (unsigned int)dma_phy;
+			p->des3 = cpu_to_le32((unsigned int)dma_phy);
 			p++;
 		}
-		p->des3 = (unsigned int)phy_addr;
+		p->des3 = cpu_to_le32((unsigned int)phy_addr);
 	}
 }
 
@@ -144,10 +147,10 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 		 * 1588-2002 time stamping is enabled, hence reinitialize it
 		 * to keep explicit chaining in the descriptor.
 		 */
-		p->des3 = (unsigned int)(priv->dma_rx_phy +
-					 (((priv->dirty_rx) + 1) %
-					  DMA_RX_SIZE) *
-					 sizeof(struct dma_desc));
+		p->des3 = cpu_to_le32((unsigned int)(priv->dma_rx_phy +
+				      (((priv->dirty_rx) + 1) %
+				       DMA_RX_SIZE) *
+				      sizeof(struct dma_desc)));
 }
 
 static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
@@ -161,9 +164,9 @@ static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
 		 * 1588-2002 time stamping is enabled, hence reinitialize it
 		 * to keep explicit chaining in the descriptor.
 		 */
-		p->des3 = (unsigned int)((priv->dma_tx_phy +
-					  ((priv->dirty_tx + 1) % DMA_TX_SIZE))
-					  * sizeof(struct dma_desc));
+		p->des3 = cpu_to_le32((unsigned int)((priv->dma_tx_phy +
+				      ((priv->dirty_tx + 1) % DMA_TX_SIZE))
+				      * sizeof(struct dma_desc)));
 }
 
 const struct stmmac_mode_ops chain_mode_ops = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 6d2de4e01f6d..b13a144f72ad 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -44,6 +44,7 @@
 #define	DWMAC_CORE_4_00	0x40
 #define STMMAC_CHAN0	0	/* Always supported and default for all chips */
 
+/* These need to be powers of two, and >= 4 */
 #define DMA_TX_SIZE 512
 #define DMA_RX_SIZE 512
 #define STMMAC_GET_ENTRY(x, size)	((x + 1) & (size - 1))
@@ -411,8 +412,8 @@ extern const struct stmmac_desc_ops ndesc_ops;
 struct stmmac_dma_ops {
 	/* DMA core initialization */
 	int (*reset)(void __iomem *ioaddr);
-	void (*init)(void __iomem *ioaddr, int pbl, int fb, int mb,
-		     int aal, u32 dma_tx, u32 dma_rx, int atds);
+	void (*init)(void __iomem *ioaddr, struct stmmac_dma_cfg *dma_cfg,
+		     u32 dma_tx, u32 dma_rx, int atds);
 	/* Configure the AXI Bus Mode Register */
 	void (*axi)(void __iomem *ioaddr, struct stmmac_axi *axi);
 	/* Dump DMA registers */
@@ -506,6 +507,12 @@ struct mac_link {
 struct mii_regs {
 	unsigned int addr;	/* MII Address */
 	unsigned int data;	/* MII Data */
+	unsigned int addr_shift;	/* MII address shift */
+	unsigned int reg_shift;		/* MII reg shift */
+	unsigned int addr_mask;		/* MII address mask */
+	unsigned int reg_mask;		/* MII reg mask */
+	unsigned int clk_csr_shift;
+	unsigned int clk_csr_mask;
 };
 
 /* Helpers to manage the descriptors for chain and ring modes */
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs.h b/drivers/net/ethernet/stmicro/stmmac/descs.h
index e3c86d422109..faeeef75d7f1 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs.h
@@ -87,7 +87,7 @@
 #define	TDES0_ERROR_SUMMARY		BIT(15)
 #define	TDES0_IP_HEADER_ERROR		BIT(16)
 #define	TDES0_TIME_STAMP_STATUS		BIT(17)
-#define	TDES0_OWN			BIT(31)
+#define	TDES0_OWN			((u32)BIT(31))	/* silence sparse */
 /* TDES1 */
 #define	TDES1_BUFFER1_SIZE_MASK		GENMASK(10, 0)
 #define	TDES1_BUFFER2_SIZE_MASK		GENMASK(21, 11)
@@ -130,7 +130,7 @@
 #define	ETDES0_FIRST_SEGMENT		BIT(28)
 #define	ETDES0_LAST_SEGMENT		BIT(29)
 #define	ETDES0_INTERRUPT		BIT(30)
-#define	ETDES0_OWN			BIT(31)
+#define	ETDES0_OWN			((u32)BIT(31))	/* silence sparse */
 /* TDES1 */
 #define	ETDES1_BUFFER1_SIZE_MASK	GENMASK(12, 0)
 #define	ETDES1_BUFFER2_SIZE_MASK	GENMASK(28, 16)
@@ -170,19 +170,19 @@
 
 /* Basic descriptor structure for normal and alternate descriptors */
 struct dma_desc {
-	unsigned int des0;
-	unsigned int des1;
-	unsigned int des2;
-	unsigned int des3;
+	__le32 des0;
+	__le32 des1;
+	__le32 des2;
+	__le32 des3;
 };
 
 /* Extended descriptor structure (e.g. >= databook 3.50a) */
 struct dma_extended_desc {
 	struct dma_desc basic;	/* Basic descriptors */
-	unsigned int des4;	/* Extended Status */
-	unsigned int des5;	/* Reserved */
-	unsigned int des6;	/* Tx/Rx Timestamp Low */
-	unsigned int des7;	/* Tx/Rx Timestamp High */
+	__le32 des4;	/* Extended Status */
+	__le32 des5;	/* Reserved */
+	__le32 des6;	/* Tx/Rx Timestamp Low */
+	__le32 des7;	/* Tx/Rx Timestamp High */
 };
 
 /* Transmit checksum insertion control */
diff --git a/drivers/net/ethernet/stmicro/stmmac/descs_com.h b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
index 7635a464ce41..1d181e205d6e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/descs_com.h
+++ b/drivers/net/ethernet/stmicro/stmmac/descs_com.h
@@ -35,47 +35,50 @@
 /* Enhanced descriptors */
 static inline void ehn_desc_rx_set_on_ring(struct dma_desc *p, int end)
 {
-	p->des1 |= ((BUF_SIZE_8KiB - 1) << ERDES1_BUFFER2_SIZE_SHIFT)
-		   & ERDES1_BUFFER2_SIZE_MASK;
+	p->des1 |= cpu_to_le32(((BUF_SIZE_8KiB - 1)
+			<< ERDES1_BUFFER2_SIZE_SHIFT)
+		   & ERDES1_BUFFER2_SIZE_MASK);
 
 	if (end)
-		p->des1 |= ERDES1_END_RING;
+		p->des1 |= cpu_to_le32(ERDES1_END_RING);
 }
 
 static inline void enh_desc_end_tx_desc_on_ring(struct dma_desc *p, int end)
 {
 	if (end)
-		p->des0 |= ETDES0_END_RING;
+		p->des0 |= cpu_to_le32(ETDES0_END_RING);
 	else
-		p->des0 &= ~ETDES0_END_RING;
+		p->des0 &= cpu_to_le32(~ETDES0_END_RING);
 }
 
 static inline void enh_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 {
 	if (unlikely(len > BUF_SIZE_4KiB)) {
-		p->des1 |= (((len - BUF_SIZE_4KiB) << ETDES1_BUFFER2_SIZE_SHIFT)
+		p->des1 |= cpu_to_le32((((len - BUF_SIZE_4KiB)
+					<< ETDES1_BUFFER2_SIZE_SHIFT)
 			    & ETDES1_BUFFER2_SIZE_MASK) | (BUF_SIZE_4KiB
-			    & ETDES1_BUFFER1_SIZE_MASK);
+			    & ETDES1_BUFFER1_SIZE_MASK));
 	} else
-		p->des1 |= (len & ETDES1_BUFFER1_SIZE_MASK);
+		p->des1 |= cpu_to_le32((len & ETDES1_BUFFER1_SIZE_MASK));
 }
 
 /* Normal descriptors */
 static inline void ndesc_rx_set_on_ring(struct dma_desc *p, int end)
 {
-	p->des1 |= ((BUF_SIZE_2KiB - 1) << RDES1_BUFFER2_SIZE_SHIFT)
-		    & RDES1_BUFFER2_SIZE_MASK;
+	p->des1 |= cpu_to_le32(((BUF_SIZE_2KiB - 1)
+				<< RDES1_BUFFER2_SIZE_SHIFT)
+		    & RDES1_BUFFER2_SIZE_MASK);
 
 	if (end)
-		p->des1 |= RDES1_END_RING;
+		p->des1 |= cpu_to_le32(RDES1_END_RING);
 }
 
 static inline void ndesc_end_tx_desc_on_ring(struct dma_desc *p, int end)
 {
 	if (end)
-		p->des1 |= TDES1_END_RING;
+		p->des1 |= cpu_to_le32(TDES1_END_RING);
 	else
-		p->des1 &= ~TDES1_END_RING;
+		p->des1 &= cpu_to_le32(~TDES1_END_RING);
 }
 
 static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
@@ -83,10 +86,11 @@ static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 	if (unlikely(len > BUF_SIZE_2KiB)) {
 		unsigned int buffer1 = (BUF_SIZE_2KiB - 1)
 					& TDES1_BUFFER1_SIZE_MASK;
-		p->des1 |= ((((len - buffer1) << TDES1_BUFFER2_SIZE_SHIFT)
-			    & TDES1_BUFFER2_SIZE_MASK) | buffer1);
+		p->des1 |= cpu_to_le32((((len - buffer1)
+					<< TDES1_BUFFER2_SIZE_SHIFT)
+				& TDES1_BUFFER2_SIZE_MASK) | buffer1);
 	} else
-		p->des1 |= (len & TDES1_BUFFER1_SIZE_MASK);
+		p->des1 |= cpu_to_le32((len & TDES1_BUFFER1_SIZE_MASK));
 }
 
 /* Specific functions used for Chain mode */
@@ -94,32 +98,32 @@ static inline void norm_set_tx_desc_len_on_ring(struct dma_desc *p, int len)
 /* Enhanced descriptors */
 static inline void ehn_desc_rx_set_on_chain(struct dma_desc *p)
 {
-	p->des1 |= ERDES1_SECOND_ADDRESS_CHAINED;
+	p->des1 |= cpu_to_le32(ERDES1_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void enh_desc_end_tx_desc_on_chain(struct dma_desc *p)
 {
-	p->des0 |= ETDES0_SECOND_ADDRESS_CHAINED;
+	p->des0 |= cpu_to_le32(ETDES0_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void enh_set_tx_desc_len_on_chain(struct dma_desc *p, int len)
 {
-	p->des1 |= (len & ETDES1_BUFFER1_SIZE_MASK);
+	p->des1 |= cpu_to_le32(len & ETDES1_BUFFER1_SIZE_MASK);
 }
 
 /* Normal descriptors */
 static inline void ndesc_rx_set_on_chain(struct dma_desc *p, int end)
 {
-	p->des1 |= RDES1_SECOND_ADDRESS_CHAINED;
+	p->des1 |= cpu_to_le32(RDES1_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void ndesc_tx_set_on_chain(struct dma_desc *p)
 {
-	p->des1 |= TDES1_SECOND_ADDRESS_CHAINED;
+	p->des1 |= cpu_to_le32(TDES1_SECOND_ADDRESS_CHAINED);
 }
 
 static inline void norm_set_tx_desc_len_on_chain(struct dma_desc *p, int len)
 {
-	p->des1 |= len & TDES1_BUFFER1_SIZE_MASK;
+	p->des1 |= cpu_to_le32(len & TDES1_BUFFER1_SIZE_MASK);
 }
 #endif /* __DESC_COM_H__ */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
index b1e5f24708c9..3304095c934c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-generic.c
@@ -50,17 +50,33 @@ static int dwmac_generic_probe(struct platform_device *pdev)
 	if (plat_dat->init) {
 		ret = plat_dat->init(pdev, plat_dat->bsp_priv);
 		if (ret)
-			return ret;
+			goto err_remove_config_dt;
 	}
 
-	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_exit;
+
+	return 0;
+
+err_exit:
+	if (plat_dat->exit)
+		plat_dat->exit(pdev, plat_dat->bsp_priv);
+err_remove_config_dt:
+	if (pdev->dev.of_node)
+		stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
 }
 
 static const struct of_device_id dwmac_generic_match[] = {
 	{ .compatible = "st,spear600-gmac"},
+	{ .compatible = "snps,dwmac-3.50a"},
 	{ .compatible = "snps,dwmac-3.610"},
 	{ .compatible = "snps,dwmac-3.70a"},
 	{ .compatible = "snps,dwmac-3.710"},
+	{ .compatible = "snps,dwmac-4.00"},
+	{ .compatible = "snps,dwmac-4.10a"},
 	{ .compatible = "snps,dwmac"},
 	{ }
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
index 36d3355f2fb0..866444b6c82f 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-ipq806x.c
@@ -271,15 +271,17 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
-	if (!gmac)
-		return -ENOMEM;
+	if (!gmac) {
+		err = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	gmac->pdev = pdev;
 
 	err = ipq806x_gmac_of_parse(gmac);
 	if (err) {
 		dev_err(dev, "device tree parsing error\n");
-		return err;
+		goto err_remove_config_dt;
 	}
 
 	regmap_write(gmac->qsgmii_csr, QSGMII_PCS_CAL_LCKDT_CTL,
@@ -300,7 +302,8 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	default:
 		dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
 			phy_modes(gmac->phy_mode));
-		return -EINVAL;
+		err = -EINVAL;
+		goto err_remove_config_dt;
 	}
 	regmap_write(gmac->nss_common, NSS_COMMON_GMAC_CTL(gmac->id), val);
 
@@ -319,7 +322,8 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	default:
 		dev_err(&pdev->dev, "Unsupported PHY mode: \"%s\"\n",
 			phy_modes(gmac->phy_mode));
-		return -EINVAL;
+		err = -EINVAL;
+		goto err_remove_config_dt;
 	}
 	regmap_write(gmac->nss_common, NSS_COMMON_CLK_SRC_CTRL, val);
 
@@ -346,7 +350,16 @@ static int ipq806x_gmac_probe(struct platform_device *pdev)
 	plat_dat->bsp_priv = gmac;
 	plat_dat->fix_mac_speed = ipq806x_gmac_fix_mac_speed;
 
-	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	err = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (err)
+		goto err_remove_config_dt;
+
+	return 0;
+
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return err;
 }
 
 static const struct of_device_id ipq806x_gmac_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
index 78e9d1861896..3d3f43d91b98 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-lpc18xx.c
@@ -46,7 +46,8 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
 	reg = syscon_regmap_lookup_by_compatible("nxp,lpc1850-creg");
 	if (IS_ERR(reg)) {
 		dev_err(&pdev->dev, "syscon lookup failed\n");
-		return PTR_ERR(reg);
+		ret = PTR_ERR(reg);
+		goto err_remove_config_dt;
 	}
 
 	if (plat_dat->interface == PHY_INTERFACE_MODE_MII) {
@@ -55,13 +56,23 @@ static int lpc18xx_dwmac_probe(struct platform_device *pdev)
 		ethmode = LPC18XX_CREG_CREG6_ETHMODE_RMII;
 	} else {
 		dev_err(&pdev->dev, "Only MII and RMII mode supported\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_remove_config_dt;
 	}
 
 	regmap_update_bits(reg, LPC18XX_CREG_CREG6,
 			   LPC18XX_CREG_CREG6_ETHMODE_MASK, ethmode);
 
-	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_remove_config_dt;
+
+	return 0;
+
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
 }
 
 static const struct of_device_id lpc18xx_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
index 309d99536a2c..7fdd1760a74c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson.c
@@ -64,18 +64,31 @@ static int meson6_dwmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac)
-		return -ENOMEM;
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	dwmac->reg = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(dwmac->reg))
-		return PTR_ERR(dwmac->reg);
+	if (IS_ERR(dwmac->reg)) {
+		ret = PTR_ERR(dwmac->reg);
+		goto err_remove_config_dt;
+	}
 
 	plat_dat->bsp_priv = dwmac;
 	plat_dat->fix_mac_speed = meson6_dwmac_fix_mac_speed;
 
-	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_remove_config_dt;
+
+	return 0;
+
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
 }
 
 static const struct of_device_id meson6_dwmac_match[] = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 250e4ceafc8d..ffaed1f35efe 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -264,32 +264,48 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac)
-		return -ENOMEM;
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 	dwmac->regs = devm_ioremap_resource(&pdev->dev, res);
-	if (IS_ERR(dwmac->regs))
-		return PTR_ERR(dwmac->regs);
+	if (IS_ERR(dwmac->regs)) {
+		ret = PTR_ERR(dwmac->regs);
+		goto err_remove_config_dt;
+	}
 
 	dwmac->pdev = pdev;
 	dwmac->phy_mode = of_get_phy_mode(pdev->dev.of_node);
 	if (dwmac->phy_mode < 0) {
 		dev_err(&pdev->dev, "missing phy-mode property\n");
-		return -EINVAL;
+		ret = -EINVAL;
+		goto err_remove_config_dt;
 	}
 
 	ret = meson8b_init_clk(dwmac);
 	if (ret)
-		return ret;
+		goto err_remove_config_dt;
 
 	ret = meson8b_init_prg_eth(dwmac);
 	if (ret)
-		return ret;
+		goto err_remove_config_dt;
 
 	plat_dat->bsp_priv = dwmac;
 
-	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_clk_disable;
+
+	return 0;
+
+err_clk_disable:
+	clk_disable_unprepare(dwmac->m25_div_clk);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
 }
 
 static int meson8b_dwmac_remove(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
new file mode 100644
index 000000000000..c35597586121
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-oxnas.c
@@ -0,0 +1,249 @@
+/*
+ * Oxford Semiconductor OXNAS DWMAC glue layer
+ *
+ * Copyright (C) 2016 Neil Armstrong <narmstrong@baylibre.com>
+ * Copyright (C) 2014 Daniel Golle <daniel@makrotopia.org>
+ * Copyright (C) 2013 Ma Haijun <mahaijuns@gmail.com>
+ * Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/device.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/mfd/syscon.h>
+#include <linux/stmmac.h>
+
+#include "stmmac_platform.h"
+
+/* System Control regmap offsets */
+#define OXNAS_DWMAC_CTRL_REGOFFSET	0x78
+#define OXNAS_DWMAC_DELAY_REGOFFSET	0x100
+
+/* Control Register */
+#define DWMAC_CKEN_RX_IN        14
+#define DWMAC_CKEN_RXN_OUT      13
+#define DWMAC_CKEN_RX_OUT       12
+#define DWMAC_CKEN_TX_IN        10
+#define DWMAC_CKEN_TXN_OUT      9
+#define DWMAC_CKEN_TX_OUT       8
+#define DWMAC_RX_SOURCE         7
+#define DWMAC_TX_SOURCE         6
+#define DWMAC_LOW_TX_SOURCE     4
+#define DWMAC_AUTO_TX_SOURCE    3
+#define DWMAC_RGMII             2
+#define DWMAC_SIMPLE_MUX        1
+#define DWMAC_CKEN_GTX          0
+
+/* Delay register */
+#define DWMAC_TX_VARDELAY_SHIFT		0
+#define DWMAC_TXN_VARDELAY_SHIFT	8
+#define DWMAC_RX_VARDELAY_SHIFT		16
+#define DWMAC_RXN_VARDELAY_SHIFT	24
+#define DWMAC_TX_VARDELAY(d)		((d) << DWMAC_TX_VARDELAY_SHIFT)
+#define DWMAC_TXN_VARDELAY(d)		((d) << DWMAC_TXN_VARDELAY_SHIFT)
+#define DWMAC_RX_VARDELAY(d)		((d) << DWMAC_RX_VARDELAY_SHIFT)
+#define DWMAC_RXN_VARDELAY(d)		((d) << DWMAC_RXN_VARDELAY_SHIFT)
+
+struct oxnas_dwmac {
+	struct device	*dev;
+	struct clk	*clk;
+	struct regmap	*regmap;
+};
+
+/*
+ * Reset the MAC, enable its clock and program the OXNAS glue registers
+ * (clock enables/muxing in the control register, then the TX/RX variable
+ * delays). On success the clock is left enabled; on failure it is not.
+ */
+static int oxnas_dwmac_init(struct oxnas_dwmac *dwmac)
+{
+	unsigned int value;
+	int ret;
+
+	/* Reset HW here before changing the glue configuration */
+	ret = device_reset(dwmac->dev);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(dwmac->clk);
+	if (ret)
+		return ret;
+
+	ret = regmap_read(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, &value);
+	if (ret < 0) {
+		clk_disable_unprepare(dwmac->clk);
+		return ret;
+	}
+
+	/* Enable GMII_GTXCLK to follow GMII_REFCLK, required for gigabit PHY */
+	value |= BIT(DWMAC_CKEN_GTX)		|
+		 /* Use simple mux for 25/125 MHz clock switching */
+		 BIT(DWMAC_SIMPLE_MUX)		|
+		 /* set auto switch tx clock source */
+		 BIT(DWMAC_AUTO_TX_SOURCE)	|
+		 /* enable tx & rx vardelay */
+		 BIT(DWMAC_CKEN_TX_OUT)		|
+		 BIT(DWMAC_CKEN_TXN_OUT)	|
+		 BIT(DWMAC_CKEN_TX_IN)		|
+		 BIT(DWMAC_CKEN_RX_OUT)		|
+		 BIT(DWMAC_CKEN_RXN_OUT)	|
+		 BIT(DWMAC_CKEN_RX_IN);
+	regmap_write(dwmac->regmap, OXNAS_DWMAC_CTRL_REGOFFSET, value);
+
+	/* set tx & rx vardelay */
+	value = DWMAC_TX_VARDELAY(4)	|
+		DWMAC_TXN_VARDELAY(2)	|
+		DWMAC_RX_VARDELAY(10)	|
+		DWMAC_RXN_VARDELAY(8);
+	regmap_write(dwmac->regmap, OXNAS_DWMAC_DELAY_REGOFFSET, value);
+
+	return 0;
+}
+
+/*
+ * Probe the OXNAS dwmac glue: look up the sys-ctrl syscon regmap and the
+ * "gmac" clock, configure the glue and register the stmmac core.
+ *
+ * Every error path drops the sys-ctrl node reference taken by
+ * of_parse_phandle() and, once stmmac_probe_config_dt() has succeeded,
+ * releases the platform data with stmmac_remove_config_dt().
+ */
+static int oxnas_dwmac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct device_node *sysctrl;
+	struct oxnas_dwmac *dwmac;
+	int ret;
+
+	sysctrl = of_parse_phandle(pdev->dev.of_node, "oxsemi,sys-ctrl", 0);
+	if (!sysctrl) {
+		dev_err(&pdev->dev, "failed to get sys-ctrl node\n");
+		return -EINVAL;
+	}
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		goto err_put_sysctrl;
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat)) {
+		ret = PTR_ERR(plat_dat);
+		goto err_put_sysctrl;
+	}
+
+	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
+
+	dwmac->dev = &pdev->dev;
+	plat_dat->bsp_priv = dwmac;
+
+	dwmac->regmap = syscon_node_to_regmap(sysctrl);
+	if (IS_ERR(dwmac->regmap)) {
+		dev_err(&pdev->dev, "failed to have sysctrl regmap\n");
+		ret = PTR_ERR(dwmac->regmap);
+		goto err_remove_config_dt;
+	}
+
+	dwmac->clk = devm_clk_get(&pdev->dev, "gmac");
+	if (IS_ERR(dwmac->clk)) {
+		ret = PTR_ERR(dwmac->clk);
+		goto err_remove_config_dt;
+	}
+
+	ret = oxnas_dwmac_init(dwmac);
+	if (ret)
+		goto err_remove_config_dt;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_clk_disable;
+
+	/* The regmap has been looked up; the node is no longer needed. */
+	of_node_put(sysctrl);
+
+	return 0;
+
+err_clk_disable:
+	clk_disable_unprepare(dwmac->clk);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+err_put_sysctrl:
+	of_node_put(sysctrl);
+
+	return ret;
+}
+
+static int oxnas_dwmac_remove(struct platform_device *pdev)
+{
+	struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
+	int ret = stmmac_dvr_remove(&pdev->dev);
+
+	clk_disable_unprepare(dwmac->clk);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int oxnas_dwmac_suspend(struct device *dev)
+{
+	struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev);
+	int ret;
+
+	ret = stmmac_suspend(dev);
+	clk_disable_unprepare(dwmac->clk);
+
+	return ret;
+}
+
+static int oxnas_dwmac_resume(struct device *dev)
+{
+	struct oxnas_dwmac *dwmac = get_stmmac_bsp_priv(dev);
+	int ret;
+
+	ret = oxnas_dwmac_init(dwmac);
+	if (ret)
+		return ret;
+
+	ret = stmmac_resume(dev);
+
+	return ret;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(oxnas_dwmac_pm_ops,
+	oxnas_dwmac_suspend, oxnas_dwmac_resume);
+
+static const struct of_device_id oxnas_dwmac_match[] = {
+	{ .compatible = "oxsemi,ox820-dwmac" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, oxnas_dwmac_match);
+
+static struct platform_driver oxnas_dwmac_driver = {
+	.probe  = oxnas_dwmac_probe,
+	.remove = oxnas_dwmac_remove,
+	.driver = {
+		.name           = "oxnas-dwmac",
+		.pm		= &oxnas_dwmac_pm_ops,
+		.of_match_table = oxnas_dwmac_match,
+	},
+};
+module_platform_driver(oxnas_dwmac_driver);
+
+MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>");
+MODULE_DESCRIPTION("Oxford Semiconductor OXNAS DWMAC glue layer");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
index 3740a4417fa0..77ab0a85f067 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c
@@ -901,44 +901,6 @@ static void rk_gmac_powerdown(struct rk_priv_data *gmac)
 	gmac_clk_enable(gmac, false);
 }
 
-static int rk_gmac_init(struct platform_device *pdev, void *priv)
-{
-	struct rk_priv_data *bsp_priv = priv;
-
-	return rk_gmac_powerup(bsp_priv);
-}
-
-static void rk_gmac_exit(struct platform_device *pdev, void *priv)
-{
-	struct rk_priv_data *bsp_priv = priv;
-
-	rk_gmac_powerdown(bsp_priv);
-}
-
-static void rk_gmac_suspend(struct platform_device *pdev, void *priv)
-{
-	struct rk_priv_data *bsp_priv = priv;
-
-	/* Keep the PHY up if we use Wake-on-Lan. */
-	if (device_may_wakeup(&pdev->dev))
-		return;
-
-	rk_gmac_powerdown(bsp_priv);
-	bsp_priv->suspended = true;
-}
-
-static void rk_gmac_resume(struct platform_device *pdev, void *priv)
-{
-	struct rk_priv_data *bsp_priv = priv;
-
-	/* The PHY was up for Wake-on-Lan. */
-	if (!bsp_priv->suspended)
-		return;
-
-	rk_gmac_powerup(bsp_priv);
-	bsp_priv->suspended = false;
-}
-
 static void rk_fix_speed(void *priv, unsigned int speed)
 {
 	struct rk_priv_data *bsp_priv = priv;
@@ -974,22 +936,72 @@ static int rk_gmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	plat_dat->has_gmac = true;
-	plat_dat->init = rk_gmac_init;
-	plat_dat->exit = rk_gmac_exit;
 	plat_dat->fix_mac_speed = rk_fix_speed;
-	plat_dat->suspend = rk_gmac_suspend;
-	plat_dat->resume = rk_gmac_resume;
 
 	plat_dat->bsp_priv = rk_gmac_setup(pdev, data);
-	if (IS_ERR(plat_dat->bsp_priv))
-		return PTR_ERR(plat_dat->bsp_priv);
+	if (IS_ERR(plat_dat->bsp_priv)) {
+		ret = PTR_ERR(plat_dat->bsp_priv);
+		goto err_remove_config_dt;
+	}
 
-	ret = rk_gmac_init(pdev, plat_dat->bsp_priv);
+	ret = rk_gmac_powerup(plat_dat->bsp_priv);
 	if (ret)
-		return ret;
+		goto err_remove_config_dt;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_gmac_powerdown;
+
+	return 0;
+
+err_gmac_powerdown:
+	rk_gmac_powerdown(plat_dat->bsp_priv);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
+}
+
+static int rk_gmac_remove(struct platform_device *pdev)
+{
+	struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(&pdev->dev);
+	int ret = stmmac_dvr_remove(&pdev->dev);
+
+	rk_gmac_powerdown(bsp_priv);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int rk_gmac_suspend(struct device *dev)
+{
+	struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev);
+	int ret = stmmac_suspend(dev);
+
+	/* Keep the PHY up if we use Wake-on-Lan. */
+	if (!device_may_wakeup(dev)) {
+		rk_gmac_powerdown(bsp_priv);
+		bsp_priv->suspended = true;
+	}
+
+	return ret;
+}
+
+static int rk_gmac_resume(struct device *dev)
+{
+	struct rk_priv_data *bsp_priv = get_stmmac_bsp_priv(dev);
 
-	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	/* The PHY was up for Wake-on-Lan. */
+	if (bsp_priv->suspended) {
+		rk_gmac_powerup(bsp_priv);
+		bsp_priv->suspended = false;
+	}
+
+	return stmmac_resume(dev);
 }
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(rk_gmac_pm_ops, rk_gmac_suspend, rk_gmac_resume);
 
 static const struct of_device_id rk_gmac_dwmac_match[] = {
 	{ .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops },
@@ -1003,10 +1015,10 @@ MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match);
 
 static struct platform_driver rk_gmac_dwmac_driver = {
 	.probe  = rk_gmac_probe,
-	.remove = stmmac_pltfr_remove,
+	.remove = rk_gmac_remove,
 	.driver = {
 		.name           = "rk_gmac-dwmac",
-		.pm		= &stmmac_pltfr_pm_ops,
+		.pm		= &rk_gmac_pm_ops,
 		.of_match_table = rk_gmac_dwmac_match,
 	},
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
index bec6963ac71e..1f997027ae51 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-socfpga.c
@@ -304,6 +304,8 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 	struct device		*dev = &pdev->dev;
 	int			ret;
 	struct socfpga_dwmac	*dwmac;
+	struct net_device	*ndev;
+	struct stmmac_priv	*stpriv;
 
 	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
 	if (ret)
@@ -314,32 +316,43 @@ static int socfpga_dwmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac)
-		return -ENOMEM;
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	ret = socfpga_dwmac_parse_data(dwmac, dev);
 	if (ret) {
 		dev_err(dev, "Unable to parse OF data\n");
-		return ret;
+		goto err_remove_config_dt;
 	}
 
 	plat_dat->bsp_priv = dwmac;
 	plat_dat->fix_mac_speed = socfpga_dwmac_fix_mac_speed;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_remove_config_dt;
 
-	if (!ret) {
-		struct net_device *ndev = platform_get_drvdata(pdev);
-		struct stmmac_priv *stpriv = netdev_priv(ndev);
+	ndev = platform_get_drvdata(pdev);
+	stpriv = netdev_priv(ndev);
 
-		/* The socfpga driver needs to control the stmmac reset to
-		 * set the phy mode. Create a copy of the core reset handel
-		 * so it can be used by the driver later.
-		 */
-		dwmac->stmmac_rst = stpriv->stmmac_rst;
+	/* The socfpga driver needs to control the stmmac reset to set the phy
+	 * mode. Create a copy of the core reset handle so it can be used by
+	 * the driver later.
+	 */
+	dwmac->stmmac_rst = stpriv->stmmac_rst;
 
-		ret = socfpga_dwmac_set_phy_mode(dwmac);
-	}
+	ret = socfpga_dwmac_set_phy_mode(dwmac);
+	if (ret)
+		goto err_dvr_remove;
+
+	return 0;
+
+err_dvr_remove:
+	stmmac_dvr_remove(&pdev->dev);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
@@ -367,8 +380,8 @@ static int socfpga_dwmac_resume(struct device *dev)
 	 * control register 0, and can be modified by the phy driver
 	 * framework.
 	 */
-	if (priv->phydev)
-		phy_resume(priv->phydev);
+	if (ndev->phydev)
+		phy_resume(ndev->phydev);
 
 	return stmmac_resume(dev);
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
index 58c05acc2aab..86e0e053804c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sti.c
@@ -126,8 +126,8 @@ struct sti_dwmac {
 	struct clk *clk;	/* PHY clock */
 	u32 ctrl_reg;		/* GMAC glue-logic control register */
 	int clk_sel_reg;	/* GMAC ext clk selection register */
-	struct device *dev;
 	struct regmap *regmap;
+	bool gmac_en;
 	u32 speed;
 	void (*fix_retime_src)(void *priv, unsigned int speed);
 };
@@ -191,7 +191,7 @@ static void stih4xx_fix_retime_src(void *priv, u32 spd)
 		}
 	}
 
-	if (src == TX_RETIME_SRC_CLKGEN && dwmac->clk && freq)
+	if (src == TX_RETIME_SRC_CLKGEN && freq)
 		clk_set_rate(dwmac->clk, freq);
 
 	regmap_update_bits(dwmac->regmap, reg, STIH4XX_RETIME_SRC_MASK,
@@ -222,26 +222,20 @@ static void stid127_fix_retime_src(void *priv, u32 spd)
 			freq = DWMAC_2_5MHZ;
 	}
 
-	if (dwmac->clk && freq)
+	if (freq)
 		clk_set_rate(dwmac->clk, freq);
 
 	regmap_update_bits(dwmac->regmap, reg, STID127_RETIME_SRC_MASK, val);
 }
 
-static int sti_dwmac_init(struct platform_device *pdev, void *priv)
+static int sti_dwmac_set_mode(struct sti_dwmac *dwmac)
 {
-	struct sti_dwmac *dwmac = priv;
 	struct regmap *regmap = dwmac->regmap;
 	int iface = dwmac->interface;
-	struct device *dev = dwmac->dev;
-	struct device_node *np = dev->of_node;
 	u32 reg = dwmac->ctrl_reg;
 	u32 val;
 
-	if (dwmac->clk)
-		clk_prepare_enable(dwmac->clk);
-
-	if (of_property_read_bool(np, "st,gmac_en"))
+	if (dwmac->gmac_en)
 		regmap_update_bits(regmap, reg, EN_MASK, EN);
 
 	regmap_update_bits(regmap, reg, MII_PHY_SEL_MASK, phy_intf_sels[iface]);
@@ -249,18 +243,11 @@ static int sti_dwmac_init(struct platform_device *pdev, void *priv)
 	val = (iface == PHY_INTERFACE_MODE_REVMII) ? 0 : ENMII;
 	regmap_update_bits(regmap, reg, ENMII_MASK, val);
 
-	dwmac->fix_retime_src(priv, dwmac->speed);
+	dwmac->fix_retime_src(dwmac, dwmac->speed);
 
 	return 0;
 }
 
-static void sti_dwmac_exit(struct platform_device *pdev, void *priv)
-{
-	struct sti_dwmac *dwmac = priv;
-
-	if (dwmac->clk)
-		clk_disable_unprepare(dwmac->clk);
-}
 static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
 				struct platform_device *pdev)
 {
@@ -270,9 +257,6 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
 	struct regmap *regmap;
 	int err;
 
-	if (!np)
-		return -EINVAL;
-
 	/* clk selection from extra syscfg register */
 	dwmac->clk_sel_reg = -ENXIO;
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "sti-clkconf");
@@ -289,9 +273,9 @@ static int sti_dwmac_parse_data(struct sti_dwmac *dwmac,
 		return err;
 	}
 
-	dwmac->dev = dev;
 	dwmac->interface = of_get_phy_mode(np);
 	dwmac->regmap = regmap;
+	dwmac->gmac_en = of_property_read_bool(np, "st,gmac_en");
 	dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk");
 	dwmac->tx_retime_src = TX_RETIME_SRC_NA;
 	dwmac->speed = SPEED_100;
@@ -345,29 +329,79 @@ static int sti_dwmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac)
-		return -ENOMEM;
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	ret = sti_dwmac_parse_data(dwmac, pdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to parse OF data\n");
-		return ret;
+		goto err_remove_config_dt;
 	}
 
 	dwmac->fix_retime_src = data->fix_retime_src;
 
 	plat_dat->bsp_priv = dwmac;
-	plat_dat->init = sti_dwmac_init;
-	plat_dat->exit = sti_dwmac_exit;
 	plat_dat->fix_mac_speed = data->fix_retime_src;
 
-	ret = sti_dwmac_init(pdev, plat_dat->bsp_priv);
+	ret = clk_prepare_enable(dwmac->clk);
 	if (ret)
-		return ret;
+		goto err_remove_config_dt;
+
+	ret = sti_dwmac_set_mode(dwmac);
+	if (ret)
+		goto disable_clk;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto disable_clk;
+
+	return 0;
 
-	return stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+disable_clk:
+	clk_disable_unprepare(dwmac->clk);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
+
+	return ret;
 }
 
+static int sti_dwmac_remove(struct platform_device *pdev)
+{
+	struct sti_dwmac *dwmac = get_stmmac_bsp_priv(&pdev->dev);
+	int ret = stmmac_dvr_remove(&pdev->dev);
+
+	clk_disable_unprepare(dwmac->clk);
+
+	return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int sti_dwmac_suspend(struct device *dev)
+{
+	struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev);
+	int ret = stmmac_suspend(dev);
+
+	clk_disable_unprepare(dwmac->clk);
+
+	return ret;
+}
+
+static int sti_dwmac_resume(struct device *dev)
+{
+	struct sti_dwmac *dwmac = get_stmmac_bsp_priv(dev);
+	int ret = clk_prepare_enable(dwmac->clk); /* fatal in probe too */
+
+	if (!ret)	/* skip glue mode setup if the clock did not enable */
+		ret = sti_dwmac_set_mode(dwmac);
+	return ret ?: stmmac_resume(dev); /* glue error wins over core resume */
+}
+#endif /* CONFIG_PM_SLEEP */
+
+static SIMPLE_DEV_PM_OPS(sti_dwmac_pm_ops, sti_dwmac_suspend,
+					   sti_dwmac_resume);
+
 static const struct sti_dwmac_of_data stih4xx_dwmac_data = {
 	.fix_retime_src = stih4xx_fix_retime_src,
 };
@@ -387,10 +421,10 @@ MODULE_DEVICE_TABLE(of, sti_dwmac_match);
 
 static struct platform_driver sti_dwmac_driver = {
 	.probe  = sti_dwmac_probe,
-	.remove = stmmac_pltfr_remove,
+	.remove = sti_dwmac_remove,
 	.driver = {
 		.name           = "sti-dwmac",
-		.pm		= &stmmac_pltfr_pm_ops,
+		.pm		= &sti_dwmac_pm_ops,
 		.of_match_table = sti_dwmac_match,
 	},
 };
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index e5a926b8bee7..61cb24810d10 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -107,24 +107,33 @@ static int stm32_dwmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
-	if (!dwmac)
-		return -ENOMEM;
+	if (!dwmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	ret = stm32_dwmac_parse_data(dwmac, &pdev->dev);
 	if (ret) {
 		dev_err(&pdev->dev, "Unable to parse OF data\n");
-		return ret;
+		goto err_remove_config_dt;
 	}
 
 	plat_dat->bsp_priv = dwmac;
 
 	ret = stm32_dwmac_init(plat_dat);
 	if (ret)
-		return ret;
+		goto err_remove_config_dt;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
-		stm32_dwmac_clk_disable(dwmac);
+		goto err_clk_disable;
+
+	return 0;
+
+err_clk_disable:
+	stm32_dwmac_clk_disable(dwmac);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
index adff46375a32..d07520fb969e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sunxi.c
@@ -120,22 +120,27 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 		return PTR_ERR(plat_dat);
 
 	gmac = devm_kzalloc(dev, sizeof(*gmac), GFP_KERNEL);
-	if (!gmac)
-		return -ENOMEM;
+	if (!gmac) {
+		ret = -ENOMEM;
+		goto err_remove_config_dt;
+	}
 
 	gmac->interface = of_get_phy_mode(dev->of_node);
 
 	gmac->tx_clk = devm_clk_get(dev, "allwinner_gmac_tx");
 	if (IS_ERR(gmac->tx_clk)) {
 		dev_err(dev, "could not get tx clock\n");
-		return PTR_ERR(gmac->tx_clk);
+		ret = PTR_ERR(gmac->tx_clk);
+		goto err_remove_config_dt;
 	}
 
 	/* Optional regulator for PHY */
 	gmac->regulator = devm_regulator_get_optional(dev, "phy");
 	if (IS_ERR(gmac->regulator)) {
-		if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER)
-			return -EPROBE_DEFER;
+		if (PTR_ERR(gmac->regulator) == -EPROBE_DEFER) {
+			ret = -EPROBE_DEFER;
+			goto err_remove_config_dt;
+		}
 		dev_info(dev, "no regulator found\n");
 		gmac->regulator = NULL;
 	}
@@ -151,11 +156,18 @@ static int sun7i_gmac_probe(struct platform_device *pdev)
 
 	ret = sun7i_gmac_init(pdev, plat_dat->bsp_priv);
 	if (ret)
-		return ret;
+		goto err_remove_config_dt;
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
 	if (ret)
-		sun7i_gmac_exit(pdev, plat_dat->bsp_priv);
+		goto err_gmac_exit;
+
+	return 0;
+
+err_gmac_exit:
+	sun7i_gmac_exit(pdev, plat_dat->bsp_priv);
+err_remove_config_dt:
+	stmmac_remove_config_dt(pdev, plat_dat);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
index ff3e5ab39bd0..52b9407a8a39 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000.h
@@ -225,7 +225,7 @@ enum rx_tx_priority_ratio {
 
 #define DMA_BUS_MODE_FB		0x00010000	/* Fixed burst */
 #define DMA_BUS_MODE_MB		0x04000000	/* Mixed burst */
-#define DMA_BUS_MODE_RPBL_MASK	0x003e0000	/* Rx-Programmable Burst Len */
+#define DMA_BUS_MODE_RPBL_MASK	0x007e0000	/* Rx-Programmable Burst Len */
 #define DMA_BUS_MODE_RPBL_SHIFT	17
 #define DMA_BUS_MODE_USP	0x00800000
 #define DMA_BUS_MODE_MAXPBL	0x01000000
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
index 7df4ff158f3d..b21d03fe4f43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c
@@ -534,6 +534,12 @@ struct mac_device_info *dwmac1000_setup(void __iomem *ioaddr, int mcbins,
 	mac->link.speed = GMAC_CONTROL_FES;
 	mac->mii.addr = GMAC_MII_ADDR;
 	mac->mii.data = GMAC_MII_DATA;
+	mac->mii.addr_shift = 11;
+	mac->mii.addr_mask = 0x0000F800;
+	mac->mii.reg_shift = 6;
+	mac->mii.reg_mask = 0x000007C0;
+	mac->mii.clk_csr_shift = 2;
+	mac->mii.clk_csr_mask = 0xF;
 
 	/* Get and dump the chip ID */
 	*synopsys_id = stmmac_get_synopsys_id(hwid);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
index 990746955216..612d3aaac9a4 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_dma.c
@@ -43,9 +43,11 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 	if (axi->axi_xit_frm)
 		value |= DMA_AXI_LPI_XIT_FRM;
 
+	value &= ~DMA_AXI_WR_OSR_LMT;
 	value |= (axi->axi_wr_osr_lmt & DMA_AXI_WR_OSR_LMT_MASK) <<
 		 DMA_AXI_WR_OSR_LMT_SHIFT;
 
+	value &= ~DMA_AXI_RD_OSR_LMT;
 	value |= (axi->axi_rd_osr_lmt & DMA_AXI_RD_OSR_LMT_MASK) <<
 		 DMA_AXI_RD_OSR_LMT_SHIFT;
 
@@ -82,37 +84,39 @@ static void dwmac1000_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 	writel(value, ioaddr + DMA_AXI_BUS_MODE);
 }
 
-static void dwmac1000_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
-			       int aal, u32 dma_tx, u32 dma_rx, int atds)
+static void dwmac1000_dma_init(void __iomem *ioaddr,
+			       struct stmmac_dma_cfg *dma_cfg,
+			       u32 dma_tx, u32 dma_rx, int atds)
 {
 	u32 value = readl(ioaddr + DMA_BUS_MODE);
+	int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+	int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
 
 	/*
 	 * Set the DMA PBL (Programmable Burst Length) mode.
 	 *
 	 * Note: before stmmac core 3.50 this mode bit was 4xPBL, and
 	 * post 3.5 mode bit acts as 8*PBL.
-	 *
-	 * This configuration doesn't take care about the Separate PBL
-	 * so only the bits: 13-8 are programmed with the PBL passed from the
-	 * platform.
 	 */
-	value |= DMA_BUS_MODE_MAXPBL;
-	value &= ~DMA_BUS_MODE_PBL_MASK;
-	value |= (pbl << DMA_BUS_MODE_PBL_SHIFT);
+	if (dma_cfg->pblx8)
+		value |= DMA_BUS_MODE_MAXPBL;
+	value |= DMA_BUS_MODE_USP;
+	value &= ~(DMA_BUS_MODE_PBL_MASK | DMA_BUS_MODE_RPBL_MASK);
+	value |= (txpbl << DMA_BUS_MODE_PBL_SHIFT);
+	value |= (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
 
 	/* Set the Fixed burst mode */
-	if (fb)
+	if (dma_cfg->fixed_burst)
 		value |= DMA_BUS_MODE_FB;
 
 	/* Mixed Burst has no effect when fb is set */
-	if (mb)
+	if (dma_cfg->mixed_burst)
 		value |= DMA_BUS_MODE_MB;
 
 	if (atds)
 		value |= DMA_BUS_MODE_ATDS;
 
-	if (aal)
+	if (dma_cfg->aal)
 		value |= DMA_BUS_MODE_AAL;
 
 	writel(value, ioaddr + DMA_BUS_MODE);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
index 6418b2e07619..a1d582f47b1a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_core.c
@@ -192,6 +192,13 @@ struct mac_device_info *dwmac100_setup(void __iomem *ioaddr, int *synopsys_id)
 	mac->link.speed = 0;
 	mac->mii.addr = MAC_MII_ADDR;
 	mac->mii.data = MAC_MII_DATA;
+	mac->mii.addr_shift = 11;
+	mac->mii.addr_mask = 0x0000F800;
+	mac->mii.reg_shift = 6;
+	mac->mii.reg_mask = 0x000007C0;
+	mac->mii.clk_csr_shift = 2;
+	mac->mii.clk_csr_mask = 0xF;
+
 	/* Synopsys Id is not available on old chips */
 	*synopsys_id = 0;
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
index 61f54c99a7de..e5664da382f3 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac100_dma.c
@@ -32,11 +32,12 @@
 #include "dwmac100.h"
 #include "dwmac_dma.h"
 
-static void dwmac100_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
-			      int aal, u32 dma_tx, u32 dma_rx, int atds)
+static void dwmac100_dma_init(void __iomem *ioaddr,
+			      struct stmmac_dma_cfg *dma_cfg,
+			      u32 dma_tx, u32 dma_rx, int atds)
 {
 	/* Enable Application Access by writing to DMA CSR0 */
-	writel(DMA_BUS_MODE_DEFAULT | (pbl << DMA_BUS_MODE_PBL_SHIFT),
+	writel(DMA_BUS_MODE_DEFAULT | (dma_cfg->pbl << DMA_BUS_MODE_PBL_SHIFT),
 	       ioaddr + DMA_BUS_MODE);
 
 	/* Mask interrupts by writing to CSR7 */
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 6f4f5ce25114..3e8d4fefa5e0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -155,8 +155,11 @@ enum power_event {
 #define MTL_CHAN_RX_DEBUG(x)		(MTL_CHANX_BASE_ADDR(x) + 0x38)
 
 #define MTL_OP_MODE_RSF			BIT(5)
+#define MTL_OP_MODE_TXQEN		BIT(3)
 #define MTL_OP_MODE_TSF			BIT(1)
 
+#define MTL_OP_MODE_TQS_MASK		GENMASK(24, 16)
+
 #define MTL_OP_MODE_TTC_MASK		0x70
 #define MTL_OP_MODE_TTC_SHIFT		4
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 51019b794be5..eaed7cb21867 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -430,6 +430,12 @@ struct mac_device_info *dwmac4_setup(void __iomem *ioaddr, int mcbins,
 	mac->link.speed = GMAC_CONFIG_FES;
 	mac->mii.addr = GMAC_MDIO_ADDR;
 	mac->mii.data = GMAC_MDIO_DATA;
+	mac->mii.addr_shift = 21;
+	mac->mii.addr_mask = GENMASK(25, 21);
+	mac->mii.reg_shift = 16;
+	mac->mii.reg_mask = GENMASK(20, 16);
+	mac->mii.clk_csr_shift = 8;
+	mac->mii.clk_csr_mask = GENMASK(11, 8);
 
 	/* Get and dump the chip ID */
 	*synopsys_id = stmmac_get_synopsys_id(hwid);
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
index a601f8d43b75..a340fc8bd0de 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
@@ -23,7 +23,7 @@ static int dwmac4_wrback_get_tx_status(void *data, struct stmmac_extra_stats *x,
 	unsigned int tdes3;
 	int ret = tx_done;
 
-	tdes3 = p->des3;
+	tdes3 = le32_to_cpu(p->des3);
 
 	/* Get tx owner first */
 	if (unlikely(tdes3 & TDES3_OWN))
@@ -77,9 +77,9 @@ static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x,
 				       struct dma_desc *p)
 {
 	struct net_device_stats *stats = (struct net_device_stats *)data;
-	unsigned int rdes1 = p->des1;
-	unsigned int rdes2 = p->des2;
-	unsigned int rdes3 = p->des3;
+	unsigned int rdes1 = le32_to_cpu(p->des1);
+	unsigned int rdes2 = le32_to_cpu(p->des2);
+	unsigned int rdes3 = le32_to_cpu(p->des3);
 	int message_type;
 	int ret = good_frame;
 
@@ -176,47 +176,48 @@ static int dwmac4_wrback_get_rx_status(void *data, struct stmmac_extra_stats *x,
 
 static int dwmac4_rd_get_tx_len(struct dma_desc *p)
 {
-	return (p->des2 & TDES2_BUFFER1_SIZE_MASK);
+	return (le32_to_cpu(p->des2) & TDES2_BUFFER1_SIZE_MASK);
 }
 
 static int dwmac4_get_tx_owner(struct dma_desc *p)
 {
-	return (p->des3 & TDES3_OWN) >> TDES3_OWN_SHIFT;
+	return (le32_to_cpu(p->des3) & TDES3_OWN) >> TDES3_OWN_SHIFT;
 }
 
 static void dwmac4_set_tx_owner(struct dma_desc *p)
 {
-	p->des3 |= TDES3_OWN;
+	p->des3 |= cpu_to_le32(TDES3_OWN);
 }
 
 static void dwmac4_set_rx_owner(struct dma_desc *p)
 {
-	p->des3 |= RDES3_OWN;
+	p->des3 |= cpu_to_le32(RDES3_OWN);
 }
 
 static int dwmac4_get_tx_ls(struct dma_desc *p)
 {
-	return (p->des3 & TDES3_LAST_DESCRIPTOR) >> TDES3_LAST_DESCRIPTOR_SHIFT;
+	return (le32_to_cpu(p->des3) & TDES3_LAST_DESCRIPTOR)
+		>> TDES3_LAST_DESCRIPTOR_SHIFT;
 }
 
 static int dwmac4_wrback_get_rx_frame_len(struct dma_desc *p, int rx_coe)
 {
-	return (p->des3 & RDES3_PACKET_SIZE_MASK);
+	return (le32_to_cpu(p->des3) & RDES3_PACKET_SIZE_MASK);
 }
 
 static void dwmac4_rd_enable_tx_timestamp(struct dma_desc *p)
 {
-	p->des2 |= TDES2_TIMESTAMP_ENABLE;
+	p->des2 |= cpu_to_le32(TDES2_TIMESTAMP_ENABLE);
 }
 
 static int dwmac4_wrback_get_tx_timestamp_status(struct dma_desc *p)
 {
 	/* Context type from W/B descriptor must be zero */
-	if (p->des3 & TDES3_CONTEXT_TYPE)
+	if (le32_to_cpu(p->des3) & TDES3_CONTEXT_TYPE)
 		return -EINVAL;
 
 	/* Tx Timestamp Status is 1 so des0 and des1'll have valid values */
-	if (p->des3 & TDES3_TIMESTAMP_STATUS)
+	if (le32_to_cpu(p->des3) & TDES3_TIMESTAMP_STATUS)
 		return 0;
 
 	return 1;
@@ -227,9 +228,9 @@ static inline u64 dwmac4_get_timestamp(void *desc, u32 ats)
 	struct dma_desc *p = (struct dma_desc *)desc;
 	u64 ns;
 
-	ns = p->des0;
+	ns = le32_to_cpu(p->des0);
 	/* convert high/sec time stamp value to nanosecond */
-	ns += p->des1 * 1000000000ULL;
+	ns += le32_to_cpu(p->des1) * 1000000000ULL;
 
 	return ns;
 }
@@ -264,7 +265,7 @@ static int dwmac4_wrback_get_rx_timestamp_status(void *desc, u32 ats)
 
 	/* Get the status from normal w/b descriptor */
 	if (likely(p->des3 & TDES3_RS1V)) {
-		if (likely(p->des1 & RDES1_TIMESTAMP_AVAILABLE)) {
+		if (likely(le32_to_cpu(p->des1) & RDES1_TIMESTAMP_AVAILABLE)) {
 			int i = 0;
 
 			/* Check if timestamp is OK from context descriptor */
@@ -287,10 +288,10 @@ exit:
 static void dwmac4_rd_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
 				   int mode, int end)
 {
-	p->des3 = RDES3_OWN | RDES3_BUFFER1_VALID_ADDR;
+	p->des3 = cpu_to_le32(RDES3_OWN | RDES3_BUFFER1_VALID_ADDR);
 
 	if (!disable_rx_ic)
-		p->des3 |= RDES3_INT_ON_COMPLETION_EN;
+		p->des3 |= cpu_to_le32(RDES3_INT_ON_COMPLETION_EN);
 }
 
 static void dwmac4_rd_init_tx_desc(struct dma_desc *p, int mode, int end)
@@ -305,9 +306,9 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 				      bool csum_flag, int mode, bool tx_own,
 				      bool ls)
 {
-	unsigned int tdes3 = p->des3;
+	unsigned int tdes3 = le32_to_cpu(p->des3);
 
-	p->des2 |= (len & TDES2_BUFFER1_SIZE_MASK);
+	p->des2 |= cpu_to_le32(len & TDES2_BUFFER1_SIZE_MASK);
 
 	if (is_fs)
 		tdes3 |= TDES3_FIRST_DESCRIPTOR;
@@ -335,7 +336,7 @@ static void dwmac4_rd_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 		 */
 		wmb();
 
-	p->des3 = tdes3;
+	p->des3 = cpu_to_le32(tdes3);
 }
 
 static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
@@ -343,14 +344,14 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
 					  bool ls, unsigned int tcphdrlen,
 					  unsigned int tcppayloadlen)
 {
-	unsigned int tdes3 = p->des3;
+	unsigned int tdes3 = le32_to_cpu(p->des3);
 
 	if (len1)
-		p->des2 |= (len1 & TDES2_BUFFER1_SIZE_MASK);
+		p->des2 |= cpu_to_le32((len1 & TDES2_BUFFER1_SIZE_MASK));
 
 	if (len2)
-		p->des2 |= (len2 << TDES2_BUFFER2_SIZE_MASK_SHIFT)
-			    & TDES2_BUFFER2_SIZE_MASK;
+		p->des2 |= cpu_to_le32((len2 << TDES2_BUFFER2_SIZE_MASK_SHIFT)
+			    & TDES2_BUFFER2_SIZE_MASK);
 
 	if (is_fs) {
 		tdes3 |= TDES3_FIRST_DESCRIPTOR |
@@ -378,7 +379,7 @@ static void dwmac4_rd_prepare_tso_tx_desc(struct dma_desc *p, int is_fs,
 		 */
 		wmb();
 
-	p->des3 = tdes3;
+	p->des3 = cpu_to_le32(tdes3);
 }
 
 static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
@@ -389,7 +390,7 @@ static void dwmac4_release_tx_desc(struct dma_desc *p, int mode)
 
 static void dwmac4_rd_set_tx_ic(struct dma_desc *p)
 {
-	p->des2 |= TDES2_INTERRUPT_ON_COMPLETION;
+	p->des2 |= cpu_to_le32(TDES2_INTERRUPT_ON_COMPLETION);
 }
 
 static void dwmac4_display_ring(void *head, unsigned int size, bool rx)
@@ -402,7 +403,8 @@ static void dwmac4_display_ring(void *head, unsigned int size, bool rx)
 	for (i = 0; i < size; i++) {
 		pr_info("%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
 			i, (unsigned int)virt_to_phys(p),
-			p->des0, p->des1, p->des2, p->des3);
+			le32_to_cpu(p->des0), le32_to_cpu(p->des1),
+			le32_to_cpu(p->des2), le32_to_cpu(p->des3));
 		p++;
 	}
 }
@@ -411,8 +413,8 @@ static void dwmac4_set_mss_ctxt(struct dma_desc *p, unsigned int mss)
 {
 	p->des0 = 0;
 	p->des1 = 0;
-	p->des2 = mss;
-	p->des3 = TDES3_CONTEXT_TYPE | TDES3_CTXT_TCMSSV;
+	p->des2 = cpu_to_le32(mss);
+	p->des3 = cpu_to_le32(TDES3_CONTEXT_TYPE | TDES3_CTXT_TCMSSV);
 }
 
 const struct stmmac_desc_ops dwmac4_desc_ops = {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
index 116151cd6a95..8196ab5fc33c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
@@ -30,9 +30,11 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 	if (axi->axi_xit_frm)
 		value |= DMA_AXI_LPI_XIT_FRM;
 
+	value &= ~DMA_AXI_WR_OSR_LMT;
 	value |= (axi->axi_wr_osr_lmt & DMA_AXI_OSR_MAX) <<
 		 DMA_AXI_WR_OSR_LMT_SHIFT;
 
+	value &= ~DMA_AXI_RD_OSR_LMT;
 	value |= (axi->axi_rd_osr_lmt & DMA_AXI_OSR_MAX) <<
 		 DMA_AXI_RD_OSR_LMT_SHIFT;
 
@@ -69,25 +71,29 @@ static void dwmac4_dma_axi(void __iomem *ioaddr, struct stmmac_axi *axi)
 	writel(value, ioaddr + DMA_SYS_BUS_MODE);
 }
 
-static void dwmac4_dma_init_channel(void __iomem *ioaddr, int pbl,
+static void dwmac4_dma_init_channel(void __iomem *ioaddr,
+				    struct stmmac_dma_cfg *dma_cfg,
 				    u32 dma_tx_phy, u32 dma_rx_phy,
 				    u32 channel)
 {
 	u32 value;
+	int txpbl = dma_cfg->txpbl ?: dma_cfg->pbl;
+	int rxpbl = dma_cfg->rxpbl ?: dma_cfg->pbl;
 
 	/* set PBL for each channels. Currently we affect same configuration
 	 * on each channel
 	 */
 	value = readl(ioaddr + DMA_CHAN_CONTROL(channel));
-	value = value | DMA_BUS_MODE_PBL;
+	if (dma_cfg->pblx8)
+		value = value | DMA_BUS_MODE_PBL;
 	writel(value, ioaddr + DMA_CHAN_CONTROL(channel));
 
 	value = readl(ioaddr + DMA_CHAN_TX_CONTROL(channel));
-	value = value | (pbl << DMA_BUS_MODE_PBL_SHIFT);
+	value = value | (txpbl << DMA_BUS_MODE_PBL_SHIFT);
 	writel(value, ioaddr + DMA_CHAN_TX_CONTROL(channel));
 
 	value = readl(ioaddr + DMA_CHAN_RX_CONTROL(channel));
-	value = value | (pbl << DMA_BUS_MODE_RPBL_SHIFT);
+	value = value | (rxpbl << DMA_BUS_MODE_RPBL_SHIFT);
 	writel(value, ioaddr + DMA_CHAN_RX_CONTROL(channel));
 
 	/* Mask interrupts by writing to CSR7 */
@@ -97,27 +103,28 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr, int pbl,
 	writel(dma_rx_phy, ioaddr + DMA_CHAN_RX_BASE_ADDR(channel));
 }
 
-static void dwmac4_dma_init(void __iomem *ioaddr, int pbl, int fb, int mb,
-			    int aal, u32 dma_tx, u32 dma_rx, int atds)
+static void dwmac4_dma_init(void __iomem *ioaddr,
+			    struct stmmac_dma_cfg *dma_cfg,
+			    u32 dma_tx, u32 dma_rx, int atds)
 {
 	u32 value = readl(ioaddr + DMA_SYS_BUS_MODE);
 	int i;
 
 	/* Set the Fixed burst mode */
-	if (fb)
+	if (dma_cfg->fixed_burst)
 		value |= DMA_SYS_BUS_FB;
 
 	/* Mixed Burst has no effect when fb is set */
-	if (mb)
+	if (dma_cfg->mixed_burst)
 		value |= DMA_SYS_BUS_MB;
 
-	if (aal)
+	if (dma_cfg->aal)
 		value |= DMA_SYS_BUS_AAL;
 
 	writel(value, ioaddr + DMA_SYS_BUS_MODE);
 
 	for (i = 0; i < DMA_CHANNEL_NB_MAX; i++)
-		dwmac4_dma_init_channel(ioaddr, pbl, dma_tx, dma_rx, i);
+		dwmac4_dma_init_channel(ioaddr, dma_cfg, dma_tx, dma_rx, i);
 }
 
 static void _dwmac4_dump_dma_regs(void __iomem *ioaddr, u32 channel)
@@ -213,7 +220,17 @@ static void dwmac4_dma_chan_op_mode(void __iomem *ioaddr, int txmode,
 		else
 			mtl_tx_op |= MTL_OP_MODE_TTC_512;
 	}
-
+	/* For an IP with DWC_EQOS_NUM_TXQ == 1, the fields TXQEN and TQS are RO
+	 * with reset values: TXQEN on, TQS == DWC_EQOS_TXFIFO_SIZE.
+	 * For an IP with DWC_EQOS_NUM_TXQ > 1, the fields TXQEN and TQS are R/W
+	 * with reset values: TXQEN off, TQS 256 bytes.
+	 *
+	 * Write the bits in both cases, since it will have no effect when RO.
+	 * For DWC_EQOS_NUM_TXQ > 1, the top bits in MTL_OP_MODE_TQS_MASK might
+	 * be RO, however, writing the whole TQS field will result in a value
+	 * equal to DWC_EQOS_TXFIFO_SIZE, just like for DWC_EQOS_NUM_TXQ == 1.
+	 */
+	mtl_tx_op |= MTL_OP_MODE_TXQEN | MTL_OP_MODE_TQS_MASK;
 	writel(mtl_tx_op, ioaddr +  MTL_CHAN_TX_OP_MODE(channel));
 
 	mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
diff --git a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
index e75549327c34..ce97e522566a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/enh_desc.c
@@ -30,7 +30,7 @@ static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x,
 				  struct dma_desc *p, void __iomem *ioaddr)
 {
 	struct net_device_stats *stats = (struct net_device_stats *)data;
-	unsigned int tdes0 = p->des0;
+	unsigned int tdes0 = le32_to_cpu(p->des0);
 	int ret = tx_done;
 
 	/* Get tx owner first */
@@ -95,7 +95,7 @@ static int enh_desc_get_tx_status(void *data, struct stmmac_extra_stats *x,
 
 static int enh_desc_get_tx_len(struct dma_desc *p)
 {
-	return (p->des1 & ETDES1_BUFFER1_SIZE_MASK);
+	return (le32_to_cpu(p->des1) & ETDES1_BUFFER1_SIZE_MASK);
 }
 
 static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
@@ -134,8 +134,8 @@ static int enh_desc_coe_rdes0(int ipc_err, int type, int payload_err)
 static void enh_desc_get_ext_status(void *data, struct stmmac_extra_stats *x,
 				    struct dma_extended_desc *p)
 {
-	unsigned int rdes0 = p->basic.des0;
-	unsigned int rdes4 = p->des4;
+	unsigned int rdes0 = le32_to_cpu(p->basic.des0);
+	unsigned int rdes4 = le32_to_cpu(p->des4);
 
 	if (unlikely(rdes0 & ERDES0_RX_MAC_ADDR)) {
 		int message_type = (rdes4 & ERDES4_MSG_TYPE_MASK) >> 8;
@@ -199,7 +199,7 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 				  struct dma_desc *p)
 {
 	struct net_device_stats *stats = (struct net_device_stats *)data;
-	unsigned int rdes0 = p->des0;
+	unsigned int rdes0 = le32_to_cpu(p->des0);
 	int ret = good_frame;
 
 	if (unlikely(rdes0 & RDES0_OWN))
@@ -265,8 +265,8 @@ static int enh_desc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
 				  int mode, int end)
 {
-	p->des0 |= RDES0_OWN;
-	p->des1 |= ((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK);
+	p->des0 |= cpu_to_le32(RDES0_OWN);
+	p->des1 |= cpu_to_le32((BUF_SIZE_8KiB - 1) & ERDES1_BUFFER1_SIZE_MASK);
 
 	if (mode == STMMAC_CHAIN_MODE)
 		ehn_desc_rx_set_on_chain(p);
@@ -274,12 +274,12 @@ static void enh_desc_init_rx_desc(struct dma_desc *p, int disable_rx_ic,
 		ehn_desc_rx_set_on_ring(p, end);
 
 	if (disable_rx_ic)
-		p->des1 |= ERDES1_DISABLE_IC;
+		p->des1 |= cpu_to_le32(ERDES1_DISABLE_IC);
 }
 
 static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end)
 {
-	p->des0 &= ~ETDES0_OWN;
+	p->des0 &= cpu_to_le32(~ETDES0_OWN);
 	if (mode == STMMAC_CHAIN_MODE)
 		enh_desc_end_tx_desc_on_chain(p);
 	else
@@ -288,27 +288,27 @@ static void enh_desc_init_tx_desc(struct dma_desc *p, int mode, int end)
 
 static int enh_desc_get_tx_owner(struct dma_desc *p)
 {
-	return (p->des0 & ETDES0_OWN) >> 31;
+	return (le32_to_cpu(p->des0) & ETDES0_OWN) >> 31;
 }
 
 static void enh_desc_set_tx_owner(struct dma_desc *p)
 {
-	p->des0 |= ETDES0_OWN;
+	p->des0 |= cpu_to_le32(ETDES0_OWN);
 }
 
 static void enh_desc_set_rx_owner(struct dma_desc *p)
 {
-	p->des0 |= RDES0_OWN;
+	p->des0 |= cpu_to_le32(RDES0_OWN);
 }
 
 static int enh_desc_get_tx_ls(struct dma_desc *p)
 {
-	return (p->des0 & ETDES0_LAST_SEGMENT) >> 29;
+	return (le32_to_cpu(p->des0) & ETDES0_LAST_SEGMENT) >> 29;
 }
 
 static void enh_desc_release_tx_desc(struct dma_desc *p, int mode)
 {
-	int ter = (p->des0 & ETDES0_END_RING) >> 21;
+	int ter = (le32_to_cpu(p->des0) & ETDES0_END_RING) >> 21;
 
 	memset(p, 0, offsetof(struct dma_desc, des2));
 	if (mode == STMMAC_CHAIN_MODE)
@@ -321,7 +321,7 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 				     bool csum_flag, int mode, bool tx_own,
 				     bool ls)
 {
-	unsigned int tdes0 = p->des0;
+	unsigned int tdes0 = le32_to_cpu(p->des0);
 
 	if (mode == STMMAC_CHAIN_MODE)
 		enh_set_tx_desc_len_on_chain(p, len);
@@ -352,12 +352,12 @@ static void enh_desc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 		 */
 		wmb();
 
-	p->des0 = tdes0;
+	p->des0 = cpu_to_le32(tdes0);
 }
 
 static void enh_desc_set_tx_ic(struct dma_desc *p)
 {
-	p->des0 |= ETDES0_INTERRUPT;
+	p->des0 |= cpu_to_le32(ETDES0_INTERRUPT);
 }
 
 static int enh_desc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
@@ -372,18 +372,18 @@ static int enh_desc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
 	if (rx_coe_type == STMMAC_RX_COE_TYPE1)
 		csum = 2;
 
-	return (((p->des0 & RDES0_FRAME_LEN_MASK) >> RDES0_FRAME_LEN_SHIFT) -
-		csum);
+	return (((le32_to_cpu(p->des0) & RDES0_FRAME_LEN_MASK)
+				>> RDES0_FRAME_LEN_SHIFT) - csum);
 }
 
 static void enh_desc_enable_tx_timestamp(struct dma_desc *p)
 {
-	p->des0 |= ETDES0_TIME_STAMP_ENABLE;
+	p->des0 |= cpu_to_le32(ETDES0_TIME_STAMP_ENABLE);
 }
 
 static int enh_desc_get_tx_timestamp_status(struct dma_desc *p)
 {
-	return (p->des0 & ETDES0_TIME_STAMP_STATUS) >> 17;
+	return (le32_to_cpu(p->des0) & ETDES0_TIME_STAMP_STATUS) >> 17;
 }
 
 static u64 enh_desc_get_timestamp(void *desc, u32 ats)
@@ -392,13 +392,13 @@ static u64 enh_desc_get_timestamp(void *desc, u32 ats)
 
 	if (ats) {
 		struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
-		ns = p->des6;
+		ns = le32_to_cpu(p->des6);
 		/* convert high/sec time stamp value to nanosecond */
-		ns += p->des7 * 1000000000ULL;
+		ns += le32_to_cpu(p->des7) * 1000000000ULL;
 	} else {
 		struct dma_desc *p = (struct dma_desc *)desc;
-		ns = p->des2;
-		ns += p->des3 * 1000000000ULL;
+		ns = le32_to_cpu(p->des2);
+		ns += le32_to_cpu(p->des3) * 1000000000ULL;
 	}
 
 	return ns;
@@ -408,10 +408,11 @@ static int enh_desc_get_rx_timestamp_status(void *desc, u32 ats)
 {
 	if (ats) {
 		struct dma_extended_desc *p = (struct dma_extended_desc *)desc;
-		return (p->basic.des0 & RDES0_IPC_CSUM_ERROR) >> 7;
+		return (le32_to_cpu(p->basic.des0) & RDES0_IPC_CSUM_ERROR) >> 7;
 	} else {
 		struct dma_desc *p = (struct dma_desc *)desc;
-		if ((p->des2 == 0xffffffff) && (p->des3 == 0xffffffff))
+		if ((le32_to_cpu(p->des2) == 0xffffffff) &&
+		    (le32_to_cpu(p->des3) == 0xffffffff))
 			/* timestamp is corrupted, hence don't store it */
 			return 0;
 		else
diff --git a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
index 2beacd0d3043..fd78406e2e9a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
+++ b/drivers/net/ethernet/stmicro/stmmac/norm_desc.c
@@ -30,8 +30,8 @@ static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x,
 			       struct dma_desc *p, void __iomem *ioaddr)
 {
 	struct net_device_stats *stats = (struct net_device_stats *)data;
-	unsigned int tdes0 = p->des0;
-	unsigned int tdes1 = p->des1;
+	unsigned int tdes0 = le32_to_cpu(p->des0);
+	unsigned int tdes1 = le32_to_cpu(p->des1);
 	int ret = tx_done;
 
 	/* Get tx owner first */
@@ -77,7 +77,7 @@ static int ndesc_get_tx_status(void *data, struct stmmac_extra_stats *x,
 
 static int ndesc_get_tx_len(struct dma_desc *p)
 {
-	return (p->des1 & RDES1_BUFFER1_SIZE_MASK);
+	return (le32_to_cpu(p->des1) & RDES1_BUFFER1_SIZE_MASK);
 }
 
 /* This function verifies if each incoming frame has some errors
@@ -88,7 +88,7 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 			       struct dma_desc *p)
 {
 	int ret = good_frame;
-	unsigned int rdes0 = p->des0;
+	unsigned int rdes0 = le32_to_cpu(p->des0);
 	struct net_device_stats *stats = (struct net_device_stats *)data;
 
 	if (unlikely(rdes0 & RDES0_OWN))
@@ -141,8 +141,8 @@ static int ndesc_get_rx_status(void *data, struct stmmac_extra_stats *x,
 static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
 			       int end)
 {
-	p->des0 |= RDES0_OWN;
-	p->des1 |= (BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK;
+	p->des0 |= cpu_to_le32(RDES0_OWN);
+	p->des1 |= cpu_to_le32((BUF_SIZE_2KiB - 1) & RDES1_BUFFER1_SIZE_MASK);
 
 	if (mode == STMMAC_CHAIN_MODE)
 		ndesc_rx_set_on_chain(p, end);
@@ -150,12 +150,12 @@ static void ndesc_init_rx_desc(struct dma_desc *p, int disable_rx_ic, int mode,
 		ndesc_rx_set_on_ring(p, end);
 
 	if (disable_rx_ic)
-		p->des1 |= RDES1_DISABLE_IC;
+		p->des1 |= cpu_to_le32(RDES1_DISABLE_IC);
 }
 
 static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end)
 {
-	p->des0 &= ~TDES0_OWN;
+	p->des0 &= cpu_to_le32(~TDES0_OWN);
 	if (mode == STMMAC_CHAIN_MODE)
 		ndesc_tx_set_on_chain(p);
 	else
@@ -164,27 +164,27 @@ static void ndesc_init_tx_desc(struct dma_desc *p, int mode, int end)
 
 static int ndesc_get_tx_owner(struct dma_desc *p)
 {
-	return (p->des0 & TDES0_OWN) >> 31;
+	return (le32_to_cpu(p->des0) & TDES0_OWN) >> 31;
 }
 
 static void ndesc_set_tx_owner(struct dma_desc *p)
 {
-	p->des0 |= TDES0_OWN;
+	p->des0 |= cpu_to_le32(TDES0_OWN);
 }
 
 static void ndesc_set_rx_owner(struct dma_desc *p)
 {
-	p->des0 |= RDES0_OWN;
+	p->des0 |= cpu_to_le32(RDES0_OWN);
 }
 
 static int ndesc_get_tx_ls(struct dma_desc *p)
 {
-	return (p->des1 & TDES1_LAST_SEGMENT) >> 30;
+	return (le32_to_cpu(p->des1) & TDES1_LAST_SEGMENT) >> 30;
 }
 
 static void ndesc_release_tx_desc(struct dma_desc *p, int mode)
 {
-	int ter = (p->des1 & TDES1_END_RING) >> 25;
+	int ter = (le32_to_cpu(p->des1) & TDES1_END_RING) >> 25;
 
 	memset(p, 0, offsetof(struct dma_desc, des2));
 	if (mode == STMMAC_CHAIN_MODE)
@@ -197,7 +197,7 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 				  bool csum_flag, int mode, bool tx_own,
 				  bool ls)
 {
-	unsigned int tdes1 = p->des1;
+	unsigned int tdes1 = le32_to_cpu(p->des1);
 
 	if (is_fs)
 		tdes1 |= TDES1_FIRST_SEGMENT;
@@ -212,7 +212,7 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 	if (ls)
 		tdes1 |= TDES1_LAST_SEGMENT;
 
-	p->des1 = tdes1;
+	p->des1 = cpu_to_le32(tdes1);
 
 	if (mode == STMMAC_CHAIN_MODE)
 		norm_set_tx_desc_len_on_chain(p, len);
@@ -220,12 +220,12 @@ static void ndesc_prepare_tx_desc(struct dma_desc *p, int is_fs, int len,
 		norm_set_tx_desc_len_on_ring(p, len);
 
 	if (tx_own)
-		p->des0 |= TDES0_OWN;
+		p->des0 |= cpu_to_le32(TDES0_OWN);
 }
 
 static void ndesc_set_tx_ic(struct dma_desc *p)
 {
-	p->des1 |= TDES1_INTERRUPT;
+	p->des1 |= cpu_to_le32(TDES1_INTERRUPT);
 }
 
 static int ndesc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
@@ -241,19 +241,20 @@ static int ndesc_get_rx_frame_len(struct dma_desc *p, int rx_coe_type)
 	if (rx_coe_type == STMMAC_RX_COE_TYPE1)
 		csum = 2;
 
-	return (((p->des0 & RDES0_FRAME_LEN_MASK) >> RDES0_FRAME_LEN_SHIFT) -
+	return (((le32_to_cpu(p->des0) & RDES0_FRAME_LEN_MASK)
+				>> RDES0_FRAME_LEN_SHIFT) -
 		csum);
 
 }
 
 static void ndesc_enable_tx_timestamp(struct dma_desc *p)
 {
-	p->des1 |= TDES1_TIME_STAMP_ENABLE;
+	p->des1 |= cpu_to_le32(TDES1_TIME_STAMP_ENABLE);
 }
 
 static int ndesc_get_tx_timestamp_status(struct dma_desc *p)
 {
-	return (p->des0 & TDES0_TIME_STAMP_STATUS) >> 17;
+	return (le32_to_cpu(p->des0) & TDES0_TIME_STAMP_STATUS) >> 17;
 }
 
 static u64 ndesc_get_timestamp(void *desc, u32 ats)
@@ -261,9 +262,9 @@ static u64 ndesc_get_timestamp(void *desc, u32 ats)
 	struct dma_desc *p = (struct dma_desc *)desc;
 	u64 ns;
 
-	ns = p->des2;
+	ns = le32_to_cpu(p->des2);
 	/* convert high/sec time stamp value to nanosecond */
-	ns += p->des3 * 1000000000ULL;
+	ns += le32_to_cpu(p->des3) * 1000000000ULL;
 
 	return ns;
 }
@@ -272,7 +273,8 @@ static int ndesc_get_rx_timestamp_status(void *desc, u32 ats)
 {
 	struct dma_desc *p = (struct dma_desc *)desc;
 
-	if ((p->des2 == 0xffffffff) && (p->des3 == 0xffffffff))
+	if ((le32_to_cpu(p->des2) == 0xffffffff) &&
+	    (le32_to_cpu(p->des3) == 0xffffffff))
 		/* timestamp is corrupted, hence don't store it */
 		return 0;
 	else
diff --git a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
index 7723b5d2499a..9983ce9bd90d 100644
--- a/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
+++ b/drivers/net/ethernet/stmicro/stmmac/ring_mode.c
@@ -34,7 +34,7 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 	unsigned int entry = priv->cur_tx;
 	struct dma_desc *desc;
 	unsigned int nopaged_len = skb_headlen(skb);
-	unsigned int bmax, len;
+	unsigned int bmax, len, des2;
 
 	if (priv->extend_desc)
 		desc = (struct dma_desc *)(priv->dma_etx + entry);
@@ -50,16 +50,17 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 
 	if (nopaged_len > BUF_SIZE_8KiB) {
 
-		desc->des2 = dma_map_single(priv->device, skb->data,
-					    bmax, DMA_TO_DEVICE);
-		if (dma_mapping_error(priv->device, desc->des2))
+		des2 = dma_map_single(priv->device, skb->data, bmax,
+				      DMA_TO_DEVICE);
+		desc->des2 = cpu_to_le32(des2);
+		if (dma_mapping_error(priv->device, des2))
 			return -1;
 
-		priv->tx_skbuff_dma[entry].buf = desc->des2;
+		priv->tx_skbuff_dma[entry].buf = des2;
 		priv->tx_skbuff_dma[entry].len = bmax;
 		priv->tx_skbuff_dma[entry].is_jumbo = true;
 
-		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
+		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
 		priv->hw->desc->prepare_tx_desc(desc, 1, bmax, csum,
 						STMMAC_RING_MODE, 0, false);
 		priv->tx_skbuff[entry] = NULL;
@@ -70,26 +71,28 @@ static int stmmac_jumbo_frm(void *p, struct sk_buff *skb, int csum)
 		else
 			desc = priv->dma_tx + entry;
 
-		desc->des2 = dma_map_single(priv->device, skb->data + bmax,
-					    len, DMA_TO_DEVICE);
-		if (dma_mapping_error(priv->device, desc->des2))
+		des2 = dma_map_single(priv->device, skb->data + bmax, len,
+				      DMA_TO_DEVICE);
+		desc->des2 = cpu_to_le32(des2);
+		if (dma_mapping_error(priv->device, des2))
 			return -1;
-		priv->tx_skbuff_dma[entry].buf = desc->des2;
+		priv->tx_skbuff_dma[entry].buf = des2;
 		priv->tx_skbuff_dma[entry].len = len;
 		priv->tx_skbuff_dma[entry].is_jumbo = true;
 
-		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
+		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
 		priv->hw->desc->prepare_tx_desc(desc, 0, len, csum,
 						STMMAC_RING_MODE, 1, true);
 	} else {
-		desc->des2 = dma_map_single(priv->device, skb->data,
-					    nopaged_len, DMA_TO_DEVICE);
-		if (dma_mapping_error(priv->device, desc->des2))
+		des2 = dma_map_single(priv->device, skb->data,
+				      nopaged_len, DMA_TO_DEVICE);
+		desc->des2 = cpu_to_le32(des2);
+		if (dma_mapping_error(priv->device, des2))
 			return -1;
-		priv->tx_skbuff_dma[entry].buf = desc->des2;
+		priv->tx_skbuff_dma[entry].buf = des2;
 		priv->tx_skbuff_dma[entry].len = nopaged_len;
 		priv->tx_skbuff_dma[entry].is_jumbo = true;
-		desc->des3 = desc->des2 + BUF_SIZE_4KiB;
+		desc->des3 = cpu_to_le32(des2 + BUF_SIZE_4KiB);
 		priv->hw->desc->prepare_tx_desc(desc, 1, nopaged_len, csum,
 						STMMAC_RING_MODE, 0, true);
 	}
@@ -115,13 +118,13 @@ static void stmmac_refill_desc3(void *priv_ptr, struct dma_desc *p)
 
 	/* Fill DES3 in case of RING mode */
 	if (priv->dma_buf_sz >= BUF_SIZE_8KiB)
-		p->des3 = p->des2 + BUF_SIZE_8KiB;
+		p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
 }
 
 /* In ring mode we need to fill the desc3 because it is used as buffer */
 static void stmmac_init_desc3(struct dma_desc *p)
 {
-	p->des3 = p->des2 + BUF_SIZE_8KiB;
+	p->des3 = cpu_to_le32(le32_to_cpu(p->des2) + BUF_SIZE_8KiB);
 }
 
 static void stmmac_clean_desc3(void *priv_ptr, struct dma_desc *p)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac.h b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
index 4d2a759b8465..eab04aeeeb95 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac.h
@@ -64,7 +64,6 @@ struct stmmac_priv {
 	dma_addr_t dma_tx_phy;
 	int tx_coalesce;
 	int hwts_tx_en;
-	spinlock_t tx_lock;
 	bool tx_path_in_lpi_mode;
 	struct timer_list txtimer;
 	bool tso;
@@ -90,7 +89,6 @@ struct stmmac_priv {
 	struct mac_device_info *hw;
 	spinlock_t lock;
 
-	struct phy_device *phydev ____cacheline_aligned_in_smp;
 	int oldlink;
 	int speed;
 	int oldduplex;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
index c5d0142adda2..699ee1d30426 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ethtool.c
@@ -263,7 +263,7 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
 
-	if (priv->plat->has_gmac)
+	if (priv->plat->has_gmac || priv->plat->has_gmac4)
 		strlcpy(info->driver, GMAC_ETHTOOL_NAME, sizeof(info->driver));
 	else
 		strlcpy(info->driver, MAC100_ETHTOOL_NAME,
@@ -272,25 +272,26 @@ static void stmmac_ethtool_getdrvinfo(struct net_device *dev,
 	strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version));
 }
 
-static int stmmac_ethtool_getsettings(struct net_device *dev,
-				      struct ethtool_cmd *cmd)
+static int stmmac_ethtool_get_link_ksettings(struct net_device *dev,
+					     struct ethtool_link_ksettings *cmd)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	struct phy_device *phy = priv->phydev;
+	struct phy_device *phy = dev->phydev;
 	int rc;
 
 	if (priv->hw->pcs & STMMAC_PCS_RGMII ||
 	    priv->hw->pcs & STMMAC_PCS_SGMII) {
 		struct rgmii_adv adv;
+		u32 supported, advertising, lp_advertising;
 
 		if (!priv->xstats.pcs_link) {
-			ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN);
-			cmd->duplex = DUPLEX_UNKNOWN;
+			cmd->base.speed = SPEED_UNKNOWN;
+			cmd->base.duplex = DUPLEX_UNKNOWN;
 			return 0;
 		}
-		cmd->duplex = priv->xstats.pcs_duplex;
+		cmd->base.duplex = priv->xstats.pcs_duplex;
 
-		ethtool_cmd_speed_set(cmd, priv->xstats.pcs_speed);
+		cmd->base.speed = priv->xstats.pcs_speed;
 
 		/* Get and convert ADV/LP_ADV from the HW AN registers */
 		if (!priv->hw->mac->pcs_get_adv_lp)
@@ -300,45 +301,59 @@ static int stmmac_ethtool_getsettings(struct net_device *dev,
 
 		/* Encoding of PSE bits is defined in 802.3z, 37.2.1.4 */
 
+		ethtool_convert_link_mode_to_legacy_u32(
+			&supported, cmd->link_modes.supported);
+		ethtool_convert_link_mode_to_legacy_u32(
+			&advertising, cmd->link_modes.advertising);
+		ethtool_convert_link_mode_to_legacy_u32(
+			&lp_advertising, cmd->link_modes.lp_advertising);
+
 		if (adv.pause & STMMAC_PCS_PAUSE)
-			cmd->advertising |= ADVERTISED_Pause;
+			advertising |= ADVERTISED_Pause;
 		if (adv.pause & STMMAC_PCS_ASYM_PAUSE)
-			cmd->advertising |= ADVERTISED_Asym_Pause;
+			advertising |= ADVERTISED_Asym_Pause;
 		if (adv.lp_pause & STMMAC_PCS_PAUSE)
-			cmd->lp_advertising |= ADVERTISED_Pause;
+			lp_advertising |= ADVERTISED_Pause;
 		if (adv.lp_pause & STMMAC_PCS_ASYM_PAUSE)
-			cmd->lp_advertising |= ADVERTISED_Asym_Pause;
+			lp_advertising |= ADVERTISED_Asym_Pause;
 
 		/* Reg49[3] always set because ANE is always supported */
-		cmd->autoneg = ADVERTISED_Autoneg;
-		cmd->supported |= SUPPORTED_Autoneg;
-		cmd->advertising |= ADVERTISED_Autoneg;
-		cmd->lp_advertising |= ADVERTISED_Autoneg;
+		cmd->base.autoneg = AUTONEG_ENABLE; /* not a link-mode bit */
+		supported |= SUPPORTED_Autoneg;
+		advertising |= ADVERTISED_Autoneg;
+		lp_advertising |= ADVERTISED_Autoneg;
 
 		if (adv.duplex) {
-			cmd->supported |= (SUPPORTED_1000baseT_Full |
-					   SUPPORTED_100baseT_Full |
-					   SUPPORTED_10baseT_Full);
-			cmd->advertising |= (ADVERTISED_1000baseT_Full |
-					     ADVERTISED_100baseT_Full |
-					     ADVERTISED_10baseT_Full);
+			supported |= (SUPPORTED_1000baseT_Full |
+				      SUPPORTED_100baseT_Full |
+				      SUPPORTED_10baseT_Full);
+			advertising |= (ADVERTISED_1000baseT_Full |
+					ADVERTISED_100baseT_Full |
+					ADVERTISED_10baseT_Full);
 		} else {
-			cmd->supported |= (SUPPORTED_1000baseT_Half |
-					   SUPPORTED_100baseT_Half |
-					   SUPPORTED_10baseT_Half);
-			cmd->advertising |= (ADVERTISED_1000baseT_Half |
-					     ADVERTISED_100baseT_Half |
-					     ADVERTISED_10baseT_Half);
+			supported |= (SUPPORTED_1000baseT_Half |
+				      SUPPORTED_100baseT_Half |
+				      SUPPORTED_10baseT_Half);
+			advertising |= (ADVERTISED_1000baseT_Half |
+					ADVERTISED_100baseT_Half |
+					ADVERTISED_10baseT_Half);
 		}
 		if (adv.lp_duplex)
-			cmd->lp_advertising |= (ADVERTISED_1000baseT_Full |
-						ADVERTISED_100baseT_Full |
-						ADVERTISED_10baseT_Full);
+			lp_advertising |= (ADVERTISED_1000baseT_Full |
+					   ADVERTISED_100baseT_Full |
+					   ADVERTISED_10baseT_Full);
 		else
-			cmd->lp_advertising |= (ADVERTISED_1000baseT_Half |
-						ADVERTISED_100baseT_Half |
-						ADVERTISED_10baseT_Half);
-		cmd->port = PORT_OTHER;
+			lp_advertising |= (ADVERTISED_1000baseT_Half |
+					   ADVERTISED_100baseT_Half |
+					   ADVERTISED_10baseT_Half);
+		cmd->base.port = PORT_OTHER;
+
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.supported, supported);
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.advertising, advertising);
+		ethtool_convert_legacy_u32_to_link_mode(
+			cmd->link_modes.lp_advertising, lp_advertising);
 
 		return 0;
 	}
@@ -353,16 +368,16 @@ static int stmmac_ethtool_getsettings(struct net_device *dev,
 		"link speed / duplex setting\n", dev->name);
 		return -EBUSY;
 	}
-	cmd->transceiver = XCVR_INTERNAL;
-	rc = phy_ethtool_gset(phy, cmd);
+	rc = phy_ethtool_ksettings_get(phy, cmd);
 	return rc;
 }
 
-static int stmmac_ethtool_setsettings(struct net_device *dev,
-				      struct ethtool_cmd *cmd)
+static int
+stmmac_ethtool_set_link_ksettings(struct net_device *dev,
+				  const struct ethtool_link_ksettings *cmd)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	struct phy_device *phy = priv->phydev;
+	struct phy_device *phy = dev->phydev;
 	int rc;
 
 	if (priv->hw->pcs & STMMAC_PCS_RGMII ||
@@ -370,7 +385,7 @@ static int stmmac_ethtool_setsettings(struct net_device *dev,
 		u32 mask = ADVERTISED_Autoneg | ADVERTISED_Pause;
 
 		/* Only support ANE */
-		if (cmd->autoneg != AUTONEG_ENABLE)
+		if (cmd->base.autoneg != AUTONEG_ENABLE)
 			return -EINVAL;
 
 		mask &= (ADVERTISED_1000baseT_Half |
@@ -391,9 +406,7 @@ static int stmmac_ethtool_setsettings(struct net_device *dev,
 		return 0;
 	}
 
-	spin_lock(&priv->lock);
-	rc = phy_ethtool_sset(phy, cmd);
-	spin_unlock(&priv->lock);
+	rc = phy_ethtool_ksettings_set(phy, cmd);
 
 	return rc;
 }
@@ -433,7 +446,7 @@ static void stmmac_ethtool_gregs(struct net_device *dev,
 
 	memset(reg_space, 0x0, REG_SPACE_SIZE);
 
-	if (!priv->plat->has_gmac) {
+	if (!(priv->plat->has_gmac || priv->plat->has_gmac4)) {
 		/* MAC registers */
 		for (i = 0; i < 12; i++)
 			reg_space[i] = readl(priv->ioaddr + (i * 4));
@@ -471,12 +484,12 @@ stmmac_get_pauseparam(struct net_device *netdev,
 		if (!adv_lp.pause)
 			return;
 	} else {
-		if (!(priv->phydev->supported & SUPPORTED_Pause) ||
-		    !(priv->phydev->supported & SUPPORTED_Asym_Pause))
+		if (!(netdev->phydev->supported & SUPPORTED_Pause) ||
+		    !(netdev->phydev->supported & SUPPORTED_Asym_Pause))
 			return;
 	}
 
-	pause->autoneg = priv->phydev->autoneg;
+	pause->autoneg = netdev->phydev->autoneg;
 
 	if (priv->flow_ctrl & FLOW_RX)
 		pause->rx_pause = 1;
@@ -490,7 +503,7 @@ stmmac_set_pauseparam(struct net_device *netdev,
 		      struct ethtool_pauseparam *pause)
 {
 	struct stmmac_priv *priv = netdev_priv(netdev);
-	struct phy_device *phy = priv->phydev;
+	struct phy_device *phy = netdev->phydev;
 	int new_pause = FLOW_OFF;
 
 	if (priv->hw->pcs && priv->hw->mac->pcs_get_adv_lp) {
@@ -550,7 +563,7 @@ static void stmmac_get_ethtool_stats(struct net_device *dev,
 			}
 		}
 		if (priv->eee_enabled) {
-			int val = phy_get_eee_err(priv->phydev);
+			int val = phy_get_eee_err(dev->phydev);
 			if (val)
 				priv->xstats.phy_eee_wakeup_error_n = val;
 		}
@@ -669,7 +682,7 @@ static int stmmac_ethtool_op_get_eee(struct net_device *dev,
 	edata->eee_active = priv->eee_active;
 	edata->tx_lpi_timer = priv->tx_lpi_timer;
 
-	return phy_ethtool_get_eee(priv->phydev, edata);
+	return phy_ethtool_get_eee(dev->phydev, edata);
 }
 
 static int stmmac_ethtool_op_set_eee(struct net_device *dev,
@@ -694,7 +707,7 @@ static int stmmac_ethtool_op_set_eee(struct net_device *dev,
 		priv->tx_lpi_timer = edata->tx_lpi_timer;
 	}
 
-	return phy_ethtool_set_eee(priv->phydev, edata);
+	return phy_ethtool_set_eee(dev->phydev, edata);
 }
 
 static u32 stmmac_usec2riwt(u32 usec, struct stmmac_priv *priv)
@@ -853,13 +866,12 @@ static int stmmac_set_tunable(struct net_device *dev,
 static const struct ethtool_ops stmmac_ethtool_ops = {
 	.begin = stmmac_check_if_running,
 	.get_drvinfo = stmmac_ethtool_getdrvinfo,
-	.get_settings = stmmac_ethtool_getsettings,
-	.set_settings = stmmac_ethtool_setsettings,
 	.get_msglevel = stmmac_ethtool_getmsglevel,
 	.set_msglevel = stmmac_ethtool_setmsglevel,
 	.get_regs = stmmac_ethtool_gregs,
 	.get_regs_len = stmmac_ethtool_get_regs_len,
 	.get_link = ethtool_op_get_link,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_pauseparam = stmmac_get_pauseparam,
 	.set_pauseparam = stmmac_set_pauseparam,
 	.get_ethtool_stats = stmmac_get_ethtool_stats,
@@ -874,6 +886,8 @@ static const struct ethtool_ops stmmac_ethtool_ops = {
 	.set_coalesce = stmmac_set_coalesce,
 	.get_tunable = stmmac_get_tunable,
 	.set_tunable = stmmac_set_tunable,
+	.get_link_ksettings = stmmac_ethtool_get_link_ksettings,
+	.set_link_ksettings = stmmac_ethtool_set_link_ksettings,
 };
 
 void stmmac_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 1f9ec02fa7f8..3e405785b81c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -105,8 +105,8 @@ module_param(eee_timer, int, S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(eee_timer, "LPI tx expiration time in msec");
 #define STMMAC_LPI_T(x) (jiffies + msecs_to_jiffies(x))
 
-/* By default the driver will use the ring mode to manage tx and rx descriptors
- * but passing this value so user can force to use the chain instead of the ring
+/* By default the driver uses ring mode to manage the tx and rx descriptors,
+ * but the user can force chain mode instead via this parameter.
  */
 static unsigned int chain_mode;
 module_param(chain_mode, int, S_IRUGO);
@@ -221,7 +221,8 @@ static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv)
  */
 static inline void stmmac_hw_fix_mac_speed(struct stmmac_priv *priv)
 {
-	struct phy_device *phydev = priv->phydev;
+	struct net_device *ndev = priv->dev;
+	struct phy_device *phydev = ndev->phydev;
 
 	if (likely(priv->plat->fix_mac_speed))
 		priv->plat->fix_mac_speed(priv->plat->bsp_priv, phydev->speed);
@@ -279,6 +280,7 @@ static void stmmac_eee_ctrl_timer(unsigned long arg)
  */
 bool stmmac_eee_init(struct stmmac_priv *priv)
 {
+	struct net_device *ndev = priv->dev;
 	unsigned long flags;
 	bool ret = false;
 
@@ -295,7 +297,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 		int tx_lpi_timer = priv->tx_lpi_timer;
 
 		/* Check if the PHY supports EEE */
-		if (phy_init_eee(priv->phydev, 1)) {
+		if (phy_init_eee(ndev->phydev, 1)) {
 			/* To manage at run-time if the EEE cannot be supported
 			 * anymore (for example because the lp caps have been
 			 * changed).
@@ -303,7 +305,7 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 			 */
 			spin_lock_irqsave(&priv->lock, flags);
 			if (priv->eee_active) {
-				pr_debug("stmmac: disable EEE\n");
+				netdev_dbg(priv->dev, "disable EEE\n");
 				del_timer_sync(&priv->eee_ctrl_timer);
 				priv->hw->mac->set_eee_timer(priv->hw, 0,
 							     tx_lpi_timer);
@@ -327,12 +329,12 @@ bool stmmac_eee_init(struct stmmac_priv *priv)
 						     tx_lpi_timer);
 		}
 		/* Set HW EEE according to the speed */
-		priv->hw->mac->set_eee_pls(priv->hw, priv->phydev->link);
+		priv->hw->mac->set_eee_pls(priv->hw, ndev->phydev->link);
 
 		ret = true;
 		spin_unlock_irqrestore(&priv->lock, flags);
 
-		pr_debug("stmmac: Energy-Efficient Ethernet initialized\n");
+		netdev_dbg(priv->dev, "Energy-Efficient Ethernet initialized\n");
 	}
 out:
 	return ret;
@@ -450,8 +452,8 @@ static int stmmac_hwtstamp_ioctl(struct net_device *dev, struct ifreq *ifr)
 			   sizeof(struct hwtstamp_config)))
 		return -EFAULT;
 
-	pr_debug("%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n",
-		 __func__, config.flags, config.tx_type, config.rx_filter);
+	netdev_dbg(priv->dev, "%s config flags:0x%x, tx_type:0x%x, rx_filter:0x%x\n",
+		   __func__, config.flags, config.tx_type, config.rx_filter);
 
 	/* reserved for future extensions */
 	if (config.flags)
@@ -697,7 +699,7 @@ static void stmmac_release_ptp(struct stmmac_priv *priv)
 static void stmmac_adjust_link(struct net_device *dev)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	struct phy_device *phydev = priv->phydev;
+	struct phy_device *phydev = dev->phydev;
 	unsigned long flags;
 	int new_state = 0;
 	unsigned int fc = priv->flow_ctrl, pause_time = priv->pause;
@@ -750,9 +752,9 @@ static void stmmac_adjust_link(struct net_device *dev)
 				stmmac_hw_fix_mac_speed(priv);
 				break;
 			default:
-				if (netif_msg_link(priv))
-					pr_warn("%s: Speed (%d) not 10/100\n",
-						dev->name, phydev->speed);
+				netif_warn(priv, link, priv->dev,
+					   "Speed (%d) not 10/100\n",
+					   phydev->speed);
 				break;
 			}
 
@@ -805,10 +807,10 @@ static void stmmac_check_pcs_mode(struct stmmac_priv *priv)
 		    (interface == PHY_INTERFACE_MODE_RGMII_ID) ||
 		    (interface == PHY_INTERFACE_MODE_RGMII_RXID) ||
 		    (interface == PHY_INTERFACE_MODE_RGMII_TXID)) {
-			pr_debug("STMMAC: PCS RGMII support enable\n");
+			netdev_dbg(priv->dev, "PCS RGMII support enabled\n");
 			priv->hw->pcs = STMMAC_PCS_RGMII;
 		} else if (interface == PHY_INTERFACE_MODE_SGMII) {
-			pr_debug("STMMAC: PCS SGMII support enable\n");
+			netdev_dbg(priv->dev, "PCS SGMII support enabled\n");
 			priv->hw->pcs = STMMAC_PCS_SGMII;
 		}
 	}
@@ -843,15 +845,15 @@ static int stmmac_init_phy(struct net_device *dev)
 
 		snprintf(phy_id_fmt, MII_BUS_ID_SIZE + 3, PHY_ID_FMT, bus_id,
 			 priv->plat->phy_addr);
-		pr_debug("stmmac_init_phy:  trying to attach to %s\n",
-			 phy_id_fmt);
+		netdev_dbg(priv->dev, "%s: trying to attach to %s\n", __func__,
+			   phy_id_fmt);
 
 		phydev = phy_connect(dev, phy_id_fmt, &stmmac_adjust_link,
 				     interface);
 	}
 
 	if (IS_ERR_OR_NULL(phydev)) {
-		pr_err("%s: Could not attach to PHY\n", dev->name);
+		netdev_err(priv->dev, "Could not attach to PHY\n");
 		if (!phydev)
 			return -ENODEV;
 
@@ -884,10 +886,8 @@ static int stmmac_init_phy(struct net_device *dev)
 	if (phydev->is_pseudo_fixed_link)
 		phydev->irq = PHY_POLL;
 
-	pr_debug("stmmac_init_phy:  %s: attached to PHY (UID 0x%x)"
-		 " Link = %d\n", dev->name, phydev->phy_id, phydev->link);
-
-	priv->phydev = phydev;
+	netdev_dbg(priv->dev, "%s: attached to PHY (UID 0x%x) Link = %d\n",
+		   __func__, phydev->phy_id, phydev->link);
 
 	return 0;
 }
@@ -973,7 +973,8 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 
 	skb = __netdev_alloc_skb_ip_align(priv->dev, priv->dma_buf_sz, flags);
 	if (!skb) {
-		pr_err("%s: Rx init fails; skb is NULL\n", __func__);
+		netdev_err(priv->dev,
+			   "%s: Rx init fails; skb is NULL\n", __func__);
 		return -ENOMEM;
 	}
 	priv->rx_skbuff[i] = skb;
@@ -981,15 +982,15 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
 						priv->dma_buf_sz,
 						DMA_FROM_DEVICE);
 	if (dma_mapping_error(priv->device, priv->rx_skbuff_dma[i])) {
-		pr_err("%s: DMA mapping error\n", __func__);
+		netdev_err(priv->dev, "%s: DMA mapping error\n", __func__);
 		dev_kfree_skb_any(skb);
 		return -EINVAL;
 	}
 
 	if (priv->synopsys_id >= DWMAC_CORE_4_00)
-		p->des0 = priv->rx_skbuff_dma[i];
+		p->des0 = cpu_to_le32(priv->rx_skbuff_dma[i]);
 	else
-		p->des2 = priv->rx_skbuff_dma[i];
+		p->des2 = cpu_to_le32(priv->rx_skbuff_dma[i]);
 
 	if ((priv->hw->mode->init_desc3) &&
 	    (priv->dma_buf_sz == BUF_SIZE_16KiB))
@@ -1031,13 +1032,14 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 
 	priv->dma_buf_sz = bfsize;
 
-	if (netif_msg_probe(priv)) {
-		pr_debug("(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n", __func__,
-			 (u32) priv->dma_rx_phy, (u32) priv->dma_tx_phy);
+	netif_dbg(priv, probe, priv->dev,
+		  "(%s) dma_rx_phy=0x%08x dma_tx_phy=0x%08x\n",
+		  __func__, (u32)priv->dma_rx_phy, (u32)priv->dma_tx_phy);
+
+	/* RX INITIALIZATION */
+	netif_dbg(priv, probe, priv->dev,
+		  "SKB addresses:\nskb\t\tskb data\tdma data\n");
 
-		/* RX INITIALIZATION */
-		pr_debug("\tSKB addresses:\nskb\t\tskb data\tdma data\n");
-	}
 	for (i = 0; i < DMA_RX_SIZE; i++) {
 		struct dma_desc *p;
 		if (priv->extend_desc)
@@ -1049,10 +1051,9 @@ static int init_dma_desc_rings(struct net_device *dev, gfp_t flags)
 		if (ret)
 			goto err_init_rx_buffers;
 
-		if (netif_msg_probe(priv))
-			pr_debug("[%p]\t[%p]\t[%x]\n", priv->rx_skbuff[i],
-				 priv->rx_skbuff[i]->data,
-				 (unsigned int)priv->rx_skbuff_dma[i]);
+		netif_dbg(priv, probe, priv->dev, "[%p]\t[%p]\t[%x]\n",
+			  priv->rx_skbuff[i], priv->rx_skbuff[i]->data,
+			  (unsigned int)priv->rx_skbuff_dma[i]);
 	}
 	priv->cur_rx = 0;
 	priv->dirty_rx = (unsigned int)(i - DMA_RX_SIZE);
@@ -1307,7 +1308,7 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 	unsigned int bytes_compl = 0, pkts_compl = 0;
 	unsigned int entry = priv->dirty_tx;
 
-	spin_lock(&priv->tx_lock);
+	netif_tx_lock(priv->dev);
 
 	priv->xstats.tx_clean++;
 
@@ -1378,22 +1379,17 @@ static void stmmac_tx_clean(struct stmmac_priv *priv)
 	netdev_completed_queue(priv->dev, pkts_compl, bytes_compl);
 
 	if (unlikely(netif_queue_stopped(priv->dev) &&
-		     stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
-		netif_tx_lock(priv->dev);
-		if (netif_queue_stopped(priv->dev) &&
-		    stmmac_tx_avail(priv) > STMMAC_TX_THRESH) {
-			if (netif_msg_tx_done(priv))
-				pr_debug("%s: restart transmit\n", __func__);
-			netif_wake_queue(priv->dev);
-		}
-		netif_tx_unlock(priv->dev);
+	    stmmac_tx_avail(priv) > STMMAC_TX_THRESH)) {
+		netif_dbg(priv, tx_done, priv->dev,
+			  "%s: restart transmit\n", __func__);
+		netif_wake_queue(priv->dev);
 	}
 
 	if ((priv->eee_enabled) && (!priv->tx_path_in_lpi_mode)) {
 		stmmac_enable_eee_mode(priv);
 		mod_timer(&priv->eee_ctrl_timer, STMMAC_LPI_T(eee_timer));
 	}
-	spin_unlock(&priv->tx_lock);
+	netif_tx_unlock(priv->dev);
 }
 
 static inline void stmmac_enable_dma_irq(struct stmmac_priv *priv)
@@ -1497,7 +1493,7 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
 		dwmac_mmc_ctrl(priv->mmcaddr, mode);
 		memset(&priv->mmc, 0, sizeof(struct stmmac_counters));
 	} else
-		pr_info(" No MAC Management Counters available\n");
+		netdev_info(priv->dev, "No MAC Management Counters available\n");
 }
 
 /**
@@ -1510,18 +1506,18 @@ static void stmmac_mmc_setup(struct stmmac_priv *priv)
 static void stmmac_selec_desc_mode(struct stmmac_priv *priv)
 {
 	if (priv->plat->enh_desc) {
-		pr_info(" Enhanced/Alternate descriptors\n");
+		dev_info(priv->device, "Enhanced/Alternate descriptors\n");
 
 		/* GMAC older than 3.50 has no extended descriptors */
 		if (priv->synopsys_id >= DWMAC_CORE_3_50) {
-			pr_info("\tEnabled extended descriptors\n");
+			dev_info(priv->device, "Enabled extended descriptors\n");
 			priv->extend_desc = 1;
 		} else
-			pr_warn("Extended descriptors not supported\n");
+			dev_warn(priv->device, "Extended descriptors not supported\n");
 
 		priv->hw->desc = &enh_desc_ops;
 	} else {
-		pr_info(" Normal descriptors\n");
+		dev_info(priv->device, "Normal descriptors\n");
 		priv->hw->desc = &ndesc_ops;
 	}
 }
@@ -1562,8 +1558,8 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
 					     priv->dev->dev_addr, 0);
 		if (!is_valid_ether_addr(priv->dev->dev_addr))
 			eth_hw_addr_random(priv->dev);
-		pr_info("%s: device MAC address %pM\n", priv->dev->name,
-			priv->dev->dev_addr);
+		netdev_info(priv->dev, "device MAC address %pM\n",
+			    priv->dev->dev_addr);
 	}
 }
 
@@ -1577,16 +1573,12 @@ static void stmmac_check_ether_addr(struct stmmac_priv *priv)
  */
 static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 {
-	int pbl = DEFAULT_DMA_PBL, fixed_burst = 0, aal = 0;
-	int mixed_burst = 0;
 	int atds = 0;
 	int ret = 0;
 
-	if (priv->plat->dma_cfg) {
-		pbl = priv->plat->dma_cfg->pbl;
-		fixed_burst = priv->plat->dma_cfg->fixed_burst;
-		mixed_burst = priv->plat->dma_cfg->mixed_burst;
-		aal = priv->plat->dma_cfg->aal;
+	if (!priv->plat->dma_cfg || !priv->plat->dma_cfg->pbl) {
+		dev_err(priv->device, "Invalid DMA configuration\n");
+		return -EINVAL;
 	}
 
 	if (priv->extend_desc && (priv->mode == STMMAC_RING_MODE))
@@ -1598,8 +1590,8 @@ static int stmmac_init_dma_engine(struct stmmac_priv *priv)
 		return ret;
 	}
 
-	priv->hw->dma->init(priv->ioaddr, pbl, fixed_burst, mixed_burst,
-			    aal, priv->dma_tx_phy, priv->dma_rx_phy, atds);
+	priv->hw->dma->init(priv->ioaddr, priv->plat->dma_cfg,
+			    priv->dma_tx_phy, priv->dma_rx_phy, atds);
 
 	if (priv->synopsys_id >= DWMAC_CORE_4_00) {
 		priv->rx_tail_addr = priv->dma_rx_phy +
@@ -1671,7 +1663,8 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 	/* DMA initialization and SW reset */
 	ret = stmmac_init_dma_engine(priv);
 	if (ret < 0) {
-		pr_err("%s: DMA engine initialization failed\n", __func__);
+		netdev_err(priv->dev, "%s: DMA engine initialization failed\n",
+			   __func__);
 		return ret;
 	}
 
@@ -1700,7 +1693,7 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 
 	ret = priv->hw->mac->rx_ipc(priv->hw);
 	if (!ret) {
-		pr_warn(" RX IPC Checksum Offload disabled\n");
+		netdev_warn(priv->dev, "RX IPC Checksum Offload disabled\n");
 		priv->plat->rx_coe = STMMAC_RX_COE_NONE;
 		priv->hw->rx_csum = 0;
 	}
@@ -1725,10 +1718,11 @@ static int stmmac_hw_setup(struct net_device *dev, bool init_ptp)
 #ifdef CONFIG_DEBUG_FS
 	ret = stmmac_init_fs(dev);
 	if (ret < 0)
-		pr_warn("%s: failed debugFS registration\n", __func__);
+		netdev_warn(priv->dev, "%s: failed debugFS registration\n",
+			    __func__);
 #endif
 	/* Start the ball rolling... */
-	pr_debug("%s: DMA RX/TX processes started...\n", dev->name);
+	netdev_dbg(priv->dev, "DMA RX/TX processes started...\n");
 	priv->hw->dma->start_tx(priv->ioaddr);
 	priv->hw->dma->start_rx(priv->ioaddr);
 
@@ -1783,8 +1777,9 @@ static int stmmac_open(struct net_device *dev)
 	    priv->hw->pcs != STMMAC_PCS_RTBI) {
 		ret = stmmac_init_phy(dev);
 		if (ret) {
-			pr_err("%s: Cannot attach to PHY (error: %d)\n",
-			       __func__, ret);
+			netdev_err(priv->dev,
+				   "%s: Cannot attach to PHY (error: %d)\n",
+				   __func__, ret);
 			return ret;
 		}
 	}
@@ -1798,33 +1793,36 @@ static int stmmac_open(struct net_device *dev)
 
 	ret = alloc_dma_desc_resources(priv);
 	if (ret < 0) {
-		pr_err("%s: DMA descriptors allocation failed\n", __func__);
+		netdev_err(priv->dev, "%s: DMA descriptors allocation failed\n",
+			   __func__);
 		goto dma_desc_error;
 	}
 
 	ret = init_dma_desc_rings(dev, GFP_KERNEL);
 	if (ret < 0) {
-		pr_err("%s: DMA descriptors initialization failed\n", __func__);
+		netdev_err(priv->dev, "%s: DMA descriptors initialization failed\n",
+			   __func__);
 		goto init_error;
 	}
 
 	ret = stmmac_hw_setup(dev, true);
 	if (ret < 0) {
-		pr_err("%s: Hw setup failed\n", __func__);
+		netdev_err(priv->dev, "%s: Hw setup failed\n", __func__);
 		goto init_error;
 	}
 
 	stmmac_init_tx_coalesce(priv);
 
-	if (priv->phydev)
-		phy_start(priv->phydev);
+	if (dev->phydev)
+		phy_start(dev->phydev);
 
 	/* Request the IRQ lines */
 	ret = request_irq(dev->irq, stmmac_interrupt,
 			  IRQF_SHARED, dev->name, dev);
 	if (unlikely(ret < 0)) {
-		pr_err("%s: ERROR: allocating the IRQ %d (error: %d)\n",
-		       __func__, dev->irq, ret);
+		netdev_err(priv->dev,
+			   "%s: ERROR: allocating the IRQ %d (error: %d)\n",
+			   __func__, dev->irq, ret);
 		goto init_error;
 	}
 
@@ -1833,8 +1831,9 @@ static int stmmac_open(struct net_device *dev)
 		ret = request_irq(priv->wol_irq, stmmac_interrupt,
 				  IRQF_SHARED, dev->name, dev);
 		if (unlikely(ret < 0)) {
-			pr_err("%s: ERROR: allocating the WoL IRQ %d (%d)\n",
-			       __func__, priv->wol_irq, ret);
+			netdev_err(priv->dev,
+				   "%s: ERROR: allocating the WoL IRQ %d (%d)\n",
+				   __func__, priv->wol_irq, ret);
 			goto wolirq_error;
 		}
 	}
@@ -1844,8 +1843,9 @@ static int stmmac_open(struct net_device *dev)
 		ret = request_irq(priv->lpi_irq, stmmac_interrupt, IRQF_SHARED,
 				  dev->name, dev);
 		if (unlikely(ret < 0)) {
-			pr_err("%s: ERROR: allocating the LPI IRQ %d (%d)\n",
-			       __func__, priv->lpi_irq, ret);
+			netdev_err(priv->dev,
+				   "%s: ERROR: allocating the LPI IRQ %d (%d)\n",
+				   __func__, priv->lpi_irq, ret);
 			goto lpiirq_error;
 		}
 	}
@@ -1864,8 +1864,8 @@ wolirq_error:
 init_error:
 	free_dma_desc_resources(priv);
 dma_desc_error:
-	if (priv->phydev)
-		phy_disconnect(priv->phydev);
+	if (dev->phydev)
+		phy_disconnect(dev->phydev);
 
 	return ret;
 }
@@ -1884,10 +1884,9 @@ static int stmmac_release(struct net_device *dev)
 		del_timer_sync(&priv->eee_ctrl_timer);
 
 	/* Stop and disconnect the PHY */
-	if (priv->phydev) {
-		phy_stop(priv->phydev);
-		phy_disconnect(priv->phydev);
-		priv->phydev = NULL;
+	if (dev->phydev) {
+		phy_stop(dev->phydev);
+		phy_disconnect(dev->phydev);
 	}
 
 	netif_stop_queue(dev);
@@ -1947,7 +1946,7 @@ static void stmmac_tso_allocator(struct stmmac_priv *priv, unsigned int des,
 		priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
 		desc = priv->dma_tx + priv->cur_tx;
 
-		desc->des0 = des + (total_len - tmp_len);
+		desc->des0 = cpu_to_le32(des + (total_len - tmp_len));
 		buff_size = tmp_len >= TSO_MAX_BUFF_SIZE ?
 			    TSO_MAX_BUFF_SIZE : tmp_len;
 
@@ -1998,8 +1997,6 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	u8 proto_hdr_len;
 	int i;
 
-	spin_lock(&priv->tx_lock);
-
 	/* Compute header lengths */
 	proto_hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 
@@ -2009,9 +2006,10 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 			/* This is a hard error, log it. */
-			pr_err("%s: Tx Ring full when queue awake\n", __func__);
+			netdev_err(priv->dev,
+				   "%s: Tx Ring full when queue awake\n",
+				   __func__);
 		}
-		spin_unlock(&priv->tx_lock);
 		return NETDEV_TX_BUSY;
 	}
 
@@ -2049,11 +2047,11 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->tx_skbuff_dma[first_entry].len = skb_headlen(skb);
 	priv->tx_skbuff[first_entry] = skb;
 
-	first->des0 = des;
+	first->des0 = cpu_to_le32(des);
 
 	/* Fill start of payload in buff2 of first descriptor */
 	if (pay_len)
-		first->des1 =  des + proto_hdr_len;
+		first->des1 = cpu_to_le32(des + proto_hdr_len);
 
 	/* If needed take extra descriptors to fill the remaining payload */
 	tmp_pay_len = pay_len - TSO_MAX_BUFF_SIZE;
@@ -2082,8 +2080,8 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->cur_tx = STMMAC_GET_ENTRY(priv->cur_tx, DMA_TX_SIZE);
 
 	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
-		if (netif_msg_hw(priv))
-			pr_debug("%s: stop transmitted packets\n", __func__);
+		netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
+			  __func__);
 		netif_stop_queue(dev);
 	}
 
@@ -2146,11 +2144,9 @@ static netdev_tx_t stmmac_tso_xmit(struct sk_buff *skb, struct net_device *dev)
 	priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
 				       STMMAC_CHAN0);
 
-	spin_unlock(&priv->tx_lock);
 	return NETDEV_TX_OK;
 
 dma_map_err:
-	spin_unlock(&priv->tx_lock);
 	dev_err(priv->device, "Tx dma map failed\n");
 	dev_kfree_skb(skb);
 	priv->dev->stats.tx_dropped++;
@@ -2182,14 +2178,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 			return stmmac_tso_xmit(skb, dev);
 	}
 
-	spin_lock(&priv->tx_lock);
-
 	if (unlikely(stmmac_tx_avail(priv) < nfrags + 1)) {
-		spin_unlock(&priv->tx_lock);
 		if (!netif_queue_stopped(dev)) {
 			netif_stop_queue(dev);
 			/* This is a hard error, log it. */
-			pr_err("%s: Tx Ring full when queue awake\n", __func__);
+			netdev_err(priv->dev,
+				   "%s: Tx Ring full when queue awake\n",
+				   __func__);
 		}
 		return NETDEV_TX_BUSY;
 	}
@@ -2242,13 +2237,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		priv->tx_skbuff[entry] = NULL;
 
-		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-			desc->des0 = des;
-			priv->tx_skbuff_dma[entry].buf = desc->des0;
-		} else {
-			desc->des2 = des;
-			priv->tx_skbuff_dma[entry].buf = desc->des2;
-		}
+		priv->tx_skbuff_dma[entry].buf = des;
+		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
+			desc->des0 = cpu_to_le32(des);
+		else
+			desc->des2 = cpu_to_le32(des);
 
 		priv->tx_skbuff_dma[entry].map_as_page = true;
 		priv->tx_skbuff_dma[entry].len = len;
@@ -2266,9 +2259,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (netif_msg_pktdata(priv)) {
 		void *tx_head;
 
-		pr_debug("%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d",
-			 __func__, priv->cur_tx, priv->dirty_tx, first_entry,
-			 entry, first, nfrags);
+		netdev_dbg(priv->dev,
+			   "%s: curr=%d dirty=%d f=%d, e=%d, first=%p, nfrags=%d",
+			   __func__, priv->cur_tx, priv->dirty_tx, first_entry,
+			   entry, first, nfrags);
 
 		if (priv->extend_desc)
 			tx_head = (void *)priv->dma_etx;
@@ -2277,13 +2271,13 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 
 		priv->hw->desc->display_ring(tx_head, DMA_TX_SIZE, false);
 
-		pr_debug(">>> frame to be transmitted: ");
+		netdev_dbg(priv->dev, ">>> frame to be transmitted: ");
 		print_pkt(skb->data, skb->len);
 	}
 
 	if (unlikely(stmmac_tx_avail(priv) <= (MAX_SKB_FRAGS + 1))) {
-		if (netif_msg_hw(priv))
-			pr_debug("%s: stop transmitted packets\n", __func__);
+		netif_dbg(priv, hw, priv->dev, "%s: stop transmitted packets\n",
+			  __func__);
 		netif_stop_queue(dev);
 	}
 
@@ -2319,13 +2313,11 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		if (dma_mapping_error(priv->device, des))
 			goto dma_map_err;
 
-		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-			first->des0 = des;
-			priv->tx_skbuff_dma[first_entry].buf = first->des0;
-		} else {
-			first->des2 = des;
-			priv->tx_skbuff_dma[first_entry].buf = first->des2;
-		}
+		priv->tx_skbuff_dma[first_entry].buf = des;
+		if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
+			first->des0 = cpu_to_le32(des);
+		else
+			first->des2 = cpu_to_le32(des);
 
 		priv->tx_skbuff_dma[first_entry].len = nopaged_len;
 		priv->tx_skbuff_dma[first_entry].last_segment = last_segment;
@@ -2357,12 +2349,10 @@ static netdev_tx_t stmmac_xmit(struct sk_buff *skb, struct net_device *dev)
 		priv->hw->dma->set_tx_tail_ptr(priv->ioaddr, priv->tx_tail_addr,
 					       STMMAC_CHAN0);
 
-	spin_unlock(&priv->tx_lock);
 	return NETDEV_TX_OK;
 
 dma_map_err:
-	spin_unlock(&priv->tx_lock);
-	dev_err(priv->device, "Tx dma map failed\n");
+	netdev_err(priv->dev, "Tx DMA map failed\n");
 	dev_kfree_skb(skb);
 	priv->dev->stats.tx_dropped++;
 	return NETDEV_TX_OK;
@@ -2433,16 +2423,16 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 					   DMA_FROM_DEVICE);
 			if (dma_mapping_error(priv->device,
 					      priv->rx_skbuff_dma[entry])) {
-				dev_err(priv->device, "Rx dma map failed\n");
+				netdev_err(priv->dev, "Rx DMA map failed\n");
 				dev_kfree_skb(skb);
 				break;
 			}
 
 			if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00)) {
-				p->des0 = priv->rx_skbuff_dma[entry];
+				p->des0 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
 				p->des1 = 0;
 			} else {
-				p->des2 = priv->rx_skbuff_dma[entry];
+				p->des2 = cpu_to_le32(priv->rx_skbuff_dma[entry]);
 			}
 			if (priv->hw->mode->refill_desc3)
 				priv->hw->mode->refill_desc3(priv, p);
@@ -2450,8 +2440,8 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 			if (priv->rx_zeroc_thresh > 0)
 				priv->rx_zeroc_thresh--;
 
-			if (netif_msg_rx_status(priv))
-				pr_debug("\trefill entry #%d\n", entry);
+			netif_dbg(priv, rx_status, priv->dev,
+				  "refill entry #%d\n", entry);
 		}
 		wmb();
 
@@ -2484,7 +2474,7 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 	if (netif_msg_rx_status(priv)) {
 		void *rx_head;
 
-		pr_info(">>>>>> %s: descriptor ring:\n", __func__);
+		netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
 		if (priv->extend_desc)
 			rx_head = (void *)priv->dma_erx;
 		else
@@ -2546,9 +2536,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 			unsigned int des;
 
 			if (unlikely(priv->synopsys_id >= DWMAC_CORE_4_00))
-				des = p->des0;
+				des = le32_to_cpu(p->des0);
 			else
-				des = p->des2;
+				des = le32_to_cpu(p->des2);
 
 			frame_len = priv->hw->desc->get_rx_frame_len(p, coe);
 
@@ -2557,9 +2547,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 			 *  ignored
 			 */
 			if (frame_len > priv->dma_buf_sz) {
-				pr_err("%s: len %d larger than size (%d)\n",
-				       priv->dev->name, frame_len,
-				       priv->dma_buf_sz);
+				netdev_err(priv->dev,
+					   "len %d larger than size (%d)\n",
+					   frame_len, priv->dma_buf_sz);
 				priv->dev->stats.rx_length_errors++;
 				break;
 			}
@@ -2571,11 +2561,11 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 				frame_len -= ETH_FCS_LEN;
 
 			if (netif_msg_rx_status(priv)) {
-				pr_info("\tdesc: %p [entry %d] buff=0x%x\n",
-					p, entry, des);
+				netdev_dbg(priv->dev, "\tdesc: %p [entry %d] buff=0x%x\n",
+					   p, entry, des);
 				if (frame_len > ETH_FRAME_LEN)
-					pr_debug("\tframe size %d, COE: %d\n",
-						 frame_len, status);
+					netdev_dbg(priv->dev, "frame size %d, COE: %d\n",
+						   frame_len, status);
 			}
 
 			/* The zero-copy is always used for all the sizes
@@ -2612,8 +2602,9 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 			} else {
 				skb = priv->rx_skbuff[entry];
 				if (unlikely(!skb)) {
-					pr_err("%s: Inconsistent Rx chain\n",
-					       priv->dev->name);
+					netdev_err(priv->dev,
+						   "%s: Inconsistent Rx chain\n",
+						   priv->dev->name);
 					priv->dev->stats.rx_dropped++;
 					break;
 				}
@@ -2629,7 +2620,8 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 			}
 
 			if (netif_msg_pktdata(priv)) {
-				pr_debug("frame received (%dbytes)", frame_len);
+				netdev_dbg(priv->dev, "frame received (%dbytes)",
+					   frame_len);
 				print_pkt(skb->data, frame_len);
 			}
 
@@ -2729,26 +2721,12 @@ static void stmmac_set_rx_mode(struct net_device *dev)
 static int stmmac_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct stmmac_priv *priv = netdev_priv(dev);
-	int max_mtu;
 
 	if (netif_running(dev)) {
-		pr_err("%s: must be stopped to change its MTU\n", dev->name);
+		netdev_err(priv->dev, "must be stopped to change its MTU\n");
 		return -EBUSY;
 	}
 
-	if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
-		max_mtu = JUMBO_LEN;
-	else
-		max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN);
-
-	if (priv->plat->maxmtu < max_mtu)
-		max_mtu = priv->plat->maxmtu;
-
-	if ((new_mtu < 46) || (new_mtu > max_mtu)) {
-		pr_err("%s: invalid MTU, max MTU is: %d\n", dev->name, max_mtu);
-		return -EINVAL;
-	}
-
 	dev->mtu = new_mtu;
 
 	netdev_update_features(dev);
@@ -2824,7 +2802,7 @@ static irqreturn_t stmmac_interrupt(int irq, void *dev_id)
 		pm_wakeup_event(priv->device, 0);
 
 	if (unlikely(!dev)) {
-		pr_err("%s: invalid dev pointer\n", __func__);
+		netdev_err(priv->dev, "%s: invalid dev pointer\n", __func__);
 		return IRQ_NONE;
 	}
 
@@ -2882,7 +2860,6 @@ static void stmmac_poll_controller(struct net_device *dev)
  */
 static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 {
-	struct stmmac_priv *priv = netdev_priv(dev);
 	int ret = -EOPNOTSUPP;
 
 	if (!netif_running(dev))
@@ -2892,9 +2869,9 @@ static int stmmac_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	case SIOCGMIIPHY:
 	case SIOCGMIIREG:
 	case SIOCSMIIREG:
-		if (!priv->phydev)
+		if (!dev->phydev)
 			return -EINVAL;
-		ret = phy_mii_ioctl(priv->phydev, rq, cmd);
+		ret = phy_mii_ioctl(dev->phydev, rq, cmd);
 		break;
 	case SIOCSHWTSTAMP:
 		ret = stmmac_hwtstamp_ioctl(dev, rq);
@@ -2922,14 +2899,17 @@ static void sysfs_display_ring(void *head, int size, int extend_desc,
 			x = *(u64 *) ep;
 			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
 				   i, (unsigned int)virt_to_phys(ep),
-				   ep->basic.des0, ep->basic.des1,
-				   ep->basic.des2, ep->basic.des3);
+				   le32_to_cpu(ep->basic.des0),
+				   le32_to_cpu(ep->basic.des1),
+				   le32_to_cpu(ep->basic.des2),
+				   le32_to_cpu(ep->basic.des3));
 			ep++;
 		} else {
 			x = *(u64 *) p;
 			seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
 				   i, (unsigned int)virt_to_phys(ep),
-				   p->des0, p->des1, p->des2, p->des3);
+				   le32_to_cpu(p->des0), le32_to_cpu(p->des1),
+				   le32_to_cpu(p->des2), le32_to_cpu(p->des3));
 			p++;
 		}
 		seq_printf(seq, "\n");
@@ -2961,6 +2941,8 @@ static int stmmac_sysfs_ring_open(struct inode *inode, struct file *file)
 	return single_open(file, stmmac_sysfs_ring_read, inode->i_private);
 }
 
+/* Debugfs files, should appear in /sys/kernel/debug/stmmaceth/eth0 */
+
 static const struct file_operations stmmac_rings_status_fops = {
 	.owner = THIS_MODULE,
 	.open = stmmac_sysfs_ring_open,
@@ -2983,11 +2965,11 @@ static int stmmac_sysfs_dma_cap_read(struct seq_file *seq, void *v)
 	seq_printf(seq, "\tDMA HW features\n");
 	seq_printf(seq, "==============================\n");
 
-	seq_printf(seq, "\t10/100 Mbps %s\n",
+	seq_printf(seq, "\t10/100 Mbps: %s\n",
 		   (priv->dma_cap.mbps_10_100) ? "Y" : "N");
-	seq_printf(seq, "\t1000 Mbps %s\n",
+	seq_printf(seq, "\t1000 Mbps: %s\n",
 		   (priv->dma_cap.mbps_1000) ? "Y" : "N");
-	seq_printf(seq, "\tHalf duple %s\n",
+	seq_printf(seq, "\tHalf duplex: %s\n",
 		   (priv->dma_cap.half_duplex) ? "Y" : "N");
 	seq_printf(seq, "\tHash Filter: %s\n",
 		   (priv->dma_cap.hash_filter) ? "Y" : "N");
@@ -3005,9 +2987,9 @@ static int stmmac_sysfs_dma_cap_read(struct seq_file *seq, void *v)
 		   (priv->dma_cap.rmon) ? "Y" : "N");
 	seq_printf(seq, "\tIEEE 1588-2002 Time Stamp: %s\n",
 		   (priv->dma_cap.time_stamp) ? "Y" : "N");
-	seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp:%s\n",
+	seq_printf(seq, "\tIEEE 1588-2008 Advanced Time Stamp: %s\n",
 		   (priv->dma_cap.atime_stamp) ? "Y" : "N");
-	seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE) %s\n",
+	seq_printf(seq, "\t802.3az - Energy-Efficient Ethernet (EEE): %s\n",
 		   (priv->dma_cap.eee) ? "Y" : "N");
 	seq_printf(seq, "\tAV features: %s\n", (priv->dma_cap.av) ? "Y" : "N");
 	seq_printf(seq, "\tChecksum Offload in TX: %s\n",
@@ -3054,8 +3036,7 @@ static int stmmac_init_fs(struct net_device *dev)
 	priv->dbgfs_dir = debugfs_create_dir(dev->name, stmmac_fs_dir);
 
 	if (!priv->dbgfs_dir || IS_ERR(priv->dbgfs_dir)) {
-		pr_err("ERROR %s/%s, debugfs create directory failed\n",
-		       STMMAC_RESOURCE_NAME, dev->name);
+		netdev_err(priv->dev, "ERROR failed to create debugfs directory\n");
 
 		return -ENOMEM;
 	}
@@ -3067,7 +3048,7 @@ static int stmmac_init_fs(struct net_device *dev)
 				    &stmmac_rings_status_fops);
 
 	if (!priv->dbgfs_rings_status || IS_ERR(priv->dbgfs_rings_status)) {
-		pr_info("ERROR creating stmmac ring debugfs file\n");
+		netdev_err(priv->dev, "ERROR creating stmmac ring debugfs file\n");
 		debugfs_remove_recursive(priv->dbgfs_dir);
 
 		return -ENOMEM;
@@ -3079,7 +3060,7 @@ static int stmmac_init_fs(struct net_device *dev)
 					    dev, &stmmac_dma_cap_fops);
 
 	if (!priv->dbgfs_dma_cap || IS_ERR(priv->dbgfs_dma_cap)) {
-		pr_info("ERROR creating stmmac MMC debugfs file\n");
+		netdev_err(priv->dev, "ERROR creating stmmac MMC debugfs file\n");
 		debugfs_remove_recursive(priv->dbgfs_dir);
 
 		return -ENOMEM;
@@ -3151,11 +3132,11 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 	} else {
 		if (chain_mode) {
 			priv->hw->mode = &chain_mode_ops;
-			pr_info(" Chain mode enabled\n");
+			dev_info(priv->device, "Chain mode enabled\n");
 			priv->mode = STMMAC_CHAIN_MODE;
 		} else {
 			priv->hw->mode = &ring_mode_ops;
-			pr_info(" Ring mode enabled\n");
+			dev_info(priv->device, "Ring mode enabled\n");
 			priv->mode = STMMAC_RING_MODE;
 		}
 	}
@@ -3163,7 +3144,7 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 	/* Get the HW capability (new GMAC newer than 3.50a) */
 	priv->hw_cap_support = stmmac_get_hw_features(priv);
 	if (priv->hw_cap_support) {
-		pr_info(" DMA HW capability register supported");
+		dev_info(priv->device, "DMA HW capability register supported\n");
 
 		/* We can override some gmac/dma configuration fields: e.g.
 		 * enh_desc, tx_coe (e.g. that are passed through the
@@ -3188,8 +3169,9 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 		else if (priv->dma_cap.rx_coe_type1)
 			priv->plat->rx_coe = STMMAC_RX_COE_TYPE1;
 
-	} else
-		pr_info(" No HW DMA feature register supported");
+	} else {
+		dev_info(priv->device, "No HW DMA feature register supported\n");
+	}
 
 	/* To use alternate (extended), normal or GMAC4 descriptor structures */
 	if (priv->synopsys_id >= DWMAC_CORE_4_00)
@@ -3199,20 +3181,20 @@ static int stmmac_hw_init(struct stmmac_priv *priv)
 
 	if (priv->plat->rx_coe) {
 		priv->hw->rx_csum = priv->plat->rx_coe;
-		pr_info(" RX Checksum Offload Engine supported\n");
+		dev_info(priv->device, "RX Checksum Offload Engine supported\n");
 		if (priv->synopsys_id < DWMAC_CORE_4_00)
-			pr_info("\tCOE Type %d\n", priv->hw->rx_csum);
+			dev_info(priv->device, "COE Type %d\n", priv->hw->rx_csum);
 	}
 	if (priv->plat->tx_coe)
-		pr_info(" TX Checksum insertion supported\n");
+		dev_info(priv->device, "TX Checksum insertion supported\n");
 
 	if (priv->plat->pmt) {
-		pr_info(" Wake-Up On Lan supported\n");
+		dev_info(priv->device, "Wake-Up On Lan supported\n");
 		device_set_wakeup_capable(priv->device, 1);
 	}
 
 	if (priv->dma_cap.tsoen)
-		pr_info(" TSO supported\n");
+		dev_info(priv->device, "TSO supported\n");
 
 	return 0;
 }
@@ -3271,8 +3253,8 @@ int stmmac_dvr_probe(struct device *device,
 
 	priv->stmmac_clk = devm_clk_get(priv->device, STMMAC_RESOURCE_NAME);
 	if (IS_ERR(priv->stmmac_clk)) {
-		dev_warn(priv->device, "%s: warning: cannot get CSR clock\n",
-			 __func__);
+		netdev_warn(priv->dev, "%s: warning: cannot get CSR clock\n",
+			    __func__);
 		/* If failed to obtain stmmac_clk and specific clk_csr value
 		 * is NOT passed from the platform, probe fail.
 		 */
@@ -3321,7 +3303,7 @@ int stmmac_dvr_probe(struct device *device,
 	if ((priv->plat->tso_en) && (priv->dma_cap.tsoen)) {
 		ndev->hw_features |= NETIF_F_TSO;
 		priv->tso = true;
-		pr_info(" TSO feature enabled\n");
+		dev_info(priv->device, "TSO feature enabled\n");
 	}
 	ndev->features |= ndev->hw_features | NETIF_F_HIGHDMA;
 	ndev->watchdog_timeo = msecs_to_jiffies(watchdog);
@@ -3331,6 +3313,15 @@ int stmmac_dvr_probe(struct device *device,
 #endif
 	priv->msg_enable = netif_msg_init(debug, default_msg_level);
 
+	/* MTU range: 46 - hw-specific max */
+	ndev->min_mtu = ETH_ZLEN - ETH_HLEN;
+	if ((priv->plat->enh_desc) || (priv->synopsys_id >= DWMAC_CORE_4_00))
+		ndev->max_mtu = JUMBO_LEN;
+	else
+		ndev->max_mtu = SKB_MAX_HEAD(NET_SKB_PAD + NET_IP_ALIGN);
+	if (priv->plat->maxmtu < ndev->max_mtu)
+		ndev->max_mtu = priv->plat->maxmtu;
+
 	if (flow_ctrl)
 		priv->flow_ctrl = FLOW_AUTO;	/* RX/TX pause on */
 
@@ -3341,17 +3332,17 @@ int stmmac_dvr_probe(struct device *device,
 	 */
 	if ((priv->synopsys_id >= DWMAC_CORE_3_50) && (!priv->plat->riwt_off)) {
 		priv->use_riwt = 1;
-		pr_info(" Enable RX Mitigation via HW Watchdog Timer\n");
+		netdev_info(priv->dev, "Enable RX Mitigation via HW Watchdog Timer\n");
 	}
 
 	netif_napi_add(ndev, &priv->napi, stmmac_poll, 64);
 
 	spin_lock_init(&priv->lock);
-	spin_lock_init(&priv->tx_lock);
 
 	ret = register_netdev(ndev);
 	if (ret) {
-		pr_err("%s: ERROR %i registering the device\n", __func__, ret);
+		netdev_err(priv->dev, "%s: ERROR %i registering the device\n",
+			   __func__, ret);
 		goto error_netdev_register;
 	}
 
@@ -3374,8 +3365,9 @@ int stmmac_dvr_probe(struct device *device,
 		/* MDIO bus Registration */
 		ret = stmmac_mdio_register(ndev);
 		if (ret < 0) {
-			pr_debug("%s: MDIO bus (id: %d) registration failed",
-				 __func__, priv->plat->bus_id);
+			netdev_err(priv->dev,
+				   "%s: MDIO bus (id: %d) registration failed",
+				   __func__, priv->plat->bus_id);
 			goto error_mdio_register;
 		}
 	}
@@ -3408,7 +3400,7 @@ int stmmac_dvr_remove(struct device *dev)
 	struct net_device *ndev = dev_get_drvdata(dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
 
-	pr_info("%s:\n\tremoving driver", __func__);
+	netdev_info(priv->dev, "%s: removing driver", __func__);
 
 	priv->hw->dma->stop_rx(priv->ioaddr);
 	priv->hw->dma->stop_tx(priv->ioaddr);
@@ -3416,7 +3408,6 @@ int stmmac_dvr_remove(struct device *dev)
 	stmmac_set_mac(priv->ioaddr, false);
 	netif_carrier_off(ndev);
 	unregister_netdev(ndev);
-	of_node_put(priv->plat->phy_node);
 	if (priv->stmmac_rst)
 		reset_control_assert(priv->stmmac_rst);
 	clk_disable_unprepare(priv->pclk);
@@ -3447,8 +3438,8 @@ int stmmac_suspend(struct device *dev)
 	if (!ndev || !netif_running(ndev))
 		return 0;
 
-	if (priv->phydev)
-		phy_stop(priv->phydev);
+	if (ndev->phydev)
+		phy_stop(ndev->phydev);
 
 	spin_lock_irqsave(&priv->lock, flags);
 
@@ -3542,8 +3533,8 @@ int stmmac_resume(struct device *dev)
 
 	spin_unlock_irqrestore(&priv->lock, flags);
 
-	if (priv->phydev)
-		phy_start(priv->phydev);
+	if (ndev->phydev)
+		phy_start(ndev->phydev);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
index ec295851812b..23322fd9e3ac 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_mdio.c
@@ -42,13 +42,6 @@
 #define MII_GMAC4_WRITE			(1 << MII_GMAC4_GOC_SHIFT)
 #define MII_GMAC4_READ			(3 << MII_GMAC4_GOC_SHIFT)
 
-#define MII_PHY_ADDR_GMAC4_SHIFT	21
-#define MII_PHY_ADDR_GMAC4_MASK		GENMASK(25, 21)
-#define MII_PHY_REG_GMAC4_SHIFT		16
-#define MII_PHY_REG_GMAC4_MASK		GENMASK(20, 16)
-#define MII_CSR_CLK_GMAC4_SHIFT		8
-#define MII_CSR_CLK_GMAC4_MASK		GENMASK(11, 8)
-
 static int stmmac_mdio_busy_wait(void __iomem *ioaddr, unsigned int mii_addr)
 {
 	unsigned long curr;
@@ -68,8 +61,8 @@ static int stmmac_mdio_busy_wait(void __iomem *ioaddr, unsigned int mii_addr)
 /**
  * stmmac_mdio_read
  * @bus: points to the mii_bus structure
- * @phyaddr: MII addr reg bits 15-11
- * @phyreg: MII addr reg bits 10-6
+ * @phyaddr: MII addr
+ * @phyreg: MII reg
  * Description: it reads data from the MII register from within the phy device.
  * For the 7111 GMAC, we must set the bit 0 in the MII address register while
  * accessing the PHY registers.
@@ -83,14 +76,20 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 	unsigned int mii_data = priv->hw->mii.data;
 
 	int data;
-	u16 regValue = (((phyaddr << 11) & (0x0000F800)) |
-			((phyreg << 6) & (0x000007C0)));
-	regValue |= MII_BUSY | ((priv->clk_csr & 0xF) << 2);
+	u32 value = MII_BUSY;
+
+	value |= (phyaddr << priv->hw->mii.addr_shift)
+		& priv->hw->mii.addr_mask;
+	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
+	value |= (priv->clk_csr & priv->hw->mii.clk_csr_mask)
+		<< priv->hw->mii.clk_csr_shift;
+	if (priv->plat->has_gmac4)
+		value |= MII_GMAC4_READ;
 
 	if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
 		return -EBUSY;
 
-	writel(regValue, priv->ioaddr + mii_address);
+	writel(value, priv->ioaddr + mii_address);
 
 	if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
 		return -EBUSY;
@@ -104,8 +103,8 @@ static int stmmac_mdio_read(struct mii_bus *bus, int phyaddr, int phyreg)
 /**
  * stmmac_mdio_write
  * @bus: points to the mii_bus structure
- * @phyaddr: MII addr reg bits 15-11
- * @phyreg: MII addr reg bits 10-6
+ * @phyaddr: MII addr
+ * @phyreg: MII reg
  * @phydata: phy data
  * Description: it writes the data into the MII register from within the device.
  */
@@ -117,85 +116,16 @@ static int stmmac_mdio_write(struct mii_bus *bus, int phyaddr, int phyreg,
 	unsigned int mii_address = priv->hw->mii.addr;
 	unsigned int mii_data = priv->hw->mii.data;
 
-	u16 value =
-	    (((phyaddr << 11) & (0x0000F800)) | ((phyreg << 6) & (0x000007C0)))
-	    | MII_WRITE;
-
-	value |= MII_BUSY | ((priv->clk_csr & 0xF) << 2);
-
-	/* Wait until any existing MII operation is complete */
-	if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
-		return -EBUSY;
-
-	/* Set the MII address register to write */
-	writel(phydata, priv->ioaddr + mii_data);
-	writel(value, priv->ioaddr + mii_address);
-
-	/* Wait until any existing MII operation is complete */
-	return stmmac_mdio_busy_wait(priv->ioaddr, mii_address);
-}
-
-/**
- * stmmac_mdio_read_gmac4
- * @bus: points to the mii_bus structure
- * @phyaddr: MII addr reg bits 25-21
- * @phyreg: MII addr reg bits 20-16
- * Description: it reads data from the MII register of GMAC4 from within
- * the phy device.
- */
-static int stmmac_mdio_read_gmac4(struct mii_bus *bus, int phyaddr, int phyreg)
-{
-	struct net_device *ndev = bus->priv;
-	struct stmmac_priv *priv = netdev_priv(ndev);
-	unsigned int mii_address = priv->hw->mii.addr;
-	unsigned int mii_data = priv->hw->mii.data;
-	int data;
-	u32 value = (((phyaddr << MII_PHY_ADDR_GMAC4_SHIFT) &
-		     (MII_PHY_ADDR_GMAC4_MASK)) |
-		     ((phyreg << MII_PHY_REG_GMAC4_SHIFT) &
-		     (MII_PHY_REG_GMAC4_MASK))) | MII_GMAC4_READ;
-
-	value |= MII_BUSY | ((priv->clk_csr & MII_CSR_CLK_GMAC4_MASK)
-		 << MII_CSR_CLK_GMAC4_SHIFT);
-
-	if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
-		return -EBUSY;
-
-	writel(value, priv->ioaddr + mii_address);
-
-	if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
-		return -EBUSY;
-
-	/* Read the data from the MII data register */
-	data = (int)readl(priv->ioaddr + mii_data);
-
-	return data;
-}
+	u32 value = MII_WRITE | MII_BUSY;
 
-/**
- * stmmac_mdio_write_gmac4
- * @bus: points to the mii_bus structure
- * @phyaddr: MII addr reg bits 25-21
- * @phyreg: MII addr reg bits 20-16
- * @phydata: phy data
- * Description: it writes the data into the MII register of GMAC4 from within
- * the device.
- */
-static int stmmac_mdio_write_gmac4(struct mii_bus *bus, int phyaddr, int phyreg,
-				   u16 phydata)
-{
-	struct net_device *ndev = bus->priv;
-	struct stmmac_priv *priv = netdev_priv(ndev);
-	unsigned int mii_address = priv->hw->mii.addr;
-	unsigned int mii_data = priv->hw->mii.data;
-
-	u32 value = (((phyaddr << MII_PHY_ADDR_GMAC4_SHIFT) &
-		     (MII_PHY_ADDR_GMAC4_MASK)) |
-		     ((phyreg << MII_PHY_REG_GMAC4_SHIFT) &
-		     (MII_PHY_REG_GMAC4_MASK))) | MII_GMAC4_WRITE;
+	value |= (phyaddr << priv->hw->mii.addr_shift)
+		& priv->hw->mii.addr_mask;
+	value |= (phyreg << priv->hw->mii.reg_shift) & priv->hw->mii.reg_mask;
 
-	value |= MII_BUSY | ((priv->clk_csr & MII_CSR_CLK_GMAC4_MASK)
-		 << MII_CSR_CLK_GMAC4_SHIFT);
+	value |= ((priv->clk_csr & priv->hw->mii.clk_csr_mask)
+		<< priv->hw->mii.clk_csr_shift);
+	if (priv->plat->has_gmac4)
+		value |= MII_GMAC4_WRITE;
 
 	/* Wait until any existing MII operation is complete */
 	if (stmmac_mdio_busy_wait(priv->ioaddr, mii_address))
@@ -260,7 +190,7 @@ int stmmac_mdio_reset(struct mii_bus *bus)
 #endif
 
 	if (data->phy_reset) {
-		pr_debug("stmmac_mdio_reset: calling phy_reset\n");
+		netdev_dbg(ndev, "stmmac_mdio_reset: calling phy_reset\n");
 		data->phy_reset(priv->plat->bsp_priv);
 	}
 
@@ -305,13 +235,8 @@ int stmmac_mdio_register(struct net_device *ndev)
 #endif
 
 	new_bus->name = "stmmac";
-	if (priv->plat->has_gmac4) {
-		new_bus->read = &stmmac_mdio_read_gmac4;
-		new_bus->write = &stmmac_mdio_write_gmac4;
-	} else {
-		new_bus->read = &stmmac_mdio_read;
-		new_bus->write = &stmmac_mdio_write;
-	}
+	new_bus->read = &stmmac_mdio_read;
+	new_bus->write = &stmmac_mdio_write;
 
 	new_bus->reset = &stmmac_mdio_reset;
 	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%s-%x",
@@ -325,7 +250,7 @@ int stmmac_mdio_register(struct net_device *ndev)
 	else
 		err = mdiobus_register(new_bus);
 	if (err != 0) {
-		pr_err("%s: Cannot register as MDIO bus\n", new_bus->name);
+		netdev_err(ndev, "Cannot register the MDIO bus\n");
 		goto bus_register_fail;
 	}
 
@@ -372,16 +297,16 @@ int stmmac_mdio_register(struct net_device *ndev)
 				irq_str = irq_num;
 				break;
 			}
-			pr_info("%s: PHY ID %08x at %d IRQ %s (%s)%s\n",
-				ndev->name, phydev->phy_id, addr,
-				irq_str, phydev_name(phydev),
-				act ? " active" : "");
+			netdev_info(ndev, "PHY ID %08x at %d IRQ %s (%s)%s\n",
+				    phydev->phy_id, addr,
+				    irq_str, phydev_name(phydev),
+				    act ? " active" : "");
 			found = 1;
 		}
 	}
 
 	if (!found && !mdio_node) {
-		pr_warn("%s: No PHY found\n", ndev->name);
+		netdev_warn(ndev, "No PHY found\n");
 		mdiobus_unregister(new_bus);
 		mdiobus_free(new_bus);
 		return -ENODEV;
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 56c8a2342c14..a2831773431a 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -81,6 +81,7 @@ static void stmmac_default_data(struct plat_stmmacenet_data *plat)
 	plat->mdio_bus_data->phy_mask = 0;
 
 	plat->dma_cfg->pbl = 32;
+	plat->dma_cfg->pblx8 = true;
 	/* TODO: AXI */
 
 	/* Set default value for multicast hash bins */
@@ -115,6 +116,7 @@ static int quark_default_data(struct plat_stmmacenet_data *plat,
 	plat->mdio_bus_data->phy_mask = 0;
 
 	plat->dma_cfg->pbl = 16;
+	plat->dma_cfg->pblx8 = true;
 	plat->dma_cfg->fixed_burst = 1;
 	/* AXI (TODO) */
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index 0a0d6a86f397..082cd48db6a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -126,8 +126,10 @@ static struct stmmac_axi *stmmac_axi_setup(struct platform_device *pdev)
 	axi->axi_mb = of_property_read_bool(np, "snps,axi_mb");
 	axi->axi_rb =  of_property_read_bool(np, "snps,axi_rb");
 
-	of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt);
-	of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt);
+	if (of_property_read_u32(np, "snps,wr_osr_lmt", &axi->axi_wr_osr_lmt))
+		axi->axi_wr_osr_lmt = 1;
+	if (of_property_read_u32(np, "snps,rd_osr_lmt", &axi->axi_rd_osr_lmt))
+		axi->axi_rd_osr_lmt = 1;
 	of_property_read_u32_array(np, "snps,blen", axi->axi_blen, AXI_BLEN);
 	of_node_put(np);
 
@@ -200,7 +202,6 @@ static int stmmac_dt_phy(struct plat_stmmacenet_data *plat,
 /**
  * stmmac_probe_config_dt - parse device-tree driver parameters
  * @pdev: platform_device structure
- * @plat: driver data platform structure
  * @mac: MAC address to use
  * Description:
  * this function is to read the driver parameters from device-tree and
@@ -291,6 +292,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 	if (of_device_is_compatible(np, "snps,dwmac-4.00") ||
 	    of_device_is_compatible(np, "snps,dwmac-4.10a")) {
 		plat->has_gmac4 = 1;
+		plat->has_gmac = 0;
 		plat->pmt = 1;
 		plat->tso_en = of_property_read_bool(np, "snps,tso");
 	}
@@ -302,21 +304,25 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 		plat->force_sf_dma_mode = 1;
 	}
 
-	if (of_find_property(np, "snps,pbl", NULL)) {
-		dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg),
-				       GFP_KERNEL);
-		if (!dma_cfg) {
-			of_node_put(plat->phy_node);
-			return ERR_PTR(-ENOMEM);
-		}
-		plat->dma_cfg = dma_cfg;
-		of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
-		dma_cfg->aal = of_property_read_bool(np, "snps,aal");
-		dma_cfg->fixed_burst =
-			of_property_read_bool(np, "snps,fixed-burst");
-		dma_cfg->mixed_burst =
-			of_property_read_bool(np, "snps,mixed-burst");
+	dma_cfg = devm_kzalloc(&pdev->dev, sizeof(*dma_cfg),
+			       GFP_KERNEL);
+	if (!dma_cfg) {
+		stmmac_remove_config_dt(pdev, plat);
+		return ERR_PTR(-ENOMEM);
 	}
+	plat->dma_cfg = dma_cfg;
+
+	of_property_read_u32(np, "snps,pbl", &dma_cfg->pbl);
+	if (!dma_cfg->pbl)
+		dma_cfg->pbl = DEFAULT_DMA_PBL;
+	of_property_read_u32(np, "snps,txpbl", &dma_cfg->txpbl);
+	of_property_read_u32(np, "snps,rxpbl", &dma_cfg->rxpbl);
+	dma_cfg->pblx8 = !of_property_read_bool(np, "snps,no-pbl-x8");
+
+	dma_cfg->aal = of_property_read_bool(np, "snps,aal");
+	dma_cfg->fixed_burst = of_property_read_bool(np, "snps,fixed-burst");
+	dma_cfg->mixed_burst = of_property_read_bool(np, "snps,mixed-burst");
+
 	plat->force_thresh_dma_mode = of_property_read_bool(np, "snps,force_thresh_dma_mode");
 	if (plat->force_thresh_dma_mode) {
 		plat->force_sf_dma_mode = 0;
@@ -329,14 +335,37 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 
 	return plat;
 }
+
+/**
+ * stmmac_remove_config_dt - undo the effects of stmmac_probe_config_dt()
+ * @pdev: platform_device structure
+ * @plat: driver data platform structure
+ *
+ * Release resources claimed by stmmac_probe_config_dt().
+ */
+void stmmac_remove_config_dt(struct platform_device *pdev,
+			     struct plat_stmmacenet_data *plat)
+{
+	struct device_node *np = pdev->dev.of_node;
+
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
+	of_node_put(plat->phy_node);
+}
 #else
 struct plat_stmmacenet_data *
 stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 {
 	return ERR_PTR(-ENOSYS);
 }
+
+void stmmac_remove_config_dt(struct platform_device *pdev,
+			     struct plat_stmmacenet_data *plat)
+{
+}
 #endif /* CONFIG_OF */
 EXPORT_SYMBOL_GPL(stmmac_probe_config_dt);
+EXPORT_SYMBOL_GPL(stmmac_remove_config_dt);
 
 int stmmac_get_platform_resources(struct platform_device *pdev,
 				  struct stmmac_resources *stmmac_res)
@@ -392,10 +421,13 @@ int stmmac_pltfr_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
+	struct plat_stmmacenet_data *plat = priv->plat;
 	int ret = stmmac_dvr_remove(&pdev->dev);
 
-	if (priv->plat->exit)
-		priv->plat->exit(pdev, priv->plat->bsp_priv);
+	if (plat->exit)
+		plat->exit(pdev, plat->bsp_priv);
+
+	stmmac_remove_config_dt(pdev, plat);
 
 	return ret;
 }
@@ -417,9 +449,7 @@ static int stmmac_pltfr_suspend(struct device *dev)
 	struct platform_device *pdev = to_platform_device(dev);
 
 	ret = stmmac_suspend(dev);
-	if (priv->plat->suspend)
-		priv->plat->suspend(pdev, priv->plat->bsp_priv);
-	else if (priv->plat->exit)
+	if (priv->plat->exit)
 		priv->plat->exit(pdev, priv->plat->bsp_priv);
 
 	return ret;
@@ -438,9 +468,7 @@ static int stmmac_pltfr_resume(struct device *dev)
 	struct stmmac_priv *priv = netdev_priv(ndev);
 	struct platform_device *pdev = to_platform_device(dev);
 
-	if (priv->plat->resume)
-		priv->plat->resume(pdev, priv->plat->bsp_priv);
-	else if (priv->plat->init)
+	if (priv->plat->init)
 		priv->plat->init(pdev, priv->plat->bsp_priv);
 
 	return stmmac_resume(dev);
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
index 64e147f53a9c..b72eb0de57b7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.h
@@ -23,6 +23,8 @@
 
 struct plat_stmmacenet_data *
 stmmac_probe_config_dt(struct platform_device *pdev, const char **mac);
+void stmmac_remove_config_dt(struct platform_device *pdev,
+			     struct plat_stmmacenet_data *plat);
 
 int stmmac_get_platform_resources(struct platform_device *pdev,
 				  struct stmmac_resources *stmmac_res);
diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 062bce9acde6..e9e5ef241c6f 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -3863,9 +3863,6 @@ static int cas_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct cas *cp = netdev_priv(dev);
 
-	if (new_mtu < CAS_MIN_MTU || new_mtu > CAS_MAX_MTU)
-		return -EINVAL;
-
 	dev->mtu = new_mtu;
 	if (!netif_running(dev) || !netif_device_present(dev))
 		return 0;
@@ -5115,6 +5112,10 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;
 
+	/* MTU range: 60 - varies or 9000 */
+	dev->min_mtu = CAS_MIN_MTU;
+	dev->max_mtu = CAS_MAX_MTU;
+
 	if (register_netdev(dev)) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting\n");
 		goto err_out_free_consistent;
diff --git a/drivers/net/ethernet/sun/ldmvsw.c b/drivers/net/ethernet/sun/ldmvsw.c
index 0ac449acaf5b..335b87660638 100644
--- a/drivers/net/ethernet/sun/ldmvsw.c
+++ b/drivers/net/ethernet/sun/ldmvsw.c
@@ -139,7 +139,6 @@ static const struct net_device_ops vsw_ops = {
 	.ndo_set_mac_address	= sunvnet_set_mac_addr_common,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_tx_timeout		= sunvnet_tx_timeout_common,
-	.ndo_change_mtu		= sunvnet_change_mtu_common,
 	.ndo_start_xmit		= vsw_start_xmit,
 	.ndo_select_queue	= vsw_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -239,6 +238,10 @@ static struct net_device *vsw_alloc_netdev(u8 hwaddr[],
 			   NETIF_F_HW_CSUM | NETIF_F_SG;
 	dev->features = dev->hw_features;
 
+	/* MTU range: 68 - 65535 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = VNET_MAX_MTU;
+
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	return dev;
diff --git a/drivers/net/ethernet/sun/niu.c b/drivers/net/ethernet/sun/niu.c
index a2371aa14a49..f90d1af6d390 100644
--- a/drivers/net/ethernet/sun/niu.c
+++ b/drivers/net/ethernet/sun/niu.c
@@ -6754,9 +6754,6 @@ static int niu_change_mtu(struct net_device *dev, int new_mtu)
 	struct niu *np = netdev_priv(dev);
 	int err, orig_jumbo, new_jumbo;
 
-	if (new_mtu < 68 || new_mtu > NIU_MAX_MTU)
-		return -EINVAL;
-
 	orig_jumbo = (dev->mtu > ETH_DATA_LEN);
 	new_jumbo = (new_mtu > ETH_DATA_LEN);
 
@@ -9823,6 +9820,10 @@ static int niu_pci_init_one(struct pci_dev *pdev,
 
 	dev->irq = pdev->irq;
 
+	/* MTU range: 68 - 9216 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = NIU_MAX_MTU;
+
 	niu_assign_netdev_ops(dev);
 
 	err = niu_get_invariants(np);
diff --git a/drivers/net/ethernet/sun/sunbmac.c b/drivers/net/ethernet/sun/sunbmac.c
index 02f452730d52..c4caf486cbef 100644
--- a/drivers/net/ethernet/sun/sunbmac.c
+++ b/drivers/net/ethernet/sun/sunbmac.c
@@ -1065,7 +1065,6 @@ static const struct net_device_ops bigmac_ops = {
 	.ndo_get_stats		= bigmac_get_stats,
 	.ndo_set_rx_mode	= bigmac_set_multicast,
 	.ndo_tx_timeout		= bigmac_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/sun/sungem.c b/drivers/net/ethernet/sun/sungem.c
index d6ad0fbd054e..66ecf0fcc330 100644
--- a/drivers/net/ethernet/sun/sungem.c
+++ b/drivers/net/ethernet/sun/sungem.c
@@ -2476,9 +2476,9 @@ static void gem_set_multicast(struct net_device *dev)
 }
 
 /* Jumbo-grams don't seem to work :-( */
-#define GEM_MIN_MTU	68
+#define GEM_MIN_MTU	ETH_MIN_MTU
 #if 1
-#define GEM_MAX_MTU	1500
+#define GEM_MAX_MTU	ETH_DATA_LEN
 #else
 #define GEM_MAX_MTU	9000
 #endif
@@ -2487,9 +2487,6 @@ static int gem_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct gem *gp = netdev_priv(dev);
 
-	if (new_mtu < GEM_MIN_MTU || new_mtu > GEM_MAX_MTU)
-		return -EINVAL;
-
 	dev->mtu = new_mtu;
 
 	/* We'll just catch it later when the device is up'd or resumed */
@@ -2977,6 +2974,10 @@ static int gem_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (pci_using_dac)
 		dev->features |= NETIF_F_HIGHDMA;
 
+	/* MTU range: 68 - 1500 (Jumbo mode is broken) */
+	dev->min_mtu = GEM_MIN_MTU;
+	dev->max_mtu = GEM_MAX_MTU;
+
 	/* Register with kernel */
 	if (register_netdev(dev)) {
 		pr_err("Cannot register net device, aborting\n");
diff --git a/drivers/net/ethernet/sun/sunhme.c b/drivers/net/ethernet/sun/sunhme.c
index cf4dcff051d5..ca96408058b0 100644
--- a/drivers/net/ethernet/sun/sunhme.c
+++ b/drivers/net/ethernet/sun/sunhme.c
@@ -2669,7 +2669,6 @@ static const struct net_device_ops hme_netdev_ops = {
 	.ndo_tx_timeout		= happy_meal_tx_timeout,
 	.ndo_get_stats		= happy_meal_get_stats,
 	.ndo_set_rx_mode	= happy_meal_set_multicast,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/sun/sunqe.c b/drivers/net/ethernet/sun/sunqe.c
index 9582948145c1..a6bcdcdd947e 100644
--- a/drivers/net/ethernet/sun/sunqe.c
+++ b/drivers/net/ethernet/sun/sunqe.c
@@ -824,7 +824,6 @@ static const struct net_device_ops qec_ops = {
 	.ndo_start_xmit		= qe_start_xmit,
 	.ndo_set_rx_mode	= qe_set_multicast,
 	.ndo_tx_timeout		= qe_tx_timeout,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c
index a2f9b47de187..5356a7074796 100644
--- a/drivers/net/ethernet/sun/sunvnet.c
+++ b/drivers/net/ethernet/sun/sunvnet.c
@@ -159,7 +159,6 @@ static const struct net_device_ops vnet_ops = {
 	.ndo_set_mac_address	= sunvnet_set_mac_addr_common,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_tx_timeout		= sunvnet_tx_timeout_common,
-	.ndo_change_mtu		= sunvnet_change_mtu_common,
 	.ndo_start_xmit		= vnet_start_xmit,
 	.ndo_select_queue	= vnet_select_queue,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -202,6 +201,10 @@ static struct vnet *vnet_new(const u64 *local_mac,
 			   NETIF_F_HW_CSUM | NETIF_F_SG;
 	dev->features = dev->hw_features;
 
+	/* MTU range: 68 - 65535 */
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = VNET_MAX_MTU;
+
 	SET_NETDEV_DEV(dev, &vdev->dev);
 
 	err = register_netdev(dev);
diff --git a/drivers/net/ethernet/sun/sunvnet_common.c b/drivers/net/ethernet/sun/sunvnet_common.c
index 904a5a12a85d..8878b75d68b4 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.c
+++ b/drivers/net/ethernet/sun/sunvnet_common.c
@@ -704,9 +704,8 @@ static int handle_mcast(struct vnet_port *port, void *msgbuf)
 	return 0;
 }
 
-/* Got back a STOPPED LDC message on port. If the queue is stopped,
- * wake it up so that we'll send out another START message at the
- * next TX.
+/* If the queue is stopped, wake it up so that we'll
+ * send out another START message at the next TX.
  */
 static void maybe_tx_wakeup(struct vnet_port *port)
 {
@@ -734,6 +733,7 @@ EXPORT_SYMBOL_GPL(sunvnet_port_is_up_common);
 
 static int vnet_event_napi(struct vnet_port *port, int budget)
 {
+	struct net_device *dev = VNET_PORT_TO_NET_DEVICE(port);
 	struct vio_driver_state *vio = &port->vio;
 	int tx_wakeup, err;
 	int npkts = 0;
@@ -747,6 +747,16 @@ ldc_ctrl:
 		if (event == LDC_EVENT_RESET) {
 			vnet_port_reset(port);
 			vio_port_up(vio);
+
+			/* If the device is running but its tx queue was
+			 * stopped (due to flow control), restart it.
+			 * This is necessary since vnet_port_reset()
+			 * clears the tx drings and thus we may never get
+			 * back a VIO_TYPE_DATA ACK packet - which is
+			 * the normal mechanism to restart the tx queue.
+			 */
+			if (netif_running(dev))
+				maybe_tx_wakeup(port);
 		}
 		port->rx_event = 0;
 		return 0;
@@ -1583,16 +1593,6 @@ void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp)
 }
 EXPORT_SYMBOL_GPL(sunvnet_set_rx_mode_common);
 
-int sunvnet_change_mtu_common(struct net_device *dev, int new_mtu)
-{
-	if (new_mtu < 68 || new_mtu > 65535)
-		return -EINVAL;
-
-	dev->mtu = new_mtu;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(sunvnet_change_mtu_common);
-
 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p)
 {
 	return -EINVAL;
diff --git a/drivers/net/ethernet/sun/sunvnet_common.h b/drivers/net/ethernet/sun/sunvnet_common.h
index bd36528af972..ce5c824128a3 100644
--- a/drivers/net/ethernet/sun/sunvnet_common.h
+++ b/drivers/net/ethernet/sun/sunvnet_common.h
@@ -15,6 +15,8 @@
 #define	VNET_MINTSO	 2048	/* VIO protocol's minimum TSO len */
 #define	VNET_MAXTSO	65535	/* VIO protocol's maximum TSO len */
 
+#define VNET_MAX_MTU	65535
+
 /* VNET packets are sent in buffers with the first 6 bytes skipped
  * so that after the ethernet header the IPv4/IPv6 headers are aligned
  * properly.
@@ -125,7 +127,6 @@ int sunvnet_close_common(struct net_device *dev);
 void sunvnet_set_rx_mode_common(struct net_device *dev, struct vnet *vp);
 int sunvnet_set_mac_addr_common(struct net_device *dev, void *p);
 void sunvnet_tx_timeout_common(struct net_device *dev);
-int sunvnet_change_mtu_common(struct net_device *dev, int new_mtu);
 int sunvnet_start_xmit_common(struct sk_buff *skb, struct net_device *dev,
 			   struct vnet_port *(*vnet_tx_port)
 			   (struct sk_buff *, struct net_device *));
diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
index 4ba2421e625d..09f5a67da35e 100644
--- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c
+++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c
@@ -2211,7 +2211,7 @@ static int dwceqos_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 
 tx_error:
 	dwceqos_tx_rollback(lp, &trans);
-	dev_kfree_skb(skb);
+	dev_kfree_skb_any(skb);
 	return 0;
 }
 
@@ -2881,7 +2881,7 @@ static int dwceqos_probe(struct platform_device *pdev)
 	ret = of_get_phy_mode(lp->pdev->dev.of_node);
 	if (ret < 0) {
 		dev_err(&lp->pdev->dev, "error in getting phy i/f\n");
-		goto err_out_clk_dis_phy;
+		goto err_out_deregister_fixed_link;
 	}
 
 	lp->phy_interface = ret;
@@ -2889,14 +2889,14 @@ static int dwceqos_probe(struct platform_device *pdev)
 	ret = dwceqos_mii_init(lp);
 	if (ret) {
 		dev_err(&lp->pdev->dev, "error in dwceqos_mii_init\n");
-		goto err_out_clk_dis_phy;
+		goto err_out_deregister_fixed_link;
 	}
 
 	ret = dwceqos_mii_probe(ndev);
 	if (ret != 0) {
 		netdev_err(ndev, "mii_probe fail.\n");
 		ret = -ENXIO;
-		goto err_out_clk_dis_phy;
+		goto err_out_deregister_fixed_link;
 	}
 
 	dwceqos_set_umac_addr(lp, lp->ndev->dev_addr, 0);
@@ -2914,7 +2914,7 @@ static int dwceqos_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&lp->pdev->dev, "Unable to retrieve DT, error %d\n",
 			ret);
-		goto err_out_clk_dis_phy;
+		goto err_out_deregister_fixed_link;
 	}
 	dev_info(&lp->pdev->dev, "pdev->id %d, baseaddr 0x%08lx, irq %d\n",
 		 pdev->id, ndev->base_addr, ndev->irq);
@@ -2924,7 +2924,7 @@ static int dwceqos_probe(struct platform_device *pdev)
 	if (ret) {
 		dev_err(&lp->pdev->dev, "Unable to request IRQ %d, error %d\n",
 			ndev->irq, ret);
-		goto err_out_clk_dis_phy;
+		goto err_out_deregister_fixed_link;
 	}
 
 	if (netif_msg_probe(lp))
@@ -2935,11 +2935,14 @@ static int dwceqos_probe(struct platform_device *pdev)
 	ret = register_netdev(ndev);
 	if (ret) {
 		dev_err(&pdev->dev, "Cannot register net device, aborting.\n");
-			goto err_out_clk_dis_phy;
+		goto err_out_deregister_fixed_link;
 	}
 
 	return 0;
 
+err_out_deregister_fixed_link:
+	if (of_phy_is_fixed_link(pdev->dev.of_node))
+		of_phy_deregister_fixed_link(pdev->dev.of_node);
 err_out_clk_dis_phy:
 	clk_disable_unprepare(lp->phy_ref_clk);
 err_out_clk_dis_aper:
@@ -2959,8 +2962,11 @@ static int dwceqos_remove(struct platform_device *pdev)
 	if (ndev) {
 		lp = netdev_priv(ndev);
 
-		if (ndev->phydev)
+		if (ndev->phydev) {
 			phy_disconnect(ndev->phydev);
+			if (of_phy_is_fixed_link(pdev->dev.of_node))
+				of_phy_deregister_fixed_link(pdev->dev.of_node);
+		}
 		mdiobus_unregister(lp->mii_bus);
 		mdiobus_free(lp->mii_bus);
 
diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 7108c68f16d3..baa3e4a5731c 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -761,16 +761,6 @@ static int bdx_change_mtu(struct net_device *ndev, int new_mtu)
 {
 	ENTER;
 
-	if (new_mtu == ndev->mtu)
-		RET(0);
-
-	/* enforce minimum frame size */
-	if (new_mtu < ETH_ZLEN) {
-		netdev_err(ndev, "mtu %d is less then minimal %d\n",
-			   new_mtu, ETH_ZLEN);
-		RET(-EINVAL);
-	}
-
 	ndev->mtu = new_mtu;
 	if (netif_running(ndev)) {
 		bdx_close(ndev);
@@ -2057,6 +2047,10 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 #ifdef BDX_LLTX
 		ndev->features |= NETIF_F_LLTX;
 #endif
+		/* MTU range: 60 - 16384 */
+		ndev->min_mtu = ETH_ZLEN;
+		ndev->max_mtu = BDX_MAX_MTU;
+
 		spin_lock_init(&priv->tx_lock);
 
 		/*bdx_hw_reset(priv); */
diff --git a/drivers/net/ethernet/tehuti/tehuti.h b/drivers/net/ethernet/tehuti/tehuti.h
index 709ebd6e28b4..8e7b4c9abf21 100644
--- a/drivers/net/ethernet/tehuti/tehuti.h
+++ b/drivers/net/ethernet/tehuti/tehuti.h
@@ -74,6 +74,9 @@
  * ifcontig eth1 txqueuelen 3000 - to change it at runtime */
 #define BDX_NDEV_TXQ_LEN 3000
 
+/* Max MTU for Jumbo Frame mode, per tehutinetworks.net Features FAQ is 16k */
+#define BDX_MAX_MTU	(16 * 1024)
+
 #define FIFO_SIZE  4096
 #define FIFO_EXTRA_SPACE            1024
 
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 9904d740d528..296c8efd0038 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -74,13 +74,14 @@ config TI_CPSW
 	  will be called cpsw.
 
 config TI_CPTS
-	bool "TI Common Platform Time Sync (CPTS) Support"
-	depends on TI_CPSW
-	select PTP_1588_CLOCK
+	tristate "TI Common Platform Time Sync (CPTS) Support"
+	depends on TI_CPSW || TI_KEYSTONE_NETCP
+	imply PTP_1588_CLOCK
 	---help---
 	  This driver supports the Common Platform Time Sync unit of
-	  the CPSW Ethernet Switch. The unit can time stamp PTP UDP/IPv4
-	  and Layer 2 packets, and the driver offers a PTP Hardware Clock.
+	  the CPSW Ethernet Switch and Keystone 2 1g/10g Switch Subsystem.
+	  The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the
+	  driver offers a PTP Hardware Clock.
 
 config TI_KEYSTONE_NETCP
 	tristate "TI Keystone NETCP Core Support"
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index d420d9413e4a..1e7c10bf8713 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -12,8 +12,9 @@ obj-$(CONFIG_TI_DAVINCI_MDIO) += davinci_mdio.o
 obj-$(CONFIG_TI_DAVINCI_CPDMA) += davinci_cpdma.o
 obj-$(CONFIG_TI_CPSW_PHY_SEL) += cpsw-phy-sel.o
 obj-$(CONFIG_TI_CPSW_ALE) += cpsw_ale.o
+obj-$(CONFIG_TI_CPTS) += cpts.o
 obj-$(CONFIG_TI_CPSW) += ti_cpsw.o
-ti_cpsw-y := cpsw.o cpts.o
+ti_cpsw-y := cpsw.o
 
 obj-$(CONFIG_TI_KEYSTONE_NETCP) += keystone_netcp.o
 keystone_netcp-y := netcp_core.o
diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index fa0cfda24fd9..77c88fcf2b86 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -1068,7 +1068,6 @@ static const struct net_device_ops cpmac_netdev_ops = {
 	.ndo_tx_timeout		= cpmac_tx_timeout,
 	.ndo_set_rx_mode	= cpmac_set_multicast_list,
 	.ndo_do_ioctl		= cpmac_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_set_mac_address	= eth_mac_addr,
 };
@@ -1113,6 +1112,7 @@ static int cpmac_probe(struct platform_device *pdev)
 	if (!dev)
 		return -ENOMEM;
 
+	SET_NETDEV_DEV(dev, &pdev->dev);
 	platform_set_drvdata(pdev, dev);
 	priv = netdev_priv(dev);
 
diff --git a/drivers/net/ethernet/ti/cpsw-phy-sel.c b/drivers/net/ethernet/ti/cpsw-phy-sel.c
index ba1e45ff6aae..18013645e76c 100644
--- a/drivers/net/ethernet/ti/cpsw-phy-sel.c
+++ b/drivers/net/ethernet/ti/cpsw-phy-sel.c
@@ -81,6 +81,7 @@ static void cpsw_gmii_sel_am3352(struct cpsw_phy_sel_priv *priv,
 	};
 
 	mask = GMII_SEL_MODE_MASK << (slave * 2) | BIT(slave + 6);
+	mask |= BIT(slave + 4);
 	mode <<= slave * 2;
 
 	if (priv->rmii_clock_external) {
diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 58947aae31c7..b203143647e6 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -365,6 +365,11 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset)
 	__raw_writel(val, slave->regs + offset);
 }
 
+struct cpsw_vector {
+	struct cpdma_chan *ch;
+	int budget;
+};
+
 struct cpsw_common {
 	struct device			*dev;
 	struct cpsw_platform_data	data;
@@ -380,8 +385,8 @@ struct cpsw_common {
 	int				rx_packet_max;
 	struct cpsw_slave		*slaves;
 	struct cpdma_ctlr		*dma;
-	struct cpdma_chan		*txch[CPSW_MAX_QUEUES];
-	struct cpdma_chan		*rxch[CPSW_MAX_QUEUES];
+	struct cpsw_vector		txv[CPSW_MAX_QUEUES];
+	struct cpsw_vector		rxv[CPSW_MAX_QUEUES];
 	struct cpsw_ale			*ale;
 	bool				quirk_irq;
 	bool				rx_irq_disabled;
@@ -389,6 +394,7 @@ struct cpsw_common {
 	u32 irqs_table[IRQ_NUM];
 	struct cpts			*cpts;
 	int				rx_ch_num, tx_ch_num;
+	int				speed;
 };
 
 struct cpsw_priv {
@@ -741,13 +747,100 @@ requeue:
 		return;
 	}
 
-	ch = cpsw->rxch[skb_get_queue_mapping(new_skb)];
+	ch = cpsw->rxv[skb_get_queue_mapping(new_skb)].ch;
 	ret = cpdma_chan_submit(ch, new_skb, new_skb->data,
 				skb_tailroom(new_skb), 0);
 	if (WARN_ON(ret < 0))
 		dev_kfree_skb_any(new_skb);
 }
 
+static void cpsw_split_res(struct net_device *ndev)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	u32 consumed_rate = 0, bigest_rate = 0;
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_vector *txv = cpsw->txv;
+	int i, ch_weight, rlim_ch_num = 0;
+	int budget, bigest_rate_ch = 0;
+	u32 ch_rate, max_rate;
+	int ch_budget = 0;
+
+	for (i = 0; i < cpsw->tx_ch_num; i++) {
+		ch_rate = cpdma_chan_get_rate(txv[i].ch);
+		if (!ch_rate)
+			continue;
+
+		rlim_ch_num++;
+		consumed_rate += ch_rate;
+	}
+
+	if (cpsw->tx_ch_num == rlim_ch_num) {
+		max_rate = consumed_rate;
+	} else if (!rlim_ch_num) {
+		ch_budget = CPSW_POLL_WEIGHT / cpsw->tx_ch_num;
+		bigest_rate = 0;
+		max_rate = consumed_rate;
+	} else {
+		max_rate = cpsw->speed * 1000;
+
+		/* if max_rate is less than expected due to reduced link speed,
+		 * split proportionally according to next potential max speed
+		 */
+		if (max_rate < consumed_rate)
+			max_rate *= 10;
+
+		if (max_rate < consumed_rate)
+			max_rate *= 10;
+
+		ch_budget = (consumed_rate * CPSW_POLL_WEIGHT) / max_rate;
+		ch_budget = (CPSW_POLL_WEIGHT - ch_budget) /
+			    (cpsw->tx_ch_num - rlim_ch_num);
+		bigest_rate = (max_rate - consumed_rate) /
+			      (cpsw->tx_ch_num - rlim_ch_num);
+	}
+
+	/* split tx weight/budget */
+	budget = CPSW_POLL_WEIGHT;
+	for (i = 0; i < cpsw->tx_ch_num; i++) {
+		ch_rate = cpdma_chan_get_rate(txv[i].ch);
+		if (ch_rate) {
+			txv[i].budget = (ch_rate * CPSW_POLL_WEIGHT) / max_rate;
+			if (!txv[i].budget)
+				txv[i].budget++;
+			if (ch_rate > bigest_rate) {
+				bigest_rate_ch = i;
+				bigest_rate = ch_rate;
+			}
+
+			ch_weight = (ch_rate * 100) / max_rate;
+			if (!ch_weight)
+				ch_weight++;
+			cpdma_chan_set_weight(cpsw->txv[i].ch, ch_weight);
+		} else {
+			txv[i].budget = ch_budget;
+			if (!bigest_rate_ch)
+				bigest_rate_ch = i;
+			cpdma_chan_set_weight(cpsw->txv[i].ch, 0);
+		}
+
+		budget -= txv[i].budget;
+	}
+
+	if (budget)
+		txv[bigest_rate_ch].budget += budget;
+
+	/* split rx budget */
+	budget = CPSW_POLL_WEIGHT;
+	ch_budget = budget / cpsw->rx_ch_num;
+	for (i = 0; i < cpsw->rx_ch_num; i++) {
+		cpsw->rxv[i].budget = ch_budget;
+		budget -= ch_budget;
+	}
+
+	if (budget)
+		cpsw->rxv[0].budget += budget;
+}
+
 static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id)
 {
 	struct cpsw_common *cpsw = dev_id;
@@ -783,24 +876,25 @@ static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
 static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 {
 	u32			ch_map;
-	int			num_tx, ch;
+	int			num_tx, cur_budget, ch;
 	struct cpsw_common	*cpsw = napi_to_cpsw(napi_tx);
+	struct cpsw_vector	*txv;
 
 	/* process every unprocessed channel */
 	ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
-	for (ch = 0, num_tx = 0; num_tx < budget; ch_map >>= 1, ch++) {
-		if (!ch_map) {
-			ch_map = cpdma_ctrl_txchs_state(cpsw->dma);
-			if (!ch_map)
-				break;
-
-			ch = 0;
-		}
-
+	for (ch = 0, num_tx = 0; ch_map; ch_map >>= 1, ch++) {
 		if (!(ch_map & 0x01))
 			continue;
 
-		num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx);
+		txv = &cpsw->txv[ch];
+		if (unlikely(txv->budget > budget - num_tx))
+			cur_budget = budget - num_tx;
+		else
+			cur_budget = txv->budget;
+
+		num_tx += cpdma_chan_process(txv->ch, cur_budget);
+		if (num_tx >= budget)
+			break;
 	}
 
 	if (num_tx < budget) {
@@ -818,24 +912,25 @@ static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget)
 static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget)
 {
 	u32			ch_map;
-	int			num_rx, ch;
+	int			num_rx, cur_budget, ch;
 	struct cpsw_common	*cpsw = napi_to_cpsw(napi_rx);
+	struct cpsw_vector	*rxv;
 
 	/* process every unprocessed channel */
 	ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
-	for (ch = 0, num_rx = 0; num_rx < budget; ch_map >>= 1, ch++) {
-		if (!ch_map) {
-			ch_map = cpdma_ctrl_rxchs_state(cpsw->dma);
-			if (!ch_map)
-				break;
-
-			ch = 0;
-		}
-
+	for (ch = 0, num_rx = 0; ch_map; ch_map >>= 1, ch++) {
 		if (!(ch_map & 0x01))
 			continue;
 
-		num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx);
+		rxv = &cpsw->rxv[ch];
+		if (unlikely(rxv->budget > budget - num_rx))
+			cur_budget = budget - num_rx;
+		else
+			cur_budget = rxv->budget;
+
+		num_rx += cpdma_chan_process(rxv->ch, cur_budget);
+		if (num_rx >= budget)
+			break;
 	}
 
 	if (num_rx < budget) {
@@ -926,14 +1021,56 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave,
 	slave->mac_control = mac_control;
 }
 
+static int cpsw_get_common_speed(struct cpsw_common *cpsw)
+{
+	int i, speed;
+
+	for (i = 0, speed = 0; i < cpsw->data.slaves; i++)
+		if (cpsw->slaves[i].phy && cpsw->slaves[i].phy->link)
+			speed += cpsw->slaves[i].phy->speed;
+
+	return speed;
+}
+
+static int cpsw_need_resplit(struct cpsw_common *cpsw)
+{
+	int i, rlim_ch_num;
+	int speed, ch_rate;
+
+	/* re-split resources only in case speed was changed */
+	speed = cpsw_get_common_speed(cpsw);
+	if (speed == cpsw->speed || !speed)
+		return 0;
+
+	cpsw->speed = speed;
+
+	for (i = 0, rlim_ch_num = 0; i < cpsw->tx_ch_num; i++) {
+		ch_rate = cpdma_chan_get_rate(cpsw->txv[i].ch);
+		if (!ch_rate)
+			break;
+
+		rlim_ch_num++;
+	}
+
+	/* cases not dependent on speed */
+	if (!rlim_ch_num || rlim_ch_num == cpsw->tx_ch_num)
+		return 0;
+
+	return 1;
+}
+
 static void cpsw_adjust_link(struct net_device *ndev)
 {
 	struct cpsw_priv	*priv = netdev_priv(ndev);
+	struct cpsw_common	*cpsw = priv->cpsw;
 	bool			link = false;
 
 	for_each_slave(priv, _cpsw_adjust_link, priv, &link);
 
 	if (link) {
+		if (cpsw_need_resplit(cpsw))
+			cpsw_split_res(ndev);
+
 		netif_carrier_on(ndev);
 		if (netif_running(ndev))
 			netif_tx_wake_all_queues(ndev);
@@ -1075,7 +1212,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev,
 				cpsw_gstrings_stats[l].stat_offset);
 
 	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
-		cpdma_chan_get_stats(cpsw->rxch[ch], &ch_stats);
+		cpdma_chan_get_stats(cpsw->rxv[ch].ch, &ch_stats);
 		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
 			p = (u8 *)&ch_stats +
 				cpsw_gstrings_ch_stats[i].stat_offset;
@@ -1084,7 +1221,7 @@ static void cpsw_get_ethtool_stats(struct net_device *ndev,
 	}
 
 	for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
-		cpdma_chan_get_stats(cpsw->txch[ch], &ch_stats);
+		cpdma_chan_get_stats(cpsw->txv[ch].ch, &ch_stats);
 		for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) {
 			p = (u8 *)&ch_stats +
 				cpsw_gstrings_ch_stats[i].stat_offset;
@@ -1281,7 +1418,7 @@ static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
 	int ch, i, ret;
 
 	for (ch = 0; ch < cpsw->rx_ch_num; ch++) {
-		ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch[ch]);
+		ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxv[ch].ch);
 		for (i = 0; i < ch_buf_num; i++) {
 			skb = __netdev_alloc_skb_ip_align(priv->ndev,
 							  cpsw->rx_packet_max,
@@ -1292,8 +1429,9 @@ static int cpsw_fill_rx_channels(struct cpsw_priv *priv)
 			}
 
 			skb_set_queue_mapping(skb, ch);
-			ret = cpdma_chan_submit(cpsw->rxch[ch], skb, skb->data,
-						skb_tailroom(skb), 0);
+			ret = cpdma_chan_submit(cpsw->rxv[ch].ch, skb,
+						skb->data, skb_tailroom(skb),
+						0);
 			if (ret < 0) {
 				cpsw_err(priv, ifup,
 					 "cannot submit skb to channel %d rx, error %d\n",
@@ -1376,10 +1514,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
 				  ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0);
 
 	if (!cpsw_common_res_usage_state(cpsw)) {
-		/* setup tx dma to fixed prio and zero offset */
-		cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1);
-		cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0);
-
 		/* disable priority elevation */
 		__raw_writel(0, &cpsw->regs->ptype);
 
@@ -1406,9 +1540,7 @@ static int cpsw_ndo_open(struct net_device *ndev)
 		if (ret < 0)
 			goto err_cleanup;
 
-		if (cpts_register(cpsw->dev, cpsw->cpts,
-				  cpsw->data.cpts_clock_mult,
-				  cpsw->data.cpts_clock_shift))
+		if (cpts_register(cpsw->cpts))
 			dev_err(priv->dev, "error registering cpts device\n");
 
 	}
@@ -1427,8 +1559,6 @@ static int cpsw_ndo_open(struct net_device *ndev)
 	if (cpsw->data.dual_emac)
 		cpsw->slaves[priv->emac_port].open_stat = true;
 
-	netif_tx_start_all_queues(ndev);
-
 	return 0;
 
 err_cleanup:
@@ -1457,6 +1587,10 @@ static int cpsw_ndo_stop(struct net_device *ndev)
 		cpsw_ale_stop(cpsw->ale);
 	}
 	for_each_slave(priv, cpsw_slave_stop, cpsw);
+
+	if (cpsw_need_resplit(cpsw))
+		cpsw_split_res(ndev);
+
 	pm_runtime_put_sync(cpsw->dev);
 	if (cpsw->data.dual_emac)
 		cpsw->slaves[priv->emac_port].open_stat = false;
@@ -1481,7 +1615,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 	}
 
 	if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP &&
-				cpsw->cpts->tx_enable)
+	    cpts_is_tx_enabled(cpsw->cpts))
 		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
 
 	skb_tx_timestamp(skb);
@@ -1490,7 +1624,7 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb,
 	if (q_idx >= cpsw->tx_ch_num)
 		q_idx = q_idx % cpsw->tx_ch_num;
 
-	txch = cpsw->txch[q_idx];
+	txch = cpsw->txv[q_idx].ch;
 	ret = cpsw_tx_packet_submit(priv, skb, txch);
 	if (unlikely(ret != 0)) {
 		cpsw_err(priv, tx_err, "desc submit failed\n");
@@ -1513,14 +1647,15 @@ fail:
 	return NETDEV_TX_BUSY;
 }
 
-#ifdef CONFIG_TI_CPTS
+#if IS_ENABLED(CONFIG_TI_CPTS)
 
 static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 {
 	struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave];
 	u32 ts_en, seq_id;
 
-	if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) {
+	if (!cpts_is_tx_enabled(cpsw->cpts) &&
+	    !cpts_is_rx_enabled(cpsw->cpts)) {
 		slave_write(slave, 0, CPSW1_TS_CTL);
 		return;
 	}
@@ -1528,10 +1663,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw)
 	seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
 	ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS;
 
-	if (cpsw->cpts->tx_enable)
+	if (cpts_is_tx_enabled(cpsw->cpts))
 		ts_en |= CPSW_V1_TS_TX_EN;
 
-	if (cpsw->cpts->rx_enable)
+	if (cpts_is_rx_enabled(cpsw->cpts))
 		ts_en |= CPSW_V1_TS_RX_EN;
 
 	slave_write(slave, ts_en, CPSW1_TS_CTL);
@@ -1544,30 +1679,27 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv)
 	struct cpsw_common *cpsw = priv->cpsw;
 	u32 ctrl, mtype;
 
-	if (cpsw->data.dual_emac)
-		slave = &cpsw->slaves[priv->emac_port];
-	else
-		slave = &cpsw->slaves[cpsw->data.active_slave];
+	slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
 
 	ctrl = slave_read(slave, CPSW2_CONTROL);
 	switch (cpsw->version) {
 	case CPSW_VERSION_2:
 		ctrl &= ~CTRL_V2_ALL_TS_MASK;
 
-		if (cpsw->cpts->tx_enable)
+		if (cpts_is_tx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V2_TX_TS_BITS;
 
-		if (cpsw->cpts->rx_enable)
+		if (cpts_is_rx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V2_RX_TS_BITS;
 		break;
 	case CPSW_VERSION_3:
 	default:
 		ctrl &= ~CTRL_V3_ALL_TS_MASK;
 
-		if (cpsw->cpts->tx_enable)
+		if (cpts_is_tx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V3_TX_TS_BITS;
 
-		if (cpsw->cpts->rx_enable)
+		if (cpts_is_rx_enabled(cpsw->cpts))
 			ctrl |= CTRL_V3_RX_TS_BITS;
 		break;
 	}
@@ -1603,7 +1735,7 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 
 	switch (cfg.rx_filter) {
 	case HWTSTAMP_FILTER_NONE:
-		cpts->rx_enable = 0;
+		cpts_rx_enable(cpts, 0);
 		break;
 	case HWTSTAMP_FILTER_ALL:
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
@@ -1619,14 +1751,14 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
 	case HWTSTAMP_FILTER_PTP_V2_EVENT:
 	case HWTSTAMP_FILTER_PTP_V2_SYNC:
 	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
-		cpts->rx_enable = 1;
+		cpts_rx_enable(cpts, 1);
 		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 		break;
 	default:
 		return -ERANGE;
 	}
 
-	cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON;
+	cpts_tx_enable(cpts, cfg.tx_type == HWTSTAMP_TX_ON);
 
 	switch (cpsw->version) {
 	case CPSW_VERSION_1:
@@ -1655,13 +1787,23 @@ static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
 		return -EOPNOTSUPP;
 
 	cfg.flags = 0;
-	cfg.tx_type = cpts->tx_enable ? HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
-	cfg.rx_filter = (cpts->rx_enable ?
+	cfg.tx_type = cpts_is_tx_enabled(cpts) ?
+		      HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	cfg.rx_filter = (cpts_is_rx_enabled(cpts) ?
 			 HWTSTAMP_FILTER_PTP_V2_EVENT : HWTSTAMP_FILTER_NONE);
 
 	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
 }
+#else
+static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
 
+static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr)
+{
+	return -EOPNOTSUPP;
+}
 #endif /*CONFIG_TI_CPTS*/
 
 static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
@@ -1674,12 +1816,10 @@ static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
 		return -EINVAL;
 
 	switch (cmd) {
-#ifdef CONFIG_TI_CPTS
 	case SIOCSHWTSTAMP:
 		return cpsw_hwtstamp_set(dev, req);
 	case SIOCGHWTSTAMP:
 		return cpsw_hwtstamp_get(dev, req);
-#endif
 	}
 
 	if (!cpsw->slaves[slave_no].phy)
@@ -1697,8 +1837,8 @@ static void cpsw_ndo_tx_timeout(struct net_device *ndev)
 	ndev->stats.tx_errors++;
 	cpsw_intr_disable(cpsw);
 	for (ch = 0; ch < cpsw->tx_ch_num; ch++) {
-		cpdma_chan_stop(cpsw->txch[ch]);
-		cpdma_chan_start(cpsw->txch[ch]);
+		cpdma_chan_stop(cpsw->txv[ch].ch);
+		cpdma_chan_start(cpsw->txv[ch].ch);
 	}
 
 	cpsw_intr_enable(cpsw);
@@ -1876,6 +2016,57 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev,
 	return ret;
 }
 
+/* .ndo_set_tx_maxrate: limit TX queue @queue to @rate, which is scaled by
+ * 1000 for cpdma (presumably Mbps -> kbps).  Returns 0 or a negative errno.
+ */
+static int cpsw_ndo_set_tx_maxrate(struct net_device *ndev, int queue, u32 rate)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave;
+	u32 min_rate, ch_rate;
+	int i, ret;
+
+	ch_rate = netdev_get_tx_queue(ndev, queue)->tx_maxrate;
+	if (ch_rate == rate)
+		return 0;
+
+	ch_rate = rate * 1000;
+	min_rate = cpdma_chan_get_min_rate(cpsw->dma);
+	if (ch_rate && ch_rate < min_rate) {
+		dev_err(priv->dev, "The channel rate cannot be less than %ukbps\n",
+			min_rate);
+		return -EINVAL;
+	}
+
+	if (rate > cpsw->speed) {
+		dev_err(priv->dev, "The channel rate cannot be more than 2Gbps\n");
+		return -EINVAL;
+	}
+
+	ret = pm_runtime_get_sync(cpsw->dev);
+	if (ret < 0) {
+		pm_runtime_put_noidle(cpsw->dev);
+		return ret;
+	}
+
+	ret = cpdma_chan_set_rate(cpsw->txv[queue].ch, ch_rate);
+	pm_runtime_put(cpsw->dev);
+	if (ret)
+		return ret;
+
+	/* update rates for slaves tx queues */
+	for (i = 0; i < cpsw->data.slaves; i++) {
+		slave = &cpsw->slaves[i];
+		if (!slave->ndev)
+			continue;
+		netdev_get_tx_queue(slave->ndev, queue)->tx_maxrate = rate;
+	}
+
+	cpsw_split_res(ndev);
+	return ret;
+}
+
 static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_open		= cpsw_ndo_open,
 	.ndo_stop		= cpsw_ndo_stop,
@@ -1883,9 +2074,9 @@ static const struct net_device_ops cpsw_netdev_ops = {
 	.ndo_set_mac_address	= cpsw_ndo_set_mac_address,
 	.ndo_do_ioctl		= cpsw_ndo_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_tx_timeout		= cpsw_ndo_tx_timeout,
 	.ndo_set_rx_mode	= cpsw_ndo_set_rx_mode,
+	.ndo_set_tx_maxrate	= cpsw_ndo_set_tx_maxrate,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= cpsw_ndo_poll_controller,
 #endif
@@ -1935,10 +2126,10 @@ static void cpsw_set_msglevel(struct net_device *ndev, u32 value)
 	priv->msg_enable = value;
 }
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
 static int cpsw_get_ts_info(struct net_device *ndev,
 			    struct ethtool_ts_info *info)
 {
-#ifdef CONFIG_TI_CPTS
 	struct cpsw_common *cpsw = ndev_to_cpsw(ndev);
 
 	info->so_timestamping =
@@ -1955,7 +2146,12 @@ static int cpsw_get_ts_info(struct net_device *ndev,
 	info->rx_filters =
 		(1 << HWTSTAMP_FILTER_NONE) |
 		(1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+	return 0;
+}
 #else
+static int cpsw_get_ts_info(struct net_device *ndev,
+			    struct ethtool_ts_info *info)
+{
 	info->so_timestamping =
 		SOF_TIMESTAMPING_TX_SOFTWARE |
 		SOF_TIMESTAMPING_RX_SOFTWARE |
@@ -1963,31 +2159,34 @@ static int cpsw_get_ts_info(struct net_device *ndev,
 	info->phc_index = -1;
 	info->tx_types = 0;
 	info->rx_filters = 0;
-#endif
 	return 0;
 }
+#endif
 
-static int cpsw_get_settings(struct net_device *ndev,
-			     struct ethtool_cmd *ecmd)
+static int cpsw_get_link_ksettings(struct net_device *ndev,
+				   struct ethtool_link_ksettings *ecmd)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct cpsw_common *cpsw = priv->cpsw;
 	int slave_no = cpsw_slave_index(cpsw, priv);
 
 	if (cpsw->slaves[slave_no].phy)
-		return phy_ethtool_gset(cpsw->slaves[slave_no].phy, ecmd);
+		return phy_ethtool_ksettings_get(cpsw->slaves[slave_no].phy,
+						 ecmd);
 	else
 		return -EOPNOTSUPP;
 }
 
-static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd)
+static int cpsw_set_link_ksettings(struct net_device *ndev,
+				   const struct ethtool_link_ksettings *ecmd)
 {
 	struct cpsw_priv *priv = netdev_priv(ndev);
 	struct cpsw_common *cpsw = priv->cpsw;
 	int slave_no = cpsw_slave_index(cpsw, priv);
 
 	if (cpsw->slaves[slave_no].phy)
-		return phy_ethtool_sset(cpsw->slaves[slave_no].phy, ecmd);
+		return phy_ethtool_ksettings_set(cpsw->slaves[slave_no].phy,
+						 ecmd);
 	else
 		return -EOPNOTSUPP;
 }
@@ -2102,28 +2301,31 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
 	int (*poll)(struct napi_struct *, int);
 	struct cpsw_common *cpsw = priv->cpsw;
 	void (*handler)(void *, int, int);
-	struct cpdma_chan **chan;
+	struct netdev_queue *queue;
+	struct cpsw_vector *vec;
 	int ret, *ch;
 
 	if (rx) {
 		ch = &cpsw->rx_ch_num;
-		chan = cpsw->rxch;
+		vec = cpsw->rxv;
 		handler = cpsw_rx_handler;
 		poll = cpsw_rx_poll;
 	} else {
 		ch = &cpsw->tx_ch_num;
-		chan = cpsw->txch;
+		vec = cpsw->txv;
 		handler = cpsw_tx_handler;
 		poll = cpsw_tx_poll;
 	}
 
 	while (*ch < ch_num) {
-		chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+		vec[*ch].ch = cpdma_chan_create(cpsw->dma, *ch, handler, rx);
+		queue = netdev_get_tx_queue(priv->ndev, *ch);
+		queue->tx_maxrate = 0;
 
-		if (IS_ERR(chan[*ch]))
-			return PTR_ERR(chan[*ch]);
+		if (IS_ERR(vec[*ch].ch))
+			return PTR_ERR(vec[*ch].ch);
 
-		if (!chan[*ch])
+		if (!vec[*ch].ch)
 			return -EINVAL;
 
 		cpsw_info(priv, ifup, "created new %d %s channel\n", *ch,
@@ -2134,7 +2336,7 @@ static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx)
 	while (*ch > ch_num) {
 		(*ch)--;
 
-		ret = cpdma_chan_destroy(chan[*ch]);
+		ret = cpdma_chan_destroy(vec[*ch].ch);
 		if (ret)
 			return ret;
 
@@ -2221,6 +2423,8 @@ static int cpsw_set_channels(struct net_device *ndev,
 		if (ret)
 			goto err;
 
+		cpsw_split_res(ndev);
+
 		/* After this receive is started */
 		cpdma_ctlr_start(cpsw->dma);
 		cpsw_intr_enable(cpsw);
@@ -2239,14 +2443,48 @@ err:
 	return ret;
 }
 
+/* ethtool get_eee: forward to the slave's PHY, -EOPNOTSUPP without one */
+static int cpsw_get_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+
+	if (!slave->phy)
+		return -EOPNOTSUPP;
+	return phy_ethtool_get_eee(slave->phy, edata);
+}
+
+/* ethtool set_eee: forward to the slave's PHY, -EOPNOTSUPP without one */
+static int cpsw_set_eee(struct net_device *ndev, struct ethtool_eee *edata)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+
+	if (!slave->phy)
+		return -EOPNOTSUPP;
+	return phy_ethtool_set_eee(slave->phy, edata);
+}
+
+/* ethtool nway_reset: call genphy_restart_aneg() on the slave's PHY */
+static int cpsw_nway_reset(struct net_device *ndev)
+{
+	struct cpsw_priv *priv = netdev_priv(ndev);
+	struct cpsw_common *cpsw = priv->cpsw;
+	struct cpsw_slave *slave = &cpsw->slaves[cpsw_slave_index(cpsw, priv)];
+
+	if (!slave->phy)
+		return -EOPNOTSUPP;
+	return genphy_restart_aneg(slave->phy);
+}
+
 static const struct ethtool_ops cpsw_ethtool_ops = {
 	.get_drvinfo	= cpsw_get_drvinfo,
 	.get_msglevel	= cpsw_get_msglevel,
 	.set_msglevel	= cpsw_set_msglevel,
 	.get_link	= ethtool_op_get_link,
 	.get_ts_info	= cpsw_get_ts_info,
-	.get_settings	= cpsw_get_settings,
-	.set_settings	= cpsw_set_settings,
 	.get_coalesce	= cpsw_get_coalesce,
 	.set_coalesce	= cpsw_set_coalesce,
 	.get_sset_count		= cpsw_get_sset_count,
@@ -2262,6 +2500,11 @@ static const struct ethtool_ops cpsw_ethtool_ops = {
 	.complete	= cpsw_ethtool_op_complete,
 	.get_channels	= cpsw_get_channels,
 	.set_channels	= cpsw_set_channels,
+	.get_link_ksettings	= cpsw_get_link_ksettings,
+	.set_link_ksettings	= cpsw_set_link_ksettings,
+	.get_eee	= cpsw_get_eee,
+	.set_eee	= cpsw_set_eee,
+	.nway_reset	= cpsw_nway_reset,
 };
 
 static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw,
@@ -2300,18 +2543,6 @@ static int cpsw_probe_dt(struct cpsw_platform_data *data,
 	}
 	data->active_slave = prop;
 
-	if (of_property_read_u32(node, "cpts_clock_mult", &prop)) {
-		dev_err(&pdev->dev, "Missing cpts_clock_mult property in the DT.\n");
-		return -EINVAL;
-	}
-	data->cpts_clock_mult = prop;
-
-	if (of_property_read_u32(node, "cpts_clock_shift", &prop)) {
-		dev_err(&pdev->dev, "Missing cpts_clock_shift property in the DT.\n");
-		return -EINVAL;
-	}
-	data->cpts_clock_shift = prop;
-
 	data->slave_data = devm_kzalloc(&pdev->dev, data->slaves
 					* sizeof(struct cpsw_slave_data),
 					GFP_KERNEL);
@@ -2459,20 +2690,8 @@ static void cpsw_remove_dt(struct platform_device *pdev)
 		if (strcmp(slave_node->name, "slave"))
 			continue;
 
-		if (of_phy_is_fixed_link(slave_node)) {
-			struct phy_device *phydev;
-
-			phydev = of_phy_find_device(slave_node);
-			if (phydev) {
-				fixed_phy_unregister(phydev);
-				/* Put references taken by
-				 * of_phy_find_device() and
-				 * of_phy_register_fixed_link().
-				 */
-				phy_device_free(phydev);
-				phy_device_free(phydev);
-			}
-		}
+		if (of_phy_is_fixed_link(slave_node))
+			of_phy_deregister_fixed_link(slave_node);
 
 		of_node_put(slave_data->phy_node);
 
@@ -2582,6 +2801,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	struct cpdma_params		dma_params;
 	struct cpsw_ale_params		ale_params;
 	void __iomem			*ss_regs;
+	void __iomem			*cpts_regs;
 	struct resource			*res, *ss_res;
 	const struct of_device_id	*of_id;
 	struct gpio_descs		*mode;
@@ -2609,12 +2829,6 @@ static int cpsw_probe(struct platform_device *pdev)
 	priv->dev  = &ndev->dev;
 	priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG);
 	cpsw->rx_packet_max = max(rx_packet_max, 128);
-	cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL);
-	if (!cpsw->cpts) {
-		dev_err(&pdev->dev, "error allocating cpts\n");
-		ret = -ENOMEM;
-		goto clean_ndev_ret;
-	}
 
 	mode = devm_gpiod_get_array_optional(&pdev->dev, "mode", GPIOD_OUT_LOW);
 	if (IS_ERR(mode)) {
@@ -2702,7 +2916,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	switch (cpsw->version) {
 	case CPSW_VERSION_1:
 		cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET;
-		cpsw->cpts->reg      = ss_regs + CPSW1_CPTS_OFFSET;
+		cpts_regs		= ss_regs + CPSW1_CPTS_OFFSET;
 		cpsw->hw_stats	     = ss_regs + CPSW1_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW1_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW1_STATERAM_OFFSET;
@@ -2716,7 +2930,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	case CPSW_VERSION_3:
 	case CPSW_VERSION_4:
 		cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET;
-		cpsw->cpts->reg      = ss_regs + CPSW2_CPTS_OFFSET;
+		cpts_regs		= ss_regs + CPSW2_CPTS_OFFSET;
 		cpsw->hw_stats	     = ss_regs + CPSW2_HW_STATS;
 		dma_params.dmaregs   = ss_regs + CPSW2_CPDMA_OFFSET;
 		dma_params.txhdp     = ss_regs + CPSW2_STATERAM_OFFSET;
@@ -2754,6 +2968,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	dma_params.desc_align		= 16;
 	dma_params.has_ext_regs		= true;
 	dma_params.desc_hw_addr         = dma_params.desc_mem_phys;
+	dma_params.bus_freq_mhz		= cpsw->bus_freq_mhz;
 
 	cpsw->dma = cpdma_ctlr_create(&dma_params);
 	if (!cpsw->dma) {
@@ -2762,9 +2977,9 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_dt_ret;
 	}
 
-	cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
-	cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
-	if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) {
+	cpsw->txv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0);
+	cpsw->rxv[0].ch = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1);
+	if (WARN_ON(!cpsw->rxv[0].ch || !cpsw->txv[0].ch)) {
 		dev_err(priv->dev, "error initializing dma channels\n");
 		ret = -ENOMEM;
 		goto clean_dma_ret;
@@ -2782,6 +2997,12 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_dma_ret;
 	}
 
+	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpsw->dev->of_node);
+	if (IS_ERR(cpsw->cpts)) {
+		ret = PTR_ERR(cpsw->cpts);
+		goto clean_ale_ret;
+	}
+
 	ndev->irq = platform_get_irq(pdev, 1);
 	if (ndev->irq < 0) {
 		dev_err(priv->dev, "error getting irq resource\n");
@@ -2840,6 +3061,7 @@ static int cpsw_probe(struct platform_device *pdev)
 	ndev->ethtool_ops = &cpsw_ethtool_ops;
 	netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT);
 	netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT);
+	cpsw_split_res(ndev);
 
 	/* register the network device */
 	SET_NETDEV_DEV(ndev, &pdev->dev);
@@ -2897,6 +3119,7 @@ static int cpsw_remove(struct platform_device *pdev)
 		unregister_netdev(cpsw->slaves[1].ndev);
 	unregister_netdev(ndev);
 
+	cpts_release(cpsw->cpts);
 	cpsw_ale_destroy(cpsw->ale);
 	cpdma_ctlr_destroy(cpsw->dma);
 	cpsw_remove_dt(pdev);
@@ -2942,6 +3165,8 @@ static int cpsw_resume(struct device *dev)
 	/* Select default pin state */
 	pinctrl_pm_select_default_state(dev);
 
+	/* shut up ASSERT_RTNL() warning in netif_set_real_num_tx/rx_queues */
+	rtnl_lock();
 	if (cpsw->data.dual_emac) {
 		int i;
 
@@ -2953,6 +3178,8 @@ static int cpsw_resume(struct device *dev)
 		if (netif_running(ndev))
 			cpsw_ndo_open(ndev);
 	}
+	rtnl_unlock();
+
 	return 0;
 }
 #endif
diff --git a/drivers/net/ethernet/ti/cpsw.h b/drivers/net/ethernet/ti/cpsw.h
index 16b54c6f32c2..6c3037aa2cd3 100644
--- a/drivers/net/ethernet/ti/cpsw.h
+++ b/drivers/net/ethernet/ti/cpsw.h
@@ -31,8 +31,6 @@ struct cpsw_platform_data {
 	u32	channels;	/* number of cpdma channels (symmetric) */
 	u32	slaves;		/* number of slave cpgmac ports */
 	u32	active_slave; /* time stamping, ethtool and SIOCGMIIPHY slave */
-	u32	cpts_clock_mult;  /* convert input clock ticks to nanoseconds */
-	u32	cpts_clock_shift; /* convert input clock ticks to nanoseconds */
 	u32	ale_entries;	/* ale table size */
 	u32	bd_ram_size;  /*buffer descriptor ram size */
 	u32	mac_control;	/* Mac control register */
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 85a55b4ff8c0..0c0d48e5bea4 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -31,10 +31,8 @@
 
 #include "cpts.h"
 
-#ifdef CONFIG_TI_CPTS
-
-#define cpts_read32(c, r)	__raw_readl(&c->reg->r)
-#define cpts_write32(c, v, r)	__raw_writel(v, &c->reg->r)
+#define cpts_read32(c, r)	readl_relaxed(&c->reg->r)
+#define cpts_write32(c, v, r)	writel_relaxed(v, &c->reg->r)
 
 static int event_expired(struct cpts_event *event)
 {
@@ -59,6 +57,26 @@ static int cpts_fifo_pop(struct cpts *cpts, u32 *high, u32 *low)
 	return -1;
 }
 
+/* Return expired events to the free pool; 0 if any were reclaimed, else -1 */
+static int cpts_purge_events(struct cpts *cpts)
+{
+	struct cpts_event *ev, *tmp;
+	int reclaimed = 0;
+
+	list_for_each_entry_safe(ev, tmp, &cpts->events, list) {
+		if (!event_expired(ev))
+			continue;
+		list_del_init(&ev->list);
+		list_add(&ev->list, &cpts->pool);
+		reclaimed++;
+	}
+
+	if (!reclaimed)
+		return -1;
+	pr_debug("cpts: event pool cleaned up %d\n", reclaimed);
+	return 0;
+}
+
 /*
  * Returns zero if matching event type was found.
  */
@@ -71,10 +89,12 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 	for (i = 0; i < CPTS_FIFO_DEPTH; i++) {
 		if (cpts_fifo_pop(cpts, &hi, &lo))
 			break;
-		if (list_empty(&cpts->pool)) {
-			pr_err("cpts: event pool is empty\n");
+
+		if (list_empty(&cpts->pool) && cpts_purge_events(cpts)) {
+			pr_err("cpts: event pool empty\n");
 			return -1;
 		}
+
 		event = list_first_entry(&cpts->pool, struct cpts_event, list);
 		event->tmo = jiffies + 2;
 		event->high = hi;
@@ -223,27 +243,9 @@ static void cpts_overflow_check(struct work_struct *work)
 	struct timespec64 ts;
 	struct cpts *cpts = container_of(work, struct cpts, overflow_work.work);
 
-	cpts_write32(cpts, CPTS_EN, control);
-	cpts_write32(cpts, TS_PEND_EN, int_enable);
 	cpts_ptp_gettime(&cpts->info, &ts);
 	pr_debug("cpts overflow check at %lld.%09lu\n", ts.tv_sec, ts.tv_nsec);
-	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
-}
-
-static void cpts_clk_init(struct device *dev, struct cpts *cpts)
-{
-	cpts->refclk = devm_clk_get(dev, "cpts");
-	if (IS_ERR(cpts->refclk)) {
-		dev_err(dev, "Failed to get cpts refclk\n");
-		cpts->refclk = NULL;
-		return;
-	}
-	clk_prepare_enable(cpts->refclk);
-}
-
-static void cpts_clk_release(struct cpts *cpts)
-{
-	clk_disable(cpts->refclk);
+	schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
 }
 
 static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
@@ -334,6 +336,7 @@ void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 	memset(ssh, 0, sizeof(*ssh));
 	ssh->hwtstamp = ns_to_ktime(ns);
 }
+EXPORT_SYMBOL_GPL(cpts_rx_timestamp);
 
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
@@ -349,60 +352,170 @@ void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 	ssh.hwtstamp = ns_to_ktime(ns);
 	skb_tstamp_tx(skb, &ssh);
 }
+EXPORT_SYMBOL_GPL(cpts_tx_timestamp);
 
-#endif /*CONFIG_TI_CPTS*/
-
-int cpts_register(struct device *dev, struct cpts *cpts,
-		  u32 mult, u32 shift)
+int cpts_register(struct cpts *cpts)
 {
-#ifdef CONFIG_TI_CPTS
 	int err, i;
-	unsigned long flags;
-
-	cpts->info = cpts_info;
-	cpts->clock = ptp_clock_register(&cpts->info, dev);
-	if (IS_ERR(cpts->clock)) {
-		err = PTR_ERR(cpts->clock);
-		cpts->clock = NULL;
-		return err;
-	}
-	spin_lock_init(&cpts->lock);
-
-	cpts->cc.read = cpts_systim_read;
-	cpts->cc.mask = CLOCKSOURCE_MASK(32);
-	cpts->cc_mult = mult;
-	cpts->cc.mult = mult;
-	cpts->cc.shift = shift;
 
 	INIT_LIST_HEAD(&cpts->events);
 	INIT_LIST_HEAD(&cpts->pool);
 	for (i = 0; i < CPTS_MAX_EVENTS; i++)
 		list_add(&cpts->pool_data[i].list, &cpts->pool);
 
-	cpts_clk_init(dev, cpts);
+	clk_enable(cpts->refclk);
+
 	cpts_write32(cpts, CPTS_EN, control);
 	cpts_write32(cpts, TS_PEND_EN, int_enable);
 
-	spin_lock_irqsave(&cpts->lock, flags);
 	timecounter_init(&cpts->tc, &cpts->cc, ktime_to_ns(ktime_get_real()));
-	spin_unlock_irqrestore(&cpts->lock, flags);
-
-	INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
-	schedule_delayed_work(&cpts->overflow_work, CPTS_OVERFLOW_PERIOD);
 
+	cpts->clock = ptp_clock_register(&cpts->info, cpts->dev);
+	if (IS_ERR(cpts->clock)) {
+		err = PTR_ERR(cpts->clock);
+		cpts->clock = NULL;
+		goto err_ptp;
+	}
 	cpts->phc_index = ptp_clock_index(cpts->clock);
-#endif
+
+	schedule_delayed_work(&cpts->overflow_work, cpts->ov_check_period);
 	return 0;
+
+err_ptp:
+	clk_disable(cpts->refclk);
+	return err;
 }
+EXPORT_SYMBOL_GPL(cpts_register);
 
 void cpts_unregister(struct cpts *cpts)
 {
-#ifdef CONFIG_TI_CPTS
-	if (cpts->clock) {
-		ptp_clock_unregister(cpts->clock);
-		cancel_delayed_work_sync(&cpts->overflow_work);
+	if (WARN_ON(!cpts->clock))
+		return;
+
+	cancel_delayed_work_sync(&cpts->overflow_work);
+
+	ptp_clock_unregister(cpts->clock);
+	cpts->clock = NULL;
+
+	cpts_write32(cpts, 0, int_enable);
+	cpts_write32(cpts, 0, control);
+
+	clk_disable(cpts->refclk);
+}
+EXPORT_SYMBOL_GPL(cpts_unregister);
+
+/* Derive the overflow check period from the refclk rate and, unless
+ * mult/shift were already set, calculate them for the cycle counter.
+ */
+static void cpts_calc_mult_shift(struct cpts *cpts)
+{
+	u32 freq = clk_get_rate(cpts->refclk);
+	u64 maxsec = cpts->cc.mask;
+	u64 frac = 0, ns;
+
+	/* Maximum number of seconds the counter can run before it wraps
+	 * around.  Cap it at 10 sec: higher values produce too small mult
+	 * factors and so reduce the conversion accuracy.
+	 */
+	do_div(maxsec, freq);
+	if (maxsec > 10)
+		maxsec = 10;
+
+	/* Check for overflow twice per maxsec interval (maxsec / 2) */
+	cpts->ov_check_period = (HZ * maxsec) / 2;
+	dev_info(cpts->dev, "cpts: overflow check period %lu (jiffies)\n",
+		 cpts->ov_check_period);
+
+	/* keep a mult/shift pair that is already configured */
+	if (cpts->cc.mult || cpts->cc.shift)
+		return;
+
+	clocks_calc_mult_shift(&cpts->cc.mult, &cpts->cc.shift,
+			       freq, NSEC_PER_SEC, maxsec);
+
+	/* report the conversion error over one second worth of cycles */
+	ns = cyclecounter_cyc2ns(&cpts->cc, freq, cpts->cc.mask, &frac);
+
+	dev_info(cpts->dev,
+		 "CPTS: ref_clk_freq:%u calc_mult:%u calc_shift:%u error:%lld nsec/sec\n",
+		 freq, cpts->cc.mult, cpts->cc.shift, (ns - NSEC_PER_SEC));
+}
+
+/* Read the optional "cpts_clock_mult" and "cpts_clock_shift" properties
+ * into cpts->cc.  Returns 0, or -EINVAL when exactly one of mult and
+ * shift ends up non-zero.
+ */
+static int cpts_of_parse(struct cpts *cpts, struct device_node *node)
+{
+	u32 val;
+
+	if (of_property_read_u32(node, "cpts_clock_mult", &val) == 0)
+		cpts->cc.mult = val;
+
+	if (of_property_read_u32(node, "cpts_clock_shift", &val) == 0)
+		cpts->cc.shift = val;
+
+	/* mult and shift are accepted only as a pair (both or neither) */
+	if (!cpts->cc.mult == !cpts->cc.shift)
+		return 0;
+
+	dev_err(cpts->dev, "CPTS: Missing property in the DT.\n");
+	return -EINVAL;
+}
+
+/* Allocate (devm) and set up a cpts instance; gets and prepares the "cpts"
+ * refclk.  Returns the new instance or an ERR_PTR() on failure.
+ */
+struct cpts *cpts_create(struct device *dev, void __iomem *regs,
+			 struct device_node *node)
+{
+	struct cpts *cpts;
+	int ret;
+
+	cpts = devm_kzalloc(dev, sizeof(*cpts), GFP_KERNEL);
+	if (!cpts)
+		return ERR_PTR(-ENOMEM);
+
+	cpts->dev = dev;
+	cpts->reg = (struct cpsw_cpts __iomem *)regs;
+	spin_lock_init(&cpts->lock);
+	INIT_DELAYED_WORK(&cpts->overflow_work, cpts_overflow_check);
+
+	ret = cpts_of_parse(cpts, node);
+	if (ret)
+		return ERR_PTR(ret);
+
+	cpts->refclk = devm_clk_get(dev, "cpts");
+	if (IS_ERR(cpts->refclk)) {
+		dev_err(dev, "Failed to get cpts refclk\n");
+		return ERR_CAST(cpts->refclk);
 	}
-	if (cpts->refclk)
-		cpts_clk_release(cpts);
-#endif
+	ret = clk_prepare(cpts->refclk);
+	if (ret)
+		return ERR_PTR(ret);
+
+	cpts->cc.read = cpts_systim_read;
+	cpts->cc.mask = CLOCKSOURCE_MASK(32);
+	cpts->info = cpts_info;
+	cpts_calc_mult_shift(cpts);
+	/* keep the original mult, cpts_ptp_adjfreq() can modify cc.mult */
+	cpts->cc_mult = cpts->cc.mult;
+	return cpts;
+}
+EXPORT_SYMBOL_GPL(cpts_create);
+
+/* Unprepare the refclk of @cpts; a NULL @cpts is silently ignored and a
+ * missing refclk only triggers a WARN.
+ */
+void cpts_release(struct cpts *cpts)
+{
+	if (!cpts || WARN_ON(!cpts->refclk))
+		return;
+
+	clk_unprepare(cpts->refclk);
+}
+EXPORT_SYMBOL_GPL(cpts_release);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("TI CPTS driver");
+MODULE_AUTHOR("Richard Cochran <richardcochran@gmail.com>");
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 69a46b92c7d6..c96eca2b1b46 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -20,11 +20,14 @@
 #ifndef _TI_CPTS_H_
 #define _TI_CPTS_H_
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
+
 #include <linux/clk.h>
 #include <linux/clkdev.h>
 #include <linux/clocksource.h>
 #include <linux/device.h>
 #include <linux/list.h>
+#include <linux/of.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/skbuff.h>
 #include <linux/timecounter.h>
@@ -94,9 +97,6 @@ enum {
 	CPTS_EV_TX,   /* Ethernet Transmit Event */
 };
 
-/* This covers any input clock up to about 500 MHz. */
-#define CPTS_OVERFLOW_PERIOD (HZ * 8)
-
 #define CPTS_FIFO_DEPTH 16
 #define CPTS_MAX_EVENTS 32
 
@@ -108,10 +108,10 @@ struct cpts_event {
 };
 
 struct cpts {
+	struct device *dev;
 	struct cpsw_cpts __iomem *reg;
 	int tx_enable;
 	int rx_enable;
-#ifdef CONFIG_TI_CPTS
 	struct ptp_clock_info info;
 	struct ptp_clock *clock;
 	spinlock_t lock; /* protects time registers */
@@ -124,22 +124,86 @@ struct cpts {
 	struct list_head events;
 	struct list_head pool;
 	struct cpts_event pool_data[CPTS_MAX_EVENTS];
-#endif
+	unsigned long ov_check_period;
 };
 
-#ifdef CONFIG_TI_CPTS
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
+int cpts_register(struct cpts *cpts);
+void cpts_unregister(struct cpts *cpts);
+struct cpts *cpts_create(struct device *dev, void __iomem *regs,
+			 struct device_node *node);
+void cpts_release(struct cpts *cpts);
+
+static inline void cpts_rx_enable(struct cpts *cpts, int enable)
+{
+	cpts->rx_enable = enable;
+}
+
+static inline bool cpts_is_rx_enabled(struct cpts *cpts)
+{
+	return !!cpts->rx_enable;
+}
+
+static inline void cpts_tx_enable(struct cpts *cpts, int enable)
+{
+	cpts->tx_enable = enable;
+}
+
+static inline bool cpts_is_tx_enabled(struct cpts *cpts)
+{
+	return !!cpts->tx_enable;
+}
+
 #else
+struct cpts;
+
 static inline void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 }
 static inline void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 }
+
+static inline
+struct cpts *cpts_create(struct device *dev, void __iomem *regs,
+			 struct device_node *node)
+{
+	return NULL;
+}
+
+static inline void cpts_release(struct cpts *cpts)
+{
+}
+
+static inline int
+cpts_register(struct cpts *cpts)
+{
+	return 0;
+}
+
+static inline void cpts_unregister(struct cpts *cpts)
+{
+}
+
+static inline void cpts_rx_enable(struct cpts *cpts, int enable)
+{
+}
+
+static inline bool cpts_is_rx_enabled(struct cpts *cpts)
+{
+	return false;
+}
+
+static inline void cpts_tx_enable(struct cpts *cpts, int enable)
+{
+}
+
+static inline bool cpts_is_tx_enabled(struct cpts *cpts)
+{
+	return false;
+}
 #endif
 
-int cpts_register(struct device *dev, struct cpts *cpts, u32 mult, u32 shift);
-void cpts_unregister(struct cpts *cpts);
 
 #endif
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c
index c3f35f11a8fd..36518fc5c7cc 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.c
+++ b/drivers/net/ethernet/ti/davinci_cpdma.c
@@ -32,6 +32,7 @@
 #define CPDMA_RXCONTROL		0x14
 #define CPDMA_SOFTRESET		0x1c
 #define CPDMA_RXTEARDOWN	0x18
+#define CPDMA_TX_PRI0_RATE	0x30
 #define CPDMA_TXINTSTATRAW	0x80
 #define CPDMA_TXINTSTATMASKED	0x84
 #define CPDMA_TXINTMASKSET	0x88
@@ -68,6 +69,8 @@
 
 #define CPDMA_TEARDOWN_VALUE	0xfffffffc
 
+#define CPDMA_MAX_RLIM_CNT	16384
+
 struct cpdma_desc {
 	/* hardware fields */
 	u32			hw_next;
@@ -122,6 +125,33 @@ struct cpdma_chan {
 	struct cpdma_chan_stats		stats;
 	/* offsets into dmaregs */
 	int	int_set, int_clear, td;
+	int				weight;
+	u32				rate_factor;
+	u32				rate;
+};
+
+struct cpdma_control_info {
+	u32		reg;
+	u32		shift, mask;
+	int		access;
+#define ACCESS_RO	BIT(0)
+#define ACCESS_WO	BIT(1)
+#define ACCESS_RW	(ACCESS_RO | ACCESS_WO)
+};
+
+static struct cpdma_control_info controls[] = {
+	[CPDMA_TX_RLIM]		  = {CPDMA_DMACONTROL,	8,  0xffff, ACCESS_RW},
+	[CPDMA_CMD_IDLE]	  = {CPDMA_DMACONTROL,	3,  1,      ACCESS_WO},
+	[CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL,	4,  1,      ACCESS_RW},
+	[CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL,	2,  1,      ACCESS_RW},
+	[CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL,	1,  1,      ACCESS_RW},
+	[CPDMA_TX_PRIO_FIXED]	  = {CPDMA_DMACONTROL,	0,  1,      ACCESS_RW},
+	[CPDMA_STAT_IDLE]	  = {CPDMA_DMASTATUS,	31, 1,      ACCESS_RO},
+	[CPDMA_STAT_TX_ERR_CODE]  = {CPDMA_DMASTATUS,	20, 0xf,    ACCESS_RW},
+	[CPDMA_STAT_TX_ERR_CHAN]  = {CPDMA_DMASTATUS,	16, 0x7,    ACCESS_RW},
+	[CPDMA_STAT_RX_ERR_CODE]  = {CPDMA_DMASTATUS,	12, 0xf,    ACCESS_RW},
+	[CPDMA_STAT_RX_ERR_CHAN]  = {CPDMA_DMASTATUS,	8,  0x7,    ACCESS_RW},
+	[CPDMA_RX_BUFFER_OFFSET]  = {CPDMA_RXBUFFOFS,	0,  0xffff, ACCESS_RW},
 };
 
 #define tx_chan_num(chan)	(chan)
@@ -253,6 +283,211 @@ static void cpdma_desc_free(struct cpdma_desc_pool *pool,
 	gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size);
 }
 
+static int _cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value)
+{
+	struct cpdma_control_info *info = &controls[control];
+	u32 val;
+
+	if (!ctlr->params.has_ext_regs)
+		return -ENOTSUPP;
+
+	if (ctlr->state != CPDMA_STATE_ACTIVE)
+		return -EINVAL;
+
+	if (control < 0 || control >= ARRAY_SIZE(controls))
+		return -ENOENT;
+
+	if ((info->access & ACCESS_WO) != ACCESS_WO)
+		return -EPERM;
+
+	val  = dma_reg_read(ctlr, info->reg);
+	val &= ~(info->mask << info->shift);
+	val |= (value & info->mask) << info->shift;
+	dma_reg_write(ctlr, info->reg, val);
+
+	return 0;
+}
+
+static int _cpdma_control_get(struct cpdma_ctlr *ctlr, int control)
+{
+	struct cpdma_control_info *info = &controls[control];
+	int ret;
+
+	if (!ctlr->params.has_ext_regs)
+		return -ENOTSUPP;
+
+	if (ctlr->state != CPDMA_STATE_ACTIVE)
+		return -EINVAL;
+
+	if (control < 0 || control >= ARRAY_SIZE(controls))
+		return -ENOENT;
+
+	if ((info->access & ACCESS_RO) != ACCESS_RO)
+		return -EPERM;
+
+	ret = (dma_reg_read(ctlr, info->reg) >> info->shift) & info->mask;
+	return ret;
+}
+
+/* cpdma_chan_set_chan_shaper - set shaper for a channel
+ * Has to be called under ctlr lock
+ */
+static int cpdma_chan_set_chan_shaper(struct cpdma_chan *chan)
+{
+	struct cpdma_ctlr *ctlr = chan->ctlr;
+	u32 rate_reg;
+	u32 rmask;
+	int ret;
+
+	if (!chan->rate)
+		return 0;
+
+	rate_reg = CPDMA_TX_PRI0_RATE + 4 * chan->chan_num;
+	dma_reg_write(ctlr, rate_reg, chan->rate_factor);
+
+	rmask = _cpdma_control_get(ctlr, CPDMA_TX_RLIM);
+	rmask |= chan->mask;
+
+	ret = _cpdma_control_set(ctlr, CPDMA_TX_RLIM, rmask);
+	return ret;
+}
+
+static int cpdma_chan_on(struct cpdma_chan *chan)
+{
+	struct cpdma_ctlr *ctlr = chan->ctlr;
+	struct cpdma_desc_pool	*pool = ctlr->pool;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (chan->state != CPDMA_STATE_IDLE) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -EBUSY;
+	}
+	if (ctlr->state != CPDMA_STATE_ACTIVE) {
+		spin_unlock_irqrestore(&chan->lock, flags);
+		return -EINVAL;
+	}
+	dma_reg_write(ctlr, chan->int_set, chan->mask);
+	chan->state = CPDMA_STATE_ACTIVE;
+	if (chan->head) {
+		chan_write(chan, hdp, desc_phys(pool, chan->head));
+		if (chan->rxfree)
+			chan_write(chan, rxfree, chan->count);
+	}
+
+	spin_unlock_irqrestore(&chan->lock, flags);
+	return 0;
+}
+
+/* cpdma_chan_fit_rate - set rate for a channel and check if it's possible.
+ * rmask - mask of rate limited channels
+ * Returns 0, or -EINVAL if a channel below a rate limited one is not limited
+ */
+static int cpdma_chan_fit_rate(struct cpdma_chan *ch, u32 rate,
+			       u32 *rmask, int *prio_mode)
+{
+	struct cpdma_ctlr *ctlr = ch->ctlr;
+	struct cpdma_chan *chan;
+	u32 old_rate = ch->rate;
+	u32 new_rmask = 0;
+	int rlim = 1;
+	int i;
+
+	*prio_mode = 0;
+	for (i = tx_chan_num(0); i < tx_chan_num(CPDMA_MAX_CHANNELS); i++) {
+		chan = ctlr->channels[i];
+		if (!chan) {
+			rlim = 0;
+			continue;
+		}
+
+		if (chan == ch)
+			chan->rate = rate;
+
+		if (chan->rate) {
+			if (rlim) {
+				new_rmask |= chan->mask;
+			} else {
+				ch->rate = old_rate;
+				dev_err(ctlr->dev, "Prev channel of %dch is not rate limited\n",
+					chan->chan_num);
+				return -EINVAL;
+			}
+		} else {
+			*prio_mode = 1;
+			rlim = 0;
+		}
+	}
+
+	*rmask = new_rmask;
+	return 0;
+}
+
+static u32 cpdma_chan_set_factors(struct cpdma_ctlr *ctlr,
+				  struct cpdma_chan *ch)
+{
+	u32 delta = UINT_MAX, prev_delta = UINT_MAX, best_delta = UINT_MAX;
+	u32 best_send_cnt = 0, best_idle_cnt = 0;
+	u32 new_rate, best_rate = 0, rate_reg;
+	u64 send_cnt, idle_cnt;
+	u32 min_send_cnt, freq;
+	u64 divident, divisor;
+
+	if (!ch->rate) {
+		ch->rate_factor = 0;
+		goto set_factor;
+	}
+
+	freq = ctlr->params.bus_freq_mhz * 1000 * 32;
+	if (!freq) {
+		dev_err(ctlr->dev, "The bus frequency is not set\n");
+		return -EINVAL;
+	}
+
+	min_send_cnt = freq - ch->rate;
+	send_cnt = DIV_ROUND_UP(min_send_cnt, ch->rate);
+	while (send_cnt <= CPDMA_MAX_RLIM_CNT) {
+		divident = ch->rate * send_cnt;
+		divisor = min_send_cnt;
+		idle_cnt = DIV_ROUND_CLOSEST_ULL(divident, divisor);
+
+		divident = freq * idle_cnt;
+		divisor = idle_cnt + send_cnt;
+		new_rate = DIV_ROUND_CLOSEST_ULL(divident, divisor);
+
+		delta = new_rate >= ch->rate ? new_rate - ch->rate : delta;
+		if (delta < best_delta) {
+			best_delta = delta;
+			best_send_cnt = send_cnt;
+			best_idle_cnt = idle_cnt;
+			best_rate = new_rate;
+
+			if (!delta)
+				break;
+		}
+
+		if (prev_delta >= delta) {
+			prev_delta = delta;
+			send_cnt++;
+			continue;
+		}
+
+		idle_cnt++;
+		divident = freq * idle_cnt;
+		send_cnt = DIV_ROUND_CLOSEST_ULL(divident, ch->rate);
+		send_cnt -= idle_cnt;
+		prev_delta = UINT_MAX;
+	}
+
+	ch->rate = best_rate;
+	ch->rate_factor = best_send_cnt | (best_idle_cnt << 16);
+
+set_factor:
+	rate_reg = CPDMA_TX_PRI0_RATE + 4 * ch->chan_num;
+	dma_reg_write(ctlr, rate_reg, ch->rate_factor);
+	return 0;
+}
+
 struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params)
 {
 	struct cpdma_ctlr *ctlr;
@@ -283,8 +518,9 @@ EXPORT_SYMBOL_GPL(cpdma_ctlr_create);
 
 int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
 {
+	struct cpdma_chan *chan;
 	unsigned long flags;
-	int i;
+	int i, prio_mode;
 
 	spin_lock_irqsave(&ctlr->lock, flags);
 	if (ctlr->state != CPDMA_STATE_IDLE) {
@@ -320,10 +556,22 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr)
 
 	ctlr->state = CPDMA_STATE_ACTIVE;
 
+	prio_mode = 0;
 	for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
-		if (ctlr->channels[i])
-			cpdma_chan_start(ctlr->channels[i]);
+		chan = ctlr->channels[i];
+		if (chan) {
+			cpdma_chan_set_chan_shaper(chan);
+			cpdma_chan_on(chan);
+
+			/* off prio mode if all tx channels are rate limited */
+			if (is_tx_chan(chan) && !chan->rate)
+				prio_mode = 1;
+		}
 	}
+
+	_cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, prio_mode);
+	_cpdma_control_set(ctlr, CPDMA_RX_BUFFER_OFFSET, 0);
+
 	spin_unlock_irqrestore(&ctlr->lock, flags);
 	return 0;
 }
@@ -335,7 +583,7 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr)
 	int i;
 
 	spin_lock_irqsave(&ctlr->lock, flags);
-	if (ctlr->state == CPDMA_STATE_TEARDOWN) {
+	if (ctlr->state != CPDMA_STATE_ACTIVE) {
 		spin_unlock_irqrestore(&ctlr->lock, flags);
 		return -EINVAL;
 	}
@@ -422,30 +670,205 @@ u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr)
 }
 EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state);
 
+/* cpdma_chan_set_descs - split desc_num descriptors between the rx (rx != 0)
+ * or tx channels: weight% for weighted channels, per_ch_desc for the others.
+ */
+static void cpdma_chan_set_descs(struct cpdma_ctlr *ctlr,
+				 int rx, int desc_num,
+				 int per_ch_desc)
+{
+	struct cpdma_chan *chan, *most_chan = NULL;
+	int desc_cnt = desc_num, most_dnum = 0;
+	int min, max, i;
+
+	if (!desc_num)
+		return;
+
+	if (rx) {
+		min = rx_chan_num(0);
+		max = rx_chan_num(CPDMA_MAX_CHANNELS);
+	} else {
+		min = tx_chan_num(0);
+		max = tx_chan_num(CPDMA_MAX_CHANNELS);
+	}
+
+	for (i = min; i < max; i++) {
+		chan = ctlr->channels[i];
+		if (!chan)
+			continue;
+		if (chan->weight)
+			chan->desc_num = (chan->weight * desc_num) / 100;
+		else
+			chan->desc_num = per_ch_desc;
+		desc_cnt -= chan->desc_num;
+		if (most_dnum < chan->desc_num) {
+			most_dnum = chan->desc_num;
+			most_chan = chan;
+		}
+	}
+	/* remains go to the biggest user; none exists if every share was 0 */
+	if (most_chan)
+		most_chan->desc_num += desc_cnt;
+}
+
 /**
  * cpdma_chan_split_pool - Splits ctrl pool between all channels.
  * Has to be called under ctlr lock
  */
-static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
+static int cpdma_chan_split_pool(struct cpdma_ctlr *ctlr)
 {
+	int tx_per_ch_desc = 0, rx_per_ch_desc = 0;
 	struct cpdma_desc_pool *pool = ctlr->pool;
+	int free_rx_num = 0, free_tx_num = 0;
+	int rx_weight = 0, tx_weight = 0;
+	int tx_desc_num, rx_desc_num;
 	struct cpdma_chan *chan;
-	int ch_desc_num;
-	int i;
+	int i, tx_num = 0;
 
 	if (!ctlr->chan_num)
-		return;
-
-	/* calculate average size of pool slice */
-	ch_desc_num = pool->num_desc / ctlr->chan_num;
+		return 0;
 
-	/* split ctlr pool */
 	for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) {
 		chan = ctlr->channels[i];
-		if (chan)
-			chan->desc_num = ch_desc_num;
+		if (!chan)
+			continue;
+
+		if (is_rx_chan(chan)) {
+			if (!chan->weight)
+				free_rx_num++;
+			rx_weight += chan->weight;
+		} else {
+			if (!chan->weight)
+				free_tx_num++;
+			tx_weight += chan->weight;
+			tx_num++;
+		}
+	}
+
+	if (rx_weight > 100 || tx_weight > 100)
+		return -EINVAL;
+
+	tx_desc_num = (tx_num * pool->num_desc) / ctlr->chan_num;
+	rx_desc_num = pool->num_desc - tx_desc_num;
+
+	if (free_tx_num) {
+		tx_per_ch_desc = tx_desc_num - (tx_weight * tx_desc_num) / 100;
+		tx_per_ch_desc /= free_tx_num;
+	}
+	if (free_rx_num) {
+		rx_per_ch_desc = rx_desc_num - (rx_weight * rx_desc_num) / 100;
+		rx_per_ch_desc /= free_rx_num;
+	}
+
+	cpdma_chan_set_descs(ctlr, 0, tx_desc_num, tx_per_ch_desc);
+	cpdma_chan_set_descs(ctlr, 1, rx_desc_num, rx_per_ch_desc);
+
+	return 0;
+}
+
+/* cpdma_chan_set_weight - set weight of a channel in percentage.
+ * Tx and Rx channels have separate weights. That is 100% for RX
+ * and 100% for Tx. The weight is used to split cpdma resources
+ * in correct proportion required by the channels, including number
+ * of descriptors. The channel rate is not enough to know the
+ * weight of a channel as the maximum rate of an interface is needed.
+ * If weight = 0, then channel uses rest of descriptors left by
+ * weighted channels.
+ */
+int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight)
+{
+	struct cpdma_ctlr *ctlr = ch->ctlr;
+	unsigned long flags, ch_flags;
+	int ret;
+
+	spin_lock_irqsave(&ctlr->lock, flags);
+	spin_lock_irqsave(&ch->lock, ch_flags);
+	if (ch->weight == weight) {
+		spin_unlock_irqrestore(&ch->lock, ch_flags);
+		spin_unlock_irqrestore(&ctlr->lock, flags);
+		return 0;
 	}
+	ch->weight = weight;
+	spin_unlock_irqrestore(&ch->lock, ch_flags);
+
+	/* re-split pool using new channel weight */
+	ret = cpdma_chan_split_pool(ctlr);
+	spin_unlock_irqrestore(&ctlr->lock, flags);
+	return ret;
 }
+EXPORT_SYMBOL_GPL(cpdma_chan_set_weight);
+
+/* cpdma_chan_get_min_rate - get minimum allowed rate for channel
+ * Should be called before cpdma_chan_set_rate.
+ * Returns min rate in Kb/s
+ */
+u32 cpdma_chan_get_min_rate(struct cpdma_ctlr *ctlr)
+{
+	unsigned int divident, divisor;
+
+	divident = ctlr->params.bus_freq_mhz * 32 * 1000;
+	divisor = 1 + CPDMA_MAX_RLIM_CNT;
+
+	return DIV_ROUND_UP(divident, divisor);
+}
+EXPORT_SYMBOL_GPL(cpdma_chan_get_min_rate);
+
+/* cpdma_chan_set_rate - limits bandwidth for transmit channel.
+ * The bandwidth limited channels have to be in order beginning from lowest.
+ * ch - tx channel to configure; rate - bandwidth in Kb/s, 0 turns shaper off
+ * Returns 0 on success, rate if it is already set, negative errno on error.
+ */
+int cpdma_chan_set_rate(struct cpdma_chan *ch, u32 rate)
+{
+	unsigned long flags, ch_flags;
+	struct cpdma_ctlr *ctlr;
+	int ret, prio_mode;
+	u32 rmask;
+
+	if (!ch || !is_tx_chan(ch))
+		return -EINVAL;
+
+	if (ch->rate == rate)
+		return rate;
+	ctlr = ch->ctlr;	/* ch is validated above, safe to dereference */
+	spin_lock_irqsave(&ctlr->lock, flags);
+	spin_lock_irqsave(&ch->lock, ch_flags);
+
+	ret = cpdma_chan_fit_rate(ch, rate, &rmask, &prio_mode);
+	if (ret)
+		goto err;
+
+	ret = cpdma_chan_set_factors(ctlr, ch);
+	if (ret)
+		goto err;
+
+	spin_unlock_irqrestore(&ch->lock, ch_flags);
+
+	/* on shapers */
+	_cpdma_control_set(ctlr, CPDMA_TX_RLIM, rmask);
+	_cpdma_control_set(ctlr, CPDMA_TX_PRIO_FIXED, prio_mode);
+	spin_unlock_irqrestore(&ctlr->lock, flags);
+	return ret;
+
+err:
+	spin_unlock_irqrestore(&ch->lock, ch_flags);
+	spin_unlock_irqrestore(&ctlr->lock, flags);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(cpdma_chan_set_rate);
+
+u32 cpdma_chan_get_rate(struct cpdma_chan *ch)
+{
+	unsigned long flags;
+	u32 rate;
+
+	spin_lock_irqsave(&ch->lock, flags);
+	rate = ch->rate;
+	spin_unlock_irqrestore(&ch->lock, flags);
+
+	return rate;
+}
+EXPORT_SYMBOL_GPL(cpdma_chan_get_rate);
 
 struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
 				     cpdma_handler_fn handler, int rx_type)
@@ -474,7 +897,9 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num,
 	chan->state	= CPDMA_STATE_IDLE;
 	chan->chan_num	= chan_num;
 	chan->handler	= handler;
+	chan->rate	= 0;
 	chan->desc_num = ctlr->pool->num_desc / 2;
+	chan->weight	= 0;
 
 	if (is_rx_chan(chan)) {
 		chan->hdp	= ctlr->params.rxhdp + offset;
@@ -533,7 +958,7 @@ int cpdma_chan_destroy(struct cpdma_chan *chan)
 		cpdma_chan_stop(chan);
 	ctlr->channels[chan->chan_num] = NULL;
 	ctlr->chan_num--;
-
+	devm_kfree(ctlr->dev, chan);
 	cpdma_chan_split_pool(ctlr);
 
 	spin_unlock_irqrestore(&ctlr->lock, flags);
@@ -768,28 +1193,20 @@ EXPORT_SYMBOL_GPL(cpdma_chan_process);
 
 int cpdma_chan_start(struct cpdma_chan *chan)
 {
-	struct cpdma_ctlr	*ctlr = chan->ctlr;
-	struct cpdma_desc_pool	*pool = ctlr->pool;
-	unsigned long		flags;
+	struct cpdma_ctlr *ctlr = chan->ctlr;
+	unsigned long flags;
+	int ret;
 
-	spin_lock_irqsave(&chan->lock, flags);
-	if (chan->state != CPDMA_STATE_IDLE) {
-		spin_unlock_irqrestore(&chan->lock, flags);
-		return -EBUSY;
-	}
-	if (ctlr->state != CPDMA_STATE_ACTIVE) {
-		spin_unlock_irqrestore(&chan->lock, flags);
-		return -EINVAL;
-	}
-	dma_reg_write(ctlr, chan->int_set, chan->mask);
-	chan->state = CPDMA_STATE_ACTIVE;
-	if (chan->head) {
-		chan_write(chan, hdp, desc_phys(pool, chan->head));
-		if (chan->rxfree)
-			chan_write(chan, rxfree, chan->count);
-	}
+	spin_lock_irqsave(&ctlr->lock, flags);
+	ret = cpdma_chan_set_chan_shaper(chan);
+	spin_unlock_irqrestore(&ctlr->lock, flags);
+	if (ret)
+		return ret;
+
+	ret = cpdma_chan_on(chan);
+	if (ret)
+		return ret;
 
-	spin_unlock_irqrestore(&chan->lock, flags);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(cpdma_chan_start);
@@ -874,93 +1291,27 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable)
 	return 0;
 }
 
-struct cpdma_control_info {
-	u32		reg;
-	u32		shift, mask;
-	int		access;
-#define ACCESS_RO	BIT(0)
-#define ACCESS_WO	BIT(1)
-#define ACCESS_RW	(ACCESS_RO | ACCESS_WO)
-};
-
-static struct cpdma_control_info controls[] = {
-	[CPDMA_CMD_IDLE]	  = {CPDMA_DMACONTROL,	3,  1,      ACCESS_WO},
-	[CPDMA_COPY_ERROR_FRAMES] = {CPDMA_DMACONTROL,	4,  1,      ACCESS_RW},
-	[CPDMA_RX_OFF_LEN_UPDATE] = {CPDMA_DMACONTROL,	2,  1,      ACCESS_RW},
-	[CPDMA_RX_OWNERSHIP_FLIP] = {CPDMA_DMACONTROL,	1,  1,      ACCESS_RW},
-	[CPDMA_TX_PRIO_FIXED]	  = {CPDMA_DMACONTROL,	0,  1,      ACCESS_RW},
-	[CPDMA_STAT_IDLE]	  = {CPDMA_DMASTATUS,	31, 1,      ACCESS_RO},
-	[CPDMA_STAT_TX_ERR_CODE]  = {CPDMA_DMASTATUS,	20, 0xf,    ACCESS_RW},
-	[CPDMA_STAT_TX_ERR_CHAN]  = {CPDMA_DMASTATUS,	16, 0x7,    ACCESS_RW},
-	[CPDMA_STAT_RX_ERR_CODE]  = {CPDMA_DMASTATUS,	12, 0xf,    ACCESS_RW},
-	[CPDMA_STAT_RX_ERR_CHAN]  = {CPDMA_DMASTATUS,	8,  0x7,    ACCESS_RW},
-	[CPDMA_RX_BUFFER_OFFSET]  = {CPDMA_RXBUFFOFS,	0,  0xffff, ACCESS_RW},
-};
-
 int cpdma_control_get(struct cpdma_ctlr *ctlr, int control)
 {
 	unsigned long flags;
-	struct cpdma_control_info *info = &controls[control];
 	int ret;
 
 	spin_lock_irqsave(&ctlr->lock, flags);
-
-	ret = -ENOTSUPP;
-	if (!ctlr->params.has_ext_regs)
-		goto unlock_ret;
-
-	ret = -EINVAL;
-	if (ctlr->state != CPDMA_STATE_ACTIVE)
-		goto unlock_ret;
-
-	ret = -ENOENT;
-	if (control < 0 || control >= ARRAY_SIZE(controls))
-		goto unlock_ret;
-
-	ret = -EPERM;
-	if ((info->access & ACCESS_RO) != ACCESS_RO)
-		goto unlock_ret;
-
-	ret = (dma_reg_read(ctlr, info->reg) >> info->shift) & info->mask;
-
-unlock_ret:
+	ret = _cpdma_control_get(ctlr, control);
 	spin_unlock_irqrestore(&ctlr->lock, flags);
+
 	return ret;
 }
 
 int cpdma_control_set(struct cpdma_ctlr *ctlr, int control, int value)
 {
 	unsigned long flags;
-	struct cpdma_control_info *info = &controls[control];
 	int ret;
-	u32 val;
 
 	spin_lock_irqsave(&ctlr->lock, flags);
-
-	ret = -ENOTSUPP;
-	if (!ctlr->params.has_ext_regs)
-		goto unlock_ret;
-
-	ret = -EINVAL;
-	if (ctlr->state != CPDMA_STATE_ACTIVE)
-		goto unlock_ret;
-
-	ret = -ENOENT;
-	if (control < 0 || control >= ARRAY_SIZE(controls))
-		goto unlock_ret;
-
-	ret = -EPERM;
-	if ((info->access & ACCESS_WO) != ACCESS_WO)
-		goto unlock_ret;
-
-	val  = dma_reg_read(ctlr, info->reg);
-	val &= ~(info->mask << info->shift);
-	val |= (value & info->mask) << info->shift;
-	dma_reg_write(ctlr, info->reg, val);
-	ret = 0;
-
-unlock_ret:
+	ret = _cpdma_control_set(ctlr, control, value);
 	spin_unlock_irqrestore(&ctlr->lock, flags);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(cpdma_control_set);
diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h
index a07b22b12bc1..4a167db2abab 100644
--- a/drivers/net/ethernet/ti/davinci_cpdma.h
+++ b/drivers/net/ethernet/ti/davinci_cpdma.h
@@ -36,6 +36,7 @@ struct cpdma_params {
 	u32			desc_hw_addr;
 	int			desc_mem_size;
 	int			desc_align;
+	u32			bus_freq_mhz;
 
 	/*
 	 * Some instances of embedded cpdma controllers have extra control and
@@ -90,8 +91,13 @@ int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable);
 u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr);
 u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr);
 bool cpdma_check_free_tx_desc(struct cpdma_chan *chan);
+int cpdma_chan_set_weight(struct cpdma_chan *ch, int weight);
+int cpdma_chan_set_rate(struct cpdma_chan *ch, u32 rate);
+u32 cpdma_chan_get_rate(struct cpdma_chan *ch);
+u32 cpdma_chan_get_min_rate(struct cpdma_ctlr *ctlr);
 
 enum cpdma_control {
+	CPDMA_TX_RLIM,			/* read-write */
 	CPDMA_CMD_IDLE,			/* write-only */
 	CPDMA_COPY_ERROR_FRAMES,	/* read-write */
 	CPDMA_RX_OFF_LEN_UPDATE,	/* read-write */
diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c
index 84fbe5714f8b..481c7bf0395b 100644
--- a/drivers/net/ethernet/ti/davinci_emac.c
+++ b/drivers/net/ethernet/ti/davinci_emac.c
@@ -1767,6 +1767,7 @@ static int davinci_emac_try_get_mac(struct platform_device *pdev,
  */
 static int davinci_emac_probe(struct platform_device *pdev)
 {
+	struct device_node *np = pdev->dev.of_node;
 	int rc = 0;
 	struct resource *res, *res_ctrl;
 	struct net_device *ndev;
@@ -1805,7 +1806,7 @@ static int davinci_emac_probe(struct platform_device *pdev)
 	if (!pdata) {
 		dev_err(&pdev->dev, "no platform data\n");
 		rc = -ENODEV;
-		goto no_pdata;
+		goto err_free_netdev;
 	}
 
 	/* MAC addr and PHY mask , RMII enable info from platform_data */
@@ -1941,6 +1942,10 @@ no_cpdma_chan:
 		cpdma_chan_destroy(priv->rxchan);
 	cpdma_ctlr_destroy(priv->dma);
 no_pdata:
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
+	of_node_put(priv->phy_node);
+err_free_netdev:
 	free_netdev(ndev);
 	return rc;
 }
@@ -1956,6 +1961,7 @@ static int davinci_emac_remove(struct platform_device *pdev)
 {
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct emac_priv *priv = netdev_priv(ndev);
+	struct device_node *np = pdev->dev.of_node;
 
 	dev_notice(&ndev->dev, "DaVinci EMAC: davinci_emac_remove()\n");
 
@@ -1968,6 +1974,8 @@ static int davinci_emac_remove(struct platform_device *pdev)
 	unregister_netdev(ndev);
 	of_node_put(priv->phy_node);
 	pm_runtime_disable(&pdev->dev);
+	if (of_phy_is_fixed_link(np))
+		of_phy_deregister_fixed_link(np);
 	free_netdev(ndev);
 
 	return 0;
diff --git a/drivers/net/ethernet/ti/netcp.h b/drivers/net/ethernet/ti/netcp.h
index 17a26a429b71..0f58c584ae09 100644
--- a/drivers/net/ethernet/ti/netcp.h
+++ b/drivers/net/ethernet/ti/netcp.h
@@ -121,7 +121,7 @@ struct netcp_packet {
 	bool			rxtstamp_complete;
 	void			*ts_context;
 
-	int	(*txtstamp_complete)(void *ctx, struct netcp_packet *pkt);
+	void (*txtstamp)(void *ctx, struct sk_buff *skb);
 };
 
 static inline u32 *netcp_push_psdata(struct netcp_packet *p_info,
diff --git a/drivers/net/ethernet/ti/netcp_core.c b/drivers/net/ethernet/ti/netcp_core.c
index 32516661f180..c243335ed649 100644
--- a/drivers/net/ethernet/ti/netcp_core.c
+++ b/drivers/net/ethernet/ti/netcp_core.c
@@ -100,6 +100,11 @@ struct netcp_intf_modpriv {
 	void			*module_priv;
 };
 
+struct netcp_tx_cb {
+	void	*ts_context;
+	void	(*txtstamp)(void *context, struct sk_buff *skb);
+};
+
 static LIST_HEAD(netcp_devices);
 static LIST_HEAD(netcp_modules);
 static DEFINE_MUTEX(netcp_modules_lock);
@@ -544,6 +549,7 @@ int netcp_register_rxhook(struct netcp_intf *netcp_priv, int order,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(netcp_register_rxhook);
 
 int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, int order,
 			    netcp_hook_rtn *hook_rtn, void *hook_data)
@@ -566,6 +572,7 @@ int netcp_unregister_rxhook(struct netcp_intf *netcp_priv, int order,
 
 	return -ENOENT;
 }
+EXPORT_SYMBOL_GPL(netcp_unregister_rxhook);
 
 static void netcp_frag_free(bool is_frag, void *ptr)
 {
@@ -730,6 +737,7 @@ static int netcp_process_one_rx_packet(struct netcp_intf *netcp)
 
 	/* Call each of the RX hooks */
 	p_info.skb = skb;
+	skb->dev = netcp->ndev;
 	p_info.rxtstamp_complete = false;
 	list_for_each_entry(rx_hook, &netcp->rxhook_list_head, list) {
 		int ret;
@@ -987,6 +995,7 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
 					  unsigned int budget)
 {
 	struct knav_dma_desc *desc;
+	struct netcp_tx_cb *tx_cb;
 	struct sk_buff *skb;
 	unsigned int dma_sz;
 	dma_addr_t dma;
@@ -1014,6 +1023,10 @@ static int netcp_process_tx_compl_packets(struct netcp_intf *netcp,
 			continue;
 		}
 
+		tx_cb = (struct netcp_tx_cb *)skb->cb;
+		if (tx_cb->txtstamp)
+			tx_cb->txtstamp(tx_cb->ts_context, skb);
+
 		if (netif_subqueue_stopped(netcp->ndev, skb) &&
 		    netif_running(netcp->ndev) &&
 		    (knav_pool_count(netcp->tx_pool) >
@@ -1154,6 +1167,7 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
 	struct netcp_tx_pipe *tx_pipe = NULL;
 	struct netcp_hook_list *tx_hook;
 	struct netcp_packet p_info;
+	struct netcp_tx_cb *tx_cb;
 	unsigned int dma_sz;
 	dma_addr_t dma;
 	u32 tmp = 0;
@@ -1164,7 +1178,7 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
 	p_info.tx_pipe = NULL;
 	p_info.psdata_len = 0;
 	p_info.ts_context = NULL;
-	p_info.txtstamp_complete = NULL;
+	p_info.txtstamp = NULL;
 	p_info.epib = desc->epib;
 	p_info.psdata = (u32 __force *)desc->psdata;
 	memset(p_info.epib, 0, KNAV_DMA_NUM_EPIB_WORDS * sizeof(__le32));
@@ -1189,6 +1203,10 @@ static int netcp_tx_submit_skb(struct netcp_intf *netcp,
 		goto out;
 	}
 
+	tx_cb = (struct netcp_tx_cb *)skb->cb;
+	tx_cb->ts_context = p_info.ts_context;
+	tx_cb->txtstamp = p_info.txtstamp;
+
 	/* update descriptor */
 	if (p_info.psdata_len) {
 		/* psdata points to both native-endian and device-endian data */
@@ -1568,7 +1586,7 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
 	/* open Tx completion queue */
 	snprintf(name, sizeof(name), "tx-compl-%s", ndev->name);
 	netcp->tx_compl_q = knav_queue_open(name, netcp->tx_compl_qid, 0);
-	if (IS_ERR_OR_NULL(netcp->tx_compl_q)) {
+	if (IS_ERR(netcp->tx_compl_q)) {
 		ret = PTR_ERR(netcp->tx_compl_q);
 		goto fail;
 	}
@@ -1588,7 +1606,7 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
 	/* open Rx completion queue */
 	snprintf(name, sizeof(name), "rx-compl-%s", ndev->name);
 	netcp->rx_queue = knav_queue_open(name, netcp->rx_queue_id, 0);
-	if (IS_ERR_OR_NULL(netcp->rx_queue)) {
+	if (IS_ERR(netcp->rx_queue)) {
 		ret = PTR_ERR(netcp->rx_queue);
 		goto fail;
 	}
@@ -1610,7 +1628,7 @@ static int netcp_setup_navigator_resources(struct net_device *ndev)
 	     ++i) {
 		snprintf(name, sizeof(name), "rx-fdq-%s-%d", ndev->name, i);
 		netcp->rx_fdq[i] = knav_queue_open(name, KNAV_QUEUE_GP, 0);
-		if (IS_ERR_OR_NULL(netcp->rx_fdq[i])) {
+		if (IS_ERR(netcp->rx_fdq[i])) {
 			ret = PTR_ERR(netcp->rx_fdq[i]);
 			goto fail;
 		}
@@ -1766,21 +1784,6 @@ out:
 	return (ret == 0) ? 0 : err;
 }
 
-static int netcp_ndo_change_mtu(struct net_device *ndev, int new_mtu)
-{
-	struct netcp_intf *netcp = netdev_priv(ndev);
-
-	/* MTU < 68 is an error for IPv4 traffic */
-	if ((new_mtu < 68) ||
-	    (new_mtu > (NETCP_MAX_FRAME_SIZE - ETH_HLEN - ETH_FCS_LEN))) {
-		dev_err(netcp->ndev_dev, "Invalid mtu size = %d\n", new_mtu);
-		return -EINVAL;
-	}
-
-	ndev->mtu = new_mtu;
-	return 0;
-}
-
 static void netcp_ndo_tx_timeout(struct net_device *ndev)
 {
 	struct netcp_intf *netcp = netdev_priv(ndev);
@@ -1886,7 +1889,6 @@ static const struct net_device_ops netcp_netdev_ops = {
 	.ndo_start_xmit		= netcp_ndo_start_xmit,
 	.ndo_set_rx_mode	= netcp_set_rx_mode,
 	.ndo_do_ioctl           = netcp_ndo_ioctl,
-	.ndo_change_mtu		= netcp_ndo_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 	.ndo_vlan_rx_add_vid	= netcp_rx_add_vid,
@@ -1923,6 +1925,10 @@ static int netcp_create_interface(struct netcp_device *netcp_device,
 	ndev->hw_features = ndev->features;
 	ndev->vlan_features |=  NETIF_F_SG;
 
+	/* MTU range: 68 - 9486 */
+	ndev->min_mtu = ETH_MIN_MTU;
+	ndev->max_mtu = NETCP_MAX_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
+
 	netcp = netdev_priv(ndev);
 	spin_lock_init(&netcp->lock);
 	INIT_LIST_HEAD(&netcp->module_head);
@@ -2070,7 +2076,6 @@ static void netcp_delete_interface(struct netcp_device *netcp_device,
 		if (module->release)
 			module->release(intf_modpriv->module_priv);
 		list_del(&intf_modpriv->intf_list);
-		kfree(intf_modpriv);
 	}
 	WARN(!list_empty(&netcp->module_head), "%s interface module list is not empty!\n",
 	     ndev->name);
@@ -2133,6 +2138,8 @@ static int netcp_probe(struct platform_device *pdev)
 		}
 	}
 
+	of_node_put(interfaces);
+
 	/* Add the device instance to the list */
 	list_add_tail(&netcp_device->device_list, &netcp_devices);
 
@@ -2145,6 +2152,8 @@ probe_quit_interface:
 		netcp_delete_interface(netcp_device, netcp_intf->ndev);
 	}
 
+	of_node_put(interfaces);
+
 probe_quit:
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
@@ -2165,7 +2174,6 @@ static int netcp_remove(struct platform_device *pdev)
 		dev_dbg(&pdev->dev, "Removing module \"%s\"\n", module->name);
 		module->remove(netcp_device, inst_modpriv->module_priv);
 		list_del(&inst_modpriv->inst_list);
-		kfree(inst_modpriv);
 	}
 
 	/* now that all modules are removed, clean up the interfaces */
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index d543298d6750..c7e547e4f2b1 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -23,10 +23,13 @@
 #include <linux/of_mdio.h>
 #include <linux/of_address.h>
 #include <linux/if_vlan.h>
+#include <linux/ptp_classify.h>
+#include <linux/net_tstamp.h>
 #include <linux/ethtool.h>
 
 #include "cpsw_ale.h"
 #include "netcp.h"
+#include "cpts.h"
 
 #define NETCP_DRIVER_NAME		"TI KeyStone Ethernet Driver"
 #define NETCP_DRIVER_VERSION		"v1.0"
@@ -51,6 +54,7 @@
 #define GBE13_EMAC_OFFSET		0x100
 #define GBE13_SLAVE_PORT2_OFFSET	0x200
 #define GBE13_HW_STATS_OFFSET		0x300
+#define GBE13_CPTS_OFFSET		0x500
 #define GBE13_ALE_OFFSET		0x600
 #define GBE13_HOST_PORT_NUM		0
 #define GBE13_NUM_ALE_ENTRIES		1024
@@ -74,6 +78,7 @@
 #define GBENU_SLAVE_PORT_OFFSET		0x2000
 #define GBENU_EMAC_OFFSET		0x2330
 #define GBENU_HW_STATS_OFFSET		0x1a000
+#define GBENU_CPTS_OFFSET		0x1d000
 #define GBENU_ALE_OFFSET		0x1e000
 #define GBENU_HOST_PORT_NUM		0
 #define GBENU_NUM_ALE_ENTRIES		1024
@@ -93,6 +98,7 @@
 #define XGBE10_HOST_PORT_OFFSET		0x34
 #define XGBE10_SLAVE_PORT_OFFSET	0x64
 #define XGBE10_EMAC_OFFSET		0x400
+#define XGBE10_CPTS_OFFSET		0x600
 #define XGBE10_ALE_OFFSET		0x700
 #define XGBE10_HW_STATS_OFFSET		0x800
 #define XGBE10_HOST_PORT_NUM		0
@@ -155,6 +161,7 @@
 
 #define GBE_TX_QUEUE				648
 #define	GBE_TXHOOK_ORDER			0
+#define	GBE_RXHOOK_ORDER			0
 #define GBE_DEFAULT_ALE_AGEOUT			30
 #define SLAVE_LINK_IS_XGMII(s) ((s)->link_interface >= XGMII_LINK_MAC_PHY)
 #define NETCP_LINK_STATE_INVALID		-1
@@ -169,6 +176,56 @@
 
 #define HOST_TX_PRI_MAP_DEFAULT			0x00000000
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
+/* Px_TS_CTL register fields */
+#define TS_RX_ANX_F_EN				BIT(0)
+#define TS_RX_VLAN_LT1_EN			BIT(1)
+#define TS_RX_VLAN_LT2_EN			BIT(2)
+#define TS_RX_ANX_D_EN				BIT(3)
+#define TS_TX_ANX_F_EN				BIT(4)
+#define TS_TX_VLAN_LT1_EN			BIT(5)
+#define TS_TX_VLAN_LT2_EN			BIT(6)
+#define TS_TX_ANX_D_EN				BIT(7)
+#define TS_LT2_EN				BIT(8)
+#define TS_RX_ANX_E_EN				BIT(9)
+#define TS_TX_ANX_E_EN				BIT(10)
+#define TS_MSG_TYPE_EN_SHIFT			16
+#define TS_MSG_TYPE_EN_MASK			0xffff
+
+/* Px_TS_SEQ_LTYPE register fields */
+#define TS_SEQ_ID_OFS_SHIFT			16
+#define TS_SEQ_ID_OFS_MASK			0x3f
+
+/* Px_TS_CTL_LTYPE2 register fields */
+#define TS_107					BIT(16)
+#define TS_129					BIT(17)
+#define TS_130					BIT(18)
+#define TS_131					BIT(19)
+#define TS_132					BIT(20)
+#define TS_319					BIT(21)
+#define TS_320					BIT(22)
+#define TS_TTL_NONZERO				BIT(23)
+#define TS_UNI_EN				BIT(24)
+#define TS_UNI_EN_SHIFT				24
+
+#define TS_TX_ANX_ALL_EN	 \
+	(TS_TX_ANX_D_EN	| TS_TX_ANX_E_EN | TS_TX_ANX_F_EN)
+
+#define TS_RX_ANX_ALL_EN	 \
+	(TS_RX_ANX_D_EN	| TS_RX_ANX_E_EN | TS_RX_ANX_F_EN)
+
+#define TS_CTL_DST_PORT				TS_319
+#define TS_CTL_DST_PORT_SHIFT			21
+
+#define TS_CTL_MADDR_ALL	\
+	(TS_107 | TS_129 | TS_130 | TS_131 | TS_132)
+
+#define TS_CTL_MADDR_SHIFT			16
+
+/* The PTP event messages - Sync, Delay_Req, Pdelay_Req, and Pdelay_Resp. */
+#define EVENT_MSG_BITS (BIT(0) | BIT(1) | BIT(2) | BIT(3))
+#endif /* CONFIG_TI_CPTS */
+
 struct xgbe_ss_regs {
 	u32	id_ver;
 	u32	synce_count;
@@ -616,6 +673,13 @@ struct gbe_hw_stats {
 #define GBE_MAX_HW_STAT_MODS			9
 #define GBE_HW_STATS_REG_MAP_SZ			0x100
 
+struct ts_ctl {
+	int     uni;
+	u8      dst_port_map;
+	u8      maddr_map;
+	u8      ts_mcast_type;
+};
+
 struct gbe_slave {
 	void __iomem			*port_regs;
 	void __iomem			*emac_regs;
@@ -630,6 +694,7 @@ struct gbe_slave {
 	u32				mac_control;
 	u8				phy_port_t;
 	struct device_node		*phy_node;
+	struct ts_ctl                   ts_ctl;
 	struct list_head		slave_list;
 };
 
@@ -655,6 +720,7 @@ struct gbe_priv {
 	void __iomem			*switch_regs;
 	void __iomem			*host_port_regs;
 	void __iomem			*ale_reg;
+	void __iomem                    *cpts_reg;
 	void __iomem			*sgmii_port_regs;
 	void __iomem			*sgmii_port34_regs;
 	void __iomem			*xgbe_serdes_regs;
@@ -678,6 +744,9 @@ struct gbe_priv {
 	int				num_et_stats;
 	/*  Lock for updating the hwstats */
 	spinlock_t			hw_stats_lock;
+
+	int                             cpts_registered;
+	struct cpts                     *cpts;
 };
 
 struct gbe_intf {
@@ -1840,8 +1909,8 @@ static void keystone_get_ethtool_stats(struct net_device *ndev,
 	spin_unlock_bh(&gbe_dev->hw_stats_lock);
 }
 
-static int keystone_get_settings(struct net_device *ndev,
-				 struct ethtool_cmd *cmd)
+static int keystone_get_link_ksettings(struct net_device *ndev,
+				       struct ethtool_link_ksettings *cmd)
 {
 	struct netcp_intf *netcp = netdev_priv(ndev);
 	struct phy_device *phy = ndev->phydev;
@@ -1858,20 +1927,28 @@ static int keystone_get_settings(struct net_device *ndev,
 	if (!gbe_intf->slave)
 		return -EINVAL;
 
-	ret = phy_ethtool_gset(phy, cmd);
+	ret = phy_ethtool_ksettings_get(phy, cmd);
 	if (!ret)
-		cmd->port = gbe_intf->slave->phy_port_t;
+		cmd->base.port = gbe_intf->slave->phy_port_t;
 
 	return ret;
 }
 
-static int keystone_set_settings(struct net_device *ndev,
-				 struct ethtool_cmd *cmd)
+static int keystone_set_link_ksettings(struct net_device *ndev,
+				       const struct ethtool_link_ksettings *cmd)
 {
 	struct netcp_intf *netcp = netdev_priv(ndev);
 	struct phy_device *phy = ndev->phydev;
 	struct gbe_intf *gbe_intf;
-	u32 features = cmd->advertising & cmd->supported;
+	u8 port = cmd->base.port;
+	u32 advertising, supported;
+	u32 features;
+
+	ethtool_convert_link_mode_to_legacy_u32(&advertising,
+						cmd->link_modes.advertising);
+	ethtool_convert_link_mode_to_legacy_u32(&supported,
+						cmd->link_modes.supported);
+	features = advertising & supported;
 
 	if (!phy)
 		return -EINVAL;
@@ -1883,26 +1960,69 @@ static int keystone_set_settings(struct net_device *ndev,
 	if (!gbe_intf->slave)
 		return -EINVAL;
 
-	if (cmd->port != gbe_intf->slave->phy_port_t) {
-		if ((cmd->port == PORT_TP) && !(features & ADVERTISED_TP))
+	if (port != gbe_intf->slave->phy_port_t) {
+		if ((port == PORT_TP) && !(features & ADVERTISED_TP))
 			return -EINVAL;
 
-		if ((cmd->port == PORT_AUI) && !(features & ADVERTISED_AUI))
+		if ((port == PORT_AUI) && !(features & ADVERTISED_AUI))
 			return -EINVAL;
 
-		if ((cmd->port == PORT_BNC) && !(features & ADVERTISED_BNC))
+		if ((port == PORT_BNC) && !(features & ADVERTISED_BNC))
 			return -EINVAL;
 
-		if ((cmd->port == PORT_MII) && !(features & ADVERTISED_MII))
+		if ((port == PORT_MII) && !(features & ADVERTISED_MII))
 			return -EINVAL;
 
-		if ((cmd->port == PORT_FIBRE) && !(features & ADVERTISED_FIBRE))
+		if ((port == PORT_FIBRE) && !(features & ADVERTISED_FIBRE))
 			return -EINVAL;
 	}
 
-	gbe_intf->slave->phy_port_t = cmd->port;
-	return phy_ethtool_sset(phy, cmd);
+	gbe_intf->slave->phy_port_t = port;
+	return phy_ethtool_ksettings_set(phy, cmd);
+}
+
+#if IS_ENABLED(CONFIG_TI_CPTS)
+/* ethtool .get_ts_info: advertise CPTS timestamping and its PHC index. */
+static int keystone_get_ts_info(struct net_device *ndev,
+				struct ethtool_ts_info *info)
+{
+	struct gbe_intf *gbe_intf;
+	struct cpts *cpts;
+
+	gbe_intf = netcp_module_get_intf_data(&gbe_module, netdev_priv(ndev));
+	/* Both a missing interface and a missing CPTS yield -EINVAL. */
+	cpts = gbe_intf ? gbe_intf->gbe_dev->cpts : NULL;
+	if (!cpts)
+		return -EINVAL;
+
+	info->phc_index = cpts->phc_index;
+	info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON);
+	info->rx_filters = (1 << HWTSTAMP_FILTER_NONE) |
+			   (1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT) |
+			   (1 << HWTSTAMP_FILTER_PTP_V2_EVENT);
+	info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE |
+				SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_HARDWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE |
+				SOF_TIMESTAMPING_RAW_HARDWARE;
+
+	return 0;
 }
+#else
+static int keystone_get_ts_info(struct net_device *ndev,
+				struct ethtool_ts_info *info)
+{
+	/* Built without CPTS: software timestamping only, phc_index -1. */
+	info->phc_index = -1;
+	info->tx_types = 0;
+	info->rx_filters = 0;
+	info->so_timestamping = SOF_TIMESTAMPING_TX_SOFTWARE |
+				SOF_TIMESTAMPING_RX_SOFTWARE |
+				SOF_TIMESTAMPING_SOFTWARE;
+	return 0;
+}
+#endif /* CONFIG_TI_CPTS */
 
 static const struct ethtool_ops keystone_ethtool_ops = {
 	.get_drvinfo		= keystone_get_drvinfo,
@@ -1912,8 +2032,9 @@ static const struct ethtool_ops keystone_ethtool_ops = {
 	.get_strings		= keystone_get_stat_strings,
 	.get_sset_count		= keystone_get_sset_count,
 	.get_ethtool_stats	= keystone_get_ethtool_stats,
-	.get_settings		= keystone_get_settings,
-	.set_settings		= keystone_set_settings,
+	.get_link_ksettings	= keystone_get_link_ksettings,
+	.set_link_ksettings	= keystone_set_link_ksettings,
+	.get_ts_info		= keystone_get_ts_info,
 };
 
 #define mac_hi(mac)	(((mac)[0] << 0) | ((mac)[1] << 8) |	\
@@ -2357,16 +2478,279 @@ static int gbe_del_vid(void *intf_priv, int vid)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
+#define HAS_PHY_TXTSTAMP(p) ((p)->drv && (p)->drv->txtstamp)
+#define HAS_PHY_RXTSTAMP(p) ((p)->drv && (p)->drv->rxtstamp)
+
+/* netcp tx-timestamp callback; @context is the owning struct gbe_intf. */
+static void gbe_txtstamp(void *context, struct sk_buff *skb)
+{
+	struct gbe_intf *gbe_intf = context;
+
+	cpts_tx_timestamp(gbe_intf->gbe_dev->cpts, skb);
+}
+
+/* Decide whether @p_info's skb is a PTP frame the MAC should tx-timestamp.
+ *
+ * Accepted classes are exactly the case labels below, as reported by
+ * ptp_classify_raw(). @gbe_intf is not consulted.
+ */
+static bool gbe_need_txtstamp(struct gbe_intf *gbe_intf,
+			      const struct netcp_packet *p_info)
+{
+	switch (ptp_classify_raw(p_info->skb)) {
+	case PTP_CLASS_V1_IPV4:
+	case PTP_CLASS_V1_IPV6:
+	case PTP_CLASS_V2_IPV4:
+	case PTP_CLASS_V2_IPV6:
+	case PTP_CLASS_V2_L2:
+	case (PTP_CLASS_V2_VLAN | PTP_CLASS_L2):
+	case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV4):
+	case (PTP_CLASS_V2_VLAN | PTP_CLASS_IPV6):
+		return true;
+	default:
+		/* PTP_CLASS_NONE and every other classification */
+		return false;
+	}
+}
+
+/* Tx hook helper: request a tx timestamp for @p_info; always returns 0. */
+static int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf,
+				 struct netcp_packet *p_info)
+{
+	struct sk_buff *skb = p_info->skb;
+	struct phy_device *phydev = skb->dev->phydev;
+
+	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		return 0;
+	if (!cpts_is_tx_enabled(gbe_intf->gbe_dev->cpts))
+		return 0;
+
+	/* A PHY exposing the txtstamp api is assumed to do the stamping.
+	 * The skb is flagged here because skb_tx_timestamp() is called
+	 * only after all the txhooks have run.
+	 */
+	if (!(phydev && HAS_PHY_TXTSTAMP(phydev))) {
+		if (!gbe_need_txtstamp(gbe_intf, p_info))
+			return 0;
+		p_info->txtstamp = gbe_txtstamp;
+		p_info->ts_context = (void *)gbe_intf;
+	}
+
+	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+	return 0;
+}
+
+/* Rx hook helper: timestamp @p_info's skb via CPTS at most once, unless the
+ * PHY driver provides its own rxtstamp. Always returns 0.
+ */
+static int gbe_rxtstamp(struct gbe_intf *gbe_intf, struct netcp_packet *p_info)
+{
+	struct phy_device *phydev = p_info->skb->dev->phydev;
+
+	if (p_info->rxtstamp_complete)
+		return 0;
+
+	/* Skip the MAC timestamp when the PHY driver has its own rxtstamp. */
+	if (!(phydev && HAS_PHY_RXTSTAMP(phydev)))
+		cpts_rx_timestamp(gbe_intf->gbe_dev->cpts, p_info->skb);
+
+	p_info->rxtstamp_complete = true;
+
+	return 0;
+}
+
+/* SIOCGHWTSTAMP: copy the current CPTS tx/rx timestamp state to userspace. */
+static int gbe_hwtstamp_get(struct gbe_intf *gbe_intf, struct ifreq *ifr)
+{
+	struct cpts *cpts = gbe_intf->gbe_dev->cpts;
+	struct hwtstamp_config cfg;
+
+	if (!cpts)
+		return -EOPNOTSUPP;
+
+	cfg.flags = 0;
+	cfg.tx_type = HWTSTAMP_TX_OFF;
+	if (cpts_is_tx_enabled(cpts))
+		cfg.tx_type = HWTSTAMP_TX_ON;
+	cfg.rx_filter = cpts_is_rx_enabled(cpts) ? cpts->rx_enable :
+						   HWTSTAMP_FILTER_NONE;
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+/* Program this slave's port timestamp registers from the CPTS rx/tx state. */
+static void gbe_hwtstamp(struct gbe_intf *gbe_intf)
+{
+	struct cpts *cpts = gbe_intf->gbe_dev->cpts;
+	struct gbe_slave *slave = gbe_intf->slave;
+	u32 ts_en, seq_id, ctl;
+
+	if (!cpts_is_rx_enabled(cpts) && !cpts_is_tx_enabled(cpts)) {
+		writel(0, GBE_REG_ADDR(slave, port_regs, ts_ctl));
+		return;
+	}
+
+	ts_en = EVENT_MSG_BITS << TS_MSG_TYPE_EN_SHIFT;
+	if (cpts_is_tx_enabled(cpts))
+		ts_en |= (TS_TX_ANX_ALL_EN | TS_TX_VLAN_LT1_EN);
+	if (cpts_is_rx_enabled(cpts))
+		ts_en |= (TS_RX_ANX_ALL_EN | TS_RX_VLAN_LT1_EN);
+
+	ctl = ETH_P_1588 | TS_TTL_NONZERO |
+	      (slave->ts_ctl.dst_port_map << TS_CTL_DST_PORT_SHIFT);
+	if (slave->ts_ctl.uni)
+		ctl |= TS_UNI_EN;
+	else
+		ctl |= slave->ts_ctl.maddr_map << TS_CTL_MADDR_SHIFT;
+	seq_id = (30 << TS_SEQ_ID_OFS_SHIFT) | ETH_P_1588;
+	writel(ts_en,  GBE_REG_ADDR(slave, port_regs, ts_ctl));
+	writel(seq_id, GBE_REG_ADDR(slave, port_regs, ts_seq_ltype));
+	writel(ctl,    GBE_REG_ADDR(slave, port_regs, ts_ctl_ltype2));
+}
+
+/* SIOCSHWTSTAMP: apply a userspace hwtstamp_config to CPTS and the port.
+ *
+ * Both tx_type and rx_filter are validated before any CPTS state changes,
+ * so a request rejected with -ERANGE leaves the previous configuration in
+ * place instead of half-applying it. Accepted rx filters are mapped to
+ * the V1_L4_EVENT or V2_EVENT class, and the effective filter is copied
+ * back to userspace.
+ */
+static int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *ifr)
+{
+	struct cpts *cpts = gbe_intf->gbe_dev->cpts;
+	struct hwtstamp_config cfg;
+
+	if (!cpts)
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	/* reserved for future extensions */
+	if (cfg.flags)
+		return -EINVAL;
+
+	if (cfg.tx_type != HWTSTAMP_TX_OFF && cfg.tx_type != HWTSTAMP_TX_ON)
+		return -ERANGE;
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V1_L4_EVENT;
+		break;
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	/* HWTSTAMP_FILTER_NONE is 0, which turns rx timestamping off. */
+	cpts_tx_enable(cpts, cfg.tx_type == HWTSTAMP_TX_ON);
+	cpts_rx_enable(cpts, cfg.rx_filter);
+
+	gbe_hwtstamp(gbe_intf);
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+/* Take one reference on the CPTS registration. Does nothing without a
+ * CPTS instance; a failed registration is logged and not counted.
+ */
+static void gbe_register_cpts(struct gbe_priv *gbe_dev)
+{
+	if (!gbe_dev->cpts)
+		return;
+
+	/* Only the first reference performs the actual registration. */
+	if (gbe_dev->cpts_registered <= 0 && cpts_register(gbe_dev->cpts)) {
+		dev_err(gbe_dev->dev, "error registering cpts device\n");
+		return;
+	}
+
+	++gbe_dev->cpts_registered;
+}
+
+/* Drop one CPTS reference; the last one unregisters the device. */
+static void gbe_unregister_cpts(struct gbe_priv *gbe_dev)
+{
+	if (!gbe_dev->cpts || (gbe_dev->cpts_registered <= 0))
+		return;
+
+	/* Other users remain while the count is still nonzero. */
+	if (--gbe_dev->cpts_registered == 0)
+		cpts_unregister(gbe_dev->cpts);
+}
+#else
+/* Built without CONFIG_TI_CPTS: the timestamping helpers become stubs. */
+static inline int gbe_txtstamp_mark_pkt(struct gbe_intf *gbe_intf,
+					struct netcp_packet *p_info)
+{
+	return 0;
+}
+
+static inline int gbe_rxtstamp(struct gbe_intf *gbe_intf,
+			       struct netcp_packet *p_info)
+{
+	return 0;
+}
+
+/* Same signature as the CONFIG_TI_CPTS variant. */
+static inline void gbe_hwtstamp(struct gbe_intf *gbe_intf)
+{
+}
+
+static inline void gbe_register_cpts(struct gbe_priv *gbe_dev)
+{
+}
+
+static inline void gbe_unregister_cpts(struct gbe_priv *gbe_dev)
+{
+}
+
+static inline int gbe_hwtstamp_get(struct gbe_intf *gbe_intf, struct ifreq *req)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int gbe_hwtstamp_set(struct gbe_intf *gbe_intf, struct ifreq *req)
+{
+	return -EOPNOTSUPP;
+}
+#endif /* CONFIG_TI_CPTS */
+
 static int gbe_ioctl(void *intf_priv, struct ifreq *req, int cmd)
 {
 	struct gbe_intf *gbe_intf = intf_priv;
 	struct phy_device *phy = gbe_intf->slave->phy;
-	int ret = -EOPNOTSUPP;
+
+	/* Without a PHY driver hwtstamp hook, CPTS serves these ioctls. */
+	if (!phy || !phy->drv || !phy->drv->hwtstamp) {
+		if (cmd == SIOCGHWTSTAMP)
+			return gbe_hwtstamp_get(gbe_intf, req);
+		if (cmd == SIOCSHWTSTAMP)
+			return gbe_hwtstamp_set(gbe_intf, req);
+		/* anything else falls through to the PHY, if present */
+	}
 
 	if (phy)
-		ret = phy_mii_ioctl(phy, req, cmd);
+		return phy_mii_ioctl(phy, req, cmd);
 
-	return ret;
+	return -EOPNOTSUPP;
 }
 
 static void netcp_ethss_timer(unsigned long arg)
@@ -2402,12 +2786,20 @@ static void netcp_ethss_timer(unsigned long arg)
 	add_timer(&gbe_dev->timer);
 }
 
-static int gbe_tx_hook(int order, void *data, struct netcp_packet *p_info)
+static int gbe_txhook(int order, void *data, struct netcp_packet *p_info)
 {
 	struct gbe_intf *gbe_intf = data;
 
 	p_info->tx_pipe = &gbe_intf->tx_pipe;
-	return 0;
+
+	return gbe_txtstamp_mark_pkt(gbe_intf, p_info);
+}
+
+/* netcp rx hook; @order is unused. */
+static int gbe_rxhook(int order, void *data, struct netcp_packet *p_info)
+{
+	/* @data is the struct gbe_intf passed at hook registration */
+	return gbe_rxtstamp(data, p_info);
 }
 
 static int gbe_open(void *intf_priv, struct net_device *ndev)
@@ -2457,11 +2849,14 @@ static int gbe_open(void *intf_priv, struct net_device *ndev)
 	if (ret)
 		goto fail;
 
-	netcp_register_txhook(netcp, GBE_TXHOOK_ORDER, gbe_tx_hook,
-			      gbe_intf);
+	netcp_register_txhook(netcp, GBE_TXHOOK_ORDER, gbe_txhook, gbe_intf);
+	netcp_register_rxhook(netcp, GBE_RXHOOK_ORDER, gbe_rxhook, gbe_intf);
 
 	slave->open = true;
 	netcp_ethss_update_link_state(gbe_dev, slave, ndev);
+
+	gbe_register_cpts(gbe_dev);
+
 	return 0;
 
 fail:
@@ -2473,16 +2868,36 @@ static int gbe_close(void *intf_priv, struct net_device *ndev)
 {
 	struct gbe_intf *gbe_intf = intf_priv;
 	struct netcp_intf *netcp = netdev_priv(ndev);
+	struct gbe_priv *gbe_dev = gbe_intf->gbe_dev;
+
+	gbe_unregister_cpts(gbe_dev);
 
 	gbe_slave_stop(gbe_intf);
-	netcp_unregister_txhook(netcp, GBE_TXHOOK_ORDER, gbe_tx_hook,
-				gbe_intf);
+
+	netcp_unregister_rxhook(netcp, GBE_RXHOOK_ORDER, gbe_rxhook, gbe_intf);
+	netcp_unregister_txhook(netcp, GBE_TXHOOK_ORDER, gbe_txhook, gbe_intf);
 
 	gbe_intf->slave->open = false;
 	atomic_set(&gbe_intf->slave->link_state, NETCP_LINK_STATE_INVALID);
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_TI_CPTS)
+static void init_slave_ts_ctl(struct gbe_slave *slave)
+{
+	struct ts_ctl *tc = &slave->ts_ctl;
+
+	tc->uni = 1;	/* gbe_hwtstamp() will set TS_UNI_EN */
+	tc->dst_port_map = (TS_CTL_DST_PORT >> TS_CTL_DST_PORT_SHIFT) & 0x3;
+	tc->maddr_map = (TS_CTL_MADDR_ALL >> TS_CTL_MADDR_SHIFT) & 0x1f;
+}
+
+#else
+static void init_slave_ts_ctl(struct gbe_slave *slave)
+{
+}
+#endif /* CONFIG_TI_CPTS */
+
 static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 		      struct device_node *node)
 {
@@ -2597,6 +3012,8 @@ static int init_slave(struct gbe_priv *gbe_dev, struct gbe_slave *slave,
 	}
 
 	atomic_set(&slave->link_state, NETCP_LINK_STATE_INVALID);
+
+	init_slave_ts_ctl(slave);
 	return 0;
 }
 
@@ -2787,6 +3204,7 @@ static int set_xgbe_ethss10_priv(struct gbe_priv *gbe_dev,
 			XGBE10_HW_STATS_OFFSET + (GBE_HW_STATS_REG_MAP_SZ * i);
 
 	gbe_dev->ale_reg = gbe_dev->switch_regs + XGBE10_ALE_OFFSET;
+	gbe_dev->cpts_reg = gbe_dev->switch_regs + XGBE10_CPTS_OFFSET;
 	gbe_dev->ale_ports = gbe_dev->max_num_ports;
 	gbe_dev->host_port = XGBE10_HOST_PORT_NUM;
 	gbe_dev->ale_entries = XGBE10_NUM_ALE_ENTRIES;
@@ -2909,6 +3327,7 @@ static int set_gbe_ethss14_priv(struct gbe_priv *gbe_dev,
 			(GBE_HW_STATS_REG_MAP_SZ * (i & 0x1));
 	}
 
+	gbe_dev->cpts_reg = gbe_dev->switch_regs + GBE13_CPTS_OFFSET;
 	gbe_dev->ale_reg = gbe_dev->switch_regs + GBE13_ALE_OFFSET;
 	gbe_dev->ale_ports = gbe_dev->max_num_ports;
 	gbe_dev->host_port = GBE13_HOST_PORT_NUM;
@@ -2998,6 +3417,7 @@ static int set_gbenu_ethss_priv(struct gbe_priv *gbe_dev,
 		gbe_dev->hw_stats_regs[i] = gbe_dev->switch_regs +
 			GBENU_HW_STATS_OFFSET + (GBENU_HW_STATS_REG_MAP_SZ * i);
 
+	gbe_dev->cpts_reg = gbe_dev->switch_regs + GBENU_CPTS_OFFSET;
 	gbe_dev->ale_reg = gbe_dev->switch_regs + GBENU_ALE_OFFSET;
 	gbe_dev->ale_ports = gbe_dev->max_num_ports;
 	gbe_dev->host_port = GBENU_HOST_PORT_NUM;
@@ -3179,6 +3599,12 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 		dev_dbg(gbe_dev->dev, "Created a gbe ale engine\n");
 	}
 
+	gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, node);
+	if (IS_ENABLED(CONFIG_TI_CPTS) && IS_ERR(gbe_dev->cpts)) {
+		ret = PTR_ERR(gbe_dev->cpts);
+		goto free_sec_ports;
+	}
+
 	/* initialize host port */
 	gbe_init_host_port(gbe_dev);
 
@@ -3267,6 +3693,7 @@ static int gbe_remove(struct netcp_device *netcp_device, void *inst_priv)
 	struct gbe_priv *gbe_dev = inst_priv;
 
 	del_timer_sync(&gbe_dev->timer);
+	cpts_release(gbe_dev->cpts);
 	cpsw_ale_stop(gbe_dev->ale);
 	cpsw_ale_destroy(gbe_dev->ale);
 	netcp_txpipe_close(&gbe_dev->tx_pipe);
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index 6c7ec1ddd475..c8d53d8c83ee 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -772,7 +772,6 @@ static const struct net_device_ops tlan_netdev_ops = {
 	.ndo_get_stats		= tlan_get_stats,
 	.ndo_set_rx_mode	= tlan_set_multicast_list,
 	.ndo_do_ioctl		= tlan_ioctl,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
diff --git a/drivers/net/ethernet/tile/Kconfig b/drivers/net/ethernet/tile/Kconfig
index f59a6c265331..bdfeaf3d4fce 100644
--- a/drivers/net/ethernet/tile/Kconfig
+++ b/drivers/net/ethernet/tile/Kconfig
@@ -9,7 +9,7 @@ config TILE_NET
 	select CRC32
 	select TILE_GXIO_MPIPE if TILEGX
 	select HIGH_RES_TIMERS if TILEGX
-	select PTP_1588_CLOCK if TILEGX
+	imply PTP_1588_CLOCK if TILEGX
 	---help---
 	  This is a standard Linux network device driver for the
 	  on-chip Tilera Gigabit Ethernet and XAUI interfaces.
diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c
index 11213a38c795..0aaf975bb347 100644
--- a/drivers/net/ethernet/tile/tilegx.c
+++ b/drivers/net/ethernet/tile/tilegx.c
@@ -59,6 +59,9 @@
 /* Maximum number of packets to handle per "poll". */
 #define TILE_NET_WEIGHT 64
 
+/* Maximum Jumbo Packet MTU */
+#define TILE_JUMBO_MAX_MTU 9000
+
 /* Number of entries in each iqueue. */
 #define IQUEUE_ENTRIES 512
 
@@ -2101,17 +2104,6 @@ static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 	return -EOPNOTSUPP;
 }
 
-/* Change the MTU. */
-static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
-{
-	if (new_mtu < 68)
-		return -EINVAL;
-	if (new_mtu > ((jumbo_num != 0) ? 9000 : 1500))
-		return -EINVAL;
-	dev->mtu = new_mtu;
-	return 0;
-}
-
 /* Change the Ethernet address of the NIC.
  *
  * The hypervisor driver does not support changing MAC address.  However,
@@ -2154,7 +2146,6 @@ static const struct net_device_ops tile_net_ops = {
 	.ndo_start_xmit = tile_net_tx,
 	.ndo_select_queue = tile_net_select_queue,
 	.ndo_do_ioctl = tile_net_ioctl,
-	.ndo_change_mtu = tile_net_change_mtu,
 	.ndo_tx_timeout = tile_net_tx_timeout,
 	.ndo_set_mac_address = tile_net_set_mac_address,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2174,7 +2165,11 @@ static void tile_net_setup(struct net_device *dev)
 	ether_setup(dev);
 	dev->netdev_ops = &tile_net_ops;
 	dev->watchdog_timeo = TILE_NET_TIMEOUT;
-	dev->mtu = 1500;
+
+	/* MTU range: 68 - 1500 or 9000 */
+	dev->mtu = ETH_DATA_LEN;
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = jumbo_num ? TILE_JUMBO_MAX_MTU : ETH_DATA_LEN;
 
 	features |= NETIF_F_HW_CSUM;
 	features |= NETIF_F_SG;
diff --git a/drivers/net/ethernet/tile/tilepro.c b/drivers/net/ethernet/tile/tilepro.c
index 4ef605a90247..0a3b7dafa3ba 100644
--- a/drivers/net/ethernet/tile/tilepro.c
+++ b/drivers/net/ethernet/tile/tilepro.c
@@ -87,7 +87,7 @@
 /* This should be 1500 if "jumbo" is not set in LIPP. */
 /* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */
 /* ISSUE: This has not been thoroughly tested (except at 1500). */
-#define TILE_NET_MTU 1500
+#define TILE_NET_MTU ETH_DATA_LEN
 
 /* HACK: Define this to verify incoming packets. */
 /* #define TILE_NET_VERIFY_INGRESS */
@@ -2095,26 +2095,6 @@ static struct rtnl_link_stats64 *tile_net_get_stats64(struct net_device *dev,
 }
 
 
-/*
- * Change the "mtu".
- *
- * The "change_mtu" method is usually not needed.
- * If you need it, it must be like this.
- */
-static int tile_net_change_mtu(struct net_device *dev, int new_mtu)
-{
-	PDEBUG("tile_net_change_mtu()\n");
-
-	/* Check ranges. */
-	if ((new_mtu < 68) || (new_mtu > 1500))
-		return -EINVAL;
-
-	/* Accept the value. */
-	dev->mtu = new_mtu;
-
-	return 0;
-}
-
 
 /*
  * Change the Ethernet Address of the NIC.
@@ -2229,7 +2209,6 @@ static const struct net_device_ops tile_net_ops = {
 	.ndo_start_xmit = tile_net_tx,
 	.ndo_do_ioctl = tile_net_ioctl,
 	.ndo_get_stats64 = tile_net_get_stats64,
-	.ndo_change_mtu = tile_net_change_mtu,
 	.ndo_tx_timeout = tile_net_tx_timeout,
 	.ndo_set_mac_address = tile_net_set_mac_address,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2252,7 +2231,11 @@ static void tile_net_setup(struct net_device *dev)
 	dev->netdev_ops = &tile_net_ops;
 	dev->watchdog_timeo = TILE_NET_TIMEOUT;
 	dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN;
+
+	/* MTU range: 68 - 1500 */
 	dev->mtu = TILE_NET_MTU;
+	dev->min_mtu = ETH_MIN_MTU;
+	dev->max_mtu = TILE_NET_MTU;
 
 	features |= NETIF_F_HW_CSUM;
 	features |= NETIF_F_SG;
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 272f2b1cb7ad..345316c749e7 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1114,24 +1114,6 @@ static int gelic_net_poll(struct napi_struct *napi, int budget)
 	}
 	return packets_done;
 }
-/**
- * gelic_net_change_mtu - changes the MTU of an interface
- * @netdev: interface device structure
- * @new_mtu: new MTU value
- *
- * returns 0 on success, <0 on failure
- */
-int gelic_net_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	/* no need to re-alloc skbs or so -- the max mtu is about 2.3k
-	 * and mtu is outbound only anyway */
-	if ((new_mtu < GELIC_NET_MIN_MTU) ||
-	    (new_mtu > GELIC_NET_MAX_MTU)) {
-		return -EINVAL;
-	}
-	netdev->mtu = new_mtu;
-	return 0;
-}
 
 /**
  * gelic_card_interrupt - event handler for gelic_net
@@ -1446,7 +1428,6 @@ static const struct net_device_ops gelic_netdevice_ops = {
 	.ndo_stop = gelic_net_stop,
 	.ndo_start_xmit = gelic_net_xmit,
 	.ndo_set_rx_mode = gelic_net_set_multi,
-	.ndo_change_mtu = gelic_net_change_mtu,
 	.ndo_tx_timeout = gelic_net_tx_timeout,
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
@@ -1513,6 +1494,10 @@ int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card)
 		netdev->features |= NETIF_F_VLAN_CHALLENGED;
 	}
 
+	/* MTU range: 64 - 1518 */
+	netdev->min_mtu = GELIC_NET_MIN_MTU;
+	netdev->max_mtu = GELIC_NET_MAX_MTU;
+
 	status = register_netdev(netdev);
 	if (status) {
 		dev_err(ctodev(card), "%s:Couldn't register %s %d\n",
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
index 8505196be9f5..003d0452d9cb 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
@@ -373,7 +373,6 @@ int gelic_net_stop(struct net_device *netdev);
 int gelic_net_xmit(struct sk_buff *skb, struct net_device *netdev);
 void gelic_net_set_multi(struct net_device *netdev);
 void gelic_net_tx_timeout(struct net_device *netdev);
-int gelic_net_change_mtu(struct net_device *netdev, int new_mtu);
 int gelic_net_setup_netdev(struct net_device *netdev, struct gelic_card *card);
 
 /* shared ethtool ops */
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
index 928c1dca2673..eed18f88bdff 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_wireless.c
@@ -2558,7 +2558,6 @@ static const struct net_device_ops gelic_wl_netdevice_ops = {
 	.ndo_stop = gelic_wl_stop,
 	.ndo_start_xmit = gelic_net_xmit,
 	.ndo_set_rx_mode = gelic_net_set_multi,
-	.ndo_change_mtu = gelic_net_change_mtu,
 	.ndo_tx_timeout = gelic_net_tx_timeout,
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index 36a6e8b54d94..cb341dfe65ad 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -1279,25 +1279,6 @@ static int spider_net_poll(struct napi_struct *napi, int budget)
 }
 
 /**
- * spider_net_change_mtu - changes the MTU of an interface
- * @netdev: interface device structure
- * @new_mtu: new MTU value
- *
- * returns 0 on success, <0 on failure
- */
-static int
-spider_net_change_mtu(struct net_device *netdev, int new_mtu)
-{
-	/* no need to re-alloc skbs or so -- the max mtu is about 2.3k
-	 * and mtu is outbound only anyway */
-	if ( (new_mtu < SPIDER_NET_MIN_MTU ) ||
-		(new_mtu > SPIDER_NET_MAX_MTU) )
-		return -EINVAL;
-	netdev->mtu = new_mtu;
-	return 0;
-}
-
-/**
  * spider_net_set_mac - sets the MAC of an interface
  * @netdev: interface device structure
  * @ptr: pointer to new MAC address
@@ -2229,7 +2210,6 @@ static const struct net_device_ops spider_net_ops = {
 	.ndo_start_xmit		= spider_net_xmit,
 	.ndo_set_rx_mode	= spider_net_set_multi,
 	.ndo_set_mac_address	= spider_net_set_mac,
-	.ndo_change_mtu		= spider_net_change_mtu,
 	.ndo_do_ioctl		= spider_net_do_ioctl,
 	.ndo_tx_timeout		= spider_net_tx_timeout,
 	.ndo_validate_addr	= eth_validate_addr,
@@ -2299,6 +2279,10 @@ spider_net_setup_netdev(struct spider_net_card *card)
 	/* some time: NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
 	 *		NETIF_F_HW_VLAN_CTAG_FILTER */
 
+	/* MTU range: 64 - 2294 */
+	netdev->min_mtu = SPIDER_NET_MIN_MTU;
+	netdev->max_mtu = SPIDER_NET_MAX_MTU;
+
 	netdev->irq = card->pdev->irq;
 	card->num_rx_ints = 0;
 	card->ignore_rx_ramfull = 0;
diff --git a/drivers/net/ethernet/toshiba/tc35815.c b/drivers/net/ethernet/toshiba/tc35815.c
index 5b01b3fa9fec..3be61ed28741 100644
--- a/drivers/net/ethernet/toshiba/tc35815.c
+++ b/drivers/net/ethernet/toshiba/tc35815.c
@@ -747,7 +747,6 @@ static const struct net_device_ops tc35815_netdev_ops = {
 	.ndo_tx_timeout		= tc35815_tx_timeout,
 	.ndo_do_ioctl		= tc35815_ioctl,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller	= tc35815_poll_controller,
diff --git a/drivers/net/ethernet/tundra/tsi108_eth.c b/drivers/net/ethernet/tundra/tsi108_eth.c
index 8fd131207ee1..f153ad729ce5 100644
--- a/drivers/net/ethernet/tundra/tsi108_eth.c
+++ b/drivers/net/ethernet/tundra/tsi108_eth.c
@@ -1548,7 +1548,6 @@ static const struct net_device_ops tsi108_netdev_ops = {
 	.ndo_do_ioctl		= tsi108_do_ioctl,
 	.ndo_set_mac_address	= tsi108_set_mac,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int
diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c
index 9d14731cdcb1..ba5c54249055 100644
--- a/drivers/net/ethernet/via/via-rhine.c
+++ b/drivers/net/ethernet/via/via-rhine.c
@@ -890,7 +890,6 @@ static const struct net_device_ops rhine_netdev_ops = {
 	.ndo_start_xmit		 = rhine_start_tx,
 	.ndo_get_stats64	 = rhine_get_stats64,
 	.ndo_set_rx_mode	 = rhine_set_rx_mode,
-	.ndo_change_mtu		 = eth_change_mtu,
 	.ndo_validate_addr	 = eth_validate_addr,
 	.ndo_set_mac_address 	 = eth_mac_addr,
 	.ndo_do_ioctl		 = netdev_ioctl,
diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c
index 908e72e18ef7..4716e60e2ccb 100644
--- a/drivers/net/ethernet/via/via-velocity.c
+++ b/drivers/net/ethernet/via/via-velocity.c
@@ -2284,13 +2284,6 @@ static int velocity_change_mtu(struct net_device *dev, int new_mtu)
 	struct velocity_info *vptr = netdev_priv(dev);
 	int ret = 0;
 
-	if ((new_mtu < VELOCITY_MIN_MTU) || new_mtu > (VELOCITY_MAX_MTU)) {
-		VELOCITY_PRT(MSG_LEVEL_ERR, KERN_NOTICE "%s: Invalid MTU.\n",
-				vptr->netdev->name);
-		ret = -EINVAL;
-		goto out_0;
-	}
-
 	if (!netif_running(dev)) {
 		dev->mtu = new_mtu;
 		goto out_0;
@@ -2864,6 +2857,10 @@ static int velocity_probe(struct device *dev, int irq,
 			NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_HW_VLAN_CTAG_RX |
 			NETIF_F_IP_CSUM;
 
+	/* MTU range: 64 - 9000 */
+	netdev->min_mtu = VELOCITY_MIN_MTU;
+	netdev->max_mtu = VELOCITY_MAX_MTU;
+
 	ret = register_netdev(netdev);
 	if (ret < 0)
 		goto err_iounmap;
diff --git a/drivers/net/ethernet/wiznet/w5100.c b/drivers/net/ethernet/wiznet/w5100.c
index d2349a1bc6ba..e1296ef2cf66 100644
--- a/drivers/net/ethernet/wiznet/w5100.c
+++ b/drivers/net/ethernet/wiznet/w5100.c
@@ -1045,7 +1045,6 @@ static const struct net_device_ops w5100_netdev_ops = {
 	.ndo_set_rx_mode	= w5100_set_rx_mode,
 	.ndo_set_mac_address	= w5100_set_macaddr,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int w5100_mmio_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/wiznet/w5300.c b/drivers/net/ethernet/wiznet/w5300.c
index ca31a57dbc86..724fabd38a23 100644
--- a/drivers/net/ethernet/wiznet/w5300.c
+++ b/drivers/net/ethernet/wiznet/w5300.c
@@ -536,7 +536,6 @@ static const struct net_device_ops w5300_netdev_ops = {
 	.ndo_set_rx_mode	= w5300_set_rx_mode,
 	.ndo_set_mac_address	= w5300_set_macaddr,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_change_mtu		= eth_change_mtu,
 };
 
 static int w5300_hw_probe(struct platform_device *pdev)
diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index a9bd665fd122..d73da8afe08e 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -37,6 +37,7 @@
 #include <linux/of_device.h>
 #include <linux/of_irq.h>
 #include <linux/of_mdio.h>
+#include <linux/of_net.h>
 #include <linux/of_platform.h>
 #include <linux/of_address.h>
 #include <linux/skbuff.h>
@@ -332,7 +333,7 @@ static void temac_do_set_mac_address(struct net_device *ndev)
 	mutex_unlock(&lp->indirect_mutex);
 }
 
-static int temac_init_mac_address(struct net_device *ndev, void *address)
+static int temac_init_mac_address(struct net_device *ndev, const void *address)
 {
 	memcpy(ndev->dev_addr, address, ETH_ALEN);
 	if (!is_valid_ether_addr(ndev->dev_addr))
@@ -967,13 +968,8 @@ static const struct attribute_group temac_attr_group = {
 };
 
 /* ethtool support */
-static int temac_nway_reset(struct net_device *ndev)
-{
-	return phy_start_aneg(ndev->phydev);
-}
-
 static const struct ethtool_ops temac_ethtool_ops = {
-	.nway_reset = temac_nway_reset,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_link = ethtool_op_get_link,
 	.get_ts_info = ethtool_op_get_ts_info,
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
@@ -987,7 +983,7 @@ static int temac_of_probe(struct platform_device *op)
 	struct net_device *ndev;
 	const void *addr;
 	__be32 *p;
-	int size, rc = 0;
+	int rc = 0;
 
 	/* Init network device structure */
 	ndev = alloc_etherdev(sizeof(*lp));
@@ -1079,13 +1075,13 @@ static int temac_of_probe(struct platform_device *op)
 
 
 	/* Retrieve the MAC address */
-	addr = of_get_property(op->dev.of_node, "local-mac-address", &size);
-	if ((!addr) || (size != 6)) {
+	addr = of_get_mac_address(op->dev.of_node);
+	if (!addr) {
 		dev_err(&op->dev, "could not find MAC address\n");
 		rc = -ENODEV;
 		goto err_iounmap_2;
 	}
-	temac_init_mac_address(ndev, (void *)addr);
+	temac_init_mac_address(ndev, addr);
 
 	rc = temac_mdio_setup(lp, op->dev.of_node);
 	if (rc)
diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
index c688d68c39aa..b96e96919e31 100644
--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/of_mdio.h>
+#include <linux/of_net.h>
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
 #include <linux/of_address.h>
@@ -292,7 +293,8 @@ out:
  * This function is called to initialize the MAC address of the Axi Ethernet
  * core. It writes to the UAW0 and UAW1 registers of the core.
  */
-static void axienet_set_mac_address(struct net_device *ndev, void *address)
+static void axienet_set_mac_address(struct net_device *ndev,
+				    const void *address)
 {
 	struct axienet_local *lp = netdev_priv(ndev);
 
@@ -1034,9 +1036,6 @@ static int axienet_change_mtu(struct net_device *ndev, int new_mtu)
 		XAE_TRL_SIZE) > lp->rxmem)
 		return -EINVAL;
 
-	if ((new_mtu > XAE_JUMBO_MTU) || (new_mtu < 64))
-		return -EINVAL;
-
 	ndev->mtu = new_mtu;
 
 	return 0;
@@ -1459,7 +1458,7 @@ static int axienet_probe(struct platform_device *pdev)
 	struct device_node *np;
 	struct axienet_local *lp;
 	struct net_device *ndev;
-	u8 mac_addr[6];
+	const void *mac_addr;
 	struct resource *ethres, dmares;
 	u32 value;
 
@@ -1475,6 +1474,10 @@ static int axienet_probe(struct platform_device *pdev)
 	ndev->netdev_ops = &axienet_netdev_ops;
 	ndev->ethtool_ops = &axienet_ethtool_ops;
 
+	/* MTU range: 64 - 9000 */
+	ndev->min_mtu = 64;
+	ndev->max_mtu = XAE_JUMBO_MTU;
+
 	lp = netdev_priv(ndev);
 	lp->ndev = ndev;
 	lp->dev = &pdev->dev;
@@ -1566,13 +1569,13 @@ static int axienet_probe(struct platform_device *pdev)
 	}
 
 	/* Retrieve the MAC address */
-	ret = of_property_read_u8_array(pdev->dev.of_node,
-					"local-mac-address", mac_addr, 6);
-	if (ret) {
+	mac_addr = of_get_mac_address(pdev->dev.of_node);
+	if (!mac_addr) {
 		dev_err(&pdev->dev, "could not find MAC address\n");
+		ret = -ENODEV;
 		goto free_netdev;
 	}
-	axienet_set_mac_address(ndev, (void *)mac_addr);
+	axienet_set_mac_address(ndev, mac_addr);
 
 	lp->coalesce_count_rx = XAXIDMA_DFT_RX_THRESHOLD;
 	lp->coalesce_count_tx = XAXIDMA_DFT_TX_THRESHOLD;
diff --git a/drivers/net/ethernet/xircom/xirc2ps_cs.c b/drivers/net/ethernet/xircom/xirc2ps_cs.c
index ddced28e8247..3b08ec766076 100644
--- a/drivers/net/ethernet/xircom/xirc2ps_cs.c
+++ b/drivers/net/ethernet/xircom/xirc2ps_cs.c
@@ -466,7 +466,6 @@ static const struct net_device_ops netdev_ops = {
 	.ndo_set_config		= do_config,
 	.ndo_do_ioctl		= do_ioctl,
 	.ndo_set_rx_mode	= set_multicast_list,
-	.ndo_change_mtu		= eth_change_mtu,
 	.ndo_set_mac_address 	= eth_mac_addr,
 	.ndo_validate_addr	= eth_validate_addr,
 };
diff --git a/drivers/net/ethernet/xscale/ixp4xx_eth.c b/drivers/net/ethernet/xscale/ixp4xx_eth.c
index fa32391720fe..aee55c03def0 100644
--- a/drivers/net/ethernet/xscale/ixp4xx_eth.c
+++ b/drivers/net/ethernet/xscale/ixp4xx_eth.c
@@ -1001,11 +1001,6 @@ static void ixp4xx_get_drvinfo(struct net_device *dev,
 	strlcpy(info->bus_info, "internal", sizeof(info->bus_info));
 }
 
-static int ixp4xx_nway_reset(struct net_device *dev)
-{
-	return phy_start_aneg(dev->phydev);
-}
-
 int ixp46x_phc_index = -1;
 EXPORT_SYMBOL_GPL(ixp46x_phc_index);
 
@@ -1037,7 +1032,7 @@ static int ixp4xx_get_ts_info(struct net_device *dev,
 
 static const struct ethtool_ops ixp4xx_ethtool_ops = {
 	.get_drvinfo = ixp4xx_get_drvinfo,
-	.nway_reset = ixp4xx_nway_reset,
+	.nway_reset = phy_ethtool_nway_reset,
 	.get_link = ethtool_op_get_link,
 	.get_ts_info = ixp4xx_get_ts_info,
 	.get_link_ksettings = phy_ethtool_get_link_ksettings,
@@ -1378,7 +1373,6 @@ static const struct net_device_ops ixp4xx_netdev_ops = {
 	.ndo_start_xmit = eth_xmit,
 	.ndo_set_rx_mode = eth_set_mcast_list,
 	.ndo_do_ioctl = eth_ioctl,
-	.ndo_change_mtu = eth_change_mtu,
 	.ndo_set_mac_address = eth_mac_addr,
 	.ndo_validate_addr = eth_validate_addr,
 };