45 files changed, 3164 insertions, 973 deletions
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index b89bc82eed46..16a924c486bf 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -27,6 +27,8 @@ Contents of this document:
 
  (*) AF_RXRPC kernel interface.
 
+ (*) Configurable parameters.
+
 
 ========
 OVERVIEW
@@ -864,3 +866,82 @@ The kernel interface functions are as follows:
 
      This is used to allocate a null RxRPC key that can be used to indicate
      anonymous security for a particular domain.
+
+
+=======================
+CONFIGURABLE PARAMETERS
+=======================
+
+The RxRPC protocol driver has a number of configurable parameters that can be
+adjusted through sysctls in /proc/net/rxrpc/:
+
+ (*) req_ack_delay
+
+     The amount of time in milliseconds after receiving a packet with the
+     request-ack flag set before we honour the flag and actually send the
+     requested ack.
+
+     Usually the other side won't stop sending packets until the advertised
+     reception window is full (to a maximum of 255 packets), so delaying the
+     ACK permits several packets to be ACK'd in one go.
+
+ (*) soft_ack_delay
+
+     The amount of time in milliseconds after receiving a new packet before we
+     generate a soft-ACK to tell the sender that it doesn't need to resend.
+
+ (*) idle_ack_delay
+
+     The amount of time in milliseconds after all the packets currently in the
+     received queue have been consumed before we generate a hard-ACK to tell
+     the sender it can free its buffers, assuming no other reason occurs that
+     we would send an ACK.
+
+ (*) resend_timeout
+
+     The amount of time in milliseconds after transmitting a packet before we
+     transmit it again, assuming no ACK is received from the receiver telling
+     us they got it.
+
+ (*) max_call_lifetime
+
+     The maximum amount of time in seconds that a call may be in progress
+     before we preemptively kill it.
+
+ (*) dead_call_expiry
+
+     The amount of time in seconds before we remove a dead call from the call
+     list.  Dead calls are kept around for a little while for the purpose of
+     repeating ACK and ABORT packets.
+
+ (*) connection_expiry
+
+     The amount of time in seconds after a connection was last used before we
+     remove it from the connection list.  Whilst a connection is in existence,
+     it serves as a placeholder for negotiated security; when it is deleted,
+     the security must be renegotiated.
+
+ (*) transport_expiry
+
+     The amount of time in seconds after a transport was last used before we
+     remove it from the transport list.  Whilst a transport is in existence, it
+     serves to anchor the peer data and keeps the connection ID counter.
+
+ (*) rxrpc_rx_window_size
+
+     The size of the receive window in packets.  This is the maximum number of
+     unconsumed received packets we're willing to hold in memory for any
+     particular call.
+
+ (*) rxrpc_rx_mtu
+
+     The maximum packet MTU size that we're willing to receive in bytes.  This
+     indicates to the peer whether we're willing to accept jumbo packets.
+
+ (*) rxrpc_rx_jumbo_max
+
+     The maximum number of packets that we're willing to accept in a jumbo
+     packet.  Non-terminal packets in a jumbo packet must contain a four byte
+     header plus exactly 1412 bytes of data.  The terminal packet must contain
+     a four byte header plus any amount of data.  In any event, a jumbo packet
+     may not exceed rxrpc_rx_mtu in size.
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index fc6d25e7d053..22800bde9752 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -21,55 +21,55 @@
 #include "bonding.h"
 
 static int bond_option_active_slave_set(struct bonding *bond,
-					struct bond_opt_value *newval);
+					const struct bond_opt_value *newval);
 static int bond_option_miimon_set(struct bonding *bond,
-				  struct bond_opt_value *newval);
+				  const struct bond_opt_value *newval);
 static int bond_option_updelay_set(struct bonding *bond,
-				   struct bond_opt_value *newval);
+				   const struct bond_opt_value *newval);
 static int bond_option_downdelay_set(struct bonding *bond,
-				     struct bond_opt_value *newval);
+				     const struct bond_opt_value *newval);
 static int bond_option_use_carrier_set(struct bonding *bond,
-				       struct bond_opt_value *newval);
+				       const struct bond_opt_value *newval);
 static int bond_option_arp_interval_set(struct bonding *bond,
-					struct bond_opt_value *newval);
+					const struct bond_opt_value *newval);
 static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target);
 static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target);
 static int bond_option_arp_ip_targets_set(struct bonding *bond,
-					  struct bond_opt_value *newval);
+					  const struct bond_opt_value *newval);
 static int bond_option_arp_validate_set(struct bonding *bond,
-					struct bond_opt_value *newval);
+					const struct bond_opt_value *newval);
 static int bond_option_arp_all_targets_set(struct bonding *bond,
-					   struct bond_opt_value *newval);
+					   const struct bond_opt_value *newval);
 static int bond_option_primary_set(struct bonding *bond,
-				   struct bond_opt_value *newval);
+				   const struct bond_opt_value *newval);
 static int bond_option_primary_reselect_set(struct bonding *bond,
-					    struct bond_opt_value *newval);
+					    const struct bond_opt_value *newval);
 static int bond_option_fail_over_mac_set(struct bonding *bond,
-					 struct bond_opt_value *newval);
+					 const struct bond_opt_value *newval);
 static int bond_option_xmit_hash_policy_set(struct bonding *bond,
-					    struct bond_opt_value *newval);
+					    const struct bond_opt_value *newval);
 static int bond_option_resend_igmp_set(struct bonding *bond,
-				       struct bond_opt_value *newval);
+				       const struct bond_opt_value *newval);
 static int bond_option_num_peer_notif_set(struct bonding *bond,
-					  struct bond_opt_value *newval);
+					  const struct bond_opt_value *newval);
 static int bond_option_all_slaves_active_set(struct bonding *bond,
-					     struct bond_opt_value *newval);
+					     const struct bond_opt_value *newval);
 static int bond_option_min_links_set(struct bonding *bond,
-				     struct bond_opt_value *newval);
+				     const struct bond_opt_value *newval);
 static int bond_option_lp_interval_set(struct bonding *bond,
-				       struct bond_opt_value *newval);
+				       const struct bond_opt_value *newval);
 static int bond_option_pps_set(struct bonding *bond,
-			       struct bond_opt_value *newval);
+			       const struct bond_opt_value *newval);
 static int bond_option_lacp_rate_set(struct bonding *bond,
-				     struct bond_opt_value *newval);
+				     const struct bond_opt_value *newval);
 static int bond_option_ad_select_set(struct bonding *bond,
-				     struct bond_opt_value *newval);
+				     const struct bond_opt_value *newval);
 static int bond_option_queue_id_set(struct bonding *bond,
-				    struct bond_opt_value *newval);
+				    const struct bond_opt_value *newval);
 static int bond_option_mode_set(struct bonding *bond,
-				struct bond_opt_value *newval);
+				const struct bond_opt_value *newval);
 static int bond_option_slaves_set(struct bonding *bond,
-				  struct bond_opt_value *newval);
+				  const struct bond_opt_value *newval);
 
 
 static const struct bond_opt_value bond_mode_tbl[] = {
@@ -504,7 +504,7 @@ static int bond_opt_check_deps(struct bonding *bond,
 static void bond_opt_dep_print(struct bonding *bond,
 			       const struct bond_option *opt)
 {
-	struct bond_opt_value *modeval;
+	const struct bond_opt_value *modeval;
 	struct bond_params *params;
 
 	params = &bond->params;
@@ -517,9 +517,9 @@ static void bond_opt_dep_print(struct bonding *bond,
 
 static void bond_opt_error_interpret(struct bonding *bond,
 				     const struct bond_option *opt,
-				     int error, struct bond_opt_value *val)
+				     int error, const struct bond_opt_value *val)
 {
-	struct bond_opt_value *minval, *maxval;
+	const struct bond_opt_value *minval, *maxval;
 	char *p;
 
 	switch (error) {
@@ -574,7 +574,7 @@ static void bond_opt_error_interpret(struct bonding *bond,
 int __bond_opt_set(struct bonding *bond,
 		   unsigned int option, struct bond_opt_value *val)
 {
-	struct bond_opt_value *retval = NULL;
+	const struct bond_opt_value *retval = NULL;
 	const struct bond_option *opt;
 	int ret = -ENOENT;
 
@@ -637,7 +637,7 @@ const struct bond_option *bond_opt_get(unsigned int option)
 	return &bond_opts[option];
 }
 
-int bond_option_mode_set(struct bonding *bond, struct bond_opt_value *newval)
+int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval)
 {
 	if (BOND_NO_USES_ARP(newval->value) && bond->params.arp_interval) {
 		pr_info("%s: %s mode is incompatible with arp monitoring, start mii monitoring\n",
@@ -676,7 +676,7 @@ struct net_device *bond_option_active_slave_get(struct bonding *bond)
 }
 
 static int bond_option_active_slave_set(struct bonding *bond,
-					struct bond_opt_value *newval)
+					const struct bond_opt_value *newval)
 {
 	char ifname[IFNAMSIZ] = { 0, };
 	struct net_device *slave_dev;
@@ -745,7 +745,7 @@ static int bond_option_active_slave_set(struct bonding *bond,
 }
 
 static int bond_option_miimon_set(struct bonding *bond,
-				  struct bond_opt_value *newval)
+				  const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting MII monitoring interval to %llu\n",
 		bond->dev->name, newval->value);
@@ -783,7 +783,7 @@ static int bond_option_miimon_set(struct bonding *bond,
 }
 
 static int bond_option_updelay_set(struct bonding *bond,
-				   struct bond_opt_value *newval)
+				   const struct bond_opt_value *newval)
 {
 	int value = newval->value;
 
@@ -807,7 +807,7 @@ static int bond_option_updelay_set(struct bonding *bond,
 }
 
 static int bond_option_downdelay_set(struct bonding *bond,
-				     struct bond_opt_value *newval)
+				     const struct bond_opt_value *newval)
 {
 	int value = newval->value;
 
@@ -831,7 +831,7 @@ static int bond_option_downdelay_set(struct bonding *bond,
 }
 
 static int bond_option_use_carrier_set(struct bonding *bond,
-				       struct bond_opt_value *newval)
+				       const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting use_carrier to %llu\n",
 		bond->dev->name, newval->value);
@@ -841,7 +841,7 @@ static int bond_option_use_carrier_set(struct bonding *bond,
 }
 
 static int bond_option_arp_interval_set(struct bonding *bond,
-					struct bond_opt_value *newval)
+					const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting ARP monitoring interval to %llu\n",
 		bond->dev->name, newval->value);
@@ -991,7 +991,7 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond)
 }
 
 static int bond_option_arp_ip_targets_set(struct bonding *bond,
-					  struct bond_opt_value *newval)
+					  const struct bond_opt_value *newval)
 {
 	int ret = -EPERM;
 	__be32 target;
@@ -1018,7 +1018,7 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
 }
 
 static int bond_option_arp_validate_set(struct bonding *bond,
-					struct bond_opt_value *newval)
+					const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting arp_validate to %s (%llu)\n",
 		bond->dev->name, newval->string, newval->value);
@@ -1035,7 +1035,7 @@ static int bond_option_arp_validate_set(struct bonding *bond,
 }
 
 static int bond_option_arp_all_targets_set(struct bonding *bond,
-					   struct bond_opt_value *newval)
+					   const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting arp_all_targets to %s (%llu)\n",
 		bond->dev->name, newval->string, newval->value);
@@ -1045,7 +1045,7 @@ static int bond_option_arp_all_targets_set(struct bonding *bond,
 }
 
 static int bond_option_primary_set(struct bonding *bond,
-				   struct bond_opt_value *newval)
+				   const struct bond_opt_value *newval)
 {
 	char *p, *primary = newval->string;
 	struct list_head *iter;
@@ -1098,7 +1098,7 @@ out:
 }
 
 static int bond_option_primary_reselect_set(struct bonding *bond,
-					    struct bond_opt_value *newval)
+					    const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting primary_reselect to %s (%llu)\n",
 		bond->dev->name, newval->string, newval->value);
@@ -1114,7 +1114,7 @@ static int bond_option_primary_reselect_set(struct bonding *bond,
 }
 
 static int bond_option_fail_over_mac_set(struct bonding *bond,
-					 struct bond_opt_value *newval)
+					 const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting fail_over_mac to %s (%llu)\n",
 		bond->dev->name, newval->string, newval->value);
@@ -1124,7 +1124,7 @@ static int bond_option_fail_over_mac_set(struct bonding *bond,
 }
 
 static int bond_option_xmit_hash_policy_set(struct bonding *bond,
-					    struct bond_opt_value *newval)
+					    const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting xmit hash policy to %s (%llu)\n",
 		bond->dev->name, newval->string, newval->value);
@@ -1134,7 +1134,7 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond,
 }
 
 static int bond_option_resend_igmp_set(struct bonding *bond,
-				       struct bond_opt_value *newval)
+				       const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting resend_igmp to %llu\n",
 		bond->dev->name, newval->value);
@@ -1144,7 +1144,7 @@ static int bond_option_resend_igmp_set(struct bonding *bond,
 }
 
 static int bond_option_num_peer_notif_set(struct bonding *bond,
-				   struct bond_opt_value *newval)
+				   const struct bond_opt_value *newval)
 {
 	bond->params.num_peer_notif = newval->value;
 
@@ -1152,7 +1152,7 @@ static int bond_option_num_peer_notif_set(struct bonding *bond,
 }
 
 static int bond_option_all_slaves_active_set(struct bonding *bond,
-					     struct bond_opt_value *newval)
+					     const struct bond_opt_value *newval)
 {
 	struct list_head *iter;
 	struct slave *slave;
@@ -1173,7 +1173,7 @@ static int bond_option_all_slaves_active_set(struct bonding *bond,
 }
 
 static int bond_option_min_links_set(struct bonding *bond,
-				     struct bond_opt_value *newval)
+				     const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting min links value to %llu\n",
 		bond->dev->name, newval->value);
@@ -1183,7 +1183,7 @@ static int bond_option_min_links_set(struct bonding *bond,
 }
 
 static int bond_option_lp_interval_set(struct bonding *bond,
-				       struct bond_opt_value *newval)
+				       const struct bond_opt_value *newval)
 {
 	bond->params.lp_interval = newval->value;
 
@@ -1191,7 +1191,7 @@ static int bond_option_lp_interval_set(struct bonding *bond,
 }
 
 static int bond_option_pps_set(struct bonding *bond,
-			       struct bond_opt_value *newval)
+			       const struct bond_opt_value *newval)
 {
 	bond->params.packets_per_slave = newval->value;
 	if (newval->value > 0) {
@@ -1209,7 +1209,7 @@ static int bond_option_pps_set(struct bonding *bond,
 }
 
 static int bond_option_lacp_rate_set(struct bonding *bond,
-				     struct bond_opt_value *newval)
+				     const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting LACP rate to %s (%llu)\n",
 		bond->dev->name, newval->string, newval->value);
@@ -1220,7 +1220,7 @@ static int bond_option_lacp_rate_set(struct bonding *bond,
 }
 
 static int bond_option_ad_select_set(struct bonding *bond,
-				     struct bond_opt_value *newval)
+				     const struct bond_opt_value *newval)
 {
 	pr_info("%s: Setting ad_select to %s (%llu)\n",
 		bond->dev->name, newval->string, newval->value);
@@ -1230,7 +1230,7 @@ static int bond_option_ad_select_set(struct bonding *bond,
 }
 
 static int bond_option_queue_id_set(struct bonding *bond,
-				    struct bond_opt_value *newval)
+				    const struct bond_opt_value *newval)
 {
 	struct slave *slave, *update_slave;
 	struct net_device *sdev;
@@ -1291,7 +1291,7 @@ err_no_cmd:
 }
 
 static int bond_option_slaves_set(struct bonding *bond,
-				  struct bond_opt_value *newval)
+				  const struct bond_opt_value *newval)
 {
 	char command[IFNAMSIZ + 1] = { 0, };
 	struct net_device *dev;
diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h
index 6c5ba0ffc31c..12be9e1bfb0c 100644
--- a/drivers/net/bonding/bond_options.h
+++ b/drivers/net/bonding/bond_options.h
@@ -94,14 +94,15 @@ struct bond_option {
 	 */
 	const struct bond_opt_value *values;
 
-	int (*set)(struct bonding *bond, struct bond_opt_value *val);
+	int (*set)(struct bonding *bond, const struct bond_opt_value *val);
 };
 
 int __bond_opt_set(struct bonding *bond, unsigned int option,
 		   struct bond_opt_value *val);
 int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf);
+
 const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt,
-				      struct bond_opt_value *val);
+					    struct bond_opt_value *val);
 const struct bond_option *bond_opt_get(unsigned int option);
 const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val);
 
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 588cf39d832c..013fdd0f45e9 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -65,7 +65,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v)
 static void bond_info_show_master(struct seq_file *seq)
 {
 	struct bonding *bond = seq->private;
-	struct bond_opt_value *optval;
+	const struct bond_opt_value *optval;
 	struct slave *curr;
 	int i;
 
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index cf09d8faca84..66759b6ce373 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -802,16 +802,18 @@ be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data)
 
 	if (test->flags & ETH_TEST_FL_OFFLINE) {
 		if (be_loopback_test(adapter, BE_MAC_LOOPBACK,
-						&data[0]) != 0) {
+				     &data[0]) != 0)
 			test->flags |= ETH_TEST_FL_FAILED;
-		}
+
 		if (be_loopback_test(adapter, BE_PHY_LOOPBACK,
-						&data[1]) != 0) {
-			test->flags |= ETH_TEST_FL_FAILED;
-		}
-		if (be_loopback_test(adapter, BE_ONE_PORT_EXT_LOOPBACK,
-						&data[2]) != 0) {
+				     &data[1]) != 0)
 			test->flags |= ETH_TEST_FL_FAILED;
+
+		if (test->flags & ETH_TEST_FL_EXTERNAL_LB) {
+			if (be_loopback_test(adapter, BE_ONE_PORT_EXT_LOOPBACK,
+					     &data[2]) != 0)
+				test->flags |= ETH_TEST_FL_FAILED;
+			test->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE;
 		}
 	}
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 72dae4d97b43..838b69b74edf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -86,12 +86,12 @@
 
 #define I40E_NVM_VERSION_LO_SHIFT  0
 #define I40E_NVM_VERSION_LO_MASK   (0xff << I40E_NVM_VERSION_LO_SHIFT)
-#define I40E_NVM_VERSION_HI_SHIFT  8
-#define I40E_NVM_VERSION_HI_MASK   (0xff << I40E_NVM_VERSION_HI_SHIFT)
+#define I40E_NVM_VERSION_HI_SHIFT  12
+#define I40E_NVM_VERSION_HI_MASK   (0xf << I40E_NVM_VERSION_HI_SHIFT)
 
 /* The values in here are decimal coded as hex as is the case in the NVM map*/
 #define I40E_CURRENT_NVM_VERSION_HI 0x2
-#define I40E_CURRENT_NVM_VERSION_LO 0x30
+#define I40E_CURRENT_NVM_VERSION_LO 0x40
 
 /* magic for getting defines into strings */
 #define STRINGIFY(foo)  #foo
@@ -152,8 +152,18 @@ struct i40e_lump_tracking {
 };
 
 #define I40E_DEFAULT_ATR_SAMPLE_RATE	20
-#define I40E_FDIR_MAX_RAW_PACKET_LOOKUP 512
-struct i40e_fdir_data {
+#define I40E_FDIR_MAX_RAW_PACKET_SIZE   512
+struct i40e_fdir_filter {
+	struct hlist_node fdir_node;
+	/* filter ipnut set */
+	u8 flow_type;
+	u8 ip4_proto;
+	__be32 dst_ip[4];
+	__be32 src_ip[4];
+	__be16 src_port;
+	__be16 dst_port;
+	__be32 sctp_v_tag;
+	/* filter control */
 	u16 q_index;
 	u8  flex_off;
 	u8  pctype;
@@ -162,7 +172,6 @@ struct i40e_fdir_data {
 	u8  fd_status;
 	u16 cnt_index;
 	u32 fd_id;
-	u8  *raw_packet;
 };
 
 #define I40E_ETH_P_LLDP			0x88cc
@@ -210,6 +219,9 @@ struct i40e_pf {
 	u8 atr_sample_rate;
 	bool wol_en;
 
+	struct hlist_head fdir_filter_list;
+	u16 fdir_pf_active_filters;
+
 #ifdef CONFIG_I40E_VXLAN
 	__be16  vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
 	u16 pending_vxlan_bitmap;
@@ -477,10 +489,10 @@ static inline char *i40e_fw_version_str(struct i40e_hw *hw)
 		 "f%d.%d a%d.%d n%02x.%02x e%08x",
 		 hw->aq.fw_maj_ver, hw->aq.fw_min_ver,
 		 hw->aq.api_maj_ver, hw->aq.api_min_ver,
-		 (hw->nvm.version & I40E_NVM_VERSION_HI_MASK)
-						>> I40E_NVM_VERSION_HI_SHIFT,
-		 (hw->nvm.version & I40E_NVM_VERSION_LO_MASK)
-						>> I40E_NVM_VERSION_LO_SHIFT,
+		 (hw->nvm.version & I40E_NVM_VERSION_HI_MASK) >>
+			I40E_NVM_VERSION_HI_SHIFT,
+		 (hw->nvm.version & I40E_NVM_VERSION_LO_MASK) >>
+			I40E_NVM_VERSION_LO_SHIFT,
 		 hw->nvm.eetrack);
 
 	return buf;
@@ -534,9 +546,10 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
 int i40e_fetch_switch_configuration(struct i40e_pf *pf,
 				    bool printconfig);
 
-int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
+int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
 			     struct i40e_pf *pf, bool add);
-
+int i40e_add_del_fdir(struct i40e_vsi *vsi,
+		      struct i40e_fdir_filter *input, bool add);
 void i40e_set_ethtool_ops(struct net_device *netdev);
 struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
 					u8 *macaddr, s16 vlan,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index e7f38b57834d..bb948dd92474 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -162,6 +162,372 @@ i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw,
 	return status;
 }
 
+/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT i40e_ptype_lookup[ptype].known
+ * THEN
+ *      Packet is unknown
+ * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
+ *      Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ *      Use the enum i40e_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short */
+#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	{	PTYPE, \
+		1, \
+		I40E_RX_PTYPE_OUTER_##OUTER_IP, \
+		I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		I40E_RX_PTYPE_##OUTER_FRAG, \
+		I40E_RX_PTYPE_TUNNEL_##T, \
+		I40E_RX_PTYPE_TUNNEL_END_##TE, \
+		I40E_RX_PTYPE_##TEF, \
+		I40E_RX_PTYPE_INNER_PROT_##I, \
+		I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define I40E_PTT_UNUSED_ENTRY(PTYPE) \
+		{ PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define I40E_RX_PTYPE_NOF		I40E_RX_PTYPE_NOT_FRAG
+#define I40E_RX_PTYPE_FRG		I40E_RX_PTYPE_FRAG
+#define I40E_RX_PTYPE_INNER_PROT_TS	I40E_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
+	/* L2 Packet types */
+	I40E_PTT_UNUSED_ENTRY(0),
+	I40E_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(2,  L2, NONE, NOF, NONE, NONE, NOF, TS,   PAY2),
+	I40E_PTT(3,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT_UNUSED_ENTRY(4),
+	I40E_PTT_UNUSED_ENTRY(5),
+	I40E_PTT(6,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(7,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT_UNUSED_ENTRY(8),
+	I40E_PTT_UNUSED_ENTRY(9),
+	I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+	/* Non Tunneled IPv4 */
+	I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(25),
+	I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv4 */
+	I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(32),
+	I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv6 */
+	I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(39),
+	I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT */
+	I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> IPv4 */
+	I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(47),
+	I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> IPv6 */
+	I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(54),
+	I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC */
+	I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+	I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(62),
+	I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+	I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(69),
+	I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN */
+	I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+	I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(77),
+	I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+	I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(84),
+	I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* Non Tunneled IPv6 */
+	I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY3),
+	I40E_PTT_UNUSED_ENTRY(91),
+	I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv4 */
+	I40E_PTT(95,  IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(96,  IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(97,  IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(98),
+	I40E_PTT(99,  IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv6 */
+	I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(105),
+	I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT */
+	I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> IPv4 */
+	I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(113),
+	I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> IPv6 */
+	I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(120),
+	I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC */
+	I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+	I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(128),
+	I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+	I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(135),
+	I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN */
+	I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+	I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(143),
+	I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+	I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(150),
+	I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* unused entries */
+	I40E_PTT_UNUSED_ENTRY(154),
+	I40E_PTT_UNUSED_ENTRY(155),
+	I40E_PTT_UNUSED_ENTRY(156),
+	I40E_PTT_UNUSED_ENTRY(157),
+	I40E_PTT_UNUSED_ENTRY(158),
+	I40E_PTT_UNUSED_ENTRY(159),
+
+	I40E_PTT_UNUSED_ENTRY(160),
+	I40E_PTT_UNUSED_ENTRY(161),
+	I40E_PTT_UNUSED_ENTRY(162),
+	I40E_PTT_UNUSED_ENTRY(163),
+	I40E_PTT_UNUSED_ENTRY(164),
+	I40E_PTT_UNUSED_ENTRY(165),
+	I40E_PTT_UNUSED_ENTRY(166),
+	I40E_PTT_UNUSED_ENTRY(167),
+	I40E_PTT_UNUSED_ENTRY(168),
+	I40E_PTT_UNUSED_ENTRY(169),
+
+	I40E_PTT_UNUSED_ENTRY(170),
+	I40E_PTT_UNUSED_ENTRY(171),
+	I40E_PTT_UNUSED_ENTRY(172),
+	I40E_PTT_UNUSED_ENTRY(173),
+	I40E_PTT_UNUSED_ENTRY(174),
+	I40E_PTT_UNUSED_ENTRY(175),
+	I40E_PTT_UNUSED_ENTRY(176),
+	I40E_PTT_UNUSED_ENTRY(177),
+	I40E_PTT_UNUSED_ENTRY(178),
+	I40E_PTT_UNUSED_ENTRY(179),
+
+	I40E_PTT_UNUSED_ENTRY(180),
+	I40E_PTT_UNUSED_ENTRY(181),
+	I40E_PTT_UNUSED_ENTRY(182),
+	I40E_PTT_UNUSED_ENTRY(183),
+	I40E_PTT_UNUSED_ENTRY(184),
+	I40E_PTT_UNUSED_ENTRY(185),
+	I40E_PTT_UNUSED_ENTRY(186),
+	I40E_PTT_UNUSED_ENTRY(187),
+	I40E_PTT_UNUSED_ENTRY(188),
+	I40E_PTT_UNUSED_ENTRY(189),
+
+	I40E_PTT_UNUSED_ENTRY(190),
+	I40E_PTT_UNUSED_ENTRY(191),
+	I40E_PTT_UNUSED_ENTRY(192),
+	I40E_PTT_UNUSED_ENTRY(193),
+	I40E_PTT_UNUSED_ENTRY(194),
+	I40E_PTT_UNUSED_ENTRY(195),
+	I40E_PTT_UNUSED_ENTRY(196),
+	I40E_PTT_UNUSED_ENTRY(197),
+	I40E_PTT_UNUSED_ENTRY(198),
+	I40E_PTT_UNUSED_ENTRY(199),
+
+	I40E_PTT_UNUSED_ENTRY(200),
+	I40E_PTT_UNUSED_ENTRY(201),
+	I40E_PTT_UNUSED_ENTRY(202),
+	I40E_PTT_UNUSED_ENTRY(203),
+	I40E_PTT_UNUSED_ENTRY(204),
+	I40E_PTT_UNUSED_ENTRY(205),
+	I40E_PTT_UNUSED_ENTRY(206),
+	I40E_PTT_UNUSED_ENTRY(207),
+	I40E_PTT_UNUSED_ENTRY(208),
+	I40E_PTT_UNUSED_ENTRY(209),
+
+	I40E_PTT_UNUSED_ENTRY(210),
+	I40E_PTT_UNUSED_ENTRY(211),
+	I40E_PTT_UNUSED_ENTRY(212),
+	I40E_PTT_UNUSED_ENTRY(213),
+	I40E_PTT_UNUSED_ENTRY(214),
+	I40E_PTT_UNUSED_ENTRY(215),
+	I40E_PTT_UNUSED_ENTRY(216),
+	I40E_PTT_UNUSED_ENTRY(217),
+	I40E_PTT_UNUSED_ENTRY(218),
+	I40E_PTT_UNUSED_ENTRY(219),
+
+	I40E_PTT_UNUSED_ENTRY(220),
+	I40E_PTT_UNUSED_ENTRY(221),
+	I40E_PTT_UNUSED_ENTRY(222),
+	I40E_PTT_UNUSED_ENTRY(223),
+	I40E_PTT_UNUSED_ENTRY(224),
+	I40E_PTT_UNUSED_ENTRY(225),
+	I40E_PTT_UNUSED_ENTRY(226),
+	I40E_PTT_UNUSED_ENTRY(227),
+	I40E_PTT_UNUSED_ENTRY(228),
+	I40E_PTT_UNUSED_ENTRY(229),
+
+	I40E_PTT_UNUSED_ENTRY(230),
+	I40E_PTT_UNUSED_ENTRY(231),
+	I40E_PTT_UNUSED_ENTRY(232),
+	I40E_PTT_UNUSED_ENTRY(233),
+	I40E_PTT_UNUSED_ENTRY(234),
+	I40E_PTT_UNUSED_ENTRY(235),
+	I40E_PTT_UNUSED_ENTRY(236),
+	I40E_PTT_UNUSED_ENTRY(237),
+	I40E_PTT_UNUSED_ENTRY(238),
+	I40E_PTT_UNUSED_ENTRY(239),
+
+	I40E_PTT_UNUSED_ENTRY(240),
+	I40E_PTT_UNUSED_ENTRY(241),
+	I40E_PTT_UNUSED_ENTRY(242),
+	I40E_PTT_UNUSED_ENTRY(243),
+	I40E_PTT_UNUSED_ENTRY(244),
+	I40E_PTT_UNUSED_ENTRY(245),
+	I40E_PTT_UNUSED_ENTRY(246),
+	I40E_PTT_UNUSED_ENTRY(247),
+	I40E_PTT_UNUSED_ENTRY(248),
+	I40E_PTT_UNUSED_ENTRY(249),
+
+	I40E_PTT_UNUSED_ENTRY(250),
+	I40E_PTT_UNUSED_ENTRY(251),
+	I40E_PTT_UNUSED_ENTRY(252),
+	I40E_PTT_UNUSED_ENTRY(253),
+	I40E_PTT_UNUSED_ENTRY(254),
+	I40E_PTT_UNUSED_ENTRY(255)
+};
+
+
 /**
  * i40e_init_shared_code - Initialize the shared code
  * @hw: pointer to hardware structure
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 50730141bb7b..036570d76176 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -332,6 +332,7 @@ i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib,
 	u16 type;
 	u16 length;
 	u16 typelength;
+	u16 offset = 0;
 
 	if (!lldpmib || !dcbcfg)
 		return I40E_ERR_PARAM;
@@ -339,15 +340,17 @@ i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib,
 	/* set to the start of LLDPDU */
 	lldpmib += ETH_HLEN;
 	tlv = (struct i40e_lldp_org_tlv *)lldpmib;
-	while (tlv) {
+	while (1) {
 		typelength = ntohs(tlv->typelength);
 		type = (u16)((typelength & I40E_LLDP_TLV_TYPE_MASK) >>
 			     I40E_LLDP_TLV_TYPE_SHIFT);
 		length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >>
 			       I40E_LLDP_TLV_LEN_SHIFT);
+		offset += sizeof(typelength) + length;
 
-		if (type == I40E_TLV_TYPE_END)
-			break;/* END TLV break out */
+		/* END TLV or beyond LLDPDU size */
+		if ((type == I40E_TLV_TYPE_END) || (offset > I40E_LLDPDU_SIZE))
+			break;
 
 		switch (type) {
 		case I40E_TLV_TYPE_ORG:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index da22c3fa2c00..57fc86496f30 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1663,21 +1663,22 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 		desc = NULL;
 	} else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) ||
 		   (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) {
-		struct i40e_fdir_data fd_data;
+		struct i40e_fdir_filter fd_data;
 		u16 packet_len, i, j = 0;
 		char *asc_packet;
+		u8 *raw_packet;
 		bool add = false;
 		int ret;
 
-		asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
+		asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
 				     GFP_KERNEL);
 		if (!asc_packet)
 			goto command_write_done;
 
-		fd_data.raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
-					     GFP_KERNEL);
+		raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
+				     GFP_KERNEL);
 
-		if (!fd_data.raw_packet) {
+		if (!raw_packet) {
 			kfree(asc_packet);
 			asc_packet = NULL;
 			goto command_write_done;
@@ -1698,36 +1699,36 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
 				 cnt);
 			kfree(asc_packet);
 			asc_packet = NULL;
-			kfree(fd_data.raw_packet);
+			kfree(raw_packet);
 			goto command_write_done;
 		}
 
 		/* fix packet length if user entered 0 */
 		if (packet_len == 0)
-			packet_len = I40E_FDIR_MAX_RAW_PACKET_LOOKUP;
+			packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE;
 
 		/* make sure to check the max as well */
 		packet_len = min_t(u16,
-				   packet_len, I40E_FDIR_MAX_RAW_PACKET_LOOKUP);
+				   packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
 
 		for (i = 0; i < packet_len; i++) {
 			sscanf(&asc_packet[j], "%2hhx ",
-			       &fd_data.raw_packet[i]);
+			       &raw_packet[i]);
 			j += 3;
 		}
 		dev_info(&pf->pdev->dev, "FD raw packet dump\n");
 		print_hex_dump(KERN_INFO, "FD raw packet: ",
 			       DUMP_PREFIX_OFFSET, 16, 1,
-			       fd_data.raw_packet, packet_len, true);
-		ret = i40e_program_fdir_filter(&fd_data, pf, add);
+			       raw_packet, packet_len, true);
+		ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add);
 		if (!ret) {
 			dev_info(&pf->pdev->dev, "Filter command send Status : Success\n");
 		} else {
 			dev_info(&pf->pdev->dev,
 				 "Filter command send failed %d\n", ret);
 		}
-		kfree(fd_data.raw_packet);
-		fd_data.raw_packet = NULL;
+		kfree(raw_packet);
+		raw_packet = NULL;
 		kfree(asc_packet);
 		asc_packet = NULL;
 	} else if (strncmp(cmd_buf, "fd-atr off", 10) == 0) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index b1d7d8c5cb9b..d34ff31fddd8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -62,6 +62,9 @@ static const struct i40e_stats i40e_gstrings_net_stats[] = {
 	I40E_NETDEV_STAT(rx_crc_errors),
 };
 
+static int i40e_add_del_fdir_ethtool(struct i40e_vsi *vsi,
+				     struct ethtool_rxnfc *cmd, bool add);
+
 /* These PF_STATs might look like duplicates of some NETDEV_STATs,
  * but they are separate.  This device supports Virtualization, and
  * as such might have several netdevs supporting VMDq and FCoE going
@@ -84,6 +87,7 @@ static struct i40e_stats i40e_gstrings_stats[] = {
 	I40E_PF_STAT("illegal_bytes", stats.illegal_bytes),
 	I40E_PF_STAT("mac_local_faults", stats.mac_local_faults),
 	I40E_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
+	I40E_PF_STAT("tx_timeout", tx_timeout_count),
 	I40E_PF_STAT("rx_length_errors", stats.rx_length_errors),
 	I40E_PF_STAT("link_xon_rx", stats.link_xon_rx),
 	I40E_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
@@ -1112,6 +1116,84 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
 }
 
 /**
+ * i40e_get_ethtool_fdir_all - Populates the rule count of a command
+ * @pf: Pointer to the physical function struct
+ * @cmd: The command to get or set Rx flow classification rules
+ * @rule_locs: Array of used rule locations
+ *
+ * This function populates both the total and actual rule count of
+ * the ethtool flow classification command
+ *
+ * Returns 0 on success or -EMSGSIZE if entry not found
+ **/
+static int i40e_get_ethtool_fdir_all(struct i40e_pf *pf,
+				     struct ethtool_rxnfc *cmd,
+				     u32 *rule_locs)
+{
+	struct i40e_fdir_filter *rule;
+	struct hlist_node *node2;
+	int cnt = 0;
+
+	/* report total rule count */
+	cmd->data = pf->hw.fdir_shared_filter_count +
+		    pf->fdir_pf_filter_count;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		if (cnt == cmd->rule_cnt)
+			return -EMSGSIZE;
+
+		rule_locs[cnt] = rule->fd_id;
+		cnt++;
+	}
+
+	cmd->rule_cnt = cnt;
+
+	return 0;
+}
+
+/**
+ * i40e_get_ethtool_fdir_entry - Look up a filter based on Rx flow
+ * @pf: Pointer to the physical function struct
+ * @cmd: The command to get or set Rx flow classification rules
+ *
+ * This function looks up a filter based on the Rx flow classification
+ * command and fills the flow spec info for it if found
+ *
+ * Returns 0 on success or -EINVAL if filter not found
+ **/
+static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
+				       struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+			(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct i40e_fdir_filter *rule = NULL;
+	struct hlist_node *node2;
+
+	/* report total rule count */
+	cmd->data = pf->hw.fdir_shared_filter_count +
+		    pf->fdir_pf_filter_count;
+
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		if (fsp->location <= rule->fd_id)
+			break;
+	}
+
+	if (!rule || fsp->location != rule->fd_id)
+		return -EINVAL;
+
+	fsp->flow_type = rule->flow_type;
+	fsp->h_u.tcp_ip4_spec.psrc = rule->src_port;
+	fsp->h_u.tcp_ip4_spec.pdst = rule->dst_port;
+	fsp->h_u.tcp_ip4_spec.ip4src = rule->src_ip[0];
+	fsp->h_u.tcp_ip4_spec.ip4dst = rule->dst_ip[0];
+	fsp->ring_cookie = rule->q_index;
+
+	return 0;
+}
+
+/**
  * i40e_get_rxnfc - command to get RX flow classification rules
  * @netdev: network interface device structure
  * @cmd: ethtool rxnfc command
@@ -1135,15 +1217,15 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 		ret = i40e_get_rss_hash_opts(pf, cmd);
 		break;
 	case ETHTOOL_GRXCLSRLCNT:
-		cmd->rule_cnt = 10;
+		cmd->rule_cnt = pf->fdir_pf_active_filters;
 		ret = 0;
 		break;
 	case ETHTOOL_GRXCLSRULE:
-		ret = 0;
+		ret = i40e_get_ethtool_fdir_entry(pf, cmd);
 		break;
 	case ETHTOOL_GRXCLSRLALL:
-		cmd->data = 500;
-		ret = 0;
+		ret = i40e_get_ethtool_fdir_all(pf, cmd, rule_locs);
+		break;
 	default:
 		break;
 	}
@@ -1274,289 +1356,158 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
 	return 0;
 }
 
-#define IP_HEADER_OFFSET 14
-#define I40E_UDPIP_DUMMY_PACKET_LEN 42
 /**
- * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required from the FDir descriptor
- * @ethtool_rx_flow_spec: the flow spec
- * @add: true adds a filter, false removes it
+ * i40e_update_ethtool_fdir_entry - Updates the fdir filter entry
+ * @vsi: Pointer to the targeted VSI
+ * @input: The filter to update or NULL to indicate deletion
+ * @sw_idx: Software index to the filter
+ * @cmd: The command to get or set Rx flow classification rules
  *
- * Returns 0 if the filters were successfully added or removed
+ * This function updates (or deletes) a Flow Director entry from
+ * the hlist of the corresponding PF
+ *
+ * Returns 0 on success
  **/
-static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
-				   struct i40e_fdir_data *fd_data,
-				   struct ethtool_rx_flow_spec *fsp, bool add)
+static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
+					  struct i40e_fdir_filter *input,
+					  u16 sw_idx,
+					  struct ethtool_rxnfc *cmd)
 {
+	struct i40e_fdir_filter *rule, *parent;
 	struct i40e_pf *pf = vsi->back;
-	struct udphdr *udp;
-	struct iphdr *ip;
-	bool err = false;
-	int ret;
-	int i;
-	char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
-			 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11,
-			 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-			 0, 0, 0, 0, 0, 0, 0, 0};
-
-	memcpy(fd_data->raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
-
-	ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
-	udp = (struct udphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET
-	      + sizeof(struct iphdr));
+	struct hlist_node *node2;
+	int err = -EINVAL;
 
-	ip->saddr = fsp->h_u.tcp_ip4_spec.ip4src;
-	ip->daddr = fsp->h_u.tcp_ip4_spec.ip4dst;
-	udp->source = fsp->h_u.tcp_ip4_spec.psrc;
-	udp->dest = fsp->h_u.tcp_ip4_spec.pdst;
+	parent = NULL;
+	rule = NULL;
 
-	for (i = I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP;
-	     i <= I40E_FILTER_PCTYPE_NONF_IPV4_UDP; i++) {
-		fd_data->pctype = i;
-		ret = i40e_program_fdir_filter(fd_data, pf, add);
-
-		if (ret) {
-			dev_info(&pf->pdev->dev,
-				 "Filter command send failed for PCTYPE %d (ret = %d)\n",
-				 fd_data->pctype, ret);
-			err = true;
-		} else {
-			dev_info(&pf->pdev->dev,
-				 "Filter OK for PCTYPE %d (ret = %d)\n",
-				 fd_data->pctype, ret);
-		}
+	hlist_for_each_entry_safe(rule, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		/* hash found, or no matching entry */
+		if (rule->fd_id >= sw_idx)
+			break;
+		parent = rule;
 	}
 
-	return err ? -EOPNOTSUPP : 0;
-}
-
-#define I40E_TCPIP_DUMMY_PACKET_LEN 54
-/**
- * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required from the FDir descriptor
- * @ethtool_rx_flow_spec: the flow spec
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
-				   struct i40e_fdir_data *fd_data,
-				   struct ethtool_rx_flow_spec *fsp, bool add)
-{
-	struct i40e_pf *pf = vsi->back;
-	struct tcphdr *tcp;
-	struct iphdr *ip;
-	bool err = false;
-	int ret;
-	/* Dummy packet */
-	char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
-			 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6,
-			 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-			 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-			 0x80, 0x11, 0x0, 0x72, 0, 0, 0, 0};
-
-	memcpy(fd_data->raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
-
-	ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
-	tcp = (struct tcphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET
-	      + sizeof(struct iphdr));
-
-	ip->daddr = fsp->h_u.tcp_ip4_spec.ip4dst;
-	tcp->dest = fsp->h_u.tcp_ip4_spec.pdst;
-	ip->saddr = fsp->h_u.tcp_ip4_spec.ip4src;
-	tcp->source = fsp->h_u.tcp_ip4_spec.psrc;
-
-	if (add) {
-		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
-			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
-			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+	/* if there is an old rule occupying our place remove it */
+	if (rule && (rule->fd_id == sw_idx)) {
+		if (!input || (rule->fd_id != input->fd_id)) {
+			cmd->fs.flow_type = rule->flow_type;
+			err = i40e_add_del_fdir_ethtool(vsi, cmd, false);
 		}
+
+		hlist_del(&rule->fdir_node);
+		kfree(rule);
+		pf->fdir_pf_active_filters--;
 	}
 
-	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN;
-	ret = i40e_program_fdir_filter(fd_data, pf, add);
+	/* If no input this was a delete, err should be 0 if a rule was
+	 * successfully found and removed from the list else -EINVAL
+	 */
+	if (!input)
+		return err;
 
-	if (ret) {
-		dev_info(&pf->pdev->dev,
-			 "Filter command send failed for PCTYPE %d (ret = %d)\n",
-			 fd_data->pctype, ret);
-		err = true;
-	} else {
-		dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
-			 fd_data->pctype, ret);
-	}
+	/* initialize node and set software index */
+	INIT_HLIST_NODE(&input->fdir_node);
 
-	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+	/* add filter to the list */
+	if (parent)
+		hlist_add_after(&parent->fdir_node, &input->fdir_node);
+	else
+		hlist_add_head(&input->fdir_node,
+			       &pf->fdir_filter_list);
 
-	ret = i40e_program_fdir_filter(fd_data, pf, add);
-	if (ret) {
-		dev_info(&pf->pdev->dev,
-			 "Filter command send failed for PCTYPE %d (ret = %d)\n",
-			 fd_data->pctype, ret);
-		err = true;
-	} else {
-		dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
-			  fd_data->pctype, ret);
-	}
+	/* update counts */
+	pf->fdir_pf_active_filters++;
 
-	return err ? -EOPNOTSUPP : 0;
+	return 0;
 }
 
 /**
- * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required from the FDir descriptor
- * @ethtool_rx_flow_spec: the flow spec
- * @add: true adds a filter, false removes it
+ * i40e_del_fdir_entry - Deletes a Flow Director filter entry
+ * @vsi: Pointer to the targeted VSI
+ * @cmd: The command to get or set Rx flow classification rules
  *
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
-				    struct i40e_fdir_data *fd_data,
-				    struct ethtool_rx_flow_spec *fsp, bool add)
-{
-	return -EOPNOTSUPP;
-}
-
-#define I40E_IP_DUMMY_PACKET_LEN 34
-/**
- * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @fsp: the ethtool flow spec
- * @add: true adds a filter, false removes it
+ * The function removes a Flow Director filter entry from the
+ * hlist of the corresponding PF
  *
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
-				  struct i40e_fdir_data *fd_data,
-				  struct ethtool_rx_flow_spec *fsp, bool add)
+ * Returns 0 on success
+ */
+static int i40e_del_fdir_entry(struct i40e_vsi *vsi,
+			       struct ethtool_rxnfc *cmd)
 {
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
 	struct i40e_pf *pf = vsi->back;
-	struct iphdr *ip;
-	bool err = false;
-	int ret;
-	int i;
-	char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
-			 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10,
-			 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+	int ret = 0;
 
-	memcpy(fd_data->raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
-	ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
-
-	ip->saddr = fsp->h_u.usr_ip4_spec.ip4src;
-	ip->daddr = fsp->h_u.usr_ip4_spec.ip4dst;
-	ip->protocol = fsp->h_u.usr_ip4_spec.proto;
-
-	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
-	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
-		fd_data->pctype = i;
-		ret = i40e_program_fdir_filter(fd_data, pf, add);
-
-		if (ret) {
-			dev_info(&pf->pdev->dev,
-				 "Filter command send failed for PCTYPE %d (ret = %d)\n",
-				 fd_data->pctype, ret);
-			err = true;
-		} else {
-			dev_info(&pf->pdev->dev,
-				 "Filter OK for PCTYPE %d (ret = %d)\n",
-				 fd_data->pctype, ret);
-		}
-	}
+	ret = i40e_update_ethtool_fdir_entry(vsi, NULL, fsp->location, cmd);
 
-	return err ? -EOPNOTSUPP : 0;
+	return ret;
 }
 
 /**
- * i40e_add_del_fdir_ethtool - Add/Remove Flow Director filters for
- * a specific flow spec based on their protocol
+ * i40e_add_del_fdir_ethtool - Add/Remove Flow Director filters
  * @vsi: pointer to the targeted VSI
  * @cmd: command to get or set RX flow classification rules
  * @add: true adds a filter, false removes it
  *
- * Returns 0 if the filters were successfully added or removed
+ * Add/Remove Flow Director filters for a specific flow spec based on their
+ * protocol.  Returns 0 if the filters were successfully added or removed.
  **/
 static int i40e_add_del_fdir_ethtool(struct i40e_vsi *vsi,
-			struct ethtool_rxnfc *cmd, bool add)
+				     struct ethtool_rxnfc *cmd, bool add)
 {
-	struct i40e_fdir_data fd_data;
-	int ret = -EINVAL;
+	struct ethtool_rx_flow_spec *fsp;
+	struct i40e_fdir_filter *input;
 	struct i40e_pf *pf;
-	struct ethtool_rx_flow_spec *fsp =
-		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	int ret = -EINVAL;
 
 	if (!vsi)
 		return -EINVAL;
 
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
 	pf = vsi->back;
 
-	if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
-	    (fsp->ring_cookie >= vsi->num_queue_pairs))
+	if (fsp->location >= (pf->hw.func_caps.fd_filters_best_effort +
+			      pf->hw.func_caps.fd_filters_guaranteed)) {
 		return -EINVAL;
+	}
 
-	/* Populate the Flow Director that we have at the moment
-	 * and allocate the raw packet buffer for the calling functions
-	 */
-	fd_data.raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
-				     GFP_KERNEL);
+	if ((fsp->ring_cookie >= vsi->num_queue_pairs) && add)
+		return -EINVAL;
 
-	if (!fd_data.raw_packet) {
-		dev_info(&pf->pdev->dev, "Could not allocate memory\n");
-		return -ENOMEM;
-	}
+	input = kzalloc(sizeof(*input), GFP_KERNEL);
 
-	fd_data.q_index = fsp->ring_cookie;
-	fd_data.flex_off = 0;
-	fd_data.pctype = 0;
-	fd_data.dest_vsi = vsi->id;
-	fd_data.dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
-	fd_data.fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
-	fd_data.cnt_index = 0;
-	fd_data.fd_id = 0;
+	if (!input)
+		return -ENOMEM;
 
-	switch (fsp->flow_type & ~FLOW_EXT) {
-	case TCP_V4_FLOW:
-		ret = i40e_add_del_fdir_tcpv4(vsi, &fd_data, fsp, add);
-		break;
-	case UDP_V4_FLOW:
-		ret = i40e_add_del_fdir_udpv4(vsi, &fd_data, fsp, add);
-		break;
-	case SCTP_V4_FLOW:
-		ret = i40e_add_del_fdir_sctpv4(vsi, &fd_data, fsp, add);
-		break;
-	case IPV4_FLOW:
-		ret = i40e_add_del_fdir_ipv4(vsi, &fd_data, fsp, add);
-		break;
-	case IP_USER_FLOW:
-		switch (fsp->h_u.usr_ip4_spec.proto) {
-		case IPPROTO_TCP:
-			ret = i40e_add_del_fdir_tcpv4(vsi, &fd_data, fsp, add);
-			break;
-		case IPPROTO_UDP:
-			ret = i40e_add_del_fdir_udpv4(vsi, &fd_data, fsp, add);
-			break;
-		case IPPROTO_SCTP:
-			ret = i40e_add_del_fdir_sctpv4(vsi, &fd_data, fsp, add);
-			break;
-		default:
-			ret = i40e_add_del_fdir_ipv4(vsi, &fd_data, fsp, add);
-			break;
-		}
-		break;
-	default:
-		dev_info(&pf->pdev->dev, "Could not specify spec type\n");
-		ret = -EINVAL;
+	input->fd_id = fsp->location;
+
+	input->q_index = fsp->ring_cookie;
+	input->flex_off = 0;
+	input->pctype = 0;
+	input->dest_vsi = vsi->id;
+	input->dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
+	input->fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
+	input->cnt_index = 0;
+	input->flow_type = fsp->flow_type;
+	input->ip4_proto = fsp->h_u.usr_ip4_spec.proto;
+	input->src_port = fsp->h_u.tcp_ip4_spec.psrc;
+	input->dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+	input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+	input->dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+
+	ret = i40e_add_del_fdir(vsi, input, add);
+	if (ret) {
+		kfree(input);
+		return ret;
 	}
 
-	kfree(fd_data.raw_packet);
-	fd_data.raw_packet = NULL;
+	if (!ret && add)
+		i40e_update_ethtool_fdir_entry(vsi, input, fsp->location, NULL);
+	else
+		kfree(input);
 
 	return ret;
 }
@@ -1583,7 +1534,7 @@ static int i40e_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
 		ret = i40e_add_del_fdir_ethtool(vsi, cmd, true);
 		break;
 	case ETHTOOL_SRXCLSRLDEL:
-		ret = i40e_add_del_fdir_ethtool(vsi, cmd, false);
+		ret = i40e_del_fdir_entry(vsi, cmd);
 		break;
 	default:
 		break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 53f3ed2df796..43d391bb65c4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -38,7 +38,7 @@ static const char i40e_driver_string[] =
 
 #define DRV_VERSION_MAJOR 0
 #define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 32
+#define DRV_VERSION_BUILD 34
 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
 	     __stringify(DRV_VERSION_MINOR) "." \
 	     __stringify(DRV_VERSION_BUILD)    DRV_KERN
@@ -1965,11 +1965,14 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
 
 	netdev_info(netdev, "adding %pM vid=%d\n", netdev->dev_addr, vid);
 
-	/* If the network stack called us with vid = 0, we should
-	 * indicate to i40e_vsi_add_vlan() that we want to receive
-	 * any traffic (i.e. with any vlan tag, or untagged)
+	/* If the network stack called us with vid = 0 then
+	 * it is asking to receive priority tagged packets with
+	 * vlan id 0.  Our HW receives them by default when configured
+	 * to receive untagged packets so there is no need to add an
+	 * extra filter for vlan 0 tagged packets.
 	 */
-	ret = i40e_vsi_add_vlan(vsi, vid ? vid : I40E_VLAN_ANY);
+	if (vid)
+		ret = i40e_vsi_add_vlan(vsi, vid);
 
 	if (!ret && (vid < VLAN_N_VID))
 		set_bit(vid, vsi->active_vlans);
@@ -2421,6 +2424,25 @@ static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
 }
 
 /**
+ * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters
+ * @vsi: Pointer to the targeted VSI
+ *
+ * This function replays the hlist on the hw where all the SB Flow Director
+ * filters were saved.
+ **/
+static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
+{
+	struct i40e_fdir_filter *filter;
+	struct i40e_pf *pf = vsi->back;
+	struct hlist_node *node;
+
+	hlist_for_each_entry_safe(filter, node,
+				  &pf->fdir_filter_list, fdir_node) {
+		i40e_add_del_fdir(vsi, filter, true);
+	}
+}
+
+/**
  * i40e_vsi_configure - Set up the VSI for action
  * @vsi: the VSI being configured
  **/
@@ -2431,6 +2453,8 @@ static int i40e_vsi_configure(struct i40e_vsi *vsi)
 	i40e_set_vsi_rx_mode(vsi);
 	i40e_restore_vlan(vsi);
 	i40e_vsi_config_dcb_rings(vsi);
+	if (vsi->type == I40E_VSI_FDIR)
+		i40e_fdir_filter_restore(vsi);
 	err = i40e_vsi_configure_tx(vsi);
 	if (!err)
 		err = i40e_vsi_configure_rx(vsi);
@@ -4268,6 +4292,26 @@ err_setup_tx:
 }
 
 /**
+ * i40e_fdir_filter_exit - Cleans up the Flow Director accounting
+ * @pf: Pointer to pf
+ *
+ * This function destroys the hlist where all the Flow Director
+ * filters were saved.
+ **/
+static void i40e_fdir_filter_exit(struct i40e_pf *pf)
+{
+	struct i40e_fdir_filter *filter;
+	struct hlist_node *node2;
+
+	hlist_for_each_entry_safe(filter, node2,
+				  &pf->fdir_filter_list, fdir_node) {
+		hlist_del(&filter->fdir_node);
+		kfree(filter);
+	}
+	pf->fdir_pf_active_filters = 0;
+}
+
+/**
  * i40e_close - Disables a network interface
  * @netdev: network interface device structure
  *
@@ -5052,6 +5096,12 @@ static int i40e_get_capabilities(struct i40e_pf *pf)
 	/* increment MSI-X count because current FW skips one */
 	pf->hw.func_caps.num_msix_vectors++;
 
+	if (((pf->hw.aq.fw_maj_ver == 2) && (pf->hw.aq.fw_min_ver < 22)) ||
+	    (pf->hw.aq.fw_maj_ver < 2)) {
+		pf->hw.func_caps.num_msix_vectors++;
+		pf->hw.func_caps.num_msix_vectors_vf++;
+	}
+
 	if (pf->hw.debug_mask & I40E_DEBUG_USER)
 		dev_info(&pf->pdev->dev,
 			 "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
@@ -5131,9 +5181,9 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
 		err = i40e_up_complete(vsi);
 		if (err)
 			goto err_up_complete;
+		clear_bit(__I40E_NEEDS_RESTART, &vsi->state);
 	}
 
-	clear_bit(__I40E_NEEDS_RESTART, &vsi->state);
 	return;
 
 err_up_complete:
@@ -5156,6 +5206,7 @@ static void i40e_fdir_teardown(struct i40e_pf *pf)
 {
 	int i;
 
+	i40e_fdir_filter_exit(pf);
 	for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
 		if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
 			i40e_vsi_release(pf->vsi[i]);
@@ -7930,13 +7981,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	err = i40e_init_adminq(hw);
 	dev_info(&pdev->dev, "%s\n", i40e_fw_version_str(hw));
-	if (((hw->nvm.version & I40E_NVM_VERSION_HI_MASK)
-		 >> I40E_NVM_VERSION_HI_SHIFT) != I40E_CURRENT_NVM_VERSION_HI) {
-		dev_info(&pdev->dev,
-			 "warning: NVM version not supported, supported version: %02x.%02x\n",
-			 I40E_CURRENT_NVM_VERSION_HI,
-			 I40E_CURRENT_NVM_VERSION_LO);
-	}
 	if (err) {
 		dev_info(&pdev->dev,
 			 "init_adminq failed: %d expecting API %02x.%02x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 73f95b081927..262bdf11d221 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -27,14 +27,14 @@
 #include "i40e_prototype.h"
 
 /**
- *  i40e_init_nvm_ops - Initialize NVM function pointers.
- *  @hw: pointer to the HW structure.
+ * i40e_init_nvm_ops - Initialize NVM function pointers
+ * @hw: pointer to the HW structure
  *
- *  Setups the function pointers and the NVM info structure. Should be called
- *  once per NVM initialization, e.g. inside the i40e_init_shared_code().
- *  Please notice that the NVM term is used here (& in all methods covered
- *  in this file) as an equivalent of the FLASH part mapped into the SR.
- *  We are accessing FLASH always thru the Shadow RAM.
+ * Setup the function pointers and the NVM info structure. Should be called
+ * once per NVM initialization, e.g. inside the i40e_init_shared_code().
+ * Please notice that the NVM term is used here (& in all methods covered
+ * in this file) as an equivalent of the FLASH part mapped into the SR.
+ * We are accessing FLASH always thru the Shadow RAM.
  **/
 i40e_status i40e_init_nvm(struct i40e_hw *hw)
 {
@@ -49,16 +49,16 @@ i40e_status i40e_init_nvm(struct i40e_hw *hw)
 	gens = rd32(hw, I40E_GLNVM_GENS);
 	sr_size = ((gens & I40E_GLNVM_GENS_SR_SIZE_MASK) >>
 			   I40E_GLNVM_GENS_SR_SIZE_SHIFT);
-	/* Switching to words (sr_size contains power of 2KB). */
+	/* Switching to words (sr_size contains power of 2KB) */
 	nvm->sr_size = (1 << sr_size) * I40E_SR_WORDS_IN_1KB;
 
-	/* Check if we are in the normal or blank NVM programming mode. */
+	/* Check if we are in the normal or blank NVM programming mode */
 	fla = rd32(hw, I40E_GLNVM_FLA);
-	if (fla & I40E_GLNVM_FLA_LOCKED_MASK) { /* Normal programming mode. */
-		/* Max NVM timeout. */
+	if (fla & I40E_GLNVM_FLA_LOCKED_MASK) { /* Normal programming mode */
+		/* Max NVM timeout */
 		nvm->timeout = I40E_MAX_NVM_TIMEOUT;
 		nvm->blank_nvm_mode = false;
-	} else { /* Blank programming mode. */
+	} else { /* Blank programming mode */
 		nvm->blank_nvm_mode = true;
 		ret_code = I40E_ERR_NVM_BLANK_MODE;
 		hw_dbg(hw, "NVM init error: unsupported blank mode.\n");
@@ -68,12 +68,12 @@ i40e_status i40e_init_nvm(struct i40e_hw *hw)
 }
 
 /**
- *  i40e_acquire_nvm - Generic request for acquiring the NVM ownership.
- *  @hw: pointer to the HW structure.
- *  @access: NVM access type (read or write).
+ * i40e_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
  *
- *  This function will request NVM ownership for reading
- *  via the proper Admin Command.
+ * This function will request NVM ownership for reading
+ * via the proper Admin Command.
  **/
 i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
 				       enum i40e_aq_resource_access_type access)
@@ -87,20 +87,20 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
 
 	ret_code = i40e_aq_request_resource(hw, I40E_NVM_RESOURCE_ID, access,
 					    0, &time, NULL);
-	/* Reading the Global Device Timer. */
+	/* Reading the Global Device Timer */
 	gtime = rd32(hw, I40E_GLVFGEN_TIMER);
 
-	/* Store the timeout. */
+	/* Store the timeout */
 	hw->nvm.hw_semaphore_timeout = I40E_MS_TO_GTIME(time) + gtime;
 
 	if (ret_code) {
-		/* Set the polling timeout. */
+		/* Set the polling timeout */
 		if (time > I40E_MAX_NVM_TIMEOUT)
 			timeout = I40E_MS_TO_GTIME(I40E_MAX_NVM_TIMEOUT)
 				  + gtime;
 		else
 			timeout = hw->nvm.hw_semaphore_timeout;
-		/* Poll until the current NVM owner timeouts. */
+		/* Poll until the current NVM owner timeouts */
 		while (gtime < timeout) {
 			usleep_range(10000, 20000);
 			ret_code = i40e_aq_request_resource(hw,
@@ -128,10 +128,10 @@ i40e_i40e_acquire_nvm_exit:
 }
 
 /**
- *  i40e_release_nvm - Generic request for releasing the NVM ownership.
- *  @hw: pointer to the HW structure.
+ * i40e_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
  *
- *  This function will release NVM resource via the proper Admin Command.
+ * This function will release NVM resource via the proper Admin Command.
  **/
 void i40e_release_nvm(struct i40e_hw *hw)
 {
@@ -140,17 +140,17 @@ void i40e_release_nvm(struct i40e_hw *hw)
 }
 
 /**
- *  i40e_poll_sr_srctl_done_bit - Polls the GLNVM_SRCTL done bit.
- *  @hw: pointer to the HW structure.
+ * i40e_poll_sr_srctl_done_bit - Polls the GLNVM_SRCTL done bit
+ * @hw: pointer to the HW structure
  *
- *  Polls the SRCTL Shadow RAM register done bit.
+ * Polls the SRCTL Shadow RAM register done bit.
  **/
 static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
 {
 	i40e_status ret_code = I40E_ERR_TIMEOUT;
 	u32 srctl, wait_cnt;
 
-	/* Poll the I40E_GLNVM_SRCTL until the done bit is set. */
+	/* Poll the I40E_GLNVM_SRCTL until the done bit is set */
 	for (wait_cnt = 0; wait_cnt < I40E_SRRD_SRCTL_ATTEMPTS; wait_cnt++) {
 		srctl = rd32(hw, I40E_GLNVM_SRCTL);
 		if (srctl & I40E_GLNVM_SRCTL_DONE_MASK) {
@@ -165,12 +165,12 @@ static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
 }
 
 /**
- *  i40e_read_nvm_word - Reads Shadow RAM
- *  @hw: pointer to the HW structure.
- *  @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
- *  @data: word read from the Shadow RAM.
+ * i40e_read_nvm_word - Reads Shadow RAM
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
  *
- *  Reads 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
  **/
 i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 					 u16 *data)
@@ -184,15 +184,15 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
 		goto read_nvm_exit;
 	}
 
-	/* Poll the done bit first. */
+	/* Poll the done bit first */
 	ret_code = i40e_poll_sr_srctl_done_bit(hw);
 	if (!ret_code) {
-		/* Write the address and start reading. */
+		/* Write the address and start reading */
 		sr_reg = (u32)(offset << I40E_GLNVM_SRCTL_ADDR_SHIFT) |
 			 (1 << I40E_GLNVM_SRCTL_START_SHIFT);
 		wr32(hw, I40E_GLNVM_SRCTL, sr_reg);
 
-		/* Poll I40E_GLNVM_SRCTL until the done bit is set. */
+		/* Poll I40E_GLNVM_SRCTL until the done bit is set */
 		ret_code = i40e_poll_sr_srctl_done_bit(hw);
 		if (!ret_code) {
 			sr_reg = rd32(hw, I40E_GLNVM_SRDATA);
@@ -210,16 +210,15 @@ read_nvm_exit:
 }
 
 /**
- *  i40e_read_nvm_buffer - Reads Shadow RAM buffer.
- *  @hw: pointer to the HW structure.
- *  @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
- *  @words: number of words to read (in) &
- *          number of words read before the NVM ownership timeout (out).
- *  @data: words read from the Shadow RAM.
+ * i40e_read_nvm_buffer - Reads Shadow RAM buffer
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
  *
- *  Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
- *  method. The buffer read is preceded by the NVM ownership take
- *  and followed by the release.
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
  **/
 i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
 					   u16 *words, u16 *data)
@@ -227,7 +226,7 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
 	i40e_status ret_code = 0;
 	u16 index, word;
 
-	/* Loop thru the selected region. */
+	/* Loop thru the selected region */
 	for (word = 0; word < *words; word++) {
 		index = offset + word;
 		ret_code = i40e_read_nvm_word(hw, index, &data[word]);
@@ -235,21 +234,21 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
 			break;
 	}
 
-	/* Update the number of words read from the Shadow RAM. */
+	/* Update the number of words read from the Shadow RAM */
 	*words = word;
 
 	return ret_code;
 }
 
 /**
- *  i40e_calc_nvm_checksum - Calculates and returns the checksum
- *  @hw: pointer to hardware structure
- *  @checksum: pointer to the checksum
+ * i40e_calc_nvm_checksum - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ * @checksum: pointer to the checksum
  *
- *  This function calculate SW Checksum that covers the whole 64kB shadow RAM
- *  except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD
- *  is customer specific and unknown. Therefore, this function skips all maximum
- *  possible size of VPD (1kB).
+ * This function calculates SW Checksum that covers the whole 64kB shadow RAM
+ * except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD
+ * is customer specific and unknown. Therefore, this function skips all maximum
+ * possible size of VPD (1kB).
  **/
 static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
 						    u16 *checksum)
@@ -311,12 +310,12 @@ i40e_calc_nvm_checksum_exit:
 }
 
 /**
- *  i40e_validate_nvm_checksum - Validate EEPROM checksum
- *  @hw: pointer to hardware structure
- *  @checksum: calculated checksum
+ * i40e_validate_nvm_checksum - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum: calculated checksum
  *
- *  Performs checksum calculation and validates the NVM SW checksum. If the
- *  caller does not need checksum, the value can be NULL.
+ * Performs checksum calculation and validates the NVM SW checksum. If the
+ * caller does not need checksum, the value can be NULL.
  **/
 i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
 						 u16 *checksum)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index ed91f93ede2b..9cd57e617959 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -231,6 +231,13 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
 						 u16 *checksum);
 void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
 
+extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+
+static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+	return i40e_ptype_lookup[ptype];
+}
+
 /* prototype for functions used for SW locks */
 
 /* i40e_common for VF drivers*/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 19af4ce0a4fe..2081bdb214e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -25,6 +25,7 @@
  ******************************************************************************/
 
 #include "i40e.h"
+#include "i40e_prototype.h"
 
 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 				u32 td_tag)
@@ -39,11 +40,12 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
 /**
  * i40e_program_fdir_filter - Program a Flow Director filter
- * @fdir_input: Packet data that will be filter parameters
+ * @fdir_data: Packet data that will be filter parameters
+ * @raw_packet: the pre-allocated packet buffer for FDir
  * @pf: The pf pointer
  * @add: True for add/update, False for remove
  **/
-int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
+int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
 			     struct i40e_pf *pf, bool add)
 {
 	struct i40e_filter_program_desc *fdir_desc;
@@ -68,8 +70,8 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
 	tx_ring = vsi->tx_rings[0];
 	dev = tx_ring->dev;
 
-	dma = dma_map_single(dev, fdir_data->raw_packet,
-			     I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
+	dma = dma_map_single(dev, raw_packet,
+			     I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
 	if (dma_mapping_error(dev, dma))
 		goto dma_fail;
 
@@ -132,14 +134,14 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
 	tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
 
 	/* record length, and DMA address */
-	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_LOOKUP);
+	dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
 	dma_unmap_addr_set(tx_buf, dma, dma);
 
 	tx_desc->buffer_addr = cpu_to_le64(dma);
 	td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
 
 	tx_desc->cmd_type_offset_bsz =
-		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0);
+		build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
 
 	/* set the timestamp */
 	tx_buf->time_stamp = jiffies;
@@ -161,6 +163,270 @@ dma_fail:
 	return -1;
 }
 
+#define IP_HEADER_OFFSET 14
+#define I40E_UDPIP_DUMMY_PACKET_LEN 42
+/**
+ * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir
+ * @add: true adds a filter, false removes it
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
+				   struct i40e_fdir_filter *fd_data,
+				   u8 *raw_packet, bool add)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct udphdr *udp;
+	struct iphdr *ip;
+	bool err = false;
+	int ret;
+	int i;
+	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+		0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+	memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
+
+	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+	udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
+	      + sizeof(struct iphdr));
+
+	ip->daddr = fd_data->dst_ip[0];
+	udp->dest = fd_data->dst_port;
+	ip->saddr = fd_data->src_ip[0];
+	udp->source = fd_data->src_port;
+
+	for (i = I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP;
+	     i <= I40E_FILTER_PCTYPE_NONF_IPV4_UDP; i++) {
+		fd_data->pctype = i;
+		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Filter command send failed for PCTYPE %d (ret = %d)\n",
+				 fd_data->pctype, ret);
+			err = true;
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "Filter OK for PCTYPE %d (ret = %d)\n",
+				 fd_data->pctype, ret);
+		}
+	}
+
+	return err ? -EOPNOTSUPP : 0;
+}
+
+#define I40E_TCPIP_DUMMY_PACKET_LEN 54
+/**
+ * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir
+ * @add: true adds a filter, false removes it
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
+				   struct i40e_fdir_filter *fd_data,
+				   u8 *raw_packet, bool add)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct tcphdr *tcp;
+	struct iphdr *ip;
+	bool err = false;
+	int ret;
+	/* Dummy packet */
+	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+		0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
+		0x0, 0x72, 0, 0, 0, 0};
+
+	memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
+
+	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+	tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
+	      + sizeof(struct iphdr));
+
+	ip->daddr = fd_data->dst_ip[0];
+	tcp->dest = fd_data->dst_port;
+	ip->saddr = fd_data->src_ip[0];
+	tcp->source = fd_data->src_port;
+
+	if (add) {
+		if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
+			dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
+			pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+		}
+	}
+
+	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN;
+	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Filter command send failed for PCTYPE %d (ret = %d)\n",
+			 fd_data->pctype, ret);
+		err = true;
+	} else {
+		dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
+			 fd_data->pctype, ret);
+	}
+
+	fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+
+	ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+	if (ret) {
+		dev_info(&pf->pdev->dev,
+			 "Filter command send failed for PCTYPE %d (ret = %d)\n",
+			 fd_data->pctype, ret);
+		err = true;
+	} else {
+		dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
+			  fd_data->pctype, ret);
+	}
+
+	return err ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir
+ * @add: true adds a filter, false removes it
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
+				    struct i40e_fdir_filter *fd_data,
+				    u8 *raw_packet, bool add)
+{
+	return -EOPNOTSUPP;
+}
+
+#define I40E_IP_DUMMY_PACKET_LEN 34
+/**
+ * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir
+ * @add: true adds a filter, false removes it
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
+				  struct i40e_fdir_filter *fd_data,
+				  u8 *raw_packet, bool add)
+{
+	struct i40e_pf *pf = vsi->back;
+	struct iphdr *ip;
+	bool err = false;
+	int ret;
+	int i;
+	static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+		0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0};
+
+	memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
+	ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+
+	ip->saddr = fd_data->src_ip[0];
+	ip->daddr = fd_data->dst_ip[0];
+	ip->protocol = 0;
+
+	for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+	     i <= I40E_FILTER_PCTYPE_FRAG_IPV4;	i++) {
+		fd_data->pctype = i;
+		ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+
+		if (ret) {
+			dev_info(&pf->pdev->dev,
+				 "Filter command send failed for PCTYPE %d (ret = %d)\n",
+				 fd_data->pctype, ret);
+			err = true;
+		} else {
+			dev_info(&pf->pdev->dev,
+				 "Filter OK for PCTYPE %d (ret = %d)\n",
+				 fd_data->pctype, ret);
+		}
+	}
+
+	return err ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * i40e_add_del_fdir - Build raw packets to add/del fdir filter
+ * @vsi: pointer to the targeted VSI
+ * @cmd: command to get or set RX flow classification rules
+ * @add: true adds a filter, false removes it
+ *
+ **/
+int i40e_add_del_fdir(struct i40e_vsi *vsi,
+		      struct i40e_fdir_filter *input, bool add)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 *raw_packet;
+	int ret;
+
+	/* Populate the Flow Director that we have at the moment
+	 * and allocate the raw packet buffer for the calling functions
+	 */
+	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+	if (!raw_packet)
+		return -ENOMEM;
+
+	switch (input->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		ret = i40e_add_del_fdir_tcpv4(vsi, input, raw_packet,
+					      add);
+		break;
+	case UDP_V4_FLOW:
+		ret = i40e_add_del_fdir_udpv4(vsi, input, raw_packet,
+					      add);
+		break;
+	case SCTP_V4_FLOW:
+		ret = i40e_add_del_fdir_sctpv4(vsi, input, raw_packet,
+					       add);
+		break;
+	case IPV4_FLOW:
+		ret = i40e_add_del_fdir_ipv4(vsi, input, raw_packet,
+					     add);
+		break;
+	case IP_USER_FLOW:
+		switch (input->ip4_proto) {
+		case IPPROTO_TCP:
+			ret = i40e_add_del_fdir_tcpv4(vsi, input,
+						      raw_packet, add);
+			break;
+		case IPPROTO_UDP:
+			ret = i40e_add_del_fdir_udpv4(vsi, input,
+						      raw_packet, add);
+			break;
+		case IPPROTO_SCTP:
+			ret = i40e_add_del_fdir_sctpv4(vsi, input,
+						       raw_packet, add);
+			break;
+		default:
+			ret = i40e_add_del_fdir_ipv4(vsi, input,
+						     raw_packet, add);
+			break;
+		}
+		break;
+	default:
+		dev_info(&pf->pdev->dev, "Could not specify spec type %d",
+			 input->flow_type);
+		ret = -EINVAL;
+	}
+
+	kfree(raw_packet);
+	return ret;
+}
+
 /**
  * i40e_fd_handle_status - check the Programming Status for FD
  * @rx_ring: the Rx ring for this descriptor
@@ -956,6 +1222,29 @@ static inline u32 i40e_rx_hash(struct i40e_ring *ring,
 }
 
 /**
+ * i40e_ptype_to_hash - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ **/
+static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
+{
+	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
+
+	if (!decoded.known)
+		return PKT_HASH_TYPE_NONE;
+
+	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	else
+		return PKT_HASH_TYPE_L2;
+}
+
+/**
  * i40e_clean_rx_irq - Reclaim resources after receive completes
  * @rx_ring:  rx ring to clean
  * @budget:   how many cleans we're allowed
@@ -972,8 +1261,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	u16 i = rx_ring->next_to_clean;
 	union i40e_rx_desc *rx_desc;
 	u32 rx_error, rx_status;
+	u8 rx_ptype;
 	u64 qword;
-	u16 rx_ptype;
 
 	rx_desc = I40E_RX_DESC(rx_ring, i);
 	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
@@ -1087,7 +1376,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 			goto next_desc;
 		}
 
-		skb->rxhash = i40e_rx_hash(rx_ring, rx_desc);
+		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
+			     i40e_ptype_to_hash(rx_ptype));
 		if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
 			i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
 					   I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
@@ -1812,7 +2102,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
 
 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
-	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 4 desc gap to avoid the cache line where head is,
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
@@ -1823,7 +2113,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
 	count += skb_shinfo(skb)->nr_frags;
 #endif
 	count += TXD_USE_COUNT(skb_headlen(skb));
-	if (i40e_maybe_stop_tx(tx_ring, count + 3)) {
+	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
 		tx_ring->tx_stats.tx_busy++;
 		return 0;
 	}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 189e250198dd..42cc6ba88005 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -858,7 +858,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
 		}
 	}
 	/* allocate memory */
-	vfs = kzalloc(num_alloc_vfs * sizeof(struct i40e_vf), GFP_KERNEL);
+	vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
 	if (!vfs) {
 		ret = -ENOMEM;
 		goto err_alloc;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 7b13953b28c4..78618af271cf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -160,6 +160,372 @@ i40e_status i40evf_aq_queue_shutdown(struct i40e_hw *hw,
 }
 
 
+/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT i40e_ptype_lookup[ptype].known
+ * THEN
+ *      Packet is unknown
+ * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
+ *      Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ *      Use the enum i40e_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short */
+#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+	{	PTYPE, \
+		1, \
+		I40E_RX_PTYPE_OUTER_##OUTER_IP, \
+		I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+		I40E_RX_PTYPE_##OUTER_FRAG, \
+		I40E_RX_PTYPE_TUNNEL_##T, \
+		I40E_RX_PTYPE_TUNNEL_END_##TE, \
+		I40E_RX_PTYPE_##TEF, \
+		I40E_RX_PTYPE_INNER_PROT_##I, \
+		I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define I40E_PTT_UNUSED_ENTRY(PTYPE) \
+		{ PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros makes the table fit but are terse */
+#define I40E_RX_PTYPE_NOF		I40E_RX_PTYPE_NOT_FRAG
+#define I40E_RX_PTYPE_FRG		I40E_RX_PTYPE_FRAG
+#define I40E_RX_PTYPE_INNER_PROT_TS	I40E_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
+	/* L2 Packet types */
+	I40E_PTT_UNUSED_ENTRY(0),
+	I40E_PTT(1,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(2,  L2, NONE, NOF, NONE, NONE, NOF, TS,   PAY2),
+	I40E_PTT(3,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT_UNUSED_ENTRY(4),
+	I40E_PTT_UNUSED_ENTRY(5),
+	I40E_PTT(6,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(7,  L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT_UNUSED_ENTRY(8),
+	I40E_PTT_UNUSED_ENTRY(9),
+	I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+	I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+	I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+	/* Non Tunneled IPv4 */
+	I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(25),
+	I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv4 */
+	I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(32),
+	I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> IPv6 */
+	I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(39),
+	I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT */
+	I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> IPv4 */
+	I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(47),
+	I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> IPv6 */
+	I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(54),
+	I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC */
+	I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+	I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(62),
+	I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+	I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(69),
+	I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv4 --> GRE/NAT --> MAC/VLAN */
+	I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+	I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(77),
+	I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+	I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(84),
+	I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* Non Tunneled IPv6 */
+	I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+	I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP,  PAY3),
+	I40E_PTT_UNUSED_ENTRY(91),
+	I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP,  PAY4),
+	I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+	I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv4 */
+	I40E_PTT(95,  IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(96,  IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(97,  IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(98),
+	I40E_PTT(99,  IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> IPv6 */
+	I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(105),
+	I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT */
+	I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> IPv4 */
+	I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(113),
+	I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> IPv6 */
+	I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(120),
+	I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC */
+	I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+	I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(128),
+	I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+	I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(135),
+	I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN */
+	I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+	I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+	I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+	I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(143),
+	I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP,  PAY4),
+	I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+	I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+	/* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+	I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+	I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+	I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP,  PAY4),
+	I40E_PTT_UNUSED_ENTRY(150),
+	I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP,  PAY4),
+	I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+	I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+	/* unused entries */
+	I40E_PTT_UNUSED_ENTRY(154),
+	I40E_PTT_UNUSED_ENTRY(155),
+	I40E_PTT_UNUSED_ENTRY(156),
+	I40E_PTT_UNUSED_ENTRY(157),
+	I40E_PTT_UNUSED_ENTRY(158),
+	I40E_PTT_UNUSED_ENTRY(159),
+
+	I40E_PTT_UNUSED_ENTRY(160),
+	I40E_PTT_UNUSED_ENTRY(161),
+	I40E_PTT_UNUSED_ENTRY(162),
+	I40E_PTT_UNUSED_ENTRY(163),
+	I40E_PTT_UNUSED_ENTRY(164),
+	I40E_PTT_UNUSED_ENTRY(165),
+	I40E_PTT_UNUSED_ENTRY(166),
+	I40E_PTT_UNUSED_ENTRY(167),
+	I40E_PTT_UNUSED_ENTRY(168),
+	I40E_PTT_UNUSED_ENTRY(169),
+
+	I40E_PTT_UNUSED_ENTRY(170),
+	I40E_PTT_UNUSED_ENTRY(171),
+	I40E_PTT_UNUSED_ENTRY(172),
+	I40E_PTT_UNUSED_ENTRY(173),
+	I40E_PTT_UNUSED_ENTRY(174),
+	I40E_PTT_UNUSED_ENTRY(175),
+	I40E_PTT_UNUSED_ENTRY(176),
+	I40E_PTT_UNUSED_ENTRY(177),
+	I40E_PTT_UNUSED_ENTRY(178),
+	I40E_PTT_UNUSED_ENTRY(179),
+
+	I40E_PTT_UNUSED_ENTRY(180),
+	I40E_PTT_UNUSED_ENTRY(181),
+	I40E_PTT_UNUSED_ENTRY(182),
+	I40E_PTT_UNUSED_ENTRY(183),
+	I40E_PTT_UNUSED_ENTRY(184),
+	I40E_PTT_UNUSED_ENTRY(185),
+	I40E_PTT_UNUSED_ENTRY(186),
+	I40E_PTT_UNUSED_ENTRY(187),
+	I40E_PTT_UNUSED_ENTRY(188),
+	I40E_PTT_UNUSED_ENTRY(189),
+
+	I40E_PTT_UNUSED_ENTRY(190),
+	I40E_PTT_UNUSED_ENTRY(191),
+	I40E_PTT_UNUSED_ENTRY(192),
+	I40E_PTT_UNUSED_ENTRY(193),
+	I40E_PTT_UNUSED_ENTRY(194),
+	I40E_PTT_UNUSED_ENTRY(195),
+	I40E_PTT_UNUSED_ENTRY(196),
+	I40E_PTT_UNUSED_ENTRY(197),
+	I40E_PTT_UNUSED_ENTRY(198),
+	I40E_PTT_UNUSED_ENTRY(199),
+
+	I40E_PTT_UNUSED_ENTRY(200),
+	I40E_PTT_UNUSED_ENTRY(201),
+	I40E_PTT_UNUSED_ENTRY(202),
+	I40E_PTT_UNUSED_ENTRY(203),
+	I40E_PTT_UNUSED_ENTRY(204),
+	I40E_PTT_UNUSED_ENTRY(205),
+	I40E_PTT_UNUSED_ENTRY(206),
+	I40E_PTT_UNUSED_ENTRY(207),
+	I40E_PTT_UNUSED_ENTRY(208),
+	I40E_PTT_UNUSED_ENTRY(209),
+
+	I40E_PTT_UNUSED_ENTRY(210),
+	I40E_PTT_UNUSED_ENTRY(211),
+	I40E_PTT_UNUSED_ENTRY(212),
+	I40E_PTT_UNUSED_ENTRY(213),
+	I40E_PTT_UNUSED_ENTRY(214),
+	I40E_PTT_UNUSED_ENTRY(215),
+	I40E_PTT_UNUSED_ENTRY(216),
+	I40E_PTT_UNUSED_ENTRY(217),
+	I40E_PTT_UNUSED_ENTRY(218),
+	I40E_PTT_UNUSED_ENTRY(219),
+
+	I40E_PTT_UNUSED_ENTRY(220),
+	I40E_PTT_UNUSED_ENTRY(221),
+	I40E_PTT_UNUSED_ENTRY(222),
+	I40E_PTT_UNUSED_ENTRY(223),
+	I40E_PTT_UNUSED_ENTRY(224),
+	I40E_PTT_UNUSED_ENTRY(225),
+	I40E_PTT_UNUSED_ENTRY(226),
+	I40E_PTT_UNUSED_ENTRY(227),
+	I40E_PTT_UNUSED_ENTRY(228),
+	I40E_PTT_UNUSED_ENTRY(229),
+
+	I40E_PTT_UNUSED_ENTRY(230),
+	I40E_PTT_UNUSED_ENTRY(231),
+	I40E_PTT_UNUSED_ENTRY(232),
+	I40E_PTT_UNUSED_ENTRY(233),
+	I40E_PTT_UNUSED_ENTRY(234),
+	I40E_PTT_UNUSED_ENTRY(235),
+	I40E_PTT_UNUSED_ENTRY(236),
+	I40E_PTT_UNUSED_ENTRY(237),
+	I40E_PTT_UNUSED_ENTRY(238),
+	I40E_PTT_UNUSED_ENTRY(239),
+
+	I40E_PTT_UNUSED_ENTRY(240),
+	I40E_PTT_UNUSED_ENTRY(241),
+	I40E_PTT_UNUSED_ENTRY(242),
+	I40E_PTT_UNUSED_ENTRY(243),
+	I40E_PTT_UNUSED_ENTRY(244),
+	I40E_PTT_UNUSED_ENTRY(245),
+	I40E_PTT_UNUSED_ENTRY(246),
+	I40E_PTT_UNUSED_ENTRY(247),
+	I40E_PTT_UNUSED_ENTRY(248),
+	I40E_PTT_UNUSED_ENTRY(249),
+
+	I40E_PTT_UNUSED_ENTRY(250),
+	I40E_PTT_UNUSED_ENTRY(251),
+	I40E_PTT_UNUSED_ENTRY(252),
+	I40E_PTT_UNUSED_ENTRY(253),
+	I40E_PTT_UNUSED_ENTRY(254),
+	I40E_PTT_UNUSED_ENTRY(255)
+};
+
+
 /**
  * i40e_aq_send_msg_to_pf
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 7841573a58c9..33c99051cc96 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -63,6 +63,13 @@ i40e_status i40evf_aq_queue_shutdown(struct i40e_hw *hw,
 
 i40e_status i40e_set_mac_type(struct i40e_hw *hw);
 
+extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+
+static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+	return  i40e_ptype_lookup[ptype];
+}
+
 /* prototype for functions used for SW locks */
 
 /* i40e_common for VF drivers*/
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 827bb5fa4af9..b1d87c6a5c35 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -24,6 +24,7 @@
 #include <linux/prefetch.h>
 
 #include "i40evf.h"
+#include "i40e_prototype.h"
 
 static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
 				u32 td_tag)
@@ -786,6 +787,29 @@ static inline u32 i40e_rx_hash(struct i40e_ring *ring,
 }
 
 /**
+ * i40e_ptype_to_hash - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ **/
+static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
+{
+	struct i40e_rx_ptype_decoded decoded = decode_rx_desc_ptype(ptype);
+
+	if (!decoded.known)
+		return PKT_HASH_TYPE_NONE;
+
+	if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+	    decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	else if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
+		 decoded.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	else
+		return PKT_HASH_TYPE_L2;
+}
+
+/**
  * i40e_clean_rx_irq - Reclaim resources after receive completes
  * @rx_ring:  rx ring to clean
  * @budget:   how many cleans we're allowed
@@ -802,8 +826,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	u16 i = rx_ring->next_to_clean;
 	union i40e_rx_desc *rx_desc;
 	u32 rx_error, rx_status;
+	u8 rx_ptype;
 	u64 qword;
-	u16 rx_ptype;
 
 	rx_desc = I40E_RX_DESC(rx_ring, i);
 	qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
@@ -912,7 +936,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 			goto next_desc;
 		}
 
-		skb->rxhash = i40e_rx_hash(rx_ring, rx_desc);
+		skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
+			     i40e_ptype_to_hash(rx_ptype));
 		/* probably a little skewed due to removing CRC */
 		total_rx_bytes += skb->len;
 		total_rx_packets++;
@@ -1457,7 +1482,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
 
 	/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
 	 *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
-	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 4 desc gap to avoid the cache line where head is,
 	 *       + 1 desc for context descriptor,
 	 * otherwise try next time
 	 */
@@ -1468,7 +1493,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
 	count += skb_shinfo(skb)->nr_frags;
 #endif
 	count += TXD_USE_COUNT(skb_headlen(skb));
-	if (i40e_maybe_stop_tx(tx_ring, count + 3)) {
+	if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
 		tx_ring->tx_stats.tx_busy++;
 		return 0;
 	}
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index b2c03bca7929..11d0b61510b0 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -31,7 +31,7 @@ char i40evf_driver_name[] = "i40evf";
 static const char i40evf_driver_string[] =
 	"Intel(R) XL710 X710 Virtual Function Network Driver";
 
-#define DRV_VERSION "0.9.13"
+#define DRV_VERSION "0.9.14"
 const char i40evf_driver_version[] = DRV_VERSION;
 static const char i40evf_copyright[] =
 	"Copyright (c) 2013 - 2014 Intel Corporation.";
@@ -2036,6 +2036,7 @@ static void i40evf_init_task(struct work_struct *work)
 			    NETIF_F_IPV6_CSUM |
 			    NETIF_F_TSO |
 			    NETIF_F_TSO6 |
+			    NETIF_F_RXCSUM |
 			    NETIF_F_GRO;
 
 	if (adapter->vf_res->vf_offload_flags
@@ -2046,6 +2047,10 @@ static void i40evf_init_task(struct work_struct *work)
 				    NETIF_F_HW_VLAN_CTAG_FILTER;
 	}
 
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features;
+	netdev->hw_features &= ~NETIF_F_RXCSUM;
+
 	if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
 		dev_info(&pdev->dev, "Invalid MAC address %pMAC, using random\n",
 			 adapter->hw.mac.addr);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 3454437fcd95..0c59d4fe7e3a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -332,7 +332,7 @@ static struct mlx4_interface mlx4_en_interface = {
 	.protocol	= MLX4_PROT_ETH,
 };
 
-void mlx4_en_verify_params(void)
+static void mlx4_en_verify_params(void)
 {
 	if (pfctx > MAX_PFC_TX) {
 		pr_warn("mlx4_en: WARNING: illegal module parameter pfctx 0x%x - should be in range 0-0x%x, will be changed to default (0)\n",
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b8eee365e15d..c7ef30dee1b9 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -21,9 +21,10 @@
 #include <linux/list.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
 
 /* Version Information */
-#define DRIVER_VERSION "v1.05.0 (2014/02/18)"
+#define DRIVER_VERSION "v1.06.0 (2014/03/03)"
 #define DRIVER_AUTHOR "Realtek linux nic maintainers <[email protected]>"
 #define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
 #define MODULENAME "r8152"
@@ -447,6 +448,7 @@ enum rtl8152_flags {
 	RTL8152_LINK_CHG,
 	SELECTIVE_SUSPEND,
 	PHY_RESET,
+	SCHEDULE_TASKLET,
 };
 
 /* Define these values to match your device */
@@ -466,8 +468,18 @@ enum rtl8152_flags {
 struct rx_desc {
 	__le32 opts1;
 #define RX_LEN_MASK			0x7fff
+
 	__le32 opts2;
+#define RD_UDP_CS			(1 << 23)
+#define RD_TCP_CS			(1 << 22)
+#define RD_IPV6_CS			(1 << 20)
+#define RD_IPV4_CS			(1 << 19)
+
 	__le32 opts3;
+#define IPF				(1 << 23) /* IP checksum fail */
+#define UDPF				(1 << 22) /* UDP checksum fail */
+#define TCPF				(1 << 21) /* TCP checksum fail */
+
 	__le32 opts4;
 	__le32 opts5;
 	__le32 opts6;
@@ -477,13 +489,21 @@ struct tx_desc {
 	__le32 opts1;
 #define TX_FS			(1 << 31) /* First segment of a packet */
 #define TX_LS			(1 << 30) /* Final segment of a packet */
-#define TX_LEN_MASK		0x3ffff
+#define GTSENDV4		(1 << 28)
+#define GTSENDV6		(1 << 27)
+#define GTTCPHO_SHIFT		18
+#define GTTCPHO_MAX		0x7fU
+#define TX_LEN_MAX		0x3ffffU
 
 	__le32 opts2;
 #define UDP_CS			(1 << 31) /* Calculate UDP/IP checksum */
 #define TCP_CS			(1 << 30) /* Calculate TCP/IP checksum */
 #define IPV4_CS			(1 << 29) /* Calculate IPv4 checksum */
 #define IPV6_CS			(1 << 28) /* Calculate IPv6 checksum */
+#define MSS_SHIFT		17
+#define MSS_MAX			0x7ffU
+#define TCPHO_SHIFT		17
+#define TCPHO_MAX		0x7ffU
 };
 
 struct r8152;
@@ -550,12 +570,21 @@ enum rtl_version {
 	RTL_VER_MAX
 };
 
+enum tx_csum_stat {
+	TX_CSUM_SUCCESS = 0,
+	TX_CSUM_TSO,
+	TX_CSUM_NONE
+};
+
 /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
  * The RTL chips use a 64 element hash table based on the Ethernet CRC.
  */
 static const int multicast_filter_limit = 32;
 static unsigned int rx_buf_sz = 16384;
 
+#define RTL_LIMITED_TSO_SIZE	(rx_buf_sz - sizeof(struct tx_desc) - \
+				 VLAN_ETH_HLEN - VLAN_HLEN)
+
 static
 int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
 {
@@ -963,7 +992,6 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
 static void read_bulk_callback(struct urb *urb)
 {
 	struct net_device *netdev;
-	unsigned long flags;
 	int status = urb->status;
 	struct rx_agg *agg;
 	struct r8152 *tp;
@@ -997,9 +1025,9 @@ static void read_bulk_callback(struct urb *urb)
 		if (urb->actual_length < ETH_ZLEN)
 			break;
 
-		spin_lock_irqsave(&tp->rx_lock, flags);
+		spin_lock(&tp->rx_lock);
 		list_add_tail(&agg->list, &tp->rx_done);
-		spin_unlock_irqrestore(&tp->rx_lock, flags);
+		spin_unlock(&tp->rx_lock);
 		tasklet_schedule(&tp->tl);
 		return;
 	case -ESHUTDOWN:
@@ -1022,9 +1050,9 @@ static void read_bulk_callback(struct urb *urb)
 	if (result == -ENODEV) {
 		netif_device_detach(tp->netdev);
 	} else if (result) {
-		spin_lock_irqsave(&tp->rx_lock, flags);
+		spin_lock(&tp->rx_lock);
 		list_add_tail(&agg->list, &tp->rx_done);
-		spin_unlock_irqrestore(&tp->rx_lock, flags);
+		spin_unlock(&tp->rx_lock);
 		tasklet_schedule(&tp->tl);
 	}
 }
@@ -1033,7 +1061,6 @@ static void write_bulk_callback(struct urb *urb)
 {
 	struct net_device_stats *stats;
 	struct net_device *netdev;
-	unsigned long flags;
 	struct tx_agg *agg;
 	struct r8152 *tp;
 	int status = urb->status;
@@ -1057,9 +1084,9 @@ static void write_bulk_callback(struct urb *urb)
 		stats->tx_bytes += agg->skb_len;
 	}
 
-	spin_lock_irqsave(&tp->tx_lock, flags);
+	spin_lock(&tp->tx_lock);
 	list_add_tail(&agg->list, &tp->tx_free);
-	spin_unlock_irqrestore(&tp->tx_lock, flags);
+	spin_unlock(&tp->tx_lock);
 
 	usb_autopm_put_interface_async(tp->intf);
 
@@ -1073,7 +1100,7 @@ static void write_bulk_callback(struct urb *urb)
 		return;
 
 	if (!skb_queue_empty(&tp->tx_queue))
-		schedule_delayed_work(&tp->schedule, 0);
+		tasklet_schedule(&tp->tl);
 }
 
 static void intr_callback(struct urb *urb)
@@ -1268,6 +1295,9 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
 	struct tx_agg *agg = NULL;
 	unsigned long flags;
 
+	if (list_empty(&tp->tx_free))
+		return NULL;
+
 	spin_lock_irqsave(&tp->tx_lock, flags);
 	if (!list_empty(&tp->tx_free)) {
 		struct list_head *cursor;
@@ -1281,24 +1311,130 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
 	return agg;
 }
 
-static void
-r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
+static inline __be16 get_protocol(struct sk_buff *skb)
 {
-	memset(desc, 0, sizeof(*desc));
+	__be16 protocol;
 
-	desc->opts1 = cpu_to_le32((skb->len & TX_LEN_MASK) | TX_FS | TX_LS);
+	if (skb->protocol == htons(ETH_P_8021Q))
+		protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+	else
+		protocol = skb->protocol;
+
+	return protocol;
+}
+
+/*
+ * r8152_csum_workaround()
+ * The hw limites the value the transport offset. When the offset is out of the
+ * range, calculate the checksum by sw.
+ */
+static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
+				  struct sk_buff_head *list)
+{
+	if (skb_shinfo(skb)->gso_size) {
+		netdev_features_t features = tp->netdev->features;
+		struct sk_buff_head seg_list;
+		struct sk_buff *segs, *nskb;
+
+		features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
+		segs = skb_gso_segment(skb, features);
+		if (IS_ERR(segs) || !segs)
+			goto drop;
+
+		__skb_queue_head_init(&seg_list);
+
+		do {
+			nskb = segs;
+			segs = segs->next;
+			nskb->next = NULL;
+			__skb_queue_tail(&seg_list, nskb);
+		} while (segs);
+
+		skb_queue_splice(&seg_list, list);
+		dev_kfree_skb(skb);
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		if (skb_checksum_help(skb) < 0)
+			goto drop;
+
+		__skb_queue_head(list, skb);
+	} else {
+		struct net_device_stats *stats;
+
+drop:
+		stats = &tp->netdev->stats;
+		stats->tx_dropped++;
+		dev_kfree_skb(skb);
+	}
+}
+
+/*
+ * msdn_giant_send_check()
+ * According to the document of microsoft, the TCP Pseudo Header excludes the
+ * packet length for IPv6 TCP large packets.
+ */
+static int msdn_giant_send_check(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ipv6h;
+	struct tcphdr *th;
+
+	ipv6h = ipv6_hdr(skb);
+	th = tcp_hdr(skb);
+
+	th->check = 0;
+	th->check = ~tcp_v6_check(0, &ipv6h->saddr, &ipv6h->daddr, 0);
+
+	return 0;
+}
+
+static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
+			 struct sk_buff *skb, u32 len, u32 transport_offset)
+{
+	u32 mss = skb_shinfo(skb)->gso_size;
+	u32 opts1, opts2 = 0;
+	int ret = TX_CSUM_SUCCESS;
+
+	WARN_ON_ONCE(len > TX_LEN_MAX);
+
+	opts1 = len | TX_FS | TX_LS;
+
+	if (mss) {
+		if (transport_offset > GTTCPHO_MAX) {
+			netif_warn(tp, tx_err, tp->netdev,
+				   "Invalid transport offset 0x%x for TSO\n",
+				   transport_offset);
+			ret = TX_CSUM_TSO;
+			goto unavailable;
+		}
+
+		switch (get_protocol(skb)) {
+		case htons(ETH_P_IP):
+			opts1 |= GTSENDV4;
+			break;
+
+		case htons(ETH_P_IPV6):
+			opts1 |= GTSENDV6;
+			msdn_giant_send_check(skb);
+			break;
 
-	if (skb->ip_summed == CHECKSUM_PARTIAL) {
-		__be16 protocol;
+		default:
+			WARN_ON_ONCE(1);
+			break;
+		}
+
+		opts1 |= transport_offset << GTTCPHO_SHIFT;
+		opts2 |= min(mss, MSS_MAX) << MSS_SHIFT;
+	} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
 		u8 ip_protocol;
-		u32 opts2 = 0;
 
-		if (skb->protocol == htons(ETH_P_8021Q))
-			protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
-		else
-			protocol = skb->protocol;
+		if (transport_offset > TCPHO_MAX) {
+			netif_warn(tp, tx_err, tp->netdev,
+				   "Invalid transport offset 0x%x\n",
+				   transport_offset);
+			ret = TX_CSUM_NONE;
+			goto unavailable;
+		}
 
-		switch (protocol) {
+		switch (get_protocol(skb)) {
 		case htons(ETH_P_IP):
 			opts2 |= IPV4_CS;
 			ip_protocol = ip_hdr(skb)->protocol;
@@ -1314,30 +1450,33 @@ r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
 			break;
 		}
 
-		if (ip_protocol == IPPROTO_TCP) {
+		if (ip_protocol == IPPROTO_TCP)
 			opts2 |= TCP_CS;
-			opts2 |= (skb_transport_offset(skb) & 0x7fff) << 17;
-		} else if (ip_protocol == IPPROTO_UDP) {
+		else if (ip_protocol == IPPROTO_UDP)
 			opts2 |= UDP_CS;
-		} else {
+		else
 			WARN_ON_ONCE(1);
-		}
 
-		desc->opts2 = cpu_to_le32(opts2);
+		opts2 |= transport_offset << TCPHO_SHIFT;
 	}
+
+	desc->opts2 = cpu_to_le32(opts2);
+	desc->opts1 = cpu_to_le32(opts1);
+
+unavailable:
+	return ret;
 }
 
 static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 {
 	struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
-	unsigned long flags;
 	int remain, ret;
 	u8 *tx_data;
 
 	__skb_queue_head_init(&skb_head);
-	spin_lock_irqsave(&tx_queue->lock, flags);
+	spin_lock(&tx_queue->lock);
 	skb_queue_splice_init(tx_queue, &skb_head);
-	spin_unlock_irqrestore(&tx_queue->lock, flags);
+	spin_unlock(&tx_queue->lock);
 
 	tx_data = agg->head;
 	agg->skb_num = agg->skb_len = 0;
@@ -1347,47 +1486,65 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 		struct tx_desc *tx_desc;
 		struct sk_buff *skb;
 		unsigned int len;
+		u32 offset;
 
 		skb = __skb_dequeue(&skb_head);
 		if (!skb)
 			break;
 
-		remain -= sizeof(*tx_desc);
-		len = skb->len;
-		if (remain < len) {
+		len = skb->len + sizeof(*tx_desc);
+
+		if (len > remain) {
 			__skb_queue_head(&skb_head, skb);
 			break;
 		}
 
 		tx_data = tx_agg_align(tx_data);
 		tx_desc = (struct tx_desc *)tx_data;
+
+		offset = (u32)skb_transport_offset(skb);
+
+		if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) {
+			r8152_csum_workaround(tp, skb, &skb_head);
+			continue;
+		}
+
 		tx_data += sizeof(*tx_desc);
 
-		r8152_tx_csum(tp, tx_desc, skb);
-		memcpy(tx_data, skb->data, len);
-		agg->skb_num++;
+		len = skb->len;
+		if (skb_copy_bits(skb, 0, tx_data, len) < 0) {
+			struct net_device_stats *stats = &tp->netdev->stats;
+
+			stats->tx_dropped++;
+			dev_kfree_skb_any(skb);
+			tx_data -= sizeof(*tx_desc);
+			continue;
+		}
+
+		tx_data += len;
 		agg->skb_len += len;
+		agg->skb_num++;
+
 		dev_kfree_skb_any(skb);
 
-		tx_data += len;
 		remain = rx_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
 	}
 
 	if (!skb_queue_empty(&skb_head)) {
-		spin_lock_irqsave(&tx_queue->lock, flags);
+		spin_lock(&tx_queue->lock);
 		skb_queue_splice(&skb_head, tx_queue);
-		spin_unlock_irqrestore(&tx_queue->lock, flags);
+		spin_unlock(&tx_queue->lock);
 	}
 
-	netif_tx_lock_bh(tp->netdev);
+	netif_tx_lock(tp->netdev);
 
 	if (netif_queue_stopped(tp->netdev) &&
 	    skb_queue_len(&tp->tx_queue) < tp->tx_qlen)
 		netif_wake_queue(tp->netdev);
 
-	netif_tx_unlock_bh(tp->netdev);
+	netif_tx_unlock(tp->netdev);
 
-	ret = usb_autopm_get_interface(tp->intf);
+	ret = usb_autopm_get_interface_async(tp->intf);
 	if (ret < 0)
 		goto out_tx_fill;
 
@@ -1395,14 +1552,45 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
 			  agg->head, (int)(tx_data - (u8 *)agg->head),
 			  (usb_complete_t)write_bulk_callback, agg);
 
-	ret = usb_submit_urb(agg->urb, GFP_KERNEL);
+	ret = usb_submit_urb(agg->urb, GFP_ATOMIC);
 	if (ret < 0)
-		usb_autopm_put_interface(tp->intf);
+		usb_autopm_put_interface_async(tp->intf);
 
 out_tx_fill:
 	return ret;
 }
 
+static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
+{
+	u8 checksum = CHECKSUM_NONE;
+	u32 opts2, opts3;
+
+	if (tp->version == RTL_VER_01)
+		goto return_result;
+
+	opts2 = le32_to_cpu(rx_desc->opts2);
+	opts3 = le32_to_cpu(rx_desc->opts3);
+
+	if (opts2 & RD_IPV4_CS) {
+		if (opts3 & IPF)
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_UDP_CS) && (opts3 & UDPF))
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_TCP_CS) && (opts3 & TCPF))
+			checksum = CHECKSUM_NONE;
+		else
+			checksum = CHECKSUM_UNNECESSARY;
+	} else if (RD_IPV6_CS) {
+		if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
+			checksum = CHECKSUM_UNNECESSARY;
+		else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
+			checksum = CHECKSUM_UNNECESSARY;
+	}
+
+return_result:
+	return checksum;
+}
+
 static void rx_bottom(struct r8152 *tp)
 {
 	unsigned long flags;
@@ -1455,8 +1643,10 @@ static void rx_bottom(struct r8152 *tp)
 			skb = netdev_alloc_skb_ip_align(netdev, pkt_len);
 			if (!skb) {
 				stats->rx_dropped++;
-				break;
+				goto find_next_rx;
 			}
+
+			skb->ip_summed = r8152_rx_csum(tp, rx_desc);
 			memcpy(skb->data, rx_data, pkt_len);
 			skb_put(skb, pkt_len);
 			skb->protocol = eth_type_trans(skb, netdev);
@@ -1464,6 +1654,7 @@ static void rx_bottom(struct r8152 *tp)
 			stats->rx_packets++;
 			stats->rx_bytes += pkt_len;
 
+find_next_rx:
 			rx_data = rx_agg_align(rx_data + pkt_len + CRC_SIZE);
 			rx_desc = (struct rx_desc *)rx_data;
 			len_used = (int)(rx_data - (u8 *)agg->head);
@@ -1535,6 +1726,7 @@ static void bottom_half(unsigned long data)
 		return;
 
 	rx_bottom(tp);
+	tx_bottom(tp);
 }
 
 static
@@ -1551,16 +1743,15 @@ static void rtl_drop_queued_tx(struct r8152 *tp)
 {
 	struct net_device_stats *stats = &tp->netdev->stats;
 	struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
-	unsigned long flags;
 	struct sk_buff *skb;
 
 	if (skb_queue_empty(tx_queue))
 		return;
 
 	__skb_queue_head_init(&skb_head);
-	spin_lock_irqsave(&tx_queue->lock, flags);
+	spin_lock_bh(&tx_queue->lock);
 	skb_queue_splice_init(tx_queue, &skb_head);
-	spin_unlock_irqrestore(&tx_queue->lock, flags);
+	spin_unlock_bh(&tx_queue->lock);
 
 	while ((skb = __skb_dequeue(&skb_head))) {
 		dev_kfree_skb(skb);
@@ -1631,7 +1822,7 @@ static void _rtl8152_set_rx_mode(struct net_device *netdev)
 }
 
 static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
-					    struct net_device *netdev)
+					struct net_device *netdev)
 {
 	struct r8152 *tp = netdev_priv(netdev);
 
@@ -1639,13 +1830,17 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
 
 	skb_queue_tail(&tp->tx_queue, skb);
 
-	if (list_empty(&tp->tx_free) &&
-	    skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
+	if (!list_empty(&tp->tx_free)) {
+		if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+			set_bit(SCHEDULE_TASKLET, &tp->flags);
+			schedule_delayed_work(&tp->schedule, 0);
+		} else {
+			usb_mark_last_busy(tp->udev);
+			tasklet_schedule(&tp->tl);
+		}
+	} else if (skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
 		netif_stop_queue(netdev);
 
-	if (!list_empty(&tp->tx_free))
-		schedule_delayed_work(&tp->schedule, 0);
-
 	return NETDEV_TX_OK;
 }
 
@@ -2524,8 +2719,11 @@ static void rtl_work_func_t(struct work_struct *work)
 	if (test_bit(RTL8152_SET_RX_MODE, &tp->flags))
 		_rtl8152_set_rx_mode(tp->netdev);
 
-	if (tp->speed & LINK_STATUS)
-		tx_bottom(tp);
+	if (test_bit(SCHEDULE_TASKLET, &tp->flags) &&
+	    (tp->speed & LINK_STATUS)) {
+		clear_bit(SCHEDULE_TASKLET, &tp->flags);
+		tasklet_schedule(&tp->tl);
+	}
 
 	if (test_bit(PHY_RESET, &tp->flags))
 		rtl_phy_reset(tp);
@@ -3094,10 +3292,15 @@ static int rtl8152_probe(struct usb_interface *intf,
 	netdev->netdev_ops = &rtl8152_netdev_ops;
 	netdev->watchdog_timeo = RTL8152_TX_TIMEOUT;
 
-	netdev->features |= NETIF_F_IP_CSUM;
-	netdev->hw_features = NETIF_F_IP_CSUM;
+	netdev->features |= NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+			    NETIF_F_TSO | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM |
+			    NETIF_F_TSO6;
+	netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+			      NETIF_F_TSO | NETIF_F_FRAGLIST |
+			      NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
 
 	SET_ETHTOOL_OPS(netdev, &ops);
+	netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
 
 	tp->mii.dev = netdev;
 	tp->mii.mdio_read = read_mii_word;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ae413a2cbee7..bef37be402b8 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -48,37 +48,19 @@
 typedef unsigned int pending_ring_idx_t;
 #define INVALID_PENDING_RING_IDX (~0U)
 
-/* For the head field in pending_tx_info: it is used to indicate
- * whether this tx info is the head of one or more coalesced requests.
- *
- * When head != INVALID_PENDING_RING_IDX, it means the start of a new
- * tx requests queue and the end of previous queue.
- *
- * An example sequence of head fields (I = INVALID_PENDING_RING_IDX):
- *
- * ...|0 I I I|5 I|9 I I I|...
- * -->|<-INUSE----------------
- *
- * After consuming the first slot(s) we have:
- *
- * ...|V V V V|5 I|9 I I I|...
- * -----FREE->|<-INUSE--------
- *
- * where V stands for "valid pending ring index". Any number other
- * than INVALID_PENDING_RING_IDX is OK. These entries are considered
- * free and can contain any number other than
- * INVALID_PENDING_RING_IDX. In practice we use 0.
- *
- * The in use non-INVALID_PENDING_RING_IDX (say 0, 5 and 9 in the
- * above example) number is the index into pending_tx_info and
- * mmap_pages arrays.
- */
 struct pending_tx_info {
-	struct xen_netif_tx_request req; /* coalesced tx request */
-	pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
-				  * if it is head of one or more tx
-				  * reqs
-				  */
+	struct xen_netif_tx_request req; /* tx request */
+	/* Callback data for released SKBs. The callback is always
+	 * xenvif_zerocopy_callback, desc contains the pending_idx, which is
+	 * also an index in pending_tx_info array. It is initialized in
+	 * xenvif_alloc and it never changes.
+	 * skb_shinfo(skb)->destructor_arg points to the first mapped slot's
+	 * callback_struct in this array of struct pending_tx_info's, then ctx
+	 * to the next, or NULL if there is no more slot for this skb.
+	 * ubuf_to_vif is a helper which finds the struct xenvif from a pointer
+	 * to this field.
+	 */
+	struct ubuf_info callback_struct;
 };
 
 #define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
@@ -108,6 +90,15 @@ struct xenvif_rx_meta {
  */
 #define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE)
 
+#define NETBACK_INVALID_HANDLE -1
+
+/* To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
+ * the maximum slots a valid packet can use. Now this value is defined
+ * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by
+ * all backend.
+ */
+#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
+
 struct xenvif {
 	/* Unique identifier for this interface. */
 	domid_t          domid;
@@ -126,13 +117,28 @@ struct xenvif {
 	pending_ring_idx_t pending_cons;
 	u16 pending_ring[MAX_PENDING_REQS];
 	struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-
-	/* Coalescing tx requests before copying makes number of grant
-	 * copy ops greater or equal to number of slots required. In
-	 * worst case a tx request consumes 2 gnttab_copy.
+	grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+
+	struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+	struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+	/* passed to gnttab_[un]map_refs with pages under (un)mapping */
+	struct page *pages_to_map[MAX_PENDING_REQS];
+	struct page *pages_to_unmap[MAX_PENDING_REQS];
+
+	/* This prevents zerocopy callbacks  to race over dealloc_ring */
+	spinlock_t callback_lock;
+	/* This prevents dealloc thread and NAPI instance to race over response
+	 * creation and pending_ring in xenvif_idx_release. In xenvif_tx_err
+	 * it only protect response creation
 	 */
-	struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
-
+	spinlock_t response_lock;
+	pending_ring_idx_t dealloc_prod;
+	pending_ring_idx_t dealloc_cons;
+	u16 dealloc_ring[MAX_PENDING_REQS];
+	struct task_struct *dealloc_task;
+	wait_queue_head_t dealloc_wq;
+	struct timer_list dealloc_delay;
+	bool dealloc_delay_timed_out;
 
 	/* Use kthread for guest RX */
 	struct task_struct *task;
@@ -144,6 +150,9 @@ struct xenvif {
 	struct xen_netif_rx_back_ring rx;
 	struct sk_buff_head rx_queue;
 	RING_IDX rx_last_skb_slots;
+	bool rx_queue_purge;
+
+	struct timer_list wake_queue;
 
 	/* This array is allocated seperately as it is large */
 	struct gnttab_copy *grant_copy_op;
@@ -175,6 +184,10 @@ struct xenvif {
 
 	/* Statistics */
 	unsigned long rx_gso_checksum_fixup;
+	unsigned long tx_zerocopy_sent;
+	unsigned long tx_zerocopy_success;
+	unsigned long tx_zerocopy_fail;
+	unsigned long tx_frag_overflow;
 
 	/* Miscellaneous private stuff. */
 	struct net_device *dev;
@@ -216,9 +229,11 @@ void xenvif_carrier_off(struct xenvif *vif);
 
 int xenvif_tx_action(struct xenvif *vif, int budget);
 
-int xenvif_kthread(void *data);
+int xenvif_kthread_guest_rx(void *data);
 void xenvif_kick_thread(struct xenvif *vif);
 
+int xenvif_dealloc_kthread(void *data);
+
 /* Determine whether the needed number of slots (req) are available,
  * and set req_event if not.
  */
@@ -226,6 +241,30 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
 
 void xenvif_stop_queue(struct xenvif *vif);
 
+/* Callback from stack when TX packet can be released */
+void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+
+/* Unmap a pending page and release it back to the guest */
+void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx);
+
+static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
+{
+	return MAX_PENDING_REQS -
+		vif->pending_prod + vif->pending_cons;
+}
+
+static inline bool xenvif_tx_pending_slots_available(struct xenvif *vif)
+{
+	return nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
+		< MAX_PENDING_REQS;
+}
+
+/* Callback from stack when TX packet can be released */
+void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+
 extern bool separate_tx_rx_irq;
 
+extern unsigned int rx_drain_timeout_msecs;
+extern unsigned int rx_drain_timeout_jiffies;
+
 #endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 7669d49a67e2..83a71ac5b93a 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
 
 #include <xen/events.h>
 #include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
 
 #define XENVIF_QUEUE_LENGTH 32
 #define XENVIF_NAPI_WEIGHT  64
@@ -87,7 +88,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
 		local_irq_save(flags);
 
 		RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
-		if (!more_to_do)
+		if (!(more_to_do &&
+		      xenvif_tx_pending_slots_available(vif)))
 			__napi_complete(napi);
 
 		local_irq_restore(flags);
@@ -113,6 +115,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+static void xenvif_wake_queue(unsigned long data)
+{
+	struct xenvif *vif = (struct xenvif *)data;
+
+	if (netif_queue_stopped(vif->dev)) {
+		netdev_err(vif->dev, "draining TX queue\n");
+		vif->rx_queue_purge = true;
+		xenvif_kick_thread(vif);
+		netif_wake_queue(vif->dev);
+	}
+}
+
 static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct xenvif *vif = netdev_priv(dev);
@@ -121,7 +135,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	BUG_ON(skb->dev != dev);
 
 	/* Drop the packet if vif is not ready */
-	if (vif->task == NULL || !xenvif_schedulable(vif))
+	if (vif->task == NULL ||
+	    vif->dealloc_task == NULL ||
+	    !xenvif_schedulable(vif))
 		goto drop;
 
 	/* At best we'll need one slot for the header and one for each
@@ -140,8 +156,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	 * then turn off the queue to give the ring a chance to
 	 * drain.
 	 */
-	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+	if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+		vif->wake_queue.function = xenvif_wake_queue;
+		vif->wake_queue.data = (unsigned long)vif;
 		xenvif_stop_queue(vif);
+		mod_timer(&vif->wake_queue,
+			jiffies + rx_drain_timeout_jiffies);
+	}
 
 	skb_queue_tail(&vif->rx_queue, skb);
 	xenvif_kick_thread(vif);
@@ -234,6 +255,28 @@ static const struct xenvif_stat {
 		"rx_gso_checksum_fixup",
 		offsetof(struct xenvif, rx_gso_checksum_fixup)
 	},
+	/* If (sent != success + fail), there are probably packets never
+	 * freed up properly!
+	 */
+	{
+		"tx_zerocopy_sent",
+		offsetof(struct xenvif, tx_zerocopy_sent),
+	},
+	{
+		"tx_zerocopy_success",
+		offsetof(struct xenvif, tx_zerocopy_success),
+	},
+	{
+		"tx_zerocopy_fail",
+		offsetof(struct xenvif, tx_zerocopy_fail)
+	},
+	/* Number of packets exceeding MAX_SKB_FRAG slots. You should use
+	 * a guest with the same MAX_SKB_FRAG
+	 */
+	{
+		"tx_frag_overflow",
+		offsetof(struct xenvif, tx_frag_overflow)
+	},
 };
 
 static int xenvif_get_sset_count(struct net_device *dev, int string_set)
@@ -327,6 +370,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	init_timer(&vif->credit_timeout);
 	vif->credit_window_start = get_jiffies_64();
 
+	init_timer(&vif->wake_queue);
+
 	dev->netdev_ops	= &xenvif_netdev_ops;
 	dev->hw_features = NETIF_F_SG |
 		NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -343,8 +388,27 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
 	vif->pending_prod = MAX_PENDING_REQS;
 	for (i = 0; i < MAX_PENDING_REQS; i++)
 		vif->pending_ring[i] = i;
-	for (i = 0; i < MAX_PENDING_REQS; i++)
-		vif->mmap_pages[i] = NULL;
+	spin_lock_init(&vif->callback_lock);
+	spin_lock_init(&vif->response_lock);
+	/* If ballooning is disabled, this will consume real memory, so you
+	 * better enable it. The long term solution would be to use just a
+	 * bunch of valid page descriptors, without dependency on ballooning
+	 */
+	err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+				       vif->mmap_pages,
+				       false);
+	if (err) {
+		netdev_err(dev, "Could not reserve mmap_pages\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	for (i = 0; i < MAX_PENDING_REQS; i++) {
+		vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+			{ .callback = xenvif_zerocopy_callback,
+			  .ctx = NULL,
+			  .desc = i };
+		vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+	}
+	init_timer(&vif->dealloc_delay);
 
 	/*
 	 * Initialise a dummy MAC address. We choose the numerically
@@ -382,12 +446,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
 	BUG_ON(vif->tx_irq);
 	BUG_ON(vif->task);
+	BUG_ON(vif->dealloc_task);
 
 	err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
 	if (err < 0)
 		goto err;
 
 	init_waitqueue_head(&vif->wq);
+	init_waitqueue_head(&vif->dealloc_wq);
 
 	if (tx_evtchn == rx_evtchn) {
 		/* feature-split-event-channels == 0 */
@@ -421,8 +487,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 		disable_irq(vif->rx_irq);
 	}
 
-	task = kthread_create(xenvif_kthread,
-			      (void *)vif, "%s", vif->dev->name);
+	task = kthread_create(xenvif_kthread_guest_rx,
+			      (void *)vif, "%s-guest-rx", vif->dev->name);
 	if (IS_ERR(task)) {
 		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
 		err = PTR_ERR(task);
@@ -431,6 +497,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 
 	vif->task = task;
 
+	task = kthread_create(xenvif_dealloc_kthread,
+			      (void *)vif, "%s-dealloc", vif->dev->name);
+	if (IS_ERR(task)) {
+		pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+		err = PTR_ERR(task);
+		goto err_rx_unbind;
+	}
+
+	vif->dealloc_task = task;
+
 	rtnl_lock();
 	if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
 		dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -441,6 +517,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
 	rtnl_unlock();
 
 	wake_up_process(vif->task);
+	wake_up_process(vif->dealloc_task);
 
 	return 0;
 
@@ -474,10 +551,17 @@ void xenvif_disconnect(struct xenvif *vif)
 		xenvif_carrier_off(vif);
 
 	if (vif->task) {
+		del_timer_sync(&vif->wake_queue);
 		kthread_stop(vif->task);
 		vif->task = NULL;
 	}
 
+	if (vif->dealloc_task) {
+		del_timer_sync(&vif->dealloc_delay);
+		kthread_stop(vif->dealloc_task);
+		vif->dealloc_task = NULL;
+	}
+
 	if (vif->tx_irq) {
 		if (vif->tx_irq == vif->rx_irq)
 			unbind_from_irqhandler(vif->tx_irq, vif);
@@ -493,6 +577,36 @@ void xenvif_disconnect(struct xenvif *vif)
 
 void xenvif_free(struct xenvif *vif)
 {
+	int i, unmap_timeout = 0;
+	/* Here we want to avoid timeout messages if an skb can be legitimatly
+	 * stucked somewhere else. Realisticly this could be an another vif's
+	 * internal or QDisc queue. That another vif also has this
+	 * rx_drain_timeout_msecs timeout, but the timer only ditches the
+	 * internal queue. After that, the QDisc queue can put in worst case
+	 * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that another vif's
+	 * internal queue, so we need several rounds of such timeouts until we
+	 * can be sure that no another vif should have skb's from us. We are
+	 * not sending more skb's, so newly stucked packets are not interesting
+	 * for us here.
+	 */
+	unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
+		DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+
+	for (i = 0; i < MAX_PENDING_REQS; ++i) {
+		if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+			unmap_timeout++;
+			schedule_timeout(msecs_to_jiffies(1000));
+			if (unmap_timeout > worst_case_skb_lifetime &&
+			    net_ratelimit())
+				netdev_err(vif->dev,
+					   "Page still granted! Index: %x\n",
+					   i);
+			i = -1;
+		}
+	}
+
+	free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
 	netif_napi_del(&vif->napi);
 
 	unregister_netdev(vif->dev);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e5284bca2d90..bc943205a691 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -37,6 +37,7 @@
 #include <linux/kthread.h>
 #include <linux/if_vlan.h>
 #include <linux/udp.h>
+#include <linux/highmem.h>
 
 #include <net/tcp.h>
 
@@ -54,6 +55,13 @@
 bool separate_tx_rx_irq = 1;
 module_param(separate_tx_rx_irq, bool, 0644);
 
+/* When guest ring is filled up, qdisc queues the packets for us, but we have
+ * to timeout them, otherwise other guests' packets can get stucked there
+ */
+unsigned int rx_drain_timeout_msecs = 10000;
+module_param(rx_drain_timeout_msecs, uint, 0444);
+unsigned int rx_drain_timeout_jiffies;
+
 /*
  * This is the maximum slots a skb can have. If a guest sends a skb
  * which exceeds this limit it is considered malicious.
@@ -62,24 +70,6 @@ module_param(separate_tx_rx_irq, bool, 0644);
 static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
 module_param(fatal_skb_slots, uint, 0444);
 
-/*
- * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
- * the maximum slots a valid packet can use. Now this value is defined
- * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by
- * all backend.
- */
-#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
-
-/*
- * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
- * one or more merged tx requests, otherwise it is the continuation of
- * previous tx request.
- */
-static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
-{
-	return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
-}
-
 static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
 			       u8 status);
 
@@ -109,6 +99,18 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
 	return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
 }
 
+/* Find the containing VIF's structure from a pointer in pending_tx_info array
+ */
+static inline struct xenvif* ubuf_to_vif(struct ubuf_info *ubuf)
+{
+	u16 pending_idx = ubuf->desc;
+	struct pending_tx_info *temp =
+		container_of(ubuf, struct pending_tx_info, callback_struct);
+	return container_of(temp - pending_idx,
+			    struct xenvif,
+			    pending_tx_info[0]);
+}
+
 /* This is a miniumum size for the linear area to avoid lots of
  * calls to __pskb_pull_tail() as we set up checksum offsets. The
  * value 128 was chosen as it covers all IPv4 and most likely
@@ -131,10 +133,9 @@ static inline pending_ring_idx_t pending_index(unsigned i)
 	return i & (MAX_PENDING_REQS-1);
 }
 
-static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
+static inline pending_ring_idx_t nr_free_slots(struct xen_netif_tx_back_ring *ring)
 {
-	return MAX_PENDING_REQS -
-		vif->pending_prod + vif->pending_cons;
+	return ring->nr_ents -	(ring->sring->req_prod - ring->rsp_prod_pvt);
 }
 
 bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
@@ -235,7 +236,9 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
 static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 				 struct netrx_pending_operations *npo,
 				 struct page *page, unsigned long size,
-				 unsigned long offset, int *head)
+				 unsigned long offset, int *head,
+				 struct xenvif *foreign_vif,
+				 grant_ref_t foreign_gref)
 {
 	struct gnttab_copy *copy_gop;
 	struct xenvif_rx_meta *meta;
@@ -277,8 +280,15 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
 		copy_gop->flags = GNTCOPY_dest_gref;
 		copy_gop->len = bytes;
 
-		copy_gop->source.domid = DOMID_SELF;
-		copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
+		if (foreign_vif) {
+			copy_gop->source.domid = foreign_vif->domid;
+			copy_gop->source.u.ref = foreign_gref;
+			copy_gop->flags |= GNTCOPY_source_gref;
+		} else {
+			copy_gop->source.domid = DOMID_SELF;
+			copy_gop->source.u.gmfn =
+				virt_to_mfn(page_address(page));
+		}
 		copy_gop->source.offset = offset;
 
 		copy_gop->dest.domid = vif->domid;
@@ -339,6 +349,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	int old_meta_prod;
 	int gso_type;
 	int gso_size;
+	struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
+	grant_ref_t foreign_grefs[MAX_SKB_FRAGS];
+	struct xenvif *foreign_vif = NULL;
 
 	old_meta_prod = npo->meta_prod;
 
@@ -379,6 +392,19 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 	npo->copy_off = 0;
 	npo->copy_gref = req->gref;
 
+	if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
+		 (ubuf->callback == &xenvif_zerocopy_callback)) {
+		int i = 0;
+		foreign_vif = ubuf_to_vif(ubuf);
+
+		do {
+			u16 pending_idx = ubuf->desc;
+			foreign_grefs[i++] =
+				foreign_vif->pending_tx_info[pending_idx].req.gref;
+			ubuf = (struct ubuf_info *) ubuf->ctx;
+		} while (ubuf);
+	}
+
 	data = skb->data;
 	while (data < skb_tail_pointer(skb)) {
 		unsigned int offset = offset_in_page(data);
@@ -388,7 +414,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 			len = skb_tail_pointer(skb) - data;
 
 		xenvif_gop_frag_copy(vif, skb, npo,
-				     virt_to_page(data), len, offset, &head);
+				     virt_to_page(data), len, offset, &head,
+				     NULL,
+				     0);
 		data += len;
 	}
 
@@ -397,7 +425,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
 				     skb_frag_page(&skb_shinfo(skb)->frags[i]),
 				     skb_frag_size(&skb_shinfo(skb)->frags[i]),
 				     skb_shinfo(skb)->frags[i].page_offset,
-				     &head);
+				     &head,
+				     foreign_vif,
+				     foreign_grefs[i]);
 	}
 
 	return npo->meta_prod - old_meta_prod;
@@ -455,10 +485,12 @@ static void xenvif_add_frag_responses(struct xenvif *vif, int status,
 	}
 }
 
-struct skb_cb_overlay {
+struct xenvif_rx_cb {
 	int meta_slots_used;
 };
 
+#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
+
 void xenvif_kick_thread(struct xenvif *vif)
 {
 	wake_up(&vif->wq);
@@ -474,7 +506,6 @@ static void xenvif_rx_action(struct xenvif *vif)
 	LIST_HEAD(notify);
 	int ret;
 	unsigned long offset;
-	struct skb_cb_overlay *sco;
 	bool need_to_notify = false;
 
 	struct netrx_pending_operations npo = {
@@ -513,9 +544,8 @@ static void xenvif_rx_action(struct xenvif *vif)
 		} else
 			vif->rx_last_skb_slots = 0;
 
-		sco = (struct skb_cb_overlay *)skb->cb;
-		sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
-		BUG_ON(sco->meta_slots_used > max_slots_needed);
+		XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
+		BUG_ON(XENVIF_RX_CB(skb)->meta_slots_used > max_slots_needed);
 
 		__skb_queue_tail(&rxq, skb);
 	}
@@ -529,7 +559,6 @@ static void xenvif_rx_action(struct xenvif *vif)
 	gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
 
 	while ((skb = __skb_dequeue(&rxq)) != NULL) {
-		sco = (struct skb_cb_overlay *)skb->cb;
 
 		if ((1 << vif->meta[npo.meta_cons].gso_type) &
 		    vif->gso_prefix_mask) {
@@ -540,19 +569,21 @@ static void xenvif_rx_action(struct xenvif *vif)
 
 			resp->offset = vif->meta[npo.meta_cons].gso_size;
 			resp->id = vif->meta[npo.meta_cons].id;
-			resp->status = sco->meta_slots_used;
+			resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
 
 			npo.meta_cons++;
-			sco->meta_slots_used--;
+			XENVIF_RX_CB(skb)->meta_slots_used--;
 		}
 
 
 		vif->dev->stats.tx_bytes += skb->len;
 		vif->dev->stats.tx_packets++;
 
-		status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);
+		status = xenvif_check_gop(vif,
+					  XENVIF_RX_CB(skb)->meta_slots_used,
+					  &npo);
 
-		if (sco->meta_slots_used == 1)
+		if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
 			flags = 0;
 		else
 			flags = XEN_NETRXF_more_data;
@@ -589,13 +620,13 @@ static void xenvif_rx_action(struct xenvif *vif)
 
 		xenvif_add_frag_responses(vif, status,
 					  vif->meta + npo.meta_cons + 1,
-					  sco->meta_slots_used);
+					  XENVIF_RX_CB(skb)->meta_slots_used);
 
 		RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
 
 		need_to_notify |= !!ret;
 
-		npo.meta_cons += sco->meta_slots_used;
+		npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
 		dev_kfree_skb(skb);
 	}
 
@@ -645,9 +676,12 @@ static void xenvif_tx_err(struct xenvif *vif,
 			  struct xen_netif_tx_request *txp, RING_IDX end)
 {
 	RING_IDX cons = vif->tx.req_cons;
+	unsigned long flags;
 
 	do {
+		spin_lock_irqsave(&vif->response_lock, flags);
 		make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
+		spin_unlock_irqrestore(&vif->response_lock, flags);
 		if (cons == end)
 			break;
 		txp = RING_GET_REQUEST(&vif->tx, cons++);
@@ -759,180 +793,168 @@ static int xenvif_count_requests(struct xenvif *vif,
 	return slots;
 }
 
-static struct page *xenvif_alloc_page(struct xenvif *vif,
-				      u16 pending_idx)
+
+struct xenvif_tx_cb {
+	u16 pending_idx;
+};
+
+#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
+
+static inline void xenvif_tx_create_gop(struct xenvif *vif,
+					u16 pending_idx,
+					struct xen_netif_tx_request *txp,
+					struct gnttab_map_grant_ref *gop)
 {
-	struct page *page;
+	vif->pages_to_map[gop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
+	gnttab_set_map_op(gop, idx_to_kaddr(vif, pending_idx),
+			  GNTMAP_host_map | GNTMAP_readonly,
+			  txp->gref, vif->domid);
+
+	memcpy(&vif->pending_tx_info[pending_idx].req, txp,
+	       sizeof(*txp));
+}
 
-	page = alloc_page(GFP_ATOMIC|__GFP_COLD);
-	if (!page)
+static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+{
+	struct sk_buff *skb =
+		alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
+			  GFP_ATOMIC | __GFP_NOWARN);
+	if (unlikely(skb == NULL))
 		return NULL;
-	vif->mmap_pages[pending_idx] = page;
 
-	return page;
+	/* Packets passed to netif_rx() must have some headroom. */
+	skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+	/* Initialize it here to avoid later surprises */
+	skb_shinfo(skb)->destructor_arg = NULL;
+
+	return skb;
 }
 
-static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
-					       struct sk_buff *skb,
-					       struct xen_netif_tx_request *txp,
-					       struct gnttab_copy *gop)
+static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
+							struct sk_buff *skb,
+							struct xen_netif_tx_request *txp,
+							struct gnttab_map_grant_ref *gop)
 {
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	skb_frag_t *frags = shinfo->frags;
-	u16 pending_idx = *((u16 *)skb->data);
-	u16 head_idx = 0;
-	int slot, start;
-	struct page *page;
-	pending_ring_idx_t index, start_idx = 0;
-	uint16_t dst_offset;
-	unsigned int nr_slots;
-	struct pending_tx_info *first = NULL;
+	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+	int start;
+	pending_ring_idx_t index;
+	unsigned int nr_slots, frag_overflow = 0;
 
 	/* At this point shinfo->nr_frags is in fact the number of
 	 * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
 	 */
+	if (shinfo->nr_frags > MAX_SKB_FRAGS) {
+		frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
+		BUG_ON(frag_overflow > MAX_SKB_FRAGS);
+		shinfo->nr_frags = MAX_SKB_FRAGS;
+	}
 	nr_slots = shinfo->nr_frags;
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
-	/* Coalesce tx requests, at this point the packet passed in
-	 * should be <= 64K. Any packets larger than 64K have been
-	 * handled in xenvif_count_requests().
-	 */
-	for (shinfo->nr_frags = slot = start; slot < nr_slots;
-	     shinfo->nr_frags++) {
-		struct pending_tx_info *pending_tx_info =
-			vif->pending_tx_info;
+	for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
+	     shinfo->nr_frags++, txp++, gop++) {
+		index = pending_index(vif->pending_cons++);
+		pending_idx = vif->pending_ring[index];
+		xenvif_tx_create_gop(vif, pending_idx, txp, gop);
+		frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+	}
 
-		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
-		if (!page)
-			goto err;
-
-		dst_offset = 0;
-		first = NULL;
-		while (dst_offset < PAGE_SIZE && slot < nr_slots) {
-			gop->flags = GNTCOPY_source_gref;
-
-			gop->source.u.ref = txp->gref;
-			gop->source.domid = vif->domid;
-			gop->source.offset = txp->offset;
-
-			gop->dest.domid = DOMID_SELF;
-
-			gop->dest.offset = dst_offset;
-			gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-
-			if (dst_offset + txp->size > PAGE_SIZE) {
-				/* This page can only merge a portion
-				 * of tx request. Do not increment any
-				 * pointer / counter here. The txp
-				 * will be dealt with in future
-				 * rounds, eventually hitting the
-				 * `else` branch.
-				 */
-				gop->len = PAGE_SIZE - dst_offset;
-				txp->offset += gop->len;
-				txp->size -= gop->len;
-				dst_offset += gop->len; /* quit loop */
-			} else {
-				/* This tx request can be merged in the page */
-				gop->len = txp->size;
-				dst_offset += gop->len;
-
-				index = pending_index(vif->pending_cons++);
-
-				pending_idx = vif->pending_ring[index];
-
-				memcpy(&pending_tx_info[pending_idx].req, txp,
-				       sizeof(*txp));
-
-				/* Poison these fields, corresponding
-				 * fields for head tx req will be set
-				 * to correct values after the loop.
-				 */
-				vif->mmap_pages[pending_idx] = (void *)(~0UL);
-				pending_tx_info[pending_idx].head =
-					INVALID_PENDING_RING_IDX;
-
-				if (!first) {
-					first = &pending_tx_info[pending_idx];
-					start_idx = index;
-					head_idx = pending_idx;
-				}
-
-				txp++;
-				slot++;
-			}
+	if (frag_overflow) {
+		struct sk_buff *nskb = xenvif_alloc_skb(0);
+		if (unlikely(nskb == NULL)) {
+			if (net_ratelimit())
+				netdev_err(vif->dev,
+					   "Can't allocate the frag_list skb.\n");
+			return NULL;
+		}
+
+		shinfo = skb_shinfo(nskb);
+		frags = shinfo->frags;
 
-			gop++;
+		for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
+		     shinfo->nr_frags++, txp++, gop++) {
+			index = pending_index(vif->pending_cons++);
+			pending_idx = vif->pending_ring[index];
+			xenvif_tx_create_gop(vif, pending_idx, txp, gop);
+			frag_set_pending_idx(&frags[shinfo->nr_frags],
+					     pending_idx);
 		}
 
-		first->req.offset = 0;
-		first->req.size = dst_offset;
-		first->head = start_idx;
-		vif->mmap_pages[head_idx] = page;
-		frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
+		skb_shinfo(skb)->frag_list = nskb;
 	}
 
-	BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
-
 	return gop;
-err:
-	/* Unwind, freeing all pages and sending error responses. */
-	while (shinfo->nr_frags-- > start) {
-		xenvif_idx_release(vif,
-				frag_get_pending_idx(&frags[shinfo->nr_frags]),
-				XEN_NETIF_RSP_ERROR);
+}
+
+static inline void xenvif_grant_handle_set(struct xenvif *vif,
+					   u16 pending_idx,
+					   grant_handle_t handle)
+{
+	if (unlikely(vif->grant_tx_handle[pending_idx] !=
+		     NETBACK_INVALID_HANDLE)) {
+		netdev_err(vif->dev,
+			   "Trying to overwrite active handle! pending_idx: %x\n",
+			   pending_idx);
+		BUG();
 	}
-	/* The head too, if necessary. */
-	if (start)
-		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+	vif->grant_tx_handle[pending_idx] = handle;
+}
 
-	return NULL;
+static inline void xenvif_grant_handle_reset(struct xenvif *vif,
+					     u16 pending_idx)
+{
+	if (unlikely(vif->grant_tx_handle[pending_idx] ==
+		     NETBACK_INVALID_HANDLE)) {
+		netdev_err(vif->dev,
+			   "Trying to unmap invalid handle! pending_idx: %x\n",
+			   pending_idx);
+		BUG();
+	}
+	vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
 }
 
 static int xenvif_tx_check_gop(struct xenvif *vif,
 			       struct sk_buff *skb,
-			       struct gnttab_copy **gopp)
+			       struct gnttab_map_grant_ref **gopp)
 {
-	struct gnttab_copy *gop = *gopp;
-	u16 pending_idx = *((u16 *)skb->data);
+	struct gnttab_map_grant_ref *gop = *gopp;
+	u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	struct pending_tx_info *tx_info;
 	int nr_frags = shinfo->nr_frags;
 	int i, err, start;
-	u16 peek; /* peek into next tx request */
+	struct sk_buff *first_skb = NULL;
 
 	/* Check status of header. */
 	err = gop->status;
 	if (unlikely(err))
 		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+	else
+		xenvif_grant_handle_set(vif, pending_idx , gop->handle);
 
 	/* Skip first skb fragment if it is on same page as header fragment. */
 	start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
 
+check_frags:
 	for (i = start; i < nr_frags; i++) {
 		int j, newerr;
-		pending_ring_idx_t head;
 
 		pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
 		tx_info = &vif->pending_tx_info[pending_idx];
-		head = tx_info->head;
 
 		/* Check error status: if okay then remember grant handle. */
-		do {
-			newerr = (++gop)->status;
-			if (newerr)
-				break;
-			peek = vif->pending_ring[pending_index(++head)];
-		} while (!pending_tx_is_head(vif, peek));
+		newerr = (++gop)->status;
 
 		if (likely(!newerr)) {
+			xenvif_grant_handle_set(vif, pending_idx , gop->handle);
 			/* Had a previous error? Invalidate this fragment. */
 			if (unlikely(err))
-				xenvif_idx_release(vif, pending_idx,
-						   XEN_NETIF_RSP_OKAY);
+				xenvif_idx_unmap(vif, pending_idx);
 			continue;
 		}
 
@@ -942,20 +964,45 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
 		/* Not the first error? Preceding frags already invalidated. */
 		if (err)
 			continue;
-
 		/* First error: invalidate header and preceding fragments. */
-		pending_idx = *((u16 *)skb->data);
-		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+		if (!first_skb)
+			pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+		else
+			pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+		xenvif_idx_unmap(vif, pending_idx);
 		for (j = start; j < i; j++) {
 			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
-			xenvif_idx_release(vif, pending_idx,
-					   XEN_NETIF_RSP_OKAY);
+			xenvif_idx_unmap(vif, pending_idx);
 		}
 
 		/* Remember the error: invalidate all subsequent fragments. */
 		err = newerr;
 	}
 
+	if (skb_has_frag_list(skb)) {
+		first_skb = skb;
+		skb = shinfo->frag_list;
+		shinfo = skb_shinfo(skb);
+		nr_frags = shinfo->nr_frags;
+		start = 0;
+
+		goto check_frags;
+	}
+
+	/* There was a mapping error in the frag_list skb. We have to unmap
+	 * the first skb's frags
+	 */
+	if (first_skb && err) {
+		int j;
+		shinfo = skb_shinfo(first_skb);
+		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+		start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+		for (j = start; j < shinfo->nr_frags; j++) {
+			pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
+			xenvif_idx_unmap(vif, pending_idx);
+		}
+	}
+
 	*gopp = gop + 1;
 	return err;
 }
@@ -965,6 +1012,10 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
 	struct skb_shared_info *shinfo = skb_shinfo(skb);
 	int nr_frags = shinfo->nr_frags;
 	int i;
+	u16 prev_pending_idx = INVALID_PENDING_IDX;
+
+	if (skb_shinfo(skb)->destructor_arg)
+		prev_pending_idx = XENVIF_TX_CB(skb)->pending_idx;
 
 	for (i = 0; i < nr_frags; i++) {
 		skb_frag_t *frag = shinfo->frags + i;
@@ -974,6 +1025,17 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
 
 		pending_idx = frag_get_pending_idx(frag);
 
+		/* If this is not the first frag, chain it to the previous*/
+		if (unlikely(prev_pending_idx == INVALID_PENDING_IDX))
+			skb_shinfo(skb)->destructor_arg =
+				&vif->pending_tx_info[pending_idx].callback_struct;
+		else if (likely(pending_idx != prev_pending_idx))
+			vif->pending_tx_info[prev_pending_idx].callback_struct.ctx =
+				&(vif->pending_tx_info[pending_idx].callback_struct);
+
+		vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
+		prev_pending_idx = pending_idx;
+
 		txp = &vif->pending_tx_info[pending_idx].req;
 		page = virt_to_page(idx_to_kaddr(vif, pending_idx));
 		__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
@@ -981,10 +1043,15 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
 		skb->data_len += txp->size;
 		skb->truesize += txp->size;
 
-		/* Take an extra reference to offset xenvif_idx_release */
+		/* Take an extra reference to offset network stack's put_page */
 		get_page(vif->mmap_pages[pending_idx]);
-		xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
 	}
+	/* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
+	 * overlaps with "index", and "mapping" is not set. I think mapping
+	 * should be set. If delivered to local stack, it would drop this
+	 * skb in sk_filter unless the socket has the right to use it.
+	 */
+	skb->pfmemalloc	= false;
 }
 
 static int xenvif_get_extras(struct xenvif *vif,
@@ -1104,16 +1171,14 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
 
 static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 {
-	struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
+	struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
 	struct sk_buff *skb;
 	int ret;
 
-	while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
-		< MAX_PENDING_REQS) &&
+	while (xenvif_tx_pending_slots_available(vif) &&
 	       (skb_queue_len(&vif->tx_queue) < budget)) {
 		struct xen_netif_tx_request txreq;
 		struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
-		struct page *page;
 		struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
 		u16 pending_idx;
 		RING_IDX idx;
@@ -1189,8 +1254,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 			    ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
 			PKT_PROT_LEN : txreq.size;
 
-		skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
-				GFP_ATOMIC | __GFP_NOWARN);
+		skb = xenvif_alloc_skb(data_len);
 		if (unlikely(skb == NULL)) {
 			netdev_dbg(vif->dev,
 				   "Can't allocate a skb in start_xmit.\n");
@@ -1198,9 +1262,6 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 			break;
 		}
 
-		/* Packets passed to netif_rx() must have some headroom. */
-		skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-
 		if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
 			struct xen_netif_extra_info *gso;
 			gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
@@ -1212,31 +1273,11 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 			}
 		}
 
-		/* XXX could copy straight to head */
-		page = xenvif_alloc_page(vif, pending_idx);
-		if (!page) {
-			kfree_skb(skb);
-			xenvif_tx_err(vif, &txreq, idx);
-			break;
-		}
-
-		gop->source.u.ref = txreq.gref;
-		gop->source.domid = vif->domid;
-		gop->source.offset = txreq.offset;
-
-		gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-		gop->dest.domid = DOMID_SELF;
-		gop->dest.offset = txreq.offset;
-
-		gop->len = txreq.size;
-		gop->flags = GNTCOPY_source_gref;
+		xenvif_tx_create_gop(vif, pending_idx, &txreq, gop);
 
 		gop++;
 
-		memcpy(&vif->pending_tx_info[pending_idx].req,
-		       &txreq, sizeof(txreq));
-		vif->pending_tx_info[pending_idx].head = index;
-		*((u16 *)skb->data) = pending_idx;
+		XENVIF_TX_CB(skb)->pending_idx = pending_idx;
 
 		__skb_put(skb, data_len);
 
@@ -1264,17 +1305,82 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
 
 		vif->tx.req_cons = idx;
 
-		if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
+		if ((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops))
 			break;
 	}
 
-	return gop - vif->tx_copy_ops;
+	return gop - vif->tx_map_ops;
 }
 
+/* Consolidate skb with a frag_list into a brand new one with local pages on
+ * frags. Returns 0 or -ENOMEM if can't allocate new pages.
+ */
+static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
+{
+	unsigned int offset = skb_headlen(skb);
+	skb_frag_t frags[MAX_SKB_FRAGS];
+	int i;
+	struct ubuf_info *uarg;
+	struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
+
+	vif->tx_zerocopy_sent += 2;
+	vif->tx_frag_overflow++;
+
+	xenvif_fill_frags(vif, nskb);
+	/* Subtract frags size, we will correct it later */
+	skb->truesize -= skb->data_len;
+	skb->len += nskb->len;
+	skb->data_len += nskb->len;
+
+	/* create a brand new frags array and coalesce there */
+	for (i = 0; offset < skb->len; i++) {
+		struct page *page;
+		unsigned int len;
+
+		BUG_ON(i >= MAX_SKB_FRAGS);
+		page = alloc_page(GFP_ATOMIC|__GFP_COLD);
+		if (!page) {
+			int j;
+			skb->truesize += skb->data_len;
+			for (j = 0; j < i; j++)
+				put_page(frags[j].page.p);
+			return -ENOMEM;
+		}
+
+		if (offset + PAGE_SIZE < skb->len)
+			len = PAGE_SIZE;
+		else
+			len = skb->len - offset;
+		if (skb_copy_bits(skb, offset, page_address(page), len))
+			BUG();
+
+		offset += len;
+		frags[i].page.p = page;
+		frags[i].page_offset = 0;
+		skb_frag_size_set(&frags[i], len);
+	}
+	/* swap out with old one */
+	memcpy(skb_shinfo(skb)->frags,
+	       frags,
+	       i * sizeof(skb_frag_t));
+	skb_shinfo(skb)->nr_frags = i;
+	skb->truesize += i * PAGE_SIZE;
+
+	/* remove traces of mapped pages and frag_list */
+	skb_frag_list_init(skb);
+	uarg = skb_shinfo(skb)->destructor_arg;
+	uarg->callback(uarg, true);
+	skb_shinfo(skb)->destructor_arg = NULL;
+
+	skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	kfree_skb(nskb);
+
+	return 0;
+}
 
 static int xenvif_tx_submit(struct xenvif *vif)
 {
-	struct gnttab_copy *gop = vif->tx_copy_ops;
+	struct gnttab_map_grant_ref *gop = vif->tx_map_ops;
 	struct sk_buff *skb;
 	int work_done = 0;
 
@@ -1283,7 +1389,7 @@ static int xenvif_tx_submit(struct xenvif *vif)
 		u16 pending_idx;
 		unsigned data_len;
 
-		pending_idx = *((u16 *)skb->data);
+		pending_idx = XENVIF_TX_CB(skb)->pending_idx;
 		txp = &vif->pending_tx_info[pending_idx].req;
 
 		/* Check the remap error code. */
@@ -1298,14 +1404,16 @@ static int xenvif_tx_submit(struct xenvif *vif)
 		memcpy(skb->data,
 		       (void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
 		       data_len);
+		vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
 		if (data_len < txp->size) {
 			/* Append the packet payload as a fragment. */
 			txp->offset += data_len;
 			txp->size -= data_len;
+			skb_shinfo(skb)->destructor_arg =
+				&vif->pending_tx_info[pending_idx].callback_struct;
 		} else {
 			/* Schedule a response immediately. */
-			xenvif_idx_release(vif, pending_idx,
-					   XEN_NETIF_RSP_OKAY);
+			xenvif_idx_unmap(vif, pending_idx);
 		}
 
 		if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1315,6 +1423,17 @@ static int xenvif_tx_submit(struct xenvif *vif)
 
 		xenvif_fill_frags(vif, skb);
 
+		if (unlikely(skb_has_frag_list(skb))) {
+			if (xenvif_handle_frag_list(vif, skb)) {
+				if (net_ratelimit())
+					netdev_err(vif->dev,
+						   "Not enough memory to consolidate frag_list!\n");
+				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+				kfree_skb(skb);
+				continue;
+			}
+		}
+
 		if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
 			int target = min_t(int, skb->len, PKT_PROT_LEN);
 			__pskb_pull_tail(skb, target - skb_headlen(skb));
@@ -1327,6 +1446,9 @@ static int xenvif_tx_submit(struct xenvif *vif)
 		if (checksum_setup(vif, skb)) {
 			netdev_dbg(vif->dev,
 				   "Can't setup checksum in net_tx_action\n");
+			/* We have to set this flag to trigger the callback */
+			if (skb_shinfo(skb)->destructor_arg)
+				skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
 			kfree_skb(skb);
 			continue;
 		}
@@ -1352,17 +1474,134 @@ static int xenvif_tx_submit(struct xenvif *vif)
 
 		work_done++;
 
+		/* Set this flag right before netif_receive_skb, otherwise
+		 * someone might think this packet already left netback, and
+		 * do a skb_copy_ubufs while we are still in control of the
+		 * skb. E.g. the __pskb_pull_tail earlier can do such thing.
+		 */
+		if (skb_shinfo(skb)->destructor_arg) {
+			skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+			vif->tx_zerocopy_sent++;
+		}
+
 		netif_receive_skb(skb);
 	}
 
 	return work_done;
 }
 
+void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
+{
+	unsigned long flags;
+	pending_ring_idx_t index;
+	struct xenvif *vif = ubuf_to_vif(ubuf);
+
+	/* This is the only place where we grab this lock, to protect callbacks
+	 * from each other.
+	 */
+	spin_lock_irqsave(&vif->callback_lock, flags);
+	do {
+		u16 pending_idx = ubuf->desc;
+		ubuf = (struct ubuf_info *) ubuf->ctx;
+		BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
+			MAX_PENDING_REQS);
+		index = pending_index(vif->dealloc_prod);
+		vif->dealloc_ring[index] = pending_idx;
+		/* Sync with xenvif_tx_dealloc_action:
+		 * insert idx then incr producer.
+		 */
+		smp_wmb();
+		vif->dealloc_prod++;
+	} while (ubuf);
+	wake_up(&vif->dealloc_wq);
+	spin_unlock_irqrestore(&vif->callback_lock, flags);
+
+	if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx) &&
+	    xenvif_tx_pending_slots_available(vif)) {
+		local_bh_disable();
+		napi_schedule(&vif->napi);
+		local_bh_enable();
+	}
+
+	if (likely(zerocopy_success))
+		vif->tx_zerocopy_success++;
+	else
+		vif->tx_zerocopy_fail++;
+}
+
+static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
+{
+	struct gnttab_unmap_grant_ref *gop;
+	pending_ring_idx_t dc, dp;
+	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
+	unsigned int i = 0;
+
+	dc = vif->dealloc_cons;
+	gop = vif->tx_unmap_ops;
+
+	/* Free up any grants we have finished using */
+	do {
+		dp = vif->dealloc_prod;
+
+		/* Ensure we see all indices enqueued by all
+		 * xenvif_zerocopy_callback().
+		 */
+		smp_rmb();
+
+		while (dc != dp) {
+			BUG_ON(gop - vif->tx_unmap_ops > MAX_PENDING_REQS);
+			pending_idx =
+				vif->dealloc_ring[pending_index(dc++)];
+
+			pending_idx_release[gop-vif->tx_unmap_ops] =
+				pending_idx;
+			vif->pages_to_unmap[gop-vif->tx_unmap_ops] =
+				vif->mmap_pages[pending_idx];
+			gnttab_set_unmap_op(gop,
+					    idx_to_kaddr(vif, pending_idx),
+					    GNTMAP_host_map,
+					    vif->grant_tx_handle[pending_idx]);
+			/* Btw. already unmapped? */
+			xenvif_grant_handle_reset(vif, pending_idx);
+			++gop;
+		}
+
+	} while (dp != vif->dealloc_prod);
+
+	vif->dealloc_cons = dc;
+
+	if (gop - vif->tx_unmap_ops > 0) {
+		int ret;
+		ret = gnttab_unmap_refs(vif->tx_unmap_ops,
+					NULL,
+					vif->pages_to_unmap,
+					gop - vif->tx_unmap_ops);
+		if (ret) {
+			netdev_err(vif->dev, "Unmap fail: nr_ops %x ret %d\n",
+				   gop - vif->tx_unmap_ops, ret);
+			for (i = 0; i < gop - vif->tx_unmap_ops; ++i) {
+				if (gop[i].status != GNTST_okay)
+					netdev_err(vif->dev,
+						   " host_addr: %llx handle: %x status: %d\n",
+						   gop[i].host_addr,
+						   gop[i].handle,
+						   gop[i].status);
+			}
+			BUG();
+		}
+	}
+
+	for (i = 0; i < gop - vif->tx_unmap_ops; ++i)
+		xenvif_idx_release(vif, pending_idx_release[i],
+				   XEN_NETIF_RSP_OKAY);
+}
+
+
 /* Called after netfront has transmitted */
 int xenvif_tx_action(struct xenvif *vif, int budget)
 {
 	unsigned nr_gops;
-	int work_done;
+	int work_done, ret;
 
 	if (unlikely(!tx_work_todo(vif)))
 		return 0;
@@ -1372,7 +1611,11 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
 	if (nr_gops == 0)
 		return 0;
 
-	gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
+	ret = gnttab_map_refs(vif->tx_map_ops,
+			      NULL,
+			      vif->pages_to_map,
+			      nr_gops);
+	BUG_ON(ret);
 
 	work_done = xenvif_tx_submit(vif);
 
@@ -1383,45 +1626,18 @@ static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
 			       u8 status)
 {
 	struct pending_tx_info *pending_tx_info;
-	pending_ring_idx_t head;
-	u16 peek; /* peek into next tx request */
-
-	BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
-
-	/* Already complete? */
-	if (vif->mmap_pages[pending_idx] == NULL)
-		return;
+	pending_ring_idx_t index;
+	unsigned long flags;
 
 	pending_tx_info = &vif->pending_tx_info[pending_idx];
-
-	head = pending_tx_info->head;
-
-	BUG_ON(!pending_tx_is_head(vif, head));
-	BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
-
-	do {
-		pending_ring_idx_t index;
-		pending_ring_idx_t idx = pending_index(head);
-		u16 info_idx = vif->pending_ring[idx];
-
-		pending_tx_info = &vif->pending_tx_info[info_idx];
-		make_tx_response(vif, &pending_tx_info->req, status);
-
-		/* Setting any number other than
-		 * INVALID_PENDING_RING_IDX indicates this slot is
-		 * starting a new packet / ending a previous packet.
-		 */
-		pending_tx_info->head = 0;
-
-		index = pending_index(vif->pending_prod++);
-		vif->pending_ring[index] = vif->pending_ring[info_idx];
-
-		peek = vif->pending_ring[pending_index(++head)];
-
-	} while (!pending_tx_is_head(vif, peek));
-
-	put_page(vif->mmap_pages[pending_idx]);
-	vif->mmap_pages[pending_idx] = NULL;
+	spin_lock_irqsave(&vif->response_lock, flags);
+	make_tx_response(vif, &pending_tx_info->req, status);
+	index = pending_index(vif->pending_prod);
+	vif->pending_ring[index] = pending_idx;
+	/* TX shouldn't use the index before we give it back here */
+	mb();
+	vif->pending_prod++;
+	spin_unlock_irqrestore(&vif->response_lock, flags);
 }
 
 
@@ -1469,23 +1685,74 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
 	return resp;
 }
 
+void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
+{
+	int ret;
+	struct gnttab_unmap_grant_ref tx_unmap_op;
+
+	gnttab_set_unmap_op(&tx_unmap_op,
+			    idx_to_kaddr(vif, pending_idx),
+			    GNTMAP_host_map,
+			    vif->grant_tx_handle[pending_idx]);
+	/* Btw. already unmapped? */
+	xenvif_grant_handle_reset(vif, pending_idx);
+
+	ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
+				&vif->mmap_pages[pending_idx], 1);
+	BUG_ON(ret);
+
+	xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+}
+
 static inline int rx_work_todo(struct xenvif *vif)
 {
-	return !skb_queue_empty(&vif->rx_queue) &&
-	       xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots);
+	return (!skb_queue_empty(&vif->rx_queue) &&
+	       xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
+	       vif->rx_queue_purge;
 }
 
 static inline int tx_work_todo(struct xenvif *vif)
 {
 
 	if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
-	    (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
-	     < MAX_PENDING_REQS))
+	    xenvif_tx_pending_slots_available(vif))
 		return 1;
 
 	return 0;
 }
 
+static void xenvif_dealloc_delay(unsigned long data)
+{
+	struct xenvif *vif = (struct xenvif *)data;
+
+	vif->dealloc_delay_timed_out = true;
+	wake_up(&vif->dealloc_wq);
+}
+
+static inline bool tx_dealloc_work_todo(struct xenvif *vif)
+{
+	if (vif->dealloc_cons != vif->dealloc_prod) {
+		if ((nr_free_slots(&vif->tx) > 2 * XEN_NETBK_LEGACY_SLOTS_MAX) &&
+		    (vif->dealloc_prod - vif->dealloc_cons < MAX_PENDING_REQS / 4) &&
+		    !vif->dealloc_delay_timed_out) {
+			if (!timer_pending(&vif->dealloc_delay)) {
+				vif->dealloc_delay.function =
+					xenvif_dealloc_delay;
+				vif->dealloc_delay.data = (unsigned long)vif;
+				mod_timer(&vif->dealloc_delay,
+					  jiffies + msecs_to_jiffies(1));
+
+			}
+			return false;
+		}
+		del_timer_sync(&vif->dealloc_delay);
+		vif->dealloc_delay_timed_out = false;
+		return true;
+	}
+
+	return false;
+}
+
 void xenvif_unmap_frontend_rings(struct xenvif *vif)
 {
 	if (vif->tx.sring)
@@ -1543,7 +1810,7 @@ static void xenvif_start_queue(struct xenvif *vif)
 		netif_wake_queue(vif->dev);
 }
 
-int xenvif_kthread(void *data)
+int xenvif_kthread_guest_rx(void *data)
 {
 	struct xenvif *vif = data;
 	struct sk_buff *skb;
@@ -1555,12 +1822,19 @@ int xenvif_kthread(void *data)
 		if (kthread_should_stop())
 			break;
 
+		if (vif->rx_queue_purge) {
+			skb_queue_purge(&vif->rx_queue);
+			vif->rx_queue_purge = false;
+		}
+
 		if (!skb_queue_empty(&vif->rx_queue))
 			xenvif_rx_action(vif);
 
 		if (skb_queue_empty(&vif->rx_queue) &&
-		    netif_queue_stopped(vif->dev))
+		    netif_queue_stopped(vif->dev)) {
+			del_timer_sync(&vif->wake_queue);
 			xenvif_start_queue(vif);
+		}
 
 		cond_resched();
 	}
@@ -1572,6 +1846,28 @@ int xenvif_kthread(void *data)
 	return 0;
 }
 
+int xenvif_dealloc_kthread(void *data)
+{
+	struct xenvif *vif = data;
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(vif->dealloc_wq,
+					 tx_dealloc_work_todo(vif) ||
+					 kthread_should_stop());
+		if (kthread_should_stop())
+			break;
+
+		xenvif_tx_dealloc_action(vif);
+		cond_resched();
+	}
+
+	/* Unmap anything remaining*/
+	if (tx_dealloc_work_todo(vif))
+		xenvif_tx_dealloc_action(vif);
+
+	return 0;
+}
+
 static int __init netback_init(void)
 {
 	int rc = 0;
@@ -1589,6 +1885,8 @@ static int __init netback_init(void)
 	if (rc)
 		goto failed_init;
 
+	rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+
 	return 0;
 
 failed_init:
diff --git a/net/ieee802154/6lowpan.h b/include/net/6lowpan.h
index 0dccf62434d5..f7d372b7d4ff 100644
--- a/net/ieee802154/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -53,6 +53,8 @@
 #ifndef __6LOWPAN_H__
 #define __6LOWPAN_H__
 
+#include <net/ipv6.h>
+
 #define UIP_802154_SHORTADDR_LEN	2  /* compressed ipv6 address length */
 #define UIP_IPH_LEN			40 /* ipv6 fixed header size */
 #define UIP_PROTO_UDP			17 /* ipv6 next header value for UDP */
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index adb3ea04adaa..73492b91105a 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -27,7 +27,7 @@
 
 #include "6lowpan.h"
 
-#include "../ieee802154/6lowpan.h" /* for the compression support */
+#include <net/6lowpan.h> /* for the compression support */
 
 #define IFACE_NAME_TEMPLATE "bt%d"
 #define EUI64_ADDR_LEN 8
diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c
index 860aa2d445ba..211b5686d719 100644
--- a/net/ieee802154/6lowpan_iphc.c
+++ b/net/ieee802154/6lowpan_iphc.c
@@ -54,11 +54,10 @@
 #include <linux/if_arp.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <net/6lowpan.h>
 #include <net/ipv6.h>
 #include <net/af_ieee802154.h>
 
-#include "6lowpan.h"
-
 /*
  * Uncompress address function for source and
  * destination address(non-multicast).
diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index e4726180fc36..1bbab8952f77 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -52,10 +52,10 @@
 #include <net/af_ieee802154.h>
 #include <net/ieee802154.h>
 #include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
 #include <net/ipv6.h>
 
 #include "reassembly.h"
-#include "6lowpan.h"
 
 static LIST_HEAD(lowpan_devices);
 
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index 4511fc22ef16..1cc2336eb52c 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -24,10 +24,10 @@
 #include <linux/export.h>
 
 #include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
 #include <net/ipv6.h>
 #include <net/inet_frag.h>
 
-#include "6lowpan.h"
 #include "reassembly.h"
 
 static struct inet_frags lowpan_frags;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a02c884d4321..bc0fb0fc7552 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -882,6 +882,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 			skb = skb_clone(skb, gfp_mask);
 		if (unlikely(!skb))
 			return -ENOBUFS;
+		/* Our usage of tstamp should remain private */
+		skb->tstamp.tv64 = 0;
 	}
 
 	inet = inet_sk(sk);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index e5dc42f0e527..9958c31c2c54 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1108,6 +1108,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 			  struct flowi *fl, size_t data_len)
 {
 	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct sock *sk = tunnel->sock;
 	unsigned int len = skb->len;
 	int error;
 
@@ -1131,7 +1132,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	/* Queue the packet to IP for output */
 	skb->local_df = 1;
 #if IS_ENABLED(CONFIG_IPV6)
-	if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+	if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
 		error = inet6_csk_xmit(skb, NULL);
 	else
 #endif
@@ -1151,23 +1152,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
 	return 0;
 }
 
-/* Automatically called when the skb is freed.
- */
-static void l2tp_sock_wfree(struct sk_buff *skb)
-{
-	sock_put(skb->sk);
-}
-
-/* For data skbs that we transmit, we associate with the tunnel socket
- * but don't do accounting.
- */
-static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
-	sock_hold(sk);
-	skb->sk = sk;
-	skb->destructor = l2tp_sock_wfree;
-}
-
 #if IS_ENABLED(CONFIG_IPV6)
 static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
 				int udp_len)
@@ -1221,7 +1205,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 		return NET_XMIT_DROP;
 	}
 
-	skb_orphan(skb);
 	/* Setup L2TP header */
 	session->build_header(session, __skb_push(skb, hdr_len));
 
@@ -1287,8 +1270,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 		break;
 	}
 
-	l2tp_skb_set_owner_w(skb, sk);
-
 	l2tp_xmit_core(session, skb, fl, data_len);
 out_unlock:
 	bh_unlock_sock(sk);
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index d1c3429b69ed..ec126f91276b 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -20,9 +20,8 @@ af-rxrpc-y := \
 	ar-skbuff.o \
 	ar-transport.o
 
-ifeq ($(CONFIG_PROC_FS),y)
-af-rxrpc-y += ar-proc.o
-endif
+af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o
+af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o
 
 obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
 
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index e61aa6001c65..7b1670489638 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -838,6 +838,12 @@ static int __init af_rxrpc_init(void)
 		goto error_key_type_s;
 	}
 
+	ret = rxrpc_sysctl_init();
+	if (ret < 0) {
+		printk(KERN_CRIT "RxRPC: Cannot register sysctls\n");
+		goto error_sysctls;
+	}
+
 #ifdef CONFIG_PROC_FS
 	proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops);
 	proc_create("rxrpc_conns", 0, init_net.proc_net,
@@ -845,6 +851,8 @@ static int __init af_rxrpc_init(void)
 #endif
 	return 0;
 
+error_sysctls:
+	unregister_key_type(&key_type_rxrpc_s);
 error_key_type_s:
 	unregister_key_type(&key_type_rxrpc);
 error_key_type:
@@ -865,6 +873,7 @@ error_call_jar:
 static void __exit af_rxrpc_exit(void)
 {
 	_enter("");
+	rxrpc_sysctl_exit();
 	unregister_key_type(&key_type_rxrpc_s);
 	unregister_key_type(&key_type_rxrpc);
 	sock_unregister(PF_RXRPC);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index cd97a0ce48d8..c6be17a959a6 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -19,7 +19,49 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-static unsigned int rxrpc_ack_defer = 1;
+/*
+ * How long to wait before scheduling ACK generation after seeing a
+ * packet with RXRPC_REQUEST_ACK set (in jiffies).
+ */
+unsigned rxrpc_requested_ack_delay = 1;
+
+/*
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ *
+ * We use this when we've received new data packets.  If those packets aren't
+ * all consumed within this time we will send a DELAY ACK if an ACK was not
+ * requested to let the sender know it doesn't need to resend.
+ */
+unsigned rxrpc_soft_ack_delay = 1 * HZ;
+
+/*
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ *
+ * We use this when we've consumed some previously soft-ACK'd packets when
+ * further packets aren't immediately received to decide when to send an IDLE
+ * ACK let the other end know that it can free up its Tx buffer space.
+ */
+unsigned rxrpc_idle_ack_delay = 0.5 * HZ;
+
+/*
+ * Receive window size in packets.  This indicates the maximum number of
+ * unconsumed received packets we're willing to retain in memory.  Once this
+ * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
+ * packets.
+ */
+unsigned rxrpc_rx_window_size = 32;
+
+/*
+ * Maximum Rx MTU size.  This indicates to the sender the size of jumbo packet
+ * made by gluing normal packets together that we're willing to handle.
+ */
+unsigned rxrpc_rx_mtu = 5692;
+
+/*
+ * The maximum number of fragments in a received jumbo packet that we tell the
+ * sender that we're willing to handle.
+ */
+unsigned rxrpc_rx_jumbo_max = 4;
 
 static const char *rxrpc_acks(u8 reason)
 {
@@ -82,24 +124,23 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
 	switch (ack_reason) {
 	case RXRPC_ACK_DELAY:
 		_debug("run delay timer");
-		call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
-		add_timer(&call->ack_timer);
-		return;
+		expiry = rxrpc_soft_ack_delay;
+		goto run_timer;
 
 	case RXRPC_ACK_IDLE:
 		if (!immediate) {
 			_debug("run defer timer");
-			expiry = 1;
+			expiry = rxrpc_idle_ack_delay;
 			goto run_timer;
 		}
 		goto cancel_timer;
 
 	case RXRPC_ACK_REQUESTED:
-		if (!rxrpc_ack_defer)
+		expiry = rxrpc_requested_ack_delay;
+		if (!expiry)
 			goto cancel_timer;
 		if (!immediate || serial == cpu_to_be32(1)) {
 			_debug("run defer timer");
-			expiry = rxrpc_ack_defer;
 			goto run_timer;
 		}
 
@@ -1174,11 +1215,11 @@ send_ACK:
 	mtu = call->conn->trans->peer->if_mtu;
 	mtu -= call->conn->trans->peer->hdrsize;
 	ackinfo.maxMTU	= htonl(mtu);
-	ackinfo.rwind	= htonl(32);
+	ackinfo.rwind	= htonl(rxrpc_rx_window_size);
 
 	/* permit the peer to send us jumbo packets if it wants to */
-	ackinfo.rxMTU	= htonl(5692);
-	ackinfo.jumbo_max = htonl(4);
+	ackinfo.rxMTU	= htonl(rxrpc_rx_mtu);
+	ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
 
 	hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
 	_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index a3bbb360a3f9..a9e05db0f5d5 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -12,10 +12,22 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/circ_buf.h>
+#include <linux/hashtable.h>
+#include <linux/spinlock_types.h>
 #include <net/sock.h>
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
+/*
+ * Maximum lifetime of a call (in jiffies).
+ */
+unsigned rxrpc_max_call_lifetime = 60 * HZ;
+
+/*
+ * Time till dead call expires after last use (in jiffies).
+ */
+unsigned rxrpc_dead_call_expiry = 2 * HZ;
+
 const char *const rxrpc_call_states[] = {
 	[RXRPC_CALL_CLIENT_SEND_REQUEST]	= "ClSndReq",
 	[RXRPC_CALL_CLIENT_AWAIT_REPLY]		= "ClAwtRpl",
@@ -38,8 +50,6 @@ const char *const rxrpc_call_states[] = {
 struct kmem_cache *rxrpc_call_jar;
 LIST_HEAD(rxrpc_calls);
 DEFINE_RWLOCK(rxrpc_call_lock);
-static unsigned int rxrpc_call_max_lifetime = 60;
-static unsigned int rxrpc_dead_call_timeout = 2;
 
 static void rxrpc_destroy_call(struct work_struct *work);
 static void rxrpc_call_life_expired(unsigned long _call);
@@ -47,6 +57,145 @@ static void rxrpc_dead_call_expired(unsigned long _call);
 static void rxrpc_ack_time_expired(unsigned long _call);
 static void rxrpc_resend_time_expired(unsigned long _call);
 
+static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
+static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
+
+/*
+ * Hash function for rxrpc_call_hash
+ */
+static unsigned long rxrpc_call_hashfunc(
+	u8		clientflag,
+	__be32		cid,
+	__be32		call_id,
+	__be32		epoch,
+	__be16		service_id,
+	sa_family_t	proto,
+	void		*localptr,
+	unsigned int	addr_size,
+	const u8	*peer_addr)
+{
+	const u16 *p;
+	unsigned int i;
+	unsigned long key;
+	u32 hcid = ntohl(cid);
+
+	_enter("");
+
+	key = (unsigned long)localptr;
+	/* We just want to add up the __be32 values, so forcing the
+	 * cast should be okay.
+	 */
+	key += (__force u32)epoch;
+	key += (__force u16)service_id;
+	key += (__force u32)call_id;
+	key += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
+	key += hcid & RXRPC_CHANNELMASK;
+	key += clientflag;
+	key += proto;
+	/* Step through the peer address in 16-bit portions for speed */
+	for (i = 0, p = (const u16 *)peer_addr; i < addr_size >> 1; i++, p++)
+		key += *p;
+	_leave(" key = 0x%lx", key);
+	return key;
+}
+
+/*
+ * Add a call to the hashtable
+ */
+static void rxrpc_call_hash_add(struct rxrpc_call *call)
+{
+	unsigned long key;
+	unsigned int addr_size = 0;
+
+	_enter("");
+	switch (call->proto) {
+	case AF_INET:
+		addr_size = sizeof(call->peer_ip.ipv4_addr);
+		break;
+	case AF_INET6:
+		addr_size = sizeof(call->peer_ip.ipv6_addr);
+		break;
+	default:
+		break;
+	}
+	key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
+				  call->call_id, call->epoch,
+				  call->service_id, call->proto,
+				  call->conn->trans->local, addr_size,
+				  call->peer_ip.ipv6_addr);
+	/* Store the full key in the call */
+	call->hash_key = key;
+	spin_lock(&rxrpc_call_hash_lock);
+	hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
+	spin_unlock(&rxrpc_call_hash_lock);
+	_leave("");
+}
+
+/*
+ * Remove a call from the hashtable
+ */
+static void rxrpc_call_hash_del(struct rxrpc_call *call)
+{
+	_enter("");
+	spin_lock(&rxrpc_call_hash_lock);
+	hash_del_rcu(&call->hash_node);
+	spin_unlock(&rxrpc_call_hash_lock);
+	_leave("");
+}
+
+/*
+ * Find a call in the hashtable and return it, or NULL if it
+ * isn't there.
+ */
+struct rxrpc_call *rxrpc_find_call_hash(
+	u8		clientflag,
+	__be32		cid,
+	__be32		call_id,
+	__be32		epoch,
+	__be16		service_id,
+	void		*localptr,
+	sa_family_t	proto,
+	const u8	*peer_addr)
+{
+	unsigned long key;
+	unsigned int addr_size = 0;
+	struct rxrpc_call *call = NULL;
+	struct rxrpc_call *ret = NULL;
+
+	_enter("");
+	switch (proto) {
+	case AF_INET:
+		addr_size = sizeof(call->peer_ip.ipv4_addr);
+		break;
+	case AF_INET6:
+		addr_size = sizeof(call->peer_ip.ipv6_addr);
+		break;
+	default:
+		break;
+	}
+
+	key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch,
+				  service_id, proto, localptr, addr_size,
+				  peer_addr);
+	hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
+		if (call->hash_key == key &&
+		    call->call_id == call_id &&
+		    call->cid == cid &&
+		    call->in_clientflag == clientflag &&
+		    call->service_id == service_id &&
+		    call->proto == proto &&
+		    call->local == localptr &&
+		    memcmp(call->peer_ip.ipv6_addr, peer_addr,
+			      addr_size) == 0 &&
+		    call->epoch == epoch) {
+			ret = call;
+			break;
+		}
+	}
+	_leave(" = %p", ret);
+	return ret;
+}
+
 /*
  * allocate a new call
  */
@@ -91,7 +240,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
 	call->rx_data_expect = 1;
 	call->rx_data_eaten = 0;
 	call->rx_first_oos = 0;
-	call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
+	call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size;
 	call->creation_jif = jiffies;
 	return call;
 }
@@ -128,11 +277,31 @@ static struct rxrpc_call *rxrpc_alloc_client_call(
 		return ERR_PTR(ret);
 	}
 
+	/* Record copies of information for hashtable lookup */
+	call->proto = rx->proto;
+	call->local = trans->local;
+	switch (call->proto) {
+	case AF_INET:
+		call->peer_ip.ipv4_addr =
+			trans->peer->srx.transport.sin.sin_addr.s_addr;
+		break;
+	case AF_INET6:
+		memcpy(call->peer_ip.ipv6_addr,
+		       trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+		       sizeof(call->peer_ip.ipv6_addr));
+		break;
+	}
+	call->epoch = call->conn->epoch;
+	call->service_id = call->conn->service_id;
+	call->in_clientflag = call->conn->in_clientflag;
+	/* Add the new call to the hashtable */
+	rxrpc_call_hash_add(call);
+
 	spin_lock(&call->conn->trans->peer->lock);
 	list_add(&call->error_link, &call->conn->trans->peer->error_targets);
 	spin_unlock(&call->conn->trans->peer->lock);
 
-	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
 	add_timer(&call->lifetimer);
 
 	_leave(" = %p", call);
@@ -320,9 +489,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 		parent = *p;
 		call = rb_entry(parent, struct rxrpc_call, conn_node);
 
-		if (call_id < call->call_id)
+		/* The tree is sorted in order of the __be32 value without
+		 * turning it into host order.
+		 */
+		if ((__force u32)call_id < (__force u32)call->call_id)
 			p = &(*p)->rb_left;
-		else if (call_id > call->call_id)
+		else if ((__force u32)call_id > (__force u32)call->call_id)
 			p = &(*p)->rb_right;
 		else
 			goto old_call;
@@ -347,9 +519,31 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
 	list_add_tail(&call->link, &rxrpc_calls);
 	write_unlock_bh(&rxrpc_call_lock);
 
+	/* Record copies of information for hashtable lookup */
+	call->proto = rx->proto;
+	call->local = conn->trans->local;
+	switch (call->proto) {
+	case AF_INET:
+		call->peer_ip.ipv4_addr =
+			conn->trans->peer->srx.transport.sin.sin_addr.s_addr;
+		break;
+	case AF_INET6:
+		memcpy(call->peer_ip.ipv6_addr,
+		       conn->trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+		       sizeof(call->peer_ip.ipv6_addr));
+		break;
+	default:
+		break;
+	}
+	call->epoch = conn->epoch;
+	call->service_id = conn->service_id;
+	call->in_clientflag = conn->in_clientflag;
+	/* Add the new call to the hashtable */
+	rxrpc_call_hash_add(call);
+
 	_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
 
-	call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+	call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
 	add_timer(&call->lifetimer);
 	_leave(" = %p {%d} [new]", call, call->debug_id);
 	return call;
@@ -533,7 +727,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
 	del_timer_sync(&call->resend_timer);
 	del_timer_sync(&call->ack_timer);
 	del_timer_sync(&call->lifetimer);
-	call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
+	call->deadspan.expires = jiffies + rxrpc_dead_call_expiry;
 	add_timer(&call->deadspan);
 
 	_leave("");
@@ -665,6 +859,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
 		rxrpc_put_connection(call->conn);
 	}
 
+	/* Remove the call from the hash */
+	rxrpc_call_hash_del(call);
+
 	if (call->acks_window) {
 		_debug("kill Tx window %d",
 		       CIRC_CNT(call->acks_head, call->acks_tail,
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 7bf5b5b9e8b9..6631f4f1e39b 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -18,11 +18,15 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
+/*
+ * Time till a connection expires after last use (in seconds).
+ */
+unsigned rxrpc_connection_expiry = 10 * 60;
+
 static void rxrpc_connection_reaper(struct work_struct *work);
 
 LIST_HEAD(rxrpc_connections);
 DEFINE_RWLOCK(rxrpc_connection_lock);
-static unsigned long rxrpc_connection_timeout = 10 * 60;
 static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
 
 /*
@@ -862,7 +866,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
 
 		spin_lock(&conn->trans->client_lock);
 		write_lock(&conn->trans->conn_lock);
-		reap_time = conn->put_time + rxrpc_connection_timeout;
+		reap_time = conn->put_time + rxrpc_connection_expiry;
 
 		if (atomic_read(&conn->usage) > 0) {
 			;
@@ -916,7 +920,7 @@ void __exit rxrpc_destroy_all_connections(void)
 {
 	_enter("");
 
-	rxrpc_connection_timeout = 0;
+	rxrpc_connection_expiry = 0;
 	cancel_delayed_work(&rxrpc_connection_reap);
 	rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
 
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index a9206087b4d7..db57458c824c 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -83,6 +83,7 @@ void rxrpc_UDP_error_report(struct sock *sk)
 
 		if (mtu == 0) {
 			/* they didn't give us a size, estimate one */
+			mtu = peer->if_mtu;
 			if (mtu > 1500) {
 				mtu >>= 1;
 				if (mtu < 1500)
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 529572f18d1f..73742647c135 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -25,8 +25,6 @@
 #include <net/net_namespace.h>
 #include "ar-internal.h"
 
-unsigned long rxrpc_ack_timeout = 1;
-
 const char *rxrpc_pkts[] = {
 	"?00",
 	"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
@@ -349,8 +347,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
 	 * it */
 	if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
 		_proto("ACK Requested on %%%u", serial);
-		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
-				  !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+		rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
 	}
 
 	switch (sp->hdr.type) {
@@ -526,36 +523,38 @@ protocol_error:
  * post an incoming packet to the appropriate call/socket to deal with
  * - must get rid of the sk_buff, either by freeing it or by queuing it
  */
-static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
 				      struct sk_buff *skb)
 {
 	struct rxrpc_skb_priv *sp;
-	struct rxrpc_call *call;
-	struct rb_node *p;
-	__be32 call_id;
-
-	_enter("%p,%p", conn, skb);
 
-	read_lock_bh(&conn->lock);
+	_enter("%p,%p", call, skb);
 
 	sp = rxrpc_skb(skb);
 
-	/* look at extant calls by channel number first */
-	call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
-	if (!call || call->call_id != sp->hdr.callNumber)
-		goto call_not_extant;
-
 	_debug("extant call [%d]", call->state);
-	ASSERTCMP(call->conn, ==, conn);
 
 	read_lock(&call->state_lock);
 	switch (call->state) {
 	case RXRPC_CALL_LOCALLY_ABORTED:
-		if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+		if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) {
 			rxrpc_queue_call(call);
+			goto free_unlock;
+		}
 	case RXRPC_CALL_REMOTELY_ABORTED:
 	case RXRPC_CALL_NETWORK_ERROR:
 	case RXRPC_CALL_DEAD:
+		goto dead_call;
+	case RXRPC_CALL_COMPLETE:
+	case RXRPC_CALL_CLIENT_FINAL_ACK:
+		/* complete server call */
+		if (call->conn->in_clientflag)
+			goto dead_call;
+		/* resend last packet of a completed call */
+		_debug("final ack again");
+		rxrpc_get_call(call);
+		set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+		rxrpc_queue_call(call);
 		goto free_unlock;
 	default:
 		break;
@@ -563,7 +562,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
 
 	read_unlock(&call->state_lock);
 	rxrpc_get_call(call);
-	read_unlock_bh(&conn->lock);
 
 	if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
 	    sp->hdr.flags & RXRPC_JUMBO_PACKET)
@@ -574,78 +572,16 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
 	rxrpc_put_call(call);
 	goto done;
 
-call_not_extant:
-	/* search the completed calls in case what we're dealing with is
-	 * there */
-	_debug("call not extant");
-
-	call_id = sp->hdr.callNumber;
-	p = conn->calls.rb_node;
-	while (p) {
-		call = rb_entry(p, struct rxrpc_call, conn_node);
-
-		if (call_id < call->call_id)
-			p = p->rb_left;
-		else if (call_id > call->call_id)
-			p = p->rb_right;
-		else
-			goto found_completed_call;
-	}
-
 dead_call:
-	/* it's a either a really old call that we no longer remember or its a
-	 * new incoming call */
-	read_unlock_bh(&conn->lock);
-
-	if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
-	    sp->hdr.seq == cpu_to_be32(1)) {
-		_debug("incoming call");
-		skb_queue_tail(&conn->trans->local->accept_queue, skb);
-		rxrpc_queue_work(&conn->trans->local->acceptor);
-		goto done;
-	}
-
-	_debug("dead call");
-	skb->priority = RX_CALL_DEAD;
-	rxrpc_reject_packet(conn->trans->local, skb);
-	goto done;
-
-	/* resend last packet of a completed call
-	 * - client calls may have been aborted or ACK'd
-	 * - server calls may have been aborted
-	 */
-found_completed_call:
-	_debug("completed call");
-
-	if (atomic_read(&call->usage) == 0)
-		goto dead_call;
-
-	/* synchronise any state changes */
-	read_lock(&call->state_lock);
-	ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
-		    call->state, >=, RXRPC_CALL_COMPLETE);
-
-	if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
-	    call->state == RXRPC_CALL_REMOTELY_ABORTED ||
-	    call->state == RXRPC_CALL_DEAD) {
-		read_unlock(&call->state_lock);
-		goto dead_call;
-	}
-
-	if (call->conn->in_clientflag) {
-		read_unlock(&call->state_lock);
-		goto dead_call; /* complete server call */
+	if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+		skb->priority = RX_CALL_DEAD;
+		rxrpc_reject_packet(call->conn->trans->local, skb);
+		goto unlock;
 	}
-
-	_debug("final ack again");
-	rxrpc_get_call(call);
-	set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
-	rxrpc_queue_call(call);
-
 free_unlock:
-	read_unlock(&call->state_lock);
-	read_unlock_bh(&conn->lock);
 	rxrpc_free_skb(skb);
+unlock:
+	read_unlock(&call->state_lock);
 done:
 	_leave("");
 }
@@ -664,17 +600,42 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
 	rxrpc_queue_conn(conn);
 }
 
+static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
+					       struct sk_buff *skb,
+					       struct rxrpc_skb_priv *sp)
+{
+	struct rxrpc_peer *peer;
+	struct rxrpc_transport *trans;
+	struct rxrpc_connection *conn;
+
+	peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr,
+				udp_hdr(skb)->source);
+	if (IS_ERR(peer))
+		goto cant_find_conn;
+
+	trans = rxrpc_find_transport(local, peer);
+	rxrpc_put_peer(peer);
+	if (!trans)
+		goto cant_find_conn;
+
+	conn = rxrpc_find_connection(trans, &sp->hdr);
+	rxrpc_put_transport(trans);
+	if (!conn)
+		goto cant_find_conn;
+
+	return conn;
+cant_find_conn:
+	return NULL;
+}
+
 /*
  * handle data received on the local endpoint
  * - may be called in interrupt context
  */
 void rxrpc_data_ready(struct sock *sk, int count)
 {
-	struct rxrpc_connection *conn;
-	struct rxrpc_transport *trans;
 	struct rxrpc_skb_priv *sp;
 	struct rxrpc_local *local;
-	struct rxrpc_peer *peer;
 	struct sk_buff *skb;
 	int ret;
 
@@ -749,27 +710,34 @@ void rxrpc_data_ready(struct sock *sk, int count)
 	    (sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
 		goto bad_message;
 
-	peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
-	if (IS_ERR(peer))
-		goto cant_route_call;
+	if (sp->hdr.callNumber == 0) {
+		/* This is a connection-level packet. These should be
+		 * fairly rare, so the extra overhead of looking them up the
+		 * old-fashioned way doesn't really hurt */
+		struct rxrpc_connection *conn;
 
-	trans = rxrpc_find_transport(local, peer);
-	rxrpc_put_peer(peer);
-	if (!trans)
-		goto cant_route_call;
+		conn = rxrpc_conn_from_local(local, skb, sp);
+		if (!conn)
+			goto cant_route_call;
 
-	conn = rxrpc_find_connection(trans, &sp->hdr);
-	rxrpc_put_transport(trans);
-	if (!conn)
-		goto cant_route_call;
-
-	_debug("CONN %p {%d}", conn, conn->debug_id);
-
-	if (sp->hdr.callNumber == 0)
+		_debug("CONN %p {%d}", conn, conn->debug_id);
 		rxrpc_post_packet_to_conn(conn, skb);
-	else
-		rxrpc_post_packet_to_call(conn, skb);
-	rxrpc_put_connection(conn);
+		rxrpc_put_connection(conn);
+	} else {
+		struct rxrpc_call *call;
+		u8 in_clientflag = 0;
+
+		if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
+			in_clientflag = RXRPC_CLIENT_INITIATED;
+		call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid,
+					    sp->hdr.callNumber, sp->hdr.epoch,
+					    sp->hdr.serviceId, local, AF_INET,
+					    (u8 *)&ip_hdr(skb)->saddr);
+		if (call)
+			rxrpc_post_packet_to_call(call, skb);
+		else
+			goto cant_route_call;
+	}
 	rxrpc_put_local(local);
 	return;
 
@@ -790,8 +758,10 @@ cant_route_call:
 		skb->priority = RX_CALL_DEAD;
 	}
 
-	_debug("reject");
-	rxrpc_reject_packet(local, skb);
+	if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+		_debug("reject type %d",sp->hdr.type);
+		rxrpc_reject_packet(local, skb);
+	}
 	rxrpc_put_local(local);
 	_leave(" [no call]");
 	return;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5f43675ee1df..c831d44b0841 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -396,9 +396,20 @@ struct rxrpc_call {
 #define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
 	unsigned long		ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
 
+	struct hlist_node	hash_node;
+	unsigned long		hash_key;	/* Full hash key */
+	u8			in_clientflag;	/* Copy of conn->in_clientflag for hashing */
+	struct rxrpc_local	*local;		/* Local endpoint. Used for hashing. */
+	sa_family_t		proto;		/* Frame protocol */
 	/* the following should all be in net order */
 	__be32			cid;		/* connection ID + channel index  */
 	__be32			call_id;	/* call ID on connection  */
+	__be32			epoch;		/* epoch of this connection */
+	__be16			service_id;	/* service ID */
+	union {					/* Peer IP address for hashing */
+		__be32	ipv4_addr;
+		__u8	ipv6_addr[16];		/* Anticipates eventual IPv6 support */
+	} peer_ip;
 };
 
 /*
@@ -433,6 +444,13 @@ int rxrpc_reject_call(struct rxrpc_sock *);
 /*
  * ar-ack.c
  */
+extern unsigned rxrpc_requested_ack_delay;
+extern unsigned rxrpc_soft_ack_delay;
+extern unsigned rxrpc_idle_ack_delay;
+extern unsigned rxrpc_rx_window_size;
+extern unsigned rxrpc_rx_mtu;
+extern unsigned rxrpc_rx_jumbo_max;
+
 void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
 void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
 void rxrpc_process_call(struct work_struct *);
@@ -440,10 +458,14 @@ void rxrpc_process_call(struct work_struct *);
 /*
  * ar-call.c
  */
+extern unsigned rxrpc_max_call_lifetime;
+extern unsigned rxrpc_dead_call_expiry;
 extern struct kmem_cache *rxrpc_call_jar;
 extern struct list_head rxrpc_calls;
 extern rwlock_t rxrpc_call_lock;
 
+struct rxrpc_call *rxrpc_find_call_hash(u8,  __be32, __be32, __be32,
+					__be16, void *, sa_family_t, const u8 *);
 struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
 					 struct rxrpc_transport *,
 					 struct rxrpc_conn_bundle *,
@@ -460,6 +482,7 @@ void __exit rxrpc_destroy_all_calls(void);
 /*
  * ar-connection.c
  */
+extern unsigned rxrpc_connection_expiry;
 extern struct list_head rxrpc_connections;
 extern rwlock_t rxrpc_connection_lock;
 
@@ -493,7 +516,6 @@ void rxrpc_UDP_error_handler(struct work_struct *);
 /*
  * ar-input.c
  */
-extern unsigned long rxrpc_ack_timeout;
 extern const char *rxrpc_pkts[];
 
 void rxrpc_data_ready(struct sock *, int);
@@ -504,6 +526,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
  * ar-local.c
  */
 extern rwlock_t rxrpc_local_lock;
+
 struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
 void rxrpc_put_local(struct rxrpc_local *);
 void __exit rxrpc_destroy_all_locals(void);
@@ -522,7 +545,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
 /*
  * ar-output.c
  */
-extern int rxrpc_resend_timeout;
+extern unsigned rxrpc_resend_timeout;
 
 int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
 int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
@@ -572,6 +595,8 @@ void rxrpc_packet_destructor(struct sk_buff *);
 /*
  * ar-transport.c
  */
+extern unsigned rxrpc_transport_expiry;
+
 struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
 					    struct rxrpc_peer *, gfp_t);
 void rxrpc_put_transport(struct rxrpc_transport *);
@@ -580,6 +605,17 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
 					     struct rxrpc_peer *);
 
 /*
+ * sysctl.c
+ */
+#ifdef CONFIG_SYSCTL
+extern int __init rxrpc_sysctl_init(void);
+extern void rxrpc_sysctl_exit(void);
+#else
+static inline int __init rxrpc_sysctl_init(void) { return 0; }
+static inline void rxrpc_sysctl_exit(void) {}
+#endif
+
+/*
  * debug tracing
  */
 extern unsigned int rxrpc_debug;
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index d0e8f1c1898a..0b4b9a79f5ab 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -18,7 +18,10 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
-int rxrpc_resend_timeout = 4;
+/*
+ * Time till packet resend (in jiffies).
+ */
+unsigned rxrpc_resend_timeout = 4 * HZ;
 
 static int rxrpc_send_data(struct kiocb *iocb,
 			   struct rxrpc_sock *rx,
@@ -487,7 +490,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	       ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
 
 	sp->need_resend = false;
-	sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+	sp->resend_at = jiffies + rxrpc_resend_timeout;
 	if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
 		_debug("run timer");
 		call->resend_timer.expires = sp->resend_at;
@@ -666,6 +669,7 @@ static int rxrpc_send_data(struct kiocb *iocb,
 		/* add the packet to the send queue if it's now full */
 		if (sp->remain <= 0 || (segment == 0 && !more)) {
 			struct rxrpc_connection *conn = call->conn;
+			uint32_t seq;
 			size_t pad;
 
 			/* pad out if we're using security */
@@ -678,11 +682,12 @@ static int rxrpc_send_data(struct kiocb *iocb,
 					memset(skb_put(skb, pad), 0, pad);
 			}
 
+			seq = atomic_inc_return(&call->sequence);
+
 			sp->hdr.epoch = conn->epoch;
 			sp->hdr.cid = call->cid;
 			sp->hdr.callNumber = call->call_id;
-			sp->hdr.seq =
-				htonl(atomic_inc_return(&call->sequence));
+			sp->hdr.seq = htonl(seq);
 			sp->hdr.serial =
 				htonl(atomic_inc_return(&conn->serial));
 			sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
@@ -697,6 +702,8 @@ static int rxrpc_send_data(struct kiocb *iocb,
 			else if (CIRC_SPACE(call->acks_head, call->acks_tail,
 					    call->acks_winsz) > 1)
 				sp->hdr.flags |= RXRPC_MORE_PACKETS;
+			if (more && seq & 1)
+				sp->hdr.flags |= RXRPC_REQUEST_ACK;
 
 			ret = rxrpc_secure_packet(
 				call, skb, skb->mark,
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 34b5490dde65..e9aaa65c0778 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -180,16 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
 		if (copy > len - copied)
 			copy = len - copied;
 
-		if (skb->ip_summed == CHECKSUM_UNNECESSARY ||
-		    skb->ip_summed == CHECKSUM_PARTIAL) {
-			ret = skb_copy_datagram_iovec(skb, offset,
-						      msg->msg_iov, copy);
-		} else {
-			ret = skb_copy_and_csum_datagram_iovec(skb, offset,
-							       msg->msg_iov);
-			if (ret == -EINVAL)
-				goto csum_copy_error;
-		}
+		ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy);
 
 		if (ret < 0)
 			goto copy_error;
@@ -348,20 +339,6 @@ copy_error:
 	_leave(" = %d", ret);
 	return ret;
 
-csum_copy_error:
-	_debug("csum error");
-	release_sock(&rx->sk);
-	if (continue_call)
-		rxrpc_put_call(continue_call);
-	rxrpc_kill_skb(skb);
-	if (!(flags & MSG_PEEK)) {
-		if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
-			BUG();
-	}
-	skb_kill_datagram(&rx->sk, skb, flags);
-	rxrpc_put_call(call);
-	return -EAGAIN;
-
 wait_interrupted:
 	ret = sock_intr_errno(timeo);
 wait_error:
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index de755e04d29c..4cfab49e329d 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -83,9 +83,14 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
 		rxrpc_request_final_ACK(call);
 	} else if (atomic_dec_and_test(&call->ackr_not_idle) &&
 		   test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+		/* We previously soft-ACK'd some received packets that have now
+		 * been consumed, so send a hard-ACK if no more packets are
+		 * immediately forthcoming to allow the transmitter to free up
+		 * its Tx bufferage.
+		 */
 		_debug("send Rx idle ACK");
 		__rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
-				    true);
+				    false);
 	}
 
 	spin_unlock_bh(&call->lock);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 92df566930b9..1976dec84f29 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -17,11 +17,15 @@
 #include <net/af_rxrpc.h>
 #include "ar-internal.h"
 
+/*
+ * Time after last use at which transport record is cleaned up.
+ */
+unsigned rxrpc_transport_expiry = 3600 * 24;
+
 static void rxrpc_transport_reaper(struct work_struct *work);
 
 static LIST_HEAD(rxrpc_transports);
 static DEFINE_RWLOCK(rxrpc_transport_lock);
-static unsigned long rxrpc_transport_timeout = 3600 * 24;
 static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
 
 /*
@@ -235,7 +239,7 @@ static void rxrpc_transport_reaper(struct work_struct *work)
 		if (likely(atomic_read(&trans->usage) > 0))
 			continue;
 
-		reap_time = trans->put_time + rxrpc_transport_timeout;
+		reap_time = trans->put_time + rxrpc_transport_expiry;
 		if (reap_time <= now)
 			list_move_tail(&trans->link, &graveyard);
 		else if (reap_time < earliest)
@@ -271,7 +275,7 @@ void __exit rxrpc_destroy_all_transports(void)
 {
 	_enter("");
 
-	rxrpc_transport_timeout = 0;
+	rxrpc_transport_expiry = 0;
 	cancel_delayed_work(&rxrpc_transport_reap);
 	rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
 
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
new file mode 100644
index 000000000000..50a98a910eb1
--- /dev/null
+++ b/net/rxrpc/sysctl.c
@@ -0,0 +1,146 @@
+/* sysctls for configuring RxRPC operating parameters
+ *
+ * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells ([email protected])
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sysctl.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static struct ctl_table_header *rxrpc_sysctl_reg_table;
+static const unsigned zero = 0;
+static const unsigned one = 1;
+static const unsigned four = 4;
+static const unsigned n_65535 = 65535;
+static const unsigned n_max_acks = RXRPC_MAXACKS;
+
+/*
+ * RxRPC operating parameters.
+ *
+ * See Documentation/networking/rxrpc.txt and the variable definitions for more
+ * information on the individual parameters.
+ */
+static struct ctl_table rxrpc_sysctl_table[] = {
+	/* Values measured in milliseconds */
+	{
+		.procname	= "req_ack_delay",
+		.data		= &rxrpc_requested_ack_delay,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&zero,
+	},
+	{
+		.procname	= "soft_ack_delay",
+		.data		= &rxrpc_soft_ack_delay,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "idle_ack_delay",
+		.data		= &rxrpc_idle_ack_delay,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "resend_timeout",
+		.data		= &rxrpc_resend_timeout,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&one,
+	},
+
+	/* Values measured in seconds but used in jiffies */
+	{
+		.procname	= "max_call_lifetime",
+		.data		= &rxrpc_max_call_lifetime,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "dead_call_expiry",
+		.data		= &rxrpc_dead_call_expiry,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+		.extra1		= (void *)&one,
+	},
+
+	/* Values measured in seconds */
+	{
+		.procname	= "connection_expiry",
+		.data		= &rxrpc_connection_expiry,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "transport_expiry",
+		.data		= &rxrpc_transport_expiry,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+	},
+
+	/* Non-time values */
+	{
+		.procname	= "rx_window_size",
+		.data		= &rxrpc_rx_window_size,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra2		= (void *)&n_max_acks,
+	},
+	{
+		.procname	= "rx_mtu",
+		.data		= &rxrpc_rx_mtu,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra1		= (void *)&n_65535,
+	},
+	{
+		.procname	= "rx_jumbo_max",
+		.data		= &rxrpc_rx_jumbo_max,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra2		= (void *)&four,
+	},
+
+	{ }
+};
+
+int __init rxrpc_sysctl_init(void)
+{
+	rxrpc_sysctl_reg_table = register_net_sysctl(&init_net, "net/rxrpc",
+						     rxrpc_sysctl_table);
+	if (!rxrpc_sysctl_reg_table)
+		return -ENOMEM;
+	return 0;
+}
+
+void rxrpc_sysctl_exit(void)
+{
+	if (rxrpc_sysctl_reg_table)
+		unregister_net_sysctl_table(rxrpc_sysctl_reg_table);
+}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 722e137df244..9f949abcacef 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1062,12 +1062,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
 
 static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
-	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
 	struct htb_sched *q = qdisc_priv(sch);
 	struct nlattr *nest;
 	struct tc_htb_glob gopt;
 
-	spin_lock_bh(root_lock);
+	/* Its safe to not acquire qdisc lock. As we hold RTNL,
+	 * no change can happen on the qdisc parameters.
+	 */
 
 	gopt.direct_pkts = q->direct_pkts;
 	gopt.version = HTB_VER;
@@ -1081,13 +1082,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
 	    nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
 		goto nla_put_failure;
-	nla_nest_end(skb, nest);
 
-	spin_unlock_bh(root_lock);
-	return skb->len;
+	return nla_nest_end(skb, nest);
 
 nla_put_failure:
-	spin_unlock_bh(root_lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }
@@ -1096,11 +1094,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 			  struct sk_buff *skb, struct tcmsg *tcm)
 {
 	struct htb_class *cl = (struct htb_class *)arg;
-	spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
 	struct nlattr *nest;
 	struct tc_htb_opt opt;
 
-	spin_lock_bh(root_lock);
+	/* Its safe to not acquire qdisc lock. As we hold RTNL,
+	 * no change can happen on the class parameters.
+	 */
 	tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
 	tcm->tcm_handle = cl->common.classid;
 	if (!cl->level && cl->un.leaf.q)
@@ -1128,12 +1127,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
 	    nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
 		goto nla_put_failure;
 
-	nla_nest_end(skb, nest);
-	spin_unlock_bh(root_lock);
-	return skb->len;
+	return nla_nest_end(skb, nest);
 
 nla_put_failure:
-	spin_unlock_bh(root_lock);
 	nla_nest_cancel(skb, nest);
 	return -1;
 }