aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/rxrpc.txt81
-rw-r--r--drivers/net/bonding/bond_options.c104
-rw-r--r--drivers/net/bonding/bond_options.h5
-rw-r--r--drivers/net/bonding/bond_procfs.c2
-rw-r--r--drivers/net/ethernet/emulex/benet/be_ethtool.c16
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e.h37
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_common.c366
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_dcb.c9
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_debugfs.c27
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_ethtool.c429
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c70
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_nvm.c117
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_prototype.h7
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_txrx.c310
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c2
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_common.c366
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_prototype.h7
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40e_txrx.c33
-rw-r--r--drivers/net/ethernet/intel/i40evf/i40evf_main.c7
-rw-r--r--drivers/net/ethernet/mellanox/mlx4/en_main.c2
-rw-r--r--drivers/net/usb/r8152.c323
-rw-r--r--drivers/net/xen-netback/common.h113
-rw-r--r--drivers/net/xen-netback/interface.c128
-rw-r--r--drivers/net/xen-netback/netback.c788
-rw-r--r--include/net/6lowpan.h (renamed from net/ieee802154/6lowpan.h)2
-rw-r--r--net/bluetooth/6lowpan.c2
-rw-r--r--net/ieee802154/6lowpan_iphc.c3
-rw-r--r--net/ieee802154/6lowpan_rtnl.c2
-rw-r--r--net/ieee802154/reassembly.c2
-rw-r--r--net/ipv4/tcp_output.c2
-rw-r--r--net/l2tp/l2tp_core.c23
-rw-r--r--net/rxrpc/Makefile5
-rw-r--r--net/rxrpc/af_rxrpc.c9
-rw-r--r--net/rxrpc/ar-ack.c61
-rw-r--r--net/rxrpc/ar-call.c213
-rw-r--r--net/rxrpc/ar-connection.c10
-rw-r--r--net/rxrpc/ar-error.c1
-rw-r--r--net/rxrpc/ar-input.c190
-rw-r--r--net/rxrpc/ar-internal.h40
-rw-r--r--net/rxrpc/ar-output.c15
-rw-r--r--net/rxrpc/ar-recvmsg.c25
-rw-r--r--net/rxrpc/ar-skbuff.c7
-rw-r--r--net/rxrpc/ar-transport.c10
-rw-r--r--net/rxrpc/sysctl.c146
-rw-r--r--net/sched/sch_htb.c20
45 files changed, 3164 insertions, 973 deletions
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
index b89bc82eed46..16a924c486bf 100644
--- a/Documentation/networking/rxrpc.txt
+++ b/Documentation/networking/rxrpc.txt
@@ -27,6 +27,8 @@ Contents of this document:
(*) AF_RXRPC kernel interface.
+ (*) Configurable parameters.
+
========
OVERVIEW
@@ -864,3 +866,82 @@ The kernel interface functions are as follows:
This is used to allocate a null RxRPC key that can be used to indicate
anonymous security for a particular domain.
+
+
+=======================
+CONFIGURABLE PARAMETERS
+=======================
+
+The RxRPC protocol driver has a number of configurable parameters that can be
+adjusted through sysctls in /proc/net/rxrpc/:
+
+ (*) req_ack_delay
+
+ The amount of time in milliseconds after receiving a packet with the
+ request-ack flag set before we honour the flag and actually send the
+ requested ack.
+
+ Usually the other side won't stop sending packets until the advertised
+ reception window is full (to a maximum of 255 packets), so delaying the
+ ACK permits several packets to be ACK'd in one go.
+
+ (*) soft_ack_delay
+
+ The amount of time in milliseconds after receiving a new packet before we
+ generate a soft-ACK to tell the sender that it doesn't need to resend.
+
+ (*) idle_ack_delay
+
+ The amount of time in milliseconds after all the packets currently in the
+ received queue have been consumed before we generate a hard-ACK to tell
+ the sender it can free its buffers, assuming nothing else prompts us to
+ send an ACK first.
+
+ (*) resend_timeout
+
+ The amount of time in milliseconds after transmitting a packet before we
+ transmit it again, assuming no ACK is received from the receiver telling
+ us they got it.
+
+ (*) max_call_lifetime
+
+ The maximum amount of time in seconds that a call may be in progress
+ before we preemptively kill it.
+
+ (*) dead_call_expiry
+
+ The amount of time in seconds before we remove a dead call from the call
+ list. Dead calls are kept around for a little while for the purpose of
+ repeating ACK and ABORT packets.
+
+ (*) connection_expiry
+
+ The amount of time in seconds after a connection was last used before we
+ remove it from the connection list. Whilst a connection is in existence,
+ it serves as a placeholder for negotiated security; when it is deleted,
+ the security must be renegotiated.
+
+ (*) transport_expiry
+
+ The amount of time in seconds after a transport was last used before we
+ remove it from the transport list. Whilst a transport is in existence, it
+ serves to anchor the peer data and keeps the connection ID counter.
+
+ (*) rxrpc_rx_window_size
+
+ The size of the receive window in packets. This is the maximum number of
+ unconsumed received packets we're willing to hold in memory for any
+ particular call.
+
+ (*) rxrpc_rx_mtu
+
+ The maximum packet size (MTU), in bytes, that we're willing to receive. This
+ indicates to the peer whether we're willing to accept jumbo packets.
+
+ (*) rxrpc_rx_jumbo_max
+
+ The maximum number of packets that we're willing to accept in a jumbo
+ packet. Non-terminal packets in a jumbo packet must contain a four byte
+ header plus exactly 1412 bytes of data. The terminal packet must contain
+ a four byte header plus any amount of data. In any event, a jumbo packet
+ may not exceed rxrpc_rx_mtu in size.
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index fc6d25e7d053..22800bde9752 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -21,55 +21,55 @@
#include "bonding.h"
static int bond_option_active_slave_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_miimon_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_updelay_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_downdelay_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_use_carrier_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_arp_interval_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_arp_ip_target_add(struct bonding *bond, __be32 target);
static int bond_option_arp_ip_target_rem(struct bonding *bond, __be32 target);
static int bond_option_arp_ip_targets_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_arp_validate_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_arp_all_targets_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_primary_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_primary_reselect_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_fail_over_mac_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_xmit_hash_policy_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_resend_igmp_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_num_peer_notif_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_all_slaves_active_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_min_links_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_lp_interval_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_pps_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_lacp_rate_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_ad_select_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_queue_id_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_mode_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static int bond_option_slaves_set(struct bonding *bond,
- struct bond_opt_value *newval);
+ const struct bond_opt_value *newval);
static const struct bond_opt_value bond_mode_tbl[] = {
@@ -504,7 +504,7 @@ static int bond_opt_check_deps(struct bonding *bond,
static void bond_opt_dep_print(struct bonding *bond,
const struct bond_option *opt)
{
- struct bond_opt_value *modeval;
+ const struct bond_opt_value *modeval;
struct bond_params *params;
params = &bond->params;
@@ -517,9 +517,9 @@ static void bond_opt_dep_print(struct bonding *bond,
static void bond_opt_error_interpret(struct bonding *bond,
const struct bond_option *opt,
- int error, struct bond_opt_value *val)
+ int error, const struct bond_opt_value *val)
{
- struct bond_opt_value *minval, *maxval;
+ const struct bond_opt_value *minval, *maxval;
char *p;
switch (error) {
@@ -574,7 +574,7 @@ static void bond_opt_error_interpret(struct bonding *bond,
int __bond_opt_set(struct bonding *bond,
unsigned int option, struct bond_opt_value *val)
{
- struct bond_opt_value *retval = NULL;
+ const struct bond_opt_value *retval = NULL;
const struct bond_option *opt;
int ret = -ENOENT;
@@ -637,7 +637,7 @@ const struct bond_option *bond_opt_get(unsigned int option)
return &bond_opts[option];
}
-int bond_option_mode_set(struct bonding *bond, struct bond_opt_value *newval)
+int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval)
{
if (BOND_NO_USES_ARP(newval->value) && bond->params.arp_interval) {
pr_info("%s: %s mode is incompatible with arp monitoring, start mii monitoring\n",
@@ -676,7 +676,7 @@ struct net_device *bond_option_active_slave_get(struct bonding *bond)
}
static int bond_option_active_slave_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
char ifname[IFNAMSIZ] = { 0, };
struct net_device *slave_dev;
@@ -745,7 +745,7 @@ static int bond_option_active_slave_set(struct bonding *bond,
}
static int bond_option_miimon_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting MII monitoring interval to %llu\n",
bond->dev->name, newval->value);
@@ -783,7 +783,7 @@ static int bond_option_miimon_set(struct bonding *bond,
}
static int bond_option_updelay_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
int value = newval->value;
@@ -807,7 +807,7 @@ static int bond_option_updelay_set(struct bonding *bond,
}
static int bond_option_downdelay_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
int value = newval->value;
@@ -831,7 +831,7 @@ static int bond_option_downdelay_set(struct bonding *bond,
}
static int bond_option_use_carrier_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting use_carrier to %llu\n",
bond->dev->name, newval->value);
@@ -841,7 +841,7 @@ static int bond_option_use_carrier_set(struct bonding *bond,
}
static int bond_option_arp_interval_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting ARP monitoring interval to %llu\n",
bond->dev->name, newval->value);
@@ -991,7 +991,7 @@ void bond_option_arp_ip_targets_clear(struct bonding *bond)
}
static int bond_option_arp_ip_targets_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
int ret = -EPERM;
__be32 target;
@@ -1018,7 +1018,7 @@ static int bond_option_arp_ip_targets_set(struct bonding *bond,
}
static int bond_option_arp_validate_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting arp_validate to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
@@ -1035,7 +1035,7 @@ static int bond_option_arp_validate_set(struct bonding *bond,
}
static int bond_option_arp_all_targets_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting arp_all_targets to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
@@ -1045,7 +1045,7 @@ static int bond_option_arp_all_targets_set(struct bonding *bond,
}
static int bond_option_primary_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
char *p, *primary = newval->string;
struct list_head *iter;
@@ -1098,7 +1098,7 @@ out:
}
static int bond_option_primary_reselect_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting primary_reselect to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
@@ -1114,7 +1114,7 @@ static int bond_option_primary_reselect_set(struct bonding *bond,
}
static int bond_option_fail_over_mac_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting fail_over_mac to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
@@ -1124,7 +1124,7 @@ static int bond_option_fail_over_mac_set(struct bonding *bond,
}
static int bond_option_xmit_hash_policy_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting xmit hash policy to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
@@ -1134,7 +1134,7 @@ static int bond_option_xmit_hash_policy_set(struct bonding *bond,
}
static int bond_option_resend_igmp_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting resend_igmp to %llu\n",
bond->dev->name, newval->value);
@@ -1144,7 +1144,7 @@ static int bond_option_resend_igmp_set(struct bonding *bond,
}
static int bond_option_num_peer_notif_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
bond->params.num_peer_notif = newval->value;
@@ -1152,7 +1152,7 @@ static int bond_option_num_peer_notif_set(struct bonding *bond,
}
static int bond_option_all_slaves_active_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
struct list_head *iter;
struct slave *slave;
@@ -1173,7 +1173,7 @@ static int bond_option_all_slaves_active_set(struct bonding *bond,
}
static int bond_option_min_links_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting min links value to %llu\n",
bond->dev->name, newval->value);
@@ -1183,7 +1183,7 @@ static int bond_option_min_links_set(struct bonding *bond,
}
static int bond_option_lp_interval_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
bond->params.lp_interval = newval->value;
@@ -1191,7 +1191,7 @@ static int bond_option_lp_interval_set(struct bonding *bond,
}
static int bond_option_pps_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
bond->params.packets_per_slave = newval->value;
if (newval->value > 0) {
@@ -1209,7 +1209,7 @@ static int bond_option_pps_set(struct bonding *bond,
}
static int bond_option_lacp_rate_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting LACP rate to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
@@ -1220,7 +1220,7 @@ static int bond_option_lacp_rate_set(struct bonding *bond,
}
static int bond_option_ad_select_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
pr_info("%s: Setting ad_select to %s (%llu)\n",
bond->dev->name, newval->string, newval->value);
@@ -1230,7 +1230,7 @@ static int bond_option_ad_select_set(struct bonding *bond,
}
static int bond_option_queue_id_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
struct slave *slave, *update_slave;
struct net_device *sdev;
@@ -1291,7 +1291,7 @@ err_no_cmd:
}
static int bond_option_slaves_set(struct bonding *bond,
- struct bond_opt_value *newval)
+ const struct bond_opt_value *newval)
{
char command[IFNAMSIZ + 1] = { 0, };
struct net_device *dev;
diff --git a/drivers/net/bonding/bond_options.h b/drivers/net/bonding/bond_options.h
index 6c5ba0ffc31c..12be9e1bfb0c 100644
--- a/drivers/net/bonding/bond_options.h
+++ b/drivers/net/bonding/bond_options.h
@@ -94,14 +94,15 @@ struct bond_option {
*/
const struct bond_opt_value *values;
- int (*set)(struct bonding *bond, struct bond_opt_value *val);
+ int (*set)(struct bonding *bond, const struct bond_opt_value *val);
};
int __bond_opt_set(struct bonding *bond, unsigned int option,
struct bond_opt_value *val);
int bond_opt_tryset_rtnl(struct bonding *bond, unsigned int option, char *buf);
+
const struct bond_opt_value *bond_opt_parse(const struct bond_option *opt,
- struct bond_opt_value *val);
+ struct bond_opt_value *val);
const struct bond_option *bond_opt_get(unsigned int option);
const struct bond_opt_value *bond_opt_get_val(unsigned int option, u64 val);
diff --git a/drivers/net/bonding/bond_procfs.c b/drivers/net/bonding/bond_procfs.c
index 588cf39d832c..013fdd0f45e9 100644
--- a/drivers/net/bonding/bond_procfs.c
+++ b/drivers/net/bonding/bond_procfs.c
@@ -65,7 +65,7 @@ static void bond_info_seq_stop(struct seq_file *seq, void *v)
static void bond_info_show_master(struct seq_file *seq)
{
struct bonding *bond = seq->private;
- struct bond_opt_value *optval;
+ const struct bond_opt_value *optval;
struct slave *curr;
int i;
diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c
index cf09d8faca84..66759b6ce373 100644
--- a/drivers/net/ethernet/emulex/benet/be_ethtool.c
+++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c
@@ -802,16 +802,18 @@ be_self_test(struct net_device *netdev, struct ethtool_test *test, u64 *data)
if (test->flags & ETH_TEST_FL_OFFLINE) {
if (be_loopback_test(adapter, BE_MAC_LOOPBACK,
- &data[0]) != 0) {
+ &data[0]) != 0)
test->flags |= ETH_TEST_FL_FAILED;
- }
+
if (be_loopback_test(adapter, BE_PHY_LOOPBACK,
- &data[1]) != 0) {
- test->flags |= ETH_TEST_FL_FAILED;
- }
- if (be_loopback_test(adapter, BE_ONE_PORT_EXT_LOOPBACK,
- &data[2]) != 0) {
+ &data[1]) != 0)
test->flags |= ETH_TEST_FL_FAILED;
+
+ if (test->flags & ETH_TEST_FL_EXTERNAL_LB) {
+ if (be_loopback_test(adapter, BE_ONE_PORT_EXT_LOOPBACK,
+ &data[2]) != 0)
+ test->flags |= ETH_TEST_FL_FAILED;
+ test->flags |= ETH_TEST_FL_EXTERNAL_LB_DONE;
}
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
index 72dae4d97b43..838b69b74edf 100644
--- a/drivers/net/ethernet/intel/i40e/i40e.h
+++ b/drivers/net/ethernet/intel/i40e/i40e.h
@@ -86,12 +86,12 @@
#define I40E_NVM_VERSION_LO_SHIFT 0
#define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT)
-#define I40E_NVM_VERSION_HI_SHIFT 8
-#define I40E_NVM_VERSION_HI_MASK (0xff << I40E_NVM_VERSION_HI_SHIFT)
+#define I40E_NVM_VERSION_HI_SHIFT 12
+#define I40E_NVM_VERSION_HI_MASK (0xf << I40E_NVM_VERSION_HI_SHIFT)
/* The values in here are decimal coded as hex as is the case in the NVM map */
#define I40E_CURRENT_NVM_VERSION_HI 0x2
-#define I40E_CURRENT_NVM_VERSION_LO 0x30
+#define I40E_CURRENT_NVM_VERSION_LO 0x40
/* magic for getting defines into strings */
#define STRINGIFY(foo) #foo
@@ -152,8 +152,18 @@ struct i40e_lump_tracking {
};
#define I40E_DEFAULT_ATR_SAMPLE_RATE 20
-#define I40E_FDIR_MAX_RAW_PACKET_LOOKUP 512
-struct i40e_fdir_data {
+#define I40E_FDIR_MAX_RAW_PACKET_SIZE 512
+struct i40e_fdir_filter {
+ struct hlist_node fdir_node;
+ /* filter input set */
+ u8 flow_type;
+ u8 ip4_proto;
+ __be32 dst_ip[4];
+ __be32 src_ip[4];
+ __be16 src_port;
+ __be16 dst_port;
+ __be32 sctp_v_tag;
+ /* filter control */
u16 q_index;
u8 flex_off;
u8 pctype;
@@ -162,7 +172,6 @@ struct i40e_fdir_data {
u8 fd_status;
u16 cnt_index;
u32 fd_id;
- u8 *raw_packet;
};
#define I40E_ETH_P_LLDP 0x88cc
@@ -210,6 +219,9 @@ struct i40e_pf {
u8 atr_sample_rate;
bool wol_en;
+ struct hlist_head fdir_filter_list;
+ u16 fdir_pf_active_filters;
+
#ifdef CONFIG_I40E_VXLAN
__be16 vxlan_ports[I40E_MAX_PF_UDP_OFFLOAD_PORTS];
u16 pending_vxlan_bitmap;
@@ -477,10 +489,10 @@ static inline char *i40e_fw_version_str(struct i40e_hw *hw)
"f%d.%d a%d.%d n%02x.%02x e%08x",
hw->aq.fw_maj_ver, hw->aq.fw_min_ver,
hw->aq.api_maj_ver, hw->aq.api_min_ver,
- (hw->nvm.version & I40E_NVM_VERSION_HI_MASK)
- >> I40E_NVM_VERSION_HI_SHIFT,
- (hw->nvm.version & I40E_NVM_VERSION_LO_MASK)
- >> I40E_NVM_VERSION_LO_SHIFT,
+ (hw->nvm.version & I40E_NVM_VERSION_HI_MASK) >>
+ I40E_NVM_VERSION_HI_SHIFT,
+ (hw->nvm.version & I40E_NVM_VERSION_LO_MASK) >>
+ I40E_NVM_VERSION_LO_SHIFT,
hw->nvm.eetrack);
return buf;
@@ -534,9 +546,10 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
int i40e_fetch_switch_configuration(struct i40e_pf *pf,
bool printconfig);
-int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
+int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
struct i40e_pf *pf, bool add);
-
+int i40e_add_del_fdir(struct i40e_vsi *vsi,
+ struct i40e_fdir_filter *input, bool add);
void i40e_set_ethtool_ops(struct net_device *netdev);
struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
u8 *macaddr, s16 vlan,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
index e7f38b57834d..bb948dd92474 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_common.c
@@ -162,6 +162,372 @@ i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw,
return status;
}
+/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT i40e_ptype_lookup[ptype].known
+ * THEN
+ * Packet is unknown
+ * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
+ * Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ * Use the enum i40e_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short */
+#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+ { PTYPE, \
+ 1, \
+ I40E_RX_PTYPE_OUTER_##OUTER_IP, \
+ I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+ I40E_RX_PTYPE_##OUTER_FRAG, \
+ I40E_RX_PTYPE_TUNNEL_##T, \
+ I40E_RX_PTYPE_TUNNEL_END_##TE, \
+ I40E_RX_PTYPE_##TEF, \
+ I40E_RX_PTYPE_INNER_PROT_##I, \
+ I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define I40E_PTT_UNUSED_ENTRY(PTYPE) \
+ { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros make the table fit but are terse */
+#define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG
+#define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG
+#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
+ /* L2 Packet types */
+ I40E_PTT_UNUSED_ENTRY(0),
+ I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2),
+ I40E_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT_UNUSED_ENTRY(4),
+ I40E_PTT_UNUSED_ENTRY(5),
+ I40E_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT_UNUSED_ENTRY(8),
+ I40E_PTT_UNUSED_ENTRY(9),
+ I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+ I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+ /* Non Tunneled IPv4 */
+ I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(25),
+ I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4),
+ I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv4 */
+ I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(32),
+ I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv6 */
+ I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(39),
+ I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT */
+ I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> IPv4 */
+ I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(47),
+ I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> IPv6 */
+ I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(54),
+ I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC */
+ I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+ I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(62),
+ I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+ I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(69),
+ I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC/VLAN */
+ I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+ I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(77),
+ I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+ I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(84),
+ I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* Non Tunneled IPv6 */
+ I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3),
+ I40E_PTT_UNUSED_ENTRY(91),
+ I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4),
+ I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv4 */
+ I40E_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(98),
+ I40E_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv6 */
+ I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(105),
+ I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT */
+ I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> IPv4 */
+ I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(113),
+ I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> IPv6 */
+ I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(120),
+ I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC */
+ I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+ I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(128),
+ I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+ I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(135),
+ I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN */
+ I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+ I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(143),
+ I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+ I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(150),
+ I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* unused entries */
+ I40E_PTT_UNUSED_ENTRY(154),
+ I40E_PTT_UNUSED_ENTRY(155),
+ I40E_PTT_UNUSED_ENTRY(156),
+ I40E_PTT_UNUSED_ENTRY(157),
+ I40E_PTT_UNUSED_ENTRY(158),
+ I40E_PTT_UNUSED_ENTRY(159),
+
+ I40E_PTT_UNUSED_ENTRY(160),
+ I40E_PTT_UNUSED_ENTRY(161),
+ I40E_PTT_UNUSED_ENTRY(162),
+ I40E_PTT_UNUSED_ENTRY(163),
+ I40E_PTT_UNUSED_ENTRY(164),
+ I40E_PTT_UNUSED_ENTRY(165),
+ I40E_PTT_UNUSED_ENTRY(166),
+ I40E_PTT_UNUSED_ENTRY(167),
+ I40E_PTT_UNUSED_ENTRY(168),
+ I40E_PTT_UNUSED_ENTRY(169),
+
+ I40E_PTT_UNUSED_ENTRY(170),
+ I40E_PTT_UNUSED_ENTRY(171),
+ I40E_PTT_UNUSED_ENTRY(172),
+ I40E_PTT_UNUSED_ENTRY(173),
+ I40E_PTT_UNUSED_ENTRY(174),
+ I40E_PTT_UNUSED_ENTRY(175),
+ I40E_PTT_UNUSED_ENTRY(176),
+ I40E_PTT_UNUSED_ENTRY(177),
+ I40E_PTT_UNUSED_ENTRY(178),
+ I40E_PTT_UNUSED_ENTRY(179),
+
+ I40E_PTT_UNUSED_ENTRY(180),
+ I40E_PTT_UNUSED_ENTRY(181),
+ I40E_PTT_UNUSED_ENTRY(182),
+ I40E_PTT_UNUSED_ENTRY(183),
+ I40E_PTT_UNUSED_ENTRY(184),
+ I40E_PTT_UNUSED_ENTRY(185),
+ I40E_PTT_UNUSED_ENTRY(186),
+ I40E_PTT_UNUSED_ENTRY(187),
+ I40E_PTT_UNUSED_ENTRY(188),
+ I40E_PTT_UNUSED_ENTRY(189),
+
+ I40E_PTT_UNUSED_ENTRY(190),
+ I40E_PTT_UNUSED_ENTRY(191),
+ I40E_PTT_UNUSED_ENTRY(192),
+ I40E_PTT_UNUSED_ENTRY(193),
+ I40E_PTT_UNUSED_ENTRY(194),
+ I40E_PTT_UNUSED_ENTRY(195),
+ I40E_PTT_UNUSED_ENTRY(196),
+ I40E_PTT_UNUSED_ENTRY(197),
+ I40E_PTT_UNUSED_ENTRY(198),
+ I40E_PTT_UNUSED_ENTRY(199),
+
+ I40E_PTT_UNUSED_ENTRY(200),
+ I40E_PTT_UNUSED_ENTRY(201),
+ I40E_PTT_UNUSED_ENTRY(202),
+ I40E_PTT_UNUSED_ENTRY(203),
+ I40E_PTT_UNUSED_ENTRY(204),
+ I40E_PTT_UNUSED_ENTRY(205),
+ I40E_PTT_UNUSED_ENTRY(206),
+ I40E_PTT_UNUSED_ENTRY(207),
+ I40E_PTT_UNUSED_ENTRY(208),
+ I40E_PTT_UNUSED_ENTRY(209),
+
+ I40E_PTT_UNUSED_ENTRY(210),
+ I40E_PTT_UNUSED_ENTRY(211),
+ I40E_PTT_UNUSED_ENTRY(212),
+ I40E_PTT_UNUSED_ENTRY(213),
+ I40E_PTT_UNUSED_ENTRY(214),
+ I40E_PTT_UNUSED_ENTRY(215),
+ I40E_PTT_UNUSED_ENTRY(216),
+ I40E_PTT_UNUSED_ENTRY(217),
+ I40E_PTT_UNUSED_ENTRY(218),
+ I40E_PTT_UNUSED_ENTRY(219),
+
+ I40E_PTT_UNUSED_ENTRY(220),
+ I40E_PTT_UNUSED_ENTRY(221),
+ I40E_PTT_UNUSED_ENTRY(222),
+ I40E_PTT_UNUSED_ENTRY(223),
+ I40E_PTT_UNUSED_ENTRY(224),
+ I40E_PTT_UNUSED_ENTRY(225),
+ I40E_PTT_UNUSED_ENTRY(226),
+ I40E_PTT_UNUSED_ENTRY(227),
+ I40E_PTT_UNUSED_ENTRY(228),
+ I40E_PTT_UNUSED_ENTRY(229),
+
+ I40E_PTT_UNUSED_ENTRY(230),
+ I40E_PTT_UNUSED_ENTRY(231),
+ I40E_PTT_UNUSED_ENTRY(232),
+ I40E_PTT_UNUSED_ENTRY(233),
+ I40E_PTT_UNUSED_ENTRY(234),
+ I40E_PTT_UNUSED_ENTRY(235),
+ I40E_PTT_UNUSED_ENTRY(236),
+ I40E_PTT_UNUSED_ENTRY(237),
+ I40E_PTT_UNUSED_ENTRY(238),
+ I40E_PTT_UNUSED_ENTRY(239),
+
+ I40E_PTT_UNUSED_ENTRY(240),
+ I40E_PTT_UNUSED_ENTRY(241),
+ I40E_PTT_UNUSED_ENTRY(242),
+ I40E_PTT_UNUSED_ENTRY(243),
+ I40E_PTT_UNUSED_ENTRY(244),
+ I40E_PTT_UNUSED_ENTRY(245),
+ I40E_PTT_UNUSED_ENTRY(246),
+ I40E_PTT_UNUSED_ENTRY(247),
+ I40E_PTT_UNUSED_ENTRY(248),
+ I40E_PTT_UNUSED_ENTRY(249),
+
+ I40E_PTT_UNUSED_ENTRY(250),
+ I40E_PTT_UNUSED_ENTRY(251),
+ I40E_PTT_UNUSED_ENTRY(252),
+ I40E_PTT_UNUSED_ENTRY(253),
+ I40E_PTT_UNUSED_ENTRY(254),
+ I40E_PTT_UNUSED_ENTRY(255)
+};
+
+
/**
* i40e_init_shared_code - Initialize the shared code
* @hw: pointer to hardware structure
diff --git a/drivers/net/ethernet/intel/i40e/i40e_dcb.c b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
index 50730141bb7b..036570d76176 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_dcb.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_dcb.c
@@ -332,6 +332,7 @@ i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib,
u16 type;
u16 length;
u16 typelength;
+ u16 offset = 0;
if (!lldpmib || !dcbcfg)
return I40E_ERR_PARAM;
@@ -339,15 +340,17 @@ i40e_status i40e_lldp_to_dcb_config(u8 *lldpmib,
/* set to the start of LLDPDU */
lldpmib += ETH_HLEN;
tlv = (struct i40e_lldp_org_tlv *)lldpmib;
- while (tlv) {
+ while (1) {
typelength = ntohs(tlv->typelength);
type = (u16)((typelength & I40E_LLDP_TLV_TYPE_MASK) >>
I40E_LLDP_TLV_TYPE_SHIFT);
length = (u16)((typelength & I40E_LLDP_TLV_LEN_MASK) >>
I40E_LLDP_TLV_LEN_SHIFT);
+ offset += sizeof(typelength) + length;
- if (type == I40E_TLV_TYPE_END)
- break;/* END TLV break out */
+ /* END TLV or beyond LLDPDU size */
+ if ((type == I40E_TLV_TYPE_END) || (offset > I40E_LLDPDU_SIZE))
+ break;
switch (type) {
case I40E_TLV_TYPE_ORG:
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
index da22c3fa2c00..57fc86496f30 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
@@ -1663,21 +1663,22 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
desc = NULL;
} else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) ||
(strncmp(cmd_buf, "rem fd_filter", 13) == 0)) {
- struct i40e_fdir_data fd_data;
+ struct i40e_fdir_filter fd_data;
u16 packet_len, i, j = 0;
char *asc_packet;
+ u8 *raw_packet;
bool add = false;
int ret;
- asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
+ asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
GFP_KERNEL);
if (!asc_packet)
goto command_write_done;
- fd_data.raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
- GFP_KERNEL);
+ raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE,
+ GFP_KERNEL);
- if (!fd_data.raw_packet) {
+ if (!raw_packet) {
kfree(asc_packet);
asc_packet = NULL;
goto command_write_done;
@@ -1698,36 +1699,36 @@ static ssize_t i40e_dbg_command_write(struct file *filp,
cnt);
kfree(asc_packet);
asc_packet = NULL;
- kfree(fd_data.raw_packet);
+ kfree(raw_packet);
goto command_write_done;
}
/* fix packet length if user entered 0 */
if (packet_len == 0)
- packet_len = I40E_FDIR_MAX_RAW_PACKET_LOOKUP;
+ packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE;
/* make sure to check the max as well */
packet_len = min_t(u16,
- packet_len, I40E_FDIR_MAX_RAW_PACKET_LOOKUP);
+ packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
for (i = 0; i < packet_len; i++) {
sscanf(&asc_packet[j], "%2hhx ",
- &fd_data.raw_packet[i]);
+ &raw_packet[i]);
j += 3;
}
dev_info(&pf->pdev->dev, "FD raw packet dump\n");
print_hex_dump(KERN_INFO, "FD raw packet: ",
DUMP_PREFIX_OFFSET, 16, 1,
- fd_data.raw_packet, packet_len, true);
- ret = i40e_program_fdir_filter(&fd_data, pf, add);
+ raw_packet, packet_len, true);
+ ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add);
if (!ret) {
dev_info(&pf->pdev->dev, "Filter command send Status : Success\n");
} else {
dev_info(&pf->pdev->dev,
"Filter command send failed %d\n", ret);
}
- kfree(fd_data.raw_packet);
- fd_data.raw_packet = NULL;
+ kfree(raw_packet);
+ raw_packet = NULL;
kfree(asc_packet);
asc_packet = NULL;
} else if (strncmp(cmd_buf, "fd-atr off", 10) == 0) {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
index b1d7d8c5cb9b..d34ff31fddd8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
@@ -62,6 +62,9 @@ static const struct i40e_stats i40e_gstrings_net_stats[] = {
I40E_NETDEV_STAT(rx_crc_errors),
};
+static int i40e_add_del_fdir_ethtool(struct i40e_vsi *vsi,
+ struct ethtool_rxnfc *cmd, bool add);
+
/* These PF_STATs might look like duplicates of some NETDEV_STATs,
* but they are separate. This device supports Virtualization, and
* as such might have several netdevs supporting VMDq and FCoE going
@@ -84,6 +87,7 @@ static struct i40e_stats i40e_gstrings_stats[] = {
I40E_PF_STAT("illegal_bytes", stats.illegal_bytes),
I40E_PF_STAT("mac_local_faults", stats.mac_local_faults),
I40E_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
+ I40E_PF_STAT("tx_timeout", tx_timeout_count),
I40E_PF_STAT("rx_length_errors", stats.rx_length_errors),
I40E_PF_STAT("link_xon_rx", stats.link_xon_rx),
I40E_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
@@ -1112,6 +1116,84 @@ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
}
/**
+ * i40e_get_ethtool_fdir_all - Report the locations of all stored rules
+ * @pf: Pointer to the physical function struct
+ * @cmd: The command to get or set Rx flow classification rules
+ * @rule_locs: Array filled with the fd_id of each rule on the filter list
+ *
+ * Sets cmd->data to the shared plus PF filter counts and, on success,
+ * cmd->rule_cnt to the number of entries written to @rule_locs
+ *
+ * Returns 0 on success or -EMSGSIZE if more rules exist than cmd->rule_cnt
+ **/
+static int i40e_get_ethtool_fdir_all(struct i40e_pf *pf,
+ struct ethtool_rxnfc *cmd,
+ u32 *rule_locs)
+{
+ struct i40e_fdir_filter *rule;
+ struct hlist_node *node2;
+ int cnt = 0;
+
+ /* report total rule count */
+ cmd->data = pf->hw.fdir_shared_filter_count +
+ pf->fdir_pf_filter_count;
+
+ hlist_for_each_entry_safe(rule, node2,
+ &pf->fdir_filter_list, fdir_node) {
+ if (cnt == cmd->rule_cnt)
+ return -EMSGSIZE;
+
+ rule_locs[cnt] = rule->fd_id;
+ cnt++;
+ }
+
+ cmd->rule_cnt = cnt;
+
+ return 0;
+}
+
+/**
+ * i40e_get_ethtool_fdir_entry - Look up a filter based on Rx flow
+ * @pf: Pointer to the physical function struct
+ * @cmd: The command to get or set Rx flow classification rules
+ *
+ * Finds the stored filter whose fd_id equals the location in cmd->fs and
+ * copies its flow type, ports, IPv4 addresses and queue index into cmd->fs
+ *
+ * Returns 0 on success or -EINVAL if filter not found
+ **/
+static int i40e_get_ethtool_fdir_entry(struct i40e_pf *pf,
+ struct ethtool_rxnfc *cmd)
+{
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
+ struct i40e_fdir_filter *rule = NULL;
+ struct hlist_node *node2;
+
+ /* report total rule count */
+ cmd->data = pf->hw.fdir_shared_filter_count +
+ pf->fdir_pf_filter_count;
+
+ hlist_for_each_entry_safe(rule, node2,
+ &pf->fdir_filter_list, fdir_node) {
+ if (fsp->location <= rule->fd_id)
+ break;
+ }
+
+ if (!rule || fsp->location != rule->fd_id)
+ return -EINVAL;
+
+ fsp->flow_type = rule->flow_type;
+ fsp->h_u.tcp_ip4_spec.psrc = rule->src_port;
+ fsp->h_u.tcp_ip4_spec.pdst = rule->dst_port;
+ fsp->h_u.tcp_ip4_spec.ip4src = rule->src_ip[0];
+ fsp->h_u.tcp_ip4_spec.ip4dst = rule->dst_ip[0];
+ fsp->ring_cookie = rule->q_index;
+
+ return 0;
+}
+
+/**
* i40e_get_rxnfc - command to get RX flow classification rules
* @netdev: network interface device structure
* @cmd: ethtool rxnfc command
@@ -1135,15 +1217,15 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
ret = i40e_get_rss_hash_opts(pf, cmd);
break;
case ETHTOOL_GRXCLSRLCNT:
- cmd->rule_cnt = 10;
+ cmd->rule_cnt = pf->fdir_pf_active_filters;
ret = 0;
break;
case ETHTOOL_GRXCLSRULE:
- ret = 0;
+ ret = i40e_get_ethtool_fdir_entry(pf, cmd);
break;
case ETHTOOL_GRXCLSRLALL:
- cmd->data = 500;
- ret = 0;
+ ret = i40e_get_ethtool_fdir_all(pf, cmd, rule_locs);
+ break;
default:
break;
}
@@ -1274,289 +1356,158 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
return 0;
}
-#define IP_HEADER_OFFSET 14
-#define I40E_UDPIP_DUMMY_PACKET_LEN 42
/**
- * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required from the FDir descriptor
- * @ethtool_rx_flow_spec: the flow spec
- * @add: true adds a filter, false removes it
+ * i40e_update_ethtool_fdir_entry - Updates the fdir filter entry
+ * @vsi: Pointer to the targeted VSI
+ * @input: The filter to update or NULL to indicate deletion
+ * @sw_idx: Software index to the filter
+ * @cmd: The command to get or set Rx flow classification rules
*
- * Returns 0 if the filters were successfully added or removed
+ * This function updates (or deletes) a Flow Director entry from
+ * the hlist of the corresponding PF
+ *
+ * Returns 0 on success
**/
-static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
- struct i40e_fdir_data *fd_data,
- struct ethtool_rx_flow_spec *fsp, bool add)
+static int i40e_update_ethtool_fdir_entry(struct i40e_vsi *vsi,
+ struct i40e_fdir_filter *input,
+ u16 sw_idx,
+ struct ethtool_rxnfc *cmd)
{
+ struct i40e_fdir_filter *rule, *parent;
struct i40e_pf *pf = vsi->back;
- struct udphdr *udp;
- struct iphdr *ip;
- bool err = false;
- int ret;
- int i;
- char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
- 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0};
-
- memcpy(fd_data->raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
-
- ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
- udp = (struct udphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET
- + sizeof(struct iphdr));
+ struct hlist_node *node2;
+ int err = -EINVAL;
- ip->saddr = fsp->h_u.tcp_ip4_spec.ip4src;
- ip->daddr = fsp->h_u.tcp_ip4_spec.ip4dst;
- udp->source = fsp->h_u.tcp_ip4_spec.psrc;
- udp->dest = fsp->h_u.tcp_ip4_spec.pdst;
+ parent = NULL;
+ rule = NULL;
- for (i = I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP;
- i <= I40E_FILTER_PCTYPE_NONF_IPV4_UDP; i++) {
- fd_data->pctype = i;
- ret = i40e_program_fdir_filter(fd_data, pf, add);
-
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Filter command send failed for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- err = true;
- } else {
- dev_info(&pf->pdev->dev,
- "Filter OK for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- }
+ hlist_for_each_entry_safe(rule, node2,
+ &pf->fdir_filter_list, fdir_node) {
+ /* stop at the first entry whose fd_id is >= sw_idx */
+ if (rule->fd_id >= sw_idx)
+ break;
+ parent = rule;
}
- return err ? -EOPNOTSUPP : 0;
-}
-
-#define I40E_TCPIP_DUMMY_PACKET_LEN 54
-/**
- * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required from the FDir descriptor
- * @ethtool_rx_flow_spec: the flow spec
- * @add: true adds a filter, false removes it
- *
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
- struct i40e_fdir_data *fd_data,
- struct ethtool_rx_flow_spec *fsp, bool add)
-{
- struct i40e_pf *pf = vsi->back;
- struct tcphdr *tcp;
- struct iphdr *ip;
- bool err = false;
- int ret;
- /* Dummy packet */
- char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
- 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0x80, 0x11, 0x0, 0x72, 0, 0, 0, 0};
-
- memcpy(fd_data->raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
-
- ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
- tcp = (struct tcphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET
- + sizeof(struct iphdr));
-
- ip->daddr = fsp->h_u.tcp_ip4_spec.ip4dst;
- tcp->dest = fsp->h_u.tcp_ip4_spec.pdst;
- ip->saddr = fsp->h_u.tcp_ip4_spec.ip4src;
- tcp->source = fsp->h_u.tcp_ip4_spec.psrc;
-
- if (add) {
- if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
- dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
- pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+ /* if there is an old rule occupying our place remove it */
+ if (rule && (rule->fd_id == sw_idx)) {
+ if (!input || (rule->fd_id != input->fd_id)) {
+ cmd->fs.flow_type = rule->flow_type;
+ err = i40e_add_del_fdir_ethtool(vsi, cmd, false);
}
+
+ hlist_del(&rule->fdir_node);
+ kfree(rule);
+ pf->fdir_pf_active_filters--;
}
- fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN;
- ret = i40e_program_fdir_filter(fd_data, pf, add);
+ /* If no input this was a delete, err should be 0 if a rule was
+ * successfully found and removed from the list else -EINVAL
+ */
+ if (!input)
+ return err;
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Filter command send failed for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- err = true;
- } else {
- dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- }
+ /* initialize the list node of the new filter */
+ INIT_HLIST_NODE(&input->fdir_node);
- fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+ /* add filter to the list */
+ if (parent)
+ hlist_add_after(&parent->fdir_node, &input->fdir_node);
+ else
+ hlist_add_head(&input->fdir_node,
+ &pf->fdir_filter_list);
- ret = i40e_program_fdir_filter(fd_data, pf, add);
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Filter command send failed for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- err = true;
- } else {
- dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- }
+ /* update counts */
+ pf->fdir_pf_active_filters++;
- return err ? -EOPNOTSUPP : 0;
+ return 0;
}
/**
- * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required from the FDir descriptor
- * @ethtool_rx_flow_spec: the flow spec
- * @add: true adds a filter, false removes it
+ * i40e_del_fdir_entry - Deletes a Flow Director filter entry
+ * @vsi: Pointer to the targeted VSI
+ * @cmd: The command to get or set Rx flow classification rules
*
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
- struct i40e_fdir_data *fd_data,
- struct ethtool_rx_flow_spec *fsp, bool add)
-{
- return -EOPNOTSUPP;
-}
-
-#define I40E_IP_DUMMY_PACKET_LEN 34
-/**
- * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
- * a specific flow spec
- * @vsi: pointer to the targeted VSI
- * @fd_data: the flow director data required for the FDir descriptor
- * @fsp: the ethtool flow spec
- * @add: true adds a filter, false removes it
+ * The function removes a Flow Director filter entry from the
+ * hlist of the corresponding PF
*
- * Returns 0 if the filters were successfully added or removed
- **/
-static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
- struct i40e_fdir_data *fd_data,
- struct ethtool_rx_flow_spec *fsp, bool add)
+ * Returns 0 on success
+ */
+static int i40e_del_fdir_entry(struct i40e_vsi *vsi,
+ struct ethtool_rxnfc *cmd)
{
+ struct ethtool_rx_flow_spec *fsp =
+ (struct ethtool_rx_flow_spec *)&cmd->fs;
struct i40e_pf *pf = vsi->back;
- struct iphdr *ip;
- bool err = false;
- int ret;
- int i;
- char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
- 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+ int ret = 0;
- memcpy(fd_data->raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
- ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
-
- ip->saddr = fsp->h_u.usr_ip4_spec.ip4src;
- ip->daddr = fsp->h_u.usr_ip4_spec.ip4dst;
- ip->protocol = fsp->h_u.usr_ip4_spec.proto;
-
- for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
- i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
- fd_data->pctype = i;
- ret = i40e_program_fdir_filter(fd_data, pf, add);
-
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Filter command send failed for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- err = true;
- } else {
- dev_info(&pf->pdev->dev,
- "Filter OK for PCTYPE %d (ret = %d)\n",
- fd_data->pctype, ret);
- }
- }
+ ret = i40e_update_ethtool_fdir_entry(vsi, NULL, fsp->location, cmd);
- return err ? -EOPNOTSUPP : 0;
+ return ret;
}
/**
- * i40e_add_del_fdir_ethtool - Add/Remove Flow Director filters for
- * a specific flow spec based on their protocol
+ * i40e_add_del_fdir_ethtool - Add/Remove Flow Director filters
* @vsi: pointer to the targeted VSI
* @cmd: command to get or set RX flow classification rules
* @add: true adds a filter, false removes it
*
- * Returns 0 if the filters were successfully added or removed
+ * Add/Remove Flow Director filters for a specific flow spec based on their
+ * protocol. Returns 0 if the filters were successfully added or removed.
**/
static int i40e_add_del_fdir_ethtool(struct i40e_vsi *vsi,
- struct ethtool_rxnfc *cmd, bool add)
+ struct ethtool_rxnfc *cmd, bool add)
{
- struct i40e_fdir_data fd_data;
- int ret = -EINVAL;
+ struct ethtool_rx_flow_spec *fsp;
+ struct i40e_fdir_filter *input;
struct i40e_pf *pf;
- struct ethtool_rx_flow_spec *fsp =
- (struct ethtool_rx_flow_spec *)&cmd->fs;
+ int ret = -EINVAL;
if (!vsi)
return -EINVAL;
+ fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
pf = vsi->back;
- if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
- (fsp->ring_cookie >= vsi->num_queue_pairs))
+ if (fsp->location >= (pf->hw.func_caps.fd_filters_best_effort +
+ pf->hw.func_caps.fd_filters_guaranteed)) {
return -EINVAL;
+ }
- /* Populate the Flow Director that we have at the moment
- * and allocate the raw packet buffer for the calling functions
- */
- fd_data.raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
- GFP_KERNEL);
+ if ((fsp->ring_cookie >= vsi->num_queue_pairs) && add)
+ return -EINVAL;
- if (!fd_data.raw_packet) {
- dev_info(&pf->pdev->dev, "Could not allocate memory\n");
- return -ENOMEM;
- }
+ input = kzalloc(sizeof(*input), GFP_KERNEL);
- fd_data.q_index = fsp->ring_cookie;
- fd_data.flex_off = 0;
- fd_data.pctype = 0;
- fd_data.dest_vsi = vsi->id;
- fd_data.dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
- fd_data.fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
- fd_data.cnt_index = 0;
- fd_data.fd_id = 0;
+ if (!input)
+ return -ENOMEM;
- switch (fsp->flow_type & ~FLOW_EXT) {
- case TCP_V4_FLOW:
- ret = i40e_add_del_fdir_tcpv4(vsi, &fd_data, fsp, add);
- break;
- case UDP_V4_FLOW:
- ret = i40e_add_del_fdir_udpv4(vsi, &fd_data, fsp, add);
- break;
- case SCTP_V4_FLOW:
- ret = i40e_add_del_fdir_sctpv4(vsi, &fd_data, fsp, add);
- break;
- case IPV4_FLOW:
- ret = i40e_add_del_fdir_ipv4(vsi, &fd_data, fsp, add);
- break;
- case IP_USER_FLOW:
- switch (fsp->h_u.usr_ip4_spec.proto) {
- case IPPROTO_TCP:
- ret = i40e_add_del_fdir_tcpv4(vsi, &fd_data, fsp, add);
- break;
- case IPPROTO_UDP:
- ret = i40e_add_del_fdir_udpv4(vsi, &fd_data, fsp, add);
- break;
- case IPPROTO_SCTP:
- ret = i40e_add_del_fdir_sctpv4(vsi, &fd_data, fsp, add);
- break;
- default:
- ret = i40e_add_del_fdir_ipv4(vsi, &fd_data, fsp, add);
- break;
- }
- break;
- default:
- dev_info(&pf->pdev->dev, "Could not specify spec type\n");
- ret = -EINVAL;
+ input->fd_id = fsp->location;
+
+ input->q_index = fsp->ring_cookie;
+ input->flex_off = 0;
+ input->pctype = 0;
+ input->dest_vsi = vsi->id;
+ input->dest_ctl = I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX;
+ input->fd_status = I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID;
+ input->cnt_index = 0;
+ input->flow_type = fsp->flow_type;
+ input->ip4_proto = fsp->h_u.usr_ip4_spec.proto;
+ input->src_port = fsp->h_u.tcp_ip4_spec.psrc;
+ input->dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+ input->src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src;
+ input->dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst;
+
+ ret = i40e_add_del_fdir(vsi, input, add);
+ if (ret) {
+ kfree(input);
+ return ret;
}
- kfree(fd_data.raw_packet);
- fd_data.raw_packet = NULL;
+ if (!ret && add)
+ i40e_update_ethtool_fdir_entry(vsi, input, fsp->location, NULL);
+ else
+ kfree(input);
return ret;
}
@@ -1583,7 +1534,7 @@ static int i40e_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
ret = i40e_add_del_fdir_ethtool(vsi, cmd, true);
break;
case ETHTOOL_SRXCLSRLDEL:
- ret = i40e_add_del_fdir_ethtool(vsi, cmd, false);
+ ret = i40e_del_fdir_entry(vsi, cmd);
break;
default:
break;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 53f3ed2df796..43d391bb65c4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -38,7 +38,7 @@ static const char i40e_driver_string[] =
#define DRV_VERSION_MAJOR 0
#define DRV_VERSION_MINOR 3
-#define DRV_VERSION_BUILD 32
+#define DRV_VERSION_BUILD 34
#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
__stringify(DRV_VERSION_MINOR) "." \
__stringify(DRV_VERSION_BUILD) DRV_KERN
@@ -1965,11 +1965,14 @@ static int i40e_vlan_rx_add_vid(struct net_device *netdev,
netdev_info(netdev, "adding %pM vid=%d\n", netdev->dev_addr, vid);
- /* If the network stack called us with vid = 0, we should
- * indicate to i40e_vsi_add_vlan() that we want to receive
- * any traffic (i.e. with any vlan tag, or untagged)
+ /* If the network stack called us with vid = 0 then
+ * it is asking to receive priority tagged packets with
+ * vlan id 0. Our HW receives them by default when configured
+ * to receive untagged packets so there is no need to add an
+ * extra filter for vlan 0 tagged packets.
*/
- ret = i40e_vsi_add_vlan(vsi, vid ? vid : I40E_VLAN_ANY);
+ if (vid)
+ ret = i40e_vsi_add_vlan(vsi, vid);
if (!ret && (vid < VLAN_N_VID))
set_bit(vid, vsi->active_vlans);
@@ -2421,6 +2424,25 @@ static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
}
/**
+ * i40e_fdir_filter_restore - Restore the Sideband Flow Director filters
+ * @vsi: Pointer to the targeted VSI
+ *
+ * Walks pf->fdir_filter_list and re-adds every saved SB Flow Director
+ * filter via i40e_add_del_fdir(); its return value is not checked.
+ **/
+static void i40e_fdir_filter_restore(struct i40e_vsi *vsi)
+{
+ struct i40e_fdir_filter *filter;
+ struct i40e_pf *pf = vsi->back;
+ struct hlist_node *node;
+
+ hlist_for_each_entry_safe(filter, node,
+ &pf->fdir_filter_list, fdir_node) {
+ i40e_add_del_fdir(vsi, filter, true);
+ }
+}
+
+/**
* i40e_vsi_configure - Set up the VSI for action
* @vsi: the VSI being configured
**/
@@ -2431,6 +2453,8 @@ static int i40e_vsi_configure(struct i40e_vsi *vsi)
i40e_set_vsi_rx_mode(vsi);
i40e_restore_vlan(vsi);
i40e_vsi_config_dcb_rings(vsi);
+ if (vsi->type == I40E_VSI_FDIR)
+ i40e_fdir_filter_restore(vsi);
err = i40e_vsi_configure_tx(vsi);
if (!err)
err = i40e_vsi_configure_rx(vsi);
@@ -4268,6 +4292,26 @@ err_setup_tx:
}
/**
+ * i40e_fdir_filter_exit - Cleans up the Flow Director accounting
+ * @pf: Pointer to the physical function struct
+ *
+ * Unlinks and frees every entry on pf->fdir_filter_list, then resets
+ * pf->fdir_pf_active_filters to 0.
+ **/
+static void i40e_fdir_filter_exit(struct i40e_pf *pf)
+{
+ struct i40e_fdir_filter *filter;
+ struct hlist_node *node2;
+
+ hlist_for_each_entry_safe(filter, node2,
+ &pf->fdir_filter_list, fdir_node) {
+ hlist_del(&filter->fdir_node);
+ kfree(filter);
+ }
+ pf->fdir_pf_active_filters = 0;
+}
+
+/**
* i40e_close - Disables a network interface
* @netdev: network interface device structure
*
@@ -5052,6 +5096,12 @@ static int i40e_get_capabilities(struct i40e_pf *pf)
/* increment MSI-X count because current FW skips one */
pf->hw.func_caps.num_msix_vectors++;
+ if (((pf->hw.aq.fw_maj_ver == 2) && (pf->hw.aq.fw_min_ver < 22)) ||
+ (pf->hw.aq.fw_maj_ver < 2)) {
+ pf->hw.func_caps.num_msix_vectors++;
+ pf->hw.func_caps.num_msix_vectors_vf++;
+ }
+
if (pf->hw.debug_mask & I40E_DEBUG_USER)
dev_info(&pf->pdev->dev,
"pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
@@ -5131,9 +5181,9 @@ static void i40e_fdir_sb_setup(struct i40e_pf *pf)
err = i40e_up_complete(vsi);
if (err)
goto err_up_complete;
+ clear_bit(__I40E_NEEDS_RESTART, &vsi->state);
}
- clear_bit(__I40E_NEEDS_RESTART, &vsi->state);
return;
err_up_complete:
@@ -5156,6 +5206,7 @@ static void i40e_fdir_teardown(struct i40e_pf *pf)
{
int i;
+ i40e_fdir_filter_exit(pf);
for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
i40e_vsi_release(pf->vsi[i]);
@@ -7930,13 +7981,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
err = i40e_init_adminq(hw);
dev_info(&pdev->dev, "%s\n", i40e_fw_version_str(hw));
- if (((hw->nvm.version & I40E_NVM_VERSION_HI_MASK)
- >> I40E_NVM_VERSION_HI_SHIFT) != I40E_CURRENT_NVM_VERSION_HI) {
- dev_info(&pdev->dev,
- "warning: NVM version not supported, supported version: %02x.%02x\n",
- I40E_CURRENT_NVM_VERSION_HI,
- I40E_CURRENT_NVM_VERSION_LO);
- }
if (err) {
dev_info(&pdev->dev,
"init_adminq failed: %d expecting API %02x.%02x\n",
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
index 73f95b081927..262bdf11d221 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_nvm.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
@@ -27,14 +27,14 @@
#include "i40e_prototype.h"
/**
- * i40e_init_nvm_ops - Initialize NVM function pointers.
- * @hw: pointer to the HW structure.
+ * i40e_init_nvm - Initialize NVM function pointers
+ * @hw: pointer to the HW structure
*
- * Setups the function pointers and the NVM info structure. Should be called
- * once per NVM initialization, e.g. inside the i40e_init_shared_code().
- * Please notice that the NVM term is used here (& in all methods covered
- * in this file) as an equivalent of the FLASH part mapped into the SR.
- * We are accessing FLASH always thru the Shadow RAM.
+ * Set up the function pointers and the NVM info structure. Should be called
+ * once per NVM initialization, e.g. inside the i40e_init_shared_code().
+ * Please note that the NVM term is used here (& in all methods covered
+ * in this file) as an equivalent of the FLASH part mapped into the SR.
+ * We always access FLASH through the Shadow RAM.
**/
i40e_status i40e_init_nvm(struct i40e_hw *hw)
{
@@ -49,16 +49,16 @@ i40e_status i40e_init_nvm(struct i40e_hw *hw)
gens = rd32(hw, I40E_GLNVM_GENS);
sr_size = ((gens & I40E_GLNVM_GENS_SR_SIZE_MASK) >>
I40E_GLNVM_GENS_SR_SIZE_SHIFT);
- /* Switching to words (sr_size contains power of 2KB). */
+ /* Switching to words (sr_size contains power of 2KB) */
nvm->sr_size = (1 << sr_size) * I40E_SR_WORDS_IN_1KB;
- /* Check if we are in the normal or blank NVM programming mode. */
+ /* Check if we are in the normal or blank NVM programming mode */
fla = rd32(hw, I40E_GLNVM_FLA);
- if (fla & I40E_GLNVM_FLA_LOCKED_MASK) { /* Normal programming mode. */
- /* Max NVM timeout. */
+ if (fla & I40E_GLNVM_FLA_LOCKED_MASK) { /* Normal programming mode */
+ /* Max NVM timeout */
nvm->timeout = I40E_MAX_NVM_TIMEOUT;
nvm->blank_nvm_mode = false;
- } else { /* Blank programming mode. */
+ } else { /* Blank programming mode */
nvm->blank_nvm_mode = true;
ret_code = I40E_ERR_NVM_BLANK_MODE;
hw_dbg(hw, "NVM init error: unsupported blank mode.\n");
@@ -68,12 +68,12 @@ i40e_status i40e_init_nvm(struct i40e_hw *hw)
}
/**
- * i40e_acquire_nvm - Generic request for acquiring the NVM ownership.
- * @hw: pointer to the HW structure.
- * @access: NVM access type (read or write).
+ * i40e_acquire_nvm - Generic request for acquiring the NVM ownership
+ * @hw: pointer to the HW structure
+ * @access: NVM access type (read or write)
*
- * This function will request NVM ownership for reading
- * via the proper Admin Command.
+ * This function will request NVM ownership for reading
+ * via the proper Admin Command.
**/
i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
enum i40e_aq_resource_access_type access)
@@ -87,20 +87,20 @@ i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
ret_code = i40e_aq_request_resource(hw, I40E_NVM_RESOURCE_ID, access,
0, &time, NULL);
- /* Reading the Global Device Timer. */
+ /* Reading the Global Device Timer */
gtime = rd32(hw, I40E_GLVFGEN_TIMER);
- /* Store the timeout. */
+ /* Store the timeout */
hw->nvm.hw_semaphore_timeout = I40E_MS_TO_GTIME(time) + gtime;
if (ret_code) {
- /* Set the polling timeout. */
+ /* Set the polling timeout */
if (time > I40E_MAX_NVM_TIMEOUT)
timeout = I40E_MS_TO_GTIME(I40E_MAX_NVM_TIMEOUT)
+ gtime;
else
timeout = hw->nvm.hw_semaphore_timeout;
- /* Poll until the current NVM owner timeouts. */
+ /* Poll until the current NVM owner times out */
while (gtime < timeout) {
usleep_range(10000, 20000);
ret_code = i40e_aq_request_resource(hw,
@@ -128,10 +128,10 @@ i40e_i40e_acquire_nvm_exit:
}
/**
- * i40e_release_nvm - Generic request for releasing the NVM ownership.
- * @hw: pointer to the HW structure.
+ * i40e_release_nvm - Generic request for releasing the NVM ownership
+ * @hw: pointer to the HW structure
*
- * This function will release NVM resource via the proper Admin Command.
+ * This function will release NVM resource via the proper Admin Command.
**/
void i40e_release_nvm(struct i40e_hw *hw)
{
@@ -140,17 +140,17 @@ void i40e_release_nvm(struct i40e_hw *hw)
}
/**
- * i40e_poll_sr_srctl_done_bit - Polls the GLNVM_SRCTL done bit.
- * @hw: pointer to the HW structure.
+ * i40e_poll_sr_srctl_done_bit - Polls the GLNVM_SRCTL done bit
+ * @hw: pointer to the HW structure
*
- * Polls the SRCTL Shadow RAM register done bit.
+ * Polls the SRCTL Shadow RAM register done bit.
**/
static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
{
i40e_status ret_code = I40E_ERR_TIMEOUT;
u32 srctl, wait_cnt;
- /* Poll the I40E_GLNVM_SRCTL until the done bit is set. */
+ /* Poll the I40E_GLNVM_SRCTL until the done bit is set */
for (wait_cnt = 0; wait_cnt < I40E_SRRD_SRCTL_ATTEMPTS; wait_cnt++) {
srctl = rd32(hw, I40E_GLNVM_SRCTL);
if (srctl & I40E_GLNVM_SRCTL_DONE_MASK) {
@@ -165,12 +165,12 @@ static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
}
/**
- * i40e_read_nvm_word - Reads Shadow RAM
- * @hw: pointer to the HW structure.
- * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
- * @data: word read from the Shadow RAM.
+ * i40e_read_nvm_word - Reads Shadow RAM
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @data: word read from the Shadow RAM
*
- * Reads 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ * Reads one 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
**/
i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
u16 *data)
@@ -184,15 +184,15 @@ i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
goto read_nvm_exit;
}
- /* Poll the done bit first. */
+ /* Poll the done bit first */
ret_code = i40e_poll_sr_srctl_done_bit(hw);
if (!ret_code) {
- /* Write the address and start reading. */
+ /* Write the address and start reading */
sr_reg = (u32)(offset << I40E_GLNVM_SRCTL_ADDR_SHIFT) |
(1 << I40E_GLNVM_SRCTL_START_SHIFT);
wr32(hw, I40E_GLNVM_SRCTL, sr_reg);
- /* Poll I40E_GLNVM_SRCTL until the done bit is set. */
+ /* Poll I40E_GLNVM_SRCTL until the done bit is set */
ret_code = i40e_poll_sr_srctl_done_bit(hw);
if (!ret_code) {
sr_reg = rd32(hw, I40E_GLNVM_SRDATA);
@@ -210,16 +210,15 @@ read_nvm_exit:
}
/**
- * i40e_read_nvm_buffer - Reads Shadow RAM buffer.
- * @hw: pointer to the HW structure.
- * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
- * @words: number of words to read (in) &
- * number of words read before the NVM ownership timeout (out).
- * @data: words read from the Shadow RAM.
+ * i40e_read_nvm_buffer - Reads Shadow RAM buffer
+ * @hw: pointer to the HW structure
+ * @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF)
+ * @words: (in) number of words to read; (out) number of words actually read
+ * @data: words read from the Shadow RAM
*
- * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_srrd()
- * method. The buffer read is preceded by the NVM ownership take
- * and followed by the release.
+ * Reads 16 bit words (data buffer) from the SR using the i40e_read_nvm_word()
+ * method. The buffer read is preceded by the NVM ownership take
+ * and followed by the release.
**/
i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
u16 *words, u16 *data)
@@ -227,7 +226,7 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
i40e_status ret_code = 0;
u16 index, word;
- /* Loop thru the selected region. */
+ /* Loop thru the selected region */
for (word = 0; word < *words; word++) {
index = offset + word;
ret_code = i40e_read_nvm_word(hw, index, &data[word]);
@@ -235,21 +234,21 @@ i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
break;
}
- /* Update the number of words read from the Shadow RAM. */
+ /* Update the number of words read from the Shadow RAM */
*words = word;
return ret_code;
}
/**
- * i40e_calc_nvm_checksum - Calculates and returns the checksum
- * @hw: pointer to hardware structure
- * @checksum: pointer to the checksum
+ * i40e_calc_nvm_checksum - Calculates and returns the checksum
+ * @hw: pointer to hardware structure
+ * @checksum: pointer to the checksum
*
- * This function calculate SW Checksum that covers the whole 64kB shadow RAM
- * except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD
- * is customer specific and unknown. Therefore, this function skips all maximum
- * possible size of VPD (1kB).
+ * This function calculates SW Checksum that covers the whole 64kB shadow RAM
+ * except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD
+ * is customer specific and unknown. Therefore, this function skips all maximum
+ * possible size of VPD (1kB).
**/
static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
u16 *checksum)
@@ -311,12 +310,12 @@ i40e_calc_nvm_checksum_exit:
}
/**
- * i40e_validate_nvm_checksum - Validate EEPROM checksum
- * @hw: pointer to hardware structure
- * @checksum: calculated checksum
+ * i40e_validate_nvm_checksum - Validate EEPROM checksum
+ * @hw: pointer to hardware structure
+ * @checksum: calculated checksum
*
- * Performs checksum calculation and validates the NVM SW checksum. If the
- * caller does not need checksum, the value can be NULL.
+ * Performs checksum calculation and validates the NVM SW checksum. If the
+ * caller does not need checksum, the value can be NULL.
**/
i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
u16 *checksum)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index ed91f93ede2b..9cd57e617959 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -231,6 +231,13 @@ i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
u16 *checksum);
void i40e_set_pci_config_data(struct i40e_hw *hw, u16 link_status);
+extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+
+/**
+ * decode_rx_desc_ptype - look up the decoded form of an Rx descriptor ptype
+ * @ptype: 8-bit ptype value, used directly as the index into i40e_ptype_lookup
+ *
+ * Returns the matching i40e_ptype_lookup entry by value. No range check is
+ * done here, so the table is assumed to cover every u8 value (TODO confirm
+ * against the table definition).
+ **/
+static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+ return i40e_ptype_lookup[ptype];
+}
+
/* prototype for functions used for SW locks */
/* i40e_common for VF drivers*/
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 19af4ce0a4fe..2081bdb214e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -25,6 +25,7 @@
******************************************************************************/
#include "i40e.h"
+#include "i40e_prototype.h"
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
u32 td_tag)
@@ -39,11 +40,12 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
/**
* i40e_program_fdir_filter - Program a Flow Director filter
- * @fdir_input: Packet data that will be filter parameters
+ * @fdir_data: Packet data that will be filter parameters
+ * @raw_packet: the pre-allocated packet buffer for FDir
* @pf: The pf pointer
* @add: True for add/update, False for remove
**/
-int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
+int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet,
struct i40e_pf *pf, bool add)
{
struct i40e_filter_program_desc *fdir_desc;
@@ -68,8 +70,8 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
tx_ring = vsi->tx_rings[0];
dev = tx_ring->dev;
- dma = dma_map_single(dev, fdir_data->raw_packet,
- I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
+ dma = dma_map_single(dev, raw_packet,
+ I40E_FDIR_MAX_RAW_PACKET_SIZE, DMA_TO_DEVICE);
if (dma_mapping_error(dev, dma))
goto dma_fail;
@@ -132,14 +134,14 @@ int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
tx_ring->next_to_use = (i + 1 < tx_ring->count) ? i + 1 : 0;
/* record length, and DMA address */
- dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_LOOKUP);
+ dma_unmap_len_set(tx_buf, len, I40E_FDIR_MAX_RAW_PACKET_SIZE);
dma_unmap_addr_set(tx_buf, dma, dma);
tx_desc->buffer_addr = cpu_to_le64(dma);
td_cmd = I40E_TXD_CMD | I40E_TX_DESC_CMD_DUMMY;
tx_desc->cmd_type_offset_bsz =
- build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0);
+ build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_SIZE, 0);
/* set the timestamp */
tx_buf->time_stamp = jiffies;
@@ -161,6 +163,270 @@ dma_fail:
return -1;
}
+#define IP_HEADER_OFFSET 14
+#define I40E_UDPIP_DUMMY_PACKET_LEN 42
+/**
+ * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 filters
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir; at least
+ * I40E_UDPIP_DUMMY_PACKET_LEN bytes are written to it
+ * @add: true adds a filter, false removes it
+ *
+ * Copies a dummy Ethernet/IPv4/UDP frame into @raw_packet, fills in the
+ * addresses and ports from @fd_data and programs one filter for each PCTYPE
+ * from I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP through
+ * I40E_FILTER_PCTYPE_NONF_IPV4_UDP. @fd_data->pctype is overwritten on every
+ * attempt and is left at the last PCTYPE tried.
+ *
+ * Returns 0 if every filter was programmed, -EOPNOTSUPP if any attempt failed
+ **/
+static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
+ struct i40e_fdir_filter *fd_data,
+ u8 *raw_packet, bool add)
+{
+ struct i40e_pf *pf = vsi->back;
+ struct udphdr *udp;
+ struct iphdr *ip;
+ bool err = false;
+ int ret;
+ int i;
+ /* Dummy packet: 14-byte Ethernet header, IPv4 header, UDP header */
+ static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+ 0x45, 0, 0, 0x1c, 0, 0, 0x40, 0, 0x40, 0x11, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
+
+ memcpy(raw_packet, packet, I40E_UDPIP_DUMMY_PACKET_LEN);
+
+ /* the header pointers refer to the caller's copy, not the template */
+ ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+ udp = (struct udphdr *)(raw_packet + IP_HEADER_OFFSET
+ + sizeof(struct iphdr));
+
+ ip->daddr = fd_data->dst_ip[0];
+ udp->dest = fd_data->dst_port;
+ ip->saddr = fd_data->src_ip[0];
+ udp->source = fd_data->src_port;
+
+ /* a failure does not stop the loop: every PCTYPE is still attempted */
+ for (i = I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP;
+ i <= I40E_FILTER_PCTYPE_NONF_IPV4_UDP; i++) {
+ fd_data->pctype = i;
+ ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Filter command send failed for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ err = true;
+ } else {
+ dev_info(&pf->pdev->dev,
+ "Filter OK for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ }
+ }
+
+ /* individual error codes are collapsed into a single -EOPNOTSUPP */
+ return err ? -EOPNOTSUPP : 0;
+}
+
+#define I40E_TCPIP_DUMMY_PACKET_LEN 54
+/**
+ * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 filters
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir; at least
+ * I40E_TCPIP_DUMMY_PACKET_LEN bytes are written to it
+ * @add: true adds a filter, false removes it
+ *
+ * Copies a dummy Ethernet/IPv4/TCP frame into @raw_packet, fills in the
+ * addresses and ports from @fd_data and programs two filters, one for
+ * I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN and one for
+ * I40E_FILTER_PCTYPE_NONF_IPV4_TCP. When adding, I40E_FLAG_FD_ATR_ENABLED is
+ * cleared in the PF flags if it was set; this function never sets it again.
+ * @fd_data->pctype is overwritten and left at the last PCTYPE tried.
+ *
+ * Returns 0 if both filters were programmed, -EOPNOTSUPP if either failed
+ **/
+static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
+ struct i40e_fdir_filter *fd_data,
+ u8 *raw_packet, bool add)
+{
+ struct i40e_pf *pf = vsi->back;
+ struct tcphdr *tcp;
+ struct iphdr *ip;
+ bool err = false;
+ int ret;
+ /* Dummy packet */
+ static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+ 0x45, 0, 0, 0x28, 0, 0, 0x40, 0, 0x40, 0x6, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x80, 0x11,
+ 0x0, 0x72, 0, 0, 0, 0};
+
+ memcpy(raw_packet, packet, I40E_TCPIP_DUMMY_PACKET_LEN);
+
+ /* the header pointers refer to the caller's copy, not the template */
+ ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+ tcp = (struct tcphdr *)(raw_packet + IP_HEADER_OFFSET
+ + sizeof(struct iphdr));
+
+ ip->daddr = fd_data->dst_ip[0];
+ tcp->dest = fd_data->dst_port;
+ ip->saddr = fd_data->src_ip[0];
+ tcp->source = fd_data->src_port;
+
+ /* adding a TCP/IPv4 sideband rule turns ATR off for the whole PF */
+ if (add) {
+ if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) {
+ dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n");
+ pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED;
+ }
+ }
+
+ fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN;
+ ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Filter command send failed for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ err = true;
+ } else {
+ dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ }
+
+ /* the second PCTYPE is attempted even if the first one failed */
+ fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+
+ ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Filter command send failed for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ err = true;
+ } else {
+ dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ }
+
+ /* individual error codes are collapsed into a single -EOPNOTSUPP */
+ return err ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir
+ * @add: true adds a filter, false removes it
+ *
+ * Stub: no filter is programmed and none of the arguments are used.
+ *
+ * Always returns -EOPNOTSUPP
+ **/
+static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
+ struct i40e_fdir_filter *fd_data,
+ u8 *raw_packet, bool add)
+{
+ return -EOPNOTSUPP;
+}
+
+#define I40E_IP_DUMMY_PACKET_LEN 34
+/**
+ * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @raw_packet: the pre-allocated packet buffer for FDir; at least
+ * I40E_IP_DUMMY_PACKET_LEN bytes are written to it
+ * @add: true adds a filter, false removes it
+ *
+ * Copies a dummy Ethernet/IPv4 frame into @raw_packet, fills in the source
+ * and destination addresses from @fd_data and programs one filter for each
+ * PCTYPE from I40E_FILTER_PCTYPE_NONF_IPV4_OTHER through
+ * I40E_FILTER_PCTYPE_FRAG_IPV4. @fd_data->pctype is overwritten on every
+ * attempt and is left at the last PCTYPE tried.
+ *
+ * Returns 0 if every filter was programmed, -EOPNOTSUPP if any attempt failed
+ **/
+static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
+ struct i40e_fdir_filter *fd_data,
+ u8 *raw_packet, bool add)
+{
+ struct i40e_pf *pf = vsi->back;
+ struct iphdr *ip;
+ bool err = false;
+ int ret;
+ int i;
+ /* Dummy packet: 14-byte Ethernet header followed by an IPv4 header */
+ static char packet[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0,
+ 0x45, 0, 0, 0x14, 0, 0, 0x40, 0, 0x40, 0x10, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0};
+
+ memcpy(raw_packet, packet, I40E_IP_DUMMY_PACKET_LEN);
+ ip = (struct iphdr *)(raw_packet + IP_HEADER_OFFSET);
+
+ ip->saddr = fd_data->src_ip[0];
+ ip->daddr = fd_data->dst_ip[0];
+ ip->protocol = 0; /* replaces the 0x10 protocol byte from the template */
+
+ /* a failure does not stop the loop: every PCTYPE is still attempted */
+ for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+ i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
+ fd_data->pctype = i;
+ ret = i40e_program_fdir_filter(fd_data, raw_packet, pf, add);
+
+ if (ret) {
+ dev_info(&pf->pdev->dev,
+ "Filter command send failed for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ err = true;
+ } else {
+ dev_info(&pf->pdev->dev,
+ "Filter OK for PCTYPE %d (ret = %d)\n",
+ fd_data->pctype, ret);
+ }
+ }
+
+ /* individual error codes are collapsed into a single -EOPNOTSUPP */
+ return err ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * i40e_add_del_fdir - Build raw packets to add/del fdir filter
+ * @vsi: pointer to the targeted VSI
+ * @input: the flow director filter to add or remove
+ * @add: true adds a filter, false removes it
+ *
+ * Allocates a zeroed scratch buffer of I40E_FDIR_MAX_RAW_PACKET_SIZE bytes,
+ * hands it to the helper matching @input->flow_type (with FLOW_EXT masked
+ * off) and frees it again before returning. For IP_USER_FLOW the helper is
+ * picked from @input->ip4_proto, with plain IPv4 as the fallback.
+ *
+ * Returns the helper's result, -ENOMEM if the buffer cannot be allocated,
+ * or -EINVAL if the flow type is not handled
+ **/
+int i40e_add_del_fdir(struct i40e_vsi *vsi,
+		      struct i40e_fdir_filter *input, bool add)
+{
+	struct i40e_pf *pf = vsi->back;
+	u8 *raw_packet;
+	int ret;
+
+	/* Populate the Flow Director that we have at the moment
+	 * and allocate the raw packet buffer for the calling functions
+	 */
+	raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, GFP_KERNEL);
+	if (!raw_packet)
+		return -ENOMEM;
+
+	switch (input->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		ret = i40e_add_del_fdir_tcpv4(vsi, input, raw_packet,
+					      add);
+		break;
+	case UDP_V4_FLOW:
+		ret = i40e_add_del_fdir_udpv4(vsi, input, raw_packet,
+					      add);
+		break;
+	case SCTP_V4_FLOW:
+		ret = i40e_add_del_fdir_sctpv4(vsi, input, raw_packet,
+					       add);
+		break;
+	case IPV4_FLOW:
+		ret = i40e_add_del_fdir_ipv4(vsi, input, raw_packet,
+					     add);
+		break;
+	case IP_USER_FLOW:
+		/* user-defined IPv4 flow: dispatch on the L4 protocol */
+		switch (input->ip4_proto) {
+		case IPPROTO_TCP:
+			ret = i40e_add_del_fdir_tcpv4(vsi, input,
+						      raw_packet, add);
+			break;
+		case IPPROTO_UDP:
+			ret = i40e_add_del_fdir_udpv4(vsi, input,
+						      raw_packet, add);
+			break;
+		case IPPROTO_SCTP:
+			ret = i40e_add_del_fdir_sctpv4(vsi, input,
+						       raw_packet, add);
+			break;
+		default:
+			ret = i40e_add_del_fdir_ipv4(vsi, input,
+						     raw_packet, add);
+			break;
+		}
+		break;
+	default:
+		/* terminate the message so it is not merged with later output */
+		dev_info(&pf->pdev->dev, "Could not specify spec type %d\n",
+			 input->flow_type);
+		ret = -EINVAL;
+		break;
+	}
+
+	kfree(raw_packet);
+	return ret;
+}
+
/**
* i40e_fd_handle_status - check the Programming Status for FD
* @rx_ring: the Rx ring for this descriptor
@@ -956,6 +1222,29 @@ static inline u32 i40e_rx_hash(struct i40e_ring *ring,
}
/**
+ * i40e_ptype_to_hash - map a descriptor ptype to an skb hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns NONE for unknown ptypes, L4/L3 for IP PAY4/PAY3, otherwise L2
+ **/
+static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
+{
+	struct i40e_rx_ptype_decoded info = decode_rx_desc_ptype(ptype);
+
+	if (!info.known)
+		return PKT_HASH_TYPE_NONE;
+	if (info.outer_ip != I40E_RX_PTYPE_OUTER_IP)
+		return PKT_HASH_TYPE_L2;
+
+	if (info.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4)
+		return PKT_HASH_TYPE_L4;
+	if (info.payload_layer == I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3)
+		return PKT_HASH_TYPE_L3;
+	return PKT_HASH_TYPE_L2;
+}
+
+
+/**
* i40e_clean_rx_irq - Reclaim resources after receive completes
* @rx_ring: rx ring to clean
* @budget: how many cleans we're allowed
@@ -972,8 +1261,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
u16 i = rx_ring->next_to_clean;
union i40e_rx_desc *rx_desc;
u32 rx_error, rx_status;
+ u8 rx_ptype;
u64 qword;
- u16 rx_ptype;
rx_desc = I40E_RX_DESC(rx_ring, i);
qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
@@ -1087,7 +1376,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
goto next_desc;
}
- skb->rxhash = i40e_rx_hash(rx_ring, rx_desc);
+ skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
+ i40e_ptype_to_hash(rx_ptype));
if (unlikely(rx_status & I40E_RXD_QW1_STATUS_TSYNVALID_MASK)) {
i40e_ptp_rx_hwtstamp(vsi->back, skb, (rx_status &
I40E_RXD_QW1_STATUS_TSYNINDX_MASK) >>
@@ -1812,7 +2102,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
* + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
- * + 2 desc gap to keep tail from touching head,
+ * + 4 desc gap to avoid the cache line where head is,
* + 1 desc for context descriptor,
* otherwise try next time
*/
@@ -1823,7 +2113,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
count += skb_shinfo(skb)->nr_frags;
#endif
count += TXD_USE_COUNT(skb_headlen(skb));
- if (i40e_maybe_stop_tx(tx_ring, count + 3)) {
+ if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
tx_ring->tx_stats.tx_busy++;
return 0;
}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
index 189e250198dd..42cc6ba88005 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
@@ -858,7 +858,7 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
}
}
/* allocate memory */
- vfs = kzalloc(num_alloc_vfs * sizeof(struct i40e_vf), GFP_KERNEL);
+ vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
if (!vfs) {
ret = -ENOMEM;
goto err_alloc;
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c
index 7b13953b28c4..78618af271cf 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_common.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c
@@ -160,6 +160,372 @@ i40e_status i40evf_aq_queue_shutdown(struct i40e_hw *hw,
}
+/* The i40e_ptype_lookup table is used to convert from the 8-bit ptype in the
+ * hardware to a bit-field that can be used by SW to more easily determine the
+ * packet type.
+ *
+ * Macros are used to shorten the table lines and make this table human
+ * readable.
+ *
+ * We store the PTYPE in the top byte of the bit field - this is just so that
+ * we can check that the table doesn't have a row missing, as the index into
+ * the table should be the PTYPE.
+ *
+ * Typical work flow:
+ *
+ * IF NOT i40e_ptype_lookup[ptype].known
+ * THEN
+ * Packet is unknown
+ * ELSE IF i40e_ptype_lookup[ptype].outer_ip == I40E_RX_PTYPE_OUTER_IP
+ * Use the rest of the fields to look at the tunnels, inner protocols, etc
+ * ELSE
+ * Use the enum i40e_rx_l2_ptype to decode the packet type
+ * ENDIF
+ */
+
+/* macro to make the table lines short */
+#define I40E_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\
+ { PTYPE, \
+ 1, \
+ I40E_RX_PTYPE_OUTER_##OUTER_IP, \
+ I40E_RX_PTYPE_OUTER_##OUTER_IP_VER, \
+ I40E_RX_PTYPE_##OUTER_FRAG, \
+ I40E_RX_PTYPE_TUNNEL_##T, \
+ I40E_RX_PTYPE_TUNNEL_END_##TE, \
+ I40E_RX_PTYPE_##TEF, \
+ I40E_RX_PTYPE_INNER_PROT_##I, \
+ I40E_RX_PTYPE_PAYLOAD_LAYER_##PL }
+
+#define I40E_PTT_UNUSED_ENTRY(PTYPE) \
+ { PTYPE, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
+
+/* shorter macros make the table fit but are terse */
+#define I40E_RX_PTYPE_NOF I40E_RX_PTYPE_NOT_FRAG
+#define I40E_RX_PTYPE_FRG I40E_RX_PTYPE_FRAG
+#define I40E_RX_PTYPE_INNER_PROT_TS I40E_RX_PTYPE_INNER_PROT_TIMESYNC
+
+/* Lookup table mapping the HW PTYPE to the bit field for decoding */
+struct i40e_rx_ptype_decoded i40e_ptype_lookup[] = {
+ /* L2 Packet types */
+ I40E_PTT_UNUSED_ENTRY(0),
+ I40E_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(2, L2, NONE, NOF, NONE, NONE, NOF, TS, PAY2),
+ I40E_PTT(3, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT_UNUSED_ENTRY(4),
+ I40E_PTT_UNUSED_ENTRY(5),
+ I40E_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT_UNUSED_ENTRY(8),
+ I40E_PTT_UNUSED_ENTRY(9),
+ I40E_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2),
+ I40E_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE),
+ I40E_PTT(12, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(13, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(14, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(15, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(16, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(17, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(18, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(19, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(20, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(21, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY3),
+
+ /* Non Tunneled IPv4 */
+ I40E_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(25),
+ I40E_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4),
+ I40E_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ I40E_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv4 */
+ I40E_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(32),
+ I40E_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> IPv6 */
+ I40E_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(39),
+ I40E_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT */
+ I40E_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> IPv4 */
+ I40E_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(47),
+ I40E_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> IPv6 */
+ I40E_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(54),
+ I40E_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC */
+ I40E_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 --> GRE/NAT --> MAC --> IPv4 */
+ I40E_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(62),
+ I40E_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT -> MAC --> IPv6 */
+ I40E_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(69),
+ I40E_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv4 --> GRE/NAT --> MAC/VLAN */
+ I40E_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+ /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */
+ I40E_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(77),
+ I40E_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */
+ I40E_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(84),
+ I40E_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* Non Tunneled IPv6 */
+ I40E_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3),
+ I40E_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY3),
+ I40E_PTT_UNUSED_ENTRY(91),
+ I40E_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4),
+ I40E_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4),
+ I40E_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv4 */
+ I40E_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(98),
+ I40E_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> IPv6 */
+ I40E_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(105),
+ I40E_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT */
+ I40E_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> IPv4 */
+ I40E_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(113),
+ I40E_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> IPv6 */
+ I40E_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(120),
+ I40E_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC */
+ I40E_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv4 */
+ I40E_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(128),
+ I40E_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC -> IPv6 */
+ I40E_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(135),
+ I40E_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN */
+ I40E_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */
+ I40E_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3),
+ I40E_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3),
+ I40E_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(143),
+ I40E_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4),
+ I40E_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4),
+ I40E_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4),
+
+ /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */
+ I40E_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3),
+ I40E_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3),
+ I40E_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4),
+ I40E_PTT_UNUSED_ENTRY(150),
+ I40E_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4),
+ I40E_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4),
+ I40E_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4),
+
+ /* unused entries */
+ I40E_PTT_UNUSED_ENTRY(154),
+ I40E_PTT_UNUSED_ENTRY(155),
+ I40E_PTT_UNUSED_ENTRY(156),
+ I40E_PTT_UNUSED_ENTRY(157),
+ I40E_PTT_UNUSED_ENTRY(158),
+ I40E_PTT_UNUSED_ENTRY(159),
+
+ I40E_PTT_UNUSED_ENTRY(160),
+ I40E_PTT_UNUSED_ENTRY(161),
+ I40E_PTT_UNUSED_ENTRY(162),
+ I40E_PTT_UNUSED_ENTRY(163),
+ I40E_PTT_UNUSED_ENTRY(164),
+ I40E_PTT_UNUSED_ENTRY(165),
+ I40E_PTT_UNUSED_ENTRY(166),
+ I40E_PTT_UNUSED_ENTRY(167),
+ I40E_PTT_UNUSED_ENTRY(168),
+ I40E_PTT_UNUSED_ENTRY(169),
+
+ I40E_PTT_UNUSED_ENTRY(170),
+ I40E_PTT_UNUSED_ENTRY(171),
+ I40E_PTT_UNUSED_ENTRY(172),
+ I40E_PTT_UNUSED_ENTRY(173),
+ I40E_PTT_UNUSED_ENTRY(174),
+ I40E_PTT_UNUSED_ENTRY(175),
+ I40E_PTT_UNUSED_ENTRY(176),
+ I40E_PTT_UNUSED_ENTRY(177),
+ I40E_PTT_UNUSED_ENTRY(178),
+ I40E_PTT_UNUSED_ENTRY(179),
+
+ I40E_PTT_UNUSED_ENTRY(180),
+ I40E_PTT_UNUSED_ENTRY(181),
+ I40E_PTT_UNUSED_ENTRY(182),
+ I40E_PTT_UNUSED_ENTRY(183),
+ I40E_PTT_UNUSED_ENTRY(184),
+ I40E_PTT_UNUSED_ENTRY(185),
+ I40E_PTT_UNUSED_ENTRY(186),
+ I40E_PTT_UNUSED_ENTRY(187),
+ I40E_PTT_UNUSED_ENTRY(188),
+ I40E_PTT_UNUSED_ENTRY(189),
+
+ I40E_PTT_UNUSED_ENTRY(190),
+ I40E_PTT_UNUSED_ENTRY(191),
+ I40E_PTT_UNUSED_ENTRY(192),
+ I40E_PTT_UNUSED_ENTRY(193),
+ I40E_PTT_UNUSED_ENTRY(194),
+ I40E_PTT_UNUSED_ENTRY(195),
+ I40E_PTT_UNUSED_ENTRY(196),
+ I40E_PTT_UNUSED_ENTRY(197),
+ I40E_PTT_UNUSED_ENTRY(198),
+ I40E_PTT_UNUSED_ENTRY(199),
+
+ I40E_PTT_UNUSED_ENTRY(200),
+ I40E_PTT_UNUSED_ENTRY(201),
+ I40E_PTT_UNUSED_ENTRY(202),
+ I40E_PTT_UNUSED_ENTRY(203),
+ I40E_PTT_UNUSED_ENTRY(204),
+ I40E_PTT_UNUSED_ENTRY(205),
+ I40E_PTT_UNUSED_ENTRY(206),
+ I40E_PTT_UNUSED_ENTRY(207),
+ I40E_PTT_UNUSED_ENTRY(208),
+ I40E_PTT_UNUSED_ENTRY(209),
+
+ I40E_PTT_UNUSED_ENTRY(210),
+ I40E_PTT_UNUSED_ENTRY(211),
+ I40E_PTT_UNUSED_ENTRY(212),
+ I40E_PTT_UNUSED_ENTRY(213),
+ I40E_PTT_UNUSED_ENTRY(214),
+ I40E_PTT_UNUSED_ENTRY(215),
+ I40E_PTT_UNUSED_ENTRY(216),
+ I40E_PTT_UNUSED_ENTRY(217),
+ I40E_PTT_UNUSED_ENTRY(218),
+ I40E_PTT_UNUSED_ENTRY(219),
+
+ I40E_PTT_UNUSED_ENTRY(220),
+ I40E_PTT_UNUSED_ENTRY(221),
+ I40E_PTT_UNUSED_ENTRY(222),
+ I40E_PTT_UNUSED_ENTRY(223),
+ I40E_PTT_UNUSED_ENTRY(224),
+ I40E_PTT_UNUSED_ENTRY(225),
+ I40E_PTT_UNUSED_ENTRY(226),
+ I40E_PTT_UNUSED_ENTRY(227),
+ I40E_PTT_UNUSED_ENTRY(228),
+ I40E_PTT_UNUSED_ENTRY(229),
+
+ I40E_PTT_UNUSED_ENTRY(230),
+ I40E_PTT_UNUSED_ENTRY(231),
+ I40E_PTT_UNUSED_ENTRY(232),
+ I40E_PTT_UNUSED_ENTRY(233),
+ I40E_PTT_UNUSED_ENTRY(234),
+ I40E_PTT_UNUSED_ENTRY(235),
+ I40E_PTT_UNUSED_ENTRY(236),
+ I40E_PTT_UNUSED_ENTRY(237),
+ I40E_PTT_UNUSED_ENTRY(238),
+ I40E_PTT_UNUSED_ENTRY(239),
+
+ I40E_PTT_UNUSED_ENTRY(240),
+ I40E_PTT_UNUSED_ENTRY(241),
+ I40E_PTT_UNUSED_ENTRY(242),
+ I40E_PTT_UNUSED_ENTRY(243),
+ I40E_PTT_UNUSED_ENTRY(244),
+ I40E_PTT_UNUSED_ENTRY(245),
+ I40E_PTT_UNUSED_ENTRY(246),
+ I40E_PTT_UNUSED_ENTRY(247),
+ I40E_PTT_UNUSED_ENTRY(248),
+ I40E_PTT_UNUSED_ENTRY(249),
+
+ I40E_PTT_UNUSED_ENTRY(250),
+ I40E_PTT_UNUSED_ENTRY(251),
+ I40E_PTT_UNUSED_ENTRY(252),
+ I40E_PTT_UNUSED_ENTRY(253),
+ I40E_PTT_UNUSED_ENTRY(254),
+ I40E_PTT_UNUSED_ENTRY(255)
+};
+
+
/**
* i40e_aq_send_msg_to_pf
* @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
index 7841573a58c9..33c99051cc96 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_prototype.h
@@ -63,6 +63,13 @@ i40e_status i40evf_aq_queue_shutdown(struct i40e_hw *hw,
i40e_status i40e_set_mac_type(struct i40e_hw *hw);
+extern struct i40e_rx_ptype_decoded i40e_ptype_lookup[];
+
+/* Return the decoded packet-type record stored at index @ptype of
+ * i40e_ptype_lookup[]. No range check is performed on the index.
+ * NOTE(review): the table is declared here without a size; this relies on
+ * it having an entry for every u8 value (0-255) - confirm against the
+ * definition in i40e_common.c.
+ */
+static inline struct i40e_rx_ptype_decoded decode_rx_desc_ptype(u8 ptype)
+{
+ return i40e_ptype_lookup[ptype];
+}
+
/* prototype for functions used for SW locks */
/* i40e_common for VF drivers*/
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 827bb5fa4af9..b1d87c6a5c35 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -24,6 +24,7 @@
#include <linux/prefetch.h>
#include "i40evf.h"
+#include "i40e_prototype.h"
static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
u32 td_tag)
@@ -786,6 +787,29 @@ static inline u32 i40e_rx_hash(struct i40e_ring *ring,
}
/**
+ * i40e_ptype_to_hash - get a hash type
+ * @ptype: the ptype value from the descriptor
+ *
+ * Returns a hash type to be used by skb_set_hash
+ **/
+static inline enum pkt_hash_types i40e_ptype_to_hash(u8 ptype)
+{
+	struct i40e_rx_ptype_decoded info = decode_rx_desc_ptype(ptype);
+
+	/* ptypes the lookup table does not know about carry no hash type */
+	if (!info.known)
+		return PKT_HASH_TYPE_NONE;
+
+	/* anything whose outer header is not IP is reported as an L2 hash */
+	if (info.outer_ip != I40E_RX_PTYPE_OUTER_IP)
+		return PKT_HASH_TYPE_L2;
+
+	/* outer IP: the payload layer selects between L4, L3 and L2 */
+	switch (info.payload_layer) {
+	case I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4:
+		return PKT_HASH_TYPE_L4;
+	case I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3:
+		return PKT_HASH_TYPE_L3;
+	default:
+		return PKT_HASH_TYPE_L2;
+	}
+}
+
+/**
* i40e_clean_rx_irq - Reclaim resources after receive completes
* @rx_ring: rx ring to clean
* @budget: how many cleans we're allowed
@@ -802,8 +826,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
u16 i = rx_ring->next_to_clean;
union i40e_rx_desc *rx_desc;
u32 rx_error, rx_status;
+ u8 rx_ptype;
u64 qword;
- u16 rx_ptype;
rx_desc = I40E_RX_DESC(rx_ring, i);
qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
@@ -912,7 +936,8 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
goto next_desc;
}
- skb->rxhash = i40e_rx_hash(rx_ring, rx_desc);
+ skb_set_hash(skb, i40e_rx_hash(rx_ring, rx_desc),
+ i40e_ptype_to_hash(rx_ptype));
/* probably a little skewed due to removing CRC */
total_rx_bytes += skb->len;
total_rx_packets++;
@@ -1457,7 +1482,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
/* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
* + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
- * + 2 desc gap to keep tail from touching head,
+ * + 4 desc gap to avoid the cache line where head is,
* + 1 desc for context descriptor,
* otherwise try next time
*/
@@ -1468,7 +1493,7 @@ static int i40e_xmit_descriptor_count(struct sk_buff *skb,
count += skb_shinfo(skb)->nr_frags;
#endif
count += TXD_USE_COUNT(skb_headlen(skb));
- if (i40e_maybe_stop_tx(tx_ring, count + 3)) {
+ if (i40e_maybe_stop_tx(tx_ring, count + 4 + 1)) {
tx_ring->tx_stats.tx_busy++;
return 0;
}
diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
index b2c03bca7929..11d0b61510b0 100644
--- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c
+++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c
@@ -31,7 +31,7 @@ char i40evf_driver_name[] = "i40evf";
static const char i40evf_driver_string[] =
"Intel(R) XL710 X710 Virtual Function Network Driver";
-#define DRV_VERSION "0.9.13"
+#define DRV_VERSION "0.9.14"
const char i40evf_driver_version[] = DRV_VERSION;
static const char i40evf_copyright[] =
"Copyright (c) 2013 - 2014 Intel Corporation.";
@@ -2036,6 +2036,7 @@ static void i40evf_init_task(struct work_struct *work)
NETIF_F_IPV6_CSUM |
NETIF_F_TSO |
NETIF_F_TSO6 |
+ NETIF_F_RXCSUM |
NETIF_F_GRO;
if (adapter->vf_res->vf_offload_flags
@@ -2046,6 +2047,10 @@ static void i40evf_init_task(struct work_struct *work)
NETIF_F_HW_VLAN_CTAG_FILTER;
}
+ /* copy netdev features into list of user selectable features */
+ netdev->hw_features |= netdev->features;
+ netdev->hw_features &= ~NETIF_F_RXCSUM;
+
if (!is_valid_ether_addr(adapter->hw.mac.addr)) {
dev_info(&pdev->dev, "Invalid MAC address %pMAC, using random\n",
adapter->hw.mac.addr);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c
index 3454437fcd95..0c59d4fe7e3a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c
@@ -332,7 +332,7 @@ static struct mlx4_interface mlx4_en_interface = {
.protocol = MLX4_PROT_ETH,
};
-void mlx4_en_verify_params(void)
+static void mlx4_en_verify_params(void)
{
if (pfctx > MAX_PFC_TX) {
pr_warn("mlx4_en: WARNING: illegal module parameter pfctx 0x%x - should be in range 0-0x%x, will be changed to default (0)\n",
diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index b8eee365e15d..c7ef30dee1b9 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -21,9 +21,10 @@
#include <linux/list.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <net/ip6_checksum.h>
/* Version Information */
-#define DRIVER_VERSION "v1.05.0 (2014/02/18)"
+#define DRIVER_VERSION "v1.06.0 (2014/03/03)"
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
#define DRIVER_DESC "Realtek RTL8152/RTL8153 Based USB Ethernet Adapters"
#define MODULENAME "r8152"
@@ -447,6 +448,7 @@ enum rtl8152_flags {
RTL8152_LINK_CHG,
SELECTIVE_SUSPEND,
PHY_RESET,
+ SCHEDULE_TASKLET,
};
/* Define these values to match your device */
@@ -466,8 +468,18 @@ enum rtl8152_flags {
struct rx_desc {
__le32 opts1;
#define RX_LEN_MASK 0x7fff
+
__le32 opts2;
+#define RD_UDP_CS (1 << 23)
+#define RD_TCP_CS (1 << 22)
+#define RD_IPV6_CS (1 << 20)
+#define RD_IPV4_CS (1 << 19)
+
__le32 opts3;
+#define IPF (1 << 23) /* IP checksum fail */
+#define UDPF (1 << 22) /* UDP checksum fail */
+#define TCPF (1 << 21) /* TCP checksum fail */
+
__le32 opts4;
__le32 opts5;
__le32 opts6;
@@ -477,13 +489,21 @@ struct tx_desc {
__le32 opts1;
#define TX_FS (1 << 31) /* First segment of a packet */
#define TX_LS (1 << 30) /* Final segment of a packet */
-#define TX_LEN_MASK 0x3ffff
+#define GTSENDV4 (1 << 28)
+#define GTSENDV6 (1 << 27)
+#define GTTCPHO_SHIFT 18
+#define GTTCPHO_MAX 0x7fU
+#define TX_LEN_MAX 0x3ffffU
__le32 opts2;
#define UDP_CS (1 << 31) /* Calculate UDP/IP checksum */
#define TCP_CS (1 << 30) /* Calculate TCP/IP checksum */
#define IPV4_CS (1 << 29) /* Calculate IPv4 checksum */
#define IPV6_CS (1 << 28) /* Calculate IPv6 checksum */
+#define MSS_SHIFT 17
+#define MSS_MAX 0x7ffU
+#define TCPHO_SHIFT 17
+#define TCPHO_MAX 0x7ffU
};
struct r8152;
@@ -550,12 +570,21 @@ enum rtl_version {
RTL_VER_MAX
};
+/* Status returned by r8152_tx_csum(). Any non-zero value makes
+ * r8152_tx_agg_fill() hand the skb to r8152_csum_workaround().
+ */
+enum tx_csum_stat {
+ /* the tx descriptor was filled in */
+ TX_CSUM_SUCCESS = 0,
+ /* GSO skb whose transport offset exceeds GTTCPHO_MAX */
+ TX_CSUM_TSO,
+ /* CHECKSUM_PARTIAL skb whose transport offset exceeds TCPHO_MAX */
+ TX_CSUM_NONE
+};
+
/* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
* The RTL chips use a 64 element hash table based on the Ethernet CRC.
*/
static const int multicast_filter_limit = 32;
static unsigned int rx_buf_sz = 16384;
+#define RTL_LIMITED_TSO_SIZE (rx_buf_sz - sizeof(struct tx_desc) - \
+ VLAN_ETH_HLEN - VLAN_HLEN)
+
static
int get_registers(struct r8152 *tp, u16 value, u16 index, u16 size, void *data)
{
@@ -963,7 +992,6 @@ static int rtl8152_set_mac_address(struct net_device *netdev, void *p)
static void read_bulk_callback(struct urb *urb)
{
struct net_device *netdev;
- unsigned long flags;
int status = urb->status;
struct rx_agg *agg;
struct r8152 *tp;
@@ -997,9 +1025,9 @@ static void read_bulk_callback(struct urb *urb)
if (urb->actual_length < ETH_ZLEN)
break;
- spin_lock_irqsave(&tp->rx_lock, flags);
+ spin_lock(&tp->rx_lock);
list_add_tail(&agg->list, &tp->rx_done);
- spin_unlock_irqrestore(&tp->rx_lock, flags);
+ spin_unlock(&tp->rx_lock);
tasklet_schedule(&tp->tl);
return;
case -ESHUTDOWN:
@@ -1022,9 +1050,9 @@ static void read_bulk_callback(struct urb *urb)
if (result == -ENODEV) {
netif_device_detach(tp->netdev);
} else if (result) {
- spin_lock_irqsave(&tp->rx_lock, flags);
+ spin_lock(&tp->rx_lock);
list_add_tail(&agg->list, &tp->rx_done);
- spin_unlock_irqrestore(&tp->rx_lock, flags);
+ spin_unlock(&tp->rx_lock);
tasklet_schedule(&tp->tl);
}
}
@@ -1033,7 +1061,6 @@ static void write_bulk_callback(struct urb *urb)
{
struct net_device_stats *stats;
struct net_device *netdev;
- unsigned long flags;
struct tx_agg *agg;
struct r8152 *tp;
int status = urb->status;
@@ -1057,9 +1084,9 @@ static void write_bulk_callback(struct urb *urb)
stats->tx_bytes += agg->skb_len;
}
- spin_lock_irqsave(&tp->tx_lock, flags);
+ spin_lock(&tp->tx_lock);
list_add_tail(&agg->list, &tp->tx_free);
- spin_unlock_irqrestore(&tp->tx_lock, flags);
+ spin_unlock(&tp->tx_lock);
usb_autopm_put_interface_async(tp->intf);
@@ -1073,7 +1100,7 @@ static void write_bulk_callback(struct urb *urb)
return;
if (!skb_queue_empty(&tp->tx_queue))
- schedule_delayed_work(&tp->schedule, 0);
+ tasklet_schedule(&tp->tl);
}
static void intr_callback(struct urb *urb)
@@ -1268,6 +1295,9 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
struct tx_agg *agg = NULL;
unsigned long flags;
+ if (list_empty(&tp->tx_free))
+ return NULL;
+
spin_lock_irqsave(&tp->tx_lock, flags);
if (!list_empty(&tp->tx_free)) {
struct list_head *cursor;
@@ -1281,24 +1311,130 @@ static struct tx_agg *r8152_get_tx_agg(struct r8152 *tp)
return agg;
}
-static void
-r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
+static inline __be16 get_protocol(struct sk_buff *skb)
{
- memset(desc, 0, sizeof(*desc));
+ __be16 protocol;
- desc->opts1 = cpu_to_le32((skb->len & TX_LEN_MASK) | TX_FS | TX_LS);
+ if (skb->protocol == htons(ETH_P_8021Q))
+ protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
+ else
+ protocol = skb->protocol;
+
+ return protocol;
+}
+
+/*
+ * r8152_csum_workaround()
+ * The hw limits the value of the transport offset. When the offset is out of
+ * range, calculate the checksum in software.
+ */
+static void r8152_csum_workaround(struct r8152 *tp, struct sk_buff *skb,
+ struct sk_buff_head *list)
+{
+ /* GSO skb: segment it in software and queue the segments instead */
+ if (skb_shinfo(skb)->gso_size) {
+ netdev_features_t features = tp->netdev->features;
+ struct sk_buff_head seg_list;
+ struct sk_buff *segs, *nskb;
+
+ /* Segment with IP checksum, scatter-gather and TSO masked off.
+ * NOTE(review): NETIF_F_IPV6_CSUM and NETIF_F_TSO6 stay set
+ * in the mask - confirm that is intended for IPv6 packets.
+ */
+ features &= ~(NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO);
+ segs = skb_gso_segment(skb, features);
+ if (IS_ERR(segs) || !segs)
+ goto drop;
+
+ __skb_queue_head_init(&seg_list);
+
+ /* unlink each segment from the ->next chain and collect it,
+ * in order, on the local seg_list
+ */
+ do {
+ nskb = segs;
+ segs = segs->next;
+ nskb->next = NULL;
+ __skb_queue_tail(&seg_list, nskb);
+ } while (segs);
+
+ /* hand the segments to the caller's list; the original skb is
+ * no longer needed once it has been segmented
+ */
+ skb_queue_splice(&seg_list, list);
+ dev_kfree_skb(skb);
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ /* non-GSO skb: compute the checksum in software, then requeue */
+ if (skb_checksum_help(skb) < 0)
+ goto drop;
+
+ __skb_queue_head(list, skb);
+ } else {
+ struct net_device_stats *stats;
+
+ /* Reached directly when the skb is neither GSO nor
+ * CHECKSUM_PARTIAL, and via goto when one of the software
+ * fallbacks above fails: count the drop and free the skb.
+ */
+drop:
+ stats = &tp->netdev->stats;
+ stats->tx_dropped++;
+ dev_kfree_skb(skb);
+ }
+}
+
+/*
+ * msdn_giant_send_check()
+ * According to the document of microsoft, the TCP Pseudo Header excludes the
+ * packet length for IPv6 TCP large packets.
+ */
+static int msdn_giant_send_check(struct sk_buff *skb)
+{
+	const struct ipv6hdr *ip6 = ipv6_hdr(skb);
+	struct tcphdr *tcph = tcp_hdr(skb);
+
+	/* Seed the TCP checksum field with the complemented IPv6
+	 * pseudo-header sum, computed with length 0 and base sum 0.
+	 */
+	tcph->check = 0;
+	tcph->check = ~tcp_v6_check(0, &ip6->saddr, &ip6->daddr, 0);
+
+	/* always reports success */
+	return 0;
+}
+
+static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
+ struct sk_buff *skb, u32 len, u32 transport_offset)
+{
+ u32 mss = skb_shinfo(skb)->gso_size;
+ u32 opts1, opts2 = 0;
+ int ret = TX_CSUM_SUCCESS;
+
+ WARN_ON_ONCE(len > TX_LEN_MAX);
+
+ opts1 = len | TX_FS | TX_LS;
+
+ if (mss) {
+ if (transport_offset > GTTCPHO_MAX) {
+ netif_warn(tp, tx_err, tp->netdev,
+ "Invalid transport offset 0x%x for TSO\n",
+ transport_offset);
+ ret = TX_CSUM_TSO;
+ goto unavailable;
+ }
+
+ switch (get_protocol(skb)) {
+ case htons(ETH_P_IP):
+ opts1 |= GTSENDV4;
+ break;
+
+ case htons(ETH_P_IPV6):
+ opts1 |= GTSENDV6;
+ msdn_giant_send_check(skb);
+ break;
- if (skb->ip_summed == CHECKSUM_PARTIAL) {
- __be16 protocol;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ opts1 |= transport_offset << GTTCPHO_SHIFT;
+ opts2 |= min(mss, MSS_MAX) << MSS_SHIFT;
+ } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
u8 ip_protocol;
- u32 opts2 = 0;
- if (skb->protocol == htons(ETH_P_8021Q))
- protocol = vlan_eth_hdr(skb)->h_vlan_encapsulated_proto;
- else
- protocol = skb->protocol;
+ if (transport_offset > TCPHO_MAX) {
+ netif_warn(tp, tx_err, tp->netdev,
+ "Invalid transport offset 0x%x\n",
+ transport_offset);
+ ret = TX_CSUM_NONE;
+ goto unavailable;
+ }
- switch (protocol) {
+ switch (get_protocol(skb)) {
case htons(ETH_P_IP):
opts2 |= IPV4_CS;
ip_protocol = ip_hdr(skb)->protocol;
@@ -1314,30 +1450,33 @@ r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc, struct sk_buff *skb)
break;
}
- if (ip_protocol == IPPROTO_TCP) {
+ if (ip_protocol == IPPROTO_TCP)
opts2 |= TCP_CS;
- opts2 |= (skb_transport_offset(skb) & 0x7fff) << 17;
- } else if (ip_protocol == IPPROTO_UDP) {
+ else if (ip_protocol == IPPROTO_UDP)
opts2 |= UDP_CS;
- } else {
+ else
WARN_ON_ONCE(1);
- }
- desc->opts2 = cpu_to_le32(opts2);
+ opts2 |= transport_offset << TCPHO_SHIFT;
}
+
+ desc->opts2 = cpu_to_le32(opts2);
+ desc->opts1 = cpu_to_le32(opts1);
+
+unavailable:
+ return ret;
}
static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
{
struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
- unsigned long flags;
int remain, ret;
u8 *tx_data;
__skb_queue_head_init(&skb_head);
- spin_lock_irqsave(&tx_queue->lock, flags);
+ spin_lock(&tx_queue->lock);
skb_queue_splice_init(tx_queue, &skb_head);
- spin_unlock_irqrestore(&tx_queue->lock, flags);
+ spin_unlock(&tx_queue->lock);
tx_data = agg->head;
agg->skb_num = agg->skb_len = 0;
@@ -1347,47 +1486,65 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
struct tx_desc *tx_desc;
struct sk_buff *skb;
unsigned int len;
+ u32 offset;
skb = __skb_dequeue(&skb_head);
if (!skb)
break;
- remain -= sizeof(*tx_desc);
- len = skb->len;
- if (remain < len) {
+ len = skb->len + sizeof(*tx_desc);
+
+ if (len > remain) {
__skb_queue_head(&skb_head, skb);
break;
}
tx_data = tx_agg_align(tx_data);
tx_desc = (struct tx_desc *)tx_data;
+
+ offset = (u32)skb_transport_offset(skb);
+
+ if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) {
+ r8152_csum_workaround(tp, skb, &skb_head);
+ continue;
+ }
+
tx_data += sizeof(*tx_desc);
- r8152_tx_csum(tp, tx_desc, skb);
- memcpy(tx_data, skb->data, len);
- agg->skb_num++;
+ len = skb->len;
+ if (skb_copy_bits(skb, 0, tx_data, len) < 0) {
+ struct net_device_stats *stats = &tp->netdev->stats;
+
+ stats->tx_dropped++;
+ dev_kfree_skb_any(skb);
+ tx_data -= sizeof(*tx_desc);
+ continue;
+ }
+
+ tx_data += len;
agg->skb_len += len;
+ agg->skb_num++;
+
dev_kfree_skb_any(skb);
- tx_data += len;
remain = rx_buf_sz - (int)(tx_agg_align(tx_data) - agg->head);
}
if (!skb_queue_empty(&skb_head)) {
- spin_lock_irqsave(&tx_queue->lock, flags);
+ spin_lock(&tx_queue->lock);
skb_queue_splice(&skb_head, tx_queue);
- spin_unlock_irqrestore(&tx_queue->lock, flags);
+ spin_unlock(&tx_queue->lock);
}
- netif_tx_lock_bh(tp->netdev);
+ netif_tx_lock(tp->netdev);
if (netif_queue_stopped(tp->netdev) &&
skb_queue_len(&tp->tx_queue) < tp->tx_qlen)
netif_wake_queue(tp->netdev);
- netif_tx_unlock_bh(tp->netdev);
+ netif_tx_unlock(tp->netdev);
- ret = usb_autopm_get_interface(tp->intf);
+ ret = usb_autopm_get_interface_async(tp->intf);
if (ret < 0)
goto out_tx_fill;
@@ -1395,14 +1552,45 @@ static int r8152_tx_agg_fill(struct r8152 *tp, struct tx_agg *agg)
agg->head, (int)(tx_data - (u8 *)agg->head),
(usb_complete_t)write_bulk_callback, agg);
- ret = usb_submit_urb(agg->urb, GFP_KERNEL);
+ ret = usb_submit_urb(agg->urb, GFP_ATOMIC);
if (ret < 0)
- usb_autopm_put_interface(tp->intf);
+ usb_autopm_put_interface_async(tp->intf);
out_tx_fill:
return ret;
}
+/*
+ * r8152_rx_csum()
+ * Map the checksum bits of an rx descriptor to an skb->ip_summed value.
+ *
+ * IPv4 (RD_IPV4_CS): CHECKSUM_UNNECESSARY unless IPF is set, or the packet
+ * is flagged UDP/TCP and the matching UDPF/TCPF fail bit is set.
+ * IPv6 (RD_IPV6_CS): CHECKSUM_UNNECESSARY only when the packet is flagged
+ * UDP/TCP and the matching fail bit is clear.
+ * Every other descriptor, and every descriptor on RTL_VER_01, yields
+ * CHECKSUM_NONE.
+ */
+static u8 r8152_rx_csum(struct r8152 *tp, struct rx_desc *rx_desc)
+{
+	u8 checksum = CHECKSUM_NONE;
+	u32 opts2, opts3;
+
+	/* the descriptor checksum bits are not consulted on RTL_VER_01 */
+	if (tp->version == RTL_VER_01)
+		goto return_result;
+
+	opts2 = le32_to_cpu(rx_desc->opts2);
+	opts3 = le32_to_cpu(rx_desc->opts3);
+
+	if (opts2 & RD_IPV4_CS) {
+		if (opts3 & IPF)
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_UDP_CS) && (opts3 & UDPF))
+			checksum = CHECKSUM_NONE;
+		else if ((opts2 & RD_TCP_CS) && (opts3 & TCPF))
+			checksum = CHECKSUM_NONE;
+		else
+			checksum = CHECKSUM_UNNECESSARY;
+	} else if (opts2 & RD_IPV6_CS) {
+		/* Test the descriptor bit, not the bare RD_IPV6_CS constant:
+		 * the constant is non-zero, so testing it alone would send
+		 * every non-IPv4 descriptor down this branch.
+		 */
+		if ((opts2 & RD_UDP_CS) && !(opts3 & UDPF))
+			checksum = CHECKSUM_UNNECESSARY;
+		else if ((opts2 & RD_TCP_CS) && !(opts3 & TCPF))
+			checksum = CHECKSUM_UNNECESSARY;
+	}
+
+return_result:
+	return checksum;
+}
+
static void rx_bottom(struct r8152 *tp)
{
unsigned long flags;
@@ -1455,8 +1643,10 @@ static void rx_bottom(struct r8152 *tp)
skb = netdev_alloc_skb_ip_align(netdev, pkt_len);
if (!skb) {
stats->rx_dropped++;
- break;
+ goto find_next_rx;
}
+
+ skb->ip_summed = r8152_rx_csum(tp, rx_desc);
memcpy(skb->data, rx_data, pkt_len);
skb_put(skb, pkt_len);
skb->protocol = eth_type_trans(skb, netdev);
@@ -1464,6 +1654,7 @@ static void rx_bottom(struct r8152 *tp)
stats->rx_packets++;
stats->rx_bytes += pkt_len;
+find_next_rx:
rx_data = rx_agg_align(rx_data + pkt_len + CRC_SIZE);
rx_desc = (struct rx_desc *)rx_data;
len_used = (int)(rx_data - (u8 *)agg->head);
@@ -1535,6 +1726,7 @@ static void bottom_half(unsigned long data)
return;
rx_bottom(tp);
+ tx_bottom(tp);
}
static
@@ -1551,16 +1743,15 @@ static void rtl_drop_queued_tx(struct r8152 *tp)
{
struct net_device_stats *stats = &tp->netdev->stats;
struct sk_buff_head skb_head, *tx_queue = &tp->tx_queue;
- unsigned long flags;
struct sk_buff *skb;
if (skb_queue_empty(tx_queue))
return;
__skb_queue_head_init(&skb_head);
- spin_lock_irqsave(&tx_queue->lock, flags);
+ spin_lock_bh(&tx_queue->lock);
skb_queue_splice_init(tx_queue, &skb_head);
- spin_unlock_irqrestore(&tx_queue->lock, flags);
+ spin_unlock_bh(&tx_queue->lock);
while ((skb = __skb_dequeue(&skb_head))) {
dev_kfree_skb(skb);
@@ -1631,7 +1822,7 @@ static void _rtl8152_set_rx_mode(struct net_device *netdev)
}
static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
- struct net_device *netdev)
+ struct net_device *netdev)
{
struct r8152 *tp = netdev_priv(netdev);
@@ -1639,13 +1830,17 @@ static netdev_tx_t rtl8152_start_xmit(struct sk_buff *skb,
skb_queue_tail(&tp->tx_queue, skb);
- if (list_empty(&tp->tx_free) &&
- skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
+ if (!list_empty(&tp->tx_free)) {
+ if (test_bit(SELECTIVE_SUSPEND, &tp->flags)) {
+ set_bit(SCHEDULE_TASKLET, &tp->flags);
+ schedule_delayed_work(&tp->schedule, 0);
+ } else {
+ usb_mark_last_busy(tp->udev);
+ tasklet_schedule(&tp->tl);
+ }
+ } else if (skb_queue_len(&tp->tx_queue) > tp->tx_qlen)
netif_stop_queue(netdev);
- if (!list_empty(&tp->tx_free))
- schedule_delayed_work(&tp->schedule, 0);
-
return NETDEV_TX_OK;
}
@@ -2524,8 +2719,11 @@ static void rtl_work_func_t(struct work_struct *work)
if (test_bit(RTL8152_SET_RX_MODE, &tp->flags))
_rtl8152_set_rx_mode(tp->netdev);
- if (tp->speed & LINK_STATUS)
- tx_bottom(tp);
+ if (test_bit(SCHEDULE_TASKLET, &tp->flags) &&
+ (tp->speed & LINK_STATUS)) {
+ clear_bit(SCHEDULE_TASKLET, &tp->flags);
+ tasklet_schedule(&tp->tl);
+ }
if (test_bit(PHY_RESET, &tp->flags))
rtl_phy_reset(tp);
@@ -3094,10 +3292,15 @@ static int rtl8152_probe(struct usb_interface *intf,
netdev->netdev_ops = &rtl8152_netdev_ops;
netdev->watchdog_timeo = RTL8152_TX_TIMEOUT;
- netdev->features |= NETIF_F_IP_CSUM;
- netdev->hw_features = NETIF_F_IP_CSUM;
+ netdev->features |= NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+ NETIF_F_TSO | NETIF_F_FRAGLIST | NETIF_F_IPV6_CSUM |
+ NETIF_F_TSO6;
+ netdev->hw_features = NETIF_F_RXCSUM | NETIF_F_IP_CSUM | NETIF_F_SG |
+ NETIF_F_TSO | NETIF_F_FRAGLIST |
+ NETIF_F_IPV6_CSUM | NETIF_F_TSO6;
SET_ETHTOOL_OPS(netdev, &ops);
+ netif_set_gso_max_size(netdev, RTL_LIMITED_TSO_SIZE);
tp->mii.dev = netdev;
tp->mii.mdio_read = read_mii_word;
diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h
index ae413a2cbee7..bef37be402b8 100644
--- a/drivers/net/xen-netback/common.h
+++ b/drivers/net/xen-netback/common.h
@@ -48,37 +48,19 @@
typedef unsigned int pending_ring_idx_t;
#define INVALID_PENDING_RING_IDX (~0U)
-/* For the head field in pending_tx_info: it is used to indicate
- * whether this tx info is the head of one or more coalesced requests.
- *
- * When head != INVALID_PENDING_RING_IDX, it means the start of a new
- * tx requests queue and the end of previous queue.
- *
- * An example sequence of head fields (I = INVALID_PENDING_RING_IDX):
- *
- * ...|0 I I I|5 I|9 I I I|...
- * -->|<-INUSE----------------
- *
- * After consuming the first slot(s) we have:
- *
- * ...|V V V V|5 I|9 I I I|...
- * -----FREE->|<-INUSE--------
- *
- * where V stands for "valid pending ring index". Any number other
- * than INVALID_PENDING_RING_IDX is OK. These entries are considered
- * free and can contain any number other than
- * INVALID_PENDING_RING_IDX. In practice we use 0.
- *
- * The in use non-INVALID_PENDING_RING_IDX (say 0, 5 and 9 in the
- * above example) number is the index into pending_tx_info and
- * mmap_pages arrays.
- */
struct pending_tx_info {
- struct xen_netif_tx_request req; /* coalesced tx request */
- pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX
- * if it is head of one or more tx
- * reqs
- */
+ struct xen_netif_tx_request req; /* tx request */
+ /* Callback data for released SKBs. The callback is always
+ * xenvif_zerocopy_callback, desc contains the pending_idx, which is
+ * also an index in pending_tx_info array. It is initialized in
+ * xenvif_alloc and it never changes.
+ * skb_shinfo(skb)->destructor_arg points to the first mapped slot's
+ * callback_struct in this array of struct pending_tx_info's, then ctx
+ * to the next, or NULL if there is no more slot for this skb.
+ * ubuf_to_vif is a helper which finds the struct xenvif from a pointer
+ * to this field.
+ */
+ struct ubuf_info callback_struct;
};
#define XEN_NETIF_TX_RING_SIZE __CONST_RING_SIZE(xen_netif_tx, PAGE_SIZE)
@@ -108,6 +90,15 @@ struct xenvif_rx_meta {
*/
#define MAX_GRANT_COPY_OPS (MAX_SKB_FRAGS * XEN_NETIF_RX_RING_SIZE)
+#define NETBACK_INVALID_HANDLE -1
+
+/* To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
+ * the maximum slots a valid packet can use. Now this value is defined
+ * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by
+ * all backend.
+ */
+#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
+
struct xenvif {
/* Unique identifier for this interface. */
domid_t domid;
@@ -126,13 +117,28 @@ struct xenvif {
pending_ring_idx_t pending_cons;
u16 pending_ring[MAX_PENDING_REQS];
struct pending_tx_info pending_tx_info[MAX_PENDING_REQS];
-
- /* Coalescing tx requests before copying makes number of grant
- * copy ops greater or equal to number of slots required. In
- * worst case a tx request consumes 2 gnttab_copy.
+ grant_handle_t grant_tx_handle[MAX_PENDING_REQS];
+
+ struct gnttab_map_grant_ref tx_map_ops[MAX_PENDING_REQS];
+ struct gnttab_unmap_grant_ref tx_unmap_ops[MAX_PENDING_REQS];
+ /* passed to gnttab_[un]map_refs with pages under (un)mapping */
+ struct page *pages_to_map[MAX_PENDING_REQS];
+ struct page *pages_to_unmap[MAX_PENDING_REQS];
+
+ /* This prevents zerocopy callbacks from racing over dealloc_ring */
+ spinlock_t callback_lock;
+ /* This prevents the dealloc thread and the NAPI instance from racing
+ * over response creation and pending_ring in xenvif_idx_release. In
+ * xenvif_tx_err it only protects response creation
*/
- struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS];
-
+ spinlock_t response_lock;
+ pending_ring_idx_t dealloc_prod;
+ pending_ring_idx_t dealloc_cons;
+ u16 dealloc_ring[MAX_PENDING_REQS];
+ struct task_struct *dealloc_task;
+ wait_queue_head_t dealloc_wq;
+ struct timer_list dealloc_delay;
+ bool dealloc_delay_timed_out;
/* Use kthread for guest RX */
struct task_struct *task;
@@ -144,6 +150,9 @@ struct xenvif {
struct xen_netif_rx_back_ring rx;
struct sk_buff_head rx_queue;
RING_IDX rx_last_skb_slots;
+ bool rx_queue_purge;
+
+ struct timer_list wake_queue;
/* This array is allocated seperately as it is large */
struct gnttab_copy *grant_copy_op;
@@ -175,6 +184,10 @@ struct xenvif {
/* Statistics */
unsigned long rx_gso_checksum_fixup;
+ unsigned long tx_zerocopy_sent;
+ unsigned long tx_zerocopy_success;
+ unsigned long tx_zerocopy_fail;
+ unsigned long tx_frag_overflow;
/* Miscellaneous private stuff. */
struct net_device *dev;
@@ -216,9 +229,11 @@ void xenvif_carrier_off(struct xenvif *vif);
int xenvif_tx_action(struct xenvif *vif, int budget);
-int xenvif_kthread(void *data);
+int xenvif_kthread_guest_rx(void *data);
void xenvif_kick_thread(struct xenvif *vif);
+int xenvif_dealloc_kthread(void *data);
+
/* Determine whether the needed number of slots (req) are available,
* and set req_event if not.
*/
@@ -226,6 +241,30 @@ bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed);
void xenvif_stop_queue(struct xenvif *vif);
+/* Callback from stack when TX packet can be released */
+void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+
+/* Unmap a pending page and release it back to the guest */
+void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx);
+
+/* Number of pending requests: MAX_PENDING_REQS less the distance between
+ * the pending ring's producer and consumer indices.
+ */
+static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
+{
+	pending_ring_idx_t ring_fill = vif->pending_prod - vif->pending_cons;
+
+	return MAX_PENDING_REQS - ring_fill;
+}
+
+/* True while XEN_NETBK_LEGACY_SLOTS_MAX further slots can be added to the
+ * current number of pending requests without reaching MAX_PENDING_REQS.
+ */
+static inline bool xenvif_tx_pending_slots_available(struct xenvif *vif)
+{
+	pending_ring_idx_t worst_case;
+
+	worst_case = nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX;
+
+	return worst_case < MAX_PENDING_REQS;
+}
+
+/* Callback from stack when TX packet can be released */
+void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success);
+
extern bool separate_tx_rx_irq;
+extern unsigned int rx_drain_timeout_msecs;
+extern unsigned int rx_drain_timeout_jiffies;
+
#endif /* __XEN_NETBACK__COMMON_H__ */
diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c
index 7669d49a67e2..83a71ac5b93a 100644
--- a/drivers/net/xen-netback/interface.c
+++ b/drivers/net/xen-netback/interface.c
@@ -38,6 +38,7 @@
#include <xen/events.h>
#include <asm/xen/hypercall.h>
+#include <xen/balloon.h>
#define XENVIF_QUEUE_LENGTH 32
#define XENVIF_NAPI_WEIGHT 64
@@ -87,7 +88,8 @@ static int xenvif_poll(struct napi_struct *napi, int budget)
local_irq_save(flags);
RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, more_to_do);
- if (!more_to_do)
+ if (!(more_to_do &&
+ xenvif_tx_pending_slots_available(vif)))
__napi_complete(napi);
local_irq_restore(flags);
@@ -113,6 +115,18 @@ static irqreturn_t xenvif_interrupt(int irq, void *dev_id)
return IRQ_HANDLED;
}
+/* Handler for the wake_queue timer that xenvif_start_xmit arms when it stops
+ * the queue; @data is the struct xenvif pointer stored in the timer.
+ */
+static void xenvif_wake_queue(unsigned long data)
+{
+	struct xenvif *vif = (struct xenvif *)data;
+	struct net_device *netdev = vif->dev;
+
+	/* nothing to do if the queue is no longer stopped */
+	if (!netif_queue_stopped(netdev))
+		return;
+
+	/* still stopped when the timer fired: flag the rx queue for
+	 * purging, kick the thread and restart the queue
+	 */
+	netdev_err(netdev, "draining TX queue\n");
+	vif->rx_queue_purge = true;
+	xenvif_kick_thread(vif);
+	netif_wake_queue(netdev);
+}
+
static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct xenvif *vif = netdev_priv(dev);
@@ -121,7 +135,9 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
BUG_ON(skb->dev != dev);
/* Drop the packet if vif is not ready */
- if (vif->task == NULL || !xenvif_schedulable(vif))
+ if (vif->task == NULL ||
+ vif->dealloc_task == NULL ||
+ !xenvif_schedulable(vif))
goto drop;
/* At best we'll need one slot for the header and one for each
@@ -140,8 +156,13 @@ static int xenvif_start_xmit(struct sk_buff *skb, struct net_device *dev)
* then turn off the queue to give the ring a chance to
* drain.
*/
- if (!xenvif_rx_ring_slots_available(vif, min_slots_needed))
+ if (!xenvif_rx_ring_slots_available(vif, min_slots_needed)) {
+ vif->wake_queue.function = xenvif_wake_queue;
+ vif->wake_queue.data = (unsigned long)vif;
xenvif_stop_queue(vif);
+ mod_timer(&vif->wake_queue,
+ jiffies + rx_drain_timeout_jiffies);
+ }
skb_queue_tail(&vif->rx_queue, skb);
xenvif_kick_thread(vif);
@@ -234,6 +255,28 @@ static const struct xenvif_stat {
"rx_gso_checksum_fixup",
offsetof(struct xenvif, rx_gso_checksum_fixup)
},
+ /* If (sent != success + fail), there are probably packets never
+ * freed up properly!
+ */
+ {
+ "tx_zerocopy_sent",
+ offsetof(struct xenvif, tx_zerocopy_sent),
+ },
+ {
+ "tx_zerocopy_success",
+ offsetof(struct xenvif, tx_zerocopy_success),
+ },
+ {
+ "tx_zerocopy_fail",
+ offsetof(struct xenvif, tx_zerocopy_fail)
+ },
+ /* Number of packets exceeding MAX_SKB_FRAG slots. You should use
+ * a guest with the same MAX_SKB_FRAG
+ */
+ {
+ "tx_frag_overflow",
+ offsetof(struct xenvif, tx_frag_overflow)
+ },
};
static int xenvif_get_sset_count(struct net_device *dev, int string_set)
@@ -327,6 +370,8 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
init_timer(&vif->credit_timeout);
vif->credit_window_start = get_jiffies_64();
+ init_timer(&vif->wake_queue);
+
dev->netdev_ops = &xenvif_netdev_ops;
dev->hw_features = NETIF_F_SG |
NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
@@ -343,8 +388,27 @@ struct xenvif *xenvif_alloc(struct device *parent, domid_t domid,
vif->pending_prod = MAX_PENDING_REQS;
for (i = 0; i < MAX_PENDING_REQS; i++)
vif->pending_ring[i] = i;
- for (i = 0; i < MAX_PENDING_REQS; i++)
- vif->mmap_pages[i] = NULL;
+ spin_lock_init(&vif->callback_lock);
+ spin_lock_init(&vif->response_lock);
+ /* If ballooning is disabled, this will consume real memory, so you
+ * better enable it. The long term solution would be to use just a
+ * bunch of valid page descriptors, without dependency on ballooning
+ */
+ err = alloc_xenballooned_pages(MAX_PENDING_REQS,
+ vif->mmap_pages,
+ false);
+ if (err) {
+ netdev_err(dev, "Could not reserve mmap_pages\n");
+ return ERR_PTR(-ENOMEM);
+ }
+ for (i = 0; i < MAX_PENDING_REQS; i++) {
+ vif->pending_tx_info[i].callback_struct = (struct ubuf_info)
+ { .callback = xenvif_zerocopy_callback,
+ .ctx = NULL,
+ .desc = i };
+ vif->grant_tx_handle[i] = NETBACK_INVALID_HANDLE;
+ }
+ init_timer(&vif->dealloc_delay);
/*
* Initialise a dummy MAC address. We choose the numerically
@@ -382,12 +446,14 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
BUG_ON(vif->tx_irq);
BUG_ON(vif->task);
+ BUG_ON(vif->dealloc_task);
err = xenvif_map_frontend_rings(vif, tx_ring_ref, rx_ring_ref);
if (err < 0)
goto err;
init_waitqueue_head(&vif->wq);
+ init_waitqueue_head(&vif->dealloc_wq);
if (tx_evtchn == rx_evtchn) {
/* feature-split-event-channels == 0 */
@@ -421,8 +487,8 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
disable_irq(vif->rx_irq);
}
- task = kthread_create(xenvif_kthread,
- (void *)vif, "%s", vif->dev->name);
+ task = kthread_create(xenvif_kthread_guest_rx,
+ (void *)vif, "%s-guest-rx", vif->dev->name);
if (IS_ERR(task)) {
pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
err = PTR_ERR(task);
@@ -431,6 +497,16 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
vif->task = task;
+ task = kthread_create(xenvif_dealloc_kthread,
+ (void *)vif, "%s-dealloc", vif->dev->name);
+ if (IS_ERR(task)) {
+ pr_warn("Could not allocate kthread for %s\n", vif->dev->name);
+ err = PTR_ERR(task);
+ goto err_rx_unbind;
+ }
+
+ vif->dealloc_task = task;
+
rtnl_lock();
if (!vif->can_sg && vif->dev->mtu > ETH_DATA_LEN)
dev_set_mtu(vif->dev, ETH_DATA_LEN);
@@ -441,6 +517,7 @@ int xenvif_connect(struct xenvif *vif, unsigned long tx_ring_ref,
rtnl_unlock();
wake_up_process(vif->task);
+ wake_up_process(vif->dealloc_task);
return 0;
@@ -474,10 +551,17 @@ void xenvif_disconnect(struct xenvif *vif)
xenvif_carrier_off(vif);
if (vif->task) {
+ del_timer_sync(&vif->wake_queue);
kthread_stop(vif->task);
vif->task = NULL;
}
+ if (vif->dealloc_task) {
+ del_timer_sync(&vif->dealloc_delay);
+ kthread_stop(vif->dealloc_task);
+ vif->dealloc_task = NULL;
+ }
+
if (vif->tx_irq) {
if (vif->tx_irq == vif->rx_irq)
unbind_from_irqhandler(vif->tx_irq, vif);
@@ -493,6 +577,36 @@ void xenvif_disconnect(struct xenvif *vif)
+/* Release a vif: wait until every TX grant handle has been given back, return
+ * the ballooned mmap pages, then tear down the NAPI context and net device.
+ */
void xenvif_free(struct xenvif *vif)
{
+ int i, unmap_timeout = 0;
+ /* Here we want to avoid timeout messages if an skb can be legitimately
+ * stuck somewhere else. Realistically this could be another vif's
+ * internal or QDisc queue. That other vif also has this
+ * rx_drain_timeout_msecs timeout, but the timer only ditches the
+ * internal queue. After that, the QDisc queue can put in worst case
+ * XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS skbs into that other vif's
+ * internal queue, so we need several rounds of such timeouts until we
+ * can be sure that no other vif should have skb's from us. We are
+ * not sending more skb's, so newly stuck packets are not interesting
+ * for us here.
+ */
+ unsigned int worst_case_skb_lifetime = (rx_drain_timeout_msecs/1000) *
+ DIV_ROUND_UP(XENVIF_QUEUE_LENGTH, (XEN_NETIF_RX_RING_SIZE / MAX_SKB_FRAGS));
+
+ for (i = 0; i < MAX_PENDING_REQS; ++i) {
+ if (vif->grant_tx_handle[i] != NETBACK_INVALID_HANDLE) {
+ unmap_timeout++;
+ /* A bare schedule_timeout() returns immediately while
+ * the task is still TASK_RUNNING; use the
+ * _uninterruptible variant so we really sleep for one
+ * second between scans instead of spinning.
+ */
+ schedule_timeout_uninterruptible(msecs_to_jiffies(1000));
+ if (unmap_timeout > worst_case_skb_lifetime &&
+ net_ratelimit())
+ netdev_err(vif->dev,
+ "Page still granted! Index: %x\n",
+ i);
+ /* Restart the scan from index 0 on the next iteration */
+ i = -1;
+ }
+ }
+
+ free_xenballooned_pages(MAX_PENDING_REQS, vif->mmap_pages);
+
netif_napi_del(&vif->napi);
unregister_netdev(vif->dev);
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c
index e5284bca2d90..bc943205a691 100644
--- a/drivers/net/xen-netback/netback.c
+++ b/drivers/net/xen-netback/netback.c
@@ -37,6 +37,7 @@
#include <linux/kthread.h>
#include <linux/if_vlan.h>
#include <linux/udp.h>
+#include <linux/highmem.h>
#include <net/tcp.h>
@@ -54,6 +55,13 @@
bool separate_tx_rx_irq = 1;
module_param(separate_tx_rx_irq, bool, 0644);
+/* When the guest ring fills up, qdisc queues the packets for us, but we have
+ * to time them out, otherwise other guests' packets can get stuck there
+ */
+unsigned int rx_drain_timeout_msecs = 10000;
+module_param(rx_drain_timeout_msecs, uint, 0444);
+unsigned int rx_drain_timeout_jiffies;
+
/*
* This is the maximum slots a skb can have. If a guest sends a skb
* which exceeds this limit it is considered malicious.
@@ -62,24 +70,6 @@ module_param(separate_tx_rx_irq, bool, 0644);
static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT;
module_param(fatal_skb_slots, uint, 0444);
-/*
- * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating
- * the maximum slots a valid packet can use. Now this value is defined
- * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by
- * all backend.
- */
-#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN
-
-/*
- * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of
- * one or more merged tx requests, otherwise it is the continuation of
- * previous tx request.
- */
-static inline int pending_tx_is_head(struct xenvif *vif, RING_IDX idx)
-{
- return vif->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX;
-}
-
static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
u8 status);
@@ -109,6 +99,18 @@ static inline unsigned long idx_to_kaddr(struct xenvif *vif,
return (unsigned long)pfn_to_kaddr(idx_to_pfn(vif, idx));
}
+/* Find the containing vif from a pointer to a ubuf_info embedded in its
+ * pending_tx_info[] array.
+ *
+ * ubuf->desc is used as the index of the enclosing pending_tx_info entry
+ * (xenvif_alloc() initialises .desc = i for slot i), so stepping back that
+ * many entries yields &vif->pending_tx_info[0], from which the vif itself is
+ * recovered with container_of().
+ */
+static inline struct xenvif *ubuf_to_vif(struct ubuf_info *ubuf)
+{
+ u16 pending_idx = ubuf->desc;
+ struct pending_tx_info *entry =
+ container_of(ubuf, struct pending_tx_info, callback_struct);
+
+ return container_of(entry - pending_idx,
+ struct xenvif,
+ pending_tx_info[0]);
+}
+
/* This is a miniumum size for the linear area to avoid lots of
* calls to __pskb_pull_tail() as we set up checksum offsets. The
* value 128 was chosen as it covers all IPv4 and most likely
@@ -131,10 +133,9 @@ static inline pending_ring_idx_t pending_index(unsigned i)
return i & (MAX_PENDING_REQS-1);
}
-static inline pending_ring_idx_t nr_pending_reqs(struct xenvif *vif)
+static inline pending_ring_idx_t nr_free_slots(struct xen_netif_tx_back_ring *ring)
{
- return MAX_PENDING_REQS -
- vif->pending_prod + vif->pending_cons;
+ return ring->nr_ents - (ring->sring->req_prod - ring->rsp_prod_pvt);
}
bool xenvif_rx_ring_slots_available(struct xenvif *vif, int needed)
@@ -235,7 +236,9 @@ static struct xenvif_rx_meta *get_next_rx_buffer(struct xenvif *vif,
static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
struct netrx_pending_operations *npo,
struct page *page, unsigned long size,
- unsigned long offset, int *head)
+ unsigned long offset, int *head,
+ struct xenvif *foreign_vif,
+ grant_ref_t foreign_gref)
{
struct gnttab_copy *copy_gop;
struct xenvif_rx_meta *meta;
@@ -277,8 +280,15 @@ static void xenvif_gop_frag_copy(struct xenvif *vif, struct sk_buff *skb,
copy_gop->flags = GNTCOPY_dest_gref;
copy_gop->len = bytes;
- copy_gop->source.domid = DOMID_SELF;
- copy_gop->source.u.gmfn = virt_to_mfn(page_address(page));
+ if (foreign_vif) {
+ copy_gop->source.domid = foreign_vif->domid;
+ copy_gop->source.u.ref = foreign_gref;
+ copy_gop->flags |= GNTCOPY_source_gref;
+ } else {
+ copy_gop->source.domid = DOMID_SELF;
+ copy_gop->source.u.gmfn =
+ virt_to_mfn(page_address(page));
+ }
copy_gop->source.offset = offset;
copy_gop->dest.domid = vif->domid;
@@ -339,6 +349,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
int old_meta_prod;
int gso_type;
int gso_size;
+ struct ubuf_info *ubuf = skb_shinfo(skb)->destructor_arg;
+ grant_ref_t foreign_grefs[MAX_SKB_FRAGS];
+ struct xenvif *foreign_vif = NULL;
old_meta_prod = npo->meta_prod;
@@ -379,6 +392,19 @@ static int xenvif_gop_skb(struct sk_buff *skb,
npo->copy_off = 0;
npo->copy_gref = req->gref;
+ if ((skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) &&
+ (ubuf->callback == &xenvif_zerocopy_callback)) {
+ int i = 0;
+ foreign_vif = ubuf_to_vif(ubuf);
+
+ do {
+ u16 pending_idx = ubuf->desc;
+ foreign_grefs[i++] =
+ foreign_vif->pending_tx_info[pending_idx].req.gref;
+ ubuf = (struct ubuf_info *) ubuf->ctx;
+ } while (ubuf);
+ }
+
data = skb->data;
while (data < skb_tail_pointer(skb)) {
unsigned int offset = offset_in_page(data);
@@ -388,7 +414,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
len = skb_tail_pointer(skb) - data;
xenvif_gop_frag_copy(vif, skb, npo,
- virt_to_page(data), len, offset, &head);
+ virt_to_page(data), len, offset, &head,
+ NULL,
+ 0);
data += len;
}
@@ -397,7 +425,9 @@ static int xenvif_gop_skb(struct sk_buff *skb,
skb_frag_page(&skb_shinfo(skb)->frags[i]),
skb_frag_size(&skb_shinfo(skb)->frags[i]),
skb_shinfo(skb)->frags[i].page_offset,
- &head);
+ &head,
+ foreign_vif,
+ foreign_grefs[i]);
}
return npo->meta_prod - old_meta_prod;
@@ -455,10 +485,12 @@ static void xenvif_add_frag_responses(struct xenvif *vif, int status,
}
}
-struct skb_cb_overlay {
+struct xenvif_rx_cb {
int meta_slots_used;
};
+#define XENVIF_RX_CB(skb) ((struct xenvif_rx_cb *)(skb)->cb)
+
void xenvif_kick_thread(struct xenvif *vif)
{
wake_up(&vif->wq);
@@ -474,7 +506,6 @@ static void xenvif_rx_action(struct xenvif *vif)
LIST_HEAD(notify);
int ret;
unsigned long offset;
- struct skb_cb_overlay *sco;
bool need_to_notify = false;
struct netrx_pending_operations npo = {
@@ -513,9 +544,8 @@ static void xenvif_rx_action(struct xenvif *vif)
} else
vif->rx_last_skb_slots = 0;
- sco = (struct skb_cb_overlay *)skb->cb;
- sco->meta_slots_used = xenvif_gop_skb(skb, &npo);
- BUG_ON(sco->meta_slots_used > max_slots_needed);
+ XENVIF_RX_CB(skb)->meta_slots_used = xenvif_gop_skb(skb, &npo);
+ BUG_ON(XENVIF_RX_CB(skb)->meta_slots_used > max_slots_needed);
__skb_queue_tail(&rxq, skb);
}
@@ -529,7 +559,6 @@ static void xenvif_rx_action(struct xenvif *vif)
gnttab_batch_copy(vif->grant_copy_op, npo.copy_prod);
while ((skb = __skb_dequeue(&rxq)) != NULL) {
- sco = (struct skb_cb_overlay *)skb->cb;
if ((1 << vif->meta[npo.meta_cons].gso_type) &
vif->gso_prefix_mask) {
@@ -540,19 +569,21 @@ static void xenvif_rx_action(struct xenvif *vif)
resp->offset = vif->meta[npo.meta_cons].gso_size;
resp->id = vif->meta[npo.meta_cons].id;
- resp->status = sco->meta_slots_used;
+ resp->status = XENVIF_RX_CB(skb)->meta_slots_used;
npo.meta_cons++;
- sco->meta_slots_used--;
+ XENVIF_RX_CB(skb)->meta_slots_used--;
}
vif->dev->stats.tx_bytes += skb->len;
vif->dev->stats.tx_packets++;
- status = xenvif_check_gop(vif, sco->meta_slots_used, &npo);
+ status = xenvif_check_gop(vif,
+ XENVIF_RX_CB(skb)->meta_slots_used,
+ &npo);
- if (sco->meta_slots_used == 1)
+ if (XENVIF_RX_CB(skb)->meta_slots_used == 1)
flags = 0;
else
flags = XEN_NETRXF_more_data;
@@ -589,13 +620,13 @@ static void xenvif_rx_action(struct xenvif *vif)
xenvif_add_frag_responses(vif, status,
vif->meta + npo.meta_cons + 1,
- sco->meta_slots_used);
+ XENVIF_RX_CB(skb)->meta_slots_used);
RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret);
need_to_notify |= !!ret;
- npo.meta_cons += sco->meta_slots_used;
+ npo.meta_cons += XENVIF_RX_CB(skb)->meta_slots_used;
dev_kfree_skb(skb);
}
@@ -645,9 +676,12 @@ static void xenvif_tx_err(struct xenvif *vif,
struct xen_netif_tx_request *txp, RING_IDX end)
{
RING_IDX cons = vif->tx.req_cons;
+ unsigned long flags;
do {
+ spin_lock_irqsave(&vif->response_lock, flags);
make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR);
+ spin_unlock_irqrestore(&vif->response_lock, flags);
if (cons == end)
break;
txp = RING_GET_REQUEST(&vif->tx, cons++);
@@ -759,180 +793,168 @@ static int xenvif_count_requests(struct xenvif *vif,
return slots;
}
-static struct page *xenvif_alloc_page(struct xenvif *vif,
- u16 pending_idx)
+
+struct xenvif_tx_cb {
+ u16 pending_idx;
+};
+
+#define XENVIF_TX_CB(skb) ((struct xenvif_tx_cb *)(skb)->cb)
+
+static inline void xenvif_tx_create_gop(struct xenvif *vif,
+ u16 pending_idx,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *gop)
{
- struct page *page;
+ vif->pages_to_map[gop-vif->tx_map_ops] = vif->mmap_pages[pending_idx];
+ gnttab_set_map_op(gop, idx_to_kaddr(vif, pending_idx),
+ GNTMAP_host_map | GNTMAP_readonly,
+ txp->gref, vif->domid);
+
+ memcpy(&vif->pending_tx_info[pending_idx].req, txp,
+ sizeof(*txp));
+}
- page = alloc_page(GFP_ATOMIC|__GFP_COLD);
- if (!page)
+static inline struct sk_buff *xenvif_alloc_skb(unsigned int size)
+{
+ struct sk_buff *skb =
+ alloc_skb(size + NET_SKB_PAD + NET_IP_ALIGN,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (unlikely(skb == NULL))
return NULL;
- vif->mmap_pages[pending_idx] = page;
- return page;
+ /* Packets passed to netif_rx() must have some headroom. */
+ skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
+
+ /* Initialize it here to avoid later surprises */
+ skb_shinfo(skb)->destructor_arg = NULL;
+
+ return skb;
}
-static struct gnttab_copy *xenvif_get_requests(struct xenvif *vif,
- struct sk_buff *skb,
- struct xen_netif_tx_request *txp,
- struct gnttab_copy *gop)
+static struct gnttab_map_grant_ref *xenvif_get_requests(struct xenvif *vif,
+ struct sk_buff *skb,
+ struct xen_netif_tx_request *txp,
+ struct gnttab_map_grant_ref *gop)
{
struct skb_shared_info *shinfo = skb_shinfo(skb);
skb_frag_t *frags = shinfo->frags;
- u16 pending_idx = *((u16 *)skb->data);
- u16 head_idx = 0;
- int slot, start;
- struct page *page;
- pending_ring_idx_t index, start_idx = 0;
- uint16_t dst_offset;
- unsigned int nr_slots;
- struct pending_tx_info *first = NULL;
+ u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+ int start;
+ pending_ring_idx_t index;
+ unsigned int nr_slots, frag_overflow = 0;
/* At this point shinfo->nr_frags is in fact the number of
* slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX.
*/
+ if (shinfo->nr_frags > MAX_SKB_FRAGS) {
+ frag_overflow = shinfo->nr_frags - MAX_SKB_FRAGS;
+ BUG_ON(frag_overflow > MAX_SKB_FRAGS);
+ shinfo->nr_frags = MAX_SKB_FRAGS;
+ }
nr_slots = shinfo->nr_frags;
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
- /* Coalesce tx requests, at this point the packet passed in
- * should be <= 64K. Any packets larger than 64K have been
- * handled in xenvif_count_requests().
- */
- for (shinfo->nr_frags = slot = start; slot < nr_slots;
- shinfo->nr_frags++) {
- struct pending_tx_info *pending_tx_info =
- vif->pending_tx_info;
+ for (shinfo->nr_frags = start; shinfo->nr_frags < nr_slots;
+ shinfo->nr_frags++, txp++, gop++) {
+ index = pending_index(vif->pending_cons++);
+ pending_idx = vif->pending_ring[index];
+ xenvif_tx_create_gop(vif, pending_idx, txp, gop);
+ frag_set_pending_idx(&frags[shinfo->nr_frags], pending_idx);
+ }
- page = alloc_page(GFP_ATOMIC|__GFP_COLD);
- if (!page)
- goto err;
-
- dst_offset = 0;
- first = NULL;
- while (dst_offset < PAGE_SIZE && slot < nr_slots) {
- gop->flags = GNTCOPY_source_gref;
-
- gop->source.u.ref = txp->gref;
- gop->source.domid = vif->domid;
- gop->source.offset = txp->offset;
-
- gop->dest.domid = DOMID_SELF;
-
- gop->dest.offset = dst_offset;
- gop->dest.u.gmfn = virt_to_mfn(page_address(page));
-
- if (dst_offset + txp->size > PAGE_SIZE) {
- /* This page can only merge a portion
- * of tx request. Do not increment any
- * pointer / counter here. The txp
- * will be dealt with in future
- * rounds, eventually hitting the
- * `else` branch.
- */
- gop->len = PAGE_SIZE - dst_offset;
- txp->offset += gop->len;
- txp->size -= gop->len;
- dst_offset += gop->len; /* quit loop */
- } else {
- /* This tx request can be merged in the page */
- gop->len = txp->size;
- dst_offset += gop->len;
-
- index = pending_index(vif->pending_cons++);
-
- pending_idx = vif->pending_ring[index];
-
- memcpy(&pending_tx_info[pending_idx].req, txp,
- sizeof(*txp));
-
- /* Poison these fields, corresponding
- * fields for head tx req will be set
- * to correct values after the loop.
- */
- vif->mmap_pages[pending_idx] = (void *)(~0UL);
- pending_tx_info[pending_idx].head =
- INVALID_PENDING_RING_IDX;
-
- if (!first) {
- first = &pending_tx_info[pending_idx];
- start_idx = index;
- head_idx = pending_idx;
- }
-
- txp++;
- slot++;
- }
+ if (frag_overflow) {
+ struct sk_buff *nskb = xenvif_alloc_skb(0);
+ if (unlikely(nskb == NULL)) {
+ if (net_ratelimit())
+ netdev_err(vif->dev,
+ "Can't allocate the frag_list skb.\n");
+ return NULL;
+ }
+
+ shinfo = skb_shinfo(nskb);
+ frags = shinfo->frags;
- gop++;
+ for (shinfo->nr_frags = 0; shinfo->nr_frags < frag_overflow;
+ shinfo->nr_frags++, txp++, gop++) {
+ index = pending_index(vif->pending_cons++);
+ pending_idx = vif->pending_ring[index];
+ xenvif_tx_create_gop(vif, pending_idx, txp, gop);
+ frag_set_pending_idx(&frags[shinfo->nr_frags],
+ pending_idx);
}
- first->req.offset = 0;
- first->req.size = dst_offset;
- first->head = start_idx;
- vif->mmap_pages[head_idx] = page;
- frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx);
+ skb_shinfo(skb)->frag_list = nskb;
}
- BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS);
-
return gop;
-err:
- /* Unwind, freeing all pages and sending error responses. */
- while (shinfo->nr_frags-- > start) {
- xenvif_idx_release(vif,
- frag_get_pending_idx(&frags[shinfo->nr_frags]),
- XEN_NETIF_RSP_ERROR);
+}
+
+static inline void xenvif_grant_handle_set(struct xenvif *vif,
+ u16 pending_idx,
+ grant_handle_t handle)
+{
+ if (unlikely(vif->grant_tx_handle[pending_idx] !=
+ NETBACK_INVALID_HANDLE)) {
+ netdev_err(vif->dev,
+ "Trying to overwrite active handle! pending_idx: %x\n",
+ pending_idx);
+ BUG();
}
- /* The head too, if necessary. */
- if (start)
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+ vif->grant_tx_handle[pending_idx] = handle;
+}
- return NULL;
+static inline void xenvif_grant_handle_reset(struct xenvif *vif,
+ u16 pending_idx)
+{
+ if (unlikely(vif->grant_tx_handle[pending_idx] ==
+ NETBACK_INVALID_HANDLE)) {
+ netdev_err(vif->dev,
+ "Trying to unmap invalid handle! pending_idx: %x\n",
+ pending_idx);
+ BUG();
+ }
+ vif->grant_tx_handle[pending_idx] = NETBACK_INVALID_HANDLE;
}
+/* Check the grant map results for the header and every frag of skb (and of
+ * its frag_list skb, if any), recording the grant handle of each successful
+ * mapping. On the first failure the already mapped slots are unmapped again.
+ * *gopp is advanced past the operations consumed; returns 0 or the first
+ * non-zero grant status encountered.
+ */
static int xenvif_tx_check_gop(struct xenvif *vif,
struct sk_buff *skb,
- struct gnttab_copy **gopp)
+ struct gnttab_map_grant_ref **gopp)
{
- struct gnttab_copy *gop = *gopp;
- u16 pending_idx = *((u16 *)skb->data);
+ struct gnttab_map_grant_ref *gop = *gopp;
+ u16 pending_idx = XENVIF_TX_CB(skb)->pending_idx;
struct skb_shared_info *shinfo = skb_shinfo(skb);
struct pending_tx_info *tx_info;
int nr_frags = shinfo->nr_frags;
int i, err, start;
- u16 peek; /* peek into next tx request */
+ struct sk_buff *first_skb = NULL;
/* Check status of header. */
err = gop->status;
if (unlikely(err))
xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_ERROR);
+ else
+ xenvif_grant_handle_set(vif, pending_idx, gop->handle);
/* Skip first skb fragment if it is on same page as header fragment. */
start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+check_frags:
for (i = start; i < nr_frags; i++) {
int j, newerr;
- pending_ring_idx_t head;
pending_idx = frag_get_pending_idx(&shinfo->frags[i]);
tx_info = &vif->pending_tx_info[pending_idx];
- head = tx_info->head;
/* Check error status: if okay then remember grant handle. */
- do {
- newerr = (++gop)->status;
- if (newerr)
- break;
- peek = vif->pending_ring[pending_index(++head)];
- } while (!pending_tx_is_head(vif, peek));
+ newerr = (++gop)->status;
if (likely(!newerr)) {
+ xenvif_grant_handle_set(vif, pending_idx, gop->handle);
/* Had a previous error? Invalidate this fragment. */
if (unlikely(err))
- xenvif_idx_release(vif, pending_idx,
- XEN_NETIF_RSP_OKAY);
+ xenvif_idx_unmap(vif, pending_idx);
continue;
}
@@ -942,20 +964,45 @@ static int xenvif_tx_check_gop(struct xenvif *vif,
/* Not the first error? Preceding frags already invalidated. */
if (err)
continue;
-
/* First error: invalidate header and preceding fragments. */
- pending_idx = *((u16 *)skb->data);
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+ /* The header slot is recorded in the first skb's cb only; while
+ * checking the frag_list skb, skb no longer points at it.
+ */
+ if (!first_skb)
+ pending_idx = XENVIF_TX_CB(skb)->pending_idx;
+ else
+ pending_idx = XENVIF_TX_CB(first_skb)->pending_idx;
+ xenvif_idx_unmap(vif, pending_idx);
for (j = start; j < i; j++) {
pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
- xenvif_idx_release(vif, pending_idx,
- XEN_NETIF_RSP_OKAY);
+ xenvif_idx_unmap(vif, pending_idx);
}
/* Remember the error: invalidate all subsequent fragments. */
err = newerr;
}
+ if (skb_has_frag_list(skb)) {
+ first_skb = skb;
+ skb = shinfo->frag_list;
+ shinfo = skb_shinfo(skb);
+ nr_frags = shinfo->nr_frags;
+ start = 0;
+
+ goto check_frags;
+ }
+
+ /* There was a mapping error in the frag_list skb. We have to unmap
+ * the first skb's frags
+ *
+ * NOTE(review): if the error was already seen while checking
+ * first_skb itself, its frags appear to have been unmapped in the
+ * loop above as well - confirm a slot cannot be unmapped twice here.
+ */
+ if (first_skb && err) {
+ int j;
+ shinfo = skb_shinfo(first_skb);
+ /* Header index of the first skb, needed to recompute 'start' */
+ pending_idx = XENVIF_TX_CB(first_skb)->pending_idx;
+ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx);
+ for (j = start; j < shinfo->nr_frags; j++) {
+ pending_idx = frag_get_pending_idx(&shinfo->frags[j]);
+ xenvif_idx_unmap(vif, pending_idx);
+ }
+ }
+
*gopp = gop + 1;
return err;
}
@@ -965,6 +1012,10 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
struct skb_shared_info *shinfo = skb_shinfo(skb);
int nr_frags = shinfo->nr_frags;
int i;
+ u16 prev_pending_idx = INVALID_PENDING_IDX;
+
+ if (skb_shinfo(skb)->destructor_arg)
+ prev_pending_idx = XENVIF_TX_CB(skb)->pending_idx;
for (i = 0; i < nr_frags; i++) {
skb_frag_t *frag = shinfo->frags + i;
@@ -974,6 +1025,17 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
pending_idx = frag_get_pending_idx(frag);
+ /* If this is not the first frag, chain it to the previous*/
+ if (unlikely(prev_pending_idx == INVALID_PENDING_IDX))
+ skb_shinfo(skb)->destructor_arg =
+ &vif->pending_tx_info[pending_idx].callback_struct;
+ else if (likely(pending_idx != prev_pending_idx))
+ vif->pending_tx_info[prev_pending_idx].callback_struct.ctx =
+ &(vif->pending_tx_info[pending_idx].callback_struct);
+
+ vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
+ prev_pending_idx = pending_idx;
+
txp = &vif->pending_tx_info[pending_idx].req;
page = virt_to_page(idx_to_kaddr(vif, pending_idx));
__skb_fill_page_desc(skb, i, page, txp->offset, txp->size);
@@ -981,10 +1043,15 @@ static void xenvif_fill_frags(struct xenvif *vif, struct sk_buff *skb)
skb->data_len += txp->size;
skb->truesize += txp->size;
- /* Take an extra reference to offset xenvif_idx_release */
+ /* Take an extra reference to offset network stack's put_page */
get_page(vif->mmap_pages[pending_idx]);
- xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
}
+ /* FIXME: __skb_fill_page_desc set this to true because page->pfmemalloc
+ * overlaps with "index", and "mapping" is not set. I think mapping
+ * should be set. If delivered to local stack, it would drop this
+ * skb in sk_filter unless the socket has the right to use it.
+ */
+ skb->pfmemalloc = false;
}
static int xenvif_get_extras(struct xenvif *vif,
@@ -1104,16 +1171,14 @@ static bool tx_credit_exceeded(struct xenvif *vif, unsigned size)
static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
{
- struct gnttab_copy *gop = vif->tx_copy_ops, *request_gop;
+ struct gnttab_map_grant_ref *gop = vif->tx_map_ops, *request_gop;
struct sk_buff *skb;
int ret;
- while ((nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
- < MAX_PENDING_REQS) &&
+ while (xenvif_tx_pending_slots_available(vif) &&
(skb_queue_len(&vif->tx_queue) < budget)) {
struct xen_netif_tx_request txreq;
struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX];
- struct page *page;
struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1];
u16 pending_idx;
RING_IDX idx;
@@ -1189,8 +1254,7 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
ret < XEN_NETBK_LEGACY_SLOTS_MAX) ?
PKT_PROT_LEN : txreq.size;
- skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN,
- GFP_ATOMIC | __GFP_NOWARN);
+ skb = xenvif_alloc_skb(data_len);
if (unlikely(skb == NULL)) {
netdev_dbg(vif->dev,
"Can't allocate a skb in start_xmit.\n");
@@ -1198,9 +1262,6 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
break;
}
- /* Packets passed to netif_rx() must have some headroom. */
- skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
-
if (extras[XEN_NETIF_EXTRA_TYPE_GSO - 1].type) {
struct xen_netif_extra_info *gso;
gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1];
@@ -1212,31 +1273,11 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
}
}
- /* XXX could copy straight to head */
- page = xenvif_alloc_page(vif, pending_idx);
- if (!page) {
- kfree_skb(skb);
- xenvif_tx_err(vif, &txreq, idx);
- break;
- }
-
- gop->source.u.ref = txreq.gref;
- gop->source.domid = vif->domid;
- gop->source.offset = txreq.offset;
-
- gop->dest.u.gmfn = virt_to_mfn(page_address(page));
- gop->dest.domid = DOMID_SELF;
- gop->dest.offset = txreq.offset;
-
- gop->len = txreq.size;
- gop->flags = GNTCOPY_source_gref;
+ xenvif_tx_create_gop(vif, pending_idx, &txreq, gop);
gop++;
- memcpy(&vif->pending_tx_info[pending_idx].req,
- &txreq, sizeof(txreq));
- vif->pending_tx_info[pending_idx].head = index;
- *((u16 *)skb->data) = pending_idx;
+ XENVIF_TX_CB(skb)->pending_idx = pending_idx;
__skb_put(skb, data_len);
@@ -1264,17 +1305,82 @@ static unsigned xenvif_tx_build_gops(struct xenvif *vif, int budget)
vif->tx.req_cons = idx;
- if ((gop-vif->tx_copy_ops) >= ARRAY_SIZE(vif->tx_copy_ops))
+ if ((gop-vif->tx_map_ops) >= ARRAY_SIZE(vif->tx_map_ops))
break;
}
- return gop - vif->tx_copy_ops;
+ return gop - vif->tx_map_ops;
}
+/* Consolidate skb with a frag_list into a brand new one with local pages on
+ * frags. Returns 0 or -ENOMEM if can't allocate new pages.
+ *
+ * The frag_list skb's frags are filled in first, then everything past skb's
+ * linear area (its own frags plus the frag_list skb's data) is copied with
+ * skb_copy_bits() into freshly allocated pages, which then replace skb's
+ * frags array. On success the frag_list is detached and freed and skb's
+ * destructor_arg callback is invoked directly. On -ENOMEM the pages allocated
+ * so far are released and skb's frags array and frag_list are left as they
+ * were; note that skb->len and skb->data_len are not rolled back.
+ */
+static int xenvif_handle_frag_list(struct xenvif *vif, struct sk_buff *skb)
+{
+ unsigned int offset = skb_headlen(skb); /* copying starts after the linear area */
+ skb_frag_t frags[MAX_SKB_FRAGS]; /* replacement frags, built on the stack */
+ int i;
+ struct ubuf_info *uarg;
+ struct sk_buff *nskb = skb_shinfo(skb)->frag_list;
+
+ vif->tx_zerocopy_sent += 2;
+ vif->tx_frag_overflow++;
+
+ xenvif_fill_frags(vif, nskb);
+ /* Subtract frags size, we will correct it later */
+ skb->truesize -= skb->data_len;
+ skb->len += nskb->len;
+ skb->data_len += nskb->len;
+
+ /* create a brand new frags array and coalesce there */
+ for (i = 0; offset < skb->len; i++) {
+ struct page *page;
+ unsigned int len;
+
+ BUG_ON(i >= MAX_SKB_FRAGS);
+ page = alloc_page(GFP_ATOMIC|__GFP_COLD);
+ if (!page) {
+ int j;
+ skb->truesize += skb->data_len; /* put back what was subtracted above */
+ for (j = 0; j < i; j++)
+ put_page(frags[j].page.p);
+ return -ENOMEM;
+ }
+
+ if (offset + PAGE_SIZE < skb->len)
+ len = PAGE_SIZE;
+ else
+ len = skb->len - offset; /* last, possibly partial, page */
+ if (skb_copy_bits(skb, offset, page_address(page), len))
+ BUG();
+
+ offset += len;
+ frags[i].page.p = page;
+ frags[i].page_offset = 0;
+ skb_frag_size_set(&frags[i], len);
+ }
+ /* swap out with old one */
+ memcpy(skb_shinfo(skb)->frags,
+ frags,
+ i * sizeof(skb_frag_t));
+ skb_shinfo(skb)->nr_frags = i;
+ skb->truesize += i * PAGE_SIZE;
+
+ /* remove traces of mapped pages and frag_list */
+ skb_frag_list_init(skb);
+ uarg = skb_shinfo(skb)->destructor_arg;
+ uarg->callback(uarg, true);
+ skb_shinfo(skb)->destructor_arg = NULL;
+
+ skb_shinfo(nskb)->tx_flags |= SKBTX_DEV_ZEROCOPY; /* presumably so that freeing nskb triggers its callback - confirm */
+ kfree_skb(nskb);
+
+ return 0;
+}
static int xenvif_tx_submit(struct xenvif *vif)
{
- struct gnttab_copy *gop = vif->tx_copy_ops;
+ struct gnttab_map_grant_ref *gop = vif->tx_map_ops;
struct sk_buff *skb;
int work_done = 0;
@@ -1283,7 +1389,7 @@ static int xenvif_tx_submit(struct xenvif *vif)
u16 pending_idx;
unsigned data_len;
- pending_idx = *((u16 *)skb->data);
+ pending_idx = XENVIF_TX_CB(skb)->pending_idx;
txp = &vif->pending_tx_info[pending_idx].req;
/* Check the remap error code. */
@@ -1298,14 +1404,16 @@ static int xenvif_tx_submit(struct xenvif *vif)
memcpy(skb->data,
(void *)(idx_to_kaddr(vif, pending_idx)|txp->offset),
data_len);
+ vif->pending_tx_info[pending_idx].callback_struct.ctx = NULL;
if (data_len < txp->size) {
/* Append the packet payload as a fragment. */
txp->offset += data_len;
txp->size -= data_len;
+ skb_shinfo(skb)->destructor_arg =
+ &vif->pending_tx_info[pending_idx].callback_struct;
} else {
/* Schedule a response immediately. */
- xenvif_idx_release(vif, pending_idx,
- XEN_NETIF_RSP_OKAY);
+ xenvif_idx_unmap(vif, pending_idx);
}
if (txp->flags & XEN_NETTXF_csum_blank)
@@ -1315,6 +1423,17 @@ static int xenvif_tx_submit(struct xenvif *vif)
xenvif_fill_frags(vif, skb);
+ if (unlikely(skb_has_frag_list(skb))) {
+ if (xenvif_handle_frag_list(vif, skb)) {
+ if (net_ratelimit())
+ netdev_err(vif->dev,
+ "Not enough memory to consolidate frag_list!\n");
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ kfree_skb(skb);
+ continue;
+ }
+ }
+
if (skb_is_nonlinear(skb) && skb_headlen(skb) < PKT_PROT_LEN) {
int target = min_t(int, skb->len, PKT_PROT_LEN);
__pskb_pull_tail(skb, target - skb_headlen(skb));
@@ -1327,6 +1446,9 @@ static int xenvif_tx_submit(struct xenvif *vif)
if (checksum_setup(vif, skb)) {
netdev_dbg(vif->dev,
"Can't setup checksum in net_tx_action\n");
+ /* We have to set this flag to trigger the callback */
+ if (skb_shinfo(skb)->destructor_arg)
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
kfree_skb(skb);
continue;
}
@@ -1352,17 +1474,134 @@ static int xenvif_tx_submit(struct xenvif *vif)
work_done++;
+ /* Set this flag right before netif_receive_skb, otherwise
+ * someone might think this packet already left netback, and
+ * do a skb_copy_ubufs while we are still in control of the
+ * skb. E.g. the __pskb_pull_tail earlier can do such thing.
+ */
+ if (skb_shinfo(skb)->destructor_arg) {
+ skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+ vif->tx_zerocopy_sent++;
+ }
+
netif_receive_skb(skb);
}
return work_done;
}
+/* Completion callback for a zerocopy skb: queue the pending index of every
+ * ubuf in the chain on the dealloc ring, wake the waiters on dealloc_wq,
+ * reschedule NAPI if TX work can proceed, and update the zerocopy counters.
+ *
+ * @ubuf: first ubuf_info of the chain; ->desc holds the pending index and
+ *        ->ctx links to the next ubuf_info (NULL ends the chain).
+ * @zerocopy_success: selects which of the two statistics counters is bumped.
+ */
+void xenvif_zerocopy_callback(struct ubuf_info *ubuf, bool zerocopy_success)
+{
+ unsigned long flags;
+ pending_ring_idx_t index;
+ struct xenvif *vif = ubuf_to_vif(ubuf);
+
+ /* This is the only place where we grab this lock, to protect callbacks
+ * from each other.
+ */
+ spin_lock_irqsave(&vif->callback_lock, flags);
+ do {
+ u16 pending_idx = ubuf->desc;
+ /* Step to the next element before the current index is queued. */
+ ubuf = (struct ubuf_info *) ubuf->ctx;
+ /* The dealloc ring must never hold MAX_PENDING_REQS or more
+ * outstanding entries.
+ */
+ BUG_ON(vif->dealloc_prod - vif->dealloc_cons >=
+ MAX_PENDING_REQS);
+ index = pending_index(vif->dealloc_prod);
+ vif->dealloc_ring[index] = pending_idx;
+ /* Sync with xenvif_tx_dealloc_action:
+ * insert idx then incr producer.
+ */
+ smp_wmb();
+ vif->dealloc_prod++;
+ } while (ubuf);
+ wake_up(&vif->dealloc_wq);
+ spin_unlock_irqrestore(&vif->callback_lock, flags);
+
+ /* Kick NAPI when the frontend has requests waiting and there are
+ * pending slots available to take them.
+ */
+ if (RING_HAS_UNCONSUMED_REQUESTS(&vif->tx) &&
+ xenvif_tx_pending_slots_available(vif)) {
+ local_bh_disable();
+ napi_schedule(&vif->napi);
+ local_bh_enable();
+ }
+
+ if (likely(zerocopy_success))
+ vif->tx_zerocopy_success++;
+ else
+ vif->tx_zerocopy_fail++;
+}
+
+/* Drain the dealloc ring: build one unmap operation per queued pending
+ * index, unmap them all in a single gnttab_unmap_refs() call, then send an
+ * OKAY response for every index that was unmapped.
+ *
+ * A failed batch unmap is fatal: the failing operations are logged and the
+ * function hits BUG().
+ */
+static inline void xenvif_tx_dealloc_action(struct xenvif *vif)
+{
+	struct gnttab_unmap_grant_ref *gop;
+	pending_ring_idx_t dc, dp;
+	u16 pending_idx, pending_idx_release[MAX_PENDING_REQS];
+	unsigned int i, nr_ops;
+
+	dc = vif->dealloc_cons;
+	gop = vif->tx_unmap_ops;
+
+	/* Free up any grants we have finished using */
+	do {
+		dp = vif->dealloc_prod;
+
+		/* Ensure we see all indices enqueued by all
+		 * xenvif_zerocopy_callback().
+		 */
+		smp_rmb();
+
+		while (dc != dp) {
+			/* pending_idx_release[] has MAX_PENDING_REQS entries,
+			 * so the slot written below must be strictly less
+			 * than that.
+			 */
+			BUG_ON(gop - vif->tx_unmap_ops >= MAX_PENDING_REQS);
+			pending_idx =
+				vif->dealloc_ring[pending_index(dc++)];
+
+			pending_idx_release[gop - vif->tx_unmap_ops] =
+				pending_idx;
+			vif->pages_to_unmap[gop - vif->tx_unmap_ops] =
+				vif->mmap_pages[pending_idx];
+			gnttab_set_unmap_op(gop,
+					    idx_to_kaddr(vif, pending_idx),
+					    GNTMAP_host_map,
+					    vif->grant_tx_handle[pending_idx]);
+			/* Btw. already unmapped? */
+			xenvif_grant_handle_reset(vif, pending_idx);
+			++gop;
+		}
+
+		/* Go round again if the producer moved while we were busy */
+	} while (dp != vif->dealloc_prod);
+
+	vif->dealloc_cons = dc;
+
+	/* Number of unmap operations prepared above */
+	nr_ops = gop - vif->tx_unmap_ops;
+
+	if (nr_ops > 0) {
+		int ret;
+
+		ret = gnttab_unmap_refs(vif->tx_unmap_ops,
+					NULL,
+					vif->pages_to_unmap,
+					nr_ops);
+		if (ret) {
+			netdev_err(vif->dev, "Unmap fail: nr_ops %x ret %d\n",
+				   nr_ops, ret);
+			/* gop points one past the last prepared op here, so
+			 * the dump has to walk the array from its start.
+			 */
+			for (i = 0; i < nr_ops; ++i) {
+				gop = &vif->tx_unmap_ops[i];
+				if (gop->status != GNTST_okay)
+					netdev_err(vif->dev,
+						   " host_addr: %llx handle: %x status: %d\n",
+						   gop->host_addr,
+						   gop->handle,
+						   gop->status);
+			}
+			BUG();
+		}
+	}
+
+	for (i = 0; i < nr_ops; ++i)
+		xenvif_idx_release(vif, pending_idx_release[i],
+				   XEN_NETIF_RSP_OKAY);
+}
+
+
/* Called after netfront has transmitted */
int xenvif_tx_action(struct xenvif *vif, int budget)
{
unsigned nr_gops;
- int work_done;
+ int work_done, ret;
if (unlikely(!tx_work_todo(vif)))
return 0;
@@ -1372,7 +1611,11 @@ int xenvif_tx_action(struct xenvif *vif, int budget)
if (nr_gops == 0)
return 0;
- gnttab_batch_copy(vif->tx_copy_ops, nr_gops);
+ ret = gnttab_map_refs(vif->tx_map_ops,
+ NULL,
+ vif->pages_to_map,
+ nr_gops);
+ BUG_ON(ret);
work_done = xenvif_tx_submit(vif);
@@ -1383,45 +1626,18 @@ static void xenvif_idx_release(struct xenvif *vif, u16 pending_idx,
u8 status)
{
struct pending_tx_info *pending_tx_info;
- pending_ring_idx_t head;
- u16 peek; /* peek into next tx request */
-
- BUG_ON(vif->mmap_pages[pending_idx] == (void *)(~0UL));
-
- /* Already complete? */
- if (vif->mmap_pages[pending_idx] == NULL)
- return;
+ pending_ring_idx_t index;
+ unsigned long flags;
pending_tx_info = &vif->pending_tx_info[pending_idx];
-
- head = pending_tx_info->head;
-
- BUG_ON(!pending_tx_is_head(vif, head));
- BUG_ON(vif->pending_ring[pending_index(head)] != pending_idx);
-
- do {
- pending_ring_idx_t index;
- pending_ring_idx_t idx = pending_index(head);
- u16 info_idx = vif->pending_ring[idx];
-
- pending_tx_info = &vif->pending_tx_info[info_idx];
- make_tx_response(vif, &pending_tx_info->req, status);
-
- /* Setting any number other than
- * INVALID_PENDING_RING_IDX indicates this slot is
- * starting a new packet / ending a previous packet.
- */
- pending_tx_info->head = 0;
-
- index = pending_index(vif->pending_prod++);
- vif->pending_ring[index] = vif->pending_ring[info_idx];
-
- peek = vif->pending_ring[pending_index(++head)];
-
- } while (!pending_tx_is_head(vif, peek));
-
- put_page(vif->mmap_pages[pending_idx]);
- vif->mmap_pages[pending_idx] = NULL;
+ spin_lock_irqsave(&vif->response_lock, flags);
+ make_tx_response(vif, &pending_tx_info->req, status);
+ index = pending_index(vif->pending_prod);
+ vif->pending_ring[index] = pending_idx;
+ /* TX shouldn't use the index before we give it back here */
+ mb();
+ vif->pending_prod++;
+ spin_unlock_irqrestore(&vif->response_lock, flags);
}
@@ -1469,23 +1685,74 @@ static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif,
return resp;
}
+/* Unmap the single grant associated with pending_idx right away and then
+ * hand the slot back with an OKAY status through xenvif_idx_release().
+ * A failing unmap trips BUG_ON().
+ */
+void xenvif_idx_unmap(struct xenvif *vif, u16 pending_idx)
+{
+ int ret;
+ struct gnttab_unmap_grant_ref tx_unmap_op;
+
+ /* Build the op from the stored handle before that handle is reset. */
+ gnttab_set_unmap_op(&tx_unmap_op,
+ idx_to_kaddr(vif, pending_idx),
+ GNTMAP_host_map,
+ vif->grant_tx_handle[pending_idx]);
+ /* Btw. already unmapped? */
+ xenvif_grant_handle_reset(vif, pending_idx);
+
+ ret = gnttab_unmap_refs(&tx_unmap_op, NULL,
+ &vif->mmap_pages[pending_idx], 1);
+ BUG_ON(ret);
+
+ xenvif_idx_release(vif, pending_idx, XEN_NETIF_RSP_OKAY);
+}
+
static inline int rx_work_todo(struct xenvif *vif)
{
- return !skb_queue_empty(&vif->rx_queue) &&
- xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots);
+ return (!skb_queue_empty(&vif->rx_queue) &&
+ xenvif_rx_ring_slots_available(vif, vif->rx_last_skb_slots)) ||
+ vif->rx_queue_purge;
}
static inline int tx_work_todo(struct xenvif *vif)
{
if (likely(RING_HAS_UNCONSUMED_REQUESTS(&vif->tx)) &&
- (nr_pending_reqs(vif) + XEN_NETBK_LEGACY_SLOTS_MAX
- < MAX_PENDING_REQS))
+ xenvif_tx_pending_slots_available(vif))
return 1;
return 0;
}
+/* Timer callback for vif->dealloc_delay: record that the delay has expired
+ * and wake the waiters on dealloc_wq so they re-evaluate their condition.
+ *
+ * @data: the struct xenvif pointer, cast to unsigned long.
+ */
+static void xenvif_dealloc_delay(unsigned long data)
+{
+ struct xenvif *vif = (struct xenvif *)data;
+
+ /* Set the flag first so the woken waiter observes the timeout. */
+ vif->dealloc_delay_timed_out = true;
+ wake_up(&vif->dealloc_wq);
+}
+
+/* Decide whether the dealloc ring should be drained now.
+ *
+ * Returns false when the ring is empty.  A small backlog is deferred (false
+ * is returned and the one-millisecond delay timer is armed if it is not
+ * already pending) as long as the TX ring has plenty of free slots and the
+ * delay has not yet timed out.  Otherwise the timer is cancelled, the
+ * timed-out flag is cleared and true is returned.
+ */
+static inline bool tx_dealloc_work_todo(struct xenvif *vif)
+{
+	bool defer;
+
+	/* Nothing queued for unmapping */
+	if (vif->dealloc_cons == vif->dealloc_prod)
+		return false;
+
+	defer = (nr_free_slots(&vif->tx) > 2 * XEN_NETBK_LEGACY_SLOTS_MAX) &&
+		(vif->dealloc_prod - vif->dealloc_cons < MAX_PENDING_REQS / 4) &&
+		!vif->dealloc_delay_timed_out;
+
+	if (!defer) {
+		/* Drain now; the delay machinery starts afresh next time */
+		del_timer_sync(&vif->dealloc_delay);
+		vif->dealloc_delay_timed_out = false;
+		return true;
+	}
+
+	/* Batch a little longer, making sure a wake-up is scheduled */
+	if (!timer_pending(&vif->dealloc_delay)) {
+		vif->dealloc_delay.function = xenvif_dealloc_delay;
+		vif->dealloc_delay.data = (unsigned long)vif;
+		mod_timer(&vif->dealloc_delay,
+			  jiffies + msecs_to_jiffies(1));
+	}
+
+	return false;
+}
+
void xenvif_unmap_frontend_rings(struct xenvif *vif)
{
if (vif->tx.sring)
@@ -1543,7 +1810,7 @@ static void xenvif_start_queue(struct xenvif *vif)
netif_wake_queue(vif->dev);
}
-int xenvif_kthread(void *data)
+int xenvif_kthread_guest_rx(void *data)
{
struct xenvif *vif = data;
struct sk_buff *skb;
@@ -1555,12 +1822,19 @@ int xenvif_kthread(void *data)
if (kthread_should_stop())
break;
+ if (vif->rx_queue_purge) {
+ skb_queue_purge(&vif->rx_queue);
+ vif->rx_queue_purge = false;
+ }
+
if (!skb_queue_empty(&vif->rx_queue))
xenvif_rx_action(vif);
if (skb_queue_empty(&vif->rx_queue) &&
- netif_queue_stopped(vif->dev))
+ netif_queue_stopped(vif->dev)) {
+ del_timer_sync(&vif->wake_queue);
xenvif_start_queue(vif);
+ }
cond_resched();
}
@@ -1572,6 +1846,28 @@ int xenvif_kthread(void *data)
return 0;
}
+/* Kernel thread body: sleep on dealloc_wq until tx_dealloc_work_todo()
+ * reports work (or the thread is asked to stop), then unmap the queued
+ * grants with xenvif_tx_dealloc_action().  Whatever is still queued when the
+ * thread is stopped is unmapped before returning.
+ *
+ * @data: the struct xenvif this thread serves.
+ *
+ * Always returns 0.
+ */
+int xenvif_dealloc_kthread(void *data)
+{
+	struct xenvif *vif = data;
+
+	while (!kthread_should_stop()) {
+		wait_event_interruptible(vif->dealloc_wq,
+					 tx_dealloc_work_todo(vif) ||
+					 kthread_should_stop());
+		if (kthread_should_stop())
+			break;
+
+		xenvif_tx_dealloc_action(vif);
+		cond_resched();
+	}
+
+	/* Unmap anything remaining.  Check the ring indices directly here:
+	 * tx_dealloc_work_todo() may choose to defer a small backlog and arm
+	 * the delay timer instead, which would leave grants mapped (and a
+	 * timer pending) after this thread has exited.
+	 */
+	if (vif->dealloc_cons != vif->dealloc_prod) {
+		del_timer_sync(&vif->dealloc_delay);
+		vif->dealloc_delay_timed_out = false;
+		xenvif_tx_dealloc_action(vif);
+	}
+
+	return 0;
+}
+
static int __init netback_init(void)
{
int rc = 0;
@@ -1589,6 +1885,8 @@ static int __init netback_init(void)
if (rc)
goto failed_init;
+ rx_drain_timeout_jiffies = msecs_to_jiffies(rx_drain_timeout_msecs);
+
return 0;
failed_init:
diff --git a/net/ieee802154/6lowpan.h b/include/net/6lowpan.h
index 0dccf62434d5..f7d372b7d4ff 100644
--- a/net/ieee802154/6lowpan.h
+++ b/include/net/6lowpan.h
@@ -53,6 +53,8 @@
#ifndef __6LOWPAN_H__
#define __6LOWPAN_H__
+#include <net/ipv6.h>
+
#define UIP_802154_SHORTADDR_LEN 2 /* compressed ipv6 address length */
#define UIP_IPH_LEN 40 /* ipv6 fixed header size */
#define UIP_PROTO_UDP 17 /* ipv6 next header value for UDP */
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index adb3ea04adaa..73492b91105a 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -27,7 +27,7 @@
#include "6lowpan.h"
-#include "../ieee802154/6lowpan.h" /* for the compression support */
+#include <net/6lowpan.h> /* for the compression support */
#define IFACE_NAME_TEMPLATE "bt%d"
#define EUI64_ADDR_LEN 8
diff --git a/net/ieee802154/6lowpan_iphc.c b/net/ieee802154/6lowpan_iphc.c
index 860aa2d445ba..211b5686d719 100644
--- a/net/ieee802154/6lowpan_iphc.c
+++ b/net/ieee802154/6lowpan_iphc.c
@@ -54,11 +54,10 @@
#include <linux/if_arp.h>
#include <linux/module.h>
#include <linux/netdevice.h>
+#include <net/6lowpan.h>
#include <net/ipv6.h>
#include <net/af_ieee802154.h>
-#include "6lowpan.h"
-
/*
* Uncompress address function for source and
* destination address(non-multicast).
diff --git a/net/ieee802154/6lowpan_rtnl.c b/net/ieee802154/6lowpan_rtnl.c
index e4726180fc36..1bbab8952f77 100644
--- a/net/ieee802154/6lowpan_rtnl.c
+++ b/net/ieee802154/6lowpan_rtnl.c
@@ -52,10 +52,10 @@
#include <net/af_ieee802154.h>
#include <net/ieee802154.h>
#include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
#include <net/ipv6.h>
#include "reassembly.h"
-#include "6lowpan.h"
static LIST_HEAD(lowpan_devices);
diff --git a/net/ieee802154/reassembly.c b/net/ieee802154/reassembly.c
index 4511fc22ef16..1cc2336eb52c 100644
--- a/net/ieee802154/reassembly.c
+++ b/net/ieee802154/reassembly.c
@@ -24,10 +24,10 @@
#include <linux/export.h>
#include <net/ieee802154_netdev.h>
+#include <net/6lowpan.h>
#include <net/ipv6.h>
#include <net/inet_frag.h>
-#include "6lowpan.h"
#include "reassembly.h"
static struct inet_frags lowpan_frags;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a02c884d4321..bc0fb0fc7552 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -882,6 +882,8 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
+ /* Our usage of tstamp should remain private */
+ skb->tstamp.tv64 = 0;
}
inet = inet_sk(sk);
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index e5dc42f0e527..9958c31c2c54 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -1108,6 +1108,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
struct flowi *fl, size_t data_len)
{
struct l2tp_tunnel *tunnel = session->tunnel;
+ struct sock *sk = tunnel->sock;
unsigned int len = skb->len;
int error;
@@ -1131,7 +1132,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
/* Queue the packet to IP for output */
skb->local_df = 1;
#if IS_ENABLED(CONFIG_IPV6)
- if (skb->sk->sk_family == PF_INET6 && !tunnel->v4mapped)
+ if (sk->sk_family == PF_INET6 && !tunnel->v4mapped)
error = inet6_csk_xmit(skb, NULL);
else
#endif
@@ -1151,23 +1152,6 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb,
return 0;
}
-/* Automatically called when the skb is freed.
- */
-static void l2tp_sock_wfree(struct sk_buff *skb)
-{
- sock_put(skb->sk);
-}
-
-/* For data skbs that we transmit, we associate with the tunnel socket
- * but don't do accounting.
- */
-static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
- sock_hold(sk);
- skb->sk = sk;
- skb->destructor = l2tp_sock_wfree;
-}
-
#if IS_ENABLED(CONFIG_IPV6)
static void l2tp_xmit_ipv6_csum(struct sock *sk, struct sk_buff *skb,
int udp_len)
@@ -1221,7 +1205,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
return NET_XMIT_DROP;
}
- skb_orphan(skb);
/* Setup L2TP header */
session->build_header(session, __skb_push(skb, hdr_len));
@@ -1287,8 +1270,6 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
break;
}
- l2tp_skb_set_owner_w(skb, sk);
-
l2tp_xmit_core(session, skb, fl, data_len);
out_unlock:
bh_unlock_sock(sk);
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index d1c3429b69ed..ec126f91276b 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -20,9 +20,8 @@ af-rxrpc-y := \
ar-skbuff.o \
ar-transport.o
-ifeq ($(CONFIG_PROC_FS),y)
-af-rxrpc-y += ar-proc.o
-endif
+af-rxrpc-$(CONFIG_PROC_FS) += ar-proc.o
+af-rxrpc-$(CONFIG_SYSCTL) += sysctl.o
obj-$(CONFIG_AF_RXRPC) += af-rxrpc.o
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index e61aa6001c65..7b1670489638 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -838,6 +838,12 @@ static int __init af_rxrpc_init(void)
goto error_key_type_s;
}
+ ret = rxrpc_sysctl_init();
+ if (ret < 0) {
+ printk(KERN_CRIT "RxRPC: Cannot register sysctls\n");
+ goto error_sysctls;
+ }
+
#ifdef CONFIG_PROC_FS
proc_create("rxrpc_calls", 0, init_net.proc_net, &rxrpc_call_seq_fops);
proc_create("rxrpc_conns", 0, init_net.proc_net,
@@ -845,6 +851,8 @@ static int __init af_rxrpc_init(void)
#endif
return 0;
+error_sysctls:
+ unregister_key_type(&key_type_rxrpc_s);
error_key_type_s:
unregister_key_type(&key_type_rxrpc);
error_key_type:
@@ -865,6 +873,7 @@ error_call_jar:
static void __exit af_rxrpc_exit(void)
{
_enter("");
+ rxrpc_sysctl_exit();
unregister_key_type(&key_type_rxrpc_s);
unregister_key_type(&key_type_rxrpc);
sock_unregister(PF_RXRPC);
diff --git a/net/rxrpc/ar-ack.c b/net/rxrpc/ar-ack.c
index cd97a0ce48d8..c6be17a959a6 100644
--- a/net/rxrpc/ar-ack.c
+++ b/net/rxrpc/ar-ack.c
@@ -19,7 +19,49 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-static unsigned int rxrpc_ack_defer = 1;
+/*
+ * How long to wait before scheduling ACK generation after seeing a
+ * packet with RXRPC_REQUEST_ACK set (in jiffies).
+ */
+unsigned rxrpc_requested_ack_delay = 1;
+
+/*
+ * How long to wait before scheduling an ACK with subtype DELAY (in jiffies).
+ *
+ * We use this when we've received new data packets. If those packets aren't
+ * all consumed within this time we will send a DELAY ACK if an ACK was not
+ * requested to let the sender know it doesn't need to resend.
+ */
+unsigned rxrpc_soft_ack_delay = 1 * HZ;
+
+/*
+ * How long to wait before scheduling an ACK with subtype IDLE (in jiffies).
+ *
+ * We use this when we've consumed some previously soft-ACK'd packets and
+ * further packets aren't immediately received, to decide when to send an IDLE
+ * ACK to let the other end know that it can free up its Tx buffer space.
+ */
+unsigned rxrpc_idle_ack_delay = 0.5 * HZ;
+
+/*
+ * Receive window size in packets. This indicates the maximum number of
+ * unconsumed received packets we're willing to retain in memory. Once this
+ * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further
+ * packets.
+ */
+unsigned rxrpc_rx_window_size = 32;
+
+/*
+ * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet
+ * made by gluing normal packets together that we're willing to handle.
+ */
+unsigned rxrpc_rx_mtu = 5692;
+
+/*
+ * The maximum number of fragments in a received jumbo packet that we tell the
+ * sender that we're willing to handle.
+ */
+unsigned rxrpc_rx_jumbo_max = 4;
static const char *rxrpc_acks(u8 reason)
{
@@ -82,24 +124,23 @@ void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason,
switch (ack_reason) {
case RXRPC_ACK_DELAY:
_debug("run delay timer");
- call->ack_timer.expires = jiffies + rxrpc_ack_timeout * HZ;
- add_timer(&call->ack_timer);
- return;
+ expiry = rxrpc_soft_ack_delay;
+ goto run_timer;
case RXRPC_ACK_IDLE:
if (!immediate) {
_debug("run defer timer");
- expiry = 1;
+ expiry = rxrpc_idle_ack_delay;
goto run_timer;
}
goto cancel_timer;
case RXRPC_ACK_REQUESTED:
- if (!rxrpc_ack_defer)
+ expiry = rxrpc_requested_ack_delay;
+ if (!expiry)
goto cancel_timer;
if (!immediate || serial == cpu_to_be32(1)) {
_debug("run defer timer");
- expiry = rxrpc_ack_defer;
goto run_timer;
}
@@ -1174,11 +1215,11 @@ send_ACK:
mtu = call->conn->trans->peer->if_mtu;
mtu -= call->conn->trans->peer->hdrsize;
ackinfo.maxMTU = htonl(mtu);
- ackinfo.rwind = htonl(32);
+ ackinfo.rwind = htonl(rxrpc_rx_window_size);
/* permit the peer to send us jumbo packets if it wants to */
- ackinfo.rxMTU = htonl(5692);
- ackinfo.jumbo_max = htonl(4);
+ ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
+ ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max);
hdr.serial = htonl(atomic_inc_return(&call->conn->serial));
_proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }",
diff --git a/net/rxrpc/ar-call.c b/net/rxrpc/ar-call.c
index a3bbb360a3f9..a9e05db0f5d5 100644
--- a/net/rxrpc/ar-call.c
+++ b/net/rxrpc/ar-call.c
@@ -12,10 +12,22 @@
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/circ_buf.h>
+#include <linux/hashtable.h>
+#include <linux/spinlock_types.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Maximum lifetime of a call (in jiffies).
+ */
+unsigned rxrpc_max_call_lifetime = 60 * HZ;
+
+/*
+ * Time till dead call expires after last use (in jiffies).
+ */
+unsigned rxrpc_dead_call_expiry = 2 * HZ;
+
const char *const rxrpc_call_states[] = {
[RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq",
[RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl",
@@ -38,8 +50,6 @@ const char *const rxrpc_call_states[] = {
struct kmem_cache *rxrpc_call_jar;
LIST_HEAD(rxrpc_calls);
DEFINE_RWLOCK(rxrpc_call_lock);
-static unsigned int rxrpc_call_max_lifetime = 60;
-static unsigned int rxrpc_dead_call_timeout = 2;
static void rxrpc_destroy_call(struct work_struct *work);
static void rxrpc_call_life_expired(unsigned long _call);
@@ -47,6 +57,145 @@ static void rxrpc_dead_call_expired(unsigned long _call);
static void rxrpc_ack_time_expired(unsigned long _call);
static void rxrpc_resend_time_expired(unsigned long _call);
+static DEFINE_SPINLOCK(rxrpc_call_hash_lock);
+static DEFINE_HASHTABLE(rxrpc_call_hash, 10);
+
+/*
+ * Hash function for rxrpc_call_hash.
+ *
+ * The key is a plain sum of the local endpoint pointer, the call identity
+ * fields and the peer address taken 16 bits at a time.  addr_size is the
+ * number of peer address bytes to include.
+ */
+static unsigned long rxrpc_call_hashfunc(
+	u8 clientflag,
+	__be32 cid,
+	__be32 call_id,
+	__be32 epoch,
+	__be16 service_id,
+	sa_family_t proto,
+	void *localptr,
+	unsigned int addr_size,
+	const u8 *peer_addr)
+{
+	const u16 *word = (const u16 *)peer_addr;
+	unsigned int nr_words = addr_size >> 1;
+	u32 hcid = ntohl(cid);
+	unsigned long sum;
+
+	_enter("");
+
+	sum = (unsigned long)localptr;
+
+	/* The big-endian fields are only being summed, so the forced casts
+	 * to plain integers are harmless.
+	 */
+	sum += (__force u32)epoch;
+	sum += (__force u16)service_id;
+	sum += (__force u32)call_id;
+
+	/* Connection and channel parts of the host-order CID */
+	sum += (hcid & RXRPC_CIDMASK) >> RXRPC_CIDSHIFT;
+	sum += hcid & RXRPC_CHANNELMASK;
+
+	sum += clientflag;
+	sum += proto;
+
+	/* Fold in the peer address 16 bits at a time for speed */
+	for (; nr_words > 0; nr_words--, word++)
+		sum += *word;
+
+	_leave(" key = 0x%lx", sum);
+	return sum;
+}
+
+/*
+ * Compute the hash key for a call from the identity fields recorded in it,
+ * remember the key in the call and link the call into rxrpc_call_hash.
+ */
+static void rxrpc_call_hash_add(struct rxrpc_call *call)
+{
+	unsigned int addr_size;
+	unsigned long key;
+
+	_enter("");
+
+	/* Only a recognised address family contributes address bytes */
+	if (call->proto == AF_INET)
+		addr_size = sizeof(call->peer_ip.ipv4_addr);
+	else if (call->proto == AF_INET6)
+		addr_size = sizeof(call->peer_ip.ipv6_addr);
+	else
+		addr_size = 0;
+
+	key = rxrpc_call_hashfunc(call->in_clientflag, call->cid,
+				  call->call_id, call->epoch,
+				  call->service_id, call->proto,
+				  call->conn->trans->local, addr_size,
+				  call->peer_ip.ipv6_addr);
+
+	/* Keep the full key so lookups can compare it */
+	call->hash_key = key;
+
+	spin_lock(&rxrpc_call_hash_lock);
+	hash_add_rcu(rxrpc_call_hash, &call->hash_node, key);
+	spin_unlock(&rxrpc_call_hash_lock);
+
+	_leave("");
+}
+
+/*
+ * Remove a call from the hashtable.
+ *
+ * The unlink is done with hash_del_rcu() while holding
+ * rxrpc_call_hash_lock, the same lock rxrpc_call_hash_add() takes.
+ */
+static void rxrpc_call_hash_del(struct rxrpc_call *call)
+{
+ _enter("");
+ spin_lock(&rxrpc_call_hash_lock);
+ hash_del_rcu(&call->hash_node);
+ spin_unlock(&rxrpc_call_hash_lock);
+ _leave("");
+}
+
+/*
+ * Find a call in the hashtable and return it, or NULL if it
+ * isn't there.
+ *
+ * The key is recomputed from the arguments with rxrpc_call_hashfunc() and
+ * every candidate in that bucket is then compared field by field, so a key
+ * collision alone never produces a match.
+ *
+ * This function does not take a reference on the call it returns.
+ * NOTE(review): the bucket is walked with the _rcu iterator and no lock is
+ * taken here - presumably callers must hold the RCU read lock; confirm.
+ */
+struct rxrpc_call *rxrpc_find_call_hash(
+ u8 clientflag,
+ __be32 cid,
+ __be32 call_id,
+ __be32 epoch,
+ __be16 service_id,
+ void *localptr,
+ sa_family_t proto,
+ const u8 *peer_addr)
+{
+ unsigned long key;
+ unsigned int addr_size = 0;
+ struct rxrpc_call *call = NULL;
+ struct rxrpc_call *ret = NULL;
+
+ _enter("");
+ /* sizeof does not evaluate its operand, so using the still-NULL
+ * call pointer below only names the field type.
+ */
+ switch (proto) {
+ case AF_INET:
+ addr_size = sizeof(call->peer_ip.ipv4_addr);
+ break;
+ case AF_INET6:
+ addr_size = sizeof(call->peer_ip.ipv6_addr);
+ break;
+ default:
+ break;
+ }
+
+ key = rxrpc_call_hashfunc(clientflag, cid, call_id, epoch,
+ service_id, proto, localptr, addr_size,
+ peer_addr);
+ hash_for_each_possible_rcu(rxrpc_call_hash, call, hash_node, key) {
+ /* Only the first addr_size bytes of the stored peer address
+ * are compared.
+ */
+ if (call->hash_key == key &&
+ call->call_id == call_id &&
+ call->cid == cid &&
+ call->in_clientflag == clientflag &&
+ call->service_id == service_id &&
+ call->proto == proto &&
+ call->local == localptr &&
+ memcmp(call->peer_ip.ipv6_addr, peer_addr,
+ addr_size) == 0 &&
+ call->epoch == epoch) {
+ ret = call;
+ break;
+ }
+ }
+ _leave(" = %p", ret);
+ return ret;
+}
+
/*
* allocate a new call
*/
@@ -91,7 +240,7 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp)
call->rx_data_expect = 1;
call->rx_data_eaten = 0;
call->rx_first_oos = 0;
- call->ackr_win_top = call->rx_data_eaten + 1 + RXRPC_MAXACKS;
+ call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size;
call->creation_jif = jiffies;
return call;
}
@@ -128,11 +277,31 @@ static struct rxrpc_call *rxrpc_alloc_client_call(
return ERR_PTR(ret);
}
+ /* Record copies of information for hashtable lookup */
+ call->proto = rx->proto;
+ call->local = trans->local;
+ switch (call->proto) {
+ case AF_INET:
+ call->peer_ip.ipv4_addr =
+ trans->peer->srx.transport.sin.sin_addr.s_addr;
+ break;
+ case AF_INET6:
+ memcpy(call->peer_ip.ipv6_addr,
+ trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ sizeof(call->peer_ip.ipv6_addr));
+ break;
+ }
+ call->epoch = call->conn->epoch;
+ call->service_id = call->conn->service_id;
+ call->in_clientflag = call->conn->in_clientflag;
+ /* Add the new call to the hashtable */
+ rxrpc_call_hash_add(call);
+
spin_lock(&call->conn->trans->peer->lock);
list_add(&call->error_link, &call->conn->trans->peer->error_targets);
spin_unlock(&call->conn->trans->peer->lock);
- call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
add_timer(&call->lifetimer);
_leave(" = %p", call);
@@ -320,9 +489,12 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
parent = *p;
call = rb_entry(parent, struct rxrpc_call, conn_node);
- if (call_id < call->call_id)
+ /* The tree is sorted in order of the __be32 value without
+ * turning it into host order.
+ */
+ if ((__force u32)call_id < (__force u32)call->call_id)
p = &(*p)->rb_left;
- else if (call_id > call->call_id)
+ else if ((__force u32)call_id > (__force u32)call->call_id)
p = &(*p)->rb_right;
else
goto old_call;
@@ -347,9 +519,31 @@ struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx,
list_add_tail(&call->link, &rxrpc_calls);
write_unlock_bh(&rxrpc_call_lock);
+ /* Record copies of information for hashtable lookup */
+ call->proto = rx->proto;
+ call->local = conn->trans->local;
+ switch (call->proto) {
+ case AF_INET:
+ call->peer_ip.ipv4_addr =
+ conn->trans->peer->srx.transport.sin.sin_addr.s_addr;
+ break;
+ case AF_INET6:
+ memcpy(call->peer_ip.ipv6_addr,
+ conn->trans->peer->srx.transport.sin6.sin6_addr.in6_u.u6_addr8,
+ sizeof(call->peer_ip.ipv6_addr));
+ break;
+ default:
+ break;
+ }
+ call->epoch = conn->epoch;
+ call->service_id = conn->service_id;
+ call->in_clientflag = conn->in_clientflag;
+ /* Add the new call to the hashtable */
+ rxrpc_call_hash_add(call);
+
_net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id);
- call->lifetimer.expires = jiffies + rxrpc_call_max_lifetime * HZ;
+ call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime;
add_timer(&call->lifetimer);
_leave(" = %p {%d} [new]", call, call->debug_id);
return call;
@@ -533,7 +727,7 @@ void rxrpc_release_call(struct rxrpc_call *call)
del_timer_sync(&call->resend_timer);
del_timer_sync(&call->ack_timer);
del_timer_sync(&call->lifetimer);
- call->deadspan.expires = jiffies + rxrpc_dead_call_timeout * HZ;
+ call->deadspan.expires = jiffies + rxrpc_dead_call_expiry;
add_timer(&call->deadspan);
_leave("");
@@ -665,6 +859,9 @@ static void rxrpc_cleanup_call(struct rxrpc_call *call)
rxrpc_put_connection(call->conn);
}
+ /* Remove the call from the hash */
+ rxrpc_call_hash_del(call);
+
if (call->acks_window) {
_debug("kill Tx window %d",
CIRC_CNT(call->acks_head, call->acks_tail,
diff --git a/net/rxrpc/ar-connection.c b/net/rxrpc/ar-connection.c
index 7bf5b5b9e8b9..6631f4f1e39b 100644
--- a/net/rxrpc/ar-connection.c
+++ b/net/rxrpc/ar-connection.c
@@ -18,11 +18,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Time till a connection expires after last use (in seconds).
+ */
+unsigned rxrpc_connection_expiry = 10 * 60;
+
static void rxrpc_connection_reaper(struct work_struct *work);
LIST_HEAD(rxrpc_connections);
DEFINE_RWLOCK(rxrpc_connection_lock);
-static unsigned long rxrpc_connection_timeout = 10 * 60;
static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper);
/*
@@ -862,7 +866,7 @@ static void rxrpc_connection_reaper(struct work_struct *work)
spin_lock(&conn->trans->client_lock);
write_lock(&conn->trans->conn_lock);
- reap_time = conn->put_time + rxrpc_connection_timeout;
+ reap_time = conn->put_time + rxrpc_connection_expiry;
if (atomic_read(&conn->usage) > 0) {
;
@@ -916,7 +920,7 @@ void __exit rxrpc_destroy_all_connections(void)
{
_enter("");
- rxrpc_connection_timeout = 0;
+ rxrpc_connection_expiry = 0;
cancel_delayed_work(&rxrpc_connection_reap);
rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0);
diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c
index a9206087b4d7..db57458c824c 100644
--- a/net/rxrpc/ar-error.c
+++ b/net/rxrpc/ar-error.c
@@ -83,6 +83,7 @@ void rxrpc_UDP_error_report(struct sock *sk)
if (mtu == 0) {
/* they didn't give us a size, estimate one */
+ mtu = peer->if_mtu;
if (mtu > 1500) {
mtu >>= 1;
if (mtu < 1500)
diff --git a/net/rxrpc/ar-input.c b/net/rxrpc/ar-input.c
index 529572f18d1f..73742647c135 100644
--- a/net/rxrpc/ar-input.c
+++ b/net/rxrpc/ar-input.c
@@ -25,8 +25,6 @@
#include <net/net_namespace.h>
#include "ar-internal.h"
-unsigned long rxrpc_ack_timeout = 1;
-
const char *rxrpc_pkts[] = {
"?00",
"DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG",
@@ -349,8 +347,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb)
* it */
if (sp->hdr.flags & RXRPC_REQUEST_ACK) {
_proto("ACK Requested on %%%u", serial);
- rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial,
- !(sp->hdr.flags & RXRPC_MORE_PACKETS));
+ rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false);
}
switch (sp->hdr.type) {
@@ -526,36 +523,38 @@ protocol_error:
* post an incoming packet to the appropriate call/socket to deal with
* - must get rid of the sk_buff, either by freeing it or by queuing it
*/
-static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
+static void rxrpc_post_packet_to_call(struct rxrpc_call *call,
struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp;
- struct rxrpc_call *call;
- struct rb_node *p;
- __be32 call_id;
-
- _enter("%p,%p", conn, skb);
- read_lock_bh(&conn->lock);
+ _enter("%p,%p", call, skb);
sp = rxrpc_skb(skb);
- /* look at extant calls by channel number first */
- call = conn->channels[ntohl(sp->hdr.cid) & RXRPC_CHANNELMASK];
- if (!call || call->call_id != sp->hdr.callNumber)
- goto call_not_extant;
-
_debug("extant call [%d]", call->state);
- ASSERTCMP(call->conn, ==, conn);
read_lock(&call->state_lock);
switch (call->state) {
case RXRPC_CALL_LOCALLY_ABORTED:
- if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events))
+ if (!test_and_set_bit(RXRPC_CALL_ABORT, &call->events)) {
rxrpc_queue_call(call);
+ goto free_unlock;
+ }
case RXRPC_CALL_REMOTELY_ABORTED:
case RXRPC_CALL_NETWORK_ERROR:
case RXRPC_CALL_DEAD:
+ goto dead_call;
+ case RXRPC_CALL_COMPLETE:
+ case RXRPC_CALL_CLIENT_FINAL_ACK:
+ /* complete server call */
+ if (call->conn->in_clientflag)
+ goto dead_call;
+ /* resend last packet of a completed call */
+ _debug("final ack again");
+ rxrpc_get_call(call);
+ set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
+ rxrpc_queue_call(call);
goto free_unlock;
default:
break;
@@ -563,7 +562,6 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
read_unlock(&call->state_lock);
rxrpc_get_call(call);
- read_unlock_bh(&conn->lock);
if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA &&
sp->hdr.flags & RXRPC_JUMBO_PACKET)
@@ -574,78 +572,16 @@ static void rxrpc_post_packet_to_call(struct rxrpc_connection *conn,
rxrpc_put_call(call);
goto done;
-call_not_extant:
- /* search the completed calls in case what we're dealing with is
- * there */
- _debug("call not extant");
-
- call_id = sp->hdr.callNumber;
- p = conn->calls.rb_node;
- while (p) {
- call = rb_entry(p, struct rxrpc_call, conn_node);
-
- if (call_id < call->call_id)
- p = p->rb_left;
- else if (call_id > call->call_id)
- p = p->rb_right;
- else
- goto found_completed_call;
- }
-
dead_call:
- /* it's a either a really old call that we no longer remember or its a
- * new incoming call */
- read_unlock_bh(&conn->lock);
-
- if (sp->hdr.flags & RXRPC_CLIENT_INITIATED &&
- sp->hdr.seq == cpu_to_be32(1)) {
- _debug("incoming call");
- skb_queue_tail(&conn->trans->local->accept_queue, skb);
- rxrpc_queue_work(&conn->trans->local->acceptor);
- goto done;
- }
-
- _debug("dead call");
- skb->priority = RX_CALL_DEAD;
- rxrpc_reject_packet(conn->trans->local, skb);
- goto done;
-
- /* resend last packet of a completed call
- * - client calls may have been aborted or ACK'd
- * - server calls may have been aborted
- */
-found_completed_call:
- _debug("completed call");
-
- if (atomic_read(&call->usage) == 0)
- goto dead_call;
-
- /* synchronise any state changes */
- read_lock(&call->state_lock);
- ASSERTIFCMP(call->state != RXRPC_CALL_CLIENT_FINAL_ACK,
- call->state, >=, RXRPC_CALL_COMPLETE);
-
- if (call->state == RXRPC_CALL_LOCALLY_ABORTED ||
- call->state == RXRPC_CALL_REMOTELY_ABORTED ||
- call->state == RXRPC_CALL_DEAD) {
- read_unlock(&call->state_lock);
- goto dead_call;
- }
-
- if (call->conn->in_clientflag) {
- read_unlock(&call->state_lock);
- goto dead_call; /* complete server call */
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+ skb->priority = RX_CALL_DEAD;
+ rxrpc_reject_packet(call->conn->trans->local, skb);
+ goto unlock;
}
-
- _debug("final ack again");
- rxrpc_get_call(call);
- set_bit(RXRPC_CALL_ACK_FINAL, &call->events);
- rxrpc_queue_call(call);
-
free_unlock:
- read_unlock(&call->state_lock);
- read_unlock_bh(&conn->lock);
rxrpc_free_skb(skb);
+unlock:
+ read_unlock(&call->state_lock);
done:
_leave("");
}
@@ -664,17 +600,42 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
rxrpc_queue_conn(conn);
}
+/*
+ * Find the connection a packet belongs to by working from the local endpoint
+ * through the peer (packet source address/port) and transport records.
+ * Returns the connection, or NULL if any step of the lookup fails; the caller
+ * in rxrpc_data_ready() releases it with rxrpc_put_connection().
+ */
+static struct rxrpc_connection *rxrpc_conn_from_local(struct rxrpc_local *local,
+						       struct sk_buff *skb,
+						       struct rxrpc_skb_priv *sp)
+{
+	struct rxrpc_connection *found = NULL;
+	struct rxrpc_transport *xprt;
+	struct rxrpc_peer *remote;
+
+	remote = rxrpc_find_peer(local, ip_hdr(skb)->saddr,
+				 udp_hdr(skb)->source);
+	if (!IS_ERR(remote)) {
+		xprt = rxrpc_find_transport(local, remote);
+		rxrpc_put_peer(remote);
+		if (xprt) {
+			found = rxrpc_find_connection(xprt, &sp->hdr);
+			rxrpc_put_transport(xprt);
+		}
+	}
+
+	return found;
+}
+
/*
* handle data received on the local endpoint
* - may be called in interrupt context
*/
void rxrpc_data_ready(struct sock *sk, int count)
{
- struct rxrpc_connection *conn;
- struct rxrpc_transport *trans;
struct rxrpc_skb_priv *sp;
struct rxrpc_local *local;
- struct rxrpc_peer *peer;
struct sk_buff *skb;
int ret;
@@ -749,27 +710,34 @@ void rxrpc_data_ready(struct sock *sk, int count)
(sp->hdr.callNumber == 0 || sp->hdr.seq == 0))
goto bad_message;
- peer = rxrpc_find_peer(local, ip_hdr(skb)->saddr, udp_hdr(skb)->source);
- if (IS_ERR(peer))
- goto cant_route_call;
+ if (sp->hdr.callNumber == 0) {
+ /* This is a connection-level packet. These should be
+ * fairly rare, so the extra overhead of looking them up the
+ * old-fashioned way doesn't really hurt */
+ struct rxrpc_connection *conn;
- trans = rxrpc_find_transport(local, peer);
- rxrpc_put_peer(peer);
- if (!trans)
- goto cant_route_call;
+ conn = rxrpc_conn_from_local(local, skb, sp);
+ if (!conn)
+ goto cant_route_call;
- conn = rxrpc_find_connection(trans, &sp->hdr);
- rxrpc_put_transport(trans);
- if (!conn)
- goto cant_route_call;
-
- _debug("CONN %p {%d}", conn, conn->debug_id);
-
- if (sp->hdr.callNumber == 0)
+ _debug("CONN %p {%d}", conn, conn->debug_id);
rxrpc_post_packet_to_conn(conn, skb);
- else
- rxrpc_post_packet_to_call(conn, skb);
- rxrpc_put_connection(conn);
+ rxrpc_put_connection(conn);
+ } else {
+ struct rxrpc_call *call;
+ u8 in_clientflag = 0;
+
+ if (sp->hdr.flags & RXRPC_CLIENT_INITIATED)
+ in_clientflag = RXRPC_CLIENT_INITIATED;
+ call = rxrpc_find_call_hash(in_clientflag, sp->hdr.cid,
+ sp->hdr.callNumber, sp->hdr.epoch,
+ sp->hdr.serviceId, local, AF_INET,
+ (u8 *)&ip_hdr(skb)->saddr);
+ if (call)
+ rxrpc_post_packet_to_call(call, skb);
+ else
+ goto cant_route_call;
+ }
rxrpc_put_local(local);
return;
@@ -790,8 +758,10 @@ cant_route_call:
skb->priority = RX_CALL_DEAD;
}
- _debug("reject");
- rxrpc_reject_packet(local, skb);
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) {
+ _debug("reject type %d",sp->hdr.type);
+ rxrpc_reject_packet(local, skb);
+ }
rxrpc_put_local(local);
_leave(" [no call]");
return;
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 5f43675ee1df..c831d44b0841 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -396,9 +396,20 @@ struct rxrpc_call {
#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG)
unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1];
+ struct hlist_node hash_node;
+ unsigned long hash_key; /* Full hash key */
+ u8 in_clientflag; /* Copy of conn->in_clientflag for hashing */
+ struct rxrpc_local *local; /* Local endpoint. Used for hashing. */
+ sa_family_t proto; /* Frame protocol */
/* the following should all be in net order */
__be32 cid; /* connection ID + channel index */
__be32 call_id; /* call ID on connection */
+ __be32 epoch; /* epoch of this connection */
+ __be16 service_id; /* service ID */
+ union { /* Peer IP address for hashing */
+ __be32 ipv4_addr;
+ __u8 ipv6_addr[16]; /* Anticipates eventual IPv6 support */
+ } peer_ip;
};
/*
@@ -433,6 +444,13 @@ int rxrpc_reject_call(struct rxrpc_sock *);
/*
* ar-ack.c
*/
+extern unsigned rxrpc_requested_ack_delay;
+extern unsigned rxrpc_soft_ack_delay;
+extern unsigned rxrpc_idle_ack_delay;
+extern unsigned rxrpc_rx_window_size;
+extern unsigned rxrpc_rx_mtu;
+extern unsigned rxrpc_rx_jumbo_max;
+
void __rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
void rxrpc_propose_ACK(struct rxrpc_call *, u8, __be32, bool);
void rxrpc_process_call(struct work_struct *);
@@ -440,10 +458,14 @@ void rxrpc_process_call(struct work_struct *);
/*
* ar-call.c
*/
+extern unsigned rxrpc_max_call_lifetime;
+extern unsigned rxrpc_dead_call_expiry;
extern struct kmem_cache *rxrpc_call_jar;
extern struct list_head rxrpc_calls;
extern rwlock_t rxrpc_call_lock;
+struct rxrpc_call *rxrpc_find_call_hash(u8, __be32, __be32, __be32,
+ __be16, void *, sa_family_t, const u8 *);
struct rxrpc_call *rxrpc_get_client_call(struct rxrpc_sock *,
struct rxrpc_transport *,
struct rxrpc_conn_bundle *,
@@ -460,6 +482,7 @@ void __exit rxrpc_destroy_all_calls(void);
/*
* ar-connection.c
*/
+extern unsigned rxrpc_connection_expiry;
extern struct list_head rxrpc_connections;
extern rwlock_t rxrpc_connection_lock;
@@ -493,7 +516,6 @@ void rxrpc_UDP_error_handler(struct work_struct *);
/*
* ar-input.c
*/
-extern unsigned long rxrpc_ack_timeout;
extern const char *rxrpc_pkts[];
void rxrpc_data_ready(struct sock *, int);
@@ -504,6 +526,7 @@ void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *);
* ar-local.c
*/
extern rwlock_t rxrpc_local_lock;
+
struct rxrpc_local *rxrpc_lookup_local(struct sockaddr_rxrpc *);
void rxrpc_put_local(struct rxrpc_local *);
void __exit rxrpc_destroy_all_locals(void);
@@ -522,7 +545,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t,
/*
* ar-output.c
*/
-extern int rxrpc_resend_timeout;
+extern unsigned rxrpc_resend_timeout;
int rxrpc_send_packet(struct rxrpc_transport *, struct sk_buff *);
int rxrpc_client_sendmsg(struct kiocb *, struct rxrpc_sock *,
@@ -572,6 +595,8 @@ void rxrpc_packet_destructor(struct sk_buff *);
/*
* ar-transport.c
*/
+extern unsigned rxrpc_transport_expiry;
+
struct rxrpc_transport *rxrpc_get_transport(struct rxrpc_local *,
struct rxrpc_peer *, gfp_t);
void rxrpc_put_transport(struct rxrpc_transport *);
@@ -580,6 +605,17 @@ struct rxrpc_transport *rxrpc_find_transport(struct rxrpc_local *,
struct rxrpc_peer *);
/*
+ * sysctl.c
+ */
+#ifdef CONFIG_SYSCTL
+extern int __init rxrpc_sysctl_init(void);
+extern void rxrpc_sysctl_exit(void);
+#else
+static inline int __init rxrpc_sysctl_init(void) { return 0; }
+static inline void rxrpc_sysctl_exit(void) {}
+#endif
+
+/*
* debug tracing
*/
extern unsigned int rxrpc_debug;
diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c
index d0e8f1c1898a..0b4b9a79f5ab 100644
--- a/net/rxrpc/ar-output.c
+++ b/net/rxrpc/ar-output.c
@@ -18,7 +18,10 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
-int rxrpc_resend_timeout = 4;
+/*
+ * Time till packet resend (in jiffies).
+ */
+unsigned rxrpc_resend_timeout = 4 * HZ;
static int rxrpc_send_data(struct kiocb *iocb,
struct rxrpc_sock *rx,
@@ -487,7 +490,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb,
ntohl(sp->hdr.serial), ntohl(sp->hdr.seq));
sp->need_resend = false;
- sp->resend_at = jiffies + rxrpc_resend_timeout * HZ;
+ sp->resend_at = jiffies + rxrpc_resend_timeout;
if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) {
_debug("run timer");
call->resend_timer.expires = sp->resend_at;
@@ -666,6 +669,7 @@ static int rxrpc_send_data(struct kiocb *iocb,
/* add the packet to the send queue if it's now full */
if (sp->remain <= 0 || (segment == 0 && !more)) {
struct rxrpc_connection *conn = call->conn;
+ uint32_t seq;
size_t pad;
/* pad out if we're using security */
@@ -678,11 +682,12 @@ static int rxrpc_send_data(struct kiocb *iocb,
memset(skb_put(skb, pad), 0, pad);
}
+ seq = atomic_inc_return(&call->sequence);
+
sp->hdr.epoch = conn->epoch;
sp->hdr.cid = call->cid;
sp->hdr.callNumber = call->call_id;
- sp->hdr.seq =
- htonl(atomic_inc_return(&call->sequence));
+ sp->hdr.seq = htonl(seq);
sp->hdr.serial =
htonl(atomic_inc_return(&conn->serial));
sp->hdr.type = RXRPC_PACKET_TYPE_DATA;
@@ -697,6 +702,8 @@ static int rxrpc_send_data(struct kiocb *iocb,
else if (CIRC_SPACE(call->acks_head, call->acks_tail,
call->acks_winsz) > 1)
sp->hdr.flags |= RXRPC_MORE_PACKETS;
+ if (more && seq & 1)
+ sp->hdr.flags |= RXRPC_REQUEST_ACK;
ret = rxrpc_secure_packet(
call, skb, skb->mark,
diff --git a/net/rxrpc/ar-recvmsg.c b/net/rxrpc/ar-recvmsg.c
index 34b5490dde65..e9aaa65c0778 100644
--- a/net/rxrpc/ar-recvmsg.c
+++ b/net/rxrpc/ar-recvmsg.c
@@ -180,16 +180,7 @@ int rxrpc_recvmsg(struct kiocb *iocb, struct socket *sock,
if (copy > len - copied)
copy = len - copied;
- if (skb->ip_summed == CHECKSUM_UNNECESSARY ||
- skb->ip_summed == CHECKSUM_PARTIAL) {
- ret = skb_copy_datagram_iovec(skb, offset,
- msg->msg_iov, copy);
- } else {
- ret = skb_copy_and_csum_datagram_iovec(skb, offset,
- msg->msg_iov);
- if (ret == -EINVAL)
- goto csum_copy_error;
- }
+ ret = skb_copy_datagram_iovec(skb, offset, msg->msg_iov, copy);
if (ret < 0)
goto copy_error;
@@ -348,20 +339,6 @@ copy_error:
_leave(" = %d", ret);
return ret;
-csum_copy_error:
- _debug("csum error");
- release_sock(&rx->sk);
- if (continue_call)
- rxrpc_put_call(continue_call);
- rxrpc_kill_skb(skb);
- if (!(flags & MSG_PEEK)) {
- if (skb_dequeue(&rx->sk.sk_receive_queue) != skb)
- BUG();
- }
- skb_kill_datagram(&rx->sk, skb, flags);
- rxrpc_put_call(call);
- return -EAGAIN;
-
wait_interrupted:
ret = sock_intr_errno(timeo);
wait_error:
diff --git a/net/rxrpc/ar-skbuff.c b/net/rxrpc/ar-skbuff.c
index de755e04d29c..4cfab49e329d 100644
--- a/net/rxrpc/ar-skbuff.c
+++ b/net/rxrpc/ar-skbuff.c
@@ -83,9 +83,14 @@ static void rxrpc_hard_ACK_data(struct rxrpc_call *call,
rxrpc_request_final_ACK(call);
} else if (atomic_dec_and_test(&call->ackr_not_idle) &&
test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) {
+ /* We previously soft-ACK'd some received packets that have now
+ * been consumed, so send a hard-ACK if no more packets are
+ * immediately forthcoming to allow the transmitter to free up
+ * its Tx bufferage.
+ */
_debug("send Rx idle ACK");
__rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial,
- true);
+ false);
}
spin_unlock_bh(&call->lock);
diff --git a/net/rxrpc/ar-transport.c b/net/rxrpc/ar-transport.c
index 92df566930b9..1976dec84f29 100644
--- a/net/rxrpc/ar-transport.c
+++ b/net/rxrpc/ar-transport.c
@@ -17,11 +17,15 @@
#include <net/af_rxrpc.h>
#include "ar-internal.h"
+/*
+ * Time after last use at which transport record is cleaned up.
+ */
+unsigned rxrpc_transport_expiry = 3600 * 24;
+
static void rxrpc_transport_reaper(struct work_struct *work);
static LIST_HEAD(rxrpc_transports);
static DEFINE_RWLOCK(rxrpc_transport_lock);
-static unsigned long rxrpc_transport_timeout = 3600 * 24;
static DECLARE_DELAYED_WORK(rxrpc_transport_reap, rxrpc_transport_reaper);
/*
@@ -235,7 +239,7 @@ static void rxrpc_transport_reaper(struct work_struct *work)
if (likely(atomic_read(&trans->usage) > 0))
continue;
- reap_time = trans->put_time + rxrpc_transport_timeout;
+ reap_time = trans->put_time + rxrpc_transport_expiry;
if (reap_time <= now)
list_move_tail(&trans->link, &graveyard);
else if (reap_time < earliest)
@@ -271,7 +275,7 @@ void __exit rxrpc_destroy_all_transports(void)
{
_enter("");
- rxrpc_transport_timeout = 0;
+ rxrpc_transport_expiry = 0;
cancel_delayed_work(&rxrpc_transport_reap);
rxrpc_queue_delayed_work(&rxrpc_transport_reap, 0);
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
new file mode 100644
index 000000000000..50a98a910eb1
--- /dev/null
+++ b/net/rxrpc/sysctl.c
@@ -0,0 +1,146 @@
+/* sysctls for configuring RxRPC operating parameters
+ *
+ * Copyright (C) 2014 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#include <linux/sysctl.h>
+#include <net/sock.h>
+#include <net/af_rxrpc.h>
+#include "ar-internal.h"
+
+static struct ctl_table_header *rxrpc_sysctl_reg_table; /* registration handle */
+static const unsigned zero = 0; /* .extra1 for req_ack_delay */
+static const unsigned one = 1; /* .extra1 for the other entries */
+static const unsigned four = 4; /* .extra2 for rx_jumbo_max */
+static const unsigned n_65535 = 65535; /* bound for rx_mtu */
+static const unsigned n_max_acks = RXRPC_MAXACKS; /* .extra2 for rx_window_size */
+
+/*
+ * RxRPC operating parameters, registered under net/rxrpc by rxrpc_sysctl_init().
+ * For proc_dointvec_minmax, .extra1 is the minimum and .extra2 the maximum.  See
+ * Documentation/networking/rxrpc.txt and the variable definitions for more
+ * information on the individual parameters.
+ */
+static struct ctl_table rxrpc_sysctl_table[] = {
+	/* Values exposed in milliseconds but stored in jiffies */
+	{
+		.procname	= "req_ack_delay",
+		.data		= &rxrpc_requested_ack_delay,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&zero,
+	},
+	{
+		.procname	= "soft_ack_delay",
+		.data		= &rxrpc_soft_ack_delay,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "idle_ack_delay",
+		.data		= &rxrpc_idle_ack_delay,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "resend_timeout",
+		.data		= &rxrpc_resend_timeout,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_ms_jiffies,
+		.extra1		= (void *)&one,
+	},
+
+	/* Values measured in seconds but used in jiffies */
+	{
+		.procname	= "max_call_lifetime",
+		.data		= &rxrpc_max_call_lifetime,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "dead_call_expiry",
+		.data		= &rxrpc_dead_call_expiry,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+		.extra1		= (void *)&one,
+	},
+
+	/* Values measured in seconds */
+	{
+		.procname	= "connection_expiry",
+		.data		= &rxrpc_connection_expiry,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+	},
+	{
+		.procname	= "transport_expiry",
+		.data		= &rxrpc_transport_expiry,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+	},
+
+	/* Non-time values, each bounded below by .extra1 and above by .extra2 */
+	{
+		.procname	= "rx_window_size",
+		.data		= &rxrpc_rx_window_size,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra2		= (void *)&n_max_acks,
+	},
+	{
+		.procname	= "rx_mtu",
+		.data		= &rxrpc_rx_mtu,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra2		= (void *)&n_65535, /* upper bound; was a 2nd .extra1 */
+	},
+	{
+		.procname	= "rx_jumbo_max",
+		.data		= &rxrpc_rx_jumbo_max,
+		.maxlen		= sizeof(unsigned),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&one,
+		.extra2		= (void *)&four,
+	},
+
+	{ }
+};
+
+/* Register the RxRPC sysctls under net/rxrpc; 0 on success, else -ENOMEM. */
+int __init rxrpc_sysctl_init(void)
+{
+	rxrpc_sysctl_reg_table =
+		register_net_sysctl(&init_net, "net/rxrpc", rxrpc_sysctl_table);
+
+	return rxrpc_sysctl_reg_table ? 0 : -ENOMEM;
+}
+
+void rxrpc_sysctl_exit(void) /* unregister the RxRPC sysctl table */
+{
+ if (rxrpc_sysctl_reg_table) /* NULL if nothing was registered */
+ unregister_net_sysctl_table(rxrpc_sysctl_reg_table);
+}
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 722e137df244..9f949abcacef 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1062,12 +1062,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
{
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct htb_sched *q = qdisc_priv(sch);
struct nlattr *nest;
struct tc_htb_glob gopt;
- spin_lock_bh(root_lock);
+ /* It's safe not to acquire the qdisc lock: as we hold RTNL,
+ * no change can happen to the qdisc parameters.
+ */
gopt.direct_pkts = q->direct_pkts;
gopt.version = HTB_VER;
@@ -1081,13 +1082,10 @@ static int htb_dump(struct Qdisc *sch, struct sk_buff *skb)
if (nla_put(skb, TCA_HTB_INIT, sizeof(gopt), &gopt) ||
nla_put_u32(skb, TCA_HTB_DIRECT_QLEN, q->direct_qlen))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}
@@ -1096,11 +1094,12 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
struct sk_buff *skb, struct tcmsg *tcm)
{
struct htb_class *cl = (struct htb_class *)arg;
- spinlock_t *root_lock = qdisc_root_sleeping_lock(sch);
struct nlattr *nest;
struct tc_htb_opt opt;
- spin_lock_bh(root_lock);
+ /* It's safe not to acquire the qdisc lock: as we hold RTNL,
+ * no change can happen to the class parameters.
+ */
tcm->tcm_parent = cl->parent ? cl->parent->common.classid : TC_H_ROOT;
tcm->tcm_handle = cl->common.classid;
if (!cl->level && cl->un.leaf.q)
@@ -1128,12 +1127,9 @@ static int htb_dump_class(struct Qdisc *sch, unsigned long arg,
nla_put_u64(skb, TCA_HTB_CEIL64, cl->ceil.rate_bytes_ps))
goto nla_put_failure;
- nla_nest_end(skb, nest);
- spin_unlock_bh(root_lock);
- return skb->len;
+ return nla_nest_end(skb, nest);
nla_put_failure:
- spin_unlock_bh(root_lock);
nla_nest_cancel(skb, nest);
return -1;
}