aboutsummaryrefslogtreecommitdiff
path: root/drivers/net/hyperv
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/hyperv')
-rw-r--r--drivers/net/hyperv/hyperv_net.h256
-rw-r--r--drivers/net/hyperv/netvsc.c592
-rw-r--r--drivers/net/hyperv/netvsc_drv.c885
-rw-r--r--drivers/net/hyperv/rndis_filter.c473
4 files changed, 1257 insertions, 949 deletions
diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h
index 3958adade7eb..d6c25580f8dd 100644
--- a/drivers/net/hyperv/hyperv_net.h
+++ b/drivers/net/hyperv/hyperv_net.h
@@ -34,6 +34,7 @@
#define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88
#define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89
+#define NDIS_OBJECT_TYPE_OFFLOAD 0xa7
#define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2
#define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2
@@ -118,6 +119,7 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */
/* Fwd declaration */
struct ndis_tcp_ip_checksum_info;
+struct ndis_pkt_8021q_info;
/*
* Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame
@@ -135,14 +137,15 @@ struct hv_netvsc_packet {
u8 page_buf_cnt;
u16 q_idx;
- u32 send_buf_index;
+ u16 total_packets;
+ u32 total_bytes;
+ u32 send_buf_index;
u32 total_data_buflen;
};
struct netvsc_device_info {
unsigned char mac_adr[ETH_ALEN];
- bool link_state; /* 0 - link up, 1 - link down */
int ring_size;
u32 max_num_vrss_chns;
u32 num_chn;
@@ -155,24 +158,33 @@ enum rndis_device_state {
RNDIS_DEV_DATAINITIALIZED,
};
+#define NETVSC_HASH_KEYLEN 40
+
struct rndis_device {
struct net_device *ndev;
enum rndis_device_state state;
- bool link_state;
+
atomic_t new_req_id;
spinlock_t request_lock;
struct list_head req_list;
- unsigned char hw_mac_adr[ETH_ALEN];
+ struct work_struct mcast_work;
+
+ bool link_state; /* 0 - link up, 1 - link down */
+
+ u8 hw_mac_adr[ETH_ALEN];
+ u8 rss_key[NETVSC_HASH_KEYLEN];
+ u16 ind_table[ITAB_NUM];
};
/* Interface */
struct rndis_message;
struct netvsc_device;
-int netvsc_device_add(struct hv_device *device, void *additional_info);
+int netvsc_device_add(struct hv_device *device,
+ const struct netvsc_device_info *info);
void netvsc_device_remove(struct hv_device *device);
int netvsc_send(struct hv_device *device,
struct hv_netvsc_packet *packet,
@@ -181,24 +193,28 @@ int netvsc_send(struct hv_device *device,
struct sk_buff *skb);
void netvsc_linkstatus_callback(struct hv_device *device_obj,
struct rndis_message *resp);
-int netvsc_recv_callback(struct hv_device *device_obj,
- struct hv_netvsc_packet *packet,
- void **data,
- struct ndis_tcp_ip_checksum_info *csum_info,
- struct vmbus_channel *channel,
- u16 vlan_tci);
+int netvsc_recv_callback(struct net_device *net,
+ struct vmbus_channel *channel,
+ void *data, u32 len,
+ const struct ndis_tcp_ip_checksum_info *csum_info,
+ const struct ndis_pkt_8021q_info *vlan);
void netvsc_channel_cb(void *context);
+int netvsc_poll(struct napi_struct *napi, int budget);
int rndis_filter_open(struct netvsc_device *nvdev);
int rndis_filter_close(struct netvsc_device *nvdev);
int rndis_filter_device_add(struct hv_device *dev,
- void *additional_info);
-void rndis_filter_device_remove(struct hv_device *dev);
-int rndis_filter_receive(struct hv_device *dev,
- struct hv_netvsc_packet *pkt,
- void **data,
- struct vmbus_channel *channel);
-
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter);
+ struct netvsc_device_info *info);
+void rndis_filter_update(struct netvsc_device *nvdev);
+void rndis_filter_device_remove(struct hv_device *dev,
+ struct netvsc_device *nvdev);
+int rndis_filter_set_rss_param(struct rndis_device *rdev,
+ const u8 *key, int num_queue);
+int rndis_filter_receive(struct net_device *ndev,
+ struct netvsc_device *net_dev,
+ struct hv_device *dev,
+ struct vmbus_channel *channel,
+ void *data, u32 buflen);
+
int rndis_filter_set_device_mac(struct net_device *ndev, char *mac);
void netvsc_switch_datapath(struct net_device *nv_dev, bool vf);
@@ -620,8 +636,9 @@ struct nvsp_message {
#define NETVSC_PACKET_SIZE 4096
-#define VRSS_SEND_TAB_SIZE 16
+#define VRSS_SEND_TAB_SIZE 16 /* must be power of 2 */
#define VRSS_CHANNEL_MAX 64
+#define VRSS_CHANNEL_DEFAULT 8
#define RNDIS_MAX_PKT_DEFAULT 8
#define RNDIS_PKT_ALIGN_DEFAULT 8
@@ -672,7 +689,7 @@ struct net_device_context {
/* point back to our device context */
struct hv_device *device_ctx;
/* netvsc_device */
- struct netvsc_device *nvdev;
+ struct netvsc_device __rcu *nvdev;
/* reconfigure work */
struct delayed_work dwork;
/* last reconfig time */
@@ -682,20 +699,17 @@ struct net_device_context {
/* list protection */
spinlock_t lock;
- struct work_struct work;
u32 msg_enable; /* debug level */
- struct netvsc_stats __percpu *tx_stats;
- struct netvsc_stats __percpu *rx_stats;
+ u32 tx_checksum_mask;
+
+ u32 tx_send_table[VRSS_SEND_TAB_SIZE];
/* Ethtool settings */
u8 duplex;
u32 speed;
struct netvsc_ethtool_stats eth_stats;
- /* the device is going away */
- bool start_remove;
-
/* State to manage the associated VF interface. */
struct net_device __rcu *vf_netdev;
@@ -703,13 +717,27 @@ struct net_device_context {
u32 vf_alloc;
/* Serial number of the VF to team with */
u32 vf_serial;
+
+ bool datapath; /* 0 - synthetic, 1 - VF nic */
+};
+
+/* Per channel data */
+struct netvsc_channel {
+ struct vmbus_channel *channel;
+ const struct vmpacket_descriptor *desc;
+ struct napi_struct napi;
+ struct multi_send_data msd;
+ struct multi_recv_comp mrc;
+ atomic_t queue_sends;
+
+ struct netvsc_stats tx_stats;
+ struct netvsc_stats rx_stats;
};
/* Per netvsc device */
struct netvsc_device {
u32 nvsp_version;
- atomic_t num_outstanding_sends;
wait_queue_head_t wait_drain;
bool destroy;
@@ -727,7 +755,6 @@ struct netvsc_device {
u32 send_section_cnt;
u32 send_section_size;
unsigned long *send_section_map;
- int map_words;
/* Used for NetVSP initialization protocol */
struct completion channel_init_wait;
@@ -735,32 +762,25 @@ struct netvsc_device {
struct nvsp_message revoke_packet;
- struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX];
- u32 send_table[VRSS_SEND_TAB_SIZE];
u32 max_chn;
u32 num_chn;
- spinlock_t sc_lock; /* Protects num_sc_offered variable */
- u32 num_sc_offered;
- atomic_t queue_sends[VRSS_CHANNEL_MAX];
- /* Holds rndis device info */
- void *extension;
+ refcount_t sc_offered;
- int ring_size;
+ struct rndis_device *extension;
- /* The primary channel callback buffer */
- unsigned char *cb_buffer;
- /* The sub channel callback buffer */
- unsigned char *sub_cb_buf;
+ int ring_size;
- struct multi_send_data msd[VRSS_CHANNEL_MAX];
u32 max_pkt; /* max number of pkt in one send, e.g. 8 */
u32 pkt_align; /* alignment bytes, e.g. 8 */
- struct multi_recv_comp mrc[VRSS_CHANNEL_MAX];
atomic_t num_outstanding_recvs;
atomic_t open_cnt;
+
+ struct netvsc_channel chan_table[VRSS_CHANNEL_MAX];
+
+ struct rcu_head rcu;
};
static inline struct netvsc_device *
@@ -939,7 +959,7 @@ struct ndis_pkt_8021q_info {
};
};
-struct ndis_oject_header {
+struct ndis_object_header {
u8 type;
u8 revision;
u16 size;
@@ -947,6 +967,9 @@ struct ndis_oject_header {
#define NDIS_OBJECT_TYPE_DEFAULT 0x80
#define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3
+#define NDIS_OFFLOAD_PARAMETERS_REVISION_2 2
+#define NDIS_OFFLOAD_PARAMETERS_REVISION_1 1
+
#define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0
#define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1
#define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2
@@ -973,8 +996,135 @@ struct ndis_oject_header {
#define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */
#define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */
+/*
+ * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES
+ * ndis_type: NDIS_OBJTYPE_OFFLOAD
+ */
+
+#define NDIS_OFFLOAD_ENCAP_NONE 0x0000
+#define NDIS_OFFLOAD_ENCAP_NULL 0x0001
+#define NDIS_OFFLOAD_ENCAP_8023 0x0002
+#define NDIS_OFFLOAD_ENCAP_8023PQ 0x0004
+#define NDIS_OFFLOAD_ENCAP_8023PQ_OOB 0x0008
+#define NDIS_OFFLOAD_ENCAP_RFC1483 0x0010
+
+struct ndis_csum_offload {
+ u32 ip4_txenc;
+ u32 ip4_txcsum;
+#define NDIS_TXCSUM_CAP_IP4OPT 0x001
+#define NDIS_TXCSUM_CAP_TCP4OPT 0x004
+#define NDIS_TXCSUM_CAP_TCP4 0x010
+#define NDIS_TXCSUM_CAP_UDP4 0x040
+#define NDIS_TXCSUM_CAP_IP4 0x100
+
+#define NDIS_TXCSUM_ALL_TCP4 (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT)
+
+ u32 ip4_rxenc;
+ u32 ip4_rxcsum;
+#define NDIS_RXCSUM_CAP_IP4OPT 0x001
+#define NDIS_RXCSUM_CAP_TCP4OPT 0x004
+#define NDIS_RXCSUM_CAP_TCP4 0x010
+#define NDIS_RXCSUM_CAP_UDP4 0x040
+#define NDIS_RXCSUM_CAP_IP4 0x100
+ u32 ip6_txenc;
+ u32 ip6_txcsum;
+#define NDIS_TXCSUM_CAP_IP6EXT 0x001
+#define NDIS_TXCSUM_CAP_TCP6OPT 0x004
+#define NDIS_TXCSUM_CAP_TCP6 0x010
+#define NDIS_TXCSUM_CAP_UDP6 0x040
+ u32 ip6_rxenc;
+ u32 ip6_rxcsum;
+#define NDIS_RXCSUM_CAP_IP6EXT 0x001
+#define NDIS_RXCSUM_CAP_TCP6OPT 0x004
+#define NDIS_RXCSUM_CAP_TCP6 0x010
+#define NDIS_RXCSUM_CAP_UDP6 0x040
+
+#define NDIS_TXCSUM_ALL_TCP6 (NDIS_TXCSUM_CAP_TCP6 | \
+ NDIS_TXCSUM_CAP_TCP6OPT | \
+ NDIS_TXCSUM_CAP_IP6EXT)
+};
+
+struct ndis_lsov1_offload {
+ u32 encap;
+ u32 maxsize;
+ u32 minsegs;
+ u32 opts;
+};
+
+struct ndis_ipsecv1_offload {
+ u32 encap;
+ u32 ah_esp;
+ u32 xport_tun;
+ u32 ip4_opts;
+ u32 flags;
+ u32 ip4_ah;
+ u32 ip4_esp;
+};
+
+struct ndis_lsov2_offload {
+ u32 ip4_encap;
+ u32 ip4_maxsz;
+ u32 ip4_minsg;
+ u32 ip6_encap;
+ u32 ip6_maxsz;
+ u32 ip6_minsg;
+ u32 ip6_opts;
+#define NDIS_LSOV2_CAP_IP6EXT 0x001
+#define NDIS_LSOV2_CAP_TCP6OPT 0x004
+
+#define NDIS_LSOV2_CAP_IP6 (NDIS_LSOV2_CAP_IP6EXT | \
+ NDIS_LSOV2_CAP_TCP6OPT)
+};
+
+struct ndis_ipsecv2_offload {
+ u32 encap;
+ u16 ip6;
+ u16 ip4opt;
+ u16 ip6ext;
+ u16 ah;
+ u16 esp;
+ u16 ah_esp;
+ u16 xport;
+ u16 tun;
+ u16 xport_tun;
+ u16 lso;
+ u16 extseq;
+ u32 udp_esp;
+ u32 auth;
+ u32 crypto;
+ u32 sa_caps;
+};
+
+struct ndis_rsc_offload {
+ u16 ip4;
+ u16 ip6;
+};
+
+struct ndis_encap_offload {
+ u32 flags;
+ u32 maxhdr;
+};
+
+struct ndis_offload {
+ struct ndis_object_header header;
+ struct ndis_csum_offload csum;
+ struct ndis_lsov1_offload lsov1;
+ struct ndis_ipsecv1_offload ipsecv1;
+ struct ndis_lsov2_offload lsov2;
+ u32 flags;
+ /* NDIS >= 6.1 */
+ struct ndis_ipsecv2_offload ipsecv2;
+ /* NDIS >= 6.30 */
+ struct ndis_rsc_offload rsc;
+ struct ndis_encap_offload encap_gre;
+};
+
+#define NDIS_OFFLOAD_SIZE sizeof(struct ndis_offload)
+#define NDIS_OFFLOAD_SIZE_6_0 offsetof(struct ndis_offload, ipsecv2)
+#define NDIS_OFFLOAD_SIZE_6_1 offsetof(struct ndis_offload, rsc)
+
struct ndis_offload_params {
- struct ndis_oject_header header;
+ struct ndis_object_header header;
u8 ip_v4_csum;
u8 tcp_ip_v4_csum;
u8 udp_ip_v4_csum;
@@ -1279,9 +1429,6 @@ struct rndis_message {
((void *) rndis_msg)
-#define __struct_bcount(x)
-
-
#define RNDIS_HEADER_SIZE (sizeof(struct rndis_message) - \
sizeof(union rndis_message_container))
@@ -1301,15 +1448,10 @@ struct rndis_message {
#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400
#define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800
-#define INFO_IPV4 2
-#define INFO_IPV6 4
-#define INFO_TCP 2
-#define INFO_UDP 4
-
#define TRANSPORT_INFO_NOT_IP 0
-#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP)
-#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP)
-#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP)
-#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP)
+#define TRANSPORT_INFO_IPV4_TCP 0x01
+#define TRANSPORT_INFO_IPV4_UDP 0x02
+#define TRANSPORT_INFO_IPV6_TCP 0x10
+#define TRANSPORT_INFO_IPV6_UDP 0x20
#endif /* _HYPERV_NET_H */
diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c
index 86e5749226ef..0a9167dd72fb 100644
--- a/drivers/net/hyperv/netvsc.c
+++ b/drivers/net/hyperv/netvsc.c
@@ -57,6 +57,8 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf)
sizeof(struct nvsp_message),
(unsigned long)init_pkt,
VM_PKT_DATA_INBAND, 0);
+
+ net_device_ctx->datapath = vf;
}
static struct netvsc_device *alloc_net_device(void)
@@ -67,14 +69,8 @@ static struct netvsc_device *alloc_net_device(void)
if (!net_device)
return NULL;
- net_device->cb_buffer = kzalloc(NETVSC_PACKET_SIZE, GFP_KERNEL);
- if (!net_device->cb_buffer) {
- kfree(net_device);
- return NULL;
- }
-
- net_device->mrc[0].buf = vzalloc(NETVSC_RECVSLOT_MAX *
- sizeof(struct recv_comp_data));
+ net_device->chan_table[0].mrc.buf
+ = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
init_waitqueue_head(&net_device->wait_drain);
net_device->destroy = false;
@@ -86,41 +82,21 @@ static struct netvsc_device *alloc_net_device(void)
return net_device;
}
-static void free_netvsc_device(struct netvsc_device *nvdev)
+static void free_netvsc_device(struct rcu_head *head)
{
+ struct netvsc_device *nvdev
+ = container_of(head, struct netvsc_device, rcu);
int i;
for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- vfree(nvdev->mrc[i].buf);
+ vfree(nvdev->chan_table[i].mrc.buf);
- kfree(nvdev->cb_buffer);
kfree(nvdev);
}
-static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
+static void free_netvsc_device_rcu(struct netvsc_device *nvdev)
{
- struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
-
- if (net_device && net_device->destroy)
- net_device = NULL;
-
- return net_device;
-}
-
-static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
-{
- struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
-
- if (!net_device)
- goto get_in_err;
-
- if (net_device->destroy &&
- atomic_read(&net_device->num_outstanding_sends) == 0 &&
- atomic_read(&net_device->num_outstanding_recvs) == 0)
- net_device = NULL;
-
-get_in_err:
- return net_device;
+ call_rcu(&nvdev->rcu, free_netvsc_device);
}
static void netvsc_destroy_buf(struct hv_device *device)
@@ -151,6 +127,13 @@ static void netvsc_destroy_buf(struct hv_device *device)
sizeof(struct nvsp_message),
(unsigned long)revoke_packet,
VM_PKT_DATA_INBAND, 0);
+ /* If the failure is because the channel is rescinded;
+ * ignore the failure since we cannot send on a rescinded
+ * channel. This would allow us to properly cleanup
+ * even when the channel is rescinded.
+ */
+ if (device->channel->rescind)
+ ret = 0;
/*
* If we failed here, we might as well return and
* have a leak rather than continue and a bugchk
@@ -211,6 +194,15 @@ static void netvsc_destroy_buf(struct hv_device *device)
sizeof(struct nvsp_message),
(unsigned long)revoke_packet,
VM_PKT_DATA_INBAND, 0);
+
+ /* If the failure is because the channel is rescinded;
+ * ignore the failure since we cannot send on a rescinded
+ * channel. This would allow us to properly cleanup
+ * even when the channel is rescinded.
+ */
+ if (device->channel->rescind)
+ ret = 0;
+
/* If we failed here, we might as well return and
* have a leak rather than continue and a bugchk
*/
@@ -243,17 +235,15 @@ static void netvsc_destroy_buf(struct hv_device *device)
kfree(net_device->send_section_map);
}
-static int netvsc_init_buf(struct hv_device *device)
+static int netvsc_init_buf(struct hv_device *device,
+ struct netvsc_device *net_device)
{
int ret = 0;
- struct netvsc_device *net_device;
struct nvsp_message *init_packet;
struct net_device *ndev;
+ size_t map_words;
int node;
- net_device = get_outbound_net_device(device);
- if (!net_device)
- return -ENODEV;
ndev = hv_get_drvdata(device);
node = cpu_to_node(device->channel->target_cpu);
@@ -284,9 +274,7 @@ static int netvsc_init_buf(struct hv_device *device)
/* Notify the NetVsp of the gpadl handle */
init_packet = &net_device->channel_init_pkt;
-
memset(init_packet, 0, sizeof(struct nvsp_message));
-
init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
init_packet->msg.v1_msg.send_recv_buf.
gpadl_handle = net_device->recv_buf_gpadl_handle;
@@ -414,11 +402,9 @@ static int netvsc_init_buf(struct hv_device *device)
net_device->send_section_size, net_device->send_section_cnt);
/* Setup state for managing the send buffer. */
- net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
- BITS_PER_LONG);
+ map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG);
- net_device->send_section_map = kcalloc(net_device->map_words,
- sizeof(ulong), GFP_KERNEL);
+ net_device->send_section_map = kcalloc(map_words, sizeof(ulong), GFP_KERNEL);
if (net_device->send_section_map == NULL) {
ret = -ENOMEM;
goto cleanup;
@@ -487,20 +473,15 @@ static int negotiate_nvsp_ver(struct hv_device *device,
return ret;
}
-static int netvsc_connect_vsp(struct hv_device *device)
+static int netvsc_connect_vsp(struct hv_device *device,
+ struct netvsc_device *net_device)
{
- int ret;
- struct netvsc_device *net_device;
- struct nvsp_message *init_packet;
- int ndis_version;
const u32 ver_list[] = {
NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
- NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
- int i;
-
- net_device = get_outbound_net_device(device);
- if (!net_device)
- return -ENODEV;
+ NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5
+ };
+ struct nvsp_message *init_packet;
+ int ndis_version, i, ret;
init_packet = &net_device->channel_init_pkt;
@@ -550,7 +531,7 @@ static int netvsc_connect_vsp(struct hv_device *device)
net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
- ret = netvsc_init_buf(device);
+ ret = netvsc_init_buf(device, net_device);
cleanup:
return ret;
@@ -569,10 +550,11 @@ void netvsc_device_remove(struct hv_device *device)
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct netvsc_device *net_device = net_device_ctx->nvdev;
+ int i;
netvsc_disconnect_vsp(device);
- net_device_ctx->nvdev = NULL;
+ RCU_INIT_POINTER(net_device_ctx->nvdev, NULL);
/*
* At this point, no one should be accessing net_device
@@ -583,9 +565,12 @@ void netvsc_device_remove(struct hv_device *device)
/* Now, we can close the channel safely */
vmbus_close(device->channel);
+ /* And dissassociate NAPI context from device */
+ for (i = 0; i < net_device->num_chn; i++)
+ netif_napi_del(&net_device->chan_table[i].napi);
+
/* Release all resources */
- vfree(net_device->sub_cb_buf);
- free_netvsc_device(net_device);
+ free_netvsc_device_rcu(net_device);
}
#define RING_AVAIL_PERCENT_HIWATER 20
@@ -614,39 +599,44 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
static void netvsc_send_tx_complete(struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
struct hv_device *device,
- struct vmpacket_descriptor *packet)
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
- struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id;
+ struct sk_buff *skb = (struct sk_buff *)(unsigned long)desc->trans_id;
struct net_device *ndev = hv_get_drvdata(device);
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
struct vmbus_channel *channel = device->channel;
- int num_outstanding_sends;
u16 q_idx = 0;
int queue_sends;
/* Notify the layer above us */
if (likely(skb)) {
- struct hv_netvsc_packet *nvsc_packet
+ const struct hv_netvsc_packet *packet
= (struct hv_netvsc_packet *)skb->cb;
- u32 send_index = nvsc_packet->send_buf_index;
+ u32 send_index = packet->send_buf_index;
+ struct netvsc_stats *tx_stats;
if (send_index != NETVSC_INVALID_INDEX)
netvsc_free_send_slot(net_device, send_index);
- q_idx = nvsc_packet->q_idx;
+ q_idx = packet->q_idx;
channel = incoming_channel;
- dev_consume_skb_any(skb);
+ tx_stats = &net_device->chan_table[q_idx].tx_stats;
+
+ u64_stats_update_begin(&tx_stats->syncp);
+ tx_stats->packets += packet->total_packets;
+ tx_stats->bytes += packet->total_bytes;
+ u64_stats_update_end(&tx_stats->syncp);
+
+ napi_consume_skb(skb, budget);
}
- num_outstanding_sends =
- atomic_dec_return(&net_device->num_outstanding_sends);
- queue_sends = atomic_dec_return(&net_device->queue_sends[q_idx]);
+ queue_sends =
+ atomic_dec_return(&net_device->chan_table[q_idx].queue_sends);
- if (net_device->destroy && num_outstanding_sends == 0)
+ if (net_device->destroy && queue_sends == 0)
wake_up(&net_device->wait_drain);
if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
- !net_device_ctx->start_remove &&
(hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER ||
queue_sends < 1))
netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx));
@@ -655,14 +645,12 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device,
static void netvsc_send_completion(struct netvsc_device *net_device,
struct vmbus_channel *incoming_channel,
struct hv_device *device,
- struct vmpacket_descriptor *packet)
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
- struct nvsp_message *nvsp_packet;
+ struct nvsp_message *nvsp_packet = hv_pkt_data(desc);
struct net_device *ndev = hv_get_drvdata(device);
- nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
- (packet->offset8 << 3));
-
switch (nvsp_packet->hdr.msg_type) {
case NVSP_MSG_TYPE_INIT_COMPLETE:
case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE:
@@ -676,7 +664,7 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE:
netvsc_send_tx_complete(net_device, incoming_channel,
- device, packet);
+ device, desc, budget);
break;
default:
@@ -688,27 +676,15 @@ static void netvsc_send_completion(struct netvsc_device *net_device,
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
- unsigned long index;
- u32 max_words = net_device->map_words;
- unsigned long *map_addr = (unsigned long *)net_device->send_section_map;
- u32 section_cnt = net_device->send_section_cnt;
- int ret_val = NETVSC_INVALID_INDEX;
- int i;
- int prev_val;
-
- for (i = 0; i < max_words; i++) {
- if (!~(map_addr[i]))
- continue;
- index = ffz(map_addr[i]);
- prev_val = sync_test_and_set_bit(index, &map_addr[i]);
- if (prev_val)
- continue;
- if ((index + (i * BITS_PER_LONG)) >= section_cnt)
- break;
- ret_val = (index + (i * BITS_PER_LONG));
- break;
+ unsigned long *map_addr = net_device->send_section_map;
+ unsigned int i;
+
+ for_each_clear_bit(i, map_addr, net_device->send_section_cnt) {
+ if (sync_test_and_set_bit(i, map_addr) == 0)
+ return i;
}
- return ret_val;
+
+ return NETVSC_INVALID_INDEX;
}
static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
@@ -723,8 +699,6 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
char *dest = start + (section_index * net_device->send_section_size)
+ pend_size;
int i;
- bool is_data_pkt = (skb != NULL) ? true : false;
- bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
u32 msg_size = 0;
u32 padding = 0;
u32 remain = packet->total_data_buflen % net_device->pkt_align;
@@ -732,8 +706,7 @@ static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
packet->page_buf_cnt;
/* Add padding */
- if (is_data_pkt && xmit_more && remain &&
- !packet->cp_partial) {
+ if (skb->xmit_more && remain && !packet->cp_partial) {
padding = net_device->pkt_align - remain;
rndis_msg->msg_len += padding;
packet->total_data_buflen += padding;
@@ -765,14 +738,15 @@ static inline int netvsc_send_pkt(
struct sk_buff *skb)
{
struct nvsp_message nvmsg;
- u16 q_idx = packet->q_idx;
- struct vmbus_channel *out_channel = net_device->chn_table[q_idx];
+ struct netvsc_channel *nvchan
+ = &net_device->chan_table[packet->q_idx];
+ struct vmbus_channel *out_channel = nvchan->channel;
struct net_device *ndev = hv_get_drvdata(device);
+ struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx);
u64 req_id;
int ret;
struct hv_page_buffer *pgbuf;
u32 ring_avail = hv_ringbuf_avail_percent(&out_channel->outbound);
- bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
if (skb != NULL) {
@@ -796,16 +770,6 @@ static inline int netvsc_send_pkt(
if (out_channel->rescind)
return -ENODEV;
- /*
- * It is possible that once we successfully place this packet
- * on the ringbuffer, we may stop the queue. In that case, we want
- * to notify the host independent of the xmit_more flag. We don't
- * need to be precise here; in the worst case we may signal the host
- * unnecessarily.
- */
- if (ring_avail < (RING_AVAIL_PERCENT_LOWATER + 1))
- xmit_more = false;
-
if (packet->page_buf_cnt) {
pgbuf = packet->cp_partial ? (*pb) +
packet->rmsg_pgcnt : (*pb);
@@ -815,35 +779,24 @@ static inline int netvsc_send_pkt(
&nvmsg,
sizeof(struct nvsp_message),
req_id,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
- !xmit_more);
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
} else {
ret = vmbus_sendpacket_ctl(out_channel, &nvmsg,
sizeof(struct nvsp_message),
req_id,
VM_PKT_DATA_INBAND,
- VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED,
- !xmit_more);
+ VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
}
if (ret == 0) {
- atomic_inc(&net_device->num_outstanding_sends);
- atomic_inc(&net_device->queue_sends[q_idx]);
-
- if (ring_avail < RING_AVAIL_PERCENT_LOWATER) {
- netif_tx_stop_queue(netdev_get_tx_queue(ndev, q_idx));
+ atomic_inc_return(&nvchan->queue_sends);
- if (atomic_read(&net_device->
- queue_sends[q_idx]) < 1)
- netif_tx_wake_queue(netdev_get_tx_queue(
- ndev, q_idx));
- }
+ if (ring_avail < RING_AVAIL_PERCENT_LOWATER)
+ netif_tx_stop_queue(txq);
} else if (ret == -EAGAIN) {
- netif_tx_stop_queue(netdev_get_tx_queue(
- ndev, q_idx));
- if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
- netif_tx_wake_queue(netdev_get_tx_queue(
- ndev, q_idx));
+ netif_tx_stop_queue(txq);
+ if (atomic_read(&nvchan->queue_sends) < 1) {
+ netif_tx_wake_queue(txq);
ret = -ENOSPC;
}
} else {
@@ -872,10 +825,9 @@ int netvsc_send(struct hv_device *device,
struct hv_page_buffer **pb,
struct sk_buff *skb)
{
- struct netvsc_device *net_device;
+ struct netvsc_device *net_device = hv_device_to_netvsc_device(device);
int ret = 0;
- struct vmbus_channel *out_channel;
- u16 q_idx = packet->q_idx;
+ struct netvsc_channel *nvchan;
u32 pktlen = packet->total_data_buflen, msd_len = 0;
unsigned int section_index = NETVSC_INVALID_INDEX;
struct multi_send_data *msdp;
@@ -884,19 +836,18 @@ int netvsc_send(struct hv_device *device,
bool try_batch;
bool xmit_more = (skb != NULL) ? skb->xmit_more : false;
- net_device = get_outbound_net_device(device);
- if (!net_device)
+ /* If device is rescinded, return error and packet will get dropped. */
+ if (unlikely(net_device->destroy))
return -ENODEV;
/* We may race with netvsc_connect_vsp()/netvsc_init_buf() and get
* here before the negotiation with the host is finished and
* send_section_map may not be allocated yet.
*/
- if (!net_device->send_section_map)
+ if (unlikely(!net_device->send_section_map))
return -EAGAIN;
- out_channel = net_device->chn_table[q_idx];
-
+ nvchan = &net_device->chan_table[packet->q_idx];
packet->send_buf_index = NETVSC_INVALID_INDEX;
packet->cp_partial = false;
@@ -908,15 +859,12 @@ int netvsc_send(struct hv_device *device,
goto send_now;
}
- msdp = &net_device->msd[q_idx];
-
/* batch packets in send buffer if possible */
+ msdp = &nvchan->msd;
if (msdp->pkt)
msd_len = msdp->pkt->total_data_buflen;
- try_batch = (skb != NULL) && msd_len > 0 && msdp->count <
- net_device->max_pkt;
-
+ try_batch = msd_len > 0 && msdp->count < net_device->max_pkt;
if (try_batch && msd_len + pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = msdp->pkt->send_buf_index;
@@ -926,7 +874,7 @@ int netvsc_send(struct hv_device *device,
section_index = msdp->pkt->send_buf_index;
packet->cp_partial = true;
- } else if ((skb != NULL) && pktlen + net_device->pkt_align <
+ } else if (pktlen + net_device->pkt_align <
net_device->send_section_size) {
section_index = netvsc_get_next_send_section(net_device);
if (section_index != NETVSC_INVALID_INDEX) {
@@ -950,6 +898,11 @@ int netvsc_send(struct hv_device *device,
packet->total_data_buflen += msd_len;
}
+ if (msdp->pkt) {
+ packet->total_packets += msdp->pkt->total_packets;
+ packet->total_bytes += msdp->pkt->total_bytes;
+ }
+
if (msdp->skb)
dev_consume_skb_any(msdp->skb);
@@ -1011,8 +964,9 @@ static int netvsc_send_recv_completion(struct vmbus_channel *channel,
static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
u32 *filled, u32 *avail)
{
- u32 first = nvdev->mrc[q_idx].first;
- u32 next = nvdev->mrc[q_idx].next;
+ struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
+ u32 first = mrc->first;
+ u32 next = mrc->next;
*filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next :
next - first;
@@ -1024,26 +978,26 @@ static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx,
static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device
*nvdev, u16 q_idx)
{
+ struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
u32 filled, avail;
- if (!nvdev->mrc[q_idx].buf)
+ if (unlikely(!mrc->buf))
return NULL;
count_recv_comp_slot(nvdev, q_idx, &filled, &avail);
if (!filled)
return NULL;
- return nvdev->mrc[q_idx].buf + nvdev->mrc[q_idx].first *
- sizeof(struct recv_comp_data);
+ return mrc->buf + mrc->first * sizeof(struct recv_comp_data);
}
/* Put the first filled slot back to available pool */
static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx)
{
+ struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
int num_recv;
- nvdev->mrc[q_idx].first = (nvdev->mrc[q_idx].first + 1) %
- NETVSC_RECVSLOT_MAX;
+ mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX;
num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs);
@@ -1078,13 +1032,14 @@ static void netvsc_chk_recv_comp(struct netvsc_device *nvdev,
static inline struct recv_comp_data *get_recv_comp_slot(
struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx)
{
+ struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc;
u32 filled, avail, next;
struct recv_comp_data *rcd;
- if (!nvdev->recv_section)
+ if (unlikely(!nvdev->recv_section))
return NULL;
- if (!nvdev->mrc[q_idx].buf)
+ if (unlikely(!mrc->buf))
return NULL;
if (atomic_read(&nvdev->num_outstanding_recvs) >
@@ -1095,112 +1050,91 @@ static inline struct recv_comp_data *get_recv_comp_slot(
if (!avail)
return NULL;
- next = nvdev->mrc[q_idx].next;
- rcd = nvdev->mrc[q_idx].buf + next * sizeof(struct recv_comp_data);
- nvdev->mrc[q_idx].next = (next + 1) % NETVSC_RECVSLOT_MAX;
+ next = mrc->next;
+ rcd = mrc->buf + next * sizeof(struct recv_comp_data);
+ mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX;
atomic_inc(&nvdev->num_outstanding_recvs);
return rcd;
}
-static void netvsc_receive(struct netvsc_device *net_device,
- struct vmbus_channel *channel,
- struct hv_device *device,
- struct vmpacket_descriptor *packet)
+static int netvsc_receive(struct net_device *ndev,
+ struct netvsc_device *net_device,
+ struct net_device_context *net_device_ctx,
+ struct hv_device *device,
+ struct vmbus_channel *channel,
+ const struct vmpacket_descriptor *desc,
+ struct nvsp_message *nvsp)
{
- struct vmtransfer_page_packet_header *vmxferpage_packet;
- struct nvsp_message *nvsp_packet;
- struct hv_netvsc_packet nv_pkt;
- struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
+ const struct vmtransfer_page_packet_header *vmxferpage_packet
+ = container_of(desc, const struct vmtransfer_page_packet_header, d);
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
+ char *recv_buf = net_device->recv_buf;
u32 status = NVSP_STAT_SUCCESS;
int i;
int count = 0;
- struct net_device *ndev = hv_get_drvdata(device);
- void *data;
int ret;
- struct recv_comp_data *rcd;
- u16 q_idx = channel->offermsg.offer.sub_channel_index;
-
- /*
- * All inbound packets other than send completion should be xfer page
- * packet
- */
- if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
- netdev_err(ndev, "Unknown packet type received - %d\n",
- packet->type);
- return;
- }
-
- nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
- (packet->offset8 << 3));
/* Make sure this is a valid nvsp packet */
- if (nvsp_packet->hdr.msg_type !=
- NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
- netdev_err(ndev, "Unknown nvsp packet type received-"
- " %d\n", nvsp_packet->hdr.msg_type);
- return;
+ if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) {
+ netif_err(net_device_ctx, rx_err, ndev,
+ "Unknown nvsp packet type received %u\n",
+ nvsp->hdr.msg_type);
+ return 0;
}
- vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
-
- if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
- netdev_err(ndev, "Invalid xfer page set id - "
- "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
- vmxferpage_packet->xfer_pageset_id);
- return;
+ if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) {
+ netif_err(net_device_ctx, rx_err, ndev,
+ "Invalid xfer page set id - expecting %x got %x\n",
+ NETVSC_RECEIVE_BUFFER_ID,
+ vmxferpage_packet->xfer_pageset_id);
+ return 0;
}
count = vmxferpage_packet->range_cnt;
/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
for (i = 0; i < count; i++) {
- /* Initialize the netvsc packet */
- data = (void *)((unsigned long)net_device->
- recv_buf + vmxferpage_packet->ranges[i].byte_offset);
- netvsc_packet->total_data_buflen =
- vmxferpage_packet->ranges[i].byte_count;
+ void *data = recv_buf
+ + vmxferpage_packet->ranges[i].byte_offset;
+ u32 buflen = vmxferpage_packet->ranges[i].byte_count;
/* Pass it to the upper layer */
- status = rndis_filter_receive(device, netvsc_packet, &data,
- channel);
+ status = rndis_filter_receive(ndev, net_device, device,
+ channel, data, buflen);
}
- if (!net_device->mrc[q_idx].buf) {
+ if (net_device->chan_table[q_idx].mrc.buf) {
+ struct recv_comp_data *rcd;
+
+ rcd = get_recv_comp_slot(net_device, channel, q_idx);
+ if (rcd) {
+ rcd->tid = vmxferpage_packet->d.trans_id;
+ rcd->status = status;
+ } else {
+ netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
+ q_idx, vmxferpage_packet->d.trans_id);
+ }
+ } else {
ret = netvsc_send_recv_completion(channel,
vmxferpage_packet->d.trans_id,
status);
if (ret)
netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n",
q_idx, vmxferpage_packet->d.trans_id, ret);
- return;
- }
-
- rcd = get_recv_comp_slot(net_device, channel, q_idx);
-
- if (!rcd) {
- netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n",
- q_idx, vmxferpage_packet->d.trans_id);
- return;
}
-
- rcd->tid = vmxferpage_packet->d.trans_id;
- rcd->status = status;
+ return count;
}
static void netvsc_send_table(struct hv_device *hdev,
struct nvsp_message *nvmsg)
{
- struct netvsc_device *nvscdev;
struct net_device *ndev = hv_get_drvdata(hdev);
+ struct net_device_context *net_device_ctx = netdev_priv(ndev);
int i;
u32 count, *tab;
- nvscdev = get_outbound_net_device(hdev);
- if (!nvscdev)
- return;
-
count = nvmsg->msg.v5_msg.send_table.count;
if (count != VRSS_SEND_TAB_SIZE) {
netdev_err(ndev, "Received wrong send-table size:%u\n", count);
@@ -1211,7 +1145,7 @@ static void netvsc_send_table(struct hv_device *hdev,
nvmsg->msg.v5_msg.send_table.offset);
for (i = 0; i < count; i++)
- nvscdev->send_table[i] = tab[i];
+ net_device_ctx->tx_send_table[i] = tab[i];
}
static void netvsc_send_vf(struct net_device_context *net_device_ctx,
@@ -1236,26 +1170,25 @@ static inline void netvsc_receive_inband(struct hv_device *hdev,
}
}
-static void netvsc_process_raw_pkt(struct hv_device *device,
- struct vmbus_channel *channel,
- struct netvsc_device *net_device,
- struct net_device *ndev,
- u64 request_id,
- struct vmpacket_descriptor *desc)
+static int netvsc_process_raw_pkt(struct hv_device *device,
+ struct vmbus_channel *channel,
+ struct netvsc_device *net_device,
+ struct net_device *ndev,
+ const struct vmpacket_descriptor *desc,
+ int budget)
{
- struct nvsp_message *nvmsg;
struct net_device_context *net_device_ctx = netdev_priv(ndev);
-
- nvmsg = (struct nvsp_message *)((unsigned long)
- desc + (desc->offset8 << 3));
+ struct nvsp_message *nvmsg = hv_pkt_data(desc);
switch (desc->type) {
case VM_PKT_COMP:
- netvsc_send_completion(net_device, channel, device, desc);
+ netvsc_send_completion(net_device, channel, device,
+ desc, budget);
break;
case VM_PKT_DATA_USING_XFER_PAGES:
- netvsc_receive(net_device, channel, device, desc);
+ return netvsc_receive(ndev, net_device, net_device_ctx,
+ device, channel, desc, nvmsg);
break;
case VM_PKT_DATA_INBAND:
@@ -1264,112 +1197,85 @@ static void netvsc_process_raw_pkt(struct hv_device *device,
default:
netdev_err(ndev, "unhandled packet type %d, tid %llx\n",
- desc->type, request_id);
+ desc->type, desc->trans_id);
break;
}
+
+ return 0;
}
-void netvsc_channel_cb(void *context)
+static struct hv_device *netvsc_channel_to_device(struct vmbus_channel *channel)
{
- int ret;
- struct vmbus_channel *channel = (struct vmbus_channel *)context;
+ struct vmbus_channel *primary = channel->primary_channel;
+
+ return primary ? primary->device_obj : channel->device_obj;
+}
+
+/* Network processing softirq
+ * Process data in incoming ring buffer from host
+ * Stops when ring is empty or budget is met or exceeded.
+ */
+int netvsc_poll(struct napi_struct *napi, int budget)
+{
+ struct netvsc_channel *nvchan
+ = container_of(napi, struct netvsc_channel, napi);
+ struct vmbus_channel *channel = nvchan->channel;
+ struct hv_device *device = netvsc_channel_to_device(channel);
u16 q_idx = channel->offermsg.offer.sub_channel_index;
- struct hv_device *device;
- struct netvsc_device *net_device;
- u32 bytes_recvd;
- u64 request_id;
- struct vmpacket_descriptor *desc;
- unsigned char *buffer;
- int bufferlen = NETVSC_PACKET_SIZE;
- struct net_device *ndev;
- bool need_to_commit = false;
+ struct net_device *ndev = hv_get_drvdata(device);
+ struct netvsc_device *net_device = net_device_to_netvsc_device(ndev);
+ int work_done = 0;
- if (channel->primary_channel != NULL)
- device = channel->primary_channel->device_obj;
- else
- device = channel->device_obj;
+ /* If starting a new interval */
+ if (!nvchan->desc)
+ nvchan->desc = hv_pkt_iter_first(channel);
- net_device = get_inbound_net_device(device);
- if (!net_device)
- return;
- ndev = hv_get_drvdata(device);
- buffer = get_per_channel_state(channel);
-
- /* commit_rd_index() -> hv_signal_on_read() needs this. */
- init_cached_read_index(channel);
-
- do {
- desc = get_next_pkt_raw(channel);
- if (desc != NULL) {
- netvsc_process_raw_pkt(device,
- channel,
- net_device,
- ndev,
- desc->trans_id,
- desc);
-
- put_pkt_raw(channel, desc);
- need_to_commit = true;
- continue;
- }
- if (need_to_commit) {
- need_to_commit = false;
- commit_rd_index(channel);
- }
+ while (nvchan->desc && work_done < budget) {
+ work_done += netvsc_process_raw_pkt(device, channel, net_device,
+ ndev, nvchan->desc, budget);
+ nvchan->desc = hv_pkt_iter_next(channel, nvchan->desc);
+ }
- ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
- &bytes_recvd, &request_id);
- if (ret == 0) {
- if (bytes_recvd > 0) {
- desc = (struct vmpacket_descriptor *)buffer;
- netvsc_process_raw_pkt(device,
- channel,
- net_device,
- ndev,
- request_id,
- desc);
- } else {
- /*
- * We are done for this pass.
- */
- break;
- }
-
- } else if (ret == -ENOBUFS) {
- if (bufferlen > NETVSC_PACKET_SIZE)
- kfree(buffer);
- /* Handle large packet */
- buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
- if (buffer == NULL) {
- /* Try again next time around */
- netdev_err(ndev,
- "unable to allocate buffer of size "
- "(%d)!!\n", bytes_recvd);
- break;
- }
-
- bufferlen = bytes_recvd;
- }
+ /* If receive ring was exhausted
+ * and not doing busy poll
+ * then re-enable host interrupts
+ * and reschedule if ring is not empty.
+ */
+ if (work_done < budget &&
+ napi_complete_done(napi, work_done) &&
+ hv_end_read(&channel->inbound) != 0)
+ napi_reschedule(napi);
- init_cached_read_index(channel);
+ netvsc_chk_recv_comp(net_device, channel, q_idx);
- } while (1);
+ /* Driver may overshoot since multiple packets per descriptor */
+ return min(work_done, budget);
+}
+
+/* Call back when data is available in host ring buffer.
+ * Processing is deferred until network softirq (NAPI)
+ */
+void netvsc_channel_cb(void *context)
+{
+ struct netvsc_channel *nvchan = context;
- if (bufferlen > NETVSC_PACKET_SIZE)
- kfree(buffer);
+ if (napi_schedule_prep(&nvchan->napi)) {
+ /* disable interupts from host */
+ hv_begin_read(&nvchan->channel->inbound);
- netvsc_chk_recv_comp(net_device, channel, q_idx);
+ __napi_schedule(&nvchan->napi);
+ }
}
/*
* netvsc_device_add - Callback when the device belonging to this
* driver is added
*/
-int netvsc_device_add(struct hv_device *device, void *additional_info)
+int netvsc_device_add(struct hv_device *device,
+ const struct netvsc_device_info *device_info)
{
int i, ret = 0;
- int ring_size =
- ((struct netvsc_device_info *)additional_info)->ring_size;
+ int ring_size = device_info->ring_size;
struct netvsc_device *net_device;
struct net_device *ndev = hv_get_drvdata(device);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
@@ -1380,14 +1286,36 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
net_device->ring_size = ring_size;
- set_per_channel_state(device->channel, net_device->cb_buffer);
+ /* Because the device uses NAPI, all the interrupt batching and
+ * control is done via Net softirq, not the channel handling
+ */
+ set_channel_read_mode(device->channel, HV_CALL_ISR);
+
+ /* If we're reopening the device we may have multiple queues, fill the
+ * chn_table with the default channel to use it before subchannels are
+ * opened.
+ * Initialize the channel state before we open;
+ * we can be interrupted as soon as we open the channel.
+ */
+
+ for (i = 0; i < VRSS_CHANNEL_MAX; i++) {
+ struct netvsc_channel *nvchan = &net_device->chan_table[i];
+
+ nvchan->channel = device->channel;
+ }
+
+ /* Enable NAPI handler before init callbacks */
+ netif_napi_add(ndev, &net_device->chan_table[0].napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
/* Open the channel */
ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, device->channel);
+ netvsc_channel_cb,
+ net_device->chan_table);
if (ret != 0) {
+ netif_napi_del(&net_device->chan_table[0].napi);
netdev_err(ndev, "unable to open channel: %d\n", ret);
goto cleanup;
}
@@ -1395,22 +1323,15 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
/* Channel is opened */
netdev_dbg(ndev, "hv_netvsc channel opened successfully\n");
- /* If we're reopening the device we may have multiple queues, fill the
- * chn_table with the default channel to use it before subchannels are
- * opened.
- */
- for (i = 0; i < VRSS_CHANNEL_MAX; i++)
- net_device->chn_table[i] = device->channel;
+ napi_enable(&net_device->chan_table[0].napi);
/* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is
* populated.
*/
- wmb();
-
- net_device_ctx->nvdev = net_device;
+ rcu_assign_pointer(net_device_ctx->nvdev, net_device);
/* Connect with the NetVsp */
- ret = netvsc_connect_vsp(device);
+ ret = netvsc_connect_vsp(device, net_device);
if (ret != 0) {
netdev_err(ndev,
"unable to connect to NetVSP - %d\n", ret);
@@ -1420,11 +1341,14 @@ int netvsc_device_add(struct hv_device *device, void *additional_info)
return ret;
close:
+ netif_napi_del(&net_device->chan_table[0].napi);
+
/* Now, we can close the channel safely */
vmbus_close(device->channel);
cleanup:
- free_netvsc_device(net_device);
+ free_netvsc_device(&net_device->rcu);
return ret;
+
}
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index fcab8019dda0..63c98bbbc596 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -37,26 +37,18 @@
#include <net/route.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
#include "hyperv_net.h"
#define RING_SIZE_MIN 64
#define LINKCHANGE_INT (2 * HZ)
-#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \
- NETIF_F_SG | \
- NETIF_F_TSO | \
- NETIF_F_TSO6 | \
- NETIF_F_HW_CSUM)
-
-/* Restrict GSO size to account for NVGRE */
-#define NETVSC_GSO_MAX_SIZE 62768
static int ring_size = 128;
module_param(ring_size, int, S_IRUGO);
MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)");
-static int max_num_vrss_chns = 8;
-
static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE |
NETIF_MSG_LINK | NETIF_MSG_IFUP |
NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR |
@@ -66,42 +58,18 @@ static int debug = -1;
module_param(debug, int, S_IRUGO);
MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
-static void do_set_multicast(struct work_struct *w)
-{
- struct net_device_context *ndevctx =
- container_of(w, struct net_device_context, work);
- struct hv_device *device_obj = ndevctx->device_ctx;
- struct net_device *ndev = hv_get_drvdata(device_obj);
- struct netvsc_device *nvdev = ndevctx->nvdev;
- struct rndis_device *rdev;
-
- if (!nvdev)
- return;
-
- rdev = nvdev->extension;
- if (rdev == NULL)
- return;
-
- if (ndev->flags & IFF_PROMISC)
- rndis_filter_set_packet_filter(rdev,
- NDIS_PACKET_TYPE_PROMISCUOUS);
- else
- rndis_filter_set_packet_filter(rdev,
- NDIS_PACKET_TYPE_BROADCAST |
- NDIS_PACKET_TYPE_ALL_MULTICAST |
- NDIS_PACKET_TYPE_DIRECTED);
-}
-
static void netvsc_set_multicast_list(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
- schedule_work(&net_device_ctx->work);
+ rndis_filter_update(nvdev);
}
static int netvsc_open(struct net_device *net)
{
- struct netvsc_device *nvdev = net_device_to_netvsc_device(net);
+ struct net_device_context *ndev_ctx = netdev_priv(net);
+ struct netvsc_device *nvdev = ndev_ctx->nvdev;
struct rndis_device *rdev;
int ret = 0;
@@ -117,7 +85,7 @@ static int netvsc_open(struct net_device *net)
netif_tx_wake_all_queues(net);
rdev = nvdev->extension;
- if (!rdev->link_state)
+ if (!rdev->link_state && !ndev_ctx->datapath)
netif_carrier_on(net);
return ret;
@@ -126,15 +94,13 @@ static int netvsc_open(struct net_device *net)
static int netvsc_close(struct net_device *net)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
int ret;
- u32 aread, awrite, i, msec = 10, retry = 0, retry_max = 20;
+ u32 aread, i, msec = 10, retry = 0, retry_max = 20;
struct vmbus_channel *chn;
netif_tx_disable(net);
- /* Make sure netvsc_set_multicast_list doesn't re-enable filter! */
- cancel_work_sync(&net_device_ctx->work);
ret = rndis_filter_close(nvdev);
if (ret != 0) {
netdev_err(net, "unable to close device (ret %d).\n", ret);
@@ -145,19 +111,15 @@ static int netvsc_close(struct net_device *net)
while (true) {
aread = 0;
for (i = 0; i < nvdev->num_chn; i++) {
- chn = nvdev->chn_table[i];
+ chn = nvdev->chan_table[i].channel;
if (!chn)
continue;
- hv_get_ringbuffer_availbytes(&chn->inbound, &aread,
- &awrite);
-
+ aread = hv_get_bytes_to_read(&chn->inbound);
if (aread)
break;
- hv_get_ringbuffer_availbytes(&chn->outbound, &aread,
- &awrite);
-
+ aread = hv_get_bytes_to_read(&chn->outbound);
if (aread)
break;
}
@@ -201,23 +163,83 @@ static void *init_ppi_data(struct rndis_message *msg, u32 ppi_size,
return ppi;
}
-static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
- void *accel_priv, select_queue_fallback_t fallback)
+/* Azure hosts don't support non-TCP port numbers in hashing yet. We compute
+ * hash for non-TCP traffic with only IP numbers.
+ */
+static inline u32 netvsc_get_hash(struct sk_buff *skb, struct sock *sk)
{
- struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *nvsc_dev = net_device_ctx->nvdev;
+ struct flow_keys flow;
u32 hash;
- u16 q_idx = 0;
+ static u32 hashrnd __read_mostly;
+
+ net_get_random_once(&hashrnd, sizeof(hashrnd));
- if (nvsc_dev == NULL || ndev->real_num_tx_queues <= 1)
+ if (!skb_flow_dissect_flow_keys(skb, &flow, 0))
return 0;
- hash = skb_get_hash(skb);
- q_idx = nvsc_dev->send_table[hash % VRSS_SEND_TAB_SIZE] %
- ndev->real_num_tx_queues;
+ if (flow.basic.ip_proto == IPPROTO_TCP) {
+ return skb_get_hash(skb);
+ } else {
+ if (flow.basic.n_proto == htons(ETH_P_IP))
+ hash = jhash2((u32 *)&flow.addrs.v4addrs, 2, hashrnd);
+ else if (flow.basic.n_proto == htons(ETH_P_IPV6))
+ hash = jhash2((u32 *)&flow.addrs.v6addrs, 8, hashrnd);
+ else
+ hash = 0;
+
+ skb_set_hash(skb, hash, PKT_HASH_TYPE_L3);
+ }
+
+ return hash;
+}
+
+static inline int netvsc_get_tx_queue(struct net_device *ndev,
+ struct sk_buff *skb, int old_idx)
+{
+ const struct net_device_context *ndc = netdev_priv(ndev);
+ struct sock *sk = skb->sk;
+ int q_idx;
+
+ q_idx = ndc->tx_send_table[netvsc_get_hash(skb, sk) &
+ (VRSS_SEND_TAB_SIZE - 1)];
+
+ /* If queue index changed record the new value */
+ if (q_idx != old_idx &&
+ sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache))
+ sk_tx_queue_set(sk, q_idx);
+
+ return q_idx;
+}
+
+/*
+ * Select queue for transmit.
+ *
+ * If a valid queue has already been assigned, then use that.
+ * Otherwise compute tx queue based on hash and the send table.
+ *
+ * This is basically similar to default (__netdev_pick_tx) with the added step
+ * of using the host send_table when no other queue has been assigned.
+ *
+ * TODO support XPS - but get_xps_queue not exported
+ */
+static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb,
+ void *accel_priv, select_queue_fallback_t fallback)
+{
+ unsigned int num_tx_queues = ndev->real_num_tx_queues;
+ int q_idx = sk_tx_queue_get(skb->sk);
- if (!nvsc_dev->chn_table[q_idx])
- q_idx = 0;
+ if (q_idx < 0 || skb->ooo_okay) {
+ /* If forwarding a packet, we use the recorded queue when
+ * available for better cache locality.
+ */
+ if (skb_rx_queue_recorded(skb))
+ q_idx = skb_get_rx_queue(skb);
+ else
+ q_idx = netvsc_get_tx_queue(ndev, skb, q_idx);
+ }
+
+ while (unlikely(q_idx >= num_tx_queues))
+ q_idx -= num_tx_queues;
return q_idx;
}
@@ -293,63 +315,35 @@ static u32 init_page_array(void *hdr, u32 len, struct sk_buff *skb,
return slots_used;
}
-static int count_skb_frag_slots(struct sk_buff *skb)
-{
- int i, frags = skb_shinfo(skb)->nr_frags;
- int pages = 0;
-
- for (i = 0; i < frags; i++) {
- skb_frag_t *frag = skb_shinfo(skb)->frags + i;
- unsigned long size = skb_frag_size(frag);
- unsigned long offset = frag->page_offset;
-
- /* Skip unused frames from start of page */
- offset &= ~PAGE_MASK;
- pages += PFN_UP(offset + size);
- }
- return pages;
-}
-
-static int netvsc_get_slots(struct sk_buff *skb)
+/* Estimate number of page buffers neede to transmit
+ * Need at most 2 for RNDIS header plus skb body and fragments.
+ */
+static unsigned int netvsc_get_slots(const struct sk_buff *skb)
{
- char *data = skb->data;
- unsigned int offset = offset_in_page(data);
- unsigned int len = skb_headlen(skb);
- int slots;
- int frag_slots;
-
- slots = DIV_ROUND_UP(offset + len, PAGE_SIZE);
- frag_slots = count_skb_frag_slots(skb);
- return slots + frag_slots;
+ return PFN_UP(offset_in_page(skb->data) + skb_headlen(skb))
+ + skb_shinfo(skb)->nr_frags
+ + 2;
}
-static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off)
+static u32 net_checksum_info(struct sk_buff *skb)
{
- u32 ret_val = TRANSPORT_INFO_NOT_IP;
-
- if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) &&
- (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) {
- goto not_ip;
- }
-
- *trans_off = skb_transport_offset(skb);
-
- if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) {
- struct iphdr *iphdr = ip_hdr(skb);
+ if (skb->protocol == htons(ETH_P_IP)) {
+ struct iphdr *ip = ip_hdr(skb);
- if (iphdr->protocol == IPPROTO_TCP)
- ret_val = TRANSPORT_INFO_IPV4_TCP;
- else if (iphdr->protocol == IPPROTO_UDP)
- ret_val = TRANSPORT_INFO_IPV4_UDP;
+ if (ip->protocol == IPPROTO_TCP)
+ return TRANSPORT_INFO_IPV4_TCP;
+ else if (ip->protocol == IPPROTO_UDP)
+ return TRANSPORT_INFO_IPV4_UDP;
} else {
- if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
- ret_val = TRANSPORT_INFO_IPV6_TCP;
+ struct ipv6hdr *ip6 = ipv6_hdr(skb);
+
+ if (ip6->nexthdr == IPPROTO_TCP)
+ return TRANSPORT_INFO_IPV6_TCP;
else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP)
- ret_val = TRANSPORT_INFO_IPV6_UDP;
+ return TRANSPORT_INFO_IPV6_UDP;
}
-not_ip:
- return ret_val;
+ return TRANSPORT_INFO_NOT_IP;
}
static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
@@ -362,30 +356,22 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
struct rndis_packet *rndis_pkt;
u32 rndis_msg_size;
struct rndis_per_packet_info *ppi;
- struct ndis_tcp_ip_checksum_info *csum_info;
- int hdr_offset;
- u32 net_trans_info;
u32 hash;
- u32 skb_length;
struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT];
struct hv_page_buffer *pb = page_buf;
- /* We will atmost need two pages to describe the rndis
- * header. We can only transmit MAX_PAGE_BUFFER_COUNT number
+ /* We can only transmit MAX_PAGE_BUFFER_COUNT number
* of pages in a single packet. If skb is scattered around
* more pages we try linearizing it.
*/
-
- skb_length = skb->len;
- num_data_pgs = netvsc_get_slots(skb) + 2;
-
+ num_data_pgs = netvsc_get_slots(skb);
if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) {
++net_device_ctx->eth_stats.tx_scattered;
if (skb_linearize(skb))
goto no_memory;
- num_data_pgs = netvsc_get_slots(skb) + 2;
+ num_data_pgs = netvsc_get_slots(skb);
if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) {
++net_device_ctx->eth_stats.tx_too_big;
goto drop;
@@ -409,6 +395,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
packet->q_idx = skb_get_queue_mapping(skb);
packet->total_data_buflen = skb->len;
+ packet->total_bytes = skb->len;
+ packet->total_packets = 1;
rndis_msg = (struct rndis_message *)skb->head;
@@ -445,13 +433,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
VLAN_PRIO_SHIFT;
}
- net_trans_info = get_net_transport_info(skb, &hdr_offset);
-
- /*
- * Setup the sendside checksum offload only if this is not a
- * GSO packet.
- */
- if ((net_trans_info & (INFO_TCP | INFO_UDP)) && skb_is_gso(skb)) {
+ if (skb_is_gso(skb)) {
struct ndis_tcp_lso_info *lso_info;
rndis_msg_size += NDIS_LSO_PPI_SIZE;
@@ -462,7 +444,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
ppi->ppi_offset);
lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE;
- if (net_trans_info & (INFO_IPV4 << 16)) {
+ if (skb->protocol == htons(ETH_P_IP)) {
lso_info->lso_v2_transmit.ip_version =
NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4;
ip_hdr(skb)->tot_len = 0;
@@ -478,10 +460,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
&ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0);
}
- lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset;
+ lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb);
lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
- if (net_trans_info & INFO_TCP) {
+ if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) {
+ struct ndis_tcp_ip_checksum_info *csum_info;
+
rndis_msg_size += NDIS_CSUM_PPI_SIZE;
ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE,
TCPIP_CHKSUM_PKTINFO);
@@ -489,15 +473,25 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi +
ppi->ppi_offset);
- if (net_trans_info & (INFO_IPV4 << 16))
+ csum_info->transmit.tcp_header_offset = skb_transport_offset(skb);
+
+ if (skb->protocol == htons(ETH_P_IP)) {
csum_info->transmit.is_ipv4 = 1;
- else
+
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP)
+ csum_info->transmit.tcp_checksum = 1;
+ else
+ csum_info->transmit.udp_checksum = 1;
+ } else {
csum_info->transmit.is_ipv6 = 1;
- csum_info->transmit.tcp_checksum = 1;
- csum_info->transmit.tcp_header_offset = hdr_offset;
+ if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
+ csum_info->transmit.tcp_checksum = 1;
+ else
+ csum_info->transmit.udp_checksum = 1;
+ }
} else {
- /* UDP checksum (and other) offload is not supported. */
+ /* Can't do offload of this type of checksum */
if (skb_checksum_help(skb))
goto drop;
}
@@ -513,15 +507,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net)
skb_tx_timestamp(skb);
ret = netvsc_send(net_device_ctx->device_ctx, packet,
rndis_msg, &pb, skb);
- if (likely(ret == 0)) {
- struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats);
-
- u64_stats_update_begin(&tx_stats->syncp);
- tx_stats->packets++;
- tx_stats->bytes += skb_length;
- u64_stats_update_end(&tx_stats->syncp);
+ if (likely(ret == 0))
return NETDEV_TX_OK;
- }
if (ret == -EAGAIN) {
++net_device_ctx->eth_stats.tx_busy;
@@ -541,7 +528,6 @@ no_memory:
++net_device_ctx->eth_stats.tx_no_memory;
goto drop;
}
-
/*
* netvsc_linkstatus_callback - Link up/down notification
*/
@@ -593,13 +579,14 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj,
}
static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
- struct hv_netvsc_packet *packet,
- struct ndis_tcp_ip_checksum_info *csum_info,
- void *data, u16 vlan_tci)
+ struct napi_struct *napi,
+ const struct ndis_tcp_ip_checksum_info *csum_info,
+ const struct ndis_pkt_8021q_info *vlan,
+ void *data, u32 buflen)
{
struct sk_buff *skb;
- skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen);
+ skb = napi_alloc_skb(napi, buflen);
if (!skb)
return skb;
@@ -607,8 +594,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
* Copy to skb. This copy is needed here since the memory pointed by
* hv_netvsc_packet cannot be deallocated
*/
- memcpy(skb_put(skb, packet->total_data_buflen), data,
- packet->total_data_buflen);
+ skb_put_data(skb, data, buflen);
skb->protocol = eth_type_trans(skb, net);
@@ -625,9 +611,12 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
- if (vlan_tci & VLAN_TAG_PRESENT)
+ if (vlan) {
+ u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT);
+
__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
vlan_tci);
+ }
return skb;
}
@@ -636,15 +625,16 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
* netvsc_recv_callback - Callback when we receive a packet from the
* "wire" on the specified device.
*/
-int netvsc_recv_callback(struct hv_device *device_obj,
- struct hv_netvsc_packet *packet,
- void **data,
- struct ndis_tcp_ip_checksum_info *csum_info,
- struct vmbus_channel *channel,
- u16 vlan_tci)
+int netvsc_recv_callback(struct net_device *net,
+ struct vmbus_channel *channel,
+ void *data, u32 len,
+ const struct ndis_tcp_ip_checksum_info *csum_info,
+ const struct ndis_pkt_8021q_info *vlan)
{
- struct net_device *net = hv_get_drvdata(device_obj);
struct net_device_context *net_device_ctx = netdev_priv(net);
+ struct netvsc_device *net_device;
+ u16 q_idx = channel->offermsg.offer.sub_channel_index;
+ struct netvsc_channel *nvchan;
struct net_device *vf_netdev;
struct sk_buff *skb;
struct netvsc_stats *rx_stats;
@@ -660,31 +650,37 @@ int netvsc_recv_callback(struct hv_device *device_obj,
* interface in the guest.
*/
rcu_read_lock();
+ net_device = rcu_dereference(net_device_ctx->nvdev);
+ if (unlikely(!net_device))
+ goto drop;
+
+ nvchan = &net_device->chan_table[q_idx];
vf_netdev = rcu_dereference(net_device_ctx->vf_netdev);
if (vf_netdev && (vf_netdev->flags & IFF_UP))
net = vf_netdev;
/* Allocate a skb - TODO direct I/O to pages? */
- skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci);
+ skb = netvsc_alloc_recv_skb(net, &nvchan->napi,
+ csum_info, vlan, data, len);
if (unlikely(!skb)) {
+drop:
++net->stats.rx_dropped;
rcu_read_unlock();
return NVSP_STAT_FAIL;
}
if (net != vf_netdev)
- skb_record_rx_queue(skb,
- channel->offermsg.offer.sub_channel_index);
+ skb_record_rx_queue(skb, q_idx);
/*
* Even if injecting the packet, record the statistics
* on the synthetic device because modifying the VF device
* statistics will not work correctly.
*/
- rx_stats = this_cpu_ptr(net_device_ctx->rx_stats);
+ rx_stats = &nvchan->rx_stats;
u64_stats_update_begin(&rx_stats->syncp);
rx_stats->packets++;
- rx_stats->bytes += packet->total_data_buflen;
+ rx_stats->bytes += len;
if (skb->pkt_type == PACKET_BROADCAST)
++rx_stats->broadcast;
@@ -692,12 +688,7 @@ int netvsc_recv_callback(struct hv_device *device_obj,
++rx_stats->multicast;
u64_stats_update_end(&rx_stats->syncp);
- /*
- * Pass the skb back up. Network stack will deallocate the skb when it
- * is done.
- * TODO - use NAPI?
- */
- netif_rx(skb);
+ napi_gro_receive(&nvchan->napi, skb);
rcu_read_unlock();
return 0;
@@ -714,7 +705,7 @@ static void netvsc_get_channels(struct net_device *net,
struct ethtool_channels *channel)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
if (nvdev) {
channel->max_combined = nvdev->max_chn;
@@ -722,116 +713,94 @@ static void netvsc_get_channels(struct net_device *net,
}
}
+static int netvsc_set_queues(struct net_device *net, struct hv_device *dev,
+ u32 num_chn)
+{
+ struct netvsc_device_info device_info;
+ int ret;
+
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.num_chn = num_chn;
+ device_info.ring_size = ring_size;
+ device_info.max_num_vrss_chns = num_chn;
+
+ ret = rndis_filter_device_add(dev, &device_info);
+ if (ret)
+ return ret;
+
+ ret = netif_set_real_num_tx_queues(net, num_chn);
+ if (ret)
+ return ret;
+
+ ret = netif_set_real_num_rx_queues(net, num_chn);
+
+ return ret;
+}
+
static int netvsc_set_channels(struct net_device *net,
struct ethtool_channels *channels)
{
struct net_device_context *net_device_ctx = netdev_priv(net);
struct hv_device *dev = net_device_ctx->device_ctx;
- struct netvsc_device *nvdev = net_device_ctx->nvdev;
- struct netvsc_device_info device_info;
- u32 num_chn;
- u32 max_chn;
- int ret = 0;
- bool recovering = false;
-
- if (net_device_ctx->start_remove || !nvdev || nvdev->destroy)
- return -ENODEV;
-
- num_chn = nvdev->num_chn;
- max_chn = min_t(u32, nvdev->max_chn, num_online_cpus());
+ struct netvsc_device *nvdev = rtnl_dereference(net_device_ctx->nvdev);
+ unsigned int count = channels->combined_count;
+ bool was_running;
+ int ret;
- if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) {
- pr_info("vRSS unsupported before NVSP Version 5\n");
+ /* We do not support separate count for rx, tx, or other */
+ if (count == 0 ||
+ channels->rx_count || channels->tx_count || channels->other_count)
return -EINVAL;
- }
- /* We do not support rx, tx, or other */
- if (!channels ||
- channels->rx_count ||
- channels->tx_count ||
- channels->other_count ||
- (channels->combined_count < 1))
+ if (count > net->num_tx_queues || count > VRSS_CHANNEL_MAX)
return -EINVAL;
- if (channels->combined_count > max_chn) {
- pr_info("combined channels too high, using %d\n", max_chn);
- channels->combined_count = max_chn;
- }
-
- ret = netvsc_close(net);
- if (ret)
- goto out;
-
- do_set:
- net_device_ctx->start_remove = true;
- rndis_filter_device_remove(dev);
+ if (!nvdev || nvdev->destroy)
+ return -ENODEV;
- nvdev->num_chn = channels->combined_count;
+ if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5)
+ return -EINVAL;
- memset(&device_info, 0, sizeof(device_info));
- device_info.num_chn = nvdev->num_chn; /* passed to RNDIS */
- device_info.ring_size = ring_size;
- device_info.max_num_vrss_chns = max_num_vrss_chns;
+ if (count > nvdev->max_chn)
+ return -EINVAL;
- ret = rndis_filter_device_add(dev, &device_info);
- if (ret) {
- if (recovering) {
- netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
+ was_running = netif_running(net);
+ if (was_running) {
+ ret = netvsc_close(net);
+ if (ret)
return ret;
- }
- goto recover;
}
- nvdev = net_device_ctx->nvdev;
+ rndis_filter_device_remove(dev, nvdev);
- ret = netif_set_real_num_tx_queues(net, nvdev->num_chn);
- if (ret) {
- if (recovering) {
- netdev_err(net, "could not set tx queue count (ret %d)\n", ret);
- return ret;
- }
- goto recover;
- }
+ ret = netvsc_set_queues(net, dev, count);
+ if (ret == 0)
+ nvdev->num_chn = count;
+ else
+ netvsc_set_queues(net, dev, nvdev->num_chn);
- ret = netif_set_real_num_rx_queues(net, nvdev->num_chn);
- if (ret) {
- if (recovering) {
- netdev_err(net, "could not set rx queue count (ret %d)\n", ret);
- return ret;
- }
- goto recover;
- }
+ if (was_running)
+ ret = netvsc_open(net);
- out:
- netvsc_open(net);
- net_device_ctx->start_remove = false;
/* We may have missed link change notifications */
schedule_delayed_work(&net_device_ctx->dwork, 0);
return ret;
-
- recover:
- /* If the above failed, we attempt to recover through the same
- * process but with the original number of channels.
- */
- netdev_err(net, "could not set channels, recovering\n");
- recovering = true;
- channels->combined_count = num_chn;
- goto do_set;
}
-static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd)
+static bool
+netvsc_validate_ethtool_ss_cmd(const struct ethtool_link_ksettings *cmd)
{
- struct ethtool_cmd diff1 = *cmd;
- struct ethtool_cmd diff2 = {};
+ struct ethtool_link_ksettings diff1 = *cmd;
+ struct ethtool_link_ksettings diff2 = {};
- ethtool_cmd_speed_set(&diff1, 0);
- diff1.duplex = 0;
+ diff1.base.speed = 0;
+ diff1.base.duplex = 0;
/* advertising and cmd are usually set */
- diff1.advertising = 0;
- diff1.cmd = 0;
+ ethtool_link_ksettings_zero_link_mode(&diff1, advertising);
+ diff1.base.cmd = 0;
/* We set port to PORT_OTHER */
- diff2.port = PORT_OTHER;
+ diff2.base.port = PORT_OTHER;
return !memcmp(&diff1, &diff2, sizeof(diff1));
}
@@ -841,33 +810,35 @@ static void netvsc_init_settings(struct net_device *dev)
struct net_device_context *ndc = netdev_priv(dev);
ndc->speed = SPEED_UNKNOWN;
- ndc->duplex = DUPLEX_UNKNOWN;
+ ndc->duplex = DUPLEX_FULL;
}
-static int netvsc_get_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_get_link_ksettings(struct net_device *dev,
+ struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
- ethtool_cmd_speed_set(cmd, ndc->speed);
- cmd->duplex = ndc->duplex;
- cmd->port = PORT_OTHER;
+ cmd->base.speed = ndc->speed;
+ cmd->base.duplex = ndc->duplex;
+ cmd->base.port = PORT_OTHER;
return 0;
}
-static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
+static int netvsc_set_link_ksettings(struct net_device *dev,
+ const struct ethtool_link_ksettings *cmd)
{
struct net_device_context *ndc = netdev_priv(dev);
u32 speed;
- speed = ethtool_cmd_speed(cmd);
+ speed = cmd->base.speed;
if (!ethtool_validate_speed(speed) ||
- !ethtool_validate_duplex(cmd->duplex) ||
+ !ethtool_validate_duplex(cmd->base.duplex) ||
!netvsc_validate_ethtool_ss_cmd(cmd))
return -EINVAL;
ndc->speed = speed;
- ndc->duplex = cmd->duplex;
+ ndc->duplex = cmd->base.duplex;
return 0;
}
@@ -875,35 +846,41 @@ static int netvsc_set_settings(struct net_device *dev, struct ethtool_cmd *cmd)
static int netvsc_change_mtu(struct net_device *ndev, int mtu)
{
struct net_device_context *ndevctx = netdev_priv(ndev);
- struct netvsc_device *nvdev = ndevctx->nvdev;
+ struct netvsc_device *nvdev = rtnl_dereference(ndevctx->nvdev);
struct hv_device *hdev = ndevctx->device_ctx;
struct netvsc_device_info device_info;
- u32 num_chn;
+ bool was_running;
int ret = 0;
- if (ndevctx->start_remove || !nvdev || nvdev->destroy)
+ if (!nvdev || nvdev->destroy)
return -ENODEV;
- ret = netvsc_close(ndev);
- if (ret)
- goto out;
+ was_running = netif_running(ndev);
+ if (was_running) {
+ ret = netvsc_close(ndev);
+ if (ret)
+ return ret;
+ }
- num_chn = nvdev->num_chn;
+ memset(&device_info, 0, sizeof(device_info));
+ device_info.ring_size = ring_size;
+ device_info.num_chn = nvdev->num_chn;
+ device_info.max_num_vrss_chns = nvdev->num_chn;
- ndevctx->start_remove = true;
- rndis_filter_device_remove(hdev);
+ rndis_filter_device_remove(hdev, nvdev);
+
+ /* 'nvdev' has been freed in rndis_filter_device_remove() ->
+ * netvsc_device_remove () -> free_netvsc_device().
+ * We mustn't access it before it's re-created in
+ * rndis_filter_device_add() -> netvsc_device_add().
+ */
ndev->mtu = mtu;
- memset(&device_info, 0, sizeof(device_info));
- device_info.ring_size = ring_size;
- device_info.num_chn = num_chn;
- device_info.max_num_vrss_chns = max_num_vrss_chns;
rndis_filter_device_add(hdev, &device_info);
-out:
- netvsc_open(ndev);
- ndevctx->start_remove = false;
+ if (was_running)
+ ret = netvsc_open(ndev);
/* We may have missed link change notifications */
schedule_delayed_work(&ndevctx->dwork, 0);
@@ -911,47 +888,50 @@ out:
return ret;
}
-static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net,
- struct rtnl_link_stats64 *t)
+static void netvsc_get_stats64(struct net_device *net,
+ struct rtnl_link_stats64 *t)
{
struct net_device_context *ndev_ctx = netdev_priv(net);
- int cpu;
-
- for_each_possible_cpu(cpu) {
- struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats,
- cpu);
- struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats,
- cpu);
- u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast;
+ struct netvsc_device *nvdev = rcu_dereference_rtnl(ndev_ctx->nvdev);
+ int i;
+
+ if (!nvdev)
+ return;
+
+ for (i = 0; i < nvdev->num_chn; i++) {
+ const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+ const struct netvsc_stats *stats;
+ u64 packets, bytes, multicast;
unsigned int start;
+ stats = &nvchan->tx_stats;
do {
- start = u64_stats_fetch_begin_irq(&tx_stats->syncp);
- tx_packets = tx_stats->packets;
- tx_bytes = tx_stats->bytes;
- } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start));
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ packets = stats->packets;
+ bytes = stats->bytes;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+ t->tx_bytes += bytes;
+ t->tx_packets += packets;
+
+ stats = &nvchan->rx_stats;
do {
- start = u64_stats_fetch_begin_irq(&rx_stats->syncp);
- rx_packets = rx_stats->packets;
- rx_bytes = rx_stats->bytes;
- rx_multicast = rx_stats->multicast + rx_stats->broadcast;
- } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start));
-
- t->tx_bytes += tx_bytes;
- t->tx_packets += tx_packets;
- t->rx_bytes += rx_bytes;
- t->rx_packets += rx_packets;
- t->multicast += rx_multicast;
+ start = u64_stats_fetch_begin_irq(&stats->syncp);
+ packets = stats->packets;
+ bytes = stats->bytes;
+ multicast = stats->multicast + stats->broadcast;
+ } while (u64_stats_fetch_retry_irq(&stats->syncp, start));
+
+ t->rx_bytes += bytes;
+ t->rx_packets += packets;
+ t->multicast += multicast;
}
t->tx_dropped = net->stats.tx_dropped;
- t->tx_errors = net->stats.tx_dropped;
+ t->tx_errors = net->stats.tx_errors;
t->rx_dropped = net->stats.rx_dropped;
t->rx_errors = net->stats.rx_errors;
-
- return t;
}
static int netvsc_set_mac_addr(struct net_device *ndev, void *p)
@@ -989,11 +969,22 @@ static const struct {
{ "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) },
};
+#define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats)
+
+/* 4 statistics per queue (rx/tx packets/bytes) */
+#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4)
+
static int netvsc_get_sset_count(struct net_device *dev, int string_set)
{
+ struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *nvdev = rtnl_dereference(ndc->nvdev);
+
+ if (!nvdev)
+ return -ENODEV;
+
switch (string_set) {
case ETH_SS_STATS:
- return ARRAY_SIZE(netvsc_stats);
+ return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev);
default:
return -EINVAL;
}
@@ -1003,35 +994,208 @@ static void netvsc_get_ethtool_stats(struct net_device *dev,
struct ethtool_stats *stats, u64 *data)
{
struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
const void *nds = &ndc->eth_stats;
- int i;
+ const struct netvsc_stats *qstats;
+ unsigned int start;
+ u64 packets, bytes;
+ int i, j;
+
+ if (!nvdev)
+ return;
- for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
+ for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++)
data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset);
+
+ for (j = 0; j < nvdev->num_chn; j++) {
+ qstats = &nvdev->chan_table[j].tx_stats;
+
+ do {
+ start = u64_stats_fetch_begin_irq(&qstats->syncp);
+ packets = qstats->packets;
+ bytes = qstats->bytes;
+ } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+ data[i++] = packets;
+ data[i++] = bytes;
+
+ qstats = &nvdev->chan_table[j].rx_stats;
+ do {
+ start = u64_stats_fetch_begin_irq(&qstats->syncp);
+ packets = qstats->packets;
+ bytes = qstats->bytes;
+ } while (u64_stats_fetch_retry_irq(&qstats->syncp, start));
+ data[i++] = packets;
+ data[i++] = bytes;
+ }
}
static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data)
{
+ struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+ u8 *p = data;
int i;
+ if (!nvdev)
+ return;
+
switch (stringset) {
case ETH_SS_STATS:
for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++)
- memcpy(data + i * ETH_GSTRING_LEN,
+ memcpy(p + i * ETH_GSTRING_LEN,
netvsc_stats[i].name, ETH_GSTRING_LEN);
+
+ p += i * ETH_GSTRING_LEN;
+ for (i = 0; i < nvdev->num_chn; i++) {
+ sprintf(p, "tx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "tx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_packets", i);
+ p += ETH_GSTRING_LEN;
+ sprintf(p, "rx_queue_%u_bytes", i);
+ p += ETH_GSTRING_LEN;
+ }
+
+ break;
+ }
+}
+
+static int
+netvsc_get_rss_hash_opts(struct netvsc_device *nvdev,
+ struct ethtool_rxnfc *info)
+{
+ info->data = RXH_IP_SRC | RXH_IP_DST;
+
+ switch (info->flow_type) {
+ case TCP_V4_FLOW:
+ case TCP_V6_FLOW:
+ info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+ /* fallthrough */
+ case UDP_V4_FLOW:
+ case UDP_V6_FLOW:
+ case IPV4_FLOW:
+ case IPV6_FLOW:
+ break;
+ default:
+ info->data = 0;
break;
}
+
+ return 0;
+}
+
+static int
+netvsc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info,
+ u32 *rules)
+{
+ struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *nvdev = rcu_dereference(ndc->nvdev);
+
+ if (!nvdev)
+ return -ENODEV;
+
+ switch (info->cmd) {
+ case ETHTOOL_GRXRINGS:
+ info->data = nvdev->num_chn;
+ return 0;
+
+ case ETHTOOL_GRXFH:
+ return netvsc_get_rss_hash_opts(nvdev, info);
+ }
+ return -EOPNOTSUPP;
}
#ifdef CONFIG_NET_POLL_CONTROLLER
-static void netvsc_poll_controller(struct net_device *net)
+static void netvsc_poll_controller(struct net_device *dev)
{
- /* As netvsc_start_xmit() works synchronous we don't have to
- * trigger anything here.
- */
+ struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *ndev;
+ int i;
+
+ rcu_read_lock();
+ ndev = rcu_dereference(ndc->nvdev);
+ if (ndev) {
+ for (i = 0; i < ndev->num_chn; i++) {
+ struct netvsc_channel *nvchan = &ndev->chan_table[i];
+
+ napi_schedule(&nvchan->napi);
+ }
+ }
+ rcu_read_unlock();
}
#endif
+static u32 netvsc_get_rxfh_key_size(struct net_device *dev)
+{
+ return NETVSC_HASH_KEYLEN;
+}
+
+static u32 netvsc_rss_indir_size(struct net_device *dev)
+{
+ return ITAB_NUM;
+}
+
+static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
+ u8 *hfunc)
+{
+ struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *ndev = rcu_dereference(ndc->nvdev);
+ struct rndis_device *rndis_dev;
+ int i;
+
+ if (!ndev)
+ return -ENODEV;
+
+ if (hfunc)
+ *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */
+
+ rndis_dev = ndev->extension;
+ if (indir) {
+ for (i = 0; i < ITAB_NUM; i++)
+ indir[i] = rndis_dev->ind_table[i];
+ }
+
+ if (key)
+ memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN);
+
+ return 0;
+}
+
+static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir,
+ const u8 *key, const u8 hfunc)
+{
+ struct net_device_context *ndc = netdev_priv(dev);
+ struct netvsc_device *ndev = rtnl_dereference(ndc->nvdev);
+ struct rndis_device *rndis_dev;
+ int i;
+
+ if (!ndev)
+ return -ENODEV;
+
+ if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP)
+ return -EOPNOTSUPP;
+
+ rndis_dev = ndev->extension;
+ if (indir) {
+ for (i = 0; i < ITAB_NUM; i++)
+ if (indir[i] >= VRSS_CHANNEL_MAX)
+ return -EINVAL;
+
+ for (i = 0; i < ITAB_NUM; i++)
+ rndis_dev->ind_table[i] = indir[i];
+ }
+
+ if (!key) {
+ if (!indir)
+ return 0;
+
+ key = rndis_dev->rss_key;
+ }
+
+ return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn);
+}
+
static const struct ethtool_ops ethtool_ops = {
.get_drvinfo = netvsc_get_drvinfo,
.get_link = ethtool_op_get_link,
@@ -1041,8 +1205,13 @@ static const struct ethtool_ops ethtool_ops = {
.get_channels = netvsc_get_channels,
.set_channels = netvsc_set_channels,
.get_ts_info = ethtool_op_get_ts_info,
- .get_settings = netvsc_get_settings,
- .set_settings = netvsc_set_settings,
+ .get_rxnfc = netvsc_get_rxnfc,
+ .get_rxfh_key_size = netvsc_get_rxfh_key_size,
+ .get_rxfh_indir_size = netvsc_rss_indir_size,
+ .get_rxfh = netvsc_get_rxfh,
+ .set_rxfh = netvsc_set_rxfh,
+ .get_link_ksettings = netvsc_get_link_ksettings,
+ .set_link_ksettings = netvsc_set_link_ksettings,
};
static const struct net_device_ops device_ops = {
@@ -1078,10 +1247,10 @@ static void netvsc_link_change(struct work_struct *w)
unsigned long flags, next_reconfig, delay;
rtnl_lock();
- if (ndev_ctx->start_remove)
+ net_device = rtnl_dereference(ndev_ctx->nvdev);
+ if (!net_device)
goto out_unlock;
- net_device = ndev_ctx->nvdev;
rdev = net_device->extension;
next_reconfig = ndev_ctx->last_reconfig + LINKCHANGE_INT;
@@ -1116,7 +1285,8 @@ static void netvsc_link_change(struct work_struct *w)
case RNDIS_STATUS_MEDIA_CONNECT:
if (rdev->link_state) {
rdev->link_state = false;
- netif_carrier_on(net);
+ if (!ndev_ctx->datapath)
+ netif_carrier_on(net);
netif_tx_wake_all_queues(net);
} else {
notify = true;
@@ -1163,15 +1333,6 @@ out_unlock:
rtnl_unlock();
}
-static void netvsc_free_netdev(struct net_device *netdev)
-{
- struct net_device_context *net_device_ctx = netdev_priv(netdev);
-
- free_percpu(net_device_ctx->tx_stats);
- free_percpu(net_device_ctx->rx_stats);
- free_netdev(netdev);
-}
-
static struct net_device *get_netvsc_bymac(const u8 *mac)
{
struct net_device *dev;
@@ -1231,7 +1392,7 @@ static int netvsc_register_vf(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev))
return NOTIFY_DONE;
@@ -1257,7 +1418,7 @@ static int netvsc_vf_up(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
netdev_info(ndev, "VF up: %s\n", vf_netdev->name);
@@ -1291,7 +1452,7 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
+ netvsc_dev = rtnl_dereference(net_device_ctx->nvdev);
netdev_info(ndev, "VF down: %s\n", vf_netdev->name);
netvsc_switch_datapath(ndev, false);
@@ -1308,7 +1469,6 @@ static int netvsc_vf_down(struct net_device *vf_netdev)
static int netvsc_unregister_vf(struct net_device *vf_netdev)
{
struct net_device *ndev;
- struct netvsc_device *netvsc_dev;
struct net_device_context *net_device_ctx;
ndev = get_netvsc_byref(vf_netdev);
@@ -1316,7 +1476,6 @@ static int netvsc_unregister_vf(struct net_device *vf_netdev)
return NOTIFY_DONE;
net_device_ctx = netdev_priv(ndev);
- netvsc_dev = net_device_ctx->nvdev;
netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name);
@@ -1336,7 +1495,7 @@ static int netvsc_probe(struct hv_device *dev,
int ret;
net = alloc_etherdev_mq(sizeof(struct net_device_context),
- num_online_cpus());
+ VRSS_CHANNEL_MAX);
if (!net)
return -ENOMEM;
@@ -1351,33 +1510,14 @@ static int netvsc_probe(struct hv_device *dev,
netdev_dbg(net, "netvsc msg_enable: %d\n",
net_device_ctx->msg_enable);
- net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
- if (!net_device_ctx->tx_stats) {
- free_netdev(net);
- return -ENOMEM;
- }
- net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats);
- if (!net_device_ctx->rx_stats) {
- free_percpu(net_device_ctx->tx_stats);
- free_netdev(net);
- return -ENOMEM;
- }
-
hv_set_drvdata(dev, net);
- net_device_ctx->start_remove = false;
-
INIT_DELAYED_WORK(&net_device_ctx->dwork, netvsc_link_change);
- INIT_WORK(&net_device_ctx->work, do_set_multicast);
spin_lock_init(&net_device_ctx->lock);
INIT_LIST_HEAD(&net_device_ctx->reconfig_events);
net->netdev_ops = &device_ops;
-
- net->hw_features = NETVSC_HW_FEATURES;
- net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX;
-
net->ethtool_ops = &ethtool_ops;
SET_NETDEV_DEV(net, &dev->device);
@@ -1387,20 +1527,26 @@ static int netvsc_probe(struct hv_device *dev,
/* Notify the netvsc driver of the new device */
memset(&device_info, 0, sizeof(device_info));
device_info.ring_size = ring_size;
- device_info.max_num_vrss_chns = max_num_vrss_chns;
+ device_info.num_chn = VRSS_CHANNEL_DEFAULT;
ret = rndis_filter_device_add(dev, &device_info);
if (ret != 0) {
netdev_err(net, "unable to add netvsc device (ret %d)\n", ret);
- netvsc_free_netdev(net);
+ free_netdev(net);
hv_set_drvdata(dev, NULL);
return ret;
}
memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN);
+ /* hw_features computed in rndis_filter_device_add */
+ net->features = net->hw_features |
+ NETIF_F_HIGHDMA | NETIF_F_SG |
+ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
+ net->vlan_features = net->features;
+
+ /* RCU not necessary here, device not registered */
nvdev = net_device_ctx->nvdev;
netif_set_real_num_tx_queues(net, nvdev->num_chn);
netif_set_real_num_rx_queues(net, nvdev->num_chn);
- netif_set_gso_max_size(net, NETVSC_GSO_MAX_SIZE);
/* MTU range: 68 - 1500 or 65521 */
net->min_mtu = NETVSC_MTU_MIN;
@@ -1412,8 +1558,8 @@ static int netvsc_probe(struct hv_device *dev,
ret = register_netdev(net);
if (ret != 0) {
pr_err("Unable to register netdev.\n");
- rndis_filter_device_remove(dev);
- netvsc_free_netdev(net);
+ rndis_filter_device_remove(dev, nvdev);
+ free_netdev(net);
}
return ret;
@@ -1423,7 +1569,6 @@ static int netvsc_remove(struct hv_device *dev)
{
struct net_device *net;
struct net_device_context *ndev_ctx;
- struct netvsc_device *net_device;
net = hv_get_drvdata(dev);
@@ -1433,32 +1578,24 @@ static int netvsc_remove(struct hv_device *dev)
}
ndev_ctx = netdev_priv(net);
- net_device = ndev_ctx->nvdev;
- /* Avoid racing with netvsc_change_mtu()/netvsc_set_channels()
- * removing the device.
- */
- rtnl_lock();
- ndev_ctx->start_remove = true;
- rtnl_unlock();
+ netif_device_detach(net);
cancel_delayed_work_sync(&ndev_ctx->dwork);
- cancel_work_sync(&ndev_ctx->work);
-
- /* Stop outbound asap */
- netif_tx_disable(net);
-
- unregister_netdev(net);
/*
* Call to the vsc driver to let it know that the device is being
- * removed
+ * removed. Also blocks mtu and channel changes.
*/
- rndis_filter_device_remove(dev);
+ rtnl_lock();
+ rndis_filter_device_remove(dev, ndev_ctx->nvdev);
+ rtnl_unlock();
+
+ unregister_netdev(net);
hv_set_drvdata(dev, NULL);
- netvsc_free_netdev(net);
+ free_netdev(net);
return 0;
}
@@ -1498,7 +1635,7 @@ static int netvsc_netdev_event(struct notifier_block *this,
return NOTIFY_DONE;
/* Avoid Vlan dev with same MAC registering as VF */
- if (event_dev->priv_flags & IFF_802_1Q_VLAN)
+ if (is_vlan_dev(event_dev))
return NOTIFY_DONE;
/* Avoid Bonding master dev with same MAC registering as VF */
diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c
index 8d90904e0e49..85c00e1c52b6 100644
--- a/drivers/net/hyperv/rndis_filter.c
+++ b/drivers/net/hyperv/rndis_filter.c
@@ -31,6 +31,7 @@
#include "hyperv_net.h"
+static void rndis_set_multicast(struct work_struct *w);
#define RNDIS_EXT_LEN PAGE_SIZE
struct rndis_request {
@@ -57,6 +58,14 @@ struct rndis_request {
u8 request_ext[RNDIS_EXT_LEN];
};
+static const u8 netvsc_hash_key[NETVSC_HASH_KEYLEN] = {
+ 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
+ 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
+ 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
+ 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
+ 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
+};
+
static struct rndis_device *get_rndis_device(void)
{
struct rndis_device *device;
@@ -68,6 +77,7 @@ static struct rndis_device *get_rndis_device(void)
spin_lock_init(&device->request_lock);
INIT_LIST_HEAD(&device->req_list);
+ INIT_WORK(&device->mcast_work, rndis_set_multicast);
device->state = RNDIS_DEV_UNINITIALIZED;
@@ -124,7 +134,7 @@ static void put_rndis_request(struct rndis_device *dev,
}
static void dump_rndis_message(struct hv_device *hv_dev,
- struct rndis_message *rndis_msg)
+ const struct rndis_message *rndis_msg)
{
struct net_device *netdev = hv_get_drvdata(hv_dev);
@@ -339,102 +349,78 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type)
return NULL;
}
-static int rndis_filter_receive_data(struct rndis_device *dev,
- struct rndis_message *msg,
- struct hv_netvsc_packet *pkt,
- void **data,
- struct vmbus_channel *channel)
+static int rndis_filter_receive_data(struct net_device *ndev,
+ struct rndis_device *dev,
+ struct rndis_message *msg,
+ struct vmbus_channel *channel,
+ void *data, u32 data_buflen)
{
- struct rndis_packet *rndis_pkt;
+ struct rndis_packet *rndis_pkt = &msg->msg.pkt;
+ const struct ndis_tcp_ip_checksum_info *csum_info;
+ const struct ndis_pkt_8021q_info *vlan;
u32 data_offset;
- struct ndis_pkt_8021q_info *vlan;
- struct ndis_tcp_ip_checksum_info *csum_info;
- u16 vlan_tci = 0;
- struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
-
- rndis_pkt = &msg->msg.pkt;
/* Remove the rndis header and pass it back up the stack */
data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset;
- pkt->total_data_buflen -= data_offset;
+ data_buflen -= data_offset;
/*
* Make sure we got a valid RNDIS message, now total_data_buflen
* should be the data packet size plus the trailer padding size
*/
- if (pkt->total_data_buflen < rndis_pkt->data_len) {
+ if (unlikely(data_buflen < rndis_pkt->data_len)) {
netdev_err(dev->ndev, "rndis message buffer "
"overflow detected (got %u, min %u)"
"...dropping this message!\n",
- pkt->total_data_buflen, rndis_pkt->data_len);
+ data_buflen, rndis_pkt->data_len);
return NVSP_STAT_FAIL;
}
+ vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
+
/*
* Remove the rndis trailer padding from rndis packet message
* rndis_pkt->data_len tell us the real data length, we only copy
* the data packet to the stack, without the rndis trailer padding
*/
- pkt->total_data_buflen = rndis_pkt->data_len;
- *data = (void *)((unsigned long)(*data) + data_offset);
-
- vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO);
- if (vlan) {
- vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid |
- (vlan->pri << VLAN_PRIO_SHIFT);
- }
-
+ data = (void *)((unsigned long)data + data_offset);
csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO);
- return netvsc_recv_callback(net_device_ctx->device_ctx, pkt, data,
- csum_info, channel, vlan_tci);
+ return netvsc_recv_callback(ndev, channel,
+ data, rndis_pkt->data_len,
+ csum_info, vlan);
}
-int rndis_filter_receive(struct hv_device *dev,
- struct hv_netvsc_packet *pkt,
- void **data,
- struct vmbus_channel *channel)
+int rndis_filter_receive(struct net_device *ndev,
+ struct netvsc_device *net_dev,
+ struct hv_device *dev,
+ struct vmbus_channel *channel,
+ void *data, u32 buflen)
{
- struct net_device *ndev = hv_get_drvdata(dev);
struct net_device_context *net_device_ctx = netdev_priv(ndev);
- struct netvsc_device *net_dev = net_device_ctx->nvdev;
- struct rndis_device *rndis_dev;
- struct rndis_message *rndis_msg;
- int ret = 0;
-
- if (!net_dev) {
- ret = NVSP_STAT_FAIL;
- goto exit;
- }
+ struct rndis_device *rndis_dev = net_dev->extension;
+ struct rndis_message *rndis_msg = data;
/* Make sure the rndis device state is initialized */
- if (!net_dev->extension) {
- netdev_err(ndev, "got rndis message but no rndis device - "
- "dropping this message!\n");
- ret = NVSP_STAT_FAIL;
- goto exit;
+ if (unlikely(!rndis_dev)) {
+ netif_err(net_device_ctx, rx_err, ndev,
+ "got rndis message but no rndis device!\n");
+ return NVSP_STAT_FAIL;
}
- rndis_dev = (struct rndis_device *)net_dev->extension;
- if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) {
- netdev_err(ndev, "got rndis message but rndis device "
- "uninitialized...dropping this message!\n");
- ret = NVSP_STAT_FAIL;
- goto exit;
+ if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) {
+ netif_err(net_device_ctx, rx_err, ndev,
+ "got rndis message uninitialized\n");
+ return NVSP_STAT_FAIL;
}
- rndis_msg = *data;
-
- if (netif_msg_rx_err(net_device_ctx))
+ if (netif_msg_rx_status(net_device_ctx))
dump_rndis_message(dev, rndis_msg);
switch (rndis_msg->ndis_msg_type) {
case RNDIS_MSG_PACKET:
- /* data msg */
- ret = rndis_filter_receive_data(rndis_dev, rndis_msg, pkt,
- data, channel);
- break;
-
+ return rndis_filter_receive_data(ndev, rndis_dev, rndis_msg,
+ channel, data, buflen);
case RNDIS_MSG_INIT_C:
case RNDIS_MSG_QUERY_C:
case RNDIS_MSG_SET_C:
@@ -454,8 +440,7 @@ int rndis_filter_receive(struct hv_device *dev,
break;
}
-exit:
- return ret;
+ return 0;
}
static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
@@ -485,7 +470,35 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid,
query->info_buflen = 0;
query->dev_vc_handle = 0;
- if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
+ if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) {
+ struct net_device_context *ndevctx = netdev_priv(dev->ndev);
+ struct netvsc_device *nvdev = ndevctx->nvdev;
+ struct ndis_offload *hwcaps;
+ u32 nvsp_version = nvdev->nvsp_version;
+ u8 ndis_rev;
+ size_t size;
+
+ if (nvsp_version >= NVSP_PROTOCOL_VERSION_5) {
+ ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_3;
+ size = NDIS_OFFLOAD_SIZE;
+ } else if (nvsp_version >= NVSP_PROTOCOL_VERSION_4) {
+ ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_2;
+ size = NDIS_OFFLOAD_SIZE_6_1;
+ } else {
+ ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_1;
+ size = NDIS_OFFLOAD_SIZE_6_0;
+ }
+
+ request->request_msg.msg_len += size;
+ query->info_buflen = size;
+ hwcaps = (struct ndis_offload *)
+ ((unsigned long)query + query->info_buf_offset);
+
+ hwcaps->header.type = NDIS_OBJECT_TYPE_OFFLOAD;
+ hwcaps->header.revision = ndis_rev;
+ hwcaps->header.size = size;
+
+ } else if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) {
struct ndis_recv_scale_cap *cap;
request->request_msg.msg_len +=
@@ -526,6 +539,44 @@ cleanup:
return ret;
}
+/* Get the hardware offload capabilities */
+static int
+rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps)
+{
+ u32 caps_len = sizeof(*caps);
+ int ret;
+
+ memset(caps, 0, sizeof(*caps));
+
+ ret = rndis_filter_query_device(dev,
+ OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES,
+ caps, &caps_len);
+ if (ret)
+ return ret;
+
+ if (caps->header.type != NDIS_OBJECT_TYPE_OFFLOAD) {
+ netdev_warn(dev->ndev, "invalid NDIS objtype %#x\n",
+ caps->header.type);
+ return -EINVAL;
+ }
+
+ if (caps->header.revision < NDIS_OFFLOAD_PARAMETERS_REVISION_1) {
+ netdev_warn(dev->ndev, "invalid NDIS objrev %x\n",
+ caps->header.revision);
+ return -EINVAL;
+ }
+
+ if (caps->header.size > caps_len ||
+ caps->header.size < NDIS_OFFLOAD_SIZE_6_0) {
+ netdev_warn(dev->ndev,
+ "invalid NDIS objsize %u, data size %u\n",
+ caps->header.size, caps_len);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int rndis_filter_query_device_mac(struct rndis_device *dev)
{
u32 size = ETH_ALEN;
@@ -663,23 +714,15 @@ cleanup:
return ret;
}
-static const u8 netvsc_hash_key[] = {
- 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
- 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
- 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
- 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
- 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
-};
-#define HASH_KEYLEN ARRAY_SIZE(netvsc_hash_key)
-
-static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
+int rndis_filter_set_rss_param(struct rndis_device *rdev,
+ const u8 *rss_key, int num_queue)
{
struct net_device *ndev = rdev->ndev;
struct rndis_request *request;
struct rndis_set_request *set;
struct rndis_set_complete *set_complete;
u32 extlen = sizeof(struct ndis_recv_scale_param) +
- 4*ITAB_NUM + HASH_KEYLEN;
+ 4 * ITAB_NUM + NETVSC_HASH_KEYLEN;
struct ndis_recv_scale_param *rssp;
u32 *itab;
u8 *keyp;
@@ -707,19 +750,18 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
NDIS_HASH_TCP_IPV6;
rssp->indirect_tabsize = 4*ITAB_NUM;
rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param);
- rssp->hashkey_size = HASH_KEYLEN;
+ rssp->hashkey_size = NETVSC_HASH_KEYLEN;
rssp->kashkey_offset = rssp->indirect_taboffset +
rssp->indirect_tabsize;
/* Set indirection table entries */
itab = (u32 *)(rssp + 1);
for (i = 0; i < ITAB_NUM; i++)
- itab[i] = i % num_queue;
+ itab[i] = rdev->ind_table[i];
/* Set hask key values */
keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset);
- for (i = 0; i < HASH_KEYLEN; i++)
- keyp[i] = netvsc_hash_key[i];
+ memcpy(keyp, rss_key, NETVSC_HASH_KEYLEN);
ret = rndis_filter_send_request(rdev, request);
if (ret != 0)
@@ -727,7 +769,9 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue)
wait_for_completion(&request->wait_event);
set_complete = &request->response_msg.msg.set_complete;
- if (set_complete->status != RNDIS_STATUS_SUCCESS) {
+ if (set_complete->status == RNDIS_STATUS_SUCCESS)
+ memcpy(rdev->rss_key, rss_key, NETVSC_HASH_KEYLEN);
+ else {
netdev_err(ndev, "Fail to set RSS parameters:0x%x\n",
set_complete->status);
ret = -EINVAL;
@@ -773,21 +817,19 @@ static int rndis_filter_query_link_speed(struct rndis_device *dev)
return ret;
}
-int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
+static int rndis_filter_set_packet_filter(struct rndis_device *dev,
+ u32 new_filter)
{
struct rndis_request *request;
struct rndis_set_request *set;
- struct rndis_set_complete *set_complete;
- u32 status;
int ret;
request = get_rndis_request(dev, RNDIS_MSG_SET,
RNDIS_MESSAGE_SIZE(struct rndis_set_request) +
sizeof(u32));
- if (!request) {
- ret = -ENOMEM;
- goto cleanup;
- }
+ if (!request)
+ return -ENOMEM;
+
/* Setup the rndis set */
set = &request->request_msg.msg.set_req;
@@ -799,20 +841,36 @@ int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter)
&new_filter, sizeof(u32));
ret = rndis_filter_send_request(dev, request);
- if (ret != 0)
- goto cleanup;
-
- wait_for_completion(&request->wait_event);
+ if (ret == 0)
+ wait_for_completion(&request->wait_event);
- set_complete = &request->response_msg.msg.set_complete;
- status = set_complete->status;
+ put_rndis_request(dev, request);
-cleanup:
- if (request)
- put_rndis_request(dev, request);
return ret;
}
+static void rndis_set_multicast(struct work_struct *w)
+{
+ struct rndis_device *rdev
+ = container_of(w, struct rndis_device, mcast_work);
+
+ if (rdev->ndev->flags & IFF_PROMISC)
+ rndis_filter_set_packet_filter(rdev,
+ NDIS_PACKET_TYPE_PROMISCUOUS);
+ else
+ rndis_filter_set_packet_filter(rdev,
+ NDIS_PACKET_TYPE_BROADCAST |
+ NDIS_PACKET_TYPE_ALL_MULTICAST |
+ NDIS_PACKET_TYPE_DIRECTED);
+}
+
+void rndis_filter_update(struct netvsc_device *nvdev)
+{
+ struct rndis_device *rdev = nvdev->extension;
+
+ schedule_work(&rdev->mcast_work);
+}
+
static int rndis_filter_init_device(struct rndis_device *dev)
{
struct rndis_request *request;
@@ -864,14 +922,29 @@ cleanup:
return ret;
}
+static bool netvsc_device_idle(const struct netvsc_device *nvdev)
+{
+ int i;
+
+ if (atomic_read(&nvdev->num_outstanding_recvs) > 0)
+ return false;
+
+ for (i = 0; i < nvdev->num_chn; i++) {
+ const struct netvsc_channel *nvchan = &nvdev->chan_table[i];
+
+ if (atomic_read(&nvchan->queue_sends) > 0)
+ return false;
+ }
+
+ return true;
+}
+
static void rndis_filter_halt_device(struct rndis_device *dev)
{
struct rndis_request *request;
struct rndis_halt_request *halt;
struct net_device_context *net_device_ctx = netdev_priv(dev->ndev);
struct netvsc_device *nvdev = net_device_ctx->nvdev;
- struct hv_device *hdev = net_device_ctx->device_ctx;
- ulong flags;
/* Attempt to do a rndis device halt */
request = get_rndis_request(dev, RNDIS_MSG_HALT,
@@ -889,14 +962,13 @@ static void rndis_filter_halt_device(struct rndis_device *dev)
dev->state = RNDIS_DEV_UNINITIALIZED;
cleanup:
- spin_lock_irqsave(&hdev->channel->inbound_lock, flags);
nvdev->destroy = true;
- spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags);
+
+ /* Force flag to be ordered before waiting */
+ wmb();
/* Wait for all send completions */
- wait_event(nvdev->wait_drain,
- atomic_read(&nvdev->num_outstanding_sends) == 0 &&
- atomic_read(&nvdev->num_outstanding_recvs) == 0);
+ wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev));
if (request)
put_rndis_request(dev, request);
@@ -926,6 +998,9 @@ static int rndis_filter_close_device(struct rndis_device *dev)
if (dev->state != RNDIS_DEV_DATAINITIALIZED)
return 0;
+ /* Make sure rndis_set_multicast doesn't re-enable filter! */
+ cancel_work_sync(&dev->mcast_work);
+
ret = rndis_filter_set_packet_filter(dev, 0);
if (ret == -ENODEV)
ret = 0;
@@ -942,51 +1017,58 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc)
hv_get_drvdata(new_sc->primary_channel->device_obj);
struct netvsc_device *nvscdev = net_device_to_netvsc_device(ndev);
u16 chn_index = new_sc->offermsg.offer.sub_channel_index;
+ struct netvsc_channel *nvchan;
int ret;
- unsigned long flags;
if (chn_index >= nvscdev->num_chn)
return;
- set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) *
- NETVSC_PACKET_SIZE);
+ nvchan = nvscdev->chan_table + chn_index;
+ nvchan->mrc.buf
+ = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data));
- nvscdev->mrc[chn_index].buf = vzalloc(NETVSC_RECVSLOT_MAX *
- sizeof(struct recv_comp_data));
+ if (!nvchan->mrc.buf)
+ return;
+
+ /* Because the device uses NAPI, all the interrupt batching and
+ * control is done via Net softirq, not the channel handling
+ */
+ set_channel_read_mode(new_sc, HV_CALL_ISR);
+
+ /* Set the channel before opening.*/
+ nvchan->channel = new_sc;
+ netif_napi_add(ndev, &nvchan->napi,
+ netvsc_poll, NAPI_POLL_WEIGHT);
ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE,
nvscdev->ring_size * PAGE_SIZE, NULL, 0,
- netvsc_channel_cb, new_sc);
-
+ netvsc_channel_cb, nvchan);
if (ret == 0)
- nvscdev->chn_table[chn_index] = new_sc;
+ napi_enable(&nvchan->napi);
+ else
+ netif_napi_del(&nvchan->napi);
- spin_lock_irqsave(&nvscdev->sc_lock, flags);
- nvscdev->num_sc_offered--;
- spin_unlock_irqrestore(&nvscdev->sc_lock, flags);
- if (nvscdev->num_sc_offered == 0)
+ if (refcount_dec_and_test(&nvscdev->sc_offered))
complete(&nvscdev->channel_init_wait);
}
int rndis_filter_device_add(struct hv_device *dev,
- void *additional_info)
+ struct netvsc_device_info *device_info)
{
- int ret;
struct net_device *net = hv_get_drvdata(dev);
struct net_device_context *net_device_ctx = netdev_priv(net);
struct netvsc_device *net_device;
struct rndis_device *rndis_device;
- struct netvsc_device_info *device_info = additional_info;
+ struct ndis_offload hwcaps;
struct ndis_offload_params offloads;
struct nvsp_message *init_packet;
struct ndis_recv_scale_cap rsscap;
u32 rsscap_size = sizeof(struct ndis_recv_scale_cap);
- u32 mtu, size;
- u32 num_rss_qs;
- u32 sc_delta;
+ unsigned int gso_max_size = GSO_MAX_SIZE;
+ u32 mtu, size, num_rss_qs;
const struct cpumask *node_cpu_mask;
u32 num_possible_rss_qs;
- unsigned long flags;
+ int i, ret;
rndis_device = get_rndis_device();
if (!rndis_device)
@@ -997,7 +1079,7 @@ int rndis_filter_device_add(struct hv_device *dev,
* NOTE! Once the channel is created, we may get a receive callback
* (RndisFilterOnReceive()) before this call is completed
*/
- ret = netvsc_device_add(dev, additional_info);
+ ret = netvsc_device_add(dev, device_info);
if (ret != 0) {
kfree(rndis_device);
return ret;
@@ -1008,7 +1090,7 @@ int rndis_filter_device_add(struct hv_device *dev,
net_device->max_chn = 1;
net_device->num_chn = 1;
- spin_lock_init(&net_device->sc_lock);
+ refcount_set(&net_device->sc_offered, 0);
net_device->extension = rndis_device;
rndis_device->ndev = net;
@@ -1016,7 +1098,7 @@ int rndis_filter_device_add(struct hv_device *dev,
/* Send the rndis initialization message */
ret = rndis_filter_init_device(rndis_device);
if (ret != 0) {
- rndis_filter_device_remove(dev);
+ rndis_filter_device_remove(dev, net_device);
return ret;
}
@@ -1031,25 +1113,71 @@ int rndis_filter_device_add(struct hv_device *dev,
/* Get the mac address */
ret = rndis_filter_query_device_mac(rndis_device);
if (ret != 0) {
- rndis_filter_device_remove(dev);
+ rndis_filter_device_remove(dev, net_device);
return ret;
}
memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN);
- /* Turn on the offloads; the host supports all of the relevant
- * offloads.
- */
+ /* Find HW offload capabilities */
+ ret = rndis_query_hwcaps(rndis_device, &hwcaps);
+ if (ret != 0) {
+ rndis_filter_device_remove(dev, net_device);
+ return ret;
+ }
+
+ /* A value of zero means "no change"; now turn on what we want. */
memset(&offloads, 0, sizeof(struct ndis_offload_params));
- /* A value of zero means "no change"; now turn on what we
- * want.
- */
- offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
- offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
- offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
- offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
- offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
- offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
+
+ /* Linux does not care about IP checksum, always does in kernel */
+ offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED;
+
+ /* Compute tx offload settings based on hw capabilities */
+ net->hw_features = NETIF_F_RXCSUM;
+
+ if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) {
+ /* Can checksum TCP */
+ net->hw_features |= NETIF_F_IP_CSUM;
+ net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV4_TCP;
+
+ offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+
+ if (hwcaps.lsov2.ip4_encap & NDIS_OFFLOAD_ENCAP_8023) {
+ offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
+ net->hw_features |= NETIF_F_TSO;
+
+ if (hwcaps.lsov2.ip4_maxsz < gso_max_size)
+ gso_max_size = hwcaps.lsov2.ip4_maxsz;
+ }
+
+ if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) {
+ offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV4_UDP;
+ }
+ }
+
+ if ((hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_ALL_TCP6) == NDIS_TXCSUM_ALL_TCP6) {
+ net->hw_features |= NETIF_F_IPV6_CSUM;
+
+ offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV6_TCP;
+
+ if ((hwcaps.lsov2.ip6_encap & NDIS_OFFLOAD_ENCAP_8023) &&
+ (hwcaps.lsov2.ip6_opts & NDIS_LSOV2_CAP_IP6) == NDIS_LSOV2_CAP_IP6) {
+ offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED;
+ net->hw_features |= NETIF_F_TSO6;
+
+ if (hwcaps.lsov2.ip6_maxsz < gso_max_size)
+ gso_max_size = hwcaps.lsov2.ip6_maxsz;
+ }
+
+ if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) {
+ offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED;
+ net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV6_UDP;
+ }
+ }
+
+ netif_set_gso_max_size(net, gso_max_size);
ret = rndis_filter_set_offload_params(net, &offloads);
if (ret)
@@ -1057,11 +1185,9 @@ int rndis_filter_device_add(struct hv_device *dev,
rndis_filter_query_device_link_status(rndis_device);
- device_info->link_state = rndis_device->link_state;
-
netdev_dbg(net, "Device MAC %pM link state %s\n",
rndis_device->hw_mac_adr,
- device_info->link_state ? "down" : "up");
+ rndis_device->link_state ? "down" : "up");
if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5)
return 0;
@@ -1076,37 +1202,30 @@ int rndis_filter_device_add(struct hv_device *dev,
if (ret || rsscap.num_recv_que < 2)
goto out;
- net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, rsscap.num_recv_que);
-
- num_rss_qs = min(device_info->max_num_vrss_chns, net_device->max_chn);
-
/*
* We will limit the VRSS channels to the number CPUs in the NUMA node
* the primary channel is currently bound to.
+ *
+ * This also guarantees that num_possible_rss_qs <= num_online_cpus
*/
node_cpu_mask = cpumask_of_node(cpu_to_node(dev->channel->target_cpu));
- num_possible_rss_qs = cpumask_weight(node_cpu_mask);
+ num_possible_rss_qs = min_t(u32, cpumask_weight(node_cpu_mask),
+ rsscap.num_recv_que);
- /* We will use the given number of channels if available. */
- if (device_info->num_chn && device_info->num_chn < net_device->max_chn)
- net_device->num_chn = device_info->num_chn;
- else
- net_device->num_chn = min(num_possible_rss_qs, num_rss_qs);
+ net_device->max_chn = min_t(u32, VRSS_CHANNEL_MAX, num_possible_rss_qs);
- num_rss_qs = net_device->num_chn - 1;
- net_device->num_sc_offered = num_rss_qs;
+ /* We will use the given number of channels if available. */
+ net_device->num_chn = min(net_device->max_chn, device_info->num_chn);
- if (net_device->num_chn == 1)
- goto out;
+ for (i = 0; i < ITAB_NUM; i++)
+ rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i,
+ net_device->num_chn);
- net_device->sub_cb_buf = vzalloc((net_device->num_chn - 1) *
- NETVSC_PACKET_SIZE);
- if (!net_device->sub_cb_buf) {
- net_device->num_chn = 1;
- dev_info(&dev->device, "No memory for subchannels.\n");
- goto out;
- }
+ num_rss_qs = net_device->num_chn - 1;
+ if (num_rss_qs == 0)
+ return 0;
+ refcount_set(&net_device->sc_offered, num_rss_qs);
vmbus_set_sc_create_callback(dev->channel, netvsc_sc_open);
init_packet = &net_device->channel_init_pkt;
@@ -1122,51 +1241,37 @@ int rndis_filter_device_add(struct hv_device *dev,
VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
if (ret)
goto out;
- wait_for_completion(&net_device->channel_init_wait);
- if (init_packet->msg.v5_msg.subchn_comp.status !=
- NVSP_STAT_SUCCESS) {
+ if (init_packet->msg.v5_msg.subchn_comp.status != NVSP_STAT_SUCCESS) {
ret = -ENODEV;
goto out;
}
+ wait_for_completion(&net_device->channel_init_wait);
+
net_device->num_chn = 1 +
init_packet->msg.v5_msg.subchn_comp.num_subchannels;
- ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn);
-
- /*
- * Set the number of sub-channels to be received.
- */
- spin_lock_irqsave(&net_device->sc_lock, flags);
- sc_delta = num_rss_qs - (net_device->num_chn - 1);
- net_device->num_sc_offered -= sc_delta;
- spin_unlock_irqrestore(&net_device->sc_lock, flags);
-
+ /* ignore failues from setting rss parameters, still have channels */
+ rndis_filter_set_rss_param(rndis_device, netvsc_hash_key,
+ net_device->num_chn);
out:
if (ret) {
net_device->max_chn = 1;
net_device->num_chn = 1;
- net_device->num_sc_offered = 0;
}
return 0; /* return 0 because primary channel can be used alone */
err_dev_remv:
- rndis_filter_device_remove(dev);
+ rndis_filter_device_remove(dev, net_device);
return ret;
}
-void rndis_filter_device_remove(struct hv_device *dev)
+void rndis_filter_device_remove(struct hv_device *dev,
+ struct netvsc_device *net_dev)
{
- struct netvsc_device *net_dev = hv_device_to_netvsc_device(dev);
struct rndis_device *rndis_dev = net_dev->extension;
- /* If not all subchannel offers are complete, wait for them until
- * completion to avoid race.
- */
- if (net_dev->num_sc_offered > 0)
- wait_for_completion(&net_dev->channel_init_wait);
-
/* Halt and release the rndis device */
rndis_filter_halt_device(rndis_dev);