From 0b307ebd6834012c483b8ee9801164123a54f79a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:05:58 -0800 Subject: netvsc: remove no longer needed receive staging buffers The ring buffer mapping now handles the wraparound case inside get_next_pkt_raw. Therefore it is not necessary to have an additional special receive staging buffer. See commit 1562edaed8c164ca5199 ("Drivers: hv: ring_buffer: count on wrap around mappings") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 3958adade7eb..cce70ceba6d5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -748,11 +748,6 @@ struct netvsc_device { int ring_size; - /* The primary channel callback buffer */ - unsigned char *cb_buffer; - /* The sub channel callback buffer */ - unsigned char *sub_cb_buf; - struct multi_send_data msd[VRSS_CHANNEL_MAX]; u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ -- cgit From 23312a3be999c22ec70c0fdf9f01cdee05fac986 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:05:59 -0800 Subject: netvsc: negotiate checksum and segmentation parameters Redo how Hyper-V network driver negotiates offload features. Query the host to determine offload settings, and use the result. Also: * disable IPv4 header checksum offload (not used by Linux) * enable TSO only if host supports * enable UDP checksum offload if supported * don't advertise support for checksumming of non-IP protocols * adjust GSO maximum segment size * enable HIGHDMA Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 149 +++++++++++++++++++++++++++++++++++--- drivers/net/hyperv/netvsc_drv.c | 92 ++++++++++------------- drivers/net/hyperv/rndis_filter.c | 140 +++++++++++++++++++++++++++++++---- 3 files changed, 305 insertions(+), 76 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index cce70ceba6d5..ccf94c16084c 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -34,6 +34,7 @@ #define NDIS_OBJECT_TYPE_RSS_CAPABILITIES 0x88 #define NDIS_OBJECT_TYPE_RSS_PARAMETERS 0x89 +#define NDIS_OBJECT_TYPE_OFFLOAD 0xa7 #define NDIS_RECEIVE_SCALE_CAPABILITIES_REVISION_2 2 #define NDIS_RECEIVE_SCALE_PARAMETERS_REVISION_2 2 @@ -685,6 +686,7 @@ struct net_device_context { struct work_struct work; u32 msg_enable; /* debug level */ + u32 tx_checksum_mask; struct netvsc_stats __percpu *tx_stats; struct netvsc_stats __percpu *rx_stats; @@ -934,7 +936,7 @@ struct ndis_pkt_8021q_info { }; }; -struct ndis_oject_header { +struct ndis_object_header { u8 type; u8 revision; u16 size; @@ -942,6 +944,9 @@ struct ndis_oject_header { #define NDIS_OBJECT_TYPE_DEFAULT 0x80 #define NDIS_OFFLOAD_PARAMETERS_REVISION_3 3 +#define NDIS_OFFLOAD_PARAMETERS_REVISION_2 2 +#define NDIS_OFFLOAD_PARAMETERS_REVISION_1 1 + #define NDIS_OFFLOAD_PARAMETERS_NO_CHANGE 0 #define NDIS_OFFLOAD_PARAMETERS_LSOV2_DISABLED 1 #define NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED 2 @@ -968,8 +973,135 @@ struct ndis_oject_header { #define OID_TCP_CONNECTION_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020F /* query */ #define OID_OFFLOAD_ENCAPSULATION 0x0101010A /* set/query */ +/* + * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES + * ndis_type: NDIS_OBJTYPE_OFFLOAD + */ + +#define NDIS_OFFLOAD_ENCAP_NONE 0x0000 +#define NDIS_OFFLOAD_ENCAP_NULL 0x0001 +#define NDIS_OFFLOAD_ENCAP_8023 0x0002 +#define NDIS_OFFLOAD_ENCAP_8023PQ 0x0004 +#define NDIS_OFFLOAD_ENCAP_8023PQ_OOB 0x0008 +#define NDIS_OFFLOAD_ENCAP_RFC1483 0x0010 + +struct ndis_csum_offload { + u32 ip4_txenc; + u32 ip4_txcsum; +#define NDIS_TXCSUM_CAP_IP4OPT 0x001 +#define NDIS_TXCSUM_CAP_TCP4OPT 0x004 +#define NDIS_TXCSUM_CAP_TCP4 0x010 +#define NDIS_TXCSUM_CAP_UDP4 0x040 +#define NDIS_TXCSUM_CAP_IP4 0x100 + +#define NDIS_TXCSUM_ALL_TCP4 (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT) + + u32 ip4_rxenc; + u32 ip4_rxcsum; +#define NDIS_RXCSUM_CAP_IP4OPT 0x001 +#define NDIS_RXCSUM_CAP_TCP4OPT 0x004 +#define NDIS_RXCSUM_CAP_TCP4 0x010 +#define NDIS_RXCSUM_CAP_UDP4 0x040 +#define NDIS_RXCSUM_CAP_IP4 0x100 + u32 ip6_txenc; + u32 ip6_txcsum; +#define NDIS_TXCSUM_CAP_IP6EXT 0x001 +#define NDIS_TXCSUM_CAP_TCP6OPT 0x004 +#define NDIS_TXCSUM_CAP_TCP6 0x010 +#define NDIS_TXCSUM_CAP_UDP6 0x040 + u32 ip6_rxenc; + u32 ip6_rxcsum; +#define NDIS_RXCSUM_CAP_IP6EXT 0x001 +#define NDIS_RXCSUM_CAP_TCP6OPT 0x004 +#define NDIS_RXCSUM_CAP_TCP6 0x010 +#define NDIS_RXCSUM_CAP_UDP6 0x040 + +#define NDIS_TXCSUM_ALL_TCP6 (NDIS_TXCSUM_CAP_TCP6 | \ + NDIS_TXCSUM_CAP_TCP6OPT | \ + NDIS_TXCSUM_CAP_IP6EXT) +}; + +struct ndis_lsov1_offload { + u32 encap; + u32 maxsize; + u32 minsegs; + u32 opts; +}; + +struct ndis_ipsecv1_offload { + u32 encap; + u32 ah_esp; + u32 xport_tun; + u32 ip4_opts; + u32 flags; + u32 ip4_ah; + u32 ip4_esp; +}; + +struct ndis_lsov2_offload { + u32 ip4_encap; + u32 ip4_maxsz; + u32 ip4_minsg; + u32 ip6_encap; + u32 ip6_maxsz; + u32 ip6_minsg; + u32 ip6_opts; +#define NDIS_LSOV2_CAP_IP6EXT 0x001 +#define NDIS_LSOV2_CAP_TCP6OPT 0x004 + +#define NDIS_LSOV2_CAP_IP6 (NDIS_LSOV2_CAP_IP6EXT | \ + NDIS_LSOV2_CAP_TCP6OPT) +}; + +struct ndis_ipsecv2_offload { + u32 encap; + u16 ip6; + u16 ip4opt; + u16 ip6ext; + u16 ah; + u16 esp; + u16 ah_esp; + u16 xport; + u16 tun; + u16 xport_tun; + u16 lso; + u16 extseq; + u32 udp_esp; + u32 auth; + u32 crypto; + u32 sa_caps; +}; + +struct ndis_rsc_offload { + u16 ip4; + u16 ip6; +}; + +struct ndis_encap_offload { + u32 flags; + u32 maxhdr; +}; + +struct ndis_offload { + struct ndis_object_header header; + struct ndis_csum_offload csum; + struct ndis_lsov1_offload lsov1; + struct ndis_ipsecv1_offload ipsecv1; + struct ndis_lsov2_offload lsov2; + u32 flags; + /* NDIS >= 6.1 */ + struct ndis_ipsecv2_offload ipsecv2; + /* NDIS >= 6.30 */ + struct ndis_rsc_offload rsc; + struct ndis_encap_offload encap_gre; +}; + +#define NDIS_OFFLOAD_SIZE sizeof(struct ndis_offload) +#define NDIS_OFFLOAD_SIZE_6_0 offsetof(struct ndis_offload, ipsecv2) +#define NDIS_OFFLOAD_SIZE_6_1 offsetof(struct ndis_offload, rsc) + struct ndis_offload_params { - struct ndis_oject_header header; + struct ndis_object_header header; u8 ip_v4_csum; u8 tcp_ip_v4_csum; u8 udp_ip_v4_csum; @@ -1296,15 +1428,10 @@ struct rndis_message { #define NDIS_PACKET_TYPE_FUNCTIONAL 0x00000400 #define NDIS_PACKET_TYPE_MAC_FRAME 0x00000800 -#define INFO_IPV4 2 -#define INFO_IPV6 4 -#define INFO_TCP 2 -#define INFO_UDP 4 - #define TRANSPORT_INFO_NOT_IP 0 -#define TRANSPORT_INFO_IPV4_TCP ((INFO_IPV4 << 16) | INFO_TCP) -#define TRANSPORT_INFO_IPV4_UDP ((INFO_IPV4 << 16) | INFO_UDP) -#define TRANSPORT_INFO_IPV6_TCP ((INFO_IPV6 << 16) | INFO_TCP) -#define TRANSPORT_INFO_IPV6_UDP ((INFO_IPV6 << 16) | INFO_UDP) +#define TRANSPORT_INFO_IPV4_TCP 0x01 +#define TRANSPORT_INFO_IPV4_UDP 0x02 +#define TRANSPORT_INFO_IPV6_TCP 0x10 +#define TRANSPORT_INFO_IPV6_UDP 0x20 #endif /* _HYPERV_NET_H */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index eebeb9378bac..f26d9d2d475f 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -42,14 +42,6 @@ #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) -#define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ - NETIF_F_SG | \ - NETIF_F_TSO | \ - NETIF_F_TSO6 | \ - NETIF_F_HW_CSUM) - -/* Restrict GSO size to account for NVGRE */ -#define NETVSC_GSO_MAX_SIZE 62768 static int ring_size = 128; module_param(ring_size, int, S_IRUGO); @@ -323,33 +315,25 @@ static int netvsc_get_slots(struct sk_buff *skb) return slots + frag_slots; } -static u32 get_net_transport_info(struct sk_buff *skb, u32 *trans_off) +static u32 net_checksum_info(struct sk_buff *skb) { - u32 ret_val = TRANSPORT_INFO_NOT_IP; - - if ((eth_hdr(skb)->h_proto != htons(ETH_P_IP)) && - (eth_hdr(skb)->h_proto != htons(ETH_P_IPV6))) { - goto not_ip; - } - - *trans_off = skb_transport_offset(skb); + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *ip = ip_hdr(skb); - if ((eth_hdr(skb)->h_proto == htons(ETH_P_IP))) { - struct iphdr *iphdr = ip_hdr(skb); - - if (iphdr->protocol == IPPROTO_TCP) - ret_val = TRANSPORT_INFO_IPV4_TCP; - else if (iphdr->protocol == IPPROTO_UDP) - ret_val = TRANSPORT_INFO_IPV4_UDP; + if (ip->protocol == IPPROTO_TCP) + return TRANSPORT_INFO_IPV4_TCP; + else if (ip->protocol == IPPROTO_UDP) + return TRANSPORT_INFO_IPV4_UDP; } else { - if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) - ret_val = TRANSPORT_INFO_IPV6_TCP; + struct ipv6hdr *ip6 = ipv6_hdr(skb); + + if (ip6->nexthdr == IPPROTO_TCP) + return TRANSPORT_INFO_IPV6_TCP; else if (ipv6_hdr(skb)->nexthdr == IPPROTO_UDP) - ret_val = TRANSPORT_INFO_IPV6_UDP; + return TRANSPORT_INFO_IPV6_UDP; } -not_ip: - return ret_val; + return TRANSPORT_INFO_NOT_IP; } static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) @@ -362,9 +346,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct rndis_packet *rndis_pkt; u32 rndis_msg_size; struct rndis_per_packet_info *ppi; - struct ndis_tcp_ip_checksum_info *csum_info; - int hdr_offset; - u32 net_trans_info; u32 hash; u32 skb_length; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; @@ -445,13 +426,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) VLAN_PRIO_SHIFT; } - net_trans_info = get_net_transport_info(skb, &hdr_offset); - - /* - * Setup the sendside checksum offload only if this is not a - * GSO packet. - */ - if ((net_trans_info & (INFO_TCP | INFO_UDP)) && skb_is_gso(skb)) { + if (skb_is_gso(skb)) { struct ndis_tcp_lso_info *lso_info; rndis_msg_size += NDIS_LSO_PPI_SIZE; @@ -462,7 +437,7 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ppi->ppi_offset); lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; - if (net_trans_info & (INFO_IPV4 << 16)) { + if (skb->protocol == htons(ETH_P_IP)) { lso_info->lso_v2_transmit.ip_version = NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; ip_hdr(skb)->tot_len = 0; @@ -478,10 +453,12 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); } - lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; + lso_info->lso_v2_transmit.tcp_header_offset = skb_transport_offset(skb); lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { - if (net_trans_info & INFO_TCP) { + if (net_checksum_info(skb) & net_device_ctx->tx_checksum_mask) { + struct ndis_tcp_ip_checksum_info *csum_info; + rndis_msg_size += NDIS_CSUM_PPI_SIZE; ppi = init_ppi_data(rndis_msg, NDIS_CSUM_PPI_SIZE, TCPIP_CHKSUM_PKTINFO); @@ -489,15 +466,25 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) csum_info = (struct ndis_tcp_ip_checksum_info *)((void *)ppi + ppi->ppi_offset); - if (net_trans_info & (INFO_IPV4 << 16)) + csum_info->transmit.tcp_header_offset = skb_transport_offset(skb); + + if (skb->protocol == htons(ETH_P_IP)) { csum_info->transmit.is_ipv4 = 1; - else + + if (ip_hdr(skb)->protocol == IPPROTO_TCP) + csum_info->transmit.tcp_checksum = 1; + else + csum_info->transmit.udp_checksum = 1; + } else { csum_info->transmit.is_ipv6 = 1; - csum_info->transmit.tcp_checksum = 1; - csum_info->transmit.tcp_header_offset = hdr_offset; + if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP) + csum_info->transmit.tcp_checksum = 1; + else + csum_info->transmit.udp_checksum = 1; + } } else { - /* UDP checksum (and other) offload is not supported. */ + /* Can't do offload of this type of checksum */ if (skb_checksum_help(skb)) goto drop; } @@ -1372,10 +1359,6 @@ static int netvsc_probe(struct hv_device *dev, INIT_LIST_HEAD(&net_device_ctx->reconfig_events); net->netdev_ops = &device_ops; - - net->hw_features = NETVSC_HW_FEATURES; - net->features = NETVSC_HW_FEATURES | NETIF_F_HW_VLAN_CTAG_TX; - net->ethtool_ops = ðtool_ops; SET_NETDEV_DEV(net, &dev->device); @@ -1395,10 +1378,15 @@ static int netvsc_probe(struct hv_device *dev, } memcpy(net->dev_addr, device_info.mac_adr, ETH_ALEN); + /* hw_features computed in rndis_filter_device_add */ + net->features = net->hw_features | + NETIF_F_HIGHDMA | NETIF_F_SG | + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + net->vlan_features = net->features; + nvdev = net_device_ctx->nvdev; netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); - netif_set_gso_max_size(net, NETVSC_GSO_MAX_SIZE); /* MTU range: 68 - 1500 or 65521 */ net->min_mtu = NETVSC_MTU_MIN; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 113c7f4d1590..8f04be1d7421 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -485,7 +485,35 @@ static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, query->info_buflen = 0; query->dev_vc_handle = 0; - if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { + if (oid == OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES) { + struct net_device_context *ndevctx = netdev_priv(dev->ndev); + struct netvsc_device *nvdev = ndevctx->nvdev; + struct ndis_offload *hwcaps; + u32 nvsp_version = nvdev->nvsp_version; + u8 ndis_rev; + size_t size; + + if (nvsp_version >= NVSP_PROTOCOL_VERSION_5) { + ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_3; + size = NDIS_OFFLOAD_SIZE; + } else if (nvsp_version >= NVSP_PROTOCOL_VERSION_4) { + ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_2; + size = NDIS_OFFLOAD_SIZE_6_1; + } else { + ndis_rev = NDIS_OFFLOAD_PARAMETERS_REVISION_1; + size = NDIS_OFFLOAD_SIZE_6_0; + } + + request->request_msg.msg_len += size; + query->info_buflen = size; + hwcaps = (struct ndis_offload *) + ((unsigned long)query + query->info_buf_offset); + + hwcaps->header.type = NDIS_OBJECT_TYPE_OFFLOAD; + hwcaps->header.revision = ndis_rev; + hwcaps->header.size = size; + + } else if (oid == OID_GEN_RECEIVE_SCALE_CAPABILITIES) { struct ndis_recv_scale_cap *cap; request->request_msg.msg_len += @@ -526,6 +554,44 @@ cleanup: return ret; } +/* Get the hardware offload capabilities */ +static int +rndis_query_hwcaps(struct rndis_device *dev, struct ndis_offload *caps) +{ + u32 caps_len = sizeof(*caps); + int ret; + + memset(caps, 0, sizeof(*caps)); + + ret = rndis_filter_query_device(dev, + OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, + caps, &caps_len); + if (ret) + return ret; + + if (caps->header.type != NDIS_OBJECT_TYPE_OFFLOAD) { + netdev_warn(dev->ndev, "invalid NDIS objtype %#x\n", + caps->header.type); + return -EINVAL; + } + + if (caps->header.revision < NDIS_OFFLOAD_PARAMETERS_REVISION_1) { + netdev_warn(dev->ndev, "invalid NDIS objrev %x\n", + caps->header.revision); + return -EINVAL; + } + + if (caps->header.size > caps_len || + caps->header.size < NDIS_OFFLOAD_SIZE_6_0) { + netdev_warn(dev->ndev, + "invalid NDIS objsize %u, data size %u\n", + caps->header.size, caps_len); + return -EINVAL; + } + + return 0; +} + static int rndis_filter_query_device_mac(struct rndis_device *dev) { u32 size = ETH_ALEN; @@ -974,10 +1040,12 @@ int rndis_filter_device_add(struct hv_device *dev, struct netvsc_device *net_device; struct rndis_device *rndis_device; struct netvsc_device_info *device_info = additional_info; + struct ndis_offload hwcaps; struct ndis_offload_params offloads; struct nvsp_message *init_packet; struct ndis_recv_scale_cap rsscap; u32 rsscap_size = sizeof(struct ndis_recv_scale_cap); + unsigned int gso_max_size = GSO_MAX_SIZE; u32 mtu, size; u32 num_rss_qs; u32 sc_delta; @@ -1034,19 +1102,65 @@ int rndis_filter_device_add(struct hv_device *dev, memcpy(device_info->mac_adr, rndis_device->hw_mac_adr, ETH_ALEN); - /* Turn on the offloads; the host supports all of the relevant - * offloads. - */ + /* Find HW offload capabilities */ + ret = rndis_query_hwcaps(rndis_device, &hwcaps); + if (ret != 0) { + rndis_filter_device_remove(dev); + return ret; + } + + /* A value of zero means "no change"; now turn on what we want. */ memset(&offloads, 0, sizeof(struct ndis_offload_params)); - /* A value of zero means "no change"; now turn on what we - * want. - */ - offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; - offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; - offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; - offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; - offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; - offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; + + /* Linux does not care about IP checksum, always does in kernel */ + offloads.ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_DISABLED; + + /* Compute tx offload settings based on hw capabilities */ + net->hw_features = NETIF_F_RXCSUM; + + if ((hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_ALL_TCP4) == NDIS_TXCSUM_ALL_TCP4) { + /* Can checksum TCP */ + net->hw_features |= NETIF_F_IP_CSUM; + net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV4_TCP; + + offloads.tcp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + + if (hwcaps.lsov2.ip4_encap & NDIS_OFFLOAD_ENCAP_8023) { + offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; + net->hw_features |= NETIF_F_TSO; + + if (hwcaps.lsov2.ip4_maxsz < gso_max_size) + gso_max_size = hwcaps.lsov2.ip4_maxsz; + } + + if (hwcaps.csum.ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) { + offloads.udp_ip_v4_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV4_UDP; + } + } + + if ((hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_ALL_TCP6) == NDIS_TXCSUM_ALL_TCP6) { + net->hw_features |= NETIF_F_IPV6_CSUM; + + offloads.tcp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV6_TCP; + + if ((hwcaps.lsov2.ip6_encap & NDIS_OFFLOAD_ENCAP_8023) && + (hwcaps.lsov2.ip6_opts & NDIS_LSOV2_CAP_IP6) == NDIS_LSOV2_CAP_IP6) { + offloads.lso_v2_ipv6 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; + net->hw_features |= NETIF_F_TSO6; + + if (hwcaps.lsov2.ip6_maxsz < gso_max_size) + gso_max_size = hwcaps.lsov2.ip6_maxsz; + } + + if (hwcaps.csum.ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) { + offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; + net_device_ctx->tx_checksum_mask |= TRANSPORT_INFO_IPV6_UDP; + } + } + + netif_set_gso_max_size(net, gso_max_size); ret = rndis_filter_set_offload_params(net, &offloads); if (ret) -- cgit From 962f3fee83a4ef9010ae84dc43ae7aecb572e2a9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:02 -0800 Subject: netvsc: add ethtool ops to get/set RSS key For some cases it is useful to be able to change RSS key value. For example, replacing RSS key with a symmetric hash. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 7 +++++- drivers/net/hyperv/netvsc_drv.c | 46 +++++++++++++++++++++++++++++++++++++++ drivers/net/hyperv/rndis_filter.c | 34 +++++++++++++++-------------- 3 files changed, 70 insertions(+), 17 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index ccf94c16084c..5bf21418bbe5 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -156,6 +156,8 @@ enum rndis_device_state { RNDIS_DEV_DATAINITIALIZED, }; +#define NETVSC_HASH_KEYLEN 40 + struct rndis_device { struct net_device *ndev; @@ -166,7 +168,8 @@ struct rndis_device { spinlock_t request_lock; struct list_head req_list; - unsigned char hw_mac_adr[ETH_ALEN]; + u8 hw_mac_adr[ETH_ALEN]; + u8 rss_key[NETVSC_HASH_KEYLEN]; }; @@ -194,6 +197,8 @@ int rndis_filter_close(struct netvsc_device *nvdev); int rndis_filter_device_add(struct hv_device *dev, void *additional_info); void rndis_filter_device_remove(struct hv_device *dev); +int rndis_filter_set_rss_param(struct rndis_device *rdev, + const u8 *key, int num_queue); int rndis_filter_receive(struct hv_device *dev, struct hv_netvsc_packet *pkt, void **data, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 6ab8b52af965..40a88387a8f5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1059,6 +1059,48 @@ static void netvsc_poll_controller(struct net_device *net) } #endif +static u32 netvsc_get_rxfh_key_size(struct net_device *dev) +{ + return NETVSC_HASH_KEYLEN; +} + +static u32 netvsc_rss_indir_size(struct net_device *dev) +{ + return 0; +} + +static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *ndev = ndc->nvdev; + struct rndis_device *rndis_dev = ndev->extension; + + if (hfunc) + *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ + + if (key) + memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN); + + return 0; +} + +static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *ndev = ndc->nvdev; + struct rndis_device *rndis_dev = ndev->extension; + + if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) + return -EOPNOTSUPP; + + if (!key || memcmp(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN) == 0) + return 0; /* no change */ + + return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn); +} + static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, .get_link = ethtool_op_get_link, @@ -1071,6 +1113,10 @@ static const struct ethtool_ops ethtool_ops = { .get_settings = netvsc_get_settings, .set_settings = netvsc_set_settings, .get_rxnfc = netvsc_get_rxnfc, + .get_rxfh_key_size = netvsc_get_rxfh_key_size, + .get_rxfh_indir_size = netvsc_rss_indir_size, + .get_rxfh = netvsc_get_rxfh, + .set_rxfh = netvsc_set_rxfh, }; static const struct net_device_ops device_ops = { diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 8f04be1d7421..ac08aa1b089a 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -57,6 +57,14 @@ struct rndis_request { u8 request_ext[RNDIS_EXT_LEN]; }; +static const u8 netvsc_hash_key[NETVSC_HASH_KEYLEN] = { + 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, + 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, + 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, + 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, + 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa +}; + static struct rndis_device *get_rndis_device(void) { struct rndis_device *device; @@ -729,23 +737,15 @@ cleanup: return ret; } -static const u8 netvsc_hash_key[] = { - 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, - 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, - 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, - 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, - 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa -}; -#define HASH_KEYLEN ARRAY_SIZE(netvsc_hash_key) - -static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) +int rndis_filter_set_rss_param(struct rndis_device *rdev, + const u8 *rss_key, int num_queue) { struct net_device *ndev = rdev->ndev; struct rndis_request *request; struct rndis_set_request *set; struct rndis_set_complete *set_complete; u32 extlen = sizeof(struct ndis_recv_scale_param) + - 4*ITAB_NUM + HASH_KEYLEN; + 4 * ITAB_NUM + NETVSC_HASH_KEYLEN; struct ndis_recv_scale_param *rssp; u32 *itab; u8 *keyp; @@ -773,7 +773,7 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) NDIS_HASH_TCP_IPV6; rssp->indirect_tabsize = 4*ITAB_NUM; rssp->indirect_taboffset = sizeof(struct ndis_recv_scale_param); - rssp->hashkey_size = HASH_KEYLEN; + rssp->hashkey_size = NETVSC_HASH_KEYLEN; rssp->kashkey_offset = rssp->indirect_taboffset + rssp->indirect_tabsize; @@ -784,8 +784,7 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) /* Set hask key values */ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); - for (i = 0; i < HASH_KEYLEN; i++) - keyp[i] = netvsc_hash_key[i]; + memcpy(keyp, rss_key, NETVSC_HASH_KEYLEN); ret = rndis_filter_send_request(rdev, request); if (ret != 0) @@ -793,7 +792,9 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) wait_for_completion(&request->wait_event); set_complete = &request->response_msg.msg.set_complete; - if (set_complete->status != RNDIS_STATUS_SUCCESS) { + if (set_complete->status == RNDIS_STATUS_SUCCESS) + memcpy(rdev->rss_key, rss_key, NETVSC_HASH_KEYLEN); + else { netdev_err(ndev, "Fail to set RSS parameters:0x%x\n", set_complete->status); ret = -EINVAL; @@ -1235,7 +1236,8 @@ int rndis_filter_device_add(struct hv_device *dev, net_device->num_chn = 1 + init_packet->msg.v5_msg.subchn_comp.num_subchannels; - ret = rndis_filter_set_rss_param(rndis_device, net_device->num_chn); + ret = rndis_filter_set_rss_param(rndis_device, netvsc_hash_key, + net_device->num_chn); /* * Set the number of sub-channels to be received. -- cgit From 2b01888d1b453096b5b13c0d4b73e630411198b4 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:03 -0800 Subject: netvsc: allow more flexible setting of number of channels This allows for number of channels to be managed in a manner similar to existing hardware drivers. It also removes the restriction of maximum 8 channels and allows as many as the host will allow. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc_drv.c | 128 +++++++++++++++------------------------- 2 files changed, 50 insertions(+), 79 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 5bf21418bbe5..5a652eb8a619 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -628,6 +628,7 @@ struct nvsp_message { #define VRSS_SEND_TAB_SIZE 16 #define VRSS_CHANNEL_MAX 64 +#define VRSS_CHANNEL_DEFAULT 8 #define RNDIS_MAX_PKT_DEFAULT 8 #define RNDIS_PKT_ALIGN_DEFAULT 8 diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 40a88387a8f5..ead472150742 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -47,8 +47,6 @@ static int ring_size = 128; module_param(ring_size, int, S_IRUGO); MODULE_PARM_DESC(ring_size, "Ring buffer size (# of pages)"); -static int max_num_vrss_chns = 8; - static const u32 default_msg = NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN | NETIF_MSG_RX_ERR | @@ -709,102 +707,76 @@ static void netvsc_get_channels(struct net_device *net, } } +static int netvsc_set_queues(struct net_device *net, struct hv_device *dev, + u32 num_chn) +{ + struct netvsc_device_info device_info; + int ret; + + memset(&device_info, 0, sizeof(device_info)); + device_info.num_chn = num_chn; + device_info.ring_size = ring_size; + device_info.max_num_vrss_chns = num_chn; + + ret = rndis_filter_device_add(dev, &device_info); + if (ret) + return ret; + + ret = netif_set_real_num_tx_queues(net, num_chn); + if (ret) + return ret; + + ret = netif_set_real_num_rx_queues(net, num_chn); + + return ret; +} + static int netvsc_set_channels(struct net_device *net, struct ethtool_channels *channels) { struct net_device_context *net_device_ctx = netdev_priv(net); struct hv_device *dev = net_device_ctx->device_ctx; struct netvsc_device *nvdev = net_device_ctx->nvdev; - struct netvsc_device_info device_info; - u32 num_chn; - u32 max_chn; - int ret = 0; - bool recovering = false; + unsigned int count = channels->combined_count; + int ret; + + /* We do not support separate count for rx, tx, or other */ + if (count == 0 || + channels->rx_count || channels->tx_count || channels->other_count) + return -EINVAL; + + if (count > net->num_tx_queues || count > net->num_rx_queues) + return -EINVAL; if (net_device_ctx->start_remove || !nvdev || nvdev->destroy) return -ENODEV; - num_chn = nvdev->num_chn; - max_chn = min_t(u32, nvdev->max_chn, num_online_cpus()); - - if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) { - pr_info("vRSS unsupported before NVSP Version 5\n"); + if (nvdev->nvsp_version < NVSP_PROTOCOL_VERSION_5) return -EINVAL; - } - /* We do not support rx, tx, or other */ - if (!channels || - channels->rx_count || - channels->tx_count || - channels->other_count || - (channels->combined_count < 1)) + if (count > nvdev->max_chn) return -EINVAL; - if (channels->combined_count > max_chn) { - pr_info("combined channels too high, using %d\n", max_chn); - channels->combined_count = max_chn; - } - ret = netvsc_close(net); if (ret) - goto out; + return ret; - do_set: net_device_ctx->start_remove = true; rndis_filter_device_remove(dev); - nvdev->num_chn = channels->combined_count; - - memset(&device_info, 0, sizeof(device_info)); - device_info.num_chn = nvdev->num_chn; /* passed to RNDIS */ - device_info.ring_size = ring_size; - device_info.max_num_vrss_chns = max_num_vrss_chns; - - ret = rndis_filter_device_add(dev, &device_info); - if (ret) { - if (recovering) { - netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - return ret; - } - goto recover; - } - - nvdev = net_device_ctx->nvdev; - - ret = netif_set_real_num_tx_queues(net, nvdev->num_chn); - if (ret) { - if (recovering) { - netdev_err(net, "could not set tx queue count (ret %d)\n", ret); - return ret; - } - goto recover; - } - - ret = netif_set_real_num_rx_queues(net, nvdev->num_chn); - if (ret) { - if (recovering) { - netdev_err(net, "could not set rx queue count (ret %d)\n", ret); - return ret; - } - goto recover; - } + ret = netvsc_set_queues(net, dev, count); + if (ret == 0) + nvdev->num_chn = count; + else + netvsc_set_queues(net, dev, nvdev->num_chn); - out: netvsc_open(net); net_device_ctx->start_remove = false; + /* We may have missed link change notifications */ schedule_delayed_work(&net_device_ctx->dwork, 0); return ret; - - recover: - /* If the above failed, we attempt to recover through the same - * process but with the original number of channels. - */ - netdev_err(net, "could not set channels, recovering\n"); - recovering = true; - channels->combined_count = num_chn; - goto do_set; } static bool netvsc_validate_ethtool_ss_cmd(const struct ethtool_cmd *cmd) @@ -865,8 +837,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) struct netvsc_device *nvdev = ndevctx->nvdev; struct hv_device *hdev = ndevctx->device_ctx; struct netvsc_device_info device_info; - u32 num_chn; - int ret = 0; + int ret; if (ndevctx->start_remove || !nvdev || nvdev->destroy) return -ENODEV; @@ -875,8 +846,6 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) if (ret) goto out; - num_chn = nvdev->num_chn; - ndevctx->start_remove = true; rndis_filter_device_remove(hdev); @@ -884,8 +853,8 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; - device_info.num_chn = num_chn; - device_info.max_num_vrss_chns = max_num_vrss_chns; + device_info.num_chn = nvdev->num_chn; + device_info.max_num_vrss_chns = nvdev->num_chn; rndis_filter_device_add(hdev, &device_info); out: @@ -1410,7 +1379,7 @@ static int netvsc_probe(struct hv_device *dev, int ret; net = alloc_etherdev_mq(sizeof(struct net_device_context), - num_online_cpus()); + VRSS_CHANNEL_MAX); if (!net) return -ENOMEM; @@ -1457,7 +1426,8 @@ static int netvsc_probe(struct hv_device *dev, /* Notify the netvsc driver of the new device */ memset(&device_info, 0, sizeof(device_info)); device_info.ring_size = ring_size; - device_info.max_num_vrss_chns = max_num_vrss_chns; + device_info.max_num_vrss_chns = min_t(u32, VRSS_CHANNEL_DEFAULT, + num_online_cpus()); ret = rndis_filter_device_add(dev, &device_info); if (ret != 0) { netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); -- cgit From ff4a44199012ee32839278cb84f82ae32c01dbc9 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:04 -0800 Subject: netvsc: allow get/set of RSS indirection table Allow setting receive indirection table. Also uses the system standard for initialization. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 + drivers/net/hyperv/netvsc_drv.c | 26 +++++++++++++++++++++++--- drivers/net/hyperv/rndis_filter.c | 9 +++++++-- 3 files changed, 31 insertions(+), 5 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 5a652eb8a619..db11f7ab67a8 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -170,6 +170,7 @@ struct rndis_device { u8 hw_mac_adr[ETH_ALEN]; u8 rss_key[NETVSC_HASH_KEYLEN]; + u16 ind_table[ITAB_NUM]; }; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index ead472150742..a09602e59cf5 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -1035,7 +1035,7 @@ static u32 netvsc_get_rxfh_key_size(struct net_device *dev) static u32 netvsc_rss_indir_size(struct net_device *dev) { - return 0; + return ITAB_NUM; } static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, @@ -1044,10 +1044,16 @@ static int netvsc_get_rxfh(struct net_device *dev, u32 *indir, u8 *key, struct net_device_context *ndc = netdev_priv(dev); struct netvsc_device *ndev = ndc->nvdev; struct rndis_device *rndis_dev = ndev->extension; + int i; if (hfunc) *hfunc = ETH_RSS_HASH_TOP; /* Toeplitz */ + if (indir) { + for (i = 0; i < ITAB_NUM; i++) + indir[i] = rndis_dev->ind_table[i]; + } + if (key) memcpy(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN); @@ -1060,12 +1066,26 @@ static int netvsc_set_rxfh(struct net_device *dev, const u32 *indir, struct net_device_context *ndc = netdev_priv(dev); struct netvsc_device *ndev = ndc->nvdev; struct rndis_device *rndis_dev = ndev->extension; + int i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (!key || memcmp(key, rndis_dev->rss_key, NETVSC_HASH_KEYLEN) == 0) - return 0; /* no change */ + if (indir) { + for (i = 0; i < ITAB_NUM; i++) + if (indir[i] >= dev->num_rx_queues) + return -EINVAL; + + for (i = 0; i < ITAB_NUM; i++) + rndis_dev->ind_table[i] = indir[i]; + } + + if (!key) { + if (!indir) + return 0; + + key = rndis_dev->rss_key; + } return rndis_filter_set_rss_param(rndis_dev, key, ndev->num_chn); } diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index ac08aa1b089a..8d2b078403de 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -780,7 +780,7 @@ int rndis_filter_set_rss_param(struct rndis_device *rdev, /* Set indirection table entries */ itab = (u32 *)(rssp + 1); for (i = 0; i < ITAB_NUM; i++) - itab[i] = i % num_queue; + itab[i] = rdev->ind_table[i]; /* Set hask key values */ keyp = (u8 *)((unsigned long)rssp + rssp->kashkey_offset); @@ -1035,7 +1035,6 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) int rndis_filter_device_add(struct hv_device *dev, void *additional_info) { - int ret; struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx = netdev_priv(net); struct netvsc_device *net_device; @@ -1053,6 +1052,7 @@ int rndis_filter_device_add(struct hv_device *dev, const struct cpumask *node_cpu_mask; u32 num_possible_rss_qs; unsigned long flags; + int i, ret; rndis_device = get_rndis_device(); if (!rndis_device) @@ -1206,6 +1206,11 @@ int rndis_filter_device_add(struct hv_device *dev, net_device->num_chn = min(num_possible_rss_qs, num_rss_qs); num_rss_qs = net_device->num_chn - 1; + + for (i = 0; i < ITAB_NUM; i++) + rndis_device->ind_table[i] = ethtool_rxfh_indir_default(i, + net_device->num_chn); + net_device->num_sc_offered = num_rss_qs; if (net_device->num_chn == 1) -- cgit From b8b835a89b2f7a7fe681983dfe5c489cb9ad9500 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:07 -0800 Subject: netvsc: group all per-channel state together Put all the per-channel state together in one data struct. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 14 +++++--- drivers/net/hyperv/netvsc.c | 73 +++++++++++++++++++-------------------- drivers/net/hyperv/netvsc_drv.c | 5 ++- drivers/net/hyperv/rndis_filter.c | 6 ++-- 4 files changed, 51 insertions(+), 47 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index db11f7ab67a8..fb73caad0965 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -714,6 +714,14 @@ struct net_device_context { u32 vf_serial; }; +/* Per channel data */ +struct netvsc_channel { + struct vmbus_channel *channel; + struct multi_send_data msd; + struct multi_recv_comp mrc; + atomic_t queue_sends; +}; + /* Per netvsc device */ struct netvsc_device { u32 nvsp_version; @@ -744,27 +752,25 @@ struct netvsc_device { struct nvsp_message revoke_packet; - struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX]; u32 send_table[VRSS_SEND_TAB_SIZE]; u32 max_chn; u32 num_chn; spinlock_t sc_lock; /* Protects num_sc_offered variable */ u32 num_sc_offered; - atomic_t queue_sends[VRSS_CHANNEL_MAX]; /* Holds rndis device info */ void *extension; int ring_size; - struct multi_send_data msd[VRSS_CHANNEL_MAX]; u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ - struct multi_recv_comp mrc[VRSS_CHANNEL_MAX]; atomic_t num_outstanding_recvs; atomic_t open_cnt; + + struct netvsc_channel chan_table[VRSS_CHANNEL_MAX]; }; static inline struct netvsc_device * diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 5e90c7fb1bd2..4dd2a1f2da11 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -67,8 +67,8 @@ static struct netvsc_device *alloc_net_device(void) if (!net_device) return NULL; - net_device->mrc[0].buf = vzalloc(NETVSC_RECVSLOT_MAX * - sizeof(struct recv_comp_data)); + net_device->chan_table[0].mrc.buf + = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data)); init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; @@ -85,7 +85,7 @@ static void free_netvsc_device(struct netvsc_device *nvdev) int i; for (i = 0; i < VRSS_CHANNEL_MAX; i++) - vfree(nvdev->mrc[i].buf); + vfree(nvdev->chan_table[i].mrc.buf); kfree(nvdev); } @@ -632,7 +632,9 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, num_outstanding_sends = atomic_dec_return(&net_device->num_outstanding_sends); - queue_sends = atomic_dec_return(&net_device->queue_sends[q_idx]); + + queue_sends = + atomic_dec_return(&net_device->chan_table[q_idx].queue_sends); if (net_device->destroy && num_outstanding_sends == 0) wake_up(&net_device->wait_drain); @@ -757,9 +759,11 @@ static inline int netvsc_send_pkt( struct sk_buff *skb) { struct nvsp_message nvmsg; - u16 q_idx = packet->q_idx; - struct vmbus_channel *out_channel = net_device->chn_table[q_idx]; + struct netvsc_channel *nvchan + = &net_device->chan_table[packet->q_idx]; + struct vmbus_channel *out_channel = nvchan->channel; struct net_device *ndev = hv_get_drvdata(device); + struct netdev_queue *txq = netdev_get_tx_queue(ndev, packet->q_idx); u64 req_id; int ret; struct hv_page_buffer *pgbuf; @@ -820,22 +824,18 @@ static inline int netvsc_send_pkt( if (ret == 0) { atomic_inc(&net_device->num_outstanding_sends); - atomic_inc(&net_device->queue_sends[q_idx]); + atomic_inc_return(&nvchan->queue_sends); if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { - netif_tx_stop_queue(netdev_get_tx_queue(ndev, q_idx)); + netif_tx_stop_queue(txq); - if (atomic_read(&net_device-> - queue_sends[q_idx]) < 1) - netif_tx_wake_queue(netdev_get_tx_queue( - ndev, q_idx)); + if (atomic_read(&nvchan->queue_sends) < 1) + netif_tx_wake_queue(txq); } } else if (ret == -EAGAIN) { - netif_tx_stop_queue(netdev_get_tx_queue( - ndev, q_idx)); - if (atomic_read(&net_device->queue_sends[q_idx]) < 1) { - netif_tx_wake_queue(netdev_get_tx_queue( - ndev, q_idx)); + netif_tx_stop_queue(txq); + if (atomic_read(&nvchan->queue_sends) < 1) { + netif_tx_wake_queue(txq); ret = -ENOSPC; } } else { @@ -866,8 +866,7 @@ int netvsc_send(struct hv_device *device, { struct netvsc_device *net_device; int ret = 0; - struct vmbus_channel *out_channel; - u16 q_idx = packet->q_idx; + struct netvsc_channel *nvchan; u32 pktlen = packet->total_data_buflen, msd_len = 0; unsigned int section_index = NETVSC_INVALID_INDEX; struct multi_send_data *msdp; @@ -887,8 +886,7 @@ int netvsc_send(struct hv_device *device, if (!net_device->send_section_map) return -EAGAIN; - out_channel = net_device->chn_table[q_idx]; - + nvchan = &net_device->chan_table[packet->q_idx]; packet->send_buf_index = NETVSC_INVALID_INDEX; packet->cp_partial = false; @@ -900,9 +898,8 @@ int netvsc_send(struct hv_device *device, goto send_now; } - msdp = &net_device->msd[q_idx]; - /* batch packets in send buffer if possible */ + msdp = &nvchan->msd; if (msdp->pkt) msd_len = msdp->pkt->total_data_buflen; @@ -1003,8 +1000,9 @@ static int netvsc_send_recv_completion(struct vmbus_channel *channel, static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx, u32 *filled, u32 *avail) { - u32 first = nvdev->mrc[q_idx].first; - u32 next = nvdev->mrc[q_idx].next; + struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; + u32 first = mrc->first; + u32 next = mrc->next; *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next : next - first; @@ -1016,26 +1014,26 @@ static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx, static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx) { + struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; u32 filled, avail; - if (!nvdev->mrc[q_idx].buf) + if (unlikely(!mrc->buf)) return NULL; count_recv_comp_slot(nvdev, q_idx, &filled, &avail); if (!filled) return NULL; - return nvdev->mrc[q_idx].buf + nvdev->mrc[q_idx].first * - sizeof(struct recv_comp_data); + return mrc->buf + mrc->first * sizeof(struct recv_comp_data); } /* Put the first filled slot back to available pool */ static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx) { + struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; int num_recv; - nvdev->mrc[q_idx].first = (nvdev->mrc[q_idx].first + 1) % - NETVSC_RECVSLOT_MAX; + mrc->first = (mrc->first + 1) % NETVSC_RECVSLOT_MAX; num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs); @@ -1070,13 +1068,14 @@ static void netvsc_chk_recv_comp(struct netvsc_device *nvdev, static inline struct recv_comp_data *get_recv_comp_slot( struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx) { + struct multi_recv_comp *mrc = &nvdev->chan_table[q_idx].mrc; u32 filled, avail, next; struct recv_comp_data *rcd; - if (!nvdev->recv_section) + if (unlikely(!nvdev->recv_section)) return NULL; - if (!nvdev->mrc[q_idx].buf) + if (unlikely(!mrc->buf)) return NULL; if (atomic_read(&nvdev->num_outstanding_recvs) > @@ -1087,9 +1086,9 @@ static inline struct recv_comp_data *get_recv_comp_slot( if (!avail) return NULL; - next = nvdev->mrc[q_idx].next; - rcd = nvdev->mrc[q_idx].buf + next * sizeof(struct recv_comp_data); - nvdev->mrc[q_idx].next = (next + 1) % NETVSC_RECVSLOT_MAX; + next = mrc->next; + rcd = mrc->buf + next * sizeof(struct recv_comp_data); + mrc->next = (next + 1) % NETVSC_RECVSLOT_MAX; atomic_inc(&nvdev->num_outstanding_recvs); @@ -1159,7 +1158,7 @@ static void netvsc_receive(struct netvsc_device *net_device, channel); } - if (!net_device->mrc[q_idx].buf) { + if (!net_device->chan_table[q_idx].mrc.buf) { ret = netvsc_send_recv_completion(channel, vmxferpage_packet->d.trans_id, status); @@ -1333,7 +1332,7 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) * opened. */ for (i = 0; i < VRSS_CHANNEL_MAX; i++) - net_device->chn_table[i] = device->channel; + net_device->chan_table[i].channel = device->channel; /* Writing nvdev pointer unlocks netvsc_send(), make sure chn_table is * populated. diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index f96b44eee270..da5863641703 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -135,7 +135,7 @@ static int netvsc_close(struct net_device *net) while (true) { aread = 0; for (i = 0; i < nvdev->num_chn; i++) { - chn = nvdev->chn_table[i]; + chn = nvdev->chan_table[i].channel; if (!chn) continue; @@ -225,7 +225,7 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, q_idx = new_idx; } - if (unlikely(!nvsc_dev->chn_table[q_idx])) + if (unlikely(!nvsc_dev->chan_table[q_idx].channel)) q_idx = 0; return q_idx; @@ -545,7 +545,6 @@ no_memory: ++net_device_ctx->eth_stats.tx_no_memory; goto drop; } - /* * netvsc_linkstatus_callback - Link up/down notification */ diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index fc98d2a16ddd..0df02c9808e8 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1012,15 +1012,15 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) if (chn_index >= nvscdev->num_chn) return; - nvscdev->mrc[chn_index].buf = vzalloc(NETVSC_RECVSLOT_MAX * - sizeof(struct recv_comp_data)); + nvscdev->chan_table[chn_index].mrc.buf + = vzalloc(NETVSC_RECVSLOT_MAX * sizeof(struct recv_comp_data)); ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, nvscdev->ring_size * PAGE_SIZE, NULL, 0, netvsc_channel_cb, new_sc); if (ret == 0) - nvscdev->chn_table[chn_index] = new_sc; + nvscdev->chan_table[chn_index].channel = new_sc; spin_lock_irqsave(&nvscdev->sc_lock, flags); nvscdev->num_sc_offered--; -- cgit From dc54a08cd3620e6457382c0cd0c8f03513dd749a Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:08 -0800 Subject: netvsc: optimize receive path Do manual optimizations of receive path: - remove checks for impossible conditions (but keep checks for bad data from host) - pass argument down, rather than having callee recompute what is already known - remove indirection about receive buffer datalength - remove dependence on VLAN_TAG_PRESENCE - use _hot/_cold and likely/unlikely Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 21 +++++---- drivers/net/hyperv/netvsc.c | 74 ++++++++++++----------------- drivers/net/hyperv/netvsc_drv.c | 32 ++++++------- drivers/net/hyperv/rndis_filter.c | 99 +++++++++++++++------------------------ 4 files changed, 93 insertions(+), 133 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index fb73caad0965..4b91e37c85ca 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -119,6 +119,7 @@ struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ /* Fwd declaration */ struct ndis_tcp_ip_checksum_info; +struct ndis_pkt_8021q_info; /* * Represent netvsc packet which contains 1 RNDIS and 1 ethernet frame @@ -186,12 +187,11 @@ int netvsc_send(struct hv_device *device, struct sk_buff *skb); void netvsc_linkstatus_callback(struct hv_device *device_obj, struct rndis_message *resp); -int netvsc_recv_callback(struct hv_device *device_obj, - struct hv_netvsc_packet *packet, - void **data, - struct ndis_tcp_ip_checksum_info *csum_info, - struct vmbus_channel *channel, - u16 vlan_tci); +int netvsc_recv_callback(struct net_device *net, + struct vmbus_channel *channel, + void *data, u32 len, + const struct ndis_tcp_ip_checksum_info *csum_info, + const struct ndis_pkt_8021q_info *vlan); void netvsc_channel_cb(void *context); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); @@ -200,10 +200,11 @@ int rndis_filter_device_add(struct hv_device *dev, void rndis_filter_device_remove(struct hv_device *dev); int rndis_filter_set_rss_param(struct rndis_device *rdev, const u8 *key, int num_queue); -int rndis_filter_receive(struct hv_device *dev, - struct hv_netvsc_packet *pkt, - void **data, - struct vmbus_channel *channel); +int rndis_filter_receive(struct net_device *ndev, + struct netvsc_device *net_dev, + struct hv_device *dev, + struct vmbus_channel *channel, + void *data, u32 buflen); int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter); int rndis_filter_set_device_mac(struct net_device *ndev, char *mac); diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 4dd2a1f2da11..80eecb4f9456 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1095,51 +1095,35 @@ static inline struct recv_comp_data *get_recv_comp_slot( return rcd; } -static void netvsc_receive(struct netvsc_device *net_device, - struct vmbus_channel *channel, - struct hv_device *device, - struct vmpacket_descriptor *packet) +static void netvsc_receive(struct net_device *ndev, + struct netvsc_device *net_device, + struct net_device_context *net_device_ctx, + struct hv_device *device, + struct vmbus_channel *channel, + struct vmtransfer_page_packet_header *vmxferpage_packet, + struct nvsp_message *nvsp) { - struct vmtransfer_page_packet_header *vmxferpage_packet; - struct nvsp_message *nvsp_packet; - struct hv_netvsc_packet nv_pkt; - struct hv_netvsc_packet *netvsc_packet = &nv_pkt; + char *recv_buf = net_device->recv_buf; u32 status = NVSP_STAT_SUCCESS; int i; int count = 0; - struct net_device *ndev = hv_get_drvdata(device); - void *data; int ret; struct recv_comp_data *rcd; u16 q_idx = channel->offermsg.offer.sub_channel_index; - /* - * All inbound packets other than send completion should be xfer page - * packet - */ - if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) { - netdev_err(ndev, "Unknown packet type received - %d\n", - packet->type); - return; - } - - nvsp_packet = (struct nvsp_message *)((unsigned long)packet + - (packet->offset8 << 3)); - /* Make sure this is a valid nvsp packet */ - if (nvsp_packet->hdr.msg_type != - NVSP_MSG1_TYPE_SEND_RNDIS_PKT) { - netdev_err(ndev, "Unknown nvsp packet type received-" - " %d\n", nvsp_packet->hdr.msg_type); + if (unlikely(nvsp->hdr.msg_type != NVSP_MSG1_TYPE_SEND_RNDIS_PKT)) { + netif_err(net_device_ctx, rx_err, ndev, + "Unknown nvsp packet type received %u\n", + nvsp->hdr.msg_type); return; } - vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet; - - if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) { - netdev_err(ndev, "Invalid xfer page set id - " - "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID, - vmxferpage_packet->xfer_pageset_id); + if (unlikely(vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID)) { + netif_err(net_device_ctx, rx_err, ndev, + "Invalid xfer page set id - expecting %x got %x\n", + NETVSC_RECEIVE_BUFFER_ID, + vmxferpage_packet->xfer_pageset_id); return; } @@ -1147,15 +1131,13 @@ static void netvsc_receive(struct netvsc_device *net_device, /* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */ for (i = 0; i < count; i++) { - /* Initialize the netvsc packet */ - data = (void *)((unsigned long)net_device-> - recv_buf + vmxferpage_packet->ranges[i].byte_offset); - netvsc_packet->total_data_buflen = - vmxferpage_packet->ranges[i].byte_count; + void *data = recv_buf + + vmxferpage_packet->ranges[i].byte_offset; + u32 buflen = vmxferpage_packet->ranges[i].byte_count; /* Pass it to the upper layer */ - status = rndis_filter_receive(device, netvsc_packet, &data, - channel); + status = rndis_filter_receive(ndev, net_device, device, + channel, data, buflen); } if (!net_device->chan_table[q_idx].mrc.buf) { @@ -1234,11 +1216,10 @@ static void netvsc_process_raw_pkt(struct hv_device *device, u64 request_id, struct vmpacket_descriptor *desc) { - struct nvsp_message *nvmsg; struct net_device_context *net_device_ctx = netdev_priv(ndev); - - nvmsg = (struct nvsp_message *)((unsigned long) - desc + (desc->offset8 << 3)); + struct nvsp_message *nvmsg + = (struct nvsp_message *)((unsigned long)desc + + (desc->offset8 << 3)); switch (desc->type) { case VM_PKT_COMP: @@ -1246,7 +1227,10 @@ static void netvsc_process_raw_pkt(struct hv_device *device, break; case VM_PKT_DATA_USING_XFER_PAGES: - netvsc_receive(net_device, channel, device, desc); + netvsc_receive(ndev, net_device, net_device_ctx, + device, channel, + (struct vmtransfer_page_packet_header *)desc, + nvmsg); break; case VM_PKT_DATA_INBAND: diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index da5863641703..4bc1fdbc8cd7 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -596,13 +596,13 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, } static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, - struct hv_netvsc_packet *packet, - struct ndis_tcp_ip_checksum_info *csum_info, - void *data, u16 vlan_tci) + const struct ndis_tcp_ip_checksum_info *csum_info, + const struct ndis_pkt_8021q_info *vlan, + void *data, u32 buflen) { struct sk_buff *skb; - skb = netdev_alloc_skb_ip_align(net, packet->total_data_buflen); + skb = netdev_alloc_skb_ip_align(net, buflen); if (!skb) return skb; @@ -610,8 +610,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, * Copy to skb. This copy is needed here since the memory pointed by * hv_netvsc_packet cannot be deallocated */ - memcpy(skb_put(skb, packet->total_data_buflen), data, - packet->total_data_buflen); + memcpy(skb_put(skb, buflen), data, buflen); skb->protocol = eth_type_trans(skb, net); @@ -628,9 +627,12 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (vlan_tci & VLAN_TAG_PRESENT) + if (vlan) { + u16 vlan_tci = vlan->vlanid | (vlan->pri << VLAN_PRIO_SHIFT); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci); + } return skb; } @@ -639,14 +641,12 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, * netvsc_recv_callback - Callback when we receive a packet from the * "wire" on the specified device. */ -int netvsc_recv_callback(struct hv_device *device_obj, - struct hv_netvsc_packet *packet, - void **data, - struct ndis_tcp_ip_checksum_info *csum_info, - struct vmbus_channel *channel, - u16 vlan_tci) +int netvsc_recv_callback(struct net_device *net, + struct vmbus_channel *channel, + void *data, u32 len, + const struct ndis_tcp_ip_checksum_info *csum_info, + const struct ndis_pkt_8021q_info *vlan) { - struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); struct net_device *vf_netdev; struct sk_buff *skb; @@ -668,7 +668,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ - skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); + skb = netvsc_alloc_recv_skb(net, csum_info, vlan, data, len); if (unlikely(!skb)) { ++net->stats.rx_dropped; rcu_read_unlock(); @@ -687,7 +687,7 @@ int netvsc_recv_callback(struct hv_device *device_obj, rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; - rx_stats->bytes += packet->total_data_buflen; + rx_stats->bytes += len; if (skb->pkt_type == PACKET_BROADCAST) ++rx_stats->broadcast; diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 0df02c9808e8..decce4f8fda8 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -132,7 +132,7 @@ static void put_rndis_request(struct rndis_device *dev, } static void dump_rndis_message(struct hv_device *hv_dev, - struct rndis_message *rndis_msg) + const struct rndis_message *rndis_msg) { struct net_device *netdev = hv_get_drvdata(hv_dev); @@ -347,102 +347,78 @@ static inline void *rndis_get_ppi(struct rndis_packet *rpkt, u32 type) return NULL; } -static int rndis_filter_receive_data(struct rndis_device *dev, - struct rndis_message *msg, - struct hv_netvsc_packet *pkt, - void **data, - struct vmbus_channel *channel) +static int rndis_filter_receive_data(struct net_device *ndev, + struct rndis_device *dev, + struct rndis_message *msg, + struct vmbus_channel *channel, + void *data, u32 data_buflen) { - struct rndis_packet *rndis_pkt; + struct rndis_packet *rndis_pkt = &msg->msg.pkt; + const struct ndis_tcp_ip_checksum_info *csum_info; + const struct ndis_pkt_8021q_info *vlan; u32 data_offset; - struct ndis_pkt_8021q_info *vlan; - struct ndis_tcp_ip_checksum_info *csum_info; - u16 vlan_tci = 0; - struct net_device_context *net_device_ctx = netdev_priv(dev->ndev); - - rndis_pkt = &msg->msg.pkt; /* Remove the rndis header and pass it back up the stack */ data_offset = RNDIS_HEADER_SIZE + rndis_pkt->data_offset; - pkt->total_data_buflen -= data_offset; + data_buflen -= data_offset; /* * Make sure we got a valid RNDIS message, now total_data_buflen * should be the data packet size plus the trailer padding size */ - if (pkt->total_data_buflen < rndis_pkt->data_len) { + if (unlikely(data_buflen < rndis_pkt->data_len)) { netdev_err(dev->ndev, "rndis message buffer " "overflow detected (got %u, min %u)" "...dropping this message!\n", - pkt->total_data_buflen, rndis_pkt->data_len); + data_buflen, rndis_pkt->data_len); return NVSP_STAT_FAIL; } + vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); + /* * Remove the rndis trailer padding from rndis packet message * rndis_pkt->data_len tell us the real data length, we only copy * the data packet to the stack, without the rndis trailer padding */ - pkt->total_data_buflen = rndis_pkt->data_len; - *data = (void *)((unsigned long)(*data) + data_offset); - - vlan = rndis_get_ppi(rndis_pkt, IEEE_8021Q_INFO); - if (vlan) { - vlan_tci = VLAN_TAG_PRESENT | vlan->vlanid | - (vlan->pri << VLAN_PRIO_SHIFT); - } - + data = (void *)((unsigned long)data + data_offset); csum_info = rndis_get_ppi(rndis_pkt, TCPIP_CHKSUM_PKTINFO); - return netvsc_recv_callback(net_device_ctx->device_ctx, pkt, data, - csum_info, channel, vlan_tci); + return netvsc_recv_callback(ndev, channel, + data, rndis_pkt->data_len, + csum_info, vlan); } -int rndis_filter_receive(struct hv_device *dev, - struct hv_netvsc_packet *pkt, - void **data, - struct vmbus_channel *channel) +int rndis_filter_receive(struct net_device *ndev, + struct netvsc_device *net_dev, + struct hv_device *dev, + struct vmbus_channel *channel, + void *data, u32 buflen) { - struct net_device *ndev = hv_get_drvdata(dev); struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct netvsc_device *net_dev = net_device_ctx->nvdev; - struct rndis_device *rndis_dev; - struct rndis_message *rndis_msg; - int ret = 0; - - if (!net_dev) { - ret = NVSP_STAT_FAIL; - goto exit; - } + struct rndis_device *rndis_dev = net_dev->extension; + struct rndis_message *rndis_msg = data; /* Make sure the rndis device state is initialized */ - if (!net_dev->extension) { - netdev_err(ndev, "got rndis message but no rndis device - " - "dropping this message!\n"); - ret = NVSP_STAT_FAIL; - goto exit; + if (unlikely(!rndis_dev)) { + netif_err(net_device_ctx, rx_err, ndev, + "got rndis message but no rndis device!\n"); + return NVSP_STAT_FAIL; } - rndis_dev = (struct rndis_device *)net_dev->extension; - if (rndis_dev->state == RNDIS_DEV_UNINITIALIZED) { - netdev_err(ndev, "got rndis message but rndis device " - "uninitialized...dropping this message!\n"); - ret = NVSP_STAT_FAIL; - goto exit; + if (unlikely(rndis_dev->state == RNDIS_DEV_UNINITIALIZED)) { + netif_err(net_device_ctx, rx_err, ndev, + "got rndis message uninitialized\n"); + return NVSP_STAT_FAIL; } - rndis_msg = *data; - - if (netif_msg_rx_err(net_device_ctx)) + if (netif_msg_rx_status(net_device_ctx)) dump_rndis_message(dev, rndis_msg); switch (rndis_msg->ndis_msg_type) { case RNDIS_MSG_PACKET: - /* data msg */ - ret = rndis_filter_receive_data(rndis_dev, rndis_msg, pkt, - data, channel); - break; - + return rndis_filter_receive_data(ndev, rndis_dev, rndis_msg, + channel, data, buflen); case RNDIS_MSG_INIT_C: case RNDIS_MSG_QUERY_C: case RNDIS_MSG_SET_C: @@ -462,8 +438,7 @@ int rndis_filter_receive(struct hv_device *dev, break; } -exit: - return ret; + return 0; } static int rndis_filter_query_device(struct rndis_device *dev, u32 oid, -- cgit From 2c7f83ca713fa0ac5c4698e4134b09a355f60263 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:09 -0800 Subject: netvsc: don't pass void * to internal device_add All the caller's/callee's know that the format of the device_add parameter is a netvsc_device_info struct. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 5 +++-- drivers/net/hyperv/netvsc.c | 6 +++--- drivers/net/hyperv/rndis_filter.c | 5 ++--- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 4b91e37c85ca..28cbd6a2ecf8 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -178,7 +178,8 @@ struct rndis_device { /* Interface */ struct rndis_message; struct netvsc_device; -int netvsc_device_add(struct hv_device *device, void *additional_info); +int netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *info); void netvsc_device_remove(struct hv_device *device); int netvsc_send(struct hv_device *device, struct hv_netvsc_packet *packet, @@ -196,7 +197,7 @@ void netvsc_channel_cb(void *context); int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); int rndis_filter_device_add(struct hv_device *dev, - void *additional_info); + struct netvsc_device_info *info); void rndis_filter_device_remove(struct hv_device *dev); int rndis_filter_set_rss_param(struct rndis_device *rdev, const u8 *key, int num_queue); diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 80eecb4f9456..359e7ef7040b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1283,11 +1283,11 @@ void netvsc_channel_cb(void *context) * netvsc_device_add - Callback when the device belonging to this * driver is added */ -int netvsc_device_add(struct hv_device *device, void *additional_info) +int netvsc_device_add(struct hv_device *device, + const struct netvsc_device_info *device_info) { int i, ret = 0; - int ring_size = - ((struct netvsc_device_info *)additional_info)->ring_size; + int ring_size = device_info->ring_size; struct netvsc_device *net_device; struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index decce4f8fda8..e3b29f35366c 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1005,13 +1005,12 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) } int rndis_filter_device_add(struct hv_device *dev, - void *additional_info) + struct netvsc_device_info *device_info) { struct net_device *net = hv_get_drvdata(dev); struct net_device_context *net_device_ctx = netdev_priv(net); struct netvsc_device *net_device; struct rndis_device *rndis_device; - struct netvsc_device_info *device_info = additional_info; struct ndis_offload hwcaps; struct ndis_offload_params offloads; struct nvsp_message *init_packet; @@ -1035,7 +1034,7 @@ int rndis_filter_device_add(struct hv_device *dev, * NOTE! Once the channel is created, we may get a receive callback * (RndisFilterOnReceive()) before this call is completed */ - ret = netvsc_device_add(dev, additional_info); + ret = netvsc_device_add(dev, device_info); if (ret != 0) { kfree(rndis_device); return ret; -- cgit From 2289f0aa706e5160e078f73c32fcbfb56a3ff1e2 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:10 -0800 Subject: netvsc: simplify rndis_filter_remove All caller's already have pointer to netvsc_device so pass it. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc_drv.c | 8 ++++---- drivers/net/hyperv/rndis_filter.c | 12 ++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 28cbd6a2ecf8..757205c9cb93 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -198,7 +198,8 @@ int rndis_filter_open(struct netvsc_device *nvdev); int rndis_filter_close(struct netvsc_device *nvdev); int rndis_filter_device_add(struct hv_device *dev, struct netvsc_device_info *info); -void rndis_filter_device_remove(struct hv_device *dev); +void rndis_filter_device_remove(struct hv_device *dev, + struct netvsc_device *nvdev); int rndis_filter_set_rss_param(struct rndis_device *rdev, const u8 *key, int num_queue); int rndis_filter_receive(struct net_device *ndev, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 4bc1fdbc8cd7..11755783c2f6 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -780,7 +780,7 @@ static int netvsc_set_channels(struct net_device *net, return ret; net_device_ctx->start_remove = true; - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, nvdev); ret = netvsc_set_queues(net, dev, count); if (ret == 0) @@ -865,7 +865,7 @@ static int netvsc_change_mtu(struct net_device *ndev, int mtu) goto out; ndevctx->start_remove = true; - rndis_filter_device_remove(hdev); + rndis_filter_device_remove(hdev, nvdev); ndev->mtu = mtu; @@ -1493,7 +1493,7 @@ static int netvsc_probe(struct hv_device *dev, ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, nvdev); netvsc_free_netdev(net); } @@ -1533,7 +1533,7 @@ static int netvsc_remove(struct hv_device *dev) * Call to the vsc driver to let it know that the device is being * removed */ - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, ndev_ctx->nvdev); hv_set_drvdata(dev, NULL); diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index e3b29f35366c..70b099a731a9 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -1053,7 +1053,7 @@ int rndis_filter_device_add(struct hv_device *dev, /* Send the rndis initialization message */ ret = rndis_filter_init_device(rndis_device); if (ret != 0) { - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, net_device); return ret; } @@ -1068,7 +1068,7 @@ int rndis_filter_device_add(struct hv_device *dev, /* Get the mac address */ ret = rndis_filter_query_device_mac(rndis_device); if (ret != 0) { - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, net_device); return ret; } @@ -1077,7 +1077,7 @@ int rndis_filter_device_add(struct hv_device *dev, /* Find HW offload capabilities */ ret = rndis_query_hwcaps(rndis_device, &hwcaps); if (ret != 0) { - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, net_device); return ret; } @@ -1233,13 +1233,13 @@ out: return 0; /* return 0 because primary channel can be used alone */ err_dev_remv: - rndis_filter_device_remove(dev); + rndis_filter_device_remove(dev, net_device); return ret; } -void rndis_filter_device_remove(struct hv_device *dev) +void rndis_filter_device_remove(struct hv_device *dev, + struct netvsc_device *net_dev) { - struct netvsc_device *net_dev = hv_device_to_netvsc_device(dev); struct rndis_device *rndis_dev = net_dev->extension; /* If not all subchannel offers are complete, wait for them until -- cgit From 46b4f7f5d1f7410de48128540ef2d1aab913a619 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:11 -0800 Subject: netvsc: eliminate per-device outstanding send counter Since now keep track of per-queue outstanding sends, we can avoid one atomic update by removing no longer needed per-device atomic. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 1 - drivers/net/hyperv/netvsc.c | 44 ++++++++++++++------------------------- drivers/net/hyperv/rndis_filter.c | 21 ++++++++++++++++--- 3 files changed, 34 insertions(+), 32 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 757205c9cb93..fec365241f37 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -729,7 +729,6 @@ struct netvsc_channel { struct netvsc_device { u32 nvsp_version; - atomic_t num_outstanding_sends; wait_queue_head_t wait_drain; bool destroy; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 359e7ef7040b..bd055146f098 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -90,29 +90,23 @@ static void free_netvsc_device(struct netvsc_device *nvdev) kfree(nvdev); } -static struct netvsc_device *get_outbound_net_device(struct hv_device *device) -{ - struct netvsc_device *net_device = hv_device_to_netvsc_device(device); - if (net_device && net_device->destroy) - net_device = NULL; +static inline bool netvsc_channel_idle(const struct netvsc_device *net_device, + u16 q_idx) +{ + const struct netvsc_channel *nvchan = &net_device->chan_table[q_idx]; - return net_device; + return atomic_read(&net_device->num_outstanding_recvs) == 0 && + atomic_read(&nvchan->queue_sends) == 0; } -static struct netvsc_device *get_inbound_net_device(struct hv_device *device) +static struct netvsc_device *get_outbound_net_device(struct hv_device *device) { struct netvsc_device *net_device = hv_device_to_netvsc_device(device); - if (!net_device) - goto get_in_err; - - if (net_device->destroy && - atomic_read(&net_device->num_outstanding_sends) == 0 && - atomic_read(&net_device->num_outstanding_recvs) == 0) + if (net_device && net_device->destroy) net_device = NULL; -get_in_err: return net_device; } @@ -612,7 +606,6 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); struct vmbus_channel *channel = device->channel; - int num_outstanding_sends; u16 q_idx = 0; int queue_sends; @@ -630,13 +623,10 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, dev_consume_skb_any(skb); } - num_outstanding_sends = - atomic_dec_return(&net_device->num_outstanding_sends); - queue_sends = atomic_dec_return(&net_device->chan_table[q_idx].queue_sends); - if (net_device->destroy && num_outstanding_sends == 0) + if (net_device->destroy && queue_sends == 0) wake_up(&net_device->wait_drain); if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && @@ -823,15 +813,10 @@ static inline int netvsc_send_pkt( } if (ret == 0) { - atomic_inc(&net_device->num_outstanding_sends); atomic_inc_return(&nvchan->queue_sends); - if (ring_avail < RING_AVAIL_PERCENT_LOWATER) { + if (ring_avail < RING_AVAIL_PERCENT_LOWATER) netif_tx_stop_queue(txq); - - if (atomic_read(&nvchan->queue_sends) < 1) - netif_tx_wake_queue(txq); - } } else if (ret == -EAGAIN) { netif_tx_stop_queue(txq); if (atomic_read(&nvchan->queue_sends) < 1) { @@ -1259,11 +1244,14 @@ void netvsc_channel_cb(void *context) else device = channel->device_obj; - net_device = get_inbound_net_device(device); - if (!net_device) + ndev = hv_get_drvdata(device); + if (unlikely(!ndev)) return; - ndev = hv_get_drvdata(device); + net_device = net_device_to_netvsc_device(ndev); + if (unlikely(net_device->destroy) && + netvsc_channel_idle(net_device, q_idx)) + return; while ((desc = get_next_pkt_raw(channel)) != NULL) { netvsc_process_raw_pkt(device, channel, net_device, diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 70b099a731a9..19356f56b7b1 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -903,6 +903,23 @@ cleanup: return ret; } +static bool netvsc_device_idle(const struct netvsc_device *nvdev) +{ + int i; + + if (atomic_read(&nvdev->num_outstanding_recvs) > 0) + return false; + + for (i = 0; i < nvdev->num_chn; i++) { + const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; + + if (atomic_read(&nvchan->queue_sends) > 0) + return false; + } + + return true; +} + static void rndis_filter_halt_device(struct rndis_device *dev) { struct rndis_request *request; @@ -933,9 +950,7 @@ cleanup: spin_unlock_irqrestore(&hdev->channel->inbound_lock, flags); /* Wait for all send completions */ - wait_event(nvdev->wait_drain, - atomic_read(&nvdev->num_outstanding_sends) == 0 && - atomic_read(&nvdev->num_outstanding_recvs) == 0); + wait_event(nvdev->wait_drain, netvsc_device_idle(nvdev)); if (request) put_rndis_request(dev, request); -- cgit From 793e39555511bccd73308c41205b72448d0077db Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 24 Jan 2017 13:06:12 -0800 Subject: netvsc: account for packets/bytes transmitted after completion Most drivers do not increment transmit statistics until after the transmit is completed. This will also be necessary for BQL support. Slight additional complexity because the netvsc driver aggregates multiple packets into one transmit. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 4 +++- drivers/net/hyperv/netvsc.c | 19 ++++++++++++++++--- drivers/net/hyperv/netvsc_drv.c | 13 +++---------- 3 files changed, 22 insertions(+), 14 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index fec365241f37..340f64233e2a 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -137,8 +137,10 @@ struct hv_netvsc_packet { u8 page_buf_cnt; u16 q_idx; - u32 send_buf_index; + u16 total_packets; + u32 total_bytes; + u32 send_buf_index; u32 total_data_buflen; }; diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index bd055146f098..397aa1d88a6f 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -611,15 +611,23 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, /* Notify the layer above us */ if (likely(skb)) { - struct hv_netvsc_packet *nvsc_packet + const struct hv_netvsc_packet *packet = (struct hv_netvsc_packet *)skb->cb; - u32 send_index = nvsc_packet->send_buf_index; + u32 send_index = packet->send_buf_index; + struct netvsc_stats *tx_stats; if (send_index != NETVSC_INVALID_INDEX) netvsc_free_send_slot(net_device, send_index); - q_idx = nvsc_packet->q_idx; + q_idx = packet->q_idx; channel = incoming_channel; + tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); + + u64_stats_update_begin(&tx_stats->syncp); + tx_stats->packets += packet->total_packets; + tx_stats->bytes += packet->total_bytes; + u64_stats_update_end(&tx_stats->syncp); + dev_consume_skb_any(skb); } @@ -924,6 +932,11 @@ int netvsc_send(struct hv_device *device, packet->total_data_buflen += msd_len; } + if (msdp->pkt) { + packet->total_packets += msdp->pkt->total_packets; + packet->total_bytes += msdp->pkt->total_bytes; + } + if (msdp->skb) dev_consume_skb_any(msdp->skb); diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 11755783c2f6..1dd13da79f02 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -364,7 +364,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) u32 rndis_msg_size; struct rndis_per_packet_info *ppi; u32 hash; - u32 skb_length; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct hv_page_buffer *pb = page_buf; @@ -374,7 +373,6 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) * more pages we try linearizing it. */ - skb_length = skb->len; num_data_pgs = netvsc_get_slots(skb) + 2; if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { @@ -407,6 +405,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) packet->q_idx = skb_get_queue_mapping(skb); packet->total_data_buflen = skb->len; + packet->total_bytes = skb->len; + packet->total_packets = 1; rndis_msg = (struct rndis_message *)skb->head; @@ -517,15 +517,8 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); - if (likely(ret == 0)) { - struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); - - u64_stats_update_begin(&tx_stats->syncp); - tx_stats->packets++; - tx_stats->bytes += skb_length; - u64_stats_update_end(&tx_stats->syncp); + if (likely(ret == 0)) return NETDEV_TX_OK; - } if (ret == -EAGAIN) { ++net_device_ctx->eth_stats.tx_busy; -- cgit From 6c80f3fc2398aef22798e8ac4258454b1062f3fb Mon Sep 17 00:00:00 2001 From: Simon Xiao Date: Tue, 24 Jan 2017 13:06:13 -0800 Subject: netvsc: report per-channel stats in ethtool statistics Report packets and bytes transferred through a vmbus channel via ethtool. This supersedes need for per-cpu statistics. Example: $ ethtool -S eth0 NIC statistics: ... tx_queue_0_packets: 3523179 tx_queue_0_bytes: 505370920 rx_queue_0_packets: 41430490 rx_queue_0_bytes: 62714661254 tx_queue_1_packets: 0 tx_queue_1_bytes: 0 rx_queue_1_packets: 0 rx_queue_1_bytes: 0 ... Reviewed-by: Long Li Reviewed-by: K. Y. Srinivasan Reviewed-by: Haiyang Zhang Signed-off-by: Simon Xiao Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 5 +- drivers/net/hyperv/netvsc.c | 2 +- drivers/net/hyperv/netvsc_drv.c | 143 +++++++++++++++++++++++++--------------- 3 files changed, 93 insertions(+), 57 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 340f64233e2a..d3e73ac158ae 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -699,8 +699,6 @@ struct net_device_context { u32 msg_enable; /* debug level */ u32 tx_checksum_mask; - struct netvsc_stats __percpu *tx_stats; - struct netvsc_stats __percpu *rx_stats; /* Ethtool settings */ u8 duplex; @@ -725,6 +723,9 @@ struct netvsc_channel { struct multi_send_data msd; struct multi_recv_comp mrc; atomic_t queue_sends; + + struct netvsc_stats tx_stats; + struct netvsc_stats rx_stats; }; /* Per netvsc device */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 397aa1d88a6f..c70c4ac77b74 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -621,7 +621,7 @@ static void netvsc_send_tx_complete(struct netvsc_device *net_device, q_idx = packet->q_idx; channel = incoming_channel; - tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); + tx_stats = &net_device->chan_table[q_idx].tx_stats; u64_stats_update_begin(&tx_stats->syncp); tx_stats->packets += packet->total_packets; diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 1dd13da79f02..fe0df72532a3 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -641,9 +641,12 @@ int netvsc_recv_callback(struct net_device *net, const struct ndis_pkt_8021q_info *vlan) { struct net_device_context *net_device_ctx = netdev_priv(net); + struct netvsc_device *net_device = net_device_ctx->nvdev; struct net_device *vf_netdev; struct sk_buff *skb; struct netvsc_stats *rx_stats; + u16 q_idx = channel->offermsg.offer.sub_channel_index; + if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; @@ -669,15 +672,14 @@ int netvsc_recv_callback(struct net_device *net, } if (net != vf_netdev) - skb_record_rx_queue(skb, - channel->offermsg.offer.sub_channel_index); + skb_record_rx_queue(skb, q_idx); /* * Even if injecting the packet, record the statistics * on the synthetic device because modifying the VF device * statistics will not work correctly. */ - rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); + rx_stats = &net_device->chan_table[q_idx].rx_stats; u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += len; @@ -882,34 +884,39 @@ static void netvsc_get_stats64(struct net_device *net, struct rtnl_link_stats64 *t) { struct net_device_context *ndev_ctx = netdev_priv(net); - int cpu; - - for_each_possible_cpu(cpu) { - struct netvsc_stats *tx_stats = per_cpu_ptr(ndev_ctx->tx_stats, - cpu); - struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats, - cpu); - u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast; + struct netvsc_device *nvdev = ndev_ctx->nvdev; + int i; + + if (!nvdev) + return; + + for (i = 0; i < nvdev->num_chn; i++) { + const struct netvsc_channel *nvchan = &nvdev->chan_table[i]; + const struct netvsc_stats *stats; + u64 packets, bytes, multicast; unsigned int start; + stats = &nvchan->tx_stats; do { - start = u64_stats_fetch_begin_irq(&tx_stats->syncp); - tx_packets = tx_stats->packets; - tx_bytes = tx_stats->bytes; - } while (u64_stats_fetch_retry_irq(&tx_stats->syncp, start)); + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + t->tx_bytes += bytes; + t->tx_packets += packets; + stats = &nvchan->rx_stats; do { - start = u64_stats_fetch_begin_irq(&rx_stats->syncp); - rx_packets = rx_stats->packets; - rx_bytes = rx_stats->bytes; - rx_multicast = rx_stats->multicast + rx_stats->broadcast; - } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); - - t->tx_bytes += tx_bytes; - t->tx_packets += tx_packets; - t->rx_bytes += rx_bytes; - t->rx_packets += rx_packets; - t->multicast += rx_multicast; + start = u64_stats_fetch_begin_irq(&stats->syncp); + packets = stats->packets; + bytes = stats->bytes; + multicast = stats->multicast + stats->broadcast; + } while (u64_stats_fetch_retry_irq(&stats->syncp, start)); + + t->rx_bytes += bytes; + t->rx_packets += packets; + t->multicast += multicast; } t->tx_dropped = net->stats.tx_dropped; @@ -954,11 +961,19 @@ static const struct { { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, }; +#define NETVSC_GLOBAL_STATS_LEN ARRAY_SIZE(netvsc_stats) + +/* 4 statistics per queue (rx/tx packets/bytes) */ +#define NETVSC_QUEUE_STATS_LEN(dev) ((dev)->num_chn * 4) + static int netvsc_get_sset_count(struct net_device *dev, int string_set) { + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *nvdev = ndc->nvdev; + switch (string_set) { case ETH_SS_STATS: - return ARRAY_SIZE(netvsc_stats); + return NETVSC_GLOBAL_STATS_LEN + NETVSC_QUEUE_STATS_LEN(nvdev); default: return -EINVAL; } @@ -968,22 +983,63 @@ static void netvsc_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, u64 *data) { struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *nvdev = ndc->nvdev; const void *nds = &ndc->eth_stats; - int i; + const struct netvsc_stats *qstats; + unsigned int start; + u64 packets, bytes; + int i, j; - for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) + for (i = 0; i < NETVSC_GLOBAL_STATS_LEN; i++) data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); + + for (j = 0; j < nvdev->num_chn; j++) { + qstats = &nvdev->chan_table[j].tx_stats; + + do { + start = u64_stats_fetch_begin_irq(&qstats->syncp); + packets = qstats->packets; + bytes = qstats->bytes; + } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); + data[i++] = packets; + data[i++] = bytes; + + qstats = &nvdev->chan_table[j].rx_stats; + do { + start = u64_stats_fetch_begin_irq(&qstats->syncp); + packets = qstats->packets; + bytes = qstats->bytes; + } while (u64_stats_fetch_retry_irq(&qstats->syncp, start)); + data[i++] = packets; + data[i++] = bytes; + } } static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) { + struct net_device_context *ndc = netdev_priv(dev); + struct netvsc_device *nvdev = ndc->nvdev; + u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) - memcpy(data + i * ETH_GSTRING_LEN, + memcpy(p + i * ETH_GSTRING_LEN, netvsc_stats[i].name, ETH_GSTRING_LEN); + + p += i * ETH_GSTRING_LEN; + for (i = 0; i < nvdev->num_chn; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + break; } } @@ -1237,15 +1293,6 @@ out_unlock: rtnl_unlock(); } -static void netvsc_free_netdev(struct net_device *netdev) -{ - struct net_device_context *net_device_ctx = netdev_priv(netdev); - - free_percpu(net_device_ctx->tx_stats); - free_percpu(net_device_ctx->rx_stats); - free_netdev(netdev); -} - static struct net_device *get_netvsc_bymac(const u8 *mac) { struct net_device *dev; @@ -1423,18 +1470,6 @@ static int netvsc_probe(struct hv_device *dev, netdev_dbg(net, "netvsc msg_enable: %d\n", net_device_ctx->msg_enable); - net_device_ctx->tx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats); - if (!net_device_ctx->tx_stats) { - free_netdev(net); - return -ENOMEM; - } - net_device_ctx->rx_stats = netdev_alloc_pcpu_stats(struct netvsc_stats); - if (!net_device_ctx->rx_stats) { - free_percpu(net_device_ctx->tx_stats); - free_netdev(net); - return -ENOMEM; - } - hv_set_drvdata(dev, net); net_device_ctx->start_remove = false; @@ -1460,7 +1495,7 @@ static int netvsc_probe(struct hv_device *dev, ret = rndis_filter_device_add(dev, &device_info); if (ret != 0) { netdev_err(net, "unable to add netvsc device (ret %d)\n", ret); - netvsc_free_netdev(net); + free_netdev(net); hv_set_drvdata(dev, NULL); return ret; } @@ -1487,7 +1522,7 @@ static int netvsc_probe(struct hv_device *dev, if (ret != 0) { pr_err("Unable to register netdev.\n"); rndis_filter_device_remove(dev, nvdev); - netvsc_free_netdev(net); + free_netdev(net); } return ret; @@ -1530,7 +1565,7 @@ static int netvsc_remove(struct hv_device *dev) hv_set_drvdata(dev, NULL); - netvsc_free_netdev(net); + free_netdev(net); return 0; } -- cgit From 7ce101246655935b014b11d81f815342921f5654 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Thu, 9 Mar 2017 14:58:29 -0800 Subject: netvsc: handle select_queue when device is being removed Move the send indirection table from the inner device (netvsc) to the network device context. It is possible that netvsc_device is not present (remove in progress). This solves potential use after free issues when packet is being created during MTU change, shutdown, or queue count changes. Fixes: d8e18ee0fa96 ("netvsc: enhance transmit select_queue") Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- drivers/net/hyperv/hyperv_net.h | 3 ++- drivers/net/hyperv/netvsc.c | 8 ++------ drivers/net/hyperv/netvsc_drv.c | 11 +++-------- 3 files changed, 7 insertions(+), 15 deletions(-) (limited to 'drivers/net/hyperv/hyperv_net.h') diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index d3e73ac158ae..f9f3dba7a588 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -700,6 +700,8 @@ struct net_device_context { u32 tx_checksum_mask; + u32 tx_send_table[VRSS_SEND_TAB_SIZE]; + /* Ethtool settings */ u8 duplex; u32 speed; @@ -757,7 +759,6 @@ struct netvsc_device { struct nvsp_message revoke_packet; - u32 send_table[VRSS_SEND_TAB_SIZE]; u32 max_chn; u32 num_chn; spinlock_t sc_lock; /* Protects num_sc_offered variable */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index d35ebd993b38..4c1d8cca247b 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -1136,15 +1136,11 @@ static void netvsc_receive(struct net_device *ndev, static void netvsc_send_table(struct hv_device *hdev, struct nvsp_message *nvmsg) { - struct netvsc_device *nvscdev; struct net_device *ndev = hv_get_drvdata(hdev); + struct net_device_context *net_device_ctx = netdev_priv(ndev); int i; u32 count, *tab; - nvscdev = get_outbound_net_device(hdev); - if (!nvscdev) - return; - count = nvmsg->msg.v5_msg.send_table.count; if (count != VRSS_SEND_TAB_SIZE) { netdev_err(ndev, "Received wrong send-table size:%u\n", count); @@ -1155,7 +1151,7 @@ static void netvsc_send_table(struct hv_device *hdev, nvmsg->msg.v5_msg.send_table.offset); for (i = 0; i < count; i++) - nvscdev->send_table[i] = tab[i]; + net_device_ctx->tx_send_table[i] = tab[i]; } static void netvsc_send_vf(struct net_device_context *net_device_ctx, diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index bc05c895d958..5ede87f30463 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -206,17 +206,15 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { struct net_device_context *net_device_ctx = netdev_priv(ndev); - struct netvsc_device *nvsc_dev = net_device_ctx->nvdev; + unsigned int num_tx_queues = ndev->real_num_tx_queues; struct sock *sk = skb->sk; int q_idx = sk_tx_queue_get(sk); - if (q_idx < 0 || skb->ooo_okay || - q_idx >= ndev->real_num_tx_queues) { + if (q_idx < 0 || skb->ooo_okay || q_idx >= num_tx_queues) { u16 hash = __skb_tx_hash(ndev, skb, VRSS_SEND_TAB_SIZE); int new_idx; - new_idx = nvsc_dev->send_table[hash] - % nvsc_dev->num_chn; + new_idx = net_device_ctx->tx_send_table[hash] % num_tx_queues; if (q_idx != new_idx && sk && sk_fullsock(sk) && rcu_access_pointer(sk->sk_dst_cache)) @@ -225,9 +223,6 @@ static u16 netvsc_select_queue(struct net_device *ndev, struct sk_buff *skb, q_idx = new_idx; } - if (unlikely(!nvsc_dev->chan_table[q_idx].channel)) - q_idx = 0; - return q_idx; } -- cgit