diff options
Diffstat (limited to 'drivers/net/xen-netback/netback.c')
| -rw-r--r-- | drivers/net/xen-netback/netback.c | 485 |
1 files changed, 367 insertions, 118 deletions
diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index f2d6b78d901d..64828de25d9a 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -47,11 +47,40 @@ #include <asm/xen/hypercall.h> #include <asm/xen/page.h> +/* Provide an option to disable split event channels at load time as + * event channels are limited resource. Split event channels are + * enabled by default. + */ +bool separate_tx_rx_irq = 1; +module_param(separate_tx_rx_irq, bool, 0644); + +/* + * This is the maximum slots a skb can have. If a guest sends a skb + * which exceeds this limit it is considered malicious. + */ +#define FATAL_SKB_SLOTS_DEFAULT 20 +static unsigned int fatal_skb_slots = FATAL_SKB_SLOTS_DEFAULT; +module_param(fatal_skb_slots, uint, 0444); + +/* + * To avoid confusion, we define XEN_NETBK_LEGACY_SLOTS_MAX indicating + * the maximum slots a valid packet can use. Now this value is defined + * to be XEN_NETIF_NR_SLOTS_MIN, which is supposed to be supported by + * all backend. + */ +#define XEN_NETBK_LEGACY_SLOTS_MAX XEN_NETIF_NR_SLOTS_MIN + +typedef unsigned int pending_ring_idx_t; +#define INVALID_PENDING_RING_IDX (~0U) + struct pending_tx_info { - struct xen_netif_tx_request req; + struct xen_netif_tx_request req; /* coalesced tx request */ struct xenvif *vif; + pending_ring_idx_t head; /* head != INVALID_PENDING_RING_IDX + * if it is head of one or more tx + * reqs + */ }; -typedef unsigned int pending_ring_idx_t; struct netbk_rx_meta { int id; @@ -102,7 +131,11 @@ struct xen_netbk { atomic_t netfront_count; struct pending_tx_info pending_tx_info[MAX_PENDING_REQS]; - struct gnttab_copy tx_copy_ops[MAX_PENDING_REQS]; + /* Coalescing tx requests before copying makes number of grant + * copy ops greater or equal to number of slots required. In + * worst case a tx request consumes 2 gnttab_copy. + */ + struct gnttab_copy tx_copy_ops[2*MAX_PENDING_REQS]; u16 pending_ring[MAX_PENDING_REQS]; @@ -118,6 +151,16 @@ struct xen_netbk { static struct xen_netbk *xen_netbk; static int xen_netbk_group_nr; +/* + * If head != INVALID_PENDING_RING_IDX, it means this tx request is head of + * one or more merged tx requests, otherwise it is the continuation of + * previous tx request. + */ +static inline int pending_tx_is_head(struct xen_netbk *netbk, RING_IDX idx) +{ + return netbk->pending_tx_info[idx].head != INVALID_PENDING_RING_IDX; +} + void xen_netbk_add_xenvif(struct xenvif *vif) { int i; @@ -147,7 +190,8 @@ void xen_netbk_remove_xenvif(struct xenvif *vif) atomic_dec(&netbk->netfront_count); } -static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx); +static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, + u8 status); static void make_tx_response(struct xenvif *vif, struct xen_netif_tx_request *txp, s8 st); @@ -249,6 +293,7 @@ static int max_required_rx_slots(struct xenvif *vif) { int max = DIV_ROUND_UP(vif->dev->mtu, PAGE_SIZE); + /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ if (vif->can_sg || vif->gso || vif->gso_prefix) max += MAX_SKB_FRAGS + 1; /* extra_info + frags */ @@ -624,7 +669,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) { struct xenvif *vif = NULL, *tmp; s8 status; - u16 irq, flags; + u16 flags; struct xen_netif_rx_response *resp; struct sk_buff_head rxq; struct sk_buff *skb; @@ -656,6 +701,7 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) __skb_queue_tail(&rxq, skb); /* Filled the batch queue? */ + /* XXX FIXME: RX path dependent on MAX_SKB_FRAGS */ if (count + MAX_SKB_FRAGS >= XEN_NETIF_RX_RING_SIZE) break; } @@ -732,20 +778,21 @@ static void xen_netbk_rx_action(struct xen_netbk *netbk) sco->meta_slots_used); RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->rx, ret); - irq = vif->irq; - if (ret && list_empty(&vif->notify_list)) - list_add_tail(&vif->notify_list, ¬ify); xenvif_notify_tx_completion(vif); - xenvif_put(vif); + if (ret && list_empty(&vif->notify_list)) + list_add_tail(&vif->notify_list, ¬ify); + else + xenvif_put(vif); npo.meta_cons += sco->meta_slots_used; dev_kfree_skb(skb); } list_for_each_entry_safe(vif, tmp, ¬ify, notify_list) { - notify_remote_via_irq(vif->irq); + notify_remote_via_irq(vif->rx_irq); list_del_init(&vif->notify_list); + xenvif_put(vif); } /* More work to do? */ @@ -879,7 +926,7 @@ static void netbk_tx_err(struct xenvif *vif, do { make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); - if (cons >= end) + if (cons == end) break; txp = RING_GET_REQUEST(&vif->tx, cons++); } while (1); @@ -888,49 +935,112 @@ static void netbk_tx_err(struct xenvif *vif, xenvif_put(vif); } +static void netbk_fatal_tx_err(struct xenvif *vif) +{ + netdev_err(vif->dev, "fatal error; disabling device\n"); + xenvif_carrier_off(vif); + xenvif_put(vif); +} + static int netbk_count_requests(struct xenvif *vif, struct xen_netif_tx_request *first, struct xen_netif_tx_request *txp, int work_to_do) { RING_IDX cons = vif->tx.req_cons; - int frags = 0; + int slots = 0; + int drop_err = 0; + int more_data; if (!(first->flags & XEN_NETTXF_more_data)) return 0; do { - if (frags >= work_to_do) { - netdev_dbg(vif->dev, "Need more frags\n"); - return -frags; + struct xen_netif_tx_request dropped_tx = { 0 }; + + if (slots >= work_to_do) { + netdev_err(vif->dev, + "Asked for %d slots but exceeds this limit\n", + work_to_do); + netbk_fatal_tx_err(vif); + return -ENODATA; } - if (unlikely(frags >= MAX_SKB_FRAGS)) { - netdev_dbg(vif->dev, "Too many frags\n"); - return -frags; + /* This guest is really using too many slots and + * considered malicious. + */ + if (unlikely(slots >= fatal_skb_slots)) { + netdev_err(vif->dev, + "Malicious frontend using %d slots, threshold %u\n", + slots, fatal_skb_slots); + netbk_fatal_tx_err(vif); + return -E2BIG; + } + + /* Xen network protocol had implicit dependency on + * MAX_SKB_FRAGS. XEN_NETBK_LEGACY_SLOTS_MAX is set to + * the historical MAX_SKB_FRAGS value 18 to honor the + * same behavior as before. Any packet using more than + * 18 slots but less than fatal_skb_slots slots is + * dropped + */ + if (!drop_err && slots >= XEN_NETBK_LEGACY_SLOTS_MAX) { + if (net_ratelimit()) + netdev_dbg(vif->dev, + "Too many slots (%d) exceeding limit (%d), dropping packet\n", + slots, XEN_NETBK_LEGACY_SLOTS_MAX); + drop_err = -E2BIG; } - memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + frags), + if (drop_err) + txp = &dropped_tx; + + memcpy(txp, RING_GET_REQUEST(&vif->tx, cons + slots), sizeof(*txp)); - if (txp->size > first->size) { - netdev_dbg(vif->dev, "Frags galore\n"); - return -frags; + + /* If the guest submitted a frame >= 64 KiB then + * first->size overflowed and following slots will + * appear to be larger than the frame. + * + * This cannot be fatal error as there are buggy + * frontends that do this. + * + * Consume all slots and drop the packet. + */ + if (!drop_err && txp->size > first->size) { + if (net_ratelimit()) + netdev_dbg(vif->dev, + "Invalid tx request, slot size %u > remaining size %u\n", + txp->size, first->size); + drop_err = -EIO; } first->size -= txp->size; - frags++; + slots++; if (unlikely((txp->offset + txp->size) > PAGE_SIZE)) { - netdev_dbg(vif->dev, "txp->offset: %x, size: %u\n", + netdev_err(vif->dev, "Cross page boundary, txp->offset: %x, size: %u\n", txp->offset, txp->size); - return -frags; + netbk_fatal_tx_err(vif); + return -EINVAL; } - } while ((txp++)->flags & XEN_NETTXF_more_data); - return frags; + + more_data = txp->flags & XEN_NETTXF_more_data; + + if (!drop_err) + txp++; + + } while (more_data); + + if (drop_err) { + netbk_tx_err(vif, first, cons + slots); + return drop_err; + } + + return slots; } static struct page *xen_netbk_alloc_page(struct xen_netbk *netbk, - struct sk_buff *skb, u16 pending_idx) { struct page *page; @@ -951,43 +1061,120 @@ static struct gnttab_copy *xen_netbk_get_requests(struct xen_netbk *netbk, struct skb_shared_info *shinfo = skb_shinfo(skb); skb_frag_t *frags = shinfo->frags; u16 pending_idx = *((u16 *)skb->data); - int i, start; + u16 head_idx = 0; + int slot, start; + struct page *page; + pending_ring_idx_t index, start_idx = 0; + uint16_t dst_offset; + unsigned int nr_slots; + struct pending_tx_info *first = NULL; + + /* At this point shinfo->nr_frags is in fact the number of + * slots, which can be as large as XEN_NETBK_LEGACY_SLOTS_MAX. + */ + nr_slots = shinfo->nr_frags; /* Skip first skb fragment if it is on same page as header fragment. */ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); - for (i = start; i < shinfo->nr_frags; i++, txp++) { - struct page *page; - pending_ring_idx_t index; + /* Coalesce tx requests, at this point the packet passed in + * should be <= 64K. Any packets larger than 64K have been + * handled in netbk_count_requests(). + */ + for (shinfo->nr_frags = slot = start; slot < nr_slots; + shinfo->nr_frags++) { struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; - index = pending_index(netbk->pending_cons++); - pending_idx = netbk->pending_ring[index]; - page = xen_netbk_alloc_page(netbk, skb, pending_idx); + page = alloc_page(GFP_KERNEL|__GFP_COLD); if (!page) - return NULL; - - gop->source.u.ref = txp->gref; - gop->source.domid = vif->domid; - gop->source.offset = txp->offset; + goto err; + + dst_offset = 0; + first = NULL; + while (dst_offset < PAGE_SIZE && slot < nr_slots) { + gop->flags = GNTCOPY_source_gref; + + gop->source.u.ref = txp->gref; + gop->source.domid = vif->domid; + gop->source.offset = txp->offset; + + gop->dest.domid = DOMID_SELF; + + gop->dest.offset = dst_offset; + gop->dest.u.gmfn = virt_to_mfn(page_address(page)); + + if (dst_offset + txp->size > PAGE_SIZE) { + /* This page can only merge a portion + * of tx request. Do not increment any + * pointer / counter here. The txp + * will be dealt with in future + * rounds, eventually hitting the + * `else` branch. + */ + gop->len = PAGE_SIZE - dst_offset; + txp->offset += gop->len; + txp->size -= gop->len; + dst_offset += gop->len; /* quit loop */ + } else { + /* This tx request can be merged in the page */ + gop->len = txp->size; + dst_offset += gop->len; + + index = pending_index(netbk->pending_cons++); + + pending_idx = netbk->pending_ring[index]; + + memcpy(&pending_tx_info[pending_idx].req, txp, + sizeof(*txp)); + xenvif_get(vif); + + pending_tx_info[pending_idx].vif = vif; + + /* Poison these fields, corresponding + * fields for head tx req will be set + * to correct values after the loop. + */ + netbk->mmap_pages[pending_idx] = (void *)(~0UL); + pending_tx_info[pending_idx].head = + INVALID_PENDING_RING_IDX; + + if (!first) { + first = &pending_tx_info[pending_idx]; + start_idx = index; + head_idx = pending_idx; + } + + txp++; + slot++; + } - gop->dest.u.gmfn = virt_to_mfn(page_address(page)); - gop->dest.domid = DOMID_SELF; - gop->dest.offset = txp->offset; + gop++; + } - gop->len = txp->size; - gop->flags = GNTCOPY_source_gref; + first->req.offset = 0; + first->req.size = dst_offset; + first->head = start_idx; + set_page_ext(page, netbk, head_idx); + netbk->mmap_pages[head_idx] = page; + frag_set_pending_idx(&frags[shinfo->nr_frags], head_idx); + } - gop++; + BUG_ON(shinfo->nr_frags > MAX_SKB_FRAGS); - memcpy(&pending_tx_info[pending_idx].req, txp, sizeof(*txp)); - xenvif_get(vif); - pending_tx_info[pending_idx].vif = vif; - frag_set_pending_idx(&frags[i], pending_idx); + return gop; +err: + /* Unwind, freeing all pages and sending error responses. */ + while (shinfo->nr_frags-- > start) { + xen_netbk_idx_release(netbk, + frag_get_pending_idx(&frags[shinfo->nr_frags]), + XEN_NETIF_RSP_ERROR); } + /* The head too, if necessary. */ + if (start) + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); - return gop; + return NULL; } static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, @@ -996,48 +1183,45 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, { struct gnttab_copy *gop = *gopp; u16 pending_idx = *((u16 *)skb->data); - struct pending_tx_info *pending_tx_info = netbk->pending_tx_info; - struct xenvif *vif = pending_tx_info[pending_idx].vif; - struct xen_netif_tx_request *txp; struct skb_shared_info *shinfo = skb_shinfo(skb); + struct pending_tx_info *tx_info; int nr_frags = shinfo->nr_frags; int i, err, start; + u16 peek; /* peek into next tx request */ /* Check status of header. */ err = gop->status; - if (unlikely(err)) { - pending_ring_idx_t index; - index = pending_index(netbk->pending_prod++); - txp = &pending_tx_info[pending_idx].req; - make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); - netbk->pending_ring[index] = pending_idx; - xenvif_put(vif); - } + if (unlikely(err)) + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); /* Skip first skb fragment if it is on same page as header fragment. */ start = (frag_get_pending_idx(&shinfo->frags[0]) == pending_idx); for (i = start; i < nr_frags; i++) { int j, newerr; - pending_ring_idx_t index; + pending_ring_idx_t head; pending_idx = frag_get_pending_idx(&shinfo->frags[i]); + tx_info = &netbk->pending_tx_info[pending_idx]; + head = tx_info->head; /* Check error status: if okay then remember grant handle. */ - newerr = (++gop)->status; + do { + newerr = (++gop)->status; + if (newerr) + break; + peek = netbk->pending_ring[pending_index(++head)]; + } while (!pending_tx_is_head(netbk, peek)); + if (likely(!newerr)) { /* Had a previous error? Invalidate this fragment. */ if (unlikely(err)) - xen_netbk_idx_release(netbk, pending_idx); + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); continue; } /* Error on this fragment: respond to client with an error. */ - txp = &netbk->pending_tx_info[pending_idx].req; - make_tx_response(vif, txp, XEN_NETIF_RSP_ERROR); - index = pending_index(netbk->pending_prod++); - netbk->pending_ring[index] = pending_idx; - xenvif_put(vif); + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_ERROR); /* Not the first error? Preceding frags already invalidated. */ if (err) @@ -1045,10 +1229,10 @@ static int xen_netbk_tx_check_gop(struct xen_netbk *netbk, /* First error: invalidate header and preceding fragments. */ pending_idx = *((u16 *)skb->data); - xen_netbk_idx_release(netbk, pending_idx); + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); for (j = start; j < i; j++) { pending_idx = frag_get_pending_idx(&shinfo->frags[j]); - xen_netbk_idx_release(netbk, pending_idx); + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); } /* Remember the error: invalidate all subsequent fragments. */ @@ -1082,7 +1266,7 @@ static void xen_netbk_fill_frags(struct xen_netbk *netbk, struct sk_buff *skb) /* Take an extra reference to offset xen_netbk_idx_release */ get_page(netbk->mmap_pages[pending_idx]); - xen_netbk_idx_release(netbk, pending_idx); + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); } } @@ -1095,7 +1279,8 @@ static int xen_netbk_get_extras(struct xenvif *vif, do { if (unlikely(work_to_do-- <= 0)) { - netdev_dbg(vif->dev, "Missing extra info\n"); + netdev_err(vif->dev, "Missing extra info\n"); + netbk_fatal_tx_err(vif); return -EBADR; } @@ -1104,8 +1289,9 @@ static int xen_netbk_get_extras(struct xenvif *vif, if (unlikely(!extra.type || extra.type >= XEN_NETIF_EXTRA_TYPE_MAX)) { vif->tx.req_cons = ++cons; - netdev_dbg(vif->dev, + netdev_err(vif->dev, "Invalid extra type: %d\n", extra.type); + netbk_fatal_tx_err(vif); return -EINVAL; } @@ -1121,13 +1307,15 @@ static int netbk_set_skb_gso(struct xenvif *vif, struct xen_netif_extra_info *gso) { if (!gso->u.gso.size) { - netdev_dbg(vif->dev, "GSO size must not be zero.\n"); + netdev_err(vif->dev, "GSO size must not be zero.\n"); + netbk_fatal_tx_err(vif); return -EINVAL; } /* Currently only TCPv4 S.O. is supported. */ if (gso->u.gso.type != XEN_NETIF_GSO_TYPE_TCPV4) { - netdev_dbg(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); + netdev_err(vif->dev, "Bad GSO type %d.\n", gso->u.gso.type); + netbk_fatal_tx_err(vif); return -EINVAL; } @@ -1144,7 +1332,6 @@ static int netbk_set_skb_gso(struct xenvif *vif, static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) { struct iphdr *iph; - unsigned char *th; int err = -EPROTO; int recalculate_partial_csum = 0; @@ -1168,27 +1355,26 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) goto out; iph = (void *)skb->data; - th = skb->data + 4 * iph->ihl; - if (th >= skb_tail_pointer(skb)) - goto out; - - skb->csum_start = th - skb->head; switch (iph->protocol) { case IPPROTO_TCP: - skb->csum_offset = offsetof(struct tcphdr, check); + if (!skb_partial_csum_set(skb, 4 * iph->ihl, + offsetof(struct tcphdr, check))) + goto out; if (recalculate_partial_csum) { - struct tcphdr *tcph = (struct tcphdr *)th; + struct tcphdr *tcph = tcp_hdr(skb); tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - iph->ihl*4, IPPROTO_TCP, 0); } break; case IPPROTO_UDP: - skb->csum_offset = offsetof(struct udphdr, check); + if (!skb_partial_csum_set(skb, 4 * iph->ihl, + offsetof(struct udphdr, check))) + goto out; if (recalculate_partial_csum) { - struct udphdr *udph = (struct udphdr *)th; + struct udphdr *udph = udp_hdr(skb); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len - iph->ihl*4, IPPROTO_UDP, 0); @@ -1202,9 +1388,6 @@ static int checksum_setup(struct xenvif *vif, struct sk_buff *skb) goto out; } - if ((th + skb->csum_offset + 2) > skb_tail_pointer(skb)) - goto out; - err = 0; out: @@ -1249,11 +1432,12 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) struct sk_buff *skb; int ret; - while (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && + while ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX + < MAX_PENDING_REQS) && !list_empty(&netbk->net_schedule_list)) { struct xenvif *vif; struct xen_netif_tx_request txreq; - struct xen_netif_tx_request txfrags[MAX_SKB_FRAGS]; + struct xen_netif_tx_request txfrags[XEN_NETBK_LEGACY_SLOTS_MAX]; struct page *page; struct xen_netif_extra_info extras[XEN_NETIF_EXTRA_TYPE_MAX-1]; u16 pending_idx; @@ -1264,9 +1448,25 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) /* Get a netif from the list with work to do. */ vif = poll_net_schedule_list(netbk); + /* This can sometimes happen because the test of + * list_empty(net_schedule_list) at the top of the + * loop is unlocked. Just go back and have another + * look. + */ if (!vif) continue; + if (vif->tx.sring->req_prod - vif->tx.req_cons > + XEN_NETIF_TX_RING_SIZE) { + netdev_err(vif->dev, + "Impossible number of requests. " + "req_prod %d, req_cons %d, size %ld\n", + vif->tx.sring->req_prod, vif->tx.req_cons, + XEN_NETIF_TX_RING_SIZE); + netbk_fatal_tx_err(vif); + continue; + } + RING_FINAL_CHECK_FOR_REQUESTS(&vif->tx, work_to_do); if (!work_to_do) { xenvif_put(vif); @@ -1294,17 +1494,14 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) work_to_do = xen_netbk_get_extras(vif, extras, work_to_do); idx = vif->tx.req_cons; - if (unlikely(work_to_do < 0)) { - netbk_tx_err(vif, &txreq, idx); + if (unlikely(work_to_do < 0)) continue; - } } ret = netbk_count_requests(vif, &txreq, txfrags, work_to_do); - if (unlikely(ret < 0)) { - netbk_tx_err(vif, &txreq, idx - ret); + if (unlikely(ret < 0)) continue; - } + idx += ret; if (unlikely(txreq.size < ETH_HLEN)) { @@ -1316,11 +1513,11 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) /* No crossing a page as the payload mustn't fragment. */ if (unlikely((txreq.offset + txreq.size) > PAGE_SIZE)) { - netdev_dbg(vif->dev, + netdev_err(vif->dev, "txreq.offset: %x, size: %u, end: %lu\n", txreq.offset, txreq.size, (txreq.offset&~PAGE_MASK) + txreq.size); - netbk_tx_err(vif, &txreq, idx); + netbk_fatal_tx_err(vif); continue; } @@ -1328,7 +1525,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) pending_idx = netbk->pending_ring[index]; data_len = (txreq.size > PKT_PROT_LEN && - ret < MAX_SKB_FRAGS) ? + ret < XEN_NETBK_LEGACY_SLOTS_MAX) ? PKT_PROT_LEN : txreq.size; skb = alloc_skb(data_len + NET_SKB_PAD + NET_IP_ALIGN, @@ -1348,14 +1545,14 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) gso = &extras[XEN_NETIF_EXTRA_TYPE_GSO - 1]; if (netbk_set_skb_gso(vif, skb, gso)) { + /* Failure in netbk_set_skb_gso is fatal. */ kfree_skb(skb); - netbk_tx_err(vif, &txreq, idx); continue; } } /* XXX could copy straight to head */ - page = xen_netbk_alloc_page(netbk, skb, pending_idx); + page = xen_netbk_alloc_page(netbk, pending_idx); if (!page) { kfree_skb(skb); netbk_tx_err(vif, &txreq, idx); @@ -1378,6 +1575,7 @@ static unsigned xen_netbk_tx_build_gops(struct xen_netbk *netbk) memcpy(&netbk->pending_tx_info[pending_idx].req, &txreq, sizeof(txreq)); netbk->pending_tx_info[pending_idx].vif = vif; + netbk->pending_tx_info[pending_idx].head = index; *((u16 *)skb->data) = pending_idx; __skb_put(skb, data_len); @@ -1448,7 +1646,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) txp->size -= data_len; } else { /* Schedule a response immediately. */ - xen_netbk_idx_release(netbk, pending_idx); + xen_netbk_idx_release(netbk, pending_idx, XEN_NETIF_RSP_OKAY); } if (txp->flags & XEN_NETTXF_csum_blank) @@ -1470,6 +1668,7 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) skb->dev = vif->dev; skb->protocol = eth_type_trans(skb, skb->dev); + skb_reset_network_header(skb); if (checksum_setup(vif, skb)) { netdev_dbg(vif->dev, @@ -1478,6 +1677,8 @@ static void xen_netbk_tx_submit(struct xen_netbk *netbk) continue; } + skb_probe_transport_header(skb, 0); + vif->dev->stats.rx_bytes += skb->len; vif->dev->stats.rx_packets++; @@ -1500,11 +1701,15 @@ static void xen_netbk_tx_action(struct xen_netbk *netbk) xen_netbk_tx_submit(netbk); } -static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx) +static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx, + u8 status) { struct xenvif *vif; struct pending_tx_info *pending_tx_info; - pending_ring_idx_t index; + pending_ring_idx_t head; + u16 peek; /* peek into next tx request */ + + BUG_ON(netbk->mmap_pages[pending_idx] == (void *)(~0UL)); /* Already complete? */ if (netbk->mmap_pages[pending_idx] == NULL) @@ -1513,19 +1718,40 @@ static void xen_netbk_idx_release(struct xen_netbk *netbk, u16 pending_idx) pending_tx_info = &netbk->pending_tx_info[pending_idx]; vif = pending_tx_info->vif; + head = pending_tx_info->head; - make_tx_response(vif, &pending_tx_info->req, XEN_NETIF_RSP_OKAY); + BUG_ON(!pending_tx_is_head(netbk, head)); + BUG_ON(netbk->pending_ring[pending_index(head)] != pending_idx); - index = pending_index(netbk->pending_prod++); - netbk->pending_ring[index] = pending_idx; + do { + pending_ring_idx_t index; + pending_ring_idx_t idx = pending_index(head); + u16 info_idx = netbk->pending_ring[idx]; - xenvif_put(vif); + pending_tx_info = &netbk->pending_tx_info[info_idx]; + make_tx_response(vif, &pending_tx_info->req, status); + + /* Setting any number other than + * INVALID_PENDING_RING_IDX indicates this slot is + * starting a new packet / ending a previous packet. + */ + pending_tx_info->head = 0; + + index = pending_index(netbk->pending_prod++); + netbk->pending_ring[index] = netbk->pending_ring[info_idx]; + + xenvif_put(vif); + + peek = netbk->pending_ring[pending_index(++head)]; + + } while (!pending_tx_is_head(netbk, peek)); netbk->mmap_pages[pending_idx]->mapping = 0; put_page(netbk->mmap_pages[pending_idx]); netbk->mmap_pages[pending_idx] = NULL; } + static void make_tx_response(struct xenvif *vif, struct xen_netif_tx_request *txp, s8 st) @@ -1544,7 +1770,7 @@ static void make_tx_response(struct xenvif *vif, vif->tx.rsp_prod_pvt = ++i; RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&vif->tx, notify); if (notify) - notify_remote_via_irq(vif->irq); + notify_remote_via_irq(vif->tx_irq); } static struct xen_netif_rx_response *make_rx_response(struct xenvif *vif, @@ -1578,8 +1804,9 @@ static inline int rx_work_todo(struct xen_netbk *netbk) static inline int tx_work_todo(struct xen_netbk *netbk) { - if (((nr_pending_reqs(netbk) + MAX_SKB_FRAGS) < MAX_PENDING_REQS) && - !list_empty(&netbk->net_schedule_list)) + if ((nr_pending_reqs(netbk) + XEN_NETBK_LEGACY_SLOTS_MAX + < MAX_PENDING_REQS) && + !list_empty(&netbk->net_schedule_list)) return 1; return 0; @@ -1662,6 +1889,12 @@ static int __init netback_init(void) if (!xen_domain()) return -ENODEV; + if (fatal_skb_slots < XEN_NETBK_LEGACY_SLOTS_MAX) { + pr_info("fatal_skb_slots too small (%d), bump it to XEN_NETBK_LEGACY_SLOTS_MAX (%d)\n", + fatal_skb_slots, XEN_NETBK_LEGACY_SLOTS_MAX); + fatal_skb_slots = XEN_NETBK_LEGACY_SLOTS_MAX; + } + xen_netbk_group_nr = num_online_cpus(); xen_netbk = vzalloc(sizeof(struct xen_netbk) * xen_netbk_group_nr); if (!xen_netbk) @@ -1687,7 +1920,7 @@ static int __init netback_init(void) "netback/%u", group); if (IS_ERR(netbk->task)) { - printk(KERN_ALERT "kthread_create() fails at netback\n"); + pr_alert("kthread_create() fails at netback\n"); del_timer(&netbk->net_timer); rc = PTR_ERR(netbk->task); goto failed_init; @@ -1713,10 +1946,6 @@ static int __init netback_init(void) failed_init: while (--group >= 0) { struct xen_netbk *netbk = &xen_netbk[group]; - for (i = 0; i < MAX_PENDING_REQS; i++) { - if (netbk->mmap_pages[i]) - __free_page(netbk->mmap_pages[i]); - } del_timer(&netbk->net_timer); kthread_stop(netbk->task); } @@ -1727,5 +1956,25 @@ failed_init: module_init(netback_init); +static void __exit netback_fini(void) +{ + int i, j; + + xenvif_xenbus_fini(); + + for (i = 0; i < xen_netbk_group_nr; i++) { + struct xen_netbk *netbk = &xen_netbk[i]; + del_timer_sync(&netbk->net_timer); + kthread_stop(netbk->task); + for (j = 0; j < MAX_PENDING_REQS; j++) { + if (netbk->mmap_pages[j]) + __free_page(netbk->mmap_pages[j]); + } + } + + vfree(xen_netbk); +} +module_exit(netback_fini); + MODULE_LICENSE("Dual BSD/GPL"); MODULE_ALIAS("xen-backend:vif"); |