diff options
author | David S. Miller <[email protected]> | 2022-03-06 11:04:01 +0000 |
---|---|---|
committer | David S. Miller <[email protected]> | 2022-03-06 11:04:01 +0000 |
commit | 4c22aac3f83ea3c4c90fa964a88e197f16b8cae0 (patch) | |
tree | 55ae97b17bc598e205cf4908021040685f8a4a7b | |
parent | 925a24213b5cc80fcef8858e6dc1f97ea2b17afb (diff) | |
parent | 4b4f052e2d89c2eb7e13ee28ba9e85f8097aef3d (diff) |
Merge branch 'tuntap-kfree_skb_reason'
Dongli Zhang says:
====================
tun/tap: use kfree_skb_reason() to trace dropped skb
The commit c504e5c2f964 ("net: skb: introduce kfree_skb_reason()") has
introduced the kfree_skb_reason() to help track the reason.
The tun and tap are commonly used as virtio-net/vhost-net backend. This is to
use kfree_skb_reason() to trace the dropped skb for those two drivers.
Changed since v1:
- I have renamed many of the reasons since v1. I make them as generic as
possible so that they can be re-used by core networking and drivers.
Changed since v2:
- declare drop_reason as type "enum skb_drop_reason"
- handle the drop in skb_list_walk_safe() case for tap driver, and
kfree_skb_list_reason() is introduced
Changed since v3 (only for PATCH 4/4):
- rename to TAP_FILTER and TAP_TXFILTER
- honor reverse xmas tree style declaration for 'drop_reason' in
tun_net_xmit()
Changed since v4:
- make kfree_skb_list() static inline
- add 'computation' to SKB_CSUM comment
- change COPY_DATA to UCOPY_FAULT
- add 'metadata' to DEV_HDR comment
- expand comment on DEV_READY
- change SKB_TRIM to NOMEM
- chnage SKB_PULL to HDR_TRUNC
Changed since v5:
- rebase to net-next
The following reasons are introduced.
- SKB_DROP_REASON_SKB_CSUM
- SKB_DROP_REASON_SKB_GSO_SEG
- SKB_DROP_REASON_SKB_UCOPY_FAULT
- SKB_DROP_REASON_DEV_HDR
- SKB_DROP_REASON_FULL_RING
- SKB_DROP_REASON_DEV_READY
- SKB_DROP_REASON_NOMEM
- SKB_DROP_REASON_HDR_TRUNC
- SKB_DROP_REASON_TAP_FILTER
- SKB_DROP_REASON_TAP_TXFILTER
This is the output for TUN device.
<idle>-0 [029] ..s1. 450.727651: kfree_skb: skbaddr=0000000023d235cc protocol=0 location=00000000a6748854 reason: NOT_SPECIFIED
<idle>-0 [000] b.s3. 451.165671: kfree_skb: skbaddr=000000006b5de7cc protocol=4 location=000000007c2b9eae reason: FULL_RING
<idle>-0 [000] b.s3. 453.149650: kfree_skb: skbaddr=000000006b5de7cc protocol=4 location=000000007c2b9eae reason: FULL_RING
<idle>-0 [000] b.s3. 455.133576: kfree_skb: skbaddr=000000006b5de7cc protocol=4 location=000000007c2b9eae reason: FULL_RING
<idle>-0 [000] b.s3. 457.117566: kfree_skb: skbaddr=000000006b5de7cc protocol=4 location=000000007c2b9eae reason: FULL_RING
This is the output for TAP device.
arping-7053 [006] ..s1. 1000.047753: kfree_skb: skbaddr=000000008618a587 protocol=2054 location=00000000743ad4a7 reason: FULL_RING
<idle>-0 [022] ..s1. 1000.778514: kfree_skb: skbaddr=000000002c1e706c protocol=0 location=00000000a6748854 reason: NOT_SPECIFIED
arping-7053 [006] ..s1. 1001.047830: kfree_skb: skbaddr=000000008618a587 protocol=2054 location=00000000743ad4a7 reason: FULL_RING
arping-7053 [006] ..s1. 1002.047918: kfree_skb: skbaddr=000000008618a587 protocol=2054 location=00000000743ad4a7 reason: FULL_RING
arping-7053 [006] ..s1. 1003.048017: kfree_skb: skbaddr=000000008618a587 protocol=2054 location=00000000743ad4a7 reason: FULL_RING
====================
Signed-off-by: David S. Miller <[email protected]>
-rw-r--r-- | drivers/net/tap.c | 35 | ||||
-rw-r--r-- | drivers/net/tun.c | 38 | ||||
-rw-r--r-- | include/linux/skbuff.h | 31 | ||||
-rw-r--r-- | include/trace/events/skb.h | 10 |
4 files changed, 96 insertions, 18 deletions
diff --git a/drivers/net/tap.c b/drivers/net/tap.c index ba2ef5437e16..c3d42062559d 100644 --- a/drivers/net/tap.c +++ b/drivers/net/tap.c @@ -322,6 +322,7 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) struct tap_dev *tap; struct tap_queue *q; netdev_features_t features = TAP_FEATURES; + enum skb_drop_reason drop_reason; tap = tap_dev_get_rcu(dev); if (!tap) @@ -343,12 +344,16 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) struct sk_buff *segs = __skb_gso_segment(skb, features, false); struct sk_buff *next; - if (IS_ERR(segs)) + if (IS_ERR(segs)) { + drop_reason = SKB_DROP_REASON_SKB_GSO_SEG; goto drop; + } if (!segs) { - if (ptr_ring_produce(&q->ring, skb)) + if (ptr_ring_produce(&q->ring, skb)) { + drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; + } goto wake_up; } @@ -356,8 +361,9 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) skb_list_walk_safe(segs, skb, next) { skb_mark_not_on_list(skb); if (ptr_ring_produce(&q->ring, skb)) { - kfree_skb(skb); - kfree_skb_list(next); + drop_reason = SKB_DROP_REASON_FULL_RING; + kfree_skb_reason(skb, drop_reason); + kfree_skb_list_reason(next, drop_reason); break; } } @@ -369,10 +375,14 @@ rx_handler_result_t tap_handle_frame(struct sk_buff **pskb) */ if (skb->ip_summed == CHECKSUM_PARTIAL && !(features & NETIF_F_CSUM_MASK) && - skb_checksum_help(skb)) + skb_checksum_help(skb)) { + drop_reason = SKB_DROP_REASON_SKB_CSUM; goto drop; - if (ptr_ring_produce(&q->ring, skb)) + } + if (ptr_ring_produce(&q->ring, skb)) { + drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; + } } wake_up: @@ -383,7 +393,7 @@ drop: /* Count errors/drops only here, thus don't care about args. */ if (tap->count_rx_dropped) tap->count_rx_dropped(tap); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return RX_HANDLER_CONSUMED; } EXPORT_SYMBOL_GPL(tap_handle_frame); @@ -632,6 +642,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, int depth; bool zerocopy = false; size_t linear; + enum skb_drop_reason drop_reason; if (q->flags & IFF_VNET_HDR) { vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz); @@ -696,8 +707,10 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, else err = skb_copy_datagram_from_iter(skb, 0, from, len); - if (err) + if (err) { + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; goto err_kfree; + } skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_header(skb); @@ -706,8 +719,10 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, if (vnet_hdr_len) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, tap_is_little_endian(q)); - if (err) + if (err) { + drop_reason = SKB_DROP_REASON_DEV_HDR; goto err_kfree; + } } skb_probe_transport_header(skb); @@ -738,7 +753,7 @@ static ssize_t tap_get_user(struct tap_queue *q, void *msg_control, return total_len; err_kfree: - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); err: rcu_read_lock(); diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2a0d8a5d7aec..bab92e489fba 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1058,6 +1058,7 @@ static unsigned int run_ebpf_filter(struct tun_struct *tun, static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); + enum skb_drop_reason drop_reason; int txq = skb->queue_mapping; struct netdev_queue *queue; struct tun_file *tfile; @@ -1067,8 +1068,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) tfile = rcu_dereference(tun->tfiles[txq]); /* Drop packet if interface is not attached */ - if (!tfile) + if (!tfile) { + drop_reason = SKB_DROP_REASON_DEV_READY; goto drop; + } if (!rcu_dereference(tun->steering_prog)) tun_automq_xmit(tun, skb); @@ -1078,19 +1081,32 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Drop if the filter does not like it. * This is a noop if the filter is disabled. * Filter can be enabled only for the TAP devices. */ - if (!check_filter(&tun->txflt, skb)) + if (!check_filter(&tun->txflt, skb)) { + drop_reason = SKB_DROP_REASON_TAP_TXFILTER; goto drop; + } if (tfile->socket.sk->sk_filter && - sk_filter(tfile->socket.sk, skb)) + sk_filter(tfile->socket.sk, skb)) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; + } len = run_ebpf_filter(tun, skb, len); - if (len == 0 || pskb_trim(skb, len)) + if (len == 0) { + drop_reason = SKB_DROP_REASON_TAP_FILTER; goto drop; + } - if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) + if (pskb_trim(skb, len)) { + drop_reason = SKB_DROP_REASON_NOMEM; goto drop; + } + + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) { + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; + goto drop; + } skb_tx_timestamp(skb); @@ -1101,8 +1117,10 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) nf_reset_ct(skb); - if (ptr_ring_produce(&tfile->tx_ring, skb)) + if (ptr_ring_produce(&tfile->tx_ring, skb)) { + drop_reason = SKB_DROP_REASON_FULL_RING; goto drop; + } /* NETIF_F_LLTX requires to do our own update of trans_start */ queue = netdev_get_tx_queue(dev, txq); @@ -1119,7 +1137,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) drop: atomic_long_inc(&dev->tx_dropped); skb_tx_error(skb); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); rcu_read_unlock(); return NET_XMIT_DROP; } @@ -1717,6 +1735,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, u32 rxhash = 0; int skb_xdp = 1; bool frags = tun_napi_frags_enabled(tfile); + enum skb_drop_reason drop_reason; if (!(tun->flags & IFF_NO_PI)) { if (len < sizeof(pi)) @@ -1820,9 +1839,10 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile, if (err) { err = -EFAULT; + drop_reason = SKB_DROP_REASON_SKB_UCOPY_FAULT; drop: atomic_long_inc(&tun->dev->rx_dropped); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); if (frags) { tfile->napi.skb = NULL; mutex_unlock(&tfile->napi_mutex); @@ -1869,6 +1889,7 @@ drop: case IFF_TAP: if (frags && !pskb_may_pull(skb, ETH_HLEN)) { err = -ENOMEM; + drop_reason = SKB_DROP_REASON_HDR_TRUNC; goto drop; } skb->protocol = eth_type_trans(skb, tun->dev); @@ -1922,6 +1943,7 @@ drop: if (unlikely(!(tun->dev->flags & IFF_UP))) { err = -EIO; rcu_read_unlock(); + drop_reason = SKB_DROP_REASON_DEV_READY; goto drop; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2be263184d1e..34f572271c0c 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -412,6 +412,37 @@ enum skb_drop_reason { * this means that L3 protocol is * not supported */ + SKB_DROP_REASON_SKB_CSUM, /* sk_buff checksum computation + * error + */ + SKB_DROP_REASON_SKB_GSO_SEG, /* gso segmentation error */ + SKB_DROP_REASON_SKB_UCOPY_FAULT, /* failed to copy data from + * user space, e.g., via + * zerocopy_sg_from_iter() + * or skb_orphan_frags_rx() + */ + SKB_DROP_REASON_DEV_HDR, /* device driver specific + * header/metadata is invalid + */ + /* the device is not ready to xmit/recv due to any of its data + * structure that is not up/ready/initialized, e.g., the IFF_UP is + * not set, or driver specific tun->tfiles[txq] is not initialized + */ + SKB_DROP_REASON_DEV_READY, + SKB_DROP_REASON_FULL_RING, /* ring buffer is full */ + SKB_DROP_REASON_NOMEM, /* error due to OOM */ + SKB_DROP_REASON_HDR_TRUNC, /* failed to trunc/extract the header + * from networking data, e.g., failed + * to pull the protocol header from + * frags via pskb_may_pull() + */ + SKB_DROP_REASON_TAP_FILTER, /* dropped by (ebpf) filter directly + * attached to tun/tap, e.g., via + * TUNSETFILTEREBPF + */ + SKB_DROP_REASON_TAP_TXFILTER, /* dropped by tx filter implemented + * at tun/tap, e.g., check_filter() + */ SKB_DROP_REASON_MAX, }; diff --git a/include/trace/events/skb.h b/include/trace/events/skb.h index c0769d943f8e..e1670e1e4934 100644 --- a/include/trace/events/skb.h +++ b/include/trace/events/skb.h @@ -51,6 +51,16 @@ EM(SKB_DROP_REASON_XDP, XDP) \ EM(SKB_DROP_REASON_TC_INGRESS, TC_INGRESS) \ EM(SKB_DROP_REASON_PTYPE_ABSENT, PTYPE_ABSENT) \ + EM(SKB_DROP_REASON_SKB_CSUM, SKB_CSUM) \ + EM(SKB_DROP_REASON_SKB_GSO_SEG, SKB_GSO_SEG) \ + EM(SKB_DROP_REASON_SKB_UCOPY_FAULT, SKB_UCOPY_FAULT) \ + EM(SKB_DROP_REASON_DEV_HDR, DEV_HDR) \ + EM(SKB_DROP_REASON_DEV_READY, DEV_READY) \ + EM(SKB_DROP_REASON_FULL_RING, FULL_RING) \ + EM(SKB_DROP_REASON_NOMEM, NOMEM) \ + EM(SKB_DROP_REASON_HDR_TRUNC, HDR_TRUNC) \ + EM(SKB_DROP_REASON_TAP_FILTER, TAP_FILTER) \ + EM(SKB_DROP_REASON_TAP_TXFILTER, TAP_TXFILTER) \ EMe(SKB_DROP_REASON_MAX, MAX) #undef EM |