Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r-- | net/core/skbuff.c | 148 |
1 file changed, 101 insertions, 47 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1a31815104d6..2112146092bf 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -58,6 +58,7 @@
 #include <linux/scatterlist.h>
 #include <linux/errqueue.h>
 #include <linux/prefetch.h>
+#include <linux/bitfield.h>
 #include <linux/if_vlan.h>
 #include <linux/mpls.h>
 #include <linux/kcov.h>
@@ -72,6 +73,7 @@
 #include <net/mptcp.h>
 #include <net/mctp.h>
 #include <net/page_pool.h>
+#include <net/dropreason.h>

 #include <linux/uaccess.h>
 #include <trace/events/skb.h>
@@ -122,11 +124,59 @@ EXPORT_SYMBOL(sysctl_max_skb_frags);

 #undef FN
 #define FN(reason) [SKB_DROP_REASON_##reason] = #reason,
-const char * const drop_reasons[] = {
+static const char * const drop_reasons[] = {
         [SKB_CONSUMED] = "CONSUMED",
         DEFINE_DROP_REASON(FN, FN)
 };
-EXPORT_SYMBOL(drop_reasons);
+
+static const struct drop_reason_list drop_reasons_core = {
+        .reasons = drop_reasons,
+        .n_reasons = ARRAY_SIZE(drop_reasons),
+};
+
+const struct drop_reason_list __rcu *
+drop_reasons_by_subsys[SKB_DROP_REASON_SUBSYS_NUM] = {
+        [SKB_DROP_REASON_SUBSYS_CORE] = RCU_INITIALIZER(&drop_reasons_core),
+};
+EXPORT_SYMBOL(drop_reasons_by_subsys);
+
+/**
+ * drop_reasons_register_subsys - register another drop reason subsystem
+ * @subsys: the subsystem to register, must not be the core
+ * @list: the list of drop reasons within the subsystem, must point to
+ *        a statically initialized list
+ */
+void drop_reasons_register_subsys(enum skb_drop_reason_subsys subsys,
+                                  const struct drop_reason_list *list)
+{
+        if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE ||
+                 subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
+                 "invalid subsystem %d\n", subsys))
+                return;
+
+        /* must point to statically allocated memory, so INIT is OK */
+        RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], list);
+}
+EXPORT_SYMBOL_GPL(drop_reasons_register_subsys);
+
+/**
+ * drop_reasons_unregister_subsys - unregister a drop reason subsystem
+ * @subsys: the subsystem to remove, must not be the core
+ *
+ * Note: This will synchronize_rcu() to ensure no users when it returns.
+ */
+void drop_reasons_unregister_subsys(enum skb_drop_reason_subsys subsys)
+{
+        if (WARN(subsys <= SKB_DROP_REASON_SUBSYS_CORE ||
+                 subsys >= ARRAY_SIZE(drop_reasons_by_subsys),
+                 "invalid subsystem %d\n", subsys))
+                return;
+
+        RCU_INIT_POINTER(drop_reasons_by_subsys[subsys], NULL);
+
+        synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(drop_reasons_unregister_subsys);

 /**
  * skb_panic - private function for out-of-line support
@@ -420,10 +470,9 @@ struct sk_buff *build_skb(void *data, unsigned int frag_size)
 {
         struct sk_buff *skb = __build_skb(data, frag_size);

-        if (skb && frag_size) {
+        if (likely(skb && frag_size)) {
                 skb->head_frag = 1;
-                if (page_is_pfmemalloc(virt_to_head_page(data)))
-                        skb->pfmemalloc = 1;
+                skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
         }
         return skb;
 }
@@ -445,8 +494,7 @@ struct sk_buff *build_skb_around(struct sk_buff *skb,

         if (frag_size) {
                 skb->head_frag = 1;
-                if (page_is_pfmemalloc(virt_to_head_page(data)))
-                        skb->pfmemalloc = 1;
+                skb_propagate_pfmemalloc(virt_to_head_page(data), skb);
         }
         return skb;
 }
@@ -841,11 +889,11 @@ static void skb_clone_fraglist(struct sk_buff *skb)
                 skb_get(list);
 }

-static bool skb_pp_recycle(struct sk_buff *skb, void *data)
+static bool skb_pp_recycle(struct sk_buff *skb, void *data, bool napi_safe)
 {
         if (!IS_ENABLED(CONFIG_PAGE_POOL) || !skb->pp_recycle)
                 return false;
-        return page_pool_return_skb_page(virt_to_page(data));
+        return page_pool_return_skb_page(virt_to_page(data), napi_safe);
 }

 static void skb_kfree_head(void *head, unsigned int end_offset)
@@ -858,12 +906,12 @@ static void skb_kfree_head(void *head, unsigned int end_offset)
                 kfree(head);
 }

-static void skb_free_head(struct sk_buff *skb)
+static void skb_free_head(struct sk_buff *skb, bool napi_safe)
 {
         unsigned char *head = skb->head;

         if (skb->head_frag) {
-                if (skb_pp_recycle(skb, head))
+                if (skb_pp_recycle(skb, head, napi_safe))
                         return;
                 skb_free_frag(head);
         } else {
@@ -871,7 +919,8 @@ static void skb_free_head(struct sk_buff *skb)
         }
 }

-static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
+static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason,
+                             bool napi_safe)
 {
         struct skb_shared_info *shinfo = skb_shinfo(skb);
         int i;
@@ -890,13 +939,13 @@ static void skb_release_data(struct sk_buff *skb, enum skb_drop_reason reason)
         }

         for (i = 0; i < shinfo->nr_frags; i++)
-                __skb_frag_unref(&shinfo->frags[i], skb->pp_recycle);
+                napi_frag_unref(&shinfo->frags[i], skb->pp_recycle, napi_safe);

 free_head:
         if (shinfo->frag_list)
                 kfree_skb_list_reason(shinfo->frag_list, reason);

-        skb_free_head(skb);
+        skb_free_head(skb, napi_safe);
 exit:
         /* When we clone an SKB we copy the reycling bit. The pp_recycle
          * bit is only set on the head though, so in order to avoid races
@@ -957,11 +1006,12 @@ void skb_release_head_state(struct sk_buff *skb)
 }

 /* Free everything but the sk_buff shell. */
-static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)
+static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason,
+                            bool napi_safe)
 {
         skb_release_head_state(skb);
         if (likely(skb->head))
-                skb_release_data(skb, reason);
+                skb_release_data(skb, reason, napi_safe);
 }

 /**
@@ -975,7 +1025,7 @@ static void skb_release_all(struct sk_buff *skb, enum skb_drop_reason reason)

 void __kfree_skb(struct sk_buff *skb)
 {
-        skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+        skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED, false);
         kfree_skbmem(skb);
 }
 EXPORT_SYMBOL(__kfree_skb);
@@ -986,7 +1036,10 @@ bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
         if (unlikely(!skb_unref(skb)))
                 return false;

-        DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+        DEBUG_NET_WARN_ON_ONCE(reason == SKB_NOT_DROPPED_YET ||
+                               u32_get_bits(reason,
+                                            SKB_DROP_REASON_SUBSYS_MASK) >=
+                                SKB_DROP_REASON_SUBSYS_NUM);

         if (reason == SKB_CONSUMED)
                 trace_consume_skb(skb, __builtin_return_address(0));
@@ -1029,7 +1082,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
                 return;
         }

-        skb_release_all(skb, reason);
+        skb_release_all(skb, reason, false);
         sa->skb_array[sa->skb_count++] = skb;

         if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
@@ -1203,7 +1256,7 @@ EXPORT_SYMBOL(consume_skb);
 void __consume_stateless_skb(struct sk_buff *skb)
 {
         trace_consume_skb(skb, __builtin_return_address(0));
-        skb_release_data(skb, SKB_CONSUMED);
+        skb_release_data(skb, SKB_CONSUMED, false);
         kfree_skbmem(skb);
 }

@@ -1226,9 +1279,9 @@ static void napi_skb_cache_put(struct sk_buff *skb)
         }
 }

-void __kfree_skb_defer(struct sk_buff *skb)
+void __napi_kfree_skb(struct sk_buff *skb, enum skb_drop_reason reason)
 {
-        skb_release_all(skb, SKB_DROP_REASON_NOT_SPECIFIED);
+        skb_release_all(skb, reason, true);
         napi_skb_cache_put(skb);
 }

@@ -1266,7 +1319,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
                 return;
         }

-        skb_release_all(skb, SKB_CONSUMED);
+        skb_release_all(skb, SKB_CONSUMED, !!budget);
         napi_skb_cache_put(skb);
 }
 EXPORT_SYMBOL(napi_consume_skb);
@@ -1397,7 +1450,7 @@ EXPORT_SYMBOL_GPL(alloc_skb_for_msg);
  */
 struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src)
 {
-        skb_release_all(dst, SKB_CONSUMED);
+        skb_release_all(dst, SKB_CONSUMED, false);
         return __skb_clone(dst, src);
 }
 EXPORT_SYMBOL_GPL(skb_morph);
@@ -2020,9 +2073,9 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
                 if (skb_has_frag_list(skb))
                         skb_clone_fraglist(skb);

-                skb_release_data(skb, SKB_CONSUMED);
+                skb_release_data(skb, SKB_CONSUMED, false);
         } else {
-                skb_free_head(skb);
+                skb_free_head(skb, false);
         }
         off = (data + nhead) - skb->head;

@@ -5162,6 +5215,9 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb,
                 skb = alloc_skb(0, GFP_ATOMIC);
         } else {
                 skb = skb_clone(orig_skb, GFP_ATOMIC);
+
+                if (skb_orphan_frags_rx(skb, GFP_ATOMIC))
+                        return;
         }
         if (!skb)
                 return;
@@ -5189,6 +5245,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb,
 }
 EXPORT_SYMBOL_GPL(skb_tstamp_tx);

+#ifdef CONFIG_WIRELESS
 void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
 {
         struct sock *sk = skb->sk;
@@ -5214,6 +5271,7 @@ void skb_complete_wifi_ack(struct sk_buff *skb, bool acked)
         kfree_skb(skb);
 }
 EXPORT_SYMBOL_GPL(skb_complete_wifi_ack);
+#endif /* CONFIG_WIRELESS */

 /**
  * skb_partial_csum_set - set up and verify partial csum values for packet
@@ -5599,18 +5657,18 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,
         if (skb_cloned(to))
                 return false;

-        /* In general, avoid mixing slab allocated and page_pool allocated
-         * pages within the same SKB. However when @to is not pp_recycle and
-         * @from is cloned, we can transition frag pages from page_pool to
-         * reference counted.
-         *
-         * On the other hand, don't allow coalescing two pp_recycle SKBs if
-         * @from is cloned, in case the SKB is using page_pool fragment
+        /* In general, avoid mixing page_pool and non-page_pool allocated
+         * pages within the same SKB. Additionally avoid dealing with clones
+         * with page_pool pages, in case the SKB is using page_pool fragment
          * references (PP_FLAG_PAGE_FRAG). Since we only take full page
          * references for cloned SKBs at the moment that would result in
          * inconsistent reference counts.
+         * In theory we could take full references if @from is cloned and
+         * !@to->pp_recycle but its tricky (due to potential race with
+         * the clone disappearing) and rare, so not worth dealing with.
          */
-        if (to->pp_recycle != (from->pp_recycle && !skb_cloned(from)))
+        if (to->pp_recycle != from->pp_recycle ||
+            (from->pp_recycle && skb_cloned(from)))
                 return false;

         if (len <= skb_tailroom(to)) {
@@ -5941,7 +5999,6 @@ EXPORT_SYMBOL(skb_ensure_writable);
  */
 int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
 {
-        struct vlan_hdr *vhdr;
         int offset = skb->data - skb_mac_header(skb);
         int err;

@@ -5957,13 +6014,8 @@ int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci)
         skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN),
                            VLAN_HLEN);

-        vhdr = (struct vlan_hdr *)(skb->data + ETH_HLEN);
-        *vlan_tci = ntohs(vhdr->h_vlan_TCI);
-
-        memmove(skb->data + VLAN_HLEN, skb->data, 2 * ETH_ALEN);
-        __skb_pull(skb, VLAN_HLEN);
+        vlan_remove_tag(skb, vlan_tci);

-        vlan_set_encap_proto(skb, vhdr);
         skb->mac_header += VLAN_HLEN;

         if (skb_network_offset(skb) < ETH_HLEN)
@@ -6391,12 +6443,12 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
                         skb_frag_ref(skb, i);
                 if (skb_has_frag_list(skb))
                         skb_clone_fraglist(skb);
-                skb_release_data(skb, SKB_CONSUMED);
+                skb_release_data(skb, SKB_CONSUMED, false);
         } else {
                 /* we can reuse existing recount- all we did was
                  * relocate values
                  */
-                skb_free_head(skb);
+                skb_free_head(skb, false);
         }

         skb->head = data;
@@ -6531,7 +6583,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
                 skb_kfree_head(data, size);
                 return -ENOMEM;
         }
-        skb_release_data(skb, SKB_CONSUMED);
+        skb_release_data(skb, SKB_CONSUMED, false);

         skb->head = data;
         skb->head_frag = 0;
@@ -6815,7 +6867,6 @@ void skb_attempt_defer_free(struct sk_buff *skb)
 {
         int cpu = skb->alloc_cpu;
         struct softnet_data *sd;
-        unsigned long flags;
         unsigned int defer_max;
         bool kick;

@@ -6826,12 +6877,15 @@ nodefer: __kfree_skb(skb);
                 return;
         }

+        DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
+        DEBUG_NET_WARN_ON_ONCE(skb->destructor);
+
         sd = &per_cpu(softnet_data, cpu);
         defer_max = READ_ONCE(sysctl_skb_defer_max);
         if (READ_ONCE(sd->defer_count) >= defer_max)
                 goto nodefer;

-        spin_lock_irqsave(&sd->defer_lock, flags);
+        spin_lock_bh(&sd->defer_lock);
         /* Send an IPI every time queue reaches half capacity. */
         kick = sd->defer_count == (defer_max >> 1);
         /* Paired with the READ_ONCE() few lines above */
@@ -6840,7 +6894,7 @@ nodefer: __kfree_skb(skb);
         skb->next = sd->defer_list;
         /* Paired with READ_ONCE() in skb_defer_free_flush() */
         WRITE_ONCE(sd->defer_list, skb);
-        spin_unlock_irqrestore(&sd->defer_lock, flags);
+        spin_unlock_bh(&sd->defer_lock);

         /* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
          * if we are unlucky enough (this seems very unlikely).
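
Usage sketch (illustrative only, not part of this commit): the largest functional addition above is the per-subsystem drop reason registry. The code below shows how a subsystem could plug into it. SKB_DROP_REASON_SUBSYS_EXAMPLE and every example_* identifier are invented placeholders (a real user needs its own entry in enum skb_drop_reason_subsys); only struct drop_reason_list, drop_reasons_register_subsys(), drop_reasons_unregister_subsys() and SKB_DROP_REASON_SUBSYS_MASK come from the diff, and u32_encode_bits() is the <linux/bitfield.h> counterpart of the u32_get_bits() check added to __kfree_skb_reason().

/* Hypothetical module sketch -- not part of this commit. */
#include <linux/bitfield.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <net/dropreason.h>

/* Subsystem-private reason numbers, 0..n-1 within the subsystem. */
enum example_drop_reason {
        EXAMPLE_DROP_BAD_HDR,
        EXAMPLE_DROP_NO_PEER,
        EXAMPLE_DROP_MAX,
};

static const char * const example_drop_reasons[] = {
        [EXAMPLE_DROP_BAD_HDR] = "BAD_HDR",
        [EXAMPLE_DROP_NO_PEER] = "NO_PEER",
};

/* Must stay statically allocated: the core only RCU_INIT_POINTER()s it. */
static const struct drop_reason_list example_drop_reason_list = {
        .reasons = example_drop_reasons,
        .n_reasons = ARRAY_SIZE(example_drop_reasons),
};

/* Fold the (made-up) subsystem id into the bits that __kfree_skb_reason()
 * now validates with u32_get_bits(reason, SKB_DROP_REASON_SUBSYS_MASK).
 */
static enum skb_drop_reason example_reason(enum example_drop_reason r)
{
        return u32_encode_bits(SKB_DROP_REASON_SUBSYS_EXAMPLE,
                               SKB_DROP_REASON_SUBSYS_MASK) | r;
}

static int __init example_drop_init(void)
{
        drop_reasons_register_subsys(SKB_DROP_REASON_SUBSYS_EXAMPLE,
                                     &example_drop_reason_list);
        return 0;
}
module_init(example_drop_init);

static void __exit example_drop_exit(void)
{
        /* synchronize_rcu() inside ensures no reader still sees the list */
        drop_reasons_unregister_subsys(SKB_DROP_REASON_SUBSYS_EXAMPLE);
}
module_exit(example_drop_exit);

MODULE_LICENSE("GPL");

A drop site in such a subsystem would then call kfree_skb_reason(skb, example_reason(EXAMPLE_DROP_BAD_HDR)); the new DEBUG_NET_WARN_ON_ONCE() in __kfree_skb_reason() only checks that the subsystem bits stay below SKB_DROP_REASON_SUBSYS_NUM, and tracing tools can map the value back through drop_reasons_by_subsys[].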