diff options
Diffstat (limited to 'net/core/skbuff.c')
-rw-r--r-- | net/core/skbuff.c | 136 |
1 files changed, 111 insertions, 25 deletions
diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 83f8cd8aa2d1..74149dc4ee31 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -88,6 +88,7 @@ #include <linux/textsearch.h> #include "dev.h" +#include "netmem_priv.h" #include "sock_destructor.h" #ifdef CONFIG_SKB_EXTENSIONS @@ -314,8 +315,8 @@ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) fragsz = SKB_DATA_ALIGN(fragsz); local_lock_nested_bh(&napi_alloc_cache.bh_lock); - data = __page_frag_alloc_align(&nc->page, fragsz, GFP_ATOMIC, - align_mask); + data = __page_frag_alloc_align(&nc->page, fragsz, + GFP_ATOMIC | __GFP_NOWARN, align_mask); local_unlock_nested_bh(&napi_alloc_cache.bh_lock); return data; @@ -330,7 +331,8 @@ void *__netdev_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) struct page_frag_cache *nc = this_cpu_ptr(&netdev_alloc_cache); fragsz = SKB_DATA_ALIGN(fragsz); - data = __page_frag_alloc_align(nc, fragsz, GFP_ATOMIC, + data = __page_frag_alloc_align(nc, fragsz, + GFP_ATOMIC | __GFP_NOWARN, align_mask); } else { local_bh_disable(); @@ -349,7 +351,7 @@ static struct sk_buff *napi_skb_cache_get(void) local_lock_nested_bh(&napi_alloc_cache.bh_lock); if (unlikely(!nc->skb_count)) { nc->skb_count = kmem_cache_alloc_bulk(net_hotdata.skbuff_cache, - GFP_ATOMIC, + GFP_ATOMIC | __GFP_NOWARN, NAPI_SKB_CACHE_BULK, nc->skb_cache); if (unlikely(!nc->skb_count)) { @@ -418,7 +420,8 @@ struct sk_buff *slab_build_skb(void *data) struct sk_buff *skb; unsigned int size; - skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); + skb = kmem_cache_alloc(net_hotdata.skbuff_cache, + GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; @@ -469,7 +472,8 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) { struct sk_buff *skb; - skb = kmem_cache_alloc(net_hotdata.skbuff_cache, GFP_ATOMIC); + skb = kmem_cache_alloc(net_hotdata.skbuff_cache, + GFP_ATOMIC | __GFP_NOWARN); if (unlikely(!skb)) return NULL; @@ -917,9 +921,9 @@ static void skb_clone_fraglist(struct sk_buff *skb) skb_get(list); } -static bool is_pp_page(struct page *page) +static bool is_pp_netmem(netmem_ref netmem) { - return (page->pp_magic & ~0x3UL) == PP_SIGNATURE; + return (netmem_get_pp_magic(netmem) & ~0x3UL) == PP_SIGNATURE; } int skb_pp_cow_data(struct page_pool *pool, struct sk_buff **pskb, @@ -1017,9 +1021,7 @@ EXPORT_SYMBOL(skb_cow_data_for_xdp); #if IS_ENABLED(CONFIG_PAGE_POOL) bool napi_pp_put_page(netmem_ref netmem) { - struct page *page = netmem_to_page(netmem); - - page = compound_head(page); + netmem = netmem_compound_head(netmem); /* page->pp_magic is OR'ed with PP_SIGNATURE after the allocation * in order to preserve any existing bits, such as bit 0 for the @@ -1028,10 +1030,10 @@ bool napi_pp_put_page(netmem_ref netmem) * and page_is_pfmemalloc() is checked in __page_pool_put_page() * to avoid recycling the pfmemalloc page. */ - if (unlikely(!is_pp_page(page))) + if (unlikely(!is_pp_netmem(netmem))) return false; - page_pool_put_full_netmem(page->pp, page_to_netmem(page), false); + page_pool_put_full_netmem(netmem_get_pp(netmem), netmem, false); return true; } @@ -1058,7 +1060,7 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data) static int skb_pp_frag_ref(struct sk_buff *skb) { struct skb_shared_info *shinfo; - struct page *head_page; + netmem_ref head_netmem; int i; if (!skb->pp_recycle) @@ -1067,11 +1069,11 @@ static int skb_pp_frag_ref(struct sk_buff *skb) shinfo = skb_shinfo(skb); for (i = 0; i < shinfo->nr_frags; i++) { - head_page = compound_head(skb_frag_page(&shinfo->frags[i])); - if (likely(is_pp_page(head_page))) - page_pool_ref_page(head_page); + head_netmem = netmem_compound_head(shinfo->frags[i].netmem); + if (likely(is_pp_netmem(head_netmem))) + page_pool_ref_netmem(head_netmem); else - page_ref_inc(head_page); + page_ref_inc(netmem_to_page(head_netmem)); } return 0; } @@ -1369,6 +1371,14 @@ void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt) struct page *p; u8 *vaddr; + if (skb_frag_is_net_iov(frag)) { + printk("%sskb frag %d: not readable\n", level, i); + len -= skb_frag_size(frag); + if (!len) + break; + continue; + } + skb_frag_foreach_page(frag, skb_frag_off(frag), skb_frag_size(frag), p, p_off, p_len, copied) { @@ -1962,6 +1972,9 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) return -EINVAL; + if (!skb_frags_readable(skb)) + return -EFAULT; + if (!num_frags) goto release; @@ -2135,6 +2148,9 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) unsigned int size; int headerlen; + if (!skb_frags_readable(skb)) + return NULL; + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; @@ -2473,6 +2489,9 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, struct sk_buff *n; int oldheadroom; + if (!skb_frags_readable(skb)) + return NULL; + if (WARN_ON_ONCE(skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST)) return NULL; @@ -2817,6 +2836,9 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) */ int i, k, eat = (skb->tail + delta) - skb->end; + if (!skb_frags_readable(skb)) + return NULL; + if (eat > 0 || skb_cloned(skb)) { if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, GFP_ATOMIC)) @@ -2970,6 +2992,9 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) to += copy; } + if (!skb_frags_readable(skb)) + goto fault; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *f = &skb_shinfo(skb)->frags[i]; @@ -3158,9 +3183,15 @@ static bool __skb_splice_bits(struct sk_buff *skb, struct pipe_inode_info *pipe, /* * then map the fragments */ + if (!skb_frags_readable(skb)) + return false; + for (seg = 0; seg < skb_shinfo(skb)->nr_frags; seg++) { const skb_frag_t *f = &skb_shinfo(skb)->frags[seg]; + if (WARN_ON_ONCE(!skb_frag_page(f))) + return false; + if (__splice_segment(skb_frag_page(f), skb_frag_off(f), skb_frag_size(f), offset, len, spd, false, sk, pipe)) @@ -3378,6 +3409,9 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) from += copy; } + if (!skb_frags_readable(skb)) + goto fault; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; int end; @@ -3457,6 +3491,9 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, pos = copy; } + if (WARN_ON_ONCE(!skb_frags_readable(skb))) + return 0; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -3557,6 +3594,9 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, pos = copy; } + if (!skb_frags_readable(skb)) + return 0; + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { int end; @@ -4048,6 +4088,7 @@ static inline void skb_split_inside_header(struct sk_buff *skb, skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; + skb1->unreadable = skb->unreadable; skb_shinfo(skb)->nr_frags = 0; skb1->data_len = skb->data_len; skb1->len += skb1->data_len; @@ -4095,6 +4136,8 @@ static inline void skb_split_no_header(struct sk_buff *skb, pos += size; } skb_shinfo(skb1)->nr_frags = k; + + skb1->unreadable = skb->unreadable; } /** @@ -4169,8 +4212,7 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) /* Actual merge is delayed until the point when we know we can * commit all, so that we don't have to undo partial changes */ - if (!to || - !skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), + if (!skb_can_coalesce(tgt, to, skb_frag_page(fragfrom), skb_frag_off(fragfrom))) { merge = -1; } else { @@ -4333,6 +4375,9 @@ next_skb: return block_limit - abs_offset; } + if (!skb_frags_readable(st->cur_skb)) + return 0; + if (st->frag_idx == 0 && !st->frag_data) st->stepped_offset += skb_headlen(st->cur_skb); @@ -4409,6 +4454,41 @@ void skb_abort_seq_read(struct skb_seq_state *st) } EXPORT_SYMBOL(skb_abort_seq_read); +/** + * skb_copy_seq_read() - copy from a skb_seq_state to a buffer + * @st: source skb_seq_state + * @offset: offset in source + * @to: destination buffer + * @len: number of bytes to copy + * + * Copy @len bytes from @offset bytes into the source @st to the destination + * buffer @to. `offset` should increase (or be unchanged) with each subsequent + * call to this function. If offset needs to decrease from the previous use `st` + * should be reset first. + * + * Return: 0 on success or -EINVAL if the copy ended early + */ +int skb_copy_seq_read(struct skb_seq_state *st, int offset, void *to, int len) +{ + const u8 *data; + u32 sqlen; + + for (;;) { + sqlen = skb_seq_read(offset, &data, st); + if (sqlen == 0) + return -EINVAL; + if (sqlen >= len) { + memcpy(to, data, len); + return 0; + } + memcpy(to, data, sqlen); + to += sqlen; + offset += sqlen; + len -= sqlen; + } +} +EXPORT_SYMBOL(skb_copy_seq_read); + #define TS_SKB_CB(state) ((struct skb_seq_state *) &((state)->cb)) static unsigned int skb_ts_get_next_block(unsigned int offset, const u8 **text, @@ -5161,7 +5241,7 @@ EXPORT_SYMBOL_GPL(skb_to_sgvec); * 3. sg_unmark_end * 4. skb_to_sgvec(payload2) * - * When mapping mutilple payload conditionally, skb_to_sgvec_nomark + * When mapping multiple payload conditionally, skb_to_sgvec_nomark * is more preferable. */ int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg, @@ -5945,7 +6025,10 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, if (to->pp_recycle != from->pp_recycle) return false; - if (len <= skb_tailroom(to)) { + if (skb_frags_readable(from) != skb_frags_readable(to)) + return false; + + if (len <= skb_tailroom(to) && skb_frags_readable(from)) { if (len) BUG_ON(skb_copy_bits(from, 0, skb_put(to, len), len)); *delta_truesize = 0; @@ -6019,7 +6102,7 @@ EXPORT_SYMBOL(skb_try_coalesce); * @skb: buffer to clean * @xnet: packet is crossing netns * - * skb_scrub_packet can be used after encapsulating or decapsulting a packet + * skb_scrub_packet can be used after encapsulating or decapsulating a packet * into/from a tunnel. Some information have to be cleared during these * operations. * skb_scrub_packet can also be used to clean a skb before injecting it in @@ -6122,6 +6205,9 @@ int skb_ensure_writable(struct sk_buff *skb, unsigned int write_len) if (!pskb_may_pull(skb, write_len)) return -ENOMEM; + if (!skb_frags_readable(skb)) + return -EFAULT; + if (!skb_cloned(skb) || skb_clone_writable(skb, write_len)) return 0; @@ -6241,7 +6327,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) return err; skb->protocol = skb->vlan_proto; - skb->mac_len += VLAN_HLEN; + skb->network_header -= VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); } @@ -6801,7 +6887,7 @@ void skb_condense(struct sk_buff *skb) { if (skb->data_len) { if (skb->data_len > skb->end - skb->tail || - skb_cloned(skb)) + skb_cloned(skb) || !skb_frags_readable(skb)) return; /* Nice, we can free page frag(s) right now */ |