diff options
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/dev.c | 19 | ||||
-rw-r--r-- | net/core/filter.c | 14 | ||||
-rw-r--r-- | net/core/flow_dissector.c | 2 | ||||
-rw-r--r-- | net/core/flow_offload.c | 22 | ||||
-rw-r--r-- | net/core/netpoll.c | 6 | ||||
-rw-r--r-- | net/core/skbuff.c | 19 | ||||
-rw-r--r-- | net/core/skmsg.c | 4 | ||||
-rw-r--r-- | net/core/sock.c | 50 | ||||
-rw-r--r-- | net/core/sock_diag.c | 3 | ||||
-rw-r--r-- | net/core/sock_map.c | 22 | ||||
-rw-r--r-- | net/core/stream.c | 16 |
11 files changed, 119 insertions, 58 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index fc676b2610e3..5156c0edebe8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4374,12 +4374,17 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb, act = bpf_prog_run_xdp(xdp_prog, xdp); + /* check if bpf_xdp_adjust_head was used */ off = xdp->data - orig_data; - if (off > 0) - __skb_pull(skb, off); - else if (off < 0) - __skb_push(skb, -off); - skb->mac_header += off; + if (off) { + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + + skb->mac_header += off; + skb_reset_network_header(skb); + } /* check if bpf_xdp_adjust_tail was used. it can only "shrink" * pckt. @@ -8753,6 +8758,8 @@ int register_netdevice(struct net_device *dev) ret = notifier_to_errno(ret); if (ret) { rollback_registered(dev); + rcu_barrier(); + dev->reg_state = NETREG_UNREGISTERED; } /* @@ -9701,6 +9708,8 @@ static void __net_exit default_device_exit(struct net *net) /* Push remaining network devices to init_net */ snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex); + if (__dev_get_by_name(&init_net, fb_name)) + snprintf(fb_name, IFNAMSIZ, "dev%%d"); err = dev_change_net_namespace(dev, &init_net, fb_name); if (err) { pr_emerg("%s: failed to move %s to init_net: %d\n", diff --git a/net/core/filter.c b/net/core/filter.c index 4e2a79b2fd77..4c6a252d4212 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -7455,12 +7455,12 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, case offsetof(struct __sk_buff, gso_segs): /* si->dst_reg = skb_shinfo(SKB); */ #ifdef NET_SKBUFF_DATA_USES_OFFSET - *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), - si->dst_reg, si->src_reg, - offsetof(struct sk_buff, head)); *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), BPF_REG_AX, si->src_reg, offsetof(struct sk_buff, end)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, head), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, head)); *insn++ = BPF_ALU64_REG(BPF_ADD, si->dst_reg, BPF_REG_AX); #else *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, end), @@ -8757,13 +8757,13 @@ sk_reuseport_is_valid_access(int off, int size, return size == size_default; /* Fields that allow narrowing */ - case offsetof(struct sk_reuseport_md, eth_protocol): + case bpf_ctx_range(struct sk_reuseport_md, eth_protocol): if (size < FIELD_SIZEOF(struct sk_buff, protocol)) return false; /* fall through */ - case offsetof(struct sk_reuseport_md, ip_protocol): - case offsetof(struct sk_reuseport_md, bind_inany): - case offsetof(struct sk_reuseport_md, len): + case bpf_ctx_range(struct sk_reuseport_md, ip_protocol): + case bpf_ctx_range(struct sk_reuseport_md, bind_inany): + case bpf_ctx_range(struct sk_reuseport_md, len): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3e6fedb57bc1..2470b4b404e6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -142,8 +142,8 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) mutex_unlock(&flow_dissector_mutex); return -ENOENT; } - bpf_prog_put(attached); RCU_INIT_POINTER(net->flow_dissector_prog, NULL); + bpf_prog_put(attached); mutex_unlock(&flow_dissector_mutex); return 0; } diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 76f8db3841d7..d63b970784dc 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -165,7 +165,7 @@ void flow_rule_match_enc_opts(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_enc_opts); -struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb, +struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) { @@ -175,7 +175,6 @@ struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb, if (!block_cb) return ERR_PTR(-ENOMEM); - block_cb->net = net; block_cb->cb = cb; block_cb->cb_ident = cb_ident; block_cb->cb_priv = cb_priv; @@ -194,14 +193,13 @@ void flow_block_cb_free(struct flow_block_cb *block_cb) } EXPORT_SYMBOL(flow_block_cb_free); -struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f, - tc_setup_cb_t *cb, void *cb_ident) +struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block, + flow_setup_cb_t *cb, void *cb_ident) { struct flow_block_cb *block_cb; - list_for_each_entry(block_cb, f->driver_block_list, driver_list) { - if (block_cb->net == f->net && - block_cb->cb == cb && + list_for_each_entry(block_cb, &block->cb_list, list) { + if (block_cb->cb == cb && block_cb->cb_ident == cb_ident) return block_cb; } @@ -228,7 +226,7 @@ unsigned int flow_block_cb_decref(struct flow_block_cb *block_cb) } EXPORT_SYMBOL(flow_block_cb_decref); -bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident, +bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident, struct list_head *driver_block_list) { struct flow_block_cb *block_cb; @@ -245,7 +243,8 @@ EXPORT_SYMBOL(flow_block_cb_is_busy); int flow_block_cb_setup_simple(struct flow_block_offload *f, struct list_head *driver_block_list, - tc_setup_cb_t *cb, void *cb_ident, void *cb_priv, + flow_setup_cb_t *cb, + void *cb_ident, void *cb_priv, bool ingress_only) { struct flow_block_cb *block_cb; @@ -261,8 +260,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list)) return -EBUSY; - block_cb = flow_block_cb_alloc(f->net, cb, cb_ident, - cb_priv, NULL); + block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL); if (IS_ERR(block_cb)) return PTR_ERR(block_cb); @@ -270,7 +268,7 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f, list_add_tail(&block_cb->driver_list, driver_block_list); return 0; case FLOW_BLOCK_UNBIND: - block_cb = flow_block_cb_lookup(f, cb, cb_ident); + block_cb = flow_block_cb_lookup(f->block, cb, cb_ident); if (!block_cb) return -ENOENT; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2cf27da1baeb..849380a622ef 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -122,7 +122,7 @@ static void queue_process(struct work_struct *work) txq = netdev_get_tx_queue(dev, q_index); HARD_TX_LOCK(dev, txq, smp_processor_id()); if (netif_xmit_frozen_or_stopped(txq) || - netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) { + !dev_xmit_complete(netpoll_start_xmit(skb, dev, txq))) { skb_queue_head(&npinfo->txq, skb); HARD_TX_UNLOCK(dev, txq); local_irq_restore(flags); @@ -335,7 +335,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, HARD_TX_UNLOCK(dev, txq); - if (status == NETDEV_TX_OK) + if (dev_xmit_complete(status)) break; } @@ -352,7 +352,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, } - if (status != NETDEV_TX_OK) { + if (!dev_xmit_complete(status)) { skb_queue_tail(&npinfo->txq, skb); schedule_delayed_work(&npinfo->tx_work,0); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 0338820ee0ec..982d8d12830e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3664,6 +3664,25 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, int pos; int dummy; + if (list_skb && !list_skb->head_frag && skb_headlen(list_skb) && + (skb_shinfo(head_skb)->gso_type & SKB_GSO_DODGY)) { + /* gso_size is untrusted, and we have a frag_list with a linear + * non head_frag head. + * + * (we assume checking the first list_skb member suffices; + * i.e if either of the list_skb members have non head_frag + * head, then the first one has too). + * + * If head_skb's headlen does not fit requested gso_size, it + * means that the frag_list members do NOT terminate on exact + * gso_size boundaries. Hence we cannot perform skb_frag_t page + * sharing. Therefore we must fallback to copying the frag_list + * skbs; we do so by disabling SG. + */ + if (mss != GSO_BY_FRAGS && mss != skb_headlen(head_skb)) + features &= ~NETIF_F_SG; + } + __skb_push(head_skb, doffset); proto = skb_network_protocol(head_skb, &dummy); if (unlikely(!proto)) diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 93bffaad2135..6832eeb4b785 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -585,12 +585,12 @@ EXPORT_SYMBOL_GPL(sk_psock_destroy); void sk_psock_drop(struct sock *sk, struct sk_psock *psock) { - rcu_assign_sk_user_data(sk, NULL); sk_psock_cork_free(psock); sk_psock_zap_ingress(psock); - sk_psock_restore_proto(sk, psock); write_lock_bh(&sk->sk_callback_lock); + sk_psock_restore_proto(sk, psock); + rcu_assign_sk_user_data(sk, NULL); if (psock->progs.skb_parser) sk_psock_stop_strp(sk, psock); write_unlock_bh(&sk->sk_callback_lock); diff --git a/net/core/sock.c b/net/core/sock.c index d57b0cc995a0..545fac19a711 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1992,6 +1992,19 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk) } EXPORT_SYMBOL(skb_set_owner_w); +static bool can_skb_orphan_partial(const struct sk_buff *skb) +{ +#ifdef CONFIG_TLS_DEVICE + /* Drivers depend on in-order delivery for crypto offload, + * partial orphan breaks out-of-order-OK logic. + */ + if (skb->decrypted) + return false; +#endif + return (skb->destructor == sock_wfree || + (IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree)); +} + /* This helper is used by netem, as it can hold packets in its * delay queue. We want to allow the owner socket to send more * packets, as if they were already TX completed by a typical driver. @@ -2003,11 +2016,7 @@ void skb_orphan_partial(struct sk_buff *skb) if (skb_is_tcp_pure_ack(skb)) return; - if (skb->destructor == sock_wfree -#ifdef CONFIG_INET - || skb->destructor == tcp_wfree -#endif - ) { + if (can_skb_orphan_partial(skb)) { struct sock *sk = skb->sk; if (refcount_inc_not_zero(&sk->sk_refcnt)) { @@ -3278,16 +3287,17 @@ static __init int net_inuse_init(void) core_initcall(net_inuse_init); -static void assign_proto_idx(struct proto *prot) +static int assign_proto_idx(struct proto *prot) { prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { pr_err("PROTO_INUSE_NR exhausted\n"); - return; + return -ENOSPC; } set_bit(prot->inuse_idx, proto_inuse_idx); + return 0; } static void release_proto_idx(struct proto *prot) @@ -3296,8 +3306,9 @@ static void release_proto_idx(struct proto *prot) clear_bit(prot->inuse_idx, proto_inuse_idx); } #else -static inline void assign_proto_idx(struct proto *prot) +static inline int assign_proto_idx(struct proto *prot) { + return 0; } static inline void release_proto_idx(struct proto *prot) @@ -3346,6 +3357,8 @@ static int req_prot_init(const struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { + int ret = -ENOBUFS; + if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, @@ -3382,20 +3395,27 @@ int proto_register(struct proto *prot, int alloc_slab) } mutex_lock(&proto_list_mutex); + ret = assign_proto_idx(prot); + if (ret) { + mutex_unlock(&proto_list_mutex); + goto out_free_timewait_sock_slab_name; + } list_add(&prot->node, &proto_list); - assign_proto_idx(prot); mutex_unlock(&proto_list_mutex); - return 0; + return ret; out_free_timewait_sock_slab_name: - kfree(prot->twsk_prot->twsk_slab_name); + if (alloc_slab && prot->twsk_prot) + kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: - req_prot_cleanup(prot->rsk_prot); + if (alloc_slab) { + req_prot_cleanup(prot->rsk_prot); - kmem_cache_destroy(prot->slab); - prot->slab = NULL; + kmem_cache_destroy(prot->slab); + prot->slab = NULL; + } out: - return -ENOBUFS; + return ret; } EXPORT_SYMBOL(proto_register); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index 3312a5849a97..c13ffbd33d8d 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -19,6 +19,7 @@ static const struct sock_diag_handler *sock_diag_handlers[AF_MAX]; static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh); static DEFINE_MUTEX(sock_diag_table_mutex); static struct workqueue_struct *broadcast_wq; +static atomic64_t cookie_gen; u64 sock_gen_cookie(struct sock *sk) { @@ -27,7 +28,7 @@ u64 sock_gen_cookie(struct sock *sk) if (res) return res; - res = atomic64_inc_return(&sock_net(sk)->cookie_gen); + res = atomic64_inc_return(&cookie_gen); atomic64_cmpxchg(&sk->sk_cookie, 0, res); } } diff --git a/net/core/sock_map.c b/net/core/sock_map.c index 52d4faeee18b..50916f9bc4f2 100644 --- a/net/core/sock_map.c +++ b/net/core/sock_map.c @@ -247,6 +247,8 @@ static void sock_map_free(struct bpf_map *map) raw_spin_unlock_bh(&stab->lock); rcu_read_unlock(); + synchronize_rcu(); + bpf_map_area_free(stab->sks); kfree(stab); } @@ -276,16 +278,20 @@ static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test, struct sock **psk) { struct sock *sk; + int err = 0; raw_spin_lock_bh(&stab->lock); sk = *psk; if (!sk_test || sk_test == sk) - *psk = NULL; + sk = xchg(psk, NULL); + + if (likely(sk)) + sock_map_unref(sk, psk); + else + err = -EINVAL; + raw_spin_unlock_bh(&stab->lock); - if (unlikely(!sk)) - return -EINVAL; - sock_map_unref(sk, psk); - return 0; + return err; } static void sock_map_delete_from_link(struct bpf_map *map, struct sock *sk, @@ -328,6 +334,7 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, struct sock *sk, u64 flags) { struct bpf_stab *stab = container_of(map, struct bpf_stab, map); + struct inet_connection_sock *icsk = inet_csk(sk); struct sk_psock_link *link; struct sk_psock *psock; struct sock *osk; @@ -338,6 +345,8 @@ static int sock_map_update_common(struct bpf_map *map, u32 idx, return -EINVAL; if (unlikely(idx >= map->max_entries)) return -E2BIG; + if (unlikely(icsk->icsk_ulp_data)) + return -EINVAL; link = sk_psock_init_link(); if (!link) @@ -647,6 +656,7 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, struct sock *sk, u64 flags) { struct bpf_htab *htab = container_of(map, struct bpf_htab, map); + struct inet_connection_sock *icsk = inet_csk(sk); u32 key_size = map->key_size, hash; struct bpf_htab_elem *elem, *elem_new; struct bpf_htab_bucket *bucket; @@ -657,6 +667,8 @@ static int sock_hash_update_common(struct bpf_map *map, void *key, WARN_ON_ONCE(!rcu_read_lock_held()); if (unlikely(flags > BPF_EXIST)) return -EINVAL; + if (unlikely(icsk->icsk_ulp_data)) + return -EINVAL; link = sk_psock_init_link(); if (!link) diff --git a/net/core/stream.c b/net/core/stream.c index e94bb02a5629..4f1d4aa5fb38 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -120,7 +120,6 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; - bool noblock = (*timeo_p ? false : true); DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk_stream_memory_free(sk)) @@ -133,11 +132,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - if (!*timeo_p) { - if (noblock) - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - goto do_nonblock; - } + if (!*timeo_p) + goto do_eagain; if (signal_pending(current)) goto do_interrupted; sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -169,7 +165,13 @@ out: do_error: err = -EPIPE; goto out; -do_nonblock: +do_eagain: + /* Make sure that whenever EAGAIN is returned, EPOLLOUT event can + * be generated later. + * When TCP receives ACK packets that make room, tcp_check_space() + * only calls tcp_new_space() if SOCK_NOSPACE is set. + */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; goto out; do_interrupted: |