diff options
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- | net/mptcp/protocol.c | 115 |
1 file changed, 40 insertions, 75 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index cc21fafd9726..da4257504fad 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -167,8 +167,8 @@ static bool mptcp_ooo_try_coalesce(struct mptcp_sock *msk, struct sk_buff *to, static void __mptcp_rmem_reclaim(struct sock *sk, int amount) { - amount >>= SK_MEM_QUANTUM_SHIFT; - mptcp_sk(sk)->rmem_fwd_alloc -= amount << SK_MEM_QUANTUM_SHIFT; + amount >>= PAGE_SHIFT; + mptcp_sk(sk)->rmem_fwd_alloc -= amount << PAGE_SHIFT; __sk_mem_reduce_allocated(sk, amount); } @@ -181,8 +181,8 @@ static void mptcp_rmem_uncharge(struct sock *sk, int size) reclaimable = msk->rmem_fwd_alloc - sk_unused_reserved_mem(sk); /* see sk_mem_uncharge() for the rationale behind the following schema */ - if (unlikely(reclaimable >= SK_RECLAIM_THRESHOLD)) - __mptcp_rmem_reclaim(sk, SK_RECLAIM_CHUNK); + if (unlikely(reclaimable >= PAGE_SIZE)) + __mptcp_rmem_reclaim(sk, reclaimable); } static void mptcp_rfree(struct sk_buff *skb) @@ -323,20 +323,16 @@ static bool mptcp_rmem_schedule(struct sock *sk, struct sock *ssk, int size) struct mptcp_sock *msk = mptcp_sk(sk); int amt, amount; - if (size < msk->rmem_fwd_alloc) + if (size <= msk->rmem_fwd_alloc) return true; + size -= msk->rmem_fwd_alloc; amt = sk_mem_pages(size); - amount = amt << SK_MEM_QUANTUM_SHIFT; - msk->rmem_fwd_alloc += amount; - if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) { - if (ssk->sk_forward_alloc < amount) { - msk->rmem_fwd_alloc -= amount; - return false; - } + amount = amt << PAGE_SHIFT; + if (!__sk_mem_raise_allocated(sk, size, amt, SK_MEM_RECV)) + return false; - ssk->sk_forward_alloc -= amount; - } + msk->rmem_fwd_alloc += amount; return true; } @@ -512,7 +508,7 @@ void __mptcp_subflow_send_ack(struct sock *ssk) tcp_send_ack(ssk); } -void mptcp_subflow_send_ack(struct sock *ssk) +static void mptcp_subflow_send_ack(struct sock *ssk) { bool slow; @@ -971,25 +967,6 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, 
df->data_seq + df->data_len == msk->write_seq; } -static void __mptcp_mem_reclaim_partial(struct sock *sk) -{ - int reclaimable = mptcp_sk(sk)->rmem_fwd_alloc - sk_unused_reserved_mem(sk); - - lockdep_assert_held_once(&sk->sk_lock.slock); - - if (reclaimable > SK_MEM_QUANTUM) - __mptcp_rmem_reclaim(sk, reclaimable - 1); - - sk_mem_reclaim_partial(sk); -} - -static void mptcp_mem_reclaim_partial(struct sock *sk) -{ - mptcp_data_lock(sk); - __mptcp_mem_reclaim_partial(sk); - mptcp_data_unlock(sk); -} - static void dfrag_uncharge(struct sock *sk, int len) { sk_mem_uncharge(sk, len); @@ -1009,7 +986,6 @@ static void __mptcp_clean_una(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_data_frag *dtmp, *dfrag; - bool cleaned = false; u64 snd_una; /* on fallback we just need to ignore snd_una, as this is really @@ -1032,7 +1008,6 @@ static void __mptcp_clean_una(struct sock *sk) } dfrag_clear(sk, dfrag); - cleaned = true; } dfrag = mptcp_rtx_head(sk); @@ -1054,7 +1029,6 @@ static void __mptcp_clean_una(struct sock *sk) dfrag->already_sent -= delta; dfrag_uncharge(sk, delta); - cleaned = true; } /* all retransmitted data acked, recovery completed */ @@ -1062,9 +1036,6 @@ static void __mptcp_clean_una(struct sock *sk) msk->recovery = false; out: - if (cleaned && tcp_under_memory_pressure(sk)) - __mptcp_mem_reclaim_partial(sk); - if (snd_una == READ_ONCE(msk->snd_nxt) && snd_una == READ_ONCE(msk->write_seq)) { if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk)) @@ -1216,12 +1187,6 @@ static struct sk_buff *mptcp_alloc_tx_skb(struct sock *sk, struct sock *ssk, boo { gfp_t gfp = data_lock_held ? 
GFP_ATOMIC : sk->sk_allocation; - if (unlikely(tcp_under_memory_pressure(sk))) { - if (data_lock_held) - __mptcp_mem_reclaim_partial(sk); - else - mptcp_mem_reclaim_partial(sk); - } return __mptcp_alloc_tx_skb(sk, ssk, gfp); } @@ -1275,6 +1240,9 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk, info->limit > dfrag->data_len)) return 0; + if (unlikely(!__tcp_can_send(ssk))) + return -EAGAIN; + /* compute send limit */ info->mss_now = tcp_send_mss(ssk, &info->size_goal, info->flags); copy = info->size_goal; @@ -1448,7 +1416,8 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) if (__mptcp_check_fallback(msk)) { if (!msk->first) return NULL; - return sk_stream_memory_free(msk->first) ? msk->first : NULL; + return __tcp_can_send(msk->first) && + sk_stream_memory_free(msk->first) ? msk->first : NULL; } /* re-use last subflow, if the burst allow that */ @@ -1599,6 +1568,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) { + if (ret == -EAGAIN) + continue; mptcp_push_release(ssk, &info); goto out; } @@ -1908,7 +1879,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) if (msk->rcvq_space.copied <= msk->rcvq_space.space) goto new_measure; - if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && + if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvmem, rcvbuf; u64 rcvwin, grow; @@ -1926,7 +1897,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) do_div(rcvwin, advmss); rcvbuf = min_t(u64, rcvwin * rcvmem, - sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])); if (rcvbuf > sk->sk_rcvbuf) { u32 window_clamp; @@ -2669,8 +2640,8 @@ static int mptcp_init_sock(struct sock *sk) mptcp_ca_reset(sk); sk_sockets_allocated_inc(sk); - sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; - sk->sk_sndbuf = 
sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]); + sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]); return 0; } @@ -2804,30 +2775,16 @@ static void __mptcp_wr_shutdown(struct sock *sk) static void __mptcp_destroy_sock(struct sock *sk) { - struct mptcp_subflow_context *subflow, *tmp; struct mptcp_sock *msk = mptcp_sk(sk); - LIST_HEAD(conn_list); pr_debug("msk=%p", msk); might_sleep(); - /* join list will be eventually flushed (with rst) at sock lock release time*/ - list_splice_init(&msk->conn_list, &conn_list); - mptcp_stop_timer(sk); sk_stop_timer(sk, &sk->sk_timer); msk->pm.status = 0; - /* clears msk->subflow, allowing the following loop to close - * even the initial subflow - */ - mptcp_dispose_initial_subflow(msk); - list_for_each_entry_safe(subflow, tmp, &conn_list, node) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - __mptcp_close_ssk(sk, ssk, subflow, 0); - } - sk->sk_prot->destroy(sk); WARN_ON_ONCE(msk->rmem_fwd_alloc); @@ -2919,24 +2876,20 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); inet_sk_state_store(sk, TCP_CLOSE); - mptcp_for_each_subflow(msk, subflow) { - struct sock *ssk = mptcp_subflow_tcp_sock(subflow); - - __mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE); - } - mptcp_stop_timer(sk); sk_stop_timer(sk, &sk->sk_timer); if (mptcp_sk(sk)->token) mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL); - mptcp_destroy_common(msk); + /* msk->subflow is still intact, the following will not free the first + * subflow + */ + mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); msk->last_snd = NULL; WRITE_ONCE(msk->flags, 0); msk->cb_flags = 0; @@ -3086,12 +3039,17 @@ out: return newsk; } -void mptcp_destroy_common(struct mptcp_sock *msk) +void mptcp_destroy_common(struct mptcp_sock *msk, unsigned 
int flags) { + struct mptcp_subflow_context *subflow, *tmp; struct sock *sk = (struct sock *)msk; __mptcp_clear_xmit(sk); + /* join list will be eventually flushed (with rst) at sock lock release time */ + list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) + __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); + /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ mptcp_data_lock(sk); skb_queue_splice_tail_init(&msk->receive_queue, &sk->sk_receive_queue); @@ -3113,7 +3071,11 @@ static void mptcp_destroy(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); - mptcp_destroy_common(msk); + /* clears msk->subflow, allowing the following to close + * even the initial subflow + */ + mptcp_dispose_initial_subflow(msk); + mptcp_destroy_common(msk, 0); sk_sockets_allocated_dec(sk); } @@ -3464,7 +3426,10 @@ static struct proto mptcp_prot = { .get_port = mptcp_get_port, .forward_alloc_get = mptcp_forward_alloc_get, .sockets_allocated = &mptcp_sockets_allocated, + .memory_allocated = &tcp_memory_allocated, + .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, + .memory_pressure = &tcp_memory_pressure, .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), |