aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/inet_fragment.c29
-rw-r--r--net/ipv4/ip_fragment.c12
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c7
-rw-r--r--net/ipv4/netfilter/nf_nat_masquerade_ipv4.c38
-rw-r--r--net/ipv4/netfilter/nft_masq_ipv4.c4
-rw-r--r--net/ipv4/tcp_input.c31
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/tcp_timer.c12
11 files changed, 100 insertions, 50 deletions
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index bcb11f3a27c0..760a9e52e02b 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -178,21 +178,22 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
- void *arg)
+ void *arg,
+ struct inet_frag_queue **prev)
{
struct inet_frags *f = nf->f;
struct inet_frag_queue *q;
- int err;
q = inet_frag_alloc(nf, f, arg);
- if (!q)
+ if (!q) {
+ *prev = ERR_PTR(-ENOMEM);
return NULL;
-
+ }
mod_timer(&q->timer, jiffies + nf->timeout);
- err = rhashtable_insert_fast(&nf->rhashtable, &q->node,
- f->rhash_params);
- if (err < 0) {
+ *prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
+ &q->node, f->rhash_params);
+ if (*prev) {
q->flags |= INET_FRAG_COMPLETE;
inet_frag_kill(q);
inet_frag_destroy(q);
@@ -204,22 +205,22 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
{
- struct inet_frag_queue *fq;
+ struct inet_frag_queue *fq = NULL, *prev;
if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
return NULL;
rcu_read_lock();
- fq = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
- if (fq) {
+ prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
+ if (!prev)
+ fq = inet_frag_create(nf, key, &prev);
+ if (prev && !IS_ERR(prev)) {
+ fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
fq = NULL;
- rcu_read_unlock();
- return fq;
}
rcu_read_unlock();
-
- return inet_frag_create(nf, key);
+ return fq;
}
EXPORT_SYMBOL(inet_frag_find);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 9b0158fa431f..d6ee343fdb86 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -722,10 +722,14 @@ struct sk_buff *ip_check_defrag(struct net *net, struct sk_buff *skb, u32 user)
if (ip_is_fragment(&iph)) {
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb) {
- if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
- return skb;
- if (pskb_trim_rcsum(skb, netoff + len))
- return skb;
+ if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) {
+ kfree_skb(skb);
+ return NULL;
+ }
+ if (pskb_trim_rcsum(skb, netoff + len)) {
+ kfree_skb(skb);
+ return NULL;
+ }
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
if (ip_defrag(net, skb, user))
return NULL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index c09219e7f230..5dbec21856f4 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -939,7 +939,7 @@ static int __ip_append_data(struct sock *sk,
unsigned int fraglen;
unsigned int fraggap;
unsigned int alloclen;
- unsigned int pagedlen = 0;
+ unsigned int pagedlen;
struct sk_buff *skb_prev;
alloc_new_skb:
skb_prev = skb;
@@ -956,6 +956,7 @@ alloc_new_skb:
if (datalen > mtu - fragheaderlen)
datalen = maxfraglen - fragheaderlen;
fraglen = datalen + fragheaderlen;
+ pagedlen = 0;
if ((flags & MSG_MORE) &&
!(rt->dst.dev->features&NETIF_F_SG))
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 26c36cccabdc..fffcc130900e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1246,7 +1246,7 @@ int ip_setsockopt(struct sock *sk, int level,
return -ENOPROTOOPT;
err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_SET_REPLACE &&
optname < BPFILTER_IPT_SET_MAX)
err = bpfilter_ip_set_sockopt(sk, optname, optval, optlen);
@@ -1559,7 +1559,7 @@ int ip_getsockopt(struct sock *sk, int level,
int err;
err = do_ip_getsockopt(sk, level, optname, optval, optlen, 0);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
@@ -1596,7 +1596,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
err = do_ip_getsockopt(sk, level, optname, optval, optlen,
MSG_CMSG_COMPAT);
-#ifdef CONFIG_BPFILTER
+#if IS_ENABLED(CONFIG_BPFILTER_UMH)
if (optname >= BPFILTER_IPT_SO_GET_INFO &&
optname < BPFILTER_IPT_GET_MAX)
err = bpfilter_ip_get_sockopt(sk, optname, optval, optlen);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index dde671e97829..c248e0dccbe1 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -80,7 +80,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
iph->version = 4;
iph->ihl = sizeof(struct iphdr) >> 2;
- iph->frag_off = df;
+ iph->frag_off = ip_mtu_locked(&rt->dst) ? 0 : df;
iph->protocol = proto;
iph->tos = tos;
iph->daddr = dst;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index ce1512b02cb2..fd3f9e8a74da 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -81,9 +81,12 @@ static int __init masquerade_tg_init(void)
int ret;
ret = xt_register_target(&masquerade_tg_reg);
+ if (ret)
+ return ret;
- if (ret == 0)
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ xt_unregister_target(&masquerade_tg_reg);
return ret;
}
diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
index a9d5e013e555..41327bb99093 100644
--- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
+++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c
@@ -147,28 +147,50 @@ static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
};
-static atomic_t masquerade_notifier_refcount = ATOMIC_INIT(0);
+static int masq_refcnt;
+static DEFINE_MUTEX(masq_mutex);
-void nf_nat_masquerade_ipv4_register_notifier(void)
+int nf_nat_masquerade_ipv4_register_notifier(void)
{
+ int ret = 0;
+
+ mutex_lock(&masq_mutex);
/* check if the notifier was already set */
- if (atomic_inc_return(&masquerade_notifier_refcount) > 1)
- return;
+ if (++masq_refcnt > 1)
+ goto out_unlock;
/* Register for device down reports */
- register_netdevice_notifier(&masq_dev_notifier);
+ ret = register_netdevice_notifier(&masq_dev_notifier);
+ if (ret)
+ goto err_dec;
/* Register IP address change reports */
- register_inetaddr_notifier(&masq_inet_notifier);
+ ret = register_inetaddr_notifier(&masq_inet_notifier);
+ if (ret)
+ goto err_unregister;
+
+ mutex_unlock(&masq_mutex);
+ return ret;
+
+err_unregister:
+ unregister_netdevice_notifier(&masq_dev_notifier);
+err_dec:
+ masq_refcnt--;
+out_unlock:
+ mutex_unlock(&masq_mutex);
+ return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_register_notifier);
void nf_nat_masquerade_ipv4_unregister_notifier(void)
{
+ mutex_lock(&masq_mutex);
/* check if the notifier still has clients */
- if (atomic_dec_return(&masquerade_notifier_refcount) > 0)
- return;
+ if (--masq_refcnt > 0)
+ goto out_unlock;
unregister_netdevice_notifier(&masq_dev_notifier);
unregister_inetaddr_notifier(&masq_inet_notifier);
+out_unlock:
+ mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4_unregister_notifier);
diff --git a/net/ipv4/netfilter/nft_masq_ipv4.c b/net/ipv4/netfilter/nft_masq_ipv4.c
index f1193e1e928a..6847de1d1db8 100644
--- a/net/ipv4/netfilter/nft_masq_ipv4.c
+++ b/net/ipv4/netfilter/nft_masq_ipv4.c
@@ -69,7 +69,9 @@ static int __init nft_masq_ipv4_module_init(void)
if (ret < 0)
return ret;
- nf_nat_masquerade_ipv4_register_notifier();
+ ret = nf_nat_masquerade_ipv4_register_notifier();
+ if (ret)
+ nft_unregister_expr(&nft_masq_ipv4_type);
return ret;
}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2868ef28ce52..a9d9555a973f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -579,10 +579,12 @@ static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
u32 delta_us;
- if (!delta)
- delta = 1;
- delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- tcp_rcv_rtt_update(tp, delta_us, 0);
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ if (!delta)
+ delta = 1;
+ delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ tcp_rcv_rtt_update(tp, delta_us, 0);
+ }
}
}
@@ -2910,9 +2912,11 @@ static bool tcp_ack_update_rtt(struct sock *sk, const int flag,
if (seq_rtt_us < 0 && tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
flag & FLAG_ACKED) {
u32 delta = tcp_time_stamp(tp) - tp->rx_opt.rcv_tsecr;
- u32 delta_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
- seq_rtt_us = ca_rtt_us = delta_us;
+ if (likely(delta < INT_MAX / (USEC_PER_SEC / TCP_TS_HZ))) {
+ seq_rtt_us = delta * (USEC_PER_SEC / TCP_TS_HZ);
+ ca_rtt_us = seq_rtt_us;
+ }
}
rs->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet (or -1) */
if (seq_rtt_us < 0)
@@ -4268,7 +4272,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
* If the sack array is full, forget about the last one.
*/
if (this_sack >= TCP_NUM_SACKS) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
this_sack--;
tp->rx_opt.num_sacks--;
@@ -4363,6 +4367,7 @@ static bool tcp_try_coalesce(struct sock *sk,
if (TCP_SKB_CB(from)->has_rxtstamp) {
TCP_SKB_CB(to)->has_rxtstamp = true;
to->tstamp = from->tstamp;
+ skb_hwtstamps(to)->hwtstamp = skb_hwtstamps(from)->hwtstamp;
}
return true;
@@ -5188,7 +5193,17 @@ send_now:
if (!tcp_is_sack(tp) ||
tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
goto send_now;
- tp->compressed_ack++;
+
+ if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
+ tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+ NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = 0;
+ }
+
+ if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+ goto send_now;
if (hrtimer_is_queued(&tp->compressed_ack_timer))
return;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9c34b97d365d..3f510cad0b3e 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -180,10 +180,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
{
struct tcp_sock *tp = tcp_sk(sk);
- if (unlikely(tp->compressed_ack)) {
+ if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
- tp->compressed_ack);
- tp->compressed_ack = 0;
+ tp->compressed_ack - TCP_FASTRETRANS_THRESH);
+ tp->compressed_ack = TCP_FASTRETRANS_THRESH;
if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
__sock_put(sk);
}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 676020663ce8..091c53925e4d 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -40,15 +40,17 @@ static u32 tcp_clamp_rto_to_user_timeout(const struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
u32 elapsed, start_ts;
+ s32 remaining;
start_ts = tcp_retransmit_stamp(sk);
if (!icsk->icsk_user_timeout || !start_ts)
return icsk->icsk_rto;
elapsed = tcp_time_stamp(tcp_sk(sk)) - start_ts;
- if (elapsed >= icsk->icsk_user_timeout)
+ remaining = icsk->icsk_user_timeout - elapsed;
+ if (remaining <= 0)
return 1; /* user timeout has passed; fire ASAP */
- else
- return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(icsk->icsk_user_timeout - elapsed));
+
+ return min_t(u32, icsk->icsk_rto, msecs_to_jiffies(remaining));
}
/**
@@ -209,7 +211,7 @@ static bool retransmits_timed_out(struct sock *sk,
(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
timeout = jiffies_to_msecs(timeout);
}
- return (tcp_time_stamp(tcp_sk(sk)) - start_ts) >= timeout;
+ return (s32)(tcp_time_stamp(tcp_sk(sk)) - start_ts - timeout) >= 0;
}
/* A write timeout has occurred. Process the after effects. */
@@ -740,7 +742,7 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
bh_lock_sock(sk);
if (!sock_owned_by_user(sk)) {
- if (tp->compressed_ack)
+ if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
tcp_send_ack(sk);
} else {
if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,