aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_trie.c13
-rw-r--r--net/ipv4/inet_connection_sock.c47
-rw-r--r--net/ipv4/inet_diag.c4
-rw-r--r--net/ipv4/ip_output.c5
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c4
-rw-r--r--net/ipv4/tcp_ipv4.c81
-rw-r--r--net/ipv4/tcp_timer.c8
7 files changed, 85 insertions, 77 deletions
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e3b4aee4244e..2c7c299ee2b9 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -830,7 +830,7 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn)
/* Double as long as the resulting node has a number of
* nonempty nodes that are above the threshold.
*/
- while (should_inflate(tp, tn) && max_work--) {
+ while (should_inflate(tp, tn) && max_work) {
tp = inflate(t, tn);
if (!tp) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -839,17 +839,21 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn)
break;
}
+ max_work--;
tn = get_child(tp, cindex);
}
+ /* update parent in case inflate failed */
+ tp = node_parent(tn);
+
/* Return if at least one inflate is run */
if (max_work != MAX_WORK)
- return node_parent(tn);
+ return tp;
/* Halve as long as the number of empty children in this
* node is above threshold.
*/
- while (should_halve(tp, tn) && max_work--) {
+ while (should_halve(tp, tn) && max_work) {
tp = halve(t, tn);
if (!tp) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
@@ -858,6 +862,7 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn)
break;
}
+ max_work--;
tn = get_child(tp, cindex);
}
@@ -865,7 +870,7 @@ static struct key_vector *resize(struct trie *t, struct key_vector *tn)
if (should_collapse(tn))
return collapse(t, tn);
- /* update parent in case inflate or halve failed */
+ /* update parent in case halve failed */
tp = node_parent(tn);
/* Return if at least one deflate was run */
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 844808d9337b..79c0c9439fdc 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -403,18 +403,17 @@ struct dst_entry *inet_csk_route_req(struct sock *sk,
struct flowi4 *fl4,
const struct request_sock *req)
{
- struct rtable *rt;
const struct inet_request_sock *ireq = inet_rsk(req);
- struct ip_options_rcu *opt = inet_rsk(req)->opt;
- struct net *net = sock_net(sk);
- int flags = inet_sk_flowi_flags(sk);
+ struct net *net = read_pnet(&ireq->ireq_net);
+ struct ip_options_rcu *opt = ireq->opt;
+ struct rtable *rt;
- flowi4_init_output(fl4, sk->sk_bound_dev_if, ireq->ir_mark,
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
- sk->sk_protocol,
- flags,
+ sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
- ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
+ ireq->ir_loc_addr, ireq->ir_rmt_port,
+ htons(ireq->ir_num));
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -436,9 +435,9 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
const struct request_sock *req)
{
const struct inet_request_sock *ireq = inet_rsk(req);
+ struct net *net = read_pnet(&ireq->ireq_net);
struct inet_sock *newinet = inet_sk(newsk);
struct ip_options_rcu *opt;
- struct net *net = sock_net(sk);
struct flowi4 *fl4;
struct rtable *rt;
@@ -446,11 +445,12 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk,
rcu_read_lock();
opt = rcu_dereference(newinet->inet_opt);
- flowi4_init_output(fl4, sk->sk_bound_dev_if, inet_rsk(req)->ir_mark,
+ flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark,
RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE,
sk->sk_protocol, inet_sk_flowi_flags(sk),
(opt && opt->opt.srr) ? opt->opt.faddr : ireq->ir_rmt_addr,
- ireq->ir_loc_addr, ireq->ir_rmt_port, inet_sk(sk)->inet_sport);
+ ireq->ir_loc_addr, ireq->ir_rmt_port,
+ htons(ireq->ir_num));
security_req_classify_flow(req, flowi4_to_flowi(fl4));
rt = ip_route_output_flow(net, fl4, sk);
if (IS_ERR(rt))
@@ -495,7 +495,7 @@ struct request_sock *inet_csk_search_req(struct sock *sk,
u32 hash = inet_synq_hash(raddr, rport, lopt->hash_rnd,
lopt->nr_table_entries);
- write_lock(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_lock(&icsk->icsk_accept_queue.syn_wait_lock);
for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) {
const struct inet_request_sock *ireq = inet_rsk(req);
@@ -508,7 +508,7 @@ struct request_sock *inet_csk_search_req(struct sock *sk,
break;
}
}
- write_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock);
return req;
}
@@ -571,8 +571,9 @@ static void reqsk_timer_handler(unsigned long data)
struct inet_connection_sock *icsk = inet_csk(sk_listener);
struct request_sock_queue *queue = &icsk->icsk_accept_queue;
struct listen_sock *lopt = queue->listen_opt;
- int expire = 0, resend = 0;
+ int qlen, expire = 0, resend = 0;
int max_retries, thresh;
+ u8 defer_accept;
if (sk_listener->sk_state != TCP_LISTEN || !lopt) {
reqsk_put(req);
@@ -598,21 +599,23 @@ static void reqsk_timer_handler(unsigned long data)
* embrions; and abort old ones without pity, if old
* ones are about to clog our table.
*/
- if (listen_sock_qlen(lopt) >> (lopt->max_qlen_log - 1)) {
+ qlen = listen_sock_qlen(lopt);
+ if (qlen >> (lopt->max_qlen_log - 1)) {
int young = listen_sock_young(lopt) << 1;
while (thresh > 2) {
- if (listen_sock_qlen(lopt) < young)
+ if (qlen < young)
break;
thresh--;
young <<= 1;
}
}
- if (queue->rskq_defer_accept)
- max_retries = queue->rskq_defer_accept;
- syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept,
+ defer_accept = READ_ONCE(queue->rskq_defer_accept);
+ if (defer_accept)
+ max_retries = defer_accept;
+ syn_ack_recalc(req, thresh, max_retries, defer_accept,
&expire, &resend);
- req->rsk_ops->syn_ack_timeout(sk_listener, req);
+ req->rsk_ops->syn_ack_timeout(req);
if (!expire &&
(!resend ||
!inet_rtx_syn_ack(sk_listener, req) ||
@@ -647,10 +650,10 @@ void reqsk_queue_hash_req(struct request_sock_queue *queue,
setup_timer(&req->rsk_timer, reqsk_timer_handler, (unsigned long)req);
req->rsk_hash = hash;
- write_lock(&queue->syn_wait_lock);
+ spin_lock(&queue->syn_wait_lock);
req->dl_next = lopt->syn_table[hash];
lopt->syn_table[hash] = req;
- write_unlock(&queue->syn_wait_lock);
+ spin_unlock(&queue->syn_wait_lock);
mod_timer_pinned(&req->rsk_timer, jiffies + timeout);
}
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index f984b2001d0a..76322c9867d5 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -728,7 +728,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
entry.family = sk->sk_family;
- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
lopt = icsk->icsk_accept_queue.listen_opt;
if (!lopt || !listen_sock_qlen(lopt))
@@ -776,7 +776,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
}
out:
- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
return err;
}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index a7aea2048a0d..90b49e88e84a 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -636,10 +636,7 @@ slow_path:
left = skb->len - hlen; /* Space per frame */
ptr = hlen; /* Where to start from */
- /* for bridged IP traffic encapsulated inside f.e. a vlan header,
- * we need to make room for the encapsulating header
- */
- ll_rs = LL_RESERVED_SPACE_EXTRA(rt->dst.dev, nf_bridge_pad(skb));
+ ll_rs = LL_RESERVED_SPACE(rt->dst.dev);
/*
* Fragment the datagram.
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index a460a87e14f8..f0dfe92a00d6 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -300,7 +300,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
__nf_ct_l3proto_find(exp->tuple.src.l3num),
__nf_ct_l4proto_find(exp->tuple.src.l3num,
exp->tuple.dst.protonum));
- return seq_putc(s, '\n');
+ seq_putc(s, '\n');
+
+ return 0;
}
static const struct seq_operations exp_seq_ops = {
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5554b8f33d41..4e90217003e8 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -310,6 +310,34 @@ static void do_redirect(struct sk_buff *skb, struct sock *sk)
dst->ops->redirect(dst, sk, skb);
}
+
+/* handle ICMP messages on TCP_NEW_SYN_RECV request sockets */
+void tcp_req_err(struct sock *sk, u32 seq)
+{
+ struct request_sock *req = inet_reqsk(sk);
+ struct net *net = sock_net(sk);
+
+ /* ICMPs are not backlogged, hence we cannot get
+ * an established socket here.
+ */
+ WARN_ON(req->sk);
+
+ if (seq != tcp_rsk(req)->snt_isn) {
+ NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
+ reqsk_put(req);
+ } else {
+ /*
+ * Still in SYN_RECV, just remove it silently.
+ * There is no good way to pass the error to the newly
+ * created socket, and POSIX does not want network
+ * errors returned from accept().
+ */
+ NET_INC_STATS_BH(net, LINUX_MIB_LISTENDROPS);
+ inet_csk_reqsk_queue_drop(req->rsk_listener, req);
+ }
+}
+EXPORT_SYMBOL(tcp_req_err);
+
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
@@ -343,8 +371,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
int err;
struct net *net = dev_net(icmp_skb->dev);
- sk = inet_lookup(net, &tcp_hashinfo, iph->daddr, th->dest,
- iph->saddr, th->source, inet_iif(icmp_skb));
+ sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
+ th->dest, iph->saddr, ntohs(th->source),
+ inet_iif(icmp_skb));
if (!sk) {
ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS);
return;
@@ -353,6 +382,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
inet_twsk_put(inet_twsk(sk));
return;
}
+ seq = ntohl(th->seq);
+ if (sk->sk_state == TCP_NEW_SYN_RECV)
+ return tcp_req_err(sk, seq);
bh_lock_sock(sk);
/* If too many ICMPs get dropped on busy
@@ -374,7 +406,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
icsk = inet_csk(sk);
tp = tcp_sk(sk);
- seq = ntohl(th->seq);
/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
fastopen = tp->fastopen_rsk;
snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
@@ -458,38 +489,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
}
switch (sk->sk_state) {
- struct request_sock *req;
- case TCP_LISTEN:
- if (sock_owned_by_user(sk))
- goto out;
-
- req = inet_csk_search_req(sk, th->dest,
- iph->daddr, iph->saddr);
- if (!req)
- goto out;
-
- /* ICMPs are not backlogged, hence we cannot get
- an established socket here.
- */
- WARN_ON(req->sk);
-
- if (seq != tcp_rsk(req)->snt_isn) {
- NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
- reqsk_put(req);
- goto out;
- }
-
- /*
- * Still in SYN_RECV, just remove it silently.
- * There is no good way to pass the error to the newly
- * created socket, and POSIX does not want network
- * errors returned from accept().
- */
- inet_csk_reqsk_queue_drop(sk, req);
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
- reqsk_put(req);
- goto out;
-
case TCP_SYN_SENT:
case TCP_SYN_RECV:
/* Only in fast or simultaneous open. If a fast open socket is
@@ -1909,13 +1908,13 @@ get_req:
}
sk = sk_nulls_next(st->syn_wait_sk);
st->state = TCP_SEQ_STATE_LISTENING;
- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
} else {
icsk = inet_csk(sk);
- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
if (reqsk_queue_len(&icsk->icsk_accept_queue))
goto start_req;
- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
sk = sk_nulls_next(sk);
}
get_sk:
@@ -1927,7 +1926,7 @@ get_sk:
goto out;
}
icsk = inet_csk(sk);
- read_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_lock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
if (reqsk_queue_len(&icsk->icsk_accept_queue)) {
start_req:
st->uid = sock_i_uid(sk);
@@ -1936,7 +1935,7 @@ start_req:
st->sbucket = 0;
goto get_req;
}
- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
}
spin_unlock_bh(&ilb->lock);
st->offset = 0;
@@ -2155,7 +2154,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
case TCP_SEQ_STATE_OPENREQ:
if (v) {
struct inet_connection_sock *icsk = inet_csk(st->syn_wait_sk);
- read_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
+ spin_unlock_bh(&icsk->icsk_accept_queue.syn_wait_lock);
}
case TCP_SEQ_STATE_LISTENING:
if (v != SEQ_START_TOKEN)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 3daa6b5d766d..2568fd282873 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -327,7 +327,7 @@ static void tcp_fastopen_synack_timer(struct sock *sk)
struct request_sock *req;
req = tcp_sk(sk)->fastopen_rsk;
- req->rsk_ops->syn_ack_timeout(sk, req);
+ req->rsk_ops->syn_ack_timeout(req);
if (req->num_timeout >= max_retries) {
tcp_write_err(sk);
@@ -539,9 +539,11 @@ static void tcp_write_timer(unsigned long data)
sock_put(sk);
}
-void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
+void tcp_syn_ack_timeout(const struct request_sock *req)
{
- NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
+ struct net *net = read_pnet(&inet_rsk(req)->ireq_net);
+
+ NET_INC_STATS_BH(net, LINUX_MIB_TCPTIMEOUTS);
}
EXPORT_SYMBOL(tcp_syn_ack_timeout);