From 98d1bd802cdbc8f56868fae51edec13e86b59515 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 14 Jul 2015 17:51:06 +0200
Subject: netfilter: xtables: compute exact size needed for jumpstack

The {arp,ip,ip6tables} jump stack is currently sized based
on the number of user chains.

However, its rather unlikely that every user defined chain jumps to the
next, so lets use the existing loop detection logic to also track the
chain depths.

The stacksize is then set to the largest chain depth seen.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/ip6_tables.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 3c35ced39b42..baf032179918 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -452,11 +452,15 @@ ip6t_do_table(struct sk_buff *skb,
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
-   there are loops.  Puts hook bitmask in comefrom. */
+ * there are loops.  Puts hook bitmask in comefrom.
+ *
+ * Keeps track of largest call depth seen and stores it in newinfo->stacksize.
+ */
 static int
-mark_source_chains(const struct xt_table_info *newinfo,
+mark_source_chains(struct xt_table_info *newinfo,
 		   unsigned int valid_hooks, void *entry0)
 {
+	unsigned int calldepth, max_calldepth = 0;
 	unsigned int hook;
 
 	/* No recursion; use packet counter to save back ptrs (reset
@@ -470,6 +474,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
 
 		/* Set initial back pointer. */
 		e->counters.pcnt = pos;
+		calldepth = 0;
 
 		for (;;) {
 			const struct xt_standard_target *t
@@ -531,6 +536,8 @@ mark_source_chains(const struct xt_table_info *newinfo,
 					(entry0 + pos + size);
 				e->counters.pcnt = pos;
 				pos += size;
+				if (calldepth > 0)
+					--calldepth;
 			} else {
 				int newpos = t->verdict;
 
@@ -544,6 +551,11 @@ mark_source_chains(const struct xt_table_info *newinfo,
 								newpos);
 						return 0;
 					}
+					if (entry0 + newpos != ip6t_next_entry(e) &&
+					    !(e->ipv6.flags & IP6T_F_GOTO) &&
+					    ++calldepth > max_calldepth)
+						max_calldepth = calldepth;
+
 					/* This a jump; chase it. */
 					duprintf("Jump rule %u -> %u\n",
 						 pos, newpos);
@@ -560,6 +572,7 @@ mark_source_chains(const struct xt_table_info *newinfo,
 		next:
 		duprintf("Finished chain %u\n", hook);
 	}
+	newinfo->stacksize = max_calldepth;
 	return 1;
 }
 
@@ -839,9 +852,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
-		if (strcmp(ip6t_get_target(iter)->u.user.name,
-		    XT_ERROR_TARGET) == 0)
-			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -1754,9 +1764,6 @@ translate_compat_table(struct net *net,
 		if (ret != 0)
 			break;
 		++i;
-		if (strcmp(ip6t_get_target(iter1)->u.user.name,
-		    XT_ERROR_TARGET) == 0)
-			++newinfo->stacksize;
 	}
 	if (ret) {
 		/*
-- 
cgit v1.2.3-73-gaa49b


From 7814b6ec6d0d63444abdb49554166c8cfcbd063e Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 14 Jul 2015 17:51:08 +0200
Subject: netfilter: xtables: don't save/restore jumpstack offset

In most cases there is no reentrancy into ip/ip6tables.

For skbs sent by REJECT or SYNPROXY targets, there is one level
of reentrancy, but its not relevant as those targets issue an absolute
verdict, i.e. the jumpstack can be clobbered since its not used
after the target issues absolute verdict (ACCEPT, DROP, STOLEN, etc).

So the only special case where it is relevant is the TEE target, which
returns XT_CONTINUE.

This patch changes ip(6)_do_table to always use the jump stack starting
from 0.

When we detect we're operating on an skb sent via TEE (percpu
nf_skb_duplicated is 1) we switch to an alternate stack to leave
the original one alone.

Since there is no TEE support for arptables, it doesn't need to
test if tee is active.

The jump stack overflow tests are no longer needed as well --
since ->stacksize is the largest call depth we cannot exceed it.

A much better alternative to the external jumpstack would be to just
declare a jumps[32] stack on the local stack frame, but that would mean
we'd have to reject iptables rulesets that used to work before.

Another alternative would be to start rejecting rulesets with a larger
call depth, e.g. 1000 -- in this case it would be feasible to allocate the
entire stack in the percpu area which would avoid one dereference.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h |  1 -
 net/ipv4/netfilter/arp_tables.c    | 11 +++--------
 net/ipv4/netfilter/ip_tables.c     | 37 ++++++++++++++++++++-----------------
 net/ipv6/netfilter/ip6_tables.c    | 26 ++++++++++++++------------
 net/netfilter/x_tables.c           | 22 +++++++++++-----------
 5 files changed, 48 insertions(+), 49 deletions(-)

(limited to 'net/ipv6')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 286098a5667f..149284557ca7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -222,7 +222,6 @@ struct xt_table_info {
 	 * @stacksize jumps (number of user chains) can possibly be made.
 	 */
 	unsigned int stacksize;
-	unsigned int __percpu *stackptr;
 	void ***jumpstack;
 
 	unsigned char entries[0] __aligned(8);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index ae6d0a124213..969fdbe6fbb5 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -280,6 +280,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	table_base = private->entries;
 	jumpstack  = (struct arpt_entry **)private->jumpstack[cpu];
 
+	/* No TEE support for arptables, so no need to switch to alternate
+	 * stack.  All targets that reenter must return absolute verdicts.
+	 */
 	e = get_entry(table_base, private->hook_entry[hook]);
 
 	acpar.in      = state->in;
@@ -325,11 +328,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			}
 			if (table_base + v
 			    != arpt_next_entry(e)) {
-
-				if (stackidx >= private->stacksize) {
-					verdict = NF_DROP;
-					break;
-				}
 				jumpstack[stackidx++] = e;
 			}
 
@@ -337,9 +335,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		/* Targets which reenter must return
-		 * abs. verdicts
-		 */
 		acpar.target   = t->u.kernel.target;
 		acpar.targinfo = t->data;
 		verdict = t->u.kernel.target->target(skb, &acpar);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 5e44b35a8de8..a2e4b018a254 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -296,12 +296,13 @@ ipt_do_table(struct sk_buff *skb,
 	const char *indev, *outdev;
 	const void *table_base;
 	struct ipt_entry *e, **jumpstack;
-	unsigned int *stackptr, origptr, cpu;
+	unsigned int stackidx, cpu;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
 	unsigned int addend;
 
 	/* Initialization */
+	stackidx = 0;
 	ip = ip_hdr(skb);
 	indev = state->in ? state->in->name : nulldevname;
 	outdev = state->out ? state->out->name : nulldevname;
@@ -331,13 +332,20 @@ ipt_do_table(struct sk_buff *skb,
 	smp_read_barrier_depends();
 	table_base = private->entries;
 	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
-	stackptr   = per_cpu_ptr(private->stackptr, cpu);
-	origptr    = *stackptr;
+
+	/* Switch to alternate jumpstack if we're being invoked via TEE.
+	 * TEE issues XT_CONTINUE verdict on original skb so we must not
+	 * clobber the jumpstack.
+	 *
+	 * For recursion via REJECT or SYNPROXY the stack will be clobbered
+	 * but it is no problem since absolute verdict is issued by these.
+	 */
+	jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
-		 table->name, hook, origptr,
+	pr_debug("Entering %s(hook %u), UF %p\n",
+		 table->name, hook,
 		 get_entry(table_base, private->underflow[hook]));
 
 	do {
@@ -383,28 +391,24 @@ ipt_do_table(struct sk_buff *skb,
 					verdict = (unsigned int)(-v) - 1;
 					break;
 				}
-				if (*stackptr <= origptr) {
+				if (stackidx == 0) {
 					e = get_entry(table_base,
 					    private->underflow[hook]);
 					pr_debug("Underflow (this is normal) "
 						 "to %p\n", e);
 				} else {
-					e = jumpstack[--*stackptr];
+					e = jumpstack[--stackidx];
 					pr_debug("Pulled %p out from pos %u\n",
-						 e, *stackptr);
+						 e, stackidx);
 					e = ipt_next_entry(e);
 				}
 				continue;
 			}
 			if (table_base + v != ipt_next_entry(e) &&
 			    !(e->ip.flags & IPT_F_GOTO)) {
-				if (*stackptr >= private->stacksize) {
-					verdict = NF_DROP;
-					break;
-				}
-				jumpstack[(*stackptr)++] = e;
+				jumpstack[stackidx++] = e;
 				pr_debug("Pushed %p into pos %u\n",
-					 e, *stackptr - 1);
+					 e, stackidx - 1);
 			}
 
 			e = get_entry(table_base, v);
@@ -423,9 +427,8 @@ ipt_do_table(struct sk_buff *skb,
 			/* Verdict */
 			break;
 	} while (!acpar.hotdrop);
-	pr_debug("Exiting %s; resetting sp from %u to %u\n",
-		 __func__, *stackptr, origptr);
-	*stackptr = origptr;
+	pr_debug("Exiting %s; sp at %u\n", __func__, stackidx);
+
  	xt_write_recseq_end(addend);
  	local_bh_enable();
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index baf032179918..531281f0ff86 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -324,12 +324,13 @@ ip6t_do_table(struct sk_buff *skb,
 	const char *indev, *outdev;
 	const void *table_base;
 	struct ip6t_entry *e, **jumpstack;
-	unsigned int *stackptr, origptr, cpu;
+	unsigned int stackidx, cpu;
 	const struct xt_table_info *private;
 	struct xt_action_param acpar;
 	unsigned int addend;
 
 	/* Initialization */
+	stackidx = 0;
 	indev = state->in ? state->in->name : nulldevname;
 	outdev = state->out ? state->out->name : nulldevname;
 	/* We handle fragments by dealing with the first fragment as
@@ -357,8 +358,15 @@ ip6t_do_table(struct sk_buff *skb,
 	cpu        = smp_processor_id();
 	table_base = private->entries;
 	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
-	stackptr   = per_cpu_ptr(private->stackptr, cpu);
-	origptr    = *stackptr;
+
+	/* Switch to alternate jumpstack if we're being invoked via TEE.
+	 * TEE issues XT_CONTINUE verdict on original skb so we must not
+	 * clobber the jumpstack.
+	 *
+	 * For recursion via REJECT or SYNPROXY the stack will be clobbered
+	 * but it is no problem since absolute verdict is issued by these.
+	 */
+	jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
@@ -406,20 +414,16 @@ ip6t_do_table(struct sk_buff *skb,
 					verdict = (unsigned int)(-v) - 1;
 					break;
 				}
-				if (*stackptr <= origptr)
+				if (stackidx == 0)
 					e = get_entry(table_base,
 					    private->underflow[hook]);
 				else
-					e = ip6t_next_entry(jumpstack[--*stackptr]);
+					e = ip6t_next_entry(jumpstack[--stackidx]);
 				continue;
 			}
 			if (table_base + v != ip6t_next_entry(e) &&
 			    !(e->ipv6.flags & IP6T_F_GOTO)) {
-				if (*stackptr >= private->stacksize) {
-					verdict = NF_DROP;
-					break;
-				}
-				jumpstack[(*stackptr)++] = e;
+				jumpstack[stackidx++] = e;
 			}
 
 			e = get_entry(table_base, v);
@@ -437,8 +441,6 @@ ip6t_do_table(struct sk_buff *skb,
 			break;
 	} while (!acpar.hotdrop);
 
-	*stackptr = origptr;
-
  	xt_write_recseq_end(addend);
  	local_bh_enable();
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 4db7d60d42fa..154447e519ab 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -67,9 +67,6 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
 	[NFPROTO_IPV6]   = "ip6",
 };
 
-/* Allow this many total (re)entries. */
-static const unsigned int xt_jumpstack_multiplier = 2;
-
 /* Registration hooks for targets. */
 int xt_register_target(struct xt_target *target)
 {
@@ -688,8 +685,6 @@ void xt_free_table_info(struct xt_table_info *info)
 		kvfree(info->jumpstack);
 	}
 
-	free_percpu(info->stackptr);
-
 	kvfree(info);
 }
 EXPORT_SYMBOL(xt_free_table_info);
@@ -737,10 +732,6 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
 	unsigned int size;
 	int cpu;
 
-	i->stackptr = alloc_percpu(unsigned int);
-	if (i->stackptr == NULL)
-		return -ENOMEM;
-
 	size = sizeof(void **) * nr_cpu_ids;
 	if (size > PAGE_SIZE)
 		i->jumpstack = vzalloc(size);
@@ -753,8 +744,17 @@ static int xt_jumpstack_alloc(struct xt_table_info *i)
 	if (i->stacksize == 0)
 		return 0;
 
-	i->stacksize *= xt_jumpstack_multiplier;
-	size = sizeof(void *) * i->stacksize;
+	/* Jumpstack needs to be able to record two full callchains, one
+	 * from the first rule set traversal, plus one table reentrancy
+	 * via -j TEE without clobbering the callchain that brought us to
+	 * TEE target.
+	 *
+	 * This is done by allocating two jumpstacks per cpu, on reentry
+	 * the upper half of the stack is used.
+	 *
+	 * see the jumpstack setup in ipt_do_table() for more details.
+	 */
+	size = sizeof(void *) * i->stacksize * 2u;
 	for_each_possible_cpu(cpu) {
 		if (size > PAGE_SIZE)
 			i->jumpstack[cpu] = vmalloc_node(size,
-- 
cgit v1.2.3-73-gaa49b


From dcebd3153e0a7749bb054ab73fa4e1ca33e9d3f9 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 14 Jul 2015 17:51:09 +0200
Subject: netfilter: add and use jump label for xt_tee

Don't bother testing if we need to switch to alternate stack
unless TEE target is used.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/x_tables.h | 7 +++++++
 net/ipv4/netfilter/ip_tables.c     | 3 ++-
 net/ipv6/netfilter/ip6_tables.c    | 3 ++-
 net/netfilter/x_tables.c           | 3 +++
 net/netfilter/xt_TEE.c             | 2 ++
 5 files changed, 16 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 149284557ca7..b006b719183f 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -3,6 +3,7 @@
 
 
 #include <linux/netdevice.h>
+#include <linux/static_key.h>
 #include <uapi/linux/netfilter/x_tables.h>
 
 /**
@@ -280,6 +281,12 @@ void xt_free_table_info(struct xt_table_info *info);
  */
 DECLARE_PER_CPU(seqcount_t, xt_recseq);
 
+/* xt_tee_enabled - true if x_tables needs to handle reentrancy
+ *
+ * Enabled if current ip(6)tables ruleset has at least one -j TEE rule.
+ */
+extern struct static_key xt_tee_enabled;
+
 /**
  * xt_write_recseq_begin - start of a write section
  *
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index a2e4b018a254..ff585bdbf850 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -340,7 +340,8 @@ ipt_do_table(struct sk_buff *skb,
 	 * For recursion via REJECT or SYNPROXY the stack will be clobbered
 	 * but it is no problem since absolute verdict is issued by these.
 	 */
-	jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
+	if (static_key_false(&xt_tee_enabled))
+		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 531281f0ff86..ea6d105063c2 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -366,7 +366,8 @@ ip6t_do_table(struct sk_buff *skb,
 	 * For recursion via REJECT or SYNPROXY the stack will be clobbered
 	 * but it is no problem since absolute verdict is issued by these.
 	 */
-	jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
+	if (static_key_false(&xt_tee_enabled))
+		jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 154447e519ab..9b42b5ea6dcd 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -727,6 +727,9 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
 DEFINE_PER_CPU(seqcount_t, xt_recseq);
 EXPORT_PER_CPU_SYMBOL_GPL(xt_recseq);
 
+struct static_key xt_tee_enabled __read_mostly;
+EXPORT_SYMBOL_GPL(xt_tee_enabled);
+
 static int xt_jumpstack_alloc(struct xt_table_info *i)
 {
 	unsigned int size;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 8950e79c4dc9..c5d6556dbc5e 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -251,6 +251,7 @@ static int tee_tg_check(const struct xt_tgchk_param *par)
 	} else
 		info->priv = NULL;
 
+	static_key_slow_inc(&xt_tee_enabled);
 	return 0;
 }
 
@@ -262,6 +263,7 @@ static void tee_tg_destroy(const struct xt_tgdtor_param *par)
 		unregister_netdevice_notifier(&info->priv->notifier);
 		kfree(info->priv);
 	}
+	static_key_slow_dec(&xt_tee_enabled);
 }
 
 static struct xt_target tee_tg_reg[] __read_mostly = {
-- 
cgit v1.2.3-73-gaa49b


From 6c7941dee9c41d6ab5a8be06ec44aa579a6123e1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Tue, 14 Jul 2015 17:51:10 +0200
Subject: netfilter: xtables: remove __pure annotation

sparse complains:
ip_tables.c:361:27: warning: incorrect type in assignment (different modifiers)
ip_tables.c:361:27:    expected struct ipt_entry *[assigned] e
ip_tables.c:361:27:    got struct ipt_entry [pure] *

doesn't change generated code.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv4/netfilter/arp_tables.c | 2 +-
 net/ipv4/netfilter/ip_tables.c  | 2 +-
 net/ipv6/netfilter/ip6_tables.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 969fdbe6fbb5..c416cb355cb0 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -240,7 +240,7 @@ get_entry(const void *base, unsigned int offset)
 	return (struct arpt_entry *)(base + offset);
 }
 
-static inline __pure
+static inline
 struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
 {
 	return (void *)entry + entry->next_offset;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ff585bdbf850..787f99ed55e2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -276,7 +276,7 @@ static void trace_packet(const struct sk_buff *skb,
 }
 #endif
 
-static inline __pure
+static inline
 struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
 {
 	return (void *)entry + entry->next_offset;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index ea6d105063c2..4e21f80228be 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -305,7 +305,7 @@ static void trace_packet(const struct sk_buff *skb,
 }
 #endif
 
-static inline __pure struct ip6t_entry *
+static inline struct ip6t_entry *
 ip6t_next_entry(const struct ip6t_entry *entry)
 {
 	return (void *)entry + entry->next_offset;
-- 
cgit v1.2.3-73-gaa49b


From 72b1e5e4cac72efa6b739b47e41f53e4520b4194 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 23 Jul 2015 16:21:30 +0200
Subject: netfilter: bridge: reduce nf_bridge_info to 32 bytes again

We can use union for most of the temporary cruft (original ipv4/ipv6
address, source mac, physoutdev) since they're used during different
stages of br netfilter traversal.

Also get rid of the last two ->mask users.

Shrinks struct from 48 to 32 on 64bit arch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge.h          | 12 +++++++++---
 include/linux/skbuff.h                    | 19 +++++++++++++------
 net/bridge/br_netfilter_hooks.c           | 14 ++++++--------
 net/bridge/br_netfilter_ipv6.c            |  2 +-
 net/ipv4/netfilter/nf_defrag_ipv4.c       |  7 ++-----
 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c |  7 ++-----
 6 files changed, 33 insertions(+), 28 deletions(-)

(limited to 'net/ipv6')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index 6d80fc686323..2437b8a5d7a9 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -17,9 +17,6 @@ enum nf_br_hook_priorities {
 
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 
-#define BRNF_BRIDGED_DNAT		0x02
-#define BRNF_NF_BRIDGE_PREROUTING	0x08
-
 int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb);
 
 static inline void br_drop_fake_rtable(struct sk_buff *skb)
@@ -63,8 +60,17 @@ nf_bridge_get_physoutdev(const struct sk_buff *skb)
 {
 	return skb->nf_bridge ? skb->nf_bridge->physoutdev : NULL;
 }
+
+static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
+{
+	return skb->nf_bridge && skb->nf_bridge->in_prerouting;
+}
 #else
 #define br_drop_fake_rtable(skb)	        do { } while (0)
+static inline bool nf_bridge_in_prerouting(const struct sk_buff *skb)
+{
+	return false;
+}
 #endif /* CONFIG_BRIDGE_NETFILTER */
 
 #endif
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index d6cdd6e87d53..ac732e67a6c8 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -173,17 +173,24 @@ struct nf_bridge_info {
 		BRNF_PROTO_8021Q,
 		BRNF_PROTO_PPPOE
 	} orig_proto:8;
-	bool			pkt_otherhost;
+	u8			pkt_otherhost:1;
+	u8			in_prerouting:1;
+	u8			bridged_dnat:1;
 	__u16			frag_max_size;
-	unsigned int		mask;
 	struct net_device	*physindev;
 	union {
-		struct net_device *physoutdev;
-		char neigh_header[8];
-	};
-	union {
+		/* prerouting: detect dnat in orig/reply direction */
 		__be32          ipv4_daddr;
 		struct in6_addr ipv6_daddr;
+
+		/* after prerouting + nat detected: store original source
+		 * mac since neigh resolution overwrites it, only used while
+		 * skb is out in neigh layer.
+		 */
+		char neigh_header[8];
+
+		/* always valid & non-NULL from FORWARD on, for physdev match */
+		struct net_device *physoutdev;
 	};
 };
 #endif
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index c8b9bcfe997e..ec51c2ba30e9 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -284,7 +284,7 @@ int br_nf_pre_routing_finish_bridge(struct sock *sk, struct sk_buff *skb)
 							 nf_bridge->neigh_header,
 							 ETH_HLEN-ETH_ALEN);
 			/* tell br_dev_xmit to continue with forwarding */
-			nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+			nf_bridge->bridged_dnat = 1;
 			/* FIXME Need to refragment */
 			ret = neigh->output(neigh, skb);
 		}
@@ -356,7 +356,7 @@ static int br_nf_pre_routing_finish(struct sock *sk, struct sk_buff *skb)
 		skb->pkt_type = PACKET_OTHERHOST;
 		nf_bridge->pkt_otherhost = false;
 	}
-	nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+	nf_bridge->in_prerouting = 0;
 	if (br_nf_ipv4_daddr_was_changed(skb, nf_bridge)) {
 		if ((err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev))) {
 			struct in_device *in_dev = __in_dev_get_rcu(dev);
@@ -444,7 +444,7 @@ struct net_device *setup_pre_routing(struct sk_buff *skb)
 		nf_bridge->pkt_otherhost = true;
 	}
 
-	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
+	nf_bridge->in_prerouting = 1;
 	nf_bridge->physindev = skb->dev;
 	skb->dev = brnf_get_logical_dev(skb, skb->dev);
 
@@ -850,10 +850,8 @@ static unsigned int ip_sabotage_in(const struct nf_hook_ops *ops,
 				   struct sk_buff *skb,
 				   const struct nf_hook_state *state)
 {
-	if (skb->nf_bridge &&
-	    !(skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)) {
+	if (skb->nf_bridge && !skb->nf_bridge->in_prerouting)
 		return NF_STOP;
-	}
 
 	return NF_ACCEPT;
 }
@@ -872,7 +870,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 	struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb);
 
 	skb_pull(skb, ETH_HLEN);
-	nf_bridge->mask &= ~BRNF_BRIDGED_DNAT;
+	nf_bridge->bridged_dnat = 0;
 
 	BUILD_BUG_ON(sizeof(nf_bridge->neigh_header) != (ETH_HLEN - ETH_ALEN));
 
@@ -887,7 +885,7 @@ static void br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 
 static int br_nf_dev_xmit(struct sk_buff *skb)
 {
-	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+	if (skb->nf_bridge && skb->nf_bridge->bridged_dnat) {
 		br_nf_pre_routing_finish_bridge_slow(skb);
 		return 1;
 	}
diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c
index 13b7d1e3d185..77383bfe7ea3 100644
--- a/net/bridge/br_netfilter_ipv6.c
+++ b/net/bridge/br_netfilter_ipv6.c
@@ -174,7 +174,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sock *sk, struct sk_buff *skb)
 		skb->pkt_type = PACKET_OTHERHOST;
 		nf_bridge->pkt_otherhost = false;
 	}
-	nf_bridge->mask &= ~BRNF_NF_BRIDGE_PREROUTING;
+	nf_bridge->in_prerouting = 0;
 	if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) {
 		skb_dst_drop(skb);
 		v6ops->route_input(skb);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index c88b7d434718..b69e82bda215 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -49,12 +49,9 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
 	if (skb->nfct)
 		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
 #endif
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (skb->nf_bridge &&
-	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+	if (nf_bridge_in_prerouting(skb))
 		return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
+
 	if (hooknum == NF_INET_PRE_ROUTING)
 		return IP_DEFRAG_CONNTRACK_IN + zone;
 	else
diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
index a45db0b4785c..267fb8d5876e 100644
--- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
+++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c
@@ -39,12 +39,9 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,
 	if (skb->nfct)
 		zone = nf_ct_zone((struct nf_conn *)skb->nfct);
 #endif
-
-#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
-	if (skb->nf_bridge &&
-	    skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+	if (nf_bridge_in_prerouting(skb))
 		return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
-#endif
+
 	if (hooknum == NF_INET_PRE_ROUTING)
 		return IP6_DEFRAG_CONNTRACK_IN + zone;
 	else
-- 
cgit v1.2.3-73-gaa49b


From a6cd379b4d68867295ea35a719008e86d7a2ee9f Mon Sep 17 00:00:00 2001
From: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Date: Thu, 30 Jul 2015 16:53:45 +0000
Subject: netfilter: ip6t_REJECT: Remove debug messages from reject_tg6()

Make it similar to reject_tg() in ipt_REJECT.

Suggested-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/ipv6/netfilter/ip6t_REJECT.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 12331efd49cf..567367a75172 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -35,14 +35,12 @@ MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");
 MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");
 MODULE_LICENSE("GPL");
 
-
 static unsigned int
 reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_reject_info *reject = par->targinfo;
 	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
 
-	pr_debug("%s: medium point\n", __func__);
 	switch (reject->with) {
 	case IP6T_ICMP6_NO_ROUTE:
 		nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);
@@ -65,9 +63,6 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 	case IP6T_TCP_RESET:
 		nf_send_reset6(net, skb, par->hooknum);
 		break;
-	default:
-		net_info_ratelimited("case %u not handled yet\n", reject->with);
-		break;
 	}
 
 	return NF_DROP;
-- 
cgit v1.2.3-73-gaa49b