From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Jun 2012 09:16:47 -0400
Subject: NFSv4: Fix an Oops in the open recovery code

The open recovery code does not need to request a new value for the
mdsthreshold, and so does not allocate a struct nfs4_threshold.
The problem is that encode_getfattr_open() will still request an
mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold.

This patch fixes encode_getfattr_open so that it doesn't request an
mdsthreshold when the caller isn't asking for one. It also fixes
decode_attr_mdsthreshold so that it errors if the server returns
an mdsthreshold that we didn't ask for (instead of Oopsing).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Andy Adamson <andros@netapp.com>
---
 include/linux/nfs_xdr.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d1a7bf51c326..7519baef025b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -348,6 +348,7 @@ struct nfs_openargs {
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
 	const u32 *		bitmask;
+	const u32 *		open_bitmap;
 	__u32			claim;
 	struct nfs4_sequence_args	seq_args;
 };
-- 
cgit 


From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Jun 2012 18:32:03 -0400
Subject: NFS: Fix a commit bug

The new commit code fails to copy the verifier into the wb_verf field
of _all_ the nfs_page structures; it only copies it into the first entry.
The consequence is that most requests end up failing to match in
nfs_commit_release.

Fix is to copy the verifier into the req->wb_verf field in
nfs_write_completion.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c         | 4 ++--
 fs/nfs/write.c          | 7 ++++---
 include/linux/nfs_xdr.h | 2 ++
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 23d170bc44f4..b5385a7efd56 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -710,12 +710,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				bit = NFS_IOHDR_NEED_RESCHED;
 			else if (dreq->flags == 0) {
-				memcpy(&dreq->verf, &req->wb_verf,
+				memcpy(&dreq->verf, hdr->verf,
 				       sizeof(dreq->verf));
 				bit = NFS_IOHDR_NEED_COMMIT;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
+				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
 					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 					bit = NFS_IOHDR_NEED_RESCHED;
 				} else
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e6fe3d69d14c..4d6861c0dc14 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
 		INIT_LIST_HEAD(&hdr->rpc_list);
 		spin_lock_init(&hdr->lock);
 		atomic_set(&hdr->refcnt, 0);
+		hdr->verf = &p->verf;
 	}
 	return p;
 }
@@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			goto next;
 		}
 		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+			memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
 		}
@@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata)
 	struct nfs_write_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
-	struct nfs_page *req = hdr->req;
 
 	if ((status >= 0) && nfs_write_need_commit(data)) {
 		spin_lock(&hdr->lock);
 		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
 			; /* Do nothing */
 		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-		else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+			memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
+		else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7519baef025b..8aadd90b808a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1237,6 +1237,7 @@ struct nfs_pgio_header {
 	struct list_head	rpc_list;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
+	struct nfs_writeverf	*verf;
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
@@ -1274,6 +1275,7 @@ struct nfs_write_data {
 struct nfs_write_header {
 	struct nfs_pgio_header	header;
 	struct nfs_write_data	rpc_data;
+	struct nfs_writeverf	verf;
 };
 
 struct nfs_mds_commit_info {
-- 
cgit 


From aa9b16306e3243229580ff889cc59fd66bf77973 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 10 May 2012 16:41:44 -0700
Subject: rcu: Precompute RCU_FAST_NO_HZ timer offsets

When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick()
calls rcu_needs_cpu() see if RCU needs that CPU, and, if not, computes the
next wakeup time based on the timer wheels.  Only later, when actually
entering the idle loop, rcu_prepare_for_idle() will be invoked.  In some
cases, rcu_prepare_for_idle() will post timers to wake the CPU back up.
But all for naught: The next wakeup time for the CPU has already been
computed, and posting a timer afterwards does not force that wakeup
time to be recomputed.  This means that rcu_prepare_for_idle()'s have
no effect.

This is not a problem on a busy system because something else will wake
up the CPU soon enough.  However, on lightly loaded systems, the CPU
might stay asleep for a considerable length of time.  If that CPU has
a callback that the rest of the system is waiting on, the system might
run very slowly or (in theory) even hang.

This commit avoids this problem by having rcu_needs_cpu() give
tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU
to wake back up, which tick_nohz_stop_sched_tick() takes into account
when programming the CPU's wakeup time.  An alternative approach is
for rcu_prepare_for_idle() to use hrtimers instead of normal timers,
but timers are much more efficient than are hrtimers for frequently
and repeatedly posting and cancelling a given timer, which is exactly
what RCU_FAST_NO_HZ does.

Reported-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
---
 include/linux/rcutiny.h  |  6 +++--
 include/linux/rcutree.h  |  2 +-
 kernel/rcutree_plugin.h  | 66 +++++++++++++++++++++++++++++++-----------------
 kernel/time/tick-sched.c |  7 ++++-
 4 files changed, 54 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index adb5e5a38cae..854dc4c5c271 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,8 +87,9 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 
 #ifdef CONFIG_TINY_RCU
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return 0;
 }
 
@@ -96,8 +97,9 @@ static inline int rcu_needs_cpu(int cpu)
 
 int rcu_preempt_needs_cpu(void);
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return rcu_preempt_needs_cpu();
 }
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3c6083cde4fc..952b79339304 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -32,7 +32,7 @@
 
 extern void rcu_init(void);
 extern void rcu_note_context_switch(int cpu);
-extern int rcu_needs_cpu(int cpu);
+extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies);
 extern void rcu_cpu_stall_reset(void);
 
 /*
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 6bd9637d5d83..5271a020887e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
  * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
  * any flavor of RCU.
  */
-int rcu_needs_cpu(int cpu)
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return rcu_cpu_has_callbacks(cpu);
 }
 
@@ -1962,28 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void)
 #define RCU_IDLE_GP_DELAY 6		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
-/*
- * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
- * callbacks on this CPU, (2) this CPU has not yet attempted to enter
- * dyntick-idle mode, or (3) this CPU is in the process of attempting to
- * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
- * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
- * it is better to incur scheduling-clock interrupts than to spin
- * continuously for the same time duration!
- */
-int rcu_needs_cpu(int cpu)
-{
-	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-	/* Flag a new idle sojourn to the idle-entry state machine. */
-	rdtp->idle_first_pass = 1;
-	/* If no callbacks, RCU doesn't need the CPU. */
-	if (!rcu_cpu_has_callbacks(cpu))
-		return 0;
-	/* Otherwise, RCU needs the CPU only if it recently tried and failed. */
-	return rdtp->dyntick_holdoff == jiffies;
-}
-
 /*
  * Does the specified flavor of RCU have non-lazy callbacks pending on
  * the specified CPU?  Both RCU flavor and CPU are specified by the
@@ -2026,6 +2005,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
 	       rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
 }
 
+/*
+ * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
+ * callbacks on this CPU, (2) this CPU has not yet attempted to enter
+ * dyntick-idle mode, or (3) this CPU is in the process of attempting to
+ * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
+ * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
+ * it is better to incur scheduling-clock interrupts than to spin
+ * continuously for the same time duration!
+ *
+ * The delta_jiffies argument is used to store the time when RCU is
+ * going to need the CPU again if it still has callbacks.  The reason
+ * for this is that rcu_prepare_for_idle() might need to post a timer,
+ * but if so, it will do so after tick_nohz_stop_sched_tick() has set
+ * the wakeup time for this CPU.  This means that RCU's timer can be
+ * delayed until the wakeup time, which defeats the purpose of posting
+ * a timer.
+ */
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
+{
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	/* Flag a new idle sojourn to the idle-entry state machine. */
+	rdtp->idle_first_pass = 1;
+	/* If no callbacks, RCU doesn't need the CPU. */
+	if (!rcu_cpu_has_callbacks(cpu)) {
+		*delta_jiffies = ULONG_MAX;
+		return 0;
+	}
+	if (rdtp->dyntick_holdoff == jiffies) {
+		/* RCU recently tried and failed, so don't try again. */
+		*delta_jiffies = 1;
+		return 1;
+	}
+	/* Set up for the possibility that RCU will post a timer. */
+	if (rcu_cpu_has_nonlazy_callbacks(cpu))
+		*delta_jiffies = RCU_IDLE_GP_DELAY;
+	else
+		*delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
+	return 0;
+}
+
 /*
  * Handler for smp_call_function_single().  The only point of this
  * handler is to wake the CPU up, so the handler does only tracing.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6a3a5b9ff561..52f5ebbd443b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 {
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
+	unsigned long rcu_delta_jiffies;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	u64 time_delta;
@@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&xtime_lock, seq));
 
-	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
 	    arch_needs_cpu(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
@@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 		/* Get the next timer wheel timer */
 		next_jiffies = get_next_timer_interrupt(last_jiffies);
 		delta_jiffies = next_jiffies - last_jiffies;
+		if (rcu_delta_jiffies < delta_jiffies) {
+			next_jiffies = last_jiffies + rcu_delta_jiffies;
+			delta_jiffies = rcu_delta_jiffies;
+		}
 	}
 	/*
 	 * Do not stop the tick, if we are only one off
-- 
cgit 


From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans@schillstrom.com>
Date: Thu, 17 May 2012 22:35:46 +0000
Subject: netfilter: xt_HMARK: fix endianness and provide consistent hashing

This patch addresses two issues:

a) Fix usage of u32 and __be32 that causes endianess warnings via sparse.
b) Ensure consistent hashing in a cluster that is composed of big and
   little endian systems. Thus, we obtain the same hash mark in an
   heterogeneous cluster.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_HMARK.h |  5 +++
 net/netfilter/xt_HMARK.c           | 72 ++++++++++++++++++++++----------------
 2 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h
index abb1650940d2..826fc5807577 100644
--- a/include/linux/netfilter/xt_HMARK.h
+++ b/include/linux/netfilter/xt_HMARK.h
@@ -27,7 +27,12 @@ union hmark_ports {
 		__u16	src;
 		__u16	dst;
 	} p16;
+	struct {
+		__be16	src;
+		__be16	dst;
+	} b16;
 	__u32	v32;
+	__be32	b32;
 };
 
 struct xt_hmark_info {
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 0a96a43108ed..1686ca1b53a1 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -32,13 +32,13 @@ MODULE_ALIAS("ipt_HMARK");
 MODULE_ALIAS("ip6t_HMARK");
 
 struct hmark_tuple {
-	u32			src;
-	u32			dst;
+	__be32			src;
+	__be32			dst;
 	union hmark_ports	uports;
-	uint8_t			proto;
+	u8			proto;
 };
 
-static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
+static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
 {
 	return (addr32[0] & mask[0]) ^
 	       (addr32[1] & mask[1]) ^
@@ -46,8 +46,8 @@ static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
 	       (addr32[3] & mask[3]);
 }
 
-static inline u32
-hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
+static inline __be32
+hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
 {
 	switch (l3num) {
 	case AF_INET:
@@ -58,6 +58,22 @@ hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
 	return 0;
 }
 
+static inline void hmark_swap_ports(union hmark_ports *uports,
+				    const struct xt_hmark_info *info)
+{
+	union hmark_ports hp;
+	u16 src, dst;
+
+	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
+	src = ntohs(hp.b16.src);
+	dst = ntohs(hp.b16.dst);
+
+	if (dst > src)
+		uports->v32 = (dst << 16) | src;
+	else
+		uports->v32 = (src << 16) | dst;
+}
+
 static int
 hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 		    const struct xt_hmark_info *info)
@@ -74,22 +90,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 
-	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all,
-				 info->src_mask.all);
-	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all,
-				 info->dst_mask.all);
+	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
+				 info->src_mask.ip6);
+	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
+				 info->dst_mask.ip6);
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
 
 	t->proto = nf_ct_protonum(ct);
 	if (t->proto != IPPROTO_ICMP) {
-		t->uports.p16.src = otuple->src.u.all;
-		t->uports.p16.dst = rtuple->src.u.all;
-		t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
-				info->port_set.v32;
-		if (t->uports.p16.dst < t->uports.p16.src)
-			swap(t->uports.p16.dst, t->uports.p16.src);
+		t->uports.b16.src = otuple->src.u.all;
+		t->uports.b16.dst = rtuple->src.u.all;
+		hmark_swap_ports(&t->uports, info);
 	}
 
 	return 0;
@@ -98,15 +111,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 #endif
 }
 
+/* This hash function is endian independent, to ensure consistent hashing if
+ * the cluster is composed of big and little endian systems. */
 static inline u32
 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
 {
 	u32 hash;
+	u32 src = ntohl(t->src);
+	u32 dst = ntohl(t->dst);
 
-	if (t->dst < t->src)
-		swap(t->src, t->dst);
+	if (dst < src)
+		swap(src, dst);
 
-	hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd);
+	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
 	hash = hash ^ (t->proto & info->proto_mask);
 
 	return (((u64)hash * info->hmodulus) >> 32) + info->hoffset;
@@ -126,11 +143,7 @@ hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
 	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
 		return;
 
-	t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
-			info->port_set.v32;
-
-	if (t->uports.p16.dst < t->uports.p16.src)
-		swap(t->uports.p16.dst, t->uports.p16.src);
+	hmark_swap_ports(&t->uports, info);
 }
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
@@ -178,8 +191,8 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
 			return -1;
 	}
 noicmp:
-	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all);
-	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all);
+	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
+	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
@@ -255,11 +268,8 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
 		}
 	}
 
-	t->src = (__force u32) ip->saddr;
-	t->dst = (__force u32) ip->daddr;
-
-	t->src &= info->src_mask.ip;
-	t->dst &= info->dst_mask.ip;
+	t->src = ip->saddr & info->src_mask.ip;
+	t->dst = ip->daddr & info->dst_mask.ip;
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
-- 
cgit 


From c8e9cf7bb240049117d2fa64d1540476c289396d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 7 Jun 2012 12:15:15 +0200
Subject: vga_switcheroo: Add a helper function to get the client state

Add vga_switcheroo_get_client_state() to get the current state of the
client.  This is necessary to determine the proper initial state of
audio clients in HD-audio driver.

Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/vga/vga_switcheroo.c | 13 +++++++++++++
 include/linux/vga_switcheroo.h   |  7 +++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 38f9534ac513..eb4f64f0a565 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -190,6 +190,19 @@ find_active_client(struct list_head *head)
 	return NULL;
 }
 
+int vga_switcheroo_get_client_state(struct pci_dev *pdev)
+{
+	struct vga_switcheroo_client *client;
+
+	client = find_client_from_pci(&vgasr_priv.clients, pdev);
+	if (!client)
+		return VGA_SWITCHEROO_NOT_FOUND;
+	if (!vgasr_priv.active)
+		return VGA_SWITCHEROO_INIT;
+	return client->pwr_state;
+}
+EXPORT_SYMBOL(vga_switcheroo_get_client_state);
+
 void vga_switcheroo_unregister_client(struct pci_dev *pdev)
 {
 	struct vga_switcheroo_client *client;
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index b455c7c212eb..b176342ca031 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -12,6 +12,9 @@
 enum vga_switcheroo_state {
 	VGA_SWITCHEROO_OFF,
 	VGA_SWITCHEROO_ON,
+	/* below are referred only from vga_switcheroo_get_client_state() */
+	VGA_SWITCHEROO_INIT,
+	VGA_SWITCHEROO_NOT_FOUND,
 };
 
 enum vga_switcheroo_client_id {
@@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void);
 
 int vga_switcheroo_process_delayed_switch(void);
 
+int vga_switcheroo_get_client_state(struct pci_dev *dev);
+
 #else
 
 static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
@@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	int id, bool active) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
+static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; }
+
 
 #endif
-- 
cgit 


From 505cff00de9c303b95c204eb4544066e3e707911 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 Jun 2012 12:49:17 +0200
Subject: vga_switcheroo: Fix error without CONFIG_VGA_SWITCHEROO

Fix a typo that is built only when CONFIG_VGA_SWITCHEROO=n.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/vga_switcheroo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index b176342ca031..60da41fe9dc2 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -67,7 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	int id, bool active) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
-static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; }
+static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
 
 
 #endif
-- 
cgit 


From 8876d6b5f81f4e242a6660da22bbd92f17a8d058 Mon Sep 17 00:00:00 2001
From: Paul Pluzhnikov <ppluzhnikov@google.com>
Date: Sat, 9 Jun 2012 07:53:03 -0700
Subject: net: Make linux/tcp.h C++ friendly (trivial)

I originally sent this patch to <trivial@kernel.org>, but Jiri Kosina did
not feel that this is fully appropriate for the trivial tree.

Using linux/tcp.h from C++ results in:

cat t.cc
#include <linux/tcp.h>
int main() { }

g++ -c t.cc

In file included from t.cc:1:
/usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression
/usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression
...

Attached trivial patch fixes this problem.

Tested:
- the t.cc above compiles with g++ and
- the following program generates the same output before/after
  the patch:

#include <linux/tcp.h>
#include <stdio.h>

int main ()
{
#define P(a) printf("%s: %08x\n", #a, (int)a)
 P(TCP_FLAG_CWR);
 P(TCP_FLAG_ECE);
 P(TCP_FLAG_URG);
 P(TCP_FLAG_ACK);
 P(TCP_FLAG_PSH);
 P(TCP_FLAG_RST);
 P(TCP_FLAG_SYN);
 P(TCP_FLAG_FIN);
 P(TCP_RESERVED_BITS);
 P(TCP_DATA_OFFSET);
#undef P
 return 0;
}

Signed-off-by: Paul Pluzhnikov <ppluzhnikov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4c5b63283377..5f359dbfcdce 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -69,16 +69,16 @@ union tcp_word_hdr {
 #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) 
 
 enum { 
-	TCP_FLAG_CWR = __cpu_to_be32(0x00800000),
-	TCP_FLAG_ECE = __cpu_to_be32(0x00400000),
-	TCP_FLAG_URG = __cpu_to_be32(0x00200000),
-	TCP_FLAG_ACK = __cpu_to_be32(0x00100000),
-	TCP_FLAG_PSH = __cpu_to_be32(0x00080000),
-	TCP_FLAG_RST = __cpu_to_be32(0x00040000),
-	TCP_FLAG_SYN = __cpu_to_be32(0x00020000),
-	TCP_FLAG_FIN = __cpu_to_be32(0x00010000),
-	TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000),
-	TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000)
+	TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000),
+	TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000),
+	TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000),
+	TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000),
+	TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000),
+	TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000),
+	TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000),
+	TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000),
+	TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000),
+	TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000)
 }; 
 
 /*
-- 
cgit 


From 9b15b817f3d62409290fd56fe3cbb076a931bb0a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 15 Jun 2012 17:55:50 -0700
Subject: swap: fix shmem swapping when more than 8 areas

Minchan Kim reports that when a system has many swap areas, and tmpfs
swaps out to the ninth or more, shmem_getpage_gfp()'s attempts to read
back the page cannot locate it, and the read fails with -ENOMEM.

Whoops.  Yes, I blindly followed read_swap_header()'s pte_to_swp_entry(
swp_entry_to_pte()) technique for determining maximum usable swap
offset, without stopping to realize that that actually depends upon the
pte swap encoding shifting swap offset to the higher bits and truncating
it there.  Whereas our radix_tree swap encoding leaves offset in the
lower bits: it's swap "type" (that is, index of swap area) that was
truncated.

Fix it by reducing the SWP_TYPE_SHIFT() in swapops.h, and removing the
broken radix_to_swp_entry(swp_to_radix_entry()) from read_swap_header().

This does not reduce the usable size of a swap area any further, it
leaves it as claimed when making the original commit: no change from 3.0
on x86_64, nor on i386 without PAE; but 3.0's 512GB is reduced to 128GB
per swapfile on i386 with PAE.  It's not a change I would have risked
five years ago, but with x86_64 supported for ten years, I believe it's
appropriate now.

Hmm, and what if some architecture implements its swap pte with offset
encoded below type? That would equally break the maximum usable swap
offset check.  Happily, they all follow the same tradition of encoding
offset above type, but I'll prepare a check on that for next.

Reported-and-Reviewed-and-Tested-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org [3.1, 3.2, 3.3, 3.4]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h |  8 +++++---
 mm/swapfile.c           | 12 ++++--------
 2 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 792d16d9cbc7..47ead515c811 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -9,13 +9,15 @@
  * get good packing density in that tree, so the index should be dense in
  * the low-order bits.
  *
- * We arrange the `type' and `offset' fields so that `type' is at the five
+ * We arrange the `type' and `offset' fields so that `type' is at the seven
  * high-order bits of the swp_entry_t and `offset' is right-aligned in the
- * remaining bits.
+ * remaining bits.  Although `type' itself needs only five bits, we allow for
+ * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry().
  *
  * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
  */
-#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
+#define SWP_TYPE_SHIFT(e)	((sizeof(e.val) * 8) - \
+			(MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT))
 #define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
 
 /*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index de5bc51c4a66..71373d03fcee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1916,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 
 	/*
 	 * Find out how many pages are allowed for a single swap
-	 * device. There are three limiting factors: 1) the number
+	 * device. There are two limiting factors: 1) the number
 	 * of bits for the swap offset in the swp_entry_t type, and
 	 * 2) the number of bits in the swap pte as defined by the
-	 * the different architectures, and 3) the number of free bits
-	 * in an exceptional radix_tree entry. In order to find the
+	 * different architectures. In order to find the
 	 * largest possible bit mask, a swap entry with swap type 0
 	 * and swap offset ~0UL is created, encoded to a swap pte,
 	 * decoded to a swp_entry_t again, and finally the swap
 	 * offset is extracted. This will mask all the bits from
 	 * the initial ~0UL mask that can't be encoded in either
 	 * the swp_entry_t or the architecture definition of a
-	 * swap pte.  Then the same is done for a radix_tree entry.
+	 * swap pte.
 	 */
 	maxpages = swp_offset(pte_to_swp_entry(
-			swp_entry_to_pte(swp_entry(0, ~0UL))));
-	maxpages = swp_offset(radix_to_swp_entry(
-			swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
-
+			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
 	if (maxpages > swap_header->info.last_page) {
 		maxpages = swap_header->info.last_page + 1;
 		/* p->max is an unsigned int: don't overflow it */
-- 
cgit