From 4282d60689d4f21b40692029080440cc58e8a17d Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Tue, 20 Jan 2015 11:36:55 -0500
Subject: tracefs: Add new tracefs file system

Add a separate file system to handle the tracing directory. Currently it
is part of debugfs, but that is starting to show its limits.

One thing is that in order to access the tracing infrastructure, you need
to mount debugfs. As that includes debugging from all sorts of sub systems
in the kernel, it is not considered advisable to mount such an all
encompassing debugging system.

Having the tracing system in its own file systems gives access to the
tracing sub system without needing to include all other systems.

Another problem with tracing using the debugfs system is that the
instances use mkdir to create sub buffers. debugfs does not support mkdir
from userspace so to implement it, special hacks were used. By controlling
the file system that the tracing infrastructure uses, this can be properly
done without hacks.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/uapi/linux/magic.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/magic.h b/include/uapi/linux/magic.h
index 7d664ea85ebd..7b1425a6b370 100644
--- a/include/uapi/linux/magic.h
+++ b/include/uapi/linux/magic.h
@@ -58,6 +58,8 @@
 
 #define STACK_END_MAGIC		0x57AC6E9D
 
+#define TRACEFS_MAGIC          0x74726163
+
 #define V9FS_MAGIC		0x01021997
 
 #define BDEVFS_MAGIC            0x62646576
-- 
cgit 


From cd67cd5eb25ae9a7bafbfd3d52d4c05e1d80af3b Mon Sep 17 00:00:00 2001
From: Julian Anastasov <ja@ssi.bg>
Date: Fri, 6 Feb 2015 09:44:44 +0200
Subject: ipvs: use 64-bit rates in stats

IPVS stats are limited to 2^(32-10) conns/s and packets/s,
2^(32-5) bytes/s. It is time to use 64 bits:

* Change all conn/packet kernel counters to 64-bit and update
them in u64_stats_update_{begin,end} section

* In kernel use struct ip_vs_kstats instead of the user-space
struct ip_vs_stats_user and use new func ip_vs_export_stats_user
to export it to sockopt users to preserve compatibility with
32-bit values

* Rename cpu counters "ustats" to "cnt"

* To netlink users provide additionally 64-bit stats:
IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64. Old stats
remain for old binaries.

* We can use ip_vs_copy_stats in ip_vs_stats_percpu_show

Thanks to Chris Caputo for providing initial patch for ip_vs_est.c

Signed-off-by: Chris Caputo <ccaputo@alt.net>
Signed-off-by: Julian Anastasov <ja@ssi.bg>
Signed-off-by: Simon Horman <horms@verge.net.au>
---
 include/net/ip_vs.h             |  50 ++++++++----
 include/uapi/linux/ip_vs.h      |   7 +-
 net/netfilter/ipvs/ip_vs_core.c |  36 +++++----
 net/netfilter/ipvs/ip_vs_ctl.c  | 174 ++++++++++++++++++++++++++--------------
 net/netfilter/ipvs/ip_vs_est.c  | 102 ++++++++++++-----------
 5 files changed, 226 insertions(+), 143 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 615b20b58545..a627fe690c19 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -365,15 +365,15 @@ struct ip_vs_seq {
 
 /* counters per cpu */
 struct ip_vs_counters {
-	__u32		conns;		/* connections scheduled */
-	__u32		inpkts;		/* incoming packets */
-	__u32		outpkts;	/* outgoing packets */
+	__u64		conns;		/* connections scheduled */
+	__u64		inpkts;		/* incoming packets */
+	__u64		outpkts;	/* outgoing packets */
 	__u64		inbytes;	/* incoming bytes */
 	__u64		outbytes;	/* outgoing bytes */
 };
 /* Stats per cpu */
 struct ip_vs_cpu_stats {
-	struct ip_vs_counters   ustats;
+	struct ip_vs_counters   cnt;
 	struct u64_stats_sync   syncp;
 };
 
@@ -383,23 +383,40 @@ struct ip_vs_estimator {
 
 	u64			last_inbytes;
 	u64			last_outbytes;
-	u32			last_conns;
-	u32			last_inpkts;
-	u32			last_outpkts;
-
-	u32			cps;
-	u32			inpps;
-	u32			outpps;
-	u32			inbps;
-	u32			outbps;
+	u64			last_conns;
+	u64			last_inpkts;
+	u64			last_outpkts;
+
+	u64			cps;
+	u64			inpps;
+	u64			outpps;
+	u64			inbps;
+	u64			outbps;
+};
+
+/*
+ * IPVS statistics object, 64-bit kernel version of struct ip_vs_stats_user
+ */
+struct ip_vs_kstats {
+	u64			conns;		/* connections scheduled */
+	u64			inpkts;		/* incoming packets */
+	u64			outpkts;	/* outgoing packets */
+	u64			inbytes;	/* incoming bytes */
+	u64			outbytes;	/* outgoing bytes */
+
+	u64			cps;		/* current connection rate */
+	u64			inpps;		/* current in packet rate */
+	u64			outpps;		/* current out packet rate */
+	u64			inbps;		/* current in byte rate */
+	u64			outbps;		/* current out byte rate */
 };
 
 struct ip_vs_stats {
-	struct ip_vs_stats_user	ustats;		/* statistics */
+	struct ip_vs_kstats	kstats;		/* kernel statistics */
 	struct ip_vs_estimator	est;		/* estimator */
 	struct ip_vs_cpu_stats __percpu	*cpustats;	/* per cpu counters */
 	spinlock_t		lock;		/* spin lock */
-	struct ip_vs_stats_user	ustats0;	/* reset values */
+	struct ip_vs_kstats	kstats0;	/* reset values */
 };
 
 struct dst_entry;
@@ -1388,8 +1405,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp, int pkts);
 void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
 void ip_vs_zero_estimator(struct ip_vs_stats *stats);
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
-			  struct ip_vs_stats *stats);
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats);
 
 /* Various IPVS packet transmitters (from ip_vs_xmit.c) */
 int ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
diff --git a/include/uapi/linux/ip_vs.h b/include/uapi/linux/ip_vs.h
index cabe95d5b461..3199243f2028 100644
--- a/include/uapi/linux/ip_vs.h
+++ b/include/uapi/linux/ip_vs.h
@@ -358,6 +358,8 @@ enum {
 
 	IPVS_SVC_ATTR_PE_NAME,		/* name of ct retriever */
 
+	IPVS_SVC_ATTR_STATS64,		/* nested attribute for service stats */
+
 	__IPVS_SVC_ATTR_MAX,
 };
 
@@ -387,6 +389,8 @@ enum {
 
 	IPVS_DEST_ATTR_ADDR_FAMILY,	/* Address family of address */
 
+	IPVS_DEST_ATTR_STATS64,		/* nested attribute for dest stats */
+
 	__IPVS_DEST_ATTR_MAX,
 };
 
@@ -410,7 +414,8 @@ enum {
 /*
  * Attributes used to describe service or destination entry statistics
  *
- * Used inside nested attributes IPVS_SVC_ATTR_STATS and IPVS_DEST_ATTR_STATS
+ * Used inside nested attributes IPVS_SVC_ATTR_STATS, IPVS_DEST_ATTR_STATS,
+ * IPVS_SVC_ATTR_STATS64 and IPVS_DEST_ATTR_STATS64.
  */
 enum {
 	IPVS_STATS_ATTR_UNSPEC = 0,
diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c
index 990decba1fe4..c9470c86308f 100644
--- a/net/netfilter/ipvs/ip_vs_core.c
+++ b/net/netfilter/ipvs/ip_vs_core.c
@@ -119,24 +119,24 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		struct ip_vs_service *svc;
 
 		s = this_cpu_ptr(dest->stats.cpustats);
-		s->ustats.inpkts++;
 		u64_stats_update_begin(&s->syncp);
-		s->ustats.inbytes += skb->len;
+		s->cnt.inpkts++;
+		s->cnt.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
 		rcu_read_lock();
 		svc = rcu_dereference(dest->svc);
 		s = this_cpu_ptr(svc->stats.cpustats);
-		s->ustats.inpkts++;
 		u64_stats_update_begin(&s->syncp);
-		s->ustats.inbytes += skb->len;
+		s->cnt.inpkts++;
+		s->cnt.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
-		s->ustats.inpkts++;
 		u64_stats_update_begin(&s->syncp);
-		s->ustats.inbytes += skb->len;
+		s->cnt.inpkts++;
+		s->cnt.inbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 	}
 }
@@ -153,24 +153,24 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
 		struct ip_vs_service *svc;
 
 		s = this_cpu_ptr(dest->stats.cpustats);
-		s->ustats.outpkts++;
 		u64_stats_update_begin(&s->syncp);
-		s->ustats.outbytes += skb->len;
+		s->cnt.outpkts++;
+		s->cnt.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 
 		rcu_read_lock();
 		svc = rcu_dereference(dest->svc);
 		s = this_cpu_ptr(svc->stats.cpustats);
-		s->ustats.outpkts++;
 		u64_stats_update_begin(&s->syncp);
-		s->ustats.outbytes += skb->len;
+		s->cnt.outpkts++;
+		s->cnt.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 		rcu_read_unlock();
 
 		s = this_cpu_ptr(ipvs->tot_stats.cpustats);
-		s->ustats.outpkts++;
 		u64_stats_update_begin(&s->syncp);
-		s->ustats.outbytes += skb->len;
+		s->cnt.outpkts++;
+		s->cnt.outbytes += skb->len;
 		u64_stats_update_end(&s->syncp);
 	}
 }
@@ -183,13 +183,19 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
 	struct ip_vs_cpu_stats *s;
 
 	s = this_cpu_ptr(cp->dest->stats.cpustats);
-	s->ustats.conns++;
+	u64_stats_update_begin(&s->syncp);
+	s->cnt.conns++;
+	u64_stats_update_end(&s->syncp);
 
 	s = this_cpu_ptr(svc->stats.cpustats);
-	s->ustats.conns++;
+	u64_stats_update_begin(&s->syncp);
+	s->cnt.conns++;
+	u64_stats_update_end(&s->syncp);
 
 	s = this_cpu_ptr(ipvs->tot_stats.cpustats);
-	s->ustats.conns++;
+	u64_stats_update_begin(&s->syncp);
+	s->cnt.conns++;
+	u64_stats_update_end(&s->syncp);
 }
 
 
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index e55759056361..6fd60059faf0 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -729,9 +729,9 @@ static void ip_vs_trash_cleanup(struct net *net)
 }
 
 static void
-ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
+ip_vs_copy_stats(struct ip_vs_kstats *dst, struct ip_vs_stats *src)
 {
-#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
+#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->kstats.c - src->kstats0.c
 
 	spin_lock_bh(&src->lock);
 
@@ -746,6 +746,21 @@ ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
 	spin_unlock_bh(&src->lock);
 }
 
+static void
+ip_vs_export_stats_user(struct ip_vs_stats_user *dst, struct ip_vs_kstats *src)
+{
+	dst->conns = (u32)src->conns;
+	dst->inpkts = (u32)src->inpkts;
+	dst->outpkts = (u32)src->outpkts;
+	dst->inbytes = src->inbytes;
+	dst->outbytes = src->outbytes;
+	dst->cps = (u32)src->cps;
+	dst->inpps = (u32)src->inpps;
+	dst->outpps = (u32)src->outpps;
+	dst->inbps = (u32)src->inbps;
+	dst->outbps = (u32)src->outbps;
+}
+
 static void
 ip_vs_zero_stats(struct ip_vs_stats *stats)
 {
@@ -753,7 +768,7 @@ ip_vs_zero_stats(struct ip_vs_stats *stats)
 
 	/* get current counters as zero point, rates are zeroed */
 
-#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
+#define IP_VS_ZERO_STATS_COUNTER(c) stats->kstats0.c = stats->kstats.c
 
 	IP_VS_ZERO_STATS_COUNTER(conns);
 	IP_VS_ZERO_STATS_COUNTER(inpkts);
@@ -2044,7 +2059,7 @@ static const struct file_operations ip_vs_info_fops = {
 static int ip_vs_stats_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_single_net(seq);
-	struct ip_vs_stats_user show;
+	struct ip_vs_kstats show;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
@@ -2053,17 +2068,22 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
 		   "   Conns  Packets  Packets            Bytes            Bytes\n");
 
 	ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
-	seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
-		   show.inpkts, show.outpkts,
-		   (unsigned long long) show.inbytes,
-		   (unsigned long long) show.outbytes);
-
-/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n\n",
+		   (unsigned long long)show.conns,
+		   (unsigned long long)show.inpkts,
+		   (unsigned long long)show.outpkts,
+		   (unsigned long long)show.inbytes,
+		   (unsigned long long)show.outbytes);
+
+/*                01234567 01234567 01234567 0123456701234567 0123456701234567*/
 	seq_puts(seq,
-		   " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-	seq_printf(seq, "%8X %8X %8X %16X %16X\n",
-			show.cps, show.inpps, show.outpps,
-			show.inbps, show.outbps);
+		 " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+	seq_printf(seq, "%8LX %8LX %8LX %16LX %16LX\n",
+		   (unsigned long long)show.cps,
+		   (unsigned long long)show.inpps,
+		   (unsigned long long)show.outpps,
+		   (unsigned long long)show.inbps,
+		   (unsigned long long)show.outbps);
 
 	return 0;
 }
@@ -2086,7 +2106,7 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
 	struct net *net = seq_file_single_net(seq);
 	struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
 	struct ip_vs_cpu_stats __percpu *cpustats = tot_stats->cpustats;
-	struct ip_vs_stats_user rates;
+	struct ip_vs_kstats kstats;
 	int i;
 
 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
@@ -2098,41 +2118,41 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
 	for_each_possible_cpu(i) {
 		struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
 		unsigned int start;
-		__u64 inbytes, outbytes;
+		u64 conns, inpkts, outpkts, inbytes, outbytes;
 
 		do {
 			start = u64_stats_fetch_begin_irq(&u->syncp);
-			inbytes = u->ustats.inbytes;
-			outbytes = u->ustats.outbytes;
+			conns = u->cnt.conns;
+			inpkts = u->cnt.inpkts;
+			outpkts = u->cnt.outpkts;
+			inbytes = u->cnt.inbytes;
+			outbytes = u->cnt.outbytes;
 		} while (u64_stats_fetch_retry_irq(&u->syncp, start));
 
-		seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
-			   i, u->ustats.conns, u->ustats.inpkts,
-			   u->ustats.outpkts, (__u64)inbytes,
-			   (__u64)outbytes);
+		seq_printf(seq, "%3X %8LX %8LX %8LX %16LX %16LX\n",
+			   i, (u64)conns, (u64)inpkts,
+			   (u64)outpkts, (u64)inbytes,
+			   (u64)outbytes);
 	}
 
-	spin_lock_bh(&tot_stats->lock);
-
-	seq_printf(seq, "  ~ %8X %8X %8X %16LX %16LX\n\n",
-		   tot_stats->ustats.conns, tot_stats->ustats.inpkts,
-		   tot_stats->ustats.outpkts,
-		   (unsigned long long) tot_stats->ustats.inbytes,
-		   (unsigned long long) tot_stats->ustats.outbytes);
-
-	ip_vs_read_estimator(&rates, tot_stats);
+	ip_vs_copy_stats(&kstats, tot_stats);
 
-	spin_unlock_bh(&tot_stats->lock);
+	seq_printf(seq, "  ~ %8LX %8LX %8LX %16LX %16LX\n\n",
+		   (unsigned long long)kstats.conns,
+		   (unsigned long long)kstats.inpkts,
+		   (unsigned long long)kstats.outpkts,
+		   (unsigned long long)kstats.inbytes,
+		   (unsigned long long)kstats.outbytes);
 
-/*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
+/*                ... 01234567 01234567 01234567 0123456701234567 0123456701234567 */
 	seq_puts(seq,
-		   "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
-	seq_printf(seq, "    %8X %8X %8X %16X %16X\n",
-			rates.cps,
-			rates.inpps,
-			rates.outpps,
-			rates.inbps,
-			rates.outbps);
+		 "     Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
+	seq_printf(seq, "    %8LX %8LX %8LX %16LX %16LX\n",
+		   kstats.cps,
+		   kstats.inpps,
+		   kstats.outpps,
+		   kstats.inbps,
+		   kstats.outbps);
 
 	return 0;
 }
@@ -2400,6 +2420,7 @@ static void
 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 {
 	struct ip_vs_scheduler *sched;
+	struct ip_vs_kstats kstats;
 
 	sched = rcu_dereference_protected(src->scheduler, 1);
 	dst->protocol = src->protocol;
@@ -2411,7 +2432,8 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
 	dst->timeout = src->timeout / HZ;
 	dst->netmask = src->netmask;
 	dst->num_dests = src->num_dests;
-	ip_vs_copy_stats(&dst->stats, &src->stats);
+	ip_vs_copy_stats(&kstats, &src->stats);
+	ip_vs_export_stats_user(&dst->stats, &kstats);
 }
 
 static inline int
@@ -2485,6 +2507,7 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 		int count = 0;
 		struct ip_vs_dest *dest;
 		struct ip_vs_dest_entry entry;
+		struct ip_vs_kstats kstats;
 
 		memset(&entry, 0, sizeof(entry));
 		list_for_each_entry(dest, &svc->destinations, n_list) {
@@ -2506,7 +2529,8 @@ __ip_vs_get_dest_entries(struct net *net, const struct ip_vs_get_dests *get,
 			entry.activeconns = atomic_read(&dest->activeconns);
 			entry.inactconns = atomic_read(&dest->inactconns);
 			entry.persistconns = atomic_read(&dest->persistconns);
-			ip_vs_copy_stats(&entry.stats, &dest->stats);
+			ip_vs_copy_stats(&kstats, &dest->stats);
+			ip_vs_export_stats_user(&entry.stats, &kstats);
 			if (copy_to_user(&uptr->entrytable[count],
 					 &entry, sizeof(entry))) {
 				ret = -EFAULT;
@@ -2798,25 +2822,51 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
 };
 
 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
-				 struct ip_vs_stats *stats)
+				 struct ip_vs_kstats *kstats)
+{
+	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
+	if (!nl_stats)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, (u32)kstats->conns) ||
+	    nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, (u32)kstats->inpkts) ||
+	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, (u32)kstats->outpkts) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+	    nla_put_u32(skb, IPVS_STATS_ATTR_CPS, (u32)kstats->cps) ||
+	    nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, (u32)kstats->inpps) ||
+	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, (u32)kstats->outpps) ||
+	    nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, (u32)kstats->inbps) ||
+	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, (u32)kstats->outbps))
+		goto nla_put_failure;
+	nla_nest_end(skb, nl_stats);
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nl_stats);
+	return -EMSGSIZE;
+}
+
+static int ip_vs_genl_fill_stats64(struct sk_buff *skb, int container_type,
+				   struct ip_vs_kstats *kstats)
 {
-	struct ip_vs_stats_user ustats;
 	struct nlattr *nl_stats = nla_nest_start(skb, container_type);
+
 	if (!nl_stats)
 		return -EMSGSIZE;
 
-	ip_vs_copy_stats(&ustats, stats);
-
-	if (nla_put_u32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns) ||
-	    nla_put_u32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts) ||
-	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts) ||
-	    nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes) ||
-	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes) ||
-	    nla_put_u32(skb, IPVS_STATS_ATTR_CPS, ustats.cps) ||
-	    nla_put_u32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps) ||
-	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps) ||
-	    nla_put_u32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps) ||
-	    nla_put_u32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps))
+	if (nla_put_u64(skb, IPVS_STATS_ATTR_CONNS, kstats->conns) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_INPKTS, kstats->inpkts) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTPKTS, kstats->outpkts) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_INBYTES, kstats->inbytes) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBYTES, kstats->outbytes) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_CPS, kstats->cps) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_INPPS, kstats->inpps) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTPPS, kstats->outpps) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_INBPS, kstats->inbps) ||
+	    nla_put_u64(skb, IPVS_STATS_ATTR_OUTBPS, kstats->outbps))
 		goto nla_put_failure;
 	nla_nest_end(skb, nl_stats);
 
@@ -2835,6 +2885,7 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	struct nlattr *nl_service;
 	struct ip_vs_flags flags = { .flags = svc->flags,
 				     .mask = ~0 };
+	struct ip_vs_kstats kstats;
 
 	nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
 	if (!nl_service)
@@ -2860,7 +2911,10 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb,
 	    nla_put_u32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ) ||
 	    nla_put_be32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask))
 		goto nla_put_failure;
-	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
+	ip_vs_copy_stats(&kstats, &svc->stats);
+	if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &kstats))
+		goto nla_put_failure;
+	if (ip_vs_genl_fill_stats64(skb, IPVS_SVC_ATTR_STATS64, &kstats))
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nl_service);
@@ -3032,6 +3086,7 @@ static struct ip_vs_service *ip_vs_genl_find_service(struct net *net,
 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
 {
 	struct nlattr *nl_dest;
+	struct ip_vs_kstats kstats;
 
 	nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
 	if (!nl_dest)
@@ -3054,7 +3109,10 @@ static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
 			atomic_read(&dest->persistconns)) ||
 	    nla_put_u16(skb, IPVS_DEST_ATTR_ADDR_FAMILY, dest->af))
 		goto nla_put_failure;
-	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
+	ip_vs_copy_stats(&kstats, &dest->stats);
+	if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &kstats))
+		goto nla_put_failure;
+	if (ip_vs_genl_fill_stats64(skb, IPVS_DEST_ATTR_STATS64, &kstats))
 		goto nla_put_failure;
 
 	nla_nest_end(skb, nl_dest);
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index 1425e9a924c4..ef0eb0a8d552 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -45,17 +45,19 @@
 
   NOTES.
 
-  * The stored value for average bps is scaled by 2^5, so that maximal
-    rate is ~2.15Gbits/s, average pps and cps are scaled by 2^10.
+  * Average bps is scaled by 2^5, while average pps and cps are scaled by 2^10.
 
-  * A lot code is taken from net/sched/estimator.c
+  * Netlink users can see 64-bit values but sockopt users are restricted
+    to 32-bit values for conns, packets, bps, cps and pps.
+
+  * A lot of code is taken from net/core/gen_estimator.c
  */
 
 
 /*
  * Make a summary from each cpu
  */
-static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
+static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum,
 				 struct ip_vs_cpu_stats __percpu *stats)
 {
 	int i;
@@ -64,27 +66,31 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
 	for_each_possible_cpu(i) {
 		struct ip_vs_cpu_stats *s = per_cpu_ptr(stats, i);
 		unsigned int start;
-		__u64 inbytes, outbytes;
+		u64 conns, inpkts, outpkts, inbytes, outbytes;
+
 		if (add) {
-			sum->conns += s->ustats.conns;
-			sum->inpkts += s->ustats.inpkts;
-			sum->outpkts += s->ustats.outpkts;
 			do {
 				start = u64_stats_fetch_begin(&s->syncp);
-				inbytes = s->ustats.inbytes;
-				outbytes = s->ustats.outbytes;
+				conns = s->cnt.conns;
+				inpkts = s->cnt.inpkts;
+				outpkts = s->cnt.outpkts;
+				inbytes = s->cnt.inbytes;
+				outbytes = s->cnt.outbytes;
 			} while (u64_stats_fetch_retry(&s->syncp, start));
+			sum->conns += conns;
+			sum->inpkts += inpkts;
+			sum->outpkts += outpkts;
 			sum->inbytes += inbytes;
 			sum->outbytes += outbytes;
 		} else {
 			add = true;
-			sum->conns = s->ustats.conns;
-			sum->inpkts = s->ustats.inpkts;
-			sum->outpkts = s->ustats.outpkts;
 			do {
 				start = u64_stats_fetch_begin(&s->syncp);
-				sum->inbytes = s->ustats.inbytes;
-				sum->outbytes = s->ustats.outbytes;
+				sum->conns = s->cnt.conns;
+				sum->inpkts = s->cnt.inpkts;
+				sum->outpkts = s->cnt.outpkts;
+				sum->inbytes = s->cnt.inbytes;
+				sum->outbytes = s->cnt.outbytes;
 			} while (u64_stats_fetch_retry(&s->syncp, start));
 		}
 	}
@@ -95,10 +101,7 @@ static void estimation_timer(unsigned long arg)
 {
 	struct ip_vs_estimator *e;
 	struct ip_vs_stats *s;
-	u32 n_conns;
-	u32 n_inpkts, n_outpkts;
-	u64 n_inbytes, n_outbytes;
-	u32 rate;
+	u64 rate;
 	struct net *net = (struct net *)arg;
 	struct netns_ipvs *ipvs;
 
@@ -108,33 +111,29 @@ static void estimation_timer(unsigned long arg)
 		s = container_of(e, struct ip_vs_stats, est);
 
 		spin_lock(&s->lock);
-		ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
-		n_conns = s->ustats.conns;
-		n_inpkts = s->ustats.inpkts;
-		n_outpkts = s->ustats.outpkts;
-		n_inbytes = s->ustats.inbytes;
-		n_outbytes = s->ustats.outbytes;
+		ip_vs_read_cpu_stats(&s->kstats, s->cpustats);
 
 		/* scaled by 2^10, but divided 2 seconds */
-		rate = (n_conns - e->last_conns) << 9;
-		e->last_conns = n_conns;
-		e->cps += ((long)rate - (long)e->cps) >> 2;
-
-		rate = (n_inpkts - e->last_inpkts) << 9;
-		e->last_inpkts = n_inpkts;
-		e->inpps += ((long)rate - (long)e->inpps) >> 2;
-
-		rate = (n_outpkts - e->last_outpkts) << 9;
-		e->last_outpkts = n_outpkts;
-		e->outpps += ((long)rate - (long)e->outpps) >> 2;
-
-		rate = (n_inbytes - e->last_inbytes) << 4;
-		e->last_inbytes = n_inbytes;
-		e->inbps += ((long)rate - (long)e->inbps) >> 2;
-
-		rate = (n_outbytes - e->last_outbytes) << 4;
-		e->last_outbytes = n_outbytes;
-		e->outbps += ((long)rate - (long)e->outbps) >> 2;
+		rate = (s->kstats.conns - e->last_conns) << 9;
+		e->last_conns = s->kstats.conns;
+		e->cps += ((s64)rate - (s64)e->cps) >> 2;
+
+		rate = (s->kstats.inpkts - e->last_inpkts) << 9;
+		e->last_inpkts = s->kstats.inpkts;
+		e->inpps += ((s64)rate - (s64)e->inpps) >> 2;
+
+		rate = (s->kstats.outpkts - e->last_outpkts) << 9;
+		e->last_outpkts = s->kstats.outpkts;
+		e->outpps += ((s64)rate - (s64)e->outpps) >> 2;
+
+		/* scaled by 2^5, but divided 2 seconds */
+		rate = (s->kstats.inbytes - e->last_inbytes) << 4;
+		e->last_inbytes = s->kstats.inbytes;
+		e->inbps += ((s64)rate - (s64)e->inbps) >> 2;
+
+		rate = (s->kstats.outbytes - e->last_outbytes) << 4;
+		e->last_outbytes = s->kstats.outbytes;
+		e->outbps += ((s64)rate - (s64)e->outbps) >> 2;
 		spin_unlock(&s->lock);
 	}
 	spin_unlock(&ipvs->est_lock);
@@ -166,14 +165,14 @@ void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
 void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 {
 	struct ip_vs_estimator *est = &stats->est;
-	struct ip_vs_stats_user *u = &stats->ustats;
+	struct ip_vs_kstats *k = &stats->kstats;
 
 	/* reset counters, caller must hold the stats->lock lock */
-	est->last_inbytes = u->inbytes;
-	est->last_outbytes = u->outbytes;
-	est->last_conns = u->conns;
-	est->last_inpkts = u->inpkts;
-	est->last_outpkts = u->outpkts;
+	est->last_inbytes = k->inbytes;
+	est->last_outbytes = k->outbytes;
+	est->last_conns = k->conns;
+	est->last_inpkts = k->inpkts;
+	est->last_outpkts = k->outpkts;
 	est->cps = 0;
 	est->inpps = 0;
 	est->outpps = 0;
@@ -182,8 +181,7 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
 }
 
 /* Get decoded rates */
-void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
-			  struct ip_vs_stats *stats)
+void ip_vs_read_estimator(struct ip_vs_kstats *dst, struct ip_vs_stats *stats)
 {
 	struct ip_vs_estimator *e = &stats->est;
 
-- 
cgit 


From e31a0ba7df6ce21ac4ed58c4182ec12ca8fd78fb Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Fri, 2 Jan 2015 12:18:23 -0300
Subject: [media] media: Fix DVB devnode representation at media controller

The previous provision for DVB media controller support were to
define an ID (likely meaning the adapter number) for the DVB
devnodes.

This is just plain wrong. Just like V4L, DVB devices (and any other
device node)) are uniquely identified via a (major, minor) tuple.

This is enough to uniquely identify a devnode, no matter what
API it implements.

So, before we go too far, let's mark the old v4l, fb, dvb and alsa
"devnode" info as deprecated, and just call it as "dev".

We can latter add fields specific to each API if needed.

As we don't want to break compilation on already existing apps,
let's just keep the old definitions as-is, adding a note that
those are deprecated at media-entity.h.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/v4l2-core/v4l2-dev.c    |  4 ++--
 drivers/media/v4l2-core/v4l2-device.c |  4 ++--
 include/media/media-entity.h          | 12 +-----------
 include/uapi/linux/media.h            | 15 +++++++++++++++
 4 files changed, 20 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-dev.c b/drivers/media/v4l2-core/v4l2-dev.c
index 86bb93fd7db8..d89d5cb465d9 100644
--- a/drivers/media/v4l2-core/v4l2-dev.c
+++ b/drivers/media/v4l2-core/v4l2-dev.c
@@ -943,8 +943,8 @@ int __video_register_device(struct video_device *vdev, int type, int nr,
 	    vdev->vfl_type != VFL_TYPE_SUBDEV) {
 		vdev->entity.type = MEDIA_ENT_T_DEVNODE_V4L;
 		vdev->entity.name = vdev->name;
-		vdev->entity.info.v4l.major = VIDEO_MAJOR;
-		vdev->entity.info.v4l.minor = vdev->minor;
+		vdev->entity.info.dev.major = VIDEO_MAJOR;
+		vdev->entity.info.dev.minor = vdev->minor;
 		ret = media_device_register_entity(vdev->v4l2_dev->mdev,
 			&vdev->entity);
 		if (ret < 0)
diff --git a/drivers/media/v4l2-core/v4l2-device.c b/drivers/media/v4l2-core/v4l2-device.c
index 015f92aab44a..204cc67c84e8 100644
--- a/drivers/media/v4l2-core/v4l2-device.c
+++ b/drivers/media/v4l2-core/v4l2-device.c
@@ -248,8 +248,8 @@ int v4l2_device_register_subdev_nodes(struct v4l2_device *v4l2_dev)
 			goto clean_up;
 		}
 #if defined(CONFIG_MEDIA_CONTROLLER)
-		sd->entity.info.v4l.major = VIDEO_MAJOR;
-		sd->entity.info.v4l.minor = vdev->minor;
+		sd->entity.info.dev.major = VIDEO_MAJOR;
+		sd->entity.info.dev.minor = vdev->minor;
 #endif
 		sd->devnode = vdev;
 	}
diff --git a/include/media/media-entity.h b/include/media/media-entity.h
index e00459185d20..d6d74bcfe183 100644
--- a/include/media/media-entity.h
+++ b/include/media/media-entity.h
@@ -87,17 +87,7 @@ struct media_entity {
 		struct {
 			u32 major;
 			u32 minor;
-		} v4l;
-		struct {
-			u32 major;
-			u32 minor;
-		} fb;
-		struct {
-			u32 card;
-			u32 device;
-			u32 subdevice;
-		} alsa;
-		int dvb;
+		} dev;
 
 		/* Sub-device specifications */
 		/* Nothing needed yet */
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index d847c760e8f0..418f4fec391a 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -75,6 +75,20 @@ struct media_entity_desc {
 
 	union {
 		/* Node specifications */
+		struct {
+			__u32 major;
+			__u32 minor;
+		} dev;
+
+#if 1
+		/*
+		 * DEPRECATED: previous node specifications. Kept just to
+		 * avoid breaking compilation, but media_entity_desc.dev
+		 * should be used instead. In particular, alsa and dvb
+		 * fields below are wrong: for all devnodes, there should
+		 * be just major/minor inside the struct, as this is enough
+		 * to represent any devnode, no matter what type.
+		 */
 		struct {
 			__u32 major;
 			__u32 minor;
@@ -89,6 +103,7 @@ struct media_entity_desc {
 			__u32 subdevice;
 		} alsa;
 		int dvb;
+#endif
 
 		/* Sub-device specifications */
 		/* Nothing needed yet */
-- 
cgit 


From 1d20f9f6330c988505e51f5010656978fd70cd0c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sat, 3 Jan 2015 11:09:39 -0300
Subject: [media] media: add new types for DVB devnodes

Most of the DVB subdevs have already their own devnode.

Add support for them at the media controller API.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 418f4fec391a..4c8f26243252 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -50,7 +50,14 @@ struct media_device_info {
 #define MEDIA_ENT_T_DEVNODE_V4L		(MEDIA_ENT_T_DEVNODE + 1)
 #define MEDIA_ENT_T_DEVNODE_FB		(MEDIA_ENT_T_DEVNODE + 2)
 #define MEDIA_ENT_T_DEVNODE_ALSA	(MEDIA_ENT_T_DEVNODE + 3)
-#define MEDIA_ENT_T_DEVNODE_DVB		(MEDIA_ENT_T_DEVNODE + 4)
+#define MEDIA_ENT_T_DEVNODE_DVB_FE	(MEDIA_ENT_T_DEVNODE + 4)
+#define MEDIA_ENT_T_DEVNODE_DVB_DEMUX	(MEDIA_ENT_T_DEVNODE + 5)
+#define MEDIA_ENT_T_DEVNODE_DVB_DVR	(MEDIA_ENT_T_DEVNODE + 6)
+#define MEDIA_ENT_T_DEVNODE_DVB_CA	(MEDIA_ENT_T_DEVNODE + 7)
+#define MEDIA_ENT_T_DEVNODE_DVB_NET	(MEDIA_ENT_T_DEVNODE + 8)
+
+/* Legacy symbol. Use it to avoid userspace compilation breakages */
+#define MEDIA_ENT_T_DEVNODE_DVB		MEDIA_ENT_T_DEVNODE_DVB_FE
 
 #define MEDIA_ENT_T_V4L2_SUBDEV		(2 << MEDIA_ENT_TYPE_SHIFT)
 #define MEDIA_ENT_T_V4L2_SUBDEV_SENSOR	(MEDIA_ENT_T_V4L2_SUBDEV + 1)
-- 
cgit 


From 91b0f3a06e432d01c66cd05973628a2ec3bde9ed Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Mon, 26 Jan 2015 08:53:18 -0300
Subject: [media] media: add a subdev type for tuner

Add MEDIA_ENT_T_V4L2_SUBDEV_TUNER to represent the V4L2
(and dvb) tuner subdevices.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/media.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 4c8f26243252..52cc2a6b19b7 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -66,6 +66,8 @@ struct media_device_info {
 /* A converter of analogue video to its digital representation. */
 #define MEDIA_ENT_T_V4L2_SUBDEV_DECODER	(MEDIA_ENT_T_V4L2_SUBDEV + 4)
 
+#define MEDIA_ENT_T_V4L2_SUBDEV_TUNER	(MEDIA_ENT_T_V4L2_SUBDEV + 5)
+
 #define MEDIA_ENT_FL_DEFAULT		(1 << 0)
 
 struct media_entity_desc {
-- 
cgit 


From 293487c8ecc1103f4625cea5e90e1ba0cc89660f Mon Sep 17 00:00:00 2001
From: Daniel Baluta <daniel.baluta@intel.com>
Date: Tue, 10 Feb 2015 18:33:51 +0200
Subject: iio: Export userspace IIO headers

After UAPI header file split [1] all user-kernel interfaces were
placed under include/uapi/.

This patch moves IIO user specific API from:
	* include/linux/iio/events.h => include/uapi/linux/iio/events.h
	* include/linux/types.h => include/uapi/linux/types.h

Now there is no need for nasty tricks to compile userspace programs
(e.g iio_event_monitor). Just installing the kernel headers with
make headers_install command does the job.

[1] http://lwn.net/Articles/507794/

Signed-off-by: Daniel Baluta <daniel.baluta@intel.com>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 include/linux/iio/events.h      | 30 +-------------
 include/linux/iio/types.h       | 78 +---------------------------------
 include/uapi/linux/Kbuild       |  1 +
 include/uapi/linux/iio/Kbuild   |  3 ++
 include/uapi/linux/iio/events.h | 42 +++++++++++++++++++
 include/uapi/linux/iio/types.h  | 92 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 140 insertions(+), 106 deletions(-)
 create mode 100644 include/uapi/linux/iio/Kbuild
 create mode 100644 include/uapi/linux/iio/events.h
 create mode 100644 include/uapi/linux/iio/types.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/iio/events.h b/include/linux/iio/events.h
index 03fa332ad2a8..8ad87d1c5340 100644
--- a/include/linux/iio/events.h
+++ b/include/linux/iio/events.h
@@ -9,22 +9,8 @@
 #ifndef _IIO_EVENTS_H_
 #define _IIO_EVENTS_H_
 
-#include <linux/ioctl.h>
-#include <linux/types.h>
 #include <linux/iio/types.h>
-
-/**
- * struct iio_event_data - The actual event being pushed to userspace
- * @id:		event identifier
- * @timestamp:	best estimate of time of event occurrence (often from
- *		the interrupt handler)
- */
-struct iio_event_data {
-	__u64	id;
-	__s64	timestamp;
-};
-
-#define IIO_GET_EVENT_FD_IOCTL _IOR('i', 0x90, int)
+#include <uapi/linux/iio/events.h>
 
 /**
  * IIO_EVENT_CODE() - create event identifier
@@ -70,18 +56,4 @@ struct iio_event_data {
 #define IIO_UNMOD_EVENT_CODE(chan_type, number, type, direction)	\
 	IIO_EVENT_CODE(chan_type, 0, 0, direction, type, number, 0, 0)
 
-#define IIO_EVENT_CODE_EXTRACT_TYPE(mask) ((mask >> 56) & 0xFF)
-
-#define IIO_EVENT_CODE_EXTRACT_DIR(mask) ((mask >> 48) & 0x7F)
-
-#define IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(mask) ((mask >> 32) & 0xFF)
-
-/* Event code number extraction depends on which type of event we have.
- * Perhaps review this function in the future*/
-#define IIO_EVENT_CODE_EXTRACT_CHAN(mask) ((__s16)(mask & 0xFFFF))
-#define IIO_EVENT_CODE_EXTRACT_CHAN2(mask) ((__s16)(((mask) >> 16) & 0xFFFF))
-
-#define IIO_EVENT_CODE_EXTRACT_MODIFIER(mask) ((mask >> 40) & 0xFF)
-#define IIO_EVENT_CODE_EXTRACT_DIFF(mask) (((mask) >> 55) & 0x1)
-
 #endif
diff --git a/include/linux/iio/types.h b/include/linux/iio/types.h
index 580ed5bdb3fa..942b6de68e2f 100644
--- a/include/linux/iio/types.h
+++ b/include/linux/iio/types.h
@@ -10,76 +10,7 @@
 #ifndef _IIO_TYPES_H_
 #define _IIO_TYPES_H_
 
-enum iio_chan_type {
-	IIO_VOLTAGE,
-	IIO_CURRENT,
-	IIO_POWER,
-	IIO_ACCEL,
-	IIO_ANGL_VEL,
-	IIO_MAGN,
-	IIO_LIGHT,
-	IIO_INTENSITY,
-	IIO_PROXIMITY,
-	IIO_TEMP,
-	IIO_INCLI,
-	IIO_ROT,
-	IIO_ANGL,
-	IIO_TIMESTAMP,
-	IIO_CAPACITANCE,
-	IIO_ALTVOLTAGE,
-	IIO_CCT,
-	IIO_PRESSURE,
-	IIO_HUMIDITYRELATIVE,
-	IIO_ACTIVITY,
-	IIO_STEPS,
-	IIO_ENERGY,
-	IIO_DISTANCE,
-	IIO_VELOCITY,
-};
-
-enum iio_modifier {
-	IIO_NO_MOD,
-	IIO_MOD_X,
-	IIO_MOD_Y,
-	IIO_MOD_Z,
-	IIO_MOD_X_AND_Y,
-	IIO_MOD_X_AND_Z,
-	IIO_MOD_Y_AND_Z,
-	IIO_MOD_X_AND_Y_AND_Z,
-	IIO_MOD_X_OR_Y,
-	IIO_MOD_X_OR_Z,
-	IIO_MOD_Y_OR_Z,
-	IIO_MOD_X_OR_Y_OR_Z,
-	IIO_MOD_LIGHT_BOTH,
-	IIO_MOD_LIGHT_IR,
-	IIO_MOD_ROOT_SUM_SQUARED_X_Y,
-	IIO_MOD_SUM_SQUARED_X_Y_Z,
-	IIO_MOD_LIGHT_CLEAR,
-	IIO_MOD_LIGHT_RED,
-	IIO_MOD_LIGHT_GREEN,
-	IIO_MOD_LIGHT_BLUE,
-	IIO_MOD_QUATERNION,
-	IIO_MOD_TEMP_AMBIENT,
-	IIO_MOD_TEMP_OBJECT,
-	IIO_MOD_NORTH_MAGN,
-	IIO_MOD_NORTH_TRUE,
-	IIO_MOD_NORTH_MAGN_TILT_COMP,
-	IIO_MOD_NORTH_TRUE_TILT_COMP,
-	IIO_MOD_RUNNING,
-	IIO_MOD_JOGGING,
-	IIO_MOD_WALKING,
-	IIO_MOD_STILL,
-	IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z,
-};
-
-enum iio_event_type {
-	IIO_EV_TYPE_THRESH,
-	IIO_EV_TYPE_MAG,
-	IIO_EV_TYPE_ROC,
-	IIO_EV_TYPE_THRESH_ADAPTIVE,
-	IIO_EV_TYPE_MAG_ADAPTIVE,
-	IIO_EV_TYPE_CHANGE,
-};
+#include <uapi/linux/iio/types.h>
 
 enum iio_event_info {
 	IIO_EV_INFO_ENABLE,
@@ -88,13 +19,6 @@ enum iio_event_info {
 	IIO_EV_INFO_PERIOD,
 };
 
-enum iio_event_direction {
-	IIO_EV_DIR_EITHER,
-	IIO_EV_DIR_RISING,
-	IIO_EV_DIR_FALLING,
-	IIO_EV_DIR_NONE,
-};
-
 #define IIO_VAL_INT 1
 #define IIO_VAL_INT_PLUS_MICRO 2
 #define IIO_VAL_INT_PLUS_NANO 3
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 00b100023c47..5bfc5bdc3c5d 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -6,6 +6,7 @@ header-y += caif/
 header-y += dvb/
 header-y += hdlc/
 header-y += hsi/
+header-y += iio/
 header-y += isdn/
 header-y += mmc/
 header-y += nfsd/
diff --git a/include/uapi/linux/iio/Kbuild b/include/uapi/linux/iio/Kbuild
new file mode 100644
index 000000000000..86f76d84c44f
--- /dev/null
+++ b/include/uapi/linux/iio/Kbuild
@@ -0,0 +1,3 @@
+# UAPI Header export list
+header-y += events.h
+header-y += types.h
diff --git a/include/uapi/linux/iio/events.h b/include/uapi/linux/iio/events.h
new file mode 100644
index 000000000000..00bbdaed2f97
--- /dev/null
+++ b/include/uapi/linux/iio/events.h
@@ -0,0 +1,42 @@
+/* The industrial I/O - event passing to userspace
+ *
+ * Copyright (c) 2008-2011 Jonathan Cameron
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#ifndef _UAPI_IIO_EVENTS_H_
+#define _UAPI_IIO_EVENTS_H_
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+/**
+ * struct iio_event_data - The actual event being pushed to userspace
+ * @id:		event identifier
+ * @timestamp:	best estimate of time of event occurrence (often from
+ *		the interrupt handler)
+ */
+struct iio_event_data {
+	__u64	id;
+	__s64	timestamp;
+};
+
+#define IIO_GET_EVENT_FD_IOCTL _IOR('i', 0x90, int)
+
+#define IIO_EVENT_CODE_EXTRACT_TYPE(mask) ((mask >> 56) & 0xFF)
+
+#define IIO_EVENT_CODE_EXTRACT_DIR(mask) ((mask >> 48) & 0x7F)
+
+#define IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(mask) ((mask >> 32) & 0xFF)
+
+/* Event code number extraction depends on which type of event we have.
+ * Perhaps review this function in the future*/
+#define IIO_EVENT_CODE_EXTRACT_CHAN(mask) ((__s16)(mask & 0xFFFF))
+#define IIO_EVENT_CODE_EXTRACT_CHAN2(mask) ((__s16)(((mask) >> 16) & 0xFFFF))
+
+#define IIO_EVENT_CODE_EXTRACT_MODIFIER(mask) ((mask >> 40) & 0xFF)
+#define IIO_EVENT_CODE_EXTRACT_DIFF(mask) (((mask) >> 55) & 0x1)
+
+#endif /* _UAPI_IIO_EVENTS_H_ */
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
new file mode 100644
index 000000000000..5c4601935005
--- /dev/null
+++ b/include/uapi/linux/iio/types.h
@@ -0,0 +1,92 @@
+/* industrial I/O data types needed both in and out of kernel
+ *
+ * Copyright (c) 2008 Jonathan Cameron
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#ifndef _UAPI_IIO_TYPES_H_
+#define _UAPI_IIO_TYPES_H_
+
+enum iio_chan_type {
+	IIO_VOLTAGE,
+	IIO_CURRENT,
+	IIO_POWER,
+	IIO_ACCEL,
+	IIO_ANGL_VEL,
+	IIO_MAGN,
+	IIO_LIGHT,
+	IIO_INTENSITY,
+	IIO_PROXIMITY,
+	IIO_TEMP,
+	IIO_INCLI,
+	IIO_ROT,
+	IIO_ANGL,
+	IIO_TIMESTAMP,
+	IIO_CAPACITANCE,
+	IIO_ALTVOLTAGE,
+	IIO_CCT,
+	IIO_PRESSURE,
+	IIO_HUMIDITYRELATIVE,
+	IIO_ACTIVITY,
+	IIO_STEPS,
+	IIO_ENERGY,
+	IIO_DISTANCE,
+	IIO_VELOCITY,
+};
+
+enum iio_modifier {
+	IIO_NO_MOD,
+	IIO_MOD_X,
+	IIO_MOD_Y,
+	IIO_MOD_Z,
+	IIO_MOD_X_AND_Y,
+	IIO_MOD_X_AND_Z,
+	IIO_MOD_Y_AND_Z,
+	IIO_MOD_X_AND_Y_AND_Z,
+	IIO_MOD_X_OR_Y,
+	IIO_MOD_X_OR_Z,
+	IIO_MOD_Y_OR_Z,
+	IIO_MOD_X_OR_Y_OR_Z,
+	IIO_MOD_LIGHT_BOTH,
+	IIO_MOD_LIGHT_IR,
+	IIO_MOD_ROOT_SUM_SQUARED_X_Y,
+	IIO_MOD_SUM_SQUARED_X_Y_Z,
+	IIO_MOD_LIGHT_CLEAR,
+	IIO_MOD_LIGHT_RED,
+	IIO_MOD_LIGHT_GREEN,
+	IIO_MOD_LIGHT_BLUE,
+	IIO_MOD_QUATERNION,
+	IIO_MOD_TEMP_AMBIENT,
+	IIO_MOD_TEMP_OBJECT,
+	IIO_MOD_NORTH_MAGN,
+	IIO_MOD_NORTH_TRUE,
+	IIO_MOD_NORTH_MAGN_TILT_COMP,
+	IIO_MOD_NORTH_TRUE_TILT_COMP,
+	IIO_MOD_RUNNING,
+	IIO_MOD_JOGGING,
+	IIO_MOD_WALKING,
+	IIO_MOD_STILL,
+	IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z,
+};
+
+enum iio_event_type {
+	IIO_EV_TYPE_THRESH,
+	IIO_EV_TYPE_MAG,
+	IIO_EV_TYPE_ROC,
+	IIO_EV_TYPE_THRESH_ADAPTIVE,
+	IIO_EV_TYPE_MAG_ADAPTIVE,
+	IIO_EV_TYPE_CHANGE,
+};
+
+enum iio_event_direction {
+	IIO_EV_DIR_EITHER,
+	IIO_EV_DIR_RISING,
+	IIO_EV_DIR_FALLING,
+	IIO_EV_DIR_NONE,
+};
+
+#endif /* _UAPI_IIO_TYPES_H_ */
+
-- 
cgit 


From 27ac905b8f88d28779b0661809286b5ba2817d37 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Tue, 4 Nov 2014 21:55:57 -0500
Subject: perf/x86/intel: Reduce lbr_sel_map[] size

The index of lbr_sel_map is bit value of perf branch_sample_type.
PERF_SAMPLE_BRANCH_MAX is 1024 at present, so each lbr_sel_map uses
4096 bytes. By using bit shift as index, we can reduce lbr_sel_map
size to 40 bytes. This patch defines 'bit shift' for branch types,
and use 'bit shift' to define lbr_sel_maps.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Stephane Eranian <eranian@google.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: jolsa@redhat.com
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/1415156173-10035-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h           |  4 +++
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 54 ++++++++++++++----------------
 include/uapi/linux/perf_event.h            | 49 +++++++++++++++++++--------
 3 files changed, 64 insertions(+), 43 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index df525d2be1e8..0c45b22495dc 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -515,6 +515,10 @@ struct x86_pmu {
 	struct perf_guest_switch_msr *(*guest_get_msrs)(int *nr);
 };
 
+enum {
+	PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE = PERF_SAMPLE_BRANCH_MAX_SHIFT,
+};
+
 #define x86_add_quirk(func_)						\
 do {									\
 	static struct x86_pmu_quirk __quirk __initdata = {		\
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 58f1a94beaf0..8bc078f43a82 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -69,10 +69,6 @@ static enum {
 #define LBR_FROM_FLAG_IN_TX    (1ULL << 62)
 #define LBR_FROM_FLAG_ABORT    (1ULL << 61)
 
-#define for_each_branch_sample_type(x) \
-	for ((x) = PERF_SAMPLE_BRANCH_USER; \
-	     (x) < PERF_SAMPLE_BRANCH_MAX; (x) <<= 1)
-
 /*
  * x86control flow change classification
  * x86control flow changes include branches, interrupts, traps, faults
@@ -403,14 +399,14 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 {
 	struct hw_perf_event_extra *reg;
 	u64 br_type = event->attr.branch_sample_type;
-	u64 mask = 0, m;
-	u64 v;
+	u64 mask = 0, v;
+	int i;
 
-	for_each_branch_sample_type(m) {
-		if (!(br_type & m))
+	for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) {
+		if (!(br_type & (1ULL << i)))
 			continue;
 
-		v = x86_pmu.lbr_sel_map[m];
+		v = x86_pmu.lbr_sel_map[i];
 		if (v == LBR_NOT_SUPP)
 			return -EOPNOTSUPP;
 
@@ -678,35 +674,35 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 /*
  * Map interface branch filters onto LBR filters
  */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_REL_JMP
-					| LBR_IND_JMP | LBR_FAR,
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_REL_JMP
+						| LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include REL_JMP+IND_JMP to get CALL branches
 	 */
-	[PERF_SAMPLE_BRANCH_ANY_CALL] =
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT] =
 	 LBR_REL_CALL | LBR_IND_CALL | LBR_REL_JMP | LBR_IND_JMP | LBR_FAR,
 	/*
 	 * NHM/WSM erratum: must include IND_JMP to capture IND_CALL
 	 */
-	[PERF_SAMPLE_BRANCH_IND_CALL] = LBR_IND_CALL | LBR_IND_JMP,
-	[PERF_SAMPLE_BRANCH_COND]     = LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
 };
 
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX] = {
-	[PERF_SAMPLE_BRANCH_ANY]	= LBR_ANY,
-	[PERF_SAMPLE_BRANCH_USER]	= LBR_USER,
-	[PERF_SAMPLE_BRANCH_KERNEL]	= LBR_KERNEL,
-	[PERF_SAMPLE_BRANCH_HV]		= LBR_IGN,
-	[PERF_SAMPLE_BRANCH_ANY_RETURN]	= LBR_RETURN | LBR_FAR,
-	[PERF_SAMPLE_BRANCH_ANY_CALL]	= LBR_REL_CALL | LBR_IND_CALL
-					| LBR_FAR,
-	[PERF_SAMPLE_BRANCH_IND_CALL]	= LBR_IND_CALL,
-	[PERF_SAMPLE_BRANCH_COND]       = LBR_JCC,
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
+	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
+	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
+	[PERF_SAMPLE_BRANCH_HV_SHIFT]		= LBR_IGN,
+	[PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT]	= LBR_RETURN | LBR_FAR,
+	[PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
+						| LBR_FAR,
+	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
+	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
 };
 
 /* core */
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 9b79abbd1ab8..e46b93279e3d 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -152,21 +152,42 @@ enum perf_event_sample_format {
  * The branch types can be combined, however BRANCH_ANY covers all types
  * of branches and therefore it supersedes all the other types.
  */
+enum perf_branch_sample_type_shift {
+	PERF_SAMPLE_BRANCH_USER_SHIFT		= 0, /* user branches */
+	PERF_SAMPLE_BRANCH_KERNEL_SHIFT		= 1, /* kernel branches */
+	PERF_SAMPLE_BRANCH_HV_SHIFT		= 2, /* hypervisor branches */
+
+	PERF_SAMPLE_BRANCH_ANY_SHIFT		= 3, /* any branch types */
+	PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT	= 4, /* any call branch */
+	PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT	= 5, /* any return branch */
+	PERF_SAMPLE_BRANCH_IND_CALL_SHIFT	= 6, /* indirect calls */
+	PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT	= 7, /* transaction aborts */
+	PERF_SAMPLE_BRANCH_IN_TX_SHIFT		= 8, /* in transaction */
+	PERF_SAMPLE_BRANCH_NO_TX_SHIFT		= 9, /* not in transaction */
+	PERF_SAMPLE_BRANCH_COND_SHIFT		= 10, /* conditional branches */
+
+	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
+};
+
 enum perf_branch_sample_type {
-	PERF_SAMPLE_BRANCH_USER		= 1U << 0, /* user branches */
-	PERF_SAMPLE_BRANCH_KERNEL	= 1U << 1, /* kernel branches */
-	PERF_SAMPLE_BRANCH_HV		= 1U << 2, /* hypervisor branches */
-
-	PERF_SAMPLE_BRANCH_ANY		= 1U << 3, /* any branch types */
-	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << 4, /* any call branch */
-	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << 5, /* any return branch */
-	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << 6, /* indirect calls */
-	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << 7, /* transaction aborts */
-	PERF_SAMPLE_BRANCH_IN_TX	= 1U << 8, /* in transaction */
-	PERF_SAMPLE_BRANCH_NO_TX	= 1U << 9, /* not in transaction */
-	PERF_SAMPLE_BRANCH_COND		= 1U << 10, /* conditional branches */
-
-	PERF_SAMPLE_BRANCH_MAX		= 1U << 11, /* non-ABI */
+	PERF_SAMPLE_BRANCH_USER		= 1U << PERF_SAMPLE_BRANCH_USER_SHIFT,
+	PERF_SAMPLE_BRANCH_KERNEL	= 1U << PERF_SAMPLE_BRANCH_KERNEL_SHIFT,
+	PERF_SAMPLE_BRANCH_HV		= 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
+
+	PERF_SAMPLE_BRANCH_ANY		= 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
+	PERF_SAMPLE_BRANCH_ANY_CALL	=
+				1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+	PERF_SAMPLE_BRANCH_ANY_RETURN	=
+				1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+	PERF_SAMPLE_BRANCH_IND_CALL	=
+				1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+	PERF_SAMPLE_BRANCH_ABORT_TX	=
+				1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+	PERF_SAMPLE_BRANCH_IN_TX	= 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
+	PERF_SAMPLE_BRANCH_NO_TX	= 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
+	PERF_SAMPLE_BRANCH_COND		= 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
+
+	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
 #define PERF_SAMPLE_BRANCH_PLM_ALL \
-- 
cgit 


From 2c44b1936bb3b135a3fac8b3493394d42e51cf70 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 5 Nov 2014 10:36:45 +0100
Subject: perf/x86/intel: Expose LBR callstack to user space tooling

With LBR call stack feature enable, there are three callchain options.
Enable the 3rd callchain option (LBR callstack) to user space tooling.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Cc: linux-api@vger.kernel.org
Link: http://lkml.kernel.org/r/20141105093759.GQ10501@worktop.programming.kicks-ass.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h           |  8 --------
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |  8 ++++----
 include/uapi/linux/perf_event.h            | 16 ++++++++--------
 3 files changed, 12 insertions(+), 20 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 69c26b396cf4..a371d27d6795 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -523,14 +523,6 @@ struct x86_perf_task_context {
 	int lbr_stack_state;
 };
 
-enum {
-	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT = PERF_SAMPLE_BRANCH_MAX_SHIFT,
-	PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE,
-
-	PERF_SAMPLE_BRANCH_CALL_STACK =
-				1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
-};
-
 #define x86_add_quirk(func_)						\
 do {									\
 	static struct x86_pmu_quirk __quirk __initdata = {		\
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 084f2eb20c8b..0473874109cb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -537,7 +537,7 @@ static int intel_pmu_setup_hw_lbr_filter(struct perf_event *event)
 	u64 mask = 0, v;
 	int i;
 
-	for (i = 0; i < PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE; i++) {
+	for (i = 0; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
 		if (!(br_type & (1ULL << i)))
 			continue;
 
@@ -821,7 +821,7 @@ intel_pmu_lbr_filter(struct cpu_hw_events *cpuc)
 /*
  * Map interface branch filters onto LBR filters
  */
-static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
@@ -840,7 +840,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
 };
 
-static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
@@ -852,7 +852,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
 };
 
-static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_SELECT_MAP_SIZE] = {
+static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	[PERF_SAMPLE_BRANCH_ANY_SHIFT]		= LBR_ANY,
 	[PERF_SAMPLE_BRANCH_USER_SHIFT]		= LBR_USER,
 	[PERF_SAMPLE_BRANCH_KERNEL_SHIFT]	= LBR_KERNEL,
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index e46b93279e3d..1e3cd07cf76e 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -166,6 +166,8 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_NO_TX_SHIFT		= 9, /* not in transaction */
 	PERF_SAMPLE_BRANCH_COND_SHIFT		= 10, /* conditional branches */
 
+	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT	= 11, /* call/ret stack */
+
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
 
@@ -175,18 +177,16 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_HV		= 1U << PERF_SAMPLE_BRANCH_HV_SHIFT,
 
 	PERF_SAMPLE_BRANCH_ANY		= 1U << PERF_SAMPLE_BRANCH_ANY_SHIFT,
-	PERF_SAMPLE_BRANCH_ANY_CALL	=
-				1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
-	PERF_SAMPLE_BRANCH_ANY_RETURN	=
-				1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
-	PERF_SAMPLE_BRANCH_IND_CALL	=
-				1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
-	PERF_SAMPLE_BRANCH_ABORT_TX	=
-				1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
+	PERF_SAMPLE_BRANCH_ANY_CALL	= 1U << PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT,
+	PERF_SAMPLE_BRANCH_ANY_RETURN	= 1U << PERF_SAMPLE_BRANCH_ANY_RETURN_SHIFT,
+	PERF_SAMPLE_BRANCH_IND_CALL	= 1U << PERF_SAMPLE_BRANCH_IND_CALL_SHIFT,
+	PERF_SAMPLE_BRANCH_ABORT_TX	= 1U << PERF_SAMPLE_BRANCH_ABORT_TX_SHIFT,
 	PERF_SAMPLE_BRANCH_IN_TX	= 1U << PERF_SAMPLE_BRANCH_IN_TX_SHIFT,
 	PERF_SAMPLE_BRANCH_NO_TX	= 1U << PERF_SAMPLE_BRANCH_NO_TX_SHIFT,
 	PERF_SAMPLE_BRANCH_COND		= 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
 
+	PERF_SAMPLE_BRANCH_CALL_STACK	= 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
 
-- 
cgit 


From ca8895d9bb41e743271c42a4438a296de891b73b Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Wed, 26 Nov 2014 12:22:03 -0600
Subject: Return MD_SB_CLUSTERED if mddev is clustered

Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 drivers/md/md.c                | 3 +++
 include/uapi/linux/raid/md_p.h | 1 +
 2 files changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 3387f940140b..5ed57688e5c5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5634,6 +5634,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
 		info.state = (1<<MD_SB_CLEAN);
 	if (mddev->bitmap && mddev->bitmap_info.offset)
 		info.state |= (1<<MD_SB_BITMAP_PRESENT);
+	if (mddev_is_clustered(mddev))
+		info.state |= (1<<MD_SB_CLUSTERED);
+
 	info.active_disks  = insync;
 	info.working_disks = working;
 	info.failed_disks  = failed;
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index 49f4210d4394..643489d33e68 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -101,6 +101,7 @@ typedef struct mdp_device_descriptor_s {
 #define MD_SB_CLEAN		0
 #define MD_SB_ERRORS		1
 
+#define	MD_SB_CLUSTERED		5 /* MD is clustered */
 #define	MD_SB_BITMAP_PRESENT	8 /* bitmap may be present nearby */
 
 /*
-- 
cgit 


From 1aee41f637694d4bbf91c24195f2b63e3f6badd2 Mon Sep 17 00:00:00 2001
From: Goldwyn Rodrigues <rgoldwyn@suse.com>
Date: Wed, 29 Oct 2014 18:51:31 -0500
Subject: Add new disk to clustered array

Algorithm:
1. Node 1 issues mdadm --manage /dev/mdX --add /dev/sdYY which issues
   ioctl(ADD_NEW_DISC with disc.state set to MD_DISK_CLUSTER_ADD)
2. Node 1 sends NEWDISK with uuid and slot number
3. Other nodes issue kobject_uevent_env with uuid and slot number
(Steps 4,5 could be a udev rule)
4. In userspace, the node searches for the disk, perhaps
   using blkid -t SUB_UUID=""
5. Other nodes issue either of the following depending on whether the disk
   was found:
   ioctl(ADD_NEW_DISK with disc.state set to MD_DISK_CANDIDATE and
	 disc.number set to slot number)
   ioctl(CLUSTERED_DISK_NACK)
6. Other nodes drop lock on no-new-devs (CR) if device is found
7. Node 1 attempts EX lock on no-new-devs
8. If node 1 gets the lock, it sends METADATA_UPDATED after unmarking the disk
   as SpareLocal
9. If not (get no-new-dev lock), it fails the operation and sends METADATA_UPDATED
10. Other nodes understand if the device is added or not by reading the superblock again after receiving the METADATA_UPDATED message.

Signed-off-by: Lidong Zhong <lzhong@suse.com>
Signed-off-by: Goldwyn Rodrigues <rgoldwyn@suse.com>
---
 drivers/md/md-cluster.c        | 104 ++++++++++++++++++++++++++++++++++++++++-
 drivers/md/md-cluster.h        |   4 ++
 drivers/md/md.c                |  52 +++++++++++++++++++--
 drivers/md/md.h                |   5 ++
 drivers/md/raid1.c             |   1 +
 include/uapi/linux/raid/md_p.h |   6 +++
 include/uapi/linux/raid/md_u.h |   1 +
 7 files changed, 169 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c
index d85a6ca4443e..03e521a9ca7d 100644
--- a/drivers/md/md-cluster.c
+++ b/drivers/md/md-cluster.c
@@ -12,11 +12,13 @@
 #include <linux/module.h>
 #include <linux/dlm.h>
 #include <linux/sched.h>
+#include <linux/raid/md_p.h>
 #include "md.h"
 #include "bitmap.h"
 #include "md-cluster.h"
 
 #define LVB_SIZE	64
+#define NEW_DEV_TIMEOUT 5000
 
 struct dlm_lock_resource {
 	dlm_lockspace_t *ls;
@@ -56,19 +58,25 @@ struct md_cluster_info {
 	struct dlm_lock_resource *ack_lockres;
 	struct dlm_lock_resource *message_lockres;
 	struct dlm_lock_resource *token_lockres;
+	struct dlm_lock_resource *no_new_dev_lockres;
 	struct md_thread *recv_thread;
+	struct completion newdisk_completion;
 };
 
 enum msg_type {
 	METADATA_UPDATED = 0,
 	RESYNCING,
+	NEWDISK,
 };
 
 struct cluster_msg {
 	int type;
 	int slot;
+	/* TODO: Unionize this for smaller footprint */
 	sector_t low;
 	sector_t high;
+	char uuid[16];
+	int raid_slot;
 };
 
 static void sync_ast(void *arg)
@@ -358,13 +366,41 @@ static void process_suspend_info(struct md_cluster_info *cinfo,
 	spin_unlock_irq(&cinfo->suspend_lock);
 }
 
+static void process_add_new_disk(struct mddev *mddev, struct cluster_msg *cmsg)
+{
+	char disk_uuid[64];
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	char event_name[] = "EVENT=ADD_DEVICE";
+	char raid_slot[16];
+	char *envp[] = {event_name, disk_uuid, raid_slot, NULL};
+	int len;
+
+	len = snprintf(disk_uuid, 64, "DEVICE_UUID=");
+	pretty_uuid(disk_uuid + len, cmsg->uuid);
+	snprintf(raid_slot, 16, "RAID_DISK=%d", cmsg->raid_slot);
+	pr_info("%s:%d Sending kobject change with %s and %s\n", __func__, __LINE__, disk_uuid, raid_slot);
+	init_completion(&cinfo->newdisk_completion);
+	kobject_uevent_env(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE, envp);
+	wait_for_completion_timeout(&cinfo->newdisk_completion,
+			NEW_DEV_TIMEOUT);
+}
+
+
+static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	md_reload_sb(mddev);
+	dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
+}
+
 static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 {
 	switch (msg->type) {
 	case METADATA_UPDATED:
 		pr_info("%s: %d Received message: METADATA_UPDATE from %d\n",
 			__func__, __LINE__, msg->slot);
-		md_reload_sb(mddev);
+		process_metadata_update(mddev, msg);
 		break;
 	case RESYNCING:
 		pr_info("%s: %d Received message: RESYNCING from %d\n",
@@ -372,6 +408,10 @@ static void process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg)
 		process_suspend_info(mddev->cluster_info, msg->slot,
 				msg->low, msg->high);
 		break;
+	case NEWDISK:
+		pr_info("%s: %d Received message: NEWDISK from %d\n",
+			__func__, __LINE__, msg->slot);
+		process_add_new_disk(mddev, msg);
 	};
 }
 
@@ -593,10 +633,18 @@ static int join(struct mddev *mddev, int nodes)
 	cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0);
 	if (!cinfo->ack_lockres)
 		goto err;
+	cinfo->no_new_dev_lockres = lockres_init(mddev, "no-new-dev", NULL, 0);
+	if (!cinfo->no_new_dev_lockres)
+		goto err;
+
 	/* get sync CR lock on ACK. */
 	if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR))
 		pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n",
 				ret);
+	/* get sync CR lock on no-new-dev. */
+	if (dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR))
+		pr_err("md-cluster: failed to get a sync CR lock on no-new-dev!(%d)\n", ret);
+
 
 	pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number);
 	snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1);
@@ -621,6 +669,7 @@ err:
 	lockres_free(cinfo->message_lockres);
 	lockres_free(cinfo->token_lockres);
 	lockres_free(cinfo->ack_lockres);
+	lockres_free(cinfo->no_new_dev_lockres);
 	lockres_free(cinfo->bitmap_lockres);
 	lockres_free(cinfo->sb_lock);
 	if (cinfo->lockspace)
@@ -642,6 +691,7 @@ static int leave(struct mddev *mddev)
 	lockres_free(cinfo->message_lockres);
 	lockres_free(cinfo->token_lockres);
 	lockres_free(cinfo->ack_lockres);
+	lockres_free(cinfo->no_new_dev_lockres);
 	lockres_free(cinfo->sb_lock);
 	lockres_free(cinfo->bitmap_lockres);
 	dlm_release_lockspace(cinfo->lockspace, 2);
@@ -742,6 +792,55 @@ out:
 	return ret;
 }
 
+static int add_new_disk_start(struct mddev *mddev, struct md_rdev *rdev)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	struct cluster_msg cmsg;
+	int ret = 0;
+	struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
+	char *uuid = sb->device_uuid;
+
+	memset(&cmsg, 0, sizeof(cmsg));
+	cmsg.type = cpu_to_le32(NEWDISK);
+	memcpy(cmsg.uuid, uuid, 16);
+	cmsg.raid_slot = rdev->desc_nr;
+	lock_comm(cinfo);
+	ret = __sendmsg(cinfo, &cmsg);
+	if (ret)
+		return ret;
+	cinfo->no_new_dev_lockres->flags |= DLM_LKF_NOQUEUE;
+	ret = dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_EX);
+	cinfo->no_new_dev_lockres->flags &= ~DLM_LKF_NOQUEUE;
+	/* Some node does not "see" the device */
+	if (ret == -EAGAIN)
+		ret = -ENOENT;
+	else
+		dlm_lock_sync(cinfo->no_new_dev_lockres, DLM_LOCK_CR);
+	return ret;
+}
+
+static int add_new_disk_finish(struct mddev *mddev)
+{
+	struct cluster_msg cmsg;
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+	int ret;
+	/* Write sb and inform others */
+	md_update_sb(mddev, 1);
+	cmsg.type = METADATA_UPDATED;
+	ret = __sendmsg(cinfo, &cmsg);
+	unlock_comm(cinfo);
+	return ret;
+}
+
+static void new_disk_ack(struct mddev *mddev, bool ack)
+{
+	struct md_cluster_info *cinfo = mddev->cluster_info;
+
+	if (ack)
+		dlm_unlock_sync(cinfo->no_new_dev_lockres);
+	complete(&cinfo->newdisk_completion);
+}
+
 static struct md_cluster_operations cluster_ops = {
 	.join   = join,
 	.leave  = leave,
@@ -753,6 +852,9 @@ static struct md_cluster_operations cluster_ops = {
 	.metadata_update_finish = metadata_update_finish,
 	.metadata_update_cancel = metadata_update_cancel,
 	.area_resyncing = area_resyncing,
+	.add_new_disk_start = add_new_disk_start,
+	.add_new_disk_finish = add_new_disk_finish,
+	.new_disk_ack = new_disk_ack,
 };
 
 static int __init cluster_init(void)
diff --git a/drivers/md/md-cluster.h b/drivers/md/md-cluster.h
index 03785402afaa..60d7e58964f5 100644
--- a/drivers/md/md-cluster.h
+++ b/drivers/md/md-cluster.h
@@ -6,6 +6,7 @@
 #include "md.h"
 
 struct mddev;
+struct md_rdev;
 
 struct md_cluster_operations {
 	int (*join)(struct mddev *mddev, int nodes);
@@ -18,6 +19,9 @@ struct md_cluster_operations {
 	int (*metadata_update_finish)(struct mddev *mddev);
 	int (*metadata_update_cancel)(struct mddev *mddev);
 	int (*area_resyncing)(struct mddev *mddev, sector_t lo, sector_t hi);
+	int (*add_new_disk_start)(struct mddev *mddev, struct md_rdev *rdev);
+	int (*add_new_disk_finish)(struct mddev *mddev);
+	void (*new_disk_ack)(struct mddev *mddev, bool ack);
 };
 
 #endif /* _MD_CLUSTER_H */
diff --git a/drivers/md/md.c b/drivers/md/md.c
index fe0484648de4..5703c2e89f3a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2210,7 +2210,7 @@ static void sync_sbs(struct mddev *mddev, int nospares)
 	}
 }
 
-static void md_update_sb(struct mddev *mddev, int force_change)
+void md_update_sb(struct mddev *mddev, int force_change)
 {
 	struct md_rdev *rdev;
 	int sync_req;
@@ -2371,6 +2371,7 @@ repeat:
 		wake_up(&rdev->blocked_wait);
 	}
 }
+EXPORT_SYMBOL(md_update_sb);
 
 /* words written to sysfs files may, or may not, be \n terminated.
  * We want to accept with case. For this we use cmd_match.
@@ -3151,7 +3152,7 @@ static void analyze_sbs(struct mddev *mddev)
 			kick_rdev_from_array(rdev);
 			continue;
 		}
-		if (rdev != freshest)
+		if (rdev != freshest) {
 			if (super_types[mddev->major_version].
 			    validate_super(mddev, rdev)) {
 				printk(KERN_WARNING "md: kicking non-fresh %s"
@@ -3160,6 +3161,15 @@ static void analyze_sbs(struct mddev *mddev)
 				kick_rdev_from_array(rdev);
 				continue;
 			}
+			/* No device should have a Candidate flag
+			 * when reading devices
+			 */
+			if (test_bit(Candidate, &rdev->flags)) {
+				pr_info("md: kicking Cluster Candidate %s from array!\n",
+					bdevname(rdev->bdev, b));
+				kick_rdev_from_array(rdev);
+			}
+		}
 		if (mddev->level == LEVEL_MULTIPATH) {
 			rdev->desc_nr = i++;
 			rdev->raid_disk = rdev->desc_nr;
@@ -5655,7 +5665,6 @@ static int get_array_info(struct mddev *mddev, void __user *arg)
 		info.state |= (1<<MD_SB_BITMAP_PRESENT);
 	if (mddev_is_clustered(mddev))
 		info.state |= (1<<MD_SB_CLUSTERED);
-
 	info.active_disks  = insync;
 	info.working_disks = working;
 	info.failed_disks  = failed;
@@ -5744,6 +5753,13 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
 	struct md_rdev *rdev;
 	dev_t dev = MKDEV(info->major,info->minor);
 
+	if (mddev_is_clustered(mddev) &&
+		!(info->state & ((1 << MD_DISK_CLUSTER_ADD) | (1 << MD_DISK_CANDIDATE)))) {
+		pr_err("%s: Cannot add to clustered mddev. Try --cluster-add\n",
+			       mdname(mddev));
+		return -EINVAL;
+	}
+
 	if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
 		return -EOVERFLOW;
 
@@ -5830,6 +5846,25 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
 		else
 			clear_bit(WriteMostly, &rdev->flags);
 
+		/*
+		 * check whether the device shows up in other nodes
+		 */
+		if (mddev_is_clustered(mddev)) {
+			if (info->state & (1 << MD_DISK_CANDIDATE)) {
+				/* Through --cluster-confirm */
+				set_bit(Candidate, &rdev->flags);
+				md_cluster_ops->new_disk_ack(mddev, true);
+			} else if (info->state & (1 << MD_DISK_CLUSTER_ADD)) {
+				/* --add initiated by this node */
+				err = md_cluster_ops->add_new_disk_start(mddev, rdev);
+				if (err) {
+					md_cluster_ops->add_new_disk_finish(mddev);
+					export_rdev(rdev);
+					return err;
+				}
+			}
+		}
+
 		rdev->raid_disk = -1;
 		err = bind_rdev_to_array(rdev, mddev);
 		if (!err && !mddev->pers->hot_remove_disk) {
@@ -5855,6 +5890,9 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info)
 		if (!err)
 			md_new_event(mddev);
 		md_wakeup_thread(mddev->thread);
+		if (mddev_is_clustered(mddev) &&
+				(info->state & (1 << MD_DISK_CLUSTER_ADD)))
+			md_cluster_ops->add_new_disk_finish(mddev);
 		return err;
 	}
 
@@ -6456,6 +6494,7 @@ static inline bool md_ioctl_valid(unsigned int cmd)
 	case SET_DISK_FAULTY:
 	case STOP_ARRAY:
 	case STOP_ARRAY_RO:
+	case CLUSTERED_DISK_NACK:
 		return true;
 	default:
 		return false;
@@ -6728,6 +6767,13 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 		goto unlock;
 	}
 
+	case CLUSTERED_DISK_NACK:
+		if (mddev_is_clustered(mddev))
+			md_cluster_ops->new_disk_ack(mddev, false);
+		else
+			err = -EINVAL;
+		goto unlock;
+
 	case HOT_ADD_DISK:
 		err = hot_add_disk(mddev, new_decode_dev(arg));
 		goto unlock;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index bfebcfdf54e6..6dc0ce09f50c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -171,6 +171,10 @@ enum flag_bits {
 				 * a want_replacement device with same
 				 * raid_disk number.
 				 */
+	Candidate,		/* For clustered environments only:
+				 * This device is seen locally but not
+				 * by the whole cluster
+				 */
 };
 
 #define BB_LEN_MASK	(0x00000000000001FFULL)
@@ -666,6 +670,7 @@ extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
 
 extern void md_unplug(struct blk_plug_cb *cb, bool from_schedule);
 extern void md_reload_sb(struct mddev *mddev);
+extern void md_update_sb(struct mddev *mddev, int force);
 static inline int mddev_check_plugged(struct mddev *mddev)
 {
 	return !!blk_check_plugged(md_unplug, mddev,
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index f70d74189d16..53ed5d48308f 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1571,6 +1571,7 @@ static int raid1_spare_active(struct mddev *mddev)
 		struct md_rdev *rdev = conf->mirrors[i].rdev;
 		struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
 		if (repl
+		    && !test_bit(Candidate, &repl->flags)
 		    && repl->recovery_offset == MaxSector
 		    && !test_bit(Faulty, &repl->flags)
 		    && !test_and_set_bit(In_sync, &repl->flags)) {
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index 643489d33e68..2ae6131e69a5 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -78,6 +78,12 @@
 #define MD_DISK_ACTIVE		1 /* disk is running or spare disk */
 #define MD_DISK_SYNC		2 /* disk is in sync with the raid set */
 #define MD_DISK_REMOVED		3 /* disk is in sync with the raid set */
+#define MD_DISK_CLUSTER_ADD     4 /* Initiate a disk add across the cluster
+				   * For clustered enviroments only.
+				   */
+#define MD_DISK_CANDIDATE	5 /* disk is added as spare (local) until confirmed
+				   * For clustered enviroments only.
+				   */
 
 #define	MD_DISK_WRITEMOSTLY	9 /* disk is "write-mostly" is RAID1 config.
 				   * read requests will only be sent here in
diff --git a/include/uapi/linux/raid/md_u.h b/include/uapi/linux/raid/md_u.h
index 74e7c60c4716..1cb8aa6850b5 100644
--- a/include/uapi/linux/raid/md_u.h
+++ b/include/uapi/linux/raid/md_u.h
@@ -62,6 +62,7 @@
 #define STOP_ARRAY		_IO (MD_MAJOR, 0x32)
 #define STOP_ARRAY_RO		_IO (MD_MAJOR, 0x33)
 #define RESTART_ARRAY_RW	_IO (MD_MAJOR, 0x34)
+#define CLUSTERED_DISK_NACK	_IO (MD_MAJOR, 0x35)
 
 /* 63 partitions with the alternate major number (mdp) */
 #define MdpMinorShift 6
-- 
cgit 


From 7e182f78988404717e27aed5a9d4150631b323f4 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Wed, 25 Feb 2015 12:05:13 -0300
Subject: [media] media.h: mark alsa struct in media_entity_desc as TODO

The alsa struct in struct media_entity_desc is now marked as deprecated.
However, the alsa struct should remain as it is since it cannot be replaced
by a simple major/minor device node description. The alsa struct was designed
to be used as an alsa card description so V4L2 drivers could use this to expose
the alsa card that they create to carry the captured audio. Such a card is not
just a PCM device, but also needs to contain the alsa subdevice information,
and it may map to multiple devices, e.g. a PCM and a mixer device, such as the
au0828 usb stick creates.

This is exactly as intended and this cannot and should not be replaced by a
simple major/minor.

However, whether this information is in the right form for an ALSA device such
that it can handle udev renaming rules as well is another matter. So mark this
alsa struct as TODO and document the problems involved.

Updated the documentation as well to reflect this and to add the 'major'
and 'minor' field documentation.

Updated the documentation to clearly state that struct dev is to be used for
(sub-)devices that create a single device node. Other devices need their own
structure here.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 .../DocBook/media/v4l/media-ioc-enum-entities.xml  | 16 ++++++++++++-
 include/uapi/linux/media.h                         | 26 +++++++++++++++++-----
 2 files changed, 36 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
index cbf307f21a63..5872f8bbf774 100644
--- a/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
+++ b/Documentation/DocBook/media/v4l/media-ioc-enum-entities.xml
@@ -145,7 +145,21 @@
 	    <entry>struct</entry>
 	    <entry><structfield>dev</structfield></entry>
 	    <entry></entry>
-	    <entry>Valid for (sub-)devices that create devnodes.</entry>
+	    <entry>Valid for (sub-)devices that create a single device node.</entry>
+	  </row>
+	  <row>
+	    <entry></entry>
+	    <entry></entry>
+	    <entry>__u32</entry>
+	    <entry><structfield>major</structfield></entry>
+	    <entry>Device node major number.</entry>
+	  </row>
+	  <row>
+	    <entry></entry>
+	    <entry></entry>
+	    <entry>__u32</entry>
+	    <entry><structfield>minor</structfield></entry>
+	    <entry>Device node minor number.</entry>
 	  </row>
 	  <row>
 	    <entry></entry>
diff --git a/include/uapi/linux/media.h b/include/uapi/linux/media.h
index 52cc2a6b19b7..4e816be3de39 100644
--- a/include/uapi/linux/media.h
+++ b/include/uapi/linux/media.h
@@ -89,6 +89,27 @@ struct media_entity_desc {
 			__u32 minor;
 		} dev;
 
+#if 1
+		/*
+		 * TODO: this shouldn't have been added without
+		 * actual drivers that use this. When the first real driver
+		 * appears that sets this information, special attention
+		 * should be given whether this information is 1) enough, and
+		 * 2) can deal with udev rules that rename devices. The struct
+		 * dev would not be sufficient for this since that does not
+		 * contain the subdevice information. In addition, struct dev
+		 * can only refer to a single device, and not to multiple (e.g.
+		 * pcm and mixer devices).
+		 *
+		 * So for now mark this as a to do.
+		 */
+		struct {
+			__u32 card;
+			__u32 device;
+			__u32 subdevice;
+		} alsa;
+#endif
+
 #if 1
 		/*
 		 * DEPRECATED: previous node specifications. Kept just to
@@ -106,11 +127,6 @@ struct media_entity_desc {
 			__u32 major;
 			__u32 minor;
 		} fb;
-		struct {
-			__u32 card;
-			__u32 device;
-			__u32 subdevice;
-		} alsa;
 		int dvb;
 #endif
 
-- 
cgit 


From 93a714d6b53d87872e552dbb273544bdeaaf6e12 Mon Sep 17 00:00:00 2001
From: Madhu Challa <challa@noironetworks.com>
Date: Wed, 25 Feb 2015 09:58:35 -0800
Subject: multicast: Extend ip address command to enable multicast group
 join/leave on

Joining multicast group on ethernet level via "ip maddr" command would
not work if we have an Ethernet switch that does igmp snooping since
the switch would not replicate multicast packets on ports that did not
have IGMP reports for the multicast addresses.

Linux vxlan interfaces created via "ip link add vxlan" have the group option
that enables then to do the required join.

By extending ip address command with option "autojoin" we can get similar
functionality for openvswitch vxlan interfaces as well as other tunneling
mechanisms that need to receive multicast traffic. The kernel code is
structured similar to how the vxlan driver does a group join / leave.

example:
ip address add 224.1.1.10/24 dev eth5 autojoin
ip address del 224.1.1.10/24 dev eth5

Signed-off-by: Madhu Challa <challa@noironetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/ipv4.h     |  1 +
 include/net/netns/ipv6.h     |  1 +
 include/uapi/linux/if_addr.h |  1 +
 net/ipv4/devinet.c           | 31 +++++++++++++++++++++++++++++++
 net/ipv4/igmp.c              | 13 +++++++++++++
 net/ipv6/addrconf.c          | 38 +++++++++++++++++++++++++++++++++++---
 net/ipv6/mcast.c             | 20 ++++++++++++++++----
 7 files changed, 98 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index dbe225478adb..1b26c6c3fd7c 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -49,6 +49,7 @@ struct netns_ipv4 {
 	struct sock		*fibnl;
 
 	struct sock  * __percpu	*icmp_sk;
+	struct sock		*mc_autojoin_sk;
 
 	struct inet_peer_base	*peers;
 	struct tcpm_hash_bucket	*tcp_metrics_hash;
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 69ae41f2098c..ca0db12cd089 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -67,6 +67,7 @@ struct netns_ipv6 {
 	struct sock             *ndisc_sk;
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
+	struct sock		*mc_autojoin_sk;
 #ifdef CONFIG_IPV6_MROUTE
 #ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 	struct mr6_table	*mrt6;
diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h
index dea10a87dfd1..40fdfea39714 100644
--- a/include/uapi/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
@@ -50,6 +50,7 @@ enum {
 #define IFA_F_PERMANENT		0x80
 #define IFA_F_MANAGETEMPADDR	0x100
 #define IFA_F_NOPREFIXROUTE	0x200
+#define IFA_F_MCAUTOJOIN	0x400
 
 struct ifa_cacheinfo {
 	__u32	ifa_prefered;
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 3a8985c94581..5105759e4e00 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -548,6 +548,26 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
 	return NULL;
 }
 
+static int ip_mc_config(struct sock *sk, bool join, const struct in_ifaddr *ifa)
+{
+	struct ip_mreqn mreq = {
+		.imr_multiaddr.s_addr = ifa->ifa_address,
+		.imr_ifindex = ifa->ifa_dev->dev->ifindex,
+	};
+	int ret;
+
+	ASSERT_RTNL();
+
+	lock_sock(sk);
+	if (join)
+		ret = __ip_mc_join_group(sk, &mreq);
+	else
+		ret = __ip_mc_leave_group(sk, &mreq);
+	release_sock(sk);
+
+	return ret;
+}
+
 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
 	struct net *net = sock_net(skb->sk);
@@ -584,6 +604,8 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)
 		    !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
 			continue;
 
+		if (ipv4_is_multicast(ifa->ifa_address))
+			ip_mc_config(net->ipv4.mc_autojoin_sk, false, ifa);
 		__inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid);
 		return 0;
 	}
@@ -838,6 +860,15 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 		 * userspace already relies on not having to provide this.
 		 */
 		set_ifa_lifetime(ifa, valid_lft, prefered_lft);
+		if (ifa->ifa_flags & IFA_F_MCAUTOJOIN) {
+			int ret = ip_mc_config(net->ipv4.mc_autojoin_sk,
+					       true, ifa);
+
+			if (ret < 0) {
+				inet_free_ifa(ifa);
+				return ret;
+			}
+		}
 		return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid);
 	} else {
 		inet_free_ifa(ifa);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4b1172d73e03..5cb1ef4ce292 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -97,6 +97,7 @@
 #include <net/route.h>
 #include <net/sock.h>
 #include <net/checksum.h>
+#include <net/inet_common.h>
 #include <linux/netfilter_ipv4.h>
 #ifdef CONFIG_IP_MROUTE
 #include <linux/mroute.h>
@@ -2740,6 +2741,7 @@ static const struct file_operations igmp_mcf_seq_fops = {
 static int __net_init igmp_net_init(struct net *net)
 {
 	struct proc_dir_entry *pde;
+	int err;
 
 	pde = proc_create("igmp", S_IRUGO, net->proc_net, &igmp_mc_seq_fops);
 	if (!pde)
@@ -2748,8 +2750,18 @@ static int __net_init igmp_net_init(struct net *net)
 			  &igmp_mcf_seq_fops);
 	if (!pde)
 		goto out_mcfilter;
+	err = inet_ctl_sock_create(&net->ipv4.mc_autojoin_sk, AF_INET,
+				   SOCK_DGRAM, 0, net);
+	if (err < 0) {
+		pr_err("Failed to initialize the IGMP autojoin socket (err %d)\n",
+		       err);
+		goto out_sock;
+	}
+
 	return 0;
 
+out_sock:
+	remove_proc_entry("mcfilter", net->proc_net);
 out_mcfilter:
 	remove_proc_entry("igmp", net->proc_net);
 out_igmp:
@@ -2760,6 +2772,7 @@ static void __net_exit igmp_net_exit(struct net *net)
 {
 	remove_proc_entry("mcfilter", net->proc_net);
 	remove_proc_entry("igmp", net->proc_net);
+	inet_ctl_sock_destroy(net->ipv4.mc_autojoin_sk);
 }
 
 static struct pernet_operations igmp_net_ops = {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 98e4a63d72bb..783bccfcc060 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2464,6 +2464,23 @@ err_exit:
 	return err;
 }
 
+static int ipv6_mc_config(struct sock *sk, bool join,
+			  const struct in6_addr *addr, int ifindex)
+{
+	int ret;
+
+	ASSERT_RTNL();
+
+	lock_sock(sk);
+	if (join)
+		ret = __ipv6_sock_mc_join(sk, ifindex, addr);
+	else
+		ret = __ipv6_sock_mc_drop(sk, ifindex, addr);
+	release_sock(sk);
+
+	return ret;
+}
+
 /*
  *	Manual configuration of address on an interface
  */
@@ -2476,10 +2493,10 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	struct inet6_ifaddr *ifp;
 	struct inet6_dev *idev;
 	struct net_device *dev;
+	unsigned long timeout;
+	clock_t expires;
 	int scope;
 	u32 flags;
-	clock_t expires;
-	unsigned long timeout;
 
 	ASSERT_RTNL();
 
@@ -2501,6 +2518,14 @@ static int inet6_addr_add(struct net *net, int ifindex,
 	if (IS_ERR(idev))
 		return PTR_ERR(idev);
 
+	if (ifa_flags & IFA_F_MCAUTOJOIN) {
+		int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+					 true, pfx, ifindex);
+
+		if (ret < 0)
+			return ret;
+	}
+
 	scope = ipv6_addr_scope(pfx);
 
 	timeout = addrconf_timeout_fixup(valid_lft, HZ);
@@ -2542,6 +2567,9 @@ static int inet6_addr_add(struct net *net, int ifindex,
 		in6_ifa_put(ifp);
 		addrconf_verify_rtnl();
 		return 0;
+	} else if (ifa_flags & IFA_F_MCAUTOJOIN) {
+		ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+			       false, pfx, ifindex);
 	}
 
 	return PTR_ERR(ifp);
@@ -2578,6 +2606,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags,
 						 jiffies);
 			ipv6_del_addr(ifp);
 			addrconf_verify_rtnl();
+			if (ipv6_addr_is_multicast(pfx)) {
+				ipv6_mc_config(net->ipv6.mc_autojoin_sk,
+					       false, pfx, dev->ifindex);
+			}
 			return 0;
 		}
 	}
@@ -3945,7 +3977,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)
 
 	/* We ignore other flags so far. */
 	ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR |
-		     IFA_F_NOPREFIXROUTE;
+		     IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN;
 
 	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);
 	if (ifa == NULL) {
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index e4955d019734..1dd1fedff9f4 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -2929,20 +2929,32 @@ static int __net_init igmp6_net_init(struct net *net)
 
 	inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1;
 
+	err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6,
+				   SOCK_RAW, IPPROTO_ICMPV6, net);
+	if (err < 0) {
+		pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n",
+		       err);
+		goto out_sock_create;
+	}
+
 	err = igmp6_proc_init(net);
 	if (err)
-		goto out_sock_create;
-out:
-	return err;
+		goto out_sock_create_autojoin;
+
+	return 0;
 
+out_sock_create_autojoin:
+	inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk);
 out_sock_create:
 	inet_ctl_sock_destroy(net->ipv6.igmp_sk);
-	goto out;
+out:
+	return err;
 }
 
 static void __net_exit igmp6_net_exit(struct net *net)
 {
 	inet_ctl_sock_destroy(net->ipv6.igmp_sk);
+	inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk);
 	igmp6_proc_exit(net);
 }
 
-- 
cgit 


From 31f909a2c0abfc1a1a76b2981d28ac85d33210e7 Mon Sep 17 00:00:00 2001
From: Masashi Honma <masashi.honma@gmail.com>
Date: Tue, 24 Feb 2015 22:42:16 +0900
Subject: nl/mac80211: allow zero plink timeout to disable STA expiration

Both wpa_supplicant and mac80211 have and inactivity timer. By default
wpa_supplicant will be timed out in 5 minutes and mac80211's it is 30
minutes. If wpa_supplicant uses a longer timer than mac80211, it will
get unexpected disconnection by mac80211.

Using 0xffffffff instead as the configured value could solve this w/o
changing the code, but due to integer overflow in the expression used
this doesn't work. The expression is:

(current jiffies) > (frame Rx jiffies + NL80211_MESHCONF_PLINK_TIMEOUT * 250)

On 32bit system, the right side would overflow and be a very small
value if NL80211_MESHCONF_PLINK_TIMEOUT is sufficiently large,
causing unexpectedly early disconnections.

Instead allow disabling the inactivity timer to avoid this situation,
by passing the (previously invalid and useless) value 0.

Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
[reword/rewrap commit log]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 3 ++-
 net/mac80211/mesh.c          | 3 ++-
 net/wireless/nl80211.c       | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 68b294e83944..2dcf9bba317c 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3092,7 +3092,8 @@ enum nl80211_mesh_power_mode {
  *
  * @NL80211_MESHCONF_PLINK_TIMEOUT: If no tx activity is seen from a STA we've
  *	established peering with for longer than this time (in seconds), then
- *	remove it from the STA's list of peers.  Default is 30 minutes.
+ *	remove it from the STA's list of peers. You may set this to 0 to disable
+ *	the removal of the STA. Default is 30 minutes.
  *
  * @__NL80211_MESHCONF_ATTR_AFTER_LAST: internal use
  */
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 0c8b2a77d312..acf441ff9f4a 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -574,7 +574,8 @@ static void ieee80211_mesh_housekeeping(struct ieee80211_sub_if_data *sdata)
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	u32 changed;
 
-	ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ);
+	if (ifmsh->mshcfg.plink_timeout > 0)
+		ieee80211_sta_expire(sdata, ifmsh->mshcfg.plink_timeout * HZ);
 	mesh_path_expire(sdata);
 
 	changed = mesh_accept_plinks_update(sdata);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d78fd8b54515..9c6e23ede5b2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -5265,7 +5265,7 @@ do {									    \
 	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration,
 				  0, 65535, mask,
 				  NL80211_MESHCONF_AWAKE_WINDOW, nla_get_u16);
-	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 1, 0xffffffff,
+	FILL_IN_MESH_PARAM_IF_SET(tb, cfg, plink_timeout, 0, 0xffffffff,
 				  mask, NL80211_MESHCONF_PLINK_TIMEOUT,
 				  nla_get_u32);
 	if (mask_out)
-- 
cgit 


From f1a66f85b74c5ef7b503f746ea97742dacd56419 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:43 +0100
Subject: ebpf: export BPF_PSEUDO_MAP_FD to uapi

We need to export BPF_PSEUDO_MAP_FD to user space, as it's used in the
ELF BPF loader where instructions are being loaded that need map fixups.

An initial stage loads all maps into the kernel, and later on replaces
related instructions in the eBPF blob with BPF_PSEUDO_MAP_FD as source
register and the actual fd as immediate value.

The kernel verifier recognizes this keyword and replaces the map fd with
a real pointer internally.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h   | 2 --
 include/uapi/linux/bpf.h | 2 ++
 samples/bpf/libbpf.h     | 4 +++-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index caac2087a4d5..5e3863d5f666 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -145,8 +145,6 @@ struct bpf_prog_aux;
 		.off   = 0,					\
 		.imm   = ((__u64) (IMM)) >> 32 })
 
-#define BPF_PSEUDO_MAP_FD	1
-
 /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
 #define BPF_LD_MAP_FD(DST, MAP_FD)				\
 	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec7d274..0248180bf2e2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -120,6 +120,8 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
+#define BPF_PSEUDO_MAP_FD	1
+
 /* flags for BPF_MAP_UPDATE_ELEM command */
 #define BPF_ANY		0 /* create new element or update existing */
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h
index 58c5fe1bdba1..a6bb7e9c22c3 100644
--- a/samples/bpf/libbpf.h
+++ b/samples/bpf/libbpf.h
@@ -92,7 +92,9 @@ extern char bpf_log_buf[LOG_BUF_SIZE];
 		.off   = 0,					\
 		.imm   = ((__u64) (IMM)) >> 32 })
 
-#define BPF_PSEUDO_MAP_FD	1
+#ifndef BPF_PSEUDO_MAP_FD
+# define BPF_PSEUDO_MAP_FD	1
+#endif
 
 /* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
 #define BPF_LD_MAP_FD(DST, MAP_FD)				\
-- 
cgit 


From 96be4325f443dbbfeb37d2a157675ac0736531a1 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:46 +0100
Subject: ebpf: add sched_cls_type and map it to sk_filter's verifier ops

As discussed recently and at netconf/netdev01, we want to prevent making
bpf_verifier_ops registration available for modules, but have them at a
controlled place inside the kernel instead.

The reason for this is, that out-of-tree modules can go crazy and define
and register any verfifier ops they want, doing all sorts of crap, even
bypassing available GPLed eBPF helper functions. We don't want to offer
such a shiny playground, of course, but keep strict control to ourselves
inside the core kernel.

This also encourages us to design eBPF user helpers carefully and
generically, so they can be shared among various subsystems using eBPF.

For the eBPF traffic classifier (cls_bpf), it's a good start to share
the same helper facilities as we currently do in eBPF for socket filters.

That way, we have BPF_PROG_TYPE_SCHED_CLS look like it's own type, thus
one day if there's a good reason to diverge the set of helper functions
from the set available to socket filters, we keep ABI compatibility.

In future, we could place all bpf_prog_type_list at a central place,
perhaps.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/verifier.c    | 15 +++++++++++++--
 net/core/filter.c        |  7 +++++++
 3 files changed, 21 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0248180bf2e2..3fa1af8a58d7 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@ enum bpf_map_type {
 enum bpf_prog_type {
 	BPF_PROG_TYPE_UNSPEC,
 	BPF_PROG_TYPE_SOCKET_FILTER,
+	BPF_PROG_TYPE_SCHED_CLS,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a28e09c7825d..594d341f04db 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1172,6 +1172,17 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn)
 	return 0;
 }
 
+static bool may_access_skb(enum bpf_prog_type type)
+{
+	switch (type) {
+	case BPF_PROG_TYPE_SOCKET_FILTER:
+	case BPF_PROG_TYPE_SCHED_CLS:
+		return true;
+	default:
+		return false;
+	}
+}
+
 /* verify safety of LD_ABS|LD_IND instructions:
  * - they can only appear in the programs where ctx == skb
  * - since they are wrappers of function calls, they scratch R1-R5 registers,
@@ -1194,8 +1205,8 @@ static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn)
 	struct reg_state *reg;
 	int i, err;
 
-	if (env->prog->aux->prog_type != BPF_PROG_TYPE_SOCKET_FILTER) {
-		verbose("BPF_LD_ABS|IND instructions are only allowed in socket filters\n");
+	if (!may_access_skb(env->prog->aux->prog_type)) {
+		verbose("BPF_LD_ABS|IND instructions not allowed for this program type\n");
 		return -EINVAL;
 	}
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 741721233166..514d4082f326 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1166,9 +1166,16 @@ static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 	.type = BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
+static struct bpf_prog_type_list sched_cls_type __read_mostly = {
+	.ops = &sk_filter_ops,
+	.type = BPF_PROG_TYPE_SCHED_CLS,
+};
+
 static int __init register_sk_filter_ops(void)
 {
 	bpf_register_prog_type(&sk_filter_type);
+	bpf_register_prog_type(&sched_cls_type);
+
 	return 0;
 }
 late_initcall(register_sk_filter_ops);
-- 
cgit 


From e2e9b6541dd4b31848079da80fe2253daaafb549 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sun, 1 Mar 2015 12:31:48 +0100
Subject: cls_bpf: add initial eBPF support for programmable classifiers

This work extends the "classic" BPF programmable tc classifier by
extending its scope also to native eBPF code!

This allows for user space to implement own custom, 'safe' C like
classifiers (or whatever other frontend language LLVM et al may
provide in future), that can then be compiled with the LLVM eBPF
backend to an eBPF elf file. The result of this can be loaded into
the kernel via iproute2's tc. In the kernel, they can be JITed on
major archs and thus run in native performance.

Simple, minimal toy example to demonstrate the workflow:

  #include <linux/ip.h>
  #include <linux/if_ether.h>
  #include <linux/bpf.h>

  #include "tc_bpf_api.h"

  __section("classify")
  int cls_main(struct sk_buff *skb)
  {
    return (0x800 << 16) | load_byte(skb, ETH_HLEN + __builtin_offsetof(struct iphdr, tos));
  }

  char __license[] __section("license") = "GPL";

The classifier can then be compiled into eBPF opcodes and loaded
via tc, for example:

  clang -O2 -emit-llvm -c cls.c -o - | llc -march=bpf -filetype=obj -o cls.o
  tc filter add dev em1 parent 1: bpf cls.o [...]

As it has been demonstrated, the scope can even reach up to a fully
fledged flow dissector (similarly as in samples/bpf/sockex2_kern.c).

For tc, maps are allowed to be used, but from kernel context only,
in other words, eBPF code can keep state across filter invocations.
In future, we perhaps may reattach from a different application to
those maps e.g., to read out collected statistics/state.

Similarly as in socket filters, we may extend functionality for eBPF
classifiers over time depending on the use cases. For that purpose,
cls_bpf programs are using BPF_PROG_TYPE_SCHED_CLS program type, so
we can allow additional functions/accessors (e.g. an ABI compatible
offset translation to skb fields/metadata). For an initial cls_bpf
support, we allow the same set of helper functions as eBPF socket
filters, but we could diverge at some point in time w/o problem.

I was wondering whether cls_bpf and act_bpf could share C programs,
I can imagine that at some point, we introduce i) further common
handlers for both (or even beyond their scope), and/or if truly needed
ii) some restricted function space for each of them. Both can be
abstracted easily through struct bpf_verifier_ops in future.

The context of cls_bpf versus act_bpf is slightly different though:
a cls_bpf program will return a specific classid whereas act_bpf a
drop/non-drop return code, latter may also in future mangle skbs.
That said, we can surely have a "classify" and "action" section in
a single object file, or considered mentioned constraint add a
possibility of a shared section.

The workflow for getting native eBPF running from tc [1] is as
follows: for f_bpf, I've added a slightly modified ELF parser code
from Alexei's kernel sample, which reads out the LLVM compiled
object, sets up maps (and dynamically fixes up map fds) if any, and
loads the eBPF instructions all centrally through the bpf syscall.

The resulting fd from the loaded program itself is being passed down
to cls_bpf, which looks up struct bpf_prog from the fd store, and
holds reference, so that it stays available also after tc program
lifetime. On tc filter destruction, it will then drop its reference.

Moreover, I've also added the optional possibility to annotate an
eBPF filter with a name (e.g. path to object file, or something
else if preferred) so that when tc dumps currently installed filters,
some more context can be given to an admin for a given instance (as
opposed to just the file descriptor number).

Last but not least, bpf_prog_get() and bpf_prog_put() needed to be
exported, so that eBPF can be used from cls_bpf built as a module.
Thanks to 60a3b2253c41 ("net: bpf: make eBPF interpreter images
read-only") I think this is of no concern since anything wanting to
alter eBPF opcode after verification stage would crash the kernel.

  [1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |   2 +
 kernel/bpf/syscall.c         |   2 +
 net/sched/cls_bpf.c          | 206 ++++++++++++++++++++++++++++++++-----------
 3 files changed, 158 insertions(+), 52 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 25731dfb3fcc..bf08e76bf505 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -397,6 +397,8 @@ enum {
 	TCA_BPF_CLASSID,
 	TCA_BPF_OPS_LEN,
 	TCA_BPF_OPS,
+	TCA_BPF_FD,
+	TCA_BPF_NAME,
 	__TCA_BPF_MAX,
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 0d69449acbd0..669719ccc9ee 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -419,6 +419,7 @@ void bpf_prog_put(struct bpf_prog *prog)
 		bpf_prog_free(prog);
 	}
 }
+EXPORT_SYMBOL_GPL(bpf_prog_put);
 
 static int bpf_prog_release(struct inode *inode, struct file *filp)
 {
@@ -466,6 +467,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 	fdput(f);
 	return prog;
 }
+EXPORT_SYMBOL_GPL(bpf_prog_get);
 
 /* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD log_buf
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5f3ee9e4b5bf..6f7ed8f8e6ee 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -16,6 +16,8 @@
 #include <linux/types.h>
 #include <linux/skbuff.h>
 #include <linux/filter.h>
+#include <linux/bpf.h>
+
 #include <net/rtnetlink.h>
 #include <net/pkt_cls.h>
 #include <net/sock.h>
@@ -24,6 +26,8 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Daniel Borkmann <dborkman@redhat.com>");
 MODULE_DESCRIPTION("TC BPF based classifier");
 
+#define CLS_BPF_NAME_LEN	256
+
 struct cls_bpf_head {
 	struct list_head plist;
 	u32 hgen;
@@ -32,18 +36,24 @@ struct cls_bpf_head {
 
 struct cls_bpf_prog {
 	struct bpf_prog *filter;
-	struct sock_filter *bpf_ops;
-	struct tcf_exts exts;
-	struct tcf_result res;
 	struct list_head link;
+	struct tcf_result res;
+	struct tcf_exts exts;
 	u32 handle;
-	u16 bpf_num_ops;
+	union {
+		u32 bpf_fd;
+		u16 bpf_num_ops;
+	};
+	struct sock_filter *bpf_ops;
+	const char *bpf_name;
 	struct tcf_proto *tp;
 	struct rcu_head rcu;
 };
 
 static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = {
 	[TCA_BPF_CLASSID]	= { .type = NLA_U32 },
+	[TCA_BPF_FD]		= { .type = NLA_U32 },
+	[TCA_BPF_NAME]		= { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN },
 	[TCA_BPF_OPS_LEN]	= { .type = NLA_U16 },
 	[TCA_BPF_OPS]		= { .type = NLA_BINARY,
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
@@ -76,6 +86,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	return -1;
 }
 
+static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog)
+{
+	return !prog->bpf_ops;
+}
+
 static int cls_bpf_init(struct tcf_proto *tp)
 {
 	struct cls_bpf_head *head;
@@ -94,8 +109,12 @@ static void cls_bpf_delete_prog(struct tcf_proto *tp, struct cls_bpf_prog *prog)
 {
 	tcf_exts_destroy(&prog->exts);
 
-	bpf_prog_destroy(prog->filter);
+	if (cls_bpf_is_ebpf(prog))
+		bpf_prog_put(prog->filter);
+	else
+		bpf_prog_destroy(prog->filter);
 
+	kfree(prog->bpf_name);
 	kfree(prog->bpf_ops);
 	kfree(prog);
 }
@@ -114,6 +133,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg)
 	list_del_rcu(&prog->link);
 	tcf_unbind_filter(tp, &prog->res);
 	call_rcu(&prog->rcu, __cls_bpf_delete_prog);
+
 	return 0;
 }
 
@@ -151,69 +171,121 @@ static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle)
 	return ret;
 }
 
-static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
-				   struct cls_bpf_prog *prog,
-				   unsigned long base, struct nlattr **tb,
-				   struct nlattr *est, bool ovr)
+static int cls_bpf_prog_from_ops(struct nlattr **tb,
+				 struct cls_bpf_prog *prog, u32 classid)
 {
 	struct sock_filter *bpf_ops;
-	struct tcf_exts exts;
-	struct sock_fprog_kern tmp;
+	struct sock_fprog_kern fprog_tmp;
 	struct bpf_prog *fp;
 	u16 bpf_size, bpf_num_ops;
-	u32 classid;
 	int ret;
 
-	if (!tb[TCA_BPF_OPS_LEN] || !tb[TCA_BPF_OPS] || !tb[TCA_BPF_CLASSID])
-		return -EINVAL;
-
-	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
-	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
-	if (ret < 0)
-		return ret;
-
-	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
 	bpf_num_ops = nla_get_u16(tb[TCA_BPF_OPS_LEN]);
-	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) {
-		ret = -EINVAL;
-		goto errout;
-	}
+	if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0)
+		return -EINVAL;
 
 	bpf_size = bpf_num_ops * sizeof(*bpf_ops);
-	if (bpf_size != nla_len(tb[TCA_BPF_OPS])) {
-		ret = -EINVAL;
-		goto errout;
-	}
+	if (bpf_size != nla_len(tb[TCA_BPF_OPS]))
+		return -EINVAL;
 
 	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
-	if (bpf_ops == NULL) {
-		ret = -ENOMEM;
-		goto errout;
-	}
+	if (bpf_ops == NULL)
+		return -ENOMEM;
 
 	memcpy(bpf_ops, nla_data(tb[TCA_BPF_OPS]), bpf_size);
 
-	tmp.len = bpf_num_ops;
-	tmp.filter = bpf_ops;
+	fprog_tmp.len = bpf_num_ops;
+	fprog_tmp.filter = bpf_ops;
 
-	ret = bpf_prog_create(&fp, &tmp);
-	if (ret)
-		goto errout_free;
+	ret = bpf_prog_create(&fp, &fprog_tmp);
+	if (ret < 0) {
+		kfree(bpf_ops);
+		return ret;
+	}
 
-	prog->bpf_num_ops = bpf_num_ops;
 	prog->bpf_ops = bpf_ops;
+	prog->bpf_num_ops = bpf_num_ops;
+	prog->bpf_name = NULL;
+
 	prog->filter = fp;
 	prog->res.classid = classid;
 
+	return 0;
+}
+
+static int cls_bpf_prog_from_efd(struct nlattr **tb,
+				 struct cls_bpf_prog *prog, u32 classid)
+{
+	struct bpf_prog *fp;
+	char *name = NULL;
+	u32 bpf_fd;
+
+	bpf_fd = nla_get_u32(tb[TCA_BPF_FD]);
+
+	fp = bpf_prog_get(bpf_fd);
+	if (IS_ERR(fp))
+		return PTR_ERR(fp);
+
+	if (fp->type != BPF_PROG_TYPE_SCHED_CLS) {
+		bpf_prog_put(fp);
+		return -EINVAL;
+	}
+
+	if (tb[TCA_BPF_NAME]) {
+		name = kmemdup(nla_data(tb[TCA_BPF_NAME]),
+			       nla_len(tb[TCA_BPF_NAME]),
+			       GFP_KERNEL);
+		if (!name) {
+			bpf_prog_put(fp);
+			return -ENOMEM;
+		}
+	}
+
+	prog->bpf_ops = NULL;
+	prog->bpf_fd = bpf_fd;
+	prog->bpf_name = name;
+
+	prog->filter = fp;
+	prog->res.classid = classid;
+
+	return 0;
+}
+
+static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp,
+				   struct cls_bpf_prog *prog,
+				   unsigned long base, struct nlattr **tb,
+				   struct nlattr *est, bool ovr)
+{
+	struct tcf_exts exts;
+	bool is_bpf, is_ebpf;
+	u32 classid;
+	int ret;
+
+	is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+	is_ebpf = tb[TCA_BPF_FD];
+
+	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+	    !tb[TCA_BPF_CLASSID])
+		return -EINVAL;
+
+	tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE);
+	ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr);
+	if (ret < 0)
+		return ret;
+
+	classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+
+	ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog, classid) :
+		       cls_bpf_prog_from_efd(tb, prog, classid);
+	if (ret < 0) {
+		tcf_exts_destroy(&exts);
+		return ret;
+	}
+
 	tcf_bind_filter(tp, &prog->res, base);
 	tcf_exts_change(tp, &prog->exts, &exts);
 
 	return 0;
-errout_free:
-	kfree(bpf_ops);
-errout:
-	tcf_exts_destroy(&exts);
-	return ret;
 }
 
 static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp,
@@ -297,11 +369,43 @@ errout:
 	return ret;
 }
 
+static int cls_bpf_dump_bpf_info(const struct cls_bpf_prog *prog,
+				 struct sk_buff *skb)
+{
+	struct nlattr *nla;
+
+	if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
+		return -EMSGSIZE;
+
+	nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
+			  sizeof(struct sock_filter));
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+	return 0;
+}
+
+static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog,
+				  struct sk_buff *skb)
+{
+	if (nla_put_u32(skb, TCA_BPF_FD, prog->bpf_fd))
+		return -EMSGSIZE;
+
+	if (prog->bpf_name &&
+	    nla_put_string(skb, TCA_BPF_NAME, prog->bpf_name))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 			struct sk_buff *skb, struct tcmsg *tm)
 {
 	struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh;
-	struct nlattr *nest, *nla;
+	struct nlattr *nest;
+	int ret;
 
 	if (prog == NULL)
 		return skb->len;
@@ -314,16 +418,14 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
 
 	if (nla_put_u32(skb, TCA_BPF_CLASSID, prog->res.classid))
 		goto nla_put_failure;
-	if (nla_put_u16(skb, TCA_BPF_OPS_LEN, prog->bpf_num_ops))
-		goto nla_put_failure;
 
-	nla = nla_reserve(skb, TCA_BPF_OPS, prog->bpf_num_ops *
-			  sizeof(struct sock_filter));
-	if (nla == NULL)
+	if (cls_bpf_is_ebpf(prog))
+		ret = cls_bpf_dump_ebpf_info(prog, skb);
+	else
+		ret = cls_bpf_dump_bpf_info(prog, skb);
+	if (ret)
 		goto nla_put_failure;
 
-	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
-
 	if (tcf_exts_dump(skb, &prog->exts) < 0)
 		goto nla_put_failure;
 
-- 
cgit 


From ffc1199122d83d60ad99f9c55df32feb650b7bff Mon Sep 17 00:00:00 2001
From: "Janusz.Dziedzic@tieto.com" <Janusz.Dziedzic@tieto.com>
Date: Sat, 21 Feb 2015 16:52:39 +0100
Subject: cfg80211: add VHT support for IBSS

Add NL80211_EXT_FEATURE_VHT_IBSS flag and VHT
support for IBSS.

Signed-off-by: Janusz Dziedzic <janusz.dziedzic@tieto.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  2 ++
 net/wireless/nl80211.c       | 14 ++++++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2dcf9bba317c..8ee31f108407 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4328,11 +4328,13 @@ enum nl80211_feature_flags {
 
 /**
  * enum nl80211_ext_feature_index - bit index of extended features.
+ * @NL80211_EXT_FEATURE_VHT_IBSS: This driver supports IBSS with VHT datarates.
  *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
 enum nl80211_ext_feature_index {
+	NL80211_EXT_FEATURE_VHT_IBSS,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 9c6e23ede5b2..66666fdf1c8d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -7265,8 +7265,18 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info)
 		break;
 	case NL80211_CHAN_WIDTH_20:
 	case NL80211_CHAN_WIDTH_40:
-		if (rdev->wiphy.features & NL80211_FEATURE_HT_IBSS)
-			break;
+		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
+			return -EINVAL;
+		break;
+	case NL80211_CHAN_WIDTH_80:
+	case NL80211_CHAN_WIDTH_80P80:
+	case NL80211_CHAN_WIDTH_160:
+		if (!(rdev->wiphy.features & NL80211_FEATURE_HT_IBSS))
+			return -EINVAL;
+		if (!wiphy_ext_feature_isset(&rdev->wiphy,
+					     NL80211_EXT_FEATURE_VHT_IBSS))
+			return -EINVAL;
+		break;
 	default:
 		return -EINVAL;
 	}
-- 
cgit 


From 5fc7432991a86678b38a2d700edbe8bcd29cc579 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 27 Feb 2015 15:32:43 +0100
Subject: nl80211: add notes about userspace API/ABI modifications

Add notes about userspace ABI/API modifications, including the
fact that we decided that API submissions should come with a
driver implementation.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 8ee31f108407..90c5aeb3cca7 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -25,6 +25,19 @@
  *
  */
 
+/*
+ * This header file defines the userspace API to the wireless stack. Please
+ * be careful not to break things - i.e. don't move anything around or so
+ * unless you can demonstrate that it breaks neither API nor ABI.
+ *
+ * Additions to the API should be accompanied by actual implementations in
+ * an upstream driver, so that example implementations exist in case there
+ * are ever concerns about the precise semantics of the API or changes are
+ * needed, and to ensure that code for dead (no longer implemented) API
+ * can actually be identified and removed.
+ * Nonetheless, semantics should also be documented carefully in this file.
+ */
+
 #include <linux/types.h>
 
 #define NL80211_GENL_NAME "nl80211"
-- 
cgit 


From 03c0566542f4c7a45ce3193f27cbf5700b506c18 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 19:13:56 -0600
Subject: mpls: Netlink commands to add, remove, and dump routes

This change adds two new netlink routing attributes:
RTA_VIA and RTA_NEWDST.

RTA_VIA specifies the specifies the next machine to send a packet to
like RTA_GATEWAY.  RTA_VIA differs from RTA_GATEWAY in that it
includes the address family of the address of the next machine to send
a packet to.  Currently the MPLS code supports addresses in AF_INET,
AF_INET6 and AF_PACKET.  For AF_INET and AF_INET6 the destination mac
address is acquired from the neighbour table.  For AF_PACKET the
destination mac_address is specified in the netlink configuration.

I think raw destination mac address support with the family AF_PACKET
will prove useful.  There is MPLS-TP which is defined to operate
on machines that do not support internet packets of any flavor.  Further
seem to be corner cases where it can be useful.  At this point
I don't care much either way.

RTA_NEWDST specifies the destination address to forward the packet
with.  MPLS typically changes it's destination address at every hop.
For a swap operation RTA_NEWDST is specified with a length of one label.
For a push operation RTA_NEWDST is specified with two or more labels.
For a pop operation RTA_NEWDST is not specified or equivalently an emtpy
RTAN_NEWDST is specified.

Those new netlink attributes are used to implement handling of rt-netlink
RTM_NEWROUTE, RTM_DELROUTE, and RTM_GETROUTE messages, to maintain the
MPLS label table.

rtm_to_route_config parses a netlink RTM_NEWROUTE or RTM_DELROUTE message,
verify no unhandled attributes or unhandled values are present and sets
up the data structures for mpls_route_add and mpls_route_del.

I did my best to match up with the existing conventions with the caveats
that MPLS addresses are all destination-specific-addresses, and so
don't properly have a scope.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h |   8 ++
 net/mpls/af_mpls.c             | 229 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 237 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 5cc5d66bf519..bad65550ae3e 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -303,6 +303,8 @@ enum rtattr_type_t {
 	RTA_TABLE,
 	RTA_MARK,
 	RTA_MFC_STATS,
+	RTA_VIA,
+	RTA_NEWDST,
 	__RTA_MAX
 };
 
@@ -344,6 +346,12 @@ struct rtnexthop {
 #define RTNH_SPACE(len)	RTNH_ALIGN(RTNH_LENGTH(len))
 #define RTNH_DATA(rtnh)   ((struct rtattr*)(((char*)(rtnh)) + RTNH_LENGTH(0)))
 
+/* RTA_VIA */
+struct rtvia {
+	__kernel_sa_family_t	rtvia_family;
+	__u8			rtvia_addr[0];
+};
+
 /* RTM_CACHEINFO */
 
 struct rta_cacheinfo {
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 2d6612a10e30..b4d7cec398d2 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -212,6 +212,11 @@ static struct packet_type mpls_packet_type __read_mostly = {
 	.func = mpls_forward,
 };
 
+const struct nla_policy rtm_mpls_policy[RTA_MAX+1] = {
+	[RTA_DST]		= { .type = NLA_U32 },
+	[RTA_OIF]		= { .type = NLA_U32 },
+};
+
 struct mpls_route_config {
 	u32		rc_protocol;
 	u32		rc_ifindex;
@@ -410,6 +415,22 @@ static struct notifier_block mpls_dev_notifier = {
 	.notifier_call = mpls_dev_notify,
 };
 
+static int nla_put_via(struct sk_buff *skb,
+		       u16 family, const void *addr, int alen)
+{
+	struct nlattr *nla;
+	struct rtvia *via;
+
+	nla = nla_reserve(skb, RTA_VIA, alen + 2);
+	if (!nla)
+		return -EMSGSIZE;
+
+	via = nla_data(nla);
+	via->rtvia_family = family;
+	memcpy(via->rtvia_addr, addr, alen);
+	return 0;
+}
+
 int nla_put_labels(struct sk_buff *skb, int attrtype,
 		   u8 labels, const u32 label[])
 {
@@ -467,6 +488,210 @@ int nla_get_labels(const struct nlattr *nla,
 	return 0;
 }
 
+static int rtm_to_route_config(struct sk_buff *skb,  struct nlmsghdr *nlh,
+			       struct mpls_route_config *cfg)
+{
+	struct rtmsg *rtm;
+	struct nlattr *tb[RTA_MAX+1];
+	int index;
+	int err;
+
+	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_mpls_policy);
+	if (err < 0)
+		goto errout;
+
+	err = -EINVAL;
+	rtm = nlmsg_data(nlh);
+	memset(cfg, 0, sizeof(*cfg));
+
+	if (rtm->rtm_family != AF_MPLS)
+		goto errout;
+	if (rtm->rtm_dst_len != 20)
+		goto errout;
+	if (rtm->rtm_src_len != 0)
+		goto errout;
+	if (rtm->rtm_tos != 0)
+		goto errout;
+	if (rtm->rtm_table != RT_TABLE_MAIN)
+		goto errout;
+	/* Any value is acceptable for rtm_protocol */
+
+	/* As mpls uses destination specific addresses
+	 * (or source specific address in the case of multicast)
+	 * all addresses have universal scope.
+	 */
+	if (rtm->rtm_scope != RT_SCOPE_UNIVERSE)
+		goto errout;
+	if (rtm->rtm_type != RTN_UNICAST)
+		goto errout;
+	if (rtm->rtm_flags != 0)
+		goto errout;
+
+	cfg->rc_label		= LABEL_NOT_SPECIFIED;
+	cfg->rc_protocol	= rtm->rtm_protocol;
+	cfg->rc_nlflags		= nlh->nlmsg_flags;
+	cfg->rc_nlinfo.portid	= NETLINK_CB(skb).portid;
+	cfg->rc_nlinfo.nlh	= nlh;
+	cfg->rc_nlinfo.nl_net	= sock_net(skb->sk);
+
+	for (index = 0; index <= RTA_MAX; index++) {
+		struct nlattr *nla = tb[index];
+		if (!nla)
+			continue;
+
+		switch(index) {
+		case RTA_OIF:
+			cfg->rc_ifindex = nla_get_u32(nla);
+			break;
+		case RTA_NEWDST:
+			if (nla_get_labels(nla, MAX_NEW_LABELS,
+					   &cfg->rc_output_labels,
+					   cfg->rc_output_label))
+				goto errout;
+			break;
+		case RTA_DST:
+		{
+			u32 label_count;
+			if (nla_get_labels(nla, 1, &label_count,
+					   &cfg->rc_label))
+				goto errout;
+
+			/* The first 16 labels are reserved, and may not be set */
+			if (cfg->rc_label < 16)
+				goto errout;
+
+			break;
+		}
+		case RTA_VIA:
+		{
+			struct rtvia *via = nla_data(nla);
+			cfg->rc_via_family = via->rtvia_family;
+			cfg->rc_via_alen   = nla_len(nla) - 2;
+			if (cfg->rc_via_alen > MAX_VIA_ALEN)
+				goto errout;
+
+			/* Validate the address family */
+			switch(cfg->rc_via_family) {
+			case AF_PACKET:
+				break;
+			case AF_INET:
+				if (cfg->rc_via_alen != 4)
+					goto errout;
+				break;
+			case AF_INET6:
+				if (cfg->rc_via_alen != 16)
+					goto errout;
+				break;
+			default:
+				/* Unsupported address family */
+				goto errout;
+			}
+
+			memcpy(cfg->rc_via, via->rtvia_addr, cfg->rc_via_alen);
+			break;
+		}
+		default:
+			/* Unsupported attribute */
+			goto errout;
+		}
+	}
+
+	err = 0;
+errout:
+	return err;
+}
+
+static int mpls_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct mpls_route_config cfg;
+	int err;
+
+	err = rtm_to_route_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	return mpls_route_del(&cfg);
+}
+
+
+static int mpls_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh)
+{
+	struct mpls_route_config cfg;
+	int err;
+
+	err = rtm_to_route_config(skb, nlh, &cfg);
+	if (err < 0)
+		return err;
+
+	return mpls_route_add(&cfg);
+}
+
+static int mpls_dump_route(struct sk_buff *skb, u32 portid, u32 seq, int event,
+			   u32 label, struct mpls_route *rt, int flags)
+{
+	struct nlmsghdr *nlh;
+	struct rtmsg *rtm;
+
+	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags);
+	if (nlh == NULL)
+		return -EMSGSIZE;
+
+	rtm = nlmsg_data(nlh);
+	rtm->rtm_family = AF_MPLS;
+	rtm->rtm_dst_len = 20;
+	rtm->rtm_src_len = 0;
+	rtm->rtm_tos = 0;
+	rtm->rtm_table = RT_TABLE_MAIN;
+	rtm->rtm_protocol = rt->rt_protocol;
+	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
+	rtm->rtm_type = RTN_UNICAST;
+	rtm->rtm_flags = 0;
+
+	if (rt->rt_labels &&
+	    nla_put_labels(skb, RTA_NEWDST, rt->rt_labels, rt->rt_label))
+		goto nla_put_failure;
+	if (nla_put_via(skb, rt->rt_via_family, rt->rt_via, rt->rt_via_alen))
+		goto nla_put_failure;
+	if (rt->rt_dev && nla_put_u32(skb, RTA_OIF, rt->rt_dev->ifindex))
+		goto nla_put_failure;
+	if (nla_put_labels(skb, RTA_DST, 1, &label))
+		goto nla_put_failure;
+
+	nlmsg_end(skb, nlh);
+	return 0;
+
+nla_put_failure:
+	nlmsg_cancel(skb, nlh);
+	return -EMSGSIZE;
+}
+
+static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	unsigned int index;
+
+	ASSERT_RTNL();
+
+	index = cb->args[0];
+	if (index < 16)
+		index = 16;
+
+	for (; index < net->mpls.platform_labels; index++) {
+		struct mpls_route *rt;
+		rt = net->mpls.platform_label[index];
+		if (!rt)
+			continue;
+
+		if (mpls_dump_route(skb, NETLINK_CB(cb->skb).portid,
+				    cb->nlh->nlmsg_seq, RTM_NEWROUTE,
+				    index, rt, NLM_F_MULTI) < 0)
+			break;
+	}
+	cb->args[0] = index;
+
+	return skb->len;
+}
+
 static int resize_platform_label_table(struct net *net, size_t limit)
 {
 	size_t size = sizeof(struct mpls_route *) * limit;
@@ -662,6 +887,9 @@ static int __init mpls_init(void)
 
 	dev_add_pack(&mpls_packet_type);
 
+	rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL);
+	rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL);
+	rtnl_register(PF_MPLS, RTM_GETROUTE, NULL, mpls_dump_routes, NULL);
 	err = 0;
 out:
 	return err;
@@ -674,6 +902,7 @@ module_init(mpls_init);
 
 static void __exit mpls_exit(void)
 {
+	rtnl_unregister_all(PF_MPLS);
 	dev_remove_pack(&mpls_packet_type);
 	unregister_netdevice_notifier(&mpls_dev_notifier);
 	unregister_pernet_subsys(&mpls_net_ops);
-- 
cgit 


From 8de147dc8e2adea82b8a1a2a08fcc983330f6770 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 3 Mar 2015 19:14:31 -0600
Subject: mpls: Multicast route table change notifications

Unlike IPv4 this code notifies on all cases where mpls routes
are added or removed and it never automatically removes routes.
Avoiding both the userspace confusion that is caused by omitting
route updates and the possibility of a flood of netlink traffic
when an interface goes doew.

For now reserved labels are handled automatically and userspace
is not notified.

Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h |  2 ++
 net/mpls/af_mpls.c             | 60 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index bad65550ae3e..06f75a407f74 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -631,6 +631,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_IPV6_NETCONF	RTNLGRP_IPV6_NETCONF
 	RTNLGRP_MDB,
 #define RTNLGRP_MDB		RTNLGRP_MDB
+	RTNLGRP_MPLS_ROUTE,
+#define RTNLGRP_MPLS_ROUTE	RTNLGRP_MPLS_ROUTE
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index b4d7cec398d2..75a994a50381 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -36,6 +36,10 @@ struct mpls_route { /* next hop label forwarding entry */
 static int zero = 0;
 static int label_limit = (1 << 20) - 1;
 
+static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
+		       struct nlmsghdr *nlh, struct net *net, u32 portid,
+		       unsigned int nlm_flags);
+
 static struct mpls_route *mpls_route_input_rcu(struct net *net, unsigned index)
 {
 	struct mpls_route *rt = NULL;
@@ -246,6 +250,20 @@ static void mpls_rt_free(struct mpls_route *rt)
 		kfree_rcu(rt, rt_rcu);
 }
 
+static void mpls_notify_route(struct net *net, unsigned index,
+			      struct mpls_route *old, struct mpls_route *new,
+			      const struct nl_info *info)
+{
+	struct nlmsghdr *nlh = info ? info->nlh : NULL;
+	unsigned portid = info ? info->portid : 0;
+	int event = new ? RTM_NEWROUTE : RTM_DELROUTE;
+	struct mpls_route *rt = new ? new : old;
+	unsigned nlm_flags = (old && new) ? NLM_F_REPLACE : 0;
+	/* Ignore reserved labels for now */
+	if (rt && (index >= 16))
+		rtmsg_lfib(event, index, rt, nlh, net, portid, nlm_flags);
+}
+
 static void mpls_route_update(struct net *net, unsigned index,
 			      struct net_device *dev, struct mpls_route *new,
 			      const struct nl_info *info)
@@ -260,6 +278,8 @@ static void mpls_route_update(struct net *net, unsigned index,
 		old = rt;
 	}
 
+	mpls_notify_route(net, index, old, new, info);
+
 	/* If we removed a route free it now */
 	mpls_rt_free(old);
 }
@@ -692,6 +712,46 @@ static int mpls_dump_routes(struct sk_buff *skb, struct netlink_callback *cb)
 	return skb->len;
 }
 
+static inline size_t lfib_nlmsg_size(struct mpls_route *rt)
+{
+	size_t payload =
+		NLMSG_ALIGN(sizeof(struct rtmsg))
+		+ nla_total_size(2 + rt->rt_via_alen)	/* RTA_VIA */
+		+ nla_total_size(4);			/* RTA_DST */
+	if (rt->rt_labels)				/* RTA_NEWDST */
+		payload += nla_total_size(rt->rt_labels * 4);
+	if (rt->rt_dev)					/* RTA_OIF */
+		payload += nla_total_size(4);
+	return payload;
+}
+
+static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt,
+		       struct nlmsghdr *nlh, struct net *net, u32 portid,
+		       unsigned int nlm_flags)
+{
+	struct sk_buff *skb;
+	u32 seq = nlh ? nlh->nlmsg_seq : 0;
+	int err = -ENOBUFS;
+
+	skb = nlmsg_new(lfib_nlmsg_size(rt), GFP_KERNEL);
+	if (skb == NULL)
+		goto errout;
+
+	err = mpls_dump_route(skb, portid, seq, event, label, rt, nlm_flags);
+	if (err < 0) {
+		/* -EMSGSIZE implies BUG in lfib_nlmsg_size */
+		WARN_ON(err == -EMSGSIZE);
+		kfree_skb(skb);
+		goto errout;
+	}
+	rtnl_notify(skb, net, portid, RTNLGRP_MPLS_ROUTE, nlh, GFP_KERNEL);
+
+	return;
+errout:
+	if (err < 0)
+		rtnl_set_sk_err(net, RTNLGRP_MPLS_ROUTE, err);
+}
+
 static int resize_platform_label_table(struct net *net, size_t limit)
 {
 	size_t size = sizeof(struct mpls_route *) * limit;
-- 
cgit 


From 98fc43864af9e74116eec81c290db048cded15d8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sun, 1 Mar 2015 09:10:13 +0200
Subject: nl80211: prohibit mixing 'any' and regular wowlan triggers

If the device supports waking up on 'any' signal - i.e. it continues
operating as usual and wakes up the host on pretty much anything that
happens, then it makes no sense to also configure the more restricted
WoWLAN mode where the device operates more autonomously but also in a
more restricted fashion.

Currently only cw2100 supports both 'any' and other triggers, but it
seems to be broken as it doesn't configure anything to the device, so
we can't currently get into a situation where both even can correctly
be configured. This is about to change (Intel devices are going to
support both and have different behaviour depending on configuration)
so make sure the conflicting modes cannot be configured.

(It seems that cw2100 advertises 'any' and 'disconnect' as a means of
saying that's what it will always do, but that isn't really the way
this API was meant to be used nor does it actually mean anything as
'any' always implies 'disconnect' already, and the driver doesn't
change device configuration in any way depending on the settings.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  2 ++
 net/wireless/nl80211.c       | 22 ++++++++++++++++++++++
 2 files changed, 24 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 90c5aeb3cca7..37e7f39441e5 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3708,6 +3708,8 @@ struct nl80211_pattern_support {
  * @NL80211_WOWLAN_TRIG_ANY: wake up on any activity, do not really put
  *	the chip into a special state -- works best with chips that have
  *	support for low-power operation already (flag)
+ *	Note that this mode is incompatible with all of the others, if
+ *	any others are even supported by the device.
  * @NL80211_WOWLAN_TRIG_DISCONNECT: wake up on disconnect, the way disconnect
  *	is detected is implementation-specific (flag)
  * @NL80211_WOWLAN_TRIG_MAGIC_PKT: wake up on magic packet (6x 0xff, followed
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 01874628ae00..07cef3d7653e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -9105,6 +9105,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 	const struct wiphy_wowlan_support *wowlan = rdev->wiphy.wowlan;
 	int err, i;
 	bool prev_enabled = rdev->wiphy.wowlan_config;
+	bool regular = false;
 
 	if (!wowlan)
 		return -EOPNOTSUPP;
@@ -9132,12 +9133,14 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 		if (!(wowlan->flags & WIPHY_WOWLAN_DISCONNECT))
 			return -EINVAL;
 		new_triggers.disconnect = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_MAGIC_PKT]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_MAGIC_PKT))
 			return -EINVAL;
 		new_triggers.magic_pkt = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_GTK_REKEY_SUPPORTED])
@@ -9147,24 +9150,28 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 		if (!(wowlan->flags & WIPHY_WOWLAN_GTK_REKEY_FAILURE))
 			return -EINVAL;
 		new_triggers.gtk_rekey_failure = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_EAP_IDENT_REQUEST]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_EAP_IDENTITY_REQ))
 			return -EINVAL;
 		new_triggers.eap_identity_req = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_4WAY_HANDSHAKE]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_4WAY_HANDSHAKE))
 			return -EINVAL;
 		new_triggers.four_way_handshake = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_RFKILL_RELEASE]) {
 		if (!(wowlan->flags & WIPHY_WOWLAN_RFKILL_RELEASE))
 			return -EINVAL;
 		new_triggers.rfkill_release = true;
+		regular = true;
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_PKT_PATTERN]) {
@@ -9173,6 +9180,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 		int rem, pat_len, mask_len, pkt_offset;
 		struct nlattr *pat_tb[NUM_NL80211_PKTPAT];
 
+		regular = true;
+
 		nla_for_each_nested(pat, tb[NL80211_WOWLAN_TRIG_PKT_PATTERN],
 				    rem)
 			n_patterns++;
@@ -9234,6 +9243,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION]) {
+		regular = true;
 		err = nl80211_parse_wowlan_tcp(
 			rdev, tb[NL80211_WOWLAN_TRIG_TCP_CONNECTION],
 			&new_triggers);
@@ -9242,6 +9252,7 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (tb[NL80211_WOWLAN_TRIG_NET_DETECT]) {
+		regular = true;
 		err = nl80211_parse_wowlan_nd(
 			rdev, wowlan, tb[NL80211_WOWLAN_TRIG_NET_DETECT],
 			&new_triggers);
@@ -9249,6 +9260,17 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info)
 			goto error;
 	}
 
+	/* The 'any' trigger means the device continues operating more or less
+	 * as in its normal operation mode and wakes up the host on most of the
+	 * normal interrupts (like packet RX, ...)
+	 * It therefore makes little sense to combine with the more constrained
+	 * wakeup trigger modes.
+	 */
+	if (new_triggers.any && regular) {
+		err = -EINVAL;
+		goto error;
+	}
+
 	ntrig = kmemdup(&new_triggers, sizeof(new_triggers), GFP_KERNEL);
 	if (!ntrig) {
 		err = -ENOMEM;
-- 
cgit 


From f3dddf2432e3123ef34b470129295641f7513d26 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Wed, 4 Mar 2015 14:10:26 -0800
Subject: HID: map telephony usage page

Currently HID code maps usages from telephony page into BTN_0, BTN_1, etc
keys which get interpreted by mousedev and userspace as left/right/middle
button clicks, which is not really helpful.

This change adds mappings for usages that have corresponding input event
definitions, and leaves the rest unmapped. This can be changed when
there are userspace consumers for more telephony usages.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-input.c    | 23 +++++++++++++++++++++++
 include/linux/hid.h        |  1 +
 include/uapi/linux/input.h |  4 ++++
 3 files changed, 28 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
index 052869d0ab78..19603efc8fa2 100644
--- a/drivers/hid/hid-input.c
+++ b/drivers/hid/hid-input.c
@@ -711,6 +711,29 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
 		}
 		break;
 
+	case HID_UP_TELEPHONY:
+		switch (usage->hid & HID_USAGE) {
+		case 0x2f: map_key_clear(KEY_MICMUTE);		break;
+		case 0xb0: map_key_clear(KEY_NUMERIC_0);	break;
+		case 0xb1: map_key_clear(KEY_NUMERIC_1);	break;
+		case 0xb2: map_key_clear(KEY_NUMERIC_2);	break;
+		case 0xb3: map_key_clear(KEY_NUMERIC_3);	break;
+		case 0xb4: map_key_clear(KEY_NUMERIC_4);	break;
+		case 0xb5: map_key_clear(KEY_NUMERIC_5);	break;
+		case 0xb6: map_key_clear(KEY_NUMERIC_6);	break;
+		case 0xb7: map_key_clear(KEY_NUMERIC_7);	break;
+		case 0xb8: map_key_clear(KEY_NUMERIC_8);	break;
+		case 0xb9: map_key_clear(KEY_NUMERIC_9);	break;
+		case 0xba: map_key_clear(KEY_NUMERIC_STAR);	break;
+		case 0xbb: map_key_clear(KEY_NUMERIC_POUND);	break;
+		case 0xbc: map_key_clear(KEY_NUMERIC_A);	break;
+		case 0xbd: map_key_clear(KEY_NUMERIC_B);	break;
+		case 0xbe: map_key_clear(KEY_NUMERIC_C);	break;
+		case 0xbf: map_key_clear(KEY_NUMERIC_D);	break;
+		default: goto ignore;
+		}
+		break;
+
 	case HID_UP_CONSUMER:	/* USB HUT v1.12, pages 75-84 */
 		switch (usage->hid & HID_USAGE) {
 		case 0x000: goto ignore;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index efc7787a41a8..69f9cf7f078d 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -159,6 +159,7 @@ struct hid_item {
 #define HID_UP_LED		0x00080000
 #define HID_UP_BUTTON		0x00090000
 #define HID_UP_ORDINAL		0x000a0000
+#define HID_UP_TELEPHONY	0x000b0000
 #define HID_UP_CONSUMER		0x000c0000
 #define HID_UP_DIGITIZER	0x000d0000
 #define HID_UP_PID		0x000f0000
diff --git a/include/uapi/linux/input.h b/include/uapi/linux/input.h
index b0a813079852..2b628c316882 100644
--- a/include/uapi/linux/input.h
+++ b/include/uapi/linux/input.h
@@ -702,6 +702,10 @@ struct input_keymap_entry {
 #define KEY_NUMERIC_9		0x209
 #define KEY_NUMERIC_STAR	0x20a
 #define KEY_NUMERIC_POUND	0x20b
+#define KEY_NUMERIC_A		0x20c	/* Phone key A - HUT Telephony 0xb9 */
+#define KEY_NUMERIC_B		0x20d
+#define KEY_NUMERIC_C		0x20e
+#define KEY_NUMERIC_D		0x20f
 
 #define KEY_CAMERA_FOCUS	0x210
 #define KEY_WPS_BUTTON		0x211	/* WiFi Protected Setup key */
-- 
cgit 


From 842a9ae08a25671db3d4f689eed68b4d64be15b5 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Wed, 4 Mar 2015 12:54:21 +0200
Subject: bridge: Extend Proxy ARP design to allow optional rules for Wi-Fi

This extends the design in commit 958501163ddd ("bridge: Add support for
IEEE 802.11 Proxy ARP") with optional set of rules that are needed to
meet the IEEE 802.11 and Hotspot 2.0 requirements for ProxyARP. The
previously added BR_PROXYARP behavior is left as-is and a new
BR_PROXYARP_WIFI alternative is added so that this behavior can be
configured from user space when required.

In addition, this enables proxyarp functionality for unicast ARP
requests for both BR_PROXYARP and BR_PROXYARP_WIFI since it is possible
to use unicast as well as broadcast for these frames.

The key differences in functionality:

BR_PROXYARP:
- uses the flag on the bridge port on which the request frame was
  received to determine whether to reply
- block bridge port flooding completely on ports that enable proxy ARP

BR_PROXYARP_WIFI:
- uses the flag on the bridge port to which the target device of the
  request belongs
- block bridge port flooding selectively based on whether the proxyarp
  functionality replied

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h    |  1 +
 include/uapi/linux/if_link.h |  1 +
 net/bridge/br_forward.c      |  3 +++
 net/bridge/br_input.c        | 17 ++++++++++-------
 net/bridge/br_netlink.c      |  5 ++++-
 net/bridge/br_private.h      |  1 +
 net/bridge/br_sysfs_if.c     |  2 ++
 7 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index a57bca2ea97e..dad8b00beed2 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -44,6 +44,7 @@ struct br_ip_list {
 #define BR_PROMISC		BIT(7)
 #define BR_PROXYARP		BIT(8)
 #define BR_LEARNING_SYNC	BIT(9)
+#define BR_PROXYARP_WIFI	BIT(10)
 
 extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *));
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index dfd0bb22e554..756436e1ce89 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -247,6 +247,7 @@ enum {
 	IFLA_BRPORT_UNICAST_FLOOD, /* flood unicast traffic */
 	IFLA_BRPORT_PROXYARP,	/* proxy ARP */
 	IFLA_BRPORT_LEARNING_SYNC, /* mac learning sync from device */
+	IFLA_BRPORT_PROXYARP_WIFI, /* proxy ARP for Wi-Fi */
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c
index f96933a823e3..1238fabff874 100644
--- a/net/bridge/br_forward.c
+++ b/net/bridge/br_forward.c
@@ -188,6 +188,9 @@ static void br_flood(struct net_bridge *br, struct sk_buff *skb,
 		/* Do not flood to ports that enable proxy ARP */
 		if (p->flags & BR_PROXYARP)
 			continue;
+		if ((p->flags & BR_PROXYARP_WIFI) &&
+		    BR_INPUT_SKB_CB(skb)->proxyarp_replied)
+			continue;
 
 		prev = maybe_deliver(prev, p, skb, __packet_hook);
 		if (IS_ERR(prev))
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index e2aa7be3a847..052c5ebbc947 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -60,7 +60,7 @@ static int br_pass_frame_up(struct sk_buff *skb)
 }
 
 static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
-			    u16 vid)
+			    u16 vid, struct net_bridge_port *p)
 {
 	struct net_device *dev = br->dev;
 	struct neighbour *n;
@@ -68,6 +68,8 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 	u8 *arpptr, *sha;
 	__be32 sip, tip;
 
+	BR_INPUT_SKB_CB(skb)->proxyarp_replied = false;
+
 	if (dev->flags & IFF_NOARP)
 		return;
 
@@ -105,9 +107,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br,
 		}
 
 		f = __br_fdb_get(br, n->ha, vid);
-		if (f)
+		if (f && ((p->flags & BR_PROXYARP) ||
+			  (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) {
 			arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip,
 				 sha, n->ha, sha);
+			BR_INPUT_SKB_CB(skb)->proxyarp_replied = true;
+		}
 
 		neigh_release(n);
 	}
@@ -153,12 +158,10 @@ int br_handle_frame_finish(struct sk_buff *skb)
 
 	dst = NULL;
 
-	if (is_broadcast_ether_addr(dest)) {
-		if (IS_ENABLED(CONFIG_INET) &&
-		    p->flags & BR_PROXYARP &&
-		    skb->protocol == htons(ETH_P_ARP))
-			br_do_proxy_arp(skb, br, vid);
+	if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP))
+		br_do_proxy_arp(skb, br, vid, p);
 
+	if (is_broadcast_ether_addr(dest)) {
 		skb2 = skb;
 		unicast = false;
 	} else if (is_multicast_ether_addr(dest)) {
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index c72083968768..8bc6b67457dc 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -143,7 +143,9 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 	    nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) ||
-	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)))
+	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI,
+		       !!(p->flags & BR_PROXYARP_WIFI)))
 		return -EMSGSIZE;
 
 	return 0;
@@ -553,6 +555,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[])
 	br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING);
 	br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD);
 	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP);
+	br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI);
 
 	if (tb[IFLA_BRPORT_COST]) {
 		err = br_stp_set_path_cost(p, nla_get_u32(tb[IFLA_BRPORT_COST]));
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index de0919975a25..c32e279c62f8 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -305,6 +305,7 @@ struct br_input_skb_cb {
 #endif
 
 	u16 frag_max_size;
+	bool proxyarp_replied;
 
 #ifdef CONFIG_BRIDGE_VLAN_FILTERING
 	bool vlan_filtered;
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index 2de5d91199e8..4905845a94e9 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK);
 BRPORT_ATTR_FLAG(learning, BR_LEARNING);
 BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD);
 BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP);
+BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI);
 
 #ifdef CONFIG_BRIDGE_IGMP_SNOOPING
 static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf)
@@ -215,6 +216,7 @@ static const struct brport_attribute *brport_attrs[] = {
 	&brport_attr_multicast_fast_leave,
 #endif
 	&brport_attr_proxyarp,
+	&brport_attr_proxyarp_wifi,
 	NULL
 };
 
-- 
cgit 


From 1cae565e8b746f484f1ff1b71d2a1c89d7cf0668 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 5 Mar 2015 15:05:36 +0100
Subject: netfilter: nf_tables: limit maximum table name length to 32 bytes

Set the same as we use for chain names, it should be enough.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 2 +-
 include/uapi/linux/netfilter/nf_tables.h | 1 +
 net/netfilter/nf_tables_api.c            | 7 ++++---
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 04188b47629d..a143acafa5d9 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -535,7 +535,7 @@ struct nft_table {
 	u64				hgenerator;
 	u32				use;
 	u16				flags;
-	char				name[];
+	char				name[NFT_TABLE_MAXNAMELEN];
 };
 
 /**
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 832bc46db78b..b9783931503b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_NF_TABLES_H
 #define _LINUX_NF_TABLES_H
 
+#define NFT_TABLE_MAXNAMELEN	32
 #define NFT_CHAIN_MAXNAMELEN	32
 #define NFT_USERDATA_MAXLEN	256
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 199fd0f27b0e..284b20ce566b 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -401,7 +401,8 @@ nf_tables_chain_type_lookup(const struct nft_af_info *afi,
 }
 
 static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
-	[NFTA_TABLE_NAME]	= { .type = NLA_STRING },
+	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
+				    .len = NFT_TABLE_MAXNAMELEN - 1 },
 	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
 };
 
@@ -686,13 +687,13 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	if (!try_module_get(afi->owner))
 		return -EAFNOSUPPORT;
 
-	table = kzalloc(sizeof(*table) + nla_len(name), GFP_KERNEL);
+	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (table == NULL) {
 		module_put(afi->owner);
 		return -ENOMEM;
 	}
 
-	nla_strlcpy(table->name, name, nla_len(name));
+	nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	table->flags = flags;
-- 
cgit 


From d0f91938bede204a343473792529e0db7d599836 Mon Sep 17 00:00:00 2001
From: Erik Hugne <erik.hugne@ericsson.com>
Date: Thu, 5 Mar 2015 10:23:49 +0100
Subject: tipc: add ip/udp media type

The ip/udp bearer can be configured in a point-to-point
mode by specifying both local and remote ip/hostname,
or it can be enabled in multicast mode, where links are
established to all tipc nodes that have joined the same
multicast group. The multicast IP address is generated
based on the TIPC network ID, but can be overridden by
using another multicast address as remote ip.

Signed-off-by: Erik Hugne <erik.hugne@ericsson.com>
Reviewed-by: Jon Maloy <jon.maloy@ericsson.com>
Reviewed-by: Ying Xue <ying.xue@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tipc_netlink.h |   9 +
 net/tipc/Kconfig                  |   8 +
 net/tipc/Makefile                 |   1 +
 net/tipc/bearer.c                 |  13 +-
 net/tipc/bearer.h                 |  12 +-
 net/tipc/msg.h                    |   2 +-
 net/tipc/udp_media.c              | 442 ++++++++++++++++++++++++++++++++++++++
 7 files changed, 479 insertions(+), 8 deletions(-)
 create mode 100644 net/tipc/udp_media.c

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h
index 8d723824ad69..d4c8f142ba63 100644
--- a/include/uapi/linux/tipc_netlink.h
+++ b/include/uapi/linux/tipc_netlink.h
@@ -83,11 +83,20 @@ enum {
 	TIPC_NLA_BEARER_NAME,		/* string */
 	TIPC_NLA_BEARER_PROP,		/* nest */
 	TIPC_NLA_BEARER_DOMAIN,		/* u32 */
+	TIPC_NLA_BEARER_UDP_OPTS,	/* nest */
 
 	__TIPC_NLA_BEARER_MAX,
 	TIPC_NLA_BEARER_MAX = __TIPC_NLA_BEARER_MAX - 1
 };
 
+enum {
+	TIPC_NLA_UDP_UNSPEC,
+	TIPC_NLA_UDP_LOCAL,		/* sockaddr_storage */
+	TIPC_NLA_UDP_REMOTE,		/* sockaddr_storage */
+
+	__TIPC_NLA_UDP_MAX,
+	TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1
+};
 /* Socket info */
 enum {
 	TIPC_NLA_SOCK_UNSPEC,
diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig
index 91c8a8e031db..c25a3a149dc4 100644
--- a/net/tipc/Kconfig
+++ b/net/tipc/Kconfig
@@ -26,3 +26,11 @@ config TIPC_MEDIA_IB
 	help
 	  Saying Y here will enable support for running TIPC on
 	  IP-over-InfiniBand devices.
+config TIPC_MEDIA_UDP
+	bool "IP/UDP media type support"
+	depends on TIPC
+	select NET_UDP_TUNNEL
+	help
+	  Saying Y here will enable support for running TIPC over IP/UDP
+	bool
+	default y
diff --git a/net/tipc/Makefile b/net/tipc/Makefile
index 599b1a540d2b..57e460be4692 100644
--- a/net/tipc/Makefile
+++ b/net/tipc/Makefile
@@ -10,5 +10,6 @@ tipc-y	+= addr.o bcast.o bearer.o \
 	   netlink.o netlink_compat.o node.o socket.o eth_media.o \
 	   server.o socket.o
 
+tipc-$(CONFIG_TIPC_MEDIA_UDP)	+= udp_media.o
 tipc-$(CONFIG_TIPC_MEDIA_IB)	+= ib_media.o
 tipc-$(CONFIG_SYSCTL)		+= sysctl.o
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index af6deeb397a8..840db89e4283 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -47,6 +47,9 @@ static struct tipc_media * const media_info_array[] = {
 	&eth_media_info,
 #ifdef CONFIG_TIPC_MEDIA_IB
 	&ib_media_info,
+#endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+	&udp_media_info,
 #endif
 	NULL
 };
@@ -216,7 +219,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest)
  * tipc_enable_bearer - enable bearer with the given name
  */
 static int tipc_enable_bearer(struct net *net, const char *name,
-			      u32 disc_domain, u32 priority)
+			      u32 disc_domain, u32 priority,
+			      struct nlattr *attr[])
 {
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_bearer *b_ptr;
@@ -304,7 +308,7 @@ restart:
 
 	strcpy(b_ptr->name, name);
 	b_ptr->media = m_ptr;
-	res = m_ptr->enable_media(net, b_ptr);
+	res = m_ptr->enable_media(net, b_ptr, attr);
 	if (res) {
 		pr_warn("Bearer <%s> rejected, enable failure (%d)\n",
 			name, -res);
@@ -372,7 +376,8 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr,
 	kfree_rcu(b_ptr, rcu);
 }
 
-int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b)
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+			 struct nlattr *attr[])
 {
 	struct net_device *dev;
 	char *driver_name = strchr((const char *)b->name, ':') + 1;
@@ -791,7 +796,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	rtnl_lock();
-	err = tipc_enable_bearer(net, bearer, domain, prio);
+	err = tipc_enable_bearer(net, bearer, domain, prio, attrs);
 	if (err) {
 		rtnl_unlock();
 		return err;
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 097aff08ad5b..5cad243ee8fc 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -41,7 +41,7 @@
 #include <net/genetlink.h>
 
 #define MAX_BEARERS	2
-#define MAX_MEDIA	2
+#define MAX_MEDIA	3
 #define MAX_NODES	4096
 #define WSIZE		32
 
@@ -59,6 +59,7 @@
  */
 #define TIPC_MEDIA_TYPE_ETH	1
 #define TIPC_MEDIA_TYPE_IB	2
+#define TIPC_MEDIA_TYPE_UDP	3
 
 /**
  * struct tipc_node_map - set of node identifiers
@@ -104,7 +105,8 @@ struct tipc_media {
 	int (*send_msg)(struct net *net, struct sk_buff *buf,
 			struct tipc_bearer *b_ptr,
 			struct tipc_media_addr *dest);
-	int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr);
+	int (*enable_media)(struct net *net, struct tipc_bearer *b_ptr,
+			    struct nlattr *attr[]);
 	void (*disable_media)(struct tipc_bearer *b_ptr);
 	int (*addr2str)(struct tipc_media_addr *addr,
 			char *strbuf,
@@ -183,6 +185,9 @@ extern struct tipc_media eth_media_info;
 #ifdef CONFIG_TIPC_MEDIA_IB
 extern struct tipc_media ib_media_info;
 #endif
+#ifdef CONFIG_TIPC_MEDIA_UDP
+extern struct tipc_media udp_media_info;
+#endif
 
 int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info);
 int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info);
@@ -197,7 +202,8 @@ int tipc_nl_media_set(struct sk_buff *skb, struct genl_info *info);
 int tipc_media_set_priority(const char *name, u32 new_value);
 int tipc_media_set_window(const char *name, u32 new_value);
 void tipc_media_addr_printf(char *buf, int len, struct tipc_media_addr *a);
-int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b);
+int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
+			 struct nlattr *attrs[]);
 void tipc_disable_l2_media(struct tipc_bearer *b);
 int tipc_l2_send_msg(struct net *net, struct sk_buff *buf,
 		     struct tipc_bearer *b, struct tipc_media_addr *dest);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index c1cc8d7a5d52..fa167846d1ab 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -87,7 +87,7 @@ struct plist;
  * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields
  *       are word aligned for quicker access
  */
-#define BUF_HEADROOM LL_MAX_HEADER
+#define BUF_HEADROOM (LL_MAX_HEADER + 48)
 
 struct tipc_skb_cb {
 	void *handle;
diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
new file mode 100644
index 000000000000..0d10001db40d
--- /dev/null
+++ b/net/tipc/udp_media.c
@@ -0,0 +1,442 @@
+/* net/tipc/udp_media.c: IP bearer support for TIPC
+ *
+ * Copyright (c) 2015, Ericsson AB
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the names of the copyright holders nor the names of its
+ *    contributors may be used to endorse or promote products derived from
+ *    this software without specific prior written permission.
+ *
+ * Alternatively, this software may be distributed under the terms of the
+ * GNU General Public License ("GPL") version 2 as published by the Free
+ * Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/socket.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/inet.h>
+#include <linux/inetdevice.h>
+#include <linux/igmp.h>
+#include <linux/kernel.h>
+#include <linux/workqueue.h>
+#include <linux/list.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/udp_tunnel.h>
+#include <linux/tipc_netlink.h>
+#include "core.h"
+#include "bearer.h"
+
+/* IANA assigned UDP port */
+#define UDP_PORT_DEFAULT	6118
+
+static const struct nla_policy tipc_nl_udp_policy[TIPC_NLA_UDP_MAX + 1] = {
+	[TIPC_NLA_UDP_UNSPEC]	= {.type = NLA_UNSPEC},
+	[TIPC_NLA_UDP_LOCAL]	= {.type = NLA_BINARY,
+				   .len = sizeof(struct sockaddr_storage)},
+	[TIPC_NLA_UDP_REMOTE]	= {.type = NLA_BINARY,
+				   .len = sizeof(struct sockaddr_storage)},
+};
+
+/**
+ * struct udp_media_addr - IP/UDP addressing information
+ *
+ * This is the bearer level originating address used in neighbor discovery
+ * messages, and all fields should be in network byte order
+ */
+struct udp_media_addr {
+	__be16	proto;
+	__be16	udp_port;
+	union {
+		struct in_addr ipv4;
+		struct in6_addr ipv6;
+	};
+};
+
+/**
+ * struct udp_bearer - ip/udp bearer data structure
+ * @bearer:	associated generic tipc bearer
+ * @ubsock:	bearer associated socket
+ * @ifindex:	local address scope
+ * @work:	used to schedule deferred work on a bearer
+ */
+struct udp_bearer {
+	struct tipc_bearer __rcu *bearer;
+	struct socket *ubsock;
+	u32 ifindex;
+	struct work_struct work;
+};
+
+/* udp_media_addr_set - convert a ip/udp address to a TIPC media address */
+static void tipc_udp_media_addr_set(struct tipc_media_addr *addr,
+				    struct udp_media_addr *ua)
+{
+	memset(addr, 0, sizeof(struct tipc_media_addr));
+	addr->media_id = TIPC_MEDIA_TYPE_UDP;
+	memcpy(addr->value, ua, sizeof(struct udp_media_addr));
+	if (ntohs(ua->proto) == ETH_P_IP) {
+		if (ipv4_is_multicast(ua->ipv4.s_addr))
+			addr->broadcast = 1;
+	} else if (ntohs(ua->proto) == ETH_P_IPV6) {
+		if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST)
+			addr->broadcast = 1;
+	} else {
+		pr_err("Invalid UDP media address\n");
+	}
+}
+
+/* tipc_udp_addr2str - convert ip/udp address to string */
+static int tipc_udp_addr2str(struct tipc_media_addr *a, char *buf, int size)
+{
+	struct udp_media_addr *ua = (struct udp_media_addr *)&a->value;
+
+	if (ntohs(ua->proto) == ETH_P_IP)
+		snprintf(buf, size, "%pI4:%u", &ua->ipv4, ntohs(ua->udp_port));
+	else if (ntohs(ua->proto) == ETH_P_IPV6)
+		snprintf(buf, size, "%pI6:%u", &ua->ipv6, ntohs(ua->udp_port));
+	else
+		pr_err("Invalid UDP media address\n");
+	return 0;
+}
+
+/* tipc_udp_msg2addr - extract an ip/udp address from a TIPC ndisc message */
+static int tipc_udp_msg2addr(struct tipc_bearer *b, struct tipc_media_addr *a,
+			     char *msg)
+{
+	struct udp_media_addr *ua;
+
+	ua = (struct udp_media_addr *) (msg + TIPC_MEDIA_ADDR_OFFSET);
+	if (msg[TIPC_MEDIA_TYPE_OFFSET] != TIPC_MEDIA_TYPE_UDP)
+		return -EINVAL;
+	tipc_udp_media_addr_set(a, ua);
+	return 0;
+}
+
+/* tipc_udp_addr2msg - write an ip/udp address to a TIPC ndisc message */
+static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a)
+{
+	memset(msg, 0, TIPC_MEDIA_INFO_SIZE);
+	msg[TIPC_MEDIA_TYPE_OFFSET] = TIPC_MEDIA_TYPE_UDP;
+	memcpy(msg + TIPC_MEDIA_ADDR_OFFSET, a->value,
+	       sizeof(struct udp_media_addr));
+	return 0;
+}
+
+/* tipc_send_msg - enqueue a send request */
+static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb,
+			     struct tipc_bearer *b,
+			     struct tipc_media_addr *dest)
+{
+	int ttl, err = 0;
+	struct udp_bearer *ub;
+	struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value;
+	struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value;
+	struct sk_buff *clone;
+	struct rtable *rt;
+
+	clone = skb_clone(skb, GFP_ATOMIC);
+	skb_set_inner_protocol(clone, htons(ETH_P_TIPC));
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub) {
+		err = -ENODEV;
+		goto tx_error;
+	}
+	if (htons(dst->proto) == ETH_P_IP) {
+		struct flowi4 fl = {
+			.daddr = dst->ipv4.s_addr,
+			.saddr = src->ipv4.s_addr,
+			.flowi4_mark = clone->mark,
+			.flowi4_proto = IPPROTO_UDP
+		};
+		rt = ip_route_output_key(net, &fl);
+		if (IS_ERR(rt)) {
+			err = PTR_ERR(rt);
+			goto tx_error;
+		}
+		ttl = ip4_dst_hoplimit(&rt->dst);
+		err = udp_tunnel_xmit_skb(rt, clone, src->ipv4.s_addr,
+					  dst->ipv4.s_addr, 0, ttl, 0,
+					  src->udp_port, dst->udp_port,
+					  false, true);
+		if (err < 0) {
+			ip_rt_put(rt);
+			goto tx_error;
+		}
+#if IS_ENABLED(CONFIG_IPV6)
+	} else {
+		struct dst_entry *ndst;
+		struct flowi6 fl6 = {
+			.flowi6_oif = ub->ifindex,
+			.daddr = dst->ipv6,
+			.saddr = src->ipv6,
+			.flowi6_proto = IPPROTO_UDP
+		};
+		err = ip6_dst_lookup(ub->ubsock->sk, &ndst, &fl6);
+		if (err)
+			goto tx_error;
+		ttl = ip6_dst_hoplimit(ndst);
+		err = udp_tunnel6_xmit_skb(ndst, clone, ndst->dev, &src->ipv6,
+					   &dst->ipv6, 0, ttl, src->udp_port,
+					   dst->udp_port, false);
+#endif
+	}
+	return err;
+
+tx_error:
+	kfree_skb(clone);
+	return err;
+}
+
+/* tipc_udp_recv - read data from bearer socket */
+static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct udp_bearer *ub;
+	struct tipc_bearer *b;
+
+	ub = rcu_dereference_sk_user_data(sk);
+	if (!ub) {
+		pr_err_ratelimited("Failed to get UDP bearer reference");
+		kfree_skb(skb);
+		return 0;
+	}
+
+	skb_pull(skb, sizeof(struct udphdr));
+	rcu_read_lock();
+	b = rcu_dereference_rtnl(ub->bearer);
+
+	if (b) {
+		tipc_rcv(sock_net(sk), skb, b);
+		rcu_read_unlock();
+		return 0;
+	}
+	rcu_read_unlock();
+	kfree_skb(skb);
+	return 0;
+}
+
+static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote)
+{
+	int err = 0;
+	struct ip_mreqn mreqn;
+	struct sock *sk = ub->ubsock->sk;
+
+	if (ntohs(remote->proto) == ETH_P_IP) {
+		if (!ipv4_is_multicast(remote->ipv4.s_addr))
+			return 0;
+		mreqn.imr_multiaddr = remote->ipv4;
+		mreqn.imr_ifindex = ub->ifindex;
+		err = __ip_mc_join_group(sk, &mreqn);
+	} else {
+		if (!ipv6_addr_is_multicast(&remote->ipv6))
+			return 0;
+		err = __ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6);
+	}
+	return err;
+}
+
+/**
+ * parse_options - build local/remote addresses from configuration
+ * @attrs:	netlink config data
+ * @ub:		UDP bearer instance
+ * @local:	local bearer IP address/port
+ * @remote:	peer or multicast IP/port
+ */
+static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub,
+			 struct udp_media_addr *local,
+			 struct udp_media_addr *remote)
+{
+	struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
+	struct sockaddr_storage *sa_local, *sa_remote;
+
+	if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
+		goto err;
+	if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX,
+			     attrs[TIPC_NLA_BEARER_UDP_OPTS],
+			     tipc_nl_udp_policy))
+		goto err;
+	if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) {
+		sa_local = nla_data(opts[TIPC_NLA_UDP_LOCAL]);
+		sa_remote = nla_data(opts[TIPC_NLA_UDP_REMOTE]);
+	} else {
+err:
+		pr_err("Invalid UDP bearer configuration");
+		return -EINVAL;
+	}
+	if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET) {
+		struct sockaddr_in *ip4;
+
+		ip4 = (struct sockaddr_in *)sa_local;
+		local->proto = htons(ETH_P_IP);
+		local->udp_port = ip4->sin_port;
+		local->ipv4.s_addr = ip4->sin_addr.s_addr;
+
+		ip4 = (struct sockaddr_in *)sa_remote;
+		remote->proto = htons(ETH_P_IP);
+		remote->udp_port = ip4->sin_port;
+		remote->ipv4.s_addr = ip4->sin_addr.s_addr;
+		return 0;
+
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if ((sa_local->ss_family & sa_remote->ss_family) == AF_INET6) {
+		struct sockaddr_in6 *ip6;
+
+		ip6 = (struct sockaddr_in6 *)sa_local;
+		local->proto = htons(ETH_P_IPV6);
+		local->udp_port = ip6->sin6_port;
+		local->ipv6 = ip6->sin6_addr;
+		ub->ifindex = ip6->sin6_scope_id;
+
+		ip6 = (struct sockaddr_in6 *)sa_remote;
+		remote->proto = htons(ETH_P_IPV6);
+		remote->udp_port = ip6->sin6_port;
+		remote->ipv6 = ip6->sin6_addr;
+		return 0;
+#endif
+	}
+	return -EADDRNOTAVAIL;
+}
+
+/**
+ * tipc_udp_enable - callback to create a new udp bearer instance
+ * @net:	network namespace
+ * @b:		pointer to generic tipc_bearer
+ * @attrs:	netlink bearer configuration
+ *
+ * validate the bearer parameters and initialize the udp bearer
+ * rtnl_lock should be held
+ */
+static int tipc_udp_enable(struct net *net, struct tipc_bearer *b,
+			   struct nlattr *attrs[])
+{
+	int err = -EINVAL;
+	struct udp_bearer *ub;
+	struct udp_media_addr *remote;
+	struct udp_media_addr local = {0};
+	struct udp_port_cfg udp_conf = {0};
+	struct udp_tunnel_sock_cfg tuncfg = {0};
+
+	ub = kzalloc(sizeof(*ub), GFP_ATOMIC);
+	if (!ub)
+		return -ENOMEM;
+
+	remote = (struct udp_media_addr *)&b->bcast_addr.value;
+	memset(remote, 0, sizeof(struct udp_media_addr));
+	err = parse_options(attrs, ub, &local, remote);
+	if (err)
+		goto err;
+
+	b->bcast_addr.media_id = TIPC_MEDIA_TYPE_UDP;
+	b->bcast_addr.broadcast = 1;
+	rcu_assign_pointer(b->media_ptr, ub);
+	rcu_assign_pointer(ub->bearer, b);
+	tipc_udp_media_addr_set(&b->addr, &local);
+	if (htons(local.proto) == ETH_P_IP) {
+		struct net_device *dev;
+
+		dev = __ip_dev_find(net, local.ipv4.s_addr, false);
+		if (!dev) {
+			err = -ENODEV;
+			goto err;
+		}
+		udp_conf.family = AF_INET;
+		udp_conf.local_ip.s_addr = htonl(INADDR_ANY);
+		udp_conf.use_udp_checksums = false;
+		ub->ifindex = dev->ifindex;
+		b->mtu = dev->mtu - sizeof(struct iphdr)
+			- sizeof(struct udphdr);
+#if IS_ENABLED(CONFIG_IPV6)
+	} else if (htons(local.proto) == ETH_P_IPV6) {
+		udp_conf.family = AF_INET6;
+		udp_conf.use_udp6_tx_checksums = true;
+		udp_conf.use_udp6_rx_checksums = true;
+		udp_conf.local_ip6 = in6addr_any;
+		b->mtu = 1280;
+#endif
+	} else {
+		err = -EAFNOSUPPORT;
+		goto err;
+	}
+	udp_conf.local_udp_port = local.udp_port;
+	err = udp_sock_create(net, &udp_conf, &ub->ubsock);
+	if (err)
+		goto err;
+	tuncfg.sk_user_data = ub;
+	tuncfg.encap_type = 1;
+	tuncfg.encap_rcv = tipc_udp_recv;
+	tuncfg.encap_destroy = NULL;
+	setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
+
+	if (enable_mcast(ub, remote))
+		goto err;
+	return 0;
+err:
+	kfree(ub);
+	return err;
+}
+
+/* cleanup_bearer - break the socket/bearer association */
+static void cleanup_bearer(struct work_struct *work)
+{
+	struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
+
+	if (ub->ubsock)
+		udp_tunnel_sock_release(ub->ubsock);
+	synchronize_net();
+	kfree(ub);
+}
+
+/* tipc_udp_disable - detach bearer from socket */
+static void tipc_udp_disable(struct tipc_bearer *b)
+{
+	struct udp_bearer *ub;
+
+	ub = rcu_dereference_rtnl(b->media_ptr);
+	if (!ub) {
+		pr_err("UDP bearer instance not found\n");
+		return;
+	}
+	if (ub->ubsock)
+		sock_set_flag(ub->ubsock->sk, SOCK_DEAD);
+	RCU_INIT_POINTER(b->media_ptr, NULL);
+	RCU_INIT_POINTER(ub->bearer, NULL);
+
+	/* sock_release need to be done outside of rtnl lock */
+	INIT_WORK(&ub->work, cleanup_bearer);
+	schedule_work(&ub->work);
+}
+
+struct tipc_media udp_media_info = {
+	.send_msg	= tipc_udp_send_msg,
+	.enable_media	= tipc_udp_enable,
+	.disable_media	= tipc_udp_disable,
+	.addr2str	= tipc_udp_addr2str,
+	.addr2msg	= tipc_udp_addr2msg,
+	.msg2addr	= tipc_udp_msg2addr,
+	.priority	= TIPC_DEF_LINK_PRI,
+	.tolerance	= TIPC_DEF_LINK_TOL,
+	.window		= TIPC_DEF_LINK_WIN,
+	.type_id	= TIPC_MEDIA_TYPE_UDP,
+	.hwaddr_len	= 0,
+	.name		= "udp"
+};
-- 
cgit 


From 37ed9493699cc5dafe1b8725858ef73176fdc9d2 Mon Sep 17 00:00:00 2001
From: Scott Feldman <sfeldma@gmail.com>
Date: Thu, 5 Mar 2015 21:21:13 -0800
Subject: rtnetlink: add RTNH_F_EXTERNAL flag for fib offload

Add new RTNH_F_EXTERNAL flag to mark fib entries offloaded externally, for
example to a switchdev switch device.

Signed-off-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 06f75a407f74..c3722b024e73 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -334,6 +334,7 @@ struct rtnexthop {
 #define RTNH_F_DEAD		1	/* Nexthop is dead (used by multipath)	*/
 #define RTNH_F_PERVASIVE	2	/* Do recursive gateway lookup	*/
 #define RTNH_F_ONLINK		4	/* Gateway is forced on link	*/
+#define RTNH_F_EXTERNAL		8	/* Route installed externally	*/
 
 /* Macros to handle hexthops */
 
-- 
cgit 


From 05050753602626ed4c46271c689929b625f409e7 Mon Sep 17 00:00:00 2001
From: Ilan peer <ilan.peer@intel.com>
Date: Wed, 4 Mar 2015 00:32:06 -0500
Subject: cfg80211: Add API to change the indoor regulatory setting

Previously, the indoor setting configuration assumed that as
long as a station interface is connected, the indoor environment
setting does not change. However, this assumption is problematic
as:

- It is possible that a station interface is connected to a mobile
  AP, e.g., softAP or a P2P GO, where it is possible that both the
  station and the mobile AP move out of the indoor environment making
  the indoor setting invalid. In such a case, user space has no way to
  invalidate the setting.
- A station interface disconnection does not necessarily imply that
  the device is no longer operating in an indoor environment, e.g.,
  it is possible that the station interface is roaming but is still
  stays indoor.

To handle the above, extend the indoor configuration API to allow
user space to indicate a change of indoor settings, and allow it to
indicate weather it controls the indoor setting, such that:

1. If the user space process explicitly indicates that it is going
   to control the indoor setting, do not clear the indoor setting
   internally, unless the socket is released. The user space process
   should use the NL80211_ATTR_SOCKET_OWNER attribute in the command
   to state that it is going to control the indoor setting.
2. Reset the indoor setting when restoring the regulatory settings in
   case it is not owned by a user space process.

Based on the above, a user space tool that continuously monitors the
indoor settings, i.e., tracking power setting, location etc., can
indicate environment changes to the regulatory core.

It should be noted that currently user space is the only provided mechanism
used to hint to the regulatory core over the indoor/outdoor environment --
while the country IEs do have an environment setting this has been completely
ignored by the regulatory core by design for a while now since country IEs
typically can contain bogus data.

Acked-by: Luis R. Rodriguez <mcgrof@suse.com>
Signed-off-by: ArikX Nemtsov <arik@wizery.com>
Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  9 +++++++
 net/wireless/nl80211.c       | 19 ++++++++++++++-
 net/wireless/reg.c           | 57 ++++++++++++++++++++++++++++++++++++++++----
 net/wireless/reg.h           | 15 +++++++++++-
 4 files changed, 94 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 37e7f39441e5..ae16ba9cb1e3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1697,6 +1697,10 @@ enum nl80211_commands {
  *	If set during scheduled scan start then the new scan req will be
  *	owned by the netlink socket that created it and the scheduled scan will
  *	be stopped when the socket is closed.
+ *	If set during configuration of regulatory indoor operation then the
+ *	regulatory indoor configuration would be owned by the netlink socket
+ *	that configured the indoor setting, and the indoor operation would be
+ *	cleared when the socket is closed.
  *
  * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is
  *	the TDLS link initiator.
@@ -1752,6 +1756,9 @@ enum nl80211_commands {
  *
  * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a
  *	WoWLAN net-detect scan) is started, u32 in seconds.
+
+ * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device
+ *      is operating in an indoor environment.
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2120,6 +2127,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_SCHED_SCAN_DELAY,
 
+	NL80211_ATTR_REG_INDOOR,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 07cef3d7653e..b02085301785 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -399,6 +399,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
 	[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
 	[NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
+	[NL80211_ATTR_REG_INDOOR] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -4958,7 +4959,10 @@ static int parse_reg_rule(struct nlattr *tb[],
 static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 {
 	char *data = NULL;
+	bool is_indoor;
 	enum nl80211_user_reg_hint_type user_reg_hint_type;
+	u32 owner_nlportid;
+
 
 	/*
 	 * You should only get this when cfg80211 hasn't yet initialized
@@ -4984,7 +4988,15 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info)
 		data = nla_data(info->attrs[NL80211_ATTR_REG_ALPHA2]);
 		return regulatory_hint_user(data, user_reg_hint_type);
 	case NL80211_USER_REG_HINT_INDOOR:
-		return regulatory_hint_indoor_user();
+		if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) {
+			owner_nlportid = info->snd_portid;
+			is_indoor = !!info->attrs[NL80211_ATTR_REG_INDOOR];
+		} else {
+			owner_nlportid = 0;
+			is_indoor = true;
+		}
+
+		return regulatory_hint_indoor(is_indoor, owner_nlportid);
 	default:
 		return -EINVAL;
 	}
@@ -12810,6 +12822,11 @@ static int nl80211_netlink_notify(struct notifier_block * nb,
 
 	rcu_read_unlock();
 
+	/*
+	 * It is possible that the user space process that is controlling the
+	 * indoor setting disappeared, so notify the regulatory core.
+	 */
+	regulatory_netlink_notify(notify->portid);
 	return NOTIFY_OK;
 }
 
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index c24c8bf3c988..4239dd408137 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -128,9 +128,12 @@ static int reg_num_devs_support_basehint;
  * State variable indicating if the platform on which the devices
  * are attached is operating in an indoor environment. The state variable
  * is relevant for all registered devices.
- * (protected by RTNL)
  */
 static bool reg_is_indoor;
+static spinlock_t reg_indoor_lock;
+
+/* Used to track the userspace process controlling the indoor setting */
+static u32 reg_is_indoor_portid;
 
 static const struct ieee80211_regdomain *get_cfg80211_regdom(void)
 {
@@ -2288,15 +2291,50 @@ int regulatory_hint_user(const char *alpha2,
 	return 0;
 }
 
-int regulatory_hint_indoor_user(void)
+int regulatory_hint_indoor(bool is_indoor, u32 portid)
 {
+	spin_lock(&reg_indoor_lock);
+
+	/* It is possible that more than one user space process is trying to
+	 * configure the indoor setting. To handle such cases, clear the indoor
+	 * setting in case that some process does not think that the device
+	 * is operating in an indoor environment. In addition, if a user space
+	 * process indicates that it is controlling the indoor setting, save its
+	 * portid, i.e., make it the owner.
+	 */
+	reg_is_indoor = is_indoor;
+	if (reg_is_indoor) {
+		if (!reg_is_indoor_portid)
+			reg_is_indoor_portid = portid;
+	} else {
+		reg_is_indoor_portid = 0;
+	}
 
+	spin_unlock(&reg_indoor_lock);
 
-	reg_is_indoor = true;
+	if (!is_indoor)
+		reg_check_channels();
 
 	return 0;
 }
 
+void regulatory_netlink_notify(u32 portid)
+{
+	spin_lock(&reg_indoor_lock);
+
+	if (reg_is_indoor_portid != portid) {
+		spin_unlock(&reg_indoor_lock);
+		return;
+	}
+
+	reg_is_indoor = false;
+	reg_is_indoor_portid = 0;
+
+	spin_unlock(&reg_indoor_lock);
+
+	reg_check_channels();
+}
+
 /* Driver hints */
 int regulatory_hint(struct wiphy *wiphy, const char *alpha2)
 {
@@ -2464,7 +2502,17 @@ static void restore_regulatory_settings(bool reset_user)
 
 	ASSERT_RTNL();
 
-	reg_is_indoor = false;
+	/*
+	 * Clear the indoor setting in case that it is not controlled by user
+	 * space, as otherwise there is no guarantee that the device is still
+	 * operating in an indoor environment.
+	 */
+	spin_lock(&reg_indoor_lock);
+	if (reg_is_indoor && !reg_is_indoor_portid) {
+		reg_is_indoor = false;
+		reg_check_channels();
+	}
+	spin_unlock(&reg_indoor_lock);
 
 	reset_regdomains(true, &world_regdom);
 	restore_alpha2(alpha2, reset_user);
@@ -3061,6 +3109,7 @@ int __init regulatory_init(void)
 
 	spin_lock_init(&reg_requests_lock);
 	spin_lock_init(&reg_pending_beacons_lock);
+	spin_lock_init(&reg_indoor_lock);
 
 	reg_regdb_size_check();
 
diff --git a/net/wireless/reg.h b/net/wireless/reg.h
index 4b45d6e61d24..a2c4e16459da 100644
--- a/net/wireless/reg.h
+++ b/net/wireless/reg.h
@@ -25,7 +25,20 @@ enum nl80211_dfs_regions reg_get_dfs_region(struct wiphy *wiphy);
 
 int regulatory_hint_user(const char *alpha2,
 			 enum nl80211_user_reg_hint_type user_reg_hint_type);
-int regulatory_hint_indoor_user(void);
+
+/**
+ * regulatory_hint_indoor - hint operation in indoor env. or not
+ * @is_indoor: if true indicates that user space thinks that the
+ * device is operating in an indoor environment.
+ * @portid: the netlink port ID on which the hint was given.
+ */
+int regulatory_hint_indoor(bool is_indoor, u32 portid);
+
+/**
+ * regulatory_netlink_notify - notify on released netlink socket
+ * @portid: the netlink socket port ID
+ */
+void regulatory_netlink_notify(u32 portid);
 
 void wiphy_regulatory_register(struct wiphy *wiphy);
 void wiphy_regulatory_deregister(struct wiphy *wiphy);
-- 
cgit 


From 68c557501b008515cb86c9a36c75f4e82e14a819 Mon Sep 17 00:00:00 2001
From: Eric Farman <farman@linux.vnet.ibm.com>
Date: Mon, 9 Jun 2014 10:57:26 -0400
Subject: KVM: s390: Allocate and save/restore vector registers

Define and allocate space for both the host and guest views of
the vector registers for a given vcpu.  The 32 vector registers
occupy 128 bits each (512 bytes total), but architecturally are
paired with 512 additional bytes of reserved space for future
expansion.

The kvm_sync_regs structs containing the registers are union'ed
with 1024 bytes of padding in the common kvm_run struct.  The
addition of 1024 bytes of new register information clearly exceeds
the existing union, so an expansion of that padding is required.

When changing environments, we need to appropriately save and
restore the vector registers viewed by both the host and guest,
into and out of the sync_regs space.

The floating point registers overlay the upper half of vector
registers 0-15, so there's a bit of data duplication here that
needs to be carefully avoided.

Signed-off-by: Eric Farman <farman@linux.vnet.ibm.com>
Reviewed-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 10 ++++++++++
 arch/s390/include/asm/kvm_host.h  | 10 +++++++++-
 arch/s390/include/uapi/asm/kvm.h  |  4 ++++
 arch/s390/kvm/kvm-s390.c          | 39 +++++++++++++++++++++++++++++++++------
 include/uapi/linux/kvm.h          |  3 ++-
 5 files changed, 58 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index b112efc816f1..ee47998ec368 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3248,3 +3248,13 @@ All other orders will be handled completely in user space.
 Only privileged operation exceptions will be checked for in the kernel (or even
 in the hardware prior to interception). If this capability is not enabled, the
 old way of handling SIGP orders is used (partially in kernel and user space).
+
+7.3 KVM_CAP_S390_VECTOR_REGISTERS
+
+Architectures: s390
+Parameters: none
+Returns: 0 on success, negative value on error
+
+Allows use of the vector registers introduced with z13 processor, and
+provides for the synchronization between host and user space.  Will
+return -EINVAL if the machine does not support vectors.
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index f407bbf5ee94..3449a388b169 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -183,11 +183,17 @@ struct kvm_s390_itdb {
 	__u8	data[256];
 } __packed;
 
+struct kvm_s390_vregs {
+	__vector128 vrs[32];
+	__u8	reserved200[512];	/* for future vector expansion */
+} __packed;
+
 struct sie_page {
 	struct kvm_s390_sie_block sie_block;
 	__u8 reserved200[1024];		/* 0x0200 */
 	struct kvm_s390_itdb itdb;	/* 0x0600 */
-	__u8 reserved700[2304];		/* 0x0700 */
+	__u8 reserved700[1280];		/* 0x0700 */
+	struct kvm_s390_vregs vregs;	/* 0x0c00 */
 } __packed;
 
 struct kvm_vcpu_stat {
@@ -465,6 +471,7 @@ struct kvm_vcpu_arch {
 	s390_fp_regs      host_fpregs;
 	unsigned int      host_acrs[NUM_ACRS];
 	s390_fp_regs      guest_fpregs;
+	struct kvm_s390_vregs	*host_vregs;
 	struct kvm_s390_local_interrupt local_int;
 	struct hrtimer    ckc_timer;
 	struct kvm_s390_pgm_info pgm;
@@ -551,6 +558,7 @@ struct kvm_arch{
 	int css_support;
 	int use_irqchip;
 	int use_cmma;
+	int use_vectors;
 	int user_cpu_state_ctrl;
 	int user_sigp;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index 9c77e60b9a26..ef1a5fcc6c66 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -150,6 +150,7 @@ struct kvm_guest_debug_arch {
 #define KVM_SYNC_CRS    (1UL << 3)
 #define KVM_SYNC_ARCH0  (1UL << 4)
 #define KVM_SYNC_PFAULT (1UL << 5)
+#define KVM_SYNC_VRS    (1UL << 6)
 /* definition of registers in kvm_run */
 struct kvm_sync_regs {
 	__u64 prefix;	/* prefix register */
@@ -164,6 +165,9 @@ struct kvm_sync_regs {
 	__u64 pft;	/* pfault token [PFAULT] */
 	__u64 pfs;	/* pfault select [PFAULT] */
 	__u64 pfc;	/* pfault compare [PFAULT] */
+	__u64 vrs[32][2];	/* vector registers */
+	__u8  reserved[512];	/* for future vector expansion */
+	__u32 fpc;	/* only valid with vector registers */
 };
 
 #define KVM_REG_S390_TODPR	(KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index f3517e716fa4..a8fe3ab76d68 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -185,6 +185,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_COW:
 		r = MACHINE_HAS_ESOP;
 		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		r = MACHINE_HAS_VX;
+		break;
 	default:
 		r = 0;
 	}
@@ -265,6 +268,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		kvm->arch.user_sigp = 1;
 		r = 0;
 		break;
+	case KVM_CAP_S390_VECTOR_REGISTERS:
+		kvm->arch.use_vectors = MACHINE_HAS_VX;
+		r = MACHINE_HAS_VX ? 0 : -EINVAL;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -942,6 +949,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 
 	kvm->arch.css_support = 0;
 	kvm->arch.use_irqchip = 0;
+	kvm->arch.use_vectors = 0;
 	kvm->arch.epoch = 0;
 
 	spin_lock_init(&kvm->arch.start_stop_lock);
@@ -1035,6 +1043,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 				    KVM_SYNC_CRS |
 				    KVM_SYNC_ARCH0 |
 				    KVM_SYNC_PFAULT;
+	if (test_kvm_facility(vcpu->kvm, 129))
+		vcpu->run->kvm_valid_regs |= KVM_SYNC_VRS;
 
 	if (kvm_is_ucontrol(vcpu->kvm))
 		return __kvm_ucontrol_vcpu_init(vcpu);
@@ -1045,10 +1055,18 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 {
 	save_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	save_fp_regs(vcpu->arch.host_fpregs.fprs);
+	if (vcpu->kvm->arch.use_vectors)
+		save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		save_fp_regs(vcpu->arch.host_fpregs.fprs);
 	save_access_regs(vcpu->arch.host_acrs);
-	restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	if (vcpu->kvm->arch.use_vectors) {
+		restore_fp_ctl(&vcpu->run->s.regs.fpc);
+		restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		restore_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
 	restore_access_regs(vcpu->run->s.regs.acrs);
 	gmap_enable(vcpu->arch.gmap);
 	atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
@@ -1058,11 +1076,19 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
 	atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags);
 	gmap_disable(vcpu->arch.gmap);
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	if (vcpu->kvm->arch.use_vectors) {
+		save_fp_ctl(&vcpu->run->s.regs.fpc);
+		save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs);
+	} else {
+		save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+		save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	}
 	save_access_regs(vcpu->run->s.regs.acrs);
 	restore_fp_ctl(&vcpu->arch.host_fpregs.fpc);
-	restore_fp_regs(vcpu->arch.host_fpregs.fprs);
+	if (vcpu->kvm->arch.use_vectors)
+		restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs);
+	else
+		restore_fp_regs(vcpu->arch.host_fpregs.fprs);
 	restore_access_regs(vcpu->arch.host_acrs);
 }
 
@@ -1196,6 +1222,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm,
 
 	vcpu->arch.sie_block = &sie_page->sie_block;
 	vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb;
+	vcpu->arch.host_vregs = &sie_page->vregs;
 
 	vcpu->arch.sie_block->icpua = id;
 	if (!kvm_is_ucontrol(kvm)) {
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 805570650062..82634a492fe0 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -324,7 +324,7 @@ struct kvm_run {
 	__u64 kvm_dirty_regs;
 	union {
 		struct kvm_sync_regs regs;
-		char padding[1024];
+		char padding[2048];
 	} s;
 };
 
@@ -760,6 +760,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_ENABLE_HCALL 104
 #define KVM_CAP_CHECK_EXTENSION_VM 105
 #define KVM_CAP_S390_USER_SIGP 106
+#define KVM_CAP_S390_VECTOR_REGISTERS 107
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit 


From 1bd187de536494a27925902b9653e9ef0ace4774 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 23 Feb 2015 16:24:44 +0200
Subject: x86, intel-mid: remove Intel MID specific serial support

Since we have a native 8250 driver carrying the Intel MID serial devices the
specific support is not needed anymore. This patch removes it for Intel MID.

Note that the console device name is changed from ttyMFDx to ttySx.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/x86/Kconfig.debug                             |    4 -
 arch/x86/include/asm/intel-mid.h                   |    3 -
 arch/x86/kernel/early_printk.c                     |    6 -
 arch/x86/platform/intel-mid/Makefile               |    1 -
 .../platform/intel-mid/early_printk_intel_mid.c    |  112 --
 drivers/tty/serial/Kconfig                         |   10 -
 drivers/tty/serial/Makefile                        |    1 -
 drivers/tty/serial/mfd.c                           | 1505 --------------------
 include/linux/serial_mfd.h                         |   47 -
 include/uapi/linux/serial_reg.h                    |   19 -
 10 files changed, 1708 deletions(-)
 delete mode 100644 arch/x86/platform/intel-mid/early_printk_intel_mid.c
 delete mode 100644 drivers/tty/serial/mfd.c
 delete mode 100644 include/linux/serial_mfd.h

(limited to 'include/uapi/linux')

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 20028da8ae18..72484a645f05 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -43,10 +43,6 @@ config EARLY_PRINTK
 	  with klogd/syslogd or the X server. You should normally N here,
 	  unless you want to debug such a crash.
 
-config EARLY_PRINTK_INTEL_MID
-	bool "Early printk for Intel MID platform support"
-	depends on EARLY_PRINTK && X86_INTEL_MID
-
 config EARLY_PRINTK_DBGP
 	bool "Early printk via EHCI debug port"
 	depends on EARLY_PRINTK && PCI
diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h
index 705d35708a50..7c5af123bdbd 100644
--- a/arch/x86/include/asm/intel-mid.h
+++ b/arch/x86/include/asm/intel-mid.h
@@ -136,9 +136,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
 #define SFI_MTMR_MAX_NUM 8
 #define SFI_MRTC_MAX	8
 
-extern struct console early_hsu_console;
-extern void hsu_early_console_init(const char *);
-
 extern void intel_scu_devices_create(void);
 extern void intel_scu_devices_destroy(void);
 
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index a62536a1be88..f85e3fb50f28 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -375,12 +375,6 @@ static int __init setup_early_printk(char *buf)
 		if (!strncmp(buf, "xen", 3))
 			early_console_register(&xenboot_console, keep);
 #endif
-#ifdef CONFIG_EARLY_PRINTK_INTEL_MID
-		if (!strncmp(buf, "hsu", 3)) {
-			hsu_early_console_init(buf + 3);
-			early_console_register(&early_hsu_console, keep);
-		}
-#endif
 #ifdef CONFIG_EARLY_PRINTK_EFI
 		if (!strncmp(buf, "efi", 3))
 			early_console_register(&early_efi_console, keep);
diff --git a/arch/x86/platform/intel-mid/Makefile b/arch/x86/platform/intel-mid/Makefile
index 0a8ee703b9fa..0ce1b1913673 100644
--- a/arch/x86/platform/intel-mid/Makefile
+++ b/arch/x86/platform/intel-mid/Makefile
@@ -1,5 +1,4 @@
 obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o mfld.o mrfl.o
-obj-$(CONFIG_EARLY_PRINTK_INTEL_MID) += early_printk_intel_mid.o
 
 # SFI specific code
 ifdef CONFIG_X86_INTEL_MID
diff --git a/arch/x86/platform/intel-mid/early_printk_intel_mid.c b/arch/x86/platform/intel-mid/early_printk_intel_mid.c
deleted file mode 100644
index 4e720829ab90..000000000000
--- a/arch/x86/platform/intel-mid/early_printk_intel_mid.c
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * early_printk_intel_mid.c - early consoles for Intel MID platforms
- *
- * Copyright (c) 2008-2010, Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-/*
- * This file implements early console named hsu.
- * hsu is based on a High Speed UART device which only exists in the Medfield
- * platform
- */
-
-#include <linux/serial_reg.h>
-#include <linux/serial_mfd.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/delay.h>
-#include <linux/io.h>
-
-#include <asm/fixmap.h>
-#include <asm/pgtable.h>
-#include <asm/intel-mid.h>
-
-/*
- * Following is the early console based on Medfield HSU (High
- * Speed UART) device.
- */
-#define HSU_PORT_BASE		0xffa28080
-
-static void __iomem *phsu;
-
-void hsu_early_console_init(const char *s)
-{
-	unsigned long paddr, port = 0;
-	u8 lcr;
-
-	/*
-	 * Select the early HSU console port if specified by user in the
-	 * kernel command line.
-	 */
-	if (*s && !kstrtoul(s, 10, &port))
-		port = clamp_val(port, 0, 2);
-
-	paddr = HSU_PORT_BASE + port * 0x80;
-	phsu = (void __iomem *)set_fixmap_offset_nocache(FIX_EARLYCON_MEM_BASE, paddr);
-
-	/* Disable FIFO */
-	writeb(0x0, phsu + UART_FCR);
-
-	/* Set to default 115200 bps, 8n1 */
-	lcr = readb(phsu + UART_LCR);
-	writeb((0x80 | lcr), phsu + UART_LCR);
-	writeb(0x18, phsu + UART_DLL);
-	writeb(lcr,  phsu + UART_LCR);
-	writel(0x3600, phsu + UART_MUL*4);
-
-	writeb(0x8, phsu + UART_MCR);
-	writeb(0x7, phsu + UART_FCR);
-	writeb(0x3, phsu + UART_LCR);
-
-	/* Clear IRQ status */
-	readb(phsu + UART_LSR);
-	readb(phsu + UART_RX);
-	readb(phsu + UART_IIR);
-	readb(phsu + UART_MSR);
-
-	/* Enable FIFO */
-	writeb(0x7, phsu + UART_FCR);
-}
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-static void early_hsu_putc(char ch)
-{
-	unsigned int timeout = 10000; /* 10ms */
-	u8 status;
-
-	while (--timeout) {
-		status = readb(phsu + UART_LSR);
-		if (status & BOTH_EMPTY)
-			break;
-		udelay(1);
-	}
-
-	/* Only write the char when there was no timeout */
-	if (timeout)
-		writeb(ch, phsu + UART_TX);
-}
-
-static void early_hsu_write(struct console *con, const char *str, unsigned n)
-{
-	int i;
-
-	for (i = 0; i < n && *str; i++) {
-		if (*str == '\n')
-			early_hsu_putc('\r');
-		early_hsu_putc(*str);
-		str++;
-	}
-}
-
-struct console early_hsu_console = {
-	.name =		"earlyhsu",
-	.write =	early_hsu_write,
-	.flags =	CON_PRINTBUFFER,
-	.index =	-1,
-};
diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index 60c368a5dbf1..c9dbc626038d 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -483,16 +483,6 @@ config SERIAL_SA1100_CONSOLE
 	  your boot loader (lilo or loadlin) about how to pass options to the
 	  kernel at boot time.)
 
-config SERIAL_MFD_HSU
-	tristate "Medfield High Speed UART support"
-	depends on PCI
-	select SERIAL_CORE
-
-config SERIAL_MFD_HSU_CONSOLE
-	bool "Medfield HSU serial console support"
-	depends on SERIAL_MFD_HSU=y
-	select SERIAL_CORE_CONSOLE
-
 config SERIAL_BFIN
 	tristate "Blackfin serial port support"
 	depends on BLACKFIN
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index 599be4b05a26..f42b4f9845df 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -78,7 +78,6 @@ obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
 obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o
 obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o
 obj-$(CONFIG_SERIAL_VT8500) += vt8500_serial.o
-obj-$(CONFIG_SERIAL_MFD_HSU)	+= mfd.o
 obj-$(CONFIG_SERIAL_IFX6X60)  	+= ifx6x60.o
 obj-$(CONFIG_SERIAL_PCH_UART)	+= pch_uart.o
 obj-$(CONFIG_SERIAL_MSM_SMD)	+= msm_smd_tty.o
diff --git a/drivers/tty/serial/mfd.c b/drivers/tty/serial/mfd.c
deleted file mode 100644
index 8fe4501d7565..000000000000
--- a/drivers/tty/serial/mfd.c
+++ /dev/null
@@ -1,1505 +0,0 @@
-/*
- * mfd.c: driver for High Speed UART device of Intel Medfield platform
- *
- * Refer pxa.c, 8250.c and some other drivers in drivers/serial/
- *
- * (C) Copyright 2010 Intel Corporation
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; version 2
- * of the License.
- */
-
-/* Notes:
- * 1. DMA channel allocation: 0/1 channel are assigned to port 0,
- *    2/3 chan to port 1, 4/5 chan to port 3. Even number chans
- *    are used for RX, odd chans for TX
- *
- * 2. The RI/DSR/DCD/DTR are not pinned out, DCD & DSR are always
- *    asserted, only when the HW is reset the DDCD and DDSR will
- *    be triggered
- */
-
-#if defined(CONFIG_SERIAL_MFD_HSU_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
-#define SUPPORT_SYSRQ
-#endif
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/slab.h>
-#include <linux/serial_reg.h>
-#include <linux/circ_buf.h>
-#include <linux/delay.h>
-#include <linux/interrupt.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/serial_core.h>
-#include <linux/serial_mfd.h>
-#include <linux/dma-mapping.h>
-#include <linux/pci.h>
-#include <linux/nmi.h>
-#include <linux/io.h>
-#include <linux/debugfs.h>
-#include <linux/pm_runtime.h>
-
-#define HSU_DMA_BUF_SIZE	2048
-
-#define chan_readl(chan, offset)	readl(chan->reg + offset)
-#define chan_writel(chan, offset, val)	writel(val, chan->reg + offset)
-
-#define mfd_readl(obj, offset)		readl(obj->reg + offset)
-#define mfd_writel(obj, offset, val)	writel(val, obj->reg + offset)
-
-static int hsu_dma_enable;
-module_param(hsu_dma_enable, int, 0);
-MODULE_PARM_DESC(hsu_dma_enable,
-		 "It is a bitmap to set working mode, if bit[x] is 1, then port[x] will work in DMA mode, otherwise in PIO mode.");
-
-struct hsu_dma_buffer {
-	u8		*buf;
-	dma_addr_t	dma_addr;
-	u32		dma_size;
-	u32		ofs;
-};
-
-struct hsu_dma_chan {
-	u32	id;
-	enum dma_data_direction	dirt;
-	struct uart_hsu_port	*uport;
-	void __iomem		*reg;
-};
-
-struct uart_hsu_port {
-	struct uart_port        port;
-	unsigned char           ier;
-	unsigned char           lcr;
-	unsigned char           mcr;
-	unsigned int            lsr_break_flag;
-	char			name[12];
-	int			index;
-	struct device		*dev;
-
-	struct hsu_dma_chan	*txc;
-	struct hsu_dma_chan	*rxc;
-	struct hsu_dma_buffer	txbuf;
-	struct hsu_dma_buffer	rxbuf;
-	int			use_dma;	/* flag for DMA/PIO */
-	int			running;
-	int			dma_tx_on;
-};
-
-/* Top level data structure of HSU */
-struct hsu_port {
-	void __iomem	*reg;
-	unsigned long	paddr;
-	unsigned long	iolen;
-	u32		irq;
-
-	struct uart_hsu_port	port[3];
-	struct hsu_dma_chan	chans[10];
-
-	struct dentry *debugfs;
-};
-
-static inline unsigned int serial_in(struct uart_hsu_port *up, int offset)
-{
-	unsigned int val;
-
-	if (offset > UART_MSR) {
-		offset <<= 2;
-		val = readl(up->port.membase + offset);
-	} else
-		val = (unsigned int)readb(up->port.membase + offset);
-
-	return val;
-}
-
-static inline void serial_out(struct uart_hsu_port *up, int offset, int value)
-{
-	if (offset > UART_MSR) {
-		offset <<= 2;
-		writel(value, up->port.membase + offset);
-	} else {
-		unsigned char val = value & 0xff;
-		writeb(val, up->port.membase + offset);
-	}
-}
-
-#ifdef CONFIG_DEBUG_FS
-
-#define HSU_REGS_BUFSIZE	1024
-
-
-static ssize_t port_show_regs(struct file *file, char __user *user_buf,
-				size_t count, loff_t *ppos)
-{
-	struct uart_hsu_port *up = file->private_data;
-	char *buf;
-	u32 len = 0;
-	ssize_t ret;
-
-	buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL);
-	if (!buf)
-		return 0;
-
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"MFD HSU port[%d] regs:\n", up->index);
-
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"=================================\n");
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"IER: \t\t0x%08x\n", serial_in(up, UART_IER));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"IIR: \t\t0x%08x\n", serial_in(up, UART_IIR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"LCR: \t\t0x%08x\n", serial_in(up, UART_LCR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"MCR: \t\t0x%08x\n", serial_in(up, UART_MCR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"LSR: \t\t0x%08x\n", serial_in(up, UART_LSR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"MSR: \t\t0x%08x\n", serial_in(up, UART_MSR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"FOR: \t\t0x%08x\n", serial_in(up, UART_FOR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"PS: \t\t0x%08x\n", serial_in(up, UART_PS));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"MUL: \t\t0x%08x\n", serial_in(up, UART_MUL));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"DIV: \t\t0x%08x\n", serial_in(up, UART_DIV));
-
-	if (len > HSU_REGS_BUFSIZE)
-		len = HSU_REGS_BUFSIZE;
-
-	ret =  simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-	return ret;
-}
-
-static ssize_t dma_show_regs(struct file *file, char __user *user_buf,
-				size_t count, loff_t *ppos)
-{
-	struct hsu_dma_chan *chan = file->private_data;
-	char *buf;
-	u32 len = 0;
-	ssize_t ret;
-
-	buf = kzalloc(HSU_REGS_BUFSIZE, GFP_KERNEL);
-	if (!buf)
-		return 0;
-
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"MFD HSU DMA channel [%d] regs:\n", chan->id);
-
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"=================================\n");
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"CR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_CR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"DCR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_DCR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"BSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_BSR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"MOTSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_MOTSR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0SAR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D0TSR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1SAR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D1TSR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2SAR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D2TSR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0SAR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3SAR));
-	len += snprintf(buf + len, HSU_REGS_BUFSIZE - len,
-			"D0TSR: \t\t0x%08x\n", chan_readl(chan, HSU_CH_D3TSR));
-
-	if (len > HSU_REGS_BUFSIZE)
-		len = HSU_REGS_BUFSIZE;
-
-	ret =  simple_read_from_buffer(user_buf, count, ppos, buf, len);
-	kfree(buf);
-	return ret;
-}
-
-static const struct file_operations port_regs_ops = {
-	.owner		= THIS_MODULE,
-	.open		= simple_open,
-	.read		= port_show_regs,
-	.llseek		= default_llseek,
-};
-
-static const struct file_operations dma_regs_ops = {
-	.owner		= THIS_MODULE,
-	.open		= simple_open,
-	.read		= dma_show_regs,
-	.llseek		= default_llseek,
-};
-
-static int hsu_debugfs_init(struct hsu_port *hsu)
-{
-	int i;
-	char name[32];
-
-	hsu->debugfs = debugfs_create_dir("hsu", NULL);
-	if (!hsu->debugfs)
-		return -ENOMEM;
-
-	for (i = 0; i < 3; i++) {
-		snprintf(name, sizeof(name), "port_%d_regs", i);
-		debugfs_create_file(name, S_IFREG | S_IRUGO,
-			hsu->debugfs, (void *)(&hsu->port[i]), &port_regs_ops);
-	}
-
-	for (i = 0; i < 6; i++) {
-		snprintf(name, sizeof(name), "dma_chan_%d_regs", i);
-		debugfs_create_file(name, S_IFREG | S_IRUGO,
-			hsu->debugfs, (void *)&hsu->chans[i], &dma_regs_ops);
-	}
-
-	return 0;
-}
-
-static void hsu_debugfs_remove(struct hsu_port *hsu)
-{
-	if (hsu->debugfs)
-		debugfs_remove_recursive(hsu->debugfs);
-}
-
-#else
-static inline int hsu_debugfs_init(struct hsu_port *hsu)
-{
-	return 0;
-}
-
-static inline void hsu_debugfs_remove(struct hsu_port *hsu)
-{
-}
-#endif /* CONFIG_DEBUG_FS */
-
-static void serial_hsu_enable_ms(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-
-	up->ier |= UART_IER_MSI;
-	serial_out(up, UART_IER, up->ier);
-}
-
-static void hsu_dma_tx(struct uart_hsu_port *up)
-{
-	struct circ_buf *xmit = &up->port.state->xmit;
-	struct hsu_dma_buffer *dbuf = &up->txbuf;
-	int count;
-
-	/* test_and_set_bit may be better, but anyway it's in lock protected mode */
-	if (up->dma_tx_on)
-		return;
-
-	/* Update the circ buf info */
-	xmit->tail += dbuf->ofs;
-	xmit->tail &= UART_XMIT_SIZE - 1;
-
-	up->port.icount.tx += dbuf->ofs;
-	dbuf->ofs = 0;
-
-	/* Disable the channel */
-	chan_writel(up->txc, HSU_CH_CR, 0x0);
-
-	if (!uart_circ_empty(xmit) && !uart_tx_stopped(&up->port)) {
-		dma_sync_single_for_device(up->port.dev,
-					   dbuf->dma_addr,
-					   dbuf->dma_size,
-					   DMA_TO_DEVICE);
-
-		count = CIRC_CNT_TO_END(xmit->head, xmit->tail, UART_XMIT_SIZE);
-		dbuf->ofs = count;
-
-		/* Reprogram the channel */
-		chan_writel(up->txc, HSU_CH_D0SAR, dbuf->dma_addr + xmit->tail);
-		chan_writel(up->txc, HSU_CH_D0TSR, count);
-
-		/* Reenable the channel */
-		chan_writel(up->txc, HSU_CH_DCR, 0x1
-						 | (0x1 << 8)
-						 | (0x1 << 16)
-						 | (0x1 << 24));
-		up->dma_tx_on = 1;
-		chan_writel(up->txc, HSU_CH_CR, 0x1);
-	}
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&up->port);
-}
-
-/* The buffer is already cache coherent */
-static void hsu_dma_start_rx_chan(struct hsu_dma_chan *rxc,
-					struct hsu_dma_buffer *dbuf)
-{
-	dbuf->ofs = 0;
-
-	chan_writel(rxc, HSU_CH_BSR, 32);
-	chan_writel(rxc, HSU_CH_MOTSR, 4);
-
-	chan_writel(rxc, HSU_CH_D0SAR, dbuf->dma_addr);
-	chan_writel(rxc, HSU_CH_D0TSR, dbuf->dma_size);
-	chan_writel(rxc, HSU_CH_DCR, 0x1 | (0x1 << 8)
-					 | (0x1 << 16)
-					 | (0x1 << 24)	/* timeout bit, see HSU Errata 1 */
-					 );
-	chan_writel(rxc, HSU_CH_CR, 0x3);
-}
-
-/* Protected by spin_lock_irqsave(port->lock) */
-static void serial_hsu_start_tx(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-
-	if (up->use_dma) {
-		hsu_dma_tx(up);
-	} else if (!(up->ier & UART_IER_THRI)) {
-		up->ier |= UART_IER_THRI;
-		serial_out(up, UART_IER, up->ier);
-	}
-}
-
-static void serial_hsu_stop_tx(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	struct hsu_dma_chan *txc = up->txc;
-
-	if (up->use_dma)
-		chan_writel(txc, HSU_CH_CR, 0x0);
-	else if (up->ier & UART_IER_THRI) {
-		up->ier &= ~UART_IER_THRI;
-		serial_out(up, UART_IER, up->ier);
-	}
-}
-
-/* This is always called in spinlock protected mode, so
- * modify timeout timer is safe here */
-static void hsu_dma_rx(struct uart_hsu_port *up, u32 int_sts,
-			unsigned long *flags)
-{
-	struct hsu_dma_buffer *dbuf = &up->rxbuf;
-	struct hsu_dma_chan *chan = up->rxc;
-	struct uart_port *port = &up->port;
-	struct tty_port *tport = &port->state->port;
-	int count;
-
-	/*
-	 * First need to know how many is already transferred,
-	 * then check if its a timeout DMA irq, and return
-	 * the trail bytes out, push them up and reenable the
-	 * channel
-	 */
-
-	/* Timeout IRQ, need wait some time, see Errata 2 */
-	if (int_sts & 0xf00)
-		udelay(2);
-
-	/* Stop the channel */
-	chan_writel(chan, HSU_CH_CR, 0x0);
-
-	count = chan_readl(chan, HSU_CH_D0SAR) - dbuf->dma_addr;
-	if (!count) {
-		/* Restart the channel before we leave */
-		chan_writel(chan, HSU_CH_CR, 0x3);
-		return;
-	}
-
-	dma_sync_single_for_cpu(port->dev, dbuf->dma_addr,
-			dbuf->dma_size, DMA_FROM_DEVICE);
-
-	/*
-	 * Head will only wrap around when we recycle
-	 * the DMA buffer, and when that happens, we
-	 * explicitly set tail to 0. So head will
-	 * always be greater than tail.
-	 */
-	tty_insert_flip_string(tport, dbuf->buf, count);
-	port->icount.rx += count;
-
-	dma_sync_single_for_device(up->port.dev, dbuf->dma_addr,
-			dbuf->dma_size, DMA_FROM_DEVICE);
-
-	/* Reprogram the channel */
-	chan_writel(chan, HSU_CH_D0SAR, dbuf->dma_addr);
-	chan_writel(chan, HSU_CH_D0TSR, dbuf->dma_size);
-	chan_writel(chan, HSU_CH_DCR, 0x1
-					 | (0x1 << 8)
-					 | (0x1 << 16)
-					 | (0x1 << 24)	/* timeout bit, see HSU Errata 1 */
-					 );
-	spin_unlock_irqrestore(&up->port.lock, *flags);
-	tty_flip_buffer_push(tport);
-	spin_lock_irqsave(&up->port.lock, *flags);
-
-	chan_writel(chan, HSU_CH_CR, 0x3);
-
-}
-
-static void serial_hsu_stop_rx(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	struct hsu_dma_chan *chan = up->rxc;
-
-	if (up->use_dma)
-		chan_writel(chan, HSU_CH_CR, 0x2);
-	else {
-		up->ier &= ~UART_IER_RLSI;
-		up->port.read_status_mask &= ~UART_LSR_DR;
-		serial_out(up, UART_IER, up->ier);
-	}
-}
-
-static inline void receive_chars(struct uart_hsu_port *up, int *status,
-		unsigned long *flags)
-{
-	unsigned int ch, flag;
-	unsigned int max_count = 256;
-
-	do {
-		ch = serial_in(up, UART_RX);
-		flag = TTY_NORMAL;
-		up->port.icount.rx++;
-
-		if (unlikely(*status & (UART_LSR_BI | UART_LSR_PE |
-				       UART_LSR_FE | UART_LSR_OE))) {
-
-			dev_warn(up->dev, "We really rush into ERR/BI case"
-				"status = 0x%02x", *status);
-			/* For statistics only */
-			if (*status & UART_LSR_BI) {
-				*status &= ~(UART_LSR_FE | UART_LSR_PE);
-				up->port.icount.brk++;
-				/*
-				 * We do the SysRQ and SAK checking
-				 * here because otherwise the break
-				 * may get masked by ignore_status_mask
-				 * or read_status_mask.
-				 */
-				if (uart_handle_break(&up->port))
-					goto ignore_char;
-			} else if (*status & UART_LSR_PE)
-				up->port.icount.parity++;
-			else if (*status & UART_LSR_FE)
-				up->port.icount.frame++;
-			if (*status & UART_LSR_OE)
-				up->port.icount.overrun++;
-
-			/* Mask off conditions which should be ignored. */
-			*status &= up->port.read_status_mask;
-
-#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE
-			if (up->port.cons &&
-				up->port.cons->index == up->port.line) {
-				/* Recover the break flag from console xmit */
-				*status |= up->lsr_break_flag;
-				up->lsr_break_flag = 0;
-			}
-#endif
-			if (*status & UART_LSR_BI) {
-				flag = TTY_BREAK;
-			} else if (*status & UART_LSR_PE)
-				flag = TTY_PARITY;
-			else if (*status & UART_LSR_FE)
-				flag = TTY_FRAME;
-		}
-
-		if (uart_handle_sysrq_char(&up->port, ch))
-			goto ignore_char;
-
-		uart_insert_char(&up->port, *status, UART_LSR_OE, ch, flag);
-	ignore_char:
-		*status = serial_in(up, UART_LSR);
-	} while ((*status & UART_LSR_DR) && max_count--);
-
-	spin_unlock_irqrestore(&up->port.lock, *flags);
-	tty_flip_buffer_push(&up->port.state->port);
-	spin_lock_irqsave(&up->port.lock, *flags);
-}
-
-static void transmit_chars(struct uart_hsu_port *up)
-{
-	struct circ_buf *xmit = &up->port.state->xmit;
-	int count;
-
-	if (up->port.x_char) {
-		serial_out(up, UART_TX, up->port.x_char);
-		up->port.icount.tx++;
-		up->port.x_char = 0;
-		return;
-	}
-	if (uart_circ_empty(xmit) || uart_tx_stopped(&up->port)) {
-		serial_hsu_stop_tx(&up->port);
-		return;
-	}
-
-	/* The IRQ is for TX FIFO half-empty */
-	count = up->port.fifosize / 2;
-
-	do {
-		serial_out(up, UART_TX, xmit->buf[xmit->tail]);
-		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
-
-		up->port.icount.tx++;
-		if (uart_circ_empty(xmit))
-			break;
-	} while (--count > 0);
-
-	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
-		uart_write_wakeup(&up->port);
-
-	if (uart_circ_empty(xmit))
-		serial_hsu_stop_tx(&up->port);
-}
-
-static inline void check_modem_status(struct uart_hsu_port *up)
-{
-	int status;
-
-	status = serial_in(up, UART_MSR);
-
-	if ((status & UART_MSR_ANY_DELTA) == 0)
-		return;
-
-	if (status & UART_MSR_TERI)
-		up->port.icount.rng++;
-	if (status & UART_MSR_DDSR)
-		up->port.icount.dsr++;
-	/* We may only get DDCD when HW init and reset */
-	if (status & UART_MSR_DDCD)
-		uart_handle_dcd_change(&up->port, status & UART_MSR_DCD);
-	/* Will start/stop_tx accordingly */
-	if (status & UART_MSR_DCTS)
-		uart_handle_cts_change(&up->port, status & UART_MSR_CTS);
-
-	wake_up_interruptible(&up->port.state->port.delta_msr_wait);
-}
-
-/*
- * This handles the interrupt from one port.
- */
-static irqreturn_t port_irq(int irq, void *dev_id)
-{
-	struct uart_hsu_port *up = dev_id;
-	unsigned int iir, lsr;
-	unsigned long flags;
-
-	if (unlikely(!up->running))
-		return IRQ_NONE;
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	if (up->use_dma) {
-		lsr = serial_in(up, UART_LSR);
-		if (unlikely(lsr & (UART_LSR_BI | UART_LSR_PE |
-				       UART_LSR_FE | UART_LSR_OE)))
-			dev_warn(up->dev,
-				"Got lsr irq while using DMA, lsr = 0x%2x\n",
-				lsr);
-		check_modem_status(up);
-		spin_unlock_irqrestore(&up->port.lock, flags);
-		return IRQ_HANDLED;
-	}
-
-	iir = serial_in(up, UART_IIR);
-	if (iir & UART_IIR_NO_INT) {
-		spin_unlock_irqrestore(&up->port.lock, flags);
-		return IRQ_NONE;
-	}
-
-	lsr = serial_in(up, UART_LSR);
-	if (lsr & UART_LSR_DR)
-		receive_chars(up, &lsr, &flags);
-	check_modem_status(up);
-
-	/* lsr will be renewed during the receive_chars */
-	if (lsr & UART_LSR_THRE)
-		transmit_chars(up);
-
-	spin_unlock_irqrestore(&up->port.lock, flags);
-	return IRQ_HANDLED;
-}
-
-static inline void dma_chan_irq(struct hsu_dma_chan *chan)
-{
-	struct uart_hsu_port *up = chan->uport;
-	unsigned long flags;
-	u32 int_sts;
-
-	spin_lock_irqsave(&up->port.lock, flags);
-
-	if (!up->use_dma || !up->running)
-		goto exit;
-
-	/*
-	 * No matter what situation, need read clear the IRQ status
-	 * There is a bug, see Errata 5, HSD 2900918
-	 */
-	int_sts = chan_readl(chan, HSU_CH_SR);
-
-	/* Rx channel */
-	if (chan->dirt == DMA_FROM_DEVICE)
-		hsu_dma_rx(up, int_sts, &flags);
-
-	/* Tx channel */
-	if (chan->dirt == DMA_TO_DEVICE) {
-		chan_writel(chan, HSU_CH_CR, 0x0);
-		up->dma_tx_on = 0;
-		hsu_dma_tx(up);
-	}
-
-exit:
-	spin_unlock_irqrestore(&up->port.lock, flags);
-	return;
-}
-
-static irqreturn_t dma_irq(int irq, void *dev_id)
-{
-	struct hsu_port *hsu = dev_id;
-	u32 int_sts, i;
-
-	int_sts = mfd_readl(hsu, HSU_GBL_DMAISR);
-
-	/* Currently we only have 6 channels may be used */
-	for (i = 0; i < 6; i++) {
-		if (int_sts & 0x1)
-			dma_chan_irq(&hsu->chans[i]);
-		int_sts >>= 1;
-	}
-
-	return IRQ_HANDLED;
-}
-
-static unsigned int serial_hsu_tx_empty(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	unsigned long flags;
-	unsigned int ret;
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	ret = serial_in(up, UART_LSR) & UART_LSR_TEMT ? TIOCSER_TEMT : 0;
-	spin_unlock_irqrestore(&up->port.lock, flags);
-
-	return ret;
-}
-
-static unsigned int serial_hsu_get_mctrl(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	unsigned char status;
-	unsigned int ret;
-
-	status = serial_in(up, UART_MSR);
-
-	ret = 0;
-	if (status & UART_MSR_DCD)
-		ret |= TIOCM_CAR;
-	if (status & UART_MSR_RI)
-		ret |= TIOCM_RNG;
-	if (status & UART_MSR_DSR)
-		ret |= TIOCM_DSR;
-	if (status & UART_MSR_CTS)
-		ret |= TIOCM_CTS;
-	return ret;
-}
-
-static void serial_hsu_set_mctrl(struct uart_port *port, unsigned int mctrl)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	unsigned char mcr = 0;
-
-	if (mctrl & TIOCM_RTS)
-		mcr |= UART_MCR_RTS;
-	if (mctrl & TIOCM_DTR)
-		mcr |= UART_MCR_DTR;
-	if (mctrl & TIOCM_OUT1)
-		mcr |= UART_MCR_OUT1;
-	if (mctrl & TIOCM_OUT2)
-		mcr |= UART_MCR_OUT2;
-	if (mctrl & TIOCM_LOOP)
-		mcr |= UART_MCR_LOOP;
-
-	mcr |= up->mcr;
-
-	serial_out(up, UART_MCR, mcr);
-}
-
-static void serial_hsu_break_ctl(struct uart_port *port, int break_state)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	unsigned long flags;
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	if (break_state == -1)
-		up->lcr |= UART_LCR_SBC;
-	else
-		up->lcr &= ~UART_LCR_SBC;
-	serial_out(up, UART_LCR, up->lcr);
-	spin_unlock_irqrestore(&up->port.lock, flags);
-}
-
-/*
- * What special to do:
- * 1. chose the 64B fifo mode
- * 2. start dma or pio depends on configuration
- * 3. we only allocate dma memory when needed
- */
-static int serial_hsu_startup(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	unsigned long flags;
-
-	pm_runtime_get_sync(up->dev);
-
-	/*
-	 * Clear the FIFO buffers and disable them.
-	 * (they will be reenabled in set_termios())
-	 */
-	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO);
-	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO |
-			UART_FCR_CLEAR_RCVR | UART_FCR_CLEAR_XMIT);
-	serial_out(up, UART_FCR, 0);
-
-	/* Clear the interrupt registers. */
-	(void) serial_in(up, UART_LSR);
-	(void) serial_in(up, UART_RX);
-	(void) serial_in(up, UART_IIR);
-	(void) serial_in(up, UART_MSR);
-
-	/* Now, initialize the UART, default is 8n1 */
-	serial_out(up, UART_LCR, UART_LCR_WLEN8);
-
-	spin_lock_irqsave(&up->port.lock, flags);
-
-	up->port.mctrl |= TIOCM_OUT2;
-	serial_hsu_set_mctrl(&up->port, up->port.mctrl);
-
-	/*
-	 * Finally, enable interrupts.  Note: Modem status interrupts
-	 * are set via set_termios(), which will be occurring imminently
-	 * anyway, so we don't enable them here.
-	 */
-	if (!up->use_dma)
-		up->ier = UART_IER_RLSI | UART_IER_RDI | UART_IER_RTOIE;
-	else
-		up->ier = 0;
-	serial_out(up, UART_IER, up->ier);
-
-	spin_unlock_irqrestore(&up->port.lock, flags);
-
-	/* DMA init */
-	if (up->use_dma) {
-		struct hsu_dma_buffer *dbuf;
-		struct circ_buf *xmit = &port->state->xmit;
-
-		up->dma_tx_on = 0;
-
-		/* First allocate the RX buffer */
-		dbuf = &up->rxbuf;
-		dbuf->buf = kzalloc(HSU_DMA_BUF_SIZE, GFP_KERNEL);
-		if (!dbuf->buf) {
-			up->use_dma = 0;
-			goto exit;
-		}
-		dbuf->dma_addr = dma_map_single(port->dev,
-						dbuf->buf,
-						HSU_DMA_BUF_SIZE,
-						DMA_FROM_DEVICE);
-		dbuf->dma_size = HSU_DMA_BUF_SIZE;
-
-		/* Start the RX channel right now */
-		hsu_dma_start_rx_chan(up->rxc, dbuf);
-
-		/* Next init the TX DMA */
-		dbuf = &up->txbuf;
-		dbuf->buf = xmit->buf;
-		dbuf->dma_addr = dma_map_single(port->dev,
-					       dbuf->buf,
-					       UART_XMIT_SIZE,
-					       DMA_TO_DEVICE);
-		dbuf->dma_size = UART_XMIT_SIZE;
-
-		/* This should not be changed all around */
-		chan_writel(up->txc, HSU_CH_BSR, 32);
-		chan_writel(up->txc, HSU_CH_MOTSR, 4);
-		dbuf->ofs = 0;
-	}
-
-exit:
-	 /* And clear the interrupt registers again for luck. */
-	(void) serial_in(up, UART_LSR);
-	(void) serial_in(up, UART_RX);
-	(void) serial_in(up, UART_IIR);
-	(void) serial_in(up, UART_MSR);
-
-	up->running = 1;
-	return 0;
-}
-
-static void serial_hsu_shutdown(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	unsigned long flags;
-
-	/* Disable interrupts from this port */
-	up->ier = 0;
-	serial_out(up, UART_IER, 0);
-	up->running = 0;
-
-	spin_lock_irqsave(&up->port.lock, flags);
-	up->port.mctrl &= ~TIOCM_OUT2;
-	serial_hsu_set_mctrl(&up->port, up->port.mctrl);
-	spin_unlock_irqrestore(&up->port.lock, flags);
-
-	/* Disable break condition and FIFOs */
-	serial_out(up, UART_LCR, serial_in(up, UART_LCR) & ~UART_LCR_SBC);
-	serial_out(up, UART_FCR, UART_FCR_ENABLE_FIFO |
-				  UART_FCR_CLEAR_RCVR |
-				  UART_FCR_CLEAR_XMIT);
-	serial_out(up, UART_FCR, 0);
-
-	pm_runtime_put(up->dev);
-}
-
-static void
-serial_hsu_set_termios(struct uart_port *port, struct ktermios *termios,
-		       struct ktermios *old)
-{
-	struct uart_hsu_port *up =
-			container_of(port, struct uart_hsu_port, port);
-	unsigned char cval, fcr = 0;
-	unsigned long flags;
-	unsigned int baud, quot;
-	u32 ps, mul;
-
-	switch (termios->c_cflag & CSIZE) {
-	case CS5:
-		cval = UART_LCR_WLEN5;
-		break;
-	case CS6:
-		cval = UART_LCR_WLEN6;
-		break;
-	case CS7:
-		cval = UART_LCR_WLEN7;
-		break;
-	default:
-	case CS8:
-		cval = UART_LCR_WLEN8;
-		break;
-	}
-
-	/* CMSPAR isn't supported by this driver */
-	termios->c_cflag &= ~CMSPAR;
-
-	if (termios->c_cflag & CSTOPB)
-		cval |= UART_LCR_STOP;
-	if (termios->c_cflag & PARENB)
-		cval |= UART_LCR_PARITY;
-	if (!(termios->c_cflag & PARODD))
-		cval |= UART_LCR_EPAR;
-
-	/*
-	 * The base clk is 50Mhz, and the baud rate come from:
-	 *	baud = 50M * MUL / (DIV * PS * DLAB)
-	 *
-	 * For those basic low baud rate we can get the direct
-	 * scalar from 2746800, like 115200 = 2746800/24. For those
-	 * higher baud rate, we handle them case by case, mainly by
-	 * adjusting the MUL/PS registers, and DIV register is kept
-	 * as default value 0x3d09 to make things simple
-	 */
-	baud = uart_get_baud_rate(port, termios, old, 0, 4000000);
-
-	quot = 1;
-	ps = 0x10;
-	mul = 0x3600;
-	switch (baud) {
-	case 3500000:
-		mul = 0x3345;
-		ps = 0xC;
-		break;
-	case 1843200:
-		mul = 0x2400;
-		break;
-	case 3000000:
-	case 2500000:
-	case 2000000:
-	case 1500000:
-	case 1000000:
-	case 500000:
-		/* mul/ps/quot = 0x9C4/0x10/0x1 will make a 500000 bps */
-		mul = baud / 500000 * 0x9C4;
-		break;
-	default:
-		/* Use uart_get_divisor to get quot for other baud rates */
-		quot = 0;
-	}
-
-	if (!quot)
-		quot = uart_get_divisor(port, baud);
-
-	if ((up->port.uartclk / quot) < (2400 * 16))
-		fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_1B;
-	else if ((up->port.uartclk / quot) < (230400 * 16))
-		fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_16B;
-	else
-		fcr = UART_FCR_ENABLE_FIFO | UART_FCR_HSU_64_32B;
-
-	fcr |= UART_FCR_HSU_64B_FIFO;
-
-	/*
-	 * Ok, we're now changing the port state.  Do it with
-	 * interrupts disabled.
-	 */
-	spin_lock_irqsave(&up->port.lock, flags);
-
-	/* Update the per-port timeout */
-	uart_update_timeout(port, termios->c_cflag, baud);
-
-	up->port.read_status_mask = UART_LSR_OE | UART_LSR_THRE | UART_LSR_DR;
-	if (termios->c_iflag & INPCK)
-		up->port.read_status_mask |= UART_LSR_FE | UART_LSR_PE;
-	if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
-		up->port.read_status_mask |= UART_LSR_BI;
-
-	/* Characters to ignore */
-	up->port.ignore_status_mask = 0;
-	if (termios->c_iflag & IGNPAR)
-		up->port.ignore_status_mask |= UART_LSR_PE | UART_LSR_FE;
-	if (termios->c_iflag & IGNBRK) {
-		up->port.ignore_status_mask |= UART_LSR_BI;
-		/*
-		 * If we're ignoring parity and break indicators,
-		 * ignore overruns too (for real raw support).
-		 */
-		if (termios->c_iflag & IGNPAR)
-			up->port.ignore_status_mask |= UART_LSR_OE;
-	}
-
-	/* Ignore all characters if CREAD is not set */
-	if ((termios->c_cflag & CREAD) == 0)
-		up->port.ignore_status_mask |= UART_LSR_DR;
-
-	/*
-	 * CTS flow control flag and modem status interrupts, disable
-	 * MSI by default
-	 */
-	up->ier &= ~UART_IER_MSI;
-	if (UART_ENABLE_MS(&up->port, termios->c_cflag))
-		up->ier |= UART_IER_MSI;
-
-	serial_out(up, UART_IER, up->ier);
-
-	if (termios->c_cflag & CRTSCTS)
-		up->mcr |= UART_MCR_AFE | UART_MCR_RTS;
-	else
-		up->mcr &= ~UART_MCR_AFE;
-
-	serial_out(up, UART_LCR, cval | UART_LCR_DLAB);	/* set DLAB */
-	serial_out(up, UART_DLL, quot & 0xff);		/* LS of divisor */
-	serial_out(up, UART_DLM, quot >> 8);		/* MS of divisor */
-	serial_out(up, UART_LCR, cval);			/* reset DLAB */
-	serial_out(up, UART_MUL, mul);			/* set MUL */
-	serial_out(up, UART_PS, ps);			/* set PS */
-	up->lcr = cval;					/* Save LCR */
-	serial_hsu_set_mctrl(&up->port, up->port.mctrl);
-	serial_out(up, UART_FCR, fcr);
-	spin_unlock_irqrestore(&up->port.lock, flags);
-}
-
-static void
-serial_hsu_pm(struct uart_port *port, unsigned int state,
-	      unsigned int oldstate)
-{
-}
-
-static void serial_hsu_release_port(struct uart_port *port)
-{
-}
-
-static int serial_hsu_request_port(struct uart_port *port)
-{
-	return 0;
-}
-
-static void serial_hsu_config_port(struct uart_port *port, int flags)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	up->port.type = PORT_MFD;
-}
-
-static int
-serial_hsu_verify_port(struct uart_port *port, struct serial_struct *ser)
-{
-	/* We don't want the core code to modify any port params */
-	return -EINVAL;
-}
-
-static const char *
-serial_hsu_type(struct uart_port *port)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-	return up->name;
-}
-
-/* Mainly for uart console use */
-static struct uart_hsu_port *serial_hsu_ports[3];
-static struct uart_driver serial_hsu_reg;
-
-#ifdef CONFIG_SERIAL_MFD_HSU_CONSOLE
-
-#define BOTH_EMPTY (UART_LSR_TEMT | UART_LSR_THRE)
-
-/* Wait for transmitter & holding register to empty */
-static inline void wait_for_xmitr(struct uart_hsu_port *up)
-{
-	unsigned int status, tmout = 1000;
-
-	/* Wait up to 1ms for the character to be sent. */
-	do {
-		status = serial_in(up, UART_LSR);
-
-		if (status & UART_LSR_BI)
-			up->lsr_break_flag = UART_LSR_BI;
-
-		if (--tmout == 0)
-			break;
-		udelay(1);
-	} while (!(status & BOTH_EMPTY));
-
-	/* Wait up to 1s for flow control if necessary */
-	if (up->port.flags & UPF_CONS_FLOW) {
-		tmout = 1000000;
-		while (--tmout &&
-		       ((serial_in(up, UART_MSR) & UART_MSR_CTS) == 0))
-			udelay(1);
-	}
-}
-
-static void serial_hsu_console_putchar(struct uart_port *port, int ch)
-{
-	struct uart_hsu_port *up =
-		container_of(port, struct uart_hsu_port, port);
-
-	wait_for_xmitr(up);
-	serial_out(up, UART_TX, ch);
-}
-
-/*
- * Print a string to the serial port trying not to disturb
- * any possible real use of the port...
- *
- *	The console_lock must be held when we get here.
- */
-static void
-serial_hsu_console_write(struct console *co, const char *s, unsigned int count)
-{
-	struct uart_hsu_port *up = serial_hsu_ports[co->index];
-	unsigned long flags;
-	unsigned int ier;
-	int locked = 1;
-
-	touch_nmi_watchdog();
-
-	local_irq_save(flags);
-	if (up->port.sysrq)
-		locked = 0;
-	else if (oops_in_progress) {
-		locked = spin_trylock(&up->port.lock);
-	} else
-		spin_lock(&up->port.lock);
-
-	/* First save the IER then disable the interrupts */
-	ier = serial_in(up, UART_IER);
-	serial_out(up, UART_IER, 0);
-
-	uart_console_write(&up->port, s, count, serial_hsu_console_putchar);
-
-	/*
-	 * Finally, wait for transmitter to become empty
-	 * and restore the IER
-	 */
-	wait_for_xmitr(up);
-	serial_out(up, UART_IER, ier);
-
-	if (locked)
-		spin_unlock(&up->port.lock);
-	local_irq_restore(flags);
-}
-
-static struct console serial_hsu_console;
-
-static int __init
-serial_hsu_console_setup(struct console *co, char *options)
-{
-	struct uart_hsu_port *up;
-	int baud = 115200;
-	int bits = 8;
-	int parity = 'n';
-	int flow = 'n';
-
-	if (co->index == -1 || co->index >= serial_hsu_reg.nr)
-		co->index = 0;
-	up = serial_hsu_ports[co->index];
-	if (!up)
-		return -ENODEV;
-
-	if (options)
-		uart_parse_options(options, &baud, &parity, &bits, &flow);
-
-	return uart_set_options(&up->port, co, baud, parity, bits, flow);
-}
-
-static struct console serial_hsu_console = {
-	.name		= "ttyMFD",
-	.write		= serial_hsu_console_write,
-	.device		= uart_console_device,
-	.setup		= serial_hsu_console_setup,
-	.flags		= CON_PRINTBUFFER,
-	.index		= -1,
-	.data		= &serial_hsu_reg,
-};
-
-#define SERIAL_HSU_CONSOLE	(&serial_hsu_console)
-#else
-#define SERIAL_HSU_CONSOLE	NULL
-#endif
-
-static struct uart_ops serial_hsu_pops = {
-	.tx_empty	= serial_hsu_tx_empty,
-	.set_mctrl	= serial_hsu_set_mctrl,
-	.get_mctrl	= serial_hsu_get_mctrl,
-	.stop_tx	= serial_hsu_stop_tx,
-	.start_tx	= serial_hsu_start_tx,
-	.stop_rx	= serial_hsu_stop_rx,
-	.enable_ms	= serial_hsu_enable_ms,
-	.break_ctl	= serial_hsu_break_ctl,
-	.startup	= serial_hsu_startup,
-	.shutdown	= serial_hsu_shutdown,
-	.set_termios	= serial_hsu_set_termios,
-	.pm		= serial_hsu_pm,
-	.type		= serial_hsu_type,
-	.release_port	= serial_hsu_release_port,
-	.request_port	= serial_hsu_request_port,
-	.config_port	= serial_hsu_config_port,
-	.verify_port	= serial_hsu_verify_port,
-};
-
-static struct uart_driver serial_hsu_reg = {
-	.owner		= THIS_MODULE,
-	.driver_name	= "MFD serial",
-	.dev_name	= "ttyMFD",
-	.major		= TTY_MAJOR,
-	.minor		= 128,
-	.nr		= 3,
-	.cons		= SERIAL_HSU_CONSOLE,
-};
-
-#ifdef CONFIG_PM
-static int serial_hsu_suspend(struct pci_dev *pdev, pm_message_t state)
-{
-	void *priv = pci_get_drvdata(pdev);
-	struct uart_hsu_port *up;
-
-	/* Make sure this is not the internal dma controller */
-	if (priv && (pdev->device != 0x081E)) {
-		up = priv;
-		uart_suspend_port(&serial_hsu_reg, &up->port);
-	}
-
-	pci_save_state(pdev);
-	pci_set_power_state(pdev, pci_choose_state(pdev, state));
-        return 0;
-}
-
-static int serial_hsu_resume(struct pci_dev *pdev)
-{
-	void *priv = pci_get_drvdata(pdev);
-	struct uart_hsu_port *up;
-	int ret;
-
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-
-	ret = pci_enable_device(pdev);
-	if (ret)
-		dev_warn(&pdev->dev,
-			"HSU: can't re-enable device, try to continue\n");
-
-	if (priv && (pdev->device != 0x081E)) {
-		up = priv;
-		uart_resume_port(&serial_hsu_reg, &up->port);
-	}
-	return 0;
-}
-
-static int serial_hsu_runtime_idle(struct device *dev)
-{
-	pm_schedule_suspend(dev, 500);
-	return -EBUSY;
-}
-
-static int serial_hsu_runtime_suspend(struct device *dev)
-{
-	return 0;
-}
-
-static int serial_hsu_runtime_resume(struct device *dev)
-{
-	return 0;
-}
-#else
-#define serial_hsu_suspend		NULL
-#define serial_hsu_resume		NULL
-#define serial_hsu_runtime_idle		NULL
-#define serial_hsu_runtime_suspend	NULL
-#define serial_hsu_runtime_resume	NULL
-#endif
-
-static const struct dev_pm_ops serial_hsu_pm_ops = {
-	.runtime_suspend = serial_hsu_runtime_suspend,
-	.runtime_resume = serial_hsu_runtime_resume,
-	.runtime_idle = serial_hsu_runtime_idle,
-};
-
-/* temp global pointer before we settle down on using one or four PCI dev */
-static struct hsu_port *phsu;
-
-static int serial_hsu_probe(struct pci_dev *pdev,
-				const struct pci_device_id *ent)
-{
-	struct uart_hsu_port *uport;
-	int index, ret;
-
-	printk(KERN_INFO "HSU: found PCI Serial controller(ID: %04x:%04x)\n",
-		pdev->vendor, pdev->device);
-
-	switch (pdev->device) {
-	case 0x081B:
-		index = 0;
-		break;
-	case 0x081C:
-		index = 1;
-		break;
-	case 0x081D:
-		index = 2;
-		break;
-	case 0x081E:
-		/* internal DMA controller */
-		index = 3;
-		break;
-	default:
-		dev_err(&pdev->dev, "HSU: out of index!");
-		return -ENODEV;
-	}
-
-	ret = pci_enable_device(pdev);
-	if (ret)
-		return ret;
-
-	if (index == 3) {
-		/* DMA controller */
-		ret = request_irq(pdev->irq, dma_irq, 0, "hsu_dma", phsu);
-		if (ret) {
-			dev_err(&pdev->dev, "can not get IRQ\n");
-			goto err_disable;
-		}
-		pci_set_drvdata(pdev, phsu);
-	} else {
-		/* UART port 0~2 */
-		uport = &phsu->port[index];
-		uport->port.irq = pdev->irq;
-		uport->port.dev = &pdev->dev;
-		uport->dev = &pdev->dev;
-
-		ret = request_irq(pdev->irq, port_irq, 0, uport->name, uport);
-		if (ret) {
-			dev_err(&pdev->dev, "can not get IRQ\n");
-			goto err_disable;
-		}
-		uart_add_one_port(&serial_hsu_reg, &uport->port);
-
-		pci_set_drvdata(pdev, uport);
-	}
-
-	pm_runtime_put_noidle(&pdev->dev);
-	pm_runtime_allow(&pdev->dev);
-
-	return 0;
-
-err_disable:
-	pci_disable_device(pdev);
-	return ret;
-}
-
-static void hsu_global_init(void)
-{
-	struct hsu_port *hsu;
-	struct uart_hsu_port *uport;
-	struct hsu_dma_chan *dchan;
-	int i, ret;
-
-	hsu = kzalloc(sizeof(struct hsu_port), GFP_KERNEL);
-	if (!hsu)
-		return;
-
-	/* Get basic io resource and map it */
-	hsu->paddr = 0xffa28000;
-	hsu->iolen = 0x1000;
-
-	if (!(request_mem_region(hsu->paddr, hsu->iolen, "HSU global")))
-		pr_warn("HSU: error in request mem region\n");
-
-	hsu->reg = ioremap_nocache((unsigned long)hsu->paddr, hsu->iolen);
-	if (!hsu->reg) {
-		pr_err("HSU: error in ioremap\n");
-		ret = -ENOMEM;
-		goto err_free_region;
-	}
-
-	/* Initialise the 3 UART ports */
-	uport = hsu->port;
-	for (i = 0; i < 3; i++) {
-		uport->port.type = PORT_MFD;
-		uport->port.iotype = UPIO_MEM;
-		uport->port.mapbase = (resource_size_t)hsu->paddr
-					+ HSU_PORT_REG_OFFSET
-					+ i * HSU_PORT_REG_LENGTH;
-		uport->port.membase = hsu->reg + HSU_PORT_REG_OFFSET
-					+ i * HSU_PORT_REG_LENGTH;
-
-		sprintf(uport->name, "hsu_port%d", i);
-		uport->port.fifosize = 64;
-		uport->port.ops = &serial_hsu_pops;
-		uport->port.line = i;
-		uport->port.flags = UPF_IOREMAP;
-		/* set the scalable maxim support rate to 2746800 bps */
-		uport->port.uartclk = 115200 * 24 * 16;
-
-		uport->running = 0;
-		uport->txc = &hsu->chans[i * 2];
-		uport->rxc = &hsu->chans[i * 2 + 1];
-
-		serial_hsu_ports[i] = uport;
-		uport->index = i;
-
-		if (hsu_dma_enable & (1<<i))
-			uport->use_dma = 1;
-		else
-			uport->use_dma = 0;
-
-		uport++;
-	}
-
-	/* Initialise 6 dma channels */
-	dchan = hsu->chans;
-	for (i = 0; i < 6; i++) {
-		dchan->id = i;
-		dchan->dirt = (i & 0x1) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-		dchan->uport = &hsu->port[i/2];
-		dchan->reg = hsu->reg + HSU_DMA_CHANS_REG_OFFSET +
-				i * HSU_DMA_CHANS_REG_LENGTH;
-
-		dchan++;
-	}
-
-	phsu = hsu;
-	hsu_debugfs_init(hsu);
-	return;
-
-err_free_region:
-	release_mem_region(hsu->paddr, hsu->iolen);
-	kfree(hsu);
-	return;
-}
-
-static void serial_hsu_remove(struct pci_dev *pdev)
-{
-	void *priv = pci_get_drvdata(pdev);
-	struct uart_hsu_port *up;
-
-	if (!priv)
-		return;
-
-	pm_runtime_forbid(&pdev->dev);
-	pm_runtime_get_noresume(&pdev->dev);
-
-	/* For port 0/1/2, priv is the address of uart_hsu_port */
-	if (pdev->device != 0x081E) {
-		up = priv;
-		uart_remove_one_port(&serial_hsu_reg, &up->port);
-	}
-
-	free_irq(pdev->irq, priv);
-	pci_disable_device(pdev);
-}
-
-/* First 3 are UART ports, and the 4th is the DMA */
-static const struct pci_device_id pci_ids[] = {
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081B) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081C) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081D) },
-	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x081E) },
-	{},
-};
-
-static struct pci_driver hsu_pci_driver = {
-	.name =		"HSU serial",
-	.id_table =	pci_ids,
-	.probe =	serial_hsu_probe,
-	.remove =	serial_hsu_remove,
-	.suspend =	serial_hsu_suspend,
-	.resume	=	serial_hsu_resume,
-	.driver = {
-		.pm = &serial_hsu_pm_ops,
-	},
-};
-
-static int __init hsu_pci_init(void)
-{
-	int ret;
-
-	hsu_global_init();
-
-	ret = uart_register_driver(&serial_hsu_reg);
-	if (ret)
-		return ret;
-
-	return pci_register_driver(&hsu_pci_driver);
-}
-
-static void __exit hsu_pci_exit(void)
-{
-	pci_unregister_driver(&hsu_pci_driver);
-	uart_unregister_driver(&serial_hsu_reg);
-
-	hsu_debugfs_remove(phsu);
-
-	kfree(phsu);
-}
-
-module_init(hsu_pci_init);
-module_exit(hsu_pci_exit);
-
-MODULE_LICENSE("GPL v2");
-MODULE_DEVICE_TABLE(pci, pci_ids);
diff --git a/include/linux/serial_mfd.h b/include/linux/serial_mfd.h
deleted file mode 100644
index 2b071e0b034d..000000000000
--- a/include/linux/serial_mfd.h
+++ /dev/null
@@ -1,47 +0,0 @@
-#ifndef _SERIAL_MFD_H_
-#define _SERIAL_MFD_H_
-
-/* HW register offset definition */
-#define UART_FOR	0x08
-#define UART_PS		0x0C
-#define UART_MUL	0x0D
-#define UART_DIV	0x0E
-
-#define HSU_GBL_IEN	0x0
-#define HSU_GBL_IST	0x4
-
-#define HSU_GBL_INT_BIT_PORT0	0x0
-#define HSU_GBL_INT_BIT_PORT1	0x1
-#define HSU_GBL_INT_BIT_PORT2	0x2
-#define HSU_GBL_INT_BIT_IRI	0x3
-#define HSU_GBL_INT_BIT_HDLC	0x4
-#define HSU_GBL_INT_BIT_DMA	0x5
-
-#define HSU_GBL_ISR	0x8
-#define HSU_GBL_DMASR	0x400
-#define HSU_GBL_DMAISR	0x404
-
-#define HSU_PORT_REG_OFFSET	0x80
-#define HSU_PORT0_REG_OFFSET	0x80
-#define HSU_PORT1_REG_OFFSET	0x100
-#define HSU_PORT2_REG_OFFSET	0x180
-#define HSU_PORT_REG_LENGTH	0x80
-
-#define HSU_DMA_CHANS_REG_OFFSET	0x500
-#define HSU_DMA_CHANS_REG_LENGTH	0x40
-
-#define HSU_CH_SR		0x0	/* channel status reg */
-#define HSU_CH_CR		0x4	/* control reg */
-#define HSU_CH_DCR		0x8	/* descriptor control reg */
-#define HSU_CH_BSR		0x10	/* max fifo buffer size reg */
-#define HSU_CH_MOTSR		0x14	/* minimum ocp transfer size */
-#define HSU_CH_D0SAR		0x20	/* desc 0 start addr */
-#define HSU_CH_D0TSR		0x24	/* desc 0 transfer size */
-#define HSU_CH_D1SAR		0x28
-#define HSU_CH_D1TSR		0x2C
-#define HSU_CH_D2SAR		0x30
-#define HSU_CH_D2TSR		0x34
-#define HSU_CH_D3SAR		0x38
-#define HSU_CH_D3TSR		0x3C
-
-#endif
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index 00adb01fa5f3..e9b4cb0cd7ed 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -241,25 +241,6 @@
 #define UART_FCR_PXAR16	0x80	/* receive FIFO threshold = 16 */
 #define UART_FCR_PXAR32	0xc0	/* receive FIFO threshold = 32 */
 
-/*
- * Intel MID on-chip HSU (High Speed UART) defined bits
- */
-#define UART_FCR_HSU_64_1B	0x00	/* receive FIFO treshold = 1 */
-#define UART_FCR_HSU_64_16B	0x40	/* receive FIFO treshold = 16 */
-#define UART_FCR_HSU_64_32B	0x80	/* receive FIFO treshold = 32 */
-#define UART_FCR_HSU_64_56B	0xc0	/* receive FIFO treshold = 56 */
-
-#define UART_FCR_HSU_16_1B	0x00	/* receive FIFO treshold = 1 */
-#define UART_FCR_HSU_16_4B	0x40	/* receive FIFO treshold = 4 */
-#define UART_FCR_HSU_16_8B	0x80	/* receive FIFO treshold = 8 */
-#define UART_FCR_HSU_16_14B	0xc0	/* receive FIFO treshold = 14 */
-
-#define UART_FCR_HSU_64B_FIFO	0x20	/* chose 64 bytes FIFO */
-#define UART_FCR_HSU_16B_FIFO	0x00	/* chose 16 bytes FIFO */
-
-#define UART_FCR_HALF_EMPT_TXI	0x00	/* trigger TX_EMPT IRQ for half empty */
-#define UART_FCR_FULL_EMPT_TXI	0x08	/* trigger TX_EMPT IRQ for full empty */
-
 /*
  * These register definitions are for the 16C950
  */
-- 
cgit 


From c93682477bd861744589215515a63b81fdbd8948 Mon Sep 17 00:00:00 2001
From: Shani Michaeli <shanim@mellanox.com>
Date: Thu, 5 Mar 2015 20:16:11 +0200
Subject: net/dcb: Add IEEE QCN attribute

As specified in 802.1Qau spec. Add this optional attribute to the
DCB netlink layer. To allow for application to use the new attribute,
NIC drivers should implement and register the  callbacks ieee_getqcn,
ieee_setqcn and ieee_getqcnstats.

The QCN attribute holds a set of parameters for management, and
a set of statistics to provide informative data on Congestion-Control
defined by this spec.

Signed-off-by: Shani Michaeli <shanim@mellanox.com>
Signed-off-by: Shachar Raindel <raindel@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Acked-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dcbnl.h        |  3 +++
 include/uapi/linux/dcbnl.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++
 net/dcb/dcbnl.c            | 44 ++++++++++++++++++++++++++++---
 3 files changed, 110 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/dcbnl.h b/include/net/dcbnl.h
index 597b88a94332..207d9ba1f92c 100644
--- a/include/net/dcbnl.h
+++ b/include/net/dcbnl.h
@@ -49,6 +49,9 @@ struct dcbnl_rtnl_ops {
 	int (*ieee_setets) (struct net_device *, struct ieee_ets *);
 	int (*ieee_getmaxrate) (struct net_device *, struct ieee_maxrate *);
 	int (*ieee_setmaxrate) (struct net_device *, struct ieee_maxrate *);
+	int (*ieee_getqcn) (struct net_device *, struct ieee_qcn *);
+	int (*ieee_setqcn) (struct net_device *, struct ieee_qcn *);
+	int (*ieee_getqcnstats) (struct net_device *, struct ieee_qcn_stats *);
 	int (*ieee_getpfc) (struct net_device *, struct ieee_pfc *);
 	int (*ieee_setpfc) (struct net_device *, struct ieee_pfc *);
 	int (*ieee_getapp) (struct net_device *, struct dcb_app *);
diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index e711f20dc522..6497d7933d5b 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -78,6 +78,70 @@ struct ieee_maxrate {
 	__u64	tc_maxrate[IEEE_8021QAZ_MAX_TCS];
 };
 
+enum dcbnl_cndd_states {
+	DCB_CNDD_RESET = 0,
+	DCB_CNDD_EDGE,
+	DCB_CNDD_INTERIOR,
+	DCB_CNDD_INTERIOR_READY,
+};
+
+/* This structure contains the IEEE 802.1Qau QCN managed object.
+ *
+ *@rpg_enable: enable QCN RP
+ *@rppp_max_rps: maximum number of RPs allowed for this CNPV on this port
+ *@rpg_time_reset: time between rate increases if no CNMs received.
+ *		   given in u-seconds
+ *@rpg_byte_reset: transmitted data between rate increases if no CNMs received.
+ *		   given in Bytes
+ *@rpg_threshold: The number of times rpByteStage or rpTimeStage can count
+ *		   before RP rate control state machine advances states
+ *@rpg_max_rate: the maxinun rate, in Mbits per second,
+ *		 at which an RP can transmit
+ *@rpg_ai_rate: The rate, in Mbits per second,
+ *		used to increase rpTargetRate in the RPR_ACTIVE_INCREASE
+ *@rpg_hai_rate: The rate, in Mbits per second,
+ *		 used to increase rpTargetRate in the RPR_HYPER_INCREASE state
+ *@rpg_gd: Upon CNM receive, flow rate is limited to (Fb/Gd)*CurrentRate.
+ *	   rpgGd is given as log2(Gd), where Gd may only be powers of 2
+ *@rpg_min_dec_fac: The minimum factor by which the current transmit rate
+ *		    can be changed by reception of a CNM.
+ *		    value is given as percentage (1-100)
+ *@rpg_min_rate: The minimum value, in bits per second, for rate to limit
+ *@cndd_state_machine: The state of the congestion notification domain
+ *		       defense state machine, as defined by IEEE 802.3Qau
+ *		       section 32.1.1. In the interior ready state,
+ *		       the QCN capable hardware may add CN-TAG TLV to the
+ *		       outgoing traffic, to specifically identify outgoing
+ *		       flows.
+ */
+
+struct ieee_qcn {
+	__u8 rpg_enable[IEEE_8021QAZ_MAX_TCS];
+	__u32 rppp_max_rps[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_time_reset[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_byte_reset[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_threshold[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_max_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_ai_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_hai_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_gd[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_min_dec_fac[IEEE_8021QAZ_MAX_TCS];
+	__u32 rpg_min_rate[IEEE_8021QAZ_MAX_TCS];
+	__u32 cndd_state_machine[IEEE_8021QAZ_MAX_TCS];
+};
+
+/* This structure contains the IEEE 802.1Qau QCN statistics.
+ *
+ *@rppp_rp_centiseconds: the number of RP-centiseconds accumulated
+ *			 by RPs at this priority level on this Port
+ *@rppp_created_rps: number of active RPs(flows) that react to CNMs
+ */
+
+struct ieee_qcn_stats {
+	__u64 rppp_rp_centiseconds[IEEE_8021QAZ_MAX_TCS];
+	__u32 rppp_created_rps[IEEE_8021QAZ_MAX_TCS];
+};
+
 /* This structure contains the IEEE 802.1Qaz PFC managed object
  *
  * @pfc_cap: Indicates the number of traffic classes on the local device
@@ -334,6 +398,8 @@ enum ieee_attrs {
 	DCB_ATTR_IEEE_PEER_PFC,
 	DCB_ATTR_IEEE_PEER_APP,
 	DCB_ATTR_IEEE_MAXRATE,
+	DCB_ATTR_IEEE_QCN,
+	DCB_ATTR_IEEE_QCN_STATS,
 	__DCB_ATTR_IEEE_MAX
 };
 #define DCB_ATTR_IEEE_MAX (__DCB_ATTR_IEEE_MAX - 1)
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index 93ea80196f0e..5b21f6f88e97 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -177,6 +177,8 @@ static const struct nla_policy dcbnl_ieee_policy[DCB_ATTR_IEEE_MAX + 1] = {
 	[DCB_ATTR_IEEE_PFC]	    = {.len = sizeof(struct ieee_pfc)},
 	[DCB_ATTR_IEEE_APP_TABLE]   = {.type = NLA_NESTED},
 	[DCB_ATTR_IEEE_MAXRATE]   = {.len = sizeof(struct ieee_maxrate)},
+	[DCB_ATTR_IEEE_QCN]         = {.len = sizeof(struct ieee_qcn)},
+	[DCB_ATTR_IEEE_QCN_STATS]   = {.len = sizeof(struct ieee_qcn_stats)},
 };
 
 static const struct nla_policy dcbnl_ieee_app[DCB_ATTR_IEEE_APP_MAX + 1] = {
@@ -1030,7 +1032,7 @@ nla_put_failure:
 	return err;
 }
 
-/* Handle IEEE 802.1Qaz GET commands. */
+/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */
 static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 {
 	struct nlattr *ieee, *app;
@@ -1067,6 +1069,32 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
 		}
 	}
 
+	if (ops->ieee_getqcn) {
+		struct ieee_qcn qcn;
+
+		memset(&qcn, 0, sizeof(qcn));
+		err = ops->ieee_getqcn(netdev, &qcn);
+		if (!err) {
+			err = nla_put(skb, DCB_ATTR_IEEE_QCN,
+				      sizeof(qcn), &qcn);
+			if (err)
+				return -EMSGSIZE;
+		}
+	}
+
+	if (ops->ieee_getqcnstats) {
+		struct ieee_qcn_stats qcn_stats;
+
+		memset(&qcn_stats, 0, sizeof(qcn_stats));
+		err = ops->ieee_getqcnstats(netdev, &qcn_stats);
+		if (!err) {
+			err = nla_put(skb, DCB_ATTR_IEEE_QCN_STATS,
+				      sizeof(qcn_stats), &qcn_stats);
+			if (err)
+				return -EMSGSIZE;
+		}
+	}
+
 	if (ops->ieee_getpfc) {
 		struct ieee_pfc pfc;
 		memset(&pfc, 0, sizeof(pfc));
@@ -1379,8 +1407,9 @@ int dcbnl_cee_notify(struct net_device *dev, int event, int cmd,
 }
 EXPORT_SYMBOL(dcbnl_cee_notify);
 
-/* Handle IEEE 802.1Qaz SET commands. If any requested operation can not
- * be completed the entire msg is aborted and error value is returned.
+/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb SET commands.
+ * If any requested operation can not be completed
+ * the entire msg is aborted and error value is returned.
  * No attempt is made to reconcile the case where only part of the
  * cmd can be completed.
  */
@@ -1417,6 +1446,15 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
 			goto err;
 	}
 
+	if (ieee[DCB_ATTR_IEEE_QCN] && ops->ieee_setqcn) {
+		struct ieee_qcn *qcn =
+			nla_data(ieee[DCB_ATTR_IEEE_QCN]);
+
+		err = ops->ieee_setqcn(netdev, qcn);
+		if (err)
+			goto err;
+	}
+
 	if (ieee[DCB_ATTR_IEEE_PFC] && ops->ieee_setpfc) {
 		struct ieee_pfc *pfc = nla_data(ieee[DCB_ATTR_IEEE_PFC]);
 		err = ops->ieee_setpfc(netdev, pfc);
-- 
cgit 


From c78ba6d64c78634a875d1e316676667cabfea256 Mon Sep 17 00:00:00 2001
From: Lubomir Rintel <lkundrak@v3.sk>
Date: Wed, 11 Mar 2015 15:39:21 +0100
Subject: ipv6: expose RFC4191 route preference via rtnetlink

This makes it possible to retain the route preference when RAs are handled in
userspace.

Signed-off-by: Lubomir Rintel <lkundrak@v3.sk>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h |  1 +
 net/ipv6/route.c               | 16 +++++++++++++++-
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index c3722b024e73..bea910f924dd 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -305,6 +305,7 @@ enum rtattr_type_t {
 	RTA_MFC_STATS,
 	RTA_VIA,
 	RTA_NEWDST,
+	RTA_PREF,
 	__RTA_MAX
 };
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 06fa819c43c9..58c0e6a4d15d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2398,6 +2398,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 	[RTA_PRIORITY]          = { .type = NLA_U32 },
 	[RTA_METRICS]           = { .type = NLA_NESTED },
 	[RTA_MULTIPATH]		= { .len = sizeof(struct rtnexthop) },
+	[RTA_PREF]              = { .type = NLA_U8 },
 };
 
 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
@@ -2405,6 +2406,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 {
 	struct rtmsg *rtm;
 	struct nlattr *tb[RTA_MAX+1];
+	unsigned int pref;
 	int err;
 
 	err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
@@ -2480,6 +2482,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 		cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]);
 	}
 
+	if (tb[RTA_PREF]) {
+		pref = nla_get_u8(tb[RTA_PREF]);
+		if (pref != ICMPV6_ROUTER_PREF_LOW &&
+		    pref != ICMPV6_ROUTER_PREF_HIGH)
+			pref = ICMPV6_ROUTER_PREF_MEDIUM;
+		cfg->fc_flags |= RTF_PREF(pref);
+	}
+
 	err = 0;
 errout:
 	return err;
@@ -2583,7 +2593,8 @@ static inline size_t rt6_nlmsg_size(void)
 	       + nla_total_size(4) /* RTA_PRIORITY */
 	       + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
 	       + nla_total_size(sizeof(struct rta_cacheinfo))
-	       + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */
+	       + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */
+	       + nla_total_size(1); /* RTA_PREF */
 }
 
 static int rt6_fill_node(struct net *net,
@@ -2724,6 +2735,9 @@ static int rt6_fill_node(struct net *net,
 	if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0)
 		goto nla_put_failure;
 
+	if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags)))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
-- 
cgit 


From 03e69b508b6f7c51743055c9f61d1dfeadf4b635 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 14 Mar 2015 02:27:16 +0100
Subject: ebpf: add prandom helper for packet sampling

This work is similar to commit 4cd3675ebf74 ("filter: added BPF
random opcode") and adds a possibility for packet sampling in eBPF.

Currently, this is only possible in classic BPF and useful to
combine sampling with f.e. packet sockets, possible also with tc.

Example function proto-type looks like:

  u32 (*prandom_u32)(void) = (void *)BPF_FUNC_get_prandom_u32;

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  2 ++
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/core.c        |  2 ++
 kernel/bpf/helpers.c     | 12 ++++++++++++
 net/core/filter.c        |  2 ++
 5 files changed, 19 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 80f2e0fc3d02..50bf95e29a96 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -154,4 +154,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto;
 extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
+extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3fa1af8a58d7..1c2ca2b477c8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -165,6 +165,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 50603aec766a..c1dbbb5d289b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -661,6 +661,8 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 
+const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
+
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
  */
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a3c7701a8b5e..95eb59a045ea 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -11,6 +11,7 @@
  */
 #include <linux/bpf.h>
 #include <linux/rcupdate.h>
+#include <linux/random.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -87,3 +88,14 @@ const struct bpf_func_proto bpf_map_delete_elem_proto = {
 	.arg1_type = ARG_CONST_MAP_PTR,
 	.arg2_type = ARG_PTR_TO_MAP_KEY,
 };
+
+static u64 bpf_get_prandom_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	return prandom_u32();
+}
+
+const struct bpf_func_proto bpf_get_prandom_u32_proto = {
+	.func		= bpf_get_prandom_u32,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a4eb7030dba..4344db39af2e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1139,6 +1139,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 		return &bpf_map_update_elem_proto;
 	case BPF_FUNC_map_delete_elem:
 		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_get_prandom_u32:
+		return &bpf_get_prandom_u32_proto;
 	default:
 		return NULL;
 	}
-- 
cgit 


From c04167ce2ca0ecaeaafef006cb0d65cf01b68e42 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 14 Mar 2015 02:27:17 +0100
Subject: ebpf: add helper for obtaining current processor id

This patch adds the possibility to obtain raw_smp_processor_id() in
eBPF. Currently, this is only possible in classic BPF where commit
da2033c28226 ("filter: add SKF_AD_RXHASH and SKF_AD_CPU") has added
facilities for this.

Perhaps most importantly, this would also allow us to track per CPU
statistics with eBPF maps, or to implement a poor-man's per CPU data
structure through eBPF maps.

Example function proto-type looks like:

  u32 (*smp_processor_id)(void) = (void *)BPF_FUNC_get_smp_processor_id;

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/core.c        |  1 +
 kernel/bpf/helpers.c     | 12 ++++++++++++
 net/core/filter.c        |  2 ++
 5 files changed, 17 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 50bf95e29a96..30bfd331882a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -155,5 +155,6 @@ extern const struct bpf_func_proto bpf_map_update_elem_proto;
 extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
+extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1c2ca2b477c8..de1f63668daf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -166,6 +166,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
+	BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1dbbb5d289b..4139a0f8b558 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -662,6 +662,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto __weak;
 const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
+const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
 
 /* To execute LD_ABS/LD_IND instructions __bpf_prog_run() may call
  * skb_copy_bits(), so provide a weak definition of it for NET-less config.
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 95eb59a045ea..bd7f5988ed9c 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -12,6 +12,7 @@
 #include <linux/bpf.h>
 #include <linux/rcupdate.h>
 #include <linux/random.h>
+#include <linux/smp.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -99,3 +100,14 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = {
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 };
+
+static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	return raw_smp_processor_id();
+}
+
+const struct bpf_func_proto bpf_get_smp_processor_id_proto = {
+	.func		= bpf_get_smp_processor_id,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
diff --git a/net/core/filter.c b/net/core/filter.c
index 4344db39af2e..33310eee6134 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1141,6 +1141,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_smp_processor_id_proto;
 	default:
 		return NULL;
 	}
-- 
cgit 


From 9bac3d6d548e5cc925570b263f35b70a00a00ffd Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 13 Mar 2015 11:57:42 -0700
Subject: bpf: allow extended BPF programs access skb fields

introduce user accessible mirror of in-kernel 'struct sk_buff':
struct __sk_buff {
    __u32 len;
    __u32 pkt_type;
    __u32 mark;
    __u32 queue_mapping;
};

bpf programs can do:

int bpf_prog(struct __sk_buff *skb)
{
    __u32 var = skb->pkt_type;

which will be compiled to bpf assembler as:

dst_reg = *(u32 *)(src_reg + 4) // 4 == offsetof(struct __sk_buff, pkt_type)

bpf verifier will check validity of access and will convert it to:

dst_reg = *(u8 *)(src_reg + offsetof(struct sk_buff, __pkt_type_offset))
dst_reg &= 7

since skb->pkt_type is a bitfield.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |   5 +-
 include/uapi/linux/bpf.h |  10 ++++
 kernel/bpf/syscall.c     |   2 +-
 kernel/bpf/verifier.c    | 152 ++++++++++++++++++++++++++++++++++++++++++-----
 net/core/filter.c        | 100 +++++++++++++++++++++++++------
 5 files changed, 234 insertions(+), 35 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 30bfd331882a..280a315de8d6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -103,6 +103,9 @@ struct bpf_verifier_ops {
 	 * with 'type' (read or write) is allowed
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
+
+	u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
+				  struct bpf_insn *insn);
 };
 
 struct bpf_prog_type_list {
@@ -133,7 +136,7 @@ struct bpf_map *bpf_map_get(struct fd f);
 void bpf_map_put(struct bpf_map *map);
 
 /* verify correctness of eBPF program */
-int bpf_check(struct bpf_prog *fp, union bpf_attr *attr);
+int bpf_check(struct bpf_prog **fp, union bpf_attr *attr);
 #else
 static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index de1f63668daf..929545a27546 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -170,4 +170,14 @@ enum bpf_func_id {
 	__BPF_FUNC_MAX_ID,
 };
 
+/* user accessible mirror of in-kernel sk_buff.
+ * new fields can only be added to the end of this structure
+ */
+struct __sk_buff {
+	__u32 len;
+	__u32 pkt_type;
+	__u32 mark;
+	__u32 queue_mapping;
+};
+
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 669719ccc9ee..ea75c654af1b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -519,7 +519,7 @@ static int bpf_prog_load(union bpf_attr *attr)
 		goto free_prog;
 
 	/* run eBPF verifier */
-	err = bpf_check(prog, attr);
+	err = bpf_check(&prog, attr);
 	if (err < 0)
 		goto free_used_maps;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index e6b522496250..c22ebd36fa4b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1620,11 +1620,10 @@ static int do_check(struct verifier_env *env)
 				return err;
 
 		} else if (class == BPF_LDX) {
-			if (BPF_MODE(insn->code) != BPF_MEM ||
-			    insn->imm != 0) {
-				verbose("BPF_LDX uses reserved fields\n");
-				return -EINVAL;
-			}
+			enum bpf_reg_type src_reg_type;
+
+			/* check for reserved fields is already done */
+
 			/* check src operand */
 			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
 			if (err)
@@ -1643,6 +1642,29 @@ static int do_check(struct verifier_env *env)
 			if (err)
 				return err;
 
+			src_reg_type = regs[insn->src_reg].type;
+
+			if (insn->imm == 0 && BPF_SIZE(insn->code) == BPF_W) {
+				/* saw a valid insn
+				 * dst_reg = *(u32 *)(src_reg + off)
+				 * use reserved 'imm' field to mark this insn
+				 */
+				insn->imm = src_reg_type;
+
+			} else if (src_reg_type != insn->imm &&
+				   (src_reg_type == PTR_TO_CTX ||
+				    insn->imm == PTR_TO_CTX)) {
+				/* ABuser program is trying to use the same insn
+				 * dst_reg = *(u32*) (src_reg + off)
+				 * with different pointer types:
+				 * src_reg == ctx in one branch and
+				 * src_reg == stack|map in some other branch.
+				 * Reject it.
+				 */
+				verbose("same insn cannot be used with different pointers\n");
+				return -EINVAL;
+			}
+
 		} else if (class == BPF_STX) {
 			if (BPF_MODE(insn->code) == BPF_XADD) {
 				err = check_xadd(env, insn);
@@ -1790,6 +1812,13 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 	int i, j;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (BPF_CLASS(insn->code) == BPF_LDX &&
+		    (BPF_MODE(insn->code) != BPF_MEM ||
+		     insn->imm != 0)) {
+			verbose("BPF_LDX uses reserved fields\n");
+			return -EINVAL;
+		}
+
 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
 			struct bpf_map *map;
 			struct fd f;
@@ -1881,6 +1910,92 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env)
 			insn->src_reg = 0;
 }
 
+static void adjust_branches(struct bpf_prog *prog, int pos, int delta)
+{
+	struct bpf_insn *insn = prog->insnsi;
+	int insn_cnt = prog->len;
+	int i;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (BPF_CLASS(insn->code) != BPF_JMP ||
+		    BPF_OP(insn->code) == BPF_CALL ||
+		    BPF_OP(insn->code) == BPF_EXIT)
+			continue;
+
+		/* adjust offset of jmps if necessary */
+		if (i < pos && i + insn->off + 1 > pos)
+			insn->off += delta;
+		else if (i > pos && i + insn->off + 1 < pos)
+			insn->off -= delta;
+	}
+}
+
+/* convert load instructions that access fields of 'struct __sk_buff'
+ * into sequence of instructions that access fields of 'struct sk_buff'
+ */
+static int convert_ctx_accesses(struct verifier_env *env)
+{
+	struct bpf_insn *insn = env->prog->insnsi;
+	int insn_cnt = env->prog->len;
+	struct bpf_insn insn_buf[16];
+	struct bpf_prog *new_prog;
+	u32 cnt;
+	int i;
+
+	if (!env->prog->aux->ops->convert_ctx_access)
+		return 0;
+
+	for (i = 0; i < insn_cnt; i++, insn++) {
+		if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+			continue;
+
+		if (insn->imm != PTR_TO_CTX) {
+			/* clear internal mark */
+			insn->imm = 0;
+			continue;
+		}
+
+		cnt = env->prog->aux->ops->
+			convert_ctx_access(insn->dst_reg, insn->src_reg,
+					   insn->off, insn_buf);
+		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+			verbose("bpf verifier is misconfigured\n");
+			return -EINVAL;
+		}
+
+		if (cnt == 1) {
+			memcpy(insn, insn_buf, sizeof(*insn));
+			continue;
+		}
+
+		/* several new insns need to be inserted. Make room for them */
+		insn_cnt += cnt - 1;
+		new_prog = bpf_prog_realloc(env->prog,
+					    bpf_prog_size(insn_cnt),
+					    GFP_USER);
+		if (!new_prog)
+			return -ENOMEM;
+
+		new_prog->len = insn_cnt;
+
+		memmove(new_prog->insnsi + i + cnt, new_prog->insns + i + 1,
+			sizeof(*insn) * (insn_cnt - i - cnt));
+
+		/* copy substitute insns in place of load instruction */
+		memcpy(new_prog->insnsi + i, insn_buf, sizeof(*insn) * cnt);
+
+		/* adjust branches in the whole program */
+		adjust_branches(new_prog, i, cnt - 1);
+
+		/* keep walking new program and skip insns we just inserted */
+		env->prog = new_prog;
+		insn = new_prog->insnsi + i + cnt - 1;
+		i += cnt - 1;
+	}
+
+	return 0;
+}
+
 static void free_states(struct verifier_env *env)
 {
 	struct verifier_state_list *sl, *sln;
@@ -1903,13 +2018,13 @@ static void free_states(struct verifier_env *env)
 	kfree(env->explored_states);
 }
 
-int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
+int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
 {
 	char __user *log_ubuf = NULL;
 	struct verifier_env *env;
 	int ret = -EINVAL;
 
-	if (prog->len <= 0 || prog->len > BPF_MAXINSNS)
+	if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS)
 		return -E2BIG;
 
 	/* 'struct verifier_env' can be global, but since it's not small,
@@ -1919,7 +2034,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (!env)
 		return -ENOMEM;
 
-	env->prog = prog;
+	env->prog = *prog;
 
 	/* grab the mutex to protect few globals used by verifier */
 	mutex_lock(&bpf_verifier_lock);
@@ -1951,7 +2066,7 @@ int bpf_check(struct bpf_prog *prog, union bpf_attr *attr)
 	if (ret < 0)
 		goto skip_full_check;
 
-	env->explored_states = kcalloc(prog->len,
+	env->explored_states = kcalloc(env->prog->len,
 				       sizeof(struct verifier_state_list *),
 				       GFP_USER);
 	ret = -ENOMEM;
@@ -1968,6 +2083,10 @@ skip_full_check:
 	while (pop_stack(env, NULL) >= 0);
 	free_states(env);
 
+	if (ret == 0)
+		/* program is valid, convert *(u32*)(ctx + off) accesses */
+		ret = convert_ctx_accesses(env);
+
 	if (log_level && log_len >= log_size - 1) {
 		BUG_ON(log_len >= log_size);
 		/* verifier log exceeded user supplied buffer */
@@ -1983,18 +2102,18 @@ skip_full_check:
 
 	if (ret == 0 && env->used_map_cnt) {
 		/* if program passed verifier, update used_maps in bpf_prog_info */
-		prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
-						     sizeof(env->used_maps[0]),
-						     GFP_KERNEL);
+		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
+							  sizeof(env->used_maps[0]),
+							  GFP_KERNEL);
 
-		if (!prog->aux->used_maps) {
+		if (!env->prog->aux->used_maps) {
 			ret = -ENOMEM;
 			goto free_log_buf;
 		}
 
-		memcpy(prog->aux->used_maps, env->used_maps,
+		memcpy(env->prog->aux->used_maps, env->used_maps,
 		       sizeof(env->used_maps[0]) * env->used_map_cnt);
-		prog->aux->used_map_cnt = env->used_map_cnt;
+		env->prog->aux->used_map_cnt = env->used_map_cnt;
 
 		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
 		 * bpf_ld_imm64 instructions
@@ -2006,11 +2125,12 @@ free_log_buf:
 	if (log_level)
 		vfree(log_buf);
 free_env:
-	if (!prog->aux->used_maps)
+	if (!env->prog->aux->used_maps)
 		/* if we didn't copy map pointers into bpf_prog_info, release
 		 * them now. Otherwise free_bpf_prog_info() will release them.
 		 */
 		release_maps(env);
+	*prog = env->prog;
 	kfree(env);
 	mutex_unlock(&bpf_verifier_lock);
 	return ret;
diff --git a/net/core/filter.c b/net/core/filter.c
index 33310eee6134..4e9dd0ad0d5b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -150,10 +150,43 @@ static u64 __get_random_u32(u64 ctx, u64 a, u64 x, u64 r4, u64 r5)
 	return prandom_u32();
 }
 
+static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
+			      struct bpf_insn *insn_buf)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (skb_field) {
+	case SKF_AD_MARK:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sk_buff, mark));
+		break;
+
+	case SKF_AD_PKTTYPE:
+		*insn++ = BPF_LDX_MEM(BPF_B, dst_reg, src_reg, PKT_TYPE_OFFSET());
+		*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, PKT_TYPE_MAX);
+#ifdef __BIG_ENDIAN_BITFIELD
+		*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 5);
+#endif
+		break;
+
+	case SKF_AD_QUEUE:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, queue_mapping));
+		break;
+	}
+
+	return insn - insn_buf;
+}
+
 static bool convert_bpf_extensions(struct sock_filter *fp,
 				   struct bpf_insn **insnp)
 {
 	struct bpf_insn *insn = *insnp;
+	u32 cnt;
 
 	switch (fp->k) {
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
@@ -167,13 +200,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
-		*insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX,
-				      PKT_TYPE_OFFSET());
-		*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX);
-#ifdef __BIG_ENDIAN_BITFIELD
-		insn++;
-                *insn = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 5);
-#endif
+		cnt = convert_skb_access(SKF_AD_PKTTYPE, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_IFINDEX:
@@ -197,10 +225,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_MARK:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
-
-		*insn = BPF_LDX_MEM(BPF_W, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, mark));
+		cnt = convert_skb_access(SKF_AD_MARK, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_RXHASH:
@@ -211,10 +237,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_QUEUE:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, queue_mapping) != 2);
-
-		*insn = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				    offsetof(struct sk_buff, queue_mapping));
+		cnt = convert_skb_access(SKF_AD_QUEUE, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
@@ -1151,13 +1175,55 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type)
 {
-	/* skb fields cannot be accessed yet */
-	return false;
+	/* only read is allowed */
+	if (type != BPF_READ)
+		return false;
+
+	/* check bounds */
+	if (off < 0 || off >= sizeof(struct __sk_buff))
+		return false;
+
+	/* disallow misaligned access */
+	if (off % size != 0)
+		return false;
+
+	/* all __sk_buff fields are __u32 */
+	if (size != 4)
+		return false;
+
+	return true;
+}
+
+static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
+					struct bpf_insn *insn_buf)
+{
+	struct bpf_insn *insn = insn_buf;
+
+	switch (ctx_off) {
+	case offsetof(struct __sk_buff, len):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sk_buff, len));
+		break;
+
+	case offsetof(struct __sk_buff, mark):
+		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, pkt_type):
+		return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, queue_mapping):
+		return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+	}
+
+	return insn - insn_buf;
 }
 
 static const struct bpf_verifier_ops sk_filter_ops = {
 	.get_func_proto = sk_filter_func_proto,
 	.is_valid_access = sk_filter_is_valid_access,
+	.convert_ctx_access = sk_filter_convert_ctx_access,
 };
 
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
-- 
cgit 


From 9df85aaa43297cb12dc85155695dd1bfdf94f91d Mon Sep 17 00:00:00 2001
From: Antonios Motakis <a.motakis@virtualopensystems.com>
Date: Mon, 16 Mar 2015 14:08:43 -0600
Subject: vfio: platform: probe to devices on the platform bus

Driver to bind to Linux platform devices, and callbacks to discover their
resources to be used by the main VFIO PLATFORM code.

Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Baptiste Reynal <b.reynal@virtualopensystems.com>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Tested-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/platform/vfio_platform.c | 103 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h             |   1 +
 2 files changed, 104 insertions(+)
 create mode 100644 drivers/vfio/platform/vfio_platform.c

(limited to 'include/uapi/linux')

diff --git a/drivers/vfio/platform/vfio_platform.c b/drivers/vfio/platform/vfio_platform.c
new file mode 100644
index 000000000000..cef645c83996
--- /dev/null
+++ b/drivers/vfio/platform/vfio_platform.c
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2013 - Virtual Open Systems
+ * Author: Antonios Motakis <a.motakis@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vfio.h>
+#include <linux/platform_device.h>
+
+#include "vfio_platform_private.h"
+
+#define DRIVER_VERSION  "0.10"
+#define DRIVER_AUTHOR   "Antonios Motakis <a.motakis@virtualopensystems.com>"
+#define DRIVER_DESC     "VFIO for platform devices - User Level meta-driver"
+
+/* probing devices from the linux platform bus */
+
+static struct resource *get_platform_resource(struct vfio_platform_device *vdev,
+					      int num)
+{
+	struct platform_device *dev = (struct platform_device *) vdev->opaque;
+	int i;
+
+	for (i = 0; i < dev->num_resources; i++) {
+		struct resource *r = &dev->resource[i];
+
+		if (resource_type(r) & (IORESOURCE_MEM|IORESOURCE_IO)) {
+			if (!num)
+				return r;
+
+			num--;
+		}
+	}
+	return NULL;
+}
+
+static int get_platform_irq(struct vfio_platform_device *vdev, int i)
+{
+	struct platform_device *pdev = (struct platform_device *) vdev->opaque;
+
+	return platform_get_irq(pdev, i);
+}
+
+static int vfio_platform_probe(struct platform_device *pdev)
+{
+	struct vfio_platform_device *vdev;
+	int ret;
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+
+	vdev->opaque = (void *) pdev;
+	vdev->name = pdev->name;
+	vdev->flags = VFIO_DEVICE_FLAGS_PLATFORM;
+	vdev->get_resource = get_platform_resource;
+	vdev->get_irq = get_platform_irq;
+
+	ret = vfio_platform_probe_common(vdev, &pdev->dev);
+	if (ret)
+		kfree(vdev);
+
+	return ret;
+}
+
+static int vfio_platform_remove(struct platform_device *pdev)
+{
+	struct vfio_platform_device *vdev;
+
+	vdev = vfio_platform_remove_common(&pdev->dev);
+	if (vdev) {
+		kfree(vdev);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static struct platform_driver vfio_platform_driver = {
+	.probe		= vfio_platform_probe,
+	.remove		= vfio_platform_remove,
+	.driver	= {
+		.name	= "vfio-platform",
+		.owner	= THIS_MODULE,
+	},
+};
+
+module_platform_driver(vfio_platform_driver);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index 82889c30f4f5..ea9514b6bb5c 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -160,6 +160,7 @@ struct vfio_device_info {
 	__u32	flags;
 #define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
 #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
+#define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
-- 
cgit 


From 36fe431f2811fa3b5fed15d272c585d5a47977aa Mon Sep 17 00:00:00 2001
From: Antonios Motakis <a.motakis@virtualopensystems.com>
Date: Mon, 16 Mar 2015 14:08:44 -0600
Subject: vfio: amba: VFIO support for AMBA devices

Add support for discovering AMBA devices with VFIO and handle them
similarly to Linux platform devices.

Signed-off-by: Antonios Motakis <a.motakis@virtualopensystems.com>
Signed-off-by: Baptiste Reynal <b.reynal@virtualopensystems.com>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
---
 drivers/vfio/platform/vfio_amba.c | 115 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/vfio.h         |   1 +
 2 files changed, 116 insertions(+)
 create mode 100644 drivers/vfio/platform/vfio_amba.c

(limited to 'include/uapi/linux')

diff --git a/drivers/vfio/platform/vfio_amba.c b/drivers/vfio/platform/vfio_amba.c
new file mode 100644
index 000000000000..ff0331f72526
--- /dev/null
+++ b/drivers/vfio/platform/vfio_amba.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2013 - Virtual Open Systems
+ * Author: Antonios Motakis <a.motakis@virtualopensystems.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/vfio.h>
+#include <linux/amba/bus.h>
+
+#include "vfio_platform_private.h"
+
+#define DRIVER_VERSION  "0.10"
+#define DRIVER_AUTHOR   "Antonios Motakis <a.motakis@virtualopensystems.com>"
+#define DRIVER_DESC     "VFIO for AMBA devices - User Level meta-driver"
+
+/* probing devices from the AMBA bus */
+
+static struct resource *get_amba_resource(struct vfio_platform_device *vdev,
+					  int i)
+{
+	struct amba_device *adev = (struct amba_device *) vdev->opaque;
+
+	if (i == 0)
+		return &adev->res;
+
+	return NULL;
+}
+
+static int get_amba_irq(struct vfio_platform_device *vdev, int i)
+{
+	struct amba_device *adev = (struct amba_device *) vdev->opaque;
+	int ret = 0;
+
+	if (i < AMBA_NR_IRQS)
+		ret = adev->irq[i];
+
+	/* zero is an unset IRQ for AMBA devices */
+	return ret ? ret : -ENXIO;
+}
+
+static int vfio_amba_probe(struct amba_device *adev, const struct amba_id *id)
+{
+	struct vfio_platform_device *vdev;
+	int ret;
+
+	vdev = kzalloc(sizeof(*vdev), GFP_KERNEL);
+	if (!vdev)
+		return -ENOMEM;
+
+	vdev->name = kasprintf(GFP_KERNEL, "vfio-amba-%08x", adev->periphid);
+	if (!vdev->name) {
+		kfree(vdev);
+		return -ENOMEM;
+	}
+
+	vdev->opaque = (void *) adev;
+	vdev->flags = VFIO_DEVICE_FLAGS_AMBA;
+	vdev->get_resource = get_amba_resource;
+	vdev->get_irq = get_amba_irq;
+
+	ret = vfio_platform_probe_common(vdev, &adev->dev);
+	if (ret) {
+		kfree(vdev->name);
+		kfree(vdev);
+	}
+
+	return ret;
+}
+
+static int vfio_amba_remove(struct amba_device *adev)
+{
+	struct vfio_platform_device *vdev;
+
+	vdev = vfio_platform_remove_common(&adev->dev);
+	if (vdev) {
+		kfree(vdev->name);
+		kfree(vdev);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static struct amba_id pl330_ids[] = {
+	{ 0, 0 },
+};
+
+MODULE_DEVICE_TABLE(amba, pl330_ids);
+
+static struct amba_driver vfio_amba_driver = {
+	.probe = vfio_amba_probe,
+	.remove = vfio_amba_remove,
+	.id_table = pl330_ids,
+	.drv = {
+		.name = "vfio-amba",
+		.owner = THIS_MODULE,
+	},
+};
+
+module_amba_driver(vfio_amba_driver);
+
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index ea9514b6bb5c..b57b750c222f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -161,6 +161,7 @@ struct vfio_device_info {
 #define VFIO_DEVICE_FLAGS_RESET	(1 << 0)	/* Device supports reset */
 #define VFIO_DEVICE_FLAGS_PCI	(1 << 1)	/* vfio-pci device */
 #define VFIO_DEVICE_FLAGS_PLATFORM (1 << 2)	/* vfio-platform device */
+#define VFIO_DEVICE_FLAGS_AMBA  (1 << 3)	/* vfio-amba device */
 	__u32	num_regions;	/* Max region index + 1 */
 	__u32	num_irqs;	/* Max IRQ index + 1 */
 };
-- 
cgit 


From 41408c28f283b49202ae374b1c42bc8e9b33a048 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Fri, 6 Feb 2015 15:01:21 +0100
Subject: KVM: s390: Add MEMOP ioctls for reading/writing guest memory

On s390, we've got to make sure to hold the IPTE lock while accessing
logical memory. So let's add an ioctl for reading and writing logical
memory to provide this feature for userspace, too.
The maximum transfer size of this call is limited to 64kB to prevent
that the guest can trigger huge copy_from/to_user transfers. QEMU
currently only requests up to one or two pages so far, so 16*4kB seems
to be a reasonable limit here.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 46 ++++++++++++++++++++++++
 arch/s390/kvm/gaccess.c           | 22 ++++++++++++
 arch/s390/kvm/gaccess.h           |  2 ++
 arch/s390/kvm/kvm-s390.c          | 74 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          | 21 +++++++++++
 5 files changed, 165 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index ee47998ec368..281179d92a28 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2716,6 +2716,52 @@ The fields in each entry are defined as follows:
    eax, ebx, ecx, edx: the values returned by the cpuid instruction for
          this function/index combination
 
+4.89 KVM_S390_MEM_OP
+
+Capability: KVM_CAP_S390_MEM_OP
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_mem_op (in)
+Returns: = 0 on success,
+         < 0 on generic error (e.g. -EFAULT or -ENOMEM),
+         > 0 if an exception occurred while walking the page tables
+
+Read or write data from/to the logical (virtual) memory of a VPCU.
+
+Parameters are specified via the following structure:
+
+struct kvm_s390_mem_op {
+	__u64 gaddr;		/* the guest address */
+	__u64 flags;		/* flags */
+	__u32 size;		/* amount of bytes */
+	__u32 op;		/* type of operation */
+	__u64 buf;		/* buffer in userspace */
+	__u8 ar;		/* the access register number */
+	__u8 reserved[31];	/* should be set to 0 */
+};
+
+The type of operation is specified in the "op" field. It is either
+KVM_S390_MEMOP_LOGICAL_READ for reading from logical memory space or
+KVM_S390_MEMOP_LOGICAL_WRITE for writing to logical memory space. The
+KVM_S390_MEMOP_F_CHECK_ONLY flag can be set in the "flags" field to check
+whether the corresponding memory access would create an access exception
+(without touching the data in the memory at the destination). In case an
+access exception occurred while walking the MMU tables of the guest, the
+ioctl returns a positive error number to indicate the type of exception.
+This exception is also raised directly at the corresponding VCPU if the
+flag KVM_S390_MEMOP_F_INJECT_EXCEPTION is set in the "flags" field.
+
+The start address of the memory region has to be specified in the "gaddr"
+field, and the length of the region in the "size" field. "buf" is the buffer
+supplied by the userspace application where the read data should be written
+to for KVM_S390_MEMOP_LOGICAL_READ, or where the data that should be written
+is stored for a KVM_S390_MEMOP_LOGICAL_WRITE. "buf" is unused and can be NULL
+when KVM_S390_MEMOP_F_CHECK_ONLY is specified. "ar" designates the access
+register number to be used.
+
+The "reserved" field is meant for future extensions. It is not used by
+KVM with the currently defined set of flags.
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index ea38d716e24d..a7559f7207df 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -863,6 +863,28 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
 	return rc;
 }
 
+/**
+ * check_gva_range - test a range of guest virtual addresses for accessibility
+ */
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write)
+{
+	unsigned long gpa;
+	unsigned long currlen;
+	int rc = 0;
+
+	ipte_lock(vcpu);
+	while (length > 0 && !rc) {
+		currlen = min(length, PAGE_SIZE - (gva % PAGE_SIZE));
+		rc = guest_translate_address(vcpu, gva, ar, &gpa, is_write);
+		gva += currlen;
+		length -= currlen;
+	}
+	ipte_unlock(vcpu);
+
+	return rc;
+}
+
 /**
  * kvm_s390_check_low_addr_prot_real - check for low-address protection
  * @gra: Guest real address
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 835e557dabf4..ef03726cc661 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -157,6 +157,8 @@ int read_guest_lc(struct kvm_vcpu *vcpu, unsigned long gra, void *data,
 
 int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva,
 			    ar_t ar, unsigned long *gpa, int write);
+int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar,
+		    unsigned long length, int is_write);
 
 int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data,
 		 unsigned long len, int write);
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 610e90afadf2..b7ecef98b668 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -25,6 +25,7 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
+#include <linux/vmalloc.h>
 #include <asm/asm-offsets.h>
 #include <asm/lowcore.h>
 #include <asm/pgtable.h>
@@ -38,6 +39,8 @@
 #include "trace.h"
 #include "trace-s390.h"
 
+#define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
+
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
 struct kvm_stats_debugfs_item debugfs_entries[] = {
@@ -177,6 +180,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_USER_SIGP:
 		r = 1;
 		break;
+	case KVM_CAP_S390_MEM_OP:
+		r = MEM_OP_MAX_SIZE;
+		break;
 	case KVM_CAP_NR_VCPUS:
 	case KVM_CAP_MAX_VCPUS:
 		r = KVM_MAX_VCPUS;
@@ -2185,6 +2191,65 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	return r;
 }
 
+static long kvm_s390_guest_mem_op(struct kvm_vcpu *vcpu,
+				  struct kvm_s390_mem_op *mop)
+{
+	void __user *uaddr = (void __user *)mop->buf;
+	void *tmpbuf = NULL;
+	int r, srcu_idx;
+	const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
+				    | KVM_S390_MEMOP_F_CHECK_ONLY;
+
+	if (mop->flags & ~supported_flags)
+		return -EINVAL;
+
+	if (mop->size > MEM_OP_MAX_SIZE)
+		return -E2BIG;
+
+	if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
+		tmpbuf = vmalloc(mop->size);
+		if (!tmpbuf)
+			return -ENOMEM;
+	}
+
+	srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
+
+	switch (mop->op) {
+	case KVM_S390_MEMOP_LOGICAL_READ:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, false);
+			break;
+		}
+		r = read_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		if (r == 0) {
+			if (copy_to_user(uaddr, tmpbuf, mop->size))
+				r = -EFAULT;
+		}
+		break;
+	case KVM_S390_MEMOP_LOGICAL_WRITE:
+		if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+			r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size, true);
+			break;
+		}
+		if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+			r = -EFAULT;
+			break;
+		}
+		r = write_guest(vcpu, mop->gaddr, mop->ar, tmpbuf, mop->size);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
+
+	if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
+		kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+
+	vfree(tmpbuf);
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -2284,6 +2349,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
 		break;
 	}
+	case KVM_S390_MEM_OP: {
+		struct kvm_s390_mem_op mem_op;
+
+		if (copy_from_user(&mem_op, argp, sizeof(mem_op)) == 0)
+			r = kvm_s390_guest_mem_op(vcpu, &mem_op);
+		else
+			r = -EFAULT;
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 82634a492fe0..0e16f2c9f0de 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -365,6 +365,24 @@ struct kvm_translation {
 	__u8  pad[5];
 };
 
+/* for KVM_S390_MEM_OP */
+struct kvm_s390_mem_op {
+	/* in */
+	__u64 gaddr;		/* the guest address */
+	__u64 flags;		/* flags */
+	__u32 size;		/* amount of bytes */
+	__u32 op;		/* type of operation */
+	__u64 buf;		/* buffer in userspace */
+	__u8 ar;		/* the access register number */
+	__u8 reserved[31];	/* should be set to 0 */
+};
+/* types for kvm_s390_mem_op->op */
+#define KVM_S390_MEMOP_LOGICAL_READ	0
+#define KVM_S390_MEMOP_LOGICAL_WRITE	1
+/* flags for kvm_s390_mem_op->flags */
+#define KVM_S390_MEMOP_F_CHECK_ONLY		(1ULL << 0)
+#define KVM_S390_MEMOP_F_INJECT_EXCEPTION	(1ULL << 1)
+
 /* for KVM_INTERRUPT */
 struct kvm_interrupt {
 	/* in */
@@ -761,6 +779,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_CHECK_EXTENSION_VM 105
 #define KVM_CAP_S390_USER_SIGP 106
 #define KVM_CAP_S390_VECTOR_REGISTERS 107
+#define KVM_CAP_S390_MEM_OP 108
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1136,6 +1155,8 @@ struct kvm_s390_ucas_mapping {
 #define KVM_ARM_VCPU_INIT	  _IOW(KVMIO,  0xae, struct kvm_vcpu_init)
 #define KVM_ARM_PREFERRED_TARGET  _IOR(KVMIO,  0xaf, struct kvm_vcpu_init)
 #define KVM_GET_REG_LIST	  _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
+/* Available with KVM_CAP_S390_MEM_OP */
+#define KVM_S390_MEM_OP		  _IOW(KVMIO,  0xb1, struct kvm_s390_mem_op)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
cgit 


From e44fc8c9dab215ac0e398622a05574cffd5f5184 Mon Sep 17 00:00:00 2001
From: Ekaterina Tumanova <tumanova@linux.vnet.ibm.com>
Date: Fri, 30 Jan 2015 16:55:56 +0100
Subject: KVM: s390: introduce post handlers for STSI

The Store System Information (STSI) instruction currently collects all
information it relays to the caller in the kernel. Some information,
however, is only available in user space. An example of this is the
guest name: The kernel always sets "KVMGuest", but user space knows the
actual guest name.

This patch introduces a new exit, KVM_EXIT_S390_STSI, guarded by a
capability that can be enabled by user space if it wants to be able to
insert such data. User space will be provided with the target buffer
and the requested STSI function code.

Reviewed-by: Eric Farman <farman@linux.vnet.ibm.com>
Reviewed-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Ekaterina Tumanova <tumanova@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 28 ++++++++++++++++++++++++++++
 arch/s390/include/asm/kvm_host.h  |  1 +
 arch/s390/kvm/kvm-s390.c          |  5 +++++
 arch/s390/kvm/priv.c              | 17 ++++++++++++++++-
 include/uapi/linux/kvm.h          | 11 +++++++++++
 5 files changed, 61 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 281179d92a28..c1fcb7a3c125 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3304,3 +3304,31 @@ Returns: 0 on success, negative value on error
 Allows use of the vector registers introduced with z13 processor, and
 provides for the synchronization between host and user space.  Will
 return -EINVAL if the machine does not support vectors.
+
+7.4 KVM_CAP_S390_USER_STSI
+
+Architectures: s390
+Parameters: none
+
+This capability allows post-handlers for the STSI instruction. After
+initial handling in the kernel, KVM exits to user space with
+KVM_EXIT_S390_STSI to allow user space to insert further data.
+
+Before exiting to userspace, kvm handlers should fill in s390_stsi field of
+vcpu->run:
+struct {
+	__u64 addr;
+	__u8 ar;
+	__u8 reserved;
+	__u8 fc;
+	__u8 sel1;
+	__u16 sel2;
+} s390_stsi;
+
+@addr - guest address of STSI SYSIB
+@fc   - function code
+@sel1 - selector 1
+@sel2 - selector 2
+@ar   - access register number
+
+KVM handlers should exit to userspace with rc = -EREMOTE.
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 347a3333d618..2356a8c660b3 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -565,6 +565,7 @@ struct kvm_arch{
 	int use_vectors;
 	int user_cpu_state_ctrl;
 	int user_sigp;
+	int user_stsi;
 	struct s390_io_adapter *adapters[MAX_S390_IO_ADAPTERS];
 	wait_queue_head_t ipte_wq;
 	int ipte_lock_count;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index b7ecef98b668..fdfa10662700 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -178,6 +178,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_VM_ATTRIBUTES:
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_S390_USER_SIGP:
+	case KVM_CAP_S390_USER_STSI:
 		r = 1;
 		break;
 	case KVM_CAP_S390_MEM_OP:
@@ -280,6 +281,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
 		kvm->arch.use_vectors = MACHINE_HAS_VX;
 		r = MACHINE_HAS_VX ? 0 : -EINVAL;
 		break;
+	case KVM_CAP_S390_USER_STSI:
+		kvm->arch.user_stsi = 1;
+		r = 0;
+		break;
 	default:
 		r = -EINVAL;
 		break;
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index f4fe02e84326..5e4658d20c77 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -496,6 +496,17 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem)
 	ASCEBC(mem->vm[0].cpi, 16);
 }
 
+static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar,
+				 u8 fc, u8 sel1, u16 sel2)
+{
+	vcpu->run->exit_reason = KVM_EXIT_S390_STSI;
+	vcpu->run->s390_stsi.addr = addr;
+	vcpu->run->s390_stsi.ar = ar;
+	vcpu->run->s390_stsi.fc = fc;
+	vcpu->run->s390_stsi.sel1 = sel1;
+	vcpu->run->s390_stsi.sel2 = sel2;
+}
+
 static int handle_stsi(struct kvm_vcpu *vcpu)
 {
 	int fc = (vcpu->run->s.regs.gprs[0] & 0xf0000000) >> 28;
@@ -556,11 +567,15 @@ static int handle_stsi(struct kvm_vcpu *vcpu)
 		rc = kvm_s390_inject_prog_cond(vcpu, rc);
 		goto out;
 	}
+	if (vcpu->kvm->arch.user_stsi) {
+		insert_stsi_usr_data(vcpu, operand2, ar, fc, sel1, sel2);
+		rc = -EREMOTE;
+	}
 	trace_kvm_s390_handle_stsi(vcpu, fc, sel1, sel2, operand2);
 	free_page(mem);
 	kvm_s390_set_psw_cc(vcpu, 0);
 	vcpu->run->s.regs.gprs[0] = 0;
-	return 0;
+	return rc;
 out_no_data:
 	kvm_s390_set_psw_cc(vcpu, 3);
 out:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 0e16f2c9f0de..57445ef88097 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -172,6 +172,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_S390_TSCH        22
 #define KVM_EXIT_EPR              23
 #define KVM_EXIT_SYSTEM_EVENT     24
+#define KVM_EXIT_S390_STSI        25
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -309,6 +310,15 @@ struct kvm_run {
 			__u32 type;
 			__u64 flags;
 		} system_event;
+		/* KVM_EXIT_S390_STSI */
+		struct {
+			__u64 addr;
+			__u8 ar;
+			__u8 reserved;
+			__u8 fc;
+			__u8 sel1;
+			__u16 sel2;
+		} s390_stsi;
 		/* Fix the size of the union. */
 		char padding[256];
 	};
@@ -780,6 +790,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_USER_SIGP 106
 #define KVM_CAP_S390_VECTOR_REGISTERS 107
 #define KVM_CAP_S390_MEM_OP 108
+#define KVM_CAP_S390_USER_STSI 109
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit 


From 30ee2a984f07b00895e0e01d78859b3aff9307c7 Mon Sep 17 00:00:00 2001
From: "Jason J. Herne" <jjherne@linux.vnet.ibm.com>
Date: Tue, 23 Sep 2014 09:23:01 -0400
Subject: KVM: s390: Create ioctl for Getting/Setting guest storage keys

Provide the KVM_S390_GET_SKEYS and KVM_S390_SET_SKEYS ioctl which can be used
to get/set guest storage keys. This functionality is needed for live migration
of s390 guests that use storage keys.

Signed-off-by: Jason J. Herne <jjherne@linux.vnet.ibm.com>
Reviewed-by: David Hildenbrand <dahi@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt |  58 ++++++++++++++++++
 arch/s390/kvm/kvm-s390.c          | 123 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  14 +++++
 3 files changed, 195 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index c1fcb7a3c125..0d7fc66289a0 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2762,6 +2762,64 @@ register number to be used.
 The "reserved" field is meant for future extensions. It is not used by
 KVM with the currently defined set of flags.
 
+4.90 KVM_S390_GET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, KVM_S390_GET_KEYS_NONE if guest is not using storage
+         keys, negative value on error
+
+This ioctl is used to get guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+
+struct kvm_s390_skeys {
+	__u64 start_gfn;
+	__u64 count;
+	__u64 skeydata_addr;
+	__u32 flags;
+	__u32 reserved[9];
+};
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to get.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer large enough to hold count
+bytes. This buffer will be filled with storage key data by the ioctl.
+
+4.91 KVM_S390_SET_SKEYS
+
+Capability: KVM_CAP_S390_SKEYS
+Architectures: s390
+Type: vm ioctl
+Parameters: struct kvm_s390_skeys
+Returns: 0 on success, negative value on error
+
+This ioctl is used to set guest storage key values on the s390
+architecture. The ioctl takes parameters via the kvm_s390_skeys struct.
+See section on KVM_S390_GET_SKEYS for struct definition.
+
+The start_gfn field is the number of the first guest frame whose storage keys
+you want to set.
+
+The count field is the number of consecutive frames (starting from start_gfn)
+whose storage keys to get. The count field must be at least 1 and the maximum
+allowed value is defined as KVM_S390_SKEYS_ALLOC_MAX. Values outside this range
+will cause the ioctl to return -EINVAL.
+
+The skeydata_addr field is the address to a buffer containing count bytes of
+storage keys. Each byte in the buffer will be set as the storage key for a
+single frame starting at start_gfn for count frames.
+
+Note: If any architecturally invalid key value is found in the given data then
+the ioctl will return -EINVAL.
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fdfa10662700..0dc22baa0a57 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -179,6 +179,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MP_STATE:
 	case KVM_CAP_S390_USER_SIGP:
 	case KVM_CAP_S390_USER_STSI:
+	case KVM_CAP_S390_SKEYS:
 		r = 1;
 		break;
 	case KVM_CAP_S390_MEM_OP:
@@ -729,6 +730,108 @@ static int kvm_s390_vm_has_attr(struct kvm *kvm, struct kvm_device_attr *attr)
 	return ret;
 }
 
+static long kvm_s390_get_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	unsigned long curkey;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Is this guest using storage keys? */
+	if (!mm_use_skey(current->mm))
+		return KVM_S390_GET_SKEYS_NONE;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);
+	if (!keys)
+		return -ENOMEM;
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		curkey = get_guest_storage_key(current->mm, hva);
+		if (IS_ERR_VALUE(curkey)) {
+			r = curkey;
+			goto out;
+		}
+		keys[i] = curkey;
+	}
+
+	r = copy_to_user((uint8_t __user *)args->skeydata_addr, keys,
+			 sizeof(uint8_t) * args->count);
+	if (r)
+		r = -EFAULT;
+out:
+	kvfree(keys);
+	return r;
+}
+
+static long kvm_s390_set_skeys(struct kvm *kvm, struct kvm_s390_skeys *args)
+{
+	uint8_t *keys;
+	uint64_t hva;
+	int i, r = 0;
+
+	if (args->flags != 0)
+		return -EINVAL;
+
+	/* Enforce sane limit on memory allocation */
+	if (args->count < 1 || args->count > KVM_S390_SKEYS_MAX)
+		return -EINVAL;
+
+	keys = kmalloc_array(args->count, sizeof(uint8_t),
+			     GFP_KERNEL | __GFP_NOWARN);
+	if (!keys)
+		keys = vmalloc(sizeof(uint8_t) * args->count);
+	if (!keys)
+		return -ENOMEM;
+
+	r = copy_from_user(keys, (uint8_t __user *)args->skeydata_addr,
+			   sizeof(uint8_t) * args->count);
+	if (r) {
+		r = -EFAULT;
+		goto out;
+	}
+
+	/* Enable storage key handling for the guest */
+	s390_enable_skey();
+
+	for (i = 0; i < args->count; i++) {
+		hva = gfn_to_hva(kvm, args->start_gfn + i);
+		if (kvm_is_error_hva(hva)) {
+			r = -EFAULT;
+			goto out;
+		}
+
+		/* Lowest order bit is reserved */
+		if (keys[i] & 0x01) {
+			r = -EINVAL;
+			goto out;
+		}
+
+		r = set_guest_storage_key(current->mm, hva,
+					  (unsigned long)keys[i], 0);
+		if (r)
+			goto out;
+	}
+out:
+	kvfree(keys);
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -788,6 +891,26 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = kvm_s390_vm_has_attr(kvm, &attr);
 		break;
 	}
+	case KVM_S390_GET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_get_skeys(kvm, &args);
+		break;
+	}
+	case KVM_S390_SET_SKEYS: {
+		struct kvm_s390_skeys args;
+
+		r = -EFAULT;
+		if (copy_from_user(&args, argp,
+				   sizeof(struct kvm_s390_skeys)))
+			break;
+		r = kvm_s390_set_skeys(kvm, &args);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 57445ef88097..1162ef7a3fa1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -147,6 +147,16 @@ struct kvm_pit_config {
 
 #define KVM_PIT_SPEAKER_DUMMY     1
 
+struct kvm_s390_skeys {
+	__u64 start_gfn;
+	__u64 count;
+	__u64 skeydata_addr;
+	__u32 flags;
+	__u32 reserved[9];
+};
+#define KVM_S390_GET_SKEYS_NONE   1
+#define KVM_S390_SKEYS_MAX        1048576
+
 #define KVM_EXIT_UNKNOWN          0
 #define KVM_EXIT_EXCEPTION        1
 #define KVM_EXIT_IO               2
@@ -791,6 +801,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_VECTOR_REGISTERS 107
 #define KVM_CAP_S390_MEM_OP 108
 #define KVM_CAP_S390_USER_STSI 109
+#define KVM_CAP_S390_SKEYS 110
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1168,6 +1179,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_GET_REG_LIST	  _IOWR(KVMIO, 0xb0, struct kvm_reg_list)
 /* Available with KVM_CAP_S390_MEM_OP */
 #define KVM_S390_MEM_OP		  _IOW(KVMIO,  0xb1, struct kvm_s390_mem_op)
+/* Available with KVM_CAP_S390_SKEYS */
+#define KVM_S390_GET_SKEYS      _IOW(KVMIO, 0xb2, struct kvm_s390_skeys)
+#define KVM_S390_SET_SKEYS      _IOW(KVMIO, 0xb3, struct kvm_s390_skeys)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
cgit 


From c24973957975403521ca76a776c2dfd12fbe9add Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Mon, 16 Mar 2015 18:06:02 -0700
Subject: bpf: allow BPF programs access 'protocol' and 'vlan_tci' fields

as a follow on to patch 70006af95515 ("bpf: allow eBPF access skb fields")
this patch allows 'protocol' and 'vlan_tci' fields to be accessible
from extended BPF programs.

The usage of 'protocol', 'vlan_present' and 'vlan_tci' fields is the same as
corresponding SKF_AD_PROTOCOL, SKF_AD_VLAN_TAG_PRESENT and SKF_AD_VLAN_TAG
accesses in classic BPF.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h    |  3 ++
 net/core/filter.c           | 72 +++++++++++++++++++++++++++++++--------------
 samples/bpf/test_verifier.c |  9 ++++++
 3 files changed, 62 insertions(+), 22 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 929545a27546..1623047af463 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -178,6 +178,9 @@ struct __sk_buff {
 	__u32 pkt_type;
 	__u32 mark;
 	__u32 queue_mapping;
+	__u32 protocol;
+	__u32 vlan_present;
+	__u32 vlan_tci;
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index 4e9dd0ad0d5b..b95ae7fe7e4f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -177,6 +177,35 @@ static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
 		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
 				      offsetof(struct sk_buff, queue_mapping));
 		break;
+
+	case SKF_AD_PROTOCOL:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
+
+		/* dst_reg = *(u16 *) (src_reg + offsetof(protocol)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, protocol));
+		/* dst_reg = ntohs(dst_reg) [emitting a nop or swap16] */
+		*insn++ = BPF_ENDIAN(BPF_FROM_BE, dst_reg, 16);
+		break;
+
+	case SKF_AD_VLAN_TAG:
+	case SKF_AD_VLAN_TAG_PRESENT:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
+		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+
+		/* dst_reg = *(u16 *) (src_reg + offsetof(vlan_tci)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, vlan_tci));
+		if (skb_field == SKF_AD_VLAN_TAG) {
+			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg,
+						~VLAN_TAG_PRESENT);
+		} else {
+			/* dst_reg >>= 12 */
+			*insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, 12);
+			/* dst_reg &= 1 */
+			*insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, 1);
+		}
+		break;
 	}
 
 	return insn - insn_buf;
@@ -190,13 +219,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 
 	switch (fp->k) {
 	case SKF_AD_OFF + SKF_AD_PROTOCOL:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, protocol) != 2);
-
-		/* A = *(u16 *) (CTX + offsetof(protocol)) */
-		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				      offsetof(struct sk_buff, protocol));
-		/* A = ntohs(A) [emitting a nop or swap16] */
-		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
+		cnt = convert_skb_access(SKF_AD_PROTOCOL, BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PKTTYPE:
@@ -242,22 +266,15 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		break;
 
 	case SKF_AD_OFF + SKF_AD_VLAN_TAG:
-	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
-		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_tci) != 2);
-		BUILD_BUG_ON(VLAN_TAG_PRESENT != 0x1000);
+		cnt = convert_skb_access(SKF_AD_VLAN_TAG,
+					 BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
+		break;
 
-		/* A = *(u16 *) (CTX + offsetof(vlan_tci)) */
-		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
-				      offsetof(struct sk_buff, vlan_tci));
-		if (fp->k == SKF_AD_OFF + SKF_AD_VLAN_TAG) {
-			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A,
-					      ~VLAN_TAG_PRESENT);
-		} else {
-			/* A >>= 12 */
-			*insn++ = BPF_ALU32_IMM(BPF_RSH, BPF_REG_A, 12);
-			/* A &= 1 */
-			*insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, 1);
-		}
+	case SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT:
+		cnt = convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+					 BPF_REG_A, BPF_REG_CTX, insn);
+		insn += cnt - 1;
 		break;
 
 	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
@@ -1215,6 +1232,17 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 
 	case offsetof(struct __sk_buff, queue_mapping):
 		return convert_skb_access(SKF_AD_QUEUE, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, protocol):
+		return convert_skb_access(SKF_AD_PROTOCOL, dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, vlan_present):
+		return convert_skb_access(SKF_AD_VLAN_TAG_PRESENT,
+					  dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, vlan_tci):
+		return convert_skb_access(SKF_AD_VLAN_TAG,
+					  dst_reg, src_reg, insn);
 	}
 
 	return insn - insn_buf;
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index df6dbb6576f6..75d561f9fd6a 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -658,6 +658,15 @@ static struct bpf_test tests[] = {
 			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 				    offsetof(struct __sk_buff, queue_mapping)),
 			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, protocol)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_present)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, vlan_tci)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 0),
 			BPF_EXIT_INSN(),
 		},
 		.result = ACCEPT,
-- 
cgit 


From 847aac644e92e5624f2c153bab409bf713d5ff9a Mon Sep 17 00:00:00 2001
From: Li Xi <pkuelelixi@gmail.com>
Date: Thu, 19 Mar 2015 04:04:53 +0900
Subject: vfs: Add general support to enforce project quota limits

This patch adds support for a new quota type PRJQUOTA for project quota
enforcement. Also a new method get_projid() is added into dquot_operations
structure.

Signed-off-by: Li Xi <lixi@ddn.com>
Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c           | 20 +++++++++++++++++---
 fs/quota/quotaio_v2.h      |  6 ++++--
 include/linux/quota.h      |  2 ++
 include/uapi/linux/quota.h |  6 ++++--
 4 files changed, 27 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 327a58448592..ecc25cf0ee6e 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1163,8 +1163,8 @@ static int need_print_warning(struct dquot_warn *warn)
 			return uid_eq(current_fsuid(), warn->w_dq_id.uid);
 		case GRPQUOTA:
 			return in_group_p(warn->w_dq_id.gid);
-		case PRJQUOTA:	/* Never taken... Just make gcc happy */
-			return 0;
+		case PRJQUOTA:
+			return 1;
 	}
 	return 0;
 }
@@ -1405,6 +1405,9 @@ static void __dquot_initialize(struct inode *inode, int type)
 	/* First get references to structures we might need. */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		struct kqid qid;
+		kprojid_t projid;
+		int rc;
+
 		got[cnt] = NULL;
 		if (type != -1 && cnt != type)
 			continue;
@@ -1415,6 +1418,10 @@ static void __dquot_initialize(struct inode *inode, int type)
 		 */
 		if (dquots[cnt])
 			continue;
+
+		if (!sb_has_quota_active(sb, cnt))
+			continue;
+
 		init_needed = 1;
 
 		switch (cnt) {
@@ -1424,6 +1431,12 @@ static void __dquot_initialize(struct inode *inode, int type)
 		case GRPQUOTA:
 			qid = make_kqid_gid(inode->i_gid);
 			break;
+		case PRJQUOTA:
+			rc = inode->i_sb->dq_op->get_projid(inode, &projid);
+			if (rc)
+				continue;
+			qid = make_kqid_projid(projid);
+			break;
 		}
 		got[cnt] = dqget(sb, qid);
 	}
@@ -2176,7 +2189,8 @@ static int vfs_load_quota_inode(struct inode *inode, int type, int format_id,
 		error = -EROFS;
 		goto out_fmt;
 	}
-	if (!sb->s_op->quota_write || !sb->s_op->quota_read) {
+	if (!sb->s_op->quota_write || !sb->s_op->quota_read ||
+	    (type == PRJQUOTA && sb->dq_op->get_projid == NULL)) {
 		error = -EINVAL;
 		goto out_fmt;
 	}
diff --git a/fs/quota/quotaio_v2.h b/fs/quota/quotaio_v2.h
index f1966b42c2fd..4e95430093d9 100644
--- a/fs/quota/quotaio_v2.h
+++ b/fs/quota/quotaio_v2.h
@@ -13,12 +13,14 @@
  */
 #define V2_INITQMAGICS {\
 	0xd9c01f11,	/* USRQUOTA */\
-	0xd9c01927	/* GRPQUOTA */\
+	0xd9c01927,	/* GRPQUOTA */\
+	0xd9c03f14,	/* PRJQUOTA */\
 }
 
 #define V2_INITQVERSIONS {\
 	1,		/* USRQUOTA */\
-	1		/* GRPQUOTA */\
+	1,		/* GRPQUOTA */\
+	1,		/* PRJQUOTA */\
 }
 
 /* First generic header */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cf910d1f8efa..b2505acfd3c0 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -50,6 +50,7 @@
 
 #undef USRQUOTA
 #undef GRPQUOTA
+#undef PRJQUOTA
 enum quota_type {
 	USRQUOTA = 0,		/* element used for user quotas */
 	GRPQUOTA = 1,		/* element used for group quotas */
@@ -319,6 +320,7 @@ struct dquot_operations {
 	/* get reserved quota for delayed alloc, value returned is managed by
 	 * quota code only */
 	qsize_t *(*get_reserved_space) (struct inode *);
+	int (*get_projid) (struct inode *, kprojid_t *);/* Get project ID */
 };
 
 struct path;
diff --git a/include/uapi/linux/quota.h b/include/uapi/linux/quota.h
index 1f49b8341c99..9c95b2c1c88a 100644
--- a/include/uapi/linux/quota.h
+++ b/include/uapi/linux/quota.h
@@ -36,11 +36,12 @@
 #include <linux/errno.h>
 #include <linux/types.h>
 
-#define __DQUOT_VERSION__	"dquot_6.5.2"
+#define __DQUOT_VERSION__	"dquot_6.6.0"
 
-#define MAXQUOTAS 2
+#define MAXQUOTAS 3
 #define USRQUOTA  0		/* element used for user quotas */
 #define GRPQUOTA  1		/* element used for group quotas */
+#define PRJQUOTA  2		/* element used for project quotas */
 
 /*
  * Definitions for the default names of the quotas files.
@@ -48,6 +49,7 @@
 #define INITQFNAMES { \
 	"user",    /* USRQUOTA */ \
 	"group",   /* GRPQUOTA */ \
+	"project", /* PRJQUOTA */ \
 	"undefined", \
 };
 
-- 
cgit 


From db24a9044ee191c397dcd1c6574f56d67d7c8df5 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 17 Mar 2015 20:23:15 -0600
Subject: net: add support for phys_port_name

Similar to port id allow netdevices to specify port names and export
the name via sysfs. Drivers can implement the netdevice operation to
assist udev in having sane default names for the devices using the
rule:

$ cat /etc/udev/rules.d/80-net-setup-link.rules
SUBSYSTEM=="net", ACTION=="add", ATTR{phys_port_name}!="",
NAME="$attr{phys_port_name}"

Use of phys_name versus phys_id was suggested-by Jiri Pirko.

Signed-off-by: David Ahern <dsahern@gmail.com>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net |  8 ++++++++
 include/linux/netdevice.h                 |  4 ++++
 include/uapi/linux/if_link.h              |  1 +
 net/core/dev.c                            | 18 ++++++++++++++++++
 net/core/net-sysfs.c                      | 23 +++++++++++++++++++++++
 net/core/rtnetlink.c                      | 21 +++++++++++++++++++++
 6 files changed, 75 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index beb8ec4dabbc..5ecfd72ba684 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -188,6 +188,14 @@ Description:
 		Indicates the interface unique physical port identifier within
 		the NIC, as a string.
 
+What:		/sys/class/net/<iface>/phys_port_name
+Date:		March 2015
+KernelVersion:	4.0
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates the interface physical port name within the NIC,
+		as a string.
+
 What:		/sys/class/net/<iface>/speed
 Date:		October 2009
 KernelVersion:	2.6.33
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 76c5de4978a8..ec8f9b5f6500 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1164,6 +1164,8 @@ struct net_device_ops {
 						      bool new_carrier);
 	int			(*ndo_get_phys_port_id)(struct net_device *dev,
 							struct netdev_phys_item_id *ppid);
+	int			(*ndo_get_phys_port_name)(struct net_device *dev,
+							  char *name, size_t len);
 	void			(*ndo_add_vxlan_port)(struct  net_device *dev,
 						      sa_family_t sa_family,
 						      __be16 port);
@@ -2947,6 +2949,8 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *);
 int dev_change_carrier(struct net_device *, bool new_carrier);
 int dev_get_phys_port_id(struct net_device *dev,
 			 struct netdev_phys_item_id *ppid);
+int dev_get_phys_port_name(struct net_device *dev,
+			   char *name, size_t len);
 struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev);
 struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
 				    struct netdev_queue *txq, int *ret);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 756436e1ce89..7158fd00a109 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -147,6 +147,7 @@ enum {
 	IFLA_CARRIER_CHANGES,
 	IFLA_PHYS_SWITCH_ID,
 	IFLA_LINK_NETNSID,
+	IFLA_PHYS_PORT_NAME,
 	__IFLA_MAX
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 39fe369b46ad..a1f24151db5b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5911,6 +5911,24 @@ int dev_get_phys_port_id(struct net_device *dev,
 }
 EXPORT_SYMBOL(dev_get_phys_port_id);
 
+/**
+ *	dev_get_phys_port_name - Get device physical port name
+ *	@dev: device
+ *	@name: port name
+ *
+ *	Get device physical port name
+ */
+int dev_get_phys_port_name(struct net_device *dev,
+			   char *name, size_t len)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (!ops->ndo_get_phys_port_name)
+		return -EOPNOTSUPP;
+	return ops->ndo_get_phys_port_name(dev, name, len);
+}
+EXPORT_SYMBOL(dev_get_phys_port_name);
+
 /**
  *	dev_new_index	-	allocate an ifindex
  *	@net: the applicable net namespace
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 7e58bd7ec232..cc5cf689809c 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -418,6 +418,28 @@ static ssize_t phys_port_id_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(phys_port_id);
 
+static ssize_t phys_port_name_show(struct device *dev,
+				   struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+	ssize_t ret = -EINVAL;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (dev_isalive(netdev)) {
+		char name[IFNAMSIZ];
+
+		ret = dev_get_phys_port_name(netdev, name, sizeof(name));
+		if (!ret)
+			ret = sprintf(buf, "%s\n", name);
+	}
+	rtnl_unlock();
+
+	return ret;
+}
+static DEVICE_ATTR_RO(phys_port_name);
+
 static ssize_t phys_switch_id_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
@@ -465,6 +487,7 @@ static struct attribute *net_class_attrs[] = {
 	&dev_attr_tx_queue_len.attr,
 	&dev_attr_gro_flush_timeout.attr,
 	&dev_attr_phys_port_id.attr,
+	&dev_attr_phys_port_name.attr,
 	&dev_attr_phys_switch_id.attr,
 	NULL,
 };
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 25b4b5d23485..6abe634c666c 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -982,6 +982,24 @@ static int rtnl_phys_port_id_fill(struct sk_buff *skb, struct net_device *dev)
 	return 0;
 }
 
+static int rtnl_phys_port_name_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	char name[IFNAMSIZ];
+	int err;
+
+	err = dev_get_phys_port_name(dev, name, sizeof(name));
+	if (err) {
+		if (err == -EOPNOTSUPP)
+			return 0;
+		return err;
+	}
+
+	if (nla_put(skb, IFLA_PHYS_PORT_NAME, strlen(name), name))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
 static int rtnl_phys_switch_id_fill(struct sk_buff *skb, struct net_device *dev)
 {
 	int err;
@@ -1072,6 +1090,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	if (rtnl_phys_port_id_fill(skb, dev))
 		goto nla_put_failure;
 
+	if (rtnl_phys_port_name_fill(skb, dev))
+		goto nla_put_failure;
+
 	if (rtnl_phys_switch_id_fill(skb, dev))
 		goto nla_put_failure;
 
-- 
cgit 


From af615762e972be0c66cf1d156ca4fac13b93c0b0 Mon Sep 17 00:00:00 2001
From: Jörg Thalheim <joerg@higgsboson.tk>
Date: Wed, 18 Mar 2015 10:06:58 +0100
Subject: bridge: add ageing_time, stp_state, priority over netlink
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Jörg Thalheim <joerg@higgsboson.tk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h |  3 +++
 net/bridge/br_netlink.c      | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7158fd00a109..f5f5edd5ae5f 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -225,6 +225,9 @@ enum {
 	IFLA_BR_FORWARD_DELAY,
 	IFLA_BR_HELLO_TIME,
 	IFLA_BR_MAX_AGE,
+	IFLA_BR_AGEING_TIME,
+	IFLA_BR_STP_STATE,
+	IFLA_BR_PRIORITY,
 	__IFLA_BR_MAX,
 };
 
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 8bc6b67457dc..e1115a224a95 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -733,6 +733,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = {
 	[IFLA_BR_FORWARD_DELAY]	= { .type = NLA_U32 },
 	[IFLA_BR_HELLO_TIME]	= { .type = NLA_U32 },
 	[IFLA_BR_MAX_AGE]	= { .type = NLA_U32 },
+	[IFLA_BR_AGEING_TIME] = { .type = NLA_U32 },
+	[IFLA_BR_STP_STATE] = { .type = NLA_U32 },
+	[IFLA_BR_PRIORITY] = { .type = NLA_U16 },
 };
 
 static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
@@ -762,6 +765,24 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 			return err;
 	}
 
+	if (data[IFLA_BR_AGEING_TIME]) {
+		u32 ageing_time = nla_get_u32(data[IFLA_BR_AGEING_TIME]);
+
+		br->ageing_time = clock_t_to_jiffies(ageing_time);
+	}
+
+	if (data[IFLA_BR_STP_STATE]) {
+		u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]);
+
+		br_stp_set_enabled(br, stp_enabled);
+	}
+
+	if (data[IFLA_BR_PRIORITY]) {
+		u32 priority = nla_get_u16(data[IFLA_BR_PRIORITY]);
+
+		br_stp_set_bridge_priority(br, priority);
+	}
+
 	return 0;
 }
 
@@ -770,6 +791,9 @@ static size_t br_get_size(const struct net_device *brdev)
 	return nla_total_size(sizeof(u32)) +	/* IFLA_BR_FORWARD_DELAY  */
 	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_HELLO_TIME */
 	       nla_total_size(sizeof(u32)) +	/* IFLA_BR_MAX_AGE */
+	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_AGEING_TIME */
+	       nla_total_size(sizeof(u32)) +    /* IFLA_BR_STP_STATE */
+	       nla_total_size(sizeof(u16)) +    /* IFLA_BR_PRIORITY */
 	       0;
 }
 
@@ -779,10 +803,16 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev)
 	u32 forward_delay = jiffies_to_clock_t(br->forward_delay);
 	u32 hello_time = jiffies_to_clock_t(br->hello_time);
 	u32 age_time = jiffies_to_clock_t(br->max_age);
+	u32 ageing_time = jiffies_to_clock_t(br->ageing_time);
+	u32 stp_enabled = br->stp_enabled;
+	u16 priority = (br->bridge_id.prio[0] << 8) | br->bridge_id.prio[1];
 
 	if (nla_put_u32(skb, IFLA_BR_FORWARD_DELAY, forward_delay) ||
 	    nla_put_u32(skb, IFLA_BR_HELLO_TIME, hello_time) ||
-	    nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time))
+	    nla_put_u32(skb, IFLA_BR_MAX_AGE, age_time) ||
+	    nla_put_u32(skb, IFLA_BR_AGEING_TIME, ageing_time) ||
+	    nla_put_u32(skb, IFLA_BR_STP_STATE, stp_enabled) ||
+	    nla_put_u16(skb, IFLA_BR_PRIORITY, priority))
 		return -EMSGSIZE;
 
 	return 0;
-- 
cgit 


From 94caee8c312d96522bcdae88791aaa9ebcd5f22c Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 20 Mar 2015 15:11:11 +0100
Subject: ebpf: add sched_act_type and map it to sk_filter's verifier ops

In order to prepare eBPF support for tc action, we need to add
sched_act_type, so that the eBPF verifier is aware of what helper
function act_bpf may use, that it can load skb data and read out
currently available skb fields.

This is bascially analogous to 96be4325f443 ("ebpf: add sched_cls_type
and map it to sk_filter's verifier ops").

BPF_PROG_TYPE_SCHED_CLS and BPF_PROG_TYPE_SCHED_ACT need to be
separate since both will have a different set of functionality in
future (classifier vs action), thus we won't run into ABI troubles
when the point in time comes to diverge functionality from the
classifier.

The future plan for act_bpf would be that it will be able to write
into skb->data and alter selected fields mirrored in struct __sk_buff.

For an initial support, it's sufficient to map it to sk_filter_ops.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jiri Pirko <jiri@resnulli.us>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 kernel/bpf/verifier.c    | 1 +
 net/core/filter.c        | 6 ++++++
 3 files changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1623047af463..3dd314a45d0d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -119,6 +119,7 @@ enum bpf_prog_type {
 	BPF_PROG_TYPE_UNSPEC,
 	BPF_PROG_TYPE_SOCKET_FILTER,
 	BPF_PROG_TYPE_SCHED_CLS,
+	BPF_PROG_TYPE_SCHED_ACT,
 };
 
 #define BPF_PSEUDO_MAP_FD	1
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c22ebd36fa4b..0e714f799ec0 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1180,6 +1180,7 @@ static bool may_access_skb(enum bpf_prog_type type)
 	switch (type) {
 	case BPF_PROG_TYPE_SOCKET_FILTER:
 	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
 		return true;
 	default:
 		return false;
diff --git a/net/core/filter.c b/net/core/filter.c
index bdaac5895def..084eacc4d1d4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1263,10 +1263,16 @@ static struct bpf_prog_type_list sched_cls_type __read_mostly = {
 	.type = BPF_PROG_TYPE_SCHED_CLS,
 };
 
+static struct bpf_prog_type_list sched_act_type __read_mostly = {
+	.ops = &sk_filter_ops,
+	.type = BPF_PROG_TYPE_SCHED_ACT,
+};
+
 static int __init register_sk_filter_ops(void)
 {
 	bpf_register_prog_type(&sk_filter_type);
 	bpf_register_prog_type(&sched_cls_type);
+	bpf_register_prog_type(&sched_act_type);
 
 	return 0;
 }
-- 
cgit 


From a8cb5f556b567974d75ea29c15181c445c541b1f Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 20 Mar 2015 15:11:12 +0100
Subject: act_bpf: add initial eBPF support for actions

This work extends the "classic" BPF programmable tc action by extending
its scope also to native eBPF code!

Together with commit e2e9b6541dd4 ("cls_bpf: add initial eBPF support
for programmable classifiers") this adds the facility to implement fully
flexible classifier and actions for tc that can be implemented in a C
subset in user space, "safely" loaded into the kernel, and being run in
native speed when JITed.

Also, since eBPF maps can be shared between eBPF programs, it offers the
possibility that cls_bpf and act_bpf can share data 1) between themselves
and 2) between user space applications. That means that, f.e. customized
runtime statistics can be collected in user space, but also more importantly
classifier and action behaviour could be altered based on map input from
the user space application.

For the remaining details on the workflow and integration, see the cls_bpf
commit e2e9b6541dd4. Preliminary iproute2 part can be found under [1].

  [1] http://git.breakpoint.cc/cgit/dborkman/iproute2.git/log/?h=ebpf-act

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_bpf.h        |   6 +-
 include/uapi/linux/tc_act/tc_bpf.h |   2 +
 net/sched/act_bpf.c                | 295 ++++++++++++++++++++++++++-----------
 3 files changed, 220 insertions(+), 83 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h
index 86a070ffc930..a152e9858b2c 100644
--- a/include/net/tc_act/tc_bpf.h
+++ b/include/net/tc_act/tc_bpf.h
@@ -16,8 +16,12 @@
 struct tcf_bpf {
 	struct tcf_common	common;
 	struct bpf_prog		*filter;
+	union {
+		u32		bpf_fd;
+		u16		bpf_num_ops;
+	};
 	struct sock_filter	*bpf_ops;
-	u16			bpf_num_ops;
+	const char		*bpf_name;
 };
 #define to_bpf(a) \
 	container_of(a->priv, struct tcf_bpf, common)
diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index 5288bd77e63b..07f17cc70bb3 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -24,6 +24,8 @@ enum {
 	TCA_ACT_BPF_PARMS,
 	TCA_ACT_BPF_OPS_LEN,
 	TCA_ACT_BPF_OPS,
+	TCA_ACT_BPF_FD,
+	TCA_ACT_BPF_NAME,
 	__TCA_ACT_BPF_MAX,
 };
 #define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1)
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 5f6288fa3f12..4d2cede17468 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -13,26 +13,40 @@
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/filter.h>
+#include <linux/bpf.h>
+
 #include <net/netlink.h>
 #include <net/pkt_sched.h>
 
 #include <linux/tc_act/tc_bpf.h>
 #include <net/tc_act/tc_bpf.h>
 
-#define BPF_TAB_MASK     15
+#define BPF_TAB_MASK		15
+#define ACT_BPF_NAME_LEN	256
+
+struct tcf_bpf_cfg {
+	struct bpf_prog *filter;
+	struct sock_filter *bpf_ops;
+	char *bpf_name;
+	u32 bpf_fd;
+	u16 bpf_num_ops;
+};
 
-static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
+static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 		   struct tcf_result *res)
 {
-	struct tcf_bpf *b = a->priv;
+	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
 
-	spin_lock(&b->tcf_lock);
+	spin_lock(&prog->tcf_lock);
 
-	b->tcf_tm.lastuse = jiffies;
-	bstats_update(&b->tcf_bstats, skb);
+	prog->tcf_tm.lastuse = jiffies;
+	bstats_update(&prog->tcf_bstats, skb);
 
-	filter_res = BPF_PROG_RUN(b->filter, skb);
+	/* Needed here for accessing maps. */
+	rcu_read_lock();
+	filter_res = BPF_PROG_RUN(prog->filter, skb);
+	rcu_read_unlock();
 
 	/* A BPF program may overwrite the default action opcode.
 	 * Similarly as in cls_bpf, if filter_res == -1 we use the
@@ -52,52 +66,87 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a,
 		break;
 	case TC_ACT_SHOT:
 		action = filter_res;
-		b->tcf_qstats.drops++;
+		prog->tcf_qstats.drops++;
 		break;
 	case TC_ACT_UNSPEC:
-		action = b->tcf_action;
+		action = prog->tcf_action;
 		break;
 	default:
 		action = TC_ACT_UNSPEC;
 		break;
 	}
 
-	spin_unlock(&b->tcf_lock);
+	spin_unlock(&prog->tcf_lock);
 	return action;
 }
 
-static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a,
+static bool tcf_bpf_is_ebpf(const struct tcf_bpf *prog)
+{
+	return !prog->bpf_ops;
+}
+
+static int tcf_bpf_dump_bpf_info(const struct tcf_bpf *prog,
+				 struct sk_buff *skb)
+{
+	struct nlattr *nla;
+
+	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, prog->bpf_num_ops))
+		return -EMSGSIZE;
+
+	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, prog->bpf_num_ops *
+			  sizeof(struct sock_filter));
+	if (nla == NULL)
+		return -EMSGSIZE;
+
+	memcpy(nla_data(nla), prog->bpf_ops, nla_len(nla));
+
+	return 0;
+}
+
+static int tcf_bpf_dump_ebpf_info(const struct tcf_bpf *prog,
+				  struct sk_buff *skb)
+{
+	if (nla_put_u32(skb, TCA_ACT_BPF_FD, prog->bpf_fd))
+		return -EMSGSIZE;
+
+	if (prog->bpf_name &&
+	    nla_put_string(skb, TCA_ACT_BPF_NAME, prog->bpf_name))
+		return -EMSGSIZE;
+
+	return 0;
+}
+
+static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *act,
 			int bind, int ref)
 {
 	unsigned char *tp = skb_tail_pointer(skb);
-	struct tcf_bpf *b = a->priv;
+	struct tcf_bpf *prog = act->priv;
 	struct tc_act_bpf opt = {
-		.index    = b->tcf_index,
-		.refcnt   = b->tcf_refcnt - ref,
-		.bindcnt  = b->tcf_bindcnt - bind,
-		.action   = b->tcf_action,
+		.index   = prog->tcf_index,
+		.refcnt  = prog->tcf_refcnt - ref,
+		.bindcnt = prog->tcf_bindcnt - bind,
+		.action  = prog->tcf_action,
 	};
-	struct tcf_t t;
-	struct nlattr *nla;
+	struct tcf_t tm;
+	int ret;
 
 	if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt))
 		goto nla_put_failure;
 
-	if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops))
-		goto nla_put_failure;
-
-	nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops *
-			  sizeof(struct sock_filter));
-	if (!nla)
+	if (tcf_bpf_is_ebpf(prog))
+		ret = tcf_bpf_dump_ebpf_info(prog, skb);
+	else
+		ret = tcf_bpf_dump_bpf_info(prog, skb);
+	if (ret)
 		goto nla_put_failure;
 
-	memcpy(nla_data(nla), b->bpf_ops, nla_len(nla));
+	tm.install = jiffies_to_clock_t(jiffies - prog->tcf_tm.install);
+	tm.lastuse = jiffies_to_clock_t(jiffies - prog->tcf_tm.lastuse);
+	tm.expires = jiffies_to_clock_t(prog->tcf_tm.expires);
 
-	t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install);
-	t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse);
-	t.expires = jiffies_to_clock_t(b->tcf_tm.expires);
-	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t))
+	if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(tm), &tm))
 		goto nla_put_failure;
+
 	return skb->len;
 
 nla_put_failure:
@@ -107,36 +156,21 @@ nla_put_failure:
 
 static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = {
 	[TCA_ACT_BPF_PARMS]	= { .len = sizeof(struct tc_act_bpf) },
+	[TCA_ACT_BPF_FD]	= { .type = NLA_U32 },
+	[TCA_ACT_BPF_NAME]	= { .type = NLA_NUL_STRING, .len = ACT_BPF_NAME_LEN },
 	[TCA_ACT_BPF_OPS_LEN]	= { .type = NLA_U16 },
 	[TCA_ACT_BPF_OPS]	= { .type = NLA_BINARY,
 				    .len = sizeof(struct sock_filter) * BPF_MAXINSNS },
 };
 
-static int tcf_bpf_init(struct net *net, struct nlattr *nla,
-			struct nlattr *est, struct tc_action *a,
-			int ovr, int bind)
+static int tcf_bpf_init_from_ops(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
 {
-	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
-	struct tc_act_bpf *parm;
-	struct tcf_bpf *b;
-	u16 bpf_size, bpf_num_ops;
 	struct sock_filter *bpf_ops;
-	struct sock_fprog_kern tmp;
+	struct sock_fprog_kern fprog_tmp;
 	struct bpf_prog *fp;
+	u16 bpf_size, bpf_num_ops;
 	int ret;
 
-	if (!nla)
-		return -EINVAL;
-
-	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
-	if (ret < 0)
-		return ret;
-
-	if (!tb[TCA_ACT_BPF_PARMS] ||
-	    !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS])
-		return -EINVAL;
-	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
-
 	bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]);
 	if (bpf_num_ops	> BPF_MAXINSNS || bpf_num_ops == 0)
 		return -EINVAL;
@@ -146,68 +180,165 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 		return -EINVAL;
 
 	bpf_ops = kzalloc(bpf_size, GFP_KERNEL);
-	if (!bpf_ops)
+	if (bpf_ops == NULL)
 		return -ENOMEM;
 
 	memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size);
 
-	tmp.len = bpf_num_ops;
-	tmp.filter = bpf_ops;
+	fprog_tmp.len = bpf_num_ops;
+	fprog_tmp.filter = bpf_ops;
 
-	ret = bpf_prog_create(&fp, &tmp);
-	if (ret)
-		goto free_bpf_ops;
+	ret = bpf_prog_create(&fp, &fprog_tmp);
+	if (ret < 0) {
+		kfree(bpf_ops);
+		return ret;
+	}
 
-	if (!tcf_hash_check(parm->index, a, bind)) {
-		ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind);
-		if (ret)
+	cfg->bpf_ops = bpf_ops;
+	cfg->bpf_num_ops = bpf_num_ops;
+	cfg->filter = fp;
+
+	return 0;
+}
+
+static int tcf_bpf_init_from_efd(struct nlattr **tb, struct tcf_bpf_cfg *cfg)
+{
+	struct bpf_prog *fp;
+	char *name = NULL;
+	u32 bpf_fd;
+
+	bpf_fd = nla_get_u32(tb[TCA_ACT_BPF_FD]);
+
+	fp = bpf_prog_get(bpf_fd);
+	if (IS_ERR(fp))
+		return PTR_ERR(fp);
+
+	if (fp->type != BPF_PROG_TYPE_SCHED_ACT) {
+		bpf_prog_put(fp);
+		return -EINVAL;
+	}
+
+	if (tb[TCA_ACT_BPF_NAME]) {
+		name = kmemdup(nla_data(tb[TCA_ACT_BPF_NAME]),
+			       nla_len(tb[TCA_ACT_BPF_NAME]),
+			       GFP_KERNEL);
+		if (!name) {
+			bpf_prog_put(fp);
+			return -ENOMEM;
+		}
+	}
+
+	cfg->bpf_fd = bpf_fd;
+	cfg->bpf_name = name;
+	cfg->filter = fp;
+
+	return 0;
+}
+
+static int tcf_bpf_init(struct net *net, struct nlattr *nla,
+			struct nlattr *est, struct tc_action *act,
+			int replace, int bind)
+{
+	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
+	struct tc_act_bpf *parm;
+	struct tcf_bpf *prog;
+	struct tcf_bpf_cfg cfg;
+	bool is_bpf, is_ebpf;
+	int ret;
+
+	if (!nla)
+		return -EINVAL;
+
+	ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy);
+	if (ret < 0)
+		return ret;
+
+	is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS];
+	is_ebpf = tb[TCA_ACT_BPF_FD];
+
+	if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf) ||
+	    !tb[TCA_ACT_BPF_PARMS])
+		return -EINVAL;
+
+	parm = nla_data(tb[TCA_ACT_BPF_PARMS]);
+
+	memset(&cfg, 0, sizeof(cfg));
+
+	ret = is_bpf ? tcf_bpf_init_from_ops(tb, &cfg) :
+		       tcf_bpf_init_from_efd(tb, &cfg);
+	if (ret < 0)
+		return ret;
+
+	if (!tcf_hash_check(parm->index, act, bind)) {
+		ret = tcf_hash_create(parm->index, est, act,
+				      sizeof(*prog), bind);
+		if (ret < 0)
 			goto destroy_fp;
 
 		ret = ACT_P_CREATED;
 	} else {
+		/* Don't override defaults. */
 		if (bind)
 			goto destroy_fp;
-		tcf_hash_release(a, bind);
-		if (!ovr) {
+
+		tcf_hash_release(act, bind);
+		if (!replace) {
 			ret = -EEXIST;
 			goto destroy_fp;
 		}
 	}
 
-	b = to_bpf(a);
-	spin_lock_bh(&b->tcf_lock);
-	b->tcf_action = parm->action;
-	b->bpf_num_ops = bpf_num_ops;
-	b->bpf_ops = bpf_ops;
-	b->filter = fp;
-	spin_unlock_bh(&b->tcf_lock);
+	prog = to_bpf(act);
+	spin_lock_bh(&prog->tcf_lock);
+
+	prog->bpf_ops = cfg.bpf_ops;
+	prog->bpf_name = cfg.bpf_name;
+
+	if (cfg.bpf_num_ops)
+		prog->bpf_num_ops = cfg.bpf_num_ops;
+	if (cfg.bpf_fd)
+		prog->bpf_fd = cfg.bpf_fd;
+
+	prog->tcf_action = parm->action;
+	prog->filter = cfg.filter;
+
+	spin_unlock_bh(&prog->tcf_lock);
 
 	if (ret == ACT_P_CREATED)
-		tcf_hash_insert(a);
+		tcf_hash_insert(act);
+
 	return ret;
 
 destroy_fp:
-	bpf_prog_destroy(fp);
-free_bpf_ops:
-	kfree(bpf_ops);
+	if (is_ebpf)
+		bpf_prog_put(cfg.filter);
+	else
+		bpf_prog_destroy(cfg.filter);
+
+	kfree(cfg.bpf_ops);
+	kfree(cfg.bpf_name);
+
 	return ret;
 }
 
-static void tcf_bpf_cleanup(struct tc_action *a, int bind)
+static void tcf_bpf_cleanup(struct tc_action *act, int bind)
 {
-	struct tcf_bpf *b = a->priv;
+	const struct tcf_bpf *prog = act->priv;
 
-	bpf_prog_destroy(b->filter);
+	if (tcf_bpf_is_ebpf(prog))
+		bpf_prog_put(prog->filter);
+	else
+		bpf_prog_destroy(prog->filter);
 }
 
-static struct tc_action_ops act_bpf_ops = {
-	.kind =		"bpf",
-	.type =		TCA_ACT_BPF,
-	.owner =	THIS_MODULE,
-	.act =		tcf_bpf,
-	.dump =		tcf_bpf_dump,
-	.cleanup =	tcf_bpf_cleanup,
-	.init =		tcf_bpf_init,
+static struct tc_action_ops act_bpf_ops __read_mostly = {
+	.kind		=	"bpf",
+	.type		=	TCA_ACT_BPF,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_bpf,
+	.dump		=	tcf_bpf_dump,
+	.cleanup	=	tcf_bpf_cleanup,
+	.init		=	tcf_bpf_init,
 };
 
 static int __init bpf_init_module(void)
-- 
cgit 


From 8da86466b83787df0d4b89ec81c310de072d101c Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Date: Thu, 19 Mar 2015 22:41:46 +0900
Subject: net: neighbour: Add mcast_resolicit to configure the number of
 multicast resolicitations in PROBE state.

We send unicast neighbor (ARP or NDP) solicitations ucast_probes
times in PROBE state.  Zhu Yanjun reported that some implementation
does not reply against them and the entry will become FAILED, which
is undesirable.

We had been dealt with such nodes by sending multicast probes mcast_
solicit times after unicast probes in PROBE state.  In 2003, I made
a change not to send them to improve compatibility with IPv6 NDP.

Let's introduce per-protocol per-interface sysctl knob "mcast_
reprobe" to configure the number of multicast (re)solicitation for
reconfirmation in PROBE state.  The default is 0, since we have
been doing so for 10+ years.

Reported-by: Zhu Yanjun <Yanjun.Zhu@windriver.com>
CC: Ulf Samuelsson <ulf.samuelsson@ericsson.com>
Signed-off-by: YOSHIFUJI Hideaki <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/neighbour.h        |  1 +
 include/uapi/linux/neighbour.h |  1 +
 net/core/neighbour.c           | 15 +++++++++++----
 3 files changed, 13 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index e7bdf5170802..bd33e66f49aa 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -42,6 +42,7 @@ enum {
 	NEIGH_VAR_MCAST_PROBES,
 	NEIGH_VAR_UCAST_PROBES,
 	NEIGH_VAR_APP_PROBES,
+	NEIGH_VAR_MCAST_REPROBES,
 	NEIGH_VAR_RETRANS_TIME,
 	NEIGH_VAR_BASE_REACHABLE_TIME,
 	NEIGH_VAR_DELAY_PROBE_TIME,
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 3873a35509aa..2e35c61bbdd1 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -126,6 +126,7 @@ enum {
 	NDTPA_PROXY_QLEN,		/* u32 */
 	NDTPA_LOCKTIME,			/* u64, msecs */
 	NDTPA_QUEUE_LENBYTES,		/* u32 */
+	NDTPA_MCAST_REPROBES,		/* u32 */
 	__NDTPA_MAX
 };
 #define NDTPA_MAX (__NDTPA_MAX - 1)
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 0e8b32efc031..3de654256028 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -817,10 +817,9 @@ out:
 static __inline__ int neigh_max_probes(struct neighbour *n)
 {
 	struct neigh_parms *p = n->parms;
-	int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
-	if (!(n->nud_state & NUD_PROBE))
-		max_probes += NEIGH_VAR(p, MCAST_PROBES);
-	return max_probes;
+	return NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
+	       (n->nud_state & NUD_PROBE ? NEIGH_VAR(p, MCAST_REPROBES) :
+	        NEIGH_VAR(p, MCAST_PROBES));
 }
 
 static void neigh_invalidate(struct neighbour *neigh)
@@ -1742,6 +1741,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 			NEIGH_VAR(parms, UCAST_PROBES)) ||
 	    nla_put_u32(skb, NDTPA_MCAST_PROBES,
 			NEIGH_VAR(parms, MCAST_PROBES)) ||
+	    nla_put_u32(skb, NDTPA_MCAST_REPROBES,
+			NEIGH_VAR(parms, MCAST_REPROBES)) ||
 	    nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
 	    nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
 			  NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
@@ -1901,6 +1902,7 @@ static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
 	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
 	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
 	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
+	[NDTPA_MCAST_REPROBES]		= { .type = NLA_U32 },
 	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
 	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
 	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
@@ -2001,6 +2003,10 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
 				NEIGH_VAR_SET(p, MCAST_PROBES,
 					      nla_get_u32(tbp[i]));
 				break;
+			case NDTPA_MCAST_REPROBES:
+				NEIGH_VAR_SET(p, MCAST_REPROBES,
+					      nla_get_u32(tbp[i]));
+				break;
 			case NDTPA_BASE_REACHABLE_TIME:
 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
 					      nla_get_msecs(tbp[i]));
@@ -2987,6 +2993,7 @@ static struct neigh_sysctl_table {
 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
 		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
+		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_REPROBES, "mcast_resolicit"),
 		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
 		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
 		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
-- 
cgit 


From e1c47e732cbb4211d15ae0c4465dc4ceefcaa398 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Wed, 4 Mar 2015 01:47:55 -0800
Subject: [media] v4l2-subdev.h: add 'which' field for the enum structs

While all other pad ops allow you to select whether to use the 'try' or
the 'active' formats, the enum ops didn't have that option and always used
'try'.

However, this will fail if a simple (e.g. PCI) bridge driver wants to use
the enum pad op of a subdev that's also used in a complex platform driver
like the omap3. Such a bridge driver generally wants to enum formats based
on the active format.

So add a new 'which' field to these structs. Note that V4L2_SUBDEV_FORMAT_TRY
is 0, so the default remains TRY (applications need to set reserved to 0).

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Tested-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Sylwester Nawrocki <s.nawrocki@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/v4l2-subdev.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-subdev.h b/include/uapi/linux/v4l2-subdev.h
index e0a7e3da498a..dbce2b554e02 100644
--- a/include/uapi/linux/v4l2-subdev.h
+++ b/include/uapi/linux/v4l2-subdev.h
@@ -69,12 +69,14 @@ struct v4l2_subdev_crop {
  * @pad: pad number, as reported by the media API
  * @index: format index during enumeration
  * @code: format code (MEDIA_BUS_FMT_ definitions)
+ * @which: format type (from enum v4l2_subdev_format_whence)
  */
 struct v4l2_subdev_mbus_code_enum {
 	__u32 pad;
 	__u32 index;
 	__u32 code;
-	__u32 reserved[9];
+	__u32 which;
+	__u32 reserved[8];
 };
 
 /**
@@ -82,6 +84,7 @@ struct v4l2_subdev_mbus_code_enum {
  * @pad: pad number, as reported by the media API
  * @index: format index during enumeration
  * @code: format code (MEDIA_BUS_FMT_ definitions)
+ * @which: format type (from enum v4l2_subdev_format_whence)
  */
 struct v4l2_subdev_frame_size_enum {
 	__u32 index;
@@ -91,7 +94,8 @@ struct v4l2_subdev_frame_size_enum {
 	__u32 max_width;
 	__u32 min_height;
 	__u32 max_height;
-	__u32 reserved[9];
+	__u32 which;
+	__u32 reserved[8];
 };
 
 /**
@@ -113,6 +117,7 @@ struct v4l2_subdev_frame_interval {
  * @width: frame width in pixels
  * @height: frame height in pixels
  * @interval: frame interval in seconds
+ * @which: format type (from enum v4l2_subdev_format_whence)
  */
 struct v4l2_subdev_frame_interval_enum {
 	__u32 index;
@@ -121,7 +126,8 @@ struct v4l2_subdev_frame_interval_enum {
 	__u32 width;
 	__u32 height;
 	struct v4l2_fract interval;
-	__u32 reserved[9];
+	__u32 which;
+	__u32 reserved[8];
 };
 
 /**
-- 
cgit 


From 682f048bd49449f4ab978664a7f69a44a74e3caa Mon Sep 17 00:00:00 2001
From: Alexander Drozdov <al.drozdov@gmail.com>
Date: Mon, 23 Mar 2015 09:11:13 +0300
Subject: af_packet: pass checksum validation status to the user

Introduce TP_STATUS_CSUM_VALID tp_status flag to tell the
af_packet user that at least the transport header checksum
has been already validated.

For now, the flag may be set for incoming packets only.

Signed-off-by: Alexander Drozdov <al.drozdov@gmail.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/packet_mmap.txt | 13 ++++++++++---
 include/uapi/linux/if_packet.h           |  1 +
 net/packet/af_packet.c                   |  9 +++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index a6d7cb91069e..daa015af16a0 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -440,9 +440,10 @@ and the following flags apply:
 +++ Capture process:
      from include/linux/if_packet.h
 
-     #define TP_STATUS_COPY          2 
-     #define TP_STATUS_LOSING        4 
-     #define TP_STATUS_CSUMNOTREADY  8 
+     #define TP_STATUS_COPY          (1 << 1)
+     #define TP_STATUS_LOSING        (1 << 2)
+     #define TP_STATUS_CSUMNOTREADY  (1 << 3)
+     #define TP_STATUS_CSUM_VALID    (1 << 7)
 
 TP_STATUS_COPY        : This flag indicates that the frame (and associated
                         meta information) has been truncated because it's 
@@ -466,6 +467,12 @@ TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which
                         reading the packet we should not try to check the 
                         checksum. 
 
+TP_STATUS_CSUM_VALID  : This flag indicates that at least the transport
+                        header checksum of the packet has been already
+                        validated on the kernel side. If the flag is not set
+                        then we are free to check the checksum by ourselves
+                        provided that TP_STATUS_CSUMNOTREADY is also not set.
+
 for convenience there are also the following defines:
 
      #define TP_STATUS_KERNEL        0
diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index da2d668b8cf1..053bd102fbe0 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -99,6 +99,7 @@ struct tpacket_auxdata {
 #define TP_STATUS_VLAN_VALID		(1 << 4) /* auxdata has valid tp_vlan_tci */
 #define TP_STATUS_BLK_TMO		(1 << 5)
 #define TP_STATUS_VLAN_TPID_VALID	(1 << 6) /* auxdata has valid tp_vlan_tpid */
+#define TP_STATUS_CSUM_VALID		(1 << 7)
 
 /* Tx ring - header status */
 #define TP_STATUS_AVAILABLE	      0
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9d854c5ce0b5..5102c3cc4eec 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1924,6 +1924,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
 
 	if (skb->ip_summed == CHECKSUM_PARTIAL)
 		status |= TP_STATUS_CSUMNOTREADY;
+	else if (skb->pkt_type != PACKET_OUTGOING &&
+		 (skb->ip_summed == CHECKSUM_COMPLETE ||
+		  skb_csum_unnecessary(skb)))
+		status |= TP_STATUS_CSUM_VALID;
 
 	if (snaplen > res)
 		snaplen = res;
@@ -3031,6 +3035,11 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 		aux.tp_status = TP_STATUS_USER;
 		if (skb->ip_summed == CHECKSUM_PARTIAL)
 			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
+		else if (skb->pkt_type != PACKET_OUTGOING &&
+			 (skb->ip_summed == CHECKSUM_COMPLETE ||
+			  skb_csum_unnecessary(skb)))
+			aux.tp_status |= TP_STATUS_CSUM_VALID;
+
 		aux.tp_len = origlen;
 		aux.tp_snaplen = skb->len;
 		aux.tp_mac = 0;
-- 
cgit 


From 3d1bec99320d4e96897805440f8cf4f68eff226b Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:00 +0100
Subject: ipv6: introduce secret_stable to ipv6_devconf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch implements the procfs logic for the stable_address knob:
The secret is formatted as an ipv6 address and will be stored per
interface and per namespace. We track initialized flag and return EIO
errors until the secret is set.

We don't inherit the secret to newly created namespaces.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h      |  4 +++
 include/uapi/linux/ipv6.h |  1 +
 net/ipv6/addrconf.c       | 68 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 73 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 4d5169f5d7d1..82806c60aa42 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -53,6 +53,10 @@ struct ipv6_devconf {
 	__s32           ndisc_notify;
 	__s32		suppress_frag_ndisc;
 	__s32		accept_ra_mtu;
+	struct ipv6_stable_secret {
+		bool initialized;
+		struct in6_addr secret;
+	} stable_secret;
 	void		*sysctl;
 };
 
diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h
index 437a6a4b125a..5efa54ae567c 100644
--- a/include/uapi/linux/ipv6.h
+++ b/include/uapi/linux/ipv6.h
@@ -170,6 +170,7 @@ enum {
 	DEVCONF_ACCEPT_RA_FROM_LOCAL,
 	DEVCONF_USE_OPTIMISTIC,
 	DEVCONF_ACCEPT_RA_MTU,
+	DEVCONF_STABLE_SECRET,
 	DEVCONF_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 158378e73f0a..5b967c8a617a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -46,6 +46,7 @@
 #include <linux/socket.h>
 #include <linux/sockios.h>
 #include <linux/net.h>
+#include <linux/inet.h>
 #include <linux/in6.h>
 #include <linux/netdevice.h>
 #include <linux/if_addr.h>
@@ -102,6 +103,9 @@
 
 #define	INFINITY_LIFE_TIME	0xFFFFFFFF
 
+#define IPV6_MAX_STRLEN \
+	sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255")
+
 static inline u32 cstamp_delta(unsigned long cstamp)
 {
 	return (cstamp - INITIAL_JIFFIES) * 100UL / HZ;
@@ -202,6 +206,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
 	.accept_dad		= 1,
 	.suppress_frag_ndisc	= 1,
 	.accept_ra_mtu		= 1,
+	.stable_secret		= {
+		.initialized = false,
+	}
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -240,6 +247,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 	.accept_dad		= 1,
 	.suppress_frag_ndisc	= 1,
 	.accept_ra_mtu		= 1,
+	.stable_secret		= {
+		.initialized = false,
+	},
 };
 
 /* Check if a valid qdisc is available */
@@ -4430,6 +4440,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 	array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc;
 	array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local;
 	array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu;
+	/* we omit DEVCONF_STABLE_SECRET for now */
 }
 
 static inline size_t inet6_ifla6_size(void)
@@ -5074,6 +5085,53 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
+					 void __user *buffer, size_t *lenp,
+					 loff_t *ppos)
+{
+	int err;
+	struct in6_addr addr;
+	char str[IPV6_MAX_STRLEN];
+	struct ctl_table lctl = *ctl;
+	struct ipv6_stable_secret *secret = ctl->data;
+
+	lctl.maxlen = IPV6_MAX_STRLEN;
+	lctl.data = str;
+
+	if (!rtnl_trylock())
+		return restart_syscall();
+
+	if (!write && !secret->initialized) {
+		err = -EIO;
+		goto out;
+	}
+
+	if (!write) {
+		err = snprintf(str, sizeof(str), "%pI6",
+			       &secret->secret);
+		if (err >= sizeof(str)) {
+			err = -EIO;
+			goto out;
+		}
+	}
+
+	err = proc_dostring(&lctl, write, buffer, lenp, ppos);
+	if (err || !write)
+		goto out;
+
+	if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) {
+		err = -EIO;
+		goto out;
+	}
+
+	secret->initialized = true;
+	secret->secret = addr;
+
+out:
+	rtnl_unlock();
+
+	return err;
+}
 
 static struct addrconf_sysctl_table
 {
@@ -5346,6 +5404,13 @@ static struct addrconf_sysctl_table
 			.mode		= 0644,
 			.proc_handler	= proc_dointvec,
 		},
+		{
+			.procname	= "stable_secret",
+			.data		= &ipv6_devconf.stable_secret,
+			.maxlen		= IPV6_MAX_STRLEN,
+			.mode		= 0600,
+			.proc_handler	= addrconf_sysctl_stable_secret,
+		},
 		{
 			/* sentinel */
 		}
@@ -5442,6 +5507,9 @@ static int __net_init addrconf_init_net(struct net *net)
 	dflt->autoconf = ipv6_defaults.autoconf;
 	dflt->disable_ipv6 = ipv6_defaults.disable_ipv6;
 
+	dflt->stable_secret.initialized = false;
+	all->stable_secret.initialized = false;
+
 	net->ipv6.devconf_all = all;
 	net->ipv6.devconf_dflt = dflt;
 
-- 
cgit 


From 622c81d57b392cc9be836670eb464a4dfaa9adfe Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:01 +0100
Subject: ipv6: generation of stable privacy addresses for link-local and
 autoconf
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch implements the stable privacy address generation for
link-local and autoconf addresses as specified in RFC7217.

  RID = F(Prefix, Net_Iface, Network_ID, DAD_Counter, secret_key)

is the RID (random identifier). As the hash function F we chose one
round of sha1. Prefix will be either the link-local prefix or the
router advertised one. As Net_Iface we use the MAC address of the
device. DAD_Counter and secret_key are implemented as specified.

We don't use Network_ID, as it couples the code too closely to other
subsystems. It is specified as optional in the RFC.

As Net_Iface we only use the MAC address: we simply have no stable
identifier in the kernel we could possibly use: because this code might
run very early, we cannot depend on names, as they might be changed by
user space early on during the boot process.

A new address generation mode is introduced,
IN6_ADDR_GEN_MODE_STABLE_PRIVACY. With iproute2 one can switch back to
none or eui64 address configuration mode although the stable_secret is
already set.

We refuse writes to ipv6/conf/all/stable_secret but only allow
ipv6/conf/default/stable_secret and the interface specific file to be
written to. The default stable_secret is used as the parameter for the
namespace, the interface specific can overwrite the secret, e.g. when
switching a network configuration from one system to another while
inheriting the secret.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h |   1 +
 net/ipv6/addrconf.c          | 130 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 127 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index f5f5edd5ae5f..7ffb18df01ca 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -216,6 +216,7 @@ enum {
 enum in6_addr_gen_mode {
 	IN6_ADDR_GEN_MODE_EUI64,
 	IN6_ADDR_GEN_MODE_NONE,
+	IN6_ADDR_GEN_MODE_STABLE_PRIVACY,
 };
 
 /* Bridge section */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 5b967c8a617a..6813268ce8b8 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -131,6 +131,9 @@ static void ipv6_regen_rndid(unsigned long data);
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(struct inet6_dev *idev);
+static int ipv6_generate_stable_address(struct in6_addr *addr,
+					u8 dad_count,
+					const struct inet6_dev *idev);
 
 /*
  *	Configured unicast address hash table
@@ -2302,6 +2305,11 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				       in6_dev->token.s6_addr + 8, 8);
 				read_unlock_bh(&in6_dev->lock);
 				tokenized = true;
+			} else if (in6_dev->addr_gen_mode ==
+				   IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+				   !ipv6_generate_stable_address(&addr, 0,
+								 in6_dev)) {
+				goto ok;
 			} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
 				   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
 				in6_dev_put(in6_dev);
@@ -2820,12 +2828,98 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
 	}
 }
 
+static bool ipv6_reserved_interfaceid(struct in6_addr address)
+{
+	if ((address.s6_addr32[2] | address.s6_addr32[3]) == 0)
+		return true;
+
+	if (address.s6_addr32[2] == htonl(0x02005eff) &&
+	    ((address.s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000)))
+		return true;
+
+	if (address.s6_addr32[2] == htonl(0xfdffffff) &&
+	    ((address.s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80)))
+		return true;
+
+	return false;
+}
+
+static int ipv6_generate_stable_address(struct in6_addr *address,
+					u8 dad_count,
+					const struct inet6_dev *idev)
+{
+	static const int idgen_retries = 3;
+
+	static DEFINE_SPINLOCK(lock);
+	static __u32 digest[SHA_DIGEST_WORDS];
+	static __u32 workspace[SHA_WORKSPACE_WORDS];
+
+	static union {
+		char __data[SHA_MESSAGE_BYTES];
+		struct {
+			struct in6_addr secret;
+			__be64 prefix;
+			unsigned char hwaddr[MAX_ADDR_LEN];
+			u8 dad_count;
+		} __packed;
+	} data;
+
+	struct in6_addr secret;
+	struct in6_addr temp;
+	struct net *net = dev_net(idev->dev);
+
+	BUILD_BUG_ON(sizeof(data.__data) != sizeof(data));
+
+	if (idev->cnf.stable_secret.initialized)
+		secret = idev->cnf.stable_secret.secret;
+	else if (net->ipv6.devconf_dflt->stable_secret.initialized)
+		secret = net->ipv6.devconf_dflt->stable_secret.secret;
+	else
+		return -1;
+
+retry:
+	spin_lock_bh(&lock);
+
+	sha_init(digest);
+	memset(&data, 0, sizeof(data));
+	memset(workspace, 0, sizeof(workspace));
+	memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len);
+	data.prefix = ((__be64)address->s6_addr32[0] << 32) |
+		       (__be64)address->s6_addr32[1];
+	data.secret = secret;
+	data.dad_count = dad_count;
+
+	sha_transform(digest, data.__data, workspace);
+
+	temp = *address;
+	temp.s6_addr32[2] = digest[0];
+	temp.s6_addr32[3] = digest[1];
+
+	spin_unlock_bh(&lock);
+
+	if (ipv6_reserved_interfaceid(temp)) {
+		dad_count++;
+		if (dad_count > idgen_retries)
+			return -1;
+		goto retry;
+	}
+
+	*address = temp;
+	return 0;
+}
+
 static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 {
-	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
-		struct in6_addr addr;
+	struct in6_addr addr;
+
+	ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0);
 
-		ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0);
+	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
+		if (!ipv6_generate_stable_address(&addr, 0, idev))
+			addrconf_add_linklocal(idev, &addr);
+		else if (prefix_route)
+			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
+	} else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
 		/* addrconf_add_linklocal also adds a prefix_route and we
 		 * only need to care about prefix routes if ipv6_generate_eui64
 		 * couldn't generate one.
@@ -4675,8 +4769,15 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla)
 		u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]);
 
 		if (mode != IN6_ADDR_GEN_MODE_EUI64 &&
-		    mode != IN6_ADDR_GEN_MODE_NONE)
+		    mode != IN6_ADDR_GEN_MODE_NONE &&
+		    mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY)
+			return -EINVAL;
+
+		if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
+		    !idev->cnf.stable_secret.initialized &&
+		    !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized)
 			return -EINVAL;
+
 		idev->addr_gen_mode = mode;
 		err = 0;
 	}
@@ -5093,8 +5194,12 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
 	struct in6_addr addr;
 	char str[IPV6_MAX_STRLEN];
 	struct ctl_table lctl = *ctl;
+	struct net *net = ctl->extra2;
 	struct ipv6_stable_secret *secret = ctl->data;
 
+	if (&net->ipv6.devconf_all->stable_secret == ctl->data)
+		return -EIO;
+
 	lctl.maxlen = IPV6_MAX_STRLEN;
 	lctl.data = str;
 
@@ -5127,6 +5232,23 @@ static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
 	secret->initialized = true;
 	secret->secret = addr;
 
+	if (&net->ipv6.devconf_dflt->stable_secret == ctl->data) {
+		struct net_device *dev;
+
+		for_each_netdev(net, dev) {
+			struct inet6_dev *idev = __in6_dev_get(dev);
+
+			if (idev) {
+				idev->addr_gen_mode =
+					IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+			}
+		}
+	} else {
+		struct inet6_dev *idev = ctl->extra1;
+
+		idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
+	}
+
 out:
 	rtnl_unlock();
 
-- 
cgit 


From 64236f3f3d742469e4027b83a9515e84e9ab21b4 Mon Sep 17 00:00:00 2001
From: Hannes Frederic Sowa <hannes@stressinduktion.org>
Date: Mon, 23 Mar 2015 23:36:02 +0100
Subject: ipv6: introduce IFA_F_STABLE_PRIVACY flag
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to mark appropriate addresses so we can do retries in case their
DAD failed.

Cc: Erik Kline <ek@google.com>
Cc: Fernando Gont <fgont@si6networks.com>
Cc: Lorenzo Colitti <lorenzo@google.com>
Cc: YOSHIFUJI Hideaki/吉藤英明 <hideaki.yoshifuji@miraclelinux.com>
Signed-off-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_addr.h |  1 +
 net/ipv6/addrconf.c          | 14 ++++++++------
 2 files changed, 9 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_addr.h b/include/uapi/linux/if_addr.h
index 40fdfea39714..4318ab1635ce 100644
--- a/include/uapi/linux/if_addr.h
+++ b/include/uapi/linux/if_addr.h
@@ -51,6 +51,7 @@ enum {
 #define IFA_F_MANAGETEMPADDR	0x100
 #define IFA_F_NOPREFIXROUTE	0x200
 #define IFA_F_MCAUTOJOIN	0x400
+#define IFA_F_STABLE_PRIVACY	0x800
 
 struct ifa_cacheinfo {
 	__u32	ifa_prefered;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 6813268ce8b8..c2357b6f62dd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2199,6 +2199,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
+	u32 addr_flags = 0;
 	struct inet6_dev *in6_dev;
 	struct net *net = dev_net(dev);
 
@@ -2309,6 +2310,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 				   IN6_ADDR_GEN_MODE_STABLE_PRIVACY &&
 				   !ipv6_generate_stable_address(&addr, 0,
 								 in6_dev)) {
+				addr_flags |= IFA_F_STABLE_PRIVACY;
 				goto ok;
 			} else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) &&
 				   ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) {
@@ -2328,7 +2330,6 @@ ok:
 
 		if (ifp == NULL && valid_lft) {
 			int max_addresses = in6_dev->cnf.max_addresses;
-			u32 addr_flags = 0;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 			if (in6_dev->cnf.optimistic_dad &&
@@ -2807,10 +2808,11 @@ static void init_loopback(struct net_device *dev)
 	}
 }
 
-static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr)
+static void addrconf_add_linklocal(struct inet6_dev *idev,
+				   const struct in6_addr *addr, u32 flags)
 {
 	struct inet6_ifaddr *ifp;
-	u32 addr_flags = IFA_F_PERMANENT;
+	u32 addr_flags = flags | IFA_F_PERMANENT;
 
 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 	if (idev->cnf.optimistic_dad &&
@@ -2818,7 +2820,6 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr
 		addr_flags |= IFA_F_OPTIMISTIC;
 #endif
 
-
 	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags,
 			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);
 	if (!IS_ERR(ifp)) {
@@ -2916,7 +2917,8 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 
 	if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) {
 		if (!ipv6_generate_stable_address(&addr, 0, idev))
-			addrconf_add_linklocal(idev, &addr);
+			addrconf_add_linklocal(idev, &addr,
+					       IFA_F_STABLE_PRIVACY);
 		else if (prefix_route)
 			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
 	} else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) {
@@ -2925,7 +2927,7 @@ static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route)
 		 * couldn't generate one.
 		 */
 		if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0)
-			addrconf_add_linklocal(idev, &addr);
+			addrconf_add_linklocal(idev, &addr, 0);
 		else if (prefix_route)
 			addrconf_prefix_route(&addr, 64, idev->dev, 0, 0);
 	}
-- 
cgit 


From 27cd5452476978283decb19e429e81fc6c71e74b Mon Sep 17 00:00:00 2001
From: Michal Sekletar <msekleta@redhat.com>
Date: Tue, 24 Mar 2015 14:48:41 +0100
Subject: filter: introduce SKF_AD_VLAN_TPID BPF extension

If vlan offloading takes place then vlan header is removed from frame
and its contents, both vlan_tci and vlan_proto, is available to user
space via TPACKET interface. However, only vlan_tci can be used in BPF
filters.

This commit introduces a new BPF extension. It makes possible to load
the value of vlan_proto (vlan TPID) to register A. Support for classic
BPF and eBPF is being added, analogous to skb->protocol.

Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Jiri Pirko <jpirko@redhat.com>

Signed-off-by: Michal Sekletar <msekleta@redhat.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/filter.txt |  3 ++-
 include/linux/filter.h              |  1 +
 include/uapi/linux/bpf.h            |  1 +
 include/uapi/linux/filter.h         |  3 ++-
 net/core/filter.c                   | 17 +++++++++++++++++
 tools/net/bpf_exp.l                 |  2 ++
 tools/net/bpf_exp.y                 | 11 ++++++++++-
 7 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index 9930ecfbb465..135581f015e1 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -280,7 +280,8 @@ Possible BPF extensions are shown in the following table:
   rxhash                                skb->hash
   cpu                                   raw_smp_processor_id()
   vlan_tci                              skb_vlan_tag_get(skb)
-  vlan_pr                               skb_vlan_tag_present(skb)
+  vlan_avail                            skb_vlan_tag_present(skb)
+  vlan_tpid                             skb->vlan_proto
   rand                                  prandom_u32()
 
 These extensions can also be prefixed with '#'.
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9ee8c67ea249..fa11b3a367be 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -454,6 +454,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
 		BPF_ANCILLARY(VLAN_TAG_PRESENT);
 		BPF_ANCILLARY(PAY_OFFSET);
 		BPF_ANCILLARY(RANDOM);
+		BPF_ANCILLARY(VLAN_TPID);
 		}
 		/* Fallthrough. */
 	default:
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3dd314a45d0d..27dc4ec58840 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -182,6 +182,7 @@ struct __sk_buff {
 	__u32 protocol;
 	__u32 vlan_present;
 	__u32 vlan_tci;
+	__u32 vlan_proto;
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 47785d5ecf17..34c7936ca114 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -77,7 +77,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_VLAN_TAG_PRESENT 48
 #define SKF_AD_PAY_OFFSET	52
 #define SKF_AD_RANDOM	56
-#define SKF_AD_MAX	60
+#define SKF_AD_VLAN_TPID	60
+#define SKF_AD_MAX	64
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 084eacc4d1d4..32f43c59908c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -272,6 +272,16 @@ static bool convert_bpf_extensions(struct sock_filter *fp,
 		insn += cnt - 1;
 		break;
 
+	case SKF_AD_OFF + SKF_AD_VLAN_TPID:
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+
+		/* A = *(u16 *) (CTX + offsetof(vlan_proto)) */
+		*insn++ = BPF_LDX_MEM(BPF_H, BPF_REG_A, BPF_REG_CTX,
+				      offsetof(struct sk_buff, vlan_proto));
+		/* A = ntohs(A) [emitting a nop or swap16] */
+		*insn = BPF_ENDIAN(BPF_FROM_BE, BPF_REG_A, 16);
+		break;
+
 	case SKF_AD_OFF + SKF_AD_PAY_OFFSET:
 	case SKF_AD_OFF + SKF_AD_NLATTR:
 	case SKF_AD_OFF + SKF_AD_NLATTR_NEST:
@@ -1226,6 +1236,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 				      offsetof(struct sk_buff, protocol));
 		break;
 
+	case offsetof(struct __sk_buff, vlan_proto):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, vlan_proto) != 2);
+
+		*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+				      offsetof(struct sk_buff, vlan_proto));
+		break;
+
 	case offsetof(struct __sk_buff, mark):
 		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
 
diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
index 833a96611da6..c83af3fb77de 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/net/bpf_exp.l
@@ -92,6 +92,8 @@ extern void yyerror(const char *str);
 "#"?("cpu")	{ return K_CPU; }
 "#"?("vlan_tci") { return K_VLANT; }
 "#"?("vlan_pr")	{ return K_VLANP; }
+"#"?("vlan_avail")	{ return K_VLANP; }
+"#"?("vlan_tpid")	{ return K_VLANTPID; }
 "#"?("rand")	{ return K_RAND; }
 
 ":"		{ return ':'; }
diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
index e6306c51c26f..f8332749b44c 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/net/bpf_exp.y
@@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type);
 %token OP_LDXI
 
 %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
-%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_VLANTPID K_POFF K_RAND
 
 %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
 
@@ -167,6 +167,9 @@ ldb
 	| OP_LDB K_RAND {
 		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_RANDOM); }
+	| OP_LDB K_VLANTPID {
+		bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_VLAN_TPID); }
 	;
 
 ldh
@@ -218,6 +221,9 @@ ldh
 	| OP_LDH K_RAND {
 		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_RANDOM); }
+	| OP_LDH K_VLANTPID {
+		bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_VLAN_TPID); }
 	;
 
 ldi
@@ -274,6 +280,9 @@ ld
 	| OP_LD K_RAND {
 		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
 				   SKF_AD_OFF + SKF_AD_RANDOM); }
+	| OP_LD K_VLANTPID {
+		bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+				   SKF_AD_OFF + SKF_AD_VLAN_TPID); }
 	| OP_LD 'M' '[' number ']' {
 		bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
 	| OP_LD '[' 'x' '+' number ']' {
-- 
cgit 


From dd46c787788d5bf5b974729d43e4c405814a4c7d Mon Sep 17 00:00:00 2001
From: Namjae Jeon <namjae.jeon@samsung.com>
Date: Wed, 25 Mar 2015 15:07:05 +1100
Subject: fs: Add support FALLOC_FL_INSERT_RANGE for fallocate

FALLOC_FL_INSERT_RANGE command is the opposite command of
FALLOC_FL_COLLAPSE_RANGE that is needed for someone who wants to add
some data in the middle of file.

FALLOC_FL_INSERT_RANGE will create space for writing new data within
a file after shifting extents to right as given length. This command
also has same limitations as FALLOC_FL_COLLAPSE_RANGE in that
operations need to be filesystem block boundary aligned and cannot
cross the current EOF.

Signed-off-by: Namjae Jeon <namjae.jeon@samsung.com>
Signed-off-by: Ashish Sangwan <a.sangwan@samsung.com>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Signed-off-by: Dave Chinner <david@fromorbit.com>
---
 fs/open.c                   |  8 ++++++--
 include/linux/falloc.h      |  6 ++++++
 include/uapi/linux/falloc.h | 17 +++++++++++++++++
 3 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/open.c b/fs/open.c
index 33f9cbf2610b..b724cc0e0228 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -231,8 +231,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 		return -EINVAL;
 
 	/* Return error if mode is not supported */
-	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
-		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
+	if (mode & ~FALLOC_FL_SUPPORTED_MASK)
 		return -EOPNOTSUPP;
 
 	/* Punch hole and zero range are mutually exclusive */
@@ -250,6 +249,11 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	    (mode & ~FALLOC_FL_COLLAPSE_RANGE))
 		return -EINVAL;
 
+	/* Insert range should only be used exclusively. */
+	if ((mode & FALLOC_FL_INSERT_RANGE) &&
+	    (mode & ~FALLOC_FL_INSERT_RANGE))
+		return -EINVAL;
+
 	if (!(file->f_mode & FMODE_WRITE))
 		return -EBADF;
 
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index 31591686ac2d..996111000a8c 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -21,4 +21,10 @@ struct space_resv {
 #define FS_IOC_RESVSP		_IOW('X', 40, struct space_resv)
 #define FS_IOC_RESVSP64		_IOW('X', 42, struct space_resv)
 
+#define	FALLOC_FL_SUPPORTED_MASK	(FALLOC_FL_KEEP_SIZE |		\
+					 FALLOC_FL_PUNCH_HOLE |		\
+					 FALLOC_FL_COLLAPSE_RANGE |	\
+					 FALLOC_FL_ZERO_RANGE |		\
+					 FALLOC_FL_INSERT_RANGE)
+
 #endif /* _FALLOC_H_ */
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index d1197ae3723c..3e445a760f14 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -41,4 +41,21 @@
  */
 #define FALLOC_FL_ZERO_RANGE		0x10
 
+/*
+ * FALLOC_FL_INSERT_RANGE is use to insert space within the file size without
+ * overwriting any existing data. The contents of the file beyond offset are
+ * shifted towards right by len bytes to create a hole.  As such, this
+ * operation will increase the size of the file by len bytes.
+ *
+ * Different filesystems may implement different limitations on the granularity
+ * of the operation. Most will limit operations to filesystem block size
+ * boundaries, but this boundary may be larger or smaller depending on
+ * the filesystem and/or the configuration of the filesystem or file.
+ *
+ * Attempting to insert space using this flag at OR beyond the end of
+ * the file is considered an illegal operation - just use ftruncate(2) or
+ * fallocate(2) with mode 0 for such type of operations.
+ */
+#define FALLOC_FL_INSERT_RANGE		0x20
+
 #endif /* _UAPI_FALLOC_H_ */
-- 
cgit 


From 34f439278cef7b1177f8ce24f9fc81dfc6221d3b Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 20 Feb 2015 14:05:38 +0100
Subject: perf: Add per event clockid support

While thinking on the whole clock discussion it occurred to me we have
two distinct uses of time:

 1) the tracking of event/ctx/cgroup enabled/running/stopped times
    which includes the self-monitoring support in struct
    perf_event_mmap_page.

 2) the actual timestamps visible in the data records.

And we've been conflating them.

The first is all about tracking time deltas, nobody should really care
in what time base that happens, its all relative information, as long
as its internally consistent it works.

The second however is what people are worried about when having to
merge their data with external sources. And here we have the
discussion on MONOTONIC vs MONOTONIC_RAW etc..

Where MONOTONIC is good for correlating between machines (static
offset), MONOTNIC_RAW is required for correlating against a fixed rate
hardware clock.

This means configurability; now 1) makes that hard because it needs to
be internally consistent across groups of unrelated events; which is
why we had to have a global perf_clock().

However, for 2) it doesn't really matter, perf itself doesn't care
what it writes into the buffer.

The below patch makes the distinction between these two cases by
adding perf_event_clock() which is used for the second case. It
further makes this configurable on a per-event basis, but adds a few
sanity checks such that we cannot combine events with different clocks
in confusing ways.

And since we then have per-event configurability we might as well
retain the 'legacy' behaviour as a default.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: John Stultz <john.stultz@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 14 ++++++--
 include/linux/perf_event.h       |  2 ++
 include/uapi/linux/perf_event.h  |  6 ++--
 kernel/events/core.c             | 77 ++++++++++++++++++++++++++++++++++++++--
 4 files changed, 91 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ac41b3ad1fc9..0420ebcac116 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1978,13 +1978,23 @@ void arch_perf_update_userpage(struct perf_event *event,
 
 	data = cyc2ns_read_begin();
 
+	/*
+	 * Internal timekeeping for enabled/running/stopped times
+	 * is always in the local_clock domain.
+	 */
 	userpg->cap_user_time = 1;
 	userpg->time_mult = data->cyc2ns_mul;
 	userpg->time_shift = data->cyc2ns_shift;
 	userpg->time_offset = data->cyc2ns_offset - now;
 
-	userpg->cap_user_time_zero = 1;
-	userpg->time_zero = data->cyc2ns_offset;
+	/*
+	 * cap_user_time_zero doesn't make sense when we're using a different
+	 * time base for the records.
+	 */
+	if (event->clock == &local_clock) {
+		userpg->cap_user_time_zero = 1;
+		userpg->time_zero = data->cyc2ns_offset;
+	}
 
 	cyc2ns_read_end(data);
 }
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index b16eac5f54ce..401554074de9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -173,6 +173,7 @@ struct perf_event;
  * pmu::capabilities flags
  */
 #define PERF_PMU_CAP_NO_INTERRUPT		0x01
+#define PERF_PMU_CAP_NO_NMI			0x02
 
 /**
  * struct pmu - generic performance monitoring unit
@@ -457,6 +458,7 @@ struct perf_event {
 	struct pid_namespace		*ns;
 	u64				id;
 
+	u64				(*clock)(void);
 	perf_overflow_handler_t		overflow_handler;
 	void				*overflow_handler_context;
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 1e3cd07cf76e..3bb40ddadbe5 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -326,7 +326,8 @@ struct perf_event_attr {
 				exclude_callchain_user   : 1, /* exclude user callchains */
 				mmap2          :  1, /* include mmap with inode data     */
 				comm_exec      :  1, /* flag comm events that are due to an exec */
-				__reserved_1   : 39;
+				use_clockid    :  1, /* use @clockid for time fields */
+				__reserved_1   : 38;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -355,8 +356,7 @@ struct perf_event_attr {
 	 */
 	__u32	sample_stack_user;
 
-	/* Align to u64. */
-	__u32	__reserved_2;
+	__s32	clockid;
 	/*
 	 * Defines set of regs to dump for each sample
 	 * state captured on:
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bb1a7c36e794..c40c2cac2d8e 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -327,6 +327,11 @@ static inline u64 perf_clock(void)
 	return local_clock();
 }
 
+static inline u64 perf_event_clock(struct perf_event *event)
+{
+	return event->clock();
+}
+
 static inline struct perf_cpu_context *
 __get_cpu_context(struct perf_event_context *ctx)
 {
@@ -4762,7 +4767,7 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
 	}
 
 	if (sample_type & PERF_SAMPLE_TIME)
-		data->time = perf_clock();
+		data->time = perf_event_clock(event);
 
 	if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER))
 		data->id = primary_event_id(event);
@@ -5340,6 +5345,8 @@ static void perf_event_task_output(struct perf_event *event,
 	task_event->event_id.tid = perf_event_tid(event, task);
 	task_event->event_id.ptid = perf_event_tid(event, current);
 
+	task_event->event_id.time = perf_event_clock(event);
+
 	perf_output_put(&handle, task_event->event_id);
 
 	perf_event__output_id_sample(event, &handle, &sample);
@@ -5373,7 +5380,7 @@ static void perf_event_task(struct task_struct *task,
 			/* .ppid */
 			/* .tid  */
 			/* .ptid */
-			.time = perf_clock(),
+			/* .time */
 		},
 	};
 
@@ -5749,7 +5756,7 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 			.misc = 0,
 			.size = sizeof(throttle_event),
 		},
-		.time		= perf_clock(),
+		.time		= perf_event_clock(event),
 		.id		= primary_event_id(event),
 		.stream_id	= event->id,
 	};
@@ -6293,6 +6300,8 @@ static int perf_swevent_init(struct perf_event *event)
 static struct pmu perf_swevent = {
 	.task_ctx_nr	= perf_sw_context,
 
+	.capabilities	= PERF_PMU_CAP_NO_NMI,
+
 	.event_init	= perf_swevent_init,
 	.add		= perf_swevent_add,
 	.del		= perf_swevent_del,
@@ -6636,6 +6645,8 @@ static int cpu_clock_event_init(struct perf_event *event)
 static struct pmu perf_cpu_clock = {
 	.task_ctx_nr	= perf_sw_context,
 
+	.capabilities	= PERF_PMU_CAP_NO_NMI,
+
 	.event_init	= cpu_clock_event_init,
 	.add		= cpu_clock_event_add,
 	.del		= cpu_clock_event_del,
@@ -6715,6 +6726,8 @@ static int task_clock_event_init(struct perf_event *event)
 static struct pmu perf_task_clock = {
 	.task_ctx_nr	= perf_sw_context,
 
+	.capabilities	= PERF_PMU_CAP_NO_NMI,
+
 	.event_init	= task_clock_event_init,
 	.add		= task_clock_event_add,
 	.del		= task_clock_event_del,
@@ -7200,6 +7213,10 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		event->hw.target = task;
 	}
 
+	event->clock = &local_clock;
+	if (parent_event)
+		event->clock = parent_event->clock;
+
 	if (!overflow_handler && parent_event) {
 		overflow_handler = parent_event->overflow_handler;
 		context = parent_event->overflow_handler_context;
@@ -7422,6 +7439,12 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 	if (output_event->cpu == -1 && output_event->ctx != event->ctx)
 		goto out;
 
+	/*
+	 * Mixing clocks in the same buffer is trouble you don't need.
+	 */
+	if (output_event->clock != event->clock)
+		goto out;
+
 set:
 	mutex_lock(&event->mmap_mutex);
 	/* Can't redirect output if we've got an active mmap() */
@@ -7454,6 +7477,43 @@ static void mutex_lock_double(struct mutex *a, struct mutex *b)
 	mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
 }
 
+static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
+{
+	bool nmi_safe = false;
+
+	switch (clk_id) {
+	case CLOCK_MONOTONIC:
+		event->clock = &ktime_get_mono_fast_ns;
+		nmi_safe = true;
+		break;
+
+	case CLOCK_MONOTONIC_RAW:
+		event->clock = &ktime_get_raw_fast_ns;
+		nmi_safe = true;
+		break;
+
+	case CLOCK_REALTIME:
+		event->clock = &ktime_get_real_ns;
+		break;
+
+	case CLOCK_BOOTTIME:
+		event->clock = &ktime_get_boot_ns;
+		break;
+
+	case CLOCK_TAI:
+		event->clock = &ktime_get_tai_ns;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	if (!nmi_safe && !(event->pmu->capabilities & PERF_PMU_CAP_NO_NMI))
+		return -EINVAL;
+
+	return 0;
+}
+
 /**
  * sys_perf_event_open - open a performance event, associate it to a task/cpu
  *
@@ -7569,6 +7629,12 @@ SYSCALL_DEFINE5(perf_event_open,
 	 */
 	pmu = event->pmu;
 
+	if (attr.use_clockid) {
+		err = perf_event_set_clock(event, attr.clockid);
+		if (err)
+			goto err_alloc;
+	}
+
 	if (group_leader &&
 	    (is_software_event(event) != is_software_event(group_leader))) {
 		if (is_software_event(event)) {
@@ -7618,6 +7684,11 @@ SYSCALL_DEFINE5(perf_event_open,
 		 */
 		if (group_leader->group_leader != group_leader)
 			goto err_context;
+
+		/* All events in a group should have the same clock */
+		if (group_leader->clock != event->clock)
+			goto err_context;
+
 		/*
 		 * Do not allow to attach to a group in a different
 		 * task or CPU context:
-- 
cgit 


From 5fafd8748b366105e08c198892e9fe02ef15c021 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Mon, 8 Dec 2014 23:07:56 +0000
Subject: MIPS: KVM: Wire up FPU capability

Now that the code is in place for KVM to support FPU in MIPS KVM guests,
wire up the new KVM_CAP_MIPS_FPU capability.

For backwards compatibility, the capability must be explicitly enabled
in order to detect or make use of the FPU from the guest.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-mips@linux-mips.org
Cc: kvm@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: linux-doc@vger.kernel.org
---
 Documentation/virtual/kvm/api.txt | 13 +++++++++++++
 arch/mips/kvm/mips.c              | 37 +++++++++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  1 +
 3 files changed, 51 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index f3c198360785..a1e9bfa5fe9e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3312,6 +3312,19 @@ Parameters: none
 This capability enables the in-kernel irqchip for s390. Please refer to
 "4.24 KVM_CREATE_IRQCHIP" for details.
 
+6.9 KVM_CAP_MIPS_FPU
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the host Floating Point Unit by the guest. It
+allows the Config1.FP bit to be set to enable the FPU in the guest. Once this is
+done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
+(depending on the current guest FPU register mode), and the Status.FR,
+Config5.FRE bits are accessible via the KVM API and also from the guest,
+depending on them being supported by the FPU.
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 5e41afe15ae8..7f86cb73d05d 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -797,6 +797,30 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
+				     struct kvm_enable_cap *cap)
+{
+	int r = 0;
+
+	if (!kvm_vm_ioctl_check_extension(vcpu->kvm, cap->cap))
+		return -EINVAL;
+	if (cap->flags)
+		return -EINVAL;
+	if (cap->args[0])
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_MIPS_FPU:
+		vcpu->arch.fpu_enabled = true;
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+
+	return r;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 			 unsigned long arg)
 {
@@ -854,6 +878,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl,
 			r = kvm_vcpu_ioctl_interrupt(vcpu, &irq);
 			break;
 		}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
+
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap);
+		break;
+	}
 	default:
 		r = -ENOIOCTLCMD;
 	}
@@ -962,11 +995,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 
 	switch (ext) {
 	case KVM_CAP_ONE_REG:
+	case KVM_CAP_ENABLE_CAP:
 		r = 1;
 		break;
 	case KVM_CAP_COALESCED_MMIO:
 		r = KVM_COALESCED_MMIO_PAGE_OFFSET;
 		break;
+	case KVM_CAP_MIPS_FPU:
+		r = !!cpu_has_fpu;
+		break;
 	default:
 		r = 0;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1162ef7a3fa1..ce49688976d2 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -802,6 +802,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_MEM_OP 108
 #define KVM_CAP_S390_USER_STSI 109
 #define KVM_CAP_S390_SKEYS 110
+#define KVM_CAP_MIPS_FPU 111
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit 


From d952bd070f79b6dcbad52c03dbc41cbc8ba086c8 Mon Sep 17 00:00:00 2001
From: James Hogan <james.hogan@imgtec.com>
Date: Mon, 8 Dec 2014 23:07:56 +0000
Subject: MIPS: KVM: Wire up MSA capability

Now that the code is in place for KVM to support MIPS SIMD Architecutre
(MSA) in MIPS guests, wire up the new KVM_CAP_MIPS_MSA capability.

For backwards compatibility, the capability must be explicitly enabled
in order to detect or make use of MSA from the guest.

The capability is not supported if the hardware supports MSA vector
partitioning, since the extra support cannot be tested yet and it
extends the state that the userland program would have to save.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Gleb Natapov <gleb@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: linux-mips@linux-mips.org
Cc: kvm@vger.kernel.org
Cc: linux-api@vger.kernel.org
Cc: linux-doc@vger.kernel.org
---
 Documentation/virtual/kvm/api.txt | 12 ++++++++++++
 arch/mips/kvm/mips.c              | 18 ++++++++++++++++++
 include/uapi/linux/kvm.h          |  1 +
 3 files changed, 31 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 62809871814b..1490eb0ef798 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3335,6 +3335,18 @@ done the KVM_REG_MIPS_FPR_* and KVM_REG_MIPS_FCR_* registers can be accessed
 Config5.FRE bits are accessible via the KVM API and also from the guest,
 depending on them being supported by the FPU.
 
+6.10 KVM_CAP_MIPS_MSA
+
+Architectures: mips
+Target: vcpu
+Parameters: args[0] is reserved for future use (should be 0).
+
+This capability allows the use of the MIPS SIMD Architecture (MSA) by the guest.
+It allows the Config3.MSAP bit to be set to enable the use of MSA by the guest.
+Once this is done the KVM_REG_MIPS_VEC_* and KVM_REG_MIPS_MSA_* registers can be
+accessed, and the Config5.MSAEn bit is accessible via the KVM API and also from
+the guest.
+
 7. Capabilities that can be enabled on VMs
 ------------------------------------------
 
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 35d3146895f1..bb68e8d520e8 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -880,6 +880,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 	case KVM_CAP_MIPS_FPU:
 		vcpu->arch.fpu_enabled = true;
 		break;
+	case KVM_CAP_MIPS_MSA:
+		vcpu->arch.msa_enabled = true;
+		break;
 	default:
 		r = -EINVAL;
 		break;
@@ -1071,6 +1074,21 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_MIPS_FPU:
 		r = !!cpu_has_fpu;
 		break;
+	case KVM_CAP_MIPS_MSA:
+		/*
+		 * We don't support MSA vector partitioning yet:
+		 * 1) It would require explicit support which can't be tested
+		 *    yet due to lack of support in current hardware.
+		 * 2) It extends the state that would need to be saved/restored
+		 *    by e.g. QEMU for migration.
+		 *
+		 * When vector partitioning hardware becomes available, support
+		 * could be added by requiring a flag when enabling
+		 * KVM_CAP_MIPS_MSA capability to indicate that userland knows
+		 * to save/restore the appropriate extra state.
+		 */
+		r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
+		break;
 	default:
 		r = 0;
 		break;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ce49688976d2..05a2083f7a28 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -803,6 +803,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_USER_STSI 109
 #define KVM_CAP_S390_SKEYS 110
 #define KVM_CAP_MIPS_FPU 111
+#define KVM_CAP_MIPS_MSA 112
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit 


From 271c865161c57cfabca45b93eaa712b19da365bc Mon Sep 17 00:00:00 2001
From: Gerd Hoffmann <kraxel@redhat.com>
Date: Fri, 27 Mar 2015 12:46:12 +1030
Subject: Add virtio-input driver.

virtio-input is basically evdev-events-over-virtio, so this driver isn't
much more than reading configuration from config space and forwarding
incoming events to the linux input layer.

Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 MAINTAINERS                       |   6 +
 drivers/virtio/Kconfig            |  10 +
 drivers/virtio/Makefile           |   1 +
 drivers/virtio/virtio_input.c     | 384 ++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/Kbuild         |   1 +
 include/uapi/linux/virtio_ids.h   |   1 +
 include/uapi/linux/virtio_input.h |  76 ++++++++
 7 files changed, 479 insertions(+)
 create mode 100644 drivers/virtio/virtio_input.c
 create mode 100644 include/uapi/linux/virtio_input.h

(limited to 'include/uapi/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 0e1abe8cc684..585e6cdd9186 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10435,6 +10435,12 @@ S:	Maintained
 F:	drivers/vhost/
 F:	include/uapi/linux/vhost.h
 
+VIRTIO INPUT DRIVER
+M:	Gerd Hoffmann <kraxel@redhat.com>
+S:	Maintained
+F:	drivers/virtio/virtio_input.c
+F:	include/uapi/linux/virtio_input.h
+
 VIA RHINE NETWORK DRIVER
 M:	Roger Luethi <rl@hellgate.ch>
 S:	Maintained
diff --git a/drivers/virtio/Kconfig b/drivers/virtio/Kconfig
index b546da5d8ea3..cab9f3f63a38 100644
--- a/drivers/virtio/Kconfig
+++ b/drivers/virtio/Kconfig
@@ -48,6 +48,16 @@ config VIRTIO_BALLOON
 
 	 If unsure, say M.
 
+config VIRTIO_INPUT
+	tristate "Virtio input driver"
+	depends on VIRTIO
+	depends on INPUT
+	---help---
+	 This driver supports virtio input devices such as
+	 keyboards, mice and tablets.
+
+	 If unsure, say M.
+
  config VIRTIO_MMIO
 	tristate "Platform bus driver for memory mapped virtio devices"
 	depends on HAS_IOMEM
diff --git a/drivers/virtio/Makefile b/drivers/virtio/Makefile
index d85565b8ea46..41e30e3dc842 100644
--- a/drivers/virtio/Makefile
+++ b/drivers/virtio/Makefile
@@ -4,3 +4,4 @@ obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
 virtio_pci-$(CONFIG_VIRTIO_PCI_LEGACY) += virtio_pci_legacy.o
 obj-$(CONFIG_VIRTIO_BALLOON) += virtio_balloon.o
+obj-$(CONFIG_VIRTIO_INPUT) += virtio_input.o
diff --git a/drivers/virtio/virtio_input.c b/drivers/virtio/virtio_input.c
new file mode 100644
index 000000000000..60e2a1677563
--- /dev/null
+++ b/drivers/virtio/virtio_input.c
@@ -0,0 +1,384 @@
+#include <linux/module.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/input.h>
+
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_input.h>
+
+struct virtio_input {
+	struct virtio_device       *vdev;
+	struct input_dev           *idev;
+	char                       name[64];
+	char                       serial[64];
+	char                       phys[64];
+	struct virtqueue           *evt, *sts;
+	struct virtio_input_event  evts[64];
+	spinlock_t                 lock;
+	bool                       ready;
+};
+
+static void virtinput_queue_evtbuf(struct virtio_input *vi,
+				   struct virtio_input_event *evtbuf)
+{
+	struct scatterlist sg[1];
+
+	sg_init_one(sg, evtbuf, sizeof(*evtbuf));
+	virtqueue_add_inbuf(vi->evt, sg, 1, evtbuf, GFP_ATOMIC);
+}
+
+static void virtinput_recv_events(struct virtqueue *vq)
+{
+	struct virtio_input *vi = vq->vdev->priv;
+	struct virtio_input_event *event;
+	unsigned long flags;
+	unsigned int len;
+
+	spin_lock_irqsave(&vi->lock, flags);
+	if (vi->ready) {
+		while ((event = virtqueue_get_buf(vi->evt, &len)) != NULL) {
+			spin_unlock_irqrestore(&vi->lock, flags);
+			input_event(vi->idev,
+				    le16_to_cpu(event->type),
+				    le16_to_cpu(event->code),
+				    le32_to_cpu(event->value));
+			spin_lock_irqsave(&vi->lock, flags);
+			virtinput_queue_evtbuf(vi, event);
+		}
+		virtqueue_kick(vq);
+	}
+	spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+/*
+ * On error we are losing the status update, which isn't critical as
+ * this is typically used for stuff like keyboard leds.
+ */
+static int virtinput_send_status(struct virtio_input *vi,
+				 u16 type, u16 code, s32 value)
+{
+	struct virtio_input_event *stsbuf;
+	struct scatterlist sg[1];
+	unsigned long flags;
+	int rc;
+
+	stsbuf = kzalloc(sizeof(*stsbuf), GFP_ATOMIC);
+	if (!stsbuf)
+		return -ENOMEM;
+
+	stsbuf->type  = cpu_to_le16(type);
+	stsbuf->code  = cpu_to_le16(code);
+	stsbuf->value = cpu_to_le32(value);
+	sg_init_one(sg, stsbuf, sizeof(*stsbuf));
+
+	spin_lock_irqsave(&vi->lock, flags);
+	if (vi->ready) {
+		rc = virtqueue_add_outbuf(vi->sts, sg, 1, stsbuf, GFP_ATOMIC);
+		virtqueue_kick(vi->sts);
+	} else {
+		rc = -ENODEV;
+	}
+	spin_unlock_irqrestore(&vi->lock, flags);
+
+	if (rc != 0)
+		kfree(stsbuf);
+	return rc;
+}
+
+static void virtinput_recv_status(struct virtqueue *vq)
+{
+	struct virtio_input *vi = vq->vdev->priv;
+	struct virtio_input_event *stsbuf;
+	unsigned long flags;
+	unsigned int len;
+
+	spin_lock_irqsave(&vi->lock, flags);
+	while ((stsbuf = virtqueue_get_buf(vi->sts, &len)) != NULL)
+		kfree(stsbuf);
+	spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+static int virtinput_status(struct input_dev *idev, unsigned int type,
+			    unsigned int code, int value)
+{
+	struct virtio_input *vi = input_get_drvdata(idev);
+
+	return virtinput_send_status(vi, type, code, value);
+}
+
+static u8 virtinput_cfg_select(struct virtio_input *vi,
+			       u8 select, u8 subsel)
+{
+	u8 size;
+
+	virtio_cwrite(vi->vdev, struct virtio_input_config, select, &select);
+	virtio_cwrite(vi->vdev, struct virtio_input_config, subsel, &subsel);
+	virtio_cread(vi->vdev, struct virtio_input_config, size, &size);
+	return size;
+}
+
+static void virtinput_cfg_bits(struct virtio_input *vi, int select, int subsel,
+			       unsigned long *bits, unsigned int bitcount)
+{
+	unsigned int bit;
+	u8 *virtio_bits;
+	u8 bytes;
+
+	bytes = virtinput_cfg_select(vi, select, subsel);
+	if (!bytes)
+		return;
+	if (bitcount > bytes * 8)
+		bitcount = bytes * 8;
+
+	/*
+	 * Bitmap in virtio config space is a simple stream of bytes,
+	 * with the first byte carrying bits 0-7, second bits 8-15 and
+	 * so on.
+	 */
+	virtio_bits = kzalloc(bytes, GFP_KERNEL);
+	if (!virtio_bits)
+		return;
+	virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+					      u.bitmap),
+			   virtio_bits, bytes);
+	for (bit = 0; bit < bitcount; bit++) {
+		if (virtio_bits[bit / 8] & (1 << (bit % 8)))
+			__set_bit(bit, bits);
+	}
+	kfree(virtio_bits);
+
+	if (select == VIRTIO_INPUT_CFG_EV_BITS)
+		__set_bit(subsel, vi->idev->evbit);
+}
+
+static void virtinput_cfg_abs(struct virtio_input *vi, int abs)
+{
+	u32 mi, ma, re, fu, fl;
+
+	virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ABS_INFO, abs);
+	virtio_cread(vi->vdev, struct virtio_input_config, u.abs.min, &mi);
+	virtio_cread(vi->vdev, struct virtio_input_config, u.abs.max, &ma);
+	virtio_cread(vi->vdev, struct virtio_input_config, u.abs.res, &re);
+	virtio_cread(vi->vdev, struct virtio_input_config, u.abs.fuzz, &fu);
+	virtio_cread(vi->vdev, struct virtio_input_config, u.abs.flat, &fl);
+	input_set_abs_params(vi->idev, abs, mi, ma, fu, fl);
+	input_abs_set_res(vi->idev, abs, re);
+}
+
+static int virtinput_init_vqs(struct virtio_input *vi)
+{
+	struct virtqueue *vqs[2];
+	vq_callback_t *cbs[] = { virtinput_recv_events,
+				 virtinput_recv_status };
+	static const char *names[] = { "events", "status" };
+	int err;
+
+	err = vi->vdev->config->find_vqs(vi->vdev, 2, vqs, cbs, names);
+	if (err)
+		return err;
+	vi->evt = vqs[0];
+	vi->sts = vqs[1];
+
+	return 0;
+}
+
+static void virtinput_fill_evt(struct virtio_input *vi)
+{
+	unsigned long flags;
+	int i, size;
+
+	spin_lock_irqsave(&vi->lock, flags);
+	size = virtqueue_get_vring_size(vi->evt);
+	if (size > ARRAY_SIZE(vi->evts))
+		size = ARRAY_SIZE(vi->evts);
+	for (i = 0; i < size; i++)
+		virtinput_queue_evtbuf(vi, &vi->evts[i]);
+	virtqueue_kick(vi->evt);
+	spin_unlock_irqrestore(&vi->lock, flags);
+}
+
+static int virtinput_probe(struct virtio_device *vdev)
+{
+	struct virtio_input *vi;
+	unsigned long flags;
+	size_t size;
+	int abs, err;
+
+	if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
+		return -ENODEV;
+
+	vi = kzalloc(sizeof(*vi), GFP_KERNEL);
+	if (!vi)
+		return -ENOMEM;
+
+	vdev->priv = vi;
+	vi->vdev = vdev;
+	spin_lock_init(&vi->lock);
+
+	err = virtinput_init_vqs(vi);
+	if (err)
+		goto err_init_vq;
+
+	vi->idev = input_allocate_device();
+	if (!vi->idev) {
+		err = -ENOMEM;
+		goto err_input_alloc;
+	}
+	input_set_drvdata(vi->idev, vi);
+
+	size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_NAME, 0);
+	virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+					      u.string),
+			   vi->name, min(size, sizeof(vi->name)));
+	size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_SERIAL, 0);
+	virtio_cread_bytes(vi->vdev, offsetof(struct virtio_input_config,
+					      u.string),
+			   vi->serial, min(size, sizeof(vi->serial)));
+	snprintf(vi->phys, sizeof(vi->phys),
+		 "virtio%d/input0", vdev->index);
+	vi->idev->name = vi->name;
+	vi->idev->phys = vi->phys;
+	vi->idev->uniq = vi->serial;
+
+	size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_ID_DEVIDS, 0);
+	if (size >= sizeof(struct virtio_input_devids)) {
+		virtio_cread(vi->vdev, struct virtio_input_config,
+			     u.ids.bustype, &vi->idev->id.bustype);
+		virtio_cread(vi->vdev, struct virtio_input_config,
+			     u.ids.vendor, &vi->idev->id.vendor);
+		virtio_cread(vi->vdev, struct virtio_input_config,
+			     u.ids.product, &vi->idev->id.product);
+		virtio_cread(vi->vdev, struct virtio_input_config,
+			     u.ids.version, &vi->idev->id.version);
+	} else {
+		vi->idev->id.bustype = BUS_VIRTUAL;
+	}
+
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_PROP_BITS, 0,
+			   vi->idev->propbit, INPUT_PROP_CNT);
+	size = virtinput_cfg_select(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REP);
+	if (size)
+		__set_bit(EV_REP, vi->idev->evbit);
+
+	vi->idev->dev.parent = &vdev->dev;
+	vi->idev->event = virtinput_status;
+
+	/* device -> kernel */
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_KEY,
+			   vi->idev->keybit, KEY_CNT);
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_REL,
+			   vi->idev->relbit, REL_CNT);
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_ABS,
+			   vi->idev->absbit, ABS_CNT);
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_MSC,
+			   vi->idev->mscbit, MSC_CNT);
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SW,
+			   vi->idev->swbit,  SW_CNT);
+
+	/* kernel -> device */
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_LED,
+			   vi->idev->ledbit, LED_CNT);
+	virtinput_cfg_bits(vi, VIRTIO_INPUT_CFG_EV_BITS, EV_SND,
+			   vi->idev->sndbit, SND_CNT);
+
+	if (test_bit(EV_ABS, vi->idev->evbit)) {
+		for (abs = 0; abs < ABS_CNT; abs++) {
+			if (!test_bit(abs, vi->idev->absbit))
+				continue;
+			virtinput_cfg_abs(vi, abs);
+		}
+	}
+
+	virtio_device_ready(vdev);
+	vi->ready = true;
+	err = input_register_device(vi->idev);
+	if (err)
+		goto err_input_register;
+
+	virtinput_fill_evt(vi);
+	return 0;
+
+err_input_register:
+	spin_lock_irqsave(&vi->lock, flags);
+	vi->ready = false;
+	spin_unlock_irqrestore(&vi->lock, flags);
+	input_free_device(vi->idev);
+err_input_alloc:
+	vdev->config->del_vqs(vdev);
+err_init_vq:
+	kfree(vi);
+	return err;
+}
+
+static void virtinput_remove(struct virtio_device *vdev)
+{
+	struct virtio_input *vi = vdev->priv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vi->lock, flags);
+	vi->ready = false;
+	spin_unlock_irqrestore(&vi->lock, flags);
+
+	input_unregister_device(vi->idev);
+	vdev->config->del_vqs(vdev);
+	kfree(vi);
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int virtinput_freeze(struct virtio_device *vdev)
+{
+	struct virtio_input *vi = vdev->priv;
+	unsigned long flags;
+
+	spin_lock_irqsave(&vi->lock, flags);
+	vi->ready = false;
+	spin_unlock_irqrestore(&vi->lock, flags);
+
+	vdev->config->del_vqs(vdev);
+	return 0;
+}
+
+static int virtinput_restore(struct virtio_device *vdev)
+{
+	struct virtio_input *vi = vdev->priv;
+	int err;
+
+	err = virtinput_init_vqs(vi);
+	if (err)
+		return err;
+
+	virtio_device_ready(vdev);
+	vi->ready = true;
+	virtinput_fill_evt(vi);
+	return 0;
+}
+#endif
+
+static unsigned int features[] = {
+	/* none */
+};
+static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_INPUT, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+static struct virtio_driver virtio_input_driver = {
+	.driver.name         = KBUILD_MODNAME,
+	.driver.owner        = THIS_MODULE,
+	.feature_table       = features,
+	.feature_table_size  = ARRAY_SIZE(features),
+	.id_table            = id_table,
+	.probe               = virtinput_probe,
+	.remove              = virtinput_remove,
+#ifdef CONFIG_PM_SLEEP
+	.freeze	             = virtinput_freeze,
+	.restore             = virtinput_restore,
+#endif
+};
+
+module_virtio_driver(virtio_input_driver);
+MODULE_DEVICE_TABLE(virtio, id_table);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Virtio input device driver");
+MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 68ceb97c458c..04b829e9ac46 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -430,6 +430,7 @@ header-y += virtio_blk.h
 header-y += virtio_config.h
 header-y += virtio_console.h
 header-y += virtio_ids.h
+header-y += virtio_input.h
 header-y += virtio_net.h
 header-y += virtio_pci.h
 header-y += virtio_ring.h
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 284fc3a05f7b..5f60aa4be50a 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -39,5 +39,6 @@
 #define VIRTIO_ID_9P		9 /* 9p virtio console */
 #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
 #define VIRTIO_ID_CAIF	       12 /* Virtio caif */
+#define VIRTIO_ID_INPUT        18 /* virtio input */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
diff --git a/include/uapi/linux/virtio_input.h b/include/uapi/linux/virtio_input.h
new file mode 100644
index 000000000000..a7fe5c8fb135
--- /dev/null
+++ b/include/uapi/linux/virtio_input.h
@@ -0,0 +1,76 @@
+#ifndef _LINUX_VIRTIO_INPUT_H
+#define _LINUX_VIRTIO_INPUT_H
+/* This header is BSD licensed so anyone can use the definitions to implement
+ * compatible drivers/servers.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE. */
+
+#include <linux/types.h>
+
+enum virtio_input_config_select {
+	VIRTIO_INPUT_CFG_UNSET      = 0x00,
+	VIRTIO_INPUT_CFG_ID_NAME    = 0x01,
+	VIRTIO_INPUT_CFG_ID_SERIAL  = 0x02,
+	VIRTIO_INPUT_CFG_ID_DEVIDS  = 0x03,
+	VIRTIO_INPUT_CFG_PROP_BITS  = 0x10,
+	VIRTIO_INPUT_CFG_EV_BITS    = 0x11,
+	VIRTIO_INPUT_CFG_ABS_INFO   = 0x12,
+};
+
+struct virtio_input_absinfo {
+	__u32 min;
+	__u32 max;
+	__u32 fuzz;
+	__u32 flat;
+	__u32 res;
+};
+
+struct virtio_input_devids {
+	__u16 bustype;
+	__u16 vendor;
+	__u16 product;
+	__u16 version;
+};
+
+struct virtio_input_config {
+	__u8    select;
+	__u8    subsel;
+	__u8    size;
+	__u8    reserved[5];
+	union {
+		char string[128];
+		__u8 bitmap[128];
+		struct virtio_input_absinfo abs;
+		struct virtio_input_devids ids;
+	} u;
+};
+
+struct virtio_input_event {
+	__le16 type;
+	__le16 code;
+	__le32 value;
+};
+
+#endif /* _LINUX_VIRTIO_INPUT_H */
-- 
cgit 


From 608cd71a9c7c9db76e78a792c5a4101e12fea43f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 26 Mar 2015 19:53:57 -0700
Subject: tc: bpf: generalize pedit action

existing TC action 'pedit' can munge any bits of the packet.
Generalize it for use in bpf programs attached as cls_bpf and act_bpf via
bpf_skb_store_bytes() helper function.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  1 +
 include/uapi/linux/bpf.h |  1 +
 kernel/bpf/verifier.c    |  2 ++
 net/core/filter.c        | 71 ++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 73 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 280a315de8d6..d5cda067115a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -59,6 +59,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_STACK,	/* any pointer to eBPF program stack */
 	ARG_CONST_STACK_SIZE,	/* number of bytes accessed from stack */
 
+	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 27dc4ec58840..74aab6e0d964 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -168,6 +168,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
 	BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
+	BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0e714f799ec0..630a7bac1e51 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -773,6 +773,8 @@ static int check_func_arg(struct verifier_env *env, u32 regno,
 		expected_type = CONST_IMM;
 	} else if (arg_type == ARG_CONST_MAP_PTR) {
 		expected_type = CONST_PTR_TO_MAP;
+	} else if (arg_type == ARG_PTR_TO_CTX) {
+		expected_type = PTR_TO_CTX;
 	} else {
 		verbose("unsupported arg_type %d\n", arg_type);
 		return -EFAULT;
diff --git a/net/core/filter.c b/net/core/filter.c
index 32f43c59908c..444a07e4f68d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,6 +1175,56 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	unsigned int offset = (unsigned int) r2;
+	void *from = (void *) (long) r3;
+	unsigned int len = (unsigned int) r4;
+	char buf[16];
+	void *ptr;
+
+	/* bpf verifier guarantees that:
+	 * 'from' pointer points to bpf program stack
+	 * 'len' bytes of it were initialized
+	 * 'len' > 0
+	 * 'skb' is a valid pointer to 'struct sk_buff'
+	 *
+	 * so check for invalid 'offset' and too large 'len'
+	 */
+	if (offset > 0xffff || len > sizeof(buf))
+		return -EFAULT;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+		return -EFAULT;
+
+	ptr = skb_header_pointer(skb, offset, len, buf);
+	if (unlikely(!ptr))
+		return -EFAULT;
+
+	skb_postpull_rcsum(skb, ptr, len);
+
+	memcpy(ptr, from, len);
+
+	if (ptr == buf)
+		/* skb_store_bits cannot return -EFAULT here */
+		skb_store_bits(skb, offset, ptr, len);
+
+	if (skb->ip_summed == CHECKSUM_COMPLETE)
+		skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
+	return 0;
+}
+
+const struct bpf_func_proto bpf_skb_store_bytes_proto = {
+	.func		= bpf_skb_store_bytes,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_PTR_TO_STACK,
+	.arg4_type	= ARG_CONST_STACK_SIZE,
+};
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -1194,6 +1244,17 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 	}
 }
 
+static const struct bpf_func_proto *
+tc_cls_act_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_skb_store_bytes:
+		return &bpf_skb_store_bytes_proto;
+	default:
+		return sk_filter_func_proto(func_id);
+	}
+}
+
 static bool sk_filter_is_valid_access(int off, int size,
 				      enum bpf_access_type type)
 {
@@ -1270,18 +1331,24 @@ static const struct bpf_verifier_ops sk_filter_ops = {
 	.convert_ctx_access = sk_filter_convert_ctx_access,
 };
 
+static const struct bpf_verifier_ops tc_cls_act_ops = {
+	.get_func_proto = tc_cls_act_func_proto,
+	.is_valid_access = sk_filter_is_valid_access,
+	.convert_ctx_access = sk_filter_convert_ctx_access,
+};
+
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
 	.ops = &sk_filter_ops,
 	.type = BPF_PROG_TYPE_SOCKET_FILTER,
 };
 
 static struct bpf_prog_type_list sched_cls_type __read_mostly = {
-	.ops = &sk_filter_ops,
+	.ops = &tc_cls_act_ops,
 	.type = BPF_PROG_TYPE_SCHED_CLS,
 };
 
 static struct bpf_prog_type_list sched_act_type __read_mostly = {
-	.ops = &sk_filter_ops,
+	.ops = &tc_cls_act_ops,
 	.type = BPF_PROG_TYPE_SCHED_ACT,
 };
 
-- 
cgit 


From 3a323d4e17dd5a84f6ad036e6f985d263ca973ed Mon Sep 17 00:00:00 2001
From: Luciano Coelho <luciano.coelho@intel.com>
Date: Wed, 18 Mar 2015 10:47:02 +0200
Subject: nl80211: small clarification of the sched_scan delay attribute

Just clarify that the delay is only before the first cycle.

Signed-off-by: Luciano Coelho <luciano.coelho@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index ae16ba9cb1e3..241220c43e86 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1754,8 +1754,9 @@ enum nl80211_commands {
  *	should be contained in the result as the sum of the respective counters
  *	over all channels.
  *
- * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a
- *	WoWLAN net-detect scan) is started, u32 in seconds.
+ * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before the first cycle of a
+ *	scheduled scan (or a WoWLAN net-detect scan) is started, u32
+ *	in seconds.
 
  * @NL80211_ATTR_REG_INDOOR: flag attribute, if set indicates that the device
  *      is operating in an indoor environment.
-- 
cgit 


From 4b3a81a917a5ef21a4483d699cefd4d9fa35b841 Mon Sep 17 00:00:00 2001
From: Boris Brezillion <boris.brezillon@free-electrons.com>
Date: Wed, 28 Jan 2015 17:49:51 +0100
Subject: Add RGB444_1X12 and RGB565_1X16 media bus formats

Add RGB444_1X12 and RGB565_1X16 format definitions and update the
documentation.

Signed-off-by: Boris Brezillon <boris.brezillon@free-electrons.com>
Acked-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 40 ++++++++++++++++++++++
 include/uapi/linux/media-bus-format.h              |  4 ++-
 2 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index c5ea868e3909..29fe60112f4c 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -192,6 +192,24 @@ see <xref linkend="colorspaces" />.</entry>
 	    </row>
 	  </thead>
 	  <tbody valign="top">
+	    <row id="MEDIA-BUS-FMT-RGB444-1X12">
+	      <entry>MEDIA_BUS_FMT_RGB444_1X12</entry>
+	      <entry>0x100e</entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
 	    <row id="MEDIA-BUS-FMT-RGB444-2X8-PADHI-BE">
 	      <entry>MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE</entry>
 	      <entry>0x1001</entry>
@@ -304,6 +322,28 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>g<subscript>4</subscript></entry>
 	      <entry>g<subscript>3</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-RGB565-1X16">
+	      <entry>MEDIA_BUS_FMT_RGB565_1X16</entry>
+	      <entry>0x100f</entry>
+	      <entry></entry>
+	      &dash-ent-16;
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
 	    <row id="MEDIA-BUS-FMT-BGR565-2X8-BE">
 	      <entry>MEDIA_BUS_FMT_BGR565_2X8_BE</entry>
 	      <entry>0x1005</entry>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 23b40908be30..37091c668f65 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -33,11 +33,13 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x100e */
+/* RGB - next is	0x1010 */
+#define MEDIA_BUS_FMT_RGB444_1X12		0x100e
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE	0x1003
 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_LE	0x1004
+#define MEDIA_BUS_FMT_RGB565_1X16		0x100f
 #define MEDIA_BUS_FMT_BGR565_2X8_BE		0x1005
 #define MEDIA_BUS_FMT_BGR565_2X8_LE		0x1006
 #define MEDIA_BUS_FMT_RGB565_2X8_BE		0x1007
-- 
cgit 


From b295c22978b86fc62019d12f4108b68b7e795610 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Tue, 2 Dec 2014 17:49:04 +0100
Subject: Add LVDS RGB media bus formats

This patch adds three new RGB media bus formats that describe
18-bit or 24-bit samples transferred over an LVDS bus with three
or four differential data pairs, serialized into 7 time slots,
using standard SPWG/PSWG/VESA or JEIDA data ordering.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 255 +++++++++++++++++++++
 include/uapi/linux/media-bus-format.h              |   5 +-
 2 files changed, 259 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 29fe60112f4c..18449b32f240 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -622,6 +622,261 @@ see <xref linkend="colorspaces" />.</entry>
 	  </tbody>
 	</tgroup>
       </table>
+
+      <para>On LVDS buses, usually each sample is transferred serialized in
+      seven time slots per pixel clock, on three (18-bit) or four (24-bit)
+      differential data pairs at the same time. The remaining bits are used for
+      control signals as defined by SPWG/PSWG/VESA or JEIDA standards.
+      The 24-bit RGB format serialized in seven time slots on four lanes using
+      JEIDA defined bit mapping will be named
+      <constant>MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA</constant>, for example.
+      </para>
+
+      <table pgwide="0" frame="none" id="v4l2-mbus-pixelcode-rgb-lvds">
+	<title>LVDS RGB formats</title>
+	<tgroup cols="8">
+	  <colspec colname="id" align="left" />
+	  <colspec colname="code" align="center" />
+	  <colspec colname="slot" align="center" />
+	  <colspec colname="lane" />
+	  <colspec colnum="5" colname="l03" align="center" />
+	  <colspec colnum="6" colname="l02" align="center" />
+	  <colspec colnum="7" colname="l01" align="center" />
+	  <colspec colnum="8" colname="l00" align="center" />
+	  <spanspec namest="l03" nameend="l00" spanname="l0" />
+	  <thead>
+	    <row>
+	      <entry>Identifier</entry>
+	      <entry>Code</entry>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry spanname="l0">Data organization</entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>Timeslot</entry>
+	      <entry>Lane</entry>
+	      <entry>3</entry>
+	      <entry>2</entry>
+	      <entry>1</entry>
+	      <entry>0</entry>
+	    </row>
+	  </thead>
+	  <tbody valign="top">
+	    <row id="MEDIA-BUS-FMT-RGB666-1X7X3-SPWG">
+	      <entry>MEDIA_BUS_FMT_RGB666_1X7X3_SPWG</entry>
+	      <entry>0x1010</entry>
+	      <entry>0</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>d</entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>1</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>d</entry>
+	      <entry>b<subscript>0</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>2</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>d</entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>3</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>4</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>5</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>6</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	    </row>
+	    <row id="MEDIA-BUS-FMT-RGB888-1X7X4-SPWG">
+	      <entry>MEDIA_BUS_FMT_RGB888_1X7X4_SPWG</entry>
+	      <entry>0x1011</entry>
+	      <entry>0</entry>
+	      <entry></entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>1</entry>
+	      <entry></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>d</entry>
+	      <entry>b<subscript>0</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>2</entry>
+	      <entry></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>d</entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>3</entry>
+	      <entry></entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>4</entry>
+	      <entry></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>5</entry>
+	      <entry></entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>6</entry>
+	      <entry></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	    </row>
+	    <row id="MEDIA-BUS-FMT-RGB888-1X7X4-JEIDA">
+	      <entry>MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA</entry>
+	      <entry>0x1012</entry>
+	      <entry>0</entry>
+	      <entry></entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>1</entry>
+	      <entry></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>d</entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>r<subscript>7</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>2</entry>
+	      <entry></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	      <entry>d</entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>3</entry>
+	      <entry></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>4</entry>
+	      <entry></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>5</entry>
+	      <entry></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry>6</entry>
+	      <entry></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	    </row>
+	  </tbody>
+	</tgroup>
+      </table>
     </section>
 
     <section>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 37091c668f65..3fb9cbbb603f 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -33,7 +33,7 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x1010 */
+/* RGB - next is	0x1013 */
 #define MEDIA_BUS_FMT_RGB444_1X12		0x100e
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
@@ -45,9 +45,12 @@
 #define MEDIA_BUS_FMT_RGB565_2X8_BE		0x1007
 #define MEDIA_BUS_FMT_RGB565_2X8_LE		0x1008
 #define MEDIA_BUS_FMT_RGB666_1X18		0x1009
+#define MEDIA_BUS_FMT_RGB666_1X7X3_SPWG		0x1010
 #define MEDIA_BUS_FMT_RGB888_1X24		0x100a
 #define MEDIA_BUS_FMT_RGB888_2X12_BE		0x100b
 #define MEDIA_BUS_FMT_RGB888_2X12_LE		0x100c
+#define MEDIA_BUS_FMT_RGB888_1X7X4_SPWG		0x1011
+#define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA	0x1012
 #define MEDIA_BUS_FMT_ARGB8888_1X32		0x100d
 
 /* YUV (including grey) - next is	0x2024 */
-- 
cgit 


From 08c38458be7efa36a1d2dffd40500448e46d29c5 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Tue, 2 Dec 2014 15:45:25 +0100
Subject: Add BGR888_1X24 and GBR888_1X24 media bus formats

This patch adds two more 24-bit RGB formats. BGR888 is more or less common,
GBR888 is used on the internal connection between the IPU display interface
and the TVE (VGA DAC) on i.MX53 SoCs.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 60 ++++++++++++++++++++++
 include/uapi/linux/media-bus-format.h              |  4 +-
 2 files changed, 63 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 18449b32f240..805cbe1acab7 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -480,6 +480,66 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>b<subscript>1</subscript></entry>
 	      <entry>b<subscript>0</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-BGR888-1X24">
+	      <entry>MEDIA_BUS_FMT_BGR888_1X24</entry>
+	      <entry>0x1013</entry>
+	      <entry></entry>
+	      &dash-ent-8;
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	    </row>
+	    <row id="MEDIA-BUS-FMT-GBR888-1X24">
+	      <entry>MEDIA_BUS_FMT_GBR888_1X24</entry>
+	      <entry>0x1014</entry>
+	      <entry></entry>
+	      &dash-ent-8;
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	    </row>
 	    <row id="MEDIA-BUS-FMT-RGB888-1X24">
 	      <entry>MEDIA_BUS_FMT_RGB888_1X24</entry>
 	      <entry>0x100a</entry>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 3fb9cbbb603f..6f6942e8dae6 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -33,7 +33,7 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x1013 */
+/* RGB - next is	0x1015 */
 #define MEDIA_BUS_FMT_RGB444_1X12		0x100e
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
@@ -46,6 +46,8 @@
 #define MEDIA_BUS_FMT_RGB565_2X8_LE		0x1008
 #define MEDIA_BUS_FMT_RGB666_1X18		0x1009
 #define MEDIA_BUS_FMT_RGB666_1X7X3_SPWG		0x1010
+#define MEDIA_BUS_FMT_BGR888_1X24		0x1013
+#define MEDIA_BUS_FMT_GBR888_1X24		0x1014
 #define MEDIA_BUS_FMT_RGB888_1X24		0x100a
 #define MEDIA_BUS_FMT_RGB888_2X12_BE		0x100b
 #define MEDIA_BUS_FMT_RGB888_2X12_LE		0x100c
-- 
cgit 


From 0fc63eb104d76e20654cd97eeb1bfd0f35bc3d3a Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Wed, 3 Dec 2014 11:01:54 +0100
Subject: Add YUV8_1X24 media bus format

This patch adds the media bus format for a 24-bit bus format with three
8-bit YUV components.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 37 ++++++++++++++++++++++
 include/uapi/linux/media-bus-format.h              |  3 +-
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 805cbe1acab7..8d1f62402f88 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -3015,6 +3015,43 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-YUV8-1X24">
+	      <entry>MEDIA_BUS_FMT_YUV8_1X24</entry>
+	      <entry>0x2024</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>y<subscript>7</subscript></entry>
+	      <entry>y<subscript>6</subscript></entry>
+	      <entry>y<subscript>5</subscript></entry>
+	      <entry>y<subscript>4</subscript></entry>
+	      <entry>y<subscript>3</subscript></entry>
+	      <entry>y<subscript>2</subscript></entry>
+	      <entry>y<subscript>1</subscript></entry>
+	      <entry>y<subscript>0</subscript></entry>
+	      <entry>u<subscript>7</subscript></entry>
+	      <entry>u<subscript>6</subscript></entry>
+	      <entry>u<subscript>5</subscript></entry>
+	      <entry>u<subscript>4</subscript></entry>
+	      <entry>u<subscript>3</subscript></entry>
+	      <entry>u<subscript>2</subscript></entry>
+	      <entry>u<subscript>1</subscript></entry>
+	      <entry>u<subscript>0</subscript></entry>
+	      <entry>v<subscript>7</subscript></entry>
+	      <entry>v<subscript>6</subscript></entry>
+	      <entry>v<subscript>5</subscript></entry>
+	      <entry>v<subscript>4</subscript></entry>
+	      <entry>v<subscript>3</subscript></entry>
+	      <entry>v<subscript>2</subscript></entry>
+	      <entry>v<subscript>1</subscript></entry>
+	      <entry>v<subscript>0</subscript></entry>
+	    </row>
 	    <row id="MEDIA-BUS-FMT-YUV10-1X30">
 	      <entry>MEDIA_BUS_FMT_YUV10_1X30</entry>
 	      <entry>0x2016</entry>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 6f6942e8dae6..8dbf16cc5d1c 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -55,7 +55,7 @@
 #define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA	0x1012
 #define MEDIA_BUS_FMT_ARGB8888_1X32		0x100d
 
-/* YUV (including grey) - next is	0x2024 */
+/* YUV (including grey) - next is	0x2025 */
 #define MEDIA_BUS_FMT_Y8_1X8			0x2001
 #define MEDIA_BUS_FMT_UV8_1X8			0x2015
 #define MEDIA_BUS_FMT_UYVY8_1_5X8		0x2002
@@ -81,6 +81,7 @@
 #define MEDIA_BUS_FMT_VYUY10_1X20		0x201b
 #define MEDIA_BUS_FMT_YUYV10_1X20		0x200d
 #define MEDIA_BUS_FMT_YVYU10_1X20		0x200e
+#define MEDIA_BUS_FMT_YUV8_1X24			0x2024
 #define MEDIA_BUS_FMT_YUV10_1X30		0x2016
 #define MEDIA_BUS_FMT_AYUV8_1X32		0x2017
 #define MEDIA_BUS_FMT_UYVY12_2X12		0x201c
-- 
cgit 


From 203508ef52e3fee93b71262928541ecea82c735d Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Tue, 2 Dec 2014 17:56:04 +0100
Subject: Add RGB666_1X24_CPADHI media bus format

Commit 9e74d2926a28 ("staging: imx-drm: add LVDS666 support for parallel
display") describes a 24-bit bus format where three 6-bit components each
take the lower part of 8 bits with the two high bits zero padded. Add a
component-wise padded media bus format RGB666_1X24_CPADHI to support this
connection.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Tested-by: Emil Renner Berthing <kernel@esmil.dk>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 34 +++++++++++++++++++++-
 include/uapi/linux/media-bus-format.h              |  3 +-
 2 files changed, 35 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 8d1f62402f88..18b71aff48c9 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -91,7 +91,9 @@ see <xref linkend="colorspaces" />.</entry>
 	<listitem><para>For formats where the total number of bits per pixel is smaller
 	than the number of bus samples per pixel times the bus width, a padding
 	value stating if the bytes are padded in their most high order bits
-	(PADHI) or low order bits (PADLO).</para></listitem>
+	(PADHI) or low order bits (PADLO). A "C" prefix is used for component-wise
+	padding in the most high order bits (CPADHI) or low order bits (CPADLO)
+	of each separate component.</para></listitem>
 	<listitem><para>For formats where the number of bus samples per pixel is larger
 	than 1, an endianness value stating if the pixel is transferred MSB first
 	(BE) or LSB first (LE).</para></listitem>
@@ -480,6 +482,36 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>b<subscript>1</subscript></entry>
 	      <entry>b<subscript>0</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-RGB666-1X24_CPADHI">
+	      <entry>MEDIA_BUS_FMT_RGB666_1X24_CPADHI</entry>
+	      <entry>0x1015</entry>
+	      <entry></entry>
+	      &dash-ent-8;
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
 	    <row id="MEDIA-BUS-FMT-BGR888-1X24">
 	      <entry>MEDIA_BUS_FMT_BGR888_1X24</entry>
 	      <entry>0x1013</entry>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 8dbf16cc5d1c..83ea46f4be51 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -33,7 +33,7 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x1015 */
+/* RGB - next is	0x1016 */
 #define MEDIA_BUS_FMT_RGB444_1X12		0x100e
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
@@ -45,6 +45,7 @@
 #define MEDIA_BUS_FMT_RGB565_2X8_BE		0x1007
 #define MEDIA_BUS_FMT_RGB565_2X8_LE		0x1008
 #define MEDIA_BUS_FMT_RGB666_1X18		0x1009
+#define MEDIA_BUS_FMT_RGB666_1X24_CPADHI	0x1015
 #define MEDIA_BUS_FMT_RGB666_1X7X3_SPWG		0x1010
 #define MEDIA_BUS_FMT_BGR888_1X24		0x1013
 #define MEDIA_BUS_FMT_GBR888_1X24		0x1014
-- 
cgit 


From 15e318bdc6dfb82914c82fb7ad00badaa8387d8e Mon Sep 17 00:00:00 2001
From: Jiri Benc <jbenc@redhat.com>
Date: Sun, 29 Mar 2015 16:59:24 +0200
Subject: xfrm: simplify xfrm_address_t use

In many places, the a6 field is typecasted to struct in6_addr. As the
fields are in union anyway, just add in6_addr type to the union and
get rid of the typecasting.

Modifying the uapi header is okay, the union has still the same size.

Signed-off-by: Jiri Benc <jbenc@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xfrm.h         | 6 +++---
 include/uapi/linux/xfrm.h  | 2 ++
 net/ipv6/xfrm6_mode_beet.c | 4 ++--
 net/ipv6/xfrm6_policy.c    | 4 +---
 net/key/af_key.c           | 2 +-
 net/xfrm/xfrm_state.c      | 8 ++++----
 6 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index d0ac7d7be8a7..461f83539493 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1025,7 +1025,7 @@ xfrm_addr_any(const xfrm_address_t *addr, unsigned short family)
 	case AF_INET:
 		return addr->a4 == 0;
 	case AF_INET6:
-		return ipv6_addr_any((struct in6_addr *)&addr->a6);
+		return ipv6_addr_any(&addr->in6);
 	}
 	return 0;
 }
@@ -1238,8 +1238,8 @@ void xfrm_flowi_addr_get(const struct flowi *fl,
 		memcpy(&daddr->a4, &fl->u.ip4.daddr, sizeof(daddr->a4));
 		break;
 	case AF_INET6:
-		*(struct in6_addr *)saddr->a6 = fl->u.ip6.saddr;
-		*(struct in6_addr *)daddr->a6 = fl->u.ip6.daddr;
+		saddr->in6 = fl->u.ip6.saddr;
+		daddr->in6 = fl->u.ip6.daddr;
 		break;
 	}
 }
diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h
index 02d5125a5ee8..2cd9e608d0d1 100644
--- a/include/uapi/linux/xfrm.h
+++ b/include/uapi/linux/xfrm.h
@@ -1,6 +1,7 @@
 #ifndef _LINUX_XFRM_H
 #define _LINUX_XFRM_H
 
+#include <linux/in6.h>
 #include <linux/types.h>
 
 /* All of the structures in this file may not change size as they are
@@ -13,6 +14,7 @@
 typedef union {
 	__be32		a4;
 	__be32		a6[4];
+	struct in6_addr	in6;
 } xfrm_address_t;
 
 /* Ident of a specific xfrm_state. It is used on input to lookup
diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c
index 9949a356d62c..1e205c3253ac 100644
--- a/net/ipv6/xfrm6_mode_beet.c
+++ b/net/ipv6/xfrm6_mode_beet.c
@@ -95,8 +95,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
 
 	ip6h = ipv6_hdr(skb);
 	ip6h->payload_len = htons(skb->len - size);
-	ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6;
-	ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6;
+	ip6h->daddr = x->sel.daddr.in6;
+	ip6h->saddr = x->sel.saddr.in6;
 	err = 0;
 out:
 	return err;
diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
index 91d934c22a2a..f337a908a76a 100644
--- a/net/ipv6/xfrm6_policy.c
+++ b/net/ipv6/xfrm6_policy.c
@@ -61,9 +61,7 @@ static int xfrm6_get_saddr(struct net *net,
 		return -EHOSTUNREACH;
 
 	dev = ip6_dst_idev(dst)->dev;
-	ipv6_dev_get_saddr(dev_net(dev), dev,
-			   (struct in6_addr *)&daddr->a6, 0,
-			   (struct in6_addr *)&saddr->a6);
+	ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6);
 	dst_release(dst);
 	return 0;
 }
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 9255fd9d94bc..f0d52d721b3a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -709,7 +709,7 @@ static unsigned int pfkey_sockaddr_fill(const xfrm_address_t *xaddr, __be16 port
 		sin6->sin6_family = AF_INET6;
 		sin6->sin6_port = port;
 		sin6->sin6_flowinfo = 0;
-		sin6->sin6_addr = *(struct in6_addr *)xaddr->a6;
+		sin6->sin6_addr = xaddr->in6;
 		sin6->sin6_scope_id = 0;
 		return 128;
 	    }
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index de971b6d38c5..f5e39e35d73a 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1043,12 +1043,12 @@ static struct xfrm_state *__find_acq_core(struct net *net,
 			break;
 
 		case AF_INET6:
-			*(struct in6_addr *)x->sel.daddr.a6 = *(struct in6_addr *)daddr;
-			*(struct in6_addr *)x->sel.saddr.a6 = *(struct in6_addr *)saddr;
+			x->sel.daddr.in6 = daddr->in6;
+			x->sel.saddr.in6 = saddr->in6;
 			x->sel.prefixlen_d = 128;
 			x->sel.prefixlen_s = 128;
-			*(struct in6_addr *)x->props.saddr.a6 = *(struct in6_addr *)saddr;
-			*(struct in6_addr *)x->id.daddr.a6 = *(struct in6_addr *)daddr;
+			x->props.saddr.in6 = saddr->in6;
+			x->id.daddr.in6 = daddr->in6;
 			break;
 		}
 
-- 
cgit 


From 47b43c52ee4b0425449d1b2b1eedca7f6b7a578a Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Tue, 11 Nov 2014 20:57:06 +0100
Subject: KVM: s390: add ioctl to inject local interrupts

We have introduced struct kvm_s390_irq a while ago which allows to
inject all kinds of interrupts as defined in the Principles of
Operation.
Add ioctl to inject interrupts with the extended struct kvm_s390_irq

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt | 56 +++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c          | 10 +++++++
 include/uapi/linux/kvm.h          |  3 +++
 virt/kvm/kvm_main.c               |  2 +-
 4 files changed, 70 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 0d7fc66289a0..a7c651d0dc63 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2820,6 +2820,62 @@ single frame starting at start_gfn for count frames.
 Note: If any architecturally invalid key value is found in the given data then
 the ioctl will return -EINVAL.
 
+4.92 KVM_S390_IRQ
+
+Capability: KVM_CAP_S390_INJECT_IRQ
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq (in)
+Returns: 0 on success, -1 on error
+Errors:
+  EINVAL: interrupt type is invalid
+          type is KVM_S390_SIGP_STOP and flag parameter is invalid value
+          type is KVM_S390_INT_EXTERNAL_CALL and code is bigger
+            than the maximum of VCPUs
+  EBUSY:  type is KVM_S390_SIGP_SET_PREFIX and vcpu is not stopped
+          type is KVM_S390_SIGP_STOP and a stop irq is already pending
+          type is KVM_S390_INT_EXTERNAL_CALL and an external call interrupt
+            is already pending
+
+Allows to inject an interrupt to the guest.
+
+Using struct kvm_s390_irq as a parameter allows
+to inject additional payload which is not
+possible via KVM_S390_INTERRUPT.
+
+Interrupt parameters are passed via kvm_s390_irq:
+
+struct kvm_s390_irq {
+	__u64 type;
+	union {
+		struct kvm_s390_io_info io;
+		struct kvm_s390_ext_info ext;
+		struct kvm_s390_pgm_info pgm;
+		struct kvm_s390_emerg_info emerg;
+		struct kvm_s390_extcall_info extcall;
+		struct kvm_s390_prefix_info prefix;
+		struct kvm_s390_stop_info stop;
+		struct kvm_s390_mchk_info mchk;
+		char reserved[64];
+	} u;
+};
+
+type can be one of the following:
+
+KVM_S390_SIGP_STOP - sigp stop; parameter in .stop
+KVM_S390_PROGRAM_INT - program check; parameters in .pgm
+KVM_S390_SIGP_SET_PREFIX - sigp set prefix; parameters in .prefix
+KVM_S390_RESTART - restart; no parameters
+KVM_S390_INT_CLOCK_COMP - clock comparator interrupt; no parameters
+KVM_S390_INT_CPU_TIMER - CPU timer interrupt; no parameters
+KVM_S390_INT_EMERGENCY - sigp emergency; parameters in .emerg
+KVM_S390_INT_EXTERNAL_CALL - sigp external call; parameters in .extcall
+KVM_S390_MCHK - machine check interrupt; parameters in .mchk
+
+
+Note that the vcpu ioctl is asynchronous to vcpu execution.
+
+
 5. The kvm_run structure
 ------------------------
 
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index dbc9ca34d9da..8bc25d405edf 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -177,6 +177,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_IRQCHIP:
 	case KVM_CAP_VM_ATTRIBUTES:
 	case KVM_CAP_MP_STATE:
+	case KVM_CAP_S390_INJECT_IRQ:
 	case KVM_CAP_S390_USER_SIGP:
 	case KVM_CAP_S390_USER_STSI:
 	case KVM_CAP_S390_SKEYS:
@@ -2391,6 +2392,15 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 	long r;
 
 	switch (ioctl) {
+	case KVM_S390_IRQ: {
+		struct kvm_s390_irq s390irq;
+
+		r = -EFAULT;
+		if (copy_from_user(&s390irq, argp, sizeof(s390irq)))
+			break;
+		r = kvm_s390_inject_vcpu(vcpu, &s390irq);
+		break;
+	}
 	case KVM_S390_INTERRUPT: {
 		struct kvm_s390_interrupt s390int;
 		struct kvm_s390_irq s390irq;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 1162ef7a3fa1..c0632e87a00f 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -802,6 +802,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_MEM_OP 108
 #define KVM_CAP_S390_USER_STSI 109
 #define KVM_CAP_S390_SKEYS 110
+#define KVM_CAP_S390_INJECT_IRQ 113
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1182,6 +1183,8 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_S390_SKEYS */
 #define KVM_S390_GET_SKEYS      _IOW(KVMIO, 0xb2, struct kvm_s390_skeys)
 #define KVM_S390_SET_SKEYS      _IOW(KVMIO, 0xb3, struct kvm_s390_skeys)
+/* Available with KVM_CAP_S390_INJECT_IRQ */
+#define KVM_S390_IRQ              _IOW(KVMIO,  0xb4, struct kvm_s390_irq)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index a1093700f3a4..34310a8d24b9 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2118,7 +2118,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
 	 * Special cases: vcpu ioctls that are asynchronous to vcpu execution,
 	 * so vcpu_load() would break it.
 	 */
-	if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_INTERRUPT)
+	if (ioctl == KVM_S390_INTERRUPT || ioctl == KVM_S390_IRQ || ioctl == KVM_INTERRUPT)
 		return kvm_arch_vcpu_ioctl(filp, ioctl, arg);
 #endif
 
-- 
cgit 


From 816c7667ea97c61884e014cfeedaede5b67b0e58 Mon Sep 17 00:00:00 2001
From: Jens Freimann <jfrei@linux.vnet.ibm.com>
Date: Mon, 24 Nov 2014 17:13:46 +0100
Subject: KVM: s390: migrate vcpu interrupt state

This patch adds support to migrate vcpu interrupts. Two new vcpu ioctls
are added which get/set the complete status of pending interrupts in one
go. The ioctls are marked as available with the new capability
KVM_CAP_S390_IRQ_STATE.

We can not use a ONEREG, as the number of pending local interrupts is not
constant and depends on the number of CPUs.

To retrieve the interrupt state we add an ioctl KVM_S390_GET_IRQ_STATE.
Its input parameter is a pointer to a struct kvm_s390_irq_state which
has a buffer and length.  For all currently pending interrupts, we copy
a struct kvm_s390_irq into the buffer and pass it to userspace.

To store interrupt state into a buffer provided by userspace, we add an
ioctl KVM_S390_SET_IRQ_STATE. It passes a struct kvm_s390_irq_state into
the kernel and injects all interrupts contained in the buffer.

Signed-off-by: Jens Freimann <jfrei@linux.vnet.ibm.com>
Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 Documentation/virtual/kvm/api.txt |  61 +++++++++++++++++
 arch/s390/kvm/interrupt.c         | 140 ++++++++++++++++++++++++++++++++++++++
 arch/s390/kvm/kvm-s390.c          |  36 ++++++++++
 arch/s390/kvm/kvm-s390.h          |   4 ++
 include/uapi/linux/kvm.h          |  11 +++
 5 files changed, 252 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index a7c651d0dc63..18fb7630e2ad 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2875,6 +2875,67 @@ KVM_S390_MCHK - machine check interrupt; parameters in .mchk
 
 Note that the vcpu ioctl is asynchronous to vcpu execution.
 
+4.94 KVM_S390_GET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (out)
+Returns: >= number of bytes copied into buffer,
+         -EINVAL if buffer size is 0,
+         -ENOBUFS if buffer size is too small to fit all pending interrupts,
+         -EFAULT if the buffer address was invalid
+
+This ioctl allows userspace to retrieve the complete state of all currently
+pending interrupts in a single buffer. Use cases include migration
+and introspection. The parameter structure contains the address of a
+userspace buffer and its length:
+
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 flags;
+	__u32 len;
+	__u32 reserved[4];
+};
+
+Userspace passes in the above struct and for each pending interrupt a
+struct kvm_s390_irq is copied to the provided buffer.
+
+If -ENOBUFS is returned the buffer provided was too small and userspace
+may retry with a bigger buffer.
+
+4.95 KVM_S390_SET_IRQ_STATE
+
+Capability: KVM_CAP_S390_IRQ_STATE
+Architectures: s390
+Type: vcpu ioctl
+Parameters: struct kvm_s390_irq_state (in)
+Returns: 0 on success,
+         -EFAULT if the buffer address was invalid,
+         -EINVAL for an invalid buffer length (see below),
+         -EBUSY if there were already interrupts pending,
+         errors occurring when actually injecting the
+          interrupt. See KVM_S390_IRQ.
+
+This ioctl allows userspace to set the complete state of all cpu-local
+interrupts currently pending for the vcpu. It is intended for restoring
+interrupt state after a migration. The input parameter is a userspace buffer
+containing a struct kvm_s390_irq_state:
+
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 len;
+	__u32 pad;
+};
+
+The userspace memory referenced by buf contains a struct kvm_s390_irq
+for each interrupt to be injected into the guest.
+If one of the interrupts could not be injected for some reason the
+ioctl aborts.
+
+len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
+and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
+which is the maximum number of possibly pending cpu-local interrupts.
 
 5. The kvm_run structure
 ------------------------
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index bc0988093c5b..9de47265ef73 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2123,3 +2123,143 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm,
 {
 	return -EINVAL;
 }
+
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu, void __user *irqstate, int len)
+{
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	struct kvm_s390_irq *buf;
+	int r = 0;
+	int n;
+
+	buf = vmalloc(len);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user((void *) buf, irqstate, len)) {
+		r = -EFAULT;
+		goto out_free;
+	}
+
+	/*
+	 * Don't allow setting the interrupt state
+	 * when there are already interrupts pending
+	 */
+	spin_lock(&li->lock);
+	if (li->pending_irqs) {
+		r = -EBUSY;
+		goto out_unlock;
+	}
+
+	for (n = 0; n < len / sizeof(*buf); n++) {
+		r = do_inject_vcpu(vcpu, &buf[n]);
+		if (r)
+			break;
+	}
+
+out_unlock:
+	spin_unlock(&li->lock);
+out_free:
+	vfree(buf);
+
+	return r;
+}
+
+static void store_local_irq(struct kvm_s390_local_interrupt *li,
+			    struct kvm_s390_irq *irq,
+			    unsigned long irq_type)
+{
+	switch (irq_type) {
+	case IRQ_PEND_MCHK_EX:
+	case IRQ_PEND_MCHK_REP:
+		irq->type = KVM_S390_MCHK;
+		irq->u.mchk = li->irq.mchk;
+		break;
+	case IRQ_PEND_PROG:
+		irq->type = KVM_S390_PROGRAM_INT;
+		irq->u.pgm = li->irq.pgm;
+		break;
+	case IRQ_PEND_PFAULT_INIT:
+		irq->type = KVM_S390_INT_PFAULT_INIT;
+		irq->u.ext = li->irq.ext;
+		break;
+	case IRQ_PEND_EXT_EXTERNAL:
+		irq->type = KVM_S390_INT_EXTERNAL_CALL;
+		irq->u.extcall = li->irq.extcall;
+		break;
+	case IRQ_PEND_EXT_CLOCK_COMP:
+		irq->type = KVM_S390_INT_CLOCK_COMP;
+		break;
+	case IRQ_PEND_EXT_CPU_TIMER:
+		irq->type = KVM_S390_INT_CPU_TIMER;
+		break;
+	case IRQ_PEND_SIGP_STOP:
+		irq->type = KVM_S390_SIGP_STOP;
+		irq->u.stop = li->irq.stop;
+		break;
+	case IRQ_PEND_RESTART:
+		irq->type = KVM_S390_RESTART;
+		break;
+	case IRQ_PEND_SET_PREFIX:
+		irq->type = KVM_S390_SIGP_SET_PREFIX;
+		irq->u.prefix = li->irq.prefix;
+		break;
+	}
+}
+
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu, __u8 __user *buf, int len)
+{
+	uint8_t sigp_ctrl = vcpu->kvm->arch.sca->cpu[vcpu->vcpu_id].sigp_ctrl;
+	unsigned long sigp_emerg_pending[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
+	unsigned long pending_irqs;
+	struct kvm_s390_irq irq;
+	unsigned long irq_type;
+	int cpuaddr;
+	int n = 0;
+
+	spin_lock(&li->lock);
+	pending_irqs = li->pending_irqs;
+	memcpy(&sigp_emerg_pending, &li->sigp_emerg_pending,
+	       sizeof(sigp_emerg_pending));
+	spin_unlock(&li->lock);
+
+	for_each_set_bit(irq_type, &pending_irqs, IRQ_PEND_COUNT) {
+		memset(&irq, 0, sizeof(irq));
+		if (irq_type == IRQ_PEND_EXT_EMERGENCY)
+			continue;
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		store_local_irq(&vcpu->arch.local_int, &irq, irq_type);
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	if (test_bit(IRQ_PEND_EXT_EMERGENCY, &pending_irqs)) {
+		for_each_set_bit(cpuaddr, sigp_emerg_pending, KVM_MAX_VCPUS) {
+			memset(&irq, 0, sizeof(irq));
+			if (n + sizeof(irq) > len)
+				return -ENOBUFS;
+			irq.type = KVM_S390_INT_EMERGENCY;
+			irq.u.emerg.code = cpuaddr;
+			if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+				return -EFAULT;
+			n += sizeof(irq);
+		}
+	}
+
+	if ((sigp_ctrl & SIGP_CTRL_C) &&
+	    (atomic_read(&vcpu->arch.sie_block->cpuflags) &
+	     CPUSTAT_ECALL_PEND)) {
+		if (n + sizeof(irq) > len)
+			return -ENOBUFS;
+		memset(&irq, 0, sizeof(irq));
+		irq.type = KVM_S390_INT_EXTERNAL_CALL;
+		irq.u.extcall.code = sigp_ctrl & SIGP_CTRL_SCN_MASK;
+		if (copy_to_user(&buf[n], &irq, sizeof(irq)))
+			return -EFAULT;
+		n += sizeof(irq);
+	}
+
+	return n;
+}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 8bc25d405edf..3040b14751b8 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -41,6 +41,9 @@
 #include "trace-s390.h"
 
 #define MEM_OP_MAX_SIZE 65536	/* Maximum transfer size for KVM_S390_MEM_OP */
+#define LOCAL_IRQS 32
+#define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \
+			   (KVM_MAX_VCPUS + LOCAL_IRQS))
 
 #define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
 
@@ -181,6 +184,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_S390_USER_SIGP:
 	case KVM_CAP_S390_USER_STSI:
 	case KVM_CAP_S390_SKEYS:
+	case KVM_CAP_S390_IRQ_STATE:
 		r = 1;
 		break;
 	case KVM_CAP_S390_MEM_OP:
@@ -2500,6 +2504,38 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 			r = -EFAULT;
 		break;
 	}
+	case KVM_S390_SET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len > VCPU_IRQS_MAX_BUF ||
+		    irq_state.len == 0 ||
+		    irq_state.len % sizeof(struct kvm_s390_irq) > 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_set_irq_state(vcpu,
+					   (void __user *) irq_state.buf,
+					   irq_state.len);
+		break;
+	}
+	case KVM_S390_GET_IRQ_STATE: {
+		struct kvm_s390_irq_state irq_state;
+
+		r = -EFAULT;
+		if (copy_from_user(&irq_state, argp, sizeof(irq_state)))
+			break;
+		if (irq_state.len == 0) {
+			r = -EINVAL;
+			break;
+		}
+		r = kvm_s390_get_irq_state(vcpu,
+					   (__u8 __user *)  irq_state.buf,
+					   irq_state.len);
+		break;
+	}
 	default:
 		r = -ENOTTY;
 	}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 343644a59392..ca108b90ae56 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -272,6 +272,10 @@ int kvm_s390_ext_call_pending(struct kvm_vcpu *vcpu);
 extern struct kvm_device_ops kvm_flic_ops;
 int kvm_s390_is_stop_irq_pending(struct kvm_vcpu *vcpu);
 void kvm_s390_clear_stop_irq(struct kvm_vcpu *vcpu);
+int kvm_s390_set_irq_state(struct kvm_vcpu *vcpu,
+			   void __user *buf, int len);
+int kvm_s390_get_irq_state(struct kvm_vcpu *vcpu,
+			   __u8 __user *buf, int len);
 
 /* implemented in guestdbg.c */
 void kvm_s390_backup_guest_per_regs(struct kvm_vcpu *vcpu);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index c0632e87a00f..c045c725e521 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -558,6 +558,13 @@ struct kvm_s390_irq {
 	} u;
 };
 
+struct kvm_s390_irq_state {
+	__u64 buf;
+	__u32 flags;
+	__u32 len;
+	__u32 reserved[4];
+};
+
 /* for KVM_SET_GUEST_DEBUG */
 
 #define KVM_GUESTDBG_ENABLE		0x00000001
@@ -803,6 +810,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_USER_STSI 109
 #define KVM_CAP_S390_SKEYS 110
 #define KVM_CAP_S390_INJECT_IRQ 113
+#define KVM_CAP_S390_IRQ_STATE 114
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1185,6 +1193,9 @@ struct kvm_s390_ucas_mapping {
 #define KVM_S390_SET_SKEYS      _IOW(KVMIO, 0xb3, struct kvm_s390_skeys)
 /* Available with KVM_CAP_S390_INJECT_IRQ */
 #define KVM_S390_IRQ              _IOW(KVMIO,  0xb4, struct kvm_s390_irq)
+/* Available with KVM_CAP_S390_IRQ_STATE */
+#define KVM_S390_SET_IRQ_STATE	  _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
+#define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
cgit 


From 7f62fe8a5851db94e10d8d956c123d4011aaeed9 Mon Sep 17 00:00:00 2001
From: Kai Vehmanen <kai.vehmanen@nokia.com>
Date: Wed, 2 Jun 2010 22:23:34 +0300
Subject: HSI: cmt_speech: Add cmt-speech driver

Introduces the cmt-speech driver, which implements
a character device interface for transferring speech
data frames over HSI/SSI.

The driver is used to exchange voice/speech data between
the Nokia N900/N950/N9's modem and its cpu.

Signed-off-by: Kai Vehmanen <kai.vehmanen@nokia.com>
Signed-off-by: Carlos Chinea <carlos.chinea@nokia.com>
Signed-off-by: Joni Lapilainen <joni.lapilainen@gmail.com>

Since the original driver has been written for 2.6.28 some
build fixes and general cleanups have been added by me:

 * fix build for 4.0 kernel
 * replace GFP_ATOMIC with GFP_KERNEL in cs_alloc_cmds()
 * add sanity check for CS_SET_WAKELINE ioctl
 * cleanup driver initialisation
 * rename driver to cmt-speech to be consistent with
   ssi-protocol driver
 * move cs-protocol.h to include/uapi/linux/hsi, since
   it describes a userspace API
 * replace hardcoded channels numbers with values provided
   via the HSI framework (e.g. coming from DT)

Acked-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Tested-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/hsi/clients/Kconfig          |   10 +
 drivers/hsi/clients/Makefile         |    1 +
 drivers/hsi/clients/cmt_speech.c     | 1456 ++++++++++++++++++++++++++++++++++
 include/uapi/linux/hsi/Kbuild        |    2 +-
 include/uapi/linux/hsi/cs-protocol.h |  113 +++
 5 files changed, 1581 insertions(+), 1 deletion(-)
 create mode 100644 drivers/hsi/clients/cmt_speech.c
 create mode 100644 include/uapi/linux/hsi/cs-protocol.h

(limited to 'include/uapi/linux')

diff --git a/drivers/hsi/clients/Kconfig b/drivers/hsi/clients/Kconfig
index bc60dec3f586..86c849506f34 100644
--- a/drivers/hsi/clients/Kconfig
+++ b/drivers/hsi/clients/Kconfig
@@ -13,6 +13,16 @@ config NOKIA_MODEM
 
 	If unsure, say N.
 
+config CMT_SPEECH
+	tristate "CMT speech"
+	depends on HSI && SSI_PROTOCOL
+	help
+	If you say Y here, you will enable the CMT speech protocol used
+	by Nokia modems. If you say M the protocol will be available as
+	module named cmt_speech.
+
+	If unsure, say N.
+
 config SSI_PROTOCOL
 	tristate "SSI protocol"
 	depends on HSI && PHONET && OMAP_SSI
diff --git a/drivers/hsi/clients/Makefile b/drivers/hsi/clients/Makefile
index 4d5bc0e0b27b..260723266407 100644
--- a/drivers/hsi/clients/Makefile
+++ b/drivers/hsi/clients/Makefile
@@ -4,4 +4,5 @@
 
 obj-$(CONFIG_NOKIA_MODEM)	+= nokia-modem.o
 obj-$(CONFIG_SSI_PROTOCOL)	+= ssi_protocol.o
+obj-$(CONFIG_CMT_SPEECH)	+= cmt_speech.o
 obj-$(CONFIG_HSI_CHAR)		+= hsi_char.o
diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c
new file mode 100644
index 000000000000..e9560ef23092
--- /dev/null
+++ b/drivers/hsi/clients/cmt_speech.c
@@ -0,0 +1,1456 @@
+/*
+ * cmt_speech.c - HSI CMT speech driver
+ *
+ * Copyright (C) 2008,2009,2010 Nokia Corporation. All rights reserved.
+ *
+ * Contact: Kai Vehmanen <kai.vehmanen@nokia.com>
+ * Original author: Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/device.h>
+#include <linux/miscdevice.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <linux/sched.h>
+#include <linux/ioctl.h>
+#include <linux/uaccess.h>
+#include <linux/pm_qos.h>
+#include <linux/hsi/hsi.h>
+#include <linux/hsi/ssi_protocol.h>
+#include <linux/hsi/cs-protocol.h>
+
+#define CS_MMAP_SIZE	PAGE_SIZE
+
+struct char_queue {
+	struct list_head	list;
+	u32			msg;
+};
+
+struct cs_char {
+	unsigned int		opened;
+	struct hsi_client	*cl;
+	struct cs_hsi_iface	*hi;
+	struct list_head	chardev_queue;
+	struct list_head	dataind_queue;
+	int			dataind_pending;
+	/* mmap things */
+	unsigned long		mmap_base;
+	unsigned long		mmap_size;
+	spinlock_t		lock;
+	struct fasync_struct	*async_queue;
+	wait_queue_head_t	wait;
+	/* hsi channel ids */
+	int                     channel_id_cmd;
+	int                     channel_id_data;
+};
+
+#define SSI_CHANNEL_STATE_READING	1
+#define SSI_CHANNEL_STATE_WRITING	(1 << 1)
+#define SSI_CHANNEL_STATE_POLL		(1 << 2)
+#define SSI_CHANNEL_STATE_ERROR		(1 << 3)
+
+#define TARGET_MASK			0xf000000
+#define TARGET_REMOTE			(1 << CS_DOMAIN_SHIFT)
+#define TARGET_LOCAL			0
+
+/* Number of pre-allocated commands buffers */
+#define CS_MAX_CMDS		        4
+
+/*
+ * During data transfers, transactions must be handled
+ * within 20ms (fixed value in cmtspeech HSI protocol)
+ */
+#define CS_QOS_LATENCY_FOR_DATA_USEC	20000
+
+/* Timeout to wait for pending HSI transfers to complete */
+#define CS_HSI_TRANSFER_TIMEOUT_MS      500
+
+
+#define RX_PTR_BOUNDARY_SHIFT		8
+#define RX_PTR_MAX_SHIFT		(RX_PTR_BOUNDARY_SHIFT + \
+						CS_MAX_BUFFERS_SHIFT)
+struct cs_hsi_iface {
+	struct hsi_client		*cl;
+	struct hsi_client		*master;
+
+	unsigned int			iface_state;
+	unsigned int			wakeline_state;
+	unsigned int			control_state;
+	unsigned int			data_state;
+
+	/* state exposed to application */
+	struct cs_mmap_config_block	*mmap_cfg;
+
+	unsigned long			mmap_base;
+	unsigned long			mmap_size;
+
+	unsigned int			rx_slot;
+	unsigned int			tx_slot;
+
+	/* note: for security reasons, we do not trust the contents of
+	 * mmap_cfg, but instead duplicate the variables here */
+	unsigned int			buf_size;
+	unsigned int			rx_bufs;
+	unsigned int			tx_bufs;
+	unsigned int			rx_ptr_boundary;
+	unsigned int			rx_offsets[CS_MAX_BUFFERS];
+	unsigned int			tx_offsets[CS_MAX_BUFFERS];
+
+	/* size of aligned memory blocks */
+	unsigned int			slot_size;
+	unsigned int			flags;
+
+	struct list_head		cmdqueue;
+
+	struct hsi_msg			*data_rx_msg;
+	struct hsi_msg			*data_tx_msg;
+	wait_queue_head_t		datawait;
+
+	struct pm_qos_request           pm_qos_req;
+
+	spinlock_t			lock;
+};
+
+static struct cs_char cs_char_data;
+
+static void cs_hsi_read_on_control(struct cs_hsi_iface *hi);
+static void cs_hsi_read_on_data(struct cs_hsi_iface *hi);
+
+static inline void rx_ptr_shift_too_big(void)
+{
+	BUILD_BUG_ON((1LLU << RX_PTR_MAX_SHIFT) > UINT_MAX);
+}
+
+static void cs_notify(u32 message, struct list_head *head)
+{
+	struct char_queue *entry;
+
+	spin_lock(&cs_char_data.lock);
+
+	if (!cs_char_data.opened) {
+		spin_unlock(&cs_char_data.lock);
+		goto out;
+	}
+
+	entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
+	if (!entry) {
+		dev_err(&cs_char_data.cl->device,
+			"Can't allocate new entry for the queue.\n");
+		spin_unlock(&cs_char_data.lock);
+		goto out;
+	}
+
+	entry->msg = message;
+	list_add_tail(&entry->list, head);
+
+	spin_unlock(&cs_char_data.lock);
+
+	wake_up_interruptible(&cs_char_data.wait);
+	kill_fasync(&cs_char_data.async_queue, SIGIO, POLL_IN);
+
+out:
+	return;
+}
+
+static u32 cs_pop_entry(struct list_head *head)
+{
+	struct char_queue *entry;
+	u32 data;
+
+	entry = list_entry(head->next, struct char_queue, list);
+	data = entry->msg;
+	list_del(&entry->list);
+	kfree(entry);
+
+	return data;
+}
+
+static void cs_notify_control(u32 message)
+{
+	cs_notify(message, &cs_char_data.chardev_queue);
+}
+
+static void cs_notify_data(u32 message, int maxlength)
+{
+	cs_notify(message, &cs_char_data.dataind_queue);
+
+	spin_lock(&cs_char_data.lock);
+	cs_char_data.dataind_pending++;
+	while (cs_char_data.dataind_pending > maxlength &&
+				!list_empty(&cs_char_data.dataind_queue)) {
+		dev_dbg(&cs_char_data.cl->device, "data notification "
+		"queue overrun (%u entries)\n", cs_char_data.dataind_pending);
+
+		cs_pop_entry(&cs_char_data.dataind_queue);
+		cs_char_data.dataind_pending--;
+	}
+	spin_unlock(&cs_char_data.lock);
+}
+
+static inline void cs_set_cmd(struct hsi_msg *msg, u32 cmd)
+{
+	u32 *data = sg_virt(msg->sgt.sgl);
+	*data = cmd;
+}
+
+static inline u32 cs_get_cmd(struct hsi_msg *msg)
+{
+	u32 *data = sg_virt(msg->sgt.sgl);
+	return *data;
+}
+
+static void cs_release_cmd(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+
+	list_add_tail(&msg->link, &hi->cmdqueue);
+}
+
+static void cs_cmd_destructor(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+
+	spin_lock(&hi->lock);
+
+	dev_dbg(&cs_char_data.cl->device, "control cmd destructor\n");
+
+	if (hi->iface_state != CS_STATE_CLOSED)
+		dev_err(&hi->cl->device, "Cmd flushed while driver active\n");
+
+	if (msg->ttype == HSI_MSG_READ)
+		hi->control_state &=
+			~(SSI_CHANNEL_STATE_POLL | SSI_CHANNEL_STATE_READING);
+	else if (msg->ttype == HSI_MSG_WRITE &&
+			hi->control_state & SSI_CHANNEL_STATE_WRITING)
+		hi->control_state &= ~SSI_CHANNEL_STATE_WRITING;
+
+	cs_release_cmd(msg);
+
+	spin_unlock(&hi->lock);
+}
+
+static struct hsi_msg *cs_claim_cmd(struct cs_hsi_iface* ssi)
+{
+	struct hsi_msg *msg;
+
+	BUG_ON(list_empty(&ssi->cmdqueue));
+
+	msg = list_first_entry(&ssi->cmdqueue, struct hsi_msg, link);
+	list_del(&msg->link);
+	msg->destructor = cs_cmd_destructor;
+
+	return msg;
+}
+
+static void cs_free_cmds(struct cs_hsi_iface *ssi)
+{
+	struct hsi_msg *msg, *tmp;
+
+	list_for_each_entry_safe(msg, tmp, &ssi->cmdqueue, link) {
+		list_del(&msg->link);
+		msg->destructor = NULL;
+		kfree(sg_virt(msg->sgt.sgl));
+		hsi_free_msg(msg);
+	}
+}
+
+static int cs_alloc_cmds(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *msg;
+	u32 *buf;
+	unsigned int i;
+
+	INIT_LIST_HEAD(&hi->cmdqueue);
+
+	for (i = 0; i < CS_MAX_CMDS; i++) {
+		msg = hsi_alloc_msg(1, GFP_KERNEL);
+		if (!msg)
+			goto out;
+		buf = kmalloc(sizeof(*buf), GFP_KERNEL);
+		if (!buf) {
+			hsi_free_msg(msg);
+			goto out;
+		}
+		sg_init_one(msg->sgt.sgl, buf, sizeof(*buf));
+		msg->channel = cs_char_data.channel_id_cmd;
+		msg->context = hi;
+		list_add_tail(&msg->link, &hi->cmdqueue);
+	}
+
+	return 0;
+
+out:
+	cs_free_cmds(hi);
+	return -ENOMEM;
+}
+
+static void cs_hsi_data_destructor(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	const char *dir = (msg->ttype == HSI_MSG_READ) ? "TX" : "RX";
+
+	dev_dbg(&cs_char_data.cl->device, "Freeing data %s message\n", dir);
+
+	spin_lock(&hi->lock);
+	if (hi->iface_state != CS_STATE_CLOSED)
+		dev_err(&cs_char_data.cl->device,
+				"Data %s flush while device active\n", dir);
+	if (msg->ttype == HSI_MSG_READ)
+		hi->data_state &=
+			~(SSI_CHANNEL_STATE_POLL | SSI_CHANNEL_STATE_READING);
+	else
+		hi->data_state &= ~SSI_CHANNEL_STATE_WRITING;
+
+	msg->status = HSI_STATUS_COMPLETED;
+	if (unlikely(waitqueue_active(&hi->datawait)))
+		wake_up_interruptible(&hi->datawait);
+
+	spin_unlock(&hi->lock);
+}
+
+static int cs_hsi_alloc_data(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *txmsg, *rxmsg;
+	int res = 0;
+
+	rxmsg = hsi_alloc_msg(1, GFP_KERNEL);
+	if (!rxmsg) {
+		res = -ENOMEM;
+		goto out1;
+	}
+	rxmsg->channel = cs_char_data.channel_id_data;
+	rxmsg->destructor = cs_hsi_data_destructor;
+	rxmsg->context = hi;
+
+	txmsg = hsi_alloc_msg(1, GFP_KERNEL);
+	if (!txmsg) {
+		res = -ENOMEM;
+		goto out2;
+	}
+	txmsg->channel = cs_char_data.channel_id_data;
+	txmsg->destructor = cs_hsi_data_destructor;
+	txmsg->context = hi;
+
+	hi->data_rx_msg = rxmsg;
+	hi->data_tx_msg = txmsg;
+
+	return 0;
+
+out2:
+	hsi_free_msg(rxmsg);
+out1:
+	return res;
+}
+
+static void cs_hsi_free_data_msg(struct hsi_msg *msg)
+{
+	WARN_ON(msg->status != HSI_STATUS_COMPLETED &&
+					msg->status != HSI_STATUS_ERROR);
+	hsi_free_msg(msg);
+}
+
+static void cs_hsi_free_data(struct cs_hsi_iface *hi)
+{
+	cs_hsi_free_data_msg(hi->data_rx_msg);
+	cs_hsi_free_data_msg(hi->data_tx_msg);
+}
+
+static inline void __cs_hsi_error_pre(struct cs_hsi_iface *hi,
+					struct hsi_msg *msg, const char *info,
+					unsigned int *state)
+{
+	spin_lock(&hi->lock);
+	dev_err(&hi->cl->device, "HSI %s error, msg %d, state %u\n",
+		info, msg->status, *state);
+}
+
+static inline void __cs_hsi_error_post(struct cs_hsi_iface *hi)
+{
+	spin_unlock(&hi->lock);
+}
+
+static inline void __cs_hsi_error_read_bits(unsigned int *state)
+{
+	*state |= SSI_CHANNEL_STATE_ERROR;
+	*state &= ~(SSI_CHANNEL_STATE_READING | SSI_CHANNEL_STATE_POLL);
+}
+
+static inline void __cs_hsi_error_write_bits(unsigned int *state)
+{
+	*state |= SSI_CHANNEL_STATE_ERROR;
+	*state &= ~SSI_CHANNEL_STATE_WRITING;
+}
+
+static void cs_hsi_control_read_error(struct cs_hsi_iface *hi,
+							struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "control read", &hi->control_state);
+	cs_release_cmd(msg);
+	__cs_hsi_error_read_bits(&hi->control_state);
+	__cs_hsi_error_post(hi);
+}
+
+static void cs_hsi_control_write_error(struct cs_hsi_iface *hi,
+							struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "control write", &hi->control_state);
+	cs_release_cmd(msg);
+	__cs_hsi_error_write_bits(&hi->control_state);
+	__cs_hsi_error_post(hi);
+
+}
+
+static void cs_hsi_data_read_error(struct cs_hsi_iface *hi, struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "data read", &hi->data_state);
+	__cs_hsi_error_read_bits(&hi->data_state);
+	__cs_hsi_error_post(hi);
+}
+
+static void cs_hsi_data_write_error(struct cs_hsi_iface *hi,
+							struct hsi_msg *msg)
+{
+	__cs_hsi_error_pre(hi, msg, "data write", &hi->data_state);
+	__cs_hsi_error_write_bits(&hi->data_state);
+	__cs_hsi_error_post(hi);
+}
+
+static void cs_hsi_read_on_control_complete(struct hsi_msg *msg)
+{
+	u32 cmd = cs_get_cmd(msg);
+	struct cs_hsi_iface *hi = msg->context;
+
+	spin_lock(&hi->lock);
+	hi->control_state &= ~SSI_CHANNEL_STATE_READING;
+	if (msg->status == HSI_STATUS_ERROR) {
+		dev_err(&hi->cl->device, "Control RX error detected\n");
+		cs_hsi_control_read_error(hi, msg);
+		spin_unlock(&hi->lock);
+		goto out;
+	}
+	dev_dbg(&hi->cl->device, "Read on control: %08X\n", cmd);
+	cs_release_cmd(msg);
+	if (hi->flags & CS_FEAT_TSTAMP_RX_CTRL) {
+		struct timespec *tstamp =
+			&hi->mmap_cfg->tstamp_rx_ctrl;
+		do_posix_clock_monotonic_gettime(tstamp);
+	}
+	spin_unlock(&hi->lock);
+
+	cs_notify_control(cmd);
+
+out:
+	cs_hsi_read_on_control(hi);
+}
+
+static void cs_hsi_peek_on_control_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	int ret;
+
+	if (msg->status == HSI_STATUS_ERROR) {
+		dev_err(&hi->cl->device, "Control peek RX error detected\n");
+		cs_hsi_control_read_error(hi, msg);
+		return;
+	}
+
+	WARN_ON(!(hi->control_state & SSI_CHANNEL_STATE_READING));
+
+	dev_dbg(&hi->cl->device, "Peek on control complete, reading\n");
+	msg->sgt.nents = 1;
+	msg->complete = cs_hsi_read_on_control_complete;
+	ret = hsi_async_read(hi->cl, msg);
+	if (ret)
+		cs_hsi_control_read_error(hi, msg);
+}
+
+static void cs_hsi_read_on_control(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *msg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->control_state & SSI_CHANNEL_STATE_READING) {
+		dev_err(&hi->cl->device, "Control read already pending (%d)\n",
+			hi->control_state);
+		spin_unlock(&hi->lock);
+		return;
+	}
+	if (hi->control_state & SSI_CHANNEL_STATE_ERROR) {
+		dev_err(&hi->cl->device, "Control read error (%d)\n",
+			hi->control_state);
+		spin_unlock(&hi->lock);
+		return;
+	}
+	hi->control_state |= SSI_CHANNEL_STATE_READING;
+	dev_dbg(&hi->cl->device, "Issuing RX on control\n");
+	msg = cs_claim_cmd(hi);
+	spin_unlock(&hi->lock);
+
+	msg->sgt.nents = 0;
+	msg->complete = cs_hsi_peek_on_control_complete;
+	ret = hsi_async_read(hi->cl, msg);
+	if (ret)
+		cs_hsi_control_read_error(hi, msg);
+}
+
+static void cs_hsi_write_on_control_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	if (msg->status == HSI_STATUS_COMPLETED) {
+		spin_lock(&hi->lock);
+		hi->control_state &= ~SSI_CHANNEL_STATE_WRITING;
+		cs_release_cmd(msg);
+		spin_unlock(&hi->lock);
+	} else if (msg->status == HSI_STATUS_ERROR) {
+		cs_hsi_control_write_error(hi, msg);
+	} else {
+		dev_err(&hi->cl->device,
+			"unexpected status in control write callback %d\n",
+			msg->status);
+	}
+}
+
+static int cs_hsi_write_on_control(struct cs_hsi_iface *hi, u32 message)
+{
+	struct hsi_msg *msg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->control_state & SSI_CHANNEL_STATE_ERROR) {
+		spin_unlock(&hi->lock);
+		return -EIO;
+	}
+	if (hi->control_state & SSI_CHANNEL_STATE_WRITING) {
+		dev_err(&hi->cl->device,
+			"Write still pending on control channel.\n");
+		spin_unlock(&hi->lock);
+		return -EBUSY;
+	}
+	hi->control_state |= SSI_CHANNEL_STATE_WRITING;
+	msg = cs_claim_cmd(hi);
+	spin_unlock(&hi->lock);
+
+	cs_set_cmd(msg, message);
+	msg->sgt.nents = 1;
+	msg->complete = cs_hsi_write_on_control_complete;
+	dev_dbg(&hi->cl->device,
+		"Sending control message %08X\n", message);
+	ret = hsi_async_write(hi->cl, msg);
+	if (ret) {
+		dev_err(&hi->cl->device,
+			"async_write failed with %d\n", ret);
+		cs_hsi_control_write_error(hi, msg);
+	}
+
+	/*
+	 * Make sure control read is always pending when issuing
+	 * new control writes. This is needed as the controller
+	 * may flush our messages if e.g. the peer device reboots
+	 * unexpectedly (and we cannot directly resubmit a new read from
+	 * the message destructor; see cs_cmd_destructor()).
+	 */
+	if (!(hi->control_state & SSI_CHANNEL_STATE_READING)) {
+		dev_err(&hi->cl->device, "Restarting control reads\n");
+		cs_hsi_read_on_control(hi);
+	}
+
+	return 0;
+}
+
+static void cs_hsi_read_on_data_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	u32 payload;
+
+	if (unlikely(msg->status == HSI_STATUS_ERROR)) {
+		cs_hsi_data_read_error(hi, msg);
+		return;
+	}
+
+	spin_lock(&hi->lock);
+	WARN_ON(!(hi->data_state & SSI_CHANNEL_STATE_READING));
+	hi->data_state &= ~SSI_CHANNEL_STATE_READING;
+	payload = CS_RX_DATA_RECEIVED;
+	payload |= hi->rx_slot;
+	hi->rx_slot++;
+	hi->rx_slot %= hi->rx_ptr_boundary;
+	/* expose current rx ptr in mmap area */
+	hi->mmap_cfg->rx_ptr = hi->rx_slot;
+	if (unlikely(waitqueue_active(&hi->datawait)))
+		wake_up_interruptible(&hi->datawait);
+	spin_unlock(&hi->lock);
+
+	cs_notify_data(payload, hi->rx_bufs);
+	cs_hsi_read_on_data(hi);
+}
+
+static void cs_hsi_peek_on_data_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+	u32 *address;
+	int ret;
+
+	if (unlikely(msg->status == HSI_STATUS_ERROR)) {
+		cs_hsi_data_read_error(hi, msg);
+		return;
+	}
+	if (unlikely(hi->iface_state != CS_STATE_CONFIGURED)) {
+		dev_err(&hi->cl->device, "Data received in invalid state\n");
+		cs_hsi_data_read_error(hi, msg);
+		return;
+	}
+
+	spin_lock(&hi->lock);
+	WARN_ON(!(hi->data_state & SSI_CHANNEL_STATE_POLL));
+	hi->data_state &= ~SSI_CHANNEL_STATE_POLL;
+	hi->data_state |= SSI_CHANNEL_STATE_READING;
+	spin_unlock(&hi->lock);
+
+	address = (u32 *)(hi->mmap_base +
+				hi->rx_offsets[hi->rx_slot % hi->rx_bufs]);
+	sg_init_one(msg->sgt.sgl, address, hi->buf_size);
+	msg->sgt.nents = 1;
+	msg->complete = cs_hsi_read_on_data_complete;
+	ret = hsi_async_read(hi->cl, msg);
+	if (ret)
+		cs_hsi_data_read_error(hi, msg);
+}
+
+/*
+ * Read/write transaction is ongoing. Returns false if in
+ * SSI_CHANNEL_STATE_POLL state.
+ */
+static inline int cs_state_xfer_active(unsigned int state)
+{
+	return (state & SSI_CHANNEL_STATE_WRITING) ||
+		(state & SSI_CHANNEL_STATE_READING);
+}
+
+/*
+ * No pending read/writes
+ */
+static inline int cs_state_idle(unsigned int state)
+{
+	return !(state & ~SSI_CHANNEL_STATE_ERROR);
+}
+
+static void cs_hsi_read_on_data(struct cs_hsi_iface *hi)
+{
+	struct hsi_msg *rxmsg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->data_state &
+		(SSI_CHANNEL_STATE_READING | SSI_CHANNEL_STATE_POLL)) {
+		dev_dbg(&hi->cl->device, "Data read already pending (%u)\n",
+			hi->data_state);
+		spin_unlock(&hi->lock);
+		return;
+	}
+	hi->data_state |= SSI_CHANNEL_STATE_POLL;
+	spin_unlock(&hi->lock);
+
+	rxmsg = hi->data_rx_msg;
+	sg_init_one(rxmsg->sgt.sgl, (void *)hi->mmap_base, 0);
+	rxmsg->sgt.nents = 0;
+	rxmsg->complete = cs_hsi_peek_on_data_complete;
+
+	ret = hsi_async_read(hi->cl, rxmsg);
+	if (ret)
+		cs_hsi_data_read_error(hi, rxmsg);
+}
+
+static void cs_hsi_write_on_data_complete(struct hsi_msg *msg)
+{
+	struct cs_hsi_iface *hi = msg->context;
+
+	if (msg->status == HSI_STATUS_COMPLETED) {
+		spin_lock(&hi->lock);
+		hi->data_state &= ~SSI_CHANNEL_STATE_WRITING;
+		if (unlikely(waitqueue_active(&hi->datawait)))
+			wake_up_interruptible(&hi->datawait);
+		spin_unlock(&hi->lock);
+	} else {
+		cs_hsi_data_write_error(hi, msg);
+	}
+}
+
+static int cs_hsi_write_on_data(struct cs_hsi_iface *hi, unsigned int slot)
+{
+	u32 *address;
+	struct hsi_msg *txmsg;
+	int ret;
+
+	spin_lock(&hi->lock);
+	if (hi->iface_state != CS_STATE_CONFIGURED) {
+		dev_err(&hi->cl->device, "Not configured, aborting\n");
+		ret = -EINVAL;
+		goto error;
+	}
+	if (hi->data_state & SSI_CHANNEL_STATE_ERROR) {
+		dev_err(&hi->cl->device, "HSI error, aborting\n");
+		ret = -EIO;
+		goto error;
+	}
+	if (hi->data_state & SSI_CHANNEL_STATE_WRITING) {
+		dev_err(&hi->cl->device, "Write pending on data channel.\n");
+		ret = -EBUSY;
+		goto error;
+	}
+	hi->data_state |= SSI_CHANNEL_STATE_WRITING;
+	spin_unlock(&hi->lock);
+
+	hi->tx_slot = slot;
+	address = (u32 *)(hi->mmap_base + hi->tx_offsets[hi->tx_slot]);
+	txmsg = hi->data_tx_msg;
+	sg_init_one(txmsg->sgt.sgl, address, hi->buf_size);
+	txmsg->complete = cs_hsi_write_on_data_complete;
+	ret = hsi_async_write(hi->cl, txmsg);
+	if (ret)
+		cs_hsi_data_write_error(hi, txmsg);
+
+	return ret;
+
+error:
+	spin_unlock(&hi->lock);
+	if (ret == -EIO)
+		cs_hsi_data_write_error(hi, hi->data_tx_msg);
+
+	return ret;
+}
+
+static unsigned int cs_hsi_get_state(struct cs_hsi_iface *hi)
+{
+	return hi->iface_state;
+}
+
+static int cs_hsi_command(struct cs_hsi_iface *hi, u32 cmd)
+{
+	int ret = 0;
+
+	local_bh_disable();
+	switch (cmd & TARGET_MASK) {
+	case TARGET_REMOTE:
+		ret = cs_hsi_write_on_control(hi, cmd);
+		break;
+	case TARGET_LOCAL:
+		if ((cmd & CS_CMD_MASK) == CS_TX_DATA_READY)
+			ret = cs_hsi_write_on_data(hi, cmd & CS_PARAM_MASK);
+		else
+			ret = -EINVAL;
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	local_bh_enable();
+
+	return ret;
+}
+
+static void cs_hsi_set_wakeline(struct cs_hsi_iface *hi, bool new_state)
+{
+	int change = 0;
+
+	spin_lock_bh(&hi->lock);
+	if (hi->wakeline_state != new_state) {
+		hi->wakeline_state = new_state;
+		change = 1;
+		dev_dbg(&hi->cl->device, "setting wake line to %d (%p)\n",
+			new_state, hi->cl);
+	}
+	spin_unlock_bh(&hi->lock);
+
+	if (change) {
+		if (new_state)
+			ssip_slave_start_tx(hi->master);
+		else
+			ssip_slave_stop_tx(hi->master);
+	}
+
+	dev_dbg(&hi->cl->device, "wake line set to %d (%p)\n",
+		new_state, hi->cl);
+}
+
+static void set_buffer_sizes(struct cs_hsi_iface *hi, int rx_bufs, int tx_bufs)
+{
+	hi->rx_bufs = rx_bufs;
+	hi->tx_bufs = tx_bufs;
+	hi->mmap_cfg->rx_bufs = rx_bufs;
+	hi->mmap_cfg->tx_bufs = tx_bufs;
+
+	if (hi->flags & CS_FEAT_ROLLING_RX_COUNTER) {
+		/*
+		 * For more robust overrun detection, let the rx
+		 * pointer run in range 0..'boundary-1'. Boundary
+		 * is a multiple of rx_bufs, and limited in max size
+		 * by RX_PTR_MAX_SHIFT to allow for fast ptr-diff
+		 * calculation.
+		 */
+		hi->rx_ptr_boundary = (rx_bufs << RX_PTR_BOUNDARY_SHIFT);
+		hi->mmap_cfg->rx_ptr_boundary = hi->rx_ptr_boundary;
+	} else {
+		hi->rx_ptr_boundary = hi->rx_bufs;
+	}
+}
+
+static int check_buf_params(struct cs_hsi_iface *hi,
+					const struct cs_buffer_config *buf_cfg)
+{
+	size_t buf_size_aligned = L1_CACHE_ALIGN(buf_cfg->buf_size) *
+					(buf_cfg->rx_bufs + buf_cfg->tx_bufs);
+	size_t ctrl_size_aligned = L1_CACHE_ALIGN(sizeof(*hi->mmap_cfg));
+	int r = 0;
+
+	if (buf_cfg->rx_bufs > CS_MAX_BUFFERS ||
+					buf_cfg->tx_bufs > CS_MAX_BUFFERS) {
+		r = -EINVAL;
+	} else if ((buf_size_aligned + ctrl_size_aligned) >= hi->mmap_size) {
+		dev_err(&hi->cl->device, "No space for the requested buffer "
+			"configuration\n");
+		r = -ENOBUFS;
+	}
+
+	return r;
+}
+
+/**
+ * Block until pending data transfers have completed.
+ */
+static int cs_hsi_data_sync(struct cs_hsi_iface *hi)
+{
+	int r = 0;
+
+	spin_lock_bh(&hi->lock);
+
+	if (!cs_state_xfer_active(hi->data_state)) {
+		dev_dbg(&hi->cl->device, "hsi_data_sync break, idle\n");
+		goto out;
+	}
+
+	for (;;) {
+		int s;
+		DEFINE_WAIT(wait);
+		if (!cs_state_xfer_active(hi->data_state))
+			goto out;
+		if (signal_pending(current)) {
+			r = -ERESTARTSYS;
+			goto out;
+		}
+		/**
+		 * prepare_to_wait must be called with hi->lock held
+		 * so that callbacks can check for waitqueue_active()
+		 */
+		prepare_to_wait(&hi->datawait, &wait, TASK_INTERRUPTIBLE);
+		spin_unlock_bh(&hi->lock);
+		s = schedule_timeout(
+			msecs_to_jiffies(CS_HSI_TRANSFER_TIMEOUT_MS));
+		spin_lock_bh(&hi->lock);
+		finish_wait(&hi->datawait, &wait);
+		if (!s) {
+			dev_dbg(&hi->cl->device,
+				"hsi_data_sync timeout after %d ms\n",
+				CS_HSI_TRANSFER_TIMEOUT_MS);
+			r = -EIO;
+			goto out;
+		}
+	}
+
+out:
+	spin_unlock_bh(&hi->lock);
+	dev_dbg(&hi->cl->device, "hsi_data_sync done with res %d\n", r);
+
+	return r;
+}
+
+static void cs_hsi_data_enable(struct cs_hsi_iface *hi,
+					struct cs_buffer_config *buf_cfg)
+{
+	unsigned int data_start, i;
+
+	BUG_ON(hi->buf_size == 0);
+
+	set_buffer_sizes(hi, buf_cfg->rx_bufs, buf_cfg->tx_bufs);
+
+	hi->slot_size = L1_CACHE_ALIGN(hi->buf_size);
+	dev_dbg(&hi->cl->device,
+			"setting slot size to %u, buf size %u, align %u\n",
+			hi->slot_size, hi->buf_size, L1_CACHE_BYTES);
+
+	data_start = L1_CACHE_ALIGN(sizeof(*hi->mmap_cfg));
+	dev_dbg(&hi->cl->device,
+			"setting data start at %u, cfg block %u, align %u\n",
+			data_start, sizeof(*hi->mmap_cfg), L1_CACHE_BYTES);
+
+	for (i = 0; i < hi->mmap_cfg->rx_bufs; i++) {
+		hi->rx_offsets[i] = data_start + i * hi->slot_size;
+		hi->mmap_cfg->rx_offsets[i] = hi->rx_offsets[i];
+		dev_dbg(&hi->cl->device, "DL buf #%u at %u\n",
+					i, hi->rx_offsets[i]);
+	}
+	for (i = 0; i < hi->mmap_cfg->tx_bufs; i++) {
+		hi->tx_offsets[i] = data_start +
+			(i + hi->mmap_cfg->rx_bufs) * hi->slot_size;
+		hi->mmap_cfg->tx_offsets[i] = hi->tx_offsets[i];
+		dev_dbg(&hi->cl->device, "UL buf #%u at %u\n",
+					i, hi->rx_offsets[i]);
+	}
+
+	hi->iface_state = CS_STATE_CONFIGURED;
+}
+
+static void cs_hsi_data_disable(struct cs_hsi_iface *hi, int old_state)
+{
+	if (old_state == CS_STATE_CONFIGURED) {
+		dev_dbg(&hi->cl->device,
+			"closing data channel with slot size 0\n");
+		hi->iface_state = CS_STATE_OPENED;
+	}
+}
+
+static int cs_hsi_buf_config(struct cs_hsi_iface *hi,
+					struct cs_buffer_config *buf_cfg)
+{
+	int r = 0;
+	unsigned int old_state = hi->iface_state;
+
+	spin_lock_bh(&hi->lock);
+	/* Prevent new transactions during buffer reconfig */
+	if (old_state == CS_STATE_CONFIGURED)
+		hi->iface_state = CS_STATE_OPENED;
+	spin_unlock_bh(&hi->lock);
+
+	/*
+	 * make sure that no non-zero data reads are ongoing before
+	 * proceeding to change the buffer layout
+	 */
+	r = cs_hsi_data_sync(hi);
+	if (r < 0)
+		return r;
+
+	WARN_ON(cs_state_xfer_active(hi->data_state));
+
+	spin_lock_bh(&hi->lock);
+	r = check_buf_params(hi, buf_cfg);
+	if (r < 0)
+		goto error;
+
+	hi->buf_size = buf_cfg->buf_size;
+	hi->mmap_cfg->buf_size = hi->buf_size;
+	hi->flags = buf_cfg->flags;
+
+	hi->rx_slot = 0;
+	hi->tx_slot = 0;
+	hi->slot_size = 0;
+
+	if (hi->buf_size)
+		cs_hsi_data_enable(hi, buf_cfg);
+	else
+		cs_hsi_data_disable(hi, old_state);
+
+	spin_unlock_bh(&hi->lock);
+
+	if (old_state != hi->iface_state) {
+		if (hi->iface_state == CS_STATE_CONFIGURED) {
+			pm_qos_add_request(&hi->pm_qos_req,
+				PM_QOS_CPU_DMA_LATENCY,
+				CS_QOS_LATENCY_FOR_DATA_USEC);
+			local_bh_disable();
+			cs_hsi_read_on_data(hi);
+			local_bh_enable();
+		} else if (old_state == CS_STATE_CONFIGURED) {
+			pm_qos_remove_request(&hi->pm_qos_req);
+		}
+	}
+	return r;
+
+error:
+	spin_unlock_bh(&hi->lock);
+	return r;
+}
+
+static int cs_hsi_start(struct cs_hsi_iface **hi, struct hsi_client *cl,
+			unsigned long mmap_base, unsigned long mmap_size)
+{
+	int err = 0;
+	struct cs_hsi_iface *hsi_if = kzalloc(sizeof(*hsi_if), GFP_KERNEL);
+
+	dev_dbg(&cl->device, "cs_hsi_start\n");
+
+	if (!hsi_if) {
+		err = -ENOMEM;
+		goto leave0;
+	}
+	spin_lock_init(&hsi_if->lock);
+	hsi_if->cl = cl;
+	hsi_if->iface_state = CS_STATE_CLOSED;
+	hsi_if->mmap_cfg = (struct cs_mmap_config_block *)mmap_base;
+	hsi_if->mmap_base = mmap_base;
+	hsi_if->mmap_size = mmap_size;
+	memset(hsi_if->mmap_cfg, 0, sizeof(*hsi_if->mmap_cfg));
+	init_waitqueue_head(&hsi_if->datawait);
+	err = cs_alloc_cmds(hsi_if);
+	if (err < 0) {
+		dev_err(&cl->device, "Unable to alloc HSI messages\n");
+		goto leave1;
+	}
+	err = cs_hsi_alloc_data(hsi_if);
+	if (err < 0) {
+		dev_err(&cl->device, "Unable to alloc HSI messages for data\n");
+		goto leave2;
+	}
+	err = hsi_claim_port(cl, 1);
+	if (err < 0) {
+		dev_err(&cl->device,
+				"Could not open, HSI port already claimed\n");
+		goto leave3;
+	}
+	hsi_if->master = ssip_slave_get_master(cl);
+	if (IS_ERR(hsi_if->master)) {
+		dev_err(&cl->device, "Could not get HSI master client\n");
+		goto leave4;
+	}
+	if (!ssip_slave_running(hsi_if->master)) {
+		err = -ENODEV;
+		dev_err(&cl->device,
+				"HSI port not initialized\n");
+		goto leave4;
+	}
+
+	hsi_if->iface_state = CS_STATE_OPENED;
+	local_bh_disable();
+	cs_hsi_read_on_control(hsi_if);
+	local_bh_enable();
+
+	dev_dbg(&cl->device, "cs_hsi_start...done\n");
+
+	BUG_ON(!hi);
+	*hi = hsi_if;
+
+	return 0;
+
+leave4:
+	hsi_release_port(cl);
+leave3:
+	cs_hsi_free_data(hsi_if);
+leave2:
+	cs_free_cmds(hsi_if);
+leave1:
+	kfree(hsi_if);
+leave0:
+	dev_dbg(&cl->device, "cs_hsi_start...done/error\n\n");
+
+	return err;
+}
+
+static void cs_hsi_stop(struct cs_hsi_iface *hi)
+{
+	dev_dbg(&hi->cl->device, "cs_hsi_stop\n");
+	cs_hsi_set_wakeline(hi, 0);
+	ssip_slave_put_master(hi->master);
+
+	/* hsi_release_port() needs to be called with CS_STATE_CLOSED */
+	hi->iface_state = CS_STATE_CLOSED;
+	hsi_release_port(hi->cl);
+
+	/*
+	 * hsi_release_port() should flush out all the pending
+	 * messages, so cs_state_idle() should be true for both
+	 * control and data channels.
+	 */
+	WARN_ON(!cs_state_idle(hi->control_state));
+	WARN_ON(!cs_state_idle(hi->data_state));
+
+	if (pm_qos_request_active(&hi->pm_qos_req))
+		pm_qos_remove_request(&hi->pm_qos_req);
+
+	spin_lock_bh(&hi->lock);
+	cs_hsi_free_data(hi);
+	cs_free_cmds(hi);
+	spin_unlock_bh(&hi->lock);
+	kfree(hi);
+}
+
+static int cs_char_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct cs_char *csdata = vma->vm_private_data;
+	struct page *page;
+
+	page = virt_to_page(csdata->mmap_base);
+	get_page(page);
+	vmf->page = page;
+
+	return 0;
+}
+
+static struct vm_operations_struct cs_char_vm_ops = {
+	.fault	= cs_char_vma_fault,
+};
+
+static int cs_char_fasync(int fd, struct file *file, int on)
+{
+	struct cs_char *csdata = file->private_data;
+
+	if (fasync_helper(fd, file, on, &csdata->async_queue) < 0)
+		return -EIO;
+
+	return 0;
+}
+
+static unsigned int cs_char_poll(struct file *file, poll_table *wait)
+{
+	struct cs_char *csdata = file->private_data;
+	unsigned int ret = 0;
+
+	poll_wait(file, &cs_char_data.wait, wait);
+	spin_lock_bh(&csdata->lock);
+	if (!list_empty(&csdata->chardev_queue))
+		ret = POLLIN | POLLRDNORM;
+	else if (!list_empty(&csdata->dataind_queue))
+		ret = POLLIN | POLLRDNORM;
+	spin_unlock_bh(&csdata->lock);
+
+	return ret;
+}
+
+static ssize_t cs_char_read(struct file *file, char __user *buf, size_t count,
+								loff_t *unused)
+{
+	struct cs_char *csdata = file->private_data;
+	u32 data;
+	ssize_t retval;
+
+	if (count < sizeof(data))
+		return -EINVAL;
+
+	for (;;) {
+		DEFINE_WAIT(wait);
+
+		spin_lock_bh(&csdata->lock);
+		if (!list_empty(&csdata->chardev_queue)) {
+			data = cs_pop_entry(&csdata->chardev_queue);
+		} else if (!list_empty(&csdata->dataind_queue)) {
+			data = cs_pop_entry(&csdata->dataind_queue);
+			csdata->dataind_pending--;
+		} else {
+			data = 0;
+		}
+		spin_unlock_bh(&csdata->lock);
+
+		if (data)
+			break;
+		if (file->f_flags & O_NONBLOCK) {
+			retval = -EAGAIN;
+			goto out;
+		} else if (signal_pending(current)) {
+			retval = -ERESTARTSYS;
+			goto out;
+		}
+		prepare_to_wait_exclusive(&csdata->wait, &wait,
+						TASK_INTERRUPTIBLE);
+		schedule();
+		finish_wait(&csdata->wait, &wait);
+	}
+
+	retval = put_user(data, (u32 __user *)buf);
+	if (!retval)
+		retval = sizeof(data);
+
+out:
+	return retval;
+}
+
+static ssize_t cs_char_write(struct file *file, const char __user *buf,
+						size_t count, loff_t *unused)
+{
+	struct cs_char *csdata = file->private_data;
+	u32 data;
+	int err;
+	ssize_t	retval;
+
+	if (count < sizeof(data))
+		return -EINVAL;
+
+	if (get_user(data, (u32 __user *)buf))
+		retval = -EFAULT;
+	else
+		retval = count;
+
+	err = cs_hsi_command(csdata->hi, data);
+	if (err < 0)
+		retval = err;
+
+	return retval;
+}
+
+static long cs_char_ioctl(struct file *file, unsigned int cmd,
+				unsigned long arg)
+{
+	struct cs_char *csdata = file->private_data;
+	int r = 0;
+
+	switch (cmd) {
+	case CS_GET_STATE: {
+		unsigned int state;
+
+		state = cs_hsi_get_state(csdata->hi);
+		if (copy_to_user((void __user *)arg, &state, sizeof(state)))
+			r = -EFAULT;
+
+		break;
+	}
+	case CS_SET_WAKELINE: {
+		unsigned int state;
+
+		if (copy_from_user(&state, (void __user *)arg, sizeof(state))) {
+			r = -EFAULT;
+			break;
+		}
+
+		if (state > 1) {
+			r = -EINVAL;
+			break;
+		}
+
+		cs_hsi_set_wakeline(csdata->hi, !!state);
+
+		break;
+	}
+	case CS_GET_IF_VERSION: {
+		unsigned int ifver = CS_IF_VERSION;
+
+		if (copy_to_user((void __user *)arg, &ifver, sizeof(ifver)))
+			r = -EFAULT;
+
+		break;
+	}
+	case CS_CONFIG_BUFS: {
+		struct cs_buffer_config buf_cfg;
+
+		if (copy_from_user(&buf_cfg, (void __user *)arg,
+							sizeof(buf_cfg)))
+			r = -EFAULT;
+		else
+			r = cs_hsi_buf_config(csdata->hi, &buf_cfg);
+
+		break;
+	}
+	default:
+		r = -ENOTTY;
+		break;
+	}
+
+	return r;
+}
+
+static int cs_char_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	if (vma->vm_end < vma->vm_start)
+		return -EINVAL;
+
+	if (((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) != 1)
+		return -EINVAL;
+
+	vma->vm_flags |= VM_IO | VM_DONTDUMP | VM_DONTEXPAND;
+	vma->vm_ops = &cs_char_vm_ops;
+	vma->vm_private_data = file->private_data;
+
+	return 0;
+}
+
+static int cs_char_open(struct inode *unused, struct file *file)
+{
+	int ret = 0;
+	unsigned long p;
+
+	spin_lock_bh(&cs_char_data.lock);
+	if (cs_char_data.opened) {
+		ret = -EBUSY;
+		spin_unlock_bh(&cs_char_data.lock);
+		goto out1;
+	}
+	cs_char_data.opened = 1;
+	cs_char_data.dataind_pending = 0;
+	spin_unlock_bh(&cs_char_data.lock);
+
+	p = get_zeroed_page(GFP_KERNEL);
+	if (!p) {
+		ret = -ENOMEM;
+		goto out2;
+	}
+
+	ret = cs_hsi_start(&cs_char_data.hi, cs_char_data.cl, p, CS_MMAP_SIZE);
+	if (ret) {
+		dev_err(&cs_char_data.cl->device, "Unable to initialize HSI\n");
+		goto out3;
+	}
+
+	/* these are only used in release so lock not needed */
+	cs_char_data.mmap_base = p;
+	cs_char_data.mmap_size = CS_MMAP_SIZE;
+
+	file->private_data = &cs_char_data;
+
+	return 0;
+
+out3:
+	free_page(p);
+out2:
+	spin_lock_bh(&cs_char_data.lock);
+	cs_char_data.opened = 0;
+	spin_unlock_bh(&cs_char_data.lock);
+out1:
+	return ret;
+}
+
+static void cs_free_char_queue(struct list_head *head)
+{
+	struct char_queue *entry;
+	struct list_head *cursor, *next;
+
+	if (!list_empty(head)) {
+		list_for_each_safe(cursor, next, head) {
+			entry = list_entry(cursor, struct char_queue, list);
+			list_del(&entry->list);
+			kfree(entry);
+		}
+	}
+
+}
+
+static int cs_char_release(struct inode *unused, struct file *file)
+{
+	struct cs_char *csdata = file->private_data;
+
+	cs_hsi_stop(csdata->hi);
+	spin_lock_bh(&csdata->lock);
+	csdata->hi = NULL;
+	free_page(csdata->mmap_base);
+	cs_free_char_queue(&csdata->chardev_queue);
+	cs_free_char_queue(&csdata->dataind_queue);
+	csdata->opened = 0;
+	spin_unlock_bh(&csdata->lock);
+
+	return 0;
+}
+
+static const struct file_operations cs_char_fops = {
+	.owner		= THIS_MODULE,
+	.read		= cs_char_read,
+	.write		= cs_char_write,
+	.poll		= cs_char_poll,
+	.unlocked_ioctl	= cs_char_ioctl,
+	.mmap		= cs_char_mmap,
+	.open		= cs_char_open,
+	.release	= cs_char_release,
+	.fasync		= cs_char_fasync,
+};
+
+static struct miscdevice cs_char_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "cmt_speech",
+	.fops	= &cs_char_fops
+};
+
+static int cs_hsi_client_probe(struct device *dev)
+{
+	int err = 0;
+	struct hsi_client *cl = to_hsi_client(dev);
+
+	dev_dbg(dev, "hsi_client_probe\n");
+	init_waitqueue_head(&cs_char_data.wait);
+	spin_lock_init(&cs_char_data.lock);
+	cs_char_data.opened = 0;
+	cs_char_data.cl = cl;
+	cs_char_data.hi = NULL;
+	INIT_LIST_HEAD(&cs_char_data.chardev_queue);
+	INIT_LIST_HEAD(&cs_char_data.dataind_queue);
+
+	cs_char_data.channel_id_cmd = hsi_get_channel_id_by_name(cl,
+		"speech-control");
+	if (cs_char_data.channel_id_cmd < 0) {
+		err = cs_char_data.channel_id_cmd;
+		dev_err(dev, "Could not get cmd channel (%d)\n", err);
+		return err;
+	}
+
+	cs_char_data.channel_id_data = hsi_get_channel_id_by_name(cl,
+		"speech-data");
+	if (cs_char_data.channel_id_data < 0) {
+		err = cs_char_data.channel_id_data;
+		dev_err(dev, "Could not get data channel (%d)\n", err);
+		return err;
+	}
+
+	err = misc_register(&cs_char_miscdev);
+	if (err)
+		dev_err(dev, "Failed to register: %d\n", err);
+
+	return err;
+}
+
+static int cs_hsi_client_remove(struct device *dev)
+{
+	struct cs_hsi_iface *hi;
+
+	dev_dbg(dev, "hsi_client_remove\n");
+	misc_deregister(&cs_char_miscdev);
+	spin_lock_bh(&cs_char_data.lock);
+	hi = cs_char_data.hi;
+	cs_char_data.hi = NULL;
+	spin_unlock_bh(&cs_char_data.lock);
+	if (hi)
+		cs_hsi_stop(hi);
+
+	return 0;
+}
+
+static struct hsi_client_driver cs_hsi_driver = {
+	.driver = {
+		.name	= "cmt-speech",
+		.owner	= THIS_MODULE,
+		.probe	= cs_hsi_client_probe,
+		.remove	= cs_hsi_client_remove,
+	},
+};
+
+static int __init cs_char_init(void)
+{
+	pr_info("CMT speech driver added\n");
+	return hsi_register_client_driver(&cs_hsi_driver);
+}
+module_init(cs_char_init);
+
+static void __exit cs_char_exit(void)
+{
+	hsi_unregister_client_driver(&cs_hsi_driver);
+	pr_info("CMT speech driver removed\n");
+}
+module_exit(cs_char_exit);
+
+MODULE_ALIAS("hsi:cmt-speech");
+MODULE_AUTHOR("Kai Vehmanen <kai.vehmanen@nokia.com>");
+MODULE_AUTHOR("Peter Ujfalusi <peter.ujfalusi@nokia.com>");
+MODULE_DESCRIPTION("CMT speech driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/hsi/Kbuild b/include/uapi/linux/hsi/Kbuild
index 30ab3cd3b8a5..a16a00544258 100644
--- a/include/uapi/linux/hsi/Kbuild
+++ b/include/uapi/linux/hsi/Kbuild
@@ -1,2 +1,2 @@
 # UAPI Header export list
-header-y += hsi_char.h
+header-y += hsi_char.h cs-protocol.h
diff --git a/include/uapi/linux/hsi/cs-protocol.h b/include/uapi/linux/hsi/cs-protocol.h
new file mode 100644
index 000000000000..4957bba57cbe
--- /dev/null
+++ b/include/uapi/linux/hsi/cs-protocol.h
@@ -0,0 +1,113 @@
+/*
+ * cmt-speech interface definitions
+ *
+ * Copyright (C) 2008,2009,2010 Nokia Corporation. All rights reserved.
+ *
+ * Contact: Kai Vehmanen <kai.vehmanen@nokia.com>
+ * Original author: Peter Ujfalusi <peter.ujfalusi@nokia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef _CS_PROTOCOL_H
+#define _CS_PROTOCOL_H
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+/* chardev parameters */
+#define CS_DEV_FILE_NAME		"/dev/cmt_speech"
+
+/* user-space API versioning */
+#define CS_IF_VERSION			2
+
+/* APE kernel <-> user space messages */
+#define CS_CMD_SHIFT			28
+#define CS_DOMAIN_SHIFT			24
+
+#define CS_CMD_MASK			0xff000000
+#define CS_PARAM_MASK			0xffffff
+
+#define CS_CMD(id, dom) \
+	(((id) << CS_CMD_SHIFT) | ((dom) << CS_DOMAIN_SHIFT))
+
+#define CS_ERROR			CS_CMD(1, 0)
+#define CS_RX_DATA_RECEIVED		CS_CMD(2, 0)
+#define CS_TX_DATA_READY		CS_CMD(3, 0)
+#define CS_TX_DATA_SENT			CS_CMD(4, 0)
+
+/* params to CS_ERROR indication */
+#define CS_ERR_PEER_RESET		0
+
+/* ioctl interface */
+
+/* parameters to CS_CONFIG_BUFS ioctl */
+#define CS_FEAT_TSTAMP_RX_CTRL		(1 << 0)
+#define CS_FEAT_ROLLING_RX_COUNTER	(2 << 0)
+
+/* parameters to CS_GET_STATE ioctl */
+#define CS_STATE_CLOSED			0
+#define CS_STATE_OPENED			1 /* resource allocated */
+#define CS_STATE_CONFIGURED		2 /* data path active */
+
+/* maximum number of TX/RX buffers */
+#define CS_MAX_BUFFERS_SHIFT		4
+#define CS_MAX_BUFFERS			(1 << CS_MAX_BUFFERS_SHIFT)
+
+/* Parameters for setting up the data buffers */
+struct cs_buffer_config {
+	__u32 rx_bufs;	/* number of RX buffer slots */
+	__u32 tx_bufs;	/* number of TX buffer slots */
+	__u32 buf_size;	/* bytes */
+	__u32 flags;	/* see CS_FEAT_* */
+	__u32 reserved[4];
+};
+
+/*
+ * Struct describing the layout and contents of the driver mmap area.
+ * This information is meant as read-only information for the application.
+ */
+struct cs_mmap_config_block {
+	__u32 reserved1;
+	__u32 buf_size;		/* 0=disabled, otherwise the transfer size */
+	__u32 rx_bufs;		/* # of RX buffers */
+	__u32 tx_bufs;		/* # of TX buffers */
+	__u32 reserved2;
+	/* array of offsets within the mmap area for each RX and TX buffer */
+	__u32 rx_offsets[CS_MAX_BUFFERS];
+	__u32 tx_offsets[CS_MAX_BUFFERS];
+	__u32 rx_ptr;
+	__u32 rx_ptr_boundary;
+	__u32 reserved3[2];
+	/*
+	 * if enabled with CS_FEAT_TSTAMP_RX_CTRL, monotonic
+	 * timestamp taken when the last control command was received
+	 */
+	struct timespec tstamp_rx_ctrl;
+};
+
+#define CS_IO_MAGIC		'C'
+
+#define CS_IOW(num, dtype)	_IOW(CS_IO_MAGIC, num, dtype)
+#define CS_IOR(num, dtype)	_IOR(CS_IO_MAGIC, num, dtype)
+#define CS_IOWR(num, dtype)	_IOWR(CS_IO_MAGIC, num, dtype)
+#define CS_IO(num)		_IO(CS_IO_MAGIC, num)
+
+#define CS_GET_STATE		CS_IOR(21, unsigned int)
+#define CS_SET_WAKELINE		CS_IOW(23, unsigned int)
+#define CS_GET_IF_VERSION	CS_IOR(30, unsigned int)
+#define CS_CONFIG_BUFS		CS_IOW(31, struct cs_buffer_config)
+
+#endif /* _CS_PROTOCOL_H */
-- 
cgit 


From 761da2935d6e18d178582dbdf315a3a458555505 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 26 Mar 2015 12:39:36 +0000
Subject: netfilter: nf_tables: add set timeout API support

Add set timeout support to the netlink API. Sets with timeout support
enabled can have a default timeout value and garbage collection interval
specified.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  9 +++++++++
 include/uapi/linux/netfilter/nf_tables.h |  6 ++++++
 net/netfilter/nf_tables_api.c            | 30 ++++++++++++++++++++++++++++--
 3 files changed, 43 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index b8cd60dcb4e1..8936803a2ad5 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -258,6 +258,8 @@ void nft_unregister_set(struct nft_set_ops *ops);
  * 	@dtype: data type (verdict or numeric type defined by userspace)
  * 	@size: maximum set size
  * 	@nelems: number of elements
+ * 	@timeout: default timeout value in msecs
+ * 	@gc_int: garbage collection interval in msecs
  *	@policy: set parameterization (see enum nft_set_policies)
  * 	@ops: set ops
  * 	@pnet: network namespace
@@ -274,6 +276,8 @@ struct nft_set {
 	u32				dtype;
 	u32				size;
 	u32				nelems;
+	u64				timeout;
+	u32				gc_int;
 	u16				policy;
 	/* runtime data below here */
 	const struct nft_set_ops	*ops ____cacheline_aligned;
@@ -295,6 +299,11 @@ struct nft_set *nf_tables_set_lookup(const struct nft_table *table,
 struct nft_set *nf_tables_set_lookup_byid(const struct net *net,
 					  const struct nlattr *nla);
 
+static inline unsigned long nft_set_gc_interval(const struct nft_set *set)
+{
+	return set->gc_int ? msecs_to_jiffies(set->gc_int) : HZ;
+}
+
 /**
  *	struct nft_set_binding - nf_tables set binding
  *
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index b9783931503b..971d245e7378 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -208,12 +208,14 @@ enum nft_rule_compat_attributes {
  * @NFT_SET_CONSTANT: set contents may not change while bound
  * @NFT_SET_INTERVAL: set contains intervals
  * @NFT_SET_MAP: set is used as a dictionary
+ * @NFT_SET_TIMEOUT: set uses timeouts
  */
 enum nft_set_flags {
 	NFT_SET_ANONYMOUS		= 0x1,
 	NFT_SET_CONSTANT		= 0x2,
 	NFT_SET_INTERVAL		= 0x4,
 	NFT_SET_MAP			= 0x8,
+	NFT_SET_TIMEOUT			= 0x10,
 };
 
 /**
@@ -252,6 +254,8 @@ enum nft_set_desc_attributes {
  * @NFTA_SET_POLICY: selection policy (NLA_U32)
  * @NFTA_SET_DESC: set description (NLA_NESTED)
  * @NFTA_SET_ID: uniquely identifies a set in a transaction (NLA_U32)
+ * @NFTA_SET_TIMEOUT: default timeout value (NLA_U64)
+ * @NFTA_SET_GC_INTERVAL: garbage collection interval (NLA_U32)
  */
 enum nft_set_attributes {
 	NFTA_SET_UNSPEC,
@@ -265,6 +269,8 @@ enum nft_set_attributes {
 	NFTA_SET_POLICY,
 	NFTA_SET_DESC,
 	NFTA_SET_ID,
+	NFTA_SET_TIMEOUT,
+	NFTA_SET_GC_INTERVAL,
 	__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX		(__NFTA_SET_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 5604c2df05d1..6320b64e773e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2216,6 +2216,8 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = {
 	[NFTA_SET_POLICY]		= { .type = NLA_U32 },
 	[NFTA_SET_DESC]			= { .type = NLA_NESTED },
 	[NFTA_SET_ID]			= { .type = NLA_U32 },
+	[NFTA_SET_TIMEOUT]		= { .type = NLA_U64 },
+	[NFTA_SET_GC_INTERVAL]		= { .type = NLA_U32 },
 };
 
 static const struct nla_policy nft_set_desc_policy[NFTA_SET_DESC_MAX + 1] = {
@@ -2366,6 +2368,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
 			goto nla_put_failure;
 	}
 
+	if (set->timeout &&
+	    nla_put_be64(skb, NFTA_SET_TIMEOUT, cpu_to_be64(set->timeout)))
+		goto nla_put_failure;
+	if (set->gc_int &&
+	    nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+		goto nla_put_failure;
+
 	if (set->policy != NFT_SET_POL_PERFORMANCE) {
 		if (nla_put_be32(skb, NFTA_SET_POLICY, htonl(set->policy)))
 			goto nla_put_failure;
@@ -2578,7 +2587,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	char name[IFNAMSIZ];
 	unsigned int size;
 	bool create;
-	u32 ktype, dtype, flags, policy;
+	u64 timeout;
+	u32 ktype, dtype, flags, policy, gc_int;
 	struct nft_set_desc desc;
 	int err;
 
@@ -2605,7 +2615,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	if (nla[NFTA_SET_FLAGS] != NULL) {
 		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
 		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
-			      NFT_SET_INTERVAL | NFT_SET_MAP))
+			      NFT_SET_INTERVAL | NFT_SET_MAP |
+			      NFT_SET_TIMEOUT))
 			return -EINVAL;
 	}
 
@@ -2631,6 +2642,19 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	} else if (flags & NFT_SET_MAP)
 		return -EINVAL;
 
+	timeout = 0;
+	if (nla[NFTA_SET_TIMEOUT] != NULL) {
+		if (!(flags & NFT_SET_TIMEOUT))
+			return -EINVAL;
+		timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_TIMEOUT]));
+	}
+	gc_int = 0;
+	if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
+		if (!(flags & NFT_SET_TIMEOUT))
+			return -EINVAL;
+		gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+	}
+
 	policy = NFT_SET_POL_PERFORMANCE;
 	if (nla[NFTA_SET_POLICY] != NULL)
 		policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
@@ -2699,6 +2723,8 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	set->flags = flags;
 	set->size  = desc.size;
 	set->policy = policy;
+	set->timeout = timeout;
+	set->gc_int = gc_int;
 
 	err = ops->init(set, &desc, nla);
 	if (err < 0)
-- 
cgit 


From c3e1b005ed1cc068fc9d454a6e745830d55d251d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 26 Mar 2015 12:39:37 +0000
Subject: netfilter: nf_tables: add set element timeout support

Add API support for set element timeouts. Elements can have a individual
timeout value specified, overriding the sets' default.

Two new extension types are used for timeouts - the timeout value and
the expiration time. The timeout value only exists if it differs from
the default value.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 20 ++++++++++++
 include/uapi/linux/netfilter/nf_tables.h |  4 +++
 net/netfilter/nf_tables_api.c            | 53 ++++++++++++++++++++++++++++++--
 3 files changed, 75 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 8936803a2ad5..f2726c537248 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -329,12 +329,16 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
  *	@NFT_SET_EXT_KEY: element key
  *	@NFT_SET_EXT_DATA: mapping data
  *	@NFT_SET_EXT_FLAGS: element flags
+ *	@NFT_SET_EXT_TIMEOUT: element timeout
+ *	@NFT_SET_EXT_EXPIRATION: element expiration time
  *	@NFT_SET_EXT_NUM: number of extension types
  */
 enum nft_set_extensions {
 	NFT_SET_EXT_KEY,
 	NFT_SET_EXT_DATA,
 	NFT_SET_EXT_FLAGS,
+	NFT_SET_EXT_TIMEOUT,
+	NFT_SET_EXT_EXPIRATION,
 	NFT_SET_EXT_NUM
 };
 
@@ -431,6 +435,22 @@ static inline u8 *nft_set_ext_flags(const struct nft_set_ext *ext)
 	return nft_set_ext(ext, NFT_SET_EXT_FLAGS);
 }
 
+static inline u64 *nft_set_ext_timeout(const struct nft_set_ext *ext)
+{
+	return nft_set_ext(ext, NFT_SET_EXT_TIMEOUT);
+}
+
+static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ext)
+{
+	return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
+}
+
+static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
+{
+	return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
+	       time_is_before_eq_jiffies(*nft_set_ext_expiration(ext));
+}
+
 static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
 						   void *elem)
 {
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 971d245e7378..83441cc4594b 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -290,12 +290,16 @@ enum nft_set_elem_flags {
  * @NFTA_SET_ELEM_KEY: key value (NLA_NESTED: nft_data)
  * @NFTA_SET_ELEM_DATA: data value of mapping (NLA_NESTED: nft_data_attributes)
  * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
+ * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64)
+ * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
  */
 enum nft_set_elem_attributes {
 	NFTA_SET_ELEM_UNSPEC,
 	NFTA_SET_ELEM_KEY,
 	NFTA_SET_ELEM_DATA,
 	NFTA_SET_ELEM_FLAGS,
+	NFTA_SET_ELEM_TIMEOUT,
+	NFTA_SET_ELEM_EXPIRATION,
 	__NFTA_SET_ELEM_MAX
 };
 #define NFTA_SET_ELEM_MAX	(__NFTA_SET_ELEM_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 6320b64e773e..9e032dbc149c 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2863,6 +2863,14 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
 		.len	= sizeof(u8),
 		.align	= __alignof__(u8),
 	},
+	[NFT_SET_EXT_TIMEOUT]		= {
+		.len	= sizeof(u64),
+		.align	= __alignof__(u64),
+	},
+	[NFT_SET_EXT_EXPIRATION]	= {
+		.len	= sizeof(unsigned long),
+		.align	= __alignof__(unsigned long),
+	},
 };
 EXPORT_SYMBOL_GPL(nft_set_ext_types);
 
@@ -2874,6 +2882,7 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
 	[NFTA_SET_ELEM_KEY]		= { .type = NLA_NESTED },
 	[NFTA_SET_ELEM_DATA]		= { .type = NLA_NESTED },
 	[NFTA_SET_ELEM_FLAGS]		= { .type = NLA_U32 },
+	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
 };
 
 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2935,6 +2944,25 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 		         htonl(*nft_set_ext_flags(ext))))
 		goto nla_put_failure;
 
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
+	    nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+			 cpu_to_be64(*nft_set_ext_timeout(ext))))
+		goto nla_put_failure;
+
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+		unsigned long expires, now = jiffies;
+
+		expires = *nft_set_ext_expiration(ext);
+		if (time_before(now, expires))
+			expires -= now;
+		else
+			expires = 0;
+
+		if (nla_put_be64(skb, NFTA_SET_ELEM_EXPIRATION,
+				 cpu_to_be64(jiffies_to_msecs(expires))))
+			goto nla_put_failure;
+	}
+
 	nla_nest_end(skb, nest);
 	return 0;
 
@@ -3158,7 +3186,7 @@ static void *nft_set_elem_init(const struct nft_set *set,
 			       const struct nft_set_ext_tmpl *tmpl,
 			       const struct nft_data *key,
 			       const struct nft_data *data,
-			       gfp_t gfp)
+			       u64 timeout, gfp_t gfp)
 {
 	struct nft_set_ext *ext;
 	void *elem;
@@ -3173,6 +3201,11 @@ static void *nft_set_elem_init(const struct nft_set *set,
 	memcpy(nft_set_ext_key(ext), key, set->klen);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
 		memcpy(nft_set_ext_data(ext), data, set->dlen);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION))
+		*nft_set_ext_expiration(ext) =
+			jiffies + msecs_to_jiffies(timeout);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT))
+		*nft_set_ext_timeout(ext) = timeout;
 
 	return elem;
 }
@@ -3201,6 +3234,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_data data;
 	enum nft_registers dreg;
 	struct nft_trans *trans;
+	u64 timeout;
 	u32 flags;
 	int err;
 
@@ -3241,6 +3275,15 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			return -EINVAL;
 	}
 
+	timeout = 0;
+	if (nla[NFTA_SET_ELEM_TIMEOUT] != NULL) {
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EINVAL;
+		timeout = be64_to_cpu(nla_get_be64(nla[NFTA_SET_ELEM_TIMEOUT]));
+	} else if (set->flags & NFT_SET_TIMEOUT) {
+		timeout = set->timeout;
+	}
+
 	err = nft_data_init(ctx, &elem.key, &d1, nla[NFTA_SET_ELEM_KEY]);
 	if (err < 0)
 		goto err1;
@@ -3249,6 +3292,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 		goto err2;
 
 	nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
+	if (timeout > 0) {
+		nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+		if (timeout != set->timeout)
+			nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+	}
 
 	if (nla[NFTA_SET_ELEM_DATA] != NULL) {
 		err = nft_data_init(ctx, &data, &d2, nla[NFTA_SET_ELEM_DATA]);
@@ -3277,7 +3325,8 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	}
 
 	err = -ENOMEM;
-	elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data, GFP_KERNEL);
+	elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data,
+				      timeout, GFP_KERNEL);
 	if (elem.priv == NULL)
 		goto err3;
 
-- 
cgit 


From a5581ef4c2eac6449188862e903eb46c7233582a Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Wed, 1 Apr 2015 07:50:29 +0200
Subject: can: introduce new raw socket option to join the given CAN filters

The CAN_RAW socket can set multiple CAN identifier specific filters that lead
to multiple filters in the af_can.c filter processing. These filters are
indenpendent from each other which leads to logical OR'ed filters when applied.

This socket option joines the given CAN filters in the way that only CAN frames
are passed to user space that matched *all* given CAN filters. The semantic for
the applied filters is therefore changed to a logical AND.

This is useful especially when the filterset is a combination of filters where
the CAN_INV_FILTER flag is set in order to notch single CAN IDs or CAN ID
ranges from the incoming traffic.

As the raw_rcv() function is executed from NET_RX softirq the introduced
variables are implemented as per-CPU variables to avoid extensive locking at
CAN frame reception time.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/networking/can.txt | 20 ++++++++++++++++++--
 include/uapi/linux/can/raw.h     |  1 +
 net/can/raw.c                    | 31 ++++++++++++++++++++++++++++++-
 3 files changed, 49 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 0a2859a8ee7e..5abad1e921ca 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -22,7 +22,8 @@ This file contains
       4.1.3 RAW socket option CAN_RAW_LOOPBACK
       4.1.4 RAW socket option CAN_RAW_RECV_OWN_MSGS
       4.1.5 RAW socket option CAN_RAW_FD_FRAMES
-      4.1.6 RAW socket returned message flags
+      4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+      4.1.7 RAW socket returned message flags
     4.2 Broadcast Manager protocol sockets (SOCK_DGRAM)
       4.2.1 Broadcast Manager operations
       4.2.2 Broadcast Manager message flags
@@ -601,7 +602,22 @@ solution for a couple of reasons:
   CAN FD frames by checking if the device maximum transfer unit is CANFD_MTU.
   The CAN device MTU can be retrieved e.g. with a SIOCGIFMTU ioctl() syscall.
 
-  4.1.6 RAW socket returned message flags
+  4.1.6 RAW socket option CAN_RAW_JOIN_FILTERS
+
+  The CAN_RAW socket can set multiple CAN identifier specific filters that
+  lead to multiple filters in the af_can.c filter processing. These filters
+  are indenpendent from each other which leads to logical OR'ed filters when
+  applied (see 4.1.1).
+
+  This socket option joines the given CAN filters in the way that only CAN
+  frames are passed to user space that matched *all* given CAN filters. The
+  semantic for the applied filters is therefore changed to a logical AND.
+
+  This is useful especially when the filterset is a combination of filters
+  where the CAN_INV_FILTER flag is set in order to notch single CAN IDs or
+  CAN ID ranges from the incoming traffic.
+
+  4.1.7 RAW socket returned message flags
 
   When using recvmsg() call, the msg->msg_flags may contain following flags:
 
diff --git a/include/uapi/linux/can/raw.h b/include/uapi/linux/can/raw.h
index 78ec76fd89a6..8735f1080385 100644
--- a/include/uapi/linux/can/raw.h
+++ b/include/uapi/linux/can/raw.h
@@ -57,6 +57,7 @@ enum {
 	CAN_RAW_LOOPBACK,	/* local loopback (default:on)       */
 	CAN_RAW_RECV_OWN_MSGS,	/* receive my own msgs (default:off) */
 	CAN_RAW_FD_FRAMES,	/* allow CAN FD frames (default:off) */
+	CAN_RAW_JOIN_FILTERS,	/* all filters must match to trigger */
 };
 
 #endif /* !_UAPI_CAN_RAW_H */
diff --git a/net/can/raw.c b/net/can/raw.c
index 0c8d537b59b8..31b9748cbb4e 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -77,6 +77,7 @@ MODULE_ALIAS("can-proto-1");
 struct uniqframe {
 	ktime_t tstamp;
 	const struct sk_buff *skb;
+	unsigned int join_rx_count;
 };
 
 struct raw_sock {
@@ -87,6 +88,7 @@ struct raw_sock {
 	int loopback;
 	int recv_own_msgs;
 	int fd_frames;
+	int join_filters;
 	int count;                 /* number of active filters */
 	struct can_filter dfilter; /* default/single filter */
 	struct can_filter *filter; /* pointer to filter(s) */
@@ -132,10 +134,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
 	/* eliminate multiple filter matches for the same skb */
 	if (this_cpu_ptr(ro->uniq)->skb == oskb &&
 	    ktime_equal(this_cpu_ptr(ro->uniq)->tstamp, oskb->tstamp)) {
-		return;
+		if (ro->join_filters) {
+			this_cpu_inc(ro->uniq->join_rx_count);
+			/* drop frame until all enabled filters matched */
+			if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count)
+				return;
+		} else {
+			return;
+		}
 	} else {
 		this_cpu_ptr(ro->uniq)->skb = oskb;
 		this_cpu_ptr(ro->uniq)->tstamp = oskb->tstamp;
+		this_cpu_ptr(ro->uniq)->join_rx_count = 1;
+		/* drop first frame to check all enabled filters? */
+		if (ro->join_filters && ro->count > 1)
+			return;
 	}
 
 	/* clone the given skb to be able to enqueue it into the rcv queue */
@@ -311,6 +324,7 @@ static int raw_init(struct sock *sk)
 	ro->loopback         = 1;
 	ro->recv_own_msgs    = 0;
 	ro->fd_frames        = 0;
+	ro->join_filters     = 0;
 
 	/* alloc_percpu provides zero'ed memory */
 	ro->uniq = alloc_percpu(struct uniqframe);
@@ -604,6 +618,15 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
 
 		break;
 
+	case CAN_RAW_JOIN_FILTERS:
+		if (optlen != sizeof(ro->join_filters))
+			return -EINVAL;
+
+		if (copy_from_user(&ro->join_filters, optval, optlen))
+			return -EFAULT;
+
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
@@ -668,6 +691,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
 		val = &ro->fd_frames;
 		break;
 
+	case CAN_RAW_JOIN_FILTERS:
+		if (len > sizeof(int))
+			len = sizeof(int);
+		val = &ro->join_filters;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit 


From 2541517c32be2531e0da59dfd7efc1ce844644f5 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 25 Mar 2015 12:49:20 -0700
Subject: tracing, perf: Implement BPF programs attached to kprobes

BPF programs, attached to kprobes, provide a safe way to execute
user-defined BPF byte-code programs without being able to crash or
hang the kernel in any way. The BPF engine makes sure that such
programs have a finite execution time and that they cannot break
out of their sandbox.

The user interface is to attach to a kprobe via the perf syscall:

	struct perf_event_attr attr = {
		.type	= PERF_TYPE_TRACEPOINT,
		.config	= event_id,
		...
	};

	event_fd = perf_event_open(&attr,...);
	ioctl(event_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);

'prog_fd' is a file descriptor associated with BPF program
previously loaded.

'event_id' is an ID of the kprobe created.

Closing 'event_fd':

	close(event_fd);

... automatically detaches BPF program from it.

BPF programs can call in-kernel helper functions to:

  - lookup/update/delete elements in maps

  - probe_read - wraper of probe_kernel_read() used to access any
    kernel data structures

BPF programs receive 'struct pt_regs *' as an input ('struct pt_regs' is
architecture dependent) and return 0 to ignore the event and 1 to store
kprobe event into the ring buffer.

Note, kprobes are a fundamentally _not_ a stable kernel ABI,
so BPF programs attached to kprobes must be recompiled for
every kernel version and user must supply correct LINUX_VERSION_CODE
in attr.kern_version during bpf_prog_load() call.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Reviewed-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-4-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/ftrace_event.h    |  11 ++++
 include/uapi/linux/bpf.h        |   3 +
 include/uapi/linux/perf_event.h |   1 +
 kernel/bpf/syscall.c            |   7 ++-
 kernel/events/core.c            |  59 ++++++++++++++++++
 kernel/trace/Makefile           |   1 +
 kernel/trace/bpf_trace.c        | 130 ++++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_kprobe.c     |   8 +++
 8 files changed, 219 insertions(+), 1 deletion(-)
 create mode 100644 kernel/trace/bpf_trace.c

(limited to 'include/uapi/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 77325e1a1816..0aa535bc9f05 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -13,6 +13,7 @@ struct trace_array;
 struct trace_buffer;
 struct tracer;
 struct dentry;
+struct bpf_prog;
 
 struct trace_print_flags {
 	unsigned long		mask;
@@ -306,6 +307,7 @@ struct ftrace_event_call {
 #ifdef CONFIG_PERF_EVENTS
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
+	struct bpf_prog			*prog;
 
 	int	(*perf_perm)(struct ftrace_event_call *,
 			     struct perf_event *);
@@ -551,6 +553,15 @@ event_trigger_unlock_commit_regs(struct ftrace_event_file *file,
 		event_triggers_post_call(file, tt);
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx);
+#else
+static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+	return 1;
+}
+#endif
+
 enum {
 	FILTER_OTHER = 0,
 	FILTER_STATIC_STRING,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 45da7ec7d274..b2948feeb70b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -118,6 +118,7 @@ enum bpf_map_type {
 enum bpf_prog_type {
 	BPF_PROG_TYPE_UNSPEC,
 	BPF_PROG_TYPE_SOCKET_FILTER,
+	BPF_PROG_TYPE_KPROBE,
 };
 
 /* flags for BPF_MAP_UPDATE_ELEM command */
@@ -151,6 +152,7 @@ union bpf_attr {
 		__u32		log_level;	/* verbosity level of verifier */
 		__u32		log_size;	/* size of user buffer */
 		__aligned_u64	log_buf;	/* user supplied buffer */
+		__u32		kern_version;	/* checked when prog_type=kprobe */
 	};
 } __attribute__((aligned(8)));
 
@@ -162,6 +164,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_lookup_elem, /* void *map_lookup_elem(&map, &key) */
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
+	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 3bb40ddadbe5..91803e54ee73 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -381,6 +381,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_SET_OUTPUT	_IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER	_IOW('$', 6, char *)
 #define PERF_EVENT_IOC_ID		_IOR('$', 7, __u64 *)
+#define PERF_EVENT_IOC_SET_BPF		_IOW('$', 8, __u32)
 
 enum perf_event_ioc_flags {
 	PERF_IOC_FLAG_GROUP		= 1U << 0,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 536edc2be307..504c10b990ef 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/license.h>
 #include <linux/filter.h>
+#include <linux/version.h>
 
 static LIST_HEAD(bpf_map_types);
 
@@ -467,7 +468,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd)
 }
 
 /* last field in 'union bpf_attr' used by this command */
-#define	BPF_PROG_LOAD_LAST_FIELD log_buf
+#define	BPF_PROG_LOAD_LAST_FIELD kern_version
 
 static int bpf_prog_load(union bpf_attr *attr)
 {
@@ -492,6 +493,10 @@ static int bpf_prog_load(union bpf_attr *attr)
 	if (attr->insn_cnt >= BPF_MAXINSNS)
 		return -EINVAL;
 
+	if (type == BPF_PROG_TYPE_KPROBE &&
+	    attr->kern_version != LINUX_VERSION_CODE)
+		return -EINVAL;
+
 	/* plain bpf_prog allocation */
 	prog = bpf_prog_alloc(bpf_prog_size(attr->insn_cnt), GFP_USER);
 	if (!prog)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index c40c2cac2d8e..5c13862d3e85 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -42,6 +42,8 @@
 #include <linux/module.h>
 #include <linux/mman.h>
 #include <linux/compat.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
 
 #include "internal.h"
 
@@ -3407,6 +3409,7 @@ errout:
 }
 
 static void perf_event_free_filter(struct perf_event *event);
+static void perf_event_free_bpf_prog(struct perf_event *event);
 
 static void free_event_rcu(struct rcu_head *head)
 {
@@ -3416,6 +3419,7 @@ static void free_event_rcu(struct rcu_head *head)
 	if (event->ns)
 		put_pid_ns(event->ns);
 	perf_event_free_filter(event);
+	perf_event_free_bpf_prog(event);
 	kfree(event);
 }
 
@@ -3928,6 +3932,7 @@ static inline int perf_fget_light(int fd, struct fd *p)
 static int perf_event_set_output(struct perf_event *event,
 				 struct perf_event *output_event);
 static int perf_event_set_filter(struct perf_event *event, void __user *arg);
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd);
 
 static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned long arg)
 {
@@ -3981,6 +3986,9 @@ static long _perf_ioctl(struct perf_event *event, unsigned int cmd, unsigned lon
 	case PERF_EVENT_IOC_SET_FILTER:
 		return perf_event_set_filter(event, (void __user *)arg);
 
+	case PERF_EVENT_IOC_SET_BPF:
+		return perf_event_set_bpf_prog(event, arg);
+
 	default:
 		return -ENOTTY;
 	}
@@ -6455,6 +6463,49 @@ static void perf_event_free_filter(struct perf_event *event)
 	ftrace_profile_free_filter(event);
 }
 
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+{
+	struct bpf_prog *prog;
+
+	if (event->attr.type != PERF_TYPE_TRACEPOINT)
+		return -EINVAL;
+
+	if (event->tp_event->prog)
+		return -EEXIST;
+
+	if (!(event->tp_event->flags & TRACE_EVENT_FL_KPROBE))
+		/* bpf programs can only be attached to kprobes */
+		return -EINVAL;
+
+	prog = bpf_prog_get(prog_fd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (prog->aux->prog_type != BPF_PROG_TYPE_KPROBE) {
+		/* valid fd, but invalid bpf program type */
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	event->tp_event->prog = prog;
+
+	return 0;
+}
+
+static void perf_event_free_bpf_prog(struct perf_event *event)
+{
+	struct bpf_prog *prog;
+
+	if (!event->tp_event)
+		return;
+
+	prog = event->tp_event->prog;
+	if (prog) {
+		event->tp_event->prog = NULL;
+		bpf_prog_put(prog);
+	}
+}
+
 #else
 
 static inline void perf_tp_register(void)
@@ -6470,6 +6521,14 @@ static void perf_event_free_filter(struct perf_event *event)
 {
 }
 
+static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
+{
+	return -ENOENT;
+}
+
+static void perf_event_free_bpf_prog(struct perf_event *event)
+{
+}
 #endif /* CONFIG_EVENT_TRACING */
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 98f26588255e..c575a300103b 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -53,6 +53,7 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
 endif
 obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
 obj-$(CONFIG_EVENT_TRACING) += trace_events_trigger.o
+obj-$(CONFIG_BPF_SYSCALL) += bpf_trace.o
 obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
new file mode 100644
index 000000000000..f1e87da91da3
--- /dev/null
+++ b/kernel/trace/bpf_trace.c
@@ -0,0 +1,130 @@
+/* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <linux/uaccess.h>
+#include "trace.h"
+
+static DEFINE_PER_CPU(int, bpf_prog_active);
+
+/**
+ * trace_call_bpf - invoke BPF program
+ * @prog: BPF program
+ * @ctx: opaque context pointer
+ *
+ * kprobe handlers execute BPF programs via this helper.
+ * Can be used from static tracepoints in the future.
+ *
+ * Return: BPF programs always return an integer which is interpreted by
+ * kprobe handler as:
+ * 0 - return from kprobe (event is filtered out)
+ * 1 - store kprobe event into ring buffer
+ * Other values are reserved and currently alias to 1
+ */
+unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx)
+{
+	unsigned int ret;
+
+	if (in_nmi()) /* not supported yet */
+		return 1;
+
+	preempt_disable();
+
+	if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) {
+		/*
+		 * since some bpf program is already running on this cpu,
+		 * don't call into another bpf program (same or different)
+		 * and don't send kprobe event into ring-buffer,
+		 * so return zero here
+		 */
+		ret = 0;
+		goto out;
+	}
+
+	rcu_read_lock();
+	ret = BPF_PROG_RUN(prog, ctx);
+	rcu_read_unlock();
+
+ out:
+	__this_cpu_dec(bpf_prog_active);
+	preempt_enable();
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(trace_call_bpf);
+
+static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	void *dst = (void *) (long) r1;
+	int size = (int) r2;
+	void *unsafe_ptr = (void *) (long) r3;
+
+	return probe_kernel_read(dst, unsafe_ptr, size);
+}
+
+static const struct bpf_func_proto bpf_probe_read_proto = {
+	.func		= bpf_probe_read,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_probe_read:
+		return &bpf_probe_read_proto;
+	default:
+		return NULL;
+	}
+}
+
+/* bpf+kprobe programs can access fields of 'struct pt_regs' */
+static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type)
+{
+	/* check bounds */
+	if (off < 0 || off >= sizeof(struct pt_regs))
+		return false;
+
+	/* only read is allowed */
+	if (type != BPF_READ)
+		return false;
+
+	/* disallow misaligned access */
+	if (off % size != 0)
+		return false;
+
+	return true;
+}
+
+static struct bpf_verifier_ops kprobe_prog_ops = {
+	.get_func_proto  = kprobe_prog_func_proto,
+	.is_valid_access = kprobe_prog_is_valid_access,
+};
+
+static struct bpf_prog_type_list kprobe_tl = {
+	.ops	= &kprobe_prog_ops,
+	.type	= BPF_PROG_TYPE_KPROBE,
+};
+
+static int __init register_kprobe_prog_ops(void)
+{
+	bpf_register_prog_type(&kprobe_tl);
+	return 0;
+}
+late_initcall(register_kprobe_prog_ops);
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8fa549f6f528..dc3462507d7c 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1134,11 +1134,15 @@ static void
 kprobe_perf_func(struct trace_kprobe *tk, struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
+	struct bpf_prog *prog = call->prog;
 	struct kprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
+	if (prog && !trace_call_bpf(prog, regs))
+		return;
+
 	head = this_cpu_ptr(call->perf_events);
 	if (hlist_empty(head))
 		return;
@@ -1165,11 +1169,15 @@ kretprobe_perf_func(struct trace_kprobe *tk, struct kretprobe_instance *ri,
 		    struct pt_regs *regs)
 {
 	struct ftrace_event_call *call = &tk->tp.call;
+	struct bpf_prog *prog = call->prog;
 	struct kretprobe_trace_entry_head *entry;
 	struct hlist_head *head;
 	int size, __size, dsize;
 	int rctx;
 
+	if (prog && !trace_call_bpf(prog, regs))
+		return;
+
 	head = this_cpu_ptr(call->perf_events);
 	if (hlist_empty(head))
 		return;
-- 
cgit 


From d9847d310ab4003725e6ed1822682e24bd406908 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 25 Mar 2015 12:49:21 -0700
Subject: tracing: Allow BPF programs to call bpf_ktime_get_ns()

bpf_ktime_get_ns() is used by programs to compute time delta
between events or as a timestamp

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-5-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/bpf.h |  1 +
 kernel/trace/bpf_trace.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b2948feeb70b..238c6883877b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -165,6 +165,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_update_elem, /* int map_update_elem(&map, &key, &value, flags) */
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
+	BPF_FUNC_ktime_get_ns,    /* u64 bpf_ktime_get_ns(void) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index f1e87da91da3..8f5787294971 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -78,6 +78,18 @@ static const struct bpf_func_proto bpf_probe_read_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
+static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	/* NMI safe access to clock monotonic */
+	return ktime_get_mono_fast_ns();
+}
+
+static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
+	.func		= bpf_ktime_get_ns,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+};
+
 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -89,6 +101,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_map_delete_elem_proto;
 	case BPF_FUNC_probe_read:
 		return &bpf_probe_read_proto;
+	case BPF_FUNC_ktime_get_ns:
+		return &bpf_ktime_get_ns_proto;
 	default:
 		return NULL;
 	}
-- 
cgit 


From 9c959c863f8217a2ff3d7c296e8223654d240569 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 25 Mar 2015 12:49:22 -0700
Subject: tracing: Allow BPF programs to call bpf_trace_printk()

Debugging of BPF programs needs some form of printk from the
program, so let programs call limited trace_printk() with %d %u
%x %p modifiers only.

Similar to kernel modules, during program load verifier checks
whether program is calling bpf_trace_printk() and if so, kernel
allocates trace_printk buffers and emits big 'this is debug
only' banner.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Reviewed-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David S. Miller <davem@davemloft.net>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1427312966-8434-6-git-send-email-ast@plumgrid.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/bpf.h |  1 +
 kernel/trace/bpf_trace.c | 78 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 238c6883877b..cc47ef41076a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -166,6 +166,7 @@ enum bpf_func_id {
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_probe_read,      /* int bpf_probe_read(void *dst, int size, void *src) */
 	BPF_FUNC_ktime_get_ns,    /* u64 bpf_ktime_get_ns(void) */
+	BPF_FUNC_trace_printk,    /* int bpf_trace_printk(const char *fmt, int fmt_size, ...) */
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 8f5787294971..2d56ce501632 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -10,6 +10,7 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/uaccess.h>
+#include <linux/ctype.h>
 #include "trace.h"
 
 static DEFINE_PER_CPU(int, bpf_prog_active);
@@ -90,6 +91,74 @@ static const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+/*
+ * limited trace_printk()
+ * only %d %u %x %ld %lu %lx %lld %llu %llx %p conversion specifiers allowed
+ */
+static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5)
+{
+	char *fmt = (char *) (long) r1;
+	int mod[3] = {};
+	int fmt_cnt = 0;
+	int i;
+
+	/*
+	 * bpf_check()->check_func_arg()->check_stack_boundary()
+	 * guarantees that fmt points to bpf program stack,
+	 * fmt_size bytes of it were initialized and fmt_size > 0
+	 */
+	if (fmt[--fmt_size] != 0)
+		return -EINVAL;
+
+	/* check format string for allowed specifiers */
+	for (i = 0; i < fmt_size; i++) {
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i]))
+			return -EINVAL;
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt_cnt >= 3)
+			return -EINVAL;
+
+		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+		i++;
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		} else if (fmt[i] == 'p') {
+			mod[fmt_cnt]++;
+			i++;
+			if (!isspace(fmt[i]) && !ispunct(fmt[i]) && fmt[i] != 0)
+				return -EINVAL;
+			fmt_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'l') {
+			mod[fmt_cnt]++;
+			i++;
+		}
+
+		if (fmt[i] != 'd' && fmt[i] != 'u' && fmt[i] != 'x')
+			return -EINVAL;
+		fmt_cnt++;
+	}
+
+	return __trace_printk(1/* fake ip will not be printed */, fmt,
+			      mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3,
+			      mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4,
+			      mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5);
+}
+
+static const struct bpf_func_proto bpf_trace_printk_proto = {
+	.func		= bpf_trace_printk,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+};
+
 static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func_id)
 {
 	switch (func_id) {
@@ -103,6 +172,15 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_probe_read_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+
+	case BPF_FUNC_trace_printk:
+		/*
+		 * this program might be calling bpf_trace_printk,
+		 * so allocate per-cpu printk buffers
+		 */
+		trace_printk_init_buffers();
+
+		return &bpf_trace_printk_proto;
 	default:
 		return NULL;
 	}
-- 
cgit 


From e8c6deac69629c0cb97c3d3272f8631ef17f8f0f Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 14 Jan 2015 14:18:10 +0200
Subject: perf: Add data_{offset,size} to user_page

Currently, the actual perf ring buffer is one page into the mmap area,
following the user page and the userspace follows this convention. This
patch adds data_{offset,size} fields to user_page that can be used by
userspace instead for locating perf data in the mmap area. This is also
helpful when mapping existing or shared buffers if their size is not
known in advance.

Right now, it is made to follow the existing convention that

	data_offset == PAGE_SIZE and
	data_offset + data_size == mmap_size.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-2-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 5 +++++
 kernel/events/core.c            | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 91803e54ee73..86c44ae66d43 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -522,9 +522,14 @@ struct perf_event_mmap_page {
 	 * In this case the kernel will not over-write unread data.
 	 *
 	 * See perf_output_put_handle() for the data ordering.
+	 *
+	 * data_{offset,size} indicate the location and size of the perf record
+	 * buffer within the mmapped area.
 	 */
 	__u64   data_head;		/* head in the data section */
 	__u64	data_tail;		/* user-space written tail */
+	__u64	data_offset;		/* where the buffer starts */
+	__u64	data_size;		/* data buffer size */
 };
 
 #define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 5c13862d3e85..6efa516f1ab8 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4105,6 +4105,8 @@ static void perf_event_init_userpage(struct perf_event *event)
 	/* Allow new userspace to detect that bit 0 is deprecated */
 	userpg->cap_bit0_is_deprecated = 1;
 	userpg->size = offsetof(struct perf_event_mmap_page, __reserved);
+	userpg->data_offset = PAGE_SIZE;
+	userpg->data_size = perf_data_size(rb);
 
 unlock:
 	rcu_read_unlock();
-- 
cgit 


From 45bfb2e50471abbbfd83d40d28c986078b0d24ff Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 14 Jan 2015 14:18:11 +0200
Subject: perf: Add AUX area to ring buffer for raw data streams

This patch introduces "AUX space" in the perf mmap buffer, intended for
exporting high bandwidth data streams to userspace, such as instruction
flow traces.

AUX space is a ring buffer, defined by aux_{offset,size} fields in the
user_page structure, and read/write pointers aux_{head,tail}, which abide
by the same rules as data_* counterparts of the main perf buffer.

In order to allocate/mmap AUX, userspace needs to set up aux_offset to
such an offset that will be greater than data_offset+data_size and
aux_size to be the desired buffer size. Both need to be page aligned.
Then, same aux_offset and aux_size should be passed to mmap() call and
if everything adds up, you should have an AUX buffer as a result.

Pages that are mapped into this buffer also come out of user's mlock
rlimit plus perf_event_mlock_kb allowance.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-3-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h      |  17 +++++
 include/uapi/linux/perf_event.h |  16 +++++
 kernel/events/core.c            | 141 +++++++++++++++++++++++++++++++++-------
 kernel/events/internal.h        |  23 +++++++
 kernel/events/ring_buffer.c     |  97 +++++++++++++++++++++++++--
 5 files changed, 265 insertions(+), 29 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 401554074de9..5a94f6d6fa91 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -284,6 +284,18 @@ struct pmu {
 	 * Return the count value for a counter.
 	 */
 	u64 (*count)			(struct perf_event *event); /*optional*/
+
+	/*
+	 * Set up pmu-private data structures for an AUX area
+	 */
+	void *(*setup_aux)		(int cpu, void **pages,
+					 int nr_pages, bool overwrite);
+					/* optional */
+
+	/*
+	 * Free pmu-private AUX data structures
+	 */
+	void (*free_aux)		(void *aux); /* optional */
 };
 
 /**
@@ -862,6 +874,11 @@ static inline bool needs_branch_stack(struct perf_event *event)
 	return event->attr.branch_sample_type != 0;
 }
 
+static inline bool has_aux(struct perf_event *event)
+{
+	return event->pmu->setup_aux;
+}
+
 extern int perf_output_begin(struct perf_output_handle *handle,
 			     struct perf_event *event, unsigned int size);
 extern void perf_output_end(struct perf_output_handle *handle);
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 86c44ae66d43..6c5013a71714 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -530,6 +530,22 @@ struct perf_event_mmap_page {
 	__u64	data_tail;		/* user-space written tail */
 	__u64	data_offset;		/* where the buffer starts */
 	__u64	data_size;		/* data buffer size */
+
+	/*
+	 * AUX area is defined by aux_{offset,size} fields that should be set
+	 * by the userspace, so that
+	 *
+	 *   aux_offset >= data_offset + data_size
+	 *
+	 * prior to mmap()ing it. Size of the mmap()ed area should be aux_size.
+	 *
+	 * Ring buffer pointers aux_{head,tail} have the same semantics as
+	 * data_{head,tail} and same ordering rules apply.
+	 */
+	__u64	aux_head;
+	__u64	aux_tail;
+	__u64	aux_offset;
+	__u64	aux_size;
 };
 
 #define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6efa516f1ab8..da51128c337a 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4306,6 +4306,9 @@ static void perf_mmap_open(struct vm_area_struct *vma)
 	atomic_inc(&event->mmap_count);
 	atomic_inc(&event->rb->mmap_count);
 
+	if (vma->vm_pgoff)
+		atomic_inc(&event->rb->aux_mmap_count);
+
 	if (event->pmu->event_mapped)
 		event->pmu->event_mapped(event);
 }
@@ -4330,6 +4333,20 @@ static void perf_mmap_close(struct vm_area_struct *vma)
 	if (event->pmu->event_unmapped)
 		event->pmu->event_unmapped(event);
 
+	/*
+	 * rb->aux_mmap_count will always drop before rb->mmap_count and
+	 * event->mmap_count, so it is ok to use event->mmap_mutex to
+	 * serialize with perf_mmap here.
+	 */
+	if (rb_has_aux(rb) && vma->vm_pgoff == rb->aux_pgoff &&
+	    atomic_dec_and_mutex_lock(&rb->aux_mmap_count, &event->mmap_mutex)) {
+		atomic_long_sub(rb->aux_nr_pages, &mmap_user->locked_vm);
+		vma->vm_mm->pinned_vm -= rb->aux_mmap_locked;
+
+		rb_free_aux(rb);
+		mutex_unlock(&event->mmap_mutex);
+	}
+
 	atomic_dec(&rb->mmap_count);
 
 	if (!atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex))
@@ -4403,7 +4420,7 @@ out_put:
 
 static const struct vm_operations_struct perf_mmap_vmops = {
 	.open		= perf_mmap_open,
-	.close		= perf_mmap_close,
+	.close		= perf_mmap_close, /* non mergable */
 	.fault		= perf_mmap_fault,
 	.page_mkwrite	= perf_mmap_fault,
 };
@@ -4414,10 +4431,10 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	unsigned long user_locked, user_lock_limit;
 	struct user_struct *user = current_user();
 	unsigned long locked, lock_limit;
-	struct ring_buffer *rb;
+	struct ring_buffer *rb = NULL;
 	unsigned long vma_size;
 	unsigned long nr_pages;
-	long user_extra, extra;
+	long user_extra = 0, extra = 0;
 	int ret = 0, flags = 0;
 
 	/*
@@ -4432,7 +4449,66 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 		return -EINVAL;
 
 	vma_size = vma->vm_end - vma->vm_start;
-	nr_pages = (vma_size / PAGE_SIZE) - 1;
+
+	if (vma->vm_pgoff == 0) {
+		nr_pages = (vma_size / PAGE_SIZE) - 1;
+	} else {
+		/*
+		 * AUX area mapping: if rb->aux_nr_pages != 0, it's already
+		 * mapped, all subsequent mappings should have the same size
+		 * and offset. Must be above the normal perf buffer.
+		 */
+		u64 aux_offset, aux_size;
+
+		if (!event->rb)
+			return -EINVAL;
+
+		nr_pages = vma_size / PAGE_SIZE;
+
+		mutex_lock(&event->mmap_mutex);
+		ret = -EINVAL;
+
+		rb = event->rb;
+		if (!rb)
+			goto aux_unlock;
+
+		aux_offset = ACCESS_ONCE(rb->user_page->aux_offset);
+		aux_size = ACCESS_ONCE(rb->user_page->aux_size);
+
+		if (aux_offset < perf_data_size(rb) + PAGE_SIZE)
+			goto aux_unlock;
+
+		if (aux_offset != vma->vm_pgoff << PAGE_SHIFT)
+			goto aux_unlock;
+
+		/* already mapped with a different offset */
+		if (rb_has_aux(rb) && rb->aux_pgoff != vma->vm_pgoff)
+			goto aux_unlock;
+
+		if (aux_size != vma_size || aux_size != nr_pages * PAGE_SIZE)
+			goto aux_unlock;
+
+		/* already mapped with a different size */
+		if (rb_has_aux(rb) && rb->aux_nr_pages != nr_pages)
+			goto aux_unlock;
+
+		if (!is_power_of_2(nr_pages))
+			goto aux_unlock;
+
+		if (!atomic_inc_not_zero(&rb->mmap_count))
+			goto aux_unlock;
+
+		if (rb_has_aux(rb)) {
+			atomic_inc(&rb->aux_mmap_count);
+			ret = 0;
+			goto unlock;
+		}
+
+		atomic_set(&rb->aux_mmap_count, 1);
+		user_extra = nr_pages;
+
+		goto accounting;
+	}
 
 	/*
 	 * If we have rb pages ensure they're a power-of-two number, so we
@@ -4444,9 +4520,6 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
 	if (vma_size != PAGE_SIZE * (1 + nr_pages))
 		return -EINVAL;
 
-	if (vma->vm_pgoff != 0)
-		return -EINVAL;
-
 	WARN_ON_ONCE(event->ctx->parent_ctx);
 again:
 	mutex_lock(&event->mmap_mutex);
@@ -4470,6 +4543,8 @@ again:
 	}
 
 	user_extra = nr_pages + 1;
+
+accounting:
 	user_lock_limit = sysctl_perf_event_mlock >> (PAGE_SHIFT - 10);
 
 	/*
@@ -4479,7 +4554,6 @@ again:
 
 	user_locked = atomic_long_read(&user->locked_vm) + user_extra;
 
-	extra = 0;
 	if (user_locked > user_lock_limit)
 		extra = user_locked - user_lock_limit;
 
@@ -4493,35 +4567,45 @@ again:
 		goto unlock;
 	}
 
-	WARN_ON(event->rb);
+	WARN_ON(!rb && event->rb);
 
 	if (vma->vm_flags & VM_WRITE)
 		flags |= RING_BUFFER_WRITABLE;
 
-	rb = rb_alloc(nr_pages, 
-		event->attr.watermark ? event->attr.wakeup_watermark : 0,
-		event->cpu, flags);
-
 	if (!rb) {
-		ret = -ENOMEM;
-		goto unlock;
-	}
+		rb = rb_alloc(nr_pages,
+			      event->attr.watermark ? event->attr.wakeup_watermark : 0,
+			      event->cpu, flags);
 
-	atomic_set(&rb->mmap_count, 1);
-	rb->mmap_locked = extra;
-	rb->mmap_user = get_current_user();
+		if (!rb) {
+			ret = -ENOMEM;
+			goto unlock;
+		}
 
-	atomic_long_add(user_extra, &user->locked_vm);
-	vma->vm_mm->pinned_vm += extra;
+		atomic_set(&rb->mmap_count, 1);
+		rb->mmap_user = get_current_user();
+		rb->mmap_locked = extra;
 
-	ring_buffer_attach(event, rb);
+		ring_buffer_attach(event, rb);
 
-	perf_event_init_userpage(event);
-	perf_event_update_userpage(event);
+		perf_event_init_userpage(event);
+		perf_event_update_userpage(event);
+	} else {
+		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, flags);
+		if (!ret)
+			rb->aux_mmap_locked = extra;
+	}
 
 unlock:
-	if (!ret)
+	if (!ret) {
+		atomic_long_add(user_extra, &user->locked_vm);
+		vma->vm_mm->pinned_vm += extra;
+
 		atomic_inc(&event->mmap_count);
+	} else if (rb) {
+		atomic_dec(&rb->mmap_count);
+	}
+aux_unlock:
 	mutex_unlock(&event->mmap_mutex);
 
 	/*
@@ -7506,6 +7590,13 @@ perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
 	if (output_event->clock != event->clock)
 		goto out;
 
+	/*
+	 * If both events generate aux data, they must be on the same PMU
+	 */
+	if (has_aux(event) && has_aux(output_event) &&
+	    event->pmu != output_event->pmu)
+		goto out;
+
 set:
 	mutex_lock(&event->mmap_mutex);
 	/* Can't redirect output if we've got an active mmap() */
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 569b218782ad..0f6d08015927 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -35,6 +35,16 @@ struct ring_buffer {
 	unsigned long			mmap_locked;
 	struct user_struct		*mmap_user;
 
+	/* AUX area */
+	unsigned long			aux_pgoff;
+	int				aux_nr_pages;
+	atomic_t			aux_mmap_count;
+	unsigned long			aux_mmap_locked;
+	void				(*free_aux)(void *);
+	atomic_t			aux_refcount;
+	void				**aux_pages;
+	void				*aux_priv;
+
 	struct perf_event_mmap_page	*user_page;
 	void				*data_pages[0];
 };
@@ -43,6 +53,14 @@ extern void rb_free(struct ring_buffer *rb);
 extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
+extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+			pgoff_t pgoff, int nr_pages, int flags);
+extern void rb_free_aux(struct ring_buffer *rb);
+
+static inline bool rb_has_aux(struct ring_buffer *rb)
+{
+	return !!rb->aux_nr_pages;
+}
 
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
@@ -81,6 +99,11 @@ static inline unsigned long perf_data_size(struct ring_buffer *rb)
 	return rb->nr_pages << (PAGE_SHIFT + page_order(rb));
 }
 
+static inline unsigned long perf_aux_size(struct ring_buffer *rb)
+{
+	return rb->aux_nr_pages << PAGE_SHIFT;
+}
+
 #define DEFINE_OUTPUT_COPY(func_name, memcpy_func)			\
 static inline unsigned long						\
 func_name(struct perf_output_handle *handle,				\
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index eadb95ce7aac..3de9c4e9ea9f 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -243,14 +243,87 @@ ring_buffer_init(struct ring_buffer *rb, long watermark, int flags)
 	spin_lock_init(&rb->event_lock);
 }
 
+int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
+		 pgoff_t pgoff, int nr_pages, int flags)
+{
+	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
+	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
+	int ret = -ENOMEM;
+
+	if (!has_aux(event))
+		return -ENOTSUPP;
+
+	rb->aux_pages = kzalloc_node(nr_pages * sizeof(void *), GFP_KERNEL, node);
+	if (!rb->aux_pages)
+		return -ENOMEM;
+
+	rb->free_aux = event->pmu->free_aux;
+	for (rb->aux_nr_pages = 0; rb->aux_nr_pages < nr_pages;
+	     rb->aux_nr_pages++) {
+		struct page *page;
+
+		page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+		if (!page)
+			goto out;
+
+		rb->aux_pages[rb->aux_nr_pages] = page_address(page);
+	}
+
+	rb->aux_priv = event->pmu->setup_aux(event->cpu, rb->aux_pages, nr_pages,
+					     overwrite);
+	if (!rb->aux_priv)
+		goto out;
+
+	ret = 0;
+
+	/*
+	 * aux_pages (and pmu driver's private data, aux_priv) will be
+	 * referenced in both producer's and consumer's contexts, thus
+	 * we keep a refcount here to make sure either of the two can
+	 * reference them safely.
+	 */
+	atomic_set(&rb->aux_refcount, 1);
+
+out:
+	if (!ret)
+		rb->aux_pgoff = pgoff;
+	else
+		rb_free_aux(rb);
+
+	return ret;
+}
+
+static void __rb_free_aux(struct ring_buffer *rb)
+{
+	int pg;
+
+	if (rb->aux_priv) {
+		rb->free_aux(rb->aux_priv);
+		rb->free_aux = NULL;
+		rb->aux_priv = NULL;
+	}
+
+	for (pg = 0; pg < rb->aux_nr_pages; pg++)
+		free_page((unsigned long)rb->aux_pages[pg]);
+
+	kfree(rb->aux_pages);
+	rb->aux_nr_pages = 0;
+}
+
+void rb_free_aux(struct ring_buffer *rb)
+{
+	if (atomic_dec_and_test(&rb->aux_refcount))
+		__rb_free_aux(rb);
+}
+
 #ifndef CONFIG_PERF_USE_VMALLOC
 
 /*
  * Back perf_mmap() with regular GFP_KERNEL-0 pages.
  */
 
-struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+static struct page *
+__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
 	if (pgoff > rb->nr_pages)
 		return NULL;
@@ -340,8 +413,8 @@ static int data_page_nr(struct ring_buffer *rb)
 	return rb->nr_pages << page_order(rb);
 }
 
-struct page *
-perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+static struct page *
+__perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
 {
 	/* The '>' counts in the user page. */
 	if (pgoff > data_page_nr(rb))
@@ -416,3 +489,19 @@ fail:
 }
 
 #endif
+
+struct page *
+perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff)
+{
+	if (rb->aux_nr_pages) {
+		/* above AUX space */
+		if (pgoff > rb->aux_pgoff + rb->aux_nr_pages)
+			return NULL;
+
+		/* AUX space */
+		if (pgoff >= rb->aux_pgoff)
+			return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]);
+	}
+
+	return __perf_mmap_to_page(rb, pgoff);
+}
-- 
cgit 


From 68db7e98c3a6ebe7284b6cf14906ed7c55f3f7f0 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 14 Jan 2015 14:18:15 +0200
Subject: perf: Add AUX record

When there's new data in the AUX space, output a record indicating its
offset and size and a set of flags, such as PERF_AUX_FLAG_TRUNCATED, to
mean the described data was truncated to fit in the ring buffer.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-7-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 19 +++++++++++++++++++
 kernel/events/core.c            | 34 ++++++++++++++++++++++++++++++++++
 kernel/events/internal.h        |  3 +++
 3 files changed, 56 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 6c5013a71714..8904ad3a850b 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -768,6 +768,20 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_MMAP2			= 10,
 
+	/*
+	 * Records that new data landed in the AUX buffer part.
+	 *
+	 * struct {
+	 * 	struct perf_event_header	header;
+	 *
+	 * 	u64				aux_offset;
+	 * 	u64				aux_size;
+	 *	u64				flags;
+	 * 	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_AUX				= 11,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
@@ -785,6 +799,11 @@ enum perf_callchain_context {
 	PERF_CONTEXT_MAX		= (__u64)-4095,
 };
 
+/**
+ * PERF_RECORD_AUX::flags bits
+ */
+#define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
+
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
 #define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6d9fdaef7b57..dbc2eff32230 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5916,6 +5916,40 @@ void perf_event_mmap(struct vm_area_struct *vma)
 	perf_event_mmap_event(&mmap_event);
 }
 
+void perf_event_aux_event(struct perf_event *event, unsigned long head,
+			  unsigned long size, u64 flags)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct perf_aux_event {
+		struct perf_event_header	header;
+		u64				offset;
+		u64				size;
+		u64				flags;
+	} rec = {
+		.header = {
+			.type = PERF_RECORD_AUX,
+			.misc = 0,
+			.size = sizeof(rec),
+		},
+		.offset		= head,
+		.size		= size,
+		.flags		= flags,
+	};
+	int ret;
+
+	perf_event_header__init_id(&rec.header, &sample, event);
+	ret = perf_output_begin(&handle, event, rec.header.size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, rec);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
 /*
  * IRQ throttle logging
  */
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 0f6d08015927..4d117a981431 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -62,6 +62,9 @@ static inline bool rb_has_aux(struct ring_buffer *rb)
 	return !!rb->aux_nr_pages;
 }
 
+void perf_event_aux_event(struct perf_event *event, unsigned long head,
+			  unsigned long size, u64 flags);
+
 extern void
 perf_event_header__init_id(struct perf_event_header *header,
 			   struct perf_sample_data *data,
-- 
cgit 


From 2023a0d2829e521fe6ad6b9907f3f90bfbf57142 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 14 Jan 2015 14:18:17 +0200
Subject: perf: Support overwrite mode for the AUX area

This adds support for overwrite mode in the AUX area, which means "keep
collecting data till you're stopped", turning AUX area into a circular
buffer, where new data overwrites old data. It does not depend on data
buffer's overwrite mode, so that it doesn't lose sideband data that is
instrumental for processing AUX data.

Overwrite mode is enabled at mapping AUX area read only. Even though
aux_tail in the buffer's user page might be user writable, it will be
ignored in this mode.

A PERF_RECORD_AUX with PERF_AUX_FLAG_OVERWRITE set is written to the perf
data stream every time an event writes new data to the AUX area. The pmu
driver might not be able to infer the exact beginning of the new data in
each snapshot, some drivers will only provide the tail, which is
aux_offset + aux_size in the AUX record. Consumer has to be able to tell
the new data from the old one, for example, by means of time stamps if
such are provided in the trace.

Consumer is also responsible for disabling any events that might write
to the AUX area (thus potentially racing with the consumer) before
collecting the data.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-9-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h |  1 +
 kernel/events/internal.h        |  1 +
 kernel/events/ring_buffer.c     | 48 ++++++++++++++++++++++++++++-------------
 3 files changed, 35 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 8904ad3a850b..29ef2f73bb4a 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -803,6 +803,7 @@ enum perf_callchain_context {
  * PERF_RECORD_AUX::flags bits
  */
 #define PERF_AUX_FLAG_TRUNCATED		0x01	/* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE		0x02	/* snapshot from overwrite mode */
 
 #define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
 #define PERF_FLAG_FD_OUTPUT		(1UL << 1)
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index b701ebc32570..ffd51d9f5945 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -40,6 +40,7 @@ struct ring_buffer {
 	local_t				aux_nest;
 	unsigned long			aux_pgoff;
 	int				aux_nr_pages;
+	int				aux_overwrite;
 	atomic_t			aux_mmap_count;
 	unsigned long			aux_mmap_locked;
 	void				(*free_aux)(void *);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 0cc7b0f39058..67b328337a41 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -283,26 +283,33 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 		goto err_put;
 
 	aux_head = local_read(&rb->aux_head);
-	aux_tail = ACCESS_ONCE(rb->user_page->aux_tail);
 
 	handle->rb = rb;
 	handle->event = event;
 	handle->head = aux_head;
-	if (aux_head - aux_tail < perf_aux_size(rb))
-		handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
-	else
-		handle->size = 0;
+	handle->size = 0;
 
 	/*
-	 * handle->size computation depends on aux_tail load; this forms a
-	 * control dependency barrier separating aux_tail load from aux data
-	 * store that will be enabled on successful return
+	 * In overwrite mode, AUX data stores do not depend on aux_tail,
+	 * therefore (A) control dependency barrier does not exist. The
+	 * (B) <-> (C) ordering is still observed by the pmu driver.
 	 */
-	if (!handle->size) { /* A, matches D */
-		event->pending_disable = 1;
-		perf_output_wakeup(handle);
-		local_set(&rb->aux_nest, 0);
-		goto err_put;
+	if (!rb->aux_overwrite) {
+		aux_tail = ACCESS_ONCE(rb->user_page->aux_tail);
+		if (aux_head - aux_tail < perf_aux_size(rb))
+			handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
+
+		/*
+		 * handle->size computation depends on aux_tail load; this forms a
+		 * control dependency barrier separating aux_tail load from aux data
+		 * store that will be enabled on successful return
+		 */
+		if (!handle->size) { /* A, matches D */
+			event->pending_disable = 1;
+			perf_output_wakeup(handle);
+			local_set(&rb->aux_nest, 0);
+			goto err_put;
+		}
 	}
 
 	return handle->rb->aux_priv;
@@ -327,13 +334,22 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 			 bool truncated)
 {
 	struct ring_buffer *rb = handle->rb;
-	unsigned long aux_head = local_read(&rb->aux_head);
+	unsigned long aux_head;
 	u64 flags = 0;
 
 	if (truncated)
 		flags |= PERF_AUX_FLAG_TRUNCATED;
 
-	local_add(size, &rb->aux_head);
+	/* in overwrite mode, driver provides aux_head via handle */
+	if (rb->aux_overwrite) {
+		flags |= PERF_AUX_FLAG_OVERWRITE;
+
+		aux_head = handle->head;
+		local_set(&rb->aux_head, aux_head);
+	} else {
+		aux_head = local_read(&rb->aux_head);
+		local_add(size, &rb->aux_head);
+	}
 
 	if (size || flags) {
 		/*
@@ -480,6 +496,8 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 	 */
 	atomic_set(&rb->aux_refcount, 1);
 
+	rb->aux_overwrite = overwrite;
+
 out:
 	if (!ret)
 		rb->aux_pgoff = pgoff;
-- 
cgit 


From 1a5941312414c71dece6717da9a0fa1303127afa Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 14 Jan 2015 14:18:18 +0200
Subject: perf: Add wakeup watermark control to the AUX area

When AUX area gets a certain amount of new data, we want to wake up
userspace to collect it. This adds a new control to specify how much
data will cause a wakeup. This is then passed down to pmu drivers via
output handle's "wakeup" field, so that the driver can find the nearest
point where it can generate an interrupt.

We repurpose __reserved_2 in the event attribute for this, even though
it was never checked to be zero before, aux_watermark will only matter
for new AUX-aware code, so the old code should still be fine.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-10-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h |  7 +++++++
 kernel/events/core.c            |  3 ++-
 kernel/events/internal.h        |  4 +++-
 kernel/events/ring_buffer.c     | 22 +++++++++++++++++++---
 4 files changed, 31 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 29ef2f73bb4a..84819546c8ce 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -261,6 +261,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER3	96	/* add: sample_regs_user */
 					/* add: sample_stack_user */
 #define PERF_ATTR_SIZE_VER4	104	/* add: sample_regs_intr */
+#define PERF_ATTR_SIZE_VER5	112	/* add: aux_watermark */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -366,6 +367,12 @@ struct perf_event_attr {
 	 * See asm/perf_regs.h for details.
 	 */
 	__u64	sample_regs_intr;
+
+	/*
+	 * Wakeup watermark for AUX area
+	 */
+	__u32	aux_watermark;
+	__u32	__reserved_2;	/* align to __u64 */
 };
 
 #define perf_flags(attr)	(*(&(attr)->read_format + 1))
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 81e8d14ac59a..31f6b504ad62 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4677,7 +4677,8 @@ accounting:
 		perf_event_init_userpage(event);
 		perf_event_update_userpage(event);
 	} else {
-		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages, flags);
+		ret = rb_alloc_aux(rb, event, vma->vm_pgoff, nr_pages,
+				   event->attr.aux_watermark, flags);
 		if (!ret)
 			rb->aux_mmap_locked = extra;
 	}
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index ffd51d9f5945..9f6ce9ba4a04 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -27,6 +27,7 @@ struct ring_buffer {
 	local_t				lost;		/* nr records lost   */
 
 	long				watermark;	/* wakeup watermark  */
+	long				aux_watermark;
 	/* poll crap */
 	spinlock_t			event_lock;
 	struct list_head		event_list;
@@ -38,6 +39,7 @@ struct ring_buffer {
 	/* AUX area */
 	local_t				aux_head;
 	local_t				aux_nest;
+	local_t				aux_wakeup;
 	unsigned long			aux_pgoff;
 	int				aux_nr_pages;
 	int				aux_overwrite;
@@ -57,7 +59,7 @@ extern struct ring_buffer *
 rb_alloc(int nr_pages, long watermark, int cpu, int flags);
 extern void perf_event_wakeup(struct perf_event *event);
 extern int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
-			pgoff_t pgoff, int nr_pages, int flags);
+			pgoff_t pgoff, int nr_pages, long watermark, int flags);
 extern void rb_free_aux(struct ring_buffer *rb);
 extern struct ring_buffer *ring_buffer_get(struct perf_event *event);
 extern void ring_buffer_put(struct ring_buffer *rb);
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 67b328337a41..232f00f273cb 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -296,6 +296,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle,
 	 */
 	if (!rb->aux_overwrite) {
 		aux_tail = ACCESS_ONCE(rb->user_page->aux_tail);
+		handle->wakeup = local_read(&rb->aux_wakeup) + rb->aux_watermark;
 		if (aux_head - aux_tail < perf_aux_size(rb))
 			handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb));
 
@@ -359,9 +360,12 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size,
 		perf_event_aux_event(handle->event, aux_head, size, flags);
 	}
 
-	rb->user_page->aux_head = local_read(&rb->aux_head);
+	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
 
-	perf_output_wakeup(handle);
+	if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+		perf_output_wakeup(handle);
+		local_add(rb->aux_watermark, &rb->aux_wakeup);
+	}
 	handle->event = NULL;
 
 	local_set(&rb->aux_nest, 0);
@@ -383,6 +387,14 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size)
 
 	local_add(size, &rb->aux_head);
 
+	aux_head = rb->user_page->aux_head = local_read(&rb->aux_head);
+	if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) {
+		perf_output_wakeup(handle);
+		local_add(rb->aux_watermark, &rb->aux_wakeup);
+		handle->wakeup = local_read(&rb->aux_wakeup) +
+				 rb->aux_watermark;
+	}
+
 	handle->head = aux_head;
 	handle->size -= size;
 
@@ -433,7 +445,7 @@ static void rb_free_aux_page(struct ring_buffer *rb, int idx)
 }
 
 int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
-		 pgoff_t pgoff, int nr_pages, int flags)
+		 pgoff_t pgoff, int nr_pages, long watermark, int flags)
 {
 	bool overwrite = !(flags & RING_BUFFER_WRITABLE);
 	int node = (event->cpu == -1) ? -1 : cpu_to_node(event->cpu);
@@ -497,6 +509,10 @@ int rb_alloc_aux(struct ring_buffer *rb, struct perf_event *event,
 	atomic_set(&rb->aux_refcount, 1);
 
 	rb->aux_overwrite = overwrite;
+	rb->aux_watermark = watermark;
+
+	if (!rb->aux_watermark && !rb->aux_overwrite)
+		rb->aux_watermark = nr_pages << (PAGE_SHIFT - 1);
 
 out:
 	if (!ret)
-- 
cgit 


From ec0d7729bbaed4b9d2d3fada693278e13a3d1368 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Wed, 14 Jan 2015 14:18:23 +0200
Subject: perf: Add ITRACE_START record to indicate that tracing has started

For counters that generate AUX data that is bound to the context of a
running task, such as instruction tracing, the decoder needs to know
exactly which task is running when the event is first scheduled in,
before the first sched_switch. The decoder's need to know this stems
from the fact that instruction flow trace decoding will almost always
require program's object code in order to reconstruct said flow and
for that we need at least its pid/tid in the perf stream.

To single out such instruction tracing pmus, this patch introduces
ITRACE PMU capability. The reason this is not part of RECORD_AUX
record is that not all pmus capable of generating AUX data need this,
and the opposite is *probably* also true.

While sched_switch covers for most cases, there are two problems with it:
the consumer will need to process events out of order (that is, having
found RECORD_AUX, it will have to skip forward to the nearest sched_switch
to figure out which task it was, then go back to the actual trace to
decode it) and it completely misses the case when the tracing is enabled
and disabled before sched_switch, for example, via PERF_EVENT_IOC_DISABLE.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Kaixu Xia <kaixu.xia@linaro.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Robert Richter <rric@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: kan.liang@intel.com
Cc: markus.t.metzger@intel.com
Cc: mathieu.poirier@linaro.org
Link: http://lkml.kernel.org/r/1421237903-181015-15-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h      |  4 ++++
 include/uapi/linux/perf_event.h | 11 +++++++++++
 kernel/events/core.c            | 41 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 45c5873ad9b3..61992cf2e977 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -129,6 +129,9 @@ struct hw_perf_event {
 			struct list_head	cqm_groups_entry;
 			struct list_head	cqm_group_entry;
 		};
+		struct { /* itrace */
+			int			itrace_started;
+		};
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 		struct { /* breakpoint */
 			/*
@@ -177,6 +180,7 @@ struct perf_event;
 #define PERF_PMU_CAP_AUX_NO_SG			0x04
 #define PERF_PMU_CAP_AUX_SW_DOUBLEBUF		0x08
 #define PERF_PMU_CAP_EXCLUSIVE			0x10
+#define PERF_PMU_CAP_ITRACE			0x20
 
 /**
  * struct pmu - generic performance monitoring unit
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 84819546c8ce..309211b3eb67 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -789,6 +789,17 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_AUX				= 11,
 
+	/*
+	 * Indicates that instruction trace has started
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *	u32				pid;
+	 *	u32				tid;
+	 * };
+	 */
+	PERF_RECORD_ITRACE_START		= 12,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 31f6b504ad62..06917d537302 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1831,6 +1831,7 @@ static void perf_set_shadow_time(struct perf_event *event,
 #define MAX_INTERRUPTS (~0ULL)
 
 static void perf_log_throttle(struct perf_event *event, int enable);
+static void perf_log_itrace_start(struct perf_event *event);
 
 static int
 event_sched_in(struct perf_event *event,
@@ -1869,6 +1870,8 @@ event_sched_in(struct perf_event *event,
 
 	perf_set_shadow_time(event, ctx, tstamp);
 
+	perf_log_itrace_start(event);
+
 	if (event->pmu->add(event, PERF_EF_START)) {
 		event->state = PERF_EVENT_STATE_INACTIVE;
 		event->oncpu = -1;
@@ -5991,6 +5994,44 @@ static void perf_log_throttle(struct perf_event *event, int enable)
 	perf_output_end(&handle);
 }
 
+static void perf_log_itrace_start(struct perf_event *event)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	struct perf_aux_event {
+		struct perf_event_header        header;
+		u32				pid;
+		u32				tid;
+	} rec;
+	int ret;
+
+	if (event->parent)
+		event = event->parent;
+
+	if (!(event->pmu->capabilities & PERF_PMU_CAP_ITRACE) ||
+	    event->hw.itrace_started)
+		return;
+
+	event->hw.itrace_started = 1;
+
+	rec.header.type	= PERF_RECORD_ITRACE_START;
+	rec.header.misc	= 0;
+	rec.header.size	= sizeof(rec);
+	rec.pid	= perf_event_pid(event, current);
+	rec.tid	= perf_event_tid(event, current);
+
+	perf_event_header__init_id(&rec.header, &sample, event);
+	ret = perf_output_begin(&handle, event, rec.header.size);
+
+	if (ret)
+		return;
+
+	perf_output_put(&handle, rec);
+	perf_event__output_id_sample(event, &handle, &sample);
+
+	perf_output_end(&handle);
+}
+
 /*
  * Generic event overflow handling, sampling.
  */
-- 
cgit 


From 64bf8049a2ec1e61e1475eb02b5514d7061d443e Mon Sep 17 00:00:00 2001
From: Masatake YAMATO <yamato@redhat.com>
Date: Wed, 4 Mar 2015 17:13:24 -0300
Subject: [media] am437x: include linux/videodev2.h for expanding
 BASE_VIDIOC_PRIVATE

In am437x-vpfe.h BASE_VIDIOC_PRIVATE is used for
making the name of ioctl command(VIDIOC_AM437X_CCDC_CFG).
The definition of BASE_VIDIOC_PRIVATE is in linux/videodev2.h.
However, linux/videodev2.h is not included in am437x-vpfe.h.
As the result an application using has to include both
am437x-vpfe.h and linux/videodev2.h.

With this patch, the application can include just am437x-vpfe.h.

Signed-off-by: Masatake YAMATO <yamato@redhat.com>
Acked-by: Lad, Prabhakar <prabhakar.csengg@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/am437x-vpfe.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/am437x-vpfe.h b/include/uapi/linux/am437x-vpfe.h
index 9b03033f9cd6..d75774317b9b 100644
--- a/include/uapi/linux/am437x-vpfe.h
+++ b/include/uapi/linux/am437x-vpfe.h
@@ -21,6 +21,8 @@
 #ifndef AM437X_VPFE_USER_H
 #define AM437X_VPFE_USER_H
 
+#include <linux/videodev2.h>
+
 enum vpfe_ccdc_data_size {
 	VPFE_CCDC_DATA_16BITS = 0,
 	VPFE_CCDC_DATA_15BITS,
-- 
cgit 


From aa05b979f14998fec16fa38f1c91eaa197e73148 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Sun, 8 Mar 2015 04:53:32 -0300
Subject: [media] videodev2.h: fix comment

The quantization comment in the header was incorrect w.r.t. BT.2020.
Fix this.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/videodev2.h | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index fbdc3602ee27..15b21e481246 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -268,9 +268,10 @@ enum v4l2_ycbcr_encoding {
 
 enum v4l2_quantization {
 	/*
-	 * The default for R'G'B' quantization is always full range. For
-	 * Y'CbCr the quantization is always limited range, except for
-	 * SYCC, XV601, XV709 or JPEG: those are full range.
+	 * The default for R'G'B' quantization is always full range, except
+	 * for the BT2020 colorspace. For Y'CbCr the quantization is always
+	 * limited range, except for COLORSPACE_JPEG, SYCC, XV601 or XV709:
+	 * those are full range.
 	 */
 	V4L2_QUANTIZATION_DEFAULT     = 0,
 	V4L2_QUANTIZATION_FULL_RANGE  = 1,
-- 
cgit 


From cc7d2dfb75b3ac0f248801ceed65f69465eb0389 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sun, 15 Mar 2015 14:30:25 -0300
Subject: [media] v4l2_plane_pix_format: use __u32 bytesperline instead of
 __u16

While running v4l2-compliance tests on vivid I suddenly got errors due to
a call to vmalloc_user with size 0 from vb2.

Digging deeper into the cause I discovered that this was due to the fact that
struct v4l2_plane_pix_format defines bytesperline as a __u16 instead of a __u32.

The test I was running selected a format of 4 * 4096 by 4 * 2048 with a 32
bit pixelformat.

So bytesperline was 4 * 4 * 4096 = 65536, which becomes 0 in a __u16. And
bytesperline * height is suddenly 0 as well. While the vivid driver may be
a virtual driver, it is to be expected that this limit will be hit for real
hardware as well in the near future: 8k deep-color video will already reach
it.

The solution is to change the type to __u32. The only drivers besides vivid
that use the multiplanar API are little-endian ARM and SH platforms (exynos,
ti-vpe, vsp1), so this is safe.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/v4l/pixfmt.xml  | 4 ++--
 drivers/media/platform/s5p-tv/mixer_video.c | 2 +-
 include/uapi/linux/videodev2.h              | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/pixfmt.xml b/Documentation/DocBook/media/v4l/pixfmt.xml
index 0a1528f3e891..fcde4e20205e 100644
--- a/Documentation/DocBook/media/v4l/pixfmt.xml
+++ b/Documentation/DocBook/media/v4l/pixfmt.xml
@@ -182,14 +182,14 @@ see <xref linkend="colorspaces" />.</entry>
           </entry>
         </row>
         <row>
-          <entry>__u16</entry>
+          <entry>__u32</entry>
           <entry><structfield>bytesperline</structfield></entry>
           <entry>Distance in bytes between the leftmost pixels in two adjacent
             lines. See &v4l2-pix-format;.</entry>
         </row>
         <row>
           <entry>__u16</entry>
-          <entry><structfield>reserved[7]</structfield></entry>
+          <entry><structfield>reserved[6]</structfield></entry>
           <entry>Reserved for future extensions. Should be zeroed by the
            application.</entry>
         </row>
diff --git a/drivers/media/platform/s5p-tv/mixer_video.c b/drivers/media/platform/s5p-tv/mixer_video.c
index 72d4f2e1efc0..751f3b618337 100644
--- a/drivers/media/platform/s5p-tv/mixer_video.c
+++ b/drivers/media/platform/s5p-tv/mixer_video.c
@@ -287,7 +287,7 @@ static void mxr_mplane_fill(struct v4l2_plane_pix_format *planes,
 		u32 bl_width = divup(width, blk->width);
 		u32 bl_height = divup(height, blk->height);
 		u32 sizeimage = bl_width * bl_height * blk->size;
-		u16 bytesperline = bl_width * blk->size / blk->height;
+		u32 bytesperline = bl_width * blk->size / blk->height;
 
 		plane->sizeimage += sizeimage;
 		plane->bytesperline = max(plane->bytesperline, bytesperline);
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 15b21e481246..47df18f36c4b 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1842,8 +1842,8 @@ struct v4l2_mpeg_vbi_fmt_ivtv {
  */
 struct v4l2_plane_pix_format {
 	__u32		sizeimage;
-	__u16		bytesperline;
-	__u16		reserved[7];
+	__u32		bytesperline;
+	__u16		reserved[6];
 } __attribute__ ((packed));
 
 /**
-- 
cgit 


From 7b0fd4568bee379474eb0d989ef1125064f19fa7 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Wed, 15 May 2013 11:34:26 -0300
Subject: [media] v4l: Add RBG and RGB 8:8:8 media bus formats on 24 and 32 bit
 busses

Add support and documentation for two media bus formats:
MEDIA_BUS_FMT_RBG888_1X24 and MEDIA_BUS_FMT_RGB888_1X32_PADHI

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 67 ++++++++++++++++++++++
 include/uapi/linux/media-bus-format.h              |  4 +-
 2 files changed, 70 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index c5ea868e3909..d253e8f85f4d 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -440,6 +440,36 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>b<subscript>1</subscript></entry>
 	      <entry>b<subscript>0</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-RBG888-1X24">
+	      <entry>MEDIA_BUS_FMT_RBG888_1X24</entry>
+	      <entry>0x100e</entry>
+	      <entry></entry>
+	      &dash-ent-8;
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	    </row>
 	    <row id="MEDIA-BUS-FMT-RGB888-1X24">
 	      <entry>MEDIA_BUS_FMT_RGB888_1X24</entry>
 	      <entry>0x100a</entry>
@@ -579,6 +609,43 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>b<subscript>1</subscript></entry>
 	      <entry>b<subscript>0</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-RGB888-1X32-PADHI">
+	      <entry>MEDIA_BUS_FMT_RGB888_1X32_PADHI</entry>
+	      <entry>0x100f</entry>
+	      <entry></entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>0</entry>
+	      <entry>r<subscript>7</subscript></entry>
+	      <entry>r<subscript>6</subscript></entry>
+	      <entry>r<subscript>5</subscript></entry>
+	      <entry>r<subscript>4</subscript></entry>
+	      <entry>r<subscript>3</subscript></entry>
+	      <entry>r<subscript>2</subscript></entry>
+	      <entry>r<subscript>1</subscript></entry>
+	      <entry>r<subscript>0</subscript></entry>
+	      <entry>g<subscript>7</subscript></entry>
+	      <entry>g<subscript>6</subscript></entry>
+	      <entry>g<subscript>5</subscript></entry>
+	      <entry>g<subscript>4</subscript></entry>
+	      <entry>g<subscript>3</subscript></entry>
+	      <entry>g<subscript>2</subscript></entry>
+	      <entry>g<subscript>1</subscript></entry>
+	      <entry>g<subscript>0</subscript></entry>
+	      <entry>b<subscript>7</subscript></entry>
+	      <entry>b<subscript>6</subscript></entry>
+	      <entry>b<subscript>5</subscript></entry>
+	      <entry>b<subscript>4</subscript></entry>
+	      <entry>b<subscript>3</subscript></entry>
+	      <entry>b<subscript>2</subscript></entry>
+	      <entry>b<subscript>1</subscript></entry>
+	      <entry>b<subscript>0</subscript></entry>
+	    </row>
 	  </tbody>
 	</tgroup>
       </table>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 23b40908be30..b585bb32d25e 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -33,7 +33,7 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x100e */
+/* RGB - next is	0x1010 */
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE	0x1003
@@ -43,10 +43,12 @@
 #define MEDIA_BUS_FMT_RGB565_2X8_BE		0x1007
 #define MEDIA_BUS_FMT_RGB565_2X8_LE		0x1008
 #define MEDIA_BUS_FMT_RGB666_1X18		0x1009
+#define MEDIA_BUS_FMT_RBG888_1X24		0x100e
 #define MEDIA_BUS_FMT_RGB888_1X24		0x100a
 #define MEDIA_BUS_FMT_RGB888_2X12_BE		0x100b
 #define MEDIA_BUS_FMT_RGB888_2X12_LE		0x100c
 #define MEDIA_BUS_FMT_ARGB8888_1X32		0x100d
+#define MEDIA_BUS_FMT_RGB888_1X32_PADHI		0x100f
 
 /* YUV (including grey) - next is	0x2024 */
 #define MEDIA_BUS_FMT_Y8_1X8			0x2001
-- 
cgit 


From e8b2d7a565ae4fc8627ffe35b953062ff6119533 Mon Sep 17 00:00:00 2001
From: Hyun Kwon <hyun.kwon@xilinx.com>
Date: Tue, 18 Mar 2014 13:18:14 -0300
Subject: [media] v4l: Sort YUV formats of v4l2_mbus_pixelcode

Keep the formats sorted by type, bus_width, bits per component, samples
per pixel and order of subsamples, in that order.

Signed-off-by: Hyun Kwon <hyun.kwon@xilinx.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 600 ++++++++++-----------
 include/uapi/linux/media-bus-format.h              |  12 +-
 2 files changed, 306 insertions(+), 306 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index d253e8f85f4d..9bfd468cd524 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -2255,11 +2255,15 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-UYVY8-1X16">
-	      <entry>MEDIA_BUS_FMT_UYVY8_1X16</entry>
-	      <entry>0x200f</entry>
+	    <row id="MEDIA-BUS-FMT-UYVY12-2X12">
+	      <entry>MEDIA_BUS_FMT_UYVY12_2X12</entry>
+	      <entry>0x201c</entry>
 	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>u<subscript>11</subscript></entry>
+	      <entry>u<subscript>10</subscript></entry>
+	      <entry>u<subscript>9</subscript></entry>
+	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2268,6 +2272,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>2</subscript></entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2281,7 +2295,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>v<subscript>11</subscript></entry>
+	      <entry>v<subscript>10</subscript></entry>
+	      <entry>v<subscript>9</subscript></entry>
+	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2290,6 +2308,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>2</subscript></entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2299,11 +2327,15 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-VYUY8-1X16">
-	      <entry>MEDIA_BUS_FMT_VYUY8_1X16</entry>
-	      <entry>0x2010</entry>
+	    <row id="MEDIA-BUS-FMT-VYUY12-2X12">
+	      <entry>MEDIA_BUS_FMT_VYUY12_2X12</entry>
+	      <entry>0x201d</entry>
 	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>v<subscript>11</subscript></entry>
+	      <entry>v<subscript>10</subscript></entry>
+	      <entry>v<subscript>9</subscript></entry>
+	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2312,6 +2344,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>2</subscript></entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2325,7 +2367,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>u<subscript>11</subscript></entry>
+	      <entry>u<subscript>10</subscript></entry>
+	      <entry>u<subscript>9</subscript></entry>
+	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2334,6 +2380,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>2</subscript></entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2343,11 +2399,15 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-YUYV8-1X16">
-	      <entry>MEDIA_BUS_FMT_YUYV8_1X16</entry>
-	      <entry>0x2011</entry>
+	    <row id="MEDIA-BUS-FMT-YUYV12-2X12">
+	      <entry>MEDIA_BUS_FMT_YUYV12_2X12</entry>
+	      <entry>0x201e</entry>
 	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2356,6 +2416,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>u<subscript>11</subscript></entry>
+	      <entry>u<subscript>10</subscript></entry>
+	      <entry>u<subscript>9</subscript></entry>
+	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2369,7 +2439,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2378,6 +2452,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>v<subscript>11</subscript></entry>
+	      <entry>v<subscript>10</subscript></entry>
+	      <entry>v<subscript>9</subscript></entry>
+	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2387,11 +2471,15 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-YVYU8-1X16">
-	      <entry>MEDIA_BUS_FMT_YVYU8_1X16</entry>
-	      <entry>0x2012</entry>
+	    <row id="MEDIA-BUS-FMT-YVYU12-2X12">
+	      <entry>MEDIA_BUS_FMT_YVYU12_2X12</entry>
+	      <entry>0x201f</entry>
 	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2400,6 +2488,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-20;
+	      <entry>v<subscript>11</subscript></entry>
+	      <entry>v<subscript>10</subscript></entry>
+	      <entry>v<subscript>9</subscript></entry>
+	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2413,29 +2511,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-16;
-	      <entry>y<subscript>7</subscript></entry>
-	      <entry>y<subscript>6</subscript></entry>
-	      <entry>y<subscript>5</subscript></entry>
-	      <entry>y<subscript>4</subscript></entry>
-	      <entry>y<subscript>3</subscript></entry>
-	      <entry>y<subscript>2</subscript></entry>
-	      <entry>y<subscript>1</subscript></entry>
-	      <entry>y<subscript>0</subscript></entry>
-	      <entry>u<subscript>7</subscript></entry>
-	      <entry>u<subscript>6</subscript></entry>
-	      <entry>u<subscript>5</subscript></entry>
-	      <entry>u<subscript>4</subscript></entry>
-	      <entry>u<subscript>3</subscript></entry>
-	      <entry>u<subscript>2</subscript></entry>
-	      <entry>u<subscript>1</subscript></entry>
-	      <entry>u<subscript>0</subscript></entry>
-	    </row>
-	    <row id="MEDIA-BUS-FMT-YDYUYDYV8-1X16">
-	      <entry>MEDIA_BUS_FMT_YDYUYDYV8_1X16</entry>
-	      <entry>0x2014</entry>
-	      <entry></entry>
-	      &dash-ent-16;
+	      &dash-ent-20;
+	      <entry>y<subscript>11</subscript></entry>
+	      <entry>y<subscript>10</subscript></entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2444,28 +2524,16 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
 	    </row>
 	    <row>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-16;
-	      <entry>y<subscript>7</subscript></entry>
-	      <entry>y<subscript>6</subscript></entry>
-	      <entry>y<subscript>5</subscript></entry>
-	      <entry>y<subscript>4</subscript></entry>
-	      <entry>y<subscript>3</subscript></entry>
-	      <entry>y<subscript>2</subscript></entry>
-	      <entry>y<subscript>1</subscript></entry>
-	      <entry>y<subscript>0</subscript></entry>
+	      &dash-ent-20;
+	      <entry>u<subscript>11</subscript></entry>
+	      <entry>u<subscript>10</subscript></entry>
+	      <entry>u<subscript>9</subscript></entry>
+	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2475,57 +2543,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
 	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-16;
-	      <entry>y<subscript>7</subscript></entry>
-	      <entry>y<subscript>6</subscript></entry>
-	      <entry>y<subscript>5</subscript></entry>
-	      <entry>y<subscript>4</subscript></entry>
-	      <entry>y<subscript>3</subscript></entry>
-	      <entry>y<subscript>2</subscript></entry>
-	      <entry>y<subscript>1</subscript></entry>
-	      <entry>y<subscript>0</subscript></entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	      <entry>d</entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
+	    <row id="MEDIA-BUS-FMT-UYVY8-1X16">
+	      <entry>MEDIA_BUS_FMT_UYVY8_1X16</entry>
+	      <entry>0x200f</entry>
 	      <entry></entry>
 	      &dash-ent-16;
-	      <entry>y<subscript>7</subscript></entry>
-	      <entry>y<subscript>6</subscript></entry>
-	      <entry>y<subscript>5</subscript></entry>
-	      <entry>y<subscript>4</subscript></entry>
-	      <entry>y<subscript>3</subscript></entry>
-	      <entry>y<subscript>2</subscript></entry>
-	      <entry>y<subscript>1</subscript></entry>
-	      <entry>y<subscript>0</subscript></entry>
-	      <entry>v<subscript>7</subscript></entry>
-	      <entry>v<subscript>6</subscript></entry>
-	      <entry>v<subscript>5</subscript></entry>
-	      <entry>v<subscript>4</subscript></entry>
-	      <entry>v<subscript>3</subscript></entry>
-	      <entry>v<subscript>2</subscript></entry>
-	      <entry>v<subscript>1</subscript></entry>
-	      <entry>v<subscript>0</subscript></entry>
-	    </row>
-	    <row id="MEDIA-BUS-FMT-UYVY10-1X20">
-	      <entry>MEDIA_BUS_FMT_UYVY10_1X20</entry>
-	      <entry>0x201a</entry>
-	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>u<subscript>9</subscript></entry>
-	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2534,8 +2556,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>2</subscript></entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2549,9 +2569,7 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>v<subscript>9</subscript></entry>
-	      <entry>v<subscript>8</subscript></entry>
+	      &dash-ent-16;
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2560,8 +2578,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>2</subscript></entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2571,13 +2587,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-VYUY10-1X20">
-	      <entry>MEDIA_BUS_FMT_VYUY10_1X20</entry>
-	      <entry>0x201b</entry>
+	    <row id="MEDIA-BUS-FMT-VYUY8-1X16">
+	      <entry>MEDIA_BUS_FMT_VYUY8_1X16</entry>
+	      <entry>0x2010</entry>
 	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>v<subscript>9</subscript></entry>
-	      <entry>v<subscript>8</subscript></entry>
+	      &dash-ent-16;
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2586,8 +2600,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>2</subscript></entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2601,9 +2613,7 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>u<subscript>9</subscript></entry>
-	      <entry>u<subscript>8</subscript></entry>
+	      &dash-ent-16;
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2612,8 +2622,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>2</subscript></entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2623,13 +2631,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-YUYV10-1X20">
-	      <entry>MEDIA_BUS_FMT_YUYV10_1X20</entry>
-	      <entry>0x200d</entry>
+	    <row id="MEDIA-BUS-FMT-YUYV8-1X16">
+	      <entry>MEDIA_BUS_FMT_YUYV8_1X16</entry>
+	      <entry>0x2011</entry>
 	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
+	      &dash-ent-16;
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2638,8 +2644,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	      <entry>u<subscript>9</subscript></entry>
-	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2653,9 +2657,7 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
+	      &dash-ent-16;
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2664,8 +2666,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	      <entry>v<subscript>9</subscript></entry>
-	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2675,13 +2675,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-YVYU10-1X20">
-	      <entry>MEDIA_BUS_FMT_YVYU10_1X20</entry>
-	      <entry>0x200e</entry>
+	    <row id="MEDIA-BUS-FMT-YVYU8-1X16">
+	      <entry>MEDIA_BUS_FMT_YVYU8_1X16</entry>
+	      <entry>0x2012</entry>
 	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
+	      &dash-ent-16;
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2690,8 +2688,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	      <entry>v<subscript>9</subscript></entry>
-	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2705,9 +2701,51 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-12;
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
+	      &dash-ent-16;
+	      <entry>y<subscript>7</subscript></entry>
+	      <entry>y<subscript>6</subscript></entry>
+	      <entry>y<subscript>5</subscript></entry>
+	      <entry>y<subscript>4</subscript></entry>
+	      <entry>y<subscript>3</subscript></entry>
+	      <entry>y<subscript>2</subscript></entry>
+	      <entry>y<subscript>1</subscript></entry>
+	      <entry>y<subscript>0</subscript></entry>
+	      <entry>u<subscript>7</subscript></entry>
+	      <entry>u<subscript>6</subscript></entry>
+	      <entry>u<subscript>5</subscript></entry>
+	      <entry>u<subscript>4</subscript></entry>
+	      <entry>u<subscript>3</subscript></entry>
+	      <entry>u<subscript>2</subscript></entry>
+	      <entry>u<subscript>1</subscript></entry>
+	      <entry>u<subscript>0</subscript></entry>
+	    </row>
+	    <row id="MEDIA-BUS-FMT-YDYUYDYV8-1X16">
+	      <entry>MEDIA_BUS_FMT_YDYUYDYV8_1X16</entry>
+	      <entry>0x2014</entry>
+	      <entry></entry>
+	      &dash-ent-16;
+	      <entry>y<subscript>7</subscript></entry>
+	      <entry>y<subscript>6</subscript></entry>
+	      <entry>y<subscript>5</subscript></entry>
+	      <entry>y<subscript>4</subscript></entry>
+	      <entry>y<subscript>3</subscript></entry>
+	      <entry>y<subscript>2</subscript></entry>
+	      <entry>y<subscript>1</subscript></entry>
+	      <entry>y<subscript>0</subscript></entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	    </row>
+	    <row>
+	      <entry></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2716,8 +2754,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	      <entry>u<subscript>9</subscript></entry>
-	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
 	      <entry>u<subscript>6</subscript></entry>
 	      <entry>u<subscript>5</subscript></entry>
@@ -2727,14 +2763,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-YUV10-1X30">
-	      <entry>MEDIA_BUS_FMT_YUV10_1X30</entry>
-	      <entry>0x2016</entry>
+	    <row>
 	      <entry></entry>
-	      <entry>-</entry>
-	      <entry>-</entry>
-	      <entry>y<subscript>9</subscript></entry>
-	      <entry>y<subscript>8</subscript></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2743,39 +2776,20 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	      <entry>u<subscript>9</subscript></entry>
-	      <entry>u<subscript>8</subscript></entry>
-	      <entry>u<subscript>7</subscript></entry>
-	      <entry>u<subscript>6</subscript></entry>
-	      <entry>u<subscript>5</subscript></entry>
-	      <entry>u<subscript>4</subscript></entry>
-	      <entry>u<subscript>3</subscript></entry>
-	      <entry>u<subscript>2</subscript></entry>
-	      <entry>u<subscript>1</subscript></entry>
-	      <entry>u<subscript>0</subscript></entry>
-	      <entry>v<subscript>9</subscript></entry>
-	      <entry>v<subscript>8</subscript></entry>
-	      <entry>v<subscript>7</subscript></entry>
-	      <entry>v<subscript>6</subscript></entry>
-	      <entry>v<subscript>5</subscript></entry>
-	      <entry>v<subscript>4</subscript></entry>
-	      <entry>v<subscript>3</subscript></entry>
-	      <entry>v<subscript>2</subscript></entry>
-	      <entry>v<subscript>1</subscript></entry>
-	      <entry>v<subscript>0</subscript></entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
+	      <entry>d</entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-AYUV8-1X32">
-	      <entry>MEDIA_BUS_FMT_AYUV8_1X32</entry>
-	      <entry>0x2017</entry>
+	    <row>
 	      <entry></entry>
-	      <entry>a<subscript>7</subscript></entry>
-	      <entry>a<subscript>6</subscript></entry>
-	      <entry>a<subscript>5</subscript></entry>
-	      <entry>a<subscript>4</subscript></entry>
-	      <entry>a<subscript>3</subscript></entry>
-	      <entry>a<subscript>2</subscript></entry>
-	      <entry>a<subscript>1</subscript></entry>
-	      <entry>a<subscript>0</subscript></entry>
+	      <entry></entry>
+	      <entry></entry>
+	      &dash-ent-16;
 	      <entry>y<subscript>7</subscript></entry>
 	      <entry>y<subscript>6</subscript></entry>
 	      <entry>y<subscript>5</subscript></entry>
@@ -2784,14 +2798,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	      <entry>u<subscript>7</subscript></entry>
-	      <entry>u<subscript>6</subscript></entry>
-	      <entry>u<subscript>5</subscript></entry>
-	      <entry>u<subscript>4</subscript></entry>
-	      <entry>u<subscript>3</subscript></entry>
-	      <entry>u<subscript>2</subscript></entry>
-	      <entry>u<subscript>1</subscript></entry>
-	      <entry>u<subscript>0</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
 	      <entry>v<subscript>6</subscript></entry>
 	      <entry>v<subscript>5</subscript></entry>
@@ -2801,13 +2807,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-UYVY12-2X12">
-	      <entry>MEDIA_BUS_FMT_UYVY12_2X12</entry>
-	      <entry>0x201c</entry>
+	    <row id="MEDIA-BUS-FMT-UYVY10-1X20">
+	      <entry>MEDIA_BUS_FMT_UYVY10_1X20</entry>
+	      <entry>0x201a</entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>u<subscript>11</subscript></entry>
-	      <entry>u<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>u<subscript>9</subscript></entry>
 	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
@@ -2818,14 +2822,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>2</subscript></entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -2841,9 +2837,7 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>v<subscript>11</subscript></entry>
-	      <entry>v<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>v<subscript>9</subscript></entry>
 	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
@@ -2854,14 +2848,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>2</subscript></entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -2873,13 +2859,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-VYUY12-2X12">
-	      <entry>MEDIA_BUS_FMT_VYUY12_2X12</entry>
-	      <entry>0x201d</entry>
+	    <row id="MEDIA-BUS-FMT-VYUY10-1X20">
+	      <entry>MEDIA_BUS_FMT_VYUY10_1X20</entry>
+	      <entry>0x201b</entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>v<subscript>11</subscript></entry>
-	      <entry>v<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>v<subscript>9</subscript></entry>
 	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
@@ -2890,14 +2874,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>2</subscript></entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -2913,9 +2889,7 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>u<subscript>11</subscript></entry>
-	      <entry>u<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>u<subscript>9</subscript></entry>
 	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
@@ -2926,14 +2900,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>2</subscript></entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -2945,13 +2911,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-YUYV12-2X12">
-	      <entry>MEDIA_BUS_FMT_YUYV12_2X12</entry>
-	      <entry>0x201e</entry>
+	    <row id="MEDIA-BUS-FMT-YUYV10-1X20">
+	      <entry>MEDIA_BUS_FMT_YUYV10_1X20</entry>
+	      <entry>0x200d</entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -2962,14 +2926,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>u<subscript>11</subscript></entry>
-	      <entry>u<subscript>10</subscript></entry>
 	      <entry>u<subscript>9</subscript></entry>
 	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
@@ -2985,9 +2941,7 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -2998,14 +2952,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>v<subscript>11</subscript></entry>
-	      <entry>v<subscript>10</subscript></entry>
 	      <entry>v<subscript>9</subscript></entry>
 	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
@@ -3017,13 +2963,11 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>v<subscript>1</subscript></entry>
 	      <entry>v<subscript>0</subscript></entry>
 	    </row>
-	    <row id="MEDIA-BUS-FMT-YVYU12-2X12">
-	      <entry>MEDIA_BUS_FMT_YVYU12_2X12</entry>
-	      <entry>0x201f</entry>
+	    <row id="MEDIA-BUS-FMT-YVYU10-1X20">
+	      <entry>MEDIA_BUS_FMT_YVYU10_1X20</entry>
+	      <entry>0x200e</entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -3034,14 +2978,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>v<subscript>11</subscript></entry>
-	      <entry>v<subscript>10</subscript></entry>
 	      <entry>v<subscript>9</subscript></entry>
 	      <entry>v<subscript>8</subscript></entry>
 	      <entry>v<subscript>7</subscript></entry>
@@ -3057,9 +2993,7 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry></entry>
 	      <entry></entry>
 	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>y<subscript>11</subscript></entry>
-	      <entry>y<subscript>10</subscript></entry>
+	      &dash-ent-12;
 	      <entry>y<subscript>9</subscript></entry>
 	      <entry>y<subscript>8</subscript></entry>
 	      <entry>y<subscript>7</subscript></entry>
@@ -3070,14 +3004,6 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>y<subscript>2</subscript></entry>
 	      <entry>y<subscript>1</subscript></entry>
 	      <entry>y<subscript>0</subscript></entry>
-	    </row>
-	    <row>
-	      <entry></entry>
-	      <entry></entry>
-	      <entry></entry>
-	      &dash-ent-20;
-	      <entry>u<subscript>11</subscript></entry>
-	      <entry>u<subscript>10</subscript></entry>
 	      <entry>u<subscript>9</subscript></entry>
 	      <entry>u<subscript>8</subscript></entry>
 	      <entry>u<subscript>7</subscript></entry>
@@ -3329,6 +3255,80 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-YUV10-1X30">
+	      <entry>MEDIA_BUS_FMT_YUV10_1X30</entry>
+	      <entry>0x2016</entry>
+	      <entry></entry>
+	      <entry>-</entry>
+	      <entry>-</entry>
+	      <entry>y<subscript>9</subscript></entry>
+	      <entry>y<subscript>8</subscript></entry>
+	      <entry>y<subscript>7</subscript></entry>
+	      <entry>y<subscript>6</subscript></entry>
+	      <entry>y<subscript>5</subscript></entry>
+	      <entry>y<subscript>4</subscript></entry>
+	      <entry>y<subscript>3</subscript></entry>
+	      <entry>y<subscript>2</subscript></entry>
+	      <entry>y<subscript>1</subscript></entry>
+	      <entry>y<subscript>0</subscript></entry>
+	      <entry>u<subscript>9</subscript></entry>
+	      <entry>u<subscript>8</subscript></entry>
+	      <entry>u<subscript>7</subscript></entry>
+	      <entry>u<subscript>6</subscript></entry>
+	      <entry>u<subscript>5</subscript></entry>
+	      <entry>u<subscript>4</subscript></entry>
+	      <entry>u<subscript>3</subscript></entry>
+	      <entry>u<subscript>2</subscript></entry>
+	      <entry>u<subscript>1</subscript></entry>
+	      <entry>u<subscript>0</subscript></entry>
+	      <entry>v<subscript>9</subscript></entry>
+	      <entry>v<subscript>8</subscript></entry>
+	      <entry>v<subscript>7</subscript></entry>
+	      <entry>v<subscript>6</subscript></entry>
+	      <entry>v<subscript>5</subscript></entry>
+	      <entry>v<subscript>4</subscript></entry>
+	      <entry>v<subscript>3</subscript></entry>
+	      <entry>v<subscript>2</subscript></entry>
+	      <entry>v<subscript>1</subscript></entry>
+	      <entry>v<subscript>0</subscript></entry>
+	    </row>
+	    <row id="MEDIA-BUS-FMT-AYUV8-1X32">
+	      <entry>MEDIA_BUS_FMT_AYUV8_1X32</entry>
+	      <entry>0x2017</entry>
+	      <entry></entry>
+	      <entry>a<subscript>7</subscript></entry>
+	      <entry>a<subscript>6</subscript></entry>
+	      <entry>a<subscript>5</subscript></entry>
+	      <entry>a<subscript>4</subscript></entry>
+	      <entry>a<subscript>3</subscript></entry>
+	      <entry>a<subscript>2</subscript></entry>
+	      <entry>a<subscript>1</subscript></entry>
+	      <entry>a<subscript>0</subscript></entry>
+	      <entry>y<subscript>7</subscript></entry>
+	      <entry>y<subscript>6</subscript></entry>
+	      <entry>y<subscript>5</subscript></entry>
+	      <entry>y<subscript>4</subscript></entry>
+	      <entry>y<subscript>3</subscript></entry>
+	      <entry>y<subscript>2</subscript></entry>
+	      <entry>y<subscript>1</subscript></entry>
+	      <entry>y<subscript>0</subscript></entry>
+	      <entry>u<subscript>7</subscript></entry>
+	      <entry>u<subscript>6</subscript></entry>
+	      <entry>u<subscript>5</subscript></entry>
+	      <entry>u<subscript>4</subscript></entry>
+	      <entry>u<subscript>3</subscript></entry>
+	      <entry>u<subscript>2</subscript></entry>
+	      <entry>u<subscript>1</subscript></entry>
+	      <entry>u<subscript>0</subscript></entry>
+	      <entry>v<subscript>7</subscript></entry>
+	      <entry>v<subscript>6</subscript></entry>
+	      <entry>v<subscript>5</subscript></entry>
+	      <entry>v<subscript>4</subscript></entry>
+	      <entry>v<subscript>3</subscript></entry>
+	      <entry>v<subscript>2</subscript></entry>
+	      <entry>v<subscript>1</subscript></entry>
+	      <entry>v<subscript>0</subscript></entry>
+	    </row>
 	  </tbody>
 	</tgroup>
       </table>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index b585bb32d25e..363a30fd8a21 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -67,6 +67,10 @@
 #define MEDIA_BUS_FMT_YUYV10_2X10		0x200b
 #define MEDIA_BUS_FMT_YVYU10_2X10		0x200c
 #define MEDIA_BUS_FMT_Y12_1X12			0x2013
+#define MEDIA_BUS_FMT_UYVY12_2X12		0x201c
+#define MEDIA_BUS_FMT_VYUY12_2X12		0x201d
+#define MEDIA_BUS_FMT_YUYV12_2X12		0x201e
+#define MEDIA_BUS_FMT_YVYU12_2X12		0x201f
 #define MEDIA_BUS_FMT_UYVY8_1X16		0x200f
 #define MEDIA_BUS_FMT_VYUY8_1X16		0x2010
 #define MEDIA_BUS_FMT_YUYV8_1X16		0x2011
@@ -76,16 +80,12 @@
 #define MEDIA_BUS_FMT_VYUY10_1X20		0x201b
 #define MEDIA_BUS_FMT_YUYV10_1X20		0x200d
 #define MEDIA_BUS_FMT_YVYU10_1X20		0x200e
-#define MEDIA_BUS_FMT_YUV10_1X30		0x2016
-#define MEDIA_BUS_FMT_AYUV8_1X32		0x2017
-#define MEDIA_BUS_FMT_UYVY12_2X12		0x201c
-#define MEDIA_BUS_FMT_VYUY12_2X12		0x201d
-#define MEDIA_BUS_FMT_YUYV12_2X12		0x201e
-#define MEDIA_BUS_FMT_YVYU12_2X12		0x201f
 #define MEDIA_BUS_FMT_UYVY12_1X24		0x2020
 #define MEDIA_BUS_FMT_VYUY12_1X24		0x2021
 #define MEDIA_BUS_FMT_YUYV12_1X24		0x2022
 #define MEDIA_BUS_FMT_YVYU12_1X24		0x2023
+#define MEDIA_BUS_FMT_YUV10_1X30		0x2016
+#define MEDIA_BUS_FMT_AYUV8_1X32		0x2017
 
 /* Bayer - next is	0x3019 */
 #define MEDIA_BUS_FMT_SBGGR8_1X8		0x3001
-- 
cgit 


From 2dca0551e4e2f3fc2b9beee784c7071e1e79c14b Mon Sep 17 00:00:00 2001
From: Hyun Kwon <hyun.kwon@xilinx.com>
Date: Tue, 18 Mar 2014 13:18:15 -0300
Subject: [media] v4l: Add VUY8 24 bits bus format

Add VUY8 24 bits bus format, V4L2_MBUS_FMT_VUY8_1X24.

Signed-off-by: Hyun Kwon <hyun.kwon@xilinx.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml | 30 ++++++++++++++++++++++
 include/uapi/linux/media-bus-format.h              |  3 ++-
 2 files changed, 32 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 9bfd468cd524..bc8d3fb9e4a9 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -3015,6 +3015,36 @@ see <xref linkend="colorspaces" />.</entry>
 	      <entry>u<subscript>1</subscript></entry>
 	      <entry>u<subscript>0</subscript></entry>
 	    </row>
+	    <row id="MEDIA-BUS-FMT-VUY8-1X24">
+	      <entry>MEDIA_BUS_FMT_VUY8_1X24</entry>
+	      <entry>0x201a</entry>
+	      <entry></entry>
+	      &dash-ent-8;
+	      <entry>v<subscript>7</subscript></entry>
+	      <entry>v<subscript>6</subscript></entry>
+	      <entry>v<subscript>5</subscript></entry>
+	      <entry>v<subscript>4</subscript></entry>
+	      <entry>v<subscript>3</subscript></entry>
+	      <entry>v<subscript>2</subscript></entry>
+	      <entry>v<subscript>1</subscript></entry>
+	      <entry>v<subscript>0</subscript></entry>
+	      <entry>u<subscript>7</subscript></entry>
+	      <entry>u<subscript>6</subscript></entry>
+	      <entry>u<subscript>5</subscript></entry>
+	      <entry>u<subscript>4</subscript></entry>
+	      <entry>u<subscript>3</subscript></entry>
+	      <entry>u<subscript>2</subscript></entry>
+	      <entry>u<subscript>1</subscript></entry>
+	      <entry>u<subscript>0</subscript></entry>
+	      <entry>y<subscript>7</subscript></entry>
+	      <entry>y<subscript>6</subscript></entry>
+	      <entry>y<subscript>5</subscript></entry>
+	      <entry>y<subscript>4</subscript></entry>
+	      <entry>y<subscript>3</subscript></entry>
+	      <entry>y<subscript>2</subscript></entry>
+	      <entry>y<subscript>1</subscript></entry>
+	      <entry>y<subscript>0</subscript></entry>
+	    </row>
 	    <row id="MEDIA-BUS-FMT-UYVY12-1X24">
 	      <entry>MEDIA_BUS_FMT_UYVY12_1X24</entry>
 	      <entry>0x2020</entry>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 363a30fd8a21..d391893064a0 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -50,7 +50,7 @@
 #define MEDIA_BUS_FMT_ARGB8888_1X32		0x100d
 #define MEDIA_BUS_FMT_RGB888_1X32_PADHI		0x100f
 
-/* YUV (including grey) - next is	0x2024 */
+/* YUV (including grey) - next is	0x2025 */
 #define MEDIA_BUS_FMT_Y8_1X8			0x2001
 #define MEDIA_BUS_FMT_UV8_1X8			0x2015
 #define MEDIA_BUS_FMT_UYVY8_1_5X8		0x2002
@@ -80,6 +80,7 @@
 #define MEDIA_BUS_FMT_VYUY10_1X20		0x201b
 #define MEDIA_BUS_FMT_YUYV10_1X20		0x200d
 #define MEDIA_BUS_FMT_YVYU10_1X20		0x200e
+#define MEDIA_BUS_FMT_VUY8_1X24			0x2024
 #define MEDIA_BUS_FMT_UYVY12_1X24		0x2020
 #define MEDIA_BUS_FMT_VYUY12_1X24		0x2021
 #define MEDIA_BUS_FMT_YUYV12_1X24		0x2022
-- 
cgit 


From a5562f65b1371a0988b707c10c44fcc2bba56990 Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Wed, 15 May 2013 11:36:56 -0300
Subject: [media] v4l: xilinx: Add Test Pattern Generator driver

The TPG generates multiple static or dynamic test patterns. The driver
currently hardcodes the pattern to the moving box pattern.

Signed-off-by: Christian Kohn <christian.kohn@xilinx.com>
Signed-off-by: Hyun Kwon <hyun.kwon@xilinx.com>
Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Michal Simek <michal.simek@xilinx.com>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 .../bindings/media/xilinx/xlnx,v-tpg.txt           |  71 ++
 MAINTAINERS                                        |   1 +
 drivers/media/platform/xilinx/Kconfig              |   7 +
 drivers/media/platform/xilinx/Makefile             |   1 +
 drivers/media/platform/xilinx/xilinx-tpg.c         | 931 +++++++++++++++++++++
 include/uapi/linux/Kbuild                          |   1 +
 include/uapi/linux/xilinx-v4l2-controls.h          |  73 ++
 7 files changed, 1085 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt
 create mode 100644 drivers/media/platform/xilinx/xilinx-tpg.c
 create mode 100644 include/uapi/linux/xilinx-v4l2-controls.h

(limited to 'include/uapi/linux')

diff --git a/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt
new file mode 100644
index 000000000000..9dd86b3db937
--- /dev/null
+++ b/Documentation/devicetree/bindings/media/xilinx/xlnx,v-tpg.txt
@@ -0,0 +1,71 @@
+Xilinx Video Test Pattern Generator (TPG)
+-----------------------------------------
+
+Required properties:
+
+- compatible: Must contain at least one of
+
+    "xlnx,v-tpg-5.0" (TPG version 5.0)
+    "xlnx,v-tpg-6.0" (TPG version 6.0)
+
+  TPG versions backward-compatible with previous versions should list all
+  compatible versions in the newer to older order.
+
+- reg: Physical base address and length of the registers set for the device.
+
+- clocks: Reference to the video core clock.
+
+- xlnx,video-format, xlnx,video-width: Video format and width, as defined in
+  video.txt.
+
+- port: Video port, using the DT bindings defined in ../video-interfaces.txt.
+  The TPG has a single output port numbered 0.
+
+Optional properties:
+
+- xlnx,vtc: A phandle referencing the Video Timing Controller that generates
+  video timings for the TPG test patterns.
+
+- timing-gpios: Specifier for a GPIO that controls the timing mux at the TPG
+  input. The GPIO active level corresponds to the selection of VTC-generated
+  video timings.
+
+The xlnx,vtc and timing-gpios properties are mandatory when the TPG is
+synthesized with two ports and forbidden when synthesized with one port.
+
+Example:
+
+	tpg_0: tpg@40050000 {
+		compatible = "xlnx,v-tpg-6.0", "xlnx,v-tpg-5.0";
+		reg = <0x40050000 0x10000>;
+		clocks = <&clkc 15>;
+
+		xlnx,vtc = <&vtc_3>;
+		timing-gpios = <&ps7_gpio_0 55 GPIO_ACTIVE_LOW>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+
+				xlnx,video-format = <XVIP_VF_YUV_422>;
+				xlnx,video-width = <8>;
+
+				tpg_in: endpoint {
+					remote-endpoint = <&adv7611_out>;
+				};
+			};
+			port@1 {
+				reg = <1>;
+
+				xlnx,video-format = <XVIP_VF_YUV_422>;
+				xlnx,video-width = <8>;
+
+				tpg1_out: endpoint {
+					remote-endpoint = <&switch_in0>;
+				};
+			}:
+		};
+	};
diff --git a/MAINTAINERS b/MAINTAINERS
index ca3b2638fcc6..30e7e38ccad0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10826,6 +10826,7 @@ T:	git git://linuxtv.org/media_tree.git
 S:	Supported
 F:	Documentation/devicetree/bindings/media/xilinx/
 F:	drivers/media/platform/xilinx/
+F:	include/uapi/linux/xilinx-v4l2-controls.h
 
 XILLYBUS DRIVER
 M:	Eli Billauer <eli.billauer@gmail.com>
diff --git a/drivers/media/platform/xilinx/Kconfig b/drivers/media/platform/xilinx/Kconfig
index 19db82343bb8..d7324c726fc2 100644
--- a/drivers/media/platform/xilinx/Kconfig
+++ b/drivers/media/platform/xilinx/Kconfig
@@ -7,6 +7,13 @@ config VIDEO_XILINX
 
 if VIDEO_XILINX
 
+config VIDEO_XILINX_TPG
+	tristate "Xilinx Video Test Pattern Generator"
+	depends on VIDEO_XILINX
+	select VIDEO_XILINX_VTC
+	---help---
+	   Driver for the Xilinx Video Test Pattern Generator
+
 config VIDEO_XILINX_VTC
 	tristate "Xilinx Video Timing Controller"
 	depends on VIDEO_XILINX
diff --git a/drivers/media/platform/xilinx/Makefile b/drivers/media/platform/xilinx/Makefile
index 6611e3228e3c..e8a0f2a9f733 100644
--- a/drivers/media/platform/xilinx/Makefile
+++ b/drivers/media/platform/xilinx/Makefile
@@ -1,4 +1,5 @@
 xilinx-video-objs += xilinx-dma.o xilinx-vip.o xilinx-vipp.o
 
 obj-$(CONFIG_VIDEO_XILINX) += xilinx-video.o
+obj-$(CONFIG_VIDEO_XILINX_TPG) += xilinx-tpg.o
 obj-$(CONFIG_VIDEO_XILINX_VTC) += xilinx-vtc.o
diff --git a/drivers/media/platform/xilinx/xilinx-tpg.c b/drivers/media/platform/xilinx/xilinx-tpg.c
new file mode 100644
index 000000000000..b5f7d5ecb7f6
--- /dev/null
+++ b/drivers/media/platform/xilinx/xilinx-tpg.c
@@ -0,0 +1,931 @@
+/*
+ * Xilinx Test Pattern Generator
+ *
+ * Copyright (C) 2013-2015 Ideas on Board
+ * Copyright (C) 2013-2015 Xilinx, Inc.
+ *
+ * Contacts: Hyun Kwon <hyun.kwon@xilinx.com>
+ *           Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/xilinx-v4l2-controls.h>
+
+#include <media/v4l2-async.h>
+#include <media/v4l2-ctrls.h>
+#include <media/v4l2-subdev.h>
+
+#include "xilinx-vip.h"
+#include "xilinx-vtc.h"
+
+#define XTPG_CTRL_STATUS_SLAVE_ERROR		(1 << 16)
+#define XTPG_CTRL_IRQ_SLAVE_ERROR		(1 << 16)
+
+#define XTPG_PATTERN_CONTROL			0x0100
+#define XTPG_PATTERN_MASK			(0xf << 0)
+#define XTPG_PATTERN_CONTROL_CROSS_HAIRS	(1 << 4)
+#define XTPG_PATTERN_CONTROL_MOVING_BOX		(1 << 5)
+#define XTPG_PATTERN_CONTROL_COLOR_MASK_SHIFT	6
+#define XTPG_PATTERN_CONTROL_COLOR_MASK_MASK	(0xf << 6)
+#define XTPG_PATTERN_CONTROL_STUCK_PIXEL	(1 << 9)
+#define XTPG_PATTERN_CONTROL_NOISE		(1 << 10)
+#define XTPG_PATTERN_CONTROL_MOTION		(1 << 12)
+#define XTPG_MOTION_SPEED			0x0104
+#define XTPG_CROSS_HAIRS			0x0108
+#define XTPG_CROSS_HAIRS_ROW_SHIFT		0
+#define XTPG_CROSS_HAIRS_ROW_MASK		(0xfff << 0)
+#define XTPG_CROSS_HAIRS_COLUMN_SHIFT		16
+#define XTPG_CROSS_HAIRS_COLUMN_MASK		(0xfff << 16)
+#define XTPG_ZPLATE_HOR_CONTROL			0x010c
+#define XTPG_ZPLATE_VER_CONTROL			0x0110
+#define XTPG_ZPLATE_START_SHIFT			0
+#define XTPG_ZPLATE_START_MASK			(0xffff << 0)
+#define XTPG_ZPLATE_SPEED_SHIFT			16
+#define XTPG_ZPLATE_SPEED_MASK			(0xffff << 16)
+#define XTPG_BOX_SIZE				0x0114
+#define XTPG_BOX_COLOR				0x0118
+#define XTPG_STUCK_PIXEL_THRESH			0x011c
+#define XTPG_NOISE_GAIN				0x0120
+#define XTPG_BAYER_PHASE			0x0124
+#define XTPG_BAYER_PHASE_RGGB			0
+#define XTPG_BAYER_PHASE_GRBG			1
+#define XTPG_BAYER_PHASE_GBRG			2
+#define XTPG_BAYER_PHASE_BGGR			3
+#define XTPG_BAYER_PHASE_OFF			4
+
+/*
+ * The minimum blanking value is one clock cycle for the front porch, one clock
+ * cycle for the sync pulse and one clock cycle for the back porch.
+ */
+#define XTPG_MIN_HBLANK			3
+#define XTPG_MAX_HBLANK			(XVTC_MAX_HSIZE - XVIP_MIN_WIDTH)
+#define XTPG_MIN_VBLANK			3
+#define XTPG_MAX_VBLANK			(XVTC_MAX_VSIZE - XVIP_MIN_HEIGHT)
+
+/**
+ * struct xtpg_device - Xilinx Test Pattern Generator device structure
+ * @xvip: Xilinx Video IP device
+ * @pads: media pads
+ * @npads: number of pads (1 or 2)
+ * @has_input: whether an input is connected to the sink pad
+ * @formats: active V4L2 media bus format for each pad
+ * @default_format: default V4L2 media bus format
+ * @vip_format: format information corresponding to the active format
+ * @bayer: boolean flag if TPG is set to any bayer format
+ * @ctrl_handler: control handler
+ * @hblank: horizontal blanking control
+ * @vblank: vertical blanking control
+ * @pattern: test pattern control
+ * @streaming: is the video stream active
+ * @vtc: video timing controller
+ * @vtmux_gpio: video timing mux GPIO
+ */
+struct xtpg_device {
+	struct xvip_device xvip;
+
+	struct media_pad pads[2];
+	unsigned int npads;
+	bool has_input;
+
+	struct v4l2_mbus_framefmt formats[2];
+	struct v4l2_mbus_framefmt default_format;
+	const struct xvip_video_format *vip_format;
+	bool bayer;
+
+	struct v4l2_ctrl_handler ctrl_handler;
+	struct v4l2_ctrl *hblank;
+	struct v4l2_ctrl *vblank;
+	struct v4l2_ctrl *pattern;
+	bool streaming;
+
+	struct xvtc_device *vtc;
+	struct gpio_desc *vtmux_gpio;
+};
+
+static inline struct xtpg_device *to_tpg(struct v4l2_subdev *subdev)
+{
+	return container_of(subdev, struct xtpg_device, xvip.subdev);
+}
+
+static u32 xtpg_get_bayer_phase(unsigned int code)
+{
+	switch (code) {
+	case MEDIA_BUS_FMT_SRGGB8_1X8:
+		return XTPG_BAYER_PHASE_RGGB;
+	case MEDIA_BUS_FMT_SGRBG8_1X8:
+		return XTPG_BAYER_PHASE_GRBG;
+	case MEDIA_BUS_FMT_SGBRG8_1X8:
+		return XTPG_BAYER_PHASE_GBRG;
+	case MEDIA_BUS_FMT_SBGGR8_1X8:
+		return XTPG_BAYER_PHASE_BGGR;
+	default:
+		return XTPG_BAYER_PHASE_OFF;
+	}
+}
+
+static void __xtpg_update_pattern_control(struct xtpg_device *xtpg,
+					  bool passthrough, bool pattern)
+{
+	u32 pattern_mask = (1 << (xtpg->pattern->maximum + 1)) - 1;
+
+	/*
+	 * If the TPG has no sink pad or no input connected to its sink pad
+	 * passthrough mode can't be enabled.
+	 */
+	if (xtpg->npads == 1 || !xtpg->has_input)
+		passthrough = false;
+
+	/* If passthrough mode is allowed unmask bit 0. */
+	if (passthrough)
+		pattern_mask &= ~1;
+
+	/* If test pattern mode is allowed unmask all other bits. */
+	if (pattern)
+		pattern_mask &= 1;
+
+	__v4l2_ctrl_modify_range(xtpg->pattern, 0, xtpg->pattern->maximum,
+				 pattern_mask, pattern ? 9 : 0);
+}
+
+static void xtpg_update_pattern_control(struct xtpg_device *xtpg,
+					bool passthrough, bool pattern)
+{
+	mutex_lock(xtpg->ctrl_handler.lock);
+	__xtpg_update_pattern_control(xtpg, passthrough, pattern);
+	mutex_unlock(xtpg->ctrl_handler.lock);
+}
+
+/* -----------------------------------------------------------------------------
+ * V4L2 Subdevice Video Operations
+ */
+
+static int xtpg_s_stream(struct v4l2_subdev *subdev, int enable)
+{
+	struct xtpg_device *xtpg = to_tpg(subdev);
+	unsigned int width = xtpg->formats[0].width;
+	unsigned int height = xtpg->formats[0].height;
+	bool passthrough;
+	u32 bayer_phase;
+
+	if (!enable) {
+		xvip_stop(&xtpg->xvip);
+		if (xtpg->vtc)
+			xvtc_generator_stop(xtpg->vtc);
+
+		xtpg_update_pattern_control(xtpg, true, true);
+		xtpg->streaming = false;
+		return 0;
+	}
+
+	xvip_set_frame_size(&xtpg->xvip, &xtpg->formats[0]);
+
+	if (xtpg->vtc) {
+		struct xvtc_config config = {
+			.hblank_start = width,
+			.hsync_start = width + 1,
+			.vblank_start = height,
+			.vsync_start = height + 1,
+		};
+		unsigned int htotal;
+		unsigned int vtotal;
+
+		htotal = min_t(unsigned int, XVTC_MAX_HSIZE,
+			       v4l2_ctrl_g_ctrl(xtpg->hblank) + width);
+		vtotal = min_t(unsigned int, XVTC_MAX_VSIZE,
+			       v4l2_ctrl_g_ctrl(xtpg->vblank) + height);
+
+		config.hsync_end = htotal - 1;
+		config.hsize = htotal;
+		config.vsync_end = vtotal - 1;
+		config.vsize = vtotal;
+
+		xvtc_generator_start(xtpg->vtc, &config);
+	}
+
+	/*
+	 * Configure the bayer phase and video timing mux based on the
+	 * operation mode (passthrough or test pattern generation). The test
+	 * pattern can be modified by the control set handler, we thus need to
+	 * take the control lock here to avoid races.
+	 */
+	mutex_lock(xtpg->ctrl_handler.lock);
+
+	xvip_clr_and_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+			 XTPG_PATTERN_MASK, xtpg->pattern->cur.val);
+
+	/*
+	 * Switching between passthrough and test pattern generation modes isn't
+	 * allowed during streaming, update the control range accordingly.
+	 */
+	passthrough = xtpg->pattern->cur.val == 0;
+	__xtpg_update_pattern_control(xtpg, passthrough, !passthrough);
+
+	xtpg->streaming = true;
+
+	mutex_unlock(xtpg->ctrl_handler.lock);
+
+	/*
+	 * For TPG v5.0, the bayer phase needs to be off for the pass through
+	 * mode, otherwise the external input would be subsampled.
+	 */
+	bayer_phase = passthrough ? XTPG_BAYER_PHASE_OFF
+		    : xtpg_get_bayer_phase(xtpg->formats[0].code);
+	xvip_write(&xtpg->xvip, XTPG_BAYER_PHASE, bayer_phase);
+
+	if (xtpg->vtmux_gpio)
+		gpiod_set_value_cansleep(xtpg->vtmux_gpio, !passthrough);
+
+	xvip_start(&xtpg->xvip);
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * V4L2 Subdevice Pad Operations
+ */
+
+static struct v4l2_mbus_framefmt *
+__xtpg_get_pad_format(struct xtpg_device *xtpg,
+		      struct v4l2_subdev_pad_config *cfg,
+		      unsigned int pad, u32 which)
+{
+	switch (which) {
+	case V4L2_SUBDEV_FORMAT_TRY:
+		return v4l2_subdev_get_try_format(&xtpg->xvip.subdev, cfg, pad);
+	case V4L2_SUBDEV_FORMAT_ACTIVE:
+		return &xtpg->formats[pad];
+	default:
+		return NULL;
+	}
+}
+
+static int xtpg_get_format(struct v4l2_subdev *subdev,
+			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_format *fmt)
+{
+	struct xtpg_device *xtpg = to_tpg(subdev);
+
+	fmt->format = *__xtpg_get_pad_format(xtpg, cfg, fmt->pad, fmt->which);
+
+	return 0;
+}
+
+static int xtpg_set_format(struct v4l2_subdev *subdev,
+			   struct v4l2_subdev_pad_config *cfg,
+			   struct v4l2_subdev_format *fmt)
+{
+	struct xtpg_device *xtpg = to_tpg(subdev);
+	struct v4l2_mbus_framefmt *__format;
+	u32 bayer_phase;
+
+	__format = __xtpg_get_pad_format(xtpg, cfg, fmt->pad, fmt->which);
+
+	/* In two pads mode the source pad format is always identical to the
+	 * sink pad format.
+	 */
+	if (xtpg->npads == 2 && fmt->pad == 1) {
+		fmt->format = *__format;
+		return 0;
+	}
+
+	/* Bayer phase is configurable at runtime */
+	if (xtpg->bayer) {
+		bayer_phase = xtpg_get_bayer_phase(fmt->format.code);
+		if (bayer_phase != XTPG_BAYER_PHASE_OFF)
+			__format->code = fmt->format.code;
+	}
+
+	xvip_set_format_size(__format, fmt);
+
+	fmt->format = *__format;
+
+	/* Propagate the format to the source pad. */
+	if (xtpg->npads == 2) {
+		__format = __xtpg_get_pad_format(xtpg, cfg, 1, fmt->which);
+		*__format = fmt->format;
+	}
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * V4L2 Subdevice Operations
+ */
+
+static int xtpg_enum_frame_size(struct v4l2_subdev *subdev,
+				struct v4l2_subdev_pad_config *cfg,
+				struct v4l2_subdev_frame_size_enum *fse)
+{
+	struct v4l2_mbus_framefmt *format;
+
+	format = v4l2_subdev_get_try_format(subdev, cfg, fse->pad);
+
+	if (fse->index || fse->code != format->code)
+		return -EINVAL;
+
+	/* Min / max values for pad 0 is always fixed in both one and two pads
+	 * modes. In two pads mode, the source pad(= 1) size is identical to
+	 * the sink pad size */
+	if (fse->pad == 0) {
+		fse->min_width = XVIP_MIN_WIDTH;
+		fse->max_width = XVIP_MAX_WIDTH;
+		fse->min_height = XVIP_MIN_HEIGHT;
+		fse->max_height = XVIP_MAX_HEIGHT;
+	} else {
+		fse->min_width = format->width;
+		fse->max_width = format->width;
+		fse->min_height = format->height;
+		fse->max_height = format->height;
+	}
+
+	return 0;
+}
+
+static int xtpg_open(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh)
+{
+	struct xtpg_device *xtpg = to_tpg(subdev);
+	struct v4l2_mbus_framefmt *format;
+
+	format = v4l2_subdev_get_try_format(subdev, fh->pad, 0);
+	*format = xtpg->default_format;
+
+	if (xtpg->npads == 2) {
+		format = v4l2_subdev_get_try_format(subdev, fh->pad, 1);
+		*format = xtpg->default_format;
+	}
+
+	return 0;
+}
+
+static int xtpg_close(struct v4l2_subdev *subdev, struct v4l2_subdev_fh *fh)
+{
+	return 0;
+}
+
+static int xtpg_s_ctrl(struct v4l2_ctrl *ctrl)
+{
+	struct xtpg_device *xtpg = container_of(ctrl->handler,
+						struct xtpg_device,
+						ctrl_handler);
+	switch (ctrl->id) {
+	case V4L2_CID_TEST_PATTERN:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+				 XTPG_PATTERN_MASK, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_CROSS_HAIRS:
+		xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+				XTPG_PATTERN_CONTROL_CROSS_HAIRS, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_MOVING_BOX:
+		xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+				XTPG_PATTERN_CONTROL_MOVING_BOX, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_COLOR_MASK:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+				 XTPG_PATTERN_CONTROL_COLOR_MASK_MASK,
+				 ctrl->val <<
+				 XTPG_PATTERN_CONTROL_COLOR_MASK_SHIFT);
+		return 0;
+	case V4L2_CID_XILINX_TPG_STUCK_PIXEL:
+		xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+				XTPG_PATTERN_CONTROL_STUCK_PIXEL, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_NOISE:
+		xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+				XTPG_PATTERN_CONTROL_NOISE, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_MOTION:
+		xvip_clr_or_set(&xtpg->xvip, XTPG_PATTERN_CONTROL,
+				XTPG_PATTERN_CONTROL_MOTION, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_MOTION_SPEED:
+		xvip_write(&xtpg->xvip, XTPG_MOTION_SPEED, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_CROSS_HAIR_ROW:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_CROSS_HAIRS,
+				 XTPG_CROSS_HAIRS_ROW_MASK,
+				 ctrl->val << XTPG_CROSS_HAIRS_ROW_SHIFT);
+		return 0;
+	case V4L2_CID_XILINX_TPG_CROSS_HAIR_COLUMN:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_CROSS_HAIRS,
+				 XTPG_CROSS_HAIRS_COLUMN_MASK,
+				 ctrl->val << XTPG_CROSS_HAIRS_COLUMN_SHIFT);
+		return 0;
+	case V4L2_CID_XILINX_TPG_ZPLATE_HOR_START:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_HOR_CONTROL,
+				 XTPG_ZPLATE_START_MASK,
+				 ctrl->val << XTPG_ZPLATE_START_SHIFT);
+		return 0;
+	case V4L2_CID_XILINX_TPG_ZPLATE_HOR_SPEED:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_HOR_CONTROL,
+				 XTPG_ZPLATE_SPEED_MASK,
+				 ctrl->val << XTPG_ZPLATE_SPEED_SHIFT);
+		return 0;
+	case V4L2_CID_XILINX_TPG_ZPLATE_VER_START:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_VER_CONTROL,
+				 XTPG_ZPLATE_START_MASK,
+				 ctrl->val << XTPG_ZPLATE_START_SHIFT);
+		return 0;
+	case V4L2_CID_XILINX_TPG_ZPLATE_VER_SPEED:
+		xvip_clr_and_set(&xtpg->xvip, XTPG_ZPLATE_VER_CONTROL,
+				 XTPG_ZPLATE_SPEED_MASK,
+				 ctrl->val << XTPG_ZPLATE_SPEED_SHIFT);
+		return 0;
+	case V4L2_CID_XILINX_TPG_BOX_SIZE:
+		xvip_write(&xtpg->xvip, XTPG_BOX_SIZE, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_BOX_COLOR:
+		xvip_write(&xtpg->xvip, XTPG_BOX_COLOR, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_STUCK_PIXEL_THRESH:
+		xvip_write(&xtpg->xvip, XTPG_STUCK_PIXEL_THRESH, ctrl->val);
+		return 0;
+	case V4L2_CID_XILINX_TPG_NOISE_GAIN:
+		xvip_write(&xtpg->xvip, XTPG_NOISE_GAIN, ctrl->val);
+		return 0;
+	}
+
+	return 0;
+}
+
+static const struct v4l2_ctrl_ops xtpg_ctrl_ops = {
+	.s_ctrl	= xtpg_s_ctrl,
+};
+
+static struct v4l2_subdev_core_ops xtpg_core_ops = {
+};
+
+static struct v4l2_subdev_video_ops xtpg_video_ops = {
+	.s_stream = xtpg_s_stream,
+};
+
+static struct v4l2_subdev_pad_ops xtpg_pad_ops = {
+	.enum_mbus_code		= xvip_enum_mbus_code,
+	.enum_frame_size	= xtpg_enum_frame_size,
+	.get_fmt		= xtpg_get_format,
+	.set_fmt		= xtpg_set_format,
+};
+
+static struct v4l2_subdev_ops xtpg_ops = {
+	.core   = &xtpg_core_ops,
+	.video  = &xtpg_video_ops,
+	.pad    = &xtpg_pad_ops,
+};
+
+static const struct v4l2_subdev_internal_ops xtpg_internal_ops = {
+	.open	= xtpg_open,
+	.close	= xtpg_close,
+};
+
+/*
+ * Control Config
+ */
+
+static const char *const xtpg_pattern_strings[] = {
+	"Passthrough",
+	"Horizontal Ramp",
+	"Vertical Ramp",
+	"Temporal Ramp",
+	"Solid Red",
+	"Solid Green",
+	"Solid Blue",
+	"Solid Black",
+	"Solid White",
+	"Color Bars",
+	"Zone Plate",
+	"Tartan Color Bars",
+	"Cross Hatch",
+	"None",
+	"Vertical/Horizontal Ramps",
+	"Black/White Checker Board",
+};
+
+static struct v4l2_ctrl_config xtpg_ctrls[] = {
+	{
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_CROSS_HAIRS,
+		.name	= "Test Pattern: Cross Hairs",
+		.type	= V4L2_CTRL_TYPE_BOOLEAN,
+		.min	= false,
+		.max	= true,
+		.step	= 1,
+		.def	= 0,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_MOVING_BOX,
+		.name	= "Test Pattern: Moving Box",
+		.type	= V4L2_CTRL_TYPE_BOOLEAN,
+		.min	= false,
+		.max	= true,
+		.step	= 1,
+		.def	= 0,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_COLOR_MASK,
+		.name	= "Test Pattern: Color Mask",
+		.type	= V4L2_CTRL_TYPE_BITMASK,
+		.min	= 0,
+		.max	= 0xf,
+		.def	= 0,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_STUCK_PIXEL,
+		.name	= "Test Pattern: Stuck Pixel",
+		.type	= V4L2_CTRL_TYPE_BOOLEAN,
+		.min	= false,
+		.max	= true,
+		.step	= 1,
+		.def	= 0,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_NOISE,
+		.name	= "Test Pattern: Noise",
+		.type	= V4L2_CTRL_TYPE_BOOLEAN,
+		.min	= false,
+		.max	= true,
+		.step	= 1,
+		.def	= 0,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_MOTION,
+		.name	= "Test Pattern: Motion",
+		.type	= V4L2_CTRL_TYPE_BOOLEAN,
+		.min	= false,
+		.max	= true,
+		.step	= 1,
+		.def	= 0,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_MOTION_SPEED,
+		.name	= "Test Pattern: Motion Speed",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 8) - 1,
+		.step	= 1,
+		.def	= 4,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_CROSS_HAIR_ROW,
+		.name	= "Test Pattern: Cross Hairs Row",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 12) - 1,
+		.step	= 1,
+		.def	= 0x64,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_CROSS_HAIR_COLUMN,
+		.name	= "Test Pattern: Cross Hairs Column",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 12) - 1,
+		.step	= 1,
+		.def	= 0x64,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_ZPLATE_HOR_START,
+		.name	= "Test Pattern: Zplate Horizontal Start Pos",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 16) - 1,
+		.step	= 1,
+		.def	= 0x1e,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_ZPLATE_HOR_SPEED,
+		.name	= "Test Pattern: Zplate Horizontal Speed",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 16) - 1,
+		.step	= 1,
+		.def	= 0,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_ZPLATE_VER_START,
+		.name	= "Test Pattern: Zplate Vertical Start Pos",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 16) - 1,
+		.step	= 1,
+		.def	= 1,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_ZPLATE_VER_SPEED,
+		.name	= "Test Pattern: Zplate Vertical Speed",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 16) - 1,
+		.step	= 1,
+		.def	= 0,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_BOX_SIZE,
+		.name	= "Test Pattern: Box Size",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 12) - 1,
+		.step	= 1,
+		.def	= 0x32,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_BOX_COLOR,
+		.name	= "Test Pattern: Box Color(RGB)",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 24) - 1,
+		.step	= 1,
+		.def	= 0,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_STUCK_PIXEL_THRESH,
+		.name	= "Test Pattern: Stuck Pixel threshold",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 16) - 1,
+		.step	= 1,
+		.def	= 0,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	}, {
+		.ops	= &xtpg_ctrl_ops,
+		.id	= V4L2_CID_XILINX_TPG_NOISE_GAIN,
+		.name	= "Test Pattern: Noise Gain",
+		.type	= V4L2_CTRL_TYPE_INTEGER,
+		.min	= 0,
+		.max	= (1 << 8) - 1,
+		.step	= 1,
+		.def	= 0,
+		.flags	= V4L2_CTRL_FLAG_SLIDER,
+	},
+};
+
+/* -----------------------------------------------------------------------------
+ * Media Operations
+ */
+
+static const struct media_entity_operations xtpg_media_ops = {
+	.link_validate = v4l2_subdev_link_validate,
+};
+
+/* -----------------------------------------------------------------------------
+ * Power Management
+ */
+
+static int __maybe_unused xtpg_pm_suspend(struct device *dev)
+{
+	struct xtpg_device *xtpg = dev_get_drvdata(dev);
+
+	xvip_suspend(&xtpg->xvip);
+
+	return 0;
+}
+
+static int __maybe_unused xtpg_pm_resume(struct device *dev)
+{
+	struct xtpg_device *xtpg = dev_get_drvdata(dev);
+
+	xvip_resume(&xtpg->xvip);
+
+	return 0;
+}
+
+/* -----------------------------------------------------------------------------
+ * Platform Device Driver
+ */
+
+static int xtpg_parse_of(struct xtpg_device *xtpg)
+{
+	struct device *dev = xtpg->xvip.dev;
+	struct device_node *node = xtpg->xvip.dev->of_node;
+	struct device_node *ports;
+	struct device_node *port;
+	unsigned int nports = 0;
+	bool has_endpoint = false;
+
+	ports = of_get_child_by_name(node, "ports");
+	if (ports == NULL)
+		ports = node;
+
+	for_each_child_of_node(ports, port) {
+		const struct xvip_video_format *format;
+		struct device_node *endpoint;
+
+		if (!port->name || of_node_cmp(port->name, "port"))
+			continue;
+
+		format = xvip_of_get_format(port);
+		if (IS_ERR(format)) {
+			dev_err(dev, "invalid format in DT");
+			return PTR_ERR(format);
+		}
+
+		/* Get and check the format description */
+		if (!xtpg->vip_format) {
+			xtpg->vip_format = format;
+		} else if (xtpg->vip_format != format) {
+			dev_err(dev, "in/out format mismatch in DT");
+			return -EINVAL;
+		}
+
+		if (nports == 0) {
+			endpoint = of_get_next_child(port, NULL);
+			if (endpoint)
+				has_endpoint = true;
+			of_node_put(endpoint);
+		}
+
+		/* Count the number of ports. */
+		nports++;
+	}
+
+	if (nports != 1 && nports != 2) {
+		dev_err(dev, "invalid number of ports %u\n", nports);
+		return -EINVAL;
+	}
+
+	xtpg->npads = nports;
+	if (nports == 2 && has_endpoint)
+		xtpg->has_input = true;
+
+	return 0;
+}
+
+static int xtpg_probe(struct platform_device *pdev)
+{
+	struct v4l2_subdev *subdev;
+	struct xtpg_device *xtpg;
+	u32 i, bayer_phase;
+	int ret;
+
+	xtpg = devm_kzalloc(&pdev->dev, sizeof(*xtpg), GFP_KERNEL);
+	if (!xtpg)
+		return -ENOMEM;
+
+	xtpg->xvip.dev = &pdev->dev;
+
+	ret = xtpg_parse_of(xtpg);
+	if (ret < 0)
+		return ret;
+
+	ret = xvip_init_resources(&xtpg->xvip);
+	if (ret < 0)
+		return ret;
+
+	xtpg->vtmux_gpio = devm_gpiod_get_optional(&pdev->dev, "timing",
+						   GPIOD_OUT_HIGH);
+	if (IS_ERR(xtpg->vtmux_gpio)) {
+		ret = PTR_ERR(xtpg->vtmux_gpio);
+		goto error_resource;
+	}
+
+	xtpg->vtc = xvtc_of_get(pdev->dev.of_node);
+	if (IS_ERR(xtpg->vtc)) {
+		ret = PTR_ERR(xtpg->vtc);
+		goto error_resource;
+	}
+
+	/* Reset and initialize the core */
+	xvip_reset(&xtpg->xvip);
+
+	/* Initialize V4L2 subdevice and media entity. Pad numbers depend on the
+	 * number of pads.
+	 */
+	if (xtpg->npads == 2) {
+		xtpg->pads[0].flags = MEDIA_PAD_FL_SINK;
+		xtpg->pads[1].flags = MEDIA_PAD_FL_SOURCE;
+	} else {
+		xtpg->pads[0].flags = MEDIA_PAD_FL_SOURCE;
+	}
+
+	/* Initialize the default format */
+	xtpg->default_format.code = xtpg->vip_format->code;
+	xtpg->default_format.field = V4L2_FIELD_NONE;
+	xtpg->default_format.colorspace = V4L2_COLORSPACE_SRGB;
+	xvip_get_frame_size(&xtpg->xvip, &xtpg->default_format);
+
+	bayer_phase = xtpg_get_bayer_phase(xtpg->vip_format->code);
+	if (bayer_phase != XTPG_BAYER_PHASE_OFF)
+		xtpg->bayer = true;
+
+	xtpg->formats[0] = xtpg->default_format;
+	if (xtpg->npads == 2)
+		xtpg->formats[1] = xtpg->default_format;
+
+	/* Initialize V4L2 subdevice and media entity */
+	subdev = &xtpg->xvip.subdev;
+	v4l2_subdev_init(subdev, &xtpg_ops);
+	subdev->dev = &pdev->dev;
+	subdev->internal_ops = &xtpg_internal_ops;
+	strlcpy(subdev->name, dev_name(&pdev->dev), sizeof(subdev->name));
+	v4l2_set_subdevdata(subdev, xtpg);
+	subdev->flags |= V4L2_SUBDEV_FL_HAS_DEVNODE;
+	subdev->entity.ops = &xtpg_media_ops;
+
+	ret = media_entity_init(&subdev->entity, xtpg->npads, xtpg->pads, 0);
+	if (ret < 0)
+		goto error;
+
+	v4l2_ctrl_handler_init(&xtpg->ctrl_handler, 3 + ARRAY_SIZE(xtpg_ctrls));
+
+	xtpg->vblank = v4l2_ctrl_new_std(&xtpg->ctrl_handler, &xtpg_ctrl_ops,
+					 V4L2_CID_VBLANK, XTPG_MIN_VBLANK,
+					 XTPG_MAX_VBLANK, 1, 100);
+	xtpg->hblank = v4l2_ctrl_new_std(&xtpg->ctrl_handler, &xtpg_ctrl_ops,
+					 V4L2_CID_HBLANK, XTPG_MIN_HBLANK,
+					 XTPG_MAX_HBLANK, 1, 100);
+	xtpg->pattern = v4l2_ctrl_new_std_menu_items(&xtpg->ctrl_handler,
+					&xtpg_ctrl_ops, V4L2_CID_TEST_PATTERN,
+					ARRAY_SIZE(xtpg_pattern_strings) - 1,
+					1, 9, xtpg_pattern_strings);
+
+	for (i = 0; i < ARRAY_SIZE(xtpg_ctrls); i++)
+		v4l2_ctrl_new_custom(&xtpg->ctrl_handler, &xtpg_ctrls[i], NULL);
+
+	if (xtpg->ctrl_handler.error) {
+		dev_err(&pdev->dev, "failed to add controls\n");
+		ret = xtpg->ctrl_handler.error;
+		goto error;
+	}
+	subdev->ctrl_handler = &xtpg->ctrl_handler;
+
+	xtpg_update_pattern_control(xtpg, true, true);
+
+	ret = v4l2_ctrl_handler_setup(&xtpg->ctrl_handler);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to set controls\n");
+		goto error;
+	}
+
+	platform_set_drvdata(pdev, xtpg);
+
+	xvip_print_version(&xtpg->xvip);
+
+	ret = v4l2_async_register_subdev(subdev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "failed to register subdev\n");
+		goto error;
+	}
+
+	return 0;
+
+error:
+	v4l2_ctrl_handler_free(&xtpg->ctrl_handler);
+	media_entity_cleanup(&subdev->entity);
+	xvtc_put(xtpg->vtc);
+error_resource:
+	xvip_cleanup_resources(&xtpg->xvip);
+	return ret;
+}
+
+static int xtpg_remove(struct platform_device *pdev)
+{
+	struct xtpg_device *xtpg = platform_get_drvdata(pdev);
+	struct v4l2_subdev *subdev = &xtpg->xvip.subdev;
+
+	v4l2_async_unregister_subdev(subdev);
+	v4l2_ctrl_handler_free(&xtpg->ctrl_handler);
+	media_entity_cleanup(&subdev->entity);
+
+	xvip_cleanup_resources(&xtpg->xvip);
+
+	return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(xtpg_pm_ops, xtpg_pm_suspend, xtpg_pm_resume);
+
+static const struct of_device_id xtpg_of_id_table[] = {
+	{ .compatible = "xlnx,v-tpg-5.0" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, xtpg_of_id_table);
+
+static struct platform_driver xtpg_driver = {
+	.driver = {
+		.name		= "xilinx-tpg",
+		.pm		= &xtpg_pm_ops,
+		.of_match_table	= xtpg_of_id_table,
+	},
+	.probe			= xtpg_probe,
+	.remove			= xtpg_remove,
+};
+
+module_platform_driver(xtpg_driver);
+
+MODULE_AUTHOR("Laurent Pinchart <laurent.pinchart@ideasonboard.com>");
+MODULE_DESCRIPTION("Xilinx Test Pattern Generator Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 68ceb97c458c..d4cfc17cc414 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -446,5 +446,6 @@ header-y += wireless.h
 header-y += x25.h
 header-y += xattr.h
 header-y += xfrm.h
+header-y += xilinx-v4l2-controls.h
 header-y += zorro.h
 header-y += zorro_ids.h
diff --git a/include/uapi/linux/xilinx-v4l2-controls.h b/include/uapi/linux/xilinx-v4l2-controls.h
new file mode 100644
index 000000000000..fb495b91e800
--- /dev/null
+++ b/include/uapi/linux/xilinx-v4l2-controls.h
@@ -0,0 +1,73 @@
+/*
+ * Xilinx Controls Header
+ *
+ * Copyright (C) 2013-2015 Ideas on Board
+ * Copyright (C) 2013-2015 Xilinx, Inc.
+ *
+ * Contacts: Hyun Kwon <hyun.kwon@xilinx.com>
+ *           Laurent Pinchart <laurent.pinchart@ideasonboard.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __UAPI_XILINX_V4L2_CONTROLS_H__
+#define __UAPI_XILINX_V4L2_CONTROLS_H__
+
+#include <linux/v4l2-controls.h>
+
+#define V4L2_CID_XILINX_OFFSET	0xc000
+#define V4L2_CID_XILINX_BASE	(V4L2_CID_USER_BASE + V4L2_CID_XILINX_OFFSET)
+
+/*
+ * Private Controls for Xilinx Video IPs
+ */
+
+/*
+ * Xilinx TPG Video IP
+ */
+
+#define V4L2_CID_XILINX_TPG			(V4L2_CID_USER_BASE + 0xc000)
+
+/* Draw cross hairs */
+#define V4L2_CID_XILINX_TPG_CROSS_HAIRS		(V4L2_CID_XILINX_TPG + 1)
+/* Enable a moving box */
+#define V4L2_CID_XILINX_TPG_MOVING_BOX		(V4L2_CID_XILINX_TPG + 2)
+/* Mask out a color component */
+#define V4L2_CID_XILINX_TPG_COLOR_MASK		(V4L2_CID_XILINX_TPG + 3)
+/* Enable a stuck pixel feature */
+#define V4L2_CID_XILINX_TPG_STUCK_PIXEL		(V4L2_CID_XILINX_TPG + 4)
+/* Enable a noisy output */
+#define V4L2_CID_XILINX_TPG_NOISE		(V4L2_CID_XILINX_TPG + 5)
+/* Enable the motion feature */
+#define V4L2_CID_XILINX_TPG_MOTION		(V4L2_CID_XILINX_TPG + 6)
+/* Configure the motion speed of moving patterns */
+#define V4L2_CID_XILINX_TPG_MOTION_SPEED	(V4L2_CID_XILINX_TPG + 7)
+/* The row of horizontal cross hair location */
+#define V4L2_CID_XILINX_TPG_CROSS_HAIR_ROW	(V4L2_CID_XILINX_TPG + 8)
+/* The colum of vertical cross hair location */
+#define V4L2_CID_XILINX_TPG_CROSS_HAIR_COLUMN	(V4L2_CID_XILINX_TPG + 9)
+/* Set starting point of sine wave for horizontal component */
+#define V4L2_CID_XILINX_TPG_ZPLATE_HOR_START	(V4L2_CID_XILINX_TPG + 10)
+/* Set speed of the horizontal component */
+#define V4L2_CID_XILINX_TPG_ZPLATE_HOR_SPEED	(V4L2_CID_XILINX_TPG + 11)
+/* Set starting point of sine wave for vertical component */
+#define V4L2_CID_XILINX_TPG_ZPLATE_VER_START	(V4L2_CID_XILINX_TPG + 12)
+/* Set speed of the vertical component */
+#define V4L2_CID_XILINX_TPG_ZPLATE_VER_SPEED	(V4L2_CID_XILINX_TPG + 13)
+/* Moving box size */
+#define V4L2_CID_XILINX_TPG_BOX_SIZE		(V4L2_CID_XILINX_TPG + 14)
+/* Moving box color */
+#define V4L2_CID_XILINX_TPG_BOX_COLOR		(V4L2_CID_XILINX_TPG + 15)
+/* Upper limit count of generated stuck pixels */
+#define V4L2_CID_XILINX_TPG_STUCK_PIXEL_THRESH	(V4L2_CID_XILINX_TPG + 16)
+/* Noise level */
+#define V4L2_CID_XILINX_TPG_NOISE_GAIN		(V4L2_CID_XILINX_TPG + 17)
+
+#endif /* __UAPI_XILINX_V4L2_CONTROLS_H__ */
-- 
cgit 


From 9b3075c59f858d64478f46a15daa6ecda3cf2318 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Wed, 1 Apr 2015 01:37:05 -0700
Subject: nfsd: add NFSEXP_PNFS to the exflags array

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/export.c                 | 1 +
 include/uapi/linux/nfsd/export.h | 3 +++
 2 files changed, 4 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 2911f519fe64..900c3ae94adc 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1158,6 +1158,7 @@ static struct flags {
 	{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
 	{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
 	{ NFSEXP_V4ROOT, {"v4root", ""}},
+	{ NFSEXP_PNFS, {"pnfs", ""}},
 	{ 0, {"", ""}}
 };
 
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index d3bd6ffec041..0df7bd5d2fb1 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -21,6 +21,9 @@
 
 /*
  * Export flags.
+ *
+ * Please update the expflags[] array in fs/nfsd/export.c when adding
+ * a new flag.
  */
 #define NFSEXP_READONLY		0x0001
 #define NFSEXP_INSECURE_PORT	0x0002
-- 
cgit 


From bcad57182425426dd4aa14deb27f97acb329f3cd Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 3 Apr 2015 20:52:24 +0200
Subject: ebpf: add skb->priority to offset map for usage in {cls, act}_bpf

This adds the ability to read out the skb->priority from an eBPF
program, so that it can be taken into account from a tc filter
or action for the use-case where the priority is not being used
to directly override the filter classification in a qdisc, but
to tag traffic otherwise for the classifier; the priority can be
assigned from various places incl. user space, in future we may
also mangle it from an eBPF program.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 1 +
 net/core/filter.c        | 7 +++++++
 2 files changed, 8 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 74aab6e0d964..0db8580f3cca 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -184,6 +184,7 @@ struct __sk_buff {
 	__u32 vlan_present;
 	__u32 vlan_tci;
 	__u32 vlan_proto;
+	__u32 priority;
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index 444a07e4f68d..955a7d77decd 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1304,6 +1304,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 				      offsetof(struct sk_buff, vlan_proto));
 		break;
 
+	case offsetof(struct __sk_buff, priority):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, priority) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sk_buff, priority));
+		break;
+
 	case offsetof(struct __sk_buff, mark):
 		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
 
-- 
cgit 


From 91bc4822c3d61b9bb7ef66d3b77948a4f9177954 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 1 Apr 2015 17:12:13 -0700
Subject: tc: bpf: add checksum helpers

Commit 608cd71a9c7c ("tc: bpf: generalize pedit action") has added the
possibility to mangle packet data to BPF programs in the tc pipeline.
This patch adds two helpers bpf_l3_csum_replace() and bpf_l4_csum_replace()
for fixing up the protocol checksums after the packet mangling.

It also adds 'flags' argument to bpf_skb_store_bytes() helper to avoid
unnecessary checksum recomputations when BPF programs adjusting l3/l4
checksums and documents all three helpers in uapi header.

Moreover, a sample program is added to show how BPF programs can make use
of the mangle and csum helpers.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h  |  38 +++++++++++++++-
 net/core/filter.c         | 108 ++++++++++++++++++++++++++++++++++++++++++++--
 samples/bpf/Makefile      |   1 +
 samples/bpf/bpf_helpers.h |   7 +++
 samples/bpf/tcbpf1_kern.c |  71 ++++++++++++++++++++++++++++++
 5 files changed, 220 insertions(+), 5 deletions(-)
 create mode 100644 samples/bpf/tcbpf1_kern.c

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0db8580f3cca..23df3e7f8e7d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -168,7 +168,43 @@ enum bpf_func_id {
 	BPF_FUNC_map_delete_elem, /* int map_delete_elem(&map, &key) */
 	BPF_FUNC_get_prandom_u32, /* u32 prandom_u32(void) */
 	BPF_FUNC_get_smp_processor_id, /* u32 raw_smp_processor_id(void) */
-	BPF_FUNC_skb_store_bytes, /* int skb_store_bytes(skb, offset, from, len) */
+
+	/**
+	 * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
+	 * @skb: pointer to skb
+	 * @offset: offset within packet from skb->data
+	 * @from: pointer where to copy bytes from
+	 * @len: number of bytes to store into packet
+	 * @flags: bit 0 - if true, recompute skb->csum
+	 *         other bits - reserved
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_skb_store_bytes,
+
+	/**
+	 * l3_csum_replace(skb, offset, from, to, flags) - recompute IP checksum
+	 * @skb: pointer to skb
+	 * @offset: offset within packet where IP checksum is located
+	 * @from: old value of header field
+	 * @to: new value of header field
+	 * @flags: bits 0-3 - size of header field
+	 *         other bits - reserved
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_l3_csum_replace,
+
+	/**
+	 * l4_csum_replace(skb, offset, from, to, flags) - recompute TCP/UDP checksum
+	 * @skb: pointer to skb
+	 * @offset: offset within packet where TCP/UDP checksum is located
+	 * @from: old value of header field
+	 * @to: new value of header field
+	 * @flags: bits 0-3 - size of header field
+	 *         bit 4 - is pseudo header
+	 *         other bits - reserved
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_l4_csum_replace,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 955a7d77decd..b669e75d2b36 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,7 +1175,9 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
-static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+#define BPF_RECOMPUTE_CSUM(flags)	((flags) & 1)
+
+static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	unsigned int offset = (unsigned int) r2;
@@ -1192,7 +1194,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	 *
 	 * so check for invalid 'offset' and too large 'len'
 	 */
-	if (offset > 0xffff || len > sizeof(buf))
+	if (unlikely(offset > 0xffff || len > sizeof(buf)))
 		return -EFAULT;
 
 	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
@@ -1202,7 +1204,8 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	if (unlikely(!ptr))
 		return -EFAULT;
 
-	skb_postpull_rcsum(skb, ptr, len);
+	if (BPF_RECOMPUTE_CSUM(flags))
+		skb_postpull_rcsum(skb, ptr, len);
 
 	memcpy(ptr, from, len);
 
@@ -1210,7 +1213,7 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 		/* skb_store_bits cannot return -EFAULT here */
 		skb_store_bits(skb, offset, ptr, len);
 
-	if (skb->ip_summed == CHECKSUM_COMPLETE)
+	if (BPF_RECOMPUTE_CSUM(flags) && skb->ip_summed == CHECKSUM_COMPLETE)
 		skb->csum = csum_add(skb->csum, csum_partial(ptr, len, 0));
 	return 0;
 }
@@ -1223,6 +1226,99 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 	.arg2_type	= ARG_ANYTHING,
 	.arg3_type	= ARG_PTR_TO_STACK,
 	.arg4_type	= ARG_CONST_STACK_SIZE,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+#define BPF_HEADER_FIELD_SIZE(flags)	((flags) & 0x0f)
+#define BPF_IS_PSEUDO_HEADER(flags)	((flags) & 0x10)
+
+static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	__sum16 sum, *ptr;
+
+	if (unlikely(offset > 0xffff))
+		return -EFAULT;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+		return -EFAULT;
+
+	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+	if (unlikely(!ptr))
+		return -EFAULT;
+
+	switch (BPF_HEADER_FIELD_SIZE(flags)) {
+	case 2:
+		csum_replace2(ptr, from, to);
+		break;
+	case 4:
+		csum_replace4(ptr, from, to);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ptr == &sum)
+		/* skb_store_bits guaranteed to not return -EFAULT here */
+		skb_store_bits(skb, offset, ptr, sizeof(sum));
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_l3_csum_replace_proto = {
+	.func		= bpf_l3_csum_replace,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
+};
+
+static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+	__sum16 sum, *ptr;
+
+	if (unlikely(offset > 0xffff))
+		return -EFAULT;
+
+	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+		return -EFAULT;
+
+	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
+	if (unlikely(!ptr))
+		return -EFAULT;
+
+	switch (BPF_HEADER_FIELD_SIZE(flags)) {
+	case 2:
+		inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+		break;
+	case 4:
+		inet_proto_csum_replace4(ptr, skb, from, to, is_pseudo);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (ptr == &sum)
+		/* skb_store_bits guaranteed to not return -EFAULT here */
+		skb_store_bits(skb, offset, ptr, sizeof(sum));
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_l4_csum_replace_proto = {
+	.func		= bpf_l4_csum_replace,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_ANYTHING,
+	.arg5_type	= ARG_ANYTHING,
 };
 
 static const struct bpf_func_proto *
@@ -1250,6 +1346,10 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	switch (func_id) {
 	case BPF_FUNC_skb_store_bytes:
 		return &bpf_skb_store_bytes_proto;
+	case BPF_FUNC_l3_csum_replace:
+		return &bpf_l3_csum_replace_proto;
+	case BPF_FUNC_l4_csum_replace:
+		return &bpf_l4_csum_replace_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index b5b3600dcdf5..d24f51bca465 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -17,6 +17,7 @@ sockex2-objs := bpf_load.o libbpf.o sockex2_user.o
 always := $(hostprogs-y)
 always += sockex1_kern.o
 always += sockex2_kern.o
+always += tcbpf1_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index ca0333146006..72540ec1f003 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -37,4 +37,11 @@ struct bpf_map_def {
 	unsigned int max_entries;
 };
 
+static int (*bpf_skb_store_bytes)(void *ctx, int off, void *from, int len, int flags) =
+	(void *) BPF_FUNC_skb_store_bytes;
+static int (*bpf_l3_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l3_csum_replace;
+static int (*bpf_l4_csum_replace)(void *ctx, int off, int from, int to, int flags) =
+	(void *) BPF_FUNC_l4_csum_replace;
+
 #endif
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
new file mode 100644
index 000000000000..7cf3f42a6e39
--- /dev/null
+++ b/samples/bpf/tcbpf1_kern.c
@@ -0,0 +1,71 @@
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/if_ether.h>
+#include <uapi/linux/if_packet.h>
+#include <uapi/linux/ip.h>
+#include <uapi/linux/in.h>
+#include <uapi/linux/tcp.h>
+#include "bpf_helpers.h"
+
+/* compiler workaround */
+#define _htonl __builtin_bswap32
+
+static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
+{
+	bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
+}
+
+/* use 1 below for ingress qdisc and 0 for egress */
+#if 0
+#undef ETH_HLEN
+#define ETH_HLEN 0
+#endif
+
+#define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
+#define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
+
+static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
+{
+	__u8 old_tos = load_byte(skb, TOS_OFF);
+
+	bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
+	bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
+}
+
+#define TCP_CSUM_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, check))
+#define IP_SRC_OFF (ETH_HLEN + offsetof(struct iphdr, saddr))
+
+#define IS_PSEUDO 0x10
+
+static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
+{
+	__u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
+
+	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
+	bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
+	bpf_skb_store_bytes(skb, IP_SRC_OFF, &new_ip, sizeof(new_ip), 0);
+}
+
+#define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
+static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
+{
+	__u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
+
+	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
+	bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
+}
+
+SEC("classifier")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+	long *value;
+
+	if (proto == IPPROTO_TCP) {
+		set_ip_tos(skb, 8);
+		set_tcp_ip_src(skb, 0xA010101);
+		set_tcp_dest_port(skb, 5001);
+	}
+
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
-- 
cgit 


From 9a9634545c7051f567096117d417e9c3be24706d Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Tue, 7 Apr 2015 11:51:53 +0200
Subject: netns: notify netns id events

With this patch, netns ids that are created and deleted are advertised into the
group RTNLGRP_NSID.

Because callers of rtnl_net_notifyid() already know the id of the peer, there is
no need to call __peernet2id() in rtnl_net_fill().

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h |  4 ++++
 net/core/net_namespace.c       | 52 +++++++++++++++++++++++++++++++++++-------
 2 files changed, 48 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index bea910f924dd..974db03f7b1a 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -134,6 +134,8 @@ enum {
 
 	RTM_NEWNSID = 88,
 #define RTM_NEWNSID RTM_NEWNSID
+	RTM_DELNSID = 89,
+#define RTM_DELNSID RTM_DELNSID
 	RTM_GETNSID = 90,
 #define RTM_GETNSID RTM_GETNSID
 
@@ -635,6 +637,8 @@ enum rtnetlink_groups {
 #define RTNLGRP_MDB		RTNLGRP_MDB
 	RTNLGRP_MPLS_ROUTE,
 #define RTNLGRP_MPLS_ROUTE	RTNLGRP_MPLS_ROUTE
+	RTNLGRP_NSID,
+#define RTNLGRP_NSID		RTNLGRP_NSID
 	__RTNLGRP_MAX
 };
 #define RTNLGRP_MAX	(__RTNLGRP_MAX - 1)
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index be28afccfbbb..b3b5f22f0e90 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -148,9 +148,11 @@ static void ops_free_list(const struct pernet_operations *ops,
 	}
 }
 
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+			      int id);
 static int alloc_netid(struct net *net, struct net *peer, int reqid)
 {
-	int min = 0, max = 0;
+	int min = 0, max = 0, id;
 
 	ASSERT_RTNL();
 
@@ -159,7 +161,11 @@ static int alloc_netid(struct net *net, struct net *peer, int reqid)
 		max = reqid + 1;
 	}
 
-	return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+	id = idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
+	if (id >= 0)
+		rtnl_net_notifyid(net, peer, RTM_NEWNSID, id);
+
+	return id;
 }
 
 /* This function is used by idr_for_each(). If net is equal to peer, the
@@ -359,8 +365,10 @@ static void cleanup_net(struct work_struct *work)
 		for_each_net(tmp) {
 			int id = __peernet2id(tmp, net, false);
 
-			if (id >= 0)
+			if (id >= 0) {
+				rtnl_net_notifyid(tmp, net, RTM_DELNSID, id);
 				idr_remove(&tmp->netns_ids, id);
+			}
 		}
 		idr_destroy(&net->netns_ids);
 
@@ -531,7 +539,8 @@ static int rtnl_net_get_size(void)
 }
 
 static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
-			 int cmd, struct net *net, struct net *peer)
+			 int cmd, struct net *net, struct net *peer,
+			 int nsid)
 {
 	struct nlmsghdr *nlh;
 	struct rtgenmsg *rth;
@@ -546,9 +555,13 @@ static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
 	rth = nlmsg_data(nlh);
 	rth->rtgen_family = AF_UNSPEC;
 
-	id = __peernet2id(net, peer, false);
-	if  (id < 0)
-		id = NETNSA_NSID_NOT_ASSIGNED;
+	if (nsid >= 0) {
+		id = nsid;
+	} else {
+		id = __peernet2id(net, peer, false);
+		if  (id < 0)
+			id = NETNSA_NSID_NOT_ASSIGNED;
+	}
 	if (nla_put_s32(skb, NETNSA_NSID, id))
 		goto nla_put_failure;
 
@@ -589,7 +602,7 @@ static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
 	}
 
 	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
-			    RTM_GETNSID, net, peer);
+			    RTM_GETNSID, net, peer, -1);
 	if (err < 0)
 		goto err_out;
 
@@ -603,6 +616,29 @@ out:
 	return err;
 }
 
+static void rtnl_net_notifyid(struct net *net, struct net *peer, int cmd,
+			      int id)
+{
+	struct sk_buff *msg;
+	int err = -ENOMEM;
+
+	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
+	if (!msg)
+		goto out;
+
+	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, peer, id);
+	if (err < 0)
+		goto err_out;
+
+	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
+	return;
+
+err_out:
+	nlmsg_free(msg);
+out:
+	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
+}
+
 static int __init net_ns_init(void)
 {
 	struct net_generic *ng;
-- 
cgit 


From b6e5b8f1a90230f922de8af59cdaf1ad83e1ac1e Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Fri, 20 Mar 2015 10:45:43 -0300
Subject: [media] media: New flag V4L2_CTRL_FLAG_EXECUTE_ON_WRITE

Create a new flag that represent controls which its value needs to be
passed to the driver even if it has not changed.

They typically represent actions, like triggering a flash or clearing an
error flag. So writing to such a control means some action is executed.

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/videodev2.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 47df18f36c4b..810bade873f4 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1457,6 +1457,7 @@ struct v4l2_querymenu {
 #define V4L2_CTRL_FLAG_WRITE_ONLY 	0x0040
 #define V4L2_CTRL_FLAG_VOLATILE		0x0080
 #define V4L2_CTRL_FLAG_HAS_PAYLOAD	0x0100
+#define V4L2_CTRL_FLAG_EXECUTE_ON_WRITE	0x0200
 
 /*  Query flags, to be ORed with the control ID */
 #define V4L2_CTRL_FLAG_NEXT_CTRL	0x80000000
-- 
cgit 


From 5ce65d1f874ddc109780fc781bb3b099bff82001 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 20 Mar 2015 14:05:03 -0300
Subject: [media] videodev2.h/v4l2-dv-timings.h: add V4L2_DV_FL_IS_CE_VIDEO
 flag

In the past the V4L2_DV_BT_STD_CEA861 standard bit was used to
determine whether the format is a CE (Consumer Electronics) format
or not. However, the 640x480p59.94 format is part of the CEA-861
standard, but it is *not* a CE video format.

Add a new flag to make this explicit. This information is needed
in order to determine the default R'G'B' encoding for the format:
for CE video this is limited range (16-235) instead of full range
(0-255).

The header with all the timings has been updated with this new
flag.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Cc: Martin Bugge <marbugge@cisco.com>
Cc: Mats Randgaard <mats.randgaard@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/v4l2-dv-timings.h | 64 ++++++++++++++++++++++--------------
 include/uapi/linux/videodev2.h       |  6 ++++
 2 files changed, 45 insertions(+), 25 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h
index 6c8f159e416e..c039f1d68a09 100644
--- a/include/uapi/linux/v4l2-dv-timings.h
+++ b/include/uapi/linux/v4l2-dv-timings.h
@@ -48,14 +48,15 @@
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(720, 480, 1, 0, \
 		13500000, 19, 62, 57, 4, 3, 15, 4, 3, 16, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_720X480P59_94 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(720, 480, 0, 0, \
 		27000000, 16, 62, 60, 9, 6, 30, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 /* Note: these are the nominal timings, for HDMI links this format is typically
@@ -64,14 +65,15 @@
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(720, 576, 1, 0, \
 		13500000, 12, 63, 69, 2, 3, 19, 2, 3, 20, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_720X576P50 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(720, 576, 0, 0, \
 		27000000, 12, 64, 68, 5, 5, 39, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1280X720P24 { \
@@ -88,7 +90,7 @@
 	V4L2_INIT_BT_TIMINGS(1280, 720, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 2420, 40, 220, 5, 5, 20, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1280X720P30 { \
@@ -96,7 +98,8 @@
 	V4L2_INIT_BT_TIMINGS(1280, 720, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 1760, 40, 220, 5, 5, 20, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1280X720P50 { \
@@ -104,7 +107,7 @@
 	V4L2_INIT_BT_TIMINGS(1280, 720, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 440, 40, 220, 5, 5, 20, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1280X720P60 { \
@@ -112,7 +115,8 @@
 	V4L2_INIT_BT_TIMINGS(1280, 720, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 110, 40, 220, 5, 5, 20, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1920X1080P24 { \
@@ -120,7 +124,8 @@
 	V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 638, 44, 148, 4, 5, 36, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1920X1080P25 { \
@@ -128,7 +133,7 @@
 	V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 528, 44, 148, 4, 5, 36, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1920X1080P30 { \
@@ -136,7 +141,8 @@
 	V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 88, 44, 148, 4, 5, 36, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1920X1080I50 { \
@@ -144,7 +150,8 @@
 	V4L2_INIT_BT_TIMINGS(1920, 1080, 1, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 528, 44, 148, 2, 5, 15, 2, 5, 16, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_HALF_LINE) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1920X1080P50 { \
@@ -152,7 +159,7 @@
 	V4L2_INIT_BT_TIMINGS(1920, 1080, 0, \
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		148500000, 528, 44, 148, 4, 5, 36, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1920X1080I60 { \
@@ -161,7 +168,8 @@
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		74250000, 88, 44, 148, 2, 5, 15, 2, 5, 16, \
 		V4L2_DV_BT_STD_CEA861, \
-		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_HALF_LINE) \
+		V4L2_DV_FL_CAN_REDUCE_FPS | \
+		V4L2_DV_FL_HALF_LINE | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_1920X1080P60 { \
@@ -170,77 +178,83 @@
 		V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \
 		148500000, 88, 44, 148, 4, 5, 36, 0, 0, 0, \
 		V4L2_DV_BT_STD_DMT | V4L2_DV_BT_STD_CEA861, \
-		V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_3840X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P24 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P25 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P30 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P50 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, 0) \
+		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 #define V4L2_DV_BT_CEA_4096X2160P60 { \
 	.type = V4L2_DV_BT_656_1120, \
 	V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \
 		594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \
-		V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_CAN_REDUCE_FPS) \
+		V4L2_DV_BT_STD_CEA861, \
+		V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \
 }
 
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 810bade873f4..fa376f7666ba 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -1188,6 +1188,12 @@ struct v4l2_bt_timings {
    exactly the same number of half-lines. Whether half-lines can be detected
    or used depends on the hardware. */
 #define V4L2_DV_FL_HALF_LINE			(1 << 3)
+/* If set, then this is a Consumer Electronics (CE) video format. Such formats
+ * differ from other formats (commonly called IT formats) in that if RGB
+ * encoding is used then by default the RGB values use limited range (i.e.
+ * use the range 16-235) as opposed to 0-255. All formats defined in CEA-861
+ * except for the 640x480 format are CE formats. */
+#define V4L2_DV_FL_IS_CE_VIDEO			(1 << 4)
 
 /* A few useful defines to calculate the total blanking and frame sizes */
 #define V4L2_DV_BT_BLANKING_WIDTH(bt) \
-- 
cgit 


From 22fe54d5fefcfa98c58cc2f4607dd26d9648b3f5 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 5 Apr 2015 14:41:08 +0200
Subject: netfilter: nf_tables: add support for dynamic set updates

Add a new "dynset" expression for dynamic set updates.

A new set op ->update() is added which, for non existant elements,
invokes an initialization callback and inserts the new element.
For both new or existing elements the extenstion pointer is returned
to the caller to optionally perform timer updates or other actions.

Element removal is not supported so far, however that seems to be a
rather exotic need and can be added later on.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  17 +++
 include/net/netfilter/nf_tables_core.h   |   3 +
 include/uapi/linux/netfilter/nf_tables.h |  27 ++++
 net/netfilter/Makefile                   |   2 +-
 net/netfilter/nf_tables_api.c            |  10 +-
 net/netfilter/nf_tables_core.c           |   7 +
 net/netfilter/nft_dynset.c               | 218 +++++++++++++++++++++++++++++++
 net/netfilter/nft_hash.c                 |  37 ++++++
 8 files changed, 315 insertions(+), 6 deletions(-)
 create mode 100644 net/netfilter/nft_dynset.c

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e7e6365c248f..38c3496f7bf2 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -196,6 +196,7 @@ struct nft_set_estimate {
 };
 
 struct nft_set_ext;
+struct nft_expr;
 
 /**
  *	struct nft_set_ops - nf_tables set operations
@@ -218,6 +219,15 @@ struct nft_set_ops {
 	bool				(*lookup)(const struct nft_set *set,
 						  const struct nft_data *key,
 						  const struct nft_set_ext **ext);
+	bool				(*update)(struct nft_set *set,
+						  const struct nft_data *key,
+						  void *(*new)(struct nft_set *,
+							       const struct nft_expr *,
+							       struct nft_data []),
+						  const struct nft_expr *expr,
+						  struct nft_data data[],
+						  const struct nft_set_ext **ext);
+
 	int				(*insert)(const struct nft_set *set,
 						  const struct nft_set_elem *elem);
 	void				(*activate)(const struct nft_set *set,
@@ -466,6 +476,11 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
 	return elem + set->ops->elemsize;
 }
 
+void *nft_set_elem_init(const struct nft_set *set,
+			const struct nft_set_ext_tmpl *tmpl,
+			const struct nft_data *key,
+			const struct nft_data *data,
+			u64 timeout, gfp_t gfp);
 void nft_set_elem_destroy(const struct nft_set *set, void *elem);
 
 /**
@@ -845,6 +860,8 @@ static inline u8 nft_genmask_cur(const struct net *net)
 	return 1 << ACCESS_ONCE(net->nft.gencursor);
 }
 
+#define NFT_GENMASK_ANY		((1 << 0) | (1 << 1))
+
 /*
  * Set element transaction helpers
  */
diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h
index a75fc8e27cd6..c6f400cfaac8 100644
--- a/include/net/netfilter/nf_tables_core.h
+++ b/include/net/netfilter/nf_tables_core.h
@@ -31,6 +31,9 @@ void nft_cmp_module_exit(void);
 int nft_lookup_module_init(void);
 void nft_lookup_module_exit(void);
 
+int nft_dynset_module_init(void);
+void nft_dynset_module_exit(void);
+
 int nft_bitwise_module_init(void);
 void nft_bitwise_module_exit(void);
 
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 83441cc4594b..0b87b2f67fe3 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -515,6 +515,33 @@ enum nft_lookup_attributes {
 };
 #define NFTA_LOOKUP_MAX		(__NFTA_LOOKUP_MAX - 1)
 
+enum nft_dynset_ops {
+	NFT_DYNSET_OP_ADD,
+	NFT_DYNSET_OP_UPDATE,
+};
+
+/**
+ * enum nft_dynset_attributes - dynset expression attributes
+ *
+ * @NFTA_DYNSET_SET_NAME: name of set the to add data to (NLA_STRING)
+ * @NFTA_DYNSET_SET_ID: uniquely identifier of the set in the transaction (NLA_U32)
+ * @NFTA_DYNSET_OP: operation (NLA_U32)
+ * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32)
+ * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
+ * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
+ */
+enum nft_dynset_attributes {
+	NFTA_DYNSET_UNSPEC,
+	NFTA_DYNSET_SET_NAME,
+	NFTA_DYNSET_SET_ID,
+	NFTA_DYNSET_OP,
+	NFTA_DYNSET_SREG_KEY,
+	NFTA_DYNSET_SREG_DATA,
+	NFTA_DYNSET_TIMEOUT,
+	__NFTA_DYNSET_MAX,
+};
+#define NFTA_DYNSET_MAX		(__NFTA_DYNSET_MAX - 1)
+
 /**
  * enum nft_payload_bases - nf_tables payload expression offset bases
  *
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 89f73a9e9874..a87d8b8ec730 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_NETFILTER_SYNPROXY) += nf_synproxy_core.o
 
 # nf_tables
 nf_tables-objs += nf_tables_core.o nf_tables_api.o
-nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o
+nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o
 nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o
 
 obj-$(CONFIG_NF_TABLES)		+= nf_tables.o
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 90b898491da7..598e53eb64b3 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3183,11 +3183,11 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
 	return trans;
 }
 
-static void *nft_set_elem_init(const struct nft_set *set,
-			       const struct nft_set_ext_tmpl *tmpl,
-			       const struct nft_data *key,
-			       const struct nft_data *data,
-			       u64 timeout, gfp_t gfp)
+void *nft_set_elem_init(const struct nft_set *set,
+			const struct nft_set_ext_tmpl *tmpl,
+			const struct nft_data *key,
+			const struct nft_data *data,
+			u64 timeout, gfp_t gfp)
 {
 	struct nft_set_ext *ext;
 	void *elem;
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index ef4dfcbaf149..7caf08a9225d 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -239,8 +239,14 @@ int __init nf_tables_core_module_init(void)
 	if (err < 0)
 		goto err6;
 
+	err = nft_dynset_module_init();
+	if (err < 0)
+		goto err7;
+
 	return 0;
 
+err7:
+	nft_payload_module_exit();
 err6:
 	nft_byteorder_module_exit();
 err5:
@@ -257,6 +263,7 @@ err1:
 
 void nf_tables_core_module_exit(void)
 {
+	nft_dynset_module_exit();
 	nft_payload_module_exit();
 	nft_byteorder_module_exit();
 	nft_bitwise_module_exit();
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
new file mode 100644
index 000000000000..eeb72dee78ef
--- /dev/null
+++ b/net/netfilter/nft_dynset.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2015 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+
+struct nft_dynset {
+	struct nft_set			*set;
+	struct nft_set_ext_tmpl		tmpl;
+	enum nft_dynset_ops		op:8;
+	enum nft_registers		sreg_key:8;
+	enum nft_registers		sreg_data:8;
+	u64				timeout;
+	struct nft_set_binding		binding;
+};
+
+static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1])
+{
+	const struct nft_dynset *priv = nft_expr_priv(expr);
+	u64 timeout;
+	void *elem;
+
+	if (set->size && !atomic_add_unless(&set->nelems, 1, set->size))
+		return NULL;
+
+	timeout = priv->timeout ? : set->timeout;
+	elem = nft_set_elem_init(set, &priv->tmpl,
+				 &data[priv->sreg_key], &data[priv->sreg_data],
+				 timeout, GFP_ATOMIC);
+	if (elem == NULL) {
+		if (set->size)
+			atomic_dec(&set->nelems);
+	}
+	return elem;
+}
+
+static void nft_dynset_eval(const struct nft_expr *expr,
+			    struct nft_data data[NFT_REG_MAX + 1],
+			    const struct nft_pktinfo *pkt)
+{
+	const struct nft_dynset *priv = nft_expr_priv(expr);
+	struct nft_set *set = priv->set;
+	const struct nft_set_ext *ext;
+	u64 timeout;
+
+	if (set->ops->update(set, &data[priv->sreg_key], nft_dynset_new,
+			     expr, data, &ext)) {
+		if (priv->op == NFT_DYNSET_OP_UPDATE &&
+		    nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
+			timeout = priv->timeout ? : set->timeout;
+			*nft_set_ext_expiration(ext) = jiffies + timeout;
+			return;
+		}
+	}
+
+	data[NFT_REG_VERDICT].verdict = NFT_BREAK;
+}
+
+static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
+	[NFTA_DYNSET_SET_NAME]	= { .type = NLA_STRING },
+	[NFTA_DYNSET_SET_ID]	= { .type = NLA_U32 },
+	[NFTA_DYNSET_OP]	= { .type = NLA_U32 },
+	[NFTA_DYNSET_SREG_KEY]	= { .type = NLA_U32 },
+	[NFTA_DYNSET_SREG_DATA]	= { .type = NLA_U32 },
+	[NFTA_DYNSET_TIMEOUT]	= { .type = NLA_U64 },
+};
+
+static int nft_dynset_init(const struct nft_ctx *ctx,
+			   const struct nft_expr *expr,
+			   const struct nlattr * const tb[])
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+	struct nft_set *set;
+	u64 timeout;
+	int err;
+
+	if (tb[NFTA_DYNSET_SET_NAME] == NULL ||
+	    tb[NFTA_DYNSET_OP] == NULL ||
+	    tb[NFTA_DYNSET_SREG_KEY] == NULL)
+		return -EINVAL;
+
+	set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME]);
+	if (IS_ERR(set)) {
+		if (tb[NFTA_DYNSET_SET_ID])
+			set = nf_tables_set_lookup_byid(ctx->net,
+							tb[NFTA_DYNSET_SET_ID]);
+		if (IS_ERR(set))
+			return PTR_ERR(set);
+	}
+
+	if (set->flags & NFT_SET_CONSTANT)
+		return -EBUSY;
+
+	priv->op = ntohl(nla_get_be32(tb[NFTA_DYNSET_OP]));
+	switch (priv->op) {
+	case NFT_DYNSET_OP_ADD:
+		break;
+	case NFT_DYNSET_OP_UPDATE:
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EOPNOTSUPP;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	timeout = 0;
+	if (tb[NFTA_DYNSET_TIMEOUT] != NULL) {
+		if (!(set->flags & NFT_SET_TIMEOUT))
+			return -EINVAL;
+		timeout = be64_to_cpu(nla_get_be64(tb[NFTA_DYNSET_TIMEOUT]));
+	}
+
+	priv->sreg_key = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_KEY]));
+	err = nft_validate_input_register(priv->sreg_key);
+	if (err < 0)
+		return err;
+
+	if (tb[NFTA_DYNSET_SREG_DATA] != NULL) {
+		if (!(set->flags & NFT_SET_MAP))
+			return -EINVAL;
+		if (set->dtype == NFT_DATA_VERDICT)
+			return -EOPNOTSUPP;
+
+		priv->sreg_data = ntohl(nla_get_be32(tb[NFTA_DYNSET_SREG_DATA]));
+		err = nft_validate_input_register(priv->sreg_data);
+		if (err < 0)
+			return err;
+	} else if (set->flags & NFT_SET_MAP)
+		return -EINVAL;
+
+	nft_set_ext_prepare(&priv->tmpl);
+	nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
+	if (set->flags & NFT_SET_MAP)
+		nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+	if (set->flags & NFT_SET_TIMEOUT) {
+		if (timeout || set->timeout)
+			nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
+	}
+
+	priv->timeout = timeout;
+
+	err = nf_tables_bind_set(ctx, set, &priv->binding);
+	if (err < 0)
+		return err;
+
+	priv->set = set;
+	return 0;
+}
+
+static void nft_dynset_destroy(const struct nft_ctx *ctx,
+			       const struct nft_expr *expr)
+{
+	struct nft_dynset *priv = nft_expr_priv(expr);
+
+	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+}
+
+static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
+{
+	const struct nft_dynset *priv = nft_expr_priv(expr);
+
+	if (nla_put_be32(skb, NFTA_DYNSET_SREG_KEY, htonl(priv->sreg_key)))
+		goto nla_put_failure;
+	if (priv->set->flags & NFT_SET_MAP &&
+	    nla_put_be32(skb, NFTA_DYNSET_SREG_DATA, htonl(priv->sreg_data)))
+		goto nla_put_failure;
+	if (nla_put_be32(skb, NFTA_DYNSET_OP, htonl(priv->op)))
+		goto nla_put_failure;
+	if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name))
+		goto nla_put_failure;
+	if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout)))
+		goto nla_put_failure;
+	return 0;
+
+nla_put_failure:
+	return -1;
+}
+
+static struct nft_expr_type nft_dynset_type;
+static const struct nft_expr_ops nft_dynset_ops = {
+	.type		= &nft_dynset_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_dynset)),
+	.eval		= nft_dynset_eval,
+	.init		= nft_dynset_init,
+	.destroy	= nft_dynset_destroy,
+	.dump		= nft_dynset_dump,
+};
+
+static struct nft_expr_type nft_dynset_type __read_mostly = {
+	.name		= "dynset",
+	.ops		= &nft_dynset_ops,
+	.policy		= nft_dynset_policy,
+	.maxattr	= NFTA_DYNSET_MAX,
+	.owner		= THIS_MODULE,
+};
+
+int __init nft_dynset_module_init(void)
+{
+	return nft_register_expr(&nft_dynset_type);
+}
+
+void nft_dynset_module_exit(void)
+{
+	nft_unregister_expr(&nft_dynset_type);
+}
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index c74e2bf1a1e4..bc23806b7fbe 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -90,6 +90,42 @@ static bool nft_hash_lookup(const struct nft_set *set,
 	return !!he;
 }
 
+static bool nft_hash_update(struct nft_set *set, const struct nft_data *key,
+			    void *(*new)(struct nft_set *,
+					 const struct nft_expr *,
+					 struct nft_data []),
+			    const struct nft_expr *expr,
+			    struct nft_data data[],
+			    const struct nft_set_ext **ext)
+{
+	struct nft_hash *priv = nft_set_priv(set);
+	struct nft_hash_elem *he;
+	struct nft_hash_cmp_arg arg = {
+		.genmask = NFT_GENMASK_ANY,
+		.set	 = set,
+		.key	 = key,
+	};
+
+	he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params);
+	if (he != NULL)
+		goto out;
+
+	he = new(set, expr, data);
+	if (he == NULL)
+		goto err1;
+	if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
+					 nft_hash_params))
+		goto err2;
+out:
+	*ext = &he->ext;
+	return true;
+
+err2:
+	nft_set_elem_destroy(set, he);
+err1:
+	return false;
+}
+
 static int nft_hash_insert(const struct nft_set *set,
 			   const struct nft_set_elem *elem)
 {
@@ -335,6 +371,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = {
 	.deactivate	= nft_hash_deactivate,
 	.remove		= nft_hash_remove,
 	.lookup		= nft_hash_lookup,
+	.update		= nft_hash_update,
 	.walk		= nft_hash_walk,
 	.features	= NFT_SET_MAP | NFT_SET_TIMEOUT,
 	.owner		= THIS_MODULE,
-- 
cgit 


From 68e942e88add0ac8576fc8397e86495edf3dcea7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sun, 5 Apr 2015 14:43:38 +0200
Subject: netfilter: nf_tables: support optional userdata for set elements

Add an userdata set extension and allow the user to attach arbitrary
data to set elements. This is intended to hold TLV encoded data like
comments or DNS annotations that have no meaning to the kernel.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  7 +++++++
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nf_tables_api.c            | 34 ++++++++++++++++++++++++++++++++
 3 files changed, 43 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 38c3496f7bf2..63c44bdfdd3b 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -350,6 +350,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
  *	@NFT_SET_EXT_FLAGS: element flags
  *	@NFT_SET_EXT_TIMEOUT: element timeout
  *	@NFT_SET_EXT_EXPIRATION: element expiration time
+ *	@NFT_SET_EXT_USERDATA: user data associated with the element
  *	@NFT_SET_EXT_NUM: number of extension types
  */
 enum nft_set_extensions {
@@ -358,6 +359,7 @@ enum nft_set_extensions {
 	NFT_SET_EXT_FLAGS,
 	NFT_SET_EXT_TIMEOUT,
 	NFT_SET_EXT_EXPIRATION,
+	NFT_SET_EXT_USERDATA,
 	NFT_SET_EXT_NUM
 };
 
@@ -464,6 +466,11 @@ static inline unsigned long *nft_set_ext_expiration(const struct nft_set_ext *ex
 	return nft_set_ext(ext, NFT_SET_EXT_EXPIRATION);
 }
 
+static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext *ext)
+{
+	return nft_set_ext(ext, NFT_SET_EXT_USERDATA);
+}
+
 static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
 {
 	return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 0b87b2f67fe3..05ee1e0804a3 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -292,6 +292,7 @@ enum nft_set_elem_flags {
  * @NFTA_SET_ELEM_FLAGS: bitmask of nft_set_elem_flags (NLA_U32)
  * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64)
  * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
+ * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
  */
 enum nft_set_elem_attributes {
 	NFTA_SET_ELEM_UNSPEC,
@@ -300,6 +301,7 @@ enum nft_set_elem_attributes {
 	NFTA_SET_ELEM_FLAGS,
 	NFTA_SET_ELEM_TIMEOUT,
 	NFTA_SET_ELEM_EXPIRATION,
+	NFTA_SET_ELEM_USERDATA,
 	__NFTA_SET_ELEM_MAX
 };
 #define NFTA_SET_ELEM_MAX	(__NFTA_SET_ELEM_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 598e53eb64b3..0b96fa0d64b2 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2872,6 +2872,10 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
 		.len	= sizeof(unsigned long),
 		.align	= __alignof__(unsigned long),
 	},
+	[NFT_SET_EXT_USERDATA]		= {
+		.len	= sizeof(struct nft_userdata),
+		.align	= __alignof__(struct nft_userdata),
+	},
 };
 EXPORT_SYMBOL_GPL(nft_set_ext_types);
 
@@ -2884,6 +2888,8 @@ static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = {
 	[NFTA_SET_ELEM_DATA]		= { .type = NLA_NESTED },
 	[NFTA_SET_ELEM_FLAGS]		= { .type = NLA_U32 },
 	[NFTA_SET_ELEM_TIMEOUT]		= { .type = NLA_U64 },
+	[NFTA_SET_ELEM_USERDATA]	= { .type = NLA_BINARY,
+					    .len = NFT_USERDATA_MAXLEN },
 };
 
 static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX + 1] = {
@@ -2964,6 +2970,15 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 			goto nla_put_failure;
 	}
 
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
+		struct nft_userdata *udata;
+
+		udata = nft_set_ext_userdata(ext);
+		if (nla_put(skb, NFTA_SET_ELEM_USERDATA,
+			    udata->len + 1, udata->data))
+			goto nla_put_failure;
+	}
+
 	nla_nest_end(skb, nest);
 	return 0;
 
@@ -3232,11 +3247,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_set_ext *ext;
 	struct nft_set_elem elem;
 	struct nft_set_binding *binding;
+	struct nft_userdata *udata;
 	struct nft_data data;
 	enum nft_registers dreg;
 	struct nft_trans *trans;
 	u64 timeout;
 	u32 flags;
+	u8 ulen;
 	int err;
 
 	err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr,
@@ -3325,6 +3342,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 		nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
 	}
 
+	/* The full maximum length of userdata can exceed the maximum
+	 * offset value (U8_MAX) for following extensions, therefor it
+	 * must be the last extension added.
+	 */
+	ulen = 0;
+	if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
+		ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
+		if (ulen > 0)
+			nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+					       ulen);
+	}
+
 	err = -ENOMEM;
 	elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data,
 				      timeout, GFP_KERNEL);
@@ -3334,6 +3363,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	ext = nft_set_elem_ext(set, elem.priv);
 	if (flags)
 		*nft_set_ext_flags(ext) = flags;
+	if (ulen > 0) {
+		udata = nft_set_ext_userdata(ext);
+		udata->len = ulen - 1;
+		nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
+	}
 
 	trans = nft_trans_elem_alloc(ctx, NFT_MSG_NEWSETELEM, set);
 	if (trans == NULL)
-- 
cgit 


From 01a3d796813d6302af9f828f34b73d21a4b96c9a Mon Sep 17 00:00:00 2001
From: Vlad Zolotarov <vladz@cloudius-systems.com>
Date: Mon, 30 Mar 2015 21:35:23 +0300
Subject: if_link: Add an additional parameter to ifla_vf_info for RSS querying

Add configuration setting for drivers to allow/block an RSS Redirection
Table and a Hash Key querying for discrete VFs.

On some devices VF share the mentioned above information with PF and
querying it may adduce a theoretical security risk. We want to let a
system administrator to decide if he/she wants to take this risk or not.

Signed-off-by: Vlad Zolotarov <vladz@cloudius-systems.com>
Tested-by: Phil Schmitt <phillip.j.schmitt@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/if_link.h      |  1 +
 include/linux/netdevice.h    |  8 ++++++++
 include/uapi/linux/if_link.h |  8 ++++++++
 net/core/rtnetlink.c         | 32 ++++++++++++++++++++++++++------
 4 files changed, 43 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index 119130e9298b..da4929927f69 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -14,5 +14,6 @@ struct ifla_vf_info {
 	__u32 linkstate;
 	__u32 min_tx_rate;
 	__u32 max_tx_rate;
+	__u32 rss_query_en;
 };
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bf6d9df34d7b..13acb3d8ecdd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -878,6 +878,11 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev,
  * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state);
  * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
  *			  struct nlattr *port[]);
+ *
+ *      Enable or disable the VF ability to query its RSS Redirection Table and
+ *      Hash Key. This is needed since on some devices VF share this information
+ *      with PF and querying it may adduce a theoretical security risk.
+ * int (*ndo_set_vf_rss_query_en)(struct net_device *dev, int vf, bool setting);
  * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
  * int (*ndo_setup_tc)(struct net_device *dev, u8 tc)
  * 	Called to setup 'tc' number of traffic classes in the net device. This
@@ -1099,6 +1104,9 @@ struct net_device_ops {
 						   struct nlattr *port[]);
 	int			(*ndo_get_vf_port)(struct net_device *dev,
 						   int vf, struct sk_buff *skb);
+	int			(*ndo_set_vf_rss_query_en)(
+						   struct net_device *dev,
+						   int vf, bool setting);
 	int			(*ndo_setup_tc)(struct net_device *dev, u8 tc);
 #if IS_ENABLED(CONFIG_FCOE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 7ffb18df01ca..d9cd19214b98 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -465,6 +465,9 @@ enum {
 	IFLA_VF_SPOOFCHK,	/* Spoof Checking on/off switch */
 	IFLA_VF_LINK_STATE,	/* link state enable/disable/auto switch */
 	IFLA_VF_RATE,		/* Min and Max TX Bandwidth Allocation */
+	IFLA_VF_RSS_QUERY_EN,	/* RSS Redirection Table and Hash Key query
+				 * on/off switch
+				 */
 	__IFLA_VF_MAX,
 };
 
@@ -509,6 +512,11 @@ struct ifla_vf_link_state {
 	__u32 link_state;
 };
 
+struct ifla_vf_rss_query_en {
+	__u32 vf;
+	__u32 setting;
+};
+
 /* VF ports management section
  *
  *	Nested layout of set/get msg is:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 7a836152359b..358d52a38533 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -818,7 +818,8 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 nla_total_size(sizeof(struct ifla_vf_vlan)) +
 			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
 			 nla_total_size(sizeof(struct ifla_vf_rate)) +
-			 nla_total_size(sizeof(struct ifla_vf_link_state)));
+			 nla_total_size(sizeof(struct ifla_vf_link_state)) +
+			 nla_total_size(sizeof(struct ifla_vf_rss_query_en)));
 		return size;
 	} else
 		return 0;
@@ -1132,14 +1133,16 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_tx_rate vf_tx_rate;
 			struct ifla_vf_spoofchk vf_spoofchk;
 			struct ifla_vf_link_state vf_linkstate;
+			struct ifla_vf_rss_query_en vf_rss_query_en;
 
 			/*
 			 * Not all SR-IOV capable drivers support the
-			 * spoofcheck query.  Preset to -1 so the user
-			 * space tool can detect that the driver didn't
-			 * report anything.
+			 * spoofcheck and "RSS query enable" query.  Preset to
+			 * -1 so the user space tool can detect that the driver
+			 * didn't report anything.
 			 */
 			ivi.spoofchk = -1;
+			ivi.rss_query_en = -1;
 			memset(ivi.mac, 0, sizeof(ivi.mac));
 			/* The default value for VF link state is "auto"
 			 * IFLA_VF_LINK_STATE_AUTO which equals zero
@@ -1152,7 +1155,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				vf_rate.vf =
 				vf_tx_rate.vf =
 				vf_spoofchk.vf =
-				vf_linkstate.vf = ivi.vf;
+				vf_linkstate.vf =
+				vf_rss_query_en.vf = ivi.vf;
 
 			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
 			vf_vlan.vlan = ivi.vlan;
@@ -1162,6 +1166,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			vf_rate.max_tx_rate = ivi.max_tx_rate;
 			vf_spoofchk.setting = ivi.spoofchk;
 			vf_linkstate.link_state = ivi.linkstate;
+			vf_rss_query_en.setting = ivi.rss_query_en;
 			vf = nla_nest_start(skb, IFLA_VF_INFO);
 			if (!vf) {
 				nla_nest_cancel(skb, vfinfo);
@@ -1176,7 +1181,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    nla_put(skb, IFLA_VF_SPOOFCHK, sizeof(vf_spoofchk),
 				    &vf_spoofchk) ||
 			    nla_put(skb, IFLA_VF_LINK_STATE, sizeof(vf_linkstate),
-				    &vf_linkstate))
+				    &vf_linkstate) ||
+			    nla_put(skb, IFLA_VF_RSS_QUERY_EN,
+				    sizeof(vf_rss_query_en),
+				    &vf_rss_query_en))
 				goto nla_put_failure;
 			nla_nest_end(skb, vf);
 		}
@@ -1290,6 +1298,7 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_SPOOFCHK]	= { .len = sizeof(struct ifla_vf_spoofchk) },
 	[IFLA_VF_RATE]		= { .len = sizeof(struct ifla_vf_rate) },
 	[IFLA_VF_LINK_STATE]	= { .len = sizeof(struct ifla_vf_link_state) },
+	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
 };
 
 static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
@@ -1500,6 +1509,17 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
 								 ivl->link_state);
 			break;
 		}
+		case IFLA_VF_RSS_QUERY_EN: {
+			struct ifla_vf_rss_query_en *ivrssq_en;
+
+			ivrssq_en = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_rss_query_en)
+				err = ops->ndo_set_vf_rss_query_en(dev,
+							    ivrssq_en->vf,
+							    ivrssq_en->setting);
+			break;
+		}
 		default:
 			err = -EINVAL;
 			break;
-- 
cgit 


From 7a6c8c34e5b71ac50e39588e20b39494a9e1d8e5 Mon Sep 17 00:00:00 2001
From: WANG Cong <xiyou.wangcong@gmail.com>
Date: Fri, 10 Apr 2015 12:00:30 -0700
Subject: fou: implement FOU_CMD_GET

Cc: Tom Herbert <tom@herbertland.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/fou.h |   1 +
 net/ipv4/fou.c           | 109 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/fou.h b/include/uapi/linux/fou.h
index c303588bb767..d2947c52dc67 100644
--- a/include/uapi/linux/fou.h
+++ b/include/uapi/linux/fou.h
@@ -25,6 +25,7 @@ enum {
 	FOU_CMD_UNSPEC,
 	FOU_CMD_ADD,
 	FOU_CMD_DEL,
+	FOU_CMD_GET,
 
 	__FOU_CMD_MAX,
 };
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index c244b1a65787..263710259774 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -21,6 +21,7 @@ struct fou {
 	u8 protocol;
 	u8 flags;
 	__be16 port;
+	u16 type;
 	struct udp_offload udp_offloads;
 	struct list_head list;
 };
@@ -487,6 +488,8 @@ static int fou_create(struct net *net, struct fou_cfg *cfg,
 		goto error;
 	}
 
+	fou->type = cfg->type;
+
 	udp_sk(sk)->encap_type = 1;
 	udp_encap_enable();
 
@@ -617,6 +620,106 @@ static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
 	return fou_destroy(net, &cfg);
 }
 
+static int fou_fill_info(struct fou *fou, struct sk_buff *msg)
+{
+	if (nla_put_u8(msg, FOU_ATTR_AF, fou->sock->sk->sk_family) ||
+	    nla_put_be16(msg, FOU_ATTR_PORT, fou->port) ||
+	    nla_put_u8(msg, FOU_ATTR_IPPROTO, fou->protocol) ||
+	    nla_put_u8(msg, FOU_ATTR_TYPE, fou->type))
+		return -1;
+
+	if (fou->flags & FOU_F_REMCSUM_NOPARTIAL)
+		if (nla_put_flag(msg, FOU_ATTR_REMCSUM_NOPARTIAL))
+			return -1;
+	return 0;
+}
+
+static int fou_dump_info(struct fou *fou, u32 portid, u32 seq,
+			 u32 flags, struct sk_buff *skb, u8 cmd)
+{
+	void *hdr;
+
+	hdr = genlmsg_put(skb, portid, seq, &fou_nl_family, flags, cmd);
+	if (!hdr)
+		return -ENOMEM;
+
+	if (fou_fill_info(fou, skb) < 0)
+		goto nla_put_failure;
+
+	genlmsg_end(skb, hdr);
+	return 0;
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
+}
+
+static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
+{
+	struct net *net = genl_info_net(info);
+	struct fou_net *fn = net_generic(net, fou_net_id);
+	struct sk_buff *msg;
+	struct fou_cfg cfg;
+	struct fou *fout;
+	__be16 port;
+	int ret;
+
+	ret = parse_nl_config(info, &cfg);
+	if (ret)
+		return ret;
+	port = cfg.udp_config.local_udp_port;
+	if (port == 0)
+		return -EINVAL;
+
+	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (!msg)
+		return -ENOMEM;
+
+	ret = -ESRCH;
+	mutex_lock(&fn->fou_lock);
+	list_for_each_entry(fout, &fn->fou_list, list) {
+		if (port == fout->port) {
+			ret = fou_dump_info(fout, info->snd_portid,
+					    info->snd_seq, 0, msg,
+					    info->genlhdr->cmd);
+			break;
+		}
+	}
+	mutex_unlock(&fn->fou_lock);
+	if (ret < 0)
+		goto out_free;
+
+	return genlmsg_reply(msg, info);
+
+out_free:
+	nlmsg_free(msg);
+	return ret;
+}
+
+static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct fou_net *fn = net_generic(net, fou_net_id);
+	struct fou *fout;
+	int idx = 0, ret;
+
+	mutex_lock(&fn->fou_lock);
+	list_for_each_entry(fout, &fn->fou_list, list) {
+		if (idx++ < cb->args[0])
+			continue;
+		ret = fou_dump_info(fout, NETLINK_CB(cb->skb).portid,
+				    cb->nlh->nlmsg_seq, NLM_F_MULTI,
+				    skb, FOU_CMD_GET);
+		if (ret)
+			goto done;
+	}
+	mutex_unlock(&fn->fou_lock);
+
+done:
+	cb->args[0] = idx;
+	return skb->len;
+}
+
 static const struct genl_ops fou_nl_ops[] = {
 	{
 		.cmd = FOU_CMD_ADD,
@@ -630,6 +733,12 @@ static const struct genl_ops fou_nl_ops[] = {
 		.policy = fou_nl_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = FOU_CMD_GET,
+		.doit = fou_nl_cmd_get_port,
+		.dumpit = fou_nl_dump,
+		.policy = fou_nl_policy,
+	},
 };
 
 size_t fou_encap_hlen(struct ip_tunnel_encap *e)
-- 
cgit 


From 49499c3e6e18b7677a63316f3ff54a16533dc28f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 11 Apr 2015 02:27:37 +0100
Subject: netfilter: nf_tables: switch registers to 32 bit addressing

Switch the nf_tables registers from 128 bit addressing to 32 bit
addressing to support so called concatenations, where multiple values
can be concatenated over multiple registers for O(1) exact matches of
multiple dimensions using sets.

The old register values are mapped to areas of 128 bits for compatibility.
When dumping register numbers, values are expressed using the old values
if they refer to the beginning of a 128 bit area for compatibility.

To support concatenations, register loads of less than a full 32 bit
value need to be padded. This mainly affects the payload and exthdr
expressions, which both unconditionally zero the last word before
copying the data.

Userspace fully passes the testsuite using both old and new register
addressing.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 13 +++-----
 include/uapi/linux/netfilter/nf_tables.h | 31 +++++++++++++++++-
 net/bridge/netfilter/nft_meta_bridge.c   |  2 +-
 net/ipv4/netfilter/nft_redir_ipv4.c      |  4 +--
 net/ipv6/netfilter/nft_redir_ipv6.c      |  4 +--
 net/netfilter/nf_tables_api.c            | 54 +++++++++++++++++++++++++-------
 net/netfilter/nf_tables_core.c           |  5 +--
 net/netfilter/nft_bitwise.c              |  4 +--
 net/netfilter/nft_byteorder.c            |  4 +--
 net/netfilter/nft_ct.c                   |  4 +--
 net/netfilter/nft_exthdr.c               |  3 +-
 net/netfilter/nft_immediate.c            |  2 +-
 net/netfilter/nft_lookup.c               |  2 +-
 net/netfilter/nft_meta.c                 |  7 +++--
 net/netfilter/nft_nat.c                  | 16 +++++-----
 net/netfilter/nft_payload.c              |  3 +-
 16 files changed, 110 insertions(+), 48 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index f8f27a48bbe9..1f9b848c778c 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -64,17 +64,15 @@ struct nft_data {
  */
 struct nft_regs {
 	union {
-		struct nft_data 	data[NFT_REG_MAX + 1];
+		u32			data[20];
 		struct nft_verdict	verdict;
 	};
 };
 
-static inline void nft_data_copy(struct nft_data *dst,
-				 const struct nft_data *src)
+static inline void nft_data_copy(u32 *dst, const struct nft_data *src,
+				 unsigned int len)
 {
-	BUILD_BUG_ON(__alignof__(*dst) != __alignof__(u64));
-	*(u64 *)&dst->data[0] = *(u64 *)&src->data[0];
-	*(u64 *)&dst->data[2] = *(u64 *)&src->data[2];
+	memcpy(dst, src, len);
 }
 
 static inline void nft_data_debug(const struct nft_data *data)
@@ -502,8 +500,7 @@ static inline struct nft_set_ext *nft_set_elem_ext(const struct nft_set *set,
 
 void *nft_set_elem_init(const struct nft_set *set,
 			const struct nft_set_ext_tmpl *tmpl,
-			const struct nft_data *key,
-			const struct nft_data *data,
+			const u32 *key, const u32 *data,
 			u64 timeout, gfp_t gfp);
 void nft_set_elem_destroy(const struct nft_set *set, void *elem);
 
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 05ee1e0804a3..4221a6c3a8a5 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -5,16 +5,45 @@
 #define NFT_CHAIN_MAXNAMELEN	32
 #define NFT_USERDATA_MAXLEN	256
 
+/**
+ * enum nft_registers - nf_tables registers
+ *
+ * nf_tables used to have five registers: a verdict register and four data
+ * registers of size 16. The data registers have been changed to 16 registers
+ * of size 4. For compatibility reasons, the NFT_REG_[1-4] registers still
+ * map to areas of size 16, the 4 byte registers are addressed using
+ * NFT_REG32_00 - NFT_REG32_15.
+ */
 enum nft_registers {
 	NFT_REG_VERDICT,
 	NFT_REG_1,
 	NFT_REG_2,
 	NFT_REG_3,
 	NFT_REG_4,
-	__NFT_REG_MAX
+	__NFT_REG_MAX,
+
+	NFT_REG32_00	= 8,
+	MFT_REG32_01,
+	NFT_REG32_02,
+	NFT_REG32_03,
+	NFT_REG32_04,
+	NFT_REG32_05,
+	NFT_REG32_06,
+	NFT_REG32_07,
+	NFT_REG32_08,
+	NFT_REG32_09,
+	NFT_REG32_10,
+	NFT_REG32_11,
+	NFT_REG32_12,
+	NFT_REG32_13,
+	NFT_REG32_14,
+	NFT_REG32_15,
 };
 #define NFT_REG_MAX	(__NFT_REG_MAX - 1)
 
+#define NFT_REG_SIZE	16
+#define NFT_REG32_SIZE	4
+
 /**
  * enum nft_verdicts - nf_tables internal verdicts
  *
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 99dab70ecae0..a21269b83f16 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -24,7 +24,7 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
 {
 	const struct nft_meta *priv = nft_expr_priv(expr);
 	const struct net_device *in = pkt->in, *out = pkt->out;
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	const struct net_bridge_port *p;
 
 	switch (priv->key) {
diff --git a/net/ipv4/netfilter/nft_redir_ipv4.c b/net/ipv4/netfilter/nft_redir_ipv4.c
index 312cf6f3b6dc..d8d795df9c13 100644
--- a/net/ipv4/netfilter/nft_redir_ipv4.c
+++ b/net/ipv4/netfilter/nft_redir_ipv4.c
@@ -27,9 +27,9 @@ static void nft_redir_ipv4_eval(const struct nft_expr *expr,
 	memset(&mr, 0, sizeof(mr));
 	if (priv->sreg_proto_min) {
 		mr.range[0].min.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_min];
 		mr.range[0].max.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_max];
 		mr.range[0].flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c
index 0eed774815cf..effd393bd517 100644
--- a/net/ipv6/netfilter/nft_redir_ipv6.c
+++ b/net/ipv6/netfilter/nft_redir_ipv6.c
@@ -27,9 +27,9 @@ static void nft_redir_ipv6_eval(const struct nft_expr *expr,
 	memset(&range, 0, sizeof(range));
 	if (priv->sreg_proto_min) {
 		range.min_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_min],
 		range.max_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_max],
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index a25fd19453e7..03faf76ce3b8 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -3201,8 +3201,7 @@ static struct nft_trans *nft_trans_elem_alloc(struct nft_ctx *ctx,
 
 void *nft_set_elem_init(const struct nft_set *set,
 			const struct nft_set_ext_tmpl *tmpl,
-			const struct nft_data *key,
-			const struct nft_data *data,
+			const u32 *key, const u32 *data,
 			u64 timeout, gfp_t gfp)
 {
 	struct nft_set_ext *ext;
@@ -3357,7 +3356,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	}
 
 	err = -ENOMEM;
-	elem.priv = nft_set_elem_init(set, &tmpl, &elem.key, &data,
+	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.data, data.data,
 				      timeout, GFP_KERNEL);
 	if (elem.priv == NULL)
 		goto err3;
@@ -4122,14 +4121,47 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
 	return 0;
 }
 
+/**
+ *	nft_parse_register - parse a register value from a netlink attribute
+ *
+ *	@attr: netlink attribute
+ *
+ *	Parse and translate a register value from a netlink attribute.
+ *	Registers used to be 128 bit wide, these register numbers will be
+ *	mapped to the corresponding 32 bit register numbers.
+ */
 unsigned int nft_parse_register(const struct nlattr *attr)
 {
-	return ntohl(nla_get_be32(attr));
+	unsigned int reg;
+
+	reg = ntohl(nla_get_be32(attr));
+	switch (reg) {
+	case NFT_REG_VERDICT...NFT_REG_4:
+		return reg * NFT_REG_SIZE / NFT_REG32_SIZE;
+	default:
+		return reg + NFT_REG_SIZE / NFT_REG32_SIZE - NFT_REG32_00;
+	}
 }
 EXPORT_SYMBOL_GPL(nft_parse_register);
 
+/**
+ *	nft_dump_register - dump a register value to a netlink attribute
+ *
+ *	@skb: socket buffer
+ *	@attr: attribute number
+ *	@reg: register number
+ *
+ *	Construct a netlink attribute containing the register number. For
+ *	compatibility reasons, register numbers being a multiple of 4 are
+ *	translated to the corresponding 128 bit register numbers.
+ */
 int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg)
 {
+	if (reg % (NFT_REG_SIZE / NFT_REG32_SIZE) == 0)
+		reg = reg / (NFT_REG_SIZE / NFT_REG32_SIZE);
+	else
+		reg = reg - NFT_REG_SIZE / NFT_REG32_SIZE + NFT_REG32_00;
+
 	return nla_put_be32(skb, attr, htonl(reg));
 }
 EXPORT_SYMBOL_GPL(nft_dump_register);
@@ -4145,14 +4177,13 @@ EXPORT_SYMBOL_GPL(nft_dump_register);
  */
 int nft_validate_register_load(enum nft_registers reg, unsigned int len)
 {
-	if (reg <= NFT_REG_VERDICT)
+	if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
 		return -EINVAL;
-	if (reg > NFT_REG_MAX)
-		return -ERANGE;
 	if (len == 0)
 		return -EINVAL;
-	if (len > FIELD_SIZEOF(struct nft_data, data))
+	if (reg * NFT_REG32_SIZE + len > FIELD_SIZEOF(struct nft_regs, data))
 		return -ERANGE;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(nft_validate_register_load);
@@ -4200,13 +4231,12 @@ int nft_validate_register_store(const struct nft_ctx *ctx,
 
 		return 0;
 	default:
-		if (reg < NFT_REG_1)
+		if (reg < NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE)
 			return -EINVAL;
-		if (reg > NFT_REG_MAX)
-			return -ERANGE;
 		if (len == 0)
 			return -EINVAL;
-		if (len > FIELD_SIZEOF(struct nft_data, data))
+		if (reg * NFT_REG32_SIZE + len >
+		    FIELD_SIZEOF(struct nft_regs, data))
 			return -ERANGE;
 
 		if (data != NULL && type != NFT_DATA_VALUE)
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 5ef07d17b358..f153b07073af 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -70,7 +70,7 @@ static void nft_cmp_fast_eval(const struct nft_expr *expr,
 	const struct nft_cmp_fast_expr *priv = nft_expr_priv(expr);
 	u32 mask = nft_cmp_fast_mask(priv->len);
 
-	if ((regs->data[priv->sreg].data[0] & mask) == priv->data)
+	if ((regs->data[priv->sreg] & mask) == priv->data)
 		return;
 	regs->verdict.code = NFT_BREAK;
 }
@@ -81,7 +81,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
 {
 	const struct nft_payload *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	unsigned char *ptr;
 
 	if (priv->base == NFT_PAYLOAD_NETWORK_HEADER)
@@ -94,6 +94,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr,
 	if (unlikely(ptr + priv->len >= skb_tail_pointer(skb)))
 		return false;
 
+	*dest = 0;
 	if (priv->len == 2)
 		*(u16 *)dest = *(u16 *)ptr;
 	else if (priv->len == 4)
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index aa1147032ace..f1a9be2aecd1 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -30,8 +30,8 @@ static void nft_bitwise_eval(const struct nft_expr *expr,
 			     const struct nft_pktinfo *pkt)
 {
 	const struct nft_bitwise *priv = nft_expr_priv(expr);
-	const u32  *src = &regs->data[priv->sreg].data[0];
-	u32 *dst = &regs->data[priv->dreg].data[0];
+	const u32 *src = &regs->data[priv->sreg];
+	u32 *dst = &regs->data[priv->dreg];
 	unsigned int i;
 
 	for (i = 0; i < DIV_ROUND_UP(priv->len, 4); i++)
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index 2ee3e57ad814..fde5145f2e36 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -30,8 +30,8 @@ static void nft_byteorder_eval(const struct nft_expr *expr,
 			       const struct nft_pktinfo *pkt)
 {
 	const struct nft_byteorder *priv = nft_expr_priv(expr);
-	u32 *src = &regs->data[priv->sreg].data[0];
-	u32 *dst = &regs->data[priv->dreg].data[0];
+	u32 *src = &regs->data[priv->sreg];
+	u32 *dst = &regs->data[priv->dreg];
 	union { u32 u32; u16 u16; } *s, *d;
 	unsigned int i;
 
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index fab8e754b18a..8cbca3432f90 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -35,7 +35,7 @@ static void nft_ct_get_eval(const struct nft_expr *expr,
 			    const struct nft_pktinfo *pkt)
 {
 	const struct nft_ct *priv = nft_expr_priv(expr);
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	enum ip_conntrack_info ctinfo;
 	const struct nf_conn *ct;
 	const struct nf_conn_help *help;
@@ -156,7 +156,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
 	const struct nft_ct *priv = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	u32 value = regs->data[priv->sreg].data[0];
+	u32 value = regs->data[priv->sreg];
 #endif
 	enum ip_conntrack_info ctinfo;
 	struct nf_conn *ct;
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index 098ffee793d7..ba7aed13e174 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -30,7 +30,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
 			    const struct nft_pktinfo *pkt)
 {
 	struct nft_exthdr *priv = nft_expr_priv(expr);
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	unsigned int offset = 0;
 	int err;
 
@@ -39,6 +39,7 @@ static void nft_exthdr_eval(const struct nft_expr *expr,
 		goto err;
 	offset += priv->offset;
 
+	dest[priv->len / NFT_REG32_SIZE] = 0;
 	if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
 		goto err;
 	return;
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index 0682f600c7a5..1e8e412eadae 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -29,7 +29,7 @@ static void nft_immediate_eval(const struct nft_expr *expr,
 {
 	const struct nft_immediate_expr *priv = nft_expr_priv(expr);
 
-	nft_data_copy(&regs->data[priv->dreg], &priv->data);
+	nft_data_copy(&regs->data[priv->dreg], &priv->data, priv->dlen);
 }
 
 static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = {
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index fc7afff81566..ba1466209f2a 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -36,7 +36,7 @@ static void nft_lookup_eval(const struct nft_expr *expr,
 	if (set->ops->lookup(set, &regs->data[priv->sreg], &ext)) {
 		if (set->flags & NFT_SET_MAP)
 			nft_data_copy(&regs->data[priv->dreg],
-				      nft_set_ext_data(ext));
+				      nft_set_ext_data(ext), set->dlen);
 		return;
 	}
 	regs->verdict.code = NFT_BREAK;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 5f744eb61de5..52561e1c31e2 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -31,13 +31,14 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 	const struct nft_meta *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
 	const struct net_device *in = pkt->in, *out = pkt->out;
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 
 	switch (priv->key) {
 	case NFT_META_LEN:
 		*dest = skb->len;
 		break;
 	case NFT_META_PROTOCOL:
+		*dest = 0;
 		*(__be16 *)dest = skb->protocol;
 		break;
 	case NFT_META_NFPROTO:
@@ -75,11 +76,13 @@ void nft_meta_get_eval(const struct nft_expr *expr,
 	case NFT_META_IIFTYPE:
 		if (in == NULL)
 			goto err;
+		*dest = 0;
 		*(u16 *)dest = in->type;
 		break;
 	case NFT_META_OIFTYPE:
 		if (out == NULL)
 			goto err;
+		*dest = 0;
 		*(u16 *)dest = out->type;
 		break;
 	case NFT_META_SKUID:
@@ -185,7 +188,7 @@ void nft_meta_set_eval(const struct nft_expr *expr,
 {
 	const struct nft_meta *meta = nft_expr_priv(expr);
 	struct sk_buff *skb = pkt->skb;
-	u32 value = regs->data[meta->sreg].data[0];
+	u32 value = regs->data[meta->sreg];
 
 	switch (meta->key) {
 	case NFT_META_MARK:
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 065cbda63b0a..ee2d71753746 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -49,26 +49,26 @@ static void nft_nat_eval(const struct nft_expr *expr,
 	if (priv->sreg_addr_min) {
 		if (priv->family == AF_INET) {
 			range.min_addr.ip = (__force __be32)
-					regs->data[priv->sreg_addr_min].data[0];
+					regs->data[priv->sreg_addr_min];
 			range.max_addr.ip = (__force __be32)
-					regs->data[priv->sreg_addr_max].data[0];
+					regs->data[priv->sreg_addr_max];
 
 		} else {
 			memcpy(range.min_addr.ip6,
-			       &regs->data[priv->sreg_addr_min].data,
-			       sizeof(struct nft_data));
+			       &regs->data[priv->sreg_addr_min],
+			       sizeof(range.min_addr.ip6));
 			memcpy(range.max_addr.ip6,
-			       &regs->data[priv->sreg_addr_max].data,
-			       sizeof(struct nft_data));
+			       &regs->data[priv->sreg_addr_max],
+			       sizeof(range.max_addr.ip6));
 		}
 		range.flags |= NF_NAT_RANGE_MAP_IPS;
 	}
 
 	if (priv->sreg_proto_min) {
 		range.min_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_min].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_min];
 		range.max_proto.all =
-			*(__be16 *)&regs->data[priv->sreg_proto_max].data[0];
+			*(__be16 *)&regs->data[priv->sreg_proto_max];
 		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 5fa997346a23..94fb3b27a2c5 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -23,7 +23,7 @@ static void nft_payload_eval(const struct nft_expr *expr,
 {
 	const struct nft_payload *priv = nft_expr_priv(expr);
 	const struct sk_buff *skb = pkt->skb;
-	u32 *dest = &regs->data[priv->dreg].data[0];
+	u32 *dest = &regs->data[priv->dreg];
 	int offset;
 
 	switch (priv->base) {
@@ -43,6 +43,7 @@ static void nft_payload_eval(const struct nft_expr *expr,
 	}
 	offset += priv->offset;
 
+	dest[priv->len / NFT_REG32_SIZE] = 0;
 	if (skb_copy_bits(skb, offset, dest, priv->len) < 0)
 		goto err;
 	return;
-- 
cgit 


From 7d7402642eaf385aef0772eff5a35e34fc4995d7 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 11 Apr 2015 02:27:39 +0100
Subject: netfilter: nf_tables: variable sized set element keys / data

This patch changes sets to support variable sized set element keys / data
up to 64 bytes each by using variable sized set extensions. This allows
to use concatenations with bigger data items suchs as IPv6 addresses.

As a side effect, small keys/data now don't require the full 16 bytes
of struct nft_data anymore but just the space they need.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  5 ++++-
 include/uapi/linux/netfilter/nf_tables.h |  3 +++
 net/netfilter/nf_tables_api.c            | 27 ++++++++++++---------------
 net/netfilter/nft_hash.c                 |  4 ++--
 net/netfilter/nft_rbtree.c               |  3 ++-
 5 files changed, 23 insertions(+), 19 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 160577bf0f0a..cb42da1011ef 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -158,7 +158,10 @@ struct nft_userdata {
  *	@priv: element private data and extensions
  */
 struct nft_set_elem {
-	struct nft_data		key;
+	union {
+		u32		buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
+		struct nft_data	val;
+	} key;
 	void			*priv;
 };
 
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 4221a6c3a8a5..be8584c95297 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -388,6 +388,9 @@ enum nft_data_attributes {
 };
 #define NFTA_DATA_MAX		(__NFTA_DATA_MAX - 1)
 
+/* Maximum length of a value */
+#define NFT_DATA_VALUE_MAXLEN	64
+
 /**
  * enum nft_verdict_attributes - nf_tables verdict netlink attributes
  *
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 2b3f88f4c70f..ed0e70ea2bc5 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2608,7 +2608,7 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	}
 
 	desc.klen = ntohl(nla_get_be32(nla[NFTA_SET_KEY_LEN]));
-	if (desc.klen == 0 || desc.klen > FIELD_SIZEOF(struct nft_data, data))
+	if (desc.klen == 0 || desc.klen > NFT_DATA_VALUE_MAXLEN)
 		return -EINVAL;
 
 	flags = 0;
@@ -2634,11 +2634,10 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 			if (nla[NFTA_SET_DATA_LEN] == NULL)
 				return -EINVAL;
 			desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
-			if (desc.dlen == 0 ||
-			    desc.dlen > FIELD_SIZEOF(struct nft_data, data))
+			if (desc.dlen == 0 || desc.dlen > NFT_DATA_VALUE_MAXLEN)
 				return -EINVAL;
 		} else
-			desc.dlen = sizeof(struct nft_data);
+			desc.dlen = sizeof(struct nft_verdict);
 	} else if (flags & NFT_SET_MAP)
 		return -EINVAL;
 
@@ -2854,12 +2853,10 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
 
 const struct nft_set_ext_type nft_set_ext_types[] = {
 	[NFT_SET_EXT_KEY]		= {
-		.len	= sizeof(struct nft_data),
-		.align	= __alignof__(struct nft_data),
+		.align	= __alignof__(u32),
 	},
 	[NFT_SET_EXT_DATA]		= {
-		.len	= sizeof(struct nft_data),
-		.align	= __alignof__(struct nft_data),
+		.align	= __alignof__(u32),
 	},
 	[NFT_SET_EXT_FLAGS]		= {
 		.len	= sizeof(u8),
@@ -3299,7 +3296,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 		timeout = set->timeout;
 	}
 
-	err = nft_data_init(ctx, &elem.key, sizeof(elem.key), &d1,
+	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &d1,
 			    nla[NFTA_SET_ELEM_KEY]);
 	if (err < 0)
 		goto err1;
@@ -3307,7 +3304,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	if (d1.type != NFT_DATA_VALUE || d1.len != set->klen)
 		goto err2;
 
-	nft_set_ext_add(&tmpl, NFT_SET_EXT_KEY);
+	nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, d1.len);
 	if (timeout > 0) {
 		nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
 		if (timeout != set->timeout)
@@ -3342,7 +3339,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 				goto err3;
 		}
 
-		nft_set_ext_add(&tmpl, NFT_SET_EXT_DATA);
+		nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, d2.len);
 	}
 
 	/* The full maximum length of userdata can exceed the maximum
@@ -3358,7 +3355,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	}
 
 	err = -ENOMEM;
-	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.data, data.data,
+	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data, data.data,
 				      timeout, GFP_KERNEL);
 	if (elem.priv == NULL)
 		goto err3;
@@ -3393,7 +3390,7 @@ err3:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
 		nft_data_uninit(&data, d2.type);
 err2:
-	nft_data_uninit(&elem.key, d1.type);
+	nft_data_uninit(&elem.key.val, d1.type);
 err1:
 	return err;
 }
@@ -3460,7 +3457,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 	if (nla[NFTA_SET_ELEM_KEY] == NULL)
 		goto err1;
 
-	err = nft_data_init(ctx, &elem.key, sizeof(elem.key), &desc,
+	err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc,
 			    nla[NFTA_SET_ELEM_KEY]);
 	if (err < 0)
 		goto err1;
@@ -3488,7 +3485,7 @@ static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
 err3:
 	kfree(trans);
 err2:
-	nft_data_uninit(&elem.key, desc.type);
+	nft_data_uninit(&elem.key.val, desc.type);
 err1:
 	return err;
 }
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index 767df41d28ea..3f9d45d3d9b7 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -133,7 +133,7 @@ static int nft_hash_insert(const struct nft_set *set,
 	struct nft_hash_cmp_arg arg = {
 		.genmask = nft_genmask_next(read_pnet(&set->pnet)),
 		.set	 = set,
-		.key	 = elem->key.data,
+		.key	 = elem->key.val.data,
 	};
 
 	return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
@@ -157,7 +157,7 @@ static void *nft_hash_deactivate(const struct nft_set *set,
 	struct nft_hash_cmp_arg arg = {
 		.genmask = nft_genmask_next(read_pnet(&set->pnet)),
 		.set	 = set,
-		.key	 = elem->key.data,
+		.key	 = elem->key.val.data,
 	};
 
 	rcu_read_lock();
diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c
index b888e0cdf1e2..1c30f41cff5b 100644
--- a/net/netfilter/nft_rbtree.c
+++ b/net/netfilter/nft_rbtree.c
@@ -152,7 +152,8 @@ static void *nft_rbtree_deactivate(const struct nft_set *set,
 	while (parent != NULL) {
 		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
 
-		d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key, set->klen);
+		d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
+					   set->klen);
 		if (d < 0)
 			parent = parent->rb_left;
 		else if (d > 0)
-- 
cgit 


From 24477e57412a7a7dea62637ac990bc5c1cff0665 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 8 Apr 2015 19:41:40 +0200
Subject: uapi: ebtables: don't include linux/if.h

linux/if.h creates conflicts in userspace with net/if.h

By using it here we force userspace to use linux/if.h while
net/if.h may be needed.

Note that:

include/linux/netfilter_ipv4/ip_tables.h and
include/linux/netfilter_ipv6/ip6_tables.h

don't include linux/if.h and they also refer to IFNAMSIZ, so they are
expecting userspace to include use net/if.h from the client program.

Signed-off-by: Arturo Borrero Gonzalez <arturo.borrero.glez@gmail.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter_bridge/ebtables.h      | 3 ++-
 include/uapi/linux/netfilter_bridge/ebtables.h | 2 --
 2 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 34e7a2b7f867..f1bd3962e6b6 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -12,9 +12,10 @@
 #ifndef __LINUX_BRIDGE_EFF_H
 #define __LINUX_BRIDGE_EFF_H
 
+#include <linux/if.h>
+#include <linux/if_ether.h>
 #include <uapi/linux/netfilter_bridge/ebtables.h>
 
-
 /* return values for match() functions */
 #define EBT_MATCH 0
 #define EBT_NOMATCH 1
diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index ba993360dbe9..773dfe8924c7 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -12,9 +12,7 @@
 
 #ifndef _UAPI__LINUX_BRIDGE_EFF_H
 #define _UAPI__LINUX_BRIDGE_EFF_H
-#include <linux/if.h>
 #include <linux/netfilter_bridge.h>
-#include <linux/if_ether.h>
 
 #define EBT_TABLE_MAXNAMELEN 32
 #define EBT_CHAIN_MAXNAMELEN EBT_TABLE_MAXNAMELEN
-- 
cgit 


From f25ad2e907f110378159fe5e088aa13176faaa5b Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 11 Apr 2015 10:46:39 +0100
Subject: netfilter: nf_tables: prepare for expressions associated to set
 elements

Preparation to attach expressions to set elements: add a set extension
type to hold an expression and dump the expression information with the
set element.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        | 7 +++++++
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 net/netfilter/nf_tables_api.c            | 9 +++++++++
 3 files changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e21623cb7b20..d45a871b3da6 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -371,6 +371,7 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
  *	@NFT_SET_EXT_TIMEOUT: element timeout
  *	@NFT_SET_EXT_EXPIRATION: element expiration time
  *	@NFT_SET_EXT_USERDATA: user data associated with the element
+ *	@NFT_SET_EXT_EXPR: expression assiociated with the element
  *	@NFT_SET_EXT_NUM: number of extension types
  */
 enum nft_set_extensions {
@@ -380,6 +381,7 @@ enum nft_set_extensions {
 	NFT_SET_EXT_TIMEOUT,
 	NFT_SET_EXT_EXPIRATION,
 	NFT_SET_EXT_USERDATA,
+	NFT_SET_EXT_EXPR,
 	NFT_SET_EXT_NUM
 };
 
@@ -491,6 +493,11 @@ static inline struct nft_userdata *nft_set_ext_userdata(const struct nft_set_ext
 	return nft_set_ext(ext, NFT_SET_EXT_USERDATA);
 }
 
+static inline struct nft_expr *nft_set_ext_expr(const struct nft_set_ext *ext)
+{
+	return nft_set_ext(ext, NFT_SET_EXT_EXPR);
+}
+
 static inline bool nft_set_elem_expired(const struct nft_set_ext *ext)
 {
 	return nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION) &&
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index be8584c95297..f9c5af22a6af 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -322,6 +322,7 @@ enum nft_set_elem_flags {
  * @NFTA_SET_ELEM_TIMEOUT: timeout value (NLA_U64)
  * @NFTA_SET_ELEM_EXPIRATION: expiration time (NLA_U64)
  * @NFTA_SET_ELEM_USERDATA: user data (NLA_BINARY)
+ * @NFTA_SET_ELEM_EXPR: expression (NLA_NESTED: nft_expr_attributes)
  */
 enum nft_set_elem_attributes {
 	NFTA_SET_ELEM_UNSPEC,
@@ -331,6 +332,7 @@ enum nft_set_elem_attributes {
 	NFTA_SET_ELEM_TIMEOUT,
 	NFTA_SET_ELEM_EXPIRATION,
 	NFTA_SET_ELEM_USERDATA,
+	NFTA_SET_ELEM_EXPR,
 	__NFTA_SET_ELEM_MAX
 };
 #define NFTA_SET_ELEM_MAX	(__NFTA_SET_ELEM_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index e97bee59fe08..8830811550ec 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2904,6 +2904,9 @@ const struct nft_set_ext_type nft_set_ext_types[] = {
 	[NFT_SET_EXT_DATA]		= {
 		.align	= __alignof__(u32),
 	},
+	[NFT_SET_EXT_EXPR]		= {
+		.align	= __alignof__(struct nft_expr),
+	},
 	[NFT_SET_EXT_FLAGS]		= {
 		.len	= sizeof(u8),
 		.align	= __alignof__(u8),
@@ -2990,6 +2993,10 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
 			  set->dlen) < 0)
 		goto nla_put_failure;
 
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR) &&
+	    nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, nft_set_ext_expr(ext)) < 0)
+		goto nla_put_failure;
+
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_FLAGS) &&
 	    nla_put_be32(skb, NFTA_SET_ELEM_FLAGS,
 		         htonl(*nft_set_ext_flags(ext))))
@@ -3276,6 +3283,8 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem)
 	nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
 	if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
 		nft_data_uninit(nft_set_ext_data(ext), set->dtype);
+	if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+		nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));
 
 	kfree(elem);
 }
-- 
cgit 


From 7c6c6e95a12e46f499749bdd496e53d03950f377 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 11 Apr 2015 10:46:41 +0100
Subject: netfilter: nf_tables: add flag to indicate set contains expressions

Add a set flag to indicate that the set is used as a state table and
contains expressions for evaluation. This operation is mutually
exclusive with the mapping operation, so sets specifying both are
rejected. The lookup expression also rejects binding to state tables
since it only deals with loopup and map operations.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h | 2 ++
 net/netfilter/nf_tables_api.c            | 8 ++++++--
 net/netfilter/nft_lookup.c               | 3 +++
 3 files changed, 11 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index f9c5af22a6af..48942381d02f 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -238,6 +238,7 @@ enum nft_rule_compat_attributes {
  * @NFT_SET_INTERVAL: set contains intervals
  * @NFT_SET_MAP: set is used as a dictionary
  * @NFT_SET_TIMEOUT: set uses timeouts
+ * @NFT_SET_EVAL: set contains expressions for evaluation
  */
 enum nft_set_flags {
 	NFT_SET_ANONYMOUS		= 0x1,
@@ -245,6 +246,7 @@ enum nft_set_flags {
 	NFT_SET_INTERVAL		= 0x4,
 	NFT_SET_MAP			= 0x8,
 	NFT_SET_TIMEOUT			= 0x10,
+	NFT_SET_EVAL			= 0x20,
 };
 
 /**
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 8830811550ec..78af83bc9c8e 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -2661,9 +2661,13 @@ static int nf_tables_newset(struct sock *nlsk, struct sk_buff *skb,
 	if (nla[NFTA_SET_FLAGS] != NULL) {
 		flags = ntohl(nla_get_be32(nla[NFTA_SET_FLAGS]));
 		if (flags & ~(NFT_SET_ANONYMOUS | NFT_SET_CONSTANT |
-			      NFT_SET_INTERVAL | NFT_SET_MAP |
-			      NFT_SET_TIMEOUT))
+			      NFT_SET_INTERVAL | NFT_SET_TIMEOUT |
+			      NFT_SET_MAP | NFT_SET_EVAL))
 			return -EINVAL;
+		/* Only one of both operations is supported */
+		if ((flags & (NFT_SET_MAP | NFT_SET_EVAL)) ==
+			     (NFT_SET_MAP | NFT_SET_EVAL))
+			return -EOPNOTSUPP;
 	}
 
 	dtype = 0;
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index ba1466209f2a..b3c31ef8015d 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -71,6 +71,9 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
 			return PTR_ERR(set);
 	}
 
+	if (set->flags & NFT_SET_EVAL)
+		return -EOPNOTSUPP;
+
 	priv->sreg = nft_parse_register(tb[NFTA_LOOKUP_SREG]);
 	err = nft_validate_register_load(priv->sreg, set->klen);
 	if (err < 0)
-- 
cgit 


From 3e135cd499bfbec15684fe9c756162d58df4dc77 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Sat, 11 Apr 2015 10:46:42 +0100
Subject: netfilter: nft_dynset: dynamic stateful expression instantiation

Support instantiating stateful expressions based on a template that
are associated with dynamically created set entries. The expressions
are evaluated when adding or updating the set element.

This allows to maintain per flow state using the existing set
infrastructure and expression types, with arbitrary definitions of
a flow.

Usage is currently restricted to anonymous sets, meaning only a single
binding can exist, since the desired semantics of multiple independant
bindings haven't been defined so far.

Examples (userspace syntax is still WIP):

1. Limit the rate of new SSH connections per host, similar to iptables
   hashlimit:

	flow ip saddr timeout 60s \
	limit 10/second \
	accept

2. Account network traffic between each set of /24 networks:

	flow ip saddr & 255.255.255.0 . ip daddr & 255.255.255.0 \
	counter

3. Account traffic to each host per user:

	flow skuid . ip daddr \
	counter

4. Account traffic for each combination of source address and TCP flags:

	flow ip saddr . tcp flags \
	counter

The resulting set content after a Xmas-scan look like this:

{
	192.168.122.1 . fin | psh | urg : counter packets 1001 bytes 40040,
	192.168.122.1 . ack : counter packets 74 bytes 3848,
	192.168.122.1 . psh | ack : counter packets 35 bytes 3144
}

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nft_dynset.c               | 54 +++++++++++++++++++++++++++++---
 2 files changed, 52 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 48942381d02f..5fa1cd04762e 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -567,6 +567,7 @@ enum nft_dynset_ops {
  * @NFTA_DYNSET_SREG_KEY: source register of the key (NLA_U32)
  * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32)
  * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64)
+ * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes)
  */
 enum nft_dynset_attributes {
 	NFTA_DYNSET_UNSPEC,
@@ -576,6 +577,7 @@ enum nft_dynset_attributes {
 	NFTA_DYNSET_SREG_KEY,
 	NFTA_DYNSET_SREG_DATA,
 	NFTA_DYNSET_TIMEOUT,
+	NFTA_DYNSET_EXPR,
 	__NFTA_DYNSET_MAX,
 };
 #define NFTA_DYNSET_MAX		(__NFTA_DYNSET_MAX - 1)
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index 03699d5c0b4b..513a8ef60a59 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -23,6 +23,7 @@ struct nft_dynset {
 	enum nft_registers		sreg_key:8;
 	enum nft_registers		sreg_data:8;
 	u64				timeout;
+	struct nft_expr			*expr;
 	struct nft_set_binding		binding;
 };
 
@@ -30,6 +31,7 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
 			    struct nft_regs *regs)
 {
 	const struct nft_dynset *priv = nft_expr_priv(expr);
+	struct nft_set_ext *ext;
 	u64 timeout;
 	void *elem;
 
@@ -44,7 +46,13 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
 	if (elem == NULL) {
 		if (set->size)
 			atomic_dec(&set->nelems);
+		return NULL;
 	}
+
+	ext = nft_set_elem_ext(set, elem);
+	if (priv->expr != NULL)
+		nft_expr_clone(nft_set_ext_expr(ext), priv->expr);
+
 	return elem;
 }
 
@@ -55,18 +63,27 @@ static void nft_dynset_eval(const struct nft_expr *expr,
 	const struct nft_dynset *priv = nft_expr_priv(expr);
 	struct nft_set *set = priv->set;
 	const struct nft_set_ext *ext;
+	const struct nft_expr *sexpr;
 	u64 timeout;
 
 	if (set->ops->update(set, &regs->data[priv->sreg_key], nft_dynset_new,
 			     expr, regs, &ext)) {
+		sexpr = NULL;
+		if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+			sexpr = nft_set_ext_expr(ext);
+
 		if (priv->op == NFT_DYNSET_OP_UPDATE &&
 		    nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
 			timeout = priv->timeout ? : set->timeout;
 			*nft_set_ext_expiration(ext) = jiffies + timeout;
-			return;
-		}
-	}
+		} else if (sexpr == NULL)
+			goto out;
 
+		if (sexpr != NULL)
+			sexpr->ops->eval(sexpr, regs, pkt);
+		return;
+	}
+out:
 	regs->verdict.code = NFT_BREAK;
 }
 
@@ -77,6 +94,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
 	[NFTA_DYNSET_SREG_KEY]	= { .type = NLA_U32 },
 	[NFTA_DYNSET_SREG_DATA]	= { .type = NLA_U32 },
 	[NFTA_DYNSET_TIMEOUT]	= { .type = NLA_U64 },
+	[NFTA_DYNSET_EXPR]	= { .type = NLA_NESTED },
 };
 
 static int nft_dynset_init(const struct nft_ctx *ctx,
@@ -142,10 +160,29 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 	} else if (set->flags & NFT_SET_MAP)
 		return -EINVAL;
 
+	if (tb[NFTA_DYNSET_EXPR] != NULL) {
+		if (!(set->flags & NFT_SET_EVAL))
+			return -EINVAL;
+		if (!(set->flags & NFT_SET_ANONYMOUS))
+			return -EOPNOTSUPP;
+
+		priv->expr = nft_expr_init(ctx, tb[NFTA_DYNSET_EXPR]);
+		if (IS_ERR(priv->expr))
+			return PTR_ERR(priv->expr);
+
+		err = -EOPNOTSUPP;
+		if (!(priv->expr->ops->type->flags & NFT_EXPR_STATEFUL))
+			goto err1;
+	} else if (set->flags & NFT_SET_EVAL)
+		return -EINVAL;
+
 	nft_set_ext_prepare(&priv->tmpl);
 	nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_KEY, set->klen);
 	if (set->flags & NFT_SET_MAP)
 		nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_DATA, set->dlen);
+	if (priv->expr != NULL)
+		nft_set_ext_add_length(&priv->tmpl, NFT_SET_EXT_EXPR,
+				       priv->expr->ops->size);
 	if (set->flags & NFT_SET_TIMEOUT) {
 		if (timeout || set->timeout)
 			nft_set_ext_add(&priv->tmpl, NFT_SET_EXT_EXPIRATION);
@@ -155,10 +192,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
 
 	err = nf_tables_bind_set(ctx, set, &priv->binding);
 	if (err < 0)
-		return err;
+		goto err1;
 
 	priv->set = set;
 	return 0;
+
+err1:
+	if (priv->expr != NULL)
+		nft_expr_destroy(ctx, priv->expr);
+	return err;
 }
 
 static void nft_dynset_destroy(const struct nft_ctx *ctx,
@@ -167,6 +209,8 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx,
 	struct nft_dynset *priv = nft_expr_priv(expr);
 
 	nf_tables_unbind_set(ctx, priv->set, &priv->binding);
+	if (priv->expr != NULL)
+		nft_expr_destroy(ctx, priv->expr);
 }
 
 static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
@@ -184,6 +228,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr)
 		goto nla_put_failure;
 	if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, cpu_to_be64(priv->timeout)))
 		goto nla_put_failure;
+	if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr))
+		goto nla_put_failure;
 	return 0;
 
 nla_put_failure:
-- 
cgit 


From df81b29c7b81b9d70ee29b7a263dd5009daa0ce4 Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 15 Apr 2015 10:17:43 +0930
Subject: virtio_balloon: transitional interface

Virtio 1.0 doesn't include a modern balloon device.
But it's not a big change to support a transitional
balloon device: this has the advantage of supporting
existing drivers, transparently.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
---
 drivers/virtio/virtio_balloon.c     | 21 +++++++++++++++------
 include/uapi/linux/virtio_balloon.h | 32 ++++++++++++++++++++++++++++----
 2 files changed, 43 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 6a356e344f82..82e80e034f25 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -214,8 +214,8 @@ static inline void update_stat(struct virtio_balloon *vb, int idx,
 			       u16 tag, u64 val)
 {
 	BUG_ON(idx >= VIRTIO_BALLOON_S_NR);
-	vb->stats[idx].tag = tag;
-	vb->stats[idx].val = val;
+	vb->stats[idx].tag = cpu_to_virtio16(vb->vdev, tag);
+	vb->stats[idx].val = cpu_to_virtio64(vb->vdev, val);
 }
 
 #define pages_to_bytes(x) ((u64)(x) << PAGE_SHIFT)
@@ -283,18 +283,27 @@ static void virtballoon_changed(struct virtio_device *vdev)
 
 static inline s64 towards_target(struct virtio_balloon *vb)
 {
-	__le32 v;
 	s64 target;
+	u32 num_pages;
 
-	virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages, &v);
+	virtio_cread(vb->vdev, struct virtio_balloon_config, num_pages,
+		     &num_pages);
 
-	target = le32_to_cpu(v);
+	/* Legacy balloon config space is LE, unlike all other devices. */
+	if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
+		num_pages = le32_to_cpu((__force __le32)num_pages);
+
+	target = num_pages;
 	return target - vb->num_pages;
 }
 
 static void update_balloon_size(struct virtio_balloon *vb)
 {
-	__le32 actual = cpu_to_le32(vb->num_pages);
+	u32 actual = vb->num_pages;
+
+	/* Legacy balloon config space is LE, unlike all other devices. */
+	if (!virtio_has_feature(vb->vdev, VIRTIO_F_VERSION_1))
+		actual = (__force u32)cpu_to_le32(actual);
 
 	virtio_cwrite(vb->vdev, struct virtio_balloon_config, actual,
 		      &actual);
diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 4b0488f20b2e..984169a819ee 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -25,6 +25,7 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
+#include <linux/types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
@@ -38,9 +39,9 @@
 
 struct virtio_balloon_config {
 	/* Number of pages host wants Guest to give up. */
-	__le32 num_pages;
+	__u32 num_pages;
 	/* Number of pages we've actually got in balloon. */
-	__le32 actual;
+	__u32 actual;
 };
 
 #define VIRTIO_BALLOON_S_SWAP_IN  0   /* Amount of memory swapped in */
@@ -51,9 +52,32 @@ struct virtio_balloon_config {
 #define VIRTIO_BALLOON_S_MEMTOT   5   /* Total amount of memory */
 #define VIRTIO_BALLOON_S_NR       6
 
+/*
+ * Memory statistics structure.
+ * Driver fills an array of these structures and passes to device.
+ *
+ * NOTE: fields are laid out in a way that would make compiler add padding
+ * between and after fields, so we have to use compiler-specific attributes to
+ * pack it, to disable this padding. This also often causes compiler to
+ * generate suboptimal code.
+ *
+ * We maintain this statistics structure format for backwards compatibility,
+ * but don't follow this example.
+ *
+ * If implementing a similar structure, do something like the below instead:
+ *     struct virtio_balloon_stat {
+ *         __virtio16 tag;
+ *         __u8 reserved[6];
+ *         __virtio64 val;
+ *     };
+ *
+ * In other words, add explicit reserved fields to align field and
+ * structure boundaries at field size, avoiding compiler padding
+ * without the packed attribute.
+ */
 struct virtio_balloon_stat {
-	__u16 tag;
-	__u64 val;
+	__virtio16 tag;
+	__virtio64 val;
 } __attribute__((packed));
 
 #endif /* _LINUX_VIRTIO_BALLOON_H */
-- 
cgit 


From bfebd1cdb497a57757c83f5fbf1a29931591e2a4 Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Sun, 8 Mar 2015 00:51:47 -0500
Subject: dm: add full blk-mq support to request-based DM

Commit e5863d9ad ("dm: allocate requests in target when stacking on
blk-mq devices") served as the first step toward fully utilizing blk-mq
in request-based DM -- it enabled stacking an old-style (request_fn)
request_queue ontop of the underlying blk-mq device(s).  That first step
didn't improve performance of DM multipath ontop of fast blk-mq devices
(e.g. NVMe) because the top-level old-style request_queue was severely
limited by the queue_lock.

The second step offered here enables stacking a blk-mq request_queue
ontop of the underlying blk-mq device(s).  This unlocks significant
performance gains on fast blk-mq devices, Keith Busch tested on his NVMe
testbed and offered this really positive news:

 "Just providing a performance update. All my fio tests are getting
  roughly equal performance whether accessed through the raw block
  device or the multipath device mapper (~470k IOPS). I could only push
  ~20% of the raw iops through dm before this conversion, so this latest
  tree is looking really solid from a performance standpoint."

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Tested-by: Keith Busch <keith.busch@intel.com>
---
 drivers/md/dm-mpath.c         |   2 +-
 drivers/md/dm-table.c         |  11 +-
 drivers/md/dm.c               | 317 +++++++++++++++++++++++++++++++++---------
 include/uapi/linux/dm-ioctl.h |   4 +-
 4 files changed, 261 insertions(+), 73 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index add6391f3f8e..c8f07e5a9a17 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1703,7 +1703,7 @@ out:
  *---------------------------------------------------------------*/
 static struct target_type multipath_target = {
 	.name = "multipath",
-	.version = {1, 8, 0},
+	.version = {1, 9, 0},
 	.module = THIS_MODULE,
 	.ctr = multipath_ctr,
 	.dtr = multipath_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 057312048b68..66600cab9fa5 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/atomic.h>
+#include <linux/blk-mq.h>
 
 #define DM_MSG_PREFIX "table"
 
@@ -1695,9 +1696,13 @@ void dm_table_run_md_queue_async(struct dm_table *t)
 	md = dm_table_get_md(t);
 	queue = dm_get_md_queue(md);
 	if (queue) {
-		spin_lock_irqsave(queue->queue_lock, flags);
-		blk_run_queue_async(queue);
-		spin_unlock_irqrestore(queue->queue_lock, flags);
+		if (queue->mq_ops)
+			blk_mq_run_hw_queues(queue, true);
+		else {
+			spin_lock_irqsave(queue->queue_lock, flags);
+			blk_run_queue_async(queue);
+			spin_unlock_irqrestore(queue->queue_lock, flags);
+		}
 	}
 }
 EXPORT_SYMBOL(dm_table_run_md_queue_async);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5294e016e92b..3a66baac76ed 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -23,6 +23,7 @@
 #include <linux/kthread.h>
 #include <linux/ktime.h>
 #include <linux/elevator.h> /* for rq_end_sector() */
+#include <linux/blk-mq.h>
 
 #include <trace/events/block.h>
 
@@ -224,6 +225,9 @@ struct mapped_device {
 	int last_rq_rw;
 	sector_t last_rq_pos;
 	ktime_t last_rq_start_time;
+
+	/* for blk-mq request-based DM support */
+	struct blk_mq_tag_set tag_set;
 };
 
 /*
@@ -1025,6 +1029,11 @@ static void end_clone_bio(struct bio *clone, int error)
 	blk_update_request(tio->orig, 0, nr_bytes);
 }
 
+static struct dm_rq_target_io *tio_from_request(struct request *rq)
+{
+	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
+}
+
 /*
  * Don't touch any member of the md after calling this function because
  * the md may be freed in dm_put() at the end of this function.
@@ -1048,8 +1057,10 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 	 * queue lock again.
 	 */
 	if (run_queue) {
-		if (!nr_requests_pending ||
-		    (nr_requests_pending >= md->queue->nr_congestion_on))
+		if (md->queue->mq_ops)
+			blk_mq_run_hw_queues(md->queue, true);
+		else if (!nr_requests_pending ||
+			 (nr_requests_pending >= md->queue->nr_congestion_on))
 			blk_run_queue_async(md->queue);
 	}
 
@@ -1062,13 +1073,17 @@ static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
 static void free_rq_clone(struct request *clone)
 {
 	struct dm_rq_target_io *tio = clone->end_io_data;
+	struct mapped_device *md = tio->md;
 
 	blk_rq_unprep_clone(clone);
+
 	if (clone->q && clone->q->mq_ops)
 		tio->ti->type->release_clone_rq(clone);
 	else
-		free_clone_request(tio->md, clone);
-	free_rq_tio(tio);
+		free_clone_request(md, clone);
+
+	if (!md->queue->mq_ops)
+		free_rq_tio(tio);
 }
 
 /*
@@ -1097,17 +1112,22 @@ static void dm_end_request(struct request *clone, int error)
 	}
 
 	free_rq_clone(clone);
-	blk_end_request_all(rq, error);
+	if (!rq->q->mq_ops)
+		blk_end_request_all(rq, error);
+	else
+		blk_mq_end_request(rq, error);
 	rq_completed(md, rw, true);
 }
 
 static void dm_unprep_request(struct request *rq)
 {
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_rq_target_io *tio = tio_from_request(rq);
 	struct request *clone = tio->clone;
 
-	rq->special = NULL;
-	rq->cmd_flags &= ~REQ_DONTPREP;
+	if (!rq->q->mq_ops) {
+		rq->special = NULL;
+		rq->cmd_flags &= ~REQ_DONTPREP;
+	}
 
 	if (clone)
 		free_rq_clone(clone);
@@ -1116,18 +1136,29 @@ static void dm_unprep_request(struct request *rq)
 /*
  * Requeue the original request of a clone.
  */
-static void dm_requeue_unmapped_original_request(struct mapped_device *md,
-						 struct request *rq)
+static void old_requeue_request(struct request *rq)
 {
-	int rw = rq_data_dir(rq);
 	struct request_queue *q = rq->q;
 	unsigned long flags;
 
-	dm_unprep_request(rq);
-
 	spin_lock_irqsave(q->queue_lock, flags);
 	blk_requeue_request(q, rq);
 	spin_unlock_irqrestore(q->queue_lock, flags);
+}
+
+static void dm_requeue_unmapped_original_request(struct mapped_device *md,
+						 struct request *rq)
+{
+	int rw = rq_data_dir(rq);
+
+	dm_unprep_request(rq);
+
+	if (!rq->q->mq_ops)
+		old_requeue_request(rq);
+	else {
+		blk_mq_requeue_request(rq);
+		blk_mq_kick_requeue_list(rq->q);
+	}
 
 	rq_completed(md, rw, false);
 }
@@ -1139,35 +1170,44 @@ static void dm_requeue_unmapped_request(struct request *clone)
 	dm_requeue_unmapped_original_request(tio->md, tio->orig);
 }
 
-static void __stop_queue(struct request_queue *q)
-{
-	blk_stop_queue(q);
-}
-
-static void stop_queue(struct request_queue *q)
+static void old_stop_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
+	if (blk_queue_stopped(q))
+		return;
+
 	spin_lock_irqsave(q->queue_lock, flags);
-	__stop_queue(q);
+	blk_stop_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
-static void __start_queue(struct request_queue *q)
+static void stop_queue(struct request_queue *q)
 {
-	if (blk_queue_stopped(q))
-		blk_start_queue(q);
+	if (!q->mq_ops)
+		old_stop_queue(q);
+	else
+		blk_mq_stop_hw_queues(q);
 }
 
-static void start_queue(struct request_queue *q)
+static void old_start_queue(struct request_queue *q)
 {
 	unsigned long flags;
 
 	spin_lock_irqsave(q->queue_lock, flags);
-	__start_queue(q);
+	if (blk_queue_stopped(q))
+		blk_start_queue(q);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
 
+static void start_queue(struct request_queue *q)
+{
+	if (!q->mq_ops)
+		old_start_queue(q);
+	else
+		blk_mq_start_stopped_hw_queues(q, true);
+}
+
 static void dm_done(struct request *clone, int error, bool mapped)
 {
 	int r = error;
@@ -1206,13 +1246,20 @@ static void dm_done(struct request *clone, int error, bool mapped)
 static void dm_softirq_done(struct request *rq)
 {
 	bool mapped = true;
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_rq_target_io *tio = tio_from_request(rq);
 	struct request *clone = tio->clone;
+	int rw;
 
 	if (!clone) {
-		blk_end_request_all(rq, tio->error);
-		rq_completed(tio->md, rq_data_dir(rq), false);
-		free_rq_tio(tio);
+		rw = rq_data_dir(rq);
+		if (!rq->q->mq_ops) {
+			blk_end_request_all(rq, tio->error);
+			rq_completed(tio->md, rw, false);
+			free_rq_tio(tio);
+		} else {
+			blk_mq_end_request(rq, tio->error);
+			rq_completed(tio->md, rw, false);
+		}
 		return;
 	}
 
@@ -1228,7 +1275,7 @@ static void dm_softirq_done(struct request *rq)
  */
 static void dm_complete_request(struct request *rq, int error)
 {
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_rq_target_io *tio = tio_from_request(rq);
 
 	tio->error = error;
 	blk_complete_request(rq);
@@ -1247,7 +1294,7 @@ static void dm_kill_unmapped_request(struct request *rq, int error)
 }
 
 /*
- * Called with the clone's queue lock held
+ * Called with the clone's queue lock held (for non-blk-mq)
  */
 static void end_clone_request(struct request *clone, int error)
 {
@@ -1808,6 +1855,18 @@ static struct request *clone_rq(struct request *rq, struct mapped_device *md,
 
 static void map_tio_request(struct kthread_work *work);
 
+static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
+		     struct mapped_device *md)
+{
+	tio->md = md;
+	tio->ti = NULL;
+	tio->clone = NULL;
+	tio->orig = rq;
+	tio->error = 0;
+	memset(&tio->info, 0, sizeof(tio->info));
+	init_kthread_work(&tio->work, map_tio_request);
+}
+
 static struct dm_rq_target_io *prep_tio(struct request *rq,
 					struct mapped_device *md, gfp_t gfp_mask)
 {
@@ -1819,13 +1878,7 @@ static struct dm_rq_target_io *prep_tio(struct request *rq,
 	if (!tio)
 		return NULL;
 
-	tio->md = md;
-	tio->ti = NULL;
-	tio->clone = NULL;
-	tio->orig = rq;
-	tio->error = 0;
-	memset(&tio->info, 0, sizeof(tio->info));
-	init_kthread_work(&tio->work, map_tio_request);
+	init_tio(tio, rq, md);
 
 	table = dm_get_live_table(md, &srcu_idx);
 	if (!dm_table_mq_request_based(table)) {
@@ -1869,11 +1922,11 @@ static int dm_prep_fn(struct request_queue *q, struct request *rq)
  * DM_MAPIO_REQUEUE : the original request needs to be requeued
  * < 0              : the request was completed due to failure
  */
-static int map_request(struct dm_target *ti, struct request *rq,
+static int map_request(struct dm_rq_target_io *tio, struct request *rq,
 		       struct mapped_device *md)
 {
 	int r;
-	struct dm_rq_target_io *tio = rq->special;
+	struct dm_target *ti = tio->ti;
 	struct request *clone = NULL;
 
 	if (tio->clone) {
@@ -1888,7 +1941,7 @@ static int map_request(struct dm_target *ti, struct request *rq,
 		}
 		if (IS_ERR(clone))
 			return DM_MAPIO_REQUEUE;
-		if (setup_clone(clone, rq, tio, GFP_KERNEL)) {
+		if (setup_clone(clone, rq, tio, GFP_NOIO)) {
 			/* -ENOMEM */
 			ti->type->release_clone_rq(clone);
 			return DM_MAPIO_REQUEUE;
@@ -1929,13 +1982,16 @@ static void map_tio_request(struct kthread_work *work)
 	struct request *rq = tio->orig;
 	struct mapped_device *md = tio->md;
 
-	if (map_request(tio->ti, rq, md) == DM_MAPIO_REQUEUE)
+	if (map_request(tio, rq, md) == DM_MAPIO_REQUEUE)
 		dm_requeue_unmapped_original_request(md, rq);
 }
 
 static void dm_start_request(struct mapped_device *md, struct request *orig)
 {
-	blk_start_request(orig);
+	if (!orig->q->mq_ops)
+		blk_start_request(orig);
+	else
+		blk_mq_start_request(orig);
 	atomic_inc(&md->pending[rq_data_dir(orig)]);
 
 	if (md->seq_rq_merge_deadline_usecs) {
@@ -2045,7 +2101,7 @@ static void dm_request_fn(struct request_queue *q)
 
 		dm_start_request(md, rq);
 
-		tio = rq->special;
+		tio = tio_from_request(rq);
 		/* Establish tio->ti before queuing work (map_tio_request) */
 		tio->ti = ti;
 		queue_kthread_work(&md->kworker, &tio->work);
@@ -2142,7 +2198,7 @@ static void dm_init_md_queue(struct mapped_device *md)
 {
 	/*
 	 * Request-based dm devices cannot be stacked on top of bio-based dm
-	 * devices.  The type of this dm device has not been decided yet.
+	 * devices.  The type of this dm device may not have been decided yet.
 	 * The type is decided at the first table loading time.
 	 * To prevent problematic device stacking, clear the queue flag
 	 * for request stacking support until then.
@@ -2150,7 +2206,15 @@ static void dm_init_md_queue(struct mapped_device *md)
 	 * This queue is new, so no concurrency on the queue_flags.
 	 */
 	queue_flag_clear_unlocked(QUEUE_FLAG_STACKABLE, md->queue);
+}
+
+static void dm_init_old_md_queue(struct mapped_device *md)
+{
+	dm_init_md_queue(md);
 
+	/*
+	 * Initialize aspects of queue that aren't relevant for blk-mq
+	 */
 	md->queue->queuedata = md;
 	md->queue->backing_dev_info.congested_fn = dm_any_congested;
 	md->queue->backing_dev_info.congested_data = md;
@@ -2273,6 +2337,7 @@ static void unlock_fs(struct mapped_device *md);
 static void free_dev(struct mapped_device *md)
 {
 	int minor = MINOR(disk_devt(md->disk));
+	bool using_blk_mq = !!md->queue->mq_ops;
 
 	unlock_fs(md);
 	destroy_workqueue(md->wq);
@@ -2298,6 +2363,8 @@ static void free_dev(struct mapped_device *md)
 	del_gendisk(md->disk);
 	put_disk(md->disk);
 	blk_cleanup_queue(md->queue);
+	if (using_blk_mq)
+		blk_mq_free_tag_set(&md->tag_set);
 	bdput(md->bdev);
 	free_minor(minor);
 
@@ -2457,7 +2524,7 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
 	 * This must be done before setting the queue restrictions,
 	 * because request-based dm may be run just after the setting.
 	 */
-	if (dm_table_request_based(t) && !blk_queue_stopped(q))
+	if (dm_table_request_based(t))
 		stop_queue(q);
 
 	__bind_mempools(md, t);
@@ -2539,14 +2606,6 @@ unsigned dm_get_md_type(struct mapped_device *md)
 	return md->type;
 }
 
-static bool dm_md_type_request_based(struct mapped_device *md)
-{
-	unsigned table_type = dm_get_md_type(md);
-
-	return (table_type == DM_TYPE_REQUEST_BASED ||
-		table_type == DM_TYPE_MQ_REQUEST_BASED);
-}
-
 struct target_type *dm_get_immutable_target_type(struct mapped_device *md)
 {
 	return md->immutable_target_type;
@@ -2563,6 +2622,14 @@ struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
 }
 EXPORT_SYMBOL_GPL(dm_get_queue_limits);
 
+static void init_rq_based_worker_thread(struct mapped_device *md)
+{
+	/* Initialize the request-based DM worker thread */
+	init_kthread_worker(&md->kworker);
+	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
+				       "kdmwork-%s", dm_device_name(md));
+}
+
 /*
  * Fully initialize a request-based queue (->elevator, ->request_fn, etc).
  */
@@ -2571,29 +2638,131 @@ static int dm_init_request_based_queue(struct mapped_device *md)
 	struct request_queue *q = NULL;
 
 	if (md->queue->elevator)
-		return 1;
+		return 0;
 
 	/* Fully initialize the queue */
 	q = blk_init_allocated_queue(md->queue, dm_request_fn, NULL);
 	if (!q)
-		return 0;
+		return -EINVAL;
 
 	/* disable dm_request_fn's merge heuristic by default */
 	md->seq_rq_merge_deadline_usecs = 0;
 
 	md->queue = q;
-	dm_init_md_queue(md);
+	dm_init_old_md_queue(md);
 	blk_queue_softirq_done(md->queue, dm_softirq_done);
 	blk_queue_prep_rq(md->queue, dm_prep_fn);
 
-	/* Also initialize the request-based DM worker thread */
-	init_kthread_worker(&md->kworker);
-	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
-				       "kdmwork-%s", dm_device_name(md));
+	init_rq_based_worker_thread(md);
 
 	elv_register_queue(md->queue);
 
-	return 1;
+	return 0;
+}
+
+static int dm_mq_init_request(void *data, struct request *rq,
+			      unsigned int hctx_idx, unsigned int request_idx,
+			      unsigned int numa_node)
+{
+	struct mapped_device *md = data;
+	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+
+	/*
+	 * Must initialize md member of tio, otherwise it won't
+	 * be available in dm_mq_queue_rq.
+	 */
+	tio->md = md;
+
+	return 0;
+}
+
+static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
+			  const struct blk_mq_queue_data *bd)
+{
+	struct request *rq = bd->rq;
+	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
+	struct mapped_device *md = tio->md;
+	int srcu_idx;
+	struct dm_table *map = dm_get_live_table(md, &srcu_idx);
+	struct dm_target *ti;
+	sector_t pos;
+
+	/* always use block 0 to find the target for flushes for now */
+	pos = 0;
+	if (!(rq->cmd_flags & REQ_FLUSH))
+		pos = blk_rq_pos(rq);
+
+	ti = dm_table_find_target(map, pos);
+	if (!dm_target_is_valid(ti)) {
+		dm_put_live_table(md, srcu_idx);
+		DMERR_LIMIT("request attempted access beyond the end of device");
+		/*
+		 * Must perform setup, that rq_completed() requires,
+		 * before returning BLK_MQ_RQ_QUEUE_ERROR
+		 */
+		dm_start_request(md, rq);
+		return BLK_MQ_RQ_QUEUE_ERROR;
+	}
+	dm_put_live_table(md, srcu_idx);
+
+	if (ti->type->busy && ti->type->busy(ti))
+		return BLK_MQ_RQ_QUEUE_BUSY;
+
+	dm_start_request(md, rq);
+
+	/* Init tio using md established in .init_request */
+	init_tio(tio, rq, md);
+
+	/* Establish tio->ti before queuing work (map_tio_request) */
+	tio->ti = ti;
+	queue_kthread_work(&md->kworker, &tio->work);
+
+	return BLK_MQ_RQ_QUEUE_OK;
+}
+
+static struct blk_mq_ops dm_mq_ops = {
+	.queue_rq = dm_mq_queue_rq,
+	.map_queue = blk_mq_map_queue,
+	.complete = dm_softirq_done,
+	.init_request = dm_mq_init_request,
+};
+
+static int dm_init_request_based_blk_mq_queue(struct mapped_device *md)
+{
+	struct request_queue *q;
+	int err;
+
+	memset(&md->tag_set, 0, sizeof(md->tag_set));
+	md->tag_set.ops = &dm_mq_ops;
+	md->tag_set.queue_depth = BLKDEV_MAX_RQ;
+	md->tag_set.numa_node = NUMA_NO_NODE;
+	md->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
+	md->tag_set.nr_hw_queues = 1;
+	md->tag_set.cmd_size = sizeof(struct dm_rq_target_io);
+	md->tag_set.driver_data = md;
+
+	err = blk_mq_alloc_tag_set(&md->tag_set);
+	if (err)
+		return err;
+
+	q = blk_mq_init_allocated_queue(&md->tag_set, md->queue);
+	if (IS_ERR(q)) {
+		err = PTR_ERR(q);
+		goto out_tag_set;
+	}
+	md->queue = q;
+	dm_init_md_queue(md);
+
+	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
+	blk_mq_register_disk(md->disk);
+
+	init_rq_based_worker_thread(md);
+
+	return 0;
+
+out_tag_set:
+	blk_mq_free_tag_set(&md->tag_set);
+	return err;
 }
 
 /*
@@ -2601,15 +2770,29 @@ static int dm_init_request_based_queue(struct mapped_device *md)
  */
 int dm_setup_md_queue(struct mapped_device *md)
 {
-	if (dm_md_type_request_based(md)) {
-		if (!dm_init_request_based_queue(md)) {
+	int r;
+	unsigned md_type = dm_get_md_type(md);
+
+	switch (md_type) {
+	case DM_TYPE_REQUEST_BASED:
+		r = dm_init_request_based_queue(md);
+		if (r) {
 			DMWARN("Cannot initialize queue for request-based mapped device");
-			return -EINVAL;
+			return r;
 		}
-	} else {
-		/* bio-based specific initialization */
+		break;
+	case DM_TYPE_MQ_REQUEST_BASED:
+		r = dm_init_request_based_blk_mq_queue(md);
+		if (r) {
+			DMWARN("Cannot initialize queue for request-based blk-mq mapped device");
+			return r;
+		}
+		break;
+	case DM_TYPE_BIO_BASED:
+		dm_init_old_md_queue(md);
 		blk_queue_make_request(md->queue, dm_make_request);
 		blk_queue_merge_bvec(md->queue, dm_merge_bvec);
+		break;
 	}
 
 	return 0;
diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index 889f3a5b7b18..eac8c3641f39 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	30
+#define DM_VERSION_MINOR	31
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2014-12-22)"
+#define DM_VERSION_EXTRA	"-ioctl (2015-3-12)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit 


From a166151cbe33b53221c24259e4a7201064b3ba79 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 15 Apr 2015 12:55:45 -0700
Subject: bpf: fix bpf helpers to use skb->mac_header relative offsets

For the short-term solution, lets fix bpf helper functions to use
skb->mac_header relative offsets instead of skb->data in order to
get the same eBPF programs with cls_bpf and act_bpf work on ingress
and egress qdisc path. We need to ensure that mac_header is set
before calling into programs. This is effectively the first option
from below referenced discussion.

More long term solution for LD_ABS|LD_IND instructions will be more
intrusive but also more beneficial than this, and implemented later
as it's too risky at this point in time.

I.e., we plan to look into the option of moving skb_pull() out of
eth_type_trans() and into netif_receive_skb() as has been suggested
as second option. Meanwhile, this solution ensures ingress can be
used with eBPF, too, and that we won't run into ABI troubles later.
For dealing with negative offsets inside eBPF helper functions,
we've implemented bpf_skb_clone_unwritable() to test for unwriteable
headers.

Reference: http://thread.gmane.org/gmane.linux.network/359129/focus=359694
Fixes: 608cd71a9c7c ("tc: bpf: generalize pedit action")
Fixes: 91bc4822c3d6 ("tc: bpf: add checksum helpers")
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h    |  2 +-
 include/uapi/linux/filter.h |  7 +++++--
 net/core/filter.c           | 41 ++++++++++++++++++++++++++++++++---------
 net/sched/act_bpf.c         |  3 +++
 net/sched/cls_bpf.c         |  3 +++
 samples/bpf/tcbpf1_kern.c   | 16 ++++++----------
 6 files changed, 50 insertions(+), 22 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 5c1cee11f777..a9ebdf5701e8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -177,7 +177,7 @@ enum bpf_func_id {
 	/**
 	 * skb_store_bytes(skb, offset, from, len, flags) - store bytes into packet
 	 * @skb: pointer to skb
-	 * @offset: offset within packet from skb->data
+	 * @offset: offset within packet from skb->mac_header
 	 * @from: pointer where to copy bytes from
 	 * @len: number of bytes to store into packet
 	 * @flags: bit 0 - if true, recompute skb->csum
diff --git a/include/uapi/linux/filter.h b/include/uapi/linux/filter.h
index 34c7936ca114..c97340e43dd6 100644
--- a/include/uapi/linux/filter.h
+++ b/include/uapi/linux/filter.h
@@ -79,8 +79,11 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_RANDOM	56
 #define SKF_AD_VLAN_TPID	60
 #define SKF_AD_MAX	64
-#define SKF_NET_OFF   (-0x100000)
-#define SKF_LL_OFF    (-0x200000)
 
+#define SKF_NET_OFF	(-0x100000)
+#define SKF_LL_OFF	(-0x200000)
+
+#define BPF_NET_OFF	SKF_NET_OFF
+#define BPF_LL_OFF	SKF_LL_OFF
 
 #endif /* _UAPI__LINUX_FILTER_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index b669e75d2b36..bf831a85c315 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1175,12 +1175,27 @@ int sk_attach_bpf(u32 ufd, struct sock *sk)
 	return 0;
 }
 
+/**
+ *	bpf_skb_clone_not_writable - is the header of a clone not writable
+ *	@skb: buffer to check
+ *	@len: length up to which to write, can be negative
+ *
+ *	Returns true if modifying the header part of the cloned buffer
+ *	does require the data to be copied. I.e. this version works with
+ *	negative lengths needed for eBPF case!
+ */
+static bool bpf_skb_clone_unwritable(const struct sk_buff *skb, int len)
+{
+	return skb_header_cloned(skb) ||
+	       (int) skb_headroom(skb) + len > skb->hdr_len;
+}
+
 #define BPF_RECOMPUTE_CSUM(flags)	((flags) & 1)
 
 static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
-	unsigned int offset = (unsigned int) r2;
+	int offset = (int) r2;
 	void *from = (void *) (long) r3;
 	unsigned int len = (unsigned int) r4;
 	char buf[16];
@@ -1194,10 +1209,12 @@ static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags)
 	 *
 	 * so check for invalid 'offset' and too large 'len'
 	 */
-	if (unlikely(offset > 0xffff || len > sizeof(buf)))
+	if (unlikely((u32) offset > 0xffff || len > sizeof(buf)))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + len))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + len)))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, len, buf);
@@ -1232,15 +1249,18 @@ const struct bpf_func_proto bpf_skb_store_bytes_proto = {
 #define BPF_HEADER_FIELD_SIZE(flags)	((flags) & 0x0f)
 #define BPF_IS_PSEUDO_HEADER(flags)	((flags) & 0x10)
 
-static u64 bpf_l3_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
+	int offset = (int) r2;
 	__sum16 sum, *ptr;
 
-	if (unlikely(offset > 0xffff))
+	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
@@ -1276,16 +1296,19 @@ const struct bpf_func_proto bpf_l3_csum_replace_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
-static u64 bpf_l4_csum_replace(u64 r1, u64 offset, u64 from, u64 to, u64 flags)
+static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags)
 {
 	struct sk_buff *skb = (struct sk_buff *) (long) r1;
 	u32 is_pseudo = BPF_IS_PSEUDO_HEADER(flags);
+	int offset = (int) r2;
 	__sum16 sum, *ptr;
 
-	if (unlikely(offset > 0xffff))
+	if (unlikely((u32) offset > 0xffff))
 		return -EFAULT;
 
-	if (skb_cloned(skb) && !skb_clone_writable(skb, offset + sizeof(sum)))
+	offset -= skb->data - skb_mac_header(skb);
+	if (unlikely(skb_cloned(skb) &&
+		     bpf_skb_clone_unwritable(skb, offset + sizeof(sum))))
 		return -EFAULT;
 
 	ptr = skb_header_pointer(skb, offset, sizeof(sum), &sum);
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 4d2cede17468..dc6a2d324bd8 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -38,6 +38,9 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act,
 	struct tcf_bpf *prog = act->priv;
 	int action, filter_res;
 
+	if (unlikely(!skb_mac_header_was_set(skb)))
+		return TC_ACT_UNSPEC;
+
 	spin_lock(&prog->tcf_lock);
 
 	prog->tcf_tm.lastuse = jiffies;
diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
index 5c4171c5d2bd..91bd9c19471d 100644
--- a/net/sched/cls_bpf.c
+++ b/net/sched/cls_bpf.c
@@ -66,6 +66,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	struct cls_bpf_prog *prog;
 	int ret = -1;
 
+	if (unlikely(!skb_mac_header_was_set(skb)))
+		return -1;
+
 	/* Needed here for accessing maps. */
 	rcu_read_lock();
 	list_for_each_entry_rcu(prog, &head->plist, link) {
diff --git a/samples/bpf/tcbpf1_kern.c b/samples/bpf/tcbpf1_kern.c
index 7cf3f42a6e39..7c27710f8296 100644
--- a/samples/bpf/tcbpf1_kern.c
+++ b/samples/bpf/tcbpf1_kern.c
@@ -4,6 +4,8 @@
 #include <uapi/linux/ip.h>
 #include <uapi/linux/in.h>
 #include <uapi/linux/tcp.h>
+#include <uapi/linux/filter.h>
+
 #include "bpf_helpers.h"
 
 /* compiler workaround */
@@ -14,18 +16,12 @@ static inline void set_dst_mac(struct __sk_buff *skb, char *mac)
 	bpf_skb_store_bytes(skb, 0, mac, ETH_ALEN, 1);
 }
 
-/* use 1 below for ingress qdisc and 0 for egress */
-#if 0
-#undef ETH_HLEN
-#define ETH_HLEN 0
-#endif
-
 #define IP_CSUM_OFF (ETH_HLEN + offsetof(struct iphdr, check))
 #define TOS_OFF (ETH_HLEN + offsetof(struct iphdr, tos))
 
 static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
 {
-	__u8 old_tos = load_byte(skb, TOS_OFF);
+	__u8 old_tos = load_byte(skb, BPF_LL_OFF + TOS_OFF);
 
 	bpf_l3_csum_replace(skb, IP_CSUM_OFF, htons(old_tos), htons(new_tos), 2);
 	bpf_skb_store_bytes(skb, TOS_OFF, &new_tos, sizeof(new_tos), 0);
@@ -38,7 +34,7 @@ static inline void set_ip_tos(struct __sk_buff *skb, __u8 new_tos)
 
 static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
 {
-	__u32 old_ip = _htonl(load_word(skb, IP_SRC_OFF));
+	__u32 old_ip = _htonl(load_word(skb, BPF_LL_OFF + IP_SRC_OFF));
 
 	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_ip, new_ip, IS_PSEUDO | sizeof(new_ip));
 	bpf_l3_csum_replace(skb, IP_CSUM_OFF, old_ip, new_ip, sizeof(new_ip));
@@ -48,7 +44,7 @@ static inline void set_tcp_ip_src(struct __sk_buff *skb, __u32 new_ip)
 #define TCP_DPORT_OFF (ETH_HLEN + sizeof(struct iphdr) + offsetof(struct tcphdr, dest))
 static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
 {
-	__u16 old_port = htons(load_half(skb, TCP_DPORT_OFF));
+	__u16 old_port = htons(load_half(skb, BPF_LL_OFF + TCP_DPORT_OFF));
 
 	bpf_l4_csum_replace(skb, TCP_CSUM_OFF, old_port, new_port, sizeof(new_port));
 	bpf_skb_store_bytes(skb, TCP_DPORT_OFF, &new_port, sizeof(new_port), 0);
@@ -57,7 +53,7 @@ static inline void set_tcp_dest_port(struct __sk_buff *skb, __u16 new_port)
 SEC("classifier")
 int bpf_prog1(struct __sk_buff *skb)
 {
-	__u8 proto = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol));
+	__u8 proto = load_byte(skb, BPF_LL_OFF + ETH_HLEN + offsetof(struct iphdr, protocol));
 	long *value;
 
 	if (proto == IPPROTO_TCP) {
-- 
cgit 


From cec32a47010647e8b0603726ebb75b990a4057a4 Mon Sep 17 00:00:00 2001
From: Philipp Zabel <p.zabel@pengutronix.de>
Date: Fri, 17 Apr 2015 19:12:41 +0200
Subject: media-bus: Fixup RGB444_1X12, RGB565_1X16, and YUV8_1X24 media bus
 format

Change the constant values for RGB444_1X12, RGB565_1X16, and YUV8_1X24 media
bus formats in anticipation of a merge conflict with the media tree, where
the old values are already taken by RBG888_1X24, RGB888_1X32_PADHI, and
VUY8_1X24, respectively.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 Documentation/DocBook/media/v4l/subdev-formats.xml |  6 +++---
 include/uapi/linux/media-bus-format.h              | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/v4l/subdev-formats.xml b/Documentation/DocBook/media/v4l/subdev-formats.xml
index 18b71aff48c9..553a38024745 100644
--- a/Documentation/DocBook/media/v4l/subdev-formats.xml
+++ b/Documentation/DocBook/media/v4l/subdev-formats.xml
@@ -196,7 +196,7 @@ see <xref linkend="colorspaces" />.</entry>
 	  <tbody valign="top">
 	    <row id="MEDIA-BUS-FMT-RGB444-1X12">
 	      <entry>MEDIA_BUS_FMT_RGB444_1X12</entry>
-	      <entry>0x100e</entry>
+	      <entry>0x1016</entry>
 	      <entry></entry>
 	      &dash-ent-20;
 	      <entry>r<subscript>3</subscript></entry>
@@ -326,7 +326,7 @@ see <xref linkend="colorspaces" />.</entry>
 	    </row>
 	    <row id="MEDIA-BUS-FMT-RGB565-1X16">
 	      <entry>MEDIA_BUS_FMT_RGB565_1X16</entry>
-	      <entry>0x100f</entry>
+	      <entry>0x1017</entry>
 	      <entry></entry>
 	      &dash-ent-16;
 	      <entry>r<subscript>4</subscript></entry>
@@ -3049,7 +3049,7 @@ see <xref linkend="colorspaces" />.</entry>
 	    </row>
 	    <row id="MEDIA-BUS-FMT-YUV8-1X24">
 	      <entry>MEDIA_BUS_FMT_YUV8_1X24</entry>
-	      <entry>0x2024</entry>
+	      <entry>0x2025</entry>
 	      <entry></entry>
 	      <entry>-</entry>
 	      <entry>-</entry>
diff --git a/include/uapi/linux/media-bus-format.h b/include/uapi/linux/media-bus-format.h
index 83ea46f4be51..73c78f18a328 100644
--- a/include/uapi/linux/media-bus-format.h
+++ b/include/uapi/linux/media-bus-format.h
@@ -33,13 +33,13 @@
 
 #define MEDIA_BUS_FMT_FIXED			0x0001
 
-/* RGB - next is	0x1016 */
-#define MEDIA_BUS_FMT_RGB444_1X12		0x100e
+/* RGB - next is	0x1018 */
+#define MEDIA_BUS_FMT_RGB444_1X12		0x1016
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_BE	0x1001
 #define MEDIA_BUS_FMT_RGB444_2X8_PADHI_LE	0x1002
 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_BE	0x1003
 #define MEDIA_BUS_FMT_RGB555_2X8_PADHI_LE	0x1004
-#define MEDIA_BUS_FMT_RGB565_1X16		0x100f
+#define MEDIA_BUS_FMT_RGB565_1X16		0x1017
 #define MEDIA_BUS_FMT_BGR565_2X8_BE		0x1005
 #define MEDIA_BUS_FMT_BGR565_2X8_LE		0x1006
 #define MEDIA_BUS_FMT_RGB565_2X8_BE		0x1007
@@ -56,7 +56,7 @@
 #define MEDIA_BUS_FMT_RGB888_1X7X4_JEIDA	0x1012
 #define MEDIA_BUS_FMT_ARGB8888_1X32		0x100d
 
-/* YUV (including grey) - next is	0x2025 */
+/* YUV (including grey) - next is	0x2026 */
 #define MEDIA_BUS_FMT_Y8_1X8			0x2001
 #define MEDIA_BUS_FMT_UV8_1X8			0x2015
 #define MEDIA_BUS_FMT_UYVY8_1_5X8		0x2002
@@ -82,7 +82,7 @@
 #define MEDIA_BUS_FMT_VYUY10_1X20		0x201b
 #define MEDIA_BUS_FMT_YUYV10_1X20		0x200d
 #define MEDIA_BUS_FMT_YVYU10_1X20		0x200e
-#define MEDIA_BUS_FMT_YUV8_1X24			0x2024
+#define MEDIA_BUS_FMT_YUV8_1X24			0x2025
 #define MEDIA_BUS_FMT_YUV10_1X30		0x2016
 #define MEDIA_BUS_FMT_AYUV8_1X32		0x2017
 #define MEDIA_BUS_FMT_UYVY12_2X12		0x201c
-- 
cgit 


From 0ad46af8a618fc38e0cdc3927cfa9f7b42cc9423 Mon Sep 17 00:00:00 2001
From: Andy Grover <agrover@redhat.com>
Date: Tue, 14 Apr 2015 17:30:04 -0700
Subject: target: Version 2 of TCMU ABI

The initial version of TCMU (in 3.18) does not properly handle
bidirectional SCSI commands -- those with both an in and out buffer. In
looking to fix this it also became clear that TCMU's support for adding
new types of entries (opcodes) to the command ring was broken. We need
to fix this now, so that future issues can be handled properly by adding
new opcodes.

We make the most of this ABI break by enabling bidi cmd handling within
TCMP_OP_CMD opcode. Add an iov_bidi_cnt field to tcmu_cmd_entry.req.
This enables TCMU to describe bidi commands, but further kernel work is
needed for full bidi support.

Enlarge tcmu_cmd_entry_hdr by 32 bits by pulling in cmd_id and __pad1. Turn
__pad1 into two 8 bit flags fields, for kernel-set and userspace-set flags,
"kflags" and "uflags" respectively.

Update version fields so userspace can tell the interface is changed.

Update tcmu-design.txt with details of how new stuff works:
- Specify an additional requirement for userspace to set UNKNOWN_OP
  (bit 0) in hdr.uflags for unknown/unhandled opcodes.
- Define how Data-In and Data-Out fields are described in req.iov[]

Changed in v2:
- Change name of SKIPPED bit to UNKNOWN bit
- PAD op does not set the bit any more
- Change len_op helper functions to take just len_op, not the whole struct
- Change version to 2 in missed spots, and use defines
- Add 16 unused bytes to cmd_entry.req, in case additional SAM cmd
  parameters need to be included
- Add iov_dif_cnt field to specify buffers used for DIF info in iov[]
- Rearrange fields to naturally align cdb_off
- Handle if userspace sets UNKNOWN_OP by indicating failure of the cmd
- Wrap some overly long UPDATE_HEAD lines

(Add missing req.iov_bidi_cnt + req.iov_dif_cnt zeroing - Ilias)

Signed-off-by: Andy Grover <agrover@redhat.com>
Reviewed-by: Ilias Tsitsimpis <iliastsi@arrikto.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
---
 Documentation/target/tcmu-design.txt  | 43 ++++++++++++++++++++------------
 drivers/target/target_core_user.c     | 46 ++++++++++++++++++++++++++---------
 include/uapi/linux/target_core_user.h | 44 ++++++++++++++++++++-------------
 3 files changed, 89 insertions(+), 44 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/target/tcmu-design.txt b/Documentation/target/tcmu-design.txt
index 5518465290bf..43e94ea6d2ca 100644
--- a/Documentation/target/tcmu-design.txt
+++ b/Documentation/target/tcmu-design.txt
@@ -138,27 +138,40 @@ signals the kernel via a 4-byte write(). When cmd_head equals
 cmd_tail, the ring is empty -- no commands are currently waiting to be
 processed by userspace.
 
-TCMU commands start with a common header containing "len_op", a 32-bit
-value that stores the length, as well as the opcode in the lowest
-unused bits. Currently only two opcodes are defined, TCMU_OP_PAD and
-TCMU_OP_CMD. When userspace encounters a command with PAD opcode, it
-should skip ahead by the bytes in "length". (The kernel inserts PAD
-entries to ensure each CMD entry fits contigously into the circular
-buffer.)
-
-When userspace handles a CMD, it finds the SCSI CDB (Command Data
-Block) via tcmu_cmd_entry.req.cdb_off. This is an offset from the
-start of the overall shared memory region, not the entry. The data
-in/out buffers are accessible via tht req.iov[] array. Note that
-each iov.iov_base is also an offset from the start of the region.
-
-TCMU currently does not support BIDI operations.
+TCMU commands are 8-byte aligned. They start with a common header
+containing "len_op", a 32-bit value that stores the length, as well as
+the opcode in the lowest unused bits. It also contains cmd_id and
+flags fields for setting by the kernel (kflags) and userspace
+(uflags).
+
+Currently only two opcodes are defined, TCMU_OP_CMD and TCMU_OP_PAD.
+
+When the opcode is CMD, the entry in the command ring is a struct
+tcmu_cmd_entry. Userspace finds the SCSI CDB (Command Data Block) via
+tcmu_cmd_entry.req.cdb_off. This is an offset from the start of the
+overall shared memory region, not the entry. The data in/out buffers
+are accessible via tht req.iov[] array. iov_cnt contains the number of
+entries in iov[] needed to describe either the Data-In or Data-Out
+buffers. For bidirectional commands, iov_cnt specifies how many iovec
+entries cover the Data-Out area, and iov_bidi_count specifies how many
+iovec entries immediately after that in iov[] cover the Data-In
+area. Just like other fields, iov.iov_base is an offset from the start
+of the region.
 
 When completing a command, userspace sets rsp.scsi_status, and
 rsp.sense_buffer if necessary. Userspace then increments
 mailbox.cmd_tail by entry.hdr.length (mod cmdr_size) and signals the
 kernel via the UIO method, a 4-byte write to the file descriptor.
 
+When the opcode is PAD, userspace only updates cmd_tail as above --
+it's a no-op. (The kernel inserts PAD entries to ensure each CMD entry
+is contiguous within the command ring.)
+
+More opcodes may be added in the future. If userspace encounters an
+opcode it does not handle, it must set UNKNOWN_OP bit (bit 0) in
+hdr.uflags, update cmd_tail, and proceed with processing additional
+commands, if any.
+
 The Data Area:
 
 This is shared-memory space after the command ring. The organization
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
index 1fbf304a9491..dbc872a6c981 100644
--- a/drivers/target/target_core_user.c
+++ b/drivers/target/target_core_user.c
@@ -344,8 +344,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 		entry = (void *) mb + CMDR_OFF + cmd_head;
 		tcmu_flush_dcache_range(entry, sizeof(*entry));
-		tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD);
-		tcmu_hdr_set_len(&entry->hdr, pad_size);
+		tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_PAD);
+		tcmu_hdr_set_len(&entry->hdr.len_op, pad_size);
+		entry->hdr.cmd_id = 0; /* not used for PAD */
+		entry->hdr.kflags = 0;
+		entry->hdr.uflags = 0;
 
 		UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
 
@@ -355,9 +358,11 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 
 	entry = (void *) mb + CMDR_OFF + cmd_head;
 	tcmu_flush_dcache_range(entry, sizeof(*entry));
-	tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD);
-	tcmu_hdr_set_len(&entry->hdr, command_size);
-	entry->cmd_id = tcmu_cmd->cmd_id;
+	tcmu_hdr_set_op(&entry->hdr.len_op, TCMU_OP_CMD);
+	tcmu_hdr_set_len(&entry->hdr.len_op, command_size);
+	entry->hdr.cmd_id = tcmu_cmd->cmd_id;
+	entry->hdr.kflags = 0;
+	entry->hdr.uflags = 0;
 
 	/*
 	 * Fix up iovecs, and handle if allocation in data ring wrapped.
@@ -407,6 +412,8 @@ static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
 		kunmap_atomic(from);
 	}
 	entry->req.iov_cnt = iov_cnt;
+	entry->req.iov_bidi_cnt = 0;
+	entry->req.iov_dif_cnt = 0;
 
 	/* All offsets relative to mb_addr, not start of entry! */
 	cdb_off = CMDR_OFF + cmd_head + base_command_size;
@@ -464,6 +471,17 @@ static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *
 		return;
 	}
 
+	if (entry->hdr.uflags & TCMU_UFLAG_UNKNOWN_OP) {
+		UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
+		pr_warn("TCMU: Userspace set UNKNOWN_OP flag on se_cmd %p\n",
+			cmd->se_cmd);
+		transport_generic_request_failure(cmd->se_cmd,
+			TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE);
+		cmd->se_cmd = NULL;
+		kmem_cache_free(tcmu_cmd_cache, cmd);
+		return;
+	}
+
 	if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
 		memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
 			       se_cmd->scsi_sense_length);
@@ -542,14 +560,16 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 
 		tcmu_flush_dcache_range(entry, sizeof(*entry));
 
-		if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) {
-			UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+		if (tcmu_hdr_get_op(entry->hdr.len_op) == TCMU_OP_PAD) {
+			UPDATE_HEAD(udev->cmdr_last_cleaned,
+				    tcmu_hdr_get_len(entry->hdr.len_op),
+				    udev->cmdr_size);
 			continue;
 		}
-		WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD);
+		WARN_ON(tcmu_hdr_get_op(entry->hdr.len_op) != TCMU_OP_CMD);
 
 		spin_lock(&udev->commands_lock);
-		cmd = idr_find(&udev->commands, entry->cmd_id);
+		cmd = idr_find(&udev->commands, entry->hdr.cmd_id);
 		if (cmd)
 			idr_remove(&udev->commands, cmd->cmd_id);
 		spin_unlock(&udev->commands_lock);
@@ -562,7 +582,9 @@ static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
 
 		tcmu_handle_completion(cmd, entry);
 
-		UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
+		UPDATE_HEAD(udev->cmdr_last_cleaned,
+			    tcmu_hdr_get_len(entry->hdr.len_op),
+			    udev->cmdr_size);
 
 		handled++;
 	}
@@ -840,14 +862,14 @@ static int tcmu_configure_device(struct se_device *dev)
 	udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
 
 	mb = udev->mb_addr;
-	mb->version = 1;
+	mb->version = TCMU_MAILBOX_VERSION;
 	mb->cmdr_off = CMDR_OFF;
 	mb->cmdr_size = udev->cmdr_size;
 
 	WARN_ON(!PAGE_ALIGNED(udev->data_off));
 	WARN_ON(udev->data_size % PAGE_SIZE);
 
-	info->version = "1";
+	info->version = xstr(TCMU_MAILBOX_VERSION);
 
 	info->mem[0].name = "tcm-user command & data buffer";
 	info->mem[0].addr = (phys_addr_t) udev->mb_addr;
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
index b483d1909d3e..b67f99d3c520 100644
--- a/include/uapi/linux/target_core_user.h
+++ b/include/uapi/linux/target_core_user.h
@@ -6,7 +6,7 @@
 #include <linux/types.h>
 #include <linux/uio.h>
 
-#define TCMU_VERSION "1.0"
+#define TCMU_VERSION "2.0"
 
 /*
  * Ring Design
@@ -39,9 +39,13 @@
  * should process the next packet the same way, and so on.
  */
 
-#define TCMU_MAILBOX_VERSION 1
+#define TCMU_MAILBOX_VERSION 2
 #define ALIGN_SIZE 64 /* Should be enough for most CPUs */
 
+/* See https://gcc.gnu.org/onlinedocs/cpp/Stringification.html */
+#define xstr(s) str(s)
+#define str(s) #s
+
 struct tcmu_mailbox {
 	__u16 version;
 	__u16 flags;
@@ -64,31 +68,36 @@ enum tcmu_opcode {
  * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
  */
 struct tcmu_cmd_entry_hdr {
-		__u32 len_op;
+	__u32 len_op;
+	__u16 cmd_id;
+	__u8 kflags;
+#define TCMU_UFLAG_UNKNOWN_OP 0x1
+	__u8 uflags;
+
 } __packed;
 
 #define TCMU_OP_MASK 0x7
 
-static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
+static inline enum tcmu_opcode tcmu_hdr_get_op(__u32 len_op)
 {
-	return hdr->len_op & TCMU_OP_MASK;
+	return len_op & TCMU_OP_MASK;
 }
 
-static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
+static inline void tcmu_hdr_set_op(__u32 *len_op, enum tcmu_opcode op)
 {
-	hdr->len_op &= ~TCMU_OP_MASK;
-	hdr->len_op |= (op & TCMU_OP_MASK);
+	*len_op &= ~TCMU_OP_MASK;
+	*len_op |= (op & TCMU_OP_MASK);
 }
 
-static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
+static inline __u32 tcmu_hdr_get_len(__u32 len_op)
 {
-	return hdr->len_op & ~TCMU_OP_MASK;
+	return len_op & ~TCMU_OP_MASK;
 }
 
-static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
+static inline void tcmu_hdr_set_len(__u32 *len_op, __u32 len)
 {
-	hdr->len_op &= TCMU_OP_MASK;
-	hdr->len_op |= len;
+	*len_op &= TCMU_OP_MASK;
+	*len_op |= len;
 }
 
 /* Currently the same as SCSI_SENSE_BUFFERSIZE */
@@ -97,13 +106,14 @@ static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
 struct tcmu_cmd_entry {
 	struct tcmu_cmd_entry_hdr hdr;
 
-	uint16_t cmd_id;
-	uint16_t __pad1;
-
 	union {
 		struct {
+			uint32_t iov_cnt;
+			uint32_t iov_bidi_cnt;
+			uint32_t iov_dif_cnt;
 			uint64_t cdb_off;
-			uint64_t iov_cnt;
+			uint64_t __pad1;
+			uint64_t __pad2;
 			struct iovec iov[0];
 		} req;
 		struct {
-- 
cgit 


From e928e9cb3601ce240189bfea05b67ebd391c85ae Mon Sep 17 00:00:00 2001
From: Michael Ellerman <michael@ellerman.id.au>
Date: Fri, 20 Mar 2015 20:39:41 +1100
Subject: KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.

Some PowerNV systems include a hardware random-number generator.
This HWRNG is present on POWER7+ and POWER8 chips and is capable of
generating one 64-bit random number every microsecond.  The random
numbers are produced by sampling a set of 64 unstable high-frequency
oscillators and are almost completely entropic.

PAPR defines an H_RANDOM hypercall which guests can use to obtain one
64-bit random sample from the HWRNG.  This adds a real-mode
implementation of the H_RANDOM hypercall.  This hypercall was
implemented in real mode because the latency of reading the HWRNG is
generally small compared to the latency of a guest exit and entry for
all the threads in the same virtual core.

Userspace can detect the presence of the HWRNG and the H_RANDOM
implementation by querying the KVM_CAP_PPC_HWRNG capability.  The
H_RANDOM hypercall implementation will only be invoked when the guest
does an H_RANDOM hypercall if userspace first enables the in-kernel
H_RANDOM implementation using the KVM_CAP_PPC_ENABLE_HCALL capability.

Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Signed-off-by: Alexander Graf <agraf@suse.de>
---
 Documentation/virtual/kvm/api.txt       |  17 +++++
 arch/powerpc/include/asm/archrandom.h   |  11 ++-
 arch/powerpc/include/asm/kvm_ppc.h      |   2 +
 arch/powerpc/kvm/book3s_hv_builtin.c    |  15 +++++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 115 ++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/powerpc.c              |   3 +
 arch/powerpc/platforms/powernv/rng.c    |  29 ++++++++
 include/uapi/linux/kvm.h                |   1 +
 8 files changed, 191 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index bc9f6fe44e27..9fa2bf8c3f6f 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -3573,3 +3573,20 @@ struct {
 @ar   - access register number
 
 KVM handlers should exit to userspace with rc = -EREMOTE.
+
+
+8. Other capabilities.
+----------------------
+
+This section lists capabilities that give information about other
+features of the KVM implementation.
+
+8.1 KVM_CAP_PPC_HWRNG
+
+Architectures: ppc
+
+This capability, if KVM_CHECK_EXTENSION indicates that it is
+available, means that that the kernel has an implementation of the
+H_RANDOM hypercall backed by a hardware random-number generator.
+If present, the kernel H_RANDOM handler can be enabled for guest use
+with the KVM_CAP_PPC_ENABLE_HCALL capability.
diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
index bde531103638..0cc6eedc4780 100644
--- a/arch/powerpc/include/asm/archrandom.h
+++ b/arch/powerpc/include/asm/archrandom.h
@@ -30,8 +30,6 @@ static inline int arch_has_random(void)
 	return !!ppc_md.get_random_long;
 }
 
-int powernv_get_random_long(unsigned long *v);
-
 static inline int arch_get_random_seed_long(unsigned long *v)
 {
 	return 0;
@@ -47,4 +45,13 @@ static inline int arch_has_random_seed(void)
 
 #endif /* CONFIG_ARCH_RANDOM */
 
+#ifdef CONFIG_PPC_POWERNV
+int powernv_hwrng_present(void);
+int powernv_get_random_long(unsigned long *v);
+int powernv_get_random_real_mode(unsigned long *v);
+#else
+static inline int powernv_hwrng_present(void) { return 0; }
+static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
+#endif
+
 #endif /* _ASM_POWERPC_ARCHRANDOM_H */
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index 46bf652c9169..b8475daad884 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -302,6 +302,8 @@ static inline bool is_kvmppc_hv_enabled(struct kvm *kvm)
 	return kvm->arch.kvm_ops == kvmppc_hv_ops;
 }
 
+extern int kvmppc_hwrng_present(void);
+
 /*
  * Cuts out inst bits with ordering according to spec.
  * That means the leftmost bit is zero. All given bits are included.
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 1f083ff8a61a..1954a1c4b1f9 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -21,6 +21,7 @@
 #include <asm/cputable.h>
 #include <asm/kvm_ppc.h>
 #include <asm/kvm_book3s.h>
+#include <asm/archrandom.h>
 
 #define KVM_CMA_CHUNK_ORDER	18
 
@@ -169,3 +170,17 @@ int kvmppc_hcall_impl_hv_realmode(unsigned long cmd)
 	return 0;
 }
 EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+
+int kvmppc_hwrng_present(void)
+{
+	return powernv_hwrng_present();
+}
+EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+
+long kvmppc_h_random(struct kvm_vcpu *vcpu)
+{
+	if (powernv_get_random_real_mode(&vcpu->arch.gpr[4]))
+		return H_SUCCESS;
+
+	return H_HARDWARE;
+}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 6cbf1630cb70..0814ca122fcd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1839,6 +1839,121 @@ hcall_real_table:
 	.long	0		/* 0x12c */
 	.long	0		/* 0x130 */
 	.long	DOTSYM(kvmppc_h_set_xdabr) - hcall_real_table
+	.long	0		/* 0x138 */
+	.long	0		/* 0x13c */
+	.long	0		/* 0x140 */
+	.long	0		/* 0x144 */
+	.long	0		/* 0x148 */
+	.long	0		/* 0x14c */
+	.long	0		/* 0x150 */
+	.long	0		/* 0x154 */
+	.long	0		/* 0x158 */
+	.long	0		/* 0x15c */
+	.long	0		/* 0x160 */
+	.long	0		/* 0x164 */
+	.long	0		/* 0x168 */
+	.long	0		/* 0x16c */
+	.long	0		/* 0x170 */
+	.long	0		/* 0x174 */
+	.long	0		/* 0x178 */
+	.long	0		/* 0x17c */
+	.long	0		/* 0x180 */
+	.long	0		/* 0x184 */
+	.long	0		/* 0x188 */
+	.long	0		/* 0x18c */
+	.long	0		/* 0x190 */
+	.long	0		/* 0x194 */
+	.long	0		/* 0x198 */
+	.long	0		/* 0x19c */
+	.long	0		/* 0x1a0 */
+	.long	0		/* 0x1a4 */
+	.long	0		/* 0x1a8 */
+	.long	0		/* 0x1ac */
+	.long	0		/* 0x1b0 */
+	.long	0		/* 0x1b4 */
+	.long	0		/* 0x1b8 */
+	.long	0		/* 0x1bc */
+	.long	0		/* 0x1c0 */
+	.long	0		/* 0x1c4 */
+	.long	0		/* 0x1c8 */
+	.long	0		/* 0x1cc */
+	.long	0		/* 0x1d0 */
+	.long	0		/* 0x1d4 */
+	.long	0		/* 0x1d8 */
+	.long	0		/* 0x1dc */
+	.long	0		/* 0x1e0 */
+	.long	0		/* 0x1e4 */
+	.long	0		/* 0x1e8 */
+	.long	0		/* 0x1ec */
+	.long	0		/* 0x1f0 */
+	.long	0		/* 0x1f4 */
+	.long	0		/* 0x1f8 */
+	.long	0		/* 0x1fc */
+	.long	0		/* 0x200 */
+	.long	0		/* 0x204 */
+	.long	0		/* 0x208 */
+	.long	0		/* 0x20c */
+	.long	0		/* 0x210 */
+	.long	0		/* 0x214 */
+	.long	0		/* 0x218 */
+	.long	0		/* 0x21c */
+	.long	0		/* 0x220 */
+	.long	0		/* 0x224 */
+	.long	0		/* 0x228 */
+	.long	0		/* 0x22c */
+	.long	0		/* 0x230 */
+	.long	0		/* 0x234 */
+	.long	0		/* 0x238 */
+	.long	0		/* 0x23c */
+	.long	0		/* 0x240 */
+	.long	0		/* 0x244 */
+	.long	0		/* 0x248 */
+	.long	0		/* 0x24c */
+	.long	0		/* 0x250 */
+	.long	0		/* 0x254 */
+	.long	0		/* 0x258 */
+	.long	0		/* 0x25c */
+	.long	0		/* 0x260 */
+	.long	0		/* 0x264 */
+	.long	0		/* 0x268 */
+	.long	0		/* 0x26c */
+	.long	0		/* 0x270 */
+	.long	0		/* 0x274 */
+	.long	0		/* 0x278 */
+	.long	0		/* 0x27c */
+	.long	0		/* 0x280 */
+	.long	0		/* 0x284 */
+	.long	0		/* 0x288 */
+	.long	0		/* 0x28c */
+	.long	0		/* 0x290 */
+	.long	0		/* 0x294 */
+	.long	0		/* 0x298 */
+	.long	0		/* 0x29c */
+	.long	0		/* 0x2a0 */
+	.long	0		/* 0x2a4 */
+	.long	0		/* 0x2a8 */
+	.long	0		/* 0x2ac */
+	.long	0		/* 0x2b0 */
+	.long	0		/* 0x2b4 */
+	.long	0		/* 0x2b8 */
+	.long	0		/* 0x2bc */
+	.long	0		/* 0x2c0 */
+	.long	0		/* 0x2c4 */
+	.long	0		/* 0x2c8 */
+	.long	0		/* 0x2cc */
+	.long	0		/* 0x2d0 */
+	.long	0		/* 0x2d4 */
+	.long	0		/* 0x2d8 */
+	.long	0		/* 0x2dc */
+	.long	0		/* 0x2e0 */
+	.long	0		/* 0x2e4 */
+	.long	0		/* 0x2e8 */
+	.long	0		/* 0x2ec */
+	.long	0		/* 0x2f0 */
+	.long	0		/* 0x2f4 */
+	.long	0		/* 0x2f8 */
+	.long	0		/* 0x2fc */
+	.long	DOTSYM(kvmppc_h_random) - hcall_real_table
 	.globl	hcall_real_table_end
 hcall_real_table_end:
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 24bfe401373e..55a4763d6d11 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -529,6 +529,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_PPC_RMA:
 		r = 0;
 		break;
+	case KVM_CAP_PPC_HWRNG:
+		r = kvmppc_hwrng_present();
+		break;
 #endif
 	case KVM_CAP_SYNC_MMU:
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
index 80db43944afe..6eb808ff637e 100644
--- a/arch/powerpc/platforms/powernv/rng.c
+++ b/arch/powerpc/platforms/powernv/rng.c
@@ -24,12 +24,22 @@
 
 struct powernv_rng {
 	void __iomem *regs;
+	void __iomem *regs_real;
 	unsigned long mask;
 };
 
 static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
 
 
+int powernv_hwrng_present(void)
+{
+	struct powernv_rng *rng;
+
+	rng = get_cpu_var(powernv_rng);
+	put_cpu_var(rng);
+	return rng != NULL;
+}
+
 static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
 {
 	unsigned long parity;
@@ -46,6 +56,17 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
 	return val;
 }
 
+int powernv_get_random_real_mode(unsigned long *v)
+{
+	struct powernv_rng *rng;
+
+	rng = raw_cpu_read(powernv_rng);
+
+	*v = rng_whiten(rng, in_rm64(rng->regs_real));
+
+	return 1;
+}
+
 int powernv_get_random_long(unsigned long *v)
 {
 	struct powernv_rng *rng;
@@ -80,12 +101,20 @@ static __init void rng_init_per_cpu(struct powernv_rng *rng,
 static __init int rng_create(struct device_node *dn)
 {
 	struct powernv_rng *rng;
+	struct resource res;
 	unsigned long val;
 
 	rng = kzalloc(sizeof(*rng), GFP_KERNEL);
 	if (!rng)
 		return -ENOMEM;
 
+	if (of_address_to_resource(dn, 0, &res)) {
+		kfree(rng);
+		return -ENXIO;
+	}
+
+	rng->regs_real = (void __iomem *)res.start;
+
 	rng->regs = of_iomap(dn, 0);
 	if (!rng->regs) {
 		kfree(rng);
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f574d7be7631..4b60056776d1 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -813,6 +813,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_MIPS_MSA 112
 #define KVM_CAP_S390_INJECT_IRQ 113
 #define KVM_CAP_S390_IRQ_STATE 114
+#define KVM_CAP_PPC_HWRNG 115
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit 


From 135dd002c23054aaa056ea3162c1e0356905c195 Mon Sep 17 00:00:00 2001
From: Mark Salter <msalter@redhat.com>
Date: Mon, 6 Apr 2015 09:46:00 -0400
Subject: nfsd: eliminate NFSD_DEBUG

Commit f895b252d4edf ("sunrpc: eliminate RPC_DEBUG") introduced
use of IS_ENABLED() in a uapi header which leads to a build
failure for userspace apps trying to use <linux/nfsd/debug.h>:

   linux/nfsd/debug.h:18:15: error: missing binary operator before token "("
  #if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
                ^

Since this was only used to define NFSD_DEBUG if CONFIG_SUNRPC_DEBUG
is enabled, replace instances of NFSD_DEBUG with CONFIG_SUNRPC_DEBUG.

Cc: stable@vger.kernel.org
Fixes: f895b252d4edf "sunrpc: eliminate RPC_DEBUG"
Signed-off-by: Mark Salter <msalter@redhat.com>
Reviewed-by: Jeff Layton <jlayton@primarydata.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svcsubs.c              | 2 +-
 fs/nfsd/nfs4state.c             | 2 +-
 fs/nfsd/nfsd.h                  | 2 +-
 include/uapi/linux/nfsd/debug.h | 8 --------
 4 files changed, 3 insertions(+), 11 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index 665ef5a05183..a563ddbc19e6 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -31,7 +31,7 @@
 static struct hlist_head	nlm_files[FILE_NRHASH];
 static DEFINE_MUTEX(nlm_file_mutex);
 
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
 static inline void nlm_debug_print_fh(char *msg, struct nfs_fh *f)
 {
 	u32 *fhp = (u32*)f->data;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index fd875b6107ba..693495551bb2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1139,7 +1139,7 @@ hash_sessionid(struct nfs4_sessionid *sessionid)
 	return sid->sequence % SESSION_HASH_SIZE;
 }
 
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
 static inline void
 dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
 {
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 565c4da1a9eb..cf980523898b 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -24,7 +24,7 @@
 #include "export.h"
 
 #undef ifdebug
-#ifdef NFSD_DEBUG
+#ifdef CONFIG_SUNRPC_DEBUG
 # define ifdebug(flag)		if (nfsd_debug & NFSDDBG_##flag)
 #else
 # define ifdebug(flag)		if (0)
diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h
index 0bf130a1c58d..28ec6c9c421a 100644
--- a/include/uapi/linux/nfsd/debug.h
+++ b/include/uapi/linux/nfsd/debug.h
@@ -11,14 +11,6 @@
 
 #include <linux/sunrpc/debug.h>
 
-/*
- * Enable debugging for nfsd.
- * Requires RPC_DEBUG.
- */
-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define NFSD_DEBUG		1
-#endif
-
 /*
  * knfsd debug flags
  */
-- 
cgit 


From bff175238a2416110e2258989c462234baeb5f46 Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <andreas.gruenbacher@gmail.com>
Date: Fri, 27 Mar 2015 17:50:00 +0100
Subject: uapi: Remove kernel internal declaration

The enum nfs4_acl_whotype is only used in nfs4d's internal nfs4 acl
representation. No longer expose it to user space.

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfs4.h      | 7 +++++++
 include/uapi/linux/nfs4.h | 7 -------
 2 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index ed43cb74b11d..32201c269890 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -16,6 +16,13 @@
 #include <linux/uidgid.h>
 #include <uapi/linux/nfs4.h>
 
+enum nfs4_acl_whotype {
+	NFS4_ACL_WHO_NAMED = 0,
+	NFS4_ACL_WHO_OWNER,
+	NFS4_ACL_WHO_GROUP,
+	NFS4_ACL_WHO_EVERYONE,
+};
+
 struct nfs4_ace {
 	uint32_t	type;
 	uint32_t	flag;
diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index 35f5f4c6c260..adc0aff83fbb 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -162,13 +162,6 @@
  */
 #define NFS4_MAX_BACK_CHANNEL_OPS 2
 
-enum nfs4_acl_whotype {
-	NFS4_ACL_WHO_NAMED = 0,
-	NFS4_ACL_WHO_OWNER,
-	NFS4_ACL_WHO_GROUP,
-	NFS4_ACL_WHO_EVERYONE,
-};
-
 #endif /* _UAPI_LINUX_NFS4_H */
 
 /*
-- 
cgit 


From 40c64c26a43494ba9982fd1b87dc54e3819566fc Mon Sep 17 00:00:00 2001
From: Anna Schumaker <Anna.Schumaker@netapp.com>
Date: Wed, 15 Apr 2015 13:00:05 -0400
Subject: NFS: Move nfs_idmap.h into fs/nfs/

This file is only used internally to the NFS v4 module, so it doesn't
need to be in the global include path.  I also renamed it from
nfs_idmap.h to nfs4idmap.h to emphasize that it's an NFSv4-only include
file.

Signed-off-by: Anna Schumaker <Anna.Schumaker@Netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@primarydata.com>
---
 fs/nfs/flexfilelayout/flexfilelayout.c |  2 +-
 fs/nfs/idmap.c                         |  2 +-
 fs/nfs/nfs4client.c                    |  2 +-
 fs/nfs/nfs4idmap.h                     | 68 ++++++++++++++++++++++++++++++++++
 fs/nfs/nfs4proc.c                      |  2 +-
 fs/nfs/nfs4state.c                     |  2 +-
 fs/nfs/nfs4super.c                     |  2 +-
 fs/nfs/nfs4sysctl.c                    |  2 +-
 fs/nfs/nfs4xdr.c                       |  2 +-
 include/linux/nfs_idmap.h              | 68 ----------------------------------
 include/uapi/linux/nfs_idmap.h         |  2 +-
 11 files changed, 77 insertions(+), 77 deletions(-)
 create mode 100644 fs/nfs/nfs4idmap.h
 delete mode 100644 include/linux/nfs_idmap.h

(limited to 'include/uapi/linux')

diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index f3ff66e4fb6a..7d05089e52d6 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -11,10 +11,10 @@
 #include <linux/module.h>
 
 #include <linux/sunrpc/metrics.h>
-#include <linux/nfs_idmap.h>
 
 #include "flexfilelayout.h"
 #include "../nfs4session.h"
+#include "../nfs4idmap.h"
 #include "../internal.h"
 #include "../delegation.h"
 #include "../nfs4trace.h"
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 857e2a99acc8..2e1737c40a29 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -36,7 +36,6 @@
 #include <linux/types.h>
 #include <linux/parser.h>
 #include <linux/fs.h>
-#include <linux/nfs_idmap.h>
 #include <net/net_namespace.h>
 #include <linux/sunrpc/rpc_pipe_fs.h>
 #include <linux/nfs_fs.h>
@@ -49,6 +48,7 @@
 
 #include "internal.h"
 #include "netns.h"
+#include "nfs4idmap.h"
 #include "nfs4trace.h"
 
 #define NFS_UINT_MAXLEN 11
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 86d6214ea022..43ca5a7ac1da 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -4,7 +4,6 @@
  */
 #include <linux/module.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs_mount.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/sunrpc/auth.h>
@@ -15,6 +14,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "nfs4session.h"
+#include "nfs4idmap.h"
 #include "pnfs.h"
 #include "netns.h"
 
diff --git a/fs/nfs/nfs4idmap.h b/fs/nfs/nfs4idmap.h
new file mode 100644
index 000000000000..de44d7330ab3
--- /dev/null
+++ b/fs/nfs/nfs4idmap.h
@@ -0,0 +1,68 @@
+/*
+ * fs/nfs/nfs4idmap.h
+ *
+ *  UID and GID to name mapping for clients.
+ *
+ *  Copyright (c) 2002 The Regents of the University of Michigan.
+ *  All rights reserved.
+ *
+ *  Marius Aamodt Eriksen <marius@umich.edu>
+ *
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions
+ *  are met:
+ *
+ *  1. Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *  2. Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *  3. Neither the name of the University nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef NFS_IDMAP_H
+#define NFS_IDMAP_H
+
+#include <linux/uidgid.h>
+#include <uapi/linux/nfs_idmap.h>
+
+
+/* Forward declaration to make this header independent of others */
+struct nfs_client;
+struct nfs_server;
+struct nfs_fattr;
+struct nfs4_string;
+
+int nfs_idmap_init(void);
+void nfs_idmap_quit(void);
+int nfs_idmap_new(struct nfs_client *);
+void nfs_idmap_delete(struct nfs_client *);
+
+void nfs_fattr_init_names(struct nfs_fattr *fattr,
+		struct nfs4_string *owner_name,
+		struct nfs4_string *group_name);
+void nfs_fattr_free_names(struct nfs_fattr *);
+void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
+
+int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *);
+int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *);
+int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
+int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
+
+int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
+
+extern unsigned int nfs_idmap_cache_timeout;
+#endif /* NFS_IDMAP_H */
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6d48f37cfe8a..60396330044f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,7 +51,6 @@
 #include <linux/namei.h>
 #include <linux/mount.h>
 #include <linux/module.h>
-#include <linux/nfs_idmap.h>
 #include <linux/xattr.h>
 #include <linux/utsname.h>
 #include <linux/freezer.h>
@@ -63,6 +62,7 @@
 #include "callback.h"
 #include "pnfs.h"
 #include "netns.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "fscache.h"
 
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index f95e3b58bbc3..935a6ffe8643 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -42,7 +42,6 @@
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
 #include <linux/random.h>
@@ -57,6 +56,7 @@
 #include "callback.h"
 #include "delegation.h"
 #include "internal.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index d91898193b2f..6fb7cb6b3f4b 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -3,12 +3,12 @@
  */
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs4_mount.h>
 #include <linux/nfs_fs.h>
 #include "delegation.h"
 #include "internal.h"
 #include "nfs4_fs.h"
+#include "nfs4idmap.h"
 #include "dns_resolve.h"
 #include "pnfs.h"
 #include "nfs.h"
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index b6ebe7e445f6..0fbd3ab1be22 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -6,10 +6,10 @@
  * Copyright (c) 2006 Trond Myklebust <Trond.Myklebust@netapp.com>
  */
 #include <linux/sysctl.h>
-#include <linux/nfs_idmap.h>
 #include <linux/nfs_fs.h>
 
 #include "nfs4_fs.h"
+#include "nfs4idmap.h"
 #include "callback.h"
 
 static const int nfs_set_port_min = 0;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index a26880cb8ce8..0aea97841d30 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -52,10 +52,10 @@
 #include <linux/nfs.h>
 #include <linux/nfs4.h>
 #include <linux/nfs_fs.h>
-#include <linux/nfs_idmap.h>
 
 #include "nfs4_fs.h"
 #include "internal.h"
+#include "nfs4idmap.h"
 #include "nfs4session.h"
 #include "pnfs.h"
 #include "netns.h"
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
deleted file mode 100644
index daaf3ea5e9d8..000000000000
--- a/include/linux/nfs_idmap.h
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * include/linux/nfs_idmap.h
- *
- *  UID and GID to name mapping for clients.
- *
- *  Copyright (c) 2002 The Regents of the University of Michigan.
- *  All rights reserved.
- *
- *  Marius Aamodt Eriksen <marius@umich.edu>
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions
- *  are met:
- *
- *  1. Redistributions of source code must retain the above copyright
- *     notice, this list of conditions and the following disclaimer.
- *  2. Redistributions in binary form must reproduce the above copyright
- *     notice, this list of conditions and the following disclaimer in the
- *     documentation and/or other materials provided with the distribution.
- *  3. Neither the name of the University nor the names of its
- *     contributors may be used to endorse or promote products derived
- *     from this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- *  WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- *  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- *  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- *  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- *  BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- *  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- *  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- *  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#ifndef NFS_IDMAP_H
-#define NFS_IDMAP_H
-
-#include <linux/uidgid.h>
-#include <uapi/linux/nfs_idmap.h>
-
-
-/* Forward declaration to make this header independent of others */
-struct nfs_client;
-struct nfs_server;
-struct nfs_fattr;
-struct nfs4_string;
-
-int nfs_idmap_init(void);
-void nfs_idmap_quit(void);
-int nfs_idmap_new(struct nfs_client *);
-void nfs_idmap_delete(struct nfs_client *);
-
-void nfs_fattr_init_names(struct nfs_fattr *fattr,
-		struct nfs4_string *owner_name,
-		struct nfs4_string *group_name);
-void nfs_fattr_free_names(struct nfs_fattr *);
-void nfs_fattr_map_and_free_names(struct nfs_server *, struct nfs_fattr *);
-
-int nfs_map_name_to_uid(const struct nfs_server *, const char *, size_t, kuid_t *);
-int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t *);
-int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t);
-int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t);
-
-int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res);
-
-extern unsigned int nfs_idmap_cache_timeout;
-#endif /* NFS_IDMAP_H */
diff --git a/include/uapi/linux/nfs_idmap.h b/include/uapi/linux/nfs_idmap.h
index 8d4b1c7b24d4..038e36c96669 100644
--- a/include/uapi/linux/nfs_idmap.h
+++ b/include/uapi/linux/nfs_idmap.h
@@ -1,5 +1,5 @@
 /*
- * include/linux/nfs_idmap.h
+ * include/uapi/linux/nfs_idmap.h
  *
  *  UID and GID to name mapping for clients.
  *
-- 
cgit 


From 0df48c26d8418c5c9fba63fac15b660d70ca2f1c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 Apr 2015 15:28:17 -0700
Subject: tcp: add tcpi_bytes_acked to tcp_info

This patch tracks total number of bytes acked for a TCP socket.
This is the sum of all changes done to tp->snd_una, and allows
for precise tracking of delivered data.

RFC4898 named this : tcpEStatsAppHCThruOctetsAcked

This is a 64bit field, and can be fetched both from TCP_INFO
getsockopt() if one has a handle on a TCP socket, or from inet_diag
netlink facility (iproute2/ss patch will follow)

Note that tp->bytes_acked was placed near tp->snd_una for
best data locality and minimal performance impact.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Eric Salo <salo@google.com>
Cc: Martin Lau <kafai@fb.com>
Cc: Chris Rapier <rapier@psc.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  4 ++++
 include/net/tcp.h        |  2 +-
 include/uapi/linux/tcp.h |  1 +
 net/ipv4/tcp.c           |  6 +++++-
 net/ipv4/tcp_input.c     | 13 +++++++++++--
 5 files changed, 22 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 0caa3a2d4106..0f73b43171da 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -150,6 +150,10 @@ struct tcp_sock {
 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
  	u32	snd_nxt;	/* Next sequence we send		*/
 
+	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
+				 * sum(delta(snd_una)), or how many bytes
+				 * were acked.
+				 */
  	u32	snd_una;	/* First byte we want an ack for	*/
  	u32	snd_sml;	/* Last byte of the most recently transmitted small packet */
 	u32	rcv_tstamp;	/* timestamp of last received ACK (for keepalives) */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 051dc5c2802d..dd7b4ea6a10c 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -576,7 +576,7 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize)
 }
 
 /* tcp.c */
-void tcp_get_info(const struct sock *, struct tcp_info *);
+void tcp_get_info(struct sock *, struct tcp_info *);
 
 /* Read 'sendfile()'-style from a TCP socket */
 typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 3b9718328d8b..6666e98a0af9 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -189,6 +189,7 @@ struct tcp_info {
 
 	__u64	tcpi_pacing_rate;
 	__u64	tcpi_max_pacing_rate;
+	__u64	tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 8c5cd9efebbc..4bf0e8ca7b5b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2592,7 +2592,7 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 #endif
 
 /* Return information about state of tcp endpoint in API format. */
-void tcp_get_info(const struct sock *sk, struct tcp_info *info)
+void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
 	const struct tcp_sock *tp = tcp_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
@@ -2663,6 +2663,10 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info)
 
 	rate = READ_ONCE(sk->sk_max_pacing_rate);
 	info->tcpi_max_pacing_rate = rate != ~0U ? rate : ~0ULL;
+
+	spin_lock_bh(&sk->sk_lock.slock);
+	info->tcpi_bytes_acked = tp->bytes_acked;
+	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
 
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 3a4d9b34bed4..378d3f4d4dc3 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3280,6 +3280,15 @@ static inline bool tcp_may_update_window(const struct tcp_sock *tp,
 		(ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd);
 }
 
+/* If we update tp->snd_una, also update tp->bytes_acked */
+static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
+{
+	u32 delta = ack - tp->snd_una;
+
+	tp->bytes_acked += delta;
+	tp->snd_una = ack;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -3315,7 +3324,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32
 		}
 	}
 
-	tp->snd_una = ack;
+	tcp_snd_una_update(tp, ack);
 
 	return flag;
 }
@@ -3497,7 +3506,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
 		 * Note, we use the fact that SND.UNA>=SND.WL2.
 		 */
 		tcp_update_wl(tp, ack_seq);
-		tp->snd_una = ack;
+		tcp_snd_una_update(tp, ack);
 		flag |= FLAG_WIN_UPDATE;
 
 		tcp_in_ack_event(sk, CA_ACK_WIN_UPDATE);
-- 
cgit 


From bdd1f9edacb5f5835d1e6276571bbbe5b88ded48 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 Apr 2015 15:28:18 -0700
Subject: tcp: add tcpi_bytes_received to tcp_info

This patch tracks total number of payload bytes received on a TCP socket.
This is the sum of all changes done to tp->rcv_nxt

RFC4898 named this : tcpEStatsAppHCThruOctetsReceived

This is a 64bit field, and can be fetched both from TCP_INFO
getsockopt() if one has a handle on a TCP socket, or from inet_diag
netlink facility (iproute2/ss patch will follow)

Note that tp->bytes_received was placed near tp->rcv_nxt for
best data locality and minimal performance impact.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Matt Mathis <mattmathis@google.com>
Cc: Eric Salo <salo@google.com>
Cc: Martin Lau <kafai@fb.com>
Cc: Chris Rapier <rapier@psc.edu>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  4 ++++
 include/uapi/linux/tcp.h |  1 +
 net/ipv4/tcp.c           |  1 +
 net/ipv4/tcp_fastopen.c  |  1 +
 net/ipv4/tcp_input.c     | 17 +++++++++++++----
 5 files changed, 20 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 0f73b43171da..3b2911502a8c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -145,6 +145,10 @@ struct tcp_sock {
  *	read the code and the spec side by side (and laugh ...)
  *	See RFC793 and RFC1122. The RFC writes these in capitals.
  */
+	u64	bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived
+				 * sum(delta(rcv_nxt)), or how many bytes
+				 * were acked.
+				 */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
 	u32	copied_seq;	/* Head of yet unread data		*/
 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 6666e98a0af9..a48f93f3207b 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -190,6 +190,7 @@ struct tcp_info {
 	__u64	tcpi_pacing_rate;
 	__u64	tcpi_max_pacing_rate;
 	__u64	tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+	__u64	tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 4bf0e8ca7b5b..99fcc0b22c92 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2666,6 +2666,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 
 	spin_lock_bh(&sk->sk_lock.slock);
 	info->tcpi_bytes_acked = tp->bytes_acked;
+	info->tcpi_bytes_received = tp->bytes_received;
 	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index e3d87aca6be8..3c673d5e6cff 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -206,6 +206,7 @@ static bool tcp_fastopen_create_child(struct sock *sk,
 			skb_set_owner_r(skb2, child);
 			__skb_queue_tail(&child->sk_receive_queue, skb2);
 			tp->syn_data_acked = 1;
+			tp->bytes_received = end_seq - TCP_SKB_CB(skb)->seq - 1;
 		} else {
 			end_seq = TCP_SKB_CB(skb)->seq + 1;
 		}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 378d3f4d4dc3..7e6962bcfc30 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3289,6 +3289,15 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack)
 	tp->snd_una = ack;
 }
 
+/* If we update tp->rcv_nxt, also update tp->bytes_received */
+static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq)
+{
+	u32 delta = seq - tp->rcv_nxt;
+
+	tp->bytes_received += delta;
+	tp->rcv_nxt = seq;
+}
+
 /* Update our send window.
  *
  * Window update algorithm, described in RFC793/RFC1122 (used in linux-2.2
@@ -4245,7 +4254,7 @@ static void tcp_ofo_queue(struct sock *sk)
 
 		tail = skb_peek_tail(&sk->sk_receive_queue);
 		eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (!eaten)
 			__skb_queue_tail(&sk->sk_receive_queue, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -4413,7 +4422,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int
 	__skb_pull(skb, hdrlen);
 	eaten = (tail &&
 		 tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0;
-	tcp_sk(sk)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
 	if (!eaten) {
 		__skb_queue_tail(&sk->sk_receive_queue, skb);
 		skb_set_owner_r(skb, sk);
@@ -4506,7 +4515,7 @@ queue_and_out:
 
 			eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen);
 		}
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+		tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 		if (skb->len)
 			tcp_event_data_recv(sk, skb);
 		if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN)
@@ -5254,7 +5263,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb,
 					tcp_rcv_rtt_measure_ts(sk, skb);
 
 					__skb_pull(skb, tcp_header_len);
-					tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+					tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
 					NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITSTOUSER);
 					eaten = 1;
 				}
-- 
cgit 


From 64f40ff5bbdb1b679fb3c4dbc8230d6517d2b8dc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 Apr 2015 16:23:48 -0700
Subject: tcp: prepare CC get_info() access from getsockopt()

We would like that optional info provided by Congestion Control
modules using netlink can also be read using getsockopt()

This patch changes get_info() to put this information in a buffer,
instead of skb, like tcp_get_info(), so that following patch
can reuse this common infrastructure.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h              |  5 ++++-
 include/uapi/linux/inet_diag.h |  4 ++++
 net/ipv4/inet_diag.c           |  8 +++++---
 net/ipv4/tcp_dctcp.c           | 20 ++++++++++----------
 net/ipv4/tcp_illinois.c        | 21 +++++++++++----------
 net/ipv4/tcp_vegas.c           | 19 ++++++++++---------
 net/ipv4/tcp_vegas.h           |  3 ++-
 7 files changed, 46 insertions(+), 34 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index dd7b4ea6a10c..6d204f3f9df8 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -804,6 +804,8 @@ enum tcp_ca_ack_event_flags {
 /* Requires ECN/ECT set on all packets */
 #define TCP_CONG_NEEDS_ECN	0x2
 
+union tcp_cc_info;
+
 struct tcp_congestion_ops {
 	struct list_head	list;
 	u32 key;
@@ -829,7 +831,8 @@ struct tcp_congestion_ops {
 	/* hook for packet ack accounting (optional) */
 	void (*pkts_acked)(struct sock *sk, u32 num_acked, s32 rtt_us);
 	/* get info for inet_diag (optional) */
-	int (*get_info)(struct sock *sk, u32 ext, struct sk_buff *skb);
+	size_t (*get_info)(struct sock *sk, u32 ext, int *attr,
+			   union tcp_cc_info *info);
 
 	char 		name[TCP_CA_NAME_MAX];
 	struct module 	*owner;
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index d65c0a09efd3..c7093c75bdd6 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -143,4 +143,8 @@ struct tcp_dctcp_info {
 	__u32	dctcp_ab_tot;
 };
 
+union tcp_cc_info {
+	struct tcpvegas_info	vegas;
+	struct tcp_dctcp_info	dctcp;
+};
 #endif /* _UAPI_INET_DIAG_H_ */
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index bb77ebdae3b3..4d32262c7502 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -224,14 +224,16 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 	handler->idiag_get_info(sk, r, info);
 
 	if (sk->sk_state < TCP_TIME_WAIT) {
-		int err = 0;
+		union tcp_cc_info info;
+		size_t sz = 0;
+		int attr;
 
 		rcu_read_lock();
 		ca_ops = READ_ONCE(icsk->icsk_ca_ops);
 		if (ca_ops && ca_ops->get_info)
-			err = ca_ops->get_info(sk, ext, skb);
+			sz = ca_ops->get_info(sk, ext, &attr, &info);
 		rcu_read_unlock();
-		if (err < 0)
+		if (sz && nla_put(skb, attr, sz, &info) < 0)
 			goto errout;
 	}
 
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index 4376016f7fa5..4c41c1287197 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -277,7 +277,8 @@ static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
 	}
 }
 
-static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
+			     union tcp_cc_info *info)
 {
 	const struct dctcp *ca = inet_csk_ca(sk);
 
@@ -286,18 +287,17 @@ static int dctcp_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
 	 */
 	if (ext & (1 << (INET_DIAG_DCTCPINFO - 1)) ||
 	    ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcp_dctcp_info info;
-
-		memset(&info, 0, sizeof(info));
+		memset(info, 0, sizeof(struct tcp_dctcp_info));
 		if (inet_csk(sk)->icsk_ca_ops != &dctcp_reno) {
-			info.dctcp_enabled = 1;
-			info.dctcp_ce_state = (u16) ca->ce_state;
-			info.dctcp_alpha = ca->dctcp_alpha;
-			info.dctcp_ab_ecn = ca->acked_bytes_ecn;
-			info.dctcp_ab_tot = ca->acked_bytes_total;
+			info->dctcp.dctcp_enabled = 1;
+			info->dctcp.dctcp_ce_state = (u16) ca->ce_state;
+			info->dctcp.dctcp_alpha = ca->dctcp_alpha;
+			info->dctcp.dctcp_ab_ecn = ca->acked_bytes_ecn;
+			info->dctcp.dctcp_ab_tot = ca->acked_bytes_total;
 		}
 
-		return nla_put(skb, INET_DIAG_DCTCPINFO, sizeof(info), &info);
+		*attr = INET_DIAG_DCTCPINFO;
+		return sizeof(*info);
 	}
 	return 0;
 }
diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c
index 67476f085e48..f71002e4db0b 100644
--- a/net/ipv4/tcp_illinois.c
+++ b/net/ipv4/tcp_illinois.c
@@ -300,24 +300,25 @@ static u32 tcp_illinois_ssthresh(struct sock *sk)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-static int tcp_illinois_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr,
+				union tcp_cc_info *info)
 {
 	const struct illinois *ca = inet_csk_ca(sk);
 
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info info = {
-			.tcpv_enabled = 1,
-			.tcpv_rttcnt = ca->cnt_rtt,
-			.tcpv_minrtt = ca->base_rtt,
-		};
+		info->vegas.tcpv_enabled = 1;
+		info->vegas.tcpv_rttcnt = ca->cnt_rtt;
+		info->vegas.tcpv_minrtt = ca->base_rtt;
+		info->vegas.tcpv_rtt = 0;
 
-		if (info.tcpv_rttcnt > 0) {
+		if (info->vegas.tcpv_rttcnt > 0) {
 			u64 t = ca->sum_rtt;
 
-			do_div(t, info.tcpv_rttcnt);
-			info.tcpv_rtt = t;
+			do_div(t, info->vegas.tcpv_rttcnt);
+			info->vegas.tcpv_rtt = t;
 		}
-		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		*attr = INET_DIAG_VEGASINFO;
+		return sizeof(struct tcpvegas_info);
 	}
 	return 0;
 }
diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c
index c71a1b8f7bde..a6cea1d5e20d 100644
--- a/net/ipv4/tcp_vegas.c
+++ b/net/ipv4/tcp_vegas.c
@@ -286,18 +286,19 @@ static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked)
 }
 
 /* Extract info for Tcp socket info provided via netlink. */
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb)
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+			  union tcp_cc_info *info)
 {
 	const struct vegas *ca = inet_csk_ca(sk);
+
 	if (ext & (1 << (INET_DIAG_VEGASINFO - 1))) {
-		struct tcpvegas_info info = {
-			.tcpv_enabled = ca->doing_vegas_now,
-			.tcpv_rttcnt = ca->cntRTT,
-			.tcpv_rtt = ca->baseRTT,
-			.tcpv_minrtt = ca->minRTT,
-		};
-
-		return nla_put(skb, INET_DIAG_VEGASINFO, sizeof(info), &info);
+		info->vegas.tcpv_enabled = ca->doing_vegas_now,
+		info->vegas.tcpv_rttcnt = ca->cntRTT,
+		info->vegas.tcpv_rtt = ca->baseRTT,
+		info->vegas.tcpv_minrtt = ca->minRTT,
+
+		*attr = INET_DIAG_VEGASINFO;
+		return sizeof(struct tcpvegas_info);
 	}
 	return 0;
 }
diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h
index e8a6b33cc61d..ef9da5306c68 100644
--- a/net/ipv4/tcp_vegas.h
+++ b/net/ipv4/tcp_vegas.h
@@ -19,6 +19,7 @@ void tcp_vegas_init(struct sock *sk);
 void tcp_vegas_state(struct sock *sk, u8 ca_state);
 void tcp_vegas_pkts_acked(struct sock *sk, u32 cnt, s32 rtt_us);
 void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event);
-int tcp_vegas_get_info(struct sock *sk, u32 ext, struct sk_buff *skb);
+size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr,
+			  union tcp_cc_info *info);
 
 #endif	/* __TCP_VEGAS_H */
-- 
cgit 


From 6e9250f59ef9efb932c84850cd221f22c2a03c4a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 28 Apr 2015 16:23:49 -0700
Subject: tcp: add TCP_CC_INFO socket option

Some Congestion Control modules can provide per flow information,
but current way to get this information is to use netlink.

Like TCP_INFO, let's add TCP_CC_INFO so that applications can
issue a getsockopt() if they have a socket file descriptor,
instead of playing complex netlink games.

Sample usage would be :

  union tcp_cc_info info;
  socklen_t len = sizeof(info);

  if (getsockopt(fd, SOL_TCP, TCP_CC_INFO, &info, &len) == -1)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/tcp.h |  1 +
 net/ipv4/tcp.c           | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index a48f93f3207b..faa72f4fa547 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -112,6 +112,7 @@ enum {
 #define TCP_FASTOPEN		23	/* Enable FastOpen on listeners */
 #define TCP_TIMESTAMP		24
 #define TCP_NOTSENT_LOWAT	25	/* limit number of unsent bytes in write queue */
+#define TCP_CC_INFO		26	/* Get Congestion Control (optional) info */
 
 struct tcp_repair_opt {
 	__u32	opt_code;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 99fcc0b22c92..46efa03d2b11 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -252,6 +252,7 @@
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/poll.h>
+#include <linux/inet_diag.h>
 #include <linux/init.h>
 #include <linux/fs.h>
 #include <linux/skbuff.h>
@@ -2739,6 +2740,26 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			return -EFAULT;
 		return 0;
 	}
+	case TCP_CC_INFO: {
+		const struct tcp_congestion_ops *ca_ops;
+		union tcp_cc_info info;
+		size_t sz = 0;
+		int attr;
+
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		ca_ops = icsk->icsk_ca_ops;
+		if (ca_ops && ca_ops->get_info)
+			sz = ca_ops->get_info(sk, ~0U, &attr, &info);
+
+		len = min_t(unsigned int, len, sz);
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &info, len))
+			return -EFAULT;
+		return 0;
+	}
 	case TCP_QUICKACK:
 		val = !icsk->icsk_ack.pingpong;
 		break;
-- 
cgit 


From e412d3a32badcf17541d7443b033769fdf39b545 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Sat, 2 May 2015 08:42:29 +0930
Subject: virtio: fix typo in vring_need_event() doc comment

Here the "other side" refers to the guest or host.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/uapi/linux/virtio_ring.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_ring.h b/include/uapi/linux/virtio_ring.h
index a3318f31e8e7..915980ac68df 100644
--- a/include/uapi/linux/virtio_ring.h
+++ b/include/uapi/linux/virtio_ring.h
@@ -155,7 +155,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align)
 }
 
 /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */
-/* Assuming a given event_idx value from the other size, if
+/* Assuming a given event_idx value from the other side, if
  * we have just incremented index from old to new_idx,
  * should we trigger an event? */
 static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old)
-- 
cgit 


From 4749c3ef854e3a5d3dd3cc0ccd2dcb7e05d583bd Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 30 Apr 2015 12:12:00 +0200
Subject: net: sched: remove TC_MUNGED bits

Not used.

pedit sets TC_MUNGED when packet content was altered, but all the core
does is unset MUNGED again and then set OK2MUNGE.

And the latter isn't tested anywhere. So lets remove both
TC_MUNGED and TC_OK2MUNGE.

Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tc-actions-env-rules.txt | 2 --
 include/net/sch_generic.h                         | 2 --
 include/uapi/linux/pkt_cls.h                      | 3 +++
 net/sched/act_api.c                               | 5 -----
 net/sched/act_pedit.c                             | 5 +----
 5 files changed, 4 insertions(+), 13 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt
index 70d6cf608251..95c71716b2e2 100644
--- a/Documentation/networking/tc-actions-env-rules.txt
+++ b/Documentation/networking/tc-actions-env-rules.txt
@@ -14,8 +14,6 @@ resets them for you, so invoke skb_act_clone() rather than skb_clone().
 
 2) If you munge any packet thou shalt call pskb_expand_head in the case
 someone else is referencing the skb. After that you "own" the skb.
-You must also tell us if it is ok to munge the packet (TC_OK2MUNGE),
-this way any action downstream can stomp on the packet.
 
 3) Dropping packets you don't own is a no-no. You simply return
 TC_ACT_SHOT to the caller and they will drop it.
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 6d778efcfdfd..994b5a092f33 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -755,8 +755,6 @@ static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
 
 	if (n) {
 		n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
-		n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
-		n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
 	}
 	return n;
 }
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index bf08e76bf505..6810ca43a80a 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -35,6 +35,8 @@ bits 9,10,11: redirect counter -  redirect TTL. Loop avoidance
  *
  * */
 
+#ifndef __KERNEL__
+/* backwards compat for userspace only */
 #define TC_MUNGED          _TC_MAKEMASK1(0)
 #define SET_TC_MUNGED(v)   ( TC_MUNGED | (v & ~TC_MUNGED))
 #define CLR_TC_MUNGED(v)   ( v & ~TC_MUNGED)
@@ -42,6 +44,7 @@ bits 9,10,11: redirect counter -  redirect TTL. Loop avoidance
 #define TC_OK2MUNGE        _TC_MAKEMASK1(1)
 #define SET_TC_OK2MUNGE(v)   ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE))
 #define CLR_TC_OK2MUNGE(v)   ( v & ~TC_OK2MUNGE)
+#endif
 
 #define S_TC_VERD          _TC_MAKE32(2)
 #define M_TC_VERD          _TC_MAKEMASK(4,S_TC_VERD)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 3d43e4979f27..af427a3dbcba 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -392,11 +392,6 @@ int tcf_action_exec(struct sk_buff *skb, const struct list_head *actions,
 	list_for_each_entry(a, actions, list) {
 repeat:
 		ret = a->ops->act(skb, a, res);
-		if (TC_MUNGED & skb->tc_verd) {
-			/* copied already, allow trampling */
-			skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
-			skb->tc_verd = CLR_TC_MUNGED(skb->tc_verd);
-		}
 		if (ret == TC_ACT_REPEAT)
 			goto repeat;	/* we need a ttl - JHS */
 		if (ret != TC_ACT_PIPE)
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 59649d588d79..17e6d6669c7f 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -108,7 +108,7 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 		     struct tcf_result *res)
 {
 	struct tcf_pedit *p = a->priv;
-	int i, munged = 0;
+	int i;
 	unsigned int off;
 
 	if (skb_unclone(skb, GFP_ATOMIC))
@@ -156,11 +156,8 @@ static int tcf_pedit(struct sk_buff *skb, const struct tc_action *a,
 			*ptr = ((*ptr & tkey->mask) ^ tkey->val);
 			if (ptr == &_data)
 				skb_store_bits(skb, off + offset, ptr, 4);
-			munged++;
 		}
 
-		if (munged)
-			skb->tc_verd = SET_TC_MUNGED(skb->tc_verd);
 		goto done;
 	} else
 		WARN(1, "pedit BUG: index %d\n", p->tcf_index);
-- 
cgit 


From c19ae86a510cf4332af64caab04718bc853d3184 Mon Sep 17 00:00:00 2001
From: Jamal Hadi Salim <jhs@mojatatu.com>
Date: Fri, 1 May 2015 22:19:43 -0700
Subject: tc: remove unused redirect ttl

improves ingress+u32 performance from 22.4 Mpps to 22.9 Mpps

Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Florian Westphal <fw@strlen.de>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h | 2 ++
 net/core/dev.c               | 9 ---------
 2 files changed, 2 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 6810ca43a80a..596ffa0c7084 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -65,11 +65,13 @@ bits 9,10,11: redirect counter -  redirect TTL. Loop avoidance
 #define SET_TC_NCLS(v)   ( TC_NCLS | (v & ~TC_NCLS))
 #define CLR_TC_NCLS(v)   ( v & ~TC_NCLS)
 
+#ifndef __KERNEL__
 #define S_TC_RTTL          _TC_MAKE32(9)
 #define M_TC_RTTL          _TC_MAKEMASK(3,S_TC_RTTL)
 #define G_TC_RTTL(x)       _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL)
 #define V_TC_RTTL(x)       _TC_MAKEVALUE(x,S_TC_RTTL)
 #define SET_TC_RTTL(v,n)   ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL))
+#endif
 
 #define S_TC_AT          _TC_MAKE32(12)
 #define M_TC_AT          _TC_MAKEMASK(2,S_TC_AT)
diff --git a/net/core/dev.c b/net/core/dev.c
index 74a5b62f7568..862875ec8f2f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3531,18 +3531,9 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
  */
 static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
-	struct net_device *dev = skb->dev;
-	u32 ttl = G_TC_RTTL(skb->tc_verd);
 	int result = TC_ACT_OK;
 	struct Qdisc *q;
 
-	if (unlikely(MAX_RED_LOOP < ttl++)) {
-		net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
-				     skb->skb_iif, dev->ifindex);
-		return TC_ACT_SHOT;
-	}
-
-	skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
 	skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
 	q = rcu_dereference(rxq->qdisc);
-- 
cgit 


From 9dc6c806b3c4812619e305685b3c86835bf784ab Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 17 Apr 2015 22:37:21 +0200
Subject: nbd: stop using req->cmd

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nbd.c      | 48 +++++++++++++++++++++++-------------------------
 include/uapi/linux/nbd.h |  2 --
 2 files changed, 23 insertions(+), 27 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9cf52ac328fe..83a7ba4a3eec 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -230,29 +230,40 @@ static int nbd_send_req(struct nbd_device *nbd, struct request *req)
 	int result, flags;
 	struct nbd_request request;
 	unsigned long size = blk_rq_bytes(req);
+	u32 type;
+
+	if (req->cmd_type == REQ_TYPE_DRV_PRIV)
+		type = NBD_CMD_DISC;
+	else if (req->cmd_flags & REQ_DISCARD)
+		type = NBD_CMD_TRIM;
+	else if (req->cmd_flags & REQ_FLUSH)
+		type = NBD_CMD_FLUSH;
+	else if (rq_data_dir(req) == WRITE)
+		type = NBD_CMD_WRITE;
+	else
+		type = NBD_CMD_READ;
 
 	memset(&request, 0, sizeof(request));
 	request.magic = htonl(NBD_REQUEST_MAGIC);
-	request.type = htonl(nbd_cmd(req));
-
-	if (nbd_cmd(req) != NBD_CMD_FLUSH && nbd_cmd(req) != NBD_CMD_DISC) {
+	request.type = htonl(type);
+	if (type != NBD_CMD_FLUSH && type != NBD_CMD_DISC) {
 		request.from = cpu_to_be64((u64)blk_rq_pos(req) << 9);
 		request.len = htonl(size);
 	}
 	memcpy(request.handle, &req, sizeof(req));
 
 	dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
-		req, nbdcmd_to_ascii(nbd_cmd(req)),
+		req, nbdcmd_to_ascii(type),
 		(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
 	result = sock_xmit(nbd, 1, &request, sizeof(request),
-			(nbd_cmd(req) == NBD_CMD_WRITE) ? MSG_MORE : 0);
+			(type == NBD_CMD_WRITE) ? MSG_MORE : 0);
 	if (result <= 0) {
 		dev_err(disk_to_dev(nbd->disk),
 			"Send control failed (result %d)\n", result);
 		return -EIO;
 	}
 
-	if (nbd_cmd(req) == NBD_CMD_WRITE) {
+	if (type == NBD_CMD_WRITE) {
 		struct req_iterator iter;
 		struct bio_vec bvec;
 		/*
@@ -352,7 +363,7 @@ static struct request *nbd_read_stat(struct nbd_device *nbd)
 	}
 
 	dev_dbg(nbd_to_dev(nbd), "request %p: got reply\n", req);
-	if (nbd_cmd(req) == NBD_CMD_READ) {
+	if (rq_data_dir(req) != WRITE) {
 		struct req_iterator iter;
 		struct bio_vec bvec;
 
@@ -452,23 +463,11 @@ static void nbd_handle_req(struct nbd_device *nbd, struct request *req)
 	if (req->cmd_type != REQ_TYPE_FS)
 		goto error_out;
 
-	nbd_cmd(req) = NBD_CMD_READ;
-	if (rq_data_dir(req) == WRITE) {
-		if ((req->cmd_flags & REQ_DISCARD)) {
-			WARN_ON(!(nbd->flags & NBD_FLAG_SEND_TRIM));
-			nbd_cmd(req) = NBD_CMD_TRIM;
-		} else
-			nbd_cmd(req) = NBD_CMD_WRITE;
-		if (nbd->flags & NBD_FLAG_READ_ONLY) {
-			dev_err(disk_to_dev(nbd->disk),
-				"Write on read-only\n");
-			goto error_out;
-		}
-	}
-
-	if (req->cmd_flags & REQ_FLUSH) {
-		BUG_ON(unlikely(blk_rq_sectors(req)));
-		nbd_cmd(req) = NBD_CMD_FLUSH;
+	if (rq_data_dir(req) == WRITE &&
+	    (nbd->flags & NBD_FLAG_READ_ONLY)) {
+		dev_err(disk_to_dev(nbd->disk),
+			"Write on read-only\n");
+		goto error_out;
 	}
 
 	req->errors = 0;
@@ -593,7 +592,6 @@ static int __nbd_ioctl(struct block_device *bdev, struct nbd_device *nbd,
 		mutex_lock(&nbd->tx_lock);
 		blk_rq_init(NULL, &sreq);
 		sreq.cmd_type = REQ_TYPE_DRV_PRIV;
-		nbd_cmd(&sreq) = NBD_CMD_DISC;
 
 		/* Check again after getting mutex back.  */
 		if (!nbd->sock)
diff --git a/include/uapi/linux/nbd.h b/include/uapi/linux/nbd.h
index 4f52549b23ff..e08e413d5f71 100644
--- a/include/uapi/linux/nbd.h
+++ b/include/uapi/linux/nbd.h
@@ -44,8 +44,6 @@ enum {
 /* there is a gap here to match userspace */
 #define NBD_FLAG_SEND_TRIM    (1 << 5) /* send trim/discard */
 
-#define nbd_cmd(req) ((req)->cmd[0])
-
 /* userspace doesn't need the nbd_device structure */
 
 /* These are sent over the network in the request/reply magic fields */
-- 
cgit 


From cd8ae85299d54155702a56811b2e035e63064d3d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 3 May 2015 21:34:46 -0700
Subject: tcp: provide SYN headers for passive connections

This patch allows a server application to get the TCP SYN headers for
its passive connections.  This is useful if the server is doing
fingerprinting of clients based on SYN packet contents.

Two socket options are added: TCP_SAVE_SYN and TCP_SAVED_SYN.

The first is used on a socket to enable saving the SYN headers
for child connections. This can be set before or after the listen()
call.

The latter is used to retrieve the SYN headers for passive connections,
if the parent listener has enabled TCP_SAVE_SYN.

TCP_SAVED_SYN is read once, it frees the saved SYN headers.

The data returned in TCP_SAVED_SYN are network (IPv4/IPv6) and TCP
headers.

Original patch was written by Tom Herbert, I changed it to not hold
a full skb (and associated dst and conntracking reference).

We have used such patch for about 3 years at Google.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Tested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h        |  8 ++++++++
 include/net/request_sock.h |  4 +++-
 include/uapi/linux/tcp.h   |  2 ++
 net/ipv4/tcp.c             | 35 +++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_input.c       | 18 ++++++++++++++++++
 net/ipv4/tcp_ipv4.c        |  1 +
 net/ipv4/tcp_minisocks.c   |  3 +++
 7 files changed, 70 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 3b2911502a8c..e6fb5df22db1 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -199,6 +199,7 @@ struct tcp_sock {
 		syn_fastopen:1,	/* SYN includes Fast Open option */
 		syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
 		syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
+		save_syn:1,	/* Save headers of SYN packet */
 		is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */
 	u32	tlp_high_seq;	/* snd_nxt at the time of TLP retransmit. */
 
@@ -326,6 +327,7 @@ struct tcp_sock {
 	 * socket. Used to retransmit SYNACKs etc.
 	 */
 	struct request_sock *fastopen_rsk;
+	u32	*saved_syn;
 };
 
 enum tsq_flags {
@@ -393,4 +395,10 @@ static inline int fastopen_init_queue(struct sock *sk, int backlog)
 	return 0;
 }
 
+static inline void tcp_saved_syn_free(struct tcp_sock *tp)
+{
+	kfree(tp->saved_syn);
+	tp->saved_syn = NULL;
+}
+
 #endif	/* _LINUX_TCP_H */
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index 9f4265ce8892..87935cad2f7b 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -64,6 +64,7 @@ struct request_sock {
 	struct timer_list		rsk_timer;
 	const struct request_sock_ops	*rsk_ops;
 	struct sock			*sk;
+	u32				*saved_syn;
 	u32				secid;
 	u32				peer_secid;
 };
@@ -77,7 +78,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener)
 		req->rsk_ops = ops;
 		sock_hold(sk_listener);
 		req->rsk_listener = sk_listener;
-
+		req->saved_syn = NULL;
 		/* Following is temporary. It is coupled with debugging
 		 * helpers in reqsk_put() & reqsk_free()
 		 */
@@ -104,6 +105,7 @@ static inline void reqsk_free(struct request_sock *req)
 	req->rsk_ops->destructor(req);
 	if (req->rsk_listener)
 		sock_put(req->rsk_listener);
+	kfree(req->saved_syn);
 	kmem_cache_free(req->rsk_ops->slab, req);
 }
 
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index faa72f4fa547..51ebedba577f 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -113,6 +113,8 @@ enum {
 #define TCP_TIMESTAMP		24
 #define TCP_NOTSENT_LOWAT	25	/* limit number of unsent bytes in write queue */
 #define TCP_CC_INFO		26	/* Get Congestion Control (optional) info */
+#define TCP_SAVE_SYN		27	/* Record SYN headers for new connections */
+#define TCP_SAVED_SYN		28	/* Get SYN headers recorded for connection */
 
 struct tcp_repair_opt {
 	__u32	opt_code;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 46efa03d2b11..ecccfdc50d76 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2482,6 +2482,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 			icsk->icsk_syn_retries = val;
 		break;
 
+	case TCP_SAVE_SYN:
+		if (val < 0 || val > 1)
+			err = -EINVAL;
+		else
+			tp->save_syn = val;
+		break;
+
 	case TCP_LINGER2:
 		if (val < 0)
 			tp->linger2 = -1;
@@ -2818,6 +2825,34 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 	case TCP_NOTSENT_LOWAT:
 		val = tp->notsent_lowat;
 		break;
+	case TCP_SAVE_SYN:
+		val = tp->save_syn;
+		break;
+	case TCP_SAVED_SYN: {
+		if (get_user(len, optlen))
+			return -EFAULT;
+
+		lock_sock(sk);
+		if (tp->saved_syn) {
+			len = min_t(unsigned int, tp->saved_syn[0], len);
+			if (put_user(len, optlen)) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			if (copy_to_user(optval, tp->saved_syn + 1, len)) {
+				release_sock(sk);
+				return -EFAULT;
+			}
+			tcp_saved_syn_free(tp);
+			release_sock(sk);
+		} else {
+			release_sock(sk);
+			len = 0;
+			if (put_user(len, optlen))
+				return -EFAULT;
+		}
+		return 0;
+	}
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 09bdc4abfcbb..df2ca615cd0c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -6060,6 +6060,23 @@ static bool tcp_syn_flood_action(struct sock *sk,
 	return want_cookie;
 }
 
+static void tcp_reqsk_record_syn(const struct sock *sk,
+				 struct request_sock *req,
+				 const struct sk_buff *skb)
+{
+	if (tcp_sk(sk)->save_syn) {
+		u32 len = skb_network_header_len(skb) + tcp_hdrlen(skb);
+		u32 *copy;
+
+		copy = kmalloc(len + sizeof(u32), GFP_ATOMIC);
+		if (copy) {
+			copy[0] = len;
+			memcpy(&copy[1], skb_network_header(skb), len);
+			req->saved_syn = copy;
+		}
+	}
+}
+
 int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		     const struct tcp_request_sock_ops *af_ops,
 		     struct sock *sk, struct sk_buff *skb)
@@ -6192,6 +6209,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops,
 		tcp_rsk(req)->tfo_listener = false;
 		af_ops->queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 	}
+	tcp_reqsk_record_syn(sk, req, skb);
 
 	return 0;
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index fc1c658ec6c1..91cb4768a860 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1802,6 +1802,7 @@ void tcp_v4_destroy_sock(struct sock *sk)
 
 	/* If socket is aborted during connect operation */
 	tcp_free_fastopen_req(tp);
+	tcp_saved_syn_free(tp);
 
 	sk_sockets_allocated_dec(sk);
 	sock_release_memcg(sk);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e5d7649136fc..ebe2ab2596ed 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -536,6 +536,9 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 		newtp->fastopen_rsk = NULL;
 		newtp->syn_data_acked = 0;
 
+		newtp->saved_syn = req->saved_syn;
+		req->saved_syn = NULL;
+
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS);
 	}
 	return newsk;
-- 
cgit 


From c967a0873a7836c7a77bf611f1c7d3f47c554c45 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Tue, 5 May 2015 09:06:30 -0700
Subject: mpls: Move reserved label definitions

Move to include/uapi/linux/mpls.h to be externally visibile.

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mpls.h | 10 ++++++++++
 net/mpls/af_mpls.c        | 18 +++++++++---------
 net/mpls/internal.h       | 10 ----------
 3 files changed, 19 insertions(+), 19 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h
index bc9abfe88c9a..0fe6ea5a41d5 100644
--- a/include/uapi/linux/mpls.h
+++ b/include/uapi/linux/mpls.h
@@ -31,4 +31,14 @@ struct mpls_label {
 #define MPLS_LS_TTL_MASK        0x000000FF
 #define MPLS_LS_TTL_SHIFT       0
 
+/* Reserved labels */
+#define MPLS_LABEL_IPV4_EXPLICIT_NULL	0 /* RFC3032 */
+#define MPLS_LABEL_ROUTER_ALERT		1 /* RFC3032 */
+#define MPLS_LABEL_IPV6_EXPLICIT_NULL	2 /* RFC3032 */
+#define MPLS_LABEL_IMPLICIT_NULL	3 /* RFC3032 */
+#define MPLS_LABEL_ENTROPY_INDICATOR	7 /* RFC6790 */
+#define MPLS_LABEL_GAL			13 /* RFC5586 */
+#define MPLS_LABEL_OAM_ALERT		14 /* RFC3429 */
+#define MPLS_LABEL_EXTENSION		15 /* RFC7274 */
+
 #endif /* _UAPI_MPLS_H */
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 954810c76a86..b6eb7615960a 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -647,7 +647,7 @@ int nla_get_labels(const struct nlattr *nla,
 			return -EINVAL;
 
 		switch (dec.label) {
-		case LABEL_IMPLICIT_NULL:
+		case MPLS_LABEL_IMPLICIT_NULL:
 			/* RFC3032: This is a label that an LSR may
 			 * assign and distribute, but which never
 			 * actually appears in the encapsulation.
@@ -935,7 +935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 	}
 
 	/* In case the predefined labels need to be populated */
-	if (limit > LABEL_IPV4_EXPLICIT_NULL) {
+	if (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL) {
 		struct net_device *lo = net->loopback_dev;
 		rt0 = mpls_rt_alloc(lo->addr_len);
 		if (!rt0)
@@ -945,7 +945,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		rt0->rt_via_table = NEIGH_LINK_TABLE;
 		memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
 	}
-	if (limit > LABEL_IPV6_EXPLICIT_NULL) {
+	if (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL) {
 		struct net_device *lo = net->loopback_dev;
 		rt2 = mpls_rt_alloc(lo->addr_len);
 		if (!rt2)
@@ -973,15 +973,15 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 	memcpy(labels, old, cp_size);
 
 	/* If needed set the predefined labels */
-	if ((old_limit <= LABEL_IPV6_EXPLICIT_NULL) &&
-	    (limit > LABEL_IPV6_EXPLICIT_NULL)) {
-		RCU_INIT_POINTER(labels[LABEL_IPV6_EXPLICIT_NULL], rt2);
+	if ((old_limit <= MPLS_LABEL_IPV6_EXPLICIT_NULL) &&
+	    (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL)) {
+		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6_EXPLICIT_NULL], rt2);
 		rt2 = NULL;
 	}
 
-	if ((old_limit <= LABEL_IPV4_EXPLICIT_NULL) &&
-	    (limit > LABEL_IPV4_EXPLICIT_NULL)) {
-		RCU_INIT_POINTER(labels[LABEL_IPV4_EXPLICIT_NULL], rt0);
+	if ((old_limit <= MPLS_LABEL_IPV4_EXPLICIT_NULL) &&
+	    (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL)) {
+		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4_EXPLICIT_NULL], rt0);
 		rt0 = NULL;
 	}
 
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 693877d69606..b064c345042c 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -1,16 +1,6 @@
 #ifndef MPLS_INTERNAL_H
 #define MPLS_INTERNAL_H
 
-#define LABEL_IPV4_EXPLICIT_NULL	0 /* RFC3032 */
-#define LABEL_ROUTER_ALERT_LABEL	1 /* RFC3032 */
-#define LABEL_IPV6_EXPLICIT_NULL	2 /* RFC3032 */
-#define LABEL_IMPLICIT_NULL		3 /* RFC3032 */
-#define LABEL_ENTROPY_INDICATOR		7 /* RFC6790 */
-#define LABEL_GAL			13 /* RFC5586 */
-#define LABEL_OAM_ALERT			14 /* RFC3429 */
-#define LABEL_EXTENSION			15 /* RFC7274 */
-
-
 struct mpls_shim_hdr {
 	__be32 label_stack_entry;
 };
-- 
cgit 


From a2f11835994ed5bcd6d66c7205947cc482231b08 Mon Sep 17 00:00:00 2001
From: Shawn Landden <shawn@churchofgit.com>
Date: Tue, 5 May 2015 09:07:16 -0700
Subject: can.h: make padding given by gcc explicit

The current definition of struct can_frame has a 16-byte size, with 8-byte
alignment, but the 3 bytes of padding are not explicit like the similar 2 bytes
of padding of struct canfd_frame. Make it explicit so it is easier to read.

Signed-off-by: Shawn Landden <shawn@churchofgit.com>
Acked-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 Documentation/networking/can.txt | 3 +++
 include/uapi/linux/can.h         | 6 ++++++
 2 files changed, 9 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/can.txt b/Documentation/networking/can.txt
index 5abad1e921ca..b48d4a149411 100644
--- a/Documentation/networking/can.txt
+++ b/Documentation/networking/can.txt
@@ -268,6 +268,9 @@ solution for a couple of reasons:
     struct can_frame {
             canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
             __u8    can_dlc; /* frame payload length in byte (0 .. 8) */
+            __u8    __pad;   /* padding */
+            __u8    __res0;  /* reserved / padding */
+            __u8    __res1;  /* reserved / padding */
             __u8    data[8] __attribute__((aligned(8)));
     };
 
diff --git a/include/uapi/linux/can.h b/include/uapi/linux/can.h
index 41892f720057..9692cda5f8fc 100644
--- a/include/uapi/linux/can.h
+++ b/include/uapi/linux/can.h
@@ -95,11 +95,17 @@ typedef __u32 can_err_mask_t;
  * @can_dlc: frame payload length in byte (0 .. 8) aka data length code
  *           N.B. the DLC field from ISO 11898-1 Chapter 8.4.2.3 has a 1:1
  *           mapping of the 'data length code' to the real payload length
+ * @__pad:   padding
+ * @__res0:  reserved / padding
+ * @__res1:  reserved / padding
  * @data:    CAN frame payload (up to 8 byte)
  */
 struct can_frame {
 	canid_t can_id;  /* 32 bit CAN_ID + EFF/RTR/ERR flags */
 	__u8    can_dlc; /* frame payload length in byte (0 .. CAN_MAX_DLEN) */
+	__u8    __pad;   /* padding */
+	__u8    __res0;  /* reserved / padding */
+	__u8    __res1;  /* reserved / padding */
 	__u8    data[CAN_MAX_DLEN] __attribute__((aligned(8)));
 };
 
-- 
cgit 


From 06f207fc541862ba8902ceda0ddeade6ea6bce72 Mon Sep 17 00:00:00 2001
From: Arik Nemtsov <arik@wizery.com>
Date: Wed, 6 May 2015 16:28:31 +0300
Subject: cfg80211: change GO_CONCURRENT to IR_CONCURRENT for STA

The GO_CONCURRENT regulatory definition can be extended to station
interfaces requesting to IR as part of TDLS off-channel operations.
Rename the GO_CONCURRENT flag to IR_CONCURRENT and allow the added
use-case.

Change internal users of GO_CONCURRENT to use the new definition.

Signed-off-by: Arik Nemtsov <arikx.nemtsov@intel.com>
Reviewed-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/iwlwifi/iwl-nvm-parse.c |  2 +-
 include/net/cfg80211.h                       |  4 +--
 include/uapi/linux/nl80211.h                 | 28 +++++++++++---------
 net/wireless/chan.c                          | 38 ++++++++++++++++------------
 net/wireless/nl80211.c                       |  4 +--
 net/wireless/reg.c                           |  4 +--
 6 files changed, 45 insertions(+), 35 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c
index 83903a5025c2..0b5a81d52a3e 100644
--- a/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/iwlwifi/iwl-nvm-parse.c
@@ -248,7 +248,7 @@ static u32 iwl_get_channel_flags(u8 ch_num, int ch_idx, bool is_5ghz,
 	 */
 	if ((nvm_flags & NVM_CHANNEL_GO_CONCURRENT) &&
 	    (flags & IEEE80211_CHAN_NO_IR))
-		flags |= IEEE80211_CHAN_GO_CONCURRENT;
+		flags |= IEEE80211_CHAN_IR_CONCURRENT;
 
 	return flags;
 }
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f8d6813cd5b2..d63ecec73090 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -111,7 +111,7 @@ enum ieee80211_band {
  *	This may be due to the driver or due to regulatory bandwidth
  *	restrictions.
  * @IEEE80211_CHAN_INDOOR_ONLY: see %NL80211_FREQUENCY_ATTR_INDOOR_ONLY
- * @IEEE80211_CHAN_GO_CONCURRENT: see %NL80211_FREQUENCY_ATTR_GO_CONCURRENT
+ * @IEEE80211_CHAN_IR_CONCURRENT: see %NL80211_FREQUENCY_ATTR_IR_CONCURRENT
  * @IEEE80211_CHAN_NO_20MHZ: 20 MHz bandwidth is not permitted
  *	on this channel.
  * @IEEE80211_CHAN_NO_10MHZ: 10 MHz bandwidth is not permitted
@@ -129,7 +129,7 @@ enum ieee80211_channel_flags {
 	IEEE80211_CHAN_NO_80MHZ		= 1<<7,
 	IEEE80211_CHAN_NO_160MHZ	= 1<<8,
 	IEEE80211_CHAN_INDOOR_ONLY	= 1<<9,
-	IEEE80211_CHAN_GO_CONCURRENT	= 1<<10,
+	IEEE80211_CHAN_IR_CONCURRENT	= 1<<10,
 	IEEE80211_CHAN_NO_20MHZ		= 1<<11,
 	IEEE80211_CHAN_NO_10MHZ		= 1<<12,
 };
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 241220c43e86..c0ab6b0a3919 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2620,16 +2620,17 @@ enum nl80211_band_attr {
  *	an indoor surroundings, i.e., it is connected to AC power (and not
  *	through portable DC inverters) or is under the control of a master
  *	that is acting as an AP and is connected to AC power.
- * @NL80211_FREQUENCY_ATTR_GO_CONCURRENT: GO operation is allowed on this
+ * @NL80211_FREQUENCY_ATTR_IR_CONCURRENT: IR operation is allowed on this
  *	channel if it's connected concurrently to a BSS on the same channel on
  *	the 2 GHz band or to a channel in the same UNII band (on the 5 GHz
- *	band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO on a
- *	channel that has the GO_CONCURRENT attribute set can be done when there
- *	is a clear assessment that the device is operating under the guidance of
- *	an authorized master, i.e., setting up a GO while the device is also
- *	connected to an AP with DFS and radar detection on the UNII band (it is
- *	up to user-space, i.e., wpa_supplicant to perform the required
- *	verifications)
+ *	band), and IEEE80211_CHAN_RADAR is not set. Instantiating a GO or TDLS
+ *	off-channel on a channel that has the IR_CONCURRENT attribute set can be
+ *	done when there is a clear assessment that the device is operating under
+ *	the guidance of an authorized master, i.e., setting up a GO or TDLS
+ *	off-channel while the device is also connected to an AP with DFS and
+ *	radar detection on the UNII band (it is up to user-space, i.e.,
+ *	wpa_supplicant to perform the required verifications). Using this
+ *	attribute for IR is disallowed for master interfaces (IBSS, AP).
  * @NL80211_FREQUENCY_ATTR_NO_20MHZ: 20 MHz operation is not allowed
  *	on this channel in current regulatory domain.
  * @NL80211_FREQUENCY_ATTR_NO_10MHZ: 10 MHz operation is not allowed
@@ -2641,7 +2642,7 @@ enum nl80211_band_attr {
  * See https://apps.fcc.gov/eas/comments/GetPublishedDocument.html?id=327&tn=528122
  * for more information on the FCC description of the relaxations allowed
  * by NL80211_FREQUENCY_ATTR_INDOOR_ONLY and
- * NL80211_FREQUENCY_ATTR_GO_CONCURRENT.
+ * NL80211_FREQUENCY_ATTR_IR_CONCURRENT.
  */
 enum nl80211_frequency_attr {
 	__NL80211_FREQUENCY_ATTR_INVALID,
@@ -2659,7 +2660,7 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_NO_160MHZ,
 	NL80211_FREQUENCY_ATTR_DFS_CAC_TIME,
 	NL80211_FREQUENCY_ATTR_INDOOR_ONLY,
-	NL80211_FREQUENCY_ATTR_GO_CONCURRENT,
+	NL80211_FREQUENCY_ATTR_IR_CONCURRENT,
 	NL80211_FREQUENCY_ATTR_NO_20MHZ,
 	NL80211_FREQUENCY_ATTR_NO_10MHZ,
 
@@ -2672,6 +2673,8 @@ enum nl80211_frequency_attr {
 #define NL80211_FREQUENCY_ATTR_PASSIVE_SCAN	NL80211_FREQUENCY_ATTR_NO_IR
 #define NL80211_FREQUENCY_ATTR_NO_IBSS		NL80211_FREQUENCY_ATTR_NO_IR
 #define NL80211_FREQUENCY_ATTR_NO_IR		NL80211_FREQUENCY_ATTR_NO_IR
+#define NL80211_FREQUENCY_ATTR_GO_CONCURRENT \
+					NL80211_FREQUENCY_ATTR_IR_CONCURRENT
 
 /**
  * enum nl80211_bitrate_attr - bitrate attributes
@@ -2830,7 +2833,7 @@ enum nl80211_sched_scan_match_attr {
  * @NL80211_RRF_AUTO_BW: maximum available bandwidth should be calculated
  *	base on contiguous rules and wider channels will be allowed to cross
  *	multiple contiguous/overlapping frequency ranges.
- * @NL80211_RRF_GO_CONCURRENT: See &NL80211_FREQUENCY_ATTR_GO_CONCURRENT
+ * @NL80211_RRF_IR_CONCURRENT: See &NL80211_FREQUENCY_ATTR_IR_CONCURRENT
  * @NL80211_RRF_NO_HT40MINUS: channels can't be used in HT40- operation
  * @NL80211_RRF_NO_HT40PLUS: channels can't be used in HT40+ operation
  * @NL80211_RRF_NO_80MHZ: 80MHz operation not allowed
@@ -2847,7 +2850,7 @@ enum nl80211_reg_rule_flags {
 	NL80211_RRF_NO_IR		= 1<<7,
 	__NL80211_RRF_NO_IBSS		= 1<<8,
 	NL80211_RRF_AUTO_BW		= 1<<11,
-	NL80211_RRF_GO_CONCURRENT	= 1<<12,
+	NL80211_RRF_IR_CONCURRENT	= 1<<12,
 	NL80211_RRF_NO_HT40MINUS	= 1<<13,
 	NL80211_RRF_NO_HT40PLUS		= 1<<14,
 	NL80211_RRF_NO_80MHZ		= 1<<15,
@@ -2859,6 +2862,7 @@ enum nl80211_reg_rule_flags {
 #define NL80211_RRF_NO_IR		NL80211_RRF_NO_IR
 #define NL80211_RRF_NO_HT40		(NL80211_RRF_NO_HT40MINUS |\
 					 NL80211_RRF_NO_HT40PLUS)
+#define NL80211_RRF_GO_CONCURRENT	NL80211_RRF_IR_CONCURRENT
 
 /* For backport compatibility with older userspace */
 #define NL80211_RRF_NO_IR_ALL		(NL80211_RRF_NO_IR | __NL80211_RRF_NO_IBSS)
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 5bcffdbf3e88..915b328b9ac5 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -698,19 +698,20 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 EXPORT_SYMBOL(cfg80211_chandef_usable);
 
 /*
- * For GO only, check if the channel can be used under permissive conditions
- * mandated by the some regulatory bodies, i.e., the channel is marked with
- * IEEE80211_CHAN_GO_CONCURRENT and there is an additional station interface
+ * Check if the channel can be used under permissive conditions mandated by
+ * some regulatory bodies, i.e., the channel is marked with
+ * IEEE80211_CHAN_IR_CONCURRENT and there is an additional station interface
  * associated to an AP on the same channel or on the same UNII band
  * (assuming that the AP is an authorized master).
- * In addition allow the GO to operate on a channel on which indoor operation is
+ * In addition allow operation on a channel on which indoor operation is
  * allowed, iff we are currently operating in an indoor environment.
  */
-static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
+static bool cfg80211_ir_permissive_chan(struct wiphy *wiphy,
+					enum nl80211_iftype iftype,
 					struct ieee80211_channel *chan)
 {
 	struct wireless_dev *wdev;
-	struct wiphy *wiphy = wiphy_idx_to_wiphy(rdev->wiphy_idx);
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 
 	ASSERT_RTNL();
 
@@ -718,16 +719,22 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
 	    !(wiphy->regulatory_flags & REGULATORY_ENABLE_RELAX_NO_IR))
 		return false;
 
+	/* only valid for GO and TDLS off-channel (station/p2p-CL) */
+	if (iftype != NL80211_IFTYPE_P2P_GO &&
+	    iftype != NL80211_IFTYPE_STATION &&
+	    iftype != NL80211_IFTYPE_P2P_CLIENT)
+		return false;
+
 	if (regulatory_indoor_allowed() &&
 	    (chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
 		return true;
 
-	if (!(chan->flags & IEEE80211_CHAN_GO_CONCURRENT))
+	if (!(chan->flags & IEEE80211_CHAN_IR_CONCURRENT))
 		return false;
 
 	/*
 	 * Generally, it is possible to rely on another device/driver to allow
-	 * the GO concurrent relaxation, however, since the device can further
+	 * the IR concurrent relaxation, however, since the device can further
 	 * enforce the relaxation (by doing a similar verifications as this),
 	 * and thus fail the GO instantiation, consider only the interfaces of
 	 * the current registered device.
@@ -748,7 +755,8 @@ static bool cfg80211_go_permissive_chan(struct cfg80211_registered_device *rdev,
 		 * GO_CONCURRENT is disconnected now. But then we must make sure
 		 * we're not outdoor on an indoor-only channel.
 		 */
-		if (wdev->iftype == NL80211_IFTYPE_P2P_GO &&
+		if (iftype == NL80211_IFTYPE_P2P_GO &&
+		    wdev->iftype == NL80211_IFTYPE_P2P_GO &&
 		    wdev->beacon_interval &&
 		    !(chan->flags & IEEE80211_CHAN_INDOOR_ONLY))
 			other_chan = wdev->chandef.chan;
@@ -793,7 +801,6 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
 			     struct cfg80211_chan_def *chandef,
 			     enum nl80211_iftype iftype)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	bool res;
 	u32 prohibited_flags = IEEE80211_CHAN_DISABLED |
 			       IEEE80211_CHAN_RADAR;
@@ -801,13 +808,12 @@ bool cfg80211_reg_can_beacon(struct wiphy *wiphy,
 	trace_cfg80211_reg_can_beacon(wiphy, chandef, iftype);
 
 	/*
-	 * Under certain conditions suggested by the some regulatory bodies
-	 * a GO can operate on channels marked with IEEE80211_NO_IR
-	 * so set this flag only if such relaxations are not enabled and
-	 * the conditions are not met.
+	 * Under certain conditions suggested by some regulatory bodies a
+	 * GO/STA can IR on channels marked with IEEE80211_NO_IR. Set this flag
+	 * only if such relaxations are not enabled and the conditions are not
+	 * met.
 	 */
-	if (iftype != NL80211_IFTYPE_P2P_GO ||
-	    !cfg80211_go_permissive_chan(rdev, chandef->chan))
+	if (!cfg80211_ir_permissive_chan(wiphy, iftype, chandef->chan))
 		prohibited_flags |= IEEE80211_CHAN_NO_IR;
 
 	if (cfg80211_chandef_dfs_required(wiphy, chandef, iftype) > 0 &&
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 8a33bbae9ec5..c264effd00a6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -639,8 +639,8 @@ static int nl80211_msg_put_channel(struct sk_buff *msg,
 		if ((chan->flags & IEEE80211_CHAN_INDOOR_ONLY) &&
 		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_INDOOR_ONLY))
 			goto nla_put_failure;
-		if ((chan->flags & IEEE80211_CHAN_GO_CONCURRENT) &&
-		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_GO_CONCURRENT))
+		if ((chan->flags & IEEE80211_CHAN_IR_CONCURRENT) &&
+		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_IR_CONCURRENT))
 			goto nla_put_failure;
 		if ((chan->flags & IEEE80211_CHAN_NO_20MHZ) &&
 		    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_20MHZ))
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 0e347f888fe9..d359e0610198 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -989,8 +989,8 @@ static u32 map_regdom_flags(u32 rd_flags)
 		channel_flags |= IEEE80211_CHAN_NO_OFDM;
 	if (rd_flags & NL80211_RRF_NO_OUTDOOR)
 		channel_flags |= IEEE80211_CHAN_INDOOR_ONLY;
-	if (rd_flags & NL80211_RRF_GO_CONCURRENT)
-		channel_flags |= IEEE80211_CHAN_GO_CONCURRENT;
+	if (rd_flags & NL80211_RRF_IR_CONCURRENT)
+		channel_flags |= IEEE80211_CHAN_IR_CONCURRENT;
 	if (rd_flags & NL80211_RRF_NO_HT40MINUS)
 		channel_flags |= IEEE80211_CHAN_NO_HT40MINUS;
 	if (rd_flags & NL80211_RRF_NO_HT40PLUS)
-- 
cgit 


From 90de4a1875180f8347c075319af2cce586c96ab6 Mon Sep 17 00:00:00 2001
From: Nadav Amit <namit@cs.technion.ac.il>
Date: Mon, 13 Apr 2015 01:53:41 +0300
Subject: KVM: x86: Support for disabling quirks

Introducing KVM_CAP_DISABLE_QUIRKS for disabling x86 quirks that were previous
created in order to overcome QEMU issues. Those issue were mostly result of
invalid VM BIOS.  Currently there are two quirks that can be disabled:

1. KVM_QUIRK_LINT0_REENABLED - LINT0 was enabled after boot
2. KVM_QUIRK_CD_NW_CLEARED - CD and NW are cleared after boot

These two issues are already resolved in recent releases of QEMU, and would
therefore be disabled by QEMU.

Signed-off-by: Nadav Amit <namit@cs.technion.ac.il>
Message-Id: <1428879221-29996-1-git-send-email-namit@cs.technion.ac.il>
[Report capability from KVM_CHECK_EXTENSION too. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt |  3 ++-
 arch/x86/include/asm/kvm_host.h   |  2 ++
 arch/x86/include/uapi/asm/kvm.h   |  3 +++
 arch/x86/kvm/lapic.c              |  5 +++--
 arch/x86/kvm/svm.c                |  3 ++-
 arch/x86/kvm/x86.c                | 30 ++++++++++++++++++++++++++++++
 include/uapi/linux/kvm.h          |  1 +
 7 files changed, 43 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 9fa2bf8c3f6f..695544420ff2 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -959,7 +959,8 @@ documentation when it pops into existence).
 4.37 KVM_ENABLE_CAP
 
 Capability: KVM_CAP_ENABLE_CAP, KVM_CAP_ENABLE_CAP_VM
-Architectures: ppc, s390
+Architectures: x86 (only KVM_CAP_ENABLE_CAP_VM),
+	       mips (only KVM_CAP_ENABLE_CAP), ppc, s390
 Type: vcpu ioctl, vm ioctl (with KVM_CAP_ENABLE_CAP_VM)
 Parameters: struct kvm_enable_cap (in)
 Returns: 0 on success; -1 on error
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index dea2e7e962e3..f80ad591aa61 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -635,6 +635,8 @@ struct kvm_arch {
 	#endif
 
 	bool boot_vcpu_runs_old_kvmclock;
+
+	u64 disabled_quirks;
 };
 
 struct kvm_vm_stat {
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index d7dcef58aefa..2fec75e4b1e1 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -345,4 +345,7 @@ struct kvm_xcrs {
 struct kvm_sync_regs {
 };
 
+#define KVM_QUIRK_LINT0_REENABLED	(1 << 0)
+#define KVM_QUIRK_CD_NW_CLEARED		(1 << 1)
+
 #endif /* _ASM_X86_KVM_H */
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 629af0f1c5c4..4071eb161c8f 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -1577,8 +1577,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu)
 	for (i = 0; i < APIC_LVT_NUM; i++)
 		apic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
 	apic->lapic_timer.timer_mode = 0;
-	apic_set_reg(apic, APIC_LVT0,
-		     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
+	if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_LINT0_REENABLED))
+		apic_set_reg(apic, APIC_LVT0,
+			     SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
 
 	apic_set_reg(apic, APIC_DFR, 0xffffffffU);
 	apic_set_spiv(apic, 0xff);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index ce741b8650f6..46299dac7c6d 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1575,7 +1575,8 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
 	 * does not do it - this results in some delay at
 	 * reboot
 	 */
-	cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
+	if (!(vcpu->kvm->arch.disabled_quirks & KVM_QUIRK_CD_NW_CLEARED))
+		cr0 &= ~(X86_CR0_CD | X86_CR0_NW);
 	svm->vmcb->save.cr0 = cr0;
 	mark_dirty(svm->vmcb, VMCB_CR);
 	update_cr0_intercept(svm);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7a959be0aebc..0435b653f583 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2800,6 +2800,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	case KVM_CAP_HYPERV_TIME:
 	case KVM_CAP_IOAPIC_POLARITY_IGNORED:
 	case KVM_CAP_TSC_DEADLINE_TIMER:
+	case KVM_CAP_ENABLE_CAP_VM:
+	case KVM_CAP_DISABLE_QUIRKS:
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
@@ -3847,6 +3849,26 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
 	return 0;
 }
 
+static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
+				   struct kvm_enable_cap *cap)
+{
+	int r;
+
+	if (cap->flags)
+		return -EINVAL;
+
+	switch (cap->cap) {
+	case KVM_CAP_DISABLE_QUIRKS:
+		kvm->arch.disabled_quirks = cap->args[0];
+		r = 0;
+		break;
+	default:
+		r = -EINVAL;
+		break;
+	}
+	return r;
+}
+
 long kvm_arch_vm_ioctl(struct file *filp,
 		       unsigned int ioctl, unsigned long arg)
 {
@@ -4099,7 +4121,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
 		r = 0;
 		break;
 	}
+	case KVM_ENABLE_CAP: {
+		struct kvm_enable_cap cap;
 
+		r = -EFAULT;
+		if (copy_from_user(&cap, argp, sizeof(cap)))
+			goto out;
+		r = kvm_vm_ioctl_enable_cap(kvm, &cap);
+		break;
+	}
 	default:
 		r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
 	}
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 4b60056776d1..75bd9f7fd846 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -814,6 +814,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_INJECT_IRQ 113
 #define KVM_CAP_S390_IRQ_STATE 114
 #define KVM_CAP_PPC_HWRNG 115
+#define KVM_CAP_DISABLE_QUIRKS 116
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
cgit 


From e520af48c7e5acae5f17f82a79ba7ab7cf156f3b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 6 May 2015 14:26:25 -0700
Subject: tcp: add TCPWinProbe and TCPKeepAlive SNMP counters

Diagnosing problems related to Window Probes has been hard because
we lack a counter.

TCPWinProbe counts the number of ACK packets a sender has to send
at regular intervals to make sure a reverse ACK packet opening back
a window had not been lost.

TCPKeepAlive counts the number of ACK packets sent to keep TCP
flows alive (SO_KEEPALIVE)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Acked-by: Nandita Dukkipati <nanditad@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h         |  2 +-
 include/uapi/linux/snmp.h |  2 ++
 net/ipv4/proc.c           |  2 ++
 net/ipv4/tcp_output.c     | 13 +++++++------
 net/ipv4/tcp_timer.c      |  2 +-
 5 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 7a2248a35b13..b8ea12880fd9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -527,7 +527,7 @@ int tcp_fragment(struct sock *, struct sk_buff *, u32, unsigned int, gfp_t);
 
 void tcp_send_probe0(struct sock *);
 void tcp_send_partial(struct sock *);
-int tcp_write_wakeup(struct sock *);
+int tcp_write_wakeup(struct sock *, int mib);
 void tcp_send_fin(struct sock *sk);
 void tcp_send_active_reset(struct sock *sk, gfp_t priority);
 int tcp_send_synack(struct sock *);
diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h
index 6a6fb747c78d..eee8968407f0 100644
--- a/include/uapi/linux/snmp.h
+++ b/include/uapi/linux/snmp.h
@@ -276,6 +276,8 @@ enum
 	LINUX_MIB_TCPACKSKIPPEDFINWAIT2,	/* TCPACKSkippedFinWait2 */
 	LINUX_MIB_TCPACKSKIPPEDTIMEWAIT,	/* TCPACKSkippedTimeWait */
 	LINUX_MIB_TCPACKSKIPPEDCHALLENGE,	/* TCPACKSkippedChallenge */
+	LINUX_MIB_TCPWINPROBE,			/* TCPWinProbe */
+	LINUX_MIB_TCPKEEPALIVE,			/* TCPKeepAlive */
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index e1f3b911dd1e..da5d483e236a 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -298,6 +298,8 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2),
 	SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT),
 	SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE),
+	SNMP_MIB_ITEM("TCPWinProbe", LINUX_MIB_TCPWINPROBE),
+	SNMP_MIB_ITEM("TCPKeepAlive", LINUX_MIB_TCPKEEPALIVE),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b76c719e1979..7386d32cd670 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3382,7 +3382,7 @@ EXPORT_SYMBOL_GPL(tcp_send_ack);
  * one is with SEG.SEQ=SND.UNA to deliver urgent pointer, another is
  * out-of-date with SND.UNA-1 to probe window.
  */
-static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
+static int tcp_xmit_probe_skb(struct sock *sk, int urgent, int mib)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -3400,6 +3400,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent)
 	 */
 	tcp_init_nondata_skb(skb, tp->snd_una - !urgent, TCPHDR_ACK);
 	skb_mstamp_get(&skb->skb_mstamp);
+	NET_INC_STATS_BH(sock_net(sk), mib);
 	return tcp_transmit_skb(sk, skb, 0, GFP_ATOMIC);
 }
 
@@ -3407,12 +3408,12 @@ void tcp_send_window_probe(struct sock *sk)
 {
 	if (sk->sk_state == TCP_ESTABLISHED) {
 		tcp_sk(sk)->snd_wl1 = tcp_sk(sk)->rcv_nxt - 1;
-		tcp_xmit_probe_skb(sk, 0);
+		tcp_xmit_probe_skb(sk, 0, LINUX_MIB_TCPWINPROBE);
 	}
 }
 
 /* Initiate keepalive or window probe from timer. */
-int tcp_write_wakeup(struct sock *sk)
+int tcp_write_wakeup(struct sock *sk, int mib)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
@@ -3449,8 +3450,8 @@ int tcp_write_wakeup(struct sock *sk)
 		return err;
 	} else {
 		if (between(tp->snd_up, tp->snd_una + 1, tp->snd_una + 0xFFFF))
-			tcp_xmit_probe_skb(sk, 1);
-		return tcp_xmit_probe_skb(sk, 0);
+			tcp_xmit_probe_skb(sk, 1, mib);
+		return tcp_xmit_probe_skb(sk, 0, mib);
 	}
 }
 
@@ -3464,7 +3465,7 @@ void tcp_send_probe0(struct sock *sk)
 	unsigned long probe_max;
 	int err;
 
-	err = tcp_write_wakeup(sk);
+	err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE);
 
 	if (tp->packets_out || !tcp_send_head(sk)) {
 		/* Cancel probe timer, if it is not required. */
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8c65dc147d8b..65bf670e8714 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -616,7 +616,7 @@ static void tcp_keepalive_timer (unsigned long data)
 			tcp_write_err(sk);
 			goto out;
 		}
-		if (tcp_write_wakeup(sk) <= 0) {
+		if (tcp_write_wakeup(sk, LINUX_MIB_TCPKEEPALIVE) <= 0) {
 			icsk->icsk_probes_out++;
 			elapsed = keepalive_intvl_when(tp);
 		} else {
-- 
cgit 


From 59324cf35aba5336b611074028777838a963d03b Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 7 May 2015 11:02:53 +0200
Subject: netlink: allow to listen "all" netns

More accurately, listen all netns that have a nsid assigned into the netns
where the netlink socket is opened.
For this purpose, a netlink socket option is added:
NETLINK_LISTEN_ALL_NSID. When this option is set on a netlink socket, this
socket will receive netlink notifications from all netns that have a nsid
assigned into the netns where the socket has been opened. The nsid is sent
to userland via an anscillary data.

With this patch, a daemon needs only one socket to listen many netns. This
is useful when the number of netns is high.

Because 0 is a valid value for a nsid, the field nsid_is_set indicates if
the field nsid is valid or not. skb->cb is initialized to 0 on skb
allocation, thus we are sure that we will never send a nsid 0 by error to
the userland.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netlink.h      |  2 ++
 include/net/net_namespace.h  |  2 ++
 include/uapi/linux/netlink.h |  1 +
 net/core/net_namespace.c     | 10 ++++++++-
 net/netlink/af_netlink.c     | 52 +++++++++++++++++++++++++++++++++++++++-----
 5 files changed, 61 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 6835c1279df7..9120edb650a0 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -28,6 +28,8 @@ struct netlink_skb_parms {
 	__u32			dst_group;
 	__u32			flags;
 	struct sock		*sk;
+	bool			nsid_is_set;
+	int			nsid;
 };
 
 #define NETLINK_CB(skb)		(*(struct netlink_skb_parms*)&((skb)->cb))
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 6d1e2eae32fb..3f850acc844e 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -272,6 +272,8 @@ static inline struct net *read_pnet(const possible_net_t *pnet)
 #endif
 
 int peernet2id_alloc(struct net *net, struct net *peer);
+int peernet2id(struct net *net, struct net *peer);
+bool peernet_has_id(struct net *net, struct net *peer);
 struct net *get_net_ns_by_id(struct net *net, int id);
 
 struct pernet_operations {
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 1a85940f8ab7..3e34b7d702f8 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -108,6 +108,7 @@ struct nlmsgerr {
 #define NETLINK_NO_ENOBUFS	5
 #define NETLINK_RX_RING		6
 #define NETLINK_TX_RING		7
+#define NETLINK_LISTEN_ALL_NSID	8
 
 struct nl_pktinfo {
 	__u32	group;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index ae5008b097de..a665bf490c88 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -229,7 +229,7 @@ int peernet2id_alloc(struct net *net, struct net *peer)
 EXPORT_SYMBOL(peernet2id_alloc);
 
 /* This function returns, if assigned, the id of a peer netns. */
-static int peernet2id(struct net *net, struct net *peer)
+int peernet2id(struct net *net, struct net *peer)
 {
 	unsigned long flags;
 	int id;
@@ -240,6 +240,14 @@ static int peernet2id(struct net *net, struct net *peer)
 	return id;
 }
 
+/* This function returns true is the peer netns has an id assigned into the
+ * current netns.
+ */
+bool peernet_has_id(struct net *net, struct net *peer)
+{
+	return peernet2id(net, peer) >= 0;
+}
+
 struct net *get_net_ns_by_id(struct net *net, int id)
 {
 	unsigned long flags;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bf7f56d7a9aa..a5fff75accf8 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -83,6 +83,7 @@ struct listeners {
 #define NETLINK_F_RECV_PKTINFO		0x2
 #define NETLINK_F_BROADCAST_SEND_ERROR	0x4
 #define NETLINK_F_RECV_NO_ENOBUFS	0x8
+#define NETLINK_F_LISTEN_ALL_NSID	0x10
 
 static inline int netlink_is_kernel(struct sock *sk)
 {
@@ -1932,8 +1933,17 @@ static void do_one_broadcast(struct sock *sk,
 	    !test_bit(p->group - 1, nlk->groups))
 		return;
 
-	if (!net_eq(sock_net(sk), p->net))
-		return;
+	if (!net_eq(sock_net(sk), p->net)) {
+		if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+			return;
+
+		if (!peernet_has_id(sock_net(sk), p->net))
+			return;
+
+		if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
+				     CAP_NET_BROADCAST))
+			return;
+	}
 
 	if (p->failure) {
 		netlink_overrun(sk);
@@ -1959,13 +1969,22 @@ static void do_one_broadcast(struct sock *sk,
 		p->failure = 1;
 		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
 			p->delivery_failure = 1;
-	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+		goto out;
+	}
+	if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
-	} else if (sk_filter(sk, p->skb2)) {
+		goto out;
+	}
+	if (sk_filter(sk, p->skb2)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
-	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+		goto out;
+	}
+	NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
+	NETLINK_CB(p->skb2).nsid_is_set = true;
+	val = netlink_broadcast_deliver(sk, p->skb2);
+	if (val < 0) {
 		netlink_overrun(sk);
 		if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
 			p->delivery_failure = 1;
@@ -1974,6 +1993,7 @@ static void do_one_broadcast(struct sock *sk,
 		p->delivered = 1;
 		p->skb2 = NULL;
 	}
+out:
 	sock_put(sk);
 }
 
@@ -2202,6 +2222,16 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		break;
 	}
 #endif /* CONFIG_NETLINK_MMAP */
+	case NETLINK_LISTEN_ALL_NSID:
+		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
+			return -EPERM;
+
+		if (val)
+			nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
+		else
+			nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
+		err = 0;
+		break;
 	default:
 		err = -ENOPROTOOPT;
 	}
@@ -2268,6 +2298,16 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
 	put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
 }
 
+static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
+					 struct sk_buff *skb)
+{
+	if (!NETLINK_CB(skb).nsid_is_set)
+		return;
+
+	put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
+		 &NETLINK_CB(skb).nsid);
+}
+
 static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 {
 	struct sock *sk = sock->sk;
@@ -2421,6 +2461,8 @@ static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
 
 	if (nlk->flags & NETLINK_F_RECV_PKTINFO)
 		netlink_cmsg_recv_pktinfo(msg, skb);
+	if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+		netlink_cmsg_listen_all_nsid(sk, msg, skb);
 
 	memset(&scm, 0, sizeof(scm));
 	scm.creds = *NETLINK_CREDS(skb);
-- 
cgit 


From 78f5b899195019f71f7593c604d75ca61658eae3 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Thu, 7 May 2015 08:08:51 -0700
Subject: mpls: Change reserved label names to be consistent with netbsd

Since these are now visible to userspace it is nice to be consistent
with BSD (sys/netmpls/mpls.h in netBSD).

Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mpls.h | 12 ++++++------
 net/mpls/af_mpls.c        | 18 +++++++++---------
 2 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/mpls.h b/include/uapi/linux/mpls.h
index 0fe6ea5a41d5..139d4dd1cab8 100644
--- a/include/uapi/linux/mpls.h
+++ b/include/uapi/linux/mpls.h
@@ -32,13 +32,13 @@ struct mpls_label {
 #define MPLS_LS_TTL_SHIFT       0
 
 /* Reserved labels */
-#define MPLS_LABEL_IPV4_EXPLICIT_NULL	0 /* RFC3032 */
-#define MPLS_LABEL_ROUTER_ALERT		1 /* RFC3032 */
-#define MPLS_LABEL_IPV6_EXPLICIT_NULL	2 /* RFC3032 */
-#define MPLS_LABEL_IMPLICIT_NULL	3 /* RFC3032 */
-#define MPLS_LABEL_ENTROPY_INDICATOR	7 /* RFC6790 */
+#define MPLS_LABEL_IPV4NULL		0 /* RFC3032 */
+#define MPLS_LABEL_RTALERT		1 /* RFC3032 */
+#define MPLS_LABEL_IPV6NULL		2 /* RFC3032 */
+#define MPLS_LABEL_IMPLNULL		3 /* RFC3032 */
+#define MPLS_LABEL_ENTROPY		7 /* RFC6790 */
 #define MPLS_LABEL_GAL			13 /* RFC5586 */
-#define MPLS_LABEL_OAM_ALERT		14 /* RFC3429 */
+#define MPLS_LABEL_OAMALERT		14 /* RFC3429 */
 #define MPLS_LABEL_EXTENSION		15 /* RFC7274 */
 
 #endif /* _UAPI_MPLS_H */
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index b6eb7615960a..7b3f732269e4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -647,7 +647,7 @@ int nla_get_labels(const struct nlattr *nla,
 			return -EINVAL;
 
 		switch (dec.label) {
-		case MPLS_LABEL_IMPLICIT_NULL:
+		case MPLS_LABEL_IMPLNULL:
 			/* RFC3032: This is a label that an LSR may
 			 * assign and distribute, but which never
 			 * actually appears in the encapsulation.
@@ -935,7 +935,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 	}
 
 	/* In case the predefined labels need to be populated */
-	if (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL) {
+	if (limit > MPLS_LABEL_IPV4NULL) {
 		struct net_device *lo = net->loopback_dev;
 		rt0 = mpls_rt_alloc(lo->addr_len);
 		if (!rt0)
@@ -945,7 +945,7 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 		rt0->rt_via_table = NEIGH_LINK_TABLE;
 		memcpy(rt0->rt_via, lo->dev_addr, lo->addr_len);
 	}
-	if (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL) {
+	if (limit > MPLS_LABEL_IPV6NULL) {
 		struct net_device *lo = net->loopback_dev;
 		rt2 = mpls_rt_alloc(lo->addr_len);
 		if (!rt2)
@@ -973,15 +973,15 @@ static int resize_platform_label_table(struct net *net, size_t limit)
 	memcpy(labels, old, cp_size);
 
 	/* If needed set the predefined labels */
-	if ((old_limit <= MPLS_LABEL_IPV6_EXPLICIT_NULL) &&
-	    (limit > MPLS_LABEL_IPV6_EXPLICIT_NULL)) {
-		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6_EXPLICIT_NULL], rt2);
+	if ((old_limit <= MPLS_LABEL_IPV6NULL) &&
+	    (limit > MPLS_LABEL_IPV6NULL)) {
+		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV6NULL], rt2);
 		rt2 = NULL;
 	}
 
-	if ((old_limit <= MPLS_LABEL_IPV4_EXPLICIT_NULL) &&
-	    (limit > MPLS_LABEL_IPV4_EXPLICIT_NULL)) {
-		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4_EXPLICIT_NULL], rt0);
+	if ((old_limit <= MPLS_LABEL_IPV4NULL) &&
+	    (limit > MPLS_LABEL_IPV4NULL)) {
+		RCU_INIT_POINTER(labels[MPLS_LABEL_IPV4NULL], rt0);
 		rt0 = NULL;
 	}
 
-- 
cgit 


From 80ba92fa1a92dea128283f69f55b02242e213650 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 8 May 2015 15:05:12 -0700
Subject: codel: add ce_threshold attribute

For DCTCP or similar ECN based deployments on fabrics with shallow
buffers, hosts are responsible for a good part of the buffering.

This patch adds an optional ce_threshold to codel & fq_codel qdiscs,
so that DCTCP can have feedback from queuing in the host.

A DCTCP enabled egress port simply have a queue occupancy threshold
above which ECT packets get CE mark.

In codel language this translates to a sojourn time, so that one doesn't
have to worry about bytes or bandwidth but delays.

This makes the host an active participant in the health of the whole
network.

This also helps experimenting DCTCP in a setup without DCTCP compliant
fabric.

On following example, ce_threshold is set to 1ms, and we can see from
'ldelay xxx us' that TCP is not trying to go around the 5ms codel
target.

Queue has more capacity to absorb inelastic bursts (say from UDP
traffic), as queues are maintained to an optimal level.

lpaa23:~# ./tc -s -d qd sh dev eth1
qdisc mq 1: dev eth1 root
 Sent 87910654696 bytes 58065331 pkt (dropped 0, overlimits 0 requeues 42961)
 backlog 3108242b 364p requeues 42961
qdisc codel 8063: dev eth1 parent 1:1 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms
 Sent 7363778701 bytes 4863809 pkt (dropped 0, overlimits 0 requeues 5503)
 rate 2348Mbit 193919pps backlog 255866b 46p requeues 5503
  count 0 lastcount 0 ldelay 1.0ms drop_next 0us
  maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 72384
qdisc codel 8064: dev eth1 parent 1:2 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms
 Sent 7636486190 bytes 5043942 pkt (dropped 0, overlimits 0 requeues 5186)
 rate 2319Mbit 191538pps backlog 207418b 64p requeues 5186
  count 0 lastcount 0 ldelay 694us drop_next 0us
  maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 69873
qdisc codel 8065: dev eth1 parent 1:3 limit 1000p target 5.0ms ce_threshold 1.0ms interval 100.0ms
 Sent 11569360142 bytes 7641602 pkt (dropped 0, overlimits 0 requeues 5554)
 rate 3041Mbit 251096pps backlog 210446b 59p requeues 5554
  count 0 lastcount 0 ldelay 889us drop_next 0us
  maxpacket 68130 ecn_mark 0 drop_overlimit 0 ce_mark 37780
...

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Florian Westphal <fw@strlen.de>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Glenn Judd <glenn.judd@morganstanley.com>
Cc: Nandita Dukkipati <nanditad@google.com>
Cc: Neal Cardwell <ncardwell@google.com>
Cc: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/codel.h            | 12 +++++++++++-
 include/uapi/linux/pkt_sched.h |  4 ++++
 net/sched/sch_codel.c          | 15 +++++++++++++--
 net/sched/sch_fq_codel.c       | 15 ++++++++++++++-
 4 files changed, 42 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/codel.h b/include/net/codel.h
index aeee28081245..8c0f78f209e8 100644
--- a/include/net/codel.h
+++ b/include/net/codel.h
@@ -7,7 +7,7 @@
  *  Copyright (C) 2011-2012 Kathleen Nichols <nichols@pollere.com>
  *  Copyright (C) 2011-2012 Van Jacobson <van@pollere.net>
  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -119,11 +119,13 @@ static inline u32 codel_time_to_us(codel_time_t val)
 /**
  * struct codel_params - contains codel parameters
  * @target:	target queue size (in time units)
+ * @ce_threshold:  threshold for marking packets with ECN CE
  * @interval:	width of moving time window
  * @ecn:	is Explicit Congestion Notification enabled
  */
 struct codel_params {
 	codel_time_t	target;
+	codel_time_t	ce_threshold;
 	codel_time_t	interval;
 	bool		ecn;
 };
@@ -159,17 +161,22 @@ struct codel_vars {
  * @maxpacket:	largest packet we've seen so far
  * @drop_count:	temp count of dropped packets in dequeue()
  * ecn_mark:	number of packets we ECN marked instead of dropping
+ * ce_mark:	number of packets CE marked because sojourn time was above ce_threshold
  */
 struct codel_stats {
 	u32		maxpacket;
 	u32		drop_count;
 	u32		ecn_mark;
+	u32		ce_mark;
 };
 
+#define CODEL_DISABLED_THRESHOLD INT_MAX
+
 static void codel_params_init(struct codel_params *params)
 {
 	params->interval = MS2TIME(100);
 	params->target = MS2TIME(5);
+	params->ce_threshold = CODEL_DISABLED_THRESHOLD;
 	params->ecn = false;
 }
 
@@ -350,6 +357,9 @@ static struct sk_buff *codel_dequeue(struct Qdisc *sch,
 						    vars->rec_inv_sqrt);
 	}
 end:
+	if (skb && codel_time_after(vars->ldelay, params->ce_threshold) &&
+	    INET_ECN_set_ce(skb))
+		stats->ce_mark++;
 	return skb;
 }
 #endif
diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 534b84710745..69d88b309cc7 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -679,6 +679,7 @@ enum {
 	TCA_CODEL_LIMIT,
 	TCA_CODEL_INTERVAL,
 	TCA_CODEL_ECN,
+	TCA_CODEL_CE_THRESHOLD,
 	__TCA_CODEL_MAX
 };
 
@@ -695,6 +696,7 @@ struct tc_codel_xstats {
 	__u32	drop_overlimit; /* number of time max qdisc packet limit was hit */
 	__u32	ecn_mark;  /* number of packets we ECN marked instead of dropped */
 	__u32	dropping;  /* are we in dropping state ? */
+	__u32	ce_mark;   /* number of CE marked packets because of ce_threshold */
 };
 
 /* FQ_CODEL */
@@ -707,6 +709,7 @@ enum {
 	TCA_FQ_CODEL_ECN,
 	TCA_FQ_CODEL_FLOWS,
 	TCA_FQ_CODEL_QUANTUM,
+	TCA_FQ_CODEL_CE_THRESHOLD,
 	__TCA_FQ_CODEL_MAX
 };
 
@@ -730,6 +733,7 @@ struct tc_fq_codel_qd_stats {
 				 */
 	__u32	new_flows_len;	/* count of flows in new list */
 	__u32	old_flows_len;	/* count of flows in old list */
+	__u32	ce_mark;	/* packets above ce_threshold */
 };
 
 struct tc_fq_codel_cl_stats {
diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c
index de28f8e968e8..1474b6560fac 100644
--- a/net/sched/sch_codel.c
+++ b/net/sched/sch_codel.c
@@ -6,7 +6,7 @@
  *
  *  Implemented on linux by :
  *  Copyright (C) 2012 Michael D. Taht <dave.taht@bufferbloat.net>
- *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -109,6 +109,7 @@ static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = {
 	[TCA_CODEL_LIMIT]	= { .type = NLA_U32 },
 	[TCA_CODEL_INTERVAL]	= { .type = NLA_U32 },
 	[TCA_CODEL_ECN]		= { .type = NLA_U32 },
+	[TCA_CODEL_CE_THRESHOLD]= { .type = NLA_U32 },
 };
 
 static int codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -133,6 +134,12 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt)
 		q->params.target = ((u64)target * NSEC_PER_USEC) >> CODEL_SHIFT;
 	}
 
+	if (tb[TCA_CODEL_CE_THRESHOLD]) {
+		u64 val = nla_get_u32(tb[TCA_CODEL_CE_THRESHOLD]);
+
+		q->params.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
 	if (tb[TCA_CODEL_INTERVAL]) {
 		u32 interval = nla_get_u32(tb[TCA_CODEL_INTERVAL]);
 
@@ -201,7 +208,10 @@ static int codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 	    nla_put_u32(skb, TCA_CODEL_ECN,
 			q->params.ecn))
 		goto nla_put_failure;
-
+	if (q->params.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	    nla_put_u32(skb, TCA_CODEL_CE_THRESHOLD,
+			codel_time_to_us(q->params.ce_threshold)))
+		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -220,6 +230,7 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 		.ldelay		= codel_time_to_us(q->vars.ldelay),
 		.dropping	= q->vars.dropping,
 		.ecn_mark	= q->stats.ecn_mark,
+		.ce_mark	= q->stats.ce_mark,
 	};
 
 	if (q->vars.dropping) {
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index a6fc53d69513..778739786b32 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -6,7 +6,7 @@
  *	as published by the Free Software Foundation; either version
  *	2 of the License, or (at your option) any later version.
  *
- *  Copyright (C) 2012 Eric Dumazet <edumazet@google.com>
+ *  Copyright (C) 2012,2015 Eric Dumazet <edumazet@google.com>
  */
 
 #include <linux/module.h>
@@ -292,6 +292,7 @@ static const struct nla_policy fq_codel_policy[TCA_FQ_CODEL_MAX + 1] = {
 	[TCA_FQ_CODEL_ECN]	= { .type = NLA_U32 },
 	[TCA_FQ_CODEL_FLOWS]	= { .type = NLA_U32 },
 	[TCA_FQ_CODEL_QUANTUM]	= { .type = NLA_U32 },
+	[TCA_FQ_CODEL_CE_THRESHOLD] = { .type = NLA_U32 },
 };
 
 static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
@@ -322,6 +323,12 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt)
 		q->cparams.target = (target * NSEC_PER_USEC) >> CODEL_SHIFT;
 	}
 
+	if (tb[TCA_FQ_CODEL_CE_THRESHOLD]) {
+		u64 val = nla_get_u32(tb[TCA_FQ_CODEL_CE_THRESHOLD]);
+
+		q->cparams.ce_threshold = (val * NSEC_PER_USEC) >> CODEL_SHIFT;
+	}
+
 	if (tb[TCA_FQ_CODEL_INTERVAL]) {
 		u64 interval = nla_get_u32(tb[TCA_FQ_CODEL_INTERVAL]);
 
@@ -441,6 +448,11 @@ static int fq_codel_dump(struct Qdisc *sch, struct sk_buff *skb)
 			q->flows_cnt))
 		goto nla_put_failure;
 
+	if (q->cparams.ce_threshold != CODEL_DISABLED_THRESHOLD &&
+	    nla_put_u32(skb, TCA_FQ_CODEL_CE_THRESHOLD,
+			codel_time_to_us(q->cparams.ce_threshold)))
+		goto nla_put_failure;
+
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
@@ -459,6 +471,7 @@ static int fq_codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.qdisc_stats.drop_overlimit = q->drop_overlimit;
 	st.qdisc_stats.ecn_mark = q->cstats.ecn_mark;
 	st.qdisc_stats.new_flow_count = q->new_flow_count;
+	st.qdisc_stats.ce_mark = q->cstats.ce_mark;
 
 	list_for_each(pos, &q->new_flows)
 		st.qdisc_stats.new_flows_len++;
-- 
cgit 


From 171a42c38c6e1a5a076d6276e94e55a0b5b7868c Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Sat, 9 May 2015 00:01:58 -0700
Subject: bonding: add netlink support for sys prio, actor sys mac, and port
 key

Adds netlink support for the following bonding options:
* BOND_OPT_AD_ACTOR_SYS_PRIO
* BOND_OPT_AD_ACTOR_SYSTEM
* BOND_OPT_AD_USER_PORT_KEY

When setting the actor system mac address we assume the netlink message
contains a binary mac and not a string representation of a mac.

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
[jt: completed the setting side of the netlink attributes]
Signed-off-by: Jonathan Toppins <jtoppins@cumulusnetworks.com>
Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c | 50 ++++++++++++++++++++++++++++++++++++++
 drivers/net/bonding/bond_options.c | 30 ++++++++++++++++-------
 include/uapi/linux/if_link.h       |  3 +++
 3 files changed, 74 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index 7b1124366011..f7015eb4f8db 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -94,6 +94,10 @@ static const struct nla_policy bond_policy[IFLA_BOND_MAX + 1] = {
 	[IFLA_BOND_AD_LACP_RATE]	= { .type = NLA_U8 },
 	[IFLA_BOND_AD_SELECT]		= { .type = NLA_U8 },
 	[IFLA_BOND_AD_INFO]		= { .type = NLA_NESTED },
+	[IFLA_BOND_AD_ACTOR_SYS_PRIO]	= { .type = NLA_U16 },
+	[IFLA_BOND_AD_USER_PORT_KEY]	= { .type = NLA_U16 },
+	[IFLA_BOND_AD_ACTOR_SYSTEM]	= { .type = NLA_BINARY,
+					    .len  = ETH_ALEN },
 };
 
 static const struct nla_policy bond_slave_policy[IFLA_BOND_SLAVE_MAX + 1] = {
@@ -379,6 +383,36 @@ static int bond_changelink(struct net_device *bond_dev,
 		if (err)
 			return err;
 	}
+	if (data[IFLA_BOND_AD_ACTOR_SYS_PRIO]) {
+		int actor_sys_prio =
+			nla_get_u16(data[IFLA_BOND_AD_ACTOR_SYS_PRIO]);
+
+		bond_opt_initval(&newval, actor_sys_prio);
+		err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYS_PRIO, &newval);
+		if (err)
+			return err;
+	}
+
+	if (data[IFLA_BOND_AD_USER_PORT_KEY]) {
+		int port_key =
+			nla_get_u16(data[IFLA_BOND_AD_USER_PORT_KEY]);
+
+		bond_opt_initval(&newval, port_key);
+		err = __bond_opt_set(bond, BOND_OPT_AD_USER_PORT_KEY, &newval);
+		if (err)
+			return err;
+	}
+
+	if (data[IFLA_BOND_AD_ACTOR_SYSTEM]) {
+		if (nla_len(data[IFLA_BOND_AD_ACTOR_SYSTEM]) != ETH_ALEN)
+			return -EINVAL;
+
+		bond_opt_initval(&newval,
+				 nla_get_be64(data[IFLA_BOND_AD_ACTOR_SYSTEM]));
+		err = __bond_opt_set(bond, BOND_OPT_AD_ACTOR_SYSTEM, &newval);
+		if (err)
+			return err;
+	}
 	return 0;
 }
 
@@ -426,6 +460,9 @@ static size_t bond_get_size(const struct net_device *bond_dev)
 		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_ACTOR_KEY */
 		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_INFO_PARTNER_KEY*/
 		nla_total_size(ETH_ALEN) +    /* IFLA_BOND_AD_INFO_PARTNER_MAC*/
+		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_ACTOR_SYS_PRIO */
+		nla_total_size(sizeof(u16)) + /* IFLA_BOND_AD_USER_PORT_KEY */
+		nla_total_size(ETH_ALEN) + /* IFLA_BOND_AD_ACTOR_SYSTEM */
 		0;
 }
 
@@ -551,6 +588,19 @@ static int bond_fill_info(struct sk_buff *skb,
 	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 		struct ad_info info;
 
+		if (nla_put_u16(skb, IFLA_BOND_AD_ACTOR_SYS_PRIO,
+				bond->params.ad_actor_sys_prio))
+			goto nla_put_failure;
+
+		if (nla_put_u16(skb, IFLA_BOND_AD_USER_PORT_KEY,
+				bond->params.ad_user_port_key))
+			goto nla_put_failure;
+
+		if (nla_put(skb, IFLA_BOND_AD_ACTOR_SYSTEM,
+			    sizeof(bond->params.ad_actor_system),
+			    &bond->params.ad_actor_system))
+			goto nla_put_failure;
+
 		if (!bond_3ad_get_active_agg_info(bond, &info)) {
 			struct nlattr *nest;
 
diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c
index c85da05721e6..9a32bbd7724e 100644
--- a/drivers/net/bonding/bond_options.c
+++ b/drivers/net/bonding/bond_options.c
@@ -1394,7 +1394,7 @@ static int bond_option_tlb_dynamic_lb_set(struct bonding *bond,
 static int bond_option_ad_actor_sys_prio_set(struct bonding *bond,
 					     const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting ad_actor_sys_prio to (%llu)\n",
+	netdev_info(bond->dev, "Setting ad_actor_sys_prio to %llu\n",
 		    newval->value);
 
 	bond->params.ad_actor_sys_prio = newval->value;
@@ -1405,24 +1405,36 @@ static int bond_option_ad_actor_system_set(struct bonding *bond,
 					   const struct bond_opt_value *newval)
 {
 	u8 macaddr[ETH_ALEN];
+	u8 *mac;
 	int i;
 
-	i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
-		   &macaddr[0], &macaddr[1], &macaddr[2],
-		   &macaddr[3], &macaddr[4], &macaddr[5]);
-	if (i != ETH_ALEN || !is_valid_ether_addr(macaddr)) {
-		netdev_err(bond->dev, "Invalid MAC address.\n");
-		return -EINVAL;
+	if (newval->string) {
+		i = sscanf(newval->string, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+			   &macaddr[0], &macaddr[1], &macaddr[2],
+			   &macaddr[3], &macaddr[4], &macaddr[5]);
+		if (i != ETH_ALEN)
+			goto err;
+		mac = macaddr;
+	} else {
+		mac = (u8 *)&newval->value;
 	}
 
-	ether_addr_copy(bond->params.ad_actor_system, macaddr);
+	if (!is_valid_ether_addr(mac))
+		goto err;
+
+	netdev_info(bond->dev, "Setting ad_actor_system to %pM\n", mac);
+	ether_addr_copy(bond->params.ad_actor_system, mac);
 	return 0;
+
+err:
+	netdev_err(bond->dev, "Invalid MAC address.\n");
+	return -EINVAL;
 }
 
 static int bond_option_ad_user_port_key_set(struct bonding *bond,
 					    const struct bond_opt_value *newval)
 {
-	netdev_info(bond->dev, "Setting ad_user_port_key to (%llu)\n",
+	netdev_info(bond->dev, "Setting ad_user_port_key to %llu\n",
 		    newval->value);
 
 	bond->params.ad_user_port_key = newval->value;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index d9cd19214b98..6d6e502e1051 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -417,6 +417,9 @@ enum {
 	IFLA_BOND_AD_LACP_RATE,
 	IFLA_BOND_AD_SELECT,
 	IFLA_BOND_AD_INFO,
+	IFLA_BOND_AD_ACTOR_SYS_PRIO,
+	IFLA_BOND_AD_USER_PORT_KEY,
+	IFLA_BOND_AD_ACTOR_SYSTEM,
 	__IFLA_BOND_MAX,
 };
 
-- 
cgit 


From dc199241624a2fd85d9b0d8303babd60feadd0e6 Mon Sep 17 00:00:00 2001
From: Peter Seiderer <ps.report@gmx.net>
Date: Mon, 4 May 2015 07:51:05 -0300
Subject: [media] videodev2: Add V4L2_BUF_FLAG_LAST

This v4l2_buffer flag can be used by drivers to mark a capture buffer
as the last generated buffer, for example after a V4L2_DEC_CMD_STOP
command was issued.

Signed-off-by: Peter Seiderer <ps.report@gmx.net>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Acked-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Kamil Debski <k.debski@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/trace/events/v4l2.h    | 3 ++-
 include/uapi/linux/videodev2.h | 2 ++
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/trace/events/v4l2.h b/include/trace/events/v4l2.h
index 20112170ff11..89d0497c058a 100644
--- a/include/trace/events/v4l2.h
+++ b/include/trace/events/v4l2.h
@@ -83,7 +83,8 @@ SHOW_FIELD
 		{ V4L2_BUF_FLAG_TIMESTAMP_MASK,	     "TIMESTAMP_MASK" },      \
 		{ V4L2_BUF_FLAG_TIMESTAMP_UNKNOWN,   "TIMESTAMP_UNKNOWN" },   \
 		{ V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC, "TIMESTAMP_MONOTONIC" }, \
-		{ V4L2_BUF_FLAG_TIMESTAMP_COPY,	     "TIMESTAMP_COPY" })
+		{ V4L2_BUF_FLAG_TIMESTAMP_COPY,	     "TIMESTAMP_COPY" },      \
+		{ V4L2_BUF_FLAG_LAST,                "LAST" })
 
 #define show_timecode_flags(flags)					  \
 	__print_flags(flags, "|",					  \
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index fa376f7666ba..0f5a4673f3e4 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -810,6 +810,8 @@ struct v4l2_buffer {
 #define V4L2_BUF_FLAG_TSTAMP_SRC_MASK		0x00070000
 #define V4L2_BUF_FLAG_TSTAMP_SRC_EOF		0x00000000
 #define V4L2_BUF_FLAG_TSTAMP_SRC_SOE		0x00010000
+/* mem2mem encoder/decoder */
+#define V4L2_BUF_FLAG_LAST			0x00100000
 
 /**
  * struct v4l2_exportbuffer - export of video buffer as DMABUF file descriptor
-- 
cgit 


From 68cbbc3a9d1fc231810b2490bca73b3b444ef542 Mon Sep 17 00:00:00 2001
From: Gavin Shan <gwshan@linux.vnet.ibm.com>
Date: Thu, 26 Mar 2015 16:42:09 +1100
Subject: drivers/vfio: Support EEH error injection

The patch adds one more EEH sub-command (VFIO_EEH_PE_INJECT_ERR)
to inject the specified EEH error, which is represented by
(struct vfio_eeh_pe_err), to the indicated PE for testing purpose.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/vfio.txt        | 12 ++++++++++++
 drivers/vfio/vfio_spapr_eeh.c | 10 ++++++++++
 include/uapi/linux/vfio.h     | 14 +++++++++++++-
 3 files changed, 35 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 96978eced341..4c746a7e717a 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -385,6 +385,18 @@ The code flow from the example above should be slightly changed:
 
 	....
 
+	/* Inject EEH error, which is expected to be caused by 32-bits
+	 * config load.
+	 */
+	pe_op.op = VFIO_EEH_PE_INJECT_ERR;
+	pe_op.err.type = EEH_ERR_TYPE_32;
+	pe_op.err.func = EEH_ERR_FUNC_LD_CFG_ADDR;
+	pe_op.err.addr = 0ul;
+	pe_op.err.mask = 0ul;
+	ioctl(container, VFIO_EEH_PE_OP, &pe_op);
+
+	....
+
 	/* When 0xFF's returned from reading PCI config space or IO BARs
 	 * of the PCI device. Check the PE's state to see if that has been
 	 * frozen.
diff --git a/drivers/vfio/vfio_spapr_eeh.c b/drivers/vfio/vfio_spapr_eeh.c
index 5fa42db769ee..38edeb4729a9 100644
--- a/drivers/vfio/vfio_spapr_eeh.c
+++ b/drivers/vfio/vfio_spapr_eeh.c
@@ -85,6 +85,16 @@ long vfio_spapr_iommu_eeh_ioctl(struct iommu_group *group,
 		case VFIO_EEH_PE_CONFIGURE:
 			ret = eeh_pe_configure(pe);
 			break;
+		case VFIO_EEH_PE_INJECT_ERR:
+			minsz = offsetofend(struct vfio_eeh_pe_op, err.mask);
+			if (op.argsz < minsz)
+				return -EINVAL;
+			if (copy_from_user(&op, (void __user *)arg, minsz))
+				return -EFAULT;
+
+			ret = eeh_pe_inject_err(pe, op.err.type, op.err.func,
+						op.err.addr, op.err.mask);
+			break;
 		default:
 			ret = -EINVAL;
 		}
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index b57b750c222f..e4fa1995f613 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -470,12 +470,23 @@ struct vfio_iommu_spapr_tce_info {
  * - unfreeze IO/DMA for frozen PE;
  * - read PE state;
  * - reset PE;
- * - configure PE.
+ * - configure PE;
+ * - inject EEH error.
  */
+struct vfio_eeh_pe_err {
+	__u32 type;
+	__u32 func;
+	__u64 addr;
+	__u64 mask;
+};
+
 struct vfio_eeh_pe_op {
 	__u32 argsz;
 	__u32 flags;
 	__u32 op;
+	union {
+		struct vfio_eeh_pe_err err;
+	};
 };
 
 #define VFIO_EEH_PE_DISABLE		0	/* Disable EEH functionality */
@@ -492,6 +503,7 @@ struct vfio_eeh_pe_op {
 #define VFIO_EEH_PE_RESET_HOT		6	/* Assert hot reset          */
 #define VFIO_EEH_PE_RESET_FUNDAMENTAL	7	/* Assert fundamental reset  */
 #define VFIO_EEH_PE_CONFIGURE		8	/* PE configuration          */
+#define VFIO_EEH_PE_INJECT_ERR		9	/* Inject EEH error          */
 
 #define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
 
-- 
cgit 


From a3eb95f891d6130b1fc03dd07a8b54cf0a5c8ab8 Mon Sep 17 00:00:00 2001
From: David Ward <david.ward@ll.mit.edu>
Date: Sat, 9 May 2015 22:01:46 -0400
Subject: net_sched: gred: add TCA_GRED_LIMIT attribute

In a GRED qdisc, if the default "virtual queue" (VQ) does not have drop
parameters configured, then packets for the default VQ are not subjected
to RED and are only dropped if the queue is larger than the net_device's
tx_queue_len. This behavior is useful for WRED mode, since these packets
will still influence the calculated average queue length and (therefore)
the drop probability for all of the other VQs. However, for some drivers
tx_queue_len is zero. In other cases the user may wish to make the limit
the same for all VQs (including the default VQ with no drop parameters).

This change adds a TCA_GRED_LIMIT attribute to set the GRED queue limit,
in bytes, during qdisc setup. (This limit is in bytes to be consistent
with the drop parameters.) The default limit is the same as for a bfifo
queue (tx_queue_len * psched_mtu). If the drop parameters of any VQ are
configured with a smaller limit than the GRED queue limit, that VQ will
still observe the smaller limit instead.

Signed-off-by: David Ward <david.ward@ll.mit.edu>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  3 ++-
 net/sched/sch_gred.c           | 28 ++++++++++++++++++++++++----
 2 files changed, 26 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 69d88b309cc7..8d2530daca9f 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -268,7 +268,8 @@ enum {
        TCA_GRED_STAB,
        TCA_GRED_DPS,
        TCA_GRED_MAX_P,
-	   __TCA_GRED_MAX,
+       TCA_GRED_LIMIT,
+       __TCA_GRED_MAX,
 };
 
 #define TCA_GRED_MAX (__TCA_GRED_MAX - 1)
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index a4ca4517cdc8..826e2994152b 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -165,7 +165,8 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 			 * if no default DP has been configured. This
 			 * allows for DP flows to be left untouched.
 			 */
-			if (skb_queue_len(&sch->q) < qdisc_dev(sch)->tx_queue_len)
+			if (likely(sch->qstats.backlog + qdisc_pkt_len(skb) <=
+					sch->limit))
 				return qdisc_enqueue_tail(skb, sch);
 			else
 				goto drop;
@@ -397,7 +398,10 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
 
 	q->DP = dp;
 	q->prio = prio;
-	q->limit = ctl->limit;
+	if (ctl->limit > sch->limit)
+		q->limit = sch->limit;
+	else
+		q->limit = ctl->limit;
 
 	if (q->backlog == 0)
 		red_end_of_idle_period(&q->vars);
@@ -414,6 +418,7 @@ static const struct nla_policy gred_policy[TCA_GRED_MAX + 1] = {
 	[TCA_GRED_STAB]		= { .len = 256 },
 	[TCA_GRED_DPS]		= { .len = sizeof(struct tc_gred_sopt) },
 	[TCA_GRED_MAX_P]	= { .type = NLA_U32 },
+	[TCA_GRED_LIMIT]	= { .type = NLA_U32 },
 };
 
 static int gred_change(struct Qdisc *sch, struct nlattr *opt)
@@ -433,11 +438,15 @@ static int gred_change(struct Qdisc *sch, struct nlattr *opt)
 	if (err < 0)
 		return err;
 
-	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL)
+	if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
+		if (tb[TCA_GRED_LIMIT] != NULL)
+			sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
 		return gred_change_table_def(sch, opt);
+	}
 
 	if (tb[TCA_GRED_PARMS] == NULL ||
-	    tb[TCA_GRED_STAB] == NULL)
+	    tb[TCA_GRED_STAB] == NULL ||
+	    tb[TCA_GRED_LIMIT] != NULL)
 		return -EINVAL;
 
 	max_P = tb[TCA_GRED_MAX_P] ? nla_get_u32(tb[TCA_GRED_MAX_P]) : 0;
@@ -501,6 +510,14 @@ static int gred_init(struct Qdisc *sch, struct nlattr *opt)
 	if (tb[TCA_GRED_PARMS] || tb[TCA_GRED_STAB])
 		return -EINVAL;
 
+	if (tb[TCA_GRED_LIMIT])
+		sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
+	else {
+		u32 qlen = qdisc_dev(sch)->tx_queue_len ? : 1;
+
+		sch->limit = qlen * psched_mtu(qdisc_dev(sch));
+	}
+
 	return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
 }
 
@@ -531,6 +548,9 @@ static int gred_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_GRED_MAX_P, sizeof(max_p), max_p))
 		goto nla_put_failure;
 
+	if (nla_put_u32(skb, TCA_GRED_LIMIT, sch->limit))
+		goto nla_put_failure;
+
 	parms = nla_nest_start(skb, TCA_GRED_PARMS);
 	if (parms == NULL)
 		goto nla_put_failure;
-- 
cgit 


From e578d9c02587d57bfa7b560767c698a668a468c6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 May 2015 19:50:41 +0200
Subject: net: sched: use counter to break reclassify loops

Seems all we want here is to avoid endless 'goto reclassify' loop.
tc_classify_compat even resets this counter when something other
than TC_ACT_RECLASSIFY is returned, so this skb-counter doesn't
break hypothetical loops induced by something other than perpetual
TC_ACT_RECLASSIFY return values.

skb_act_clone is now identical to skb_clone, so just use that.

Tested with following (bogus) filter:
tc filter add dev eth0 parent ffff: \
 protocol ip u32 match u32 0 0 police rate 10Kbit burst \
 64000 mtu 1500 action reclassify

Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Florian Westphal <fw@strlen.de>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/tc-actions-env-rules.txt |  4 ----
 include/net/sch_generic.h                         | 15 ---------------
 include/uapi/linux/pkt_cls.h                      |  2 +-
 net/sched/act_mirred.c                            |  2 +-
 net/sched/sch_api.c                               | 12 +++---------
 5 files changed, 5 insertions(+), 30 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt
index 95c71716b2e2..f37814693ad3 100644
--- a/Documentation/networking/tc-actions-env-rules.txt
+++ b/Documentation/networking/tc-actions-env-rules.txt
@@ -8,10 +8,6 @@ For example if your action queues a packet to be processed later,
 or intentionally branches by redirecting a packet, then you need to
 clone the packet.
 
-There are certain fields in the skb tc_verd that need to be reset so we
-avoid loops, etc.  A few are generic enough that skb_act_clone()
-resets them for you, so invoke skb_act_clone() rather than skb_clone().
-
 2) If you munge any packet thou shalt call pskb_expand_head in the case
 someone else is referencing the skb. After that you "own" the skb.
 
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 1b0a2e88ed2b..2738f6f87908 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -739,21 +739,6 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen)
 	return rtab->data[slot];
 }
 
-#ifdef CONFIG_NET_CLS_ACT
-static inline struct sk_buff *skb_act_clone(struct sk_buff *skb, gfp_t gfp_mask,
-					    int action)
-{
-	struct sk_buff *n;
-
-	n = skb_clone(skb, gfp_mask);
-
-	if (n) {
-		n->tc_verd = SET_TC_VERD(n->tc_verd, 0);
-	}
-	return n;
-}
-#endif
-
 struct psched_ratecfg {
 	u64	rate_bytes_ps; /* bytes per second */
 	u32	mult;
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 596ffa0c7084..ffc112c8e1c2 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -44,13 +44,13 @@ bits 9,10,11: redirect counter -  redirect TTL. Loop avoidance
 #define TC_OK2MUNGE        _TC_MAKEMASK1(1)
 #define SET_TC_OK2MUNGE(v)   ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE))
 #define CLR_TC_OK2MUNGE(v)   ( v & ~TC_OK2MUNGE)
-#endif
 
 #define S_TC_VERD          _TC_MAKE32(2)
 #define M_TC_VERD          _TC_MAKEMASK(4,S_TC_VERD)
 #define G_TC_VERD(x)       _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD)
 #define V_TC_VERD(x)       _TC_MAKEVALUE(x,S_TC_VERD)
 #define SET_TC_VERD(v,n)   ((V_TC_VERD(n)) | (v & ~M_TC_VERD))
+#endif
 
 #define S_TC_FROM          _TC_MAKE32(6)
 #define M_TC_FROM          _TC_MAKEMASK(2,S_TC_FROM)
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 3f63ceac8e01..a42a3b257226 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -151,7 +151,7 @@ static int tcf_mirred(struct sk_buff *skb, const struct tc_action *a,
 	}
 
 	at = G_TC_AT(skb->tc_verd);
-	skb2 = skb_act_clone(skb, GFP_ATOMIC, m->tcf_action);
+	skb2 = skb_clone(skb, GFP_ATOMIC);
 	if (skb2 == NULL)
 		goto out;
 
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index ad9eed70bc8f..0b74dc0ede9c 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -1816,13 +1816,8 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp,
 			continue;
 		err = tp->classify(skb, tp, res);
 
-		if (err >= 0) {
-#ifdef CONFIG_NET_CLS_ACT
-			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
-				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
-#endif
+		if (err >= 0)
 			return err;
-		}
 	}
 	return -1;
 }
@@ -1834,23 +1829,22 @@ int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 	int err = 0;
 #ifdef CONFIG_NET_CLS_ACT
 	const struct tcf_proto *otp = tp;
+	int limit = 0;
 reclassify:
 #endif
 
 	err = tc_classify_compat(skb, tp, res);
 #ifdef CONFIG_NET_CLS_ACT
 	if (err == TC_ACT_RECLASSIFY) {
-		u32 verd = G_TC_VERD(skb->tc_verd);
 		tp = otp;
 
-		if (verd++ >= MAX_REC_LOOP) {
+		if (unlikely(limit++ >= MAX_REC_LOOP)) {
 			net_notice_ratelimited("%s: packet reclassify loop rule prio %u protocol %02x\n",
 					       tp->q->ops->id,
 					       tp->prio & 0xffff,
 					       ntohs(tp->protocol));
 			return TC_ACT_SHOT;
 		}
-		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
 		goto reclassify;
 	}
 #endif
-- 
cgit 


From 77b9900ef53ae047e36a37d13a2aa33bb2d60641 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@resnulli.us>
Date: Tue, 12 May 2015 14:56:21 +0200
Subject: tc: introduce Flower classifier

This patch introduces a flow-based filter. So far, the very essential
packet fields are supported.

This patch is only the first step. There is a lot of potential performance
improvements possible to implement. Also a lot of features are missing
now. They will be addressed in follow-up patches.

Signed-off-by: Jiri Pirko <jiri@resnulli.us>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  30 ++
 net/sched/Kconfig            |  10 +
 net/sched/Makefile           |   1 +
 net/sched/cls_flower.c       | 688 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 729 insertions(+)
 create mode 100644 net/sched/cls_flower.c

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index ffc112c8e1c2..39fb53d67b11 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -409,6 +409,36 @@ enum {
 
 #define TCA_BPF_MAX (__TCA_BPF_MAX - 1)
 
+/* Flower classifier */
+
+enum {
+	TCA_FLOWER_UNSPEC,
+	TCA_FLOWER_CLASSID,
+	TCA_FLOWER_INDEV,
+	TCA_FLOWER_ACT,
+	TCA_FLOWER_KEY_ETH_DST,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_DST_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC,		/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_SRC_MASK,	/* ETH_ALEN */
+	TCA_FLOWER_KEY_ETH_TYPE,	/* be16 */
+	TCA_FLOWER_KEY_IP_PROTO,	/* u8 */
+	TCA_FLOWER_KEY_IPV4_SRC,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_SRC_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST,	/* be32 */
+	TCA_FLOWER_KEY_IPV4_DST_MASK,	/* be32 */
+	TCA_FLOWER_KEY_IPV6_SRC,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_SRC_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST,	/* struct in6_addr */
+	TCA_FLOWER_KEY_IPV6_DST_MASK,	/* struct in6_addr */
+	TCA_FLOWER_KEY_TCP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_TCP_DST,		/* be16 */
+	TCA_FLOWER_KEY_UDP_SRC,		/* be16 */
+	TCA_FLOWER_KEY_UDP_DST,		/* be16 */
+	__TCA_FLOWER_MAX,
+};
+
+#define TCA_FLOWER_MAX (__TCA_FLOWER_MAX - 1)
+
 /* Extended Matches */
 
 struct tcf_ematch_tree_hdr {
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 2274e723a3df..5fd1c2f487d2 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -477,6 +477,16 @@ config NET_CLS_BPF
 	  To compile this code as a module, choose M here: the module will
 	  be called cls_bpf.
 
+config NET_CLS_FLOWER
+	tristate "Flower classifier"
+	select NET_CLS
+	---help---
+	  If you say Y here, you will be able to classify packets based on
+	  a configurable combination of packet keys and masks.
+
+	  To compile this code as a module, choose M here: the module will
+	  be called cls_flower.
+
 config NET_EMATCH
 	bool "Extended Matches"
 	select NET_CLS
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 7ca7f4c1b8c2..690c1689e090 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -56,6 +56,7 @@ obj-$(CONFIG_NET_CLS_BASIC)	+= cls_basic.o
 obj-$(CONFIG_NET_CLS_FLOW)	+= cls_flow.o
 obj-$(CONFIG_NET_CLS_CGROUP)	+= cls_cgroup.o
 obj-$(CONFIG_NET_CLS_BPF)	+= cls_bpf.o
+obj-$(CONFIG_NET_CLS_FLOWER)	+= cls_flower.o
 obj-$(CONFIG_NET_EMATCH)	+= ematch.o
 obj-$(CONFIG_NET_EMATCH_CMP)	+= em_cmp.o
 obj-$(CONFIG_NET_EMATCH_NBYTE)	+= em_nbyte.o
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
new file mode 100644
index 000000000000..9bc654c764cd
--- /dev/null
+++ b/net/sched/cls_flower.c
@@ -0,0 +1,688 @@
+/*
+ * net/sched/cls_flower.c		Flower classifier
+ *
+ * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/rhashtable.h>
+
+#include <linux/if_ether.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+
+#include <net/sch_generic.h>
+#include <net/pkt_cls.h>
+#include <net/ip.h>
+#include <net/flow_dissector.h>
+
+struct fl_flow_key {
+	int	indev_ifindex;
+	struct flow_dissector_key_basic basic;
+	struct flow_dissector_key_eth_addrs eth;
+	union {
+		struct flow_dissector_key_addrs ipv4;
+		struct flow_dissector_key_ipv6_addrs ipv6;
+	};
+	struct flow_dissector_key_ports tp;
+} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */
+
+struct fl_flow_mask_range {
+	unsigned short int start;
+	unsigned short int end;
+};
+
+struct fl_flow_mask {
+	struct fl_flow_key key;
+	struct fl_flow_mask_range range;
+	struct rcu_head	rcu;
+};
+
+struct cls_fl_head {
+	struct rhashtable ht;
+	struct fl_flow_mask mask;
+	struct flow_dissector dissector;
+	u32 hgen;
+	bool mask_assigned;
+	struct list_head filters;
+	struct rhashtable_params ht_params;
+	struct rcu_head rcu;
+};
+
+struct cls_fl_filter {
+	struct rhash_head ht_node;
+	struct fl_flow_key mkey;
+	struct tcf_exts exts;
+	struct tcf_result res;
+	struct fl_flow_key key;
+	struct list_head list;
+	u32 handle;
+	struct rcu_head	rcu;
+};
+
+static unsigned short int fl_mask_range(const struct fl_flow_mask *mask)
+{
+	return mask->range.end - mask->range.start;
+}
+
+static void fl_mask_update_range(struct fl_flow_mask *mask)
+{
+	const u8 *bytes = (const u8 *) &mask->key;
+	size_t size = sizeof(mask->key);
+	size_t i, first = 0, last = size - 1;
+
+	for (i = 0; i < sizeof(mask->key); i++) {
+		if (bytes[i]) {
+			if (!first && i)
+				first = i;
+			last = i;
+		}
+	}
+	mask->range.start = rounddown(first, sizeof(long));
+	mask->range.end = roundup(last + 1, sizeof(long));
+}
+
+static void *fl_key_get_start(struct fl_flow_key *key,
+			      const struct fl_flow_mask *mask)
+{
+	return (u8 *) key + mask->range.start;
+}
+
+static void fl_set_masked_key(struct fl_flow_key *mkey, struct fl_flow_key *key,
+			      struct fl_flow_mask *mask)
+{
+	const long *lkey = fl_key_get_start(key, mask);
+	const long *lmask = fl_key_get_start(&mask->key, mask);
+	long *lmkey = fl_key_get_start(mkey, mask);
+	int i;
+
+	for (i = 0; i < fl_mask_range(mask); i += sizeof(long))
+		*lmkey++ = *lkey++ & *lmask++;
+}
+
+static void fl_clear_masked_range(struct fl_flow_key *key,
+				  struct fl_flow_mask *mask)
+{
+	memset(fl_key_get_start(key, mask), 0, fl_mask_range(mask));
+}
+
+static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
+		       struct tcf_result *res)
+{
+	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+	struct cls_fl_filter *f;
+	struct fl_flow_key skb_key;
+	struct fl_flow_key skb_mkey;
+
+	fl_clear_masked_range(&skb_key, &head->mask);
+	skb_key.indev_ifindex = skb->skb_iif;
+	/* skb_flow_dissect() does not set n_proto in case an unknown protocol,
+	 * so do it rather here.
+	 */
+	skb_key.basic.n_proto = skb->protocol;
+	skb_flow_dissect(skb, &head->dissector, &skb_key);
+
+	fl_set_masked_key(&skb_mkey, &skb_key, &head->mask);
+
+	f = rhashtable_lookup_fast(&head->ht,
+				   fl_key_get_start(&skb_mkey, &head->mask),
+				   head->ht_params);
+	if (f) {
+		*res = f->res;
+		return tcf_exts_exec(skb, &f->exts, res);
+	}
+	return -1;
+}
+
+static int fl_init(struct tcf_proto *tp)
+{
+	struct cls_fl_head *head;
+
+	head = kzalloc(sizeof(*head), GFP_KERNEL);
+	if (!head)
+		return -ENOBUFS;
+
+	INIT_LIST_HEAD_RCU(&head->filters);
+	rcu_assign_pointer(tp->root, head);
+
+	return 0;
+}
+
+static void fl_destroy_filter(struct rcu_head *head)
+{
+	struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu);
+
+	tcf_exts_destroy(&f->exts);
+	kfree(f);
+}
+
+static bool fl_destroy(struct tcf_proto *tp, bool force)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f, *next;
+
+	if (!force && !list_empty(&head->filters))
+		return false;
+
+	list_for_each_entry_safe(f, next, &head->filters, list) {
+		list_del_rcu(&f->list);
+		call_rcu(&f->rcu, fl_destroy_filter);
+	}
+	RCU_INIT_POINTER(tp->root, NULL);
+	if (head->mask_assigned)
+		rhashtable_destroy(&head->ht);
+	kfree_rcu(head, rcu);
+	return true;
+}
+
+static unsigned long fl_get(struct tcf_proto *tp, u32 handle)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry(f, &head->filters, list)
+		if (f->handle == handle)
+			return (unsigned long) f;
+	return 0;
+}
+
+static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
+	[TCA_FLOWER_UNSPEC]		= { .type = NLA_UNSPEC },
+	[TCA_FLOWER_CLASSID]		= { .type = NLA_U32 },
+	[TCA_FLOWER_INDEV]		= { .type = NLA_STRING,
+					    .len = IFNAMSIZ },
+	[TCA_FLOWER_KEY_ETH_DST]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_DST_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_SRC_MASK]	= { .len = ETH_ALEN },
+	[TCA_FLOWER_KEY_ETH_TYPE]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_IP_PROTO]	= { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_IPV4_SRC]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_SRC_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV4_DST_MASK]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_IPV6_SRC]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_SRC_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_IPV6_DST_MASK]	= { .len = sizeof(struct in6_addr) },
+	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_SRC]	= { .type = NLA_U16 },
+	[TCA_FLOWER_KEY_TCP_DST]	= { .type = NLA_U16 },
+};
+
+static void fl_set_key_val(struct nlattr **tb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	if (!tb[val_type])
+		return;
+	memcpy(val, nla_data(tb[val_type]), len);
+	if (mask_type == TCA_FLOWER_UNSPEC || !tb[mask_type])
+		memset(mask, 0xff, len);
+	else
+		memcpy(mask, nla_data(tb[mask_type]), len);
+}
+
+static int fl_set_key(struct net *net, struct nlattr **tb,
+		      struct fl_flow_key *key, struct fl_flow_key *mask)
+{
+	int err;
+
+	if (tb[TCA_FLOWER_INDEV]) {
+		err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]);
+		if (err < 0)
+			return err;
+		key->indev_ifindex = err;
+		mask->indev_ifindex = 0xffffffff;
+	}
+
+	fl_set_key_val(tb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+		       mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+		       sizeof(key->eth.dst));
+	fl_set_key_val(tb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+		       mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+		       sizeof(key->eth.src));
+	fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+		       &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+		       sizeof(key->basic.n_proto));
+	if (key->basic.n_proto == htons(ETH_P_IP) ||
+	    key->basic.n_proto == htons(ETH_P_IPV6)) {
+		fl_set_key_val(tb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			       &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			       sizeof(key->basic.ip_proto));
+	}
+	if (key->basic.n_proto == htons(ETH_P_IP)) {
+		fl_set_key_val(tb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			       &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			       sizeof(key->ipv4.src));
+		fl_set_key_val(tb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			       &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			       sizeof(key->ipv4.dst));
+	} else if (key->basic.n_proto == htons(ETH_P_IPV6)) {
+		fl_set_key_val(tb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+			       &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+			       sizeof(key->ipv6.src));
+		fl_set_key_val(tb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+			       &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+			       sizeof(key->ipv6.dst));
+	}
+	if (key->basic.ip_proto == IPPROTO_TCP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	} else if (key->basic.ip_proto == IPPROTO_UDP) {
+		fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+			       &mask->tp.src, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.src));
+		fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+			       &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			       sizeof(key->tp.dst));
+	}
+
+	return 0;
+}
+
+static bool fl_mask_eq(struct fl_flow_mask *mask1,
+		       struct fl_flow_mask *mask2)
+{
+	const long *lmask1 = fl_key_get_start(&mask1->key, mask1);
+	const long *lmask2 = fl_key_get_start(&mask2->key, mask2);
+
+	return !memcmp(&mask1->range, &mask2->range, sizeof(mask1->range)) &&
+	       !memcmp(lmask1, lmask2, fl_mask_range(mask1));
+}
+
+static const struct rhashtable_params fl_ht_params = {
+	.key_offset = offsetof(struct cls_fl_filter, mkey), /* base offset */
+	.head_offset = offsetof(struct cls_fl_filter, ht_node),
+	.automatic_shrinking = true,
+};
+
+static int fl_init_hashtable(struct cls_fl_head *head,
+			     struct fl_flow_mask *mask)
+{
+	head->ht_params = fl_ht_params;
+	head->ht_params.key_len = fl_mask_range(mask);
+	head->ht_params.key_offset += mask->range.start;
+
+	return rhashtable_init(&head->ht, &head->ht_params);
+}
+
+#define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member)
+#define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member))
+#define FL_KEY_MEMBER_END_OFFSET(member)					\
+	(FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member))
+
+#define FL_KEY_IN_RANGE(mask, member)						\
+        (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end &&			\
+         FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start)
+
+#define FL_KEY_SET(keys, cnt, id, member)					\
+	do {									\
+		keys[cnt].key_id = id;						\
+		keys[cnt].offset = FL_KEY_MEMBER_OFFSET(member);		\
+		cnt++;								\
+	} while(0);
+
+#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member)			\
+	do {									\
+		if (FL_KEY_IN_RANGE(mask, member))				\
+			FL_KEY_SET(keys, cnt, id, member);			\
+	} while(0);
+
+static void fl_init_dissector(struct cls_fl_head *head,
+			      struct fl_flow_mask *mask)
+{
+	struct flow_dissector_key keys[FLOW_DISSECTOR_KEY_MAX];
+	size_t cnt = 0;
+
+	FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_ETH_ADDRS, eth);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6);
+	FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt,
+			       FLOW_DISSECTOR_KEY_PORTS, tp);
+
+	skb_flow_dissector_init(&head->dissector, keys, cnt);
+}
+
+static int fl_check_assign_mask(struct cls_fl_head *head,
+				struct fl_flow_mask *mask)
+{
+	int err;
+
+	if (head->mask_assigned) {
+		if (!fl_mask_eq(&head->mask, mask))
+			return -EINVAL;
+		else
+			return 0;
+	}
+
+	/* Mask is not assigned yet. So assign it and init hashtable
+	 * according to that.
+	 */
+	err = fl_init_hashtable(head, mask);
+	if (err)
+		return err;
+	memcpy(&head->mask, mask, sizeof(head->mask));
+	head->mask_assigned = true;
+
+	fl_init_dissector(head, mask);
+
+	return 0;
+}
+
+static int fl_set_parms(struct net *net, struct tcf_proto *tp,
+			struct cls_fl_filter *f, struct fl_flow_mask *mask,
+			unsigned long base, struct nlattr **tb,
+			struct nlattr *est, bool ovr)
+{
+	struct tcf_exts e;
+	int err;
+
+	tcf_exts_init(&e, TCA_FLOWER_ACT, 0);
+	err = tcf_exts_validate(net, tp, tb, est, &e, ovr);
+	if (err < 0)
+		return err;
+
+	if (tb[TCA_FLOWER_CLASSID]) {
+		f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]);
+		tcf_bind_filter(tp, &f->res, base);
+	}
+
+	err = fl_set_key(net, tb, &f->key, &mask->key);
+	if (err)
+		goto errout;
+
+	fl_mask_update_range(mask);
+	fl_set_masked_key(&f->mkey, &f->key, mask);
+
+	tcf_exts_change(tp, &f->exts, &e);
+
+	return 0;
+errout:
+	tcf_exts_destroy(&e);
+	return err;
+}
+
+static u32 fl_grab_new_handle(struct tcf_proto *tp,
+			      struct cls_fl_head *head)
+{
+	unsigned int i = 0x80000000;
+	u32 handle;
+
+	do {
+		if (++head->hgen == 0x7FFFFFFF)
+			head->hgen = 1;
+	} while (--i > 0 && fl_get(tp, head->hgen));
+
+	if (unlikely(i == 0)) {
+		pr_err("Insufficient number of handles\n");
+		handle = 0;
+	} else {
+		handle = head->hgen;
+	}
+
+	return handle;
+}
+
+static int fl_change(struct net *net, struct sk_buff *in_skb,
+		     struct tcf_proto *tp, unsigned long base,
+		     u32 handle, struct nlattr **tca,
+		     unsigned long *arg, bool ovr)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg;
+	struct cls_fl_filter *fnew;
+	struct nlattr *tb[TCA_FLOWER_MAX + 1];
+	struct fl_flow_mask mask = {};
+	int err;
+
+	if (!tca[TCA_OPTIONS])
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_FLOWER_MAX, tca[TCA_OPTIONS], fl_policy);
+	if (err < 0)
+		return err;
+
+	if (fold && handle && fold->handle != handle)
+		return -EINVAL;
+
+	fnew = kzalloc(sizeof(*fnew), GFP_KERNEL);
+	if (!fnew)
+		return -ENOBUFS;
+
+	tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0);
+
+	if (!handle) {
+		handle = fl_grab_new_handle(tp, head);
+		if (!handle) {
+			err = -EINVAL;
+			goto errout;
+		}
+	}
+	fnew->handle = handle;
+
+	err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr);
+	if (err)
+		goto errout;
+
+	err = fl_check_assign_mask(head, &mask);
+	if (err)
+		goto errout;
+
+	err = rhashtable_insert_fast(&head->ht, &fnew->ht_node,
+				     head->ht_params);
+	if (err)
+		goto errout;
+	if (fold)
+		rhashtable_remove_fast(&head->ht, &fold->ht_node,
+				       head->ht_params);
+
+	*arg = (unsigned long) fnew;
+
+	if (fold) {
+		list_replace_rcu(&fnew->list, &fold->list);
+		tcf_unbind_filter(tp, &fold->res);
+		call_rcu(&fold->rcu, fl_destroy_filter);
+	} else {
+		list_add_tail_rcu(&fnew->list, &head->filters);
+	}
+
+	return 0;
+
+errout:
+	kfree(fnew);
+	return err;
+}
+
+static int fl_delete(struct tcf_proto *tp, unsigned long arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) arg;
+
+	rhashtable_remove_fast(&head->ht, &f->ht_node,
+			       head->ht_params);
+	list_del_rcu(&f->list);
+	tcf_unbind_filter(tp, &f->res);
+	call_rcu(&f->rcu, fl_destroy_filter);
+	return 0;
+}
+
+static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f;
+
+	list_for_each_entry_rcu(f, &head->filters, list) {
+		if (arg->count < arg->skip)
+			goto skip;
+		if (arg->fn(tp, (unsigned long) f, arg) < 0) {
+			arg->stop = 1;
+			break;
+		}
+skip:
+		arg->count++;
+	}
+}
+
+static int fl_dump_key_val(struct sk_buff *skb,
+			   void *val, int val_type,
+			   void *mask, int mask_type, int len)
+{
+	int err;
+
+	if (!memchr_inv(mask, 0, len))
+		return 0;
+	err = nla_put(skb, val_type, len, val);
+	if (err)
+		return err;
+	if (mask_type != TCA_FLOWER_UNSPEC) {
+		err = nla_put(skb, mask_type, len, mask);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh,
+		   struct sk_buff *skb, struct tcmsg *t)
+{
+	struct cls_fl_head *head = rtnl_dereference(tp->root);
+	struct cls_fl_filter *f = (struct cls_fl_filter *) fh;
+	struct nlattr *nest;
+	struct fl_flow_key *key, *mask;
+
+	if (!f)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (f->res.classid &&
+	    nla_put_u32(skb, TCA_FLOWER_CLASSID, f->res.classid))
+		goto nla_put_failure;
+
+	key = &f->key;
+	mask = &head->mask.key;
+
+	if (mask->indev_ifindex) {
+		struct net_device *dev;
+
+		dev = __dev_get_by_index(net, key->indev_ifindex);
+		if (dev && nla_put_string(skb, TCA_FLOWER_INDEV, dev->name))
+			goto nla_put_failure;
+	}
+
+	if (fl_dump_key_val(skb, key->eth.dst, TCA_FLOWER_KEY_ETH_DST,
+			    mask->eth.dst, TCA_FLOWER_KEY_ETH_DST_MASK,
+			    sizeof(key->eth.dst)) ||
+	    fl_dump_key_val(skb, key->eth.src, TCA_FLOWER_KEY_ETH_SRC,
+			    mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK,
+			    sizeof(key->eth.src)) ||
+	    fl_dump_key_val(skb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE,
+			    &mask->basic.n_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.n_proto)))
+		goto nla_put_failure;
+	if ((key->basic.n_proto == htons(ETH_P_IP) ||
+	     key->basic.n_proto == htons(ETH_P_IPV6)) &&
+	    fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO,
+			    &mask->basic.ip_proto, TCA_FLOWER_UNSPEC,
+			    sizeof(key->basic.ip_proto)))
+		goto nla_put_failure;
+
+	if (key->basic.n_proto == htons(ETH_P_IP) &&
+	    (fl_dump_key_val(skb, &key->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC,
+			     &mask->ipv4.src, TCA_FLOWER_KEY_IPV4_SRC_MASK,
+			     sizeof(key->ipv4.src)) ||
+	     fl_dump_key_val(skb, &key->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST,
+			     &mask->ipv4.dst, TCA_FLOWER_KEY_IPV4_DST_MASK,
+			     sizeof(key->ipv4.dst))))
+		goto nla_put_failure;
+	else if (key->basic.n_proto == htons(ETH_P_IPV6) &&
+		 (fl_dump_key_val(skb, &key->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC,
+				  &mask->ipv6.src, TCA_FLOWER_KEY_IPV6_SRC_MASK,
+				  sizeof(key->ipv6.src)) ||
+		  fl_dump_key_val(skb, &key->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST,
+				  &mask->ipv6.dst, TCA_FLOWER_KEY_IPV6_DST_MASK,
+				  sizeof(key->ipv6.dst))))
+		goto nla_put_failure;
+
+	if (key->basic.ip_proto == IPPROTO_TCP &&
+	    (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC,
+			     &mask->tp.src, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.src)) ||
+	     fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST,
+			     &mask->tp.dst, TCA_FLOWER_UNSPEC,
+			     sizeof(key->tp.dst))))
+		goto nla_put_failure;
+	else if (key->basic.ip_proto == IPPROTO_UDP &&
+		 (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC,
+				  &mask->tp.src, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.src)) ||
+		  fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST,
+				  &mask->tp.dst, TCA_FLOWER_UNSPEC,
+				  sizeof(key->tp.dst))))
+		goto nla_put_failure;
+
+	if (tcf_exts_dump(skb, &f->exts))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	if (tcf_exts_dump_stats(skb, &f->exts) < 0)
+		goto nla_put_failure;
+
+	return skb->len;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
+static struct tcf_proto_ops cls_fl_ops __read_mostly = {
+	.kind		= "flower",
+	.classify	= fl_classify,
+	.init		= fl_init,
+	.destroy	= fl_destroy,
+	.get		= fl_get,
+	.change		= fl_change,
+	.delete		= fl_delete,
+	.walk		= fl_walk,
+	.dump		= fl_dump,
+	.owner		= THIS_MODULE,
+};
+
+static int __init cls_fl_init(void)
+{
+	return register_tcf_proto_ops(&cls_fl_ops);
+}
+
+static void __exit cls_fl_exit(void)
+{
+	unregister_tcf_proto_ops(&cls_fl_ops);
+}
+
+module_init(cls_fl_init);
+module_exit(cls_fl_exit);
+
+MODULE_AUTHOR("Jiri Pirko <jiri@resnulli.us>");
+MODULE_DESCRIPTION("Flower classifier");
+MODULE_LICENSE("GPL v2");
-- 
cgit 


From a9b6391814d5d6b8668fca2dace86949b7244e2e Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Tue, 12 May 2015 11:56:50 -0400
Subject: packet: rollover statistics

Rollover indicates exceptional conditions. Export a counter to inform
socket owners of this state.

If no socket with sufficient room is found, rollover fails. Also count
these events.

Finally, also count when flows are rolled over early thanks to huge
flow detection, to validate its correctness.

Tested:
  Read counters in bench_rollover on all other tests in the patchset

Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_packet.h |  7 +++++++
 net/packet/af_packet.c         | 19 ++++++++++++++++++-
 net/packet/internal.h          |  3 +++
 3 files changed, 28 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index 053bd102fbe0..d3d715f8c88f 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -54,6 +54,7 @@ struct sockaddr_ll {
 #define PACKET_FANOUT			18
 #define PACKET_TX_HAS_OFF		19
 #define PACKET_QDISC_BYPASS		20
+#define PACKET_ROLLOVER_STATS		21
 
 #define PACKET_FANOUT_HASH		0
 #define PACKET_FANOUT_LB		1
@@ -75,6 +76,12 @@ struct tpacket_stats_v3 {
 	unsigned int	tp_freeze_q_cnt;
 };
 
+struct tpacket_rollover_stats {
+	__aligned_u64	tp_all;
+	__aligned_u64	tp_huge;
+	__aligned_u64	tp_failed;
+};
+
 union tpacket_stats_u {
 	struct tpacket_stats stats1;
 	struct tpacket_stats_v3 stats3;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8f0156b10f8d..31d58565726c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1395,7 +1395,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
 					  unsigned int num)
 {
 	struct packet_sock *po, *po_next;
-	unsigned int i, j, room;
+	unsigned int i, j, room = ROOM_NONE;
 
 	po = pkt_sk(f->arr[idx]);
 
@@ -1413,6 +1413,9 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
 		    packet_rcv_has_room(po_next, skb) == ROOM_NORMAL) {
 			if (i != j)
 				po->rollover->sock = i;
+			atomic_long_inc(&po->rollover->num);
+			if (room == ROOM_LOW)
+				atomic_long_inc(&po->rollover->num_huge);
 			return i;
 		}
 
@@ -1420,6 +1423,7 @@ static unsigned int fanout_demux_rollover(struct packet_fanout *f,
 			i = 0;
 	} while (i != j);
 
+	atomic_long_inc(&po->rollover->num_failed);
 	return idx;
 }
 
@@ -1554,6 +1558,9 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
 		if (!po->rollover)
 			return -ENOMEM;
+		atomic_long_set(&po->rollover->num, 0);
+		atomic_long_set(&po->rollover->num_huge, 0);
+		atomic_long_set(&po->rollover->num_failed, 0);
 	}
 
 	mutex_lock(&fanout_mutex);
@@ -3584,6 +3591,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 	struct packet_sock *po = pkt_sk(sk);
 	void *data = &val;
 	union tpacket_stats_u st;
+	struct tpacket_rollover_stats rstats;
 
 	if (level != SOL_PACKET)
 		return -ENOPROTOOPT;
@@ -3659,6 +3667,15 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
 			((u32)po->fanout->flags << 24)) :
 		       0);
 		break;
+	case PACKET_ROLLOVER_STATS:
+		if (!po->rollover)
+			return -EINVAL;
+		rstats.tp_all = atomic_long_read(&po->rollover->num);
+		rstats.tp_huge = atomic_long_read(&po->rollover->num_huge);
+		rstats.tp_failed = atomic_long_read(&po->rollover->num_failed);
+		data = &rstats;
+		lv = sizeof(rstats);
+		break;
 	case PACKET_TX_HAS_OFF:
 		val = po->tp_tx_has_off;
 		break;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index a9d30a17c714..c035d263c1e8 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -89,6 +89,9 @@ struct packet_fanout {
 
 struct packet_rollover {
 	int			sock;
+	atomic_long_t		num;
+	atomic_long_t		num_huge;
+	atomic_long_t		num_failed;
 #define ROLLOVER_HLEN	(L1_CACHE_BYTES / sizeof(u32))
 	u32			history[ROLLOVER_HLEN] ____cacheline_aligned;
 } ____cacheline_aligned_in_smp;
-- 
cgit 


From 2d07dc79fe04a43d82a346ced6bbf07bdb523f1b Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 13 May 2015 12:57:30 -0400
Subject: geneve: add initial netdev driver for GENEVE tunnels

This is an initial implementation of a netdev driver for GENEVE
tunnels.  This implementation uses a fixed UDP port, and only supports
point-to-point links with specific partner endpoints.  Only IPv4
links are supported at this time.

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig          |  14 ++
 drivers/net/Makefile         |   1 +
 drivers/net/geneve.c         | 503 +++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/if_link.h |   9 +
 4 files changed, 527 insertions(+)
 create mode 100644 drivers/net/geneve.c

(limited to 'include/uapi/linux')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index df51d6025a90..019fceffc9e5 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -179,6 +179,20 @@ config VXLAN
 	  To compile this driver as a module, choose M here: the module
 	  will be called vxlan.
 
+config GENEVE
+       tristate "Generic Network Virtualization Encapsulation netdev"
+       depends on INET && GENEVE_CORE
+       select NET_IP_TUNNEL
+       ---help---
+	  This allows one to create geneve virtual interfaces that provide
+	  Layer 2 Networks over Layer 3 Networks. GENEVE is often used
+	  to tunnel virtual network infrastructure in virtualized environments.
+	  For more information see:
+	    http://tools.ietf.org/html/draft-gross-geneve-02
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called geneve.
+
 config NETCONSOLE
 	tristate "Network console logging support"
 	---help---
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index e25fdd7d905e..c12cb22478a7 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_TUN) += tun.o
 obj-$(CONFIG_VETH) += veth.o
 obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
 obj-$(CONFIG_VXLAN) += vxlan.o
+obj-$(CONFIG_GENEVE) += geneve.o
 obj-$(CONFIG_NLMON) += nlmon.o
 
 #
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
new file mode 100644
index 000000000000..b7eafa4c1a67
--- /dev/null
+++ b/drivers/net/geneve.c
@@ -0,0 +1,503 @@
+/*
+ * GENEVE: Generic Network Virtualization Encapsulation
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/hash.h>
+#include <net/rtnetlink.h>
+#include <net/geneve.h>
+
+#define GENEVE_NETDEV_VER	"0.6"
+
+#define GENEVE_UDP_PORT		6081
+
+#define GENEVE_N_VID		(1u << 24)
+#define GENEVE_VID_MASK		(GENEVE_N_VID - 1)
+
+#define VNI_HASH_BITS		10
+#define VNI_HASH_SIZE		(1<<VNI_HASH_BITS)
+
+static bool log_ecn_error = true;
+module_param(log_ecn_error, bool, 0644);
+MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
+
+/* per-network namespace private data for this module */
+struct geneve_net {
+	struct list_head  geneve_list;
+	struct hlist_head vni_list[VNI_HASH_SIZE];
+};
+
+/* Pseudo network device */
+struct geneve_dev {
+	struct hlist_node  hlist;	/* vni hash table */
+	struct net	   *net;	/* netns for packet i/o */
+	struct net_device  *dev;	/* netdev for geneve tunnel */
+	struct geneve_sock *sock;	/* socket used for geneve tunnel */
+	u8 vni[3];			/* virtual network ID for tunnel */
+	struct sockaddr_in remote;	/* IPv4 address for link partner */
+	struct list_head   next;	/* geneve's per namespace list */
+};
+
+static int geneve_net_id;
+
+static inline __u32 geneve_net_vni_hash(u8 vni[3])
+{
+	__u32 vnid;
+
+	vnid = (vni[0] << 16) | (vni[1] << 8) | vni[2];
+	return hash_32(vnid, VNI_HASH_BITS);
+}
+
+/* geneve receive/decap routine */
+static void geneve_rx(struct geneve_sock *gs, struct sk_buff *skb)
+{
+	struct genevehdr *gnvh = geneve_hdr(skb);
+	struct geneve_dev *dummy, *geneve = NULL;
+	struct geneve_net *gn;
+	struct iphdr *iph = NULL;
+	struct pcpu_sw_netstats *stats;
+	struct hlist_head *vni_list_head;
+	int err = 0;
+	__u32 hash;
+
+	iph = ip_hdr(skb); /* Still outer IP header... */
+
+	gn = gs->rcv_data;
+
+	/* Find the device for this VNI */
+	hash = geneve_net_vni_hash(gnvh->vni);
+	vni_list_head = &gn->vni_list[hash];
+	hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) {
+		if (!memcmp(gnvh->vni, dummy->vni, sizeof(dummy->vni)) &&
+		    iph->saddr == dummy->remote.sin_addr.s_addr) {
+			geneve = dummy;
+			break;
+		}
+	}
+	if (!geneve)
+		goto drop;
+
+	/* Drop packets w/ critical options,
+	 * since we don't support any...
+	 */
+	if (gnvh->critical)
+		goto drop;
+
+	skb_reset_mac_header(skb);
+	skb_scrub_packet(skb, !net_eq(geneve->net, dev_net(geneve->dev)));
+	skb->protocol = eth_type_trans(skb, geneve->dev);
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+	/* Ignore packet loops (and multicast echo) */
+	if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr))
+		goto drop;
+
+	skb_reset_network_header(skb);
+
+	iph = ip_hdr(skb); /* Now inner IP header... */
+	err = IP_ECN_decapsulate(iph, skb);
+
+	if (unlikely(err)) {
+		if (log_ecn_error)
+			net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+					     &iph->saddr, iph->tos);
+		if (err > 1) {
+			++geneve->dev->stats.rx_frame_errors;
+			++geneve->dev->stats.rx_errors;
+			goto drop;
+		}
+	}
+
+	stats = this_cpu_ptr(geneve->dev->tstats);
+	u64_stats_update_begin(&stats->syncp);
+	stats->rx_packets++;
+	stats->rx_bytes += skb->len;
+	u64_stats_update_end(&stats->syncp);
+
+	netif_rx(skb);
+
+	return;
+drop:
+	/* Consume bad packet */
+	kfree_skb(skb);
+}
+
+/* Setup stats when device is created */
+static int geneve_init(struct net_device *dev)
+{
+	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
+	if (!dev->tstats)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void geneve_uninit(struct net_device *dev)
+{
+	free_percpu(dev->tstats);
+}
+
+static int geneve_open(struct net_device *dev)
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+	struct net *net = geneve->net;
+	struct geneve_net *gn = net_generic(geneve->net, geneve_net_id);
+	struct geneve_sock *gs;
+
+	gs = geneve_sock_add(net, htons(GENEVE_UDP_PORT), geneve_rx, gn,
+	                     false, false);
+	if (IS_ERR(gs))
+		return PTR_ERR(gs);
+
+	geneve->sock = gs;
+
+	return 0;
+}
+
+static int geneve_stop(struct net_device *dev)
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+	struct geneve_sock *gs = geneve->sock;
+
+	geneve_sock_release(gs);
+
+	return 0;
+}
+
+static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+	struct geneve_sock *gs = geneve->sock;
+	struct rtable *rt = NULL;
+	const struct iphdr *iip; /* interior IP header */
+	struct flowi4 fl4;
+	int err;
+	__be16 sport;
+	__u8 tos, ttl = 0;
+
+	iip = ip_hdr(skb);
+
+	skb_reset_mac_header(skb);
+
+	/* TODO: port min/max limits should be configurable */
+	sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true);
+
+	memset(&fl4, 0, sizeof(fl4));
+	fl4.daddr = geneve->remote.sin_addr.s_addr;
+	rt = ip_route_output_key(geneve->net, &fl4);
+	if (IS_ERR(rt)) {
+		netdev_dbg(dev, "no route to %pI4\n", &fl4.daddr);
+		dev->stats.tx_carrier_errors++;
+		goto tx_error;
+	}
+	if (rt->dst.dev == dev) { /* is this necessary? */
+		netdev_dbg(dev, "circular route to %pI4\n", &fl4.daddr);
+		dev->stats.collisions++;
+		goto rt_tx_error;
+	}
+
+	/* TODO: tos and ttl should be configurable */
+
+	tos = ip_tunnel_ecn_encap(0, iip, skb);
+
+	if (IN_MULTICAST(ntohl(fl4.daddr)))
+		ttl = 1;
+
+	ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
+
+	/* no need to handle local destination and encap bypass...yet... */
+
+	err = geneve_xmit_skb(gs, rt, skb, fl4.saddr, fl4.daddr,
+	                      tos, ttl, 0, sport, htons(GENEVE_UDP_PORT), 0,
+	                      geneve->vni, 0, NULL, false,
+	                      !net_eq(geneve->net, dev_net(geneve->dev)));
+	if (err < 0)
+		ip_rt_put(rt);
+
+	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);
+
+	return NETDEV_TX_OK;
+
+rt_tx_error:
+	ip_rt_put(rt);
+tx_error:
+	dev->stats.tx_errors++;
+	dev_kfree_skb(skb);
+	return NETDEV_TX_OK;
+}
+
+static const struct net_device_ops geneve_netdev_ops = {
+	.ndo_init		= geneve_init,
+	.ndo_uninit		= geneve_uninit,
+	.ndo_open		= geneve_open,
+	.ndo_stop		= geneve_stop,
+	.ndo_start_xmit		= geneve_xmit,
+	.ndo_get_stats64	= ip_tunnel_get_stats64,
+	.ndo_change_mtu		= eth_change_mtu,
+	.ndo_validate_addr	= eth_validate_addr,
+	.ndo_set_mac_address	= eth_mac_addr,
+};
+
+static void geneve_get_drvinfo(struct net_device *dev,
+			       struct ethtool_drvinfo *drvinfo)
+{
+	strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
+	strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
+}
+
+static const struct ethtool_ops geneve_ethtool_ops = {
+	.get_drvinfo	= geneve_get_drvinfo,
+	.get_link	= ethtool_op_get_link,
+};
+
+/* Info for udev, that this is a virtual tunnel endpoint */
+static struct device_type geneve_type = {
+	.name = "geneve",
+};
+
+/* Initialize the device structure. */
+static void geneve_setup(struct net_device *dev)
+{
+	ether_setup(dev);
+
+	dev->netdev_ops = &geneve_netdev_ops;
+	dev->ethtool_ops = &geneve_ethtool_ops;
+	dev->destructor = free_netdev;
+
+	SET_NETDEV_DEVTYPE(dev, &geneve_type);
+
+	dev->tx_queue_len = 0;
+	dev->features    |= NETIF_F_LLTX;
+	dev->features    |= NETIF_F_SG | NETIF_F_HW_CSUM;
+	dev->features    |= NETIF_F_RXCSUM;
+	dev->features    |= NETIF_F_GSO_SOFTWARE;
+
+	dev->vlan_features = dev->features;
+	dev->features    |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+
+	dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+	dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+	dev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_STAG_TX;
+
+	netif_keep_dst(dev);
+	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+}
+
+static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
+	[IFLA_GENEVE_ID]		= { .type = NLA_U32 },
+	[IFLA_GENEVE_REMOTE]		= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+};
+
+static int geneve_validate(struct nlattr *tb[], struct nlattr *data[])
+{
+	if (tb[IFLA_ADDRESS]) {
+		if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN)
+			return -EINVAL;
+
+		if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS])))
+			return -EADDRNOTAVAIL;
+	}
+
+	if (!data)
+		return -EINVAL;
+
+	if (data[IFLA_GENEVE_ID]) {
+		__u32 vni =  nla_get_u32(data[IFLA_GENEVE_ID]);
+
+		if (vni >= GENEVE_VID_MASK)
+			return -ERANGE;
+	}
+
+	return 0;
+}
+
+static int geneve_newlink(struct net *net, struct net_device *dev,
+			 struct nlattr *tb[], struct nlattr *data[])
+{
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
+	struct geneve_dev *dummy, *geneve = netdev_priv(dev);
+	struct hlist_head *vni_list_head;
+	struct sockaddr_in remote;	/* IPv4 address for link partner */
+	__u32 vni, hash;
+	int err;
+
+	if (!data[IFLA_GENEVE_ID] || !data[IFLA_GENEVE_REMOTE])
+		return -EINVAL;
+
+	geneve->net = net;
+	geneve->dev = dev;
+
+	vni = nla_get_u32(data[IFLA_GENEVE_ID]);
+	geneve->vni[0] = (vni & 0x00ff0000) >> 16;
+	geneve->vni[1] = (vni & 0x0000ff00) >> 8;
+	geneve->vni[2] =  vni & 0x000000ff;
+
+	geneve->remote.sin_addr.s_addr =
+		nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
+	if (IN_MULTICAST(ntohl(geneve->remote.sin_addr.s_addr)))
+		return -EINVAL;
+
+	remote = geneve->remote;
+	hash = geneve_net_vni_hash(geneve->vni);
+	vni_list_head = &gn->vni_list[hash];
+	hlist_for_each_entry_rcu(dummy, vni_list_head, hlist) {
+		if (!memcmp(geneve->vni, dummy->vni, sizeof(dummy->vni)) &&
+		    !memcmp(&remote, &dummy->remote, sizeof(dummy->remote)))
+			return -EBUSY;
+	}
+
+	if (tb[IFLA_ADDRESS] == NULL)
+		eth_hw_addr_random(dev);
+
+	err = register_netdevice(dev);
+	if (err)
+		return err;
+
+	list_add(&geneve->next, &gn->geneve_list);
+
+	hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]);
+
+	return 0;
+}
+
+static void geneve_dellink(struct net_device *dev, struct list_head *head)
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+
+	if (!hlist_unhashed(&geneve->hlist))
+		hlist_del_rcu(&geneve->hlist);
+
+	list_del(&geneve->next);
+	unregister_netdevice_queue(dev, head);
+}
+
+static size_t geneve_get_size(const struct net_device *dev)
+{
+	return nla_total_size(sizeof(__u32)) +	/* IFLA_GENEVE_ID */
+		nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */
+		0;
+}
+
+static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+	struct geneve_dev *geneve = netdev_priv(dev);
+	__u32 vni;
+
+	vni = (geneve->vni[0] << 16) | (geneve->vni[1] << 8) | geneve->vni[2];
+	if (nla_put_u32(skb, IFLA_GENEVE_ID, vni))
+		goto nla_put_failure;
+
+	if (nla_put_in_addr(skb, IFLA_GENEVE_REMOTE,
+			    geneve->remote.sin_addr.s_addr))
+		goto nla_put_failure;
+
+	return 0;
+
+nla_put_failure:
+	return -EMSGSIZE;
+}
+
+static struct rtnl_link_ops geneve_link_ops __read_mostly = {
+	.kind		= "geneve",
+	.maxtype	= IFLA_GENEVE_MAX,
+	.policy		= geneve_policy,
+	.priv_size	= sizeof(struct geneve_dev),
+	.setup		= geneve_setup,
+	.validate	= geneve_validate,
+	.newlink	= geneve_newlink,
+	.dellink	= geneve_dellink,
+	.get_size	= geneve_get_size,
+	.fill_info	= geneve_fill_info,
+};
+
+static __net_init int geneve_init_net(struct net *net)
+{
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
+	unsigned int h;
+
+	INIT_LIST_HEAD(&gn->geneve_list);
+
+	for (h = 0; h < VNI_HASH_SIZE; ++h)
+		INIT_HLIST_HEAD(&gn->vni_list[h]);
+
+	return 0;
+}
+
+static void __net_exit geneve_exit_net(struct net *net)
+{
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
+	struct geneve_dev *geneve, *next;
+	struct net_device *dev, *aux;
+	LIST_HEAD(list);
+
+	rtnl_lock();
+
+	/* gather any geneve devices that were moved into this ns */
+	for_each_netdev_safe(net, dev, aux)
+		if (dev->rtnl_link_ops == &geneve_link_ops)
+			unregister_netdevice_queue(dev, &list);
+
+	/* now gather any other geneve devices that were created in this ns */
+	list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
+		/* If geneve->dev is in the same netns, it was already added
+		 * to the list by the previous loop.
+		 */
+		if (!net_eq(dev_net(geneve->dev), net))
+			unregister_netdevice_queue(geneve->dev, &list);
+	}
+
+	/* unregister the devices gathered above */
+	unregister_netdevice_many(&list);
+	rtnl_unlock();
+}
+
+static struct pernet_operations geneve_net_ops = {
+	.init = geneve_init_net,
+	.exit = geneve_exit_net,
+	.id   = &geneve_net_id,
+	.size = sizeof(struct geneve_net),
+};
+
+static int __init geneve_init_module(void)
+{
+	int rc;
+
+	rc = register_pernet_subsys(&geneve_net_ops);
+	if (rc)
+		goto out1;
+
+	rc = rtnl_link_register(&geneve_link_ops);
+	if (rc)
+		goto out2;
+
+	return 0;
+out2:
+	unregister_pernet_subsys(&geneve_net_ops);
+out1:
+	return rc;
+}
+late_initcall(geneve_init_module);
+
+static void __exit geneve_cleanup_module(void)
+{
+	rtnl_link_unregister(&geneve_link_ops);
+	unregister_pernet_subsys(&geneve_net_ops);
+}
+module_exit(geneve_cleanup_module);
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(GENEVE_NETDEV_VER);
+MODULE_AUTHOR("John W. Linville <linville@tuxdriver.com>");
+MODULE_DESCRIPTION("Interface driver for GENEVE encapsulated traffic");
+MODULE_ALIAS_RTNL_LINK("geneve");
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 6d6e502e1051..afccc9393fef 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -390,6 +390,15 @@ struct ifla_vxlan_port_range {
 	__be16	high;
 };
 
+/* GENEVE section */
+enum {
+	IFLA_GENEVE_UNSPEC,
+	IFLA_GENEVE_ID,
+	IFLA_GENEVE_REMOTE,
+	__IFLA_GENEVE_MAX
+};
+#define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
+
 /* Bonding section */
 
 enum {
-- 
cgit 


From e687ad60af09010936bbd0b2a3b5d90a8ee8353c Mon Sep 17 00:00:00 2001
From: Pablo Neira <pablo@netfilter.org>
Date: Wed, 13 May 2015 18:19:38 +0200
Subject: netfilter: add netfilter ingress hook after handle_ing() under unique
 static key

This patch adds the Netfilter ingress hook just after the existing tc ingress
hook, that seems to be the consensus solution for this.

Note that the Netfilter hook resides under the global static key that enables
ingress filtering. Nonetheless, Netfilter still also has its own static key for
minimal impact on the existing handle_ing().

* Without this patch:

Result: OK: 6216490(c6216338+d152) usec, 100000000 (60byte,0frags)
  16086246pps 7721Mb/sec (7721398080bps) errors: 100000000

    42.46%  kpktgend_0   [kernel.kallsyms]   [k] __netif_receive_skb_core
    25.92%  kpktgend_0   [kernel.kallsyms]   [k] kfree_skb
     7.81%  kpktgend_0   [pktgen]            [k] pktgen_thread_worker
     5.62%  kpktgend_0   [kernel.kallsyms]   [k] ip_rcv
     2.70%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_internal
     2.34%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_sk
     1.44%  kpktgend_0   [kernel.kallsyms]   [k] __build_skb

* With this patch:

Result: OK: 6214833(c6214731+d101) usec, 100000000 (60byte,0frags)
  16090536pps 7723Mb/sec (7723457280bps) errors: 100000000

    41.23%  kpktgend_0      [kernel.kallsyms]  [k] __netif_receive_skb_core
    26.57%  kpktgend_0      [kernel.kallsyms]  [k] kfree_skb
     7.72%  kpktgend_0      [pktgen]           [k] pktgen_thread_worker
     5.55%  kpktgend_0      [kernel.kallsyms]  [k] ip_rcv
     2.78%  kpktgend_0      [kernel.kallsyms]  [k] netif_receive_skb_internal
     2.06%  kpktgend_0      [kernel.kallsyms]  [k] netif_receive_skb_sk
     1.43%  kpktgend_0      [kernel.kallsyms]  [k] __build_skb

* Without this patch + tc ingress:

        tc filter add dev eth4 parent ffff: protocol ip prio 1 \
                u32 match ip dst 4.3.2.1/32

Result: OK: 9269001(c9268821+d179) usec, 100000000 (60byte,0frags)
  10788648pps 5178Mb/sec (5178551040bps) errors: 100000000

    40.99%  kpktgend_0   [kernel.kallsyms]  [k] __netif_receive_skb_core
    17.50%  kpktgend_0   [kernel.kallsyms]  [k] kfree_skb
    11.77%  kpktgend_0   [cls_u32]          [k] u32_classify
     5.62%  kpktgend_0   [kernel.kallsyms]  [k] tc_classify_compat
     5.18%  kpktgend_0   [pktgen]           [k] pktgen_thread_worker
     3.23%  kpktgend_0   [kernel.kallsyms]  [k] tc_classify
     2.97%  kpktgend_0   [kernel.kallsyms]  [k] ip_rcv
     1.83%  kpktgend_0   [kernel.kallsyms]  [k] netif_receive_skb_internal
     1.50%  kpktgend_0   [kernel.kallsyms]  [k] netif_receive_skb_sk
     0.99%  kpktgend_0   [kernel.kallsyms]  [k] __build_skb

* With this patch + tc ingress:

        tc filter add dev eth4 parent ffff: protocol ip prio 1 \
                u32 match ip dst 4.3.2.1/32

Result: OK: 9308218(c9308091+d126) usec, 100000000 (60byte,0frags)
  10743194pps 5156Mb/sec (5156733120bps) errors: 100000000

    42.01%  kpktgend_0   [kernel.kallsyms]   [k] __netif_receive_skb_core
    17.78%  kpktgend_0   [kernel.kallsyms]   [k] kfree_skb
    11.70%  kpktgend_0   [cls_u32]           [k] u32_classify
     5.46%  kpktgend_0   [kernel.kallsyms]   [k] tc_classify_compat
     5.16%  kpktgend_0   [pktgen]            [k] pktgen_thread_worker
     2.98%  kpktgend_0   [kernel.kallsyms]   [k] ip_rcv
     2.84%  kpktgend_0   [kernel.kallsyms]   [k] tc_classify
     1.96%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_internal
     1.57%  kpktgend_0   [kernel.kallsyms]   [k] netif_receive_skb_sk

Note that the results are very similar before and after.

I can see gcc gets the code under the ingress static key out of the hot path.
Then, on that cold branch, it generates the code to accomodate the netfilter
ingress static key. My explanation for this is that this reduces the pressure
on the instruction cache for non-users as the new code is out of the hot path,
and it comes with minimal impact for tc ingress users.

Using gcc version 4.8.4 on:

Architecture:          x86_64
CPU op-mode(s):        32-bit, 64-bit
Byte Order:            Little Endian
CPU(s):                8
[...]
L1d cache:             16K
L1i cache:             64K
L2 cache:              2048K
L3 cache:              8192K

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h         |  3 +++
 include/linux/netfilter.h         |  1 +
 include/linux/netfilter_ingress.h | 41 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/netfilter.h    |  6 ++++++
 net/core/dev.c                    | 36 ++++++++++++++++++++++++++++++++++
 net/netfilter/Kconfig             |  7 +++++++
 net/netfilter/core.c              | 31 ++++++++++++++++++++++++++++-
 7 files changed, 124 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/netfilter_ingress.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d3ed01c18247..51f8d2f5dc3f 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1656,6 +1656,9 @@ struct net_device {
 	struct tcf_proto __rcu  *ingress_cl_list;
 #endif
 	struct netdev_queue __rcu *ingress_queue;
+#ifdef CONFIG_NETFILTER_INGRESS
+	struct list_head	nf_hooks_ingress;
+#endif
 
 	unsigned char		broadcast[MAX_ADDR_LEN];
 #ifdef CONFIG_RFS_ACCEL
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index 49d00638d1fa..f5ff5d156da8 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -86,6 +86,7 @@ struct nf_hook_ops {
 
 	/* User fills in from here down. */
 	nf_hookfn		*hook;
+	struct net_device	*dev;
 	struct module		*owner;
 	void			*priv;
 	u_int8_t		pf;
diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h
new file mode 100644
index 000000000000..cb0727fe2b3d
--- /dev/null
+++ b/include/linux/netfilter_ingress.h
@@ -0,0 +1,41 @@
+#ifndef _NETFILTER_INGRESS_H_
+#define _NETFILTER_INGRESS_H_
+
+#include <linux/netfilter.h>
+#include <linux/netdevice.h>
+
+#ifdef CONFIG_NETFILTER_INGRESS
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+	return nf_hook_list_active(&skb->dev->nf_hooks_ingress,
+				   NFPROTO_NETDEV, NF_NETDEV_INGRESS);
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	struct nf_hook_state state;
+
+	nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress,
+			   NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL,
+			   skb->dev, NULL, NULL);
+	return nf_hook_slow(skb, &state);
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev)
+{
+	INIT_LIST_HEAD(&dev->nf_hooks_ingress);
+}
+#else /* CONFIG_NETFILTER_INGRESS */
+static inline int nf_hook_ingress_active(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline int nf_hook_ingress(struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline void nf_hook_ingress_init(struct net_device *dev) {}
+#endif /* CONFIG_NETFILTER_INGRESS */
+#endif /* _NETFILTER_INGRESS_H_ */
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index ef1b1f88ca18..177027cce6b3 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -51,11 +51,17 @@ enum nf_inet_hooks {
 	NF_INET_NUMHOOKS
 };
 
+enum nf_dev_hooks {
+	NF_NETDEV_INGRESS,
+	NF_NETDEV_NUMHOOKS
+};
+
 enum {
 	NFPROTO_UNSPEC =  0,
 	NFPROTO_INET   =  1,
 	NFPROTO_IPV4   =  2,
 	NFPROTO_ARP    =  3,
+	NFPROTO_NETDEV =  5,
 	NFPROTO_BRIDGE =  7,
 	NFPROTO_IPV6   = 10,
 	NFPROTO_DECNET = 12,
diff --git a/net/core/dev.c b/net/core/dev.c
index a5ef90016ce7..29f0d6e6542c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -135,6 +135,7 @@
 #include <linux/if_macvlan.h>
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
+#include <linux/netfilter_ingress.h>
 
 #include "net-sysfs.h"
 
@@ -3666,6 +3667,13 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
 
 	return skb;
 }
+#else
+static inline struct sk_buff *handle_ing(struct sk_buff *skb,
+					 struct packet_type **pt_prev,
+					 int *ret, struct net_device *orig_dev)
+{
+	return skb;
+}
 #endif
 
 /**
@@ -3739,6 +3747,28 @@ static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
 	}
 }
 
+#ifdef CONFIG_NETFILTER_INGRESS
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
+			     int *ret, struct net_device *orig_dev)
+{
+	if (nf_hook_ingress_active(skb)) {
+		if (*pt_prev) {
+			*ret = deliver_skb(skb, *pt_prev, orig_dev);
+			*pt_prev = NULL;
+		}
+
+		return nf_hook_ingress(skb);
+	}
+	return 0;
+}
+#else
+static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev,
+			     int *ret, struct net_device *orig_dev)
+{
+	return 0;
+}
+#endif
+
 static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc)
 {
 	struct packet_type *ptype, *pt_prev;
@@ -3803,6 +3833,9 @@ skip_taps:
 		skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
 		if (!skb)
 			goto unlock;
+
+		if (nf_ingress(skb, &pt_prev, &ret, orig_dev) < 0)
+			goto unlock;
 	}
 #endif
 #ifdef CONFIG_NET_CLS_ACT
@@ -6968,6 +7001,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	dev->group = INIT_NETDEV_GROUP;
 	if (!dev->ethtool_ops)
 		dev->ethtool_ops = &default_ethtool_ops;
+
+	nf_hook_ingress_init(dev);
+
 	return dev;
 
 free_all:
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f70e34a68f70..db1c674397ad 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1,6 +1,13 @@
 menu "Core Netfilter Configuration"
 	depends on NET && INET && NETFILTER
 
+config NETFILTER_INGRESS
+	bool "Netfilter ingress support"
+	select NET_INGRESS
+	help
+	  This allows you to classify packets from ingress using the Netfilter
+	  infrastructure.
+
 config NETFILTER_NETLINK
 	tristate
 
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index e418cfd603c0..653e32eac08c 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -64,10 +64,27 @@ static DEFINE_MUTEX(nf_hook_mutex);
 
 int nf_register_hook(struct nf_hook_ops *reg)
 {
+	struct list_head *nf_hook_list;
 	struct nf_hook_ops *elem;
 
 	mutex_lock(&nf_hook_mutex);
-	list_for_each_entry(elem, &nf_hooks[reg->pf][reg->hooknum], list) {
+	switch (reg->pf) {
+	case NFPROTO_NETDEV:
+#ifdef CONFIG_NETFILTER_INGRESS
+		if (reg->hooknum == NF_NETDEV_INGRESS) {
+			BUG_ON(reg->dev == NULL);
+			nf_hook_list = &reg->dev->nf_hooks_ingress;
+			net_inc_ingress_queue();
+			break;
+		}
+#endif
+		/* Fall through. */
+	default:
+		nf_hook_list = &nf_hooks[reg->pf][reg->hooknum];
+		break;
+	}
+
+	list_for_each_entry(elem, nf_hook_list, list) {
 		if (reg->priority < elem->priority)
 			break;
 	}
@@ -85,6 +102,18 @@ void nf_unregister_hook(struct nf_hook_ops *reg)
 	mutex_lock(&nf_hook_mutex);
 	list_del_rcu(&reg->list);
 	mutex_unlock(&nf_hook_mutex);
+	switch (reg->pf) {
+	case NFPROTO_NETDEV:
+#ifdef CONFIG_NETFILTER_INGRESS
+		if (reg->hooknum == NF_NETDEV_INGRESS) {
+			net_dec_ingress_queue();
+			break;
+		}
+		break;
+#endif
+	default:
+		break;
+	}
 #ifdef HAVE_JUMP_LABEL
 	static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
 #endif
-- 
cgit 


From 4c12adad26f059fa207d6b07aa61f39bc459211b Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 9 Apr 2015 16:36:49 -0300
Subject: [media] dvb: Document FE_SCALE_DECIBEL units consistently

In comments and in the documentation, the units of properties marked
with the FE_SCALE_DECIBEL scale are specified in terms of 1/1000 dB
or 0.0001 dB.  This is inconsistent, however, as 1/1000 is 0.001,
not 0.0001.

Note that the v4l-utils divide the value by 1000 for the signal
strength suggesting that the 1/1000 is correct.

Settle on millidecibels, ie. 1/1000dB or 0.001dB.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml | 4 ++--
 include/uapi/linux/dvb/frontend.h               | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 3018564ddfd9..7ddab2ba9b40 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -953,7 +953,7 @@ enum fe_interleaving {
 		<para>Possible scales for this metric are:</para>
 		<itemizedlist mark='bullet'>
 			<listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
-			<listitem><para><constant>FE_SCALE_DECIBEL</constant> - signal strength is in 0.0001 dBm units, power measured in miliwatts. This value is generally negative.</para></listitem>
+			<listitem><para><constant>FE_SCALE_DECIBEL</constant> - signal strength is in 0.001 dBm units, power measured in miliwatts. This value is generally negative.</para></listitem>
 			<listitem><para><constant>FE_SCALE_RELATIVE</constant> - The frontend provides a 0% to 100% measurement for power (actually, 0 to 65535).</para></listitem>
 		</itemizedlist>
 	</section>
@@ -963,7 +963,7 @@ enum fe_interleaving {
 		<para>Possible scales for this metric are:</para>
 		<itemizedlist mark='bullet'>
 			<listitem><para><constant>FE_SCALE_NOT_AVAILABLE</constant> - it failed to measure it, or the measurement was not complete yet.</para></listitem>
-			<listitem><para><constant>FE_SCALE_DECIBEL</constant> - Signal/Noise ratio is in 0.0001 dB units.</para></listitem>
+			<listitem><para><constant>FE_SCALE_DECIBEL</constant> - Signal/Noise ratio is in 0.001 dB units.</para></listitem>
 			<listitem><para><constant>FE_SCALE_RELATIVE</constant> - The frontend provides a 0% to 100% measurement for Signal/Noise (actually, 0 to 65535).</para></listitem>
 		</itemizedlist>
 	</section>
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index c56d77c496a5..466f56997272 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -467,7 +467,7 @@ struct dtv_cmds_h {
  * @FE_SCALE_NOT_AVAILABLE: That QoS measure is not available. That
  *			    could indicate a temporary or a permanent
  *			    condition.
- * @FE_SCALE_DECIBEL: The scale is measured in 0.0001 dB steps, typically
+ * @FE_SCALE_DECIBEL: The scale is measured in 0.001 dB steps, typically
  *		  used on signal measures.
  * @FE_SCALE_RELATIVE: The scale is a relative percentual measure,
  *			ranging from 0 (0%) to 0xffff (100%).
@@ -516,7 +516,7 @@ struct dtv_stats {
 	__u8 scale;	/* enum fecap_scale_params type */
 	union {
 		__u64 uvalue;	/* for counters and relative scales */
-		__s64 svalue;	/* for 0.0001 dB measures */
+		__s64 svalue;	/* for 0.001 dB measures */
 	};
 } __attribute__ ((packed));
 
-- 
cgit 


From eea39946a1f36e8a5a47c86e7ecfca6076868505 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Wed, 13 May 2015 21:17:41 -0700
Subject: rename RTNH_F_EXTERNAL to RTNH_F_OFFLOAD

RTNH_F_EXTERNAL today is printed as "offload" in iproute2 output.

This patch renames the flag to be consistent with what the user sees.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rtnetlink.h | 2 +-
 net/ipv4/fib_trie.c            | 2 +-
 net/switchdev/switchdev.c      | 6 +++---
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 974db03f7b1a..17fb02f488da 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -337,7 +337,7 @@ struct rtnexthop {
 #define RTNH_F_DEAD		1	/* Nexthop is dead (used by multipath)	*/
 #define RTNH_F_PERVASIVE	2	/* Do recursive gateway lookup	*/
 #define RTNH_F_ONLINK		4	/* Gateway is forced on link	*/
-#define RTNH_F_EXTERNAL		8	/* Route installed externally	*/
+#define RTNH_F_OFFLOAD		8	/* offloaded route */
 
 /* Macros to handle hexthops */
 
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index e13fcc602da2..64c2076ced54 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1764,7 +1764,7 @@ void fib_table_flush_external(struct fib_table *tb)
 			/* record local slen */
 			slen = fa->fa_slen;
 
-			if (!fi || !(fi->fib_flags & RTNH_F_EXTERNAL))
+			if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD))
 				continue;
 
 			netdev_switch_fib_ipv4_del(n->key,
diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c
index 46568b85c333..055453d48668 100644
--- a/net/switchdev/switchdev.c
+++ b/net/switchdev/switchdev.c
@@ -338,7 +338,7 @@ int netdev_switch_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi,
 					      fi, tos, type, nlflags,
 					      tb_id);
 		if (!err)
-			fi->fib_flags |= RTNH_F_EXTERNAL;
+			fi->fib_flags |= RTNH_F_OFFLOAD;
 	}
 
 	return err;
@@ -364,7 +364,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
 	const struct swdev_ops *ops;
 	int err = 0;
 
-	if (!(fi->fib_flags & RTNH_F_EXTERNAL))
+	if (!(fi->fib_flags & RTNH_F_OFFLOAD))
 		return 0;
 
 	dev = netdev_switch_get_dev_by_nhs(fi);
@@ -376,7 +376,7 @@ int netdev_switch_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi,
 		err = ops->swdev_fib_ipv4_del(dev, htonl(dst), dst_len,
 					      fi, tos, type, tb_id);
 		if (!err)
-			fi->fib_flags &= ~RTNH_F_EXTERNAL;
+			fi->fib_flags &= ~RTNH_F_OFFLOAD;
 	}
 
 	return err;
-- 
cgit 


From b3cad287d13b5f6695c6b4aab72969cd64bf0171 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 7 May 2015 14:54:16 +0200
Subject: conntrack: RFC5961 challenge ACK confuse conntrack LAST-ACK
 transition

In compliance with RFC5961, the network stack send challenge ACK in
response to spurious SYN packets, since commit 0c228e833c88 ("tcp:
Restore RFC5961-compliant behavior for SYN packets").

This pose a problem for netfilter conntrack in state LAST_ACK, because
this challenge ACK is (falsely) seen as ACKing last FIN, causing a
false state transition (into TIME_WAIT).

The challenge ACK is hard to distinguish from real last ACK.  Thus,
solution introduce a flag that tracks the potential for seeing a
challenge ACK, in case a SYN packet is let through and current state
is LAST_ACK.

When conntrack transition LAST_ACK to TIME_WAIT happens, this flag is
used for determining if we are expecting a challenge ACK.

Scapy based reproducer script avail here:
 https://github.com/netoptimizer/network-testing/blob/master/scapy/tcp_hacks_3WHS_LAST_ACK.py

Fixes: 0c228e833c88 ("tcp: Restore RFC5961-compliant behavior for SYN packets")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_conntrack_tcp.h |  3 +++
 net/netfilter/nf_conntrack_proto_tcp.c          | 35 ++++++++++++++++++++++---
 2 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h
index 9993a421201c..ef9f80f0f529 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h
@@ -42,6 +42,9 @@ enum tcp_conntrack {
 /* The field td_maxack has been set */
 #define IP_CT_TCP_FLAG_MAXACK_SET		0x20
 
+/* Marks possibility for expected RFC5961 challenge ACK */
+#define IP_CT_EXP_CHALLENGE_ACK 		0x40
+
 struct nf_ct_tcp_flags {
 	__u8 flags;
 	__u8 mask;
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 5caa0c41bf26..70383de72054 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -202,7 +202,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
  *	sCW -> sCW
- *	sLA -> sTW	Last ACK detected.
+ *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
  *	sTW -> sTW	Retransmitted last ACK. Remain in the same state.
  *	sCL -> sCL
  */
@@ -261,7 +261,7 @@ static const u8 tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
  *	sES -> sES	:-)
  *	sFW -> sCW	Normal close request answered by ACK.
  *	sCW -> sCW
- *	sLA -> sTW	Last ACK detected.
+ *	sLA -> sTW	Last ACK detected (RFC5961 challenged)
  *	sTW -> sTW	Retransmitted last ACK.
  *	sCL -> sCL
  */
@@ -906,6 +906,7 @@ static int tcp_packet(struct nf_conn *ct,
 					1 : ct->proto.tcp.last_win;
 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].td_scale =
 				ct->proto.tcp.last_wscale;
+			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
 			ct->proto.tcp.seen[ct->proto.tcp.last_dir].flags =
 				ct->proto.tcp.last_flags;
 			memset(&ct->proto.tcp.seen[dir], 0,
@@ -923,7 +924,9 @@ static int tcp_packet(struct nf_conn *ct,
 		 * may be in sync but we are not. In that case, we annotate
 		 * the TCP options and let the packet go through. If it is a
 		 * valid SYN packet, the server will reply with a SYN/ACK, and
-		 * then we'll get in sync. Otherwise, the server ignores it. */
+		 * then we'll get in sync. Otherwise, the server potentially
+		 * responds with a challenge ACK if implementing RFC5961.
+		 */
 		if (index == TCP_SYN_SET && dir == IP_CT_DIR_ORIGINAL) {
 			struct ip_ct_tcp_state seen = {};
 
@@ -939,6 +942,13 @@ static int tcp_packet(struct nf_conn *ct,
 				ct->proto.tcp.last_flags |=
 					IP_CT_TCP_FLAG_SACK_PERM;
 			}
+			/* Mark the potential for RFC5961 challenge ACK,
+			 * this pose a special problem for LAST_ACK state
+			 * as ACK is intrepretated as ACKing last FIN.
+			 */
+			if (old_state == TCP_CONNTRACK_LAST_ACK)
+				ct->proto.tcp.last_flags |=
+					IP_CT_EXP_CHALLENGE_ACK;
 		}
 		spin_unlock_bh(&ct->lock);
 		if (LOG_INVALID(net, IPPROTO_TCP))
@@ -970,6 +980,25 @@ static int tcp_packet(struct nf_conn *ct,
 			nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
 				  "nf_ct_tcp: invalid state ");
 		return -NF_ACCEPT;
+	case TCP_CONNTRACK_TIME_WAIT:
+		/* RFC5961 compliance cause stack to send "challenge-ACK"
+		 * e.g. in response to spurious SYNs.  Conntrack MUST
+		 * not believe this ACK is acking last FIN.
+		 */
+		if (old_state == TCP_CONNTRACK_LAST_ACK &&
+		    index == TCP_ACK_SET &&
+		    ct->proto.tcp.last_dir != dir &&
+		    ct->proto.tcp.last_index == TCP_SYN_SET &&
+		    (ct->proto.tcp.last_flags & IP_CT_EXP_CHALLENGE_ACK)) {
+			/* Detected RFC5961 challenge ACK */
+			ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK;
+			spin_unlock_bh(&ct->lock);
+			if (LOG_INVALID(net, IPPROTO_TCP))
+				nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL,
+				      "nf_ct_tcp: challenge-ACK ignored ");
+			return NF_ACCEPT; /* Don't change state */
+		}
+		break;
 	case TCP_CONNTRACK_CLOSE:
 		if (index == TCP_RST_SET
 		    && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET)
-- 
cgit 


From 29a5d3eb9a7612b26ba098a0db65e54372612d07 Mon Sep 17 00:00:00 2001
From: Andrew Lewycky <Andrew.Lewycky@amd.com>
Date: Sun, 7 Dec 2014 17:05:11 +0200
Subject: drm/amdkfd: add events IOCTL set definitions

- AMDKFD_IOC_CREATE_EVENT:
	Creates a new event of a specified type

- AMDKFD_IOC_DESTROY_EVENT:
	Destroys an existing event

- AMDKFD_IOC_SET_EVENT:
	Signal an existing event

- AMDKFD_IOC_RESET_EVENT:
	Reset an existing event

- AMDKFD_IOC_WAIT_EVENTS:
	Wait on event(s) until they are signaled

v2:

- Move the limit of the signal events to kfd_ioctl.h so it
  can be used by userspace

v3:
- Change all bool fields in struct kfd_memory_exception_failure
to uint32_t

Signed-off-by: Andrew Lewycky <Andrew.Lewycky@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 45 +++++++++++++++
 include/uapi/linux/kfd_ioctl.h           | 96 +++++++++++++++++++++++++++++++-
 2 files changed, 139 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 19a4fba46e4e..9933b2efe5dd 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -514,6 +514,36 @@ static int kfd_ioctl_get_process_apertures(struct file *filp,
 	return 0;
 }
 
+static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	return -ENODEV;
+}
+
+static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
+					void *data)
+{
+	return -ENODEV;
+}
+
+static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
+				void *data)
+{
+	return -ENODEV;
+}
+
+static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
+				void *data)
+{
+	return -ENODEV;
+}
+
+static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
+				void *data)
+{
+	return -ENODEV;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, .cmd_drv = 0, .name = #ioctl}
 
@@ -539,6 +569,21 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
 			kfd_ioctl_update_queue, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
+			kfd_ioctl_create_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
+			kfd_ioctl_destroy_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
+			kfd_ioctl_set_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
+			kfd_ioctl_reset_event, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
+			kfd_ioctl_wait_events, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index af94f31e33ac..4ca35a8f9891 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -27,7 +27,7 @@
 #include <linux/ioctl.h>
 
 #define KFD_IOCTL_MAJOR_VERSION 1
-#define KFD_IOCTL_MINOR_VERSION 0
+#define KFD_IOCTL_MINOR_VERSION 1
 
 struct kfd_ioctl_get_version_args {
 	uint32_t major_version;	/* from KFD */
@@ -128,6 +128,83 @@ struct kfd_ioctl_get_process_apertures_args {
 	uint32_t pad;
 };
 
+/* Matching HSA_EVENTTYPE */
+#define KFD_IOC_EVENT_SIGNAL			0
+#define KFD_IOC_EVENT_NODECHANGE		1
+#define KFD_IOC_EVENT_DEVICESTATECHANGE		2
+#define KFD_IOC_EVENT_HW_EXCEPTION		3
+#define KFD_IOC_EVENT_SYSTEM_EVENT		4
+#define KFD_IOC_EVENT_DEBUG_EVENT		5
+#define KFD_IOC_EVENT_PROFILE_EVENT		6
+#define KFD_IOC_EVENT_QUEUE_EVENT		7
+#define KFD_IOC_EVENT_MEMORY			8
+
+#define KFD_IOC_WAIT_RESULT_COMPLETE		0
+#define KFD_IOC_WAIT_RESULT_TIMEOUT		1
+#define KFD_IOC_WAIT_RESULT_FAIL		2
+
+#define KFD_SIGNAL_EVENT_LIMIT			256
+
+struct kfd_ioctl_create_event_args {
+	uint64_t event_page_offset;	/* from KFD */
+	uint32_t event_trigger_data;	/* from KFD - signal events only */
+	uint32_t event_type;		/* to KFD */
+	uint32_t auto_reset;		/* to KFD */
+	uint32_t node_id;		/* to KFD - only valid for certain
+							event types */
+	uint32_t event_id;		/* from KFD */
+	uint32_t event_slot_index;	/* from KFD */
+};
+
+struct kfd_ioctl_destroy_event_args {
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_set_event_args {
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_reset_event_args {
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_memory_exception_failure {
+	uint32_t NotPresent;	/* Page not present or supervisor privilege */
+	uint32_t ReadOnly;	/* Write access to a read-only page */
+	uint32_t NoExecute;	/* Execute access to a page marked NX */
+	uint32_t pad;
+};
+
+/* memory exception data*/
+struct kfd_hsa_memory_exception_data {
+	struct kfd_memory_exception_failure failure;
+	uint64_t va;
+	uint32_t gpu_id;
+	uint32_t pad;
+};
+
+/* Event data*/
+struct kfd_event_data {
+	union {
+		struct kfd_hsa_memory_exception_data memory_exception_data;
+	};				/* From KFD */
+	uint64_t kfd_event_data_ext;	/* pointer to an extension structure
+					   for future exception types */
+	uint32_t event_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_wait_events_args {
+	uint64_t events_ptr;		/* to KFD */
+	uint32_t num_events;		/* to KFD */
+	uint32_t wait_for_all;		/* to KFD */
+	uint32_t timeout;		/* to KFD */
+	uint32_t wait_result;		/* from KFD */
+};
+
 #define AMDKFD_IOCTL_BASE 'K'
 #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
 #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)
@@ -155,7 +232,22 @@ struct kfd_ioctl_get_process_apertures_args {
 #define AMDKFD_IOC_UPDATE_QUEUE			\
 		AMDKFD_IOW(0x07, struct kfd_ioctl_update_queue_args)
 
+#define AMDKFD_IOC_CREATE_EVENT			\
+		AMDKFD_IOWR(0x08, struct kfd_ioctl_create_event_args)
+
+#define AMDKFD_IOC_DESTROY_EVENT		\
+		AMDKFD_IOW(0x09, struct kfd_ioctl_destroy_event_args)
+
+#define AMDKFD_IOC_SET_EVENT			\
+		AMDKFD_IOW(0x0A, struct kfd_ioctl_set_event_args)
+
+#define AMDKFD_IOC_RESET_EVENT			\
+		AMDKFD_IOW(0x0B, struct kfd_ioctl_reset_event_args)
+
+#define AMDKFD_IOC_WAIT_EVENTS			\
+		AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x08
+#define AMDKFD_COMMAND_END		0x0D
 
 #endif
-- 
cgit 


From c9a70d43461d83818825ae065bb8fc887421e150 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav.pandit@avagotech.com>
Date: Mon, 18 May 2015 16:31:47 +0530
Subject: net-next: ethtool: Added port speed macros.

Signed-off-by: Parav Pandit <parav.pandit@avagotech.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 2e49fc880d29..ae832b45b44c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1264,15 +1264,19 @@ enum ethtool_sfeatures_retval_bits {
  * it was forced up into this mode or autonegotiated.
  */
 
-/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|10|20|40|56]GbE. */
+/* The forced speed, 10Mb, 100Mb, gigabit, [2.5|5|10|20|25|40|50|56|100]GbE. */
 #define SPEED_10		10
 #define SPEED_100		100
 #define SPEED_1000		1000
 #define SPEED_2500		2500
+#define SPEED_5000		5000
 #define SPEED_10000		10000
 #define SPEED_20000		20000
+#define SPEED_25000		25000
 #define SPEED_40000		40000
+#define SPEED_50000		50000
 #define SPEED_56000		56000
+#define SPEED_100000		100000
 
 #define SPEED_UNKNOWN		-1
 
-- 
cgit 


From 928f81aa619d845d6faa5a459cdbc18b7a78ddce Mon Sep 17 00:00:00 2001
From: Tony Lindgren <tony@atomide.com>
Date: Wed, 20 May 2015 09:01:20 -0700
Subject: ARM: OMAP1: Move UART defines to prepare for sparse IRQ

These have been indirectly included via asm/irqs.h that
has included mach/hardware.h unless SPARSE_IRQ is specified.
Let's move them to where the other OMAP serial defines for
8250 are.

Cc: Aaro Koskinen <aaro.koskinen@iki.fi>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Tony Lindgren <tony@atomide.com>
---
 arch/arm/mach-omap1/include/mach/serial.h | 5 -----
 include/uapi/linux/serial_reg.h           | 3 +++
 2 files changed, 3 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/arm/mach-omap1/include/mach/serial.h b/arch/arm/mach-omap1/include/mach/serial.h
index 2ce6a2db470b..4700e384c3d9 100644
--- a/arch/arm/mach-omap1/include/mach/serial.h
+++ b/arch/arm/mach-omap1/include/mach/serial.h
@@ -27,11 +27,6 @@
  */
 #define OMAP_UART_INFO_OFS	0x3ffc
 
-/* OMAP1 serial ports */
-#define OMAP1_UART1_BASE	0xfffb0000
-#define OMAP1_UART2_BASE	0xfffb0800
-#define OMAP1_UART3_BASE	0xfffb9800
-
 #define OMAP_PORT_SHIFT		2
 #define OMAP7XX_PORT_SHIFT	0
 
diff --git a/include/uapi/linux/serial_reg.h b/include/uapi/linux/serial_reg.h
index e9b4cb0cd7ed..1e5ac4e776da 100644
--- a/include/uapi/linux/serial_reg.h
+++ b/include/uapi/linux/serial_reg.h
@@ -331,6 +331,9 @@
  * Extra serial register definitions for the internal UARTs
  * in TI OMAP processors.
  */
+#define OMAP1_UART1_BASE	0xfffb0000
+#define OMAP1_UART2_BASE	0xfffb0800
+#define OMAP1_UART3_BASE	0xfffb9800
 #define UART_OMAP_MDR1		0x08	/* Mode definition register */
 #define UART_OMAP_MDR2		0x09	/* Mode definition register 2 */
 #define UART_OMAP_SCR		0x10	/* Supplementary control register */
-- 
cgit 


From 04fd61ab36ec065e194ab5e74ae34a5240d992bb Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 19 May 2015 16:59:03 -0700
Subject: bpf: allow bpf programs to tail-call other bpf programs

introduce bpf_tail_call(ctx, &jmp_table, index) helper function
which can be used from BPF programs like:
int bpf_prog(struct pt_regs *ctx)
{
  ...
  bpf_tail_call(ctx, &jmp_table, index);
  ...
}
that is roughly equivalent to:
int bpf_prog(struct pt_regs *ctx)
{
  ...
  if (jmp_table[index])
    return (*jmp_table[index])(ctx);
  ...
}
The important detail that it's not a normal call, but a tail call.
The kernel stack is precious, so this helper reuses the current
stack frame and jumps into another BPF program without adding
extra call frame.
It's trivially done in interpreter and a bit trickier in JITs.
In case of x64 JIT the bigger part of generated assembler prologue
is common for all programs, so it is simply skipped while jumping.
Other JITs can do similar prologue-skipping optimization or
do stack unwind before jumping into the next program.

bpf_tail_call() arguments:
ctx - context pointer
jmp_table - one of BPF_MAP_TYPE_PROG_ARRAY maps used as the jump table
index - index in the jump table

Since all BPF programs are idenitified by file descriptor, user space
need to populate the jmp_table with FDs of other BPF programs.
If jmp_table[index] is empty the bpf_tail_call() doesn't jump anywhere
and program execution continues as normal.

New BPF_MAP_TYPE_PROG_ARRAY map type is introduced so that user space can
populate this jmp_table array with FDs of other bpf programs.
Programs can share the same jmp_table array or use multiple jmp_tables.

The chain of tail calls can form unpredictable dynamic loops therefore
tail_call_cnt is used to limit the number of calls and currently is set to 32.

Use cases:
Acked-by: Daniel Borkmann <daniel@iogearbox.net>

==========
- simplify complex programs by splitting them into a sequence of small programs

- dispatch routine
  For tracing and future seccomp the program may be triggered on all system
  calls, but processing of syscall arguments will be different. It's more
  efficient to implement them as:
  int syscall_entry(struct seccomp_data *ctx)
  {
     bpf_tail_call(ctx, &syscall_jmp_table, ctx->nr /* syscall number */);
     ... default: process unknown syscall ...
  }
  int sys_write_event(struct seccomp_data *ctx) {...}
  int sys_read_event(struct seccomp_data *ctx) {...}
  syscall_jmp_table[__NR_write] = sys_write_event;
  syscall_jmp_table[__NR_read] = sys_read_event;

  For networking the program may call into different parsers depending on
  packet format, like:
  int packet_parser(struct __sk_buff *skb)
  {
     ... parse L2, L3 here ...
     __u8 ipproto = load_byte(skb, ... offsetof(struct iphdr, protocol));
     bpf_tail_call(skb, &ipproto_jmp_table, ipproto);
     ... default: process unknown protocol ...
  }
  int parse_tcp(struct __sk_buff *skb) {...}
  int parse_udp(struct __sk_buff *skb) {...}
  ipproto_jmp_table[IPPROTO_TCP] = parse_tcp;
  ipproto_jmp_table[IPPROTO_UDP] = parse_udp;

- for TC use case, bpf_tail_call() allows to implement reclassify-like logic

- bpf_map_update_elem/delete calls into BPF_MAP_TYPE_PROG_ARRAY jump table
  are atomic, so user space can build chains of BPF programs on the fly

Implementation details:
=======================
- high performance of bpf_tail_call() is the goal.
  It could have been implemented without JIT changes as a wrapper on top of
  BPF_PROG_RUN() macro, but with two downsides:
  . all programs would have to pay performance penalty for this feature and
    tail call itself would be slower, since mandatory stack unwind, return,
    stack allocate would be done for every tailcall.
  . tailcall would be limited to programs running preempt_disabled, since
    generic 'void *ctx' doesn't have room for 'tail_call_cnt' and it would
    need to be either global per_cpu variable accessed by helper and by wrapper
    or global variable protected by locks.

  In this implementation x64 JIT bypasses stack unwind and jumps into the
  callee program after prologue.

- bpf_prog_array_compatible() ensures that prog_type of callee and caller
  are the same and JITed/non-JITed flag is the same, since calling JITed
  program from non-JITed is invalid, since stack frames are different.
  Similarly calling kprobe type program from socket type program is invalid.

- jump table is implemented as BPF_MAP_TYPE_PROG_ARRAY to reuse 'map'
  abstraction, its user space API and all of verifier logic.
  It's in the existing arraymap.c file, since several functions are
  shared with regular array map.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h      |  22 +++++++++
 include/linux/filter.h   |   2 +-
 include/uapi/linux/bpf.h |  10 +++++
 kernel/bpf/arraymap.c    | 113 ++++++++++++++++++++++++++++++++++++++++++++---
 kernel/bpf/core.c        |  73 +++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c     |  23 +++++++++-
 kernel/bpf/verifier.c    |  17 +++++++
 kernel/trace/bpf_trace.c |   2 +
 net/core/filter.c        |   2 +
 9 files changed, 255 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d5cda067115a..8821b9a8689e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -126,6 +126,27 @@ struct bpf_prog_aux {
 	struct work_struct work;
 };
 
+struct bpf_array {
+	struct bpf_map map;
+	u32 elem_size;
+	/* 'ownership' of prog_array is claimed by the first program that
+	 * is going to use this map or by the first program which FD is stored
+	 * in the map to make sure that all callers and callees have the same
+	 * prog_type and JITed flag
+	 */
+	enum bpf_prog_type owner_prog_type;
+	bool owner_jited;
+	union {
+		char value[0] __aligned(8);
+		struct bpf_prog *prog[0] __aligned(8);
+	};
+};
+#define MAX_TAIL_CALL_CNT 32
+
+u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
+void bpf_prog_array_map_clear(struct bpf_map *map);
+bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
+
 #ifdef CONFIG_BPF_SYSCALL
 void bpf_register_prog_type(struct bpf_prog_type_list *tl);
 void bpf_register_map_type(struct bpf_map_type_list *tl);
@@ -160,5 +181,6 @@ extern const struct bpf_func_proto bpf_map_delete_elem_proto;
 
 extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
+extern const struct bpf_func_proto bpf_tail_call_proto;
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 200be4a74a33..17724f6ea983 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -378,7 +378,7 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 
 int sk_filter(struct sock *sk, struct sk_buff *skb);
 
-void bpf_prog_select_runtime(struct bpf_prog *fp);
+int bpf_prog_select_runtime(struct bpf_prog *fp);
 void bpf_prog_free(struct bpf_prog *fp);
 
 struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a9ebdf5701e8..f0a9af8b4dae 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -113,6 +113,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_UNSPEC,
 	BPF_MAP_TYPE_HASH,
 	BPF_MAP_TYPE_ARRAY,
+	BPF_MAP_TYPE_PROG_ARRAY,
 };
 
 enum bpf_prog_type {
@@ -210,6 +211,15 @@ enum bpf_func_id {
 	 * Return: 0 on success
 	 */
 	BPF_FUNC_l4_csum_replace,
+
+	/**
+	 * bpf_tail_call(ctx, prog_array_map, index) - jump into another BPF program
+	 * @ctx: context pointer passed to next program
+	 * @prog_array_map: pointer to map which type is BPF_MAP_TYPE_PROG_ARRAY
+	 * @index: index inside array that selects specific program to run
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_tail_call,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8a6616583f38..614bcd4c1d74 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -14,12 +14,7 @@
 #include <linux/vmalloc.h>
 #include <linux/slab.h>
 #include <linux/mm.h>
-
-struct bpf_array {
-	struct bpf_map map;
-	u32 elem_size;
-	char value[0] __aligned(8);
-};
+#include <linux/filter.h>
 
 /* Called from syscall */
 static struct bpf_map *array_map_alloc(union bpf_attr *attr)
@@ -154,3 +149,109 @@ static int __init register_array_map(void)
 	return 0;
 }
 late_initcall(register_array_map);
+
+static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
+{
+	/* only bpf_prog file descriptors can be stored in prog_array map */
+	if (attr->value_size != sizeof(u32))
+		return ERR_PTR(-EINVAL);
+	return array_map_alloc(attr);
+}
+
+static void prog_array_map_free(struct bpf_map *map)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	int i;
+
+	synchronize_rcu();
+
+	/* make sure it's empty */
+	for (i = 0; i < array->map.max_entries; i++)
+		BUG_ON(array->prog[i] != NULL);
+	kvfree(array);
+}
+
+static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return NULL;
+}
+
+/* only called from syscall */
+static int prog_array_map_update_elem(struct bpf_map *map, void *key,
+				      void *value, u64 map_flags)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct bpf_prog *prog, *old_prog;
+	u32 index = *(u32 *)key, ufd;
+
+	if (map_flags != BPF_ANY)
+		return -EINVAL;
+
+	if (index >= array->map.max_entries)
+		return -E2BIG;
+
+	ufd = *(u32 *)value;
+	prog = bpf_prog_get(ufd);
+	if (IS_ERR(prog))
+		return PTR_ERR(prog);
+
+	if (!bpf_prog_array_compatible(array, prog)) {
+		bpf_prog_put(prog);
+		return -EINVAL;
+	}
+
+	old_prog = xchg(array->prog + index, prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
+
+	return 0;
+}
+
+static int prog_array_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	struct bpf_prog *old_prog;
+	u32 index = *(u32 *)key;
+
+	if (index >= array->map.max_entries)
+		return -E2BIG;
+
+	old_prog = xchg(array->prog + index, NULL);
+	if (old_prog) {
+		bpf_prog_put(old_prog);
+		return 0;
+	} else {
+		return -ENOENT;
+	}
+}
+
+/* decrement refcnt of all bpf_progs that are stored in this map */
+void bpf_prog_array_map_clear(struct bpf_map *map)
+{
+	struct bpf_array *array = container_of(map, struct bpf_array, map);
+	int i;
+
+	for (i = 0; i < array->map.max_entries; i++)
+		prog_array_map_delete_elem(map, &i);
+}
+
+static const struct bpf_map_ops prog_array_ops = {
+	.map_alloc = prog_array_map_alloc,
+	.map_free = prog_array_map_free,
+	.map_get_next_key = array_map_get_next_key,
+	.map_lookup_elem = prog_array_map_lookup_elem,
+	.map_update_elem = prog_array_map_update_elem,
+	.map_delete_elem = prog_array_map_delete_elem,
+};
+
+static struct bpf_map_type_list prog_array_type __read_mostly = {
+	.ops = &prog_array_ops,
+	.type = BPF_MAP_TYPE_PROG_ARRAY,
+};
+
+static int __init register_prog_array_map(void)
+{
+	bpf_register_map_type(&prog_array_type);
+	return 0;
+}
+late_initcall(register_prog_array_map);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 54f0e7fcd0e2..d44b25cbe460 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -176,6 +176,15 @@ noinline u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 	return 0;
 }
 
+const struct bpf_func_proto bpf_tail_call_proto = {
+	.func = NULL,
+	.gpl_only = false,
+	.ret_type = RET_VOID,
+	.arg1_type = ARG_PTR_TO_CTX,
+	.arg2_type = ARG_CONST_MAP_PTR,
+	.arg3_type = ARG_ANYTHING,
+};
+
 /**
  *	__bpf_prog_run - run eBPF program on a given context
  *	@ctx: is the data we are operating on
@@ -244,6 +253,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		[BPF_ALU64 | BPF_NEG] = &&ALU64_NEG,
 		/* Call instruction */
 		[BPF_JMP | BPF_CALL] = &&JMP_CALL,
+		[BPF_JMP | BPF_CALL | BPF_X] = &&JMP_TAIL_CALL,
 		/* Jumps */
 		[BPF_JMP | BPF_JA] = &&JMP_JA,
 		[BPF_JMP | BPF_JEQ | BPF_X] = &&JMP_JEQ_X,
@@ -286,6 +296,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn)
 		[BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B,
 		[BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW,
 	};
+	u32 tail_call_cnt = 0;
 	void *ptr;
 	int off;
 
@@ -431,6 +442,30 @@ select_insn:
 						       BPF_R4, BPF_R5);
 		CONT;
 
+	JMP_TAIL_CALL: {
+		struct bpf_map *map = (struct bpf_map *) (unsigned long) BPF_R2;
+		struct bpf_array *array = container_of(map, struct bpf_array, map);
+		struct bpf_prog *prog;
+		u64 index = BPF_R3;
+
+		if (unlikely(index >= array->map.max_entries))
+			goto out;
+
+		if (unlikely(tail_call_cnt > MAX_TAIL_CALL_CNT))
+			goto out;
+
+		tail_call_cnt++;
+
+		prog = READ_ONCE(array->prog[index]);
+		if (unlikely(!prog))
+			goto out;
+
+		ARG1 = BPF_R1;
+		insn = prog->insnsi;
+		goto select_insn;
+out:
+		CONT;
+	}
 	/* JMP */
 	JMP_JA:
 		insn += insn->off;
@@ -619,6 +654,40 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog)
 {
 }
 
+bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp)
+{
+	if (array->owner_prog_type) {
+		if (array->owner_prog_type != fp->type)
+			return false;
+		if (array->owner_jited != fp->jited)
+			return false;
+	} else {
+		array->owner_prog_type = fp->type;
+		array->owner_jited = fp->jited;
+	}
+	return true;
+}
+
+static int check_tail_call(const struct bpf_prog *fp)
+{
+	struct bpf_prog_aux *aux = fp->aux;
+	int i;
+
+	for (i = 0; i < aux->used_map_cnt; i++) {
+		struct bpf_array *array;
+		struct bpf_map *map;
+
+		map = aux->used_maps[i];
+		if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+			continue;
+		array = container_of(map, struct bpf_array, map);
+		if (!bpf_prog_array_compatible(array, fp))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 /**
  *	bpf_prog_select_runtime - select execution runtime for BPF program
  *	@fp: bpf_prog populated with internal BPF program
@@ -626,7 +695,7 @@ void __weak bpf_int_jit_compile(struct bpf_prog *prog)
  * try to JIT internal BPF program, if JIT is not available select interpreter
  * BPF program will be executed via BPF_PROG_RUN() macro
  */
-void bpf_prog_select_runtime(struct bpf_prog *fp)
+int bpf_prog_select_runtime(struct bpf_prog *fp)
 {
 	fp->bpf_func = (void *) __bpf_prog_run;
 
@@ -634,6 +703,8 @@ void bpf_prog_select_runtime(struct bpf_prog *fp)
 	bpf_int_jit_compile(fp);
 	/* Lock whole bpf_prog as read-only */
 	bpf_prog_lock_ro(fp);
+
+	return check_tail_call(fp);
 }
 EXPORT_SYMBOL_GPL(bpf_prog_select_runtime);
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 3bae6c591914..98a69bd83069 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -68,6 +68,12 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
 {
 	struct bpf_map *map = filp->private_data;
 
+	if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
+		/* prog_array stores refcnt-ed bpf_prog pointers
+		 * release them all when user space closes prog_array_fd
+		 */
+		bpf_prog_array_map_clear(map);
+
 	bpf_map_put(map);
 	return 0;
 }
@@ -392,6 +398,19 @@ static void fixup_bpf_calls(struct bpf_prog *prog)
 			 */
 			BUG_ON(!prog->aux->ops->get_func_proto);
 
+			if (insn->imm == BPF_FUNC_tail_call) {
+				/* mark bpf_tail_call as different opcode
+				 * to avoid conditional branch in
+				 * interpeter for every normal call
+				 * and to prevent accidental JITing by
+				 * JIT compiler that doesn't support
+				 * bpf_tail_call yet
+				 */
+				insn->imm = 0;
+				insn->code |= BPF_X;
+				continue;
+			}
+
 			fn = prog->aux->ops->get_func_proto(insn->imm);
 			/* all functions that have prototype and verifier allowed
 			 * programs to call them, must be real in-kernel functions
@@ -532,7 +551,9 @@ static int bpf_prog_load(union bpf_attr *attr)
 	fixup_bpf_calls(prog);
 
 	/* eBPF program is ready to be JITed */
-	bpf_prog_select_runtime(prog);
+	err = bpf_prog_select_runtime(prog);
+	if (err < 0)
+		goto free_used_maps;
 
 	err = anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, O_RDWR | O_CLOEXEC);
 	if (err < 0)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 47dcd3aa6e23..cfd9a40b9a5a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -907,6 +907,23 @@ static int check_call(struct verifier_env *env, int func_id)
 			fn->ret_type, func_id);
 		return -EINVAL;
 	}
+
+	if (map && map->map_type == BPF_MAP_TYPE_PROG_ARRAY &&
+	    func_id != BPF_FUNC_tail_call)
+		/* prog_array map type needs extra care:
+		 * only allow to pass it into bpf_tail_call() for now.
+		 * bpf_map_delete_elem() can be allowed in the future,
+		 * while bpf_map_update_elem() must only be done via syscall
+		 */
+		return -EINVAL;
+
+	if (func_id == BPF_FUNC_tail_call &&
+	    map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
+		/* don't allow any other map type to be passed into
+		 * bpf_tail_call()
+		 */
+		return -EINVAL;
+
 	return 0;
 }
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2d56ce501632..646445e41bd4 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -172,6 +172,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_probe_read_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+	case BPF_FUNC_tail_call:
+		return &bpf_tail_call_proto;
 
 	case BPF_FUNC_trace_printk:
 		/*
diff --git a/net/core/filter.c b/net/core/filter.c
index 6805717be614..3adcca6f17a4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1421,6 +1421,8 @@ sk_filter_func_proto(enum bpf_func_id func_id)
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_tail_call:
+		return &bpf_tail_call_proto;
 	default:
 		return NULL;
 	}
-- 
cgit 


From 2efd055c53c06b7e89c167c98069bab9afce7e59 Mon Sep 17 00:00:00 2001
From: Marcelo Ricardo Leitner <mleitner@redhat.com>
Date: Wed, 20 May 2015 16:35:41 -0700
Subject: tcp: add tcpi_segs_in and tcpi_segs_out to tcp_info

This patch tracks the total number of inbound and outbound segments on a
TCP socket. One may use this number to have an idea on connection
quality when compared against the retransmissions.

RFC4898 named these : tcpEStatsPerfSegsIn and tcpEStatsPerfSegsOut

These are a 32bit field each and can be fetched both from TCP_INFO
getsockopt() if one has a handle on a TCP socket, or from inet_diag
netlink facility (iproute2/ss patch will follow)

Note that tp->segs_out was placed near tp->snd_nxt for good data
locality and minimal performance impact, while tp->segs_in was placed
near tp->bytes_received for the same reason.

Join work with Eric Dumazet.

Note that received SYN are accounted on the listener, but sent SYNACK
are not accounted.

Signed-off-by: Marcelo Ricardo Leitner <mleitner@redhat.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 7 ++++++-
 include/uapi/linux/tcp.h | 4 +++-
 net/ipv4/tcp.c           | 2 ++
 net/ipv4/tcp_ipv4.c      | 1 +
 net/ipv4/tcp_minisocks.c | 1 +
 net/ipv4/tcp_output.c    | 1 +
 net/ipv6/tcp_ipv6.c      | 1 +
 7 files changed, 15 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e6fb5df22db1..f0212026c77f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -149,11 +149,16 @@ struct tcp_sock {
 				 * sum(delta(rcv_nxt)), or how many bytes
 				 * were acked.
 				 */
+	u32	segs_in;	/* RFC4898 tcpEStatsPerfSegsIn
+				 * total number of segments in.
+				 */
  	u32	rcv_nxt;	/* What we want to receive next 	*/
 	u32	copied_seq;	/* Head of yet unread data		*/
 	u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
  	u32	snd_nxt;	/* Next sequence we send		*/
-
+	u32	segs_out;	/* RFC4898 tcpEStatsPerfSegsOut
+				 * The total number of segments sent.
+				 */
 	u64	bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
 				 * sum(delta(snd_una)), or how many bytes
 				 * were acked.
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 51ebedba577f..65a77b071e22 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -192,8 +192,10 @@ struct tcp_info {
 
 	__u64	tcpi_pacing_rate;
 	__u64	tcpi_max_pacing_rate;
-	__u64	tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+	__u64	tcpi_bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
 	__u64	tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
+	__u32	tcpi_segs_out;	     /* RFC4898 tcpEStatsPerfSegsOut */
+	__u32	tcpi_segs_in;	     /* RFC4898 tcpEStatsPerfSegsIn */
 };
 
 /* for TCP_MD5SIG socket option */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0a3f9a00565b..7f3e721b9e69 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2695,6 +2695,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	spin_lock_bh(&sk->sk_lock.slock);
 	info->tcpi_bytes_acked = tp->bytes_acked;
 	info->tcpi_bytes_received = tp->bytes_received;
+	info->tcpi_segs_out = tp->segs_out;
+	info->tcpi_segs_in = tp->segs_in;
 	spin_unlock_bh(&sk->sk_lock.slock);
 }
 EXPORT_SYMBOL_GPL(tcp_get_info);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0cc4b5a630cd..feb875769b8d 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1626,6 +1626,7 @@ process:
 	skb->dev = NULL;
 
 	bh_lock_sock_nested(sk);
+	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
 		if (!tcp_prequeue(sk, skb))
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index ebe2ab2596ed..b62d15c86946 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -448,6 +448,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req,
 
 		newtp->rcv_wup = newtp->copied_seq =
 		newtp->rcv_nxt = treq->rcv_isn + 1;
+		newtp->segs_in = 0;
 
 		newtp->snd_sml = newtp->snd_una =
 		newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 3aebe0157dfa..534e5fdb04c1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1027,6 +1027,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
 			      tcp_skb_pcount(skb));
 
+	tp->segs_out += tcp_skb_pcount(skb);
 	/* OK, its time to fill skb_shinfo(skb)->gso_segs */
 	skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b6575d665568..beac6bf840b9 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1421,6 +1421,7 @@ process:
 	skb->dev = NULL;
 
 	bh_lock_sock_nested(sk);
+	tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
 		if (!tcp_prequeue(sk, skb))
-- 
cgit 


From bd5850d39f10f9d216bff69bcbf5938680b862ae Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Thu, 21 May 2015 02:26:24 +0200
Subject: net: sched: pkt_cls: remove unused macros from uapi

Jamal points out that this header also contains kernel internal magic that
cannot be used from userspace for anything meaningful.

Lets remove what the kernel doesn't use anymore and wrap remainder with
__KERNEL__.

Suggested-by: Jamal Hadi Salim <jhs@mojatatu.com>
Suggested-by: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Jamal Hadi Salim <jhs@mojatatu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h | 32 +++++---------------------------
 1 file changed, 5 insertions(+), 27 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 39fb53d67b11..4f0d1bc3647d 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/pkt_sched.h>
 
+#ifdef __KERNEL__
 /* I think i could have done better macros ; for now this is stolen from
  * some arch/mips code - jhs
 */
@@ -35,23 +36,6 @@ bits 9,10,11: redirect counter -  redirect TTL. Loop avoidance
  *
  * */
 
-#ifndef __KERNEL__
-/* backwards compat for userspace only */
-#define TC_MUNGED          _TC_MAKEMASK1(0)
-#define SET_TC_MUNGED(v)   ( TC_MUNGED | (v & ~TC_MUNGED))
-#define CLR_TC_MUNGED(v)   ( v & ~TC_MUNGED)
-
-#define TC_OK2MUNGE        _TC_MAKEMASK1(1)
-#define SET_TC_OK2MUNGE(v)   ( TC_OK2MUNGE | (v & ~TC_OK2MUNGE))
-#define CLR_TC_OK2MUNGE(v)   ( v & ~TC_OK2MUNGE)
-
-#define S_TC_VERD          _TC_MAKE32(2)
-#define M_TC_VERD          _TC_MAKEMASK(4,S_TC_VERD)
-#define G_TC_VERD(x)       _TC_GETVALUE(x,S_TC_VERD,M_TC_VERD)
-#define V_TC_VERD(x)       _TC_MAKEVALUE(x,S_TC_VERD)
-#define SET_TC_VERD(v,n)   ((V_TC_VERD(n)) | (v & ~M_TC_VERD))
-#endif
-
 #define S_TC_FROM          _TC_MAKE32(6)
 #define M_TC_FROM          _TC_MAKEMASK(2,S_TC_FROM)
 #define G_TC_FROM(x)       _TC_GETVALUE(x,S_TC_FROM,M_TC_FROM)
@@ -65,20 +49,16 @@ bits 9,10,11: redirect counter -  redirect TTL. Loop avoidance
 #define SET_TC_NCLS(v)   ( TC_NCLS | (v & ~TC_NCLS))
 #define CLR_TC_NCLS(v)   ( v & ~TC_NCLS)
 
-#ifndef __KERNEL__
-#define S_TC_RTTL          _TC_MAKE32(9)
-#define M_TC_RTTL          _TC_MAKEMASK(3,S_TC_RTTL)
-#define G_TC_RTTL(x)       _TC_GETVALUE(x,S_TC_RTTL,M_TC_RTTL)
-#define V_TC_RTTL(x)       _TC_MAKEVALUE(x,S_TC_RTTL)
-#define SET_TC_RTTL(v,n)   ((V_TC_RTTL(n)) | (v & ~M_TC_RTTL))
-#endif
-
 #define S_TC_AT          _TC_MAKE32(12)
 #define M_TC_AT          _TC_MAKEMASK(2,S_TC_AT)
 #define G_TC_AT(x)       _TC_GETVALUE(x,S_TC_AT,M_TC_AT)
 #define V_TC_AT(x)       _TC_MAKEVALUE(x,S_TC_AT)
 #define SET_TC_AT(v,n)   ((V_TC_AT(n)) | (v & ~M_TC_AT))
 
+#define MAX_REC_LOOP 4
+#define MAX_RED_LOOP 4
+#endif
+
 /* Action attributes */
 enum {
 	TCA_ACT_UNSPEC,
@@ -98,8 +78,6 @@ enum {
 #define TCA_ACT_NOUNBIND	0
 #define TCA_ACT_REPLACE		1
 #define TCA_ACT_NOREPLACE	0
-#define MAX_REC_LOOP 4
-#define MAX_RED_LOOP 4
 
 #define TC_ACT_UNSPEC	(-1)
 #define TC_ACT_OK		0
-- 
cgit 


From 1ce87f21edf6a071a7cc6bc77d628d7c7650d0d8 Mon Sep 17 00:00:00 2001
From: Lars-Peter Clausen <lars@metafoo.de>
Date: Fri, 22 May 2015 18:17:38 +0200
Subject: iio: Add I/Q modifiers

I/Q modifiers can be used to denote signals which are represented by a
in-phase and a quadrature component.

The ABI documentation describes the I and Q modifiers for current and
voltage channels for now as those will be the most likely users.

Signed-off-by: Lars-Peter Clausen <lars@metafoo.de>
Signed-off-by: Jonathan Cameron <jic23@kernel.org>
---
 Documentation/ABI/testing/sysfs-bus-iio | 46 +++++++++++++++++++++++++++++++++
 drivers/iio/industrialio-core.c         |  2 ++
 include/uapi/linux/iio/types.h          |  2 ++
 3 files changed, 50 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/ABI/testing/sysfs-bus-iio b/Documentation/ABI/testing/sysfs-bus-iio
index f66262c64e2f..bbed111c31b4 100644
--- a/Documentation/ABI/testing/sysfs-bus-iio
+++ b/Documentation/ABI/testing/sysfs-bus-iio
@@ -71,6 +71,8 @@ Description:
 
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_raw
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_i_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_q_raw
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -81,6 +83,11 @@ Description:
 		unique to allow association with event codes. Units after
 		application of scale and offset are millivolts.
 
+		Channels with 'i' and 'q' modifiers always exist in pairs and both
+		channels refer to the same signal. The 'i' channel contains the in-phase
+		component of the signal while the 'q' channel contains the quadrature
+		component.
+
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY-voltageZ_raw
 KernelVersion:	2.6.35
 Contact:	linux-iio@vger.kernel.org
@@ -246,8 +253,16 @@ What:		/sys/bus/iio/devices/iio:deviceX/in_accel_y_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_accel_z_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_i_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_q_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_q_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_i_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_currentY_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_current_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_currentY_i_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_currentY_q_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_current_q_offset
+What:		/sys/bus/iio/devices/iio:deviceX/in_current_i_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_tempY_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_temp_offset
 What:		/sys/bus/iio/devices/iio:deviceX/in_pressureY_offset
@@ -273,14 +288,22 @@ Description:
 		to the _raw output.
 
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_i_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_q_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_i_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_voltage_q_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_voltage-voltage_scale
 What:		/sys/bus/iio/devices/iio:deviceX/out_voltageY_scale
 What:		/sys/bus/iio/devices/iio:deviceX/out_altvoltageY_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_currentY_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_currentY_supply_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_current_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_currentY_i_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_currentY_q_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_current_i_scale
+What:		/sys/bus/iio/devices/iio:deviceX/in_current_q_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_accel_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_accel_peak_scale
 What:		/sys/bus/iio/devices/iio:deviceX/in_anglvel_scale
@@ -328,6 +351,10 @@ Description:
 
 What		/sys/bus/iio/devices/iio:deviceX/in_voltageY_calibscale
 What		/sys/bus/iio/devices/iio:deviceX/in_voltageY_supply_calibscale
+What		/sys/bus/iio/devices/iio:deviceX/in_voltageY_i_calibscale
+What		/sys/bus/iio/devices/iio:deviceX/in_voltageY_q_calibscale
+What		/sys/bus/iio/devices/iio:deviceX/in_voltage_i_calibscale
+What		/sys/bus/iio/devices/iio:deviceX/in_voltage_q_calibscale
 What		/sys/bus/iio/devices/iio:deviceX/in_voltage_calibscale
 What		/sys/bus/iio/devices/iio:deviceX/in_accel_x_calibscale
 What		/sys/bus/iio/devices/iio:deviceX/in_accel_y_calibscale
@@ -1046,6 +1073,10 @@ What:		/sys/.../iio:deviceX/scan_elements/in_timestamp_en
 What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_supply_en
 What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_en
 What:		/sys/.../iio:deviceX/scan_elements/in_voltageY-voltageZ_en
+What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_i_en
+What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_q_en
+What:		/sys/.../iio:deviceX/scan_elements/in_voltage_i_en
+What:		/sys/.../iio:deviceX/scan_elements/in_voltage_q_en
 What:		/sys/.../iio:deviceX/scan_elements/in_incli_x_en
 What:		/sys/.../iio:deviceX/scan_elements/in_incli_y_en
 What:		/sys/.../iio:deviceX/scan_elements/in_pressureY_en
@@ -1064,6 +1095,10 @@ What:		/sys/.../iio:deviceX/scan_elements/in_incli_type
 What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_type
 What:		/sys/.../iio:deviceX/scan_elements/in_voltage_type
 What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_supply_type
+What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_i_type
+What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_q_type
+What:		/sys/.../iio:deviceX/scan_elements/in_voltage_i_type
+What:		/sys/.../iio:deviceX/scan_elements/in_voltage_q_type
 What:		/sys/.../iio:deviceX/scan_elements/in_timestamp_type
 What:		/sys/.../iio:deviceX/scan_elements/in_pressureY_type
 What:		/sys/.../iio:deviceX/scan_elements/in_pressure_type
@@ -1101,6 +1136,10 @@ Description:
 
 What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_index
 What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_supply_index
+What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_i_index
+What:		/sys/.../iio:deviceX/scan_elements/in_voltageY_q_index
+What:		/sys/.../iio:deviceX/scan_elements/in_voltage_i_index
+What:		/sys/.../iio:deviceX/scan_elements/in_voltage_q_index
 What:		/sys/.../iio:deviceX/scan_elements/in_accel_x_index
 What:		/sys/.../iio:deviceX/scan_elements/in_accel_y_index
 What:		/sys/.../iio:deviceX/scan_elements/in_accel_z_index
@@ -1260,6 +1299,8 @@ Description:
 		or without compensation from tilt sensors.
 
 What:		/sys/bus/iio/devices/iio:deviceX/in_currentX_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_currentX_i_raw
+What:		/sys/bus/iio/devices/iio:deviceX/in_currentX_q_raw
 KernelVersion:	3.18
 Contact:	linux-iio@vger.kernel.org
 Description:
@@ -1268,6 +1309,11 @@ Description:
 		present, output should be considered as processed with the
 		unit in milliamps.
 
+		Channels with 'i' and 'q' modifiers always exist in pairs and both
+		channels refer to the same signal. The 'i' channel contains the in-phase
+		component of the signal while the 'q' channel contains the quadrature
+		component.
+
 What:		/sys/.../iio:deviceX/in_energy_en
 What:		/sys/.../iio:deviceX/in_distance_en
 What:		/sys/.../iio:deviceX/in_velocity_sqrt(x^2+y^2+z^2)_en
diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
index 9688a88b6198..3524b0de8721 100644
--- a/drivers/iio/industrialio-core.c
+++ b/drivers/iio/industrialio-core.c
@@ -101,6 +101,8 @@ static const char * const iio_modifier_names[] = {
 	[IIO_MOD_WALKING] = "walking",
 	[IIO_MOD_STILL] = "still",
 	[IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z] = "sqrt(x^2+y^2+z^2)",
+	[IIO_MOD_I] = "i",
+	[IIO_MOD_Q] = "q",
 };
 
 /* relies on pairs of these shared then separate */
diff --git a/include/uapi/linux/iio/types.h b/include/uapi/linux/iio/types.h
index 5c4601935005..2f8b11722204 100644
--- a/include/uapi/linux/iio/types.h
+++ b/include/uapi/linux/iio/types.h
@@ -70,6 +70,8 @@ enum iio_modifier {
 	IIO_MOD_WALKING,
 	IIO_MOD_STILL,
 	IIO_MOD_ROOT_SUM_SQUARED_X_Y_Z,
+	IIO_MOD_I,
+	IIO_MOD_Q,
 };
 
 enum iio_event_type {
-- 
cgit 


From 7df20f2d893db42eaa1ea1e30a2573c971ec9238 Mon Sep 17 00:00:00 2001
From: Sudeep Dutt <sudeep.dutt@intel.com>
Date: Wed, 29 Apr 2015 05:32:28 -0700
Subject: misc: mic: SCIF header file and IOCTL interface

This patch introduces the SCIF documentation in the header file
and describes the IOCTL interface for user mode. mic_overview.txt
is updated with documentation on SCIF and a new document
describing SCIF in more details is available in scif_overview.txt.

Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 Documentation/mic/mic_overview.txt  |  28 +-
 Documentation/mic/scif_overview.txt |  98 ++++
 include/linux/scif.h                | 993 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/Kbuild           |   1 +
 include/uapi/linux/scif_ioctl.h     | 130 +++++
 5 files changed, 1238 insertions(+), 12 deletions(-)
 create mode 100644 Documentation/mic/scif_overview.txt
 create mode 100644 include/linux/scif.h
 create mode 100644 include/uapi/linux/scif_ioctl.h

(limited to 'include/uapi/linux')

diff --git a/Documentation/mic/mic_overview.txt b/Documentation/mic/mic_overview.txt
index 77c541802ad9..1a2f2c8ec59e 100644
--- a/Documentation/mic/mic_overview.txt
+++ b/Documentation/mic/mic_overview.txt
@@ -24,6 +24,10 @@ a virtual bus called mic bus is created and virtual dma devices are
 created on it by the host/card drivers. On host the channels are private
 and used only by the host driver to transfer data for the virtio devices.
 
+The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a
+low level communications API across PCIe currently implemented for MIC.
+More details are available at scif_overview.txt.
+
 Here is a block diagram of the various components described above. The
 virtio backends are situated on the host rather than the card given better
 single threaded performance for the host compared to MIC, the ability of
@@ -47,18 +51,18 @@ the fact that the virtio block storage backend can only be on the host.
                       |               |       | Virtio over PCIe IOCTLs  |
                       |               |       +--------------------------+
 +-----------+         |               |                   |  +-----------+
-| MIC DMA   |         |               |                   |  | MIC DMA   |
-| Driver    |         |               |                   |  | Driver    |
-+-----------+         |               |                   |  +-----------+
-      |               |               |                   |        |
-+---------------+     |               |                   |  +----------------+
-|MIC virtual Bus|     |               |                   |  |MIC virtual Bus |
-+---------------+     |               |                   |  +----------------+
-      |               |               |                   |              |
-      |   +--------------+            |            +---------------+     |
-      |   |Intel MIC     |            |            |Intel MIC      |     |
-      +---|Card Driver   |            |            |Host Driver    |     |
-          +--------------+            |            +---------------+-----+
+| MIC DMA   |         |  +----------+ | +-----------+     |  | MIC DMA   |
+| Driver    |         |  |  SCIF    | | |   SCIF    |     |  | Driver    |
++-----------+         |  +----------+ | +-----------+     |  +-----------+
+      |               |       |       |       |           |        |
++---------------+     | +-----+-----+ | +-----+-----+     | +---------------+
+|MIC virtual Bus|     | |SCIF HW Bus| | |SCIF HW BUS|     | |MIC virtual Bus|
++---------------+     | +-----------+ | +-----+-----+     | +---------------+
+      |               |       |       |       |           |              |
+      |   +--------------+    |       |       |    +---------------+     |
+      |   |Intel MIC     |    |       |       |    |Intel MIC      |     |
+      +---|Card Driver   +----+       |       |    |Host Driver    |     |
+          +--------------+            |       +----+---------------+-----+
                       |               |                   |
              +-------------------------------------------------------------+
              |                                                             |
diff --git a/Documentation/mic/scif_overview.txt b/Documentation/mic/scif_overview.txt
new file mode 100644
index 000000000000..0a280d986731
--- /dev/null
+++ b/Documentation/mic/scif_overview.txt
@@ -0,0 +1,98 @@
+The Symmetric Communication Interface (SCIF (pronounced as skiff)) is a low
+level communications API across PCIe currently implemented for MIC. Currently
+SCIF provides inter-node communication within a single host platform, where a
+node is a MIC Coprocessor or Xeon based host. SCIF abstracts the details of
+communicating over the PCIe bus while providing an API that is symmetric
+across all the nodes in the PCIe network. An important design objective for SCIF
+is to deliver the maximum possible performance given the communication
+abilities of the hardware. SCIF has been used to implement an offload compiler
+runtime and OFED support for MPI implementations for MIC coprocessors.
+
+==== SCIF API Components ====
+The SCIF API has the following parts:
+1. Connection establishment using a client server model
+2. Byte stream messaging intended for short messages
+3. Node enumeration to determine online nodes
+4. Poll semantics for detection of incoming connections and messages
+5. Memory registration to pin down pages
+6. Remote memory mapping for low latency CPU accesses via mmap
+7. Remote DMA (RDMA) for high bandwidth DMA transfers
+8. Fence APIs for RDMA synchronization
+
+SCIF exposes the notion of a connection which can be used by peer processes on
+nodes in a SCIF PCIe "network" to share memory "windows" and to communicate. A
+process in a SCIF node initiates a SCIF connection to a peer process on a
+different node via a SCIF "endpoint". SCIF endpoints support messaging APIs
+which are similar to connection oriented socket APIs. Connected SCIF endpoints
+can also register local memory which is followed by data transfer using either
+DMA, CPU copies or remote memory mapping via mmap. SCIF supports both user and
+kernel mode clients which are functionally equivalent.
+
+==== SCIF Performance for MIC ====
+DMA bandwidth comparison between the TCP (over ethernet over PCIe) stack versus
+SCIF shows the performance advantages of SCIF for HPC applications and runtimes.
+
+             Comparison of TCP and SCIF based BW
+
+  Throughput (GB/sec)
+    8 +                                             PCIe Bandwidth ******
+      +                                                        TCP ######
+    7 +    **************************************             SCIF %%%%%%
+      |                       %%%%%%%%%%%%%%%%%%%
+    6 +                   %%%%
+      |                 %%
+      |               %%%
+    5 +              %%
+      |            %%
+    4 +           %%
+      |          %%
+    3 +         %%
+      |        %
+    2 +      %%
+      |     %%
+      |    %
+    1 +
+      +    ######################################
+    0 +++---+++--+--+-+--+--+-++-+--+-++-+--+-++-+-
+      1       10     100      1000   10000   100000
+                   Transfer Size (KBytes)
+
+SCIF allows memory sharing via mmap(..) between processes on different PCIe
+nodes and thus provides bare-metal PCIe latency. The round trip SCIF mmap
+latency from the host to an x100 MIC for an 8 byte message is 0.44 usecs.
+
+SCIF has a user space library which is a thin IOCTL wrapper providing a user
+space API similar to the kernel API in scif.h. The SCIF user space library
+is distributed @ https://software.intel.com/en-us/mic-developer
+
+Here is some pseudo code for an example of how two applications on two PCIe
+nodes would typically use the SCIF API:
+
+Process A (on node A)			Process B (on node B)
+
+/* get online node information */
+scif_get_node_ids(..)			scif_get_node_ids(..)
+scif_open(..)				scif_open(..)
+scif_bind(..)				scif_bind(..)
+scif_listen(..)
+scif_accept(..)				scif_connect(..)
+/* SCIF connection established */
+
+/* Send and receive short messages */
+scif_send(..)/scif_recv(..)		scif_send(..)/scif_recv(..)
+
+/* Register memory */
+scif_register(..)			scif_register(..)
+
+/* RDMA */
+scif_readfrom(..)/scif_writeto(..)	scif_readfrom(..)/scif_writeto(..)
+
+/* Fence DMAs */
+scif_fence_signal(..)			scif_fence_signal(..)
+
+mmap(..)				mmap(..)
+
+/* Access remote registered memory */
+
+/* Close the endpoints */
+scif_close(..)				scif_close(..)
diff --git a/include/linux/scif.h b/include/linux/scif.h
new file mode 100644
index 000000000000..44f4f3898bbe
--- /dev/null
+++ b/include/linux/scif.h
@@ -0,0 +1,993 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+#ifndef __SCIF_H__
+#define __SCIF_H__
+
+#include <linux/types.h>
+#include <linux/poll.h>
+#include <linux/scif_ioctl.h>
+
+#define SCIF_ACCEPT_SYNC	1
+#define SCIF_SEND_BLOCK		1
+#define SCIF_RECV_BLOCK		1
+
+enum {
+	SCIF_PROT_READ = (1 << 0),
+	SCIF_PROT_WRITE = (1 << 1)
+};
+
+enum {
+	SCIF_MAP_FIXED = 0x10,
+	SCIF_MAP_KERNEL	= 0x20,
+};
+
+enum {
+	SCIF_FENCE_INIT_SELF = (1 << 0),
+	SCIF_FENCE_INIT_PEER = (1 << 1),
+	SCIF_SIGNAL_LOCAL = (1 << 4),
+	SCIF_SIGNAL_REMOTE = (1 << 5)
+};
+
+enum {
+	SCIF_RMA_USECPU = (1 << 0),
+	SCIF_RMA_USECACHE = (1 << 1),
+	SCIF_RMA_SYNC = (1 << 2),
+	SCIF_RMA_ORDERED = (1 << 3)
+};
+
+/* End of SCIF Admin Reserved Ports */
+#define SCIF_ADMIN_PORT_END	1024
+
+/* End of SCIF Reserved Ports */
+#define SCIF_PORT_RSVD		1088
+
+typedef struct scif_endpt *scif_epd_t;
+
+#define SCIF_OPEN_FAILED ((scif_epd_t)-1)
+#define SCIF_REGISTER_FAILED ((off_t)-1)
+#define SCIF_MMAP_FAILED ((void *)-1)
+
+/**
+ * scif_open() - Create an endpoint
+ *
+ * Return:
+ * Upon successful completion, scif_open() returns an endpoint descriptor to
+ * be used in subsequent SCIF functions calls to refer to that endpoint;
+ * otherwise in user mode SCIF_OPEN_FAILED (that is ((scif_epd_t)-1)) is
+ * returned and errno is set to indicate the error; in kernel mode a NULL
+ * scif_epd_t is returned.
+ *
+ * Errors:
+ * ENOMEM - Insufficient kernel memory was available
+ */
+scif_epd_t scif_open(void);
+
+/**
+ * scif_bind() - Bind an endpoint to a port
+ * @epd:	endpoint descriptor
+ * @pn:		port number
+ *
+ * scif_bind() binds endpoint epd to port pn, where pn is a port number on the
+ * local node. If pn is zero, a port number greater than or equal to
+ * SCIF_PORT_RSVD is assigned and returned. Each endpoint may be bound to
+ * exactly one local port. Ports less than 1024 when requested can only be bound
+ * by system (or root) processes or by processes executed by privileged users.
+ *
+ * Return:
+ * Upon successful completion, scif_bind() returns the port number to which epd
+ * is bound; otherwise in user mode -1 is returned and errno is set to
+ * indicate the error; in kernel mode the negative of one of the following
+ * errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * EINVAL - the endpoint or the port is already bound
+ * EISCONN - The endpoint is already connected
+ * ENOSPC - No port number available for assignment
+ * EACCES - The port requested is protected and the user is not the superuser
+ */
+int scif_bind(scif_epd_t epd, u16 pn);
+
+/**
+ * scif_listen() - Listen for connections on an endpoint
+ * @epd:	endpoint descriptor
+ * @backlog:	maximum pending connection requests
+ *
+ * scif_listen() marks the endpoint epd as a listening endpoint - that is, as
+ * an endpoint that will be used to accept incoming connection requests. Once
+ * so marked, the endpoint is said to be in the listening state and may not be
+ * used as the endpoint of a connection.
+ *
+ * The endpoint, epd, must have been bound to a port.
+ *
+ * The backlog argument defines the maximum length to which the queue of
+ * pending connections for epd may grow. If a connection request arrives when
+ * the queue is full, the client may receive an error with an indication that
+ * the connection was refused.
+ *
+ * Return:
+ * Upon successful completion, scif_listen() returns 0; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode the
+ * negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * EINVAL - the endpoint is not bound to a port
+ * EISCONN - The endpoint is already connected or listening
+ */
+int scif_listen(scif_epd_t epd, int backlog);
+
+/**
+ * scif_connect() - Initiate a connection on a port
+ * @epd:	endpoint descriptor
+ * @dst:	global id of port to which to connect
+ *
+ * The scif_connect() function requests the connection of endpoint epd to remote
+ * port dst. If the connection is successful, a peer endpoint, bound to dst, is
+ * created on node dst.node. On successful return, the connection is complete.
+ *
+ * If the endpoint epd has not already been bound to a port, scif_connect()
+ * will bind it to an unused local port.
+ *
+ * A connection is terminated when an endpoint of the connection is closed,
+ * either explicitly by scif_close(), or when a process that owns one of the
+ * endpoints of the connection is terminated.
+ *
+ * In user space, scif_connect() supports an asynchronous connection mode
+ * if the application has set the O_NONBLOCK flag on the endpoint via the
+ * fcntl() system call. Setting this flag will result in the calling process
+ * not to wait during scif_connect().
+ *
+ * Return:
+ * Upon successful completion, scif_connect() returns the port ID to which the
+ * endpoint, epd, is bound; otherwise in user mode -1 is returned and errno is
+ * set to indicate the error; in kernel mode the negative of one of the
+ * following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNREFUSED - The destination was not listening for connections or refused
+ * the connection request
+ * EINVAL - dst.port is not a valid port ID
+ * EISCONN - The endpoint is already connected
+ * ENOMEM - No buffer space is available
+ * ENODEV - The destination node does not exist, or the node is lost or existed,
+ * but is not currently in the network since it may have crashed
+ * ENOSPC - No port number available for assignment
+ * EOPNOTSUPP - The endpoint is listening and cannot be connected
+ */
+int scif_connect(scif_epd_t epd, struct scif_port_id *dst);
+
+/**
+ * scif_accept() - Accept a connection on an endpoint
+ * @epd:	endpoint descriptor
+ * @peer:	global id of port to which connected
+ * @newepd:	new connected endpoint descriptor
+ * @flags:	flags
+ *
+ * The scif_accept() call extracts the first connection request from the queue
+ * of pending connections for the port on which epd is listening. scif_accept()
+ * creates a new endpoint, bound to the same port as epd, and allocates a new
+ * SCIF endpoint descriptor, returned in newepd, for the endpoint. The new
+ * endpoint is connected to the endpoint through which the connection was
+ * requested. epd is unaffected by this call, and remains in the listening
+ * state.
+ *
+ * On successful return, peer holds the global port identifier (node id and
+ * local port number) of the port which requested the connection.
+ *
+ * A connection is terminated when an endpoint of the connection is closed,
+ * either explicitly by scif_close(), or when a process that owns one of the
+ * endpoints of the connection is terminated.
+ *
+ * The number of connections that can (subsequently) be accepted on epd is only
+ * limited by system resources (memory).
+ *
+ * The flags argument is formed by OR'ing together zero or more of the
+ * following values.
+ * SCIF_ACCEPT_SYNC - block until a connection request is presented. If
+ *			SCIF_ACCEPT_SYNC is not in flags, and no pending
+ *			connections are present on the queue, scif_accept()
+ *			fails with an EAGAIN error
+ *
+ * In user mode, the select() and poll() functions can be used to determine
+ * when there is a connection request. In kernel mode, the scif_poll()
+ * function may be used for this purpose. A readable event will be delivered
+ * when a connection is requested.
+ *
+ * Return:
+ * Upon successful completion, scif_accept() returns 0; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode the
+ *	negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EAGAIN - SCIF_ACCEPT_SYNC is not set and no connections are present to be
+ * accepted or SCIF_ACCEPT_SYNC is not set and remote node failed to complete
+ * its connection request
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * EINTR - Interrupted function
+ * EINVAL - epd is not a listening endpoint, or flags is invalid, or peer is
+ * NULL, or newepd is NULL
+ * ENODEV - The requesting node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOENT - Secondary part of epd registration failed
+ */
+int scif_accept(scif_epd_t epd, struct scif_port_id *peer, scif_epd_t
+		*newepd, int flags);
+
+/**
+ * scif_close() - Close an endpoint
+ * @epd:	endpoint descriptor
+ *
+ * scif_close() closes an endpoint and performs necessary teardown of
+ * facilities associated with that endpoint.
+ *
+ * If epd is a listening endpoint then it will no longer accept connection
+ * requests on the port to which it is bound. Any pending connection requests
+ * are rejected.
+ *
+ * If epd is a connected endpoint, then its peer endpoint is also closed. RMAs
+ * which are in-process through epd or its peer endpoint will complete before
+ * scif_close() returns. Registered windows of the local and peer endpoints are
+ * released as if scif_unregister() was called against each window.
+ *
+ * Closing a SCIF endpoint does not affect local registered memory mapped by
+ * a SCIF endpoint on a remote node. The local memory remains mapped by the peer
+ * SCIF endpoint explicitly removed by calling munmap(..) by the peer.
+ *
+ * If the peer endpoint's receive queue is not empty at the time that epd is
+ * closed, then the peer endpoint can be passed as the endpoint parameter to
+ * scif_recv() until the receive queue is empty.
+ *
+ * epd is freed and may no longer be accessed.
+ *
+ * Return:
+ * Upon successful completion, scif_close() returns 0; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode the
+ * negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ */
+int scif_close(scif_epd_t epd);
+
+/**
+ * scif_send() - Send a message
+ * @epd:	endpoint descriptor
+ * @msg:	message buffer address
+ * @len:	message length
+ * @flags:	blocking mode flags
+ *
+ * scif_send() sends data to the peer of endpoint epd. Up to len bytes of data
+ * are copied from memory starting at address msg. On successful execution the
+ * return value of scif_send() is the number of bytes that were sent, and is
+ * zero if no bytes were sent because len was zero. scif_send() may be called
+ * only when the endpoint is in a connected state.
+ *
+ * If a scif_send() call is non-blocking, then it sends only those bytes which
+ * can be sent without waiting, up to a maximum of len bytes.
+ *
+ * If a scif_send() call is blocking, then it normally returns after sending
+ * all len bytes. If a blocking call is interrupted or the connection is
+ * reset, the call is considered successful if some bytes were sent or len is
+ * zero, otherwise the call is considered unsuccessful.
+ *
+ * In user mode, the select() and poll() functions can be used to determine
+ * when the send queue is not full. In kernel mode, the scif_poll() function
+ * may be used for this purpose.
+ *
+ * It is recommended that scif_send()/scif_recv() only be used for short
+ * control-type message communication between SCIF endpoints. The SCIF RMA
+ * APIs are expected to provide better performance for transfer sizes of
+ * 1024 bytes or longer for the current MIC hardware and software
+ * implementation.
+ *
+ * scif_send() will block until the entire message is sent if SCIF_SEND_BLOCK
+ * is passed as the flags argument.
+ *
+ * Return:
+ * Upon successful completion, scif_send() returns the number of bytes sent;
+ * otherwise in user mode -1 is returned and errno is set to indicate the
+ * error; in kernel mode the negative of one of the following errors is
+ * returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - An invalid address was specified for a parameter
+ * EINVAL - flags is invalid, or len is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOTCONN - The endpoint is not connected
+ */
+int scif_send(scif_epd_t epd, void *msg, int len, int flags);
+
+/**
+ * scif_recv() - Receive a message
+ * @epd:	endpoint descriptor
+ * @msg:	message buffer address
+ * @len:	message buffer length
+ * @flags:	blocking mode flags
+ *
+ * scif_recv() receives data from the peer of endpoint epd. Up to len bytes of
+ * data are copied to memory starting at address msg. On successful execution
+ * the return value of scif_recv() is the number of bytes that were received,
+ * and is zero if no bytes were received because len was zero. scif_recv() may
+ * be called only when the endpoint is in a connected state.
+ *
+ * If a scif_recv() call is non-blocking, then it receives only those bytes
+ * which can be received without waiting, up to a maximum of len bytes.
+ *
+ * If a scif_recv() call is blocking, then it normally returns after receiving
+ * all len bytes. If the blocking call was interrupted due to a disconnection,
+ * subsequent calls to scif_recv() will copy all bytes received upto the point
+ * of disconnection.
+ *
+ * In user mode, the select() and poll() functions can be used to determine
+ * when data is available to be received. In kernel mode, the scif_poll()
+ * function may be used for this purpose.
+ *
+ * It is recommended that scif_send()/scif_recv() only be used for short
+ * control-type message communication between SCIF endpoints. The SCIF RMA
+ * APIs are expected to provide better performance for transfer sizes of
+ * 1024 bytes or longer for the current MIC hardware and software
+ * implementation.
+ *
+ * scif_recv() will block until the entire message is received if
+ * SCIF_RECV_BLOCK is passed as the flags argument.
+ *
+ * Return:
+ * Upon successful completion, scif_recv() returns the number of bytes
+ * received; otherwise in user mode -1 is returned and errno is set to
+ * indicate the error; in kernel mode the negative of one of the following
+ * errors is returned.
+ *
+ * Errors:
+ * EAGAIN - The destination node is returning from a low power state
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - An invalid address was specified for a parameter
+ * EINVAL - flags is invalid, or len is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOTCONN - The endpoint is not connected
+ */
+int scif_recv(scif_epd_t epd, void *msg, int len, int flags);
+
+/**
+ * scif_register() - Mark a memory region for remote access.
+ * @epd:		endpoint descriptor
+ * @addr:		starting virtual address
+ * @len:		length of range
+ * @offset:		offset of window
+ * @prot_flags:		read/write protection flags
+ * @map_flags:		mapping flags
+ *
+ * The scif_register() function opens a window, a range of whole pages of the
+ * registered address space of the endpoint epd, starting at offset po and
+ * continuing for len bytes. The value of po, further described below, is a
+ * function of the parameters offset and len, and the value of map_flags. Each
+ * page of the window represents the physical memory page which backs the
+ * corresponding page of the range of virtual address pages starting at addr
+ * and continuing for len bytes. addr and len are constrained to be multiples
+ * of the page size. A successful scif_register() call returns po.
+ *
+ * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset
+ * exactly, and offset is constrained to be a multiple of the page size. The
+ * mapping established by scif_register() will not replace any existing
+ * registration; an error is returned if any page within the range [offset,
+ * offset + len - 1] intersects an existing window.
+ *
+ * When SCIF_MAP_FIXED is not set, the implementation uses offset in an
+ * implementation-defined manner to arrive at po. The po value so chosen will
+ * be an area of the registered address space that the implementation deems
+ * suitable for a mapping of len bytes. An offset value of 0 is interpreted as
+ * granting the implementation complete freedom in selecting po, subject to
+ * constraints described below. A non-zero value of offset is taken to be a
+ * suggestion of an offset near which the mapping should be placed. When the
+ * implementation selects a value for po, it does not replace any extant
+ * window. In all cases, po will be a multiple of the page size.
+ *
+ * The physical pages which are so represented by a window are available for
+ * access in calls to mmap(), scif_readfrom(), scif_writeto(),
+ * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the
+ * physical pages represented by the window will not be reused by the memory
+ * subsystem for any other purpose. Note that the same physical page may be
+ * represented by multiple windows.
+ *
+ * Subsequent operations which change the memory pages to which virtual
+ * addresses are mapped (such as mmap(), munmap()) have no effect on
+ * existing window.
+ *
+ * If the process will fork(), it is recommended that the registered
+ * virtual address range be marked with MADV_DONTFORK. Doing so will prevent
+ * problems due to copy-on-write semantics.
+ *
+ * The prot_flags argument is formed by OR'ing together one or more of the
+ * following values.
+ * SCIF_PROT_READ - allow read operations from the window
+ * SCIF_PROT_WRITE - allow write operations to the window
+ *
+ * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a
+ * fixed offset.
+ *
+ * Return:
+ * Upon successful completion, scif_register() returns the offset at which the
+ * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that
+ * is (off_t *)-1) is returned and errno is set to indicate the error; in
+ * kernel mode the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags, and pages in the range
+ * [offset, offset + len -1] are already registered
+ * EAGAIN - The mapping could not be performed due to lack of resources
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
+ * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is
+ * set in flags, and offset is not a multiple of the page size, or addr is not a
+ * multiple of the page size, or len is not a multiple of the page size, or is
+ * 0, or offset is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOMEM - Not enough space
+ * ENOTCONN -The endpoint is not connected
+ */
+off_t scif_register(scif_epd_t epd, void *addr, size_t len, off_t offset,
+		    int prot_flags, int map_flags);
+
+/**
+ * scif_unregister() - Mark a memory region for remote access.
+ * @epd:	endpoint descriptor
+ * @offset:	start of range to unregister
+ * @len:	length of range to unregister
+ *
+ * The scif_unregister() function closes those previously registered windows
+ * which are entirely within the range [offset, offset + len - 1]. It is an
+ * error to specify a range which intersects only a subrange of a window.
+ *
+ * On a successful return, pages within the window may no longer be specified
+ * in calls to mmap(), scif_readfrom(), scif_writeto(), scif_vreadfrom(),
+ * scif_vwriteto(), scif_get_pages, and scif_fence_signal(). The window,
+ * however, continues to exist until all previous references against it are
+ * removed. A window is referenced if there is a mapping to it created by
+ * mmap(), or if scif_get_pages() was called against the window
+ * (and the pages have not been returned via scif_put_pages()). A window is
+ * also referenced while an RMA, in which some range of the window is a source
+ * or destination, is in progress. Finally a window is referenced while some
+ * offset in that window was specified to scif_fence_signal(), and the RMAs
+ * marked by that call to scif_fence_signal() have not completed. While a
+ * window is in this state, its registered address space pages are not
+ * available for use in a new registered window.
+ *
+ * When all such references to the window have been removed, its references to
+ * all the physical pages which it represents are removed. Similarly, the
+ * registered address space pages of the window become available for
+ * registration in a new window.
+ *
+ * Return:
+ * Upon successful completion, scif_unregister() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned. In the event of an
+ * error, no windows are unregistered.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - the range [offset, offset + len - 1] intersects a subrange of a
+ * window, or offset is negative
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid for the
+ * registered address space of epd
+ */
+int scif_unregister(scif_epd_t epd, off_t offset, size_t len);
+
+/**
+ * scif_readfrom() - Copy from a remote address space
+ * @epd:	endpoint descriptor
+ * @loffset:	offset in local registered address space to
+ *		which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space
+ *		from which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_readfrom() copies len bytes from the remote registered address space of
+ * the peer of endpoint epd, starting at the offset roffset to the local
+ * registered address space of epd, starting at the offset loffset.
+ *
+ * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
+ * roffset + len - 1] must be within some registered window or windows of the
+ * local and remote nodes. A range may intersect multiple registered windows,
+ * but only if those windows are contiguous in the registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_readfrom() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * The optimal DMA performance will likely be realized if both
+ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if loffset and roffset are not
+ * cacheline aligned but are separated by some multiple of 64. The lowest level
+ * of performance is likely if loffset and roffset are not separated by a
+ * multiple of 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *	engine.
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *		the source range becomes visible on the destination node
+ *		after all other transferred data in the source range has
+ *		become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_readfrom() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
+ * address space of epd, or, The range [roffset, roffset + len - 1] is invalid
+ * for the registered address space of the peer of epd, or loffset or roffset
+ * is negative
+ */
+int scif_readfrom(scif_epd_t epd, off_t loffset, size_t len, off_t
+		  roffset, int rma_flags);
+
+/**
+ * scif_writeto() - Copy to a remote address space
+ * @epd:	endpoint descriptor
+ * @loffset:	offset in local registered address space
+ *		from which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space to
+ *		which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_writeto() copies len bytes from the local registered address space of
+ * epd, starting at the offset loffset to the remote registered address space
+ * of the peer of endpoint epd, starting at the offset roffset.
+ *
+ * Each of the specified ranges [loffset, loffset + len - 1] and [roffset,
+ * roffset + len - 1] must be within some registered window or windows of the
+ * local and remote nodes. A range may intersect multiple registered windows,
+ * but only if those windows are contiguous in the registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_writeto() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * The optimal DMA performance will likely be realized if both
+ * loffset and roffset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if loffset and roffset are not cacheline
+ * aligned but are separated by some multiple of 64. The lowest level of
+ * performance is likely if loffset and roffset are not separated by a multiple
+ * of 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *			engine.
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *		the source range becomes visible on the destination node
+ *		after all other transferred data in the source range has
+ *		become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_readfrom() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - The range [loffset, loffset + len - 1] is invalid for the registered
+ * address space of epd, or, The range [roffset , roffset + len -1] is invalid
+ * for the registered address space of the peer of epd, or loffset or roffset
+ * is negative
+ */
+int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t
+		 roffset, int rma_flags);
+
+/**
+ * scif_vreadfrom() - Copy from a remote address space
+ * @epd:	endpoint descriptor
+ * @addr:	address to which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space
+ *		from which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_vreadfrom() copies len bytes from the remote registered address
+ * space of the peer of endpoint epd, starting at the offset roffset, to local
+ * memory, starting at addr.
+ *
+ * The specified range [roffset, roffset + len - 1] must be within some
+ * registered window or windows of the remote nodes. The range may
+ * intersect multiple registered windows, but only if those windows are
+ * contiguous in the registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_vreadfrom() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
+ * the specified local memory range may be remain in a pinned state even after
+ * the specified transfer completes. This may reduce overhead if some or all of
+ * the same virtual address range is referenced in a subsequent call of
+ * scif_vreadfrom() or scif_vwriteto().
+ *
+ * The optimal DMA performance will likely be realized if both
+ * addr and roffset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if addr and roffset are not
+ * cacheline aligned but are separated by some multiple of 64. The lowest level
+ * of performance is likely if addr and roffset are not separated by a
+ * multiple of 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *	engine.
+ * SCIF_RMA_USECACHE - enable registration caching
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *	the source range becomes visible on the destination node
+ *	after all other transferred data in the source range has
+ *	become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_vreadfrom() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
+ * registered address space of epd
+ */
+int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset,
+		   int rma_flags);
+
+/**
+ * scif_vwriteto() - Copy to a remote address space
+ * @epd:	endpoint descriptor
+ * @addr:	address from which to copy
+ * @len:	length of range to copy
+ * @roffset:	offset in remote registered address space to
+ *		which to copy
+ * @rma_flags:	transfer mode flags
+ *
+ * scif_vwriteto() copies len bytes from the local memory, starting at addr, to
+ * the remote registered address space of the peer of endpoint epd, starting at
+ * the offset roffset.
+ *
+ * The specified range [roffset, roffset + len - 1] must be within some
+ * registered window or windows of the remote nodes. The range may intersect
+ * multiple registered windows, but only if those windows are contiguous in the
+ * registered address space.
+ *
+ * If rma_flags includes SCIF_RMA_USECPU, then the data is copied using
+ * programmed read/writes. Otherwise the data is copied using DMA. If rma_-
+ * flags includes SCIF_RMA_SYNC, then scif_vwriteto() will return after the
+ * transfer is complete. Otherwise, the transfer may be performed asynchron-
+ * ously. The order in which any two asynchronous RMA operations complete
+ * is non-deterministic. The synchronization functions, scif_fence_mark()/
+ * scif_fence_wait() and scif_fence_signal(), can be used to synchronize to
+ * the completion of asynchronous RMA operations on the same endpoint.
+ *
+ * The DMA transfer of individual bytes is not guaranteed to complete in
+ * address order. If rma_flags includes SCIF_RMA_ORDERED, then the last
+ * cacheline or partial cacheline of the source range will become visible on
+ * the destination node after all other transferred data in the source
+ * range has become visible on the destination node.
+ *
+ * If rma_flags includes SCIF_RMA_USECACHE, then the physical pages which back
+ * the specified local memory range may be remain in a pinned state even after
+ * the specified transfer completes. This may reduce overhead if some or all of
+ * the same virtual address range is referenced in a subsequent call of
+ * scif_vreadfrom() or scif_vwriteto().
+ *
+ * The optimal DMA performance will likely be realized if both
+ * addr and offset are cacheline aligned (are a multiple of 64). Lower
+ * performance will likely be realized if addr and offset are not cacheline
+ * aligned but are separated by some multiple of 64. The lowest level of
+ * performance is likely if addr and offset are not separated by a multiple of
+ * 64.
+ *
+ * The rma_flags argument is formed by ORing together zero or more of the
+ * following values.
+ * SCIF_RMA_USECPU - perform the transfer using the CPU, otherwise use the DMA
+ *	engine.
+ * SCIF_RMA_USECACHE - allow registration caching
+ * SCIF_RMA_SYNC - perform the transfer synchronously, returning after the
+ *		transfer has completed. Passing this flag results in the
+ *		current implementation busy waiting and consuming CPU cycles
+ *		while the DMA transfer is in progress for best performance by
+ *		avoiding the interrupt latency.
+ * SCIF_RMA_ORDERED - ensure that the last cacheline or partial cacheline of
+ *		the source range becomes visible on the destination node
+ *		after all other transferred data in the source range has
+ *		become visible on the destination
+ *
+ * Return:
+ * Upon successful completion, scif_vwriteto() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EACCESS - Attempt to write to a read-only range
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid
+ * EINVAL - rma_flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - Offsets in the range [roffset, roffset + len - 1] are invalid for the
+ * registered address space of epd
+ */
+int scif_vwriteto(scif_epd_t epd, void *addr, size_t len, off_t roffset,
+		  int rma_flags);
+
+/**
+ * scif_fence_mark() - Mark previously issued RMAs
+ * @epd:	endpoint descriptor
+ * @flags:	control flags
+ * @mark:	marked value returned as output.
+ *
+ * scif_fence_mark() returns after marking the current set of all uncompleted
+ * RMAs initiated through the endpoint epd or the current set of all
+ * uncompleted RMAs initiated through the peer of endpoint epd. The RMAs are
+ * marked with a value returned at mark. The application may subsequently call
+ * scif_fence_wait(), passing the value returned at mark, to await completion
+ * of all RMAs so marked.
+ *
+ * The flags argument has exactly one of the following values.
+ * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
+ *	epd are marked
+ * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
+ *	of endpoint epd are marked
+ *
+ * Return:
+ * Upon successful completion, scif_fence_mark() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - flags is invalid
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENOMEM - Insufficient kernel memory was available
+ */
+int scif_fence_mark(scif_epd_t epd, int flags, int *mark);
+
+/**
+ * scif_fence_wait() - Wait for completion of marked RMAs
+ * @epd:	endpoint descriptor
+ * @mark:	mark request
+ *
+ * scif_fence_wait() returns after all RMAs marked with mark have completed.
+ * The value passed in mark must have been obtained in a previous call to
+ * scif_fence_mark().
+ *
+ * Return:
+ * Upon successful completion, scif_fence_wait() returns 0; otherwise in user
+ * mode -1 is returned and errno is set to indicate the error; in kernel mode
+ * the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENOMEM - Insufficient kernel memory was available
+ */
+int scif_fence_wait(scif_epd_t epd, int mark);
+
+/**
+ * scif_fence_signal() - Request a memory update on completion of RMAs
+ * @epd:	endpoint descriptor
+ * @loff:	local offset
+ * @lval:	local value to write to loffset
+ * @roff:	remote offset
+ * @rval:	remote value to write to roffset
+ * @flags:	flags
+ *
+ * scif_fence_signal() returns after marking the current set of all uncompleted
+ * RMAs initiated through the endpoint epd or marking the current set of all
+ * uncompleted RMAs initiated through the peer of endpoint epd.
+ *
+ * If flags includes SCIF_SIGNAL_LOCAL, then on completion of the RMAs in the
+ * marked set, lval is written to memory at the address corresponding to offset
+ * loff in the local registered address space of epd. loff must be within a
+ * registered window. If flags includes SCIF_SIGNAL_REMOTE, then on completion
+ * of the RMAs in the marked set, rval is written to memory at the address
+ * corresponding to offset roff in the remote registered address space of epd.
+ * roff must be within a remote registered window of the peer of epd. Note
+ * that any specified offset must be DWORD (4 byte / 32 bit) aligned.
+ *
+ * The flags argument is formed by OR'ing together the following.
+ * Exactly one of the following values.
+ * SCIF_FENCE_INIT_SELF - RMA operations initiated through endpoint
+ *	epd are marked
+ * SCIF_FENCE_INIT_PEER - RMA operations initiated through the peer
+ *	of endpoint epd are marked
+ * One or more of the following values.
+ * SCIF_SIGNAL_LOCAL - On completion of the marked set of RMAs, write lval to
+ *	memory at the address corresponding to offset loff in the local
+ *	registered address space of epd.
+ * SCIF_SIGNAL_REMOTE - On completion of the marked set of RMAs, write rval to
+ *	memory at the address corresponding to offset roff in the remote
+ *	registered address space of epd.
+ *
+ * Return:
+ * Upon successful completion, scif_fence_signal() returns 0; otherwise in
+ * user mode -1 is returned and errno is set to indicate the error; in kernel
+ * mode the negative of one of the following errors is returned.
+ *
+ * Errors:
+ * EBADF, ENOTTY - epd is not a valid endpoint descriptor
+ * ECONNRESET - Connection reset by peer
+ * EINVAL - flags is invalid, or loff or roff are not DWORD aligned
+ * ENODEV - The remote node is lost or existed, but is not currently in the
+ * network since it may have crashed
+ * ENOTCONN - The endpoint is not connected
+ * ENXIO - loff is invalid for the registered address of epd, or roff is invalid
+ * for the registered address space, of the peer of epd
+ */
+int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff,
+		      u64 rval, int flags);
+
+/**
+ * scif_get_node_ids() - Return information about online nodes
+ * @nodes:	array in which to return online node IDs
+ * @len:	number of entries in the nodes array
+ * @self:	address to place the node ID of the local node
+ *
+ * scif_get_node_ids() fills in the nodes array with up to len node IDs of the
+ * nodes in the SCIF network. If there is not enough space in nodes, as
+ * indicated by the len parameter, only len node IDs are returned in nodes. The
+ * return value of scif_get_node_ids() is the total number of nodes currently in
+ * the SCIF network. By checking the return value against the len parameter,
+ * the user may determine if enough space for nodes was allocated.
+ *
+ * The node ID of the local node is returned at self.
+ *
+ * Return:
+ * Upon successful completion, scif_get_node_ids() returns the actual number of
+ * online nodes in the SCIF network including 'self'; otherwise in user mode
+ * -1 is returned and errno is set to indicate the error; in kernel mode no
+ * errors are returned.
+ *
+ * Errors:
+ * EFAULT - Bad address
+ */
+int scif_get_node_ids(u16 *nodes, int len, u16 *self);
+
+#endif /* __SCIF_H__ */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 1a0006a76b00..4ad65eebbff8 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -352,6 +352,7 @@ header-y += rtc.h
 header-y += rtnetlink.h
 header-y += scc.h
 header-y += sched.h
+header-y += scif_ioctl.h
 header-y += screen_info.h
 header-y += sctp.h
 header-y += sdla.h
diff --git a/include/uapi/linux/scif_ioctl.h b/include/uapi/linux/scif_ioctl.h
new file mode 100644
index 000000000000..4a94d917cf99
--- /dev/null
+++ b/include/uapi/linux/scif_ioctl.h
@@ -0,0 +1,130 @@
+/*
+ * Intel MIC Platform Software Stack (MPSS)
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2014 Intel Corporation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in
+ *   the documentation and/or other materials provided with the
+ *   distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ *   contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Intel SCIF driver.
+ *
+ */
+/*
+ * -----------------------------------------
+ * SCIF IOCTL interface information
+ * -----------------------------------------
+ */
+#ifndef SCIF_IOCTL_H
+#define SCIF_IOCTL_H
+
+#include <linux/types.h>
+
+/**
+ * struct scif_port_id - SCIF port information
+ * @node:	node on which port resides
+ * @port:	local port number
+ */
+struct scif_port_id {
+	__u16 node;
+	__u16 port;
+};
+
+/**
+ * struct scifioctl_connect - used for SCIF_CONNECT IOCTL
+ * @self:	used to read back the assigned port_id
+ * @peer:	destination node and port to connect to
+ */
+struct scifioctl_connect {
+	struct scif_port_id	self;
+	struct scif_port_id	peer;
+};
+
+/**
+ * struct scifioctl_accept - used for SCIF_ACCEPTREQ IOCTL
+ * @flags:	flags
+ * @peer:	global id of peer endpoint
+ * @endpt:	new connected endpoint descriptor
+ */
+struct scifioctl_accept {
+	__s32			flags;
+	struct scif_port_id	peer;
+	__u64			endpt;
+};
+
+/**
+ * struct scifioctl_msg - used for SCIF_SEND/SCIF_RECV IOCTL
+ * @msg:	message buffer address
+ * @len:	message length
+ * @flags:	flags
+ * @out_len:	number of bytes sent/received
+ */
+struct scifioctl_msg {
+	__u64	msg;
+	__s32	len;
+	__s32	flags;
+	__s32	out_len;
+};
+
+/**
+ * struct scifioctl_node_ids - used for SCIF_GET_NODEIDS IOCTL
+ * @nodes:	pointer to an array of node_ids
+ * @self:	ID of the current node
+ * @len:	length of array
+ */
+struct scifioctl_node_ids {
+	__u64	nodes;
+	__u64	self;
+	__s32	len;
+};
+
+#define SCIF_BIND		_IOWR('s', 1, __u64)
+#define SCIF_LISTEN		_IOW('s', 2, __s32)
+#define SCIF_CONNECT		_IOWR('s', 3, struct scifioctl_connect)
+#define SCIF_ACCEPTREQ		_IOWR('s', 4, struct scifioctl_accept)
+#define SCIF_ACCEPTREG		_IOWR('s', 5, __u64)
+#define SCIF_SEND		_IOWR('s', 6, struct scifioctl_msg)
+#define SCIF_RECV		_IOWR('s', 7, struct scifioctl_msg)
+#define SCIF_GET_NODEIDS	_IOWR('s', 14, struct scifioctl_node_ids)
+
+#endif /* SCIF_IOCTL_H */
-- 
cgit 


From c9d5c53db959e587d8b59c6a202e2dca741baac4 Mon Sep 17 00:00:00 2001
From: Sudeep Dutt <sudeep.dutt@intel.com>
Date: Wed, 29 Apr 2015 05:32:32 -0700
Subject: misc: mic: Common MIC header file changes in preparation for SCIF

Update mic_bootparam and define the maximum number of DMA channels

Reviewed-by: Nikhil Rao <nikhil.rao@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Sudeep Dutt <sudeep.dutt@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/misc/mic/common/mic_dev.h |  3 +++
 include/uapi/linux/mic_common.h   | 12 ++++++++++++
 2 files changed, 15 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/misc/mic/common/mic_dev.h b/drivers/misc/mic/common/mic_dev.h
index 92999c2bbf82..0b58c46045dc 100644
--- a/drivers/misc/mic/common/mic_dev.h
+++ b/drivers/misc/mic/common/mic_dev.h
@@ -48,4 +48,7 @@ struct mic_mw {
 #define MIC_VIRTIO_PARAM_DEV_REMOVE 0x1
 #define MIC_VIRTIO_PARAM_CONFIG_CHANGED 0x2
 
+/* Maximum number of DMA channels */
+#define MIC_MAX_DMA_CHAN 4
+
 #endif
diff --git a/include/uapi/linux/mic_common.h b/include/uapi/linux/mic_common.h
index 6eb40244e019..302a2ced373c 100644
--- a/include/uapi/linux/mic_common.h
+++ b/include/uapi/linux/mic_common.h
@@ -80,6 +80,12 @@ struct mic_device_ctrl {
  * @h2c_config_db: Host to Card Virtio config doorbell set by card
  * @shutdown_status: Card shutdown status set by card
  * @shutdown_card: Set to 1 by the host when a card shutdown is initiated
+ * @tot_nodes: Total number of nodes in the SCIF network
+ * @node_id: Unique id of the node
+ * @h2c_scif_db - Host to card SCIF doorbell set by card
+ * @c2h_scif_db - Card to host SCIF doorbell set by host
+ * @scif_host_dma_addr - SCIF host queue pair DMA address
+ * @scif_card_dma_addr - SCIF card queue pair DMA address
  */
 struct mic_bootparam {
 	__le32 magic;
@@ -88,6 +94,12 @@ struct mic_bootparam {
 	__s8 h2c_config_db;
 	__u8 shutdown_status;
 	__u8 shutdown_card;
+	__u8 tot_nodes;
+	__u8 node_id;
+	__u8 h2c_scif_db;
+	__u8 c2h_scif_db;
+	__u64 scif_host_dma_addr;
+	__u64 scif_card_dma_addr;
 } __attribute__ ((aligned(8)));
 
 /**
-- 
cgit 


From cd8dc0548511efff7a97d978f989ce67a883f9a5 Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Sat, 11 Apr 2015 18:07:57 -0700
Subject: Drivers: hv: vss: full handshake support

Introduce VSS_OP_REGISTER1 to support kernel replying to the negotiation
message with its own version.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Tested-by: Alex Ng <alexng@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_snapshot.c    | 49 ++++++++++++++++++++++++++++++++++-----------
 include/uapi/linux/hyperv.h |  5 +++++
 tools/hv/hv_vss_daemon.c    | 14 +++++++++++++
 3 files changed, 56 insertions(+), 12 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c
index 2c8c246d09eb..ee1762b39bf2 100644
--- a/drivers/hv/hv_snapshot.c
+++ b/drivers/hv/hv_snapshot.c
@@ -59,6 +59,11 @@ static struct {
 
 static void vss_respond_to_host(int error);
 
+/*
+ * This state maintains the version number registered by the daemon.
+ */
+static int dm_reg_value;
+
 static const char vss_devname[] = "vmbus/hv_vss";
 static __u8 *recv_buffer;
 static struct hvutil_transport *hvt;
@@ -89,6 +94,29 @@ static void vss_timeout_func(struct work_struct *dummy)
 			hv_vss_onchannelcallback);
 }
 
+static int vss_handle_handshake(struct hv_vss_msg *vss_msg)
+{
+	u32 our_ver = VSS_OP_REGISTER1;
+
+	switch (vss_msg->vss_hdr.operation) {
+	case VSS_OP_REGISTER:
+		/* Daemon doesn't expect us to reply */
+		dm_reg_value = VSS_OP_REGISTER;
+		break;
+	case VSS_OP_REGISTER1:
+		/* Daemon expects us to reply with our own version*/
+		if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+			return -EFAULT;
+		dm_reg_value = VSS_OP_REGISTER1;
+		break;
+	default:
+		return -EINVAL;
+	}
+	vss_transaction.state = HVUTIL_READY;
+	pr_info("VSS daemon registered\n");
+	return 0;
+}
+
 static int vss_on_msg(void *msg, int len)
 {
 	struct hv_vss_msg *vss_msg = (struct hv_vss_msg *)msg;
@@ -96,18 +124,15 @@ static int vss_on_msg(void *msg, int len)
 	if (len != sizeof(*vss_msg))
 		return -EINVAL;
 
-	/*
-	 * Don't process registration messages if we're in the middle of
-	 * a transaction processing.
-	 */
-	if (vss_transaction.state > HVUTIL_READY &&
-	    vss_msg->vss_hdr.operation == VSS_OP_REGISTER)
-		return -EINVAL;
-
-	if (vss_transaction.state == HVUTIL_DEVICE_INIT &&
-	    vss_msg->vss_hdr.operation == VSS_OP_REGISTER) {
-		pr_info("VSS daemon registered\n");
-		vss_transaction.state = HVUTIL_READY;
+	if (vss_msg->vss_hdr.operation == VSS_OP_REGISTER ||
+	    vss_msg->vss_hdr.operation == VSS_OP_REGISTER1) {
+		/*
+		 * Don't process registration messages if we're in the middle
+		 * of a transaction processing.
+		 */
+		if (vss_transaction.state > HVUTIL_READY)
+			return -EINVAL;
+		return vss_handle_handshake(vss_msg);
 	} else if (vss_transaction.state == HVUTIL_USERSPACE_REQ) {
 		vss_transaction.state = HVUTIL_USERSPACE_RECV;
 		if (cancel_delayed_work_sync(&vss_timeout_work)) {
diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index bb1cb73c927a..66c76df2c32d 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -45,6 +45,11 @@
 
 #define VSS_OP_REGISTER 128
 
+/*
+  Daemon code with full handshake support.
+ */
+#define VSS_OP_REGISTER1 129
+
 enum hv_vss_op {
 	VSS_OP_CREATE = 0,
 	VSS_OP_DELETE,
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
index 36f18211afa7..96234b638249 100644
--- a/tools/hv/hv_vss_daemon.c
+++ b/tools/hv/hv_vss_daemon.c
@@ -148,6 +148,8 @@ int main(int argc, char *argv[])
 	int	op;
 	struct hv_vss_msg vss_msg[1];
 	int daemonize = 1, long_index = 0, opt;
+	int in_handshake = 1;
+	__u32 kernel_modver;
 
 	static struct option long_options[] = {
 		{"help",	no_argument,	   0,  'h' },
@@ -211,6 +213,18 @@ int main(int argc, char *argv[])
 
 		len = read(vss_fd, vss_msg, sizeof(struct hv_vss_msg));
 
+		if (in_handshake) {
+			if (len != sizeof(kernel_modver)) {
+				syslog(LOG_ERR, "invalid version negotiation");
+				exit(EXIT_FAILURE);
+			}
+			kernel_modver = *(__u32 *)vss_msg;
+			in_handshake = 0;
+			syslog(LOG_INFO, "VSS: kernel module version: %d",
+			       kernel_modver);
+			continue;
+		}
+
 		if (len != sizeof(struct hv_vss_msg)) {
 			syslog(LOG_ERR, "read failed; error:%d %s",
 			       errno, strerror(errno));
-- 
cgit 


From a4d1ee5b0255a135fead1d62a7fc7e6fe718b66e Mon Sep 17 00:00:00 2001
From: Vitaly Kuznetsov <vkuznets@redhat.com>
Date: Sat, 11 Apr 2015 18:07:58 -0700
Subject: Drivers: hv: fcopy: full handshake support

Introduce FCOPY_VERSION_1 to support kernel replying to the negotiation
message with its own version.

Signed-off-by: Vitaly Kuznetsov <vkuznets@redhat.com>
Tested-by: Alex Ng <alexng@microsoft.com>
Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/hv/hv_fcopy.c       | 16 +++++++++++++++-
 include/uapi/linux/hyperv.h |  3 ++-
 tools/hv/hv_fcopy_daemon.c  | 15 +++++++++++++++
 3 files changed, 32 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/hv/hv_fcopy.c b/drivers/hv/hv_fcopy.c
index 6a8ec9fb0869..b7b528cd481b 100644
--- a/drivers/hv/hv_fcopy.c
+++ b/drivers/hv/hv_fcopy.c
@@ -62,6 +62,10 @@ static DECLARE_WORK(fcopy_send_work, fcopy_send_data);
 static const char fcopy_devname[] = "vmbus/hv_fcopy";
 static u8 *recv_buffer;
 static struct hvutil_transport *hvt;
+/*
+ * This state maintains the version number registered by the daemon.
+ */
+static int dm_reg_value;
 
 static void fcopy_timeout_func(struct work_struct *dummy)
 {
@@ -81,8 +85,18 @@ static void fcopy_timeout_func(struct work_struct *dummy)
 
 static int fcopy_handle_handshake(u32 version)
 {
+	u32 our_ver = FCOPY_CURRENT_VERSION;
+
 	switch (version) {
-	case FCOPY_CURRENT_VERSION:
+	case FCOPY_VERSION_0:
+		/* Daemon doesn't expect us to reply */
+		dm_reg_value = version;
+		break;
+	case FCOPY_VERSION_1:
+		/* Daemon expects us to reply with our own version */
+		if (hvutil_transport_send(hvt, &our_ver, sizeof(our_ver)))
+			return -EFAULT;
+		dm_reg_value = version;
 		break;
 	default:
 		/*
diff --git a/include/uapi/linux/hyperv.h b/include/uapi/linux/hyperv.h
index 66c76df2c32d..e4c0a35d6417 100644
--- a/include/uapi/linux/hyperv.h
+++ b/include/uapi/linux/hyperv.h
@@ -105,7 +105,8 @@ struct hv_vss_msg {
  */
 
 #define FCOPY_VERSION_0 0
-#define FCOPY_CURRENT_VERSION FCOPY_VERSION_0
+#define FCOPY_VERSION_1 1
+#define FCOPY_CURRENT_VERSION FCOPY_VERSION_1
 #define W_MAX_PATH 260
 
 enum hv_fcopy_op {
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
index 9445d8f264a4..5480e4e424eb 100644
--- a/tools/hv/hv_fcopy_daemon.c
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -137,6 +137,8 @@ int main(int argc, char *argv[])
 	int version = FCOPY_CURRENT_VERSION;
 	char *buffer[4096 * 2];
 	struct hv_fcopy_hdr *in_msg;
+	int in_handshake = 1;
+	__u32 kernel_modver;
 
 	static struct option long_options[] = {
 		{"help",	no_argument,	   0,  'h' },
@@ -191,6 +193,19 @@ int main(int argc, char *argv[])
 			syslog(LOG_ERR, "pread failed: %s", strerror(errno));
 			exit(EXIT_FAILURE);
 		}
+
+		if (in_handshake) {
+			if (len != sizeof(kernel_modver)) {
+				syslog(LOG_ERR, "invalid version negotiation");
+				exit(EXIT_FAILURE);
+			}
+			kernel_modver = *(__u32 *)buffer;
+			in_handshake = 0;
+			syslog(LOG_INFO, "HV_FCOPY: kernel module version: %d",
+			       kernel_modver);
+			continue;
+		}
+
 		in_msg = (struct hv_fcopy_hdr *)buffer;
 
 		switch (in_msg->operation) {
-- 
cgit 


From 113c62ee49d212ecb934147c6ba84cfa79c26121 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 11 May 2015 10:23:38 +0900
Subject: tty: fix comment of ASYNCB_SPD_HI

This comment does not reflect the actual code.  It should be 57600,
not 56000.

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/uapi/linux/tty_flags.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/tty_flags.h b/include/uapi/linux/tty_flags.h
index fae4864737fa..072e41e45ee2 100644
--- a/include/uapi/linux/tty_flags.h
+++ b/include/uapi/linux/tty_flags.h
@@ -15,7 +15,7 @@
 #define ASYNCB_FOURPORT		 1 /* Set OU1, OUT2 per AST Fourport settings */
 #define ASYNCB_SAK		 2 /* Secure Attention Key (Orange book) */
 #define ASYNCB_SPLIT_TERMIOS	 3 /* [x] Separate termios for dialin/callout */
-#define ASYNCB_SPD_HI		 4 /* Use 56000 instead of 38400 bps */
+#define ASYNCB_SPD_HI		 4 /* Use 57600 instead of 38400 bps */
 #define ASYNCB_SPD_VHI		 5 /* Use 115200 instead of 38400 bps */
 #define ASYNCB_SKIP_TEST	 6 /* Skip UART test during autoconfiguration */
 #define ASYNCB_AUTO_IRQ		 7 /* Do automatic IRQ during
-- 
cgit 


From d52d3997f843ffefaa8d8462790ffcaca6c74192 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 22 May 2015 20:56:06 -0700
Subject: ipv6: Create percpu rt6_info

After the patch
'ipv6: Only create RTF_CACHE routes after encountering pmtu exception',
we need to compensate the performance hit (bouncing dst->__refcnt).

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Cc: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Julian Anastasov <ja@ssi.bg>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h           |   3 +-
 include/uapi/linux/ipv6_route.h |   1 +
 net/ipv6/ip6_fib.c              |  24 +++++++-
 net/ipv6/route.c                | 132 +++++++++++++++++++++++++++++++++++-----
 4 files changed, 142 insertions(+), 18 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index cc8f03c10c43..3b76849c190f 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -124,6 +124,7 @@ struct rt6_info {
 	struct uncached_list		*rt6i_uncached_list;
 
 	struct inet6_dev		*rt6i_idev;
+	struct rt6_info * __percpu	*rt6i_pcpu;
 
 	u32				rt6i_metric;
 	u32				rt6i_pmtu;
@@ -164,7 +165,7 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout)
 
 static inline u32 rt6_get_cookie(const struct rt6_info *rt)
 {
-	if (unlikely(rt->dst.flags & DST_NOCACHE))
+	if (rt->rt6i_flags & RTF_PCPU || unlikely(rt->dst.flags & DST_NOCACHE))
 		rt = (struct rt6_info *)(rt->dst.from);
 
 	return rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
diff --git a/include/uapi/linux/ipv6_route.h b/include/uapi/linux/ipv6_route.h
index 2be7bd174751..f6598d1c886e 100644
--- a/include/uapi/linux/ipv6_route.h
+++ b/include/uapi/linux/ipv6_route.h
@@ -34,6 +34,7 @@
 #define RTF_PREF(pref)	((pref) << 27)
 #define RTF_PREF_MASK	0x18000000
 
+#define RTF_PCPU	0x40000000
 #define RTF_LOCAL	0x80000000
 
 
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 83341b3a248d..55d19861ab20 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn)
 	kmem_cache_free(fib6_node_kmem, fn);
 }
 
+static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
+{
+	int cpu;
+
+	if (!non_pcpu_rt->rt6i_pcpu)
+		return;
+
+	for_each_possible_cpu(cpu) {
+		struct rt6_info **ppcpu_rt;
+		struct rt6_info *pcpu_rt;
+
+		ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu);
+		pcpu_rt = *ppcpu_rt;
+		if (pcpu_rt) {
+			dst_free(&pcpu_rt->dst);
+			*ppcpu_rt = NULL;
+		}
+	}
+}
+
 static void rt6_release(struct rt6_info *rt)
 {
-	if (atomic_dec_and_test(&rt->rt6i_ref))
+	if (atomic_dec_and_test(&rt->rt6i_ref)) {
+		rt6_free_pcpu(rt);
 		dst_free(&rt->dst);
+	}
 }
 
 static void fib6_link_table(struct net *net, struct fib6_table *tb)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 90c8eaa24565..1a1122a6bbf5 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -165,11 +165,18 @@ static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev)
 	}
 }
 
+static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt)
+{
+	return dst_metrics_write_ptr(rt->dst.from);
+}
+
 static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
 
-	if (rt->rt6i_flags & RTF_CACHE)
+	if (rt->rt6i_flags & RTF_PCPU)
+		return rt6_pcpu_cow_metrics(rt);
+	else if (rt->rt6i_flags & RTF_CACHE)
 		return NULL;
 	else
 		return dst_cow_metrics_generic(dst, old);
@@ -309,10 +316,10 @@ static const struct rt6_info ip6_blk_hole_entry_template = {
 #endif
 
 /* allocate dst with ip6_dst_ops */
-static inline struct rt6_info *ip6_dst_alloc(struct net *net,
-					     struct net_device *dev,
-					     int flags,
-					     struct fib6_table *table)
+static struct rt6_info *__ip6_dst_alloc(struct net *net,
+					struct net_device *dev,
+					int flags,
+					struct fib6_table *table)
 {
 	struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev,
 					0, DST_OBSOLETE_FORCE_CHK, flags);
@@ -327,6 +334,34 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net,
 	return rt;
 }
 
+static struct rt6_info *ip6_dst_alloc(struct net *net,
+				      struct net_device *dev,
+				      int flags,
+				      struct fib6_table *table)
+{
+	struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table);
+
+	if (rt) {
+		rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC);
+		if (rt->rt6i_pcpu) {
+			int cpu;
+
+			for_each_possible_cpu(cpu) {
+				struct rt6_info **p;
+
+				p = per_cpu_ptr(rt->rt6i_pcpu, cpu);
+				/* no one shares rt */
+				*p =  NULL;
+			}
+		} else {
+			dst_destroy((struct dst_entry *)rt);
+			return NULL;
+		}
+	}
+
+	return rt;
+}
+
 static void ip6_dst_destroy(struct dst_entry *dst)
 {
 	struct rt6_info *rt = (struct rt6_info *)dst;
@@ -335,6 +370,9 @@ static void ip6_dst_destroy(struct dst_entry *dst)
 
 	dst_destroy_metrics_generic(dst);
 
+	if (rt->rt6i_pcpu)
+		free_percpu(rt->rt6i_pcpu);
+
 	rt6_uncached_list_del(rt);
 
 	idev = rt->rt6i_idev;
@@ -912,11 +950,11 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	 *	Clone the route.
 	 */
 
-	if (ort->rt6i_flags & RTF_CACHE)
+	if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU))
 		ort = (struct rt6_info *)ort->dst.from;
 
-	rt = ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
-			   0, ort->rt6i_table);
+	rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev,
+			     0, ort->rt6i_table);
 
 	if (!rt)
 		return NULL;
@@ -943,6 +981,54 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort,
 	return rt;
 }
 
+static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt)
+{
+	struct rt6_info *pcpu_rt;
+
+	pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev),
+				  rt->dst.dev, rt->dst.flags,
+				  rt->rt6i_table);
+
+	if (!pcpu_rt)
+		return NULL;
+	ip6_rt_copy_init(pcpu_rt, rt);
+	pcpu_rt->rt6i_protocol = rt->rt6i_protocol;
+	pcpu_rt->rt6i_flags |= RTF_PCPU;
+	return pcpu_rt;
+}
+
+/* It should be called with read_lock_bh(&tb6_lock) acquired */
+static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt)
+{
+	struct rt6_info *pcpu_rt, *prev, **p;
+
+	p = this_cpu_ptr(rt->rt6i_pcpu);
+	pcpu_rt = *p;
+
+	if (pcpu_rt)
+		goto done;
+
+	pcpu_rt = ip6_rt_pcpu_alloc(rt);
+	if (!pcpu_rt) {
+		struct net *net = dev_net(rt->dst.dev);
+
+		pcpu_rt = net->ipv6.ip6_null_entry;
+		goto done;
+	}
+
+	prev = cmpxchg(p, NULL, pcpu_rt);
+	if (prev) {
+		/* If someone did it before us, return prev instead */
+		dst_destroy(&pcpu_rt->dst);
+		pcpu_rt = prev;
+	}
+
+done:
+	dst_hold(&pcpu_rt->dst);
+	rt6_dst_from_metrics_check(pcpu_rt);
+	return pcpu_rt;
+}
+
 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
 				      struct flowi6 *fl6, int flags)
 {
@@ -975,11 +1061,13 @@ redo_rt6_select:
 		}
 	}
 
-	dst_use(&rt->dst, jiffies);
-	read_unlock_bh(&table->tb6_lock);
 
 	if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) {
-		goto done;
+		dst_use(&rt->dst, jiffies);
+		read_unlock_bh(&table->tb6_lock);
+
+		rt6_dst_from_metrics_check(rt);
+		return rt;
 	} else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) &&
 			    !(rt->rt6i_flags & RTF_GATEWAY))) {
 		/* Create a RTF_CACHE clone which will not be
@@ -990,6 +1078,9 @@ redo_rt6_select:
 
 		struct rt6_info *uncached_rt;
 
+		dst_use(&rt->dst, jiffies);
+		read_unlock_bh(&table->tb6_lock);
+
 		uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL);
 		dst_release(&rt->dst);
 
@@ -997,13 +1088,22 @@ redo_rt6_select:
 			rt6_uncached_list_add(uncached_rt);
 		else
 			uncached_rt = net->ipv6.ip6_null_entry;
+
 		dst_hold(&uncached_rt->dst);
 		return uncached_rt;
-	}
 
-done:
-	rt6_dst_from_metrics_check(rt);
-	return rt;
+	} else {
+		/* Get a percpu copy */
+
+		struct rt6_info *pcpu_rt;
+
+		rt->dst.lastuse = jiffies;
+		rt->dst.__use++;
+		pcpu_rt = rt6_get_pcpu_route(rt);
+		read_unlock_bh(&table->tb6_lock);
+
+		return pcpu_rt;
+	}
 }
 
 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
@@ -1147,7 +1247,7 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
 	rt6_dst_from_metrics_check(rt);
 
-	if (unlikely(dst->flags & DST_NOCACHE))
+	if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE))
 		return rt6_dst_from_check(rt, cookie);
 	else
 		return rt6_check(rt, cookie);
-- 
cgit 


From 069d4a7b583274e3fd8712c92a035626e0ebf7be Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 3 Mar 2015 11:58:14 +0100
Subject: netfilter: ebtables: fix comment grammar

s/stongly inspired on/strongly inspired by/

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/netfilter_bridge/ebtables.h      | 2 +-
 include/uapi/linux/netfilter_bridge/ebtables.h | 2 +-
 net/bridge/netfilter/ebtables.c                | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h
index 34e7a2b7f867..9ac6f263956b 100644
--- a/include/linux/netfilter_bridge/ebtables.h
+++ b/include/linux/netfilter_bridge/ebtables.h
@@ -6,7 +6,7 @@
  *
  *  ebtables.c,v 2.0, April, 2002
  *
- *  This code is stongly inspired on the iptables code which is
+ *  This code is strongly inspired by the iptables code which is
  *  Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  */
 #ifndef __LINUX_BRIDGE_EFF_H
diff --git a/include/uapi/linux/netfilter_bridge/ebtables.h b/include/uapi/linux/netfilter_bridge/ebtables.h
index ba993360dbe9..ab46c805c455 100644
--- a/include/uapi/linux/netfilter_bridge/ebtables.h
+++ b/include/uapi/linux/netfilter_bridge/ebtables.h
@@ -6,7 +6,7 @@
  *
  *  ebtables.c,v 2.0, April, 2002
  *
- *  This code is stongly inspired on the iptables code which is
+ *  This code is strongly inspired by the iptables code which is
  *  Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  */
 
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index d9a8c05d995d..54df89edcf20 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -6,7 +6,7 @@
  *
  *  ebtables.c,v 2.0, July, 2002
  *
- *  This code is stongly inspired on the iptables code which is
+ *  This code is strongly inspired by the iptables code which is
  *  Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
  *
  *  This program is free software; you can redistribute it and/or
-- 
cgit 


From ebddf1a8d78aa3436353fae75c4396e50cb2d6cf Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 26 May 2015 18:41:20 +0200
Subject: netfilter: nf_tables: allow to bind table to net_device

This patch adds the internal NFT_AF_NEEDS_DEV flag to indicate that you must
attach this table to a net_device.

This change is required by the follow up patch that introduces the new netdev
table.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  8 ++++++
 include/uapi/linux/netfilter/nf_tables.h |  2 ++
 net/netfilter/nf_tables_api.c            | 46 ++++++++++++++++++++++++++++----
 3 files changed, 51 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index e6bcf55dcf20..3d6f48ca40a7 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -819,6 +819,7 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt,
  *	@use: number of chain references to this table
  *	@flags: table flag (see enum nft_table_flags)
  *	@name: name of the table
+ *	@dev: this table is bound to this device (if any)
  */
 struct nft_table {
 	struct list_head		list;
@@ -828,6 +829,11 @@ struct nft_table {
 	u32				use;
 	u16				flags;
 	char				name[NFT_TABLE_MAXNAMELEN];
+	struct net_device		*dev;
+};
+
+enum nft_af_flags {
+	NFT_AF_NEEDS_DEV	= (1 << 0),
 };
 
 /**
@@ -838,6 +844,7 @@ struct nft_table {
  *	@nhooks: number of hooks in this family
  *	@owner: module owner
  *	@tables: used internally
+ *	@flags: family flags
  *	@nops: number of hook ops in this family
  *	@hook_ops_init: initialization function for chain hook ops
  *	@hooks: hookfn overrides for packet validation
@@ -848,6 +855,7 @@ struct nft_af_info {
 	unsigned int			nhooks;
 	struct module			*owner;
 	struct list_head		tables;
+	u32				flags;
 	unsigned int			nops;
 	void				(*hook_ops_init)(struct nf_hook_ops *,
 							 unsigned int);
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 5fa1cd04762e..89a671e0f5e7 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -146,12 +146,14 @@ enum nft_table_flags {
  * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
  * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
  * @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
+ * @NFTA_TABLE_DEV: net device name (NLA_STRING)
  */
 enum nft_table_attributes {
 	NFTA_TABLE_UNSPEC,
 	NFTA_TABLE_NAME,
 	NFTA_TABLE_FLAGS,
 	NFTA_TABLE_USE,
+	NFTA_TABLE_DEV,
 	__NFTA_TABLE_MAX
 };
 #define NFTA_TABLE_MAX		(__NFTA_TABLE_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ad9d11fb29fd..2fd4e99dd074 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -399,6 +399,8 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
 	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
 				    .len = NFT_TABLE_MAXNAMELEN - 1 },
 	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
+	[NFTA_TABLE_DEV]	= { .type = NLA_STRING,
+				    .len = IFNAMSIZ - 1 },
 };
 
 static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -423,6 +425,10 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
 	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
 		goto nla_put_failure;
 
+	if (table->dev &&
+	    nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name))
+		goto nla_put_failure;
+
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -608,6 +614,11 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 	if (flags == ctx->table->flags)
 		return 0;
 
+	if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) &&
+	    ctx->nla[NFTA_TABLE_DEV] &&
+	    nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name))
+		return -EOPNOTSUPP;
+
 	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
 				sizeof(struct nft_trans_table));
 	if (trans == NULL)
@@ -645,6 +656,7 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_table *table;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	struct net_device *dev = NULL;
 	u32 flags = 0;
 	struct nft_ctx ctx;
 	int err;
@@ -679,30 +691,50 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 			return -EINVAL;
 	}
 
+	if (afi->flags & NFT_AF_NEEDS_DEV) {
+		char ifname[IFNAMSIZ];
+
+		if (!nla[NFTA_TABLE_DEV])
+			return -EOPNOTSUPP;
+
+		nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ);
+		dev = dev_get_by_name(net, ifname);
+		if (!dev)
+			return -ENOENT;
+	} else if (nla[NFTA_TABLE_DEV]) {
+		return -EOPNOTSUPP;
+	}
+
+	err = -EAFNOSUPPORT;
 	if (!try_module_get(afi->owner))
-		return -EAFNOSUPPORT;
+		goto err1;
 
 	err = -ENOMEM;
 	table = kzalloc(sizeof(*table), GFP_KERNEL);
 	if (table == NULL)
-		goto err1;
+		goto err2;
 
 	nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN);
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	table->flags = flags;
+	table->dev   = dev;
 
 	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
 	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
 	if (err < 0)
-		goto err2;
+		goto err3;
 
 	list_add_tail_rcu(&table->list, &afi->tables);
 	return 0;
-err2:
+err3:
 	kfree(table);
-err1:
+err2:
 	module_put(afi->owner);
+err1:
+	if (dev != NULL)
+		dev_put(dev);
+
 	return err;
 }
 
@@ -806,6 +838,9 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 {
 	BUG_ON(ctx->table->use > 0);
 
+	if (ctx->table->dev)
+		dev_put(ctx->table->dev);
+
 	kfree(ctx->table);
 	module_put(ctx->afi->owner);
 }
@@ -1361,6 +1396,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			ops->priority	= priority;
 			ops->priv	= chain;
 			ops->hook	= afi->hooks[ops->hooknum];
+			ops->dev	= table->dev;
 			if (hookfn)
 				ops->hook = hookfn;
 			if (afi->hook_ops_init)
-- 
cgit 


From 8cf6f497de405ca9eb87ffb34d90699962d10125 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.r.fastabend@intel.com>
Date: Tue, 26 May 2015 08:22:49 -0700
Subject: ethtool: Add helper routines to pass vf to rx_flow_spec

The ring_cookie is 64 bits wide which is much larger than can be used
for actual queue index values. So provide some helper routines to
pack a VF index into the cookie. This is useful to steer packets to
a VF ring without having to know the queue layout of the device.

CC: Alex Duyck <alexander.h.duyck@redhat.com>
Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/uapi/linux/ethtool.h | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index ae832b45b44c..0594933cdf55 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -796,6 +796,31 @@ struct ethtool_rx_flow_spec {
 	__u32		location;
 };
 
+/* How rings are layed out when accessing virtual functions or
+ * offloaded queues is device specific. To allow users to do flow
+ * steering and specify these queues the ring cookie is partitioned
+ * into a 32bit queue index with an 8 bit virtual function id.
+ * This also leaves the 3bytes for further specifiers. It is possible
+ * future devices may support more than 256 virtual functions if
+ * devices start supporting PCIe w/ARI. However at the moment I
+ * do not know of any devices that support this so I do not reserve
+ * space for this at this time. If a future patch consumes the next
+ * byte it should be aware of this possiblity.
+ */
+#define ETHTOOL_RX_FLOW_SPEC_RING	0x00000000FFFFFFFFLL
+#define ETHTOOL_RX_FLOW_SPEC_RING_VF	0x000000FF00000000LL
+#define ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF 32
+static inline __u64 ethtool_get_flow_spec_ring(__u64 ring_cookie)
+{
+	return ETHTOOL_RX_FLOW_SPEC_RING & ring_cookie;
+};
+
+static inline __u64 ethtool_get_flow_spec_ring_vf(__u64 ring_cookie)
+{
+	return (ETHTOOL_RX_FLOW_SPEC_RING_VF & ring_cookie) >>
+				ETHTOOL_RX_FLOW_SPEC_RING_VF_OFF;
+};
+
 /**
  * struct ethtool_rxnfc - command to get or set RX flow classification rules
  * @cmd: Specific command number - %ETHTOOL_GRXFH, %ETHTOOL_SRXFH,
-- 
cgit 


From 97f411d9175f018503e67e7552f92bc28844001b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Tue, 26 May 2015 07:34:21 -0300
Subject: [media] dvb: split enum from typedefs at frontend.h

Using typedefs is already bad enough, but doing it together
with enum declaration is even worse.

Also, it breaks the scripts at DocBook that would be generating
reference pointers for the enums.

Well, we can't get rid of typedef right now, but let's at least
declare it on a separate line, and let the scripts to generate
the cross-reference, as this is needed for the next DocBook
patches.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/dvb/frontend.h | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 466f56997272..ae481bc53a9c 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -36,7 +36,7 @@ typedef enum fe_type {
 } fe_type_t;
 
 
-typedef enum fe_caps {
+enum fe_caps {
 	FE_IS_STUPID			= 0,
 	FE_CAN_INVERSION_AUTO		= 0x1,
 	FE_CAN_FEC_1_2			= 0x2,
@@ -68,7 +68,9 @@ typedef enum fe_caps {
 	FE_NEEDS_BENDING		= 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */
 	FE_CAN_RECOVER			= 0x40000000, /* frontend can recover from a cable unplug automatically */
 	FE_CAN_MUTE_TS			= 0x80000000  /* frontend can stop spurious TS data output */
-} fe_caps_t;
+};
+
+typedef enum fe_caps fe_caps_t;
 
 
 struct dvb_frontend_info {
@@ -134,7 +136,7 @@ typedef enum fe_sec_mini_cmd {
  *			to reset DiSEqC, tone and parameters
  */
 
-typedef enum fe_status {
+enum fe_status {
 	FE_HAS_SIGNAL		= 0x01,
 	FE_HAS_CARRIER		= 0x02,
 	FE_HAS_VITERBI		= 0x04,
@@ -142,7 +144,9 @@ typedef enum fe_status {
 	FE_HAS_LOCK		= 0x10,
 	FE_TIMEDOUT		= 0x20,
 	FE_REINIT		= 0x40,
-} fe_status_t;
+};
+
+typedef enum fe_status fe_status_t;
 
 typedef enum fe_spectral_inversion {
 	INVERSION_OFF,
-- 
cgit 


From d6b6d346e5605ee2af0f0349e71901121b984258 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Tue, 26 May 2015 19:33:58 -0300
Subject: [media] DocBook: better document FE_SET_VOLTAGE ioctl

Use the proper format for FE_SET_VOLTAGE documentation and fix
the documentation. The description for the enum is not 100%,
and it is missing the voltage off value.

Also, it is better to keep the enum description together with
the ioctl, as both are used together.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/fe-set-voltage.xml | 94 ++++++++++++++++++++++
 Documentation/DocBook/media/dvb/frontend.xml       | 62 +-------------
 include/uapi/linux/dvb/frontend.h                  |  6 +-
 3 files changed, 99 insertions(+), 63 deletions(-)
 create mode 100644 Documentation/DocBook/media/dvb/fe-set-voltage.xml

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/fe-set-voltage.xml b/Documentation/DocBook/media/dvb/fe-set-voltage.xml
new file mode 100644
index 000000000000..a1ee5f9c28e0
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/fe-set-voltage.xml
@@ -0,0 +1,94 @@
+<refentry id="FE_SET_VOLTAGE">
+  <refmeta>
+    <refentrytitle>ioctl FE_SET_VOLTAGE</refentrytitle>
+    &manvol;
+  </refmeta>
+
+  <refnamediv>
+    <refname>FE_SET_VOLTAGE</refname>
+    <refpurpose>Allow setting the DC level sent to the antenna subsystem.</refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <funcsynopsis>
+      <funcprototype>
+	<funcdef>int <function>ioctl</function></funcdef>
+	<paramdef>int <parameter>fd</parameter></paramdef>
+	<paramdef>int <parameter>request</parameter></paramdef>
+	<paramdef>&fe-sec-voltage; *<parameter>voltage</parameter></paramdef>
+      </funcprototype>
+    </funcsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Arguments</title>
+        <variablelist>
+      <varlistentry>
+	<term><parameter>fd</parameter></term>
+	<listitem>
+	  <para>&fe_fd;</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>request</parameter></term>
+	<listitem>
+	  <para>FE_SET_VOLTAGE</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>pointer to &fe-sec-voltage;</parameter></term>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Description</title>
+
+<para>This ioctl allows to set the DC voltage level sent through the antenna
+    cable to 13V, 18V or off.</para>
+<para>Usually, a satellital antenna subsystems require that the digital TV
+    device to send a DC voltage to feed power to the LNBf. Depending on the
+    LNBf type, the polarization or the intermediate frequency (IF) of the LNBf
+    can controlled by the voltage level. Other devices (for example, the ones
+    that implement DISEqC and multipoint LNBf's don't need to control the
+    voltage level, provided that either 13V or 18V is sent to power up the
+    LNBf.</para>
+<para>NOTE: if more than one device is connected to the same antenna,
+    setting a voltage level may interfere on other devices, as they may lose
+    the capability of setting polarization or IF. So, on those
+    cases, setting the voltage to SEC_VOLTAGE_OFF while the device is not is
+    used is recommended.</para>
+
+&return-value-dvb;
+</refsect1>
+
+<section id="fe-sec-voltage-t">
+<title>enum fe_sec_voltage</title>
+
+<table pgwide="1" frame="none" id="fe-sec-voltage">
+    <title>enum fe_status</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry align="char">SEC_VOLTAGE_13</entry>
+	    <entry align="char">Set DC voltage level to 13V</entry>
+	</row><row>
+	    <entry align="char">SEC_VOLTAGE_18</entry>
+	    <entry align="char">Set DC voltage level to 18V</entry>
+	</row><row>
+	    <entry align="char">SEC_VOLTAGE_OFF</entry>
+	    <entry align="char">Don't send any voltage to the antenna</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
+
+</refentry>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index bb2cd9ef3b03..584c759b6bbe 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -75,19 +75,6 @@ specification is available at
 </programlisting>
 </section>
 
-<section id="fe-sec-voltage-t">
-<title>diseqc slave reply</title>
-<para>The voltage is usually used with non-DiSEqC capable LNBs to switch the polarzation
-(horizontal/vertical). When using DiSEqC epuipment this voltage has to be switched
-consistently to the DiSEqC commands as described in the DiSEqC spec.</para>
-<programlisting>
-	typedef enum fe_sec_voltage {
-	SEC_VOLTAGE_13,
-	SEC_VOLTAGE_18
-	} fe_sec_voltage_t;
-</programlisting>
-</section>
-
 <section id="fe-sec-tone-mode-t">
 <title>SEC continuous tone</title>
 
@@ -641,54 +628,7 @@ typedef enum fe_hierarchy {
 &return-value-dvb;
 </section>
 
-<section id="FE_SET_VOLTAGE">
-<title>FE_SET_VOLTAGE</title>
-<para>DESCRIPTION
-</para>
-<informaltable><tgroup cols="1"><tbody><row><entry
- align="char">
-<para>This call is used to set the bus voltage. This call requires read/write
- permissions.</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-<para>SYNOPSIS
-</para>
-<informaltable><tgroup cols="1"><tbody><row><entry
- align="char">
-<para>int ioctl(int fd, int request = <link linkend="FE_SET_VOLTAGE">FE_SET_VOLTAGE</link>,
- fe_sec_voltage_t voltage);</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-
-<para>PARAMETERS
-</para>
-<informaltable><tgroup cols="2"><tbody><row><entry
- align="char">
-<para>int fd</para>
-</entry><entry
- align="char">
-<para>File descriptor returned by a previous call to open().</para>
-</entry>
- </row><row><entry
- align="char">
-<para>int request</para>
-</entry><entry
- align="char">
-<para>Equals <link linkend="FE_SET_VOLTAGE">FE_SET_VOLTAGE</link> for this command.</para>
-</entry>
- </row><row><entry
- align="char">
-<para>fe_sec_voltage_t
- voltage</para>
-</entry><entry
- align="char">
-<para>The requested bus voltage.</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-
-&return-value-dvb;
-</section>
-
+&sub-fe-set-voltage;
 &sub-fe-enable-high-lnb-voltage;
 &sub-fe-set-frontend-tune-mode;
 
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index ae481bc53a9c..c1ccbc82024c 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -105,11 +105,13 @@ struct dvb_diseqc_slave_reply {
 };			/*  errorcode when no message was received  */
 
 
-typedef enum fe_sec_voltage {
+enum fe_sec_voltage {
 	SEC_VOLTAGE_13,
 	SEC_VOLTAGE_18,
 	SEC_VOLTAGE_OFF
-} fe_sec_voltage_t;
+};
+
+typedef enum fe_sec_voltage fe_sec_voltage_t;
 
 
 typedef enum fe_sec_tone_mode {
-- 
cgit 


From 6dc59e7a195fc8852e98d64805f44c46c35e40cd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Wed, 27 May 2015 07:15:50 -0300
Subject: [media] DocBook: better document FE_SET_TONE ioctl

Use the proper format for FE_SET_TONE documentation and
improve the documentation.

Keep the enum fe_sec_tone_mode description together with
the ioctl, as both are used together.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/fe-set-tone.xml | 88 +++++++++++++++++++++++++
 Documentation/DocBook/media/dvb/frontend.xml    | 61 +----------------
 include/uapi/linux/dvb/frontend.h               |  6 +-
 3 files changed, 93 insertions(+), 62 deletions(-)
 create mode 100644 Documentation/DocBook/media/dvb/fe-set-tone.xml

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/fe-set-tone.xml b/Documentation/DocBook/media/dvb/fe-set-tone.xml
new file mode 100644
index 000000000000..b4b1f5303170
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/fe-set-tone.xml
@@ -0,0 +1,88 @@
+<refentry id="FE_SET_TONE">
+  <refmeta>
+    <refentrytitle>ioctl FE_SET_TONE</refentrytitle>
+    &manvol;
+  </refmeta>
+
+  <refnamediv>
+    <refname>FE_SET_TONE</refname>
+    <refpurpose>Sets/resets the generation of the continuous 22kHz tone.</refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <funcsynopsis>
+      <funcprototype>
+	<funcdef>int <function>ioctl</function></funcdef>
+	<paramdef>int <parameter>fd</parameter></paramdef>
+	<paramdef>int <parameter>request</parameter></paramdef>
+	<paramdef>&fe-sec-tone-mode; *<parameter>tone</parameter></paramdef>
+      </funcprototype>
+    </funcsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Arguments</title>
+        <variablelist>
+      <varlistentry>
+	<term><parameter>fd</parameter></term>
+	<listitem>
+	  <para>&fe_fd;</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>request</parameter></term>
+	<listitem>
+	  <para>FE_SET_TONE</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>pointer to &fe-sec-tone-mode;</parameter></term>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Description</title>
+
+<para>This ioctl is used to set the generation of the continuous 22kHz tone.
+    This call requires read/write permissions.</para>
+<para>Usually, satellital antenna subsystems require that the digital TV
+    device to send a 22kHz tone in order to select between high/low band on
+    some dual-band LNBf. It is also used to send signals to DiSEqC equipment,
+    but this is done using the DiSEqC ioctls.</para>
+<para>NOTE: if more than one device is connected to the same antenna,
+    setting a tone may interfere on other devices, as they may lose
+    the capability of selecting the band. So, it is recommended that
+    applications would change to SEC_TONE_OFF when the device is not used.</para>
+
+&return-value-dvb;
+</refsect1>
+
+<section id="fe-sec-tone-mode-t">
+<title>enum fe_sec_voltage</title>
+
+<table pgwide="1" frame="none" id="fe-sec-tone-mode">
+    <title>enum fe_sec_tone_mode</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry align="char">SEC_TONE_ON</entry>
+	    <entry align="char">Sends a 22kHz tone burst to the antenna</entry>
+	</row><row>
+	    <entry align="char">SEC_TONE_OFF</entry>
+	    <entry align="char">Don't send a 22kHz tone to the antenna
+		(except if the FE_DISEQC_* ioctls are called)</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
+
+</refentry>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 584c759b6bbe..f05da4abb3fe 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -75,21 +75,6 @@ specification is available at
 </programlisting>
 </section>
 
-<section id="fe-sec-tone-mode-t">
-<title>SEC continuous tone</title>
-
-<para>The continuous 22KHz tone is usually used with non-DiSEqC capable LNBs to switch the
-high/low band of a dual-band LNB. When using DiSEqC epuipment this voltage has to
-be switched consistently to the DiSEqC commands as described in the DiSEqC
-spec.</para>
-<programlisting>
-	typedef enum fe_sec_tone_mode {
-	SEC_TONE_ON,
-	SEC_TONE_OFF
-	} fe_sec_tone_mode_t;
-</programlisting>
-</section>
-
 <section id="fe-sec-mini-cmd-t">
 <title>SEC tone burst</title>
 
@@ -582,52 +567,8 @@ typedef enum fe_hierarchy {
 &return-value-dvb;
 </section>
 
-<section id="FE_SET_TONE">
-<title>FE_SET_TONE</title>
-<para>DESCRIPTION
-</para>
-<informaltable><tgroup cols="1"><tbody><row><entry
- align="char">
-<para>This call is used to set the generation of the continuous 22kHz tone. This call
- requires read/write permissions.</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-<para>SYNOPSIS
-</para>
-<informaltable><tgroup cols="1"><tbody><row><entry
- align="char">
-<para>int ioctl(int fd, int request = <link linkend="FE_SET_TONE">FE_SET_TONE</link>,
- fe_sec_tone_mode_t tone);</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-<para>PARAMETERS
-</para>
-<informaltable><tgroup cols="2"><tbody><row><entry
- align="char">
-<para>int fd</para>
-</entry><entry
- align="char">
-<para>File descriptor returned by a previous call to open().</para>
-</entry>
- </row><row><entry
- align="char">
-<para>int request</para>
-</entry><entry
- align="char">
-<para>Equals <link linkend="FE_SET_TONE">FE_SET_TONE</link> for this command.</para>
-</entry>
- </row><row><entry
- align="char">
-<para>fe_sec_tone_mode_t
- tone</para>
-</entry><entry
- align="char">
-<para>The requested tone generation mode (on/off).</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-&return-value-dvb;
-</section>
 
+&sub-fe-set-tone;
 &sub-fe-set-voltage;
 &sub-fe-enable-high-lnb-voltage;
 &sub-fe-set-frontend-tune-mode;
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index c1ccbc82024c..1a098819473f 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -114,10 +114,12 @@ enum fe_sec_voltage {
 typedef enum fe_sec_voltage fe_sec_voltage_t;
 
 
-typedef enum fe_sec_tone_mode {
+enum fe_sec_tone_mode {
 	SEC_TONE_ON,
 	SEC_TONE_OFF
-} fe_sec_tone_mode_t;
+};
+
+typedef enum fe_sec_tone_mode fe_sec_tone_mode_t;
 
 
 typedef enum fe_sec_mini_cmd {
-- 
cgit 


From 81959d996a3b6ea542ebffc7e394530f4638c6ca Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Wed, 27 May 2015 22:20:14 -0300
Subject: [media] DocBook: better document FE_DISEQC_SEND_BURST ioctl

Use the proper format for FE_DISEQC_SEND_BURST documentation
and  improve the documentation.

Keep the enum fe_sec_mini_cmd description together with
the ioctl, as both are used together.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 .../DocBook/media/dvb/fe-diseqc-send-burst.xml     | 86 ++++++++++++++++++++++
 Documentation/DocBook/media/dvb/frontend.xml       | 67 +----------------
 include/uapi/linux/dvb/frontend.h                  |  6 +-
 3 files changed, 91 insertions(+), 68 deletions(-)
 create mode 100644 Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml b/Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml
new file mode 100644
index 000000000000..d1a798048641
--- /dev/null
+++ b/Documentation/DocBook/media/dvb/fe-diseqc-send-burst.xml
@@ -0,0 +1,86 @@
+<refentry id="FE_DISEQC_SEND_BURST">
+  <refmeta>
+    <refentrytitle>ioctl FE_DISEQC_SEND_BURST</refentrytitle>
+    &manvol;
+  </refmeta>
+
+  <refnamediv>
+    <refname>FE_DISEQC_SEND_BURST</refname>
+    <refpurpose>Sends a 22KHz tone burst for 2x1 mini DiSEqC satellite selection.</refpurpose>
+  </refnamediv>
+
+  <refsynopsisdiv>
+    <funcsynopsis>
+      <funcprototype>
+	<funcdef>int <function>ioctl</function></funcdef>
+	<paramdef>int <parameter>fd</parameter></paramdef>
+	<paramdef>int <parameter>request</parameter></paramdef>
+	<paramdef>&fe-sec-mini-cmd; *<parameter>tone</parameter></paramdef>
+      </funcprototype>
+    </funcsynopsis>
+  </refsynopsisdiv>
+
+  <refsect1>
+    <title>Arguments</title>
+        <variablelist>
+      <varlistentry>
+	<term><parameter>fd</parameter></term>
+	<listitem>
+	  <para>&fe_fd;</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>request</parameter></term>
+	<listitem>
+	  <para>FE_DISEQC_SEND_BURST</para>
+	</listitem>
+      </varlistentry>
+      <varlistentry>
+	<term><parameter>pointer to &fe-sec-mini-cmd;</parameter></term>
+      </varlistentry>
+    </variablelist>
+  </refsect1>
+
+  <refsect1>
+    <title>Description</title>
+
+<para>This ioctl is used to set the generation of a 22kHz tone burst for mini
+    DiSEqC satellite
+    selection for 2x1 switches.
+    This call requires read/write permissions.</para>
+<para>It provides support for what's specified at
+    <ulink url="http://www.eutelsat.com/files/contributed/satellites/pdf/Diseqc/associated%20docs/simple_tone_burst_detec.pdf">Digital Satellite Equipment Control
+	(DiSEqC) - Simple "ToneBurst" Detection Circuit specification.</ulink>
+    </para>
+&return-value-dvb;
+</refsect1>
+
+<section id="fe-sec-mini-cmd-t">
+<title>enum fe_sec_mini_cmd</title>
+
+<table pgwide="1" frame="none" id="fe-sec-mini-cmd">
+    <title>enum fe_sec_tone_mode</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry align="char">SEC_MINI_A</entry>
+	    <entry align="char">Sends a mini-DiSEqC 22kHz '0' Tone Burst to
+		select satellite-A</entry>
+	</row><row>
+	    <entry align="char">SEC_MINI_B</entry>
+	    <entry align="char">Sends a mini-DiSEqC 22kHz '1' Data Burst to
+		select satellite-B</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
+
+</refentry>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index f05da4abb3fe..17050152a48a 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -75,23 +75,6 @@ specification is available at
 </programlisting>
 </section>
 
-<section id="fe-sec-mini-cmd-t">
-<title>SEC tone burst</title>
-
-<para>The 22KHz tone burst is usually used with non-DiSEqC capable switches to select
-between two connected LNBs/satellites. When using DiSEqC epuipment this voltage has to
-be switched consistently to the DiSEqC commands as described in the DiSEqC
-spec.</para>
-<programlisting>
-	typedef enum fe_sec_mini_cmd {
-	SEC_MINI_A,
-	SEC_MINI_B
-	} fe_sec_mini_cmd_t;
-</programlisting>
-
-<para></para>
-</section>
-
 <section id="fe-spectral-inversion-t">
 <title>frontend spectral inversion</title>
 <para>The Inversion field can take one of these values:
@@ -519,55 +502,7 @@ typedef enum fe_hierarchy {
 &return-value-dvb;
 </section>
 
-<section id="FE_DISEQC_SEND_BURST">
-<title>FE_DISEQC_SEND_BURST</title>
-<para>DESCRIPTION
-</para>
-<informaltable><tgroup cols="1"><tbody><row><entry
- align="char">
-<para>This ioctl call is used to send a 22KHz tone burst.</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-
-<para>SYNOPSIS
-</para>
-<informaltable><tgroup cols="1"><tbody><row><entry
- align="char">
-<para>int ioctl(int fd, int request =
- <link linkend="FE_DISEQC_SEND_BURST">FE_DISEQC_SEND_BURST</link>, fe_sec_mini_cmd_t burst);</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-
-<para>PARAMETERS
-</para>
-<informaltable><tgroup cols="2"><tbody><row><entry
- align="char">
-<para>int fd</para>
-</entry><entry
- align="char">
-<para>File descriptor returned by a previous call to open().</para>
-</entry>
- </row><row><entry
- align="char">
-<para>int request</para>
-</entry><entry
- align="char">
-<para>Equals <link linkend="FE_DISEQC_SEND_BURST">FE_DISEQC_SEND_BURST</link> for this command.</para>
-</entry>
- </row><row><entry
- align="char">
-<para>fe_sec_mini_cmd_t
- burst</para>
-</entry><entry
- align="char">
-<para>burst A or B.</para>
-</entry>
- </row></tbody></tgroup></informaltable>
-
-&return-value-dvb;
-</section>
-
-
+&sub-fe-diseqc-send-burst;
 &sub-fe-set-tone;
 &sub-fe-set-voltage;
 &sub-fe-enable-high-lnb-voltage;
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 1a098819473f..dd64e6d5d881 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -122,10 +122,12 @@ enum fe_sec_tone_mode {
 typedef enum fe_sec_tone_mode fe_sec_tone_mode_t;
 
 
-typedef enum fe_sec_mini_cmd {
+enum fe_sec_mini_cmd {
 	SEC_MINI_A,
 	SEC_MINI_B
-} fe_sec_mini_cmd_t;
+};
+
+typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t;
 
 
 /**
-- 
cgit 


From 997eb9039df27dfd5b1901e26ebae09d5dbe6cff Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 28 May 2015 17:21:05 -0300
Subject: [media] DocBook: Better document enum fe_modulation

Instead of using programlisting, use a table, as this provides
a better view of the structure.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml | 91 +++++++++++++++++++------
 Documentation/DocBook/media/dvb/frontend.xml    | 26 -------
 include/uapi/linux/dvb/frontend.h               |  6 +-
 3 files changed, 76 insertions(+), 47 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 0fa4ccfd406d..d9861b54f8c8 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -137,25 +137,78 @@ get/set up to 64 properties. The actual meaning of each property is described on
 	</section>
 	<section id="DTV-MODULATION">
 	<title><constant>DTV_MODULATION</constant></title>
-<para>Specifies the frontend modulation type for cable and satellite types. The modulation can be one of the types bellow:</para>
-<programlisting>
- typedef enum fe_modulation {
-	QPSK,
-	QAM_16,
-	QAM_32,
-	QAM_64,
-	QAM_128,
-	QAM_256,
-	QAM_AUTO,
-	VSB_8,
-	VSB_16,
-	PSK_8,
-	APSK_16,
-	APSK_32,
-	DQPSK,
-	QAM_4_NR,
- } fe_modulation_t;
-</programlisting>
+<para>Specifies the frontend modulation type for delivery systems that supports
+    more than one modulation type. The modulation can be one of the types
+    defined by &fe-modulation;.</para>
+
+
+<section id="fe-modulation-t">
+<title>Modulation property</title>
+
+<para>Most of the digital TV standards currently offers more than one possible
+    modulation (sometimes called as "constellation" on some standards). This
+    enum contains the values used by the Kernel. Please notice that not all
+    modulations are supported by a given standard.</para>
+
+<table pgwide="1" frame="none" id="fe-modulation">
+    <title>enum fe_modulation</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry>QPSK</entry>
+	    <entry>QPSK modulation</entry>
+	</row><row>
+	    <entry>QAM_16</entry>
+	    <entry>16-QAM modulation</entry>
+	</row><row>
+	    <entry>QAM_32</entry>
+	    <entry>32-QAM modulation</entry>
+	</row><row>
+	    <entry>QAM_64</entry>
+	    <entry>64-QAM modulation</entry>
+	</row><row>
+	    <entry>QAM_128</entry>
+	    <entry>128-QAM modulation</entry>
+	</row><row>
+	    <entry>QAM_256</entry>
+	    <entry>256-QAM modulation</entry>
+	</row><row>
+	    <entry>QAM_AUTO</entry>
+	    <entry>Autodetect QAM modulation</entry>
+	</row><row>
+	    <entry>VSB_8</entry>
+	    <entry>8-VSB modulation</entry>
+	</row><row>
+	    <entry>VSB_16</entry>
+	    <entry>16-VSB modulation</entry>
+	</row><row>
+	    <entry>PSK_8</entry>
+	    <entry>8-PSK modulation</entry>
+	</row><row>
+	    <entry>APSK_16</entry>
+	    <entry>16-APSK modulation</entry>
+	</row><row>
+	    <entry>APSK_32</entry>
+	    <entry>32-APSK modulation</entry>
+	</row><row>
+	    <entry>DQPSK</entry>
+	    <entry>DQPSK modulation</entry>
+	</row><row>
+	    <entry>QAM_4_NR</entry>
+	    <entry>4-QAM-NR modulation</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
+
 	</section>
 	<section id="DTV-BANDWIDTH-HZ">
 		<title><constant>DTV_BANDWIDTH_HZ</constant></title>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 16a4648043d6..07c1284e88c8 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -100,32 +100,6 @@ detection.
 </para>
 </section>
 
-<section id="fe-modulation-t">
-<title>frontend modulation type for QAM, OFDM and VSB</title>
-<para>For cable and terrestrial frontends, e. g. for
-<link linkend="dvb-qam-parameters"><constant>struct dvb_qpsk_parameters</constant></link>,
-<link linkend="dvb-ofdm-parameters"><constant>struct dvb_qam_parameters</constant></link> and
-<link linkend="dvb-vsb-parameters"><constant>struct dvb_qam_parameters</constant></link>,
-it needs to specify the quadrature modulation mode which can be one of the following:
-</para>
-<programlisting>
- typedef enum fe_modulation {
-	QPSK,
-	QAM_16,
-	QAM_32,
-	QAM_64,
-	QAM_128,
-	QAM_256,
-	QAM_AUTO,
-	VSB_8,
-	VSB_16,
-	PSK_8,
-	APSK_16,
-	APSK_32,
-	DQPSK,
- } fe_modulation_t;
-</programlisting>
-</section>
 
 <section>
 <title>More OFDM parameters</title>
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index dd64e6d5d881..d4b1718046ae 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -178,7 +178,7 @@ typedef enum fe_code_rate {
 } fe_code_rate_t;
 
 
-typedef enum fe_modulation {
+enum fe_modulation {
 	QPSK,
 	QAM_16,
 	QAM_32,
@@ -193,7 +193,9 @@ typedef enum fe_modulation {
 	APSK_32,
 	DQPSK,
 	QAM_4_NR,
-} fe_modulation_t;
+};
+
+typedef enum fe_modulation fe_modulation_t;
 
 typedef enum fe_transmit_mode {
 	TRANSMISSION_MODE_2K,
-- 
cgit 


From 58e11cc3c1f7d7e9fa70ba6c3d363456151fcffd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 28 May 2015 20:00:43 -0300
Subject: [media] DocBook: improve documentation for DVB spectral inversion

Format it as a table and provide more details.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml    | 52 ++++++++++++++++------
 Documentation/DocBook/media/dvb/frontend.xml       | 17 -------
 .../DocBook/media/dvb/frontend_legacy_api.xml      |  2 +-
 include/uapi/linux/dvb/frontend.h                  |  5 ++-
 4 files changed, 43 insertions(+), 33 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index d9861b54f8c8..41085537acfc 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -238,19 +238,45 @@ get/set up to 64 properties. The actual meaning of each property is described on
 	</section>
 	<section id="DTV-INVERSION">
 	<title><constant>DTV_INVERSION</constant></title>
-	<para>The Inversion field can take one of these values:
-	</para>
-	<programlisting>
-	typedef enum fe_spectral_inversion {
-		INVERSION_OFF,
-		INVERSION_ON,
-		INVERSION_AUTO
-	} fe_spectral_inversion_t;
-	</programlisting>
-	<para>It indicates if spectral inversion should be presumed or not. In the automatic setting
-	(<constant>INVERSION_AUTO</constant>) the hardware will try to figure out the correct setting by
-	itself.
-	</para>
+
+	<para>Specifies if the frontend should do spectral inversion or not.</para>
+
+<section id="fe-spectral-inversion-t">
+<title>enum fe_modulation: Frontend spectral inversion</title>
+
+<para>This parameter indicates if spectral inversion should be presumed or not.
+    In the automatic setting (<constant>INVERSION_AUTO</constant>) the hardware
+    will try to figure out the correct setting by itself. If the hardware
+    doesn't support, the DVB core will try to lock at the carrier first with
+    inversion off. If it fails, it will try to enable inversion.
+</para>
+
+<table pgwide="1" frame="none" id="fe-spectral-inversion">
+    <title>enum fe_modulation</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry>INVERSION_OFF</entry>
+	    <entry>Don't do spectral band inversion.</entry>
+	</row><row>
+	    <entry>INVERSION_ON</entry>
+	    <entry>Do spectral band inversion.</entry>
+	</row><row>
+	    <entry>INVERSION_AUTO</entry>
+	    <entry>Autodetect spectral band inversion.</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
+
 	</section>
 	<section id="DTV-DISEQC-MASTER">
 	<title><constant>DTV_DISEQC_MASTER</constant></title>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 07c1284e88c8..77dd88ceeedd 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -56,23 +56,6 @@ specification is available at
 
 &sub-dvbproperty;
 
-<section id="fe-spectral-inversion-t">
-<title>frontend spectral inversion</title>
-<para>The Inversion field can take one of these values:
-</para>
-<programlisting>
-typedef enum fe_spectral_inversion {
-	INVERSION_OFF,
-	INVERSION_ON,
-	INVERSION_AUTO
-} fe_spectral_inversion_t;
-</programlisting>
-<para>It indicates if spectral inversion should be presumed or not. In the automatic setting
-(<constant>INVERSION_AUTO</constant>) the hardware will try to figure out the correct setting by
-itself.
-</para>
-</section>
-
 <section id="fe-code-rate-t">
 <title>frontend code rate</title>
 <para>The possible values for the <constant>fec_inner</constant> field used on
diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
index 7d5823858df0..fe1117e91f51 100644
--- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
+++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
@@ -82,7 +82,7 @@ DVB-C2, ISDB, etc.</para>
 struct dvb_frontend_parameters {
 	uint32_t frequency;     /&#x22C6; (absolute) frequency in Hz for QAM/OFDM &#x22C6;/
 				/&#x22C6; intermediate frequency in kHz for QPSK &#x22C6;/
-	fe_spectral_inversion_t inversion;
+	&fe-spectral-inversion-t; inversion;
 	union {
 		struct dvb_qpsk_parameters qpsk;
 		struct dvb_qam_parameters  qam;
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index d4b1718046ae..223905563676 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -154,12 +154,13 @@ enum fe_status {
 
 typedef enum fe_status fe_status_t;
 
-typedef enum fe_spectral_inversion {
+enum fe_spectral_inversion {
 	INVERSION_OFF,
 	INVERSION_ON,
 	INVERSION_AUTO
-} fe_spectral_inversion_t;
+};
 
+typedef enum fe_spectral_inversion fe_spectral_inversion_t;
 
 typedef enum fe_code_rate {
 	FEC_NONE = 0,
-- 
cgit 


From 0577a2f6d84a08da96c908a885db16b4d3532dc4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 28 May 2015 20:52:52 -0300
Subject: [media] DocBook: improve documentation for OFDM transmission mode

Format it as a table and add more details, in special, for
the DTMB modes.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml    | 69 +++++++++++++++++-----
 Documentation/DocBook/media/dvb/frontend.xml       | 15 -----
 .../DocBook/media/dvb/frontend_legacy_api.xml      |  2 +-
 include/uapi/linux/dvb/frontend.h                  |  6 +-
 4 files changed, 59 insertions(+), 33 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 41085537acfc..06a12f1c57c5 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -827,22 +827,61 @@ typedef enum fe_guard_interval {
 	<section id="DTV-TRANSMISSION-MODE">
 		<title><constant>DTV_TRANSMISSION_MODE</constant></title>
 
-		<para>Specifies the number of carriers used by the standard</para>
+		<para>Specifies the number of carriers used by the standard.
+		    This is used only on OFTM-based standards, e. g.
+		    DVB-T/T2, ISDB-T, DTMB</para>
+
+<section id="fe-transmit-mode-t">
+<title>enum fe_transmit_mode: Number of carriers per channel</title>
+
+<table pgwide="1" frame="none" id="fe-transmit-mode">
+    <title>enum fe_transmit_mode</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry>TRANSMISSION_MODE_AUTO</entry>
+	    <entry>Autodetect transmission mode. The hardware will try to find
+		the correct FFT-size (if capable) to fill in the missing
+		parameters.</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_1K</entry>
+	    <entry>Transmission mode 1K</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_2K</entry>
+	    <entry>Transmission mode 2K</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_8K</entry>
+	    <entry>Transmission mode 8K</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_4K</entry>
+	    <entry>Transmission mode 4K</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_16K</entry>
+	    <entry>Transmission mode 16K</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_32K</entry>
+	    <entry>Transmission mode 32K</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_C1</entry>
+	    <entry>Single Carrier (C=1) transmission mode (DTMB)</entry>
+	</row><row>
+	    <entry>TRANSMISSION_MODE_C3780</entry>
+	    <entry>Multi Carrier (C=3780) transmission mode (DTMB)</entry>
+	</row><row>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
+
 
-		<para>Possible values are:</para>
-<programlisting>
-typedef enum fe_transmit_mode {
-	TRANSMISSION_MODE_2K,
-	TRANSMISSION_MODE_8K,
-	TRANSMISSION_MODE_AUTO,
-	TRANSMISSION_MODE_4K,
-	TRANSMISSION_MODE_1K,
-	TRANSMISSION_MODE_16K,
-	TRANSMISSION_MODE_32K,
-	TRANSMISSION_MODE_C1,
-	TRANSMISSION_MODE_C3780,
-} fe_transmit_mode_t;
-</programlisting>
 		<para>Notes:</para>
 		<para>1) ISDB-T supports three carrier/symbol-size: 8K, 4K, 2K. It is called
 			'mode' in the standard: Mode 1 is 2K, mode 2 is 4K, mode 3 is 8K</para>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 77dd88ceeedd..3b6a169ac8f3 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -87,21 +87,6 @@ detection.
 <section>
 <title>More OFDM parameters</title>
 
-<section id="fe-transmit-mode-t">
-<title>Number of carriers per channel</title>
-<programlisting>
-typedef enum fe_transmit_mode {
-	TRANSMISSION_MODE_2K,
-	TRANSMISSION_MODE_8K,
-	TRANSMISSION_MODE_AUTO,
-	TRANSMISSION_MODE_4K,
-	TRANSMISSION_MODE_1K,
-	TRANSMISSION_MODE_16K,
-	TRANSMISSION_MODE_32K,
- } fe_transmit_mode_t;
-</programlisting>
-</section>
-
 <section id="fe-bandwidth-t">
 <title>frontend bandwidth</title>
 <programlisting>
diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
index fe1117e91f51..fa0c6649abfd 100644
--- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
+++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
@@ -139,7 +139,7 @@ struct dvb_vsb_parameters {
 	 fe_code_rate_t      code_rate_HP;  /&#x22C6; high priority stream code rate &#x22C6;/
 	 fe_code_rate_t      code_rate_LP;  /&#x22C6; low priority stream code rate &#x22C6;/
 	 &fe-modulation-t;     constellation; /&#x22C6; modulation type (see above) &#x22C6;/
-	 fe_transmit_mode_t  transmission_mode;
+	 &fe-transmit-mode-t;  transmission_mode;
 	 fe_guard_interval_t guard_interval;
 	 fe_hierarchy_t      hierarchy_information;
  };
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 223905563676..c42e6d849f52 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -198,7 +198,7 @@ enum fe_modulation {
 
 typedef enum fe_modulation fe_modulation_t;
 
-typedef enum fe_transmit_mode {
+enum fe_transmit_mode {
 	TRANSMISSION_MODE_2K,
 	TRANSMISSION_MODE_8K,
 	TRANSMISSION_MODE_AUTO,
@@ -208,7 +208,9 @@ typedef enum fe_transmit_mode {
 	TRANSMISSION_MODE_32K,
 	TRANSMISSION_MODE_C1,
 	TRANSMISSION_MODE_C3780,
-} fe_transmit_mode_t;
+};
+
+typedef enum fe_transmit_mode fe_transmit_mode_t;
 
 #if defined(__DVB_CORE__) || !defined (__KERNEL__)
 typedef enum fe_bandwidth {
-- 
cgit 


From b174fb71e82eef2355aabece4b50fe1540e67544 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 28 May 2015 20:57:53 -0300
Subject: [media] DocBook: move fe_bandwidth to the frontend legacy section

fe_bandwidth/fe_bandwidth_t is used only on DVBv3 API. So, move
it to the frontend legacy xml, and convert it into a table.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/frontend.xml       | 15 --------
 .../DocBook/media/dvb/frontend_legacy_api.xml      | 44 +++++++++++++++++++++-
 include/uapi/linux/dvb/frontend.h                  |  6 ++-
 3 files changed, 47 insertions(+), 18 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 3b6a169ac8f3..93d22486f20c 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -87,21 +87,6 @@ detection.
 <section>
 <title>More OFDM parameters</title>
 
-<section id="fe-bandwidth-t">
-<title>frontend bandwidth</title>
-<programlisting>
-typedef enum fe_bandwidth {
-	BANDWIDTH_8_MHZ,
-	BANDWIDTH_7_MHZ,
-	BANDWIDTH_6_MHZ,
-	BANDWIDTH_AUTO,
-	BANDWIDTH_5_MHZ,
-	BANDWIDTH_10_MHZ,
-	BANDWIDTH_1_712_MHZ,
-} fe_bandwidth_t;
-</programlisting>
-</section>
-
 <section id="fe-guard-interval-t">
 <title>frontend guard inverval</title>
 <programlisting>
diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
index fa0c6649abfd..ed393f22f7a7 100644
--- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
+++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
@@ -60,6 +60,48 @@ supported via the new <link linkend="FE_GET_PROPERTY">FE_GET_PROPERTY/FE_GET_SET
     using the &DTV-DELIVERY-SYSTEM; property.</para>
 </section>
 
+<section id="fe-bandwidth-t">
+<title>Frontend bandwidth</title>
+
+<table pgwide="1" frame="none" id="fe-bandwidth">
+    <title>enum fe_bandwidth</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry>BANDWIDTH_AUTO</entry>
+	    <entry>Autodetect bandwidth (if supported)</entry>
+	</row><row>
+	    <entry>BANDWIDTH_1_712_MHZ</entry>
+	    <entry>1.712 MHz</entry>
+	</row><row>
+	    <entry>BANDWIDTH_5_MHZ</entry>
+	    <entry>5 MHz</entry>
+	</row><row>
+	    <entry>BANDWIDTH_6_MHZ</entry>
+	    <entry>6 MHz</entry>
+	</row><row>
+	    <entry>BANDWIDTH_7_MHZ</entry>
+	    <entry>7 MHz</entry>
+	</row><row>
+	    <entry>BANDWIDTH_8_MHZ</entry>
+	    <entry>8 MHz</entry>
+	</row><row>
+	    <entry>BANDWIDTH_10_MHZ</entry>
+	    <entry>10 MHz</entry>
+	</row><row>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+
+</section>
 
 <section id="dvb-frontend-parameters">
 <title>frontend parameters</title>
@@ -135,7 +177,7 @@ struct dvb_vsb_parameters {
 <para>DVB-T frontends are supported by the <constant>dvb_ofdm_parameters</constant> structure:</para>
 <programlisting>
  struct dvb_ofdm_parameters {
-	 fe_bandwidth_t      bandwidth;
+	 &fe-bandwidth-t;      bandwidth;
 	 fe_code_rate_t      code_rate_HP;  /&#x22C6; high priority stream code rate &#x22C6;/
 	 fe_code_rate_t      code_rate_LP;  /&#x22C6; low priority stream code rate &#x22C6;/
 	 &fe-modulation-t;     constellation; /&#x22C6; modulation type (see above) &#x22C6;/
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index c42e6d849f52..43e6faf91849 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -213,7 +213,7 @@ enum fe_transmit_mode {
 typedef enum fe_transmit_mode fe_transmit_mode_t;
 
 #if defined(__DVB_CORE__) || !defined (__KERNEL__)
-typedef enum fe_bandwidth {
+enum fe_bandwidth {
 	BANDWIDTH_8_MHZ,
 	BANDWIDTH_7_MHZ,
 	BANDWIDTH_6_MHZ,
@@ -221,7 +221,9 @@ typedef enum fe_bandwidth {
 	BANDWIDTH_5_MHZ,
 	BANDWIDTH_10_MHZ,
 	BANDWIDTH_1_712_MHZ,
-} fe_bandwidth_t;
+};
+
+typedef enum fe_bandwidth fe_bandwidth_t;
 #endif
 
 typedef enum fe_guard_interval {
-- 
cgit 


From 2d457b8a9054b9c5b1fcfbc5702b7d0e9f6cda2b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 28 May 2015 21:38:44 -0300
Subject: [media] DocBook: improve documentation for FEC fields

Format it as a table and add more details. Also, remove the
duplicated occurrences.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml    | 123 ++++++++++++---------
 Documentation/DocBook/media/dvb/frontend.xml       |  28 -----
 .../DocBook/media/dvb/frontend_legacy_api.xml      |   8 +-
 include/uapi/linux/dvb/frontend.h                  |   6 +-
 4 files changed, 78 insertions(+), 87 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 06a12f1c57c5..b96a91a1494d 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -290,25 +290,70 @@ get/set up to 64 properties. The actual meaning of each property is described on
 	<title><constant>DTV_INNER_FEC</constant></title>
 	<para>Used cable/satellite transmissions. The acceptable values are:
 	</para>
-	<programlisting>
-typedef enum fe_code_rate {
-	FEC_NONE = 0,
-	FEC_1_2,
-	FEC_2_3,
-	FEC_3_4,
-	FEC_4_5,
-	FEC_5_6,
-	FEC_6_7,
-	FEC_7_8,
-	FEC_8_9,
-	FEC_AUTO,
-	FEC_3_5,
-	FEC_9_10,
-	FEC_2_5,
-} fe_code_rate_t;
-	</programlisting>
-	<para>which correspond to error correction rates of 1/2, 2/3, etc.,
-	no error correction or auto detection.</para>
+<section id="fe-code-rate-t">
+<title>enum fe_code_rate: type of the Forward Error Correction.</title>
+
+<table pgwide="1" frame="none" id="fe-code-rate">
+    <title>enum fe_code_rate</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry>TRANSMISSION_MODE_AUTO</entry>
+	    <entry>Autodetect transmission mode. The hardware will try to find
+		the correct FFT-size (if capable) to fill in the missing
+		parameters.</entry>
+	</row><row>
+	    <entry>FEC_NONE</entry>
+	    <entry>No Forward Error Correction Code</entry>
+	</row><row>
+	    <entry>FEC_AUTO</entry>
+	    <entry>Autodetect Error Correction Code</entry>
+	</row><row>
+	    <entry>FEC_1_2</entry>
+	    <entry>Forward Error Correction Code 1/2</entry>
+	</row><row>
+	    <entry>FEC_2_3</entry>
+	    <entry>Forward Error Correction Code 2/3</entry>
+	</row><row>
+	    <entry>FEC_3_4</entry>
+	    <entry>Forward Error Correction Code 3/4</entry>
+	</row><row>
+	    <entry>FEC_4_5</entry>
+	    <entry>Forward Error Correction Code 4/5</entry>
+	</row><row>
+	    <entry>FEC_5_6</entry>
+	    <entry>Forward Error Correction Code 5/6</entry>
+	</row><row>
+	    <entry>FEC_6_7</entry>
+	    <entry>Forward Error Correction Code 6/7</entry>
+	</row><row>
+	    <entry>FEC_7_8</entry>
+	    <entry>Forward Error Correction Code 7/8</entry>
+	</row><row>
+	    <entry>FEC_8_9</entry>
+	    <entry>Forward Error Correction Code 8/9</entry>
+	</row><row>
+	    <entry>FEC_9_10</entry>
+	    <entry>Forward Error Correction Code 9/10</entry>
+	</row><row>
+	    <entry>FEC_2_5</entry>
+	    <entry>Forward Error Correction Code 2/5</entry>
+	</row><row>
+	    <entry>FEC_3_5</entry>
+	    <entry>Forward Error Correction Code 3/5</entry>
+	</row><row>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
 	</section>
 	<section id="DTV-VOLTAGE">
 	<title><constant>DTV_VOLTAGE</constant></title>
@@ -757,46 +802,18 @@ typedef enum atscmh_sccc_code_mode {
 	</section>
 	<section id="DTV-CODE-RATE-HP">
 	<title><constant>DTV_CODE_RATE_HP</constant></title>
-	<para>Used on terrestrial transmissions. The acceptable values are:
+	<para>Used on terrestrial transmissions.  The acceptable values are
+	    the ones described at &fe-transmit-mode-t;.
 	</para>
-	<programlisting>
-typedef enum fe_code_rate {
-	FEC_NONE = 0,
-	FEC_1_2,
-	FEC_2_3,
-	FEC_3_4,
-	FEC_4_5,
-	FEC_5_6,
-	FEC_6_7,
-	FEC_7_8,
-	FEC_8_9,
-	FEC_AUTO,
-	FEC_3_5,
-	FEC_9_10,
-} fe_code_rate_t;
-	</programlisting>
 	</section>
 	<section id="DTV-CODE-RATE-LP">
 	<title><constant>DTV_CODE_RATE_LP</constant></title>
-	<para>Used on terrestrial transmissions. The acceptable values are:
+	<para>Used on terrestrial transmissions. The acceptable values are
+	    the ones described at &fe-transmit-mode-t;.
 	</para>
-	<programlisting>
-typedef enum fe_code_rate {
-	FEC_NONE = 0,
-	FEC_1_2,
-	FEC_2_3,
-	FEC_3_4,
-	FEC_4_5,
-	FEC_5_6,
-	FEC_6_7,
-	FEC_7_8,
-	FEC_8_9,
-	FEC_AUTO,
-	FEC_3_5,
-	FEC_9_10,
-} fe_code_rate_t;
-	</programlisting>
+
 	</section>
+
 	<section id="DTV-GUARD-INTERVAL">
 		<title><constant>DTV_GUARD_INTERVAL</constant></title>
 
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 93d22486f20c..563800eb1216 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -56,34 +56,6 @@ specification is available at
 
 &sub-dvbproperty;
 
-<section id="fe-code-rate-t">
-<title>frontend code rate</title>
-<para>The possible values for the <constant>fec_inner</constant> field used on
-<link linkend="dvb-qpsk-parameters"><constant>struct dvb_qpsk_parameters</constant></link> and
-<link linkend="dvb-qam-parameters"><constant>struct dvb_qam_parameters</constant></link> are:
-</para>
-<programlisting>
-typedef enum fe_code_rate {
-	FEC_NONE = 0,
-	FEC_1_2,
-	FEC_2_3,
-	FEC_3_4,
-	FEC_4_5,
-	FEC_5_6,
-	FEC_6_7,
-	FEC_7_8,
-	FEC_8_9,
-	FEC_AUTO,
-	FEC_3_5,
-	FEC_9_10,
-} fe_code_rate_t;
-</programlisting>
-<para>which correspond to error correction rates of 1/2, 2/3, etc., no error correction or auto
-detection.
-</para>
-</section>
-
-
 <section>
 <title>More OFDM parameters</title>
 
diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
index ed393f22f7a7..c1dfbd8096bd 100644
--- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
+++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
@@ -145,7 +145,7 @@ OFDM frontends the <constant>frequency</constant> specifies the absolute frequen
 <programlisting>
  struct dvb_qpsk_parameters {
 	 uint32_t        symbol_rate;  /&#x22C6; symbol rate in Symbols per second &#x22C6;/
-	 fe_code_rate_t  fec_inner;    /&#x22C6; forward error correction (see above) &#x22C6;/
+	 &fe-code-rate-t;  fec_inner;    /&#x22C6; forward error correction (see above) &#x22C6;/
  };
 </programlisting>
 </section>
@@ -156,7 +156,7 @@ OFDM frontends the <constant>frequency</constant> specifies the absolute frequen
 <programlisting>
  struct dvb_qam_parameters {
 	 uint32_t         symbol_rate; /&#x22C6; symbol rate in Symbols per second &#x22C6;/
-	 fe_code_rate_t   fec_inner;   /&#x22C6; forward error correction (see above) &#x22C6;/
+	 &fe-code-rate-t;   fec_inner;   /&#x22C6; forward error correction (see above) &#x22C6;/
 	 &fe-modulation-t;  modulation;  /&#x22C6; modulation type (see above) &#x22C6;/
  };
 </programlisting>
@@ -178,8 +178,8 @@ struct dvb_vsb_parameters {
 <programlisting>
  struct dvb_ofdm_parameters {
 	 &fe-bandwidth-t;      bandwidth;
-	 fe_code_rate_t      code_rate_HP;  /&#x22C6; high priority stream code rate &#x22C6;/
-	 fe_code_rate_t      code_rate_LP;  /&#x22C6; low priority stream code rate &#x22C6;/
+	 &fe-code-rate-t;      code_rate_HP;  /&#x22C6; high priority stream code rate &#x22C6;/
+	 &fe-code-rate-t;      code_rate_LP;  /&#x22C6; low priority stream code rate &#x22C6;/
 	 &fe-modulation-t;     constellation; /&#x22C6; modulation type (see above) &#x22C6;/
 	 &fe-transmit-mode-t;  transmission_mode;
 	 fe_guard_interval_t guard_interval;
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 43e6faf91849..49f6e980125b 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -162,7 +162,7 @@ enum fe_spectral_inversion {
 
 typedef enum fe_spectral_inversion fe_spectral_inversion_t;
 
-typedef enum fe_code_rate {
+enum fe_code_rate {
 	FEC_NONE = 0,
 	FEC_1_2,
 	FEC_2_3,
@@ -176,7 +176,9 @@ typedef enum fe_code_rate {
 	FEC_3_5,
 	FEC_9_10,
 	FEC_2_5,
-} fe_code_rate_t;
+};
+
+typedef enum fe_code_rate fe_code_rate_t;
 
 
 enum fe_modulation {
-- 
cgit 


From 903142e53c648ee61c00f5c3b420b16bc6336ad7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 28 May 2015 22:01:41 -0300
Subject: [media] DocBook: improve documentation for guard interval

Format it as a table and add more details, in special for
DTMB guard intervals.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml    | 68 +++++++++++++++++-----
 Documentation/DocBook/media/dvb/frontend.xml       | 16 -----
 .../DocBook/media/dvb/frontend_legacy_api.xml      |  2 +-
 include/uapi/linux/dvb/frontend.h                  |  5 +-
 4 files changed, 57 insertions(+), 34 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index b96a91a1494d..5f30a28a15b0 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -818,21 +818,59 @@ typedef enum atscmh_sccc_code_mode {
 		<title><constant>DTV_GUARD_INTERVAL</constant></title>
 
 		<para>Possible values are:</para>
-<programlisting>
-typedef enum fe_guard_interval {
-	GUARD_INTERVAL_1_32,
-	GUARD_INTERVAL_1_16,
-	GUARD_INTERVAL_1_8,
-	GUARD_INTERVAL_1_4,
-	GUARD_INTERVAL_AUTO,
-	GUARD_INTERVAL_1_128,
-	GUARD_INTERVAL_19_128,
-	GUARD_INTERVAL_19_256,
-	GUARD_INTERVAL_PN420,
-	GUARD_INTERVAL_PN595,
-	GUARD_INTERVAL_PN945,
-} fe_guard_interval_t;
-</programlisting>
+
+<section id="fe-guard-interval-t">
+<title>Modulation guard interval</title>
+
+<table pgwide="1" frame="none" id="fe-guard-interval">
+    <title>enum fe_guard_interval</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry>GUARD_INTERVAL_AUTO</entry>
+	    <entry>Autodetect the guard interval</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_1_128</entry>
+	    <entry>Guard interval 1/128</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_1_32</entry>
+	    <entry>Guard interval 1/32</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_1_16</entry>
+	    <entry>Guard interval 1/16</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_1_8</entry>
+	    <entry>Guard interval 1/8</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_1_4</entry>
+	    <entry>Guard interval 1/4</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_19_128</entry>
+	    <entry>Guard interval 19/128</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_19_256</entry>
+	    <entry>Guard interval 19/256</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_PN420</entry>
+	    <entry>PN length 420 (1/4)</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_PN595</entry>
+	    <entry>PN length 595 (1/6)</entry>
+	</row><row>
+	    <entry>GUARD_INTERVAL_PN945</entry>
+	    <entry>PN length 945 (1/9)</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
 
 		<para>Notes:</para>
 		<para>1) If <constant>DTV_GUARD_INTERVAL</constant> is set the <constant>GUARD_INTERVAL_AUTO</constant> the hardware will
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index 563800eb1216..a005c4b472f5 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -59,22 +59,6 @@ specification is available at
 <section>
 <title>More OFDM parameters</title>
 
-<section id="fe-guard-interval-t">
-<title>frontend guard inverval</title>
-<programlisting>
-typedef enum fe_guard_interval {
-	GUARD_INTERVAL_1_32,
-	GUARD_INTERVAL_1_16,
-	GUARD_INTERVAL_1_8,
-	GUARD_INTERVAL_1_4,
-	GUARD_INTERVAL_AUTO,
-	GUARD_INTERVAL_1_128,
-	GUARD_INTERVAL_19_128,
-	GUARD_INTERVAL_19_256,
-} fe_guard_interval_t;
-</programlisting>
-</section>
-
 <section id="fe-hierarchy-t">
 <title>frontend hierarchy</title>
 <programlisting>
diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
index c1dfbd8096bd..d20f1fd75fa9 100644
--- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
+++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
@@ -182,7 +182,7 @@ struct dvb_vsb_parameters {
 	 &fe-code-rate-t;      code_rate_LP;  /&#x22C6; low priority stream code rate &#x22C6;/
 	 &fe-modulation-t;     constellation; /&#x22C6; modulation type (see above) &#x22C6;/
 	 &fe-transmit-mode-t;  transmission_mode;
-	 fe_guard_interval_t guard_interval;
+	 &fe-guard-interval-t; guard_interval;
 	 fe_hierarchy_t      hierarchy_information;
  };
 </programlisting>
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 49f6e980125b..1d2b7c6dee04 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -228,7 +228,7 @@ enum fe_bandwidth {
 typedef enum fe_bandwidth fe_bandwidth_t;
 #endif
 
-typedef enum fe_guard_interval {
+enum fe_guard_interval {
 	GUARD_INTERVAL_1_32,
 	GUARD_INTERVAL_1_16,
 	GUARD_INTERVAL_1_8,
@@ -240,8 +240,9 @@ typedef enum fe_guard_interval {
 	GUARD_INTERVAL_PN420,
 	GUARD_INTERVAL_PN595,
 	GUARD_INTERVAL_PN945,
-} fe_guard_interval_t;
+};
 
+typedef enum fe_guard_interval fe_guard_interval_t;
 
 typedef enum fe_hierarchy {
 	HIERARCHY_NONE,
-- 
cgit 


From 9df4fc5b8f34383d116a160809e782b4ca50a808 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Thu, 28 May 2015 22:06:56 -0300
Subject: [media] DocBook: improve documentation for hierarchy

Format it as a table and links it with the legacy API xml.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml    | 46 +++++++++++++++++-----
 Documentation/DocBook/media/dvb/frontend.xml       | 18 ---------
 .../DocBook/media/dvb/frontend_legacy_api.xml      |  2 +-
 include/uapi/linux/dvb/frontend.h                  |  6 ++-
 4 files changed, 42 insertions(+), 30 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 5f30a28a15b0..ae9bc1e089cc 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -951,15 +951,43 @@ typedef enum atscmh_sccc_code_mode {
 	<section id="DTV-HIERARCHY">
 	<title><constant>DTV_HIERARCHY</constant></title>
 	<para>Frontend hierarchy</para>
-	<programlisting>
-typedef enum fe_hierarchy {
-	 HIERARCHY_NONE,
-	 HIERARCHY_1,
-	 HIERARCHY_2,
-	 HIERARCHY_4,
-	 HIERARCHY_AUTO
- } fe_hierarchy_t;
-	</programlisting>
+
+
+<section id="fe-hierarchy-t">
+<title>Frontend hierarchy</title>
+
+<table pgwide="1" frame="none" id="fe-hierarchy">
+    <title>enum fe_hierarchy</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	     <entry>HIERARCHY_NONE</entry>
+	    <entry>No hierarchy</entry>
+	</row><row>
+	     <entry>HIERARCHY_AUTO</entry>
+	    <entry>Autodetect hierarchy (if supported)</entry>
+	</row><row>
+	     <entry>HIERARCHY_1</entry>
+	    <entry>Hierarchy 1</entry>
+	</row><row>
+	     <entry>HIERARCHY_2</entry>
+	    <entry>Hierarchy 2</entry>
+	</row><row>
+	     <entry>HIERARCHY_4</entry>
+	    <entry>Hierarchy 4</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+</section>
+
 	</section>
 	<section id="DTV-STREAM-ID">
 	<title><constant>DTV_STREAM_ID</constant></title>
diff --git a/Documentation/DocBook/media/dvb/frontend.xml b/Documentation/DocBook/media/dvb/frontend.xml
index a005c4b472f5..d81b3ff33295 100644
--- a/Documentation/DocBook/media/dvb/frontend.xml
+++ b/Documentation/DocBook/media/dvb/frontend.xml
@@ -56,24 +56,6 @@ specification is available at
 
 &sub-dvbproperty;
 
-<section>
-<title>More OFDM parameters</title>
-
-<section id="fe-hierarchy-t">
-<title>frontend hierarchy</title>
-<programlisting>
-typedef enum fe_hierarchy {
-	 HIERARCHY_NONE,
-	 HIERARCHY_1,
-	 HIERARCHY_2,
-	 HIERARCHY_4,
-	 HIERARCHY_AUTO
- } fe_hierarchy_t;
-</programlisting>
-</section>
-
-</section>
-
 <section id="frontend_fcalls">
 <title>Frontend Function Calls</title>
 
diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
index d20f1fd75fa9..cb2e18381305 100644
--- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
+++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
@@ -183,7 +183,7 @@ struct dvb_vsb_parameters {
 	 &fe-modulation-t;     constellation; /&#x22C6; modulation type (see above) &#x22C6;/
 	 &fe-transmit-mode-t;  transmission_mode;
 	 &fe-guard-interval-t; guard_interval;
-	 fe_hierarchy_t      hierarchy_information;
+	 &fe-hierarchy-t;      hierarchy_information;
  };
 </programlisting>
 </section>
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 1d2b7c6dee04..3a7ff9002654 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -244,13 +244,15 @@ enum fe_guard_interval {
 
 typedef enum fe_guard_interval fe_guard_interval_t;
 
-typedef enum fe_hierarchy {
+enum fe_hierarchy {
 	HIERARCHY_NONE,
 	HIERARCHY_1,
 	HIERARCHY_2,
 	HIERARCHY_4,
 	HIERARCHY_AUTO
-} fe_hierarchy_t;
+};
+
+typedef enum fe_hierarchy fe_hierarchy_t;
 
 enum fe_interleaving {
 	INTERLEAVING_NONE,
-- 
cgit 


From 2e5e435fb4fdcc64db49e903baddb1ea8827385e Mon Sep 17 00:00:00 2001
From: Ricardo Ribalda <ricardo.ribalda@gmail.com>
Date: Mon, 4 May 2015 05:07:30 -0300
Subject: [media] media/v4l2-core: Add support for V4L2_PIX_FMT_Y16_BE

16 bit greyscale format, structured in Big Endian. Such a format can be
converted into a PMN image just by adding a header.

Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/v4l2-core/v4l2-ioctl.c | 3 ++-
 include/uapi/linux/videodev2.h       | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 14766029bf49..c5677e4030bb 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -1146,6 +1146,7 @@ static void v4l_fill_fmtdesc(struct v4l2_fmtdesc *fmt)
 	case V4L2_PIX_FMT_Y10:		descr = "10-bit Greyscale"; break;
 	case V4L2_PIX_FMT_Y12:		descr = "12-bit Greyscale"; break;
 	case V4L2_PIX_FMT_Y16:		descr = "16-bit Greyscale"; break;
+	case V4L2_PIX_FMT_Y16_BE:	descr = "16-bit Greyscale BE"; break;
 	case V4L2_PIX_FMT_Y10BPACK:	descr = "10-bit Greyscale (Packed)"; break;
 	case V4L2_PIX_FMT_PAL8:		descr = "8-bit Palette"; break;
 	case V4L2_PIX_FMT_UV8:		descr = "8-bit Chrominance UV 4-4"; break;
@@ -2539,7 +2540,7 @@ static long __video_do_ioctl(struct file *file,
 	if (v4l2_is_known_ioctl(cmd)) {
 		info = &v4l2_ioctls[_IOC_NR(cmd)];
 
-	        if (!test_bit(_IOC_NR(cmd), vfd->valid_ioctls) &&
+		if (!test_bit(_IOC_NR(cmd), vfd->valid_ioctls) &&
 		    !((info->flags & INFO_FL_CTRL) && vfh && vfh->ctrl_handler))
 			goto done;
 
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 0f5a4673f3e4..bda496adb50b 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -404,6 +404,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_Y10     v4l2_fourcc('Y', '1', '0', ' ') /* 10  Greyscale     */
 #define V4L2_PIX_FMT_Y12     v4l2_fourcc('Y', '1', '2', ' ') /* 12  Greyscale     */
 #define V4L2_PIX_FMT_Y16     v4l2_fourcc('Y', '1', '6', ' ') /* 16  Greyscale     */
+#define V4L2_PIX_FMT_Y16_BE  v4l2_fourcc_be('Y', '1', '6', ' ') /* 16  Greyscale BE  */
 
 /* Grey bit-packed formats */
 #define V4L2_PIX_FMT_Y10BPACK    v4l2_fourcc('Y', '1', '0', 'B') /* 10  Greyscale bit-packed */
-- 
cgit 


From e01dfc01914ab9a078ca8d08287c19c6663b5438 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 15 May 2015 09:29:05 -0300
Subject: [media] videodev2.h: add COLORSPACE_DEFAULT

V4L2_COLORSPACE_DEFAULT is added so we have a specific define for
the default case where applications do not set it but leave it to 0.
In that case the driver will set the colorspace based on what it
captures.

This is already used, but we never had a define for the value 0.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/videodev2.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index bda496adb50b..c5e89ab21cd9 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -178,6 +178,12 @@ enum v4l2_memory {
 
 /* see also http://vektor.theorem.ca/graphics/ycbcr/ */
 enum v4l2_colorspace {
+	/*
+	 * Default colorspace, i.e. let the driver figure it out.
+	 * Can only be used with video capture.
+	 */
+	V4L2_COLORSPACE_DEFAULT       = 0,
+
 	/* SMPTE 170M: used for broadcast NTSC/PAL SDTV */
 	V4L2_COLORSPACE_SMPTE170M     = 1,
 
-- 
cgit 


From addad1050827136e4f8d22c5c81df42f88f44651 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 15 May 2015 09:29:07 -0300
Subject: [media] videodev2.h: add COLORSPACE_RAW

V4L2_COLORSPACE_RAW is added for raw image formats where the picture
is minimally processed and is in the internal colorspace of the sensor.

This is typically used in digital cameras where the image processing is
done later.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/videodev2.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index c5e89ab21cd9..81045aaabec9 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -226,6 +226,9 @@ enum v4l2_colorspace {
 
 	/* BT.2020 colorspace, used for UHDTV. */
 	V4L2_COLORSPACE_BT2020        = 10,
+
+	/* Raw colorspace: for RAW unprocessed images */
+	V4L2_COLORSPACE_RAW           = 11,
 };
 
 enum v4l2_ycbcr_encoding {
-- 
cgit 


From 3818c4da43af9e67fed66174cc25f8fce4043d99 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Fri, 15 May 2015 09:29:09 -0300
Subject: [media] videodev2.h: add macros to map colorspace defaults

The mapping of COLORSPACE_DEFAULT, YCBCR_ENC_DEFAULT or QUANTIZATION_DEFAULT
to proper non-default values is fairly complex, and it is something that
needs to be done both in the kernel and in userspace.

So add macros that can do this conversion, making this available to both
kernel and userspace.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/videodev2.h | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 81045aaabec9..003a91292a4f 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -231,6 +231,15 @@ enum v4l2_colorspace {
 	V4L2_COLORSPACE_RAW           = 11,
 };
 
+/*
+ * Determine how COLORSPACE_DEFAULT should map to a proper colorspace.
+ * This depends on whether this is a SDTV image (use SMPTE 170M), an
+ * HDTV image (use Rec. 709), or something else (use sRGB).
+ */
+#define V4L2_MAP_COLORSPACE_DEFAULT(is_sdtv, is_hdtv) \
+	((is_sdtv) ? V4L2_COLORSPACE_SMPTE170M : \
+	 ((is_hdtv) ? V4L2_COLORSPACE_REC709 : V4L2_COLORSPACE_SRGB))
+
 enum v4l2_ycbcr_encoding {
 	/*
 	 * Mapping of V4L2_YCBCR_ENC_DEFAULT to actual encodings for the
@@ -275,6 +284,16 @@ enum v4l2_ycbcr_encoding {
 	V4L2_YCBCR_ENC_SMPTE240M      = 8,
 };
 
+/*
+ * Determine how YCBCR_ENC_DEFAULT should map to a proper Y'CbCr encoding.
+ * This depends on the colorspace.
+ */
+#define V4L2_MAP_YCBCR_ENC_DEFAULT(colsp) \
+	((colsp) == V4L2_COLORSPACE_REC709 ? V4L2_YCBCR_ENC_709 : \
+	 ((colsp) == V4L2_COLORSPACE_BT2020 ? V4L2_YCBCR_ENC_BT2020 : \
+	  ((colsp) == V4L2_COLORSPACE_SMPTE240M ? V4L2_YCBCR_ENC_SMPTE240M : \
+	   V4L2_YCBCR_ENC_601)))
+
 enum v4l2_quantization {
 	/*
 	 * The default for R'G'B' quantization is always full range, except
@@ -287,6 +306,17 @@ enum v4l2_quantization {
 	V4L2_QUANTIZATION_LIM_RANGE   = 2,
 };
 
+/*
+ * Determine how QUANTIZATION_DEFAULT should map to a proper quantization.
+ * This depends on whether the image is RGB or not, the colorspace and the
+ * Y'CbCr encoding.
+ */
+#define V4L2_MAP_QUANTIZATION_DEFAULT(is_rgb, colsp, ycbcr_enc) \
+	(((is_rgb) && (colsp) == V4L2_COLORSPACE_BT2020) ? V4L2_QUANTIZATION_LIM_RANGE : \
+	 (((is_rgb) || (ycbcr_enc) == V4L2_YCBCR_ENC_XV601 || \
+	  (ycbcr_enc) == V4L2_YCBCR_ENC_XV709 || (colsp) == V4L2_COLORSPACE_JPEG) ? \
+	 V4L2_QUANTIZATION_FULL_RANGE : V4L2_QUANTIZATION_LIM_RANGE))
+
 enum v4l2_priority {
 	V4L2_PRIORITY_UNSET       = 0,  /* not initialized */
 	V4L2_PRIORITY_BACKGROUND  = 1,
-- 
cgit 


From 37e82c2f974b72c9ab49c787ef7b5bb1aec12768 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Wed, 27 May 2015 15:30:39 -0700
Subject: bpf: allow BPF programs access skb->skb_iif and skb->dev->ifindex
 fields

classic BPF already exposes skb->dev->ifindex via SKF_AD_IFINDEX extension.
Allow eBPF program to access it as well. Note that classic aborts execution
of the program if 'skb->dev == NULL' (which is inconvenient for program
writers), whereas eBPF returns zero in such case.
Also expose the 'skb_iif' field, since programs triggered by redirected
packet need to known the original interface index.
Summary:
__skb->ifindex         -> skb->dev->ifindex
__skb->ingress_ifindex -> skb->skb_iif

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h |  2 ++
 net/core/filter.c        | 18 ++++++++++++++++++
 2 files changed, 20 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f0a9af8b4dae..72f3080afa1e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -236,6 +236,8 @@ struct __sk_buff {
 	__u32 vlan_tci;
 	__u32 vlan_proto;
 	__u32 priority;
+	__u32 ingress_ifindex;
+	__u32 ifindex;
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/net/core/filter.c b/net/core/filter.c
index 3adcca6f17a4..2c30d6632d66 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1499,6 +1499,24 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 				      offsetof(struct sk_buff, priority));
 		break;
 
+	case offsetof(struct __sk_buff, ingress_ifindex):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, skb_iif) != 4);
+
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+				      offsetof(struct sk_buff, skb_iif));
+		break;
+
+	case offsetof(struct __sk_buff, ifindex):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4);
+
+		*insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)),
+				      dst_reg, src_reg,
+				      offsetof(struct sk_buff, dev));
+		*insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1);
+		*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg,
+				      offsetof(struct net_device, ifindex));
+		break;
+
 	case offsetof(struct __sk_buff, mark):
 		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
 
-- 
cgit 


From 83caf9896a48de5f838227162c25de405c61ea9d Mon Sep 17 00:00:00 2001
From: Wolfram Sang <wsa+renesas@sang-engineering.com>
Date: Thu, 14 May 2015 14:40:02 +0200
Subject: i2c: add FUNC flag for slave capabilities

So users can check in advance if there is slave support.

Signed-off-by: Wolfram Sang <wsa+renesas@sang-engineering.com>
Acked-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
---
 include/uapi/linux/i2c.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/i2c.h b/include/uapi/linux/i2c.h
index 0e949cbfd333..b0a7dd61eb35 100644
--- a/include/uapi/linux/i2c.h
+++ b/include/uapi/linux/i2c.h
@@ -87,6 +87,7 @@ struct i2c_msg {
 #define I2C_FUNC_PROTOCOL_MANGLING	0x00000004 /* I2C_M_IGNORE_NAK etc. */
 #define I2C_FUNC_SMBUS_PEC		0x00000008
 #define I2C_FUNC_NOSTART		0x00000010 /* I2C_M_NOSTART */
+#define I2C_FUNC_SLAVE			0x00000020
 #define I2C_FUNC_SMBUS_BLOCK_PROC_CALL	0x00008000 /* SMBus 2.0 */
 #define I2C_FUNC_SMBUS_QUICK		0x00010000
 #define I2C_FUNC_SMBUS_READ_BYTE	0x00020000
-- 
cgit 


From 1c4b1d73bacc546ba4e42f7eb4cb88c54139820b Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Wed, 27 May 2015 13:57:46 +0200
Subject: tty: move linux/gsmmux.h to uapi

linux/gsmmux.h defines a user interface and therefore should be
installed with other headers.

Make the file include:
* linux/if.h for IFNAMSIZ
* linux/ioctl.h for _IO* macros

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Cc: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/gsmmux.h      | 36 ------------------------------------
 include/uapi/linux/Kbuild   |  1 +
 include/uapi/linux/gsmmux.h | 39 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 40 insertions(+), 36 deletions(-)
 delete mode 100644 include/linux/gsmmux.h
 create mode 100644 include/uapi/linux/gsmmux.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/gsmmux.h b/include/linux/gsmmux.h
deleted file mode 100644
index c25e9477f7c3..000000000000
--- a/include/linux/gsmmux.h
+++ /dev/null
@@ -1,36 +0,0 @@
-#ifndef _LINUX_GSMMUX_H
-#define _LINUX_GSMMUX_H
-
-struct gsm_config
-{
-	unsigned int adaption;
-	unsigned int encapsulation;
-	unsigned int initiator;
-	unsigned int t1;
-	unsigned int t2;
-	unsigned int t3;
-	unsigned int n2;
-	unsigned int mru;
-	unsigned int mtu;
-	unsigned int k;
-	unsigned int i;
-	unsigned int unused[8];		/* Padding for expansion without
-					   breaking stuff */
-};
-
-#define GSMIOC_GETCONF		_IOR('G', 0, struct gsm_config)
-#define GSMIOC_SETCONF		_IOW('G', 1, struct gsm_config)
-
-struct gsm_netconfig {
-	unsigned int adaption;  /* Adaption to use in network mode */
-	unsigned short protocol;/* Protocol to use - only ETH_P_IP supported */
-	unsigned short unused2;
-	char if_name[IFNAMSIZ];	/* interface name format string */
-	__u8 unused[28];        /* For future use */
-};
-
-#define GSMIOC_ENABLE_NET      _IOW('G', 2, struct gsm_netconfig)
-#define GSMIOC_DISABLE_NET     _IO('G', 3)
-
-
-#endif
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 1a0006a76b00..1d3db6a74d1f 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -138,6 +138,7 @@ header-y += genetlink.h
 header-y += gen_stats.h
 header-y += gfs2_ondisk.h
 header-y += gigaset_dev.h
+header-y += gsmmux.h
 header-y += hdlcdrv.h
 header-y += hdlc.h
 header-y += hdreg.h
diff --git a/include/uapi/linux/gsmmux.h b/include/uapi/linux/gsmmux.h
new file mode 100644
index 000000000000..c06742d52856
--- /dev/null
+++ b/include/uapi/linux/gsmmux.h
@@ -0,0 +1,39 @@
+#ifndef _LINUX_GSMMUX_H
+#define _LINUX_GSMMUX_H
+
+#include <linux/if.h>
+#include <linux/ioctl.h>
+
+struct gsm_config
+{
+	unsigned int adaption;
+	unsigned int encapsulation;
+	unsigned int initiator;
+	unsigned int t1;
+	unsigned int t2;
+	unsigned int t3;
+	unsigned int n2;
+	unsigned int mru;
+	unsigned int mtu;
+	unsigned int k;
+	unsigned int i;
+	unsigned int unused[8];		/* Padding for expansion without
+					   breaking stuff */
+};
+
+#define GSMIOC_GETCONF		_IOR('G', 0, struct gsm_config)
+#define GSMIOC_SETCONF		_IOW('G', 1, struct gsm_config)
+
+struct gsm_netconfig {
+	unsigned int adaption;  /* Adaption to use in network mode */
+	unsigned short protocol;/* Protocol to use - only ETH_P_IP supported */
+	unsigned short unused2;
+	char if_name[IFNAMSIZ];	/* interface name format string */
+	__u8 unused[28];        /* For future use */
+};
+
+#define GSMIOC_ENABLE_NET      _IOW('G', 2, struct gsm_netconfig)
+#define GSMIOC_DISABLE_NET     _IO('G', 3)
+
+
+#endif
-- 
cgit 


From a28c257c9eb0bd76a4adcac97c07e34044ec71fb Mon Sep 17 00:00:00 2001
From: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Date: Fri, 29 May 2015 17:28:07 -0400
Subject: net/rds: Declare SO_RDS_TRANSPORT and RDS_TRANS_* constants in
 uapi/linux/rds.h

User space applications that desire to explicitly select the
underlying transport for a PF_RDS socket may do so by using the
SO_RDS_TRANSPORT socket option at the SOL_RDS level before bind().
The integer argument provided to the socket option would be one
of the RDS_TRANS_* values, e.g., RDS_TRANS_TCP. This commit exports
the constant values need by such applications via <linux/rds.h>

Signed-off-by: Sowmini Varadhan <sowmini.varadhan@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/rds.h | 10 ++++++++++
 net/rds/rds.h            |  5 -----
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h
index 91950950aa59..0f9265cb2a96 100644
--- a/include/uapi/linux/rds.h
+++ b/include/uapi/linux/rds.h
@@ -38,6 +38,8 @@
 
 #define RDS_IB_ABI_VERSION		0x301
 
+#define	SOL_RDS		276
+
 /*
  * setsockopt/getsockopt for SOL_RDS
  */
@@ -48,6 +50,14 @@
 #define RDS_RECVERR			5
 #define RDS_CONG_MONITOR		6
 #define RDS_GET_MR_FOR_DEST		7
+#define SO_RDS_TRANSPORT		8
+
+/* supported values for SO_RDS_TRANSPORT */
+#define	RDS_TRANS_IB	0
+#define	RDS_TRANS_IWARP	1
+#define	RDS_TRANS_TCP	2
+#define RDS_TRANS_COUNT	3
+#define	RDS_TRANS_NONE	(~0)
 
 /*
  * Control message types for SOL_RDS.
diff --git a/net/rds/rds.h b/net/rds/rds.h
index 0d41155a2258..76db508f73a1 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -408,11 +408,6 @@ struct rds_notifier {
  *                 should try hard not to block.
  */
 
-#define RDS_TRANS_IB	0
-#define RDS_TRANS_IWARP	1
-#define RDS_TRANS_TCP	2
-#define RDS_TRANS_COUNT	3
-
 struct rds_transport {
 	char			t_name[TRANSNAMSIZ];
 	struct list_head	t_item;
-- 
cgit 


From 8a7b19d8b542b87bccc3eaaf81dcc90a5ca48aea Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sat, 30 May 2015 17:39:25 +0200
Subject: include/uapi/linux/virtio_balloon.h: include linux/virtio_types.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes userspace compilation error:

error: unknown type name ‘__virtio16’
  __virtio16 tag;

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
---
 include/uapi/linux/virtio_balloon.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_balloon.h b/include/uapi/linux/virtio_balloon.h
index 984169a819ee..d7f1cbc3766c 100644
--- a/include/uapi/linux/virtio_balloon.h
+++ b/include/uapi/linux/virtio_balloon.h
@@ -26,6 +26,7 @@
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE. */
 #include <linux/types.h>
+#include <linux/virtio_types.h>
 #include <linux/virtio_ids.h>
 #include <linux/virtio_config.h>
 
-- 
cgit 


From 2751c9882b947292fcfb084c4f604e01724af804 Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Fri, 24 Apr 2015 14:27:24 +0200
Subject: vhost: cross-endian support for legacy devices

This patch brings cross-endian support to vhost when used to implement
legacy virtio devices. Since it is a relatively rare situation, the
feature availability is controlled by a kernel config option (not set
by default).

The vq->is_le boolean field is added to cache the endianness to be
used for ring accesses. It defaults to native endian, as expected
by legacy virtio devices. When the ring gets active, we force little
endian if the device is modern. When the ring is deactivated, we
revert to the native endian default.

If cross-endian was compiled in, a vq->user_be boolean field is added
so that userspace may request a specific endianness. This field is
used to override the default when activating the ring of a legacy
device. It has no effect on modern devices.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 drivers/vhost/Kconfig      | 15 ++++++++
 drivers/vhost/vhost.c      | 85 +++++++++++++++++++++++++++++++++++++++++++++-
 drivers/vhost/vhost.h      | 11 ++++--
 include/uapi/linux/vhost.h | 14 ++++++++
 4 files changed, 122 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 017a1e8a8f6f..533eaf04f12f 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -32,3 +32,18 @@ config VHOST
 	---help---
 	  This option is selected by any driver which needs to access
 	  the core of vhost.
+
+config VHOST_CROSS_ENDIAN_LEGACY
+	bool "Cross-endian support for vhost"
+	default n
+	---help---
+	  This option allows vhost to support guests with a different byte
+	  ordering from host while using legacy virtio.
+
+	  Userspace programs can control the feature using the
+	  VHOST_SET_VRING_ENDIAN and VHOST_GET_VRING_ENDIAN ioctls.
+
+	  This is only useful on a few platforms (ppc64 and arm64). Since it
+	  adds some overhead, it is disabled by default.
+
+	  If unsure, say "N".
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 2ee28266fd07..9e8e004bb1c3 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -36,6 +36,77 @@ enum {
 #define vhost_used_event(vq) ((__virtio16 __user *)&vq->avail->ring[vq->num])
 #define vhost_avail_event(vq) ((__virtio16 __user *)&vq->used->ring[vq->num])
 
+#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
+static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq)
+{
+	vq->user_be = !virtio_legacy_is_little_endian();
+}
+
+static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
+{
+	struct vhost_vring_state s;
+
+	if (vq->private_data)
+		return -EBUSY;
+
+	if (copy_from_user(&s, argp, sizeof(s)))
+		return -EFAULT;
+
+	if (s.num != VHOST_VRING_LITTLE_ENDIAN &&
+	    s.num != VHOST_VRING_BIG_ENDIAN)
+		return -EINVAL;
+
+	vq->user_be = s.num;
+
+	return 0;
+}
+
+static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
+				   int __user *argp)
+{
+	struct vhost_vring_state s = {
+		.index = idx,
+		.num = vq->user_be
+	};
+
+	if (copy_to_user(argp, &s, sizeof(s)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static void vhost_init_is_le(struct vhost_virtqueue *vq)
+{
+	/* Note for legacy virtio: user_be is initialized at reset time
+	 * according to the host endianness. If userspace does not set an
+	 * explicit endianness, the default behavior is native endian, as
+	 * expected by legacy virtio.
+	 */
+	vq->is_le = vhost_has_feature(vq, VIRTIO_F_VERSION_1) || !vq->user_be;
+}
+#else
+static void vhost_vq_reset_user_be(struct vhost_virtqueue *vq)
+{
+}
+
+static long vhost_set_vring_endian(struct vhost_virtqueue *vq, int __user *argp)
+{
+	return -ENOIOCTLCMD;
+}
+
+static long vhost_get_vring_endian(struct vhost_virtqueue *vq, u32 idx,
+				   int __user *argp)
+{
+	return -ENOIOCTLCMD;
+}
+
+static void vhost_init_is_le(struct vhost_virtqueue *vq)
+{
+	if (vhost_has_feature(vq, VIRTIO_F_VERSION_1))
+		vq->is_le = true;
+}
+#endif /* CONFIG_VHOST_CROSS_ENDIAN_LEGACY */
+
 static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh,
 			    poll_table *pt)
 {
@@ -199,6 +270,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->call = NULL;
 	vq->log_ctx = NULL;
 	vq->memory = NULL;
+	vq->is_le = virtio_legacy_is_little_endian();
+	vhost_vq_reset_user_be(vq);
 }
 
 static int vhost_worker(void *data)
@@ -806,6 +879,12 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
 		} else
 			filep = eventfp;
 		break;
+	case VHOST_SET_VRING_ENDIAN:
+		r = vhost_set_vring_endian(vq, argp);
+		break;
+	case VHOST_GET_VRING_ENDIAN:
+		r = vhost_get_vring_endian(vq, idx, argp);
+		break;
 	default:
 		r = -ENOIOCTLCMD;
 	}
@@ -1044,8 +1123,12 @@ int vhost_init_used(struct vhost_virtqueue *vq)
 {
 	__virtio16 last_used_idx;
 	int r;
-	if (!vq->private_data)
+	if (!vq->private_data) {
+		vq->is_le = virtio_legacy_is_little_endian();
 		return 0;
+	}
+
+	vhost_init_is_le(vq);
 
 	r = vhost_update_used_flags(vq);
 	if (r)
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index a4fa33a79bf2..ce6f6da4b09f 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -106,6 +106,14 @@ struct vhost_virtqueue {
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
+
+	/* Ring endianness. Defaults to legacy native endianness.
+	 * Set to true when starting a modern virtio device. */
+	bool is_le;
+#ifdef CONFIG_VHOST_CROSS_ENDIAN_LEGACY
+	/* Ring endianness requested by userspace for cross-endian support. */
+	bool user_be;
+#endif
 };
 
 struct vhost_dev {
@@ -175,8 +183,7 @@ static inline bool vhost_has_feature(struct vhost_virtqueue *vq, int bit)
 
 static inline bool vhost_is_little_endian(struct vhost_virtqueue *vq)
 {
-	return vhost_has_feature(vq, VIRTIO_F_VERSION_1) ||
-		virtio_legacy_is_little_endian();
+	return vq->is_le;
 }
 
 /* Memory accessors */
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index bb6a5b4cb3c5..ab3731917bac 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -103,6 +103,20 @@ struct vhost_memory {
 /* Get accessor: reads index, writes value in num */
 #define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state)
 
+/* Set the vring byte order in num. Valid values are VHOST_VRING_LITTLE_ENDIAN
+ * or VHOST_VRING_BIG_ENDIAN (other values return -EINVAL).
+ * The byte order cannot be changed while the device is active: trying to do so
+ * returns -EBUSY.
+ * This is a legacy only API that is simply ignored when VIRTIO_F_VERSION_1 is
+ * set.
+ * Not all kernel configurations support this ioctl, but all configurations that
+ * support SET also support GET.
+ */
+#define VHOST_VRING_LITTLE_ENDIAN 0
+#define VHOST_VRING_BIG_ENDIAN 1
+#define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
+#define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+
 /* The following ioctls use eventfd file descriptors to signal and poll
  * for events. */
 
-- 
cgit 


From 8b8e658b16336f0f50aba733f51db636ef121f50 Mon Sep 17 00:00:00 2001
From: Greg Kurz <gkurz@linux.vnet.ibm.com>
Date: Fri, 24 Apr 2015 14:50:36 +0200
Subject: macvtap/tun: cross-endian support for little-endian hosts

The VNET_LE flag was introduced to fix accesses to virtio 1.0 headers
that are always little-endian. It can also be used to handle the special
case of a legacy little-endian device implemented by a big-endian host.

Let's add a flag and ioctls for big-endian devices as well. If both flags
are set, little-endian wins.

Since this is isn't a common usecase, the feature is controlled by a kernel
config option (not set by default).

Both macvtap and tun are covered by this patch since they share the same
API with userland.

Signed-off-by: Greg Kurz <gkurz@linux.vnet.ibm.com>

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
---
 drivers/net/Kconfig         | 14 +++++++++++
 drivers/net/macvtap.c       | 57 ++++++++++++++++++++++++++++++++++++++++++-
 drivers/net/tun.c           | 59 ++++++++++++++++++++++++++++++++++++++++++++-
 include/uapi/linux/if_tun.h |  6 +++++
 4 files changed, 134 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index df51d6025a90..71ac0ece2d75 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -244,6 +244,20 @@ config TUN
 
 	  If you don't know what to use this for, you don't need it.
 
+config TUN_VNET_CROSS_LE
+	bool "Support for cross-endian vnet headers on little-endian kernels"
+	default n
+	---help---
+	  This option allows TUN/TAP and MACVTAP device drivers in a
+	  little-endian kernel to parse vnet headers that come from a
+	  big-endian legacy virtio device.
+
+	  Userspace programs can control the feature using the TUNSETVNETBE
+	  and TUNGETVNETBE ioctls.
+
+	  Unless you have a little-endian system hosting a big-endian virtual
+	  machine with a legacy virtio NIC, you should say N.
+
 config VETH
 	tristate "Virtual ethernet pair device"
 	---help---
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 072ccbaf7c45..928f3f4db629 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -48,11 +48,60 @@ struct macvtap_queue {
 #define MACVTAP_FEATURES (IFF_VNET_HDR | IFF_MULTI_QUEUE)
 
 #define MACVTAP_VNET_LE 0x80000000
+#define MACVTAP_VNET_BE 0x40000000
+
+#ifdef CONFIG_TUN_VNET_CROSS_LE
+static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
+{
+	return q->flags & MACVTAP_VNET_BE ? false :
+		virtio_legacy_is_little_endian();
+}
+
+static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *sp)
+{
+	int s = !!(q->flags & MACVTAP_VNET_BE);
+
+	if (put_user(s, sp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *sp)
+{
+	int s;
+
+	if (get_user(s, sp))
+		return -EFAULT;
+
+	if (s)
+		q->flags |= MACVTAP_VNET_BE;
+	else
+		q->flags &= ~MACVTAP_VNET_BE;
+
+	return 0;
+}
+#else
+static inline bool macvtap_legacy_is_little_endian(struct macvtap_queue *q)
+{
+	return virtio_legacy_is_little_endian();
+}
+
+static long macvtap_get_vnet_be(struct macvtap_queue *q, int __user *argp)
+{
+	return -EINVAL;
+}
+
+static long macvtap_set_vnet_be(struct macvtap_queue *q, int __user *argp)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_TUN_VNET_CROSS_LE */
 
 static inline bool macvtap_is_little_endian(struct macvtap_queue *q)
 {
 	return q->flags & MACVTAP_VNET_LE ||
-		virtio_legacy_is_little_endian();
+		macvtap_legacy_is_little_endian(q);
 }
 
 static inline u16 macvtap16_to_cpu(struct macvtap_queue *q, __virtio16 val)
@@ -1096,6 +1145,12 @@ static long macvtap_ioctl(struct file *file, unsigned int cmd,
 			q->flags &= ~MACVTAP_VNET_LE;
 		return 0;
 
+	case TUNGETVNETBE:
+		return macvtap_get_vnet_be(q, sp);
+
+	case TUNSETVNETBE:
+		return macvtap_set_vnet_be(q, sp);
+
 	case TUNSETOFFLOAD:
 		/* let the user check for future flags */
 		if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 |
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b210139bef8f..cb376b2d548a 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -111,6 +111,7 @@ do {								\
 #define TUN_FASYNC	IFF_ATTACH_QUEUE
 /* High bits in flags field are unused. */
 #define TUN_VNET_LE     0x80000000
+#define TUN_VNET_BE     0x40000000
 
 #define TUN_FEATURES (IFF_NO_PI | IFF_ONE_QUEUE | IFF_VNET_HDR | \
 		      IFF_MULTI_QUEUE)
@@ -206,10 +207,58 @@ struct tun_struct {
 	u32 flow_count;
 };
 
+#ifdef CONFIG_TUN_VNET_CROSS_LE
+static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
+{
+	return tun->flags & TUN_VNET_BE ? false :
+		virtio_legacy_is_little_endian();
+}
+
+static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+	int be = !!(tun->flags & TUN_VNET_BE);
+
+	if (put_user(be, argp))
+		return -EFAULT;
+
+	return 0;
+}
+
+static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+	int be;
+
+	if (get_user(be, argp))
+		return -EFAULT;
+
+	if (be)
+		tun->flags |= TUN_VNET_BE;
+	else
+		tun->flags &= ~TUN_VNET_BE;
+
+	return 0;
+}
+#else
+static inline bool tun_legacy_is_little_endian(struct tun_struct *tun)
+{
+	return virtio_legacy_is_little_endian();
+}
+
+static long tun_get_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+	return -EINVAL;
+}
+
+static long tun_set_vnet_be(struct tun_struct *tun, int __user *argp)
+{
+	return -EINVAL;
+}
+#endif /* CONFIG_TUN_VNET_CROSS_LE */
+
 static inline bool tun_is_little_endian(struct tun_struct *tun)
 {
 	return tun->flags & TUN_VNET_LE ||
-		virtio_legacy_is_little_endian();
+		tun_legacy_is_little_endian(tun);
 }
 
 static inline u16 tun16_to_cpu(struct tun_struct *tun, __virtio16 val)
@@ -2062,6 +2111,14 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 			tun->flags &= ~TUN_VNET_LE;
 		break;
 
+	case TUNGETVNETBE:
+		ret = tun_get_vnet_be(tun, argp);
+		break;
+
+	case TUNSETVNETBE:
+		ret = tun_set_vnet_be(tun, argp);
+		break;
+
 	case TUNATTACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 50ae24335444..3cb5e1d85ddd 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -50,6 +50,12 @@
 #define TUNGETFILTER _IOR('T', 219, struct sock_fprog)
 #define TUNSETVNETLE _IOW('T', 220, int)
 #define TUNGETVNETLE _IOR('T', 221, int)
+/* The TUNSETVNETBE and TUNGETVNETBE ioctls are for cross-endian support on
+ * little-endian hosts. Not all kernel configurations support them, but all
+ * configurations that support SET also support GET.
+ */
+#define TUNSETVNETBE _IOW('T', 222, int)
+#define TUNGETVNETBE _IOR('T', 223, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
cgit 


From e2b836cfb45d27a5efc5b1b65fe2442c53137d9c Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Wed, 22 Apr 2015 23:59:24 +0200
Subject: uapi/nfs: Add NFSv4.1 ACL definitions

Add the ACL related protocol definitions which were added in the NFSv4.1
specification.

(But we're not using them yet.)

Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/uapi/linux/nfs4.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nfs4.h b/include/uapi/linux/nfs4.h
index adc0aff83fbb..2119c7c274d7 100644
--- a/include/uapi/linux/nfs4.h
+++ b/include/uapi/linux/nfs4.h
@@ -86,6 +86,10 @@
 #define ACL4_SUPPORT_AUDIT_ACL 0x04
 #define ACL4_SUPPORT_ALARM_ACL 0x08
 
+#define NFS4_ACL_AUTO_INHERIT 0x00000001
+#define NFS4_ACL_PROTECTED    0x00000002
+#define NFS4_ACL_DEFAULTED    0x00000004
+
 #define NFS4_ACE_FILE_INHERIT_ACE             0x00000001
 #define NFS4_ACE_DIRECTORY_INHERIT_ACE        0x00000002
 #define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE     0x00000004
@@ -93,6 +97,7 @@
 #define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG   0x00000010
 #define NFS4_ACE_FAILED_ACCESS_ACE_FLAG       0x00000020
 #define NFS4_ACE_IDENTIFIER_GROUP             0x00000040
+#define NFS4_ACE_INHERITED_ACE                0x00000080
 
 #define NFS4_ACE_READ_DATA                    0x00000001
 #define NFS4_ACE_LIST_DIRECTORY               0x00000001
@@ -106,6 +111,8 @@
 #define NFS4_ACE_DELETE_CHILD                 0x00000040
 #define NFS4_ACE_READ_ATTRIBUTES              0x00000080
 #define NFS4_ACE_WRITE_ATTRIBUTES             0x00000100
+#define NFS4_ACE_WRITE_RETENTION              0x00000200
+#define NFS4_ACE_WRITE_RETENTION_HOLD         0x00000400
 #define NFS4_ACE_DELETE                       0x00010000
 #define NFS4_ACE_READ_ACL                     0x00020000
 #define NFS4_ACE_WRITE_ACL                    0x00040000
-- 
cgit 


From ccea74457bbdafe33dce8bffcb5cb183aeb5f2bb Mon Sep 17 00:00:00 2001
From: Neil McKee <neil.mckee@inmon.com>
Date: Tue, 26 May 2015 20:59:43 -0700
Subject: openvswitch: include datapath actions with sampled-packet upcall to
 userspace

If new optional attribute OVS_USERSPACE_ATTR_ACTIONS is added to an
OVS_ACTION_ATTR_USERSPACE action, then include the datapath actions
in the upcall.

This Directly associates the sampled packet with the path it takes
through the virtual switch. Path information currently includes mangling,
encapsulation and decapsulation actions for tunneling protocols GRE,
VXLAN, Geneve, MPLS and QinQ, but this extension requires no further
changes to accommodate datapath actions that may be added in the
future.

Adding path information enhances visibility into complex virtual
networks.

Signed-off-by: Neil McKee <neil.mckee@inmon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/openvswitch.h |  4 ++++
 net/openvswitch/actions.c        | 23 +++++++++++++++--------
 net/openvswitch/datapath.c       | 18 ++++++++++++++++--
 net/openvswitch/datapath.h       |  2 ++
 4 files changed, 37 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index bbd49a0c46c7..1dab77601c21 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -153,6 +153,8 @@ enum ovs_packet_cmd {
  * flow key against the kernel's.
  * @OVS_PACKET_ATTR_ACTIONS: Contains actions for the packet.  Used
  * for %OVS_PACKET_CMD_EXECUTE.  It has nested %OVS_ACTION_ATTR_* attributes.
+ * Also used in upcall when %OVS_ACTION_ATTR_USERSPACE has optional
+ * %OVS_USERSPACE_ATTR_ACTIONS attribute.
  * @OVS_PACKET_ATTR_USERDATA: Present for an %OVS_PACKET_CMD_ACTION
  * notification if the %OVS_ACTION_ATTR_USERSPACE action specified an
  * %OVS_USERSPACE_ATTR_USERDATA attribute, with the same length and content
@@ -528,6 +530,7 @@ enum ovs_sample_attr {
  * copied to the %OVS_PACKET_CMD_ACTION message as %OVS_PACKET_ATTR_USERDATA.
  * @OVS_USERSPACE_ATTR_EGRESS_TUN_PORT: If present, u32 output port to get
  * tunnel info.
+ * @OVS_USERSPACE_ATTR_ACTIONS: If present, send actions with upcall.
  */
 enum ovs_userspace_attr {
 	OVS_USERSPACE_ATTR_UNSPEC,
@@ -535,6 +538,7 @@ enum ovs_userspace_attr {
 	OVS_USERSPACE_ATTR_USERDATA,  /* Optional user-specified cookie. */
 	OVS_USERSPACE_ATTR_EGRESS_TUN_PORT,  /* Optional, u32 output port
 					      * to get tunnel info. */
+	OVS_USERSPACE_ATTR_ACTIONS,   /* Optional flag to get actions. */
 	__OVS_USERSPACE_ATTR_MAX
 };
 
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index b491c1c296fe..8a8c0b8b4f63 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -608,17 +608,16 @@ static void do_output(struct datapath *dp, struct sk_buff *skb, int out_port)
 }
 
 static int output_userspace(struct datapath *dp, struct sk_buff *skb,
-			    struct sw_flow_key *key, const struct nlattr *attr)
+			    struct sw_flow_key *key, const struct nlattr *attr,
+			    const struct nlattr *actions, int actions_len)
 {
 	struct ovs_tunnel_info info;
 	struct dp_upcall_info upcall;
 	const struct nlattr *a;
 	int rem;
 
+	memset(&upcall, 0, sizeof(upcall));
 	upcall.cmd = OVS_PACKET_CMD_ACTION;
-	upcall.userdata = NULL;
-	upcall.portid = 0;
-	upcall.egress_tun_info = NULL;
 
 	for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
 		 a = nla_next(a, &rem)) {
@@ -647,6 +646,13 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 			break;
 		}
 
+		case OVS_USERSPACE_ATTR_ACTIONS: {
+			/* Include actions. */
+			upcall.actions = actions;
+			upcall.actions_len = actions_len;
+			break;
+		}
+
 		} /* End of switch. */
 	}
 
@@ -654,7 +660,8 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb,
 }
 
 static int sample(struct datapath *dp, struct sk_buff *skb,
-		  struct sw_flow_key *key, const struct nlattr *attr)
+		  struct sw_flow_key *key, const struct nlattr *attr,
+		  const struct nlattr *actions, int actions_len)
 {
 	const struct nlattr *acts_list = NULL;
 	const struct nlattr *a;
@@ -688,7 +695,7 @@ static int sample(struct datapath *dp, struct sk_buff *skb,
 	 */
 	if (likely(nla_type(a) == OVS_ACTION_ATTR_USERSPACE &&
 		   nla_is_last(a, rem)))
-		return output_userspace(dp, skb, key, a);
+		return output_userspace(dp, skb, key, a, actions, actions_len);
 
 	skb = skb_clone(skb, GFP_ATOMIC);
 	if (!skb)
@@ -872,7 +879,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case OVS_ACTION_ATTR_USERSPACE:
-			output_userspace(dp, skb, key, a);
+			output_userspace(dp, skb, key, a, attr, len);
 			break;
 
 		case OVS_ACTION_ATTR_HASH:
@@ -916,7 +923,7 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
 			break;
 
 		case OVS_ACTION_ATTR_SAMPLE:
-			err = sample(dp, skb, key, a);
+			err = sample(dp, skb, key, a, attr, len);
 			break;
 		}
 
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 3b90461317ec..ff8c4a4c1609 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -272,10 +272,9 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
 		struct dp_upcall_info upcall;
 		int error;
 
+		memset(&upcall, 0, sizeof(upcall));
 		upcall.cmd = OVS_PACKET_CMD_MISS;
-		upcall.userdata = NULL;
 		upcall.portid = ovs_vport_find_upcall_portid(p, skb);
-		upcall.egress_tun_info = NULL;
 		error = ovs_dp_upcall(dp, skb, key, &upcall);
 		if (unlikely(error))
 			kfree_skb(skb);
@@ -397,6 +396,10 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
 	if (upcall_info->egress_tun_info)
 		size += nla_total_size(ovs_tun_key_attr_size());
 
+	/* OVS_PACKET_ATTR_ACTIONS */
+	if (upcall_info->actions_len)
+		size += nla_total_size(upcall_info->actions_len);
+
 	return size;
 }
 
@@ -478,6 +481,17 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
 		nla_nest_end(user_skb, nla);
 	}
 
+	if (upcall_info->actions_len) {
+		nla = nla_nest_start(user_skb, OVS_PACKET_ATTR_ACTIONS);
+		err = ovs_nla_put_actions(upcall_info->actions,
+					  upcall_info->actions_len,
+					  user_skb);
+		if (!err)
+			nla_nest_end(user_skb, nla);
+		else
+			nla_nest_cancel(user_skb, nla);
+	}
+
 	/* Only reserve room for attribute header, packet data is added
 	 * in skb_zerocopy() */
 	if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 4ec4a480b147..cd691e935e08 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -116,6 +116,8 @@ struct ovs_skb_cb {
 struct dp_upcall_info {
 	const struct ovs_tunnel_info *egress_tun_info;
 	const struct nlattr *userdata;
+	const struct nlattr *actions;
+	int actions_len;
 	u32 portid;
 	u8 cmd;
 };
-- 
cgit 


From 8760ce58353c2099be35ead62a572ee2d1e83b5b Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Mon, 1 Jun 2015 15:51:34 -0400
Subject: geneve: allow user to specify TTL for tunnel frames

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c         | 18 ++++++++++++++----
 include/uapi/linux/if_link.h |  1 +
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index b7eafa4c1a67..1675dfdbfa70 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -44,7 +44,8 @@ struct geneve_dev {
 	struct net	   *net;	/* netns for packet i/o */
 	struct net_device  *dev;	/* netdev for geneve tunnel */
 	struct geneve_sock *sock;	/* socket used for geneve tunnel */
-	u8 vni[3];			/* virtual network ID for tunnel */
+	u8                 vni[3];	/* virtual network ID for tunnel */
+	u8                 ttl;		/* TTL override */
 	struct sockaddr_in remote;	/* IPv4 address for link partner */
 	struct list_head   next;	/* geneve's per namespace list */
 };
@@ -184,7 +185,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct flowi4 fl4;
 	int err;
 	__be16 sport;
-	__u8 tos, ttl = 0;
+	__u8 tos, ttl;
 
 	iip = ip_hdr(skb);
 
@@ -207,11 +208,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto rt_tx_error;
 	}
 
-	/* TODO: tos and ttl should be configurable */
+	/* TODO: tos should be configurable */
 
 	tos = ip_tunnel_ecn_encap(0, iip, skb);
 
-	if (IN_MULTICAST(ntohl(fl4.daddr)))
+	ttl = geneve->ttl;
+	if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
 		ttl = 1;
 
 	ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -297,6 +299,7 @@ static void geneve_setup(struct net_device *dev)
 static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
 	[IFLA_GENEVE_ID]		= { .type = NLA_U32 },
 	[IFLA_GENEVE_REMOTE]		= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+	[IFLA_GENEVE_TTL]		= { .type = NLA_U8 },
 };
 
 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -364,6 +367,9 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
 	if (err)
 		return err;
 
+	if (data[IFLA_GENEVE_TTL])
+		geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
+
 	list_add(&geneve->next, &gn->geneve_list);
 
 	hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]);
@@ -386,6 +392,7 @@ static size_t geneve_get_size(const struct net_device *dev)
 {
 	return nla_total_size(sizeof(__u32)) +	/* IFLA_GENEVE_ID */
 		nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */
+		nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TTL */
 		0;
 }
 
@@ -402,6 +409,9 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			    geneve->remote.sin_addr.s_addr))
 		goto nla_put_failure;
 
+	if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl))
+		goto nla_put_failure;
+
 	return 0;
 
 nla_put_failure:
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index afccc9393fef..a834f31db915 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -395,6 +395,7 @@ enum {
 	IFLA_GENEVE_UNSPEC,
 	IFLA_GENEVE_ID,
 	IFLA_GENEVE_REMOTE,
+	IFLA_GENEVE_TTL,
 	__IFLA_GENEVE_MAX
 };
 #define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
-- 
cgit 


From d89511251f6519599b109dc6cda87a6ab314ed8c Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Mon, 1 Jun 2015 15:51:35 -0400
Subject: geneve: allow user to specify TOS info for tunnel frames

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/geneve.c         | 18 ++++++++++++++----
 include/uapi/linux/if_link.h |  1 +
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 1675dfdbfa70..78d49d186e05 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -46,6 +46,7 @@ struct geneve_dev {
 	struct geneve_sock *sock;	/* socket used for geneve tunnel */
 	u8                 vni[3];	/* virtual network ID for tunnel */
 	u8                 ttl;		/* TTL override */
+	u8                 tos;		/* TOS override */
 	struct sockaddr_in remote;	/* IPv4 address for link partner */
 	struct list_head   next;	/* geneve's per namespace list */
 };
@@ -194,7 +195,12 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* TODO: port min/max limits should be configurable */
 	sport = udp_flow_src_port(dev_net(dev), skb, 0, 0, true);
 
+	tos = geneve->tos;
+	if (tos == 1)
+		tos = ip_tunnel_get_dsfield(iip, skb);
+
 	memset(&fl4, 0, sizeof(fl4));
+	fl4.flowi4_tos = RT_TOS(tos);
 	fl4.daddr = geneve->remote.sin_addr.s_addr;
 	rt = ip_route_output_key(geneve->net, &fl4);
 	if (IS_ERR(rt)) {
@@ -208,9 +214,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto rt_tx_error;
 	}
 
-	/* TODO: tos should be configurable */
-
-	tos = ip_tunnel_ecn_encap(0, iip, skb);
+	tos = ip_tunnel_ecn_encap(tos, iip, skb);
 
 	ttl = geneve->ttl;
 	if (!ttl && IN_MULTICAST(ntohl(fl4.daddr)))
@@ -300,6 +304,7 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
 	[IFLA_GENEVE_ID]		= { .type = NLA_U32 },
 	[IFLA_GENEVE_REMOTE]		= { .len = FIELD_SIZEOF(struct iphdr, daddr) },
 	[IFLA_GENEVE_TTL]		= { .type = NLA_U8 },
+	[IFLA_GENEVE_TOS]		= { .type = NLA_U8 },
 };
 
 static int geneve_validate(struct nlattr *tb[], struct nlattr *data[])
@@ -370,6 +375,9 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
 	if (data[IFLA_GENEVE_TTL])
 		geneve->ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
 
+	if (data[IFLA_GENEVE_TOS])
+		geneve->tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
+
 	list_add(&geneve->next, &gn->geneve_list);
 
 	hlist_add_head_rcu(&geneve->hlist, &gn->vni_list[hash]);
@@ -393,6 +401,7 @@ static size_t geneve_get_size(const struct net_device *dev)
 	return nla_total_size(sizeof(__u32)) +	/* IFLA_GENEVE_ID */
 		nla_total_size(sizeof(struct in_addr)) + /* IFLA_GENEVE_REMOTE */
 		nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TTL */
+		nla_total_size(sizeof(__u8)) +  /* IFLA_GENEVE_TOS */
 		0;
 }
 
@@ -409,7 +418,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
 			    geneve->remote.sin_addr.s_addr))
 		goto nla_put_failure;
 
-	if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl))
+	if (nla_put_u8(skb, IFLA_GENEVE_TTL, geneve->ttl) ||
+	    nla_put_u8(skb, IFLA_GENEVE_TOS, geneve->tos))
 		goto nla_put_failure;
 
 	return 0;
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index a834f31db915..1737b7a8272b 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -396,6 +396,7 @@ enum {
 	IFLA_GENEVE_ID,
 	IFLA_GENEVE_REMOTE,
 	IFLA_GENEVE_TTL,
+	IFLA_GENEVE_TOS,
 	__IFLA_GENEVE_MAX
 };
 #define IFLA_GENEVE_MAX	(__IFLA_GENEVE_MAX - 1)
-- 
cgit 


From aef11009c45ca594c18ecc822f101e3908ca3fb4 Mon Sep 17 00:00:00 2001
From: Yair Shachar <yair.shachar@amd.com>
Date: Sun, 7 Dec 2014 17:05:22 +0200
Subject: drm/amdkfd: add H/W debugger IOCTL set definitions

This patch adds four new IOCTLs to amdkfd. These IOCTLs expose a H/W
debugger functionality to the userspace.

The IOCTLs are:

- AMDKFD_IOC_DBG_REGISTER:

The purpose of this IOCTL is to notify amdkfd that a process wants to use
GPU debugging facilities on itself only.
It is expected that this IOCTL would be called before any other H/W
debugger requests are sent to amdkfd and for each GPU where the H/W
debugging needs to be enabled. The use of this IOCTL ensures that only
one instance of a debugger is active in the system.

- AMDKFD_IOC_DBG_UNREGISTER:

This IOCTL detaches the debugger/debugged process from the H/W
Debug which was established by the AMDKFD_IOC_DBG_REGISTER IOCTL.

- AMDKFD_IOC_DBG_ADDRESS_WATCH:

This IOCTL allows to set different watchpoints with various conditions as
indicated by the IOCTL's arguments. The available number of watchpoints
is retrieved from topology. This operation is confined to the current
debugged process, which was registered through AMDKFD_IOC_DBG_REGISTER.

- AMDKFD_IOC_DBG_WAVE_CONTROL:

This IOCTL allows to control a wavefront as indicated by the IOCTL's
arguments. For example, you can halt/resume or kill either a
single wavefront or a set of wavefronts. This operation is confined to
the current debugged process, which was registered through
AMDKFD_IOC_DBG_REGISTER.

Because the arguments for the address watch IOCTL and wave control IOCTL
are dynamic, meaning that they could vary in size, the userspace passes a
pointer to a structure (in userspace) that contains the value of the
arguments. The kernel driver is responsible to parse this structure and
validate its contents.

v2: change void* to uint64_t inside ioctl arguments

Signed-off-by: Yair Shachar <yair.shachar@amd.com>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 54 ++++++++++++++++++++++++++++++++
 include/uapi/linux/kfd_ioctl.h           | 43 +++++++++++++++++++++++--
 2 files changed, 95 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index b2c6109bd7af..b358e910378f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -432,6 +432,48 @@ out:
 	return err;
 }
 
+static int kfd_ioctl_dbg_register(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	long status = -EFAULT;
+
+	return status;
+}
+
+static int kfd_ioctl_dbg_unrgesiter(struct file *filep,
+				struct kfd_process *p, void *data)
+{
+	long status = -EFAULT;
+
+	return status;
+}
+
+/*
+ * Parse and generate variable size data structure for address watch.
+ * Total size of the buffer and # watch points is limited in order
+ * to prevent kernel abuse. (no bearing to the much smaller HW limitation
+ * which is enforced by dbgdev module)
+ * please also note that the watch address itself are not "copied from user",
+ * since it be set into the HW in user mode values.
+ *
+ */
+static int kfd_ioctl_dbg_address_watch(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	long status = -EFAULT;
+
+	return status;
+}
+
+/* Parse and generate fixed size data structure for wave control */
+static int kfd_ioctl_dbg_wave_control(struct file *filep,
+					struct kfd_process *p, void *data)
+{
+	long status = -EFAULT;
+
+	return status;
+}
+
 static int kfd_ioctl_get_clock_counters(struct file *filep,
 				struct kfd_process *p, void *data)
 {
@@ -612,6 +654,18 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
 			kfd_ioctl_wait_events, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
+			kfd_ioctl_dbg_register, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
+			kfd_ioctl_dbg_unrgesiter, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
+			kfd_ioctl_dbg_address_watch, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
+			kfd_ioctl_dbg_wave_control, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 4ca35a8f9891..d6833426fdef 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -128,6 +128,32 @@ struct kfd_ioctl_get_process_apertures_args {
 	uint32_t pad;
 };
 
+#define MAX_ALLOWED_NUM_POINTS    100
+#define MAX_ALLOWED_AW_BUFF_SIZE 4096
+#define MAX_ALLOWED_WAC_BUFF_SIZE  128
+
+struct kfd_ioctl_dbg_register_args {
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_unregister_args {
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t pad;
+};
+
+struct kfd_ioctl_dbg_address_watch_args {
+	uint64_t content_ptr;		/* a pointer to the actual content */
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+};
+
+struct kfd_ioctl_dbg_wave_control_args {
+	uint64_t content_ptr;		/* a pointer to the actual content */
+	uint32_t gpu_id;		/* to KFD */
+	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+};
+
 /* Matching HSA_EVENTTYPE */
 #define KFD_IOC_EVENT_SIGNAL			0
 #define KFD_IOC_EVENT_NODECHANGE		1
@@ -198,7 +224,8 @@ struct kfd_event_data {
 };
 
 struct kfd_ioctl_wait_events_args {
-	uint64_t events_ptr;		/* to KFD */
+	uint64_t events_ptr;		/* pointed to struct
+					   kfd_event_data array, to KFD */
 	uint32_t num_events;		/* to KFD */
 	uint32_t wait_for_all;		/* to KFD */
 	uint32_t timeout;		/* to KFD */
@@ -247,7 +274,19 @@ struct kfd_ioctl_wait_events_args {
 #define AMDKFD_IOC_WAIT_EVENTS			\
 		AMDKFD_IOWR(0x0C, struct kfd_ioctl_wait_events_args)
 
+#define AMDKFD_IOC_DBG_REGISTER			\
+		AMDKFD_IOW(0x0D, struct kfd_ioctl_dbg_register_args)
+
+#define AMDKFD_IOC_DBG_UNREGISTER		\
+		AMDKFD_IOW(0x0E, struct kfd_ioctl_dbg_unregister_args)
+
+#define AMDKFD_IOC_DBG_ADDRESS_WATCH		\
+		AMDKFD_IOW(0x0F, struct kfd_ioctl_dbg_address_watch_args)
+
+#define AMDKFD_IOC_DBG_WAVE_CONTROL		\
+		AMDKFD_IOW(0x10, struct kfd_ioctl_dbg_wave_control_args)
+
 #define AMDKFD_COMMAND_START		0x01
-#define AMDKFD_COMMAND_END		0x0D
+#define AMDKFD_COMMAND_END		0x11
 
 #endif
-- 
cgit 


From dc5698e80cf724770283e10414054662bdf6ccfa Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@gmail.com>
Date: Mon, 9 Sep 2013 10:02:56 +1000
Subject: Add virtio gpu driver.

This patch adds a kms driver for the virtio gpu.  The xorg modesetting
driver can handle the device just fine, the framebuffer for fbcon is
there too.

Qemu patches for the host side are under review currently.

The pci version of the device comes in two variants: with and without
vga compatibility.  The former has a extra memory bar for the vga
framebuffer, the later is a pure virtio device.  The only concern for
this driver is that in the virtio-vga case we have to kick out the
firmware framebuffer.

Initial revision has only 2d support, 3d (virgl) support requires
some more work on the qemu side and will be added later.

Signed-off-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
---
 drivers/gpu/drm/Kconfig                  |   2 +
 drivers/gpu/drm/Makefile                 |   1 +
 drivers/gpu/drm/virtio/Kconfig           |  14 +
 drivers/gpu/drm/virtio/Makefile          |  11 +
 drivers/gpu/drm/virtio/virtgpu_debugfs.c |  64 ++++
 drivers/gpu/drm/virtio/virtgpu_display.c | 473 ++++++++++++++++++++++++
 drivers/gpu/drm/virtio/virtgpu_drm_bus.c |  91 +++++
 drivers/gpu/drm/virtio/virtgpu_drv.c     | 136 +++++++
 drivers/gpu/drm/virtio/virtgpu_drv.h     | 350 ++++++++++++++++++
 drivers/gpu/drm/virtio/virtgpu_fb.c      | 431 ++++++++++++++++++++++
 drivers/gpu/drm/virtio/virtgpu_fence.c   | 119 ++++++
 drivers/gpu/drm/virtio/virtgpu_gem.c     | 140 +++++++
 drivers/gpu/drm/virtio/virtgpu_kms.c     | 173 +++++++++
 drivers/gpu/drm/virtio/virtgpu_object.c  | 170 +++++++++
 drivers/gpu/drm/virtio/virtgpu_plane.c   | 120 ++++++
 drivers/gpu/drm/virtio/virtgpu_ttm.c     | 469 +++++++++++++++++++++++
 drivers/gpu/drm/virtio/virtgpu_vq.c      | 614 +++++++++++++++++++++++++++++++
 include/drm/drmP.h                       |   1 +
 include/uapi/linux/Kbuild                |   1 +
 include/uapi/linux/virtio_gpu.h          | 204 ++++++++++
 include/uapi/linux/virtio_ids.h          |   1 +
 21 files changed, 3585 insertions(+)
 create mode 100644 drivers/gpu/drm/virtio/Kconfig
 create mode 100644 drivers/gpu/drm/virtio/Makefile
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_debugfs.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_display.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_drm_bus.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_drv.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_drv.h
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_fb.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_fence.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_gem.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_kms.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_object.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_plane.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_ttm.c
 create mode 100644 drivers/gpu/drm/virtio/virtgpu_vq.c
 create mode 100644 include/uapi/linux/virtio_gpu.h

(limited to 'include/uapi/linux')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 47f2ce81b412..d4b65457122d 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -206,6 +206,8 @@ source "drivers/gpu/drm/qxl/Kconfig"
 
 source "drivers/gpu/drm/bochs/Kconfig"
 
+source "drivers/gpu/drm/virtio/Kconfig"
+
 source "drivers/gpu/drm/msm/Kconfig"
 
 source "drivers/gpu/drm/tegra/Kconfig"
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 4de8d9b006ec..8e75f8194bde 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -61,6 +61,7 @@ obj-$(CONFIG_DRM_OMAP)	+= omapdrm/
 obj-$(CONFIG_DRM_TILCDC)	+= tilcdc/
 obj-$(CONFIG_DRM_QXL) += qxl/
 obj-$(CONFIG_DRM_BOCHS) += bochs/
+obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio/
 obj-$(CONFIG_DRM_MSM) += msm/
 obj-$(CONFIG_DRM_TEGRA) += tegra/
 obj-$(CONFIG_DRM_STI) += sti/
diff --git a/drivers/gpu/drm/virtio/Kconfig b/drivers/gpu/drm/virtio/Kconfig
new file mode 100644
index 000000000000..9983eadb81b6
--- /dev/null
+++ b/drivers/gpu/drm/virtio/Kconfig
@@ -0,0 +1,14 @@
+config DRM_VIRTIO_GPU
+	tristate "Virtio GPU driver"
+	depends on DRM && VIRTIO
+	select FB_SYS_FILLRECT
+	select FB_SYS_COPYAREA
+	select FB_SYS_IMAGEBLIT
+        select DRM_KMS_HELPER
+        select DRM_KMS_FB_HELPER
+        select DRM_TTM
+	help
+	   This is the virtual GPU driver for virtio.  It can be used with
+           QEMU based VMMs (like KVM or Xen).
+
+	   If unsure say M.
diff --git a/drivers/gpu/drm/virtio/Makefile b/drivers/gpu/drm/virtio/Makefile
new file mode 100644
index 000000000000..2ee1602d77d4
--- /dev/null
+++ b/drivers/gpu/drm/virtio/Makefile
@@ -0,0 +1,11 @@
+#
+# Makefile for the drm device driver.  This driver provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+ccflags-y := -Iinclude/drm
+
+virtio-gpu-y := virtgpu_drv.o virtgpu_kms.o virtgpu_drm_bus.o virtgpu_gem.o \
+	virtgpu_fb.o virtgpu_display.o virtgpu_vq.o virtgpu_ttm.o \
+	virtgpu_fence.o virtgpu_object.o virtgpu_debugfs.o virtgpu_plane.o
+
+obj-$(CONFIG_DRM_VIRTIO_GPU) += virtio-gpu.o
diff --git a/drivers/gpu/drm/virtio/virtgpu_debugfs.c b/drivers/gpu/drm/virtio/virtgpu_debugfs.c
new file mode 100644
index 000000000000..db8b49101a8b
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_debugfs.c
@@ -0,0 +1,64 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/debugfs.h>
+
+#include "drmP.h"
+#include "virtgpu_drv.h"
+
+static int
+virtio_gpu_debugfs_irq_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct virtio_gpu_device *vgdev = node->minor->dev->dev_private;
+
+	seq_printf(m, "fence %ld %lld\n",
+		   atomic64_read(&vgdev->fence_drv.last_seq),
+		   vgdev->fence_drv.sync_seq);
+	return 0;
+}
+
+static struct drm_info_list virtio_gpu_debugfs_list[] = {
+	{ "irq_fence", virtio_gpu_debugfs_irq_info, 0, NULL },
+};
+
+#define VIRTIO_GPU_DEBUGFS_ENTRIES ARRAY_SIZE(virtio_gpu_debugfs_list)
+
+int
+virtio_gpu_debugfs_init(struct drm_minor *minor)
+{
+	drm_debugfs_create_files(virtio_gpu_debugfs_list,
+				 VIRTIO_GPU_DEBUGFS_ENTRIES,
+				 minor->debugfs_root, minor);
+	return 0;
+}
+
+void
+virtio_gpu_debugfs_takedown(struct drm_minor *minor)
+{
+	drm_debugfs_remove_files(virtio_gpu_debugfs_list,
+				 VIRTIO_GPU_DEBUGFS_ENTRIES,
+				 minor);
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_display.c b/drivers/gpu/drm/virtio/virtgpu_display.c
new file mode 100644
index 000000000000..4e160efc9402
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_display.c
@@ -0,0 +1,473 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Authors:
+ *    Dave Airlie
+ *    Alon Levy
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "virtgpu_drv.h"
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_atomic_helper.h>
+
+#define XRES_MIN   320
+#define YRES_MIN   200
+
+#define XRES_DEF  1024
+#define YRES_DEF   768
+
+#define XRES_MAX  8192
+#define YRES_MAX  8192
+
+static void virtio_gpu_crtc_gamma_set(struct drm_crtc *crtc,
+				      u16 *red, u16 *green, u16 *blue,
+				      uint32_t start, uint32_t size)
+{
+	/* TODO */
+}
+
+static void
+virtio_gpu_hide_cursor(struct virtio_gpu_device *vgdev,
+		       struct virtio_gpu_output *output)
+{
+	output->cursor.hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_UPDATE_CURSOR);
+	output->cursor.resource_id = 0;
+	virtio_gpu_cursor_ping(vgdev, output);
+}
+
+static int virtio_gpu_crtc_cursor_set(struct drm_crtc *crtc,
+				      struct drm_file *file_priv,
+				      uint32_t handle,
+				      uint32_t width,
+				      uint32_t height,
+				      int32_t hot_x, int32_t hot_y)
+{
+	struct virtio_gpu_device *vgdev = crtc->dev->dev_private;
+	struct virtio_gpu_output *output =
+		container_of(crtc, struct virtio_gpu_output, crtc);
+	struct drm_gem_object *gobj = NULL;
+	struct virtio_gpu_object *qobj = NULL;
+	struct virtio_gpu_fence *fence = NULL;
+	int ret = 0;
+
+	if (handle == 0) {
+		virtio_gpu_hide_cursor(vgdev, output);
+		return 0;
+	}
+
+	/* lookup the cursor */
+	gobj = drm_gem_object_lookup(crtc->dev, file_priv, handle);
+	if (gobj == NULL)
+		return -ENOENT;
+
+	qobj = gem_to_virtio_gpu_obj(gobj);
+
+	if (!qobj->hw_res_handle) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	virtio_gpu_cmd_transfer_to_host_2d(vgdev, qobj->hw_res_handle, 0,
+					   cpu_to_le32(64),
+					   cpu_to_le32(64),
+					   0, 0, &fence);
+
+	output->cursor.hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_UPDATE_CURSOR);
+	output->cursor.resource_id = cpu_to_le32(qobj->hw_res_handle);
+	output->cursor.hot_x = cpu_to_le32(hot_x);
+	output->cursor.hot_y = cpu_to_le32(hot_y);
+	virtio_gpu_cursor_ping(vgdev, output);
+	ret = 0;
+
+out:
+	drm_gem_object_unreference_unlocked(gobj);
+	return ret;
+}
+
+static int virtio_gpu_crtc_cursor_move(struct drm_crtc *crtc,
+				    int x, int y)
+{
+	struct virtio_gpu_device *vgdev = crtc->dev->dev_private;
+	struct virtio_gpu_output *output =
+		container_of(crtc, struct virtio_gpu_output, crtc);
+
+	output->cursor.hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_MOVE_CURSOR);
+	output->cursor.pos.x = cpu_to_le32(x);
+	output->cursor.pos.y = cpu_to_le32(y);
+	virtio_gpu_cursor_ping(vgdev, output);
+	return 0;
+}
+
+static const struct drm_crtc_funcs virtio_gpu_crtc_funcs = {
+	.cursor_set2            = virtio_gpu_crtc_cursor_set,
+	.cursor_move            = virtio_gpu_crtc_cursor_move,
+	.gamma_set              = virtio_gpu_crtc_gamma_set,
+	.set_config             = drm_atomic_helper_set_config,
+	.destroy                = drm_crtc_cleanup,
+
+#if 0 /* not (yet) working without vblank support according to docs */
+	.page_flip              = drm_atomic_helper_page_flip,
+#endif
+	.reset                  = drm_atomic_helper_crtc_reset,
+	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+	.atomic_destroy_state   = drm_atomic_helper_crtc_destroy_state,
+};
+
+static void virtio_gpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
+{
+	struct virtio_gpu_framebuffer *virtio_gpu_fb
+		= to_virtio_gpu_framebuffer(fb);
+
+	if (virtio_gpu_fb->obj)
+		drm_gem_object_unreference_unlocked(virtio_gpu_fb->obj);
+	drm_framebuffer_cleanup(fb);
+	kfree(virtio_gpu_fb);
+}
+
+static int
+virtio_gpu_framebuffer_surface_dirty(struct drm_framebuffer *fb,
+				     struct drm_file *file_priv,
+				     unsigned flags, unsigned color,
+				     struct drm_clip_rect *clips,
+				     unsigned num_clips)
+{
+	struct virtio_gpu_framebuffer *virtio_gpu_fb
+		= to_virtio_gpu_framebuffer(fb);
+
+	return virtio_gpu_surface_dirty(virtio_gpu_fb, clips, num_clips);
+}
+
+static const struct drm_framebuffer_funcs virtio_gpu_fb_funcs = {
+	.destroy = virtio_gpu_user_framebuffer_destroy,
+	.dirty = virtio_gpu_framebuffer_surface_dirty,
+};
+
+int
+virtio_gpu_framebuffer_init(struct drm_device *dev,
+			    struct virtio_gpu_framebuffer *vgfb,
+			    struct drm_mode_fb_cmd2 *mode_cmd,
+			    struct drm_gem_object *obj)
+{
+	int ret;
+	struct virtio_gpu_object *bo;
+	vgfb->obj = obj;
+
+	bo = gem_to_virtio_gpu_obj(obj);
+
+	ret = drm_framebuffer_init(dev, &vgfb->base, &virtio_gpu_fb_funcs);
+	if (ret) {
+		vgfb->obj = NULL;
+		return ret;
+	}
+	drm_helper_mode_fill_fb_struct(&vgfb->base, mode_cmd);
+
+	spin_lock_init(&vgfb->dirty_lock);
+	vgfb->x1 = vgfb->y1 = INT_MAX;
+	vgfb->x2 = vgfb->y2 = 0;
+	return 0;
+}
+
+static bool virtio_gpu_crtc_mode_fixup(struct drm_crtc *crtc,
+				       const struct drm_display_mode *mode,
+				       struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void virtio_gpu_crtc_mode_set_nofb(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc);
+
+	virtio_gpu_cmd_set_scanout(vgdev, output->index, 0,
+				   crtc->mode.hdisplay,
+				   crtc->mode.vdisplay, 0, 0);
+}
+
+static void virtio_gpu_crtc_enable(struct drm_crtc *crtc)
+{
+}
+
+static void virtio_gpu_crtc_disable(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(crtc);
+
+	virtio_gpu_cmd_set_scanout(vgdev, output->index, 0, 0, 0, 0, 0);
+}
+
+static int virtio_gpu_crtc_atomic_check(struct drm_crtc *crtc,
+					struct drm_crtc_state *state)
+{
+	return 0;
+}
+
+static const struct drm_crtc_helper_funcs virtio_gpu_crtc_helper_funcs = {
+	.enable        = virtio_gpu_crtc_enable,
+	.disable       = virtio_gpu_crtc_disable,
+	.mode_fixup    = virtio_gpu_crtc_mode_fixup,
+	.mode_set_nofb = virtio_gpu_crtc_mode_set_nofb,
+	.atomic_check  = virtio_gpu_crtc_atomic_check,
+};
+
+static bool virtio_gpu_enc_mode_fixup(struct drm_encoder *encoder,
+				      const struct drm_display_mode *mode,
+				      struct drm_display_mode *adjusted_mode)
+{
+	return true;
+}
+
+static void virtio_gpu_enc_mode_set(struct drm_encoder *encoder,
+				    struct drm_display_mode *mode,
+				    struct drm_display_mode *adjusted_mode)
+{
+}
+
+static void virtio_gpu_enc_enable(struct drm_encoder *encoder)
+{
+}
+
+static void virtio_gpu_enc_disable(struct drm_encoder *encoder)
+{
+}
+
+static int virtio_gpu_conn_get_modes(struct drm_connector *connector)
+{
+	struct virtio_gpu_output *output =
+		drm_connector_to_virtio_gpu_output(connector);
+	struct drm_display_mode *mode = NULL;
+	int count, width, height;
+
+	width  = le32_to_cpu(output->info.r.width);
+	height = le32_to_cpu(output->info.r.height);
+	count = drm_add_modes_noedid(connector, XRES_MAX, YRES_MAX);
+
+	if (width == 0 || height == 0) {
+		width = XRES_DEF;
+		height = YRES_DEF;
+		drm_set_preferred_mode(connector, XRES_DEF, YRES_DEF);
+	} else {
+		DRM_DEBUG("add mode: %dx%d\n", width, height);
+		mode = drm_cvt_mode(connector->dev, width, height, 60,
+				    false, false, false);
+		mode->type |= DRM_MODE_TYPE_PREFERRED;
+		drm_mode_probed_add(connector, mode);
+		count++;
+	}
+
+	return count;
+}
+
+static int virtio_gpu_conn_mode_valid(struct drm_connector *connector,
+				      struct drm_display_mode *mode)
+{
+	struct virtio_gpu_output *output =
+		drm_connector_to_virtio_gpu_output(connector);
+	int width, height;
+
+	width  = le32_to_cpu(output->info.r.width);
+	height = le32_to_cpu(output->info.r.height);
+
+	if (!(mode->type & DRM_MODE_TYPE_PREFERRED))
+		return MODE_OK;
+	if (mode->hdisplay == XRES_DEF && mode->vdisplay == YRES_DEF)
+		return MODE_OK;
+	if (mode->hdisplay <= width  && mode->hdisplay >= width - 16 &&
+	    mode->vdisplay <= height && mode->vdisplay >= height - 16)
+		return MODE_OK;
+
+	DRM_DEBUG("del mode: %dx%d\n", mode->hdisplay, mode->vdisplay);
+	return MODE_BAD;
+}
+
+static struct drm_encoder*
+virtio_gpu_best_encoder(struct drm_connector *connector)
+{
+	struct virtio_gpu_output *virtio_gpu_output =
+		drm_connector_to_virtio_gpu_output(connector);
+
+	return &virtio_gpu_output->enc;
+}
+
+static const struct drm_encoder_helper_funcs virtio_gpu_enc_helper_funcs = {
+	.mode_fixup = virtio_gpu_enc_mode_fixup,
+	.mode_set   = virtio_gpu_enc_mode_set,
+	.enable     = virtio_gpu_enc_enable,
+	.disable    = virtio_gpu_enc_disable,
+};
+
+static const struct drm_connector_helper_funcs virtio_gpu_conn_helper_funcs = {
+	.get_modes    = virtio_gpu_conn_get_modes,
+	.mode_valid   = virtio_gpu_conn_mode_valid,
+	.best_encoder = virtio_gpu_best_encoder,
+};
+
+static void virtio_gpu_conn_save(struct drm_connector *connector)
+{
+	DRM_DEBUG("\n");
+}
+
+static void virtio_gpu_conn_restore(struct drm_connector *connector)
+{
+	DRM_DEBUG("\n");
+}
+
+static enum drm_connector_status virtio_gpu_conn_detect(
+			struct drm_connector *connector,
+			bool force)
+{
+	struct virtio_gpu_output *output =
+		drm_connector_to_virtio_gpu_output(connector);
+
+	if (output->info.enabled)
+		return connector_status_connected;
+	else
+		return connector_status_disconnected;
+}
+
+static void virtio_gpu_conn_destroy(struct drm_connector *connector)
+{
+	struct virtio_gpu_output *virtio_gpu_output =
+		drm_connector_to_virtio_gpu_output(connector);
+
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+	kfree(virtio_gpu_output);
+}
+
+static const struct drm_connector_funcs virtio_gpu_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.save = virtio_gpu_conn_save,
+	.restore = virtio_gpu_conn_restore,
+	.detect = virtio_gpu_conn_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = virtio_gpu_conn_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_encoder_funcs virtio_gpu_enc_funcs = {
+	.destroy = drm_encoder_cleanup,
+};
+
+static int vgdev_output_init(struct virtio_gpu_device *vgdev, int index)
+{
+	struct drm_device *dev = vgdev->ddev;
+	struct virtio_gpu_output *output = vgdev->outputs + index;
+	struct drm_connector *connector = &output->conn;
+	struct drm_encoder *encoder = &output->enc;
+	struct drm_crtc *crtc = &output->crtc;
+	struct drm_plane *plane;
+
+	output->index = index;
+	if (index == 0) {
+		output->info.enabled = cpu_to_le32(true);
+		output->info.r.width = cpu_to_le32(XRES_DEF);
+		output->info.r.height = cpu_to_le32(YRES_DEF);
+	}
+
+	plane = virtio_gpu_plane_init(vgdev, index);
+	if (IS_ERR(plane))
+		return PTR_ERR(plane);
+	drm_crtc_init_with_planes(dev, crtc, plane, NULL,
+				  &virtio_gpu_crtc_funcs);
+	drm_mode_crtc_set_gamma_size(crtc, 256);
+	drm_crtc_helper_add(crtc, &virtio_gpu_crtc_helper_funcs);
+	plane->crtc = crtc;
+
+	drm_connector_init(dev, connector, &virtio_gpu_connector_funcs,
+			   DRM_MODE_CONNECTOR_VIRTUAL);
+	drm_connector_helper_add(connector, &virtio_gpu_conn_helper_funcs);
+
+	drm_encoder_init(dev, encoder, &virtio_gpu_enc_funcs,
+			 DRM_MODE_ENCODER_VIRTUAL);
+	drm_encoder_helper_add(encoder, &virtio_gpu_enc_helper_funcs);
+	encoder->possible_crtcs = 1 << index;
+
+	drm_mode_connector_attach_encoder(connector, encoder);
+	drm_connector_register(connector);
+	return 0;
+}
+
+static struct drm_framebuffer *
+virtio_gpu_user_framebuffer_create(struct drm_device *dev,
+				   struct drm_file *file_priv,
+				   struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct drm_gem_object *obj = NULL;
+	struct virtio_gpu_framebuffer *virtio_gpu_fb;
+	int ret;
+
+	/* lookup object associated with res handle */
+	obj = drm_gem_object_lookup(dev, file_priv, mode_cmd->handles[0]);
+	if (!obj)
+		return ERR_PTR(-EINVAL);
+
+	virtio_gpu_fb = kzalloc(sizeof(*virtio_gpu_fb), GFP_KERNEL);
+	if (virtio_gpu_fb == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ret = virtio_gpu_framebuffer_init(dev, virtio_gpu_fb, mode_cmd, obj);
+	if (ret) {
+		kfree(virtio_gpu_fb);
+		if (obj)
+			drm_gem_object_unreference_unlocked(obj);
+		return NULL;
+	}
+
+	return &virtio_gpu_fb->base;
+}
+
+static const struct drm_mode_config_funcs virtio_gpu_mode_funcs = {
+	.fb_create = virtio_gpu_user_framebuffer_create,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev)
+{
+	int i;
+
+	drm_mode_config_init(vgdev->ddev);
+	vgdev->ddev->mode_config.funcs = (void *)&virtio_gpu_mode_funcs;
+
+	/* modes will be validated against the framebuffer size */
+	vgdev->ddev->mode_config.min_width = XRES_MIN;
+	vgdev->ddev->mode_config.min_height = YRES_MIN;
+	vgdev->ddev->mode_config.max_width = XRES_MAX;
+	vgdev->ddev->mode_config.max_height = YRES_MAX;
+
+	for (i = 0 ; i < vgdev->num_scanouts; ++i)
+		vgdev_output_init(vgdev, i);
+
+        drm_mode_config_reset(vgdev->ddev);
+	return 0;
+}
+
+void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev)
+{
+	virtio_gpu_fbdev_fini(vgdev);
+	drm_mode_config_cleanup(vgdev->ddev);
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_drm_bus.c b/drivers/gpu/drm/virtio/virtgpu_drm_bus.c
new file mode 100644
index 000000000000..f4ec816e9468
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_drm_bus.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/pci.h>
+
+#include "virtgpu_drv.h"
+
+int drm_virtio_set_busid(struct drm_device *dev, struct drm_master *master)
+{
+	struct pci_dev *pdev = dev->pdev;
+
+	if (pdev) {
+		return drm_pci_set_busid(dev, master);
+	}
+	return 0;
+}
+
+int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev)
+{
+	struct drm_device *dev;
+	int ret;
+
+	dev = drm_dev_alloc(driver, &vdev->dev);
+	if (!dev)
+		return -ENOMEM;
+	dev->virtdev = vdev;
+	vdev->priv = dev;
+
+	if (strcmp(vdev->dev.parent->bus->name, "pci") == 0) {
+		struct pci_dev *pdev = to_pci_dev(vdev->dev.parent);
+		bool vga = (pdev->class >> 8) == PCI_CLASS_DISPLAY_VGA;
+
+		if (vga) {
+			/*
+			 * Need to make sure we don't have two drivers
+			 * for the same hardware here.  Some day we
+			 * will simply kick out the firmware
+			 * (vesa/efi) framebuffer.
+			 *
+			 * Virtual hardware specs for virtio-vga are
+			 * not finalized yet, therefore we can't add
+			 * code for that yet.
+			 *
+			 * So ignore the device for the time being,
+			 * and suggest to the user use the device
+			 * variant without vga compatibility mode.
+			 */
+			DRM_ERROR("virtio-vga not (yet) supported\n");
+			DRM_ERROR("please use virtio-gpu-pci instead\n");
+			ret = -ENODEV;
+			goto err_free;
+		}
+		dev->pdev = pdev;
+	}
+
+	ret = drm_dev_register(dev, 0);
+	if (ret)
+		goto err_free;
+
+	DRM_INFO("Initialized %s %d.%d.%d %s on minor %d\n", driver->name,
+		 driver->major, driver->minor, driver->patchlevel,
+		 driver->date, dev->primary->index);
+
+	return 0;
+
+err_free:
+	drm_dev_unref(dev);
+	return ret;
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
new file mode 100644
index 000000000000..7d9610aaeff9
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Authors:
+ *    Dave Airlie <airlied@redhat.com>
+ *    Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/pci.h>
+#include "drmP.h"
+#include "drm/drm.h"
+
+#include "virtgpu_drv.h"
+static struct drm_driver driver;
+
+static int virtio_gpu_modeset = -1;
+
+MODULE_PARM_DESC(modeset, "Disable/Enable modesetting");
+module_param_named(modeset, virtio_gpu_modeset, int, 0400);
+
+static int virtio_gpu_probe(struct virtio_device *vdev)
+{
+#ifdef CONFIG_VGA_CONSOLE
+	if (vgacon_text_force() && virtio_gpu_modeset == -1)
+		return -EINVAL;
+#endif
+
+	if (virtio_gpu_modeset == 0)
+		return -EINVAL;
+
+	return drm_virtio_init(&driver, vdev);
+}
+
+static void virtio_gpu_remove(struct virtio_device *vdev)
+{
+	struct drm_device *dev = vdev->priv;
+	drm_put_dev(dev);
+}
+
+static void virtio_gpu_config_changed(struct virtio_device *vdev)
+{
+	struct drm_device *dev = vdev->priv;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+
+	schedule_work(&vgdev->config_changed_work);
+}
+
+static struct virtio_device_id id_table[] = {
+	{ VIRTIO_ID_GPU, VIRTIO_DEV_ANY_ID },
+	{ 0 },
+};
+
+static unsigned int features[] = {
+};
+static struct virtio_driver virtio_gpu_driver = {
+	.feature_table = features,
+	.feature_table_size = ARRAY_SIZE(features),
+	.driver.name = KBUILD_MODNAME,
+	.driver.owner = THIS_MODULE,
+	.id_table = id_table,
+	.probe = virtio_gpu_probe,
+	.remove = virtio_gpu_remove,
+	.config_changed = virtio_gpu_config_changed
+};
+
+module_virtio_driver(virtio_gpu_driver);
+
+MODULE_DEVICE_TABLE(virtio, id_table);
+MODULE_DESCRIPTION("Virtio GPU driver");
+MODULE_LICENSE("GPL and additional rights");
+MODULE_AUTHOR("Dave Airlie <airlied@redhat.com>");
+MODULE_AUTHOR("Gerd Hoffmann <kraxel@redhat.com>");
+MODULE_AUTHOR("Alon Levy");
+
+static const struct file_operations virtio_gpu_driver_fops = {
+	.owner = THIS_MODULE,
+	.open = drm_open,
+	.mmap = virtio_gpu_mmap,
+	.poll = drm_poll,
+	.read = drm_read,
+	.unlocked_ioctl	= drm_ioctl,
+	.release = drm_release,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl = drm_compat_ioctl,
+#endif
+	.llseek = noop_llseek,
+};
+
+
+static struct drm_driver driver = {
+	.driver_features = DRIVER_MODESET | DRIVER_GEM,
+	.set_busid = drm_virtio_set_busid,
+	.load = virtio_gpu_driver_load,
+	.unload = virtio_gpu_driver_unload,
+
+	.dumb_create = virtio_gpu_mode_dumb_create,
+	.dumb_map_offset = virtio_gpu_mode_dumb_mmap,
+	.dumb_destroy = virtio_gpu_mode_dumb_destroy,
+
+#if defined(CONFIG_DEBUG_FS)
+	.debugfs_init = virtio_gpu_debugfs_init,
+	.debugfs_cleanup = virtio_gpu_debugfs_takedown,
+#endif
+
+	.gem_free_object = virtio_gpu_gem_free_object,
+	.fops = &virtio_gpu_driver_fops,
+
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
new file mode 100644
index 000000000000..e5a2c092460b
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef VIRTIO_DRV_H
+#define VIRTIO_DRV_H
+
+#include <linux/virtio.h>
+#include <linux/virtio_ids.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_gpu.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_crtc_helper.h>
+#include <ttm/ttm_bo_api.h>
+#include <ttm/ttm_bo_driver.h>
+#include <ttm/ttm_placement.h>
+#include <ttm/ttm_module.h>
+
+#define DRIVER_NAME "virtio_gpu"
+#define DRIVER_DESC "virtio GPU"
+#define DRIVER_DATE "0"
+
+#define DRIVER_MAJOR 0
+#define DRIVER_MINOR 0
+#define DRIVER_PATCHLEVEL 1
+
+/* virtgpu_drm_bus.c */
+int drm_virtio_set_busid(struct drm_device *dev, struct drm_master *master);
+int drm_virtio_init(struct drm_driver *driver, struct virtio_device *vdev);
+
+struct virtio_gpu_object {
+	struct drm_gem_object gem_base;
+	uint32_t hw_res_handle;
+
+	struct sg_table *pages;
+	void *vmap;
+	bool dumb;
+	struct ttm_place                placement_code;
+	struct ttm_placement		placement;
+	struct ttm_buffer_object	tbo;
+	struct ttm_bo_kmap_obj		kmap;
+};
+#define gem_to_virtio_gpu_obj(gobj) \
+	container_of((gobj), struct virtio_gpu_object, gem_base)
+
+struct virtio_gpu_vbuffer;
+struct virtio_gpu_device;
+
+typedef void (*virtio_gpu_resp_cb)(struct virtio_gpu_device *vgdev,
+				   struct virtio_gpu_vbuffer *vbuf);
+
+struct virtio_gpu_fence_driver {
+	atomic64_t       last_seq;
+	uint64_t         sync_seq;
+	struct list_head fences;
+	spinlock_t       lock;
+};
+
+struct virtio_gpu_fence {
+	struct fence f;
+	struct virtio_gpu_fence_driver *drv;
+	struct list_head node;
+	uint64_t seq;
+};
+#define to_virtio_fence(x) \
+	container_of(x, struct virtio_gpu_fence, f)
+
+struct virtio_gpu_vbuffer {
+	char *buf;
+	int size;
+
+	void *data_buf;
+	uint32_t data_size;
+
+	char *resp_buf;
+	int resp_size;
+
+	virtio_gpu_resp_cb resp_cb;
+
+	struct list_head list;
+};
+
+struct virtio_gpu_output {
+	int index;
+	struct drm_crtc crtc;
+	struct drm_connector conn;
+	struct drm_encoder enc;
+	struct virtio_gpu_display_one info;
+	struct virtio_gpu_update_cursor cursor;
+	int cur_x;
+	int cur_y;
+};
+#define drm_crtc_to_virtio_gpu_output(x) \
+	container_of(x, struct virtio_gpu_output, crtc)
+#define drm_connector_to_virtio_gpu_output(x) \
+	container_of(x, struct virtio_gpu_output, conn)
+#define drm_encoder_to_virtio_gpu_output(x) \
+	container_of(x, struct virtio_gpu_output, enc)
+
+struct virtio_gpu_framebuffer {
+	struct drm_framebuffer base;
+	struct drm_gem_object *obj;
+	int x1, y1, x2, y2; /* dirty rect */
+	spinlock_t dirty_lock;
+	uint32_t hw_res_handle;
+};
+#define to_virtio_gpu_framebuffer(x) \
+	container_of(x, struct virtio_gpu_framebuffer, base)
+
+struct virtio_gpu_mman {
+	struct ttm_bo_global_ref        bo_global_ref;
+	struct drm_global_reference	mem_global_ref;
+	bool				mem_global_referenced;
+	struct ttm_bo_device		bdev;
+};
+
+struct virtio_gpu_fbdev;
+
+struct virtio_gpu_queue {
+	struct virtqueue *vq;
+	spinlock_t qlock;
+	wait_queue_head_t ack_queue;
+	struct work_struct dequeue_work;
+};
+
+struct virtio_gpu_device {
+	struct device *dev;
+	struct drm_device *ddev;
+
+	struct virtio_device *vdev;
+
+	struct virtio_gpu_mman mman;
+
+	/* pointer to fbdev info structure */
+	struct virtio_gpu_fbdev *vgfbdev;
+	struct virtio_gpu_output outputs[VIRTIO_GPU_MAX_SCANOUTS];
+	uint32_t num_scanouts;
+
+	struct virtio_gpu_queue ctrlq;
+	struct virtio_gpu_queue cursorq;
+	struct list_head free_vbufs;
+	void *vbufs;
+	bool vqs_ready;
+
+	struct idr	resource_idr;
+	spinlock_t resource_idr_lock;
+
+	wait_queue_head_t resp_wq;
+	/* current display info */
+	spinlock_t display_info_lock;
+
+	struct virtio_gpu_fence_driver fence_drv;
+
+	struct idr	ctx_id_idr;
+	spinlock_t ctx_id_idr_lock;
+
+	struct work_struct config_changed_work;
+};
+
+struct virtio_gpu_fpriv {
+	uint32_t ctx_id;
+};
+
+/* virtio_ioctl.c */
+#define DRM_VIRTIO_NUM_IOCTLS 10
+extern struct drm_ioctl_desc virtio_gpu_ioctls[DRM_VIRTIO_NUM_IOCTLS];
+
+/* virtio_kms.c */
+int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags);
+int virtio_gpu_driver_unload(struct drm_device *dev);
+
+/* virtio_gem.c */
+void virtio_gpu_gem_free_object(struct drm_gem_object *gem_obj);
+int virtio_gpu_gem_init(struct virtio_gpu_device *vgdev);
+void virtio_gpu_gem_fini(struct virtio_gpu_device *vgdev);
+int virtio_gpu_gem_create(struct drm_file *file,
+			  struct drm_device *dev,
+			  uint64_t size,
+			  struct drm_gem_object **obj_p,
+			  uint32_t *handle_p);
+struct virtio_gpu_object *virtio_gpu_alloc_object(struct drm_device *dev,
+						  size_t size, bool kernel,
+						  bool pinned);
+int virtio_gpu_mode_dumb_create(struct drm_file *file_priv,
+				struct drm_device *dev,
+				struct drm_mode_create_dumb *args);
+int virtio_gpu_mode_dumb_destroy(struct drm_file *file_priv,
+				 struct drm_device *dev,
+				 uint32_t handle);
+int virtio_gpu_mode_dumb_mmap(struct drm_file *file_priv,
+			      struct drm_device *dev,
+			      uint32_t handle, uint64_t *offset_p);
+
+/* virtio_fb */
+#define VIRTIO_GPUFB_CONN_LIMIT 1
+int virtio_gpu_fbdev_init(struct virtio_gpu_device *vgdev);
+void virtio_gpu_fbdev_fini(struct virtio_gpu_device *vgdev);
+int virtio_gpu_surface_dirty(struct virtio_gpu_framebuffer *qfb,
+			     struct drm_clip_rect *clips,
+			     unsigned num_clips);
+/* virtio vg */
+int virtio_gpu_alloc_vbufs(struct virtio_gpu_device *vgdev);
+void virtio_gpu_free_vbufs(struct virtio_gpu_device *vgdev);
+void virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev,
+			       uint32_t *resid);
+void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id);
+void virtio_gpu_cmd_create_resource(struct virtio_gpu_device *vgdev,
+				    uint32_t resource_id,
+				    uint32_t format,
+				    uint32_t width,
+				    uint32_t height);
+void virtio_gpu_cmd_unref_resource(struct virtio_gpu_device *vgdev,
+				   uint32_t resource_id);
+void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
+					uint32_t resource_id, uint64_t offset,
+					__le32 width, __le32 height,
+					__le32 x, __le32 y,
+					struct virtio_gpu_fence **fence);
+void virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev,
+				   uint32_t resource_id,
+				   uint32_t x, uint32_t y,
+				   uint32_t width, uint32_t height);
+void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev,
+				uint32_t scanout_id, uint32_t resource_id,
+				uint32_t width, uint32_t height,
+				uint32_t x, uint32_t y);
+int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
+			     struct virtio_gpu_object *obj,
+			     uint32_t resource_id,
+			     struct virtio_gpu_fence **fence);
+int virtio_gpu_attach_status_page(struct virtio_gpu_device *vgdev);
+int virtio_gpu_detach_status_page(struct virtio_gpu_device *vgdev);
+void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev,
+			    struct virtio_gpu_output *output);
+int virtio_gpu_cmd_get_display_info(struct virtio_gpu_device *vgdev);
+void virtio_gpu_cmd_resource_inval_backing(struct virtio_gpu_device *vgdev,
+					   uint32_t resource_id);
+void virtio_gpu_ctrl_ack(struct virtqueue *vq);
+void virtio_gpu_cursor_ack(struct virtqueue *vq);
+void virtio_gpu_dequeue_ctrl_func(struct work_struct *work);
+void virtio_gpu_dequeue_cursor_func(struct work_struct *work);
+
+/* virtio_gpu_display.c */
+int virtio_gpu_framebuffer_init(struct drm_device *dev,
+				struct virtio_gpu_framebuffer *vgfb,
+				struct drm_mode_fb_cmd2 *mode_cmd,
+				struct drm_gem_object *obj);
+int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
+void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
+
+/* virtio_gpu_plane.c */
+struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev,
+					int index);
+
+/* virtio_gpu_ttm.c */
+int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev);
+void virtio_gpu_ttm_fini(struct virtio_gpu_device *vgdev);
+int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma);
+
+/* virtio_gpu_fence.c */
+int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
+			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
+			  struct virtio_gpu_fence **fence);
+void virtio_gpu_fence_event_process(struct virtio_gpu_device *vdev,
+				    u64 last_seq);
+
+/* virtio_gpu_object */
+int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
+			     unsigned long size, bool kernel, bool pinned,
+			     struct virtio_gpu_object **bo_ptr);
+int virtio_gpu_object_kmap(struct virtio_gpu_object *bo, void **ptr);
+int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev,
+				   struct virtio_gpu_object *bo);
+void virtio_gpu_object_free_sg_table(struct virtio_gpu_object *bo);
+int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait);
+
+static inline struct virtio_gpu_object*
+virtio_gpu_object_ref(struct virtio_gpu_object *bo)
+{
+	ttm_bo_reference(&bo->tbo);
+	return bo;
+}
+
+static inline void virtio_gpu_object_unref(struct virtio_gpu_object **bo)
+{
+	struct ttm_buffer_object *tbo;
+
+	if ((*bo) == NULL)
+		return;
+	tbo = &((*bo)->tbo);
+	ttm_bo_unref(&tbo);
+	if (tbo == NULL)
+		*bo = NULL;
+}
+
+static inline u64 virtio_gpu_object_mmap_offset(struct virtio_gpu_object *bo)
+{
+	return drm_vma_node_offset_addr(&bo->tbo.vma_node);
+}
+
+static inline int virtio_gpu_object_reserve(struct virtio_gpu_object *bo,
+					 bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
+	if (unlikely(r != 0)) {
+		if (r != -ERESTARTSYS) {
+			struct virtio_gpu_device *qdev =
+				bo->gem_base.dev->dev_private;
+			dev_err(qdev->dev, "%p reserve failed\n", bo);
+		}
+		return r;
+	}
+	return 0;
+}
+
+static inline void virtio_gpu_object_unreserve(struct virtio_gpu_object *bo)
+{
+	ttm_bo_unreserve(&bo->tbo);
+}
+
+/* virgl debufs */
+int virtio_gpu_debugfs_init(struct drm_minor *minor);
+void virtio_gpu_debugfs_takedown(struct drm_minor *minor);
+
+#endif
diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c
new file mode 100644
index 000000000000..25bf333d5a45
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_fb.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_fb_helper.h>
+#include "virtgpu_drv.h"
+
+#define VIRTIO_GPU_FBCON_POLL_PERIOD (HZ / 60)
+
+struct virtio_gpu_fbdev {
+	struct drm_fb_helper           helper;
+	struct virtio_gpu_framebuffer  vgfb;
+	struct list_head	       fbdev_list;
+	struct virtio_gpu_device       *vgdev;
+	struct delayed_work            work;
+};
+
+static int virtio_gpu_dirty_update(struct virtio_gpu_framebuffer *fb,
+				   bool store, int x, int y,
+				   int width, int height)
+{
+	struct drm_device *dev = fb->base.dev;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	bool store_for_later = false;
+	int bpp = fb->base.bits_per_pixel / 8;
+	int x2, y2;
+	unsigned long flags;
+	struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(fb->obj);
+
+	if ((width <= 0) ||
+	    (x + width > fb->base.width) ||
+	    (y + height > fb->base.height)) {
+		DRM_DEBUG("values out of range %dx%d+%d+%d, fb %dx%d\n",
+			  width, height, x, y,
+			  fb->base.width, fb->base.height);
+		return -EINVAL;
+	}
+
+	/*
+	 * Can be called with pretty much any context (console output
+	 * path).  If we are in atomic just store the dirty rect info
+	 * to send out the update later.
+	 *
+	 * Can't test inside spin lock.
+	 */
+	if (in_atomic() || store)
+		store_for_later = true;
+
+	x2 = x + width - 1;
+	y2 = y + height - 1;
+
+	spin_lock_irqsave(&fb->dirty_lock, flags);
+
+	if (fb->y1 < y)
+		y = fb->y1;
+	if (fb->y2 > y2)
+		y2 = fb->y2;
+	if (fb->x1 < x)
+		x = fb->x1;
+	if (fb->x2 > x2)
+		x2 = fb->x2;
+
+	if (store_for_later) {
+		fb->x1 = x;
+		fb->x2 = x2;
+		fb->y1 = y;
+		fb->y2 = y2;
+		spin_unlock_irqrestore(&fb->dirty_lock, flags);
+		return 0;
+	}
+
+	fb->x1 = fb->y1 = INT_MAX;
+	fb->x2 = fb->y2 = 0;
+
+	spin_unlock_irqrestore(&fb->dirty_lock, flags);
+
+	{
+		uint32_t offset;
+		uint32_t w = x2 - x + 1;
+		uint32_t h = y2 - y + 1;
+
+		offset = (y * fb->base.pitches[0]) + x * bpp;
+
+		virtio_gpu_cmd_transfer_to_host_2d(vgdev, obj->hw_res_handle,
+						   offset,
+						   cpu_to_le32(w),
+						   cpu_to_le32(h),
+						   cpu_to_le32(x),
+						   cpu_to_le32(y),
+						   NULL);
+
+	}
+	virtio_gpu_cmd_resource_flush(vgdev, obj->hw_res_handle,
+				      x, y, x2 - x + 1, y2 - y + 1);
+	return 0;
+}
+
+int virtio_gpu_surface_dirty(struct virtio_gpu_framebuffer *vgfb,
+			     struct drm_clip_rect *clips,
+			     unsigned num_clips)
+{
+	struct virtio_gpu_device *vgdev = vgfb->base.dev->dev_private;
+	struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(vgfb->obj);
+	struct drm_clip_rect norect;
+	struct drm_clip_rect *clips_ptr;
+	int left, right, top, bottom;
+	int i;
+	int inc = 1;
+	if (!num_clips) {
+		num_clips = 1;
+		clips = &norect;
+		norect.x1 = norect.y1 = 0;
+		norect.x2 = vgfb->base.width;
+		norect.y2 = vgfb->base.height;
+	}
+	left = clips->x1;
+	right = clips->x2;
+	top = clips->y1;
+	bottom = clips->y2;
+
+	/* skip the first clip rect */
+	for (i = 1, clips_ptr = clips + inc;
+	     i < num_clips; i++, clips_ptr += inc) {
+		left = min_t(int, left, (int)clips_ptr->x1);
+		right = max_t(int, right, (int)clips_ptr->x2);
+		top = min_t(int, top, (int)clips_ptr->y1);
+		bottom = max_t(int, bottom, (int)clips_ptr->y2);
+	}
+
+	if (obj->dumb)
+		return virtio_gpu_dirty_update(vgfb, false, left, top,
+					       right - left, bottom - top);
+
+	virtio_gpu_cmd_resource_flush(vgdev, obj->hw_res_handle,
+				      left, top, right - left, bottom - top);
+	return 0;
+}
+
+static void virtio_gpu_fb_dirty_work(struct work_struct *work)
+{
+	struct delayed_work *delayed_work = to_delayed_work(work);
+	struct virtio_gpu_fbdev *vfbdev =
+		container_of(delayed_work, struct virtio_gpu_fbdev, work);
+	struct virtio_gpu_framebuffer *vgfb = &vfbdev->vgfb;
+
+	virtio_gpu_dirty_update(&vfbdev->vgfb, false, vgfb->x1, vgfb->y1,
+				vgfb->x2 - vgfb->x1, vgfb->y2 - vgfb->y1);
+}
+
+static void virtio_gpu_3d_fillrect(struct fb_info *info,
+				   const struct fb_fillrect *rect)
+{
+	struct virtio_gpu_fbdev *vfbdev = info->par;
+	sys_fillrect(info, rect);
+	virtio_gpu_dirty_update(&vfbdev->vgfb, true, rect->dx, rect->dy,
+			     rect->width, rect->height);
+	schedule_delayed_work(&vfbdev->work, VIRTIO_GPU_FBCON_POLL_PERIOD);
+}
+
+static void virtio_gpu_3d_copyarea(struct fb_info *info,
+				   const struct fb_copyarea *area)
+{
+	struct virtio_gpu_fbdev *vfbdev = info->par;
+	sys_copyarea(info, area);
+	virtio_gpu_dirty_update(&vfbdev->vgfb, true, area->dx, area->dy,
+			   area->width, area->height);
+	schedule_delayed_work(&vfbdev->work, VIRTIO_GPU_FBCON_POLL_PERIOD);
+}
+
+static void virtio_gpu_3d_imageblit(struct fb_info *info,
+				    const struct fb_image *image)
+{
+	struct virtio_gpu_fbdev *vfbdev = info->par;
+	sys_imageblit(info, image);
+	virtio_gpu_dirty_update(&vfbdev->vgfb, true, image->dx, image->dy,
+			     image->width, image->height);
+	schedule_delayed_work(&vfbdev->work, VIRTIO_GPU_FBCON_POLL_PERIOD);
+}
+
+static struct fb_ops virtio_gpufb_ops = {
+	.owner = THIS_MODULE,
+	.fb_check_var = drm_fb_helper_check_var,
+	.fb_set_par = drm_fb_helper_set_par, /* TODO: copy vmwgfx */
+	.fb_fillrect = virtio_gpu_3d_fillrect,
+	.fb_copyarea = virtio_gpu_3d_copyarea,
+	.fb_imageblit = virtio_gpu_3d_imageblit,
+	.fb_pan_display = drm_fb_helper_pan_display,
+	.fb_blank = drm_fb_helper_blank,
+	.fb_setcmap = drm_fb_helper_setcmap,
+	.fb_debug_enter = drm_fb_helper_debug_enter,
+	.fb_debug_leave = drm_fb_helper_debug_leave,
+};
+
+static int virtio_gpu_vmap_fb(struct virtio_gpu_device *vgdev,
+			      struct virtio_gpu_object *obj)
+{
+	return virtio_gpu_object_kmap(obj, NULL);
+}
+
+static int virtio_gpufb_create(struct drm_fb_helper *helper,
+			       struct drm_fb_helper_surface_size *sizes)
+{
+	struct virtio_gpu_fbdev *vfbdev =
+		container_of(helper, struct virtio_gpu_fbdev, helper);
+	struct drm_device *dev = helper->dev;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	struct fb_info *info;
+	struct drm_framebuffer *fb;
+	struct drm_mode_fb_cmd2 mode_cmd = {};
+	struct virtio_gpu_object *obj;
+	struct device *device = vgdev->dev;
+	uint32_t resid, format, size;
+	int ret;
+
+	mode_cmd.width = sizes->surface_width;
+	mode_cmd.height = sizes->surface_height;
+	mode_cmd.pitches[0] = mode_cmd.width * 4;
+	mode_cmd.pixel_format = drm_mode_legacy_fb_format(32, 24);
+
+	switch (mode_cmd.pixel_format) {
+#ifdef __BIG_ENDIAN
+	case DRM_FORMAT_XRGB8888:
+		format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_ARGB8888:
+		format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_BGRX8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
+		break;
+	case DRM_FORMAT_BGRA8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM;
+		break;
+	case DRM_FORMAT_RGBX8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM;
+		break;
+	case DRM_FORMAT_RGBA8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
+		break;
+	case DRM_FORMAT_XBGR8888:
+		format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM;
+		break;
+	case DRM_FORMAT_ABGR8888:
+		format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM;
+		break;
+#else
+	case DRM_FORMAT_XRGB8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
+		break;
+	case DRM_FORMAT_ARGB8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM;
+		break;
+	case DRM_FORMAT_BGRX8888:
+		format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_BGRA8888:
+		format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_RGBX8888:
+		format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM;
+		break;
+	case DRM_FORMAT_RGBA8888:
+		format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM;
+		break;
+	case DRM_FORMAT_XBGR8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM;
+		break;
+	case DRM_FORMAT_ABGR8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
+		break;
+#endif
+	default:
+		DRM_ERROR("failed to find virtio gpu format for %d\n",
+			  mode_cmd.pixel_format);
+		return -EINVAL;
+	}
+
+	size = mode_cmd.pitches[0] * mode_cmd.height;
+	obj = virtio_gpu_alloc_object(dev, size, false, true);
+	if (!obj)
+		return -ENOMEM;
+
+	virtio_gpu_resource_id_get(vgdev, &resid);
+	virtio_gpu_cmd_create_resource(vgdev, resid, format,
+				       mode_cmd.width, mode_cmd.height);
+
+	ret = virtio_gpu_vmap_fb(vgdev, obj);
+	if (ret) {
+		DRM_ERROR("failed to vmap fb %d\n", ret);
+		goto err_obj_vmap;
+	}
+
+	/* attach the object to the resource */
+	ret = virtio_gpu_object_attach(vgdev, obj, resid, NULL);
+	if (ret)
+		goto err_obj_attach;
+
+	info = framebuffer_alloc(0, device);
+	if (!info) {
+		ret = -ENOMEM;
+		goto err_fb_alloc;
+	}
+
+	ret = fb_alloc_cmap(&info->cmap, 256, 0);
+	if (ret) {
+		ret = -ENOMEM;
+		goto err_fb_alloc_cmap;
+	}
+
+	info->par = helper;
+
+	ret = virtio_gpu_framebuffer_init(dev, &vfbdev->vgfb,
+					  &mode_cmd, &obj->gem_base);
+	if (ret)
+		goto err_fb_init;
+
+	fb = &vfbdev->vgfb.base;
+
+	vfbdev->helper.fb = fb;
+	vfbdev->helper.fbdev = info;
+
+	strcpy(info->fix.id, "virtiodrmfb");
+	info->flags = FBINFO_DEFAULT;
+	info->fbops = &virtio_gpufb_ops;
+	info->pixmap.flags = FB_PIXMAP_SYSTEM;
+
+	info->screen_base = obj->vmap;
+	info->screen_size = obj->gem_base.size;
+	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
+	drm_fb_helper_fill_var(info, &vfbdev->helper,
+			       sizes->fb_width, sizes->fb_height);
+
+	info->fix.mmio_start = 0;
+	info->fix.mmio_len = 0;
+	return 0;
+
+err_fb_init:
+	fb_dealloc_cmap(&info->cmap);
+err_fb_alloc_cmap:
+	framebuffer_release(info);
+err_fb_alloc:
+	virtio_gpu_cmd_resource_inval_backing(vgdev, resid);
+err_obj_attach:
+err_obj_vmap:
+	virtio_gpu_gem_free_object(&obj->gem_base);
+	return ret;
+}
+
+static int virtio_gpu_fbdev_destroy(struct drm_device *dev,
+				    struct virtio_gpu_fbdev *vgfbdev)
+{
+	struct fb_info *info;
+	struct virtio_gpu_framebuffer *vgfb = &vgfbdev->vgfb;
+
+	if (vgfbdev->helper.fbdev) {
+		info = vgfbdev->helper.fbdev;
+
+		unregister_framebuffer(info);
+		framebuffer_release(info);
+	}
+	if (vgfb->obj)
+		vgfb->obj = NULL;
+	drm_fb_helper_fini(&vgfbdev->helper);
+	drm_framebuffer_cleanup(&vgfb->base);
+
+	return 0;
+}
+static struct drm_fb_helper_funcs virtio_gpu_fb_helper_funcs = {
+	.fb_probe = virtio_gpufb_create,
+};
+
+int virtio_gpu_fbdev_init(struct virtio_gpu_device *vgdev)
+{
+	struct virtio_gpu_fbdev *vgfbdev;
+	int bpp_sel = 32; /* TODO: parameter from somewhere? */
+	int ret;
+
+	vgfbdev = kzalloc(sizeof(struct virtio_gpu_fbdev), GFP_KERNEL);
+	if (!vgfbdev)
+		return -ENOMEM;
+
+	vgfbdev->vgdev = vgdev;
+	vgdev->vgfbdev = vgfbdev;
+	INIT_DELAYED_WORK(&vgfbdev->work, virtio_gpu_fb_dirty_work);
+
+	drm_fb_helper_prepare(vgdev->ddev, &vgfbdev->helper,
+			      &virtio_gpu_fb_helper_funcs);
+	ret = drm_fb_helper_init(vgdev->ddev, &vgfbdev->helper,
+				 vgdev->num_scanouts,
+				 VIRTIO_GPUFB_CONN_LIMIT);
+	if (ret) {
+		kfree(vgfbdev);
+		return ret;
+	}
+
+	drm_fb_helper_single_add_all_connectors(&vgfbdev->helper);
+	drm_fb_helper_initial_config(&vgfbdev->helper, bpp_sel);
+	return 0;
+}
+
+void virtio_gpu_fbdev_fini(struct virtio_gpu_device *vgdev)
+{
+	if (!vgdev->vgfbdev)
+		return;
+
+	virtio_gpu_fbdev_destroy(vgdev->ddev, vgdev->vgfbdev);
+	kfree(vgdev->vgfbdev);
+	vgdev->vgfbdev = NULL;
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_fence.c b/drivers/gpu/drm/virtio/virtgpu_fence.c
new file mode 100644
index 000000000000..1da632631dac
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_fence.c
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include "virtgpu_drv.h"
+
+static const char *virtio_get_driver_name(struct fence *f)
+{
+	return "virtio_gpu";
+}
+
+static const char *virtio_get_timeline_name(struct fence *f)
+{
+	return "controlq";
+}
+
+static bool virtio_enable_signaling(struct fence *f)
+{
+	return true;
+}
+
+static bool virtio_signaled(struct fence *f)
+{
+	struct virtio_gpu_fence *fence = to_virtio_fence(f);
+
+	if (atomic64_read(&fence->drv->last_seq) >= fence->seq)
+		return true;
+	return false;
+}
+
+static void virtio_fence_value_str(struct fence *f, char *str, int size)
+{
+	struct virtio_gpu_fence *fence = to_virtio_fence(f);
+
+	snprintf(str, size, "%llu", fence->seq);
+}
+
+static void virtio_timeline_value_str(struct fence *f, char *str, int size)
+{
+	struct virtio_gpu_fence *fence = to_virtio_fence(f);
+
+	snprintf(str, size, "%lu", atomic64_read(&fence->drv->last_seq));
+}
+
+static const struct fence_ops virtio_fence_ops = {
+	.get_driver_name     = virtio_get_driver_name,
+	.get_timeline_name   = virtio_get_timeline_name,
+	.enable_signaling    = virtio_enable_signaling,
+	.signaled            = virtio_signaled,
+	.wait                = fence_default_wait,
+	.fence_value_str     = virtio_fence_value_str,
+	.timeline_value_str  = virtio_timeline_value_str,
+};
+
+int virtio_gpu_fence_emit(struct virtio_gpu_device *vgdev,
+			  struct virtio_gpu_ctrl_hdr *cmd_hdr,
+			  struct virtio_gpu_fence **fence)
+{
+	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
+	unsigned long irq_flags;
+
+	*fence = kmalloc(sizeof(struct virtio_gpu_fence), GFP_KERNEL);
+	if ((*fence) == NULL)
+		return -ENOMEM;
+
+	spin_lock_irqsave(&drv->lock, irq_flags);
+	(*fence)->drv = drv;
+	(*fence)->seq = ++drv->sync_seq;
+	fence_init(&(*fence)->f, &virtio_fence_ops, &drv->lock,
+		   0, (*fence)->seq);
+	fence_get(&(*fence)->f);
+	list_add_tail(&(*fence)->node, &drv->fences);
+	spin_unlock_irqrestore(&drv->lock, irq_flags);
+
+	cmd_hdr->flags |= cpu_to_le32(VIRTIO_GPU_FLAG_FENCE);
+	cmd_hdr->fence_id = cpu_to_le64((*fence)->seq);
+	return 0;
+}
+
+void virtio_gpu_fence_event_process(struct virtio_gpu_device *vgdev,
+				    u64 last_seq)
+{
+	struct virtio_gpu_fence_driver *drv = &vgdev->fence_drv;
+	struct virtio_gpu_fence *fence, *tmp;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(&drv->lock, irq_flags);
+	atomic64_set(&vgdev->fence_drv.last_seq, last_seq);
+	list_for_each_entry_safe(fence, tmp, &drv->fences, node) {
+		if (last_seq < fence->seq)
+			continue;
+		fence_signal_locked(&fence->f);
+		list_del(&fence->node);
+		fence_put(&fence->f);
+	}
+	spin_unlock_irqrestore(&drv->lock, irq_flags);
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
new file mode 100644
index 000000000000..cfa0d27150bd
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include "virtgpu_drv.h"
+
+void virtio_gpu_gem_free_object(struct drm_gem_object *gem_obj)
+{
+	struct virtio_gpu_object *obj = gem_to_virtio_gpu_obj(gem_obj);
+
+	if (obj)
+		virtio_gpu_object_unref(&obj);
+}
+
+struct virtio_gpu_object *virtio_gpu_alloc_object(struct drm_device *dev,
+						  size_t size, bool kernel,
+						  bool pinned)
+{
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	struct virtio_gpu_object *obj;
+	int ret;
+
+	ret = virtio_gpu_object_create(vgdev, size, kernel, pinned, &obj);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return obj;
+}
+
+int virtio_gpu_gem_create(struct drm_file *file,
+			  struct drm_device *dev,
+			  uint64_t size,
+			  struct drm_gem_object **obj_p,
+			  uint32_t *handle_p)
+{
+	struct virtio_gpu_object *obj;
+	int ret;
+	u32 handle;
+
+	obj = virtio_gpu_alloc_object(dev, size, false, false);
+	if (IS_ERR(obj))
+		return PTR_ERR(obj);
+
+	ret = drm_gem_handle_create(file, &obj->gem_base, &handle);
+	if (ret) {
+		drm_gem_object_release(&obj->gem_base);
+		return ret;
+	}
+
+	*obj_p = &obj->gem_base;
+
+	/* drop reference from allocate - handle holds it now */
+	drm_gem_object_unreference_unlocked(&obj->gem_base);
+
+	*handle_p = handle;
+	return 0;
+}
+
+int virtio_gpu_mode_dumb_create(struct drm_file *file_priv,
+				struct drm_device *dev,
+				struct drm_mode_create_dumb *args)
+{
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	struct drm_gem_object *gobj;
+	struct virtio_gpu_object *obj;
+	int ret;
+	uint32_t pitch;
+	uint32_t resid;
+
+	pitch = args->width * ((args->bpp + 1) / 8);
+	args->size = pitch * args->height;
+	args->size = ALIGN(args->size, PAGE_SIZE);
+
+	ret = virtio_gpu_gem_create(file_priv, dev, args->size, &gobj,
+				    &args->handle);
+	if (ret)
+		goto fail;
+
+	virtio_gpu_resource_id_get(vgdev, &resid);
+	virtio_gpu_cmd_create_resource(vgdev, resid,
+				       2, args->width, args->height);
+
+	/* attach the object to the resource */
+	obj = gem_to_virtio_gpu_obj(gobj);
+	ret = virtio_gpu_object_attach(vgdev, obj, resid, NULL);
+	if (ret)
+		goto fail;
+
+	obj->dumb = true;
+	args->pitch = pitch;
+	return ret;
+
+fail:
+	return ret;
+}
+
+int virtio_gpu_mode_dumb_destroy(struct drm_file *file_priv,
+				 struct drm_device *dev,
+				 uint32_t handle)
+{
+	return drm_gem_handle_delete(file_priv, handle);
+}
+
+int virtio_gpu_mode_dumb_mmap(struct drm_file *file_priv,
+			      struct drm_device *dev,
+			      uint32_t handle, uint64_t *offset_p)
+{
+	struct drm_gem_object *gobj;
+	struct virtio_gpu_object *obj;
+	BUG_ON(!offset_p);
+	gobj = drm_gem_object_lookup(dev, file_priv, handle);
+	if (gobj == NULL)
+		return -ENOENT;
+	obj = gem_to_virtio_gpu_obj(gobj);
+	*offset_p = virtio_gpu_object_mmap_offset(obj);
+	drm_gem_object_unreference_unlocked(gobj);
+	return 0;
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
new file mode 100644
index 000000000000..132405f15389
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -0,0 +1,173 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <drm/drmP.h>
+#include "virtgpu_drv.h"
+
+static int virtio_gpu_fbdev = 1;
+
+MODULE_PARM_DESC(fbdev, "Disable/Enable framebuffer device & console");
+module_param_named(fbdev, virtio_gpu_fbdev, int, 0400);
+
+static void virtio_gpu_config_changed_work_func(struct work_struct *work)
+{
+	struct virtio_gpu_device *vgdev =
+		container_of(work, struct virtio_gpu_device,
+			     config_changed_work);
+	u32 events_read, events_clear = 0;
+
+	/* read the config space */
+	virtio_cread(vgdev->vdev, struct virtio_gpu_config,
+		     events_read, &events_read);
+	if (events_read & VIRTIO_GPU_EVENT_DISPLAY) {
+		virtio_gpu_cmd_get_display_info(vgdev);
+		drm_helper_hpd_irq_event(vgdev->ddev);
+		events_clear |= VIRTIO_GPU_EVENT_DISPLAY;
+	}
+	virtio_cwrite(vgdev->vdev, struct virtio_gpu_config,
+		      events_clear, &events_clear);
+}
+
+static void virtio_gpu_init_vq(struct virtio_gpu_queue *vgvq,
+			       void (*work_func)(struct work_struct *work))
+{
+	spin_lock_init(&vgvq->qlock);
+	init_waitqueue_head(&vgvq->ack_queue);
+	INIT_WORK(&vgvq->dequeue_work, work_func);
+}
+
+int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
+{
+	static vq_callback_t *callbacks[] = {
+		virtio_gpu_ctrl_ack, virtio_gpu_cursor_ack
+	};
+	static const char *names[] = { "control", "cursor" };
+
+	struct virtio_gpu_device *vgdev;
+	/* this will expand later */
+	struct virtqueue *vqs[2];
+	u32 num_scanouts;
+	int ret;
+
+	if (!virtio_has_feature(dev->virtdev, VIRTIO_F_VERSION_1))
+		return -ENODEV;
+
+	vgdev = kzalloc(sizeof(struct virtio_gpu_device), GFP_KERNEL);
+	if (!vgdev)
+		return -ENOMEM;
+
+	vgdev->ddev = dev;
+	dev->dev_private = vgdev;
+	vgdev->vdev = dev->virtdev;
+	vgdev->dev = dev->dev;
+
+	spin_lock_init(&vgdev->display_info_lock);
+	spin_lock_init(&vgdev->ctx_id_idr_lock);
+	idr_init(&vgdev->ctx_id_idr);
+	spin_lock_init(&vgdev->resource_idr_lock);
+	idr_init(&vgdev->resource_idr);
+	init_waitqueue_head(&vgdev->resp_wq);
+	virtio_gpu_init_vq(&vgdev->ctrlq, virtio_gpu_dequeue_ctrl_func);
+	virtio_gpu_init_vq(&vgdev->cursorq, virtio_gpu_dequeue_cursor_func);
+
+	spin_lock_init(&vgdev->fence_drv.lock);
+	INIT_LIST_HEAD(&vgdev->fence_drv.fences);
+	INIT_WORK(&vgdev->config_changed_work,
+		  virtio_gpu_config_changed_work_func);
+
+	ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs,
+					    callbacks, names);
+	if (ret) {
+		DRM_ERROR("failed to find virt queues\n");
+		goto err_vqs;
+	}
+	vgdev->ctrlq.vq = vqs[0];
+	vgdev->cursorq.vq = vqs[1];
+	ret = virtio_gpu_alloc_vbufs(vgdev);
+	if (ret) {
+		DRM_ERROR("failed to alloc vbufs\n");
+		goto err_vbufs;
+	}
+
+	ret = virtio_gpu_ttm_init(vgdev);
+	if (ret) {
+		DRM_ERROR("failed to init ttm %d\n", ret);
+		goto err_ttm;
+	}
+
+	/* get display info */
+	virtio_cread(vgdev->vdev, struct virtio_gpu_config,
+		     num_scanouts, &num_scanouts);
+	vgdev->num_scanouts = min_t(uint32_t, num_scanouts,
+				    VIRTIO_GPU_MAX_SCANOUTS);
+	if (!vgdev->num_scanouts) {
+		DRM_ERROR("num_scanouts is zero\n");
+		ret = -EINVAL;
+		goto err_scanouts;
+	}
+
+	ret = virtio_gpu_modeset_init(vgdev);
+	if (ret)
+		goto err_modeset;
+
+	virtio_device_ready(vgdev->vdev);
+	vgdev->vqs_ready = true;
+
+	if (virtio_gpu_fbdev)
+		virtio_gpu_fbdev_init(vgdev);
+	virtio_gpu_cmd_get_display_info(vgdev);
+
+	return 0;
+
+err_modeset:
+err_scanouts:
+	virtio_gpu_ttm_fini(vgdev);
+err_ttm:
+	virtio_gpu_free_vbufs(vgdev);
+err_vbufs:
+	vgdev->vdev->config->del_vqs(vgdev->vdev);
+err_vqs:
+	kfree(vgdev);
+	return ret;
+}
+
+int virtio_gpu_driver_unload(struct drm_device *dev)
+{
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+
+	vgdev->vqs_ready = false;
+	flush_work(&vgdev->ctrlq.dequeue_work);
+	flush_work(&vgdev->cursorq.dequeue_work);
+	flush_work(&vgdev->config_changed_work);
+	vgdev->vdev->config->del_vqs(vgdev->vdev);
+
+	virtio_gpu_modeset_fini(vgdev);
+	virtio_gpu_ttm_fini(vgdev);
+	virtio_gpu_free_vbufs(vgdev);
+	kfree(vgdev);
+	return 0;
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
new file mode 100644
index 000000000000..2c624c784c1d
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -0,0 +1,170 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "virtgpu_drv.h"
+
+static void virtio_gpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
+{
+	struct virtio_gpu_object *bo;
+	struct virtio_gpu_device *vgdev;
+
+	bo = container_of(tbo, struct virtio_gpu_object, tbo);
+	vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private;
+
+	if (bo->hw_res_handle)
+		virtio_gpu_cmd_unref_resource(vgdev, bo->hw_res_handle);
+	if (bo->pages)
+		virtio_gpu_object_free_sg_table(bo);
+	drm_gem_object_release(&bo->gem_base);
+	kfree(bo);
+}
+
+static void virtio_gpu_init_ttm_placement(struct virtio_gpu_object *vgbo,
+					  bool pinned)
+{
+	u32 c = 1;
+	u32 pflag = pinned ? TTM_PL_FLAG_NO_EVICT : 0;
+
+	vgbo->placement.placement = &vgbo->placement_code;
+	vgbo->placement.busy_placement = &vgbo->placement_code;
+	vgbo->placement_code.fpfn = 0;
+	vgbo->placement_code.lpfn = 0;
+	vgbo->placement_code.flags =
+		TTM_PL_MASK_CACHING | TTM_PL_FLAG_TT | pflag;
+	vgbo->placement.num_placement = c;
+	vgbo->placement.num_busy_placement = c;
+
+}
+
+int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
+			     unsigned long size, bool kernel, bool pinned,
+			     struct virtio_gpu_object **bo_ptr)
+{
+	struct virtio_gpu_object *bo;
+	enum ttm_bo_type type;
+	size_t acc_size;
+	int ret;
+
+	if (kernel)
+		type = ttm_bo_type_kernel;
+	else
+		type = ttm_bo_type_device;
+	*bo_ptr = NULL;
+
+	acc_size = ttm_bo_dma_acc_size(&vgdev->mman.bdev, size,
+				       sizeof(struct virtio_gpu_object));
+
+	bo = kzalloc(sizeof(struct virtio_gpu_object), GFP_KERNEL);
+	if (bo == NULL)
+		return -ENOMEM;
+	size = roundup(size, PAGE_SIZE);
+	ret = drm_gem_object_init(vgdev->ddev, &bo->gem_base, size);
+	if (ret != 0)
+		goto err_gem_init;
+	bo->dumb = false;
+	virtio_gpu_init_ttm_placement(bo, pinned);
+
+	ret = ttm_bo_init(&vgdev->mman.bdev, &bo->tbo, size, type,
+			  &bo->placement, 0, !kernel, NULL, acc_size,
+			  NULL, NULL, &virtio_gpu_ttm_bo_destroy);
+	if (ret != 0)
+		goto err_ttm_init;
+
+	*bo_ptr = bo;
+	return 0;
+
+err_ttm_init:
+	drm_gem_object_release(&bo->gem_base);
+err_gem_init:
+	kfree(bo);
+	return ret;
+}
+
+int virtio_gpu_object_kmap(struct virtio_gpu_object *bo, void **ptr)
+{
+	bool is_iomem;
+	int r;
+
+	if (bo->vmap) {
+		if (ptr)
+			*ptr = bo->vmap;
+		return 0;
+	}
+	r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
+	if (r)
+		return r;
+	bo->vmap = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
+	if (ptr)
+		*ptr = bo->vmap;
+	return 0;
+}
+
+int virtio_gpu_object_get_sg_table(struct virtio_gpu_device *qdev,
+				   struct virtio_gpu_object *bo)
+{
+	int ret;
+	struct page **pages = bo->tbo.ttm->pages;
+	int nr_pages = bo->tbo.num_pages;
+
+	/* wtf swapping */
+	if (bo->pages)
+		return 0;
+
+	if (bo->tbo.ttm->state == tt_unpopulated)
+		bo->tbo.ttm->bdev->driver->ttm_tt_populate(bo->tbo.ttm);
+	bo->pages = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!bo->pages)
+		goto out;
+
+	ret = sg_alloc_table_from_pages(bo->pages, pages, nr_pages, 0,
+					nr_pages << PAGE_SHIFT, GFP_KERNEL);
+	if (ret)
+		goto out;
+	return 0;
+out:
+	kfree(bo->pages);
+	bo->pages = NULL;
+	return -ENOMEM;
+}
+
+void virtio_gpu_object_free_sg_table(struct virtio_gpu_object *bo)
+{
+	sg_free_table(bo->pages);
+	kfree(bo->pages);
+	bo->pages = NULL;
+}
+
+int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait)
+{
+	int r;
+
+	r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
+	if (unlikely(r != 0))
+		return r;
+	r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
+	ttm_bo_unreserve(&bo->tbo);
+	return r;
+}
+
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
new file mode 100644
index 000000000000..4a74129c5708
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -0,0 +1,120 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "virtgpu_drv.h"
+#include <drm/drm_plane_helper.h>
+#include <drm/drm_atomic_helper.h>
+
+static const uint32_t virtio_gpu_formats[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_BGRX8888,
+	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_RGBX8888,
+	DRM_FORMAT_RGBA8888,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR8888,
+};
+
+static void virtio_gpu_plane_destroy(struct drm_plane *plane)
+{
+	kfree(plane);
+}
+
+static const struct drm_plane_funcs virtio_gpu_plane_funcs = {
+	.update_plane		= drm_atomic_helper_update_plane,
+	.disable_plane		= drm_atomic_helper_disable_plane,
+	.destroy		= virtio_gpu_plane_destroy,
+	.reset			= drm_atomic_helper_plane_reset,
+	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
+	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
+};
+
+static int virtio_gpu_plane_atomic_check(struct drm_plane *plane,
+					 struct drm_plane_state *state)
+{
+	return 0;
+}
+
+static void virtio_gpu_plane_atomic_update(struct drm_plane *plane,
+					   struct drm_plane_state *old_state)
+{
+	struct drm_device *dev = plane->dev;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	struct virtio_gpu_output *output = drm_crtc_to_virtio_gpu_output(plane->crtc);
+	struct virtio_gpu_framebuffer *vgfb;
+	struct virtio_gpu_object *bo;
+	uint32_t handle;
+
+	if (plane->fb) {
+		vgfb = to_virtio_gpu_framebuffer(plane->fb);
+		bo = gem_to_virtio_gpu_obj(vgfb->obj);
+		handle = bo->hw_res_handle;
+	} else {
+		handle = 0;
+	}
+
+	DRM_DEBUG("handle 0x%x, crtc %dx%d+%d+%d\n", handle,
+		  plane->state->crtc_w, plane->state->crtc_h,
+		  plane->state->crtc_x, plane->state->crtc_y);
+	virtio_gpu_cmd_set_scanout(vgdev, output->index, handle,
+				   plane->state->crtc_w,
+				   plane->state->crtc_h,
+				   plane->state->crtc_x,
+				   plane->state->crtc_y);
+}
+
+
+static const struct drm_plane_helper_funcs virtio_gpu_plane_helper_funcs = {
+	.atomic_check		= virtio_gpu_plane_atomic_check,
+	.atomic_update		= virtio_gpu_plane_atomic_update,
+};
+
+struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev,
+					int index)
+{
+	struct drm_device *dev = vgdev->ddev;
+	struct drm_plane *plane;
+	int ret;
+
+	plane = kzalloc(sizeof(*plane), GFP_KERNEL);
+	if (!plane)
+		return ERR_PTR(-ENOMEM);
+
+	ret = drm_universal_plane_init(dev, plane, 1 << index,
+				       &virtio_gpu_plane_funcs,
+				       virtio_gpu_formats,
+				       ARRAY_SIZE(virtio_gpu_formats),
+				       DRM_PLANE_TYPE_PRIMARY);
+	if (ret)
+		goto err_plane_init;
+
+	drm_plane_helper_add(plane, &virtio_gpu_plane_helper_funcs);
+	return plane;
+
+err_plane_init:
+	kfree(plane);
+	return ERR_PTR(ret);
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
new file mode 100644
index 000000000000..e0e74c6bb959
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -0,0 +1,469 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Authors:
+ *    Dave Airlie
+ *    Alon Levy
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <ttm/ttm_bo_api.h>
+#include <ttm/ttm_bo_driver.h>
+#include <ttm/ttm_placement.h>
+#include <ttm/ttm_page_alloc.h>
+#include <ttm/ttm_module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+#include "virtgpu_drv.h"
+
+#include <linux/delay.h>
+
+#define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT)
+
+static struct
+virtio_gpu_device *virtio_gpu_get_vgdev(struct ttm_bo_device *bdev)
+{
+	struct virtio_gpu_mman *mman;
+	struct virtio_gpu_device *vgdev;
+
+	mman = container_of(bdev, struct virtio_gpu_mman, bdev);
+	vgdev = container_of(mman, struct virtio_gpu_device, mman);
+	return vgdev;
+}
+
+static int virtio_gpu_ttm_mem_global_init(struct drm_global_reference *ref)
+{
+	return ttm_mem_global_init(ref->object);
+}
+
+static void virtio_gpu_ttm_mem_global_release(struct drm_global_reference *ref)
+{
+	ttm_mem_global_release(ref->object);
+}
+
+static int virtio_gpu_ttm_global_init(struct virtio_gpu_device *vgdev)
+{
+	struct drm_global_reference *global_ref;
+	int r;
+
+	vgdev->mman.mem_global_referenced = false;
+	global_ref = &vgdev->mman.mem_global_ref;
+	global_ref->global_type = DRM_GLOBAL_TTM_MEM;
+	global_ref->size = sizeof(struct ttm_mem_global);
+	global_ref->init = &virtio_gpu_ttm_mem_global_init;
+	global_ref->release = &virtio_gpu_ttm_mem_global_release;
+
+	r = drm_global_item_ref(global_ref);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up TTM memory accounting "
+			  "subsystem.\n");
+		return r;
+	}
+
+	vgdev->mman.bo_global_ref.mem_glob =
+		vgdev->mman.mem_global_ref.object;
+	global_ref = &vgdev->mman.bo_global_ref.ref;
+	global_ref->global_type = DRM_GLOBAL_TTM_BO;
+	global_ref->size = sizeof(struct ttm_bo_global);
+	global_ref->init = &ttm_bo_global_init;
+	global_ref->release = &ttm_bo_global_release;
+	r = drm_global_item_ref(global_ref);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up TTM BO subsystem.\n");
+		drm_global_item_unref(&vgdev->mman.mem_global_ref);
+		return r;
+	}
+
+	vgdev->mman.mem_global_referenced = true;
+	return 0;
+}
+
+static void virtio_gpu_ttm_global_fini(struct virtio_gpu_device *vgdev)
+{
+	if (vgdev->mman.mem_global_referenced) {
+		drm_global_item_unref(&vgdev->mman.bo_global_ref.ref);
+		drm_global_item_unref(&vgdev->mman.mem_global_ref);
+		vgdev->mman.mem_global_referenced = false;
+	}
+}
+
+#if 0
+/*
+ * Hmm, seems to not do anything useful.  Leftover debug hack?
+ * Something like printing pagefaults to kernel log?
+ */
+static struct vm_operations_struct virtio_gpu_ttm_vm_ops;
+static const struct vm_operations_struct *ttm_vm_ops;
+
+static int virtio_gpu_ttm_fault(struct vm_area_struct *vma,
+				struct vm_fault *vmf)
+{
+	struct ttm_buffer_object *bo;
+	struct virtio_gpu_device *vgdev;
+	int r;
+
+	bo = (struct ttm_buffer_object *)vma->vm_private_data;
+	if (bo == NULL)
+		return VM_FAULT_NOPAGE;
+	vgdev = virtio_gpu_get_vgdev(bo->bdev);
+	r = ttm_vm_ops->fault(vma, vmf);
+	return r;
+}
+#endif
+
+int virtio_gpu_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *file_priv;
+	struct virtio_gpu_device *vgdev;
+	int r;
+
+	file_priv = filp->private_data;
+	vgdev = file_priv->minor->dev->dev_private;
+	if (vgdev == NULL) {
+		DRM_ERROR(
+		 "filp->private_data->minor->dev->dev_private == NULL\n");
+		return -EINVAL;
+	}
+	r = ttm_bo_mmap(filp, vma, &vgdev->mman.bdev);
+#if 0
+	if (unlikely(r != 0))
+		return r;
+	if (unlikely(ttm_vm_ops == NULL)) {
+		ttm_vm_ops = vma->vm_ops;
+		virtio_gpu_ttm_vm_ops = *ttm_vm_ops;
+		virtio_gpu_ttm_vm_ops.fault = &virtio_gpu_ttm_fault;
+	}
+	vma->vm_ops = &virtio_gpu_ttm_vm_ops;
+	return 0;
+#else
+	return r;
+#endif
+}
+
+static int virtio_gpu_invalidate_caches(struct ttm_bo_device *bdev,
+					uint32_t flags)
+{
+	return 0;
+}
+
+static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
+			       struct ttm_buffer_object *bo,
+			       const struct ttm_place *place,
+			       struct ttm_mem_reg *mem)
+{
+	mem->mm_node = (void *)1;
+	return 0;
+}
+
+static void ttm_bo_man_put_node(struct ttm_mem_type_manager *man,
+				struct ttm_mem_reg *mem)
+{
+	mem->mm_node = (void *)NULL;
+	return;
+}
+
+static int ttm_bo_man_init(struct ttm_mem_type_manager *man,
+			   unsigned long p_size)
+{
+	return 0;
+}
+
+static int ttm_bo_man_takedown(struct ttm_mem_type_manager *man)
+{
+	return 0;
+}
+
+static void ttm_bo_man_debug(struct ttm_mem_type_manager *man,
+			     const char *prefix)
+{
+}
+
+static const struct ttm_mem_type_manager_func virtio_gpu_bo_manager_func = {
+	ttm_bo_man_init,
+	ttm_bo_man_takedown,
+	ttm_bo_man_get_node,
+	ttm_bo_man_put_node,
+	ttm_bo_man_debug
+};
+
+static int virtio_gpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
+				    struct ttm_mem_type_manager *man)
+{
+	struct virtio_gpu_device *vgdev;
+
+	vgdev = virtio_gpu_get_vgdev(bdev);
+
+	switch (type) {
+	case TTM_PL_SYSTEM:
+		/* System memory */
+		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_MASK_CACHING;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	case TTM_PL_TT:
+		man->func = &virtio_gpu_bo_manager_func;
+		man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
+		man->available_caching = TTM_PL_MASK_CACHING;
+		man->default_caching = TTM_PL_FLAG_CACHED;
+		break;
+	default:
+		DRM_ERROR("Unsupported memory type %u\n", (unsigned)type);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void virtio_gpu_evict_flags(struct ttm_buffer_object *bo,
+				struct ttm_placement *placement)
+{
+	static struct ttm_place placements = {
+		.fpfn  = 0,
+		.lpfn  = 0,
+		.flags = TTM_PL_MASK_CACHING | TTM_PL_FLAG_SYSTEM,
+	};
+
+	placement->placement = &placements;
+	placement->busy_placement = &placements;
+	placement->num_placement = 1;
+	placement->num_busy_placement = 1;
+	return;
+}
+
+static int virtio_gpu_verify_access(struct ttm_buffer_object *bo,
+				    struct file *filp)
+{
+	return 0;
+}
+
+static int virtio_gpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev,
+					 struct ttm_mem_reg *mem)
+{
+	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+
+	mem->bus.addr = NULL;
+	mem->bus.offset = 0;
+	mem->bus.size = mem->num_pages << PAGE_SHIFT;
+	mem->bus.base = 0;
+	mem->bus.is_iomem = false;
+	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
+		return -EINVAL;
+	switch (mem->mem_type) {
+	case TTM_PL_SYSTEM:
+	case TTM_PL_TT:
+		/* system memory */
+		return 0;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static void virtio_gpu_ttm_io_mem_free(struct ttm_bo_device *bdev,
+				       struct ttm_mem_reg *mem)
+{
+}
+
+/*
+ * TTM backend functions.
+ */
+struct virtio_gpu_ttm_tt {
+	struct ttm_dma_tt		ttm;
+	struct virtio_gpu_device	*vgdev;
+	u64				offset;
+};
+
+static int virtio_gpu_ttm_backend_bind(struct ttm_tt *ttm,
+				       struct ttm_mem_reg *bo_mem)
+{
+	struct virtio_gpu_ttm_tt *gtt = (void *)ttm;
+
+	gtt->offset = (unsigned long)(bo_mem->start << PAGE_SHIFT);
+	if (!ttm->num_pages)
+		WARN(1, "nothing to bind %lu pages for mreg %p back %p!\n",
+		     ttm->num_pages, bo_mem, ttm);
+
+	/* Not implemented */
+	return 0;
+}
+
+static int virtio_gpu_ttm_backend_unbind(struct ttm_tt *ttm)
+{
+	/* Not implemented */
+	return 0;
+}
+
+static void virtio_gpu_ttm_backend_destroy(struct ttm_tt *ttm)
+{
+	struct virtio_gpu_ttm_tt *gtt = (void *)ttm;
+
+	ttm_dma_tt_fini(&gtt->ttm);
+	kfree(gtt);
+}
+
+static struct ttm_backend_func virtio_gpu_backend_func = {
+	.bind = &virtio_gpu_ttm_backend_bind,
+	.unbind = &virtio_gpu_ttm_backend_unbind,
+	.destroy = &virtio_gpu_ttm_backend_destroy,
+};
+
+static int virtio_gpu_ttm_tt_populate(struct ttm_tt *ttm)
+{
+	if (ttm->state != tt_unpopulated)
+		return 0;
+
+	return ttm_pool_populate(ttm);
+}
+
+static void virtio_gpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
+{
+	ttm_pool_unpopulate(ttm);
+}
+
+static struct ttm_tt *virtio_gpu_ttm_tt_create(struct ttm_bo_device *bdev,
+					       unsigned long size,
+					       uint32_t page_flags,
+					       struct page *dummy_read_page)
+{
+	struct virtio_gpu_device *vgdev;
+	struct virtio_gpu_ttm_tt *gtt;
+
+	vgdev = virtio_gpu_get_vgdev(bdev);
+	gtt = kzalloc(sizeof(struct virtio_gpu_ttm_tt), GFP_KERNEL);
+	if (gtt == NULL)
+		return NULL;
+	gtt->ttm.ttm.func = &virtio_gpu_backend_func;
+	gtt->vgdev = vgdev;
+	if (ttm_dma_tt_init(&gtt->ttm, bdev, size, page_flags,
+			    dummy_read_page)) {
+		kfree(gtt);
+		return NULL;
+	}
+	return &gtt->ttm.ttm;
+}
+
+static void virtio_gpu_move_null(struct ttm_buffer_object *bo,
+				 struct ttm_mem_reg *new_mem)
+{
+	struct ttm_mem_reg *old_mem = &bo->mem;
+
+	BUG_ON(old_mem->mm_node != NULL);
+	*old_mem = *new_mem;
+	new_mem->mm_node = NULL;
+}
+
+static int virtio_gpu_bo_move(struct ttm_buffer_object *bo,
+			      bool evict, bool interruptible,
+			      bool no_wait_gpu,
+			      struct ttm_mem_reg *new_mem)
+{
+	virtio_gpu_move_null(bo, new_mem);
+	return 0;
+}
+
+static void virtio_gpu_bo_move_notify(struct ttm_buffer_object *tbo,
+				      struct ttm_mem_reg *new_mem)
+{
+	struct virtio_gpu_object *bo;
+	struct virtio_gpu_device *vgdev;
+
+	bo = container_of(tbo, struct virtio_gpu_object, tbo);
+	vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private;
+
+	if (!new_mem || (new_mem->placement & TTM_PL_FLAG_SYSTEM)) {
+		if (bo->hw_res_handle)
+			virtio_gpu_cmd_resource_inval_backing(vgdev,
+							   bo->hw_res_handle);
+
+	} else if (new_mem->placement & TTM_PL_FLAG_TT) {
+		if (bo->hw_res_handle) {
+			virtio_gpu_object_attach(vgdev, bo, bo->hw_res_handle,
+						 NULL);
+		}
+	}
+}
+
+static void virtio_gpu_bo_swap_notify(struct ttm_buffer_object *tbo)
+{
+	struct virtio_gpu_object *bo;
+	struct virtio_gpu_device *vgdev;
+
+	bo = container_of(tbo, struct virtio_gpu_object, tbo);
+	vgdev = (struct virtio_gpu_device *)bo->gem_base.dev->dev_private;
+
+	if (bo->pages)
+		virtio_gpu_object_free_sg_table(bo);
+}
+
+static struct ttm_bo_driver virtio_gpu_bo_driver = {
+	.ttm_tt_create = &virtio_gpu_ttm_tt_create,
+	.ttm_tt_populate = &virtio_gpu_ttm_tt_populate,
+	.ttm_tt_unpopulate = &virtio_gpu_ttm_tt_unpopulate,
+	.invalidate_caches = &virtio_gpu_invalidate_caches,
+	.init_mem_type = &virtio_gpu_init_mem_type,
+	.evict_flags = &virtio_gpu_evict_flags,
+	.move = &virtio_gpu_bo_move,
+	.verify_access = &virtio_gpu_verify_access,
+	.io_mem_reserve = &virtio_gpu_ttm_io_mem_reserve,
+	.io_mem_free = &virtio_gpu_ttm_io_mem_free,
+	.move_notify = &virtio_gpu_bo_move_notify,
+	.swap_notify = &virtio_gpu_bo_swap_notify,
+};
+
+int virtio_gpu_ttm_init(struct virtio_gpu_device *vgdev)
+{
+	int r;
+
+	r = virtio_gpu_ttm_global_init(vgdev);
+	if (r)
+		return r;
+	/* No others user of address space so set it to 0 */
+	r = ttm_bo_device_init(&vgdev->mman.bdev,
+			       vgdev->mman.bo_global_ref.ref.object,
+			       &virtio_gpu_bo_driver,
+			       vgdev->ddev->anon_inode->i_mapping,
+			       DRM_FILE_PAGE_OFFSET, 0);
+	if (r) {
+		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
+		goto err_dev_init;
+		return r;
+	}
+
+	r = ttm_bo_init_mm(&vgdev->mman.bdev, TTM_PL_TT, 0);
+	if (r) {
+		DRM_ERROR("Failed initializing GTT heap.\n");
+		goto err_mm_init;
+		return r;
+	}
+	return 0;
+
+err_mm_init:
+	ttm_bo_device_release(&vgdev->mman.bdev);
+err_dev_init:
+	virtio_gpu_ttm_global_fini(vgdev);
+	return r;
+}
+
+void virtio_gpu_ttm_fini(struct virtio_gpu_device *vgdev)
+{
+	ttm_bo_device_release(&vgdev->mman.bdev);
+	virtio_gpu_ttm_global_fini(vgdev);
+	DRM_INFO("virtio_gpu: ttm finalized\n");
+}
diff --git a/drivers/gpu/drm/virtio/virtgpu_vq.c b/drivers/gpu/drm/virtio/virtgpu_vq.c
new file mode 100644
index 000000000000..8fa6513eb3bc
--- /dev/null
+++ b/drivers/gpu/drm/virtio/virtgpu_vq.c
@@ -0,0 +1,614 @@
+/*
+ * Copyright (C) 2015 Red Hat, Inc.
+ * All Rights Reserved.
+ *
+ * Authors:
+ *    Dave Airlie <airlied@redhat.com>
+ *    Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * VA LINUX SYSTEMS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <drm/drmP.h>
+#include "virtgpu_drv.h"
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+
+#define MAX_INLINE_CMD_SIZE   96
+#define MAX_INLINE_RESP_SIZE  24
+#define VBUFFER_SIZE          (sizeof(struct virtio_gpu_vbuffer) \
+			       + MAX_INLINE_CMD_SIZE		 \
+			       + MAX_INLINE_RESP_SIZE)
+
+void virtio_gpu_resource_id_get(struct virtio_gpu_device *vgdev,
+				uint32_t *resid)
+{
+	int handle;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&vgdev->resource_idr_lock);
+	handle = idr_alloc(&vgdev->resource_idr, NULL, 1, 0, GFP_NOWAIT);
+	spin_unlock(&vgdev->resource_idr_lock);
+	idr_preload_end();
+	*resid = handle;
+}
+
+void virtio_gpu_resource_id_put(struct virtio_gpu_device *vgdev, uint32_t id)
+{
+	spin_lock(&vgdev->resource_idr_lock);
+	idr_remove(&vgdev->resource_idr, id);
+	spin_unlock(&vgdev->resource_idr_lock);
+}
+
+void virtio_gpu_ctrl_ack(struct virtqueue *vq)
+{
+	struct drm_device *dev = vq->vdev->priv;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	schedule_work(&vgdev->ctrlq.dequeue_work);
+}
+
+void virtio_gpu_cursor_ack(struct virtqueue *vq)
+{
+	struct drm_device *dev = vq->vdev->priv;
+	struct virtio_gpu_device *vgdev = dev->dev_private;
+	schedule_work(&vgdev->cursorq.dequeue_work);
+}
+
+int virtio_gpu_alloc_vbufs(struct virtio_gpu_device *vgdev)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+	int i, size, count = 0;
+	void *ptr;
+
+	INIT_LIST_HEAD(&vgdev->free_vbufs);
+	count += virtqueue_get_vring_size(vgdev->ctrlq.vq);
+	count += virtqueue_get_vring_size(vgdev->cursorq.vq);
+	size = count * VBUFFER_SIZE;
+	DRM_INFO("virtio vbuffers: %d bufs, %zdB each, %dkB total.\n",
+		 count, VBUFFER_SIZE, size / 1024);
+
+	vgdev->vbufs = kzalloc(size, GFP_KERNEL);
+	if (!vgdev->vbufs)
+		return -ENOMEM;
+
+	for (i = 0, ptr = vgdev->vbufs;
+	     i < count;
+	     i++, ptr += VBUFFER_SIZE) {
+		vbuf = ptr;
+		list_add(&vbuf->list, &vgdev->free_vbufs);
+	}
+	return 0;
+}
+
+void virtio_gpu_free_vbufs(struct virtio_gpu_device *vgdev)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+	int i, count = 0;
+
+	count += virtqueue_get_vring_size(vgdev->ctrlq.vq);
+	count += virtqueue_get_vring_size(vgdev->cursorq.vq);
+
+	for (i = 0; i < count; i++) {
+		if (WARN_ON(list_empty(&vgdev->free_vbufs)))
+			return;
+		vbuf = list_first_entry(&vgdev->free_vbufs,
+					struct virtio_gpu_vbuffer, list);
+		list_del(&vbuf->list);
+	}
+	kfree(vgdev->vbufs);
+}
+
+static struct virtio_gpu_vbuffer*
+virtio_gpu_get_vbuf(struct virtio_gpu_device *vgdev,
+		    int size, int resp_size, void *resp_buf,
+		    virtio_gpu_resp_cb resp_cb)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+
+	BUG_ON(list_empty(&vgdev->free_vbufs));
+	vbuf = list_first_entry(&vgdev->free_vbufs,
+				struct virtio_gpu_vbuffer, list);
+	list_del(&vbuf->list);
+	memset(vbuf, 0, VBUFFER_SIZE);
+
+	BUG_ON(size > MAX_INLINE_CMD_SIZE);
+	vbuf->buf = (void *)vbuf + sizeof(*vbuf);
+	vbuf->size = size;
+
+	vbuf->resp_cb = resp_cb;
+	vbuf->resp_size = resp_size;
+	if (resp_size <= MAX_INLINE_RESP_SIZE)
+		vbuf->resp_buf = (void *)vbuf->buf + size;
+	else
+		vbuf->resp_buf = resp_buf;
+	BUG_ON(!vbuf->resp_buf);
+	return vbuf;
+}
+
+static void *virtio_gpu_alloc_cmd(struct virtio_gpu_device *vgdev,
+				  struct virtio_gpu_vbuffer **vbuffer_p,
+				  int size)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+
+	vbuf = virtio_gpu_get_vbuf(vgdev, size,
+				   sizeof(struct virtio_gpu_ctrl_hdr),
+				   NULL, NULL);
+	if (IS_ERR(vbuf)) {
+		*vbuffer_p = NULL;
+		return ERR_CAST(vbuf);
+	}
+	*vbuffer_p = vbuf;
+	return vbuf->buf;
+}
+
+static struct virtio_gpu_update_cursor*
+virtio_gpu_alloc_cursor(struct virtio_gpu_device *vgdev,
+			struct virtio_gpu_vbuffer **vbuffer_p)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+
+	vbuf = virtio_gpu_get_vbuf
+		(vgdev, sizeof(struct virtio_gpu_update_cursor),
+		 0, NULL, NULL);
+	if (IS_ERR(vbuf)) {
+		*vbuffer_p = NULL;
+		return ERR_CAST(vbuf);
+	}
+	*vbuffer_p = vbuf;
+	return (struct virtio_gpu_update_cursor *)vbuf->buf;
+}
+
+static void *virtio_gpu_alloc_cmd_resp(struct virtio_gpu_device *vgdev,
+				       virtio_gpu_resp_cb cb,
+				       struct virtio_gpu_vbuffer **vbuffer_p,
+				       int cmd_size, int resp_size,
+				       void *resp_buf)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+
+	vbuf = virtio_gpu_get_vbuf(vgdev, cmd_size,
+				   resp_size, resp_buf, cb);
+	if (IS_ERR(vbuf)) {
+		*vbuffer_p = NULL;
+		return ERR_CAST(vbuf);
+	}
+	*vbuffer_p = vbuf;
+	return (struct virtio_gpu_command *)vbuf->buf;
+}
+
+static void free_vbuf(struct virtio_gpu_device *vgdev,
+		      struct virtio_gpu_vbuffer *vbuf)
+{
+	if (vbuf->resp_size > MAX_INLINE_RESP_SIZE)
+		kfree(vbuf->resp_buf);
+	kfree(vbuf->data_buf);
+	list_add(&vbuf->list, &vgdev->free_vbufs);
+}
+
+static void reclaim_vbufs(struct virtqueue *vq, struct list_head *reclaim_list)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+	unsigned int len;
+	int freed = 0;
+
+	while ((vbuf = virtqueue_get_buf(vq, &len))) {
+		list_add_tail(&vbuf->list, reclaim_list);
+		freed++;
+	}
+	if (freed == 0)
+		DRM_DEBUG("Huh? zero vbufs reclaimed");
+}
+
+void virtio_gpu_dequeue_ctrl_func(struct work_struct *work)
+{
+	struct virtio_gpu_device *vgdev =
+		container_of(work, struct virtio_gpu_device,
+			     ctrlq.dequeue_work);
+	struct list_head reclaim_list;
+	struct virtio_gpu_vbuffer *entry, *tmp;
+	struct virtio_gpu_ctrl_hdr *resp;
+	u64 fence_id = 0;
+
+	INIT_LIST_HEAD(&reclaim_list);
+	spin_lock(&vgdev->ctrlq.qlock);
+	do {
+		virtqueue_disable_cb(vgdev->ctrlq.vq);
+		reclaim_vbufs(vgdev->ctrlq.vq, &reclaim_list);
+
+	} while (!virtqueue_enable_cb(vgdev->ctrlq.vq));
+	spin_unlock(&vgdev->ctrlq.qlock);
+
+	list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
+		resp = (struct virtio_gpu_ctrl_hdr *)entry->resp_buf;
+		if (resp->type != cpu_to_le32(VIRTIO_GPU_RESP_OK_NODATA))
+			DRM_DEBUG("response 0x%x\n", le32_to_cpu(resp->type));
+		if (resp->flags & cpu_to_le32(VIRTIO_GPU_FLAG_FENCE)) {
+			u64 f = le64_to_cpu(resp->fence_id);
+
+			if (fence_id > f) {
+				DRM_ERROR("%s: Oops: fence %llx -> %llx\n",
+					  __func__, fence_id, f);
+			} else {
+				fence_id = f;
+			}
+		}
+		if (entry->resp_cb)
+			entry->resp_cb(vgdev, entry);
+
+		list_del(&entry->list);
+		free_vbuf(vgdev, entry);
+	}
+	wake_up(&vgdev->ctrlq.ack_queue);
+
+	if (fence_id)
+		virtio_gpu_fence_event_process(vgdev, fence_id);
+}
+
+void virtio_gpu_dequeue_cursor_func(struct work_struct *work)
+{
+	struct virtio_gpu_device *vgdev =
+		container_of(work, struct virtio_gpu_device,
+			     cursorq.dequeue_work);
+	struct list_head reclaim_list;
+	struct virtio_gpu_vbuffer *entry, *tmp;
+
+	INIT_LIST_HEAD(&reclaim_list);
+	spin_lock(&vgdev->cursorq.qlock);
+	do {
+		virtqueue_disable_cb(vgdev->cursorq.vq);
+		reclaim_vbufs(vgdev->cursorq.vq, &reclaim_list);
+	} while (!virtqueue_enable_cb(vgdev->cursorq.vq));
+	spin_unlock(&vgdev->cursorq.qlock);
+
+	list_for_each_entry_safe(entry, tmp, &reclaim_list, list) {
+		list_del(&entry->list);
+		free_vbuf(vgdev, entry);
+	}
+	wake_up(&vgdev->cursorq.ack_queue);
+}
+
+static int virtio_gpu_queue_ctrl_buffer(struct virtio_gpu_device *vgdev,
+					struct virtio_gpu_vbuffer *vbuf)
+{
+	struct virtqueue *vq = vgdev->ctrlq.vq;
+	struct scatterlist *sgs[3], vcmd, vout, vresp;
+	int outcnt = 0, incnt = 0;
+	int ret;
+
+	if (!vgdev->vqs_ready)
+		return -ENODEV;
+
+	sg_init_one(&vcmd, vbuf->buf, vbuf->size);
+	sgs[outcnt+incnt] = &vcmd;
+	outcnt++;
+
+	if (vbuf->data_size) {
+		sg_init_one(&vout, vbuf->data_buf, vbuf->data_size);
+		sgs[outcnt + incnt] = &vout;
+		outcnt++;
+	}
+
+	if (vbuf->resp_size) {
+		sg_init_one(&vresp, vbuf->resp_buf, vbuf->resp_size);
+		sgs[outcnt + incnt] = &vresp;
+		incnt++;
+	}
+
+	spin_lock(&vgdev->ctrlq.qlock);
+retry:
+	ret = virtqueue_add_sgs(vq, sgs, outcnt, incnt, vbuf, GFP_ATOMIC);
+	if (ret == -ENOSPC) {
+		spin_unlock(&vgdev->ctrlq.qlock);
+		wait_event(vgdev->ctrlq.ack_queue, vq->num_free);
+		spin_lock(&vgdev->ctrlq.qlock);
+		goto retry;
+	} else {
+		virtqueue_kick(vq);
+	}
+	spin_unlock(&vgdev->ctrlq.qlock);
+
+	if (!ret)
+		ret = vq->num_free;
+	return ret;
+}
+
+static int virtio_gpu_queue_cursor(struct virtio_gpu_device *vgdev,
+				   struct virtio_gpu_vbuffer *vbuf)
+{
+	struct virtqueue *vq = vgdev->cursorq.vq;
+	struct scatterlist *sgs[1], ccmd;
+	int ret;
+	int outcnt;
+
+	if (!vgdev->vqs_ready)
+		return -ENODEV;
+
+	sg_init_one(&ccmd, vbuf->buf, vbuf->size);
+	sgs[0] = &ccmd;
+	outcnt = 1;
+
+	spin_lock(&vgdev->cursorq.qlock);
+retry:
+	ret = virtqueue_add_sgs(vq, sgs, outcnt, 0, vbuf, GFP_ATOMIC);
+	if (ret == -ENOSPC) {
+		spin_unlock(&vgdev->cursorq.qlock);
+		wait_event(vgdev->cursorq.ack_queue, vq->num_free);
+		spin_lock(&vgdev->cursorq.qlock);
+		goto retry;
+	} else {
+		virtqueue_kick(vq);
+	}
+
+	spin_unlock(&vgdev->cursorq.qlock);
+
+	if (!ret)
+		ret = vq->num_free;
+	return ret;
+}
+
+/* just create gem objects for userspace and long lived objects,
+   just use dma_alloced pages for the queue objects? */
+
+/* create a basic resource */
+void virtio_gpu_cmd_create_resource(struct virtio_gpu_device *vgdev,
+				    uint32_t resource_id,
+				    uint32_t format,
+				    uint32_t width,
+				    uint32_t height)
+{
+	struct virtio_gpu_resource_create_2d *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+
+	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_CREATE_2D);
+	cmd_p->resource_id = cpu_to_le32(resource_id);
+	cmd_p->format = cpu_to_le32(format);
+	cmd_p->width = cpu_to_le32(width);
+	cmd_p->height = cpu_to_le32(height);
+
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+void virtio_gpu_cmd_unref_resource(struct virtio_gpu_device *vgdev,
+				   uint32_t resource_id)
+{
+	struct virtio_gpu_resource_unref *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+
+	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_UNREF);
+	cmd_p->resource_id = cpu_to_le32(resource_id);
+
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+void virtio_gpu_cmd_resource_inval_backing(struct virtio_gpu_device *vgdev,
+					   uint32_t resource_id)
+{
+	struct virtio_gpu_resource_detach_backing *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+
+	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING);
+	cmd_p->resource_id = cpu_to_le32(resource_id);
+
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+void virtio_gpu_cmd_set_scanout(struct virtio_gpu_device *vgdev,
+				uint32_t scanout_id, uint32_t resource_id,
+				uint32_t width, uint32_t height,
+				uint32_t x, uint32_t y)
+{
+	struct virtio_gpu_set_scanout *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+
+	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_SET_SCANOUT);
+	cmd_p->resource_id = cpu_to_le32(resource_id);
+	cmd_p->scanout_id = cpu_to_le32(scanout_id);
+	cmd_p->r.width = cpu_to_le32(width);
+	cmd_p->r.height = cpu_to_le32(height);
+	cmd_p->r.x = cpu_to_le32(x);
+	cmd_p->r.y = cpu_to_le32(y);
+
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+void virtio_gpu_cmd_resource_flush(struct virtio_gpu_device *vgdev,
+				   uint32_t resource_id,
+				   uint32_t x, uint32_t y,
+				   uint32_t width, uint32_t height)
+{
+	struct virtio_gpu_resource_flush *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+
+	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_FLUSH);
+	cmd_p->resource_id = cpu_to_le32(resource_id);
+	cmd_p->r.width = cpu_to_le32(width);
+	cmd_p->r.height = cpu_to_le32(height);
+	cmd_p->r.x = cpu_to_le32(x);
+	cmd_p->r.y = cpu_to_le32(y);
+
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+void virtio_gpu_cmd_transfer_to_host_2d(struct virtio_gpu_device *vgdev,
+					uint32_t resource_id, uint64_t offset,
+					__le32 width, __le32 height,
+					__le32 x, __le32 y,
+					struct virtio_gpu_fence **fence)
+{
+	struct virtio_gpu_transfer_to_host_2d *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+
+	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D);
+	cmd_p->resource_id = cpu_to_le32(resource_id);
+	cmd_p->offset = cpu_to_le64(offset);
+	cmd_p->r.width = width;
+	cmd_p->r.height = height;
+	cmd_p->r.x = x;
+	cmd_p->r.y = y;
+
+	if (fence)
+		virtio_gpu_fence_emit(vgdev, &cmd_p->hdr, fence);
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+static void
+virtio_gpu_cmd_resource_attach_backing(struct virtio_gpu_device *vgdev,
+				       uint32_t resource_id,
+				       struct virtio_gpu_mem_entry *ents,
+				       uint32_t nents,
+				       struct virtio_gpu_fence **fence)
+{
+	struct virtio_gpu_resource_attach_backing *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+
+	cmd_p = virtio_gpu_alloc_cmd(vgdev, &vbuf, sizeof(*cmd_p));
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->hdr.type = cpu_to_le32(VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING);
+	cmd_p->resource_id = cpu_to_le32(resource_id);
+	cmd_p->nr_entries = cpu_to_le32(nents);
+
+	vbuf->data_buf = ents;
+	vbuf->data_size = sizeof(*ents) * nents;
+
+	if (fence)
+		virtio_gpu_fence_emit(vgdev, &cmd_p->hdr, fence);
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+}
+
+static void virtio_gpu_cmd_get_display_info_cb(struct virtio_gpu_device *vgdev,
+					       struct virtio_gpu_vbuffer *vbuf)
+{
+	struct virtio_gpu_resp_display_info *resp =
+		(struct virtio_gpu_resp_display_info *)vbuf->resp_buf;
+	int i;
+
+	spin_lock(&vgdev->display_info_lock);
+	for (i = 0; i < vgdev->num_scanouts; i++) {
+		vgdev->outputs[i].info = resp->pmodes[i];
+		if (resp->pmodes[i].enabled) {
+			DRM_DEBUG("output %d: %dx%d+%d+%d", i,
+				  le32_to_cpu(resp->pmodes[i].r.width),
+				  le32_to_cpu(resp->pmodes[i].r.height),
+				  le32_to_cpu(resp->pmodes[i].r.x),
+				  le32_to_cpu(resp->pmodes[i].r.y));
+		} else {
+			DRM_DEBUG("output %d: disabled", i);
+		}
+	}
+
+	spin_unlock(&vgdev->display_info_lock);
+	wake_up(&vgdev->resp_wq);
+
+	if (!drm_helper_hpd_irq_event(vgdev->ddev))
+		drm_kms_helper_hotplug_event(vgdev->ddev);
+}
+
+int virtio_gpu_cmd_get_display_info(struct virtio_gpu_device *vgdev)
+{
+	struct virtio_gpu_ctrl_hdr *cmd_p;
+	struct virtio_gpu_vbuffer *vbuf;
+	void *resp_buf;
+
+	resp_buf = kzalloc(sizeof(struct virtio_gpu_resp_display_info),
+			   GFP_KERNEL);
+	if (!resp_buf)
+		return -ENOMEM;
+
+	cmd_p = virtio_gpu_alloc_cmd_resp
+		(vgdev, &virtio_gpu_cmd_get_display_info_cb, &vbuf,
+		 sizeof(*cmd_p), sizeof(struct virtio_gpu_resp_display_info),
+		 resp_buf);
+	memset(cmd_p, 0, sizeof(*cmd_p));
+
+	cmd_p->type = cpu_to_le32(VIRTIO_GPU_CMD_GET_DISPLAY_INFO);
+	virtio_gpu_queue_ctrl_buffer(vgdev, vbuf);
+	return 0;
+}
+
+int virtio_gpu_object_attach(struct virtio_gpu_device *vgdev,
+			     struct virtio_gpu_object *obj,
+			     uint32_t resource_id,
+			     struct virtio_gpu_fence **fence)
+{
+	struct virtio_gpu_mem_entry *ents;
+	struct scatterlist *sg;
+	int si;
+
+	if (!obj->pages) {
+		int ret;
+		ret = virtio_gpu_object_get_sg_table(vgdev, obj);
+		if (ret)
+			return ret;
+	}
+
+	/* gets freed when the ring has consumed it */
+	ents = kmalloc_array(obj->pages->nents,
+			     sizeof(struct virtio_gpu_mem_entry),
+			     GFP_KERNEL);
+	if (!ents) {
+		DRM_ERROR("failed to allocate ent list\n");
+		return -ENOMEM;
+	}
+
+	for_each_sg(obj->pages->sgl, sg, obj->pages->nents, si) {
+		ents[si].addr = cpu_to_le64(sg_phys(sg));
+		ents[si].length = cpu_to_le32(sg->length);
+		ents[si].padding = 0;
+	}
+
+	virtio_gpu_cmd_resource_attach_backing(vgdev, resource_id,
+					       ents, obj->pages->nents,
+					       fence);
+	obj->hw_res_handle = resource_id;
+	return 0;
+}
+
+void virtio_gpu_cursor_ping(struct virtio_gpu_device *vgdev,
+			    struct virtio_gpu_output *output)
+{
+	struct virtio_gpu_vbuffer *vbuf;
+	struct virtio_gpu_update_cursor *cur_p;
+
+	output->cursor.pos.scanout_id = cpu_to_le32(output->index);
+	cur_p = virtio_gpu_alloc_cursor(vgdev, &vbuf);
+	memcpy(cur_p, &output->cursor, sizeof(output->cursor));
+	virtio_gpu_queue_cursor(vgdev, vbuf);
+}
diff --git a/include/drm/drmP.h b/include/drm/drmP.h
index df6d9970d9a4..59ce5587ed90 100644
--- a/include/drm/drmP.h
+++ b/include/drm/drmP.h
@@ -814,6 +814,7 @@ struct drm_device {
 #endif
 
 	struct platform_device *platformdev; /**< Platform device struture */
+	struct virtio_device *virtdev;
 
 	struct drm_sg_mem *sg;	/**< Scatter gather memory */
 	unsigned int num_crtcs;                  /**< Number of CRTCs on this device */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 1a0006a76b00..4460e5820b0e 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -430,6 +430,7 @@ header-y += virtio_balloon.h
 header-y += virtio_blk.h
 header-y += virtio_config.h
 header-y += virtio_console.h
+header-y += virtio_gpu.h
 header-y += virtio_ids.h
 header-y += virtio_input.h
 header-y += virtio_net.h
diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
new file mode 100644
index 000000000000..571c4cbd3f93
--- /dev/null
+++ b/include/uapi/linux/virtio_gpu.h
@@ -0,0 +1,204 @@
+/*
+ * Virtio GPU Device
+ *
+ * Copyright Red Hat, Inc. 2013-2014
+ *
+ * Authors:
+ *     Dave Airlie <airlied@redhat.com>
+ *     Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This header is BSD licensed so anyone can use the definitions
+ * to implement compatible drivers/servers:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of IBM nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL IBM OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef VIRTIO_GPU_HW_H
+#define VIRTIO_GPU_HW_H
+
+enum virtio_gpu_ctrl_type {
+	VIRTIO_GPU_UNDEFINED = 0,
+
+	/* 2d commands */
+	VIRTIO_GPU_CMD_GET_DISPLAY_INFO = 0x0100,
+	VIRTIO_GPU_CMD_RESOURCE_CREATE_2D,
+	VIRTIO_GPU_CMD_RESOURCE_UNREF,
+	VIRTIO_GPU_CMD_SET_SCANOUT,
+	VIRTIO_GPU_CMD_RESOURCE_FLUSH,
+	VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D,
+	VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING,
+	VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING,
+
+	/* cursor commands */
+	VIRTIO_GPU_CMD_UPDATE_CURSOR = 0x0300,
+	VIRTIO_GPU_CMD_MOVE_CURSOR,
+
+	/* success responses */
+	VIRTIO_GPU_RESP_OK_NODATA = 0x1100,
+	VIRTIO_GPU_RESP_OK_DISPLAY_INFO,
+
+	/* error responses */
+	VIRTIO_GPU_RESP_ERR_UNSPEC = 0x1200,
+	VIRTIO_GPU_RESP_ERR_OUT_OF_MEMORY,
+	VIRTIO_GPU_RESP_ERR_INVALID_SCANOUT_ID,
+	VIRTIO_GPU_RESP_ERR_INVALID_RESOURCE_ID,
+	VIRTIO_GPU_RESP_ERR_INVALID_CONTEXT_ID,
+	VIRTIO_GPU_RESP_ERR_INVALID_PARAMETER,
+};
+
+#define VIRTIO_GPU_FLAG_FENCE (1 << 0)
+
+struct virtio_gpu_ctrl_hdr {
+	__le32 type;
+	__le32 flags;
+	__le64 fence_id;
+	__le32 ctx_id;
+	__le32 padding;
+};
+
+/* data passed in the cursor vq */
+
+struct virtio_gpu_cursor_pos {
+	__le32 scanout_id;
+	__le32 x;
+	__le32 y;
+	__le32 padding;
+};
+
+/* VIRTIO_GPU_CMD_UPDATE_CURSOR, VIRTIO_GPU_CMD_MOVE_CURSOR */
+struct virtio_gpu_update_cursor {
+	struct virtio_gpu_ctrl_hdr hdr;
+	struct virtio_gpu_cursor_pos pos;  /* update & move */
+	__le32 resource_id;           /* update only */
+	__le32 hot_x;                 /* update only */
+	__le32 hot_y;                 /* update only */
+	__le32 padding;
+};
+
+/* data passed in the control vq, 2d related */
+
+struct virtio_gpu_rect {
+	__le32 x;
+	__le32 y;
+	__le32 width;
+	__le32 height;
+};
+
+/* VIRTIO_GPU_CMD_RESOURCE_UNREF */
+struct virtio_gpu_resource_unref {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+	__le32 padding;
+};
+
+/* VIRTIO_GPU_CMD_RESOURCE_CREATE_2D: create a 2d resource with a format */
+struct virtio_gpu_resource_create_2d {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+	__le32 format;
+	__le32 width;
+	__le32 height;
+};
+
+/* VIRTIO_GPU_CMD_SET_SCANOUT */
+struct virtio_gpu_set_scanout {
+	struct virtio_gpu_ctrl_hdr hdr;
+	struct virtio_gpu_rect r;
+	__le32 scanout_id;
+	__le32 resource_id;
+};
+
+/* VIRTIO_GPU_CMD_RESOURCE_FLUSH */
+struct virtio_gpu_resource_flush {
+	struct virtio_gpu_ctrl_hdr hdr;
+	struct virtio_gpu_rect r;
+	__le32 resource_id;
+	__le32 padding;
+};
+
+/* VIRTIO_GPU_CMD_TRANSFER_TO_HOST_2D: simple transfer to_host */
+struct virtio_gpu_transfer_to_host_2d {
+	struct virtio_gpu_ctrl_hdr hdr;
+	struct virtio_gpu_rect r;
+	__le64 offset;
+	__le32 resource_id;
+	__le32 padding;
+};
+
+struct virtio_gpu_mem_entry {
+	__le64 addr;
+	__le32 length;
+	__le32 padding;
+};
+
+/* VIRTIO_GPU_CMD_RESOURCE_ATTACH_BACKING */
+struct virtio_gpu_resource_attach_backing {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+	__le32 nr_entries;
+};
+
+/* VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING */
+struct virtio_gpu_resource_detach_backing {
+	struct virtio_gpu_ctrl_hdr hdr;
+	__le32 resource_id;
+	__le32 padding;
+};
+
+/* VIRTIO_GPU_RESP_OK_DISPLAY_INFO */
+#define VIRTIO_GPU_MAX_SCANOUTS 16
+struct virtio_gpu_resp_display_info {
+	struct virtio_gpu_ctrl_hdr hdr;
+	struct virtio_gpu_display_one {
+		struct virtio_gpu_rect r;
+		__le32 enabled;
+		__le32 flags;
+	} pmodes[VIRTIO_GPU_MAX_SCANOUTS];
+};
+
+#define VIRTIO_GPU_EVENT_DISPLAY (1 << 0)
+
+struct virtio_gpu_config {
+	__u32 events_read;
+	__u32 events_clear;
+	__u32 num_scanouts;
+	__u32 reserved;
+};
+
+/* simple formats for fbcon/X use */
+enum virtio_gpu_formats {
+	VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM  = 1,
+	VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM  = 2,
+	VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM  = 3,
+	VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM  = 4,
+
+	VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM  = 67,
+	VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM  = 68,
+
+	VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM  = 121,
+	VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM  = 134,
+};
+
+#endif
diff --git a/include/uapi/linux/virtio_ids.h b/include/uapi/linux/virtio_ids.h
index 5f60aa4be50a..77925f587b15 100644
--- a/include/uapi/linux/virtio_ids.h
+++ b/include/uapi/linux/virtio_ids.h
@@ -39,6 +39,7 @@
 #define VIRTIO_ID_9P		9 /* 9p virtio console */
 #define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
 #define VIRTIO_ID_CAIF	       12 /* Virtio caif */
+#define VIRTIO_ID_GPU          16 /* virtio GPU */
 #define VIRTIO_ID_INPUT        18 /* virtio input */
 
 #endif /* _LINUX_VIRTIO_IDS_H */
-- 
cgit 


From 3896d655f4d491c67d669a15f275a39f713410f8 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Tue, 2 Jun 2015 16:03:14 -0700
Subject: bpf: introduce bpf_clone_redirect() helper

Allow eBPF programs attached to classifier/actions to call
bpf_clone_redirect(skb, ifindex, flags) helper which will
mirror or redirect the packet by dynamic ifindex selection
from within the program to a target device either at ingress
or at egress. Can be used for various scenarios, for example,
to load balance skbs into veths, split parts of the traffic
to local taps, etc.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Acked-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/bpf.h | 10 ++++++++++
 net/core/filter.c        | 40 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 50 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 72f3080afa1e..42aa19abab86 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -220,6 +220,16 @@ enum bpf_func_id {
 	 * Return: 0 on success
 	 */
 	BPF_FUNC_tail_call,
+
+	/**
+	 * bpf_clone_redirect(skb, ifindex, flags) - redirect to another netdev
+	 * @skb: pointer to skb
+	 * @ifindex: ifindex of the net device
+	 * @flags: bit 0 - if set, redirect to ingress instead of egress
+	 *         other bits - reserved
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_clone_redirect,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/net/core/filter.c b/net/core/filter.c
index b78a010a957f..64c121c09655 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -46,6 +46,7 @@
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
+#include <net/sch_generic.h>
 
 /**
  *	sk_filter - run a packet through a socket filter
@@ -1407,6 +1408,43 @@ const struct bpf_func_proto bpf_l4_csum_replace_proto = {
 	.arg5_type	= ARG_ANYTHING,
 };
 
+#define BPF_IS_REDIRECT_INGRESS(flags)	((flags) & 1)
+
+static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5)
+{
+	struct sk_buff *skb = (struct sk_buff *) (long) r1, *skb2;
+	struct net_device *dev;
+
+	dev = dev_get_by_index_rcu(dev_net(skb->dev), ifindex);
+	if (unlikely(!dev))
+		return -EINVAL;
+
+	if (unlikely(!(dev->flags & IFF_UP)))
+		return -EINVAL;
+
+	skb2 = skb_clone(skb, GFP_ATOMIC);
+	if (unlikely(!skb2))
+		return -ENOMEM;
+
+	if (G_TC_AT(skb2->tc_verd) & AT_INGRESS)
+		skb_push(skb2, skb2->mac_len);
+
+	if (BPF_IS_REDIRECT_INGRESS(flags))
+		return dev_forward_skb(dev, skb2);
+
+	skb2->dev = dev;
+	return dev_queue_xmit(skb2);
+}
+
+const struct bpf_func_proto bpf_clone_redirect_proto = {
+	.func           = bpf_clone_redirect,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_ANYTHING,
+	.arg3_type      = ARG_ANYTHING,
+};
+
 static const struct bpf_func_proto *
 sk_filter_func_proto(enum bpf_func_id func_id)
 {
@@ -1440,6 +1478,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
 		return &bpf_l4_csum_replace_proto;
+	case BPF_FUNC_clone_redirect:
+		return &bpf_clone_redirect_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
-- 
cgit 


From f077825a8758d79838a757dafb79adcdd047ef3a Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 1 Apr 2015 15:06:40 +0200
Subject: KVM: x86: API changes for SMM support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch includes changes to the external API for SMM support.
Userspace can predicate the availability of the new fields and
ioctls on a new capability, KVM_CAP_X86_SMM, which is added at the end
of the patch series.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt | 40 +++++++++++++++++++++++++++++++++------
 arch/x86/include/asm/kvm_host.h   |  3 +++
 arch/x86/include/uapi/asm/kvm.h   | 11 ++++++++++-
 arch/x86/kvm/kvm_cache_regs.h     |  5 +++++
 arch/x86/kvm/lapic.h              |  5 +++++
 arch/x86/kvm/x86.c                | 40 +++++++++++++++++++++++++++++++++++++--
 include/uapi/linux/kvm.h          |  5 ++++-
 7 files changed, 99 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 695544420ff2..2ddefd58b1aa 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -820,11 +820,21 @@ struct kvm_vcpu_events {
 	} nmi;
 	__u32 sipi_vector;
 	__u32 flags;
+	struct {
+		__u8 smm;
+		__u8 pending;
+		__u8 smm_inside_nmi;
+		__u8 latched_init;
+	} smi;
 };
 
-KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
-interrupt.shadow contains a valid state. Otherwise, this field is undefined.
+Only two fields are defined in the flags field:
+
+- KVM_VCPUEVENT_VALID_SHADOW may be set in the flags field to signal that
+  interrupt.shadow contains a valid state.
 
+- KVM_VCPUEVENT_VALID_SMM may be set in the flags field to signal that
+  smi contains a valid state.
 
 4.32 KVM_SET_VCPU_EVENTS
 
@@ -841,17 +851,20 @@ vcpu.
 See KVM_GET_VCPU_EVENTS for the data structure.
 
 Fields that may be modified asynchronously by running VCPUs can be excluded
-from the update. These fields are nmi.pending and sipi_vector. Keep the
-corresponding bits in the flags field cleared to suppress overwriting the
-current in-kernel state. The bits are:
+from the update. These fields are nmi.pending, sipi_vector, smi.smm,
+smi.pending. Keep the corresponding bits in the flags field cleared to
+suppress overwriting the current in-kernel state. The bits are:
 
 KVM_VCPUEVENT_VALID_NMI_PENDING - transfer nmi.pending to the kernel
 KVM_VCPUEVENT_VALID_SIPI_VECTOR - transfer sipi_vector
+KVM_VCPUEVENT_VALID_SMM         - transfer the smi sub-struct.
 
 If KVM_CAP_INTR_SHADOW is available, KVM_VCPUEVENT_VALID_SHADOW can be set in
 the flags field to signal that interrupt.shadow contains a valid state and
 shall be written into the VCPU.
 
+KVM_VCPUEVENT_VALID_SMM can only be set if KVM_CAP_X86_SMM is available.
+
 
 4.33 KVM_GET_DEBUGREGS
 
@@ -2979,6 +2992,16 @@ len must be a multiple of sizeof(struct kvm_s390_irq). It must be > 0
 and it must not exceed (max_vcpus + 32) * sizeof(struct kvm_s390_irq),
 which is the maximum number of possibly pending cpu-local interrupts.
 
+4.90 KVM_SMI
+
+Capability: KVM_CAP_X86_SMM
+Architectures: x86
+Type: vcpu ioctl
+Parameters: none
+Returns: 0 on success, -1 on error
+
+Queues an SMI on the thread's vcpu.
+
 5. The kvm_run structure
 ------------------------
 
@@ -3014,7 +3037,12 @@ an interrupt can be injected now with KVM_INTERRUPT.
 The value of the current interrupt flag.  Only valid if in-kernel
 local APIC is not used.
 
-	__u8 padding2[2];
+	__u16 flags;
+
+More architecture-specific flags detailing state of the VCPU that may
+affect the device's behavior.  The only currently defined flag is
+KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
+VCPU is in system management mode.
 
 	/* in (pre_kvm_run), out (post_kvm_run) */
 	__u64 cr8;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 4e299fcd0eb6..d52d7aea375f 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -471,6 +471,7 @@ struct kvm_vcpu_arch {
 	atomic_t nmi_queued;  /* unprocessed asynchronous NMIs */
 	unsigned nmi_pending; /* NMI queued after currently running handler */
 	bool nmi_injected;    /* Trying to inject an NMI this entry */
+	bool smi_pending;    /* SMI queued after currently running handler */
 
 	struct mtrr_state_type mtrr_state;
 	u64 pat;
@@ -1115,6 +1116,8 @@ enum {
 #define HF_NMI_MASK		(1 << 3)
 #define HF_IRET_MASK		(1 << 4)
 #define HF_GUEST_MASK		(1 << 5) /* VCPU is in guest-mode */
+#define HF_SMM_MASK		(1 << 6)
+#define HF_SMM_INSIDE_NMI_MASK	(1 << 7)
 
 /*
  * Hardware virtualization extension instructions may fault if a
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 2fec75e4b1e1..a4ae82eb82aa 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -106,6 +106,8 @@ struct kvm_ioapic_state {
 #define KVM_IRQCHIP_IOAPIC       2
 #define KVM_NR_IRQCHIPS          3
 
+#define KVM_RUN_X86_SMM		 (1 << 0)
+
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
 	/* out (KVM_GET_REGS) / in (KVM_SET_REGS) */
@@ -281,6 +283,7 @@ struct kvm_reinject_control {
 #define KVM_VCPUEVENT_VALID_NMI_PENDING	0x00000001
 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR	0x00000002
 #define KVM_VCPUEVENT_VALID_SHADOW	0x00000004
+#define KVM_VCPUEVENT_VALID_SMM		0x00000008
 
 /* Interrupt shadow states */
 #define KVM_X86_SHADOW_INT_MOV_SS	0x01
@@ -309,7 +312,13 @@ struct kvm_vcpu_events {
 	} nmi;
 	__u32 sipi_vector;
 	__u32 flags;
-	__u32 reserved[10];
+	struct {
+		__u8 smm;
+		__u8 pending;
+		__u8 smm_inside_nmi;
+		__u8 latched_init;
+	} smi;
+	__u32 reserved[9];
 };
 
 /* for KVM_GET/SET_DEBUGREGS */
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index 544076c4f44b..e1e89ee4af75 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -99,4 +99,9 @@ static inline bool is_guest_mode(struct kvm_vcpu *vcpu)
 	return vcpu->arch.hflags & HF_GUEST_MASK;
 }
 
+static inline bool is_smm(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.hflags & HF_SMM_MASK;
+}
+
 #endif
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 9d8fcde52027..f2f4e10ab772 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -159,6 +159,11 @@ static inline bool kvm_lowest_prio_delivery(struct kvm_lapic_irq *irq)
 			irq->msi_redir_hint);
 }
 
+static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
+{
+	return kvm_vcpu_has_lapic(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+}
+
 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
 
 void wait_lapic_expire(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 7aec25f2f45c..aa46ac1ff48b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3101,6 +3101,11 @@ static int kvm_vcpu_ioctl_nmi(struct kvm_vcpu *vcpu)
 	return 0;
 }
 
+static int kvm_vcpu_ioctl_smi(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+
 static int vcpu_ioctl_tpr_access_reporting(struct kvm_vcpu *vcpu,
 					   struct kvm_tpr_access_ctl *tac)
 {
@@ -3206,8 +3211,15 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
 
 	events->sipi_vector = 0; /* never valid when reporting to user space */
 
+	events->smi.smm = is_smm(vcpu);
+	events->smi.pending = vcpu->arch.smi_pending;
+	events->smi.smm_inside_nmi =
+		!!(vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK);
+	events->smi.latched_init = kvm_lapic_latched_init(vcpu);
+
 	events->flags = (KVM_VCPUEVENT_VALID_NMI_PENDING
-			 | KVM_VCPUEVENT_VALID_SHADOW);
+			 | KVM_VCPUEVENT_VALID_SHADOW
+			 | KVM_VCPUEVENT_VALID_SMM);
 	memset(&events->reserved, 0, sizeof(events->reserved));
 }
 
@@ -3216,7 +3228,8 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 {
 	if (events->flags & ~(KVM_VCPUEVENT_VALID_NMI_PENDING
 			      | KVM_VCPUEVENT_VALID_SIPI_VECTOR
-			      | KVM_VCPUEVENT_VALID_SHADOW))
+			      | KVM_VCPUEVENT_VALID_SHADOW
+			      | KVM_VCPUEVENT_VALID_SMM))
 		return -EINVAL;
 
 	process_nmi(vcpu);
@@ -3241,6 +3254,24 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 	    kvm_vcpu_has_lapic(vcpu))
 		vcpu->arch.apic->sipi_vector = events->sipi_vector;
 
+	if (events->flags & KVM_VCPUEVENT_VALID_SMM) {
+		if (events->smi.smm)
+			vcpu->arch.hflags |= HF_SMM_MASK;
+		else
+			vcpu->arch.hflags &= ~HF_SMM_MASK;
+		vcpu->arch.smi_pending = events->smi.pending;
+		if (events->smi.smm_inside_nmi)
+			vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
+		else
+			vcpu->arch.hflags &= ~HF_SMM_INSIDE_NMI_MASK;
+		if (kvm_vcpu_has_lapic(vcpu)) {
+			if (events->smi.latched_init)
+				set_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+			else
+				clear_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events);
+		}
+	}
+
 	kvm_make_request(KVM_REQ_EVENT, vcpu);
 
 	return 0;
@@ -3500,6 +3531,10 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		r = kvm_vcpu_ioctl_nmi(vcpu);
 		break;
 	}
+	case KVM_SMI: {
+		r = kvm_vcpu_ioctl_smi(vcpu);
+		break;
+	}
 	case KVM_SET_CPUID: {
 		struct kvm_cpuid __user *cpuid_arg = argp;
 		struct kvm_cpuid cpuid;
@@ -6182,6 +6217,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 	struct kvm_run *kvm_run = vcpu->run;
 
 	kvm_run->if_flag = (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
+	kvm_run->flags = is_smm(vcpu) ? KVM_RUN_X86_SMM : 0;
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
 	if (irqchip_in_kernel(vcpu->kvm))
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 75bd9f7fd846..eace8babd227 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -202,7 +202,7 @@ struct kvm_run {
 	__u32 exit_reason;
 	__u8 ready_for_interrupt_injection;
 	__u8 if_flag;
-	__u8 padding2[2];
+	__u16 flags;
 
 	/* in (pre_kvm_run), out (post_kvm_run) */
 	__u64 cr8;
@@ -815,6 +815,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_S390_IRQ_STATE 114
 #define KVM_CAP_PPC_HWRNG 115
 #define KVM_CAP_DISABLE_QUIRKS 116
+#define KVM_CAP_X86_SMM 117
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1200,6 +1201,8 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_S390_IRQ_STATE */
 #define KVM_S390_SET_IRQ_STATE	  _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state)
 #define KVM_S390_GET_IRQ_STATE	  _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state)
+/* Available with KVM_CAP_X86_SMM */
+#define KVM_SMI                   _IO(KVMIO,   0xb7)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU	(1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3		(1 << 1)
-- 
cgit 


From 730fc4371333636a00fed32c587fc1e85c5367e2 Mon Sep 17 00:00:00 2001
From: Tom Herbert <tom@herbertland.com>
Date: Thu, 4 Jun 2015 09:16:37 -0700
Subject: mpls: Add definition for IPPROTO_MPLS

Add uapi define for MPLS over IP.

Acked-by: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Tom Herbert <tom@herbertland.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 589ced069e8a..641338bef651 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -69,6 +69,8 @@ enum {
 #define IPPROTO_SCTP		IPPROTO_SCTP
   IPPROTO_UDPLITE = 136,	/* UDP-Lite (RFC 3828)			*/
 #define IPPROTO_UDPLITE		IPPROTO_UDPLITE
+  IPPROTO_MPLS = 137,		/* MPLS in IP (RFC 4023)		*/
+#define IPPROTO_MPLS		IPPROTO_MPLS
   IPPROTO_RAW = 255,		/* Raw IP packets			*/
 #define IPPROTO_RAW		IPPROTO_RAW
   IPPROTO_MAX
-- 
cgit 


From ae45577324d1f749c907840247d443696ac3bc7a Mon Sep 17 00:00:00 2001
From: Dave Airlie <airlied@redhat.com>
Date: Fri, 5 Jun 2015 12:31:12 +1000
Subject: virtgpu: include linux/types.h to avoid warning.

Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/uapi/linux/virtio_gpu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h
index 571c4cbd3f93..478be5270e26 100644
--- a/include/uapi/linux/virtio_gpu.h
+++ b/include/uapi/linux/virtio_gpu.h
@@ -38,6 +38,8 @@
 #ifndef VIRTIO_GPU_HW_H
 #define VIRTIO_GPU_HW_H
 
+#include <linux/types.h>
+
 enum virtio_gpu_ctrl_type {
 	VIRTIO_GPU_UNDEFINED = 0,
 
-- 
cgit 


From 74fdcb2ee1786a92584e89c91006e449813527ce Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hans.verkuil@cisco.com>
Date: Tue, 28 Apr 2015 08:49:09 -0300
Subject: [media] videodev2.h: add support for transfer functions

In the past the transfer function was implied by the colorspace. However,
it is an independent entity in its own right. Add support for explicitly
choosing the transfer function.

This change will allow us to represent linear RGB (as is used by openGL), and
it will make it easier to work with decoded video material since most codecs
store the transfer function as a separate property as well.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 drivers/media/v4l2-core/v4l2-ioctl.c |  9 ++++----
 include/media/v4l2-mediabus.h        |  2 ++
 include/uapi/linux/v4l2-mediabus.h   |  4 +++-
 include/uapi/linux/videodev2.h       | 41 +++++++++++++++++++++++++++++++++++-
 4 files changed, 50 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/v4l2-core/v4l2-ioctl.c b/drivers/media/v4l2-core/v4l2-ioctl.c
index 368bc3a973d4..691295984225 100644
--- a/drivers/media/v4l2-core/v4l2-ioctl.c
+++ b/drivers/media/v4l2-core/v4l2-ioctl.c
@@ -257,7 +257,8 @@ static void v4l_print_format(const void *arg, bool write_only)
 		pr_cont(", width=%u, height=%u, "
 			"pixelformat=%c%c%c%c, field=%s, "
 			"bytesperline=%u, sizeimage=%u, colorspace=%d, "
-			"flags=0x%x, ycbcr_enc=%u, quantization=%u\n",
+			"flags=0x%x, ycbcr_enc=%u, quantization=%u, "
+			"xfer_func=%u\n",
 			pix->width, pix->height,
 			(pix->pixelformat & 0xff),
 			(pix->pixelformat >>  8) & 0xff,
@@ -266,7 +267,7 @@ static void v4l_print_format(const void *arg, bool write_only)
 			prt_names(pix->field, v4l2_field_names),
 			pix->bytesperline, pix->sizeimage,
 			pix->colorspace, pix->flags, pix->ycbcr_enc,
-			pix->quantization);
+			pix->quantization, pix->xfer_func);
 		break;
 	case V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE:
 	case V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE:
@@ -274,7 +275,7 @@ static void v4l_print_format(const void *arg, bool write_only)
 		pr_cont(", width=%u, height=%u, "
 			"format=%c%c%c%c, field=%s, "
 			"colorspace=%d, num_planes=%u, flags=0x%x, "
-			"ycbcr_enc=%u, quantization=%u\n",
+			"ycbcr_enc=%u, quantization=%u, xfer_func=%u\n",
 			mp->width, mp->height,
 			(mp->pixelformat & 0xff),
 			(mp->pixelformat >>  8) & 0xff,
@@ -282,7 +283,7 @@ static void v4l_print_format(const void *arg, bool write_only)
 			(mp->pixelformat >> 24) & 0xff,
 			prt_names(mp->field, v4l2_field_names),
 			mp->colorspace, mp->num_planes, mp->flags,
-			mp->ycbcr_enc, mp->quantization);
+			mp->ycbcr_enc, mp->quantization, mp->xfer_func);
 		for (i = 0; i < mp->num_planes; i++)
 			printk(KERN_DEBUG "plane %u: bytesperline=%u sizeimage=%u\n", i,
 					mp->plane_fmt[i].bytesperline,
diff --git a/include/media/v4l2-mediabus.h b/include/media/v4l2-mediabus.h
index 38d960d8dccd..73069e4c2796 100644
--- a/include/media/v4l2-mediabus.h
+++ b/include/media/v4l2-mediabus.h
@@ -96,6 +96,7 @@ static inline void v4l2_fill_pix_format(struct v4l2_pix_format *pix_fmt,
 	pix_fmt->colorspace = mbus_fmt->colorspace;
 	pix_fmt->ycbcr_enc = mbus_fmt->ycbcr_enc;
 	pix_fmt->quantization = mbus_fmt->quantization;
+	pix_fmt->xfer_func = mbus_fmt->xfer_func;
 }
 
 static inline void v4l2_fill_mbus_format(struct v4l2_mbus_framefmt *mbus_fmt,
@@ -108,6 +109,7 @@ static inline void v4l2_fill_mbus_format(struct v4l2_mbus_framefmt *mbus_fmt,
 	mbus_fmt->colorspace = pix_fmt->colorspace;
 	mbus_fmt->ycbcr_enc = pix_fmt->ycbcr_enc;
 	mbus_fmt->quantization = pix_fmt->quantization;
+	mbus_fmt->xfer_func = pix_fmt->xfer_func;
 	mbus_fmt->code = code;
 }
 
diff --git a/include/uapi/linux/v4l2-mediabus.h b/include/uapi/linux/v4l2-mediabus.h
index 26db20647e6f..9cac6325cc7e 100644
--- a/include/uapi/linux/v4l2-mediabus.h
+++ b/include/uapi/linux/v4l2-mediabus.h
@@ -24,6 +24,7 @@
  * @colorspace:	colorspace of the data (from enum v4l2_colorspace)
  * @ycbcr_enc:	YCbCr encoding of the data (from enum v4l2_ycbcr_encoding)
  * @quantization: quantization of the data (from enum v4l2_quantization)
+ * @xfer_func:  transfer function of the data (from enum v4l2_xfer_func)
  */
 struct v4l2_mbus_framefmt {
 	__u32			width;
@@ -33,7 +34,8 @@ struct v4l2_mbus_framefmt {
 	__u32			colorspace;
 	__u16			ycbcr_enc;
 	__u16			quantization;
-	__u32			reserved[6];
+	__u16			xfer_func;
+	__u16			reserved[11];
 };
 
 #ifndef __KERNEL__
diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 003a91292a4f..3d5fc72d53a7 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -240,6 +240,42 @@ enum v4l2_colorspace {
 	((is_sdtv) ? V4L2_COLORSPACE_SMPTE170M : \
 	 ((is_hdtv) ? V4L2_COLORSPACE_REC709 : V4L2_COLORSPACE_SRGB))
 
+enum v4l2_xfer_func {
+	/*
+	 * Mapping of V4L2_XFER_FUNC_DEFAULT to actual transfer functions
+	 * for the various colorspaces:
+	 *
+	 * V4L2_COLORSPACE_SMPTE170M, V4L2_COLORSPACE_470_SYSTEM_M,
+	 * V4L2_COLORSPACE_470_SYSTEM_BG, V4L2_COLORSPACE_REC709 and
+	 * V4L2_COLORSPACE_BT2020: V4L2_XFER_FUNC_709
+	 *
+	 * V4L2_COLORSPACE_SRGB, V4L2_COLORSPACE_JPEG: V4L2_XFER_FUNC_SRGB
+	 *
+	 * V4L2_COLORSPACE_ADOBERGB: V4L2_XFER_FUNC_ADOBERGB
+	 *
+	 * V4L2_COLORSPACE_SMPTE240M: V4L2_XFER_FUNC_SMPTE240M
+	 *
+	 * V4L2_COLORSPACE_RAW: V4L2_XFER_FUNC_NONE
+	 */
+	V4L2_XFER_FUNC_DEFAULT     = 0,
+	V4L2_XFER_FUNC_709         = 1,
+	V4L2_XFER_FUNC_SRGB        = 2,
+	V4L2_XFER_FUNC_ADOBERGB    = 3,
+	V4L2_XFER_FUNC_SMPTE240M   = 4,
+	V4L2_XFER_FUNC_NONE        = 5,
+};
+
+/*
+ * Determine how XFER_FUNC_DEFAULT should map to a proper transfer function.
+ * This depends on the colorspace.
+ */
+#define V4L2_MAP_XFER_FUNC_DEFAULT(colsp) \
+	((colsp) == V4L2_XFER_FUNC_ADOBERGB ? V4L2_XFER_FUNC_ADOBERGB : \
+	 ((colsp) == V4L2_COLORSPACE_SMPTE240M ? V4L2_XFER_FUNC_SMPTE240M : \
+	  ((colsp) == V4L2_COLORSPACE_RAW ? V4L2_XFER_FUNC_NONE : \
+	   ((colsp) == V4L2_COLORSPACE_SRGB || (colsp) == V4L2_COLORSPACE_JPEG ? \
+	    V4L2_XFER_FUNC_SRGB : V4L2_XFER_FUNC_709))))
+
 enum v4l2_ycbcr_encoding {
 	/*
 	 * Mapping of V4L2_YCBCR_ENC_DEFAULT to actual encodings for the
@@ -409,6 +445,7 @@ struct v4l2_pix_format {
 	__u32			flags;		/* format flags (V4L2_PIX_FMT_FLAG_*) */
 	__u32			ycbcr_enc;	/* enum v4l2_ycbcr_encoding */
 	__u32			quantization;	/* enum v4l2_quantization */
+	__u32			xfer_func;	/* enum v4l2_xfer_func */
 };
 
 /*      Pixel format         FOURCC                          depth  Description  */
@@ -1907,6 +1944,7 @@ struct v4l2_plane_pix_format {
  * @flags:		format flags (V4L2_PIX_FMT_FLAG_*)
  * @ycbcr_enc:		enum v4l2_ycbcr_encoding, Y'CbCr encoding
  * @quantization:	enum v4l2_quantization, colorspace quantization
+ * @xfer_func:		enum v4l2_xfer_func, colorspace transfer function
  */
 struct v4l2_pix_format_mplane {
 	__u32				width;
@@ -1920,7 +1958,8 @@ struct v4l2_pix_format_mplane {
 	__u8				flags;
 	__u8				ycbcr_enc;
 	__u8				quantization;
-	__u8				reserved[8];
+	__u8				xfer_func;
+	__u8				reserved[7];
 } __attribute__ ((packed));
 
 /**
-- 
cgit 


From f481b069e674378758c73761827e83ab05c46b52 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Sun, 17 May 2015 17:30:37 +0200
Subject: KVM: implement multiple address spaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only two ioctls have to be modified; the address space id is
placed in the higher 16 bits of their slot id argument.

As of this patch, no architecture defines more than one
address space; x86 will be the first.

Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 Documentation/virtual/kvm/api.txt        | 12 ++++++
 arch/powerpc/include/asm/kvm_book3s_64.h |  2 +-
 include/linux/kvm_host.h                 | 26 ++++++++++--
 include/uapi/linux/kvm.h                 |  1 +
 virt/kvm/kvm_main.c                      | 70 ++++++++++++++++++++------------
 5 files changed, 79 insertions(+), 32 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt
index 2ddefd58b1aa..461956a0ee8e 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -254,6 +254,11 @@ since the last call to this ioctl.  Bit 0 is the first page in the
 memory slot.  Ensure the entire structure is cleared to avoid padding
 issues.
 
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 specifies
+the address space for which you want to return the dirty bitmap.
+They must be less than the value that KVM_CHECK_EXTENSION returns for
+the KVM_CAP_MULTI_ADDRESS_SPACE capability.
+
 
 4.9 KVM_SET_MEMORY_ALIAS
 
@@ -924,6 +929,13 @@ slot.  When changing an existing slot, it may be moved in the guest
 physical memory space, or its flags may be modified.  It may not be
 resized.  Slots may not overlap in guest physical address space.
 
+If KVM_CAP_MULTI_ADDRESS_SPACE is available, bits 16-31 of "slot"
+specifies the address space which is being modified.  They must be
+less than the value that KVM_CHECK_EXTENSION returns for the
+KVM_CAP_MULTI_ADDRESS_SPACE capability.  Slots in separate address spaces
+are unrelated; the restriction on overlapping slots only applies within
+each address space.
+
 Memory for the region is taken starting at the address denoted by the
 field userspace_addr, which must point at user addressable memory for
 the entire memory slot size.  Any object may back this memory, including
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 3536d12eb798..2aa79c864e91 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -430,7 +430,7 @@ static inline void note_hpte_modification(struct kvm *kvm,
  */
 static inline struct kvm_memslots *kvm_memslots_raw(struct kvm *kvm)
 {
-	return rcu_dereference_raw_notrace(kvm->memslots);
+	return rcu_dereference_raw_notrace(kvm->memslots[0]);
 }
 
 extern void kvmppc_mmu_debugfs_init(struct kvm *kvm);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index ba1ea43998e4..9564fd78c547 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -44,6 +44,10 @@
 /* Two fragments for cross MMIO pages. */
 #define KVM_MAX_MMIO_FRAGMENTS	2
 
+#ifndef KVM_ADDRESS_SPACE_NUM
+#define KVM_ADDRESS_SPACE_NUM	1
+#endif
+
 /*
  * For the normal pfn, the highest 12 bits should be zero,
  * so we can mask bit 62 ~ bit 52  to indicate the error pfn,
@@ -331,6 +335,13 @@ struct kvm_kernel_irq_routing_entry {
 #define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
 #endif
 
+#ifndef __KVM_VCPU_MULTIPLE_ADDRESS_SPACE
+static inline int kvm_arch_vcpu_memslots_id(struct kvm_vcpu *vcpu)
+{
+	return 0;
+}
+#endif
+
 /*
  * Note:
  * memslots are not sorted by id anymore, please use id_to_memslot()
@@ -349,7 +360,7 @@ struct kvm {
 	spinlock_t mmu_lock;
 	struct mutex slots_lock;
 	struct mm_struct *mm; /* userspace tied to this vm */
-	struct kvm_memslots *memslots;
+	struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
 	struct srcu_struct srcu;
 	struct srcu_struct irq_srcu;
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
@@ -464,16 +475,23 @@ void kvm_exit(void);
 void kvm_get_kvm(struct kvm *kvm);
 void kvm_put_kvm(struct kvm *kvm);
 
-static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
-	return rcu_dereference_check(kvm->memslots,
+	return rcu_dereference_check(kvm->memslots[as_id],
 			srcu_read_lock_held(&kvm->srcu)
 			|| lockdep_is_held(&kvm->slots_lock));
 }
 
+static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)
+{
+	return __kvm_memslots(kvm, 0);
+}
+
 static inline struct kvm_memslots *kvm_vcpu_memslots(struct kvm_vcpu *vcpu)
 {
-	return kvm_memslots(vcpu->kvm);
+	int as_id = kvm_arch_vcpu_memslots_id(vcpu);
+
+	return __kvm_memslots(vcpu->kvm, as_id);
 }
 
 static inline struct kvm_memory_slot *
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index eace8babd227..5ff1038437e3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -816,6 +816,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_HWRNG 115
 #define KVM_CAP_DISABLE_QUIRKS 116
 #define KVM_CAP_X86_SMM 117
+#define KVM_CAP_MULTI_ADDRESS_SPACE 118
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 3a121cedcc77..848af90b8091 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -518,9 +518,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
 	BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
 
 	r = -ENOMEM;
-	kvm->memslots = kvm_alloc_memslots();
-	if (!kvm->memslots)
-		goto out_err_no_srcu;
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
+		kvm->memslots[i] = kvm_alloc_memslots();
+		if (!kvm->memslots[i])
+			goto out_err_no_srcu;
+	}
 
 	if (init_srcu_struct(&kvm->srcu))
 		goto out_err_no_srcu;
@@ -562,7 +564,8 @@ out_err_no_srcu:
 out_err_no_disable:
 	for (i = 0; i < KVM_NR_BUSES; i++)
 		kfree(kvm->buses[i]);
-	kvm_free_memslots(kvm, kvm->memslots);
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+		kvm_free_memslots(kvm, kvm->memslots[i]);
 	kvm_arch_free_vm(kvm);
 	return ERR_PTR(r);
 }
@@ -612,7 +615,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
 #endif
 	kvm_arch_destroy_vm(kvm);
 	kvm_destroy_devices(kvm);
-	kvm_free_memslots(kvm, kvm->memslots);
+	for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
+		kvm_free_memslots(kvm, kvm->memslots[i]);
 	cleanup_srcu_struct(&kvm->irq_srcu);
 	cleanup_srcu_struct(&kvm->srcu);
 	kvm_arch_free_vm(kvm);
@@ -729,9 +733,9 @@ static int check_memory_region_flags(const struct kvm_userspace_memory_region *m
 }
 
 static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
-		struct kvm_memslots *slots)
+		int as_id, struct kvm_memslots *slots)
 {
-	struct kvm_memslots *old_memslots = kvm_memslots(kvm);
+	struct kvm_memslots *old_memslots = __kvm_memslots(kvm, as_id);
 
 	/*
 	 * Set the low bit in the generation, which disables SPTE caching
@@ -740,7 +744,7 @@ static struct kvm_memslots *install_new_memslots(struct kvm *kvm,
 	WARN_ON(old_memslots->generation & 1);
 	slots->generation = old_memslots->generation + 1;
 
-	rcu_assign_pointer(kvm->memslots, slots);
+	rcu_assign_pointer(kvm->memslots[as_id], slots);
 	synchronize_srcu_expedited(&kvm->srcu);
 
 	/*
@@ -772,6 +776,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	struct kvm_memory_slot *slot;
 	struct kvm_memory_slot old, new;
 	struct kvm_memslots *slots = NULL, *old_memslots;
+	int as_id, id;
 	enum kvm_mr_change change;
 
 	r = check_memory_region_flags(mem);
@@ -779,24 +784,27 @@ int __kvm_set_memory_region(struct kvm *kvm,
 		goto out;
 
 	r = -EINVAL;
+	as_id = mem->slot >> 16;
+	id = (u16)mem->slot;
+
 	/* General sanity checks */
 	if (mem->memory_size & (PAGE_SIZE - 1))
 		goto out;
 	if (mem->guest_phys_addr & (PAGE_SIZE - 1))
 		goto out;
 	/* We can read the guest memory with __xxx_user() later on. */
-	if ((mem->slot < KVM_USER_MEM_SLOTS) &&
+	if ((id < KVM_USER_MEM_SLOTS) &&
 	    ((mem->userspace_addr & (PAGE_SIZE - 1)) ||
 	     !access_ok(VERIFY_WRITE,
 			(void __user *)(unsigned long)mem->userspace_addr,
 			mem->memory_size)))
 		goto out;
-	if (mem->slot >= KVM_MEM_SLOTS_NUM)
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_MEM_SLOTS_NUM)
 		goto out;
 	if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
 		goto out;
 
-	slot = id_to_memslot(kvm_memslots(kvm), mem->slot);
+	slot = id_to_memslot(__kvm_memslots(kvm, as_id), id);
 	base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
 	npages = mem->memory_size >> PAGE_SHIFT;
 
@@ -805,7 +813,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 
 	new = old = *slot;
 
-	new.id = mem->slot;
+	new.id = id;
 	new.base_gfn = base_gfn;
 	new.npages = npages;
 	new.flags = mem->flags;
@@ -840,9 +848,9 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	if ((change == KVM_MR_CREATE) || (change == KVM_MR_MOVE)) {
 		/* Check for overlaps */
 		r = -EEXIST;
-		kvm_for_each_memslot(slot, kvm_memslots(kvm)) {
+		kvm_for_each_memslot(slot, __kvm_memslots(kvm, as_id)) {
 			if ((slot->id >= KVM_USER_MEM_SLOTS) ||
-			    (slot->id == mem->slot))
+			    (slot->id == id))
 				continue;
 			if (!((base_gfn + npages <= slot->base_gfn) ||
 			      (base_gfn >= slot->base_gfn + slot->npages)))
@@ -871,13 +879,13 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	slots = kvm_kvzalloc(sizeof(struct kvm_memslots));
 	if (!slots)
 		goto out_free;
-	memcpy(slots, kvm_memslots(kvm), sizeof(struct kvm_memslots));
+	memcpy(slots, __kvm_memslots(kvm, as_id), sizeof(struct kvm_memslots));
 
 	if ((change == KVM_MR_DELETE) || (change == KVM_MR_MOVE)) {
-		slot = id_to_memslot(slots, mem->slot);
+		slot = id_to_memslot(slots, id);
 		slot->flags |= KVM_MEMSLOT_INVALID;
 
-		old_memslots = install_new_memslots(kvm, slots);
+		old_memslots = install_new_memslots(kvm, as_id, slots);
 
 		/* slot was deleted or moved, clear iommu mapping */
 		kvm_iommu_unmap_pages(kvm, &old);
@@ -909,7 +917,7 @@ int __kvm_set_memory_region(struct kvm *kvm,
 	}
 
 	update_memslots(slots, &new);
-	old_memslots = install_new_memslots(kvm, slots);
+	old_memslots = install_new_memslots(kvm, as_id, slots);
 
 	kvm_arch_commit_memory_region(kvm, mem, &old, &new, change);
 
@@ -956,7 +964,7 @@ EXPORT_SYMBOL_GPL(kvm_set_memory_region);
 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 					  struct kvm_userspace_memory_region *mem)
 {
-	if (mem->slot >= KVM_USER_MEM_SLOTS)
+	if ((u16)mem->slot >= KVM_USER_MEM_SLOTS)
 		return -EINVAL;
 
 	return kvm_set_memory_region(kvm, mem);
@@ -967,16 +975,18 @@ int kvm_get_dirty_log(struct kvm *kvm,
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
-	int r, i;
+	int r, i, as_id, id;
 	unsigned long n;
 	unsigned long any = 0;
 
 	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
+	as_id = log->slot >> 16;
+	id = (u16)log->slot;
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
 		goto out;
 
-	slots = kvm_memslots(kvm);
-	memslot = id_to_memslot(slots, log->slot);
+	slots = __kvm_memslots(kvm, as_id);
+	memslot = id_to_memslot(slots, id);
 	r = -ENOENT;
 	if (!memslot->dirty_bitmap)
 		goto out;
@@ -1027,17 +1037,19 @@ int kvm_get_dirty_log_protect(struct kvm *kvm,
 {
 	struct kvm_memslots *slots;
 	struct kvm_memory_slot *memslot;
-	int r, i;
+	int r, i, as_id, id;
 	unsigned long n;
 	unsigned long *dirty_bitmap;
 	unsigned long *dirty_bitmap_buffer;
 
 	r = -EINVAL;
-	if (log->slot >= KVM_USER_MEM_SLOTS)
+	as_id = log->slot >> 16;
+	id = (u16)log->slot;
+	if (as_id >= KVM_ADDRESS_SPACE_NUM || id >= KVM_USER_MEM_SLOTS)
 		goto out;
 
-	slots = kvm_memslots(kvm);
-	memslot = id_to_memslot(slots, log->slot);
+	slots = __kvm_memslots(kvm, as_id);
+	memslot = id_to_memslot(slots, id);
 
 	dirty_bitmap = memslot->dirty_bitmap;
 	r = -ENOENT;
@@ -2619,6 +2631,10 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg)
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
 	case KVM_CAP_IRQ_ROUTING:
 		return KVM_MAX_IRQ_ROUTES;
+#endif
+#if KVM_ADDRESS_SPACE_NUM > 1
+	case KVM_CAP_MULTI_ADDRESS_SPACE:
+		return KVM_ADDRESS_SPACE_NUM;
 #endif
 	default:
 		break;
-- 
cgit 


From 4cc06521ee1f153e0d292413a5bff7bbbdee92d0 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Fri, 5 Jun 2015 10:30:08 -0600
Subject: NVMe: add sysfs and ioctl controller reset

We need the ability to perform an nvme controller reset as discussed on
the mailing list thread:

  http://lists.infradead.org/pipermail/linux-nvme/2015-March/001585.html

This adds a sysfs entry that when written to will reset perform an NVMe
controller reset if the controller was successfully initialized in the
first place.

This also adds locking around resetting the device in the async probe
method so the driver can't schedule two resets.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Cc: Brandon Schultz <brandon.schulz@hgst.com>
Cc: David Sariel <david.sariel@pmcs.com>

Updated by Jens to:

1) Merge this with the ioctl reset patch from David Sariel. The ioctl
   path now shares the reset code from the sysfs path.

2) Don't flush work if we fail issuing the reset.

Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 53 +++++++++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/nvme.h |  1 +
 2 files changed, 54 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index 513908ff46c4..9682e29b4171 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -80,6 +80,7 @@ static wait_queue_head_t nvme_kthread_wait;
 static struct class *nvme_class;
 
 static void nvme_reset_failed_dev(struct work_struct *ws);
+static int nvme_reset(struct nvme_dev *dev);
 static int nvme_process_cq(struct nvme_queue *nvmeq);
 
 struct async_cmd_info {
@@ -2689,6 +2690,9 @@ static long nvme_dev_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
 			return -ENOTTY;
 		ns = list_first_entry(&dev->namespaces, struct nvme_ns, list);
 		return nvme_user_cmd(dev, ns, (void __user *)arg);
+	case NVME_IOCTL_RESET:
+		dev_warn(dev->dev, "resetting controller\n");
+		return nvme_reset(dev);
 	default:
 		return -ENOTTY;
 	}
@@ -2839,6 +2843,44 @@ static void nvme_reset_workfn(struct work_struct *work)
 	dev->reset_workfn(work);
 }
 
+static int nvme_reset(struct nvme_dev *dev)
+{
+	int ret = -EBUSY;
+
+	if (!dev->admin_q || blk_queue_dying(dev->admin_q))
+		return -ENODEV;
+
+	spin_lock(&dev_list_lock);
+	if (!work_pending(&dev->reset_work)) {
+		dev->reset_workfn = nvme_reset_failed_dev;
+		queue_work(nvme_workq, &dev->reset_work);
+		ret = 0;
+	}
+	spin_unlock(&dev_list_lock);
+
+	if (!ret) {
+		flush_work(&dev->reset_work);
+		return 0;
+	}
+
+	return ret;
+}
+
+static ssize_t nvme_sysfs_reset(struct device *dev,
+				struct device_attribute *attr, const char *buf,
+				size_t count)
+{
+	struct nvme_dev *ndev = dev_get_drvdata(dev);
+	int ret;
+
+	ret = nvme_reset(ndev);
+	if (ret < 0)
+		return ret;
+
+	return count;
+}
+static DEVICE_ATTR(reset_controller, S_IWUSR, NULL, nvme_sysfs_reset);
+
 static void nvme_async_probe(struct work_struct *work);
 static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
@@ -2883,12 +2925,20 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto release_pools;
 	}
 	get_device(dev->device);
+	dev_set_drvdata(dev->device, dev);
+
+	result = device_create_file(dev->device, &dev_attr_reset_controller);
+	if (result)
+		goto put_dev;
 
 	INIT_LIST_HEAD(&dev->node);
 	INIT_WORK(&dev->probe_work, nvme_async_probe);
 	schedule_work(&dev->probe_work);
 	return 0;
 
+ put_dev:
+	device_destroy(nvme_class, MKDEV(nvme_char_major, dev->instance));
+	put_device(dev->device);
  release_pools:
 	nvme_release_prp_pools(dev);
  release:
@@ -2919,10 +2969,12 @@ static void nvme_async_probe(struct work_struct *work)
 	nvme_set_irq_hints(dev);
 	return;
  reset:
+	spin_lock(&dev_list_lock);
 	if (!work_busy(&dev->reset_work)) {
 		dev->reset_workfn = nvme_reset_failed_dev;
 		queue_work(nvme_workq, &dev->reset_work);
 	}
+	spin_unlock(&dev_list_lock);
 }
 
 static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
@@ -2952,6 +3004,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->probe_work);
 	flush_work(&dev->reset_work);
+	device_remove_file(dev->device, &dev_attr_reset_controller);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove(dev);
 	nvme_dev_remove_admin(dev);
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index aef9a81b2d75..b660dc2fadfb 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -579,5 +579,6 @@ struct nvme_passthru_cmd {
 #define NVME_IOCTL_ADMIN_CMD	_IOWR('N', 0x41, struct nvme_admin_cmd)
 #define NVME_IOCTL_SUBMIT_IO	_IOW('N', 0x42, struct nvme_user_io)
 #define NVME_IOCTL_IO_CMD	_IOWR('N', 0x43, struct nvme_passthru_cmd)
+#define NVME_IOCTL_RESET	_IO('N', 0x44)
 
 #endif /* _UAPI_LINUX_NVME_H */
-- 
cgit 


From a5768aa887fb636f0cc4c83a2f1242506aaf50f6 Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Mon, 1 Jun 2015 14:28:14 -0600
Subject: NVMe: Automatic namespace rescan

Namespaces may be dynamically allocated and deleted or attached and
detached. This has the driver rescan the device for namespace changes
after each device reset or namespace change asynchronous event.

There could potentially be many detached namespaces that we don't want
polluting /dev/ with unusable block handles, so this will delete disks
if the namespace is not active as indicated by the response from identify
namespace. This also skips adding the disk if no capacity is provisioned
to the namespace in the first place.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
---
 drivers/block/nvme-core.c | 159 ++++++++++++++++++++++++++++++++++++----------
 include/linux/nvme.h      |   1 +
 include/uapi/linux/nvme.h |   4 ++
 3 files changed, 132 insertions(+), 32 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/block/nvme-core.c b/drivers/block/nvme-core.c
index cae7cac6cc43..2072ae81c13a 100644
--- a/drivers/block/nvme-core.c
+++ b/drivers/block/nvme-core.c
@@ -29,6 +29,7 @@
 #include <linux/kdev_t.h>
 #include <linux/kthread.h>
 #include <linux/kernel.h>
+#include <linux/list_sort.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
@@ -300,9 +301,16 @@ static void async_req_completion(struct nvme_queue *nvmeq, void *ctx,
 
 	if (status == NVME_SC_SUCCESS || status == NVME_SC_ABORT_REQ)
 		++nvmeq->dev->event_limit;
-	if (status == NVME_SC_SUCCESS)
-		dev_warn(nvmeq->q_dmadev,
-			"async event result %08x\n", result);
+	if (status != NVME_SC_SUCCESS)
+		return;
+
+	switch (result & 0xff07) {
+	case NVME_AER_NOTICE_NS_CHANGED:
+		dev_info(nvmeq->q_dmadev, "rescanning\n");
+		schedule_work(&nvmeq->dev->scan_work);
+	default:
+		dev_warn(nvmeq->q_dmadev, "async event result %08x\n", result);
+	}
 }
 
 static void abort_completion(struct nvme_queue *nvmeq, void *ctx,
@@ -1923,8 +1931,13 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 	unsigned short bs;
 
 	if (nvme_identify_ns(dev, ns->ns_id, &id)) {
-		dev_warn(dev->dev, "%s: Identify failure\n", __func__);
-		return 0;
+		dev_warn(dev->dev, "%s: Identify failure nvme%dn%d\n", __func__,
+						dev->instance, ns->ns_id);
+		return -ENODEV;
+	}
+	if (id->ncap == 0) {
+		kfree(id);
+		return -ENODEV;
 	}
 
 	old_ms = ns->ms;
@@ -1958,7 +1971,7 @@ static int nvme_revalidate_disk(struct gendisk *disk)
 								!ns->ext)
 		nvme_init_integrity(ns);
 
-	if (id->ncap == 0 || (ns->ms && !blk_get_integrity(disk)))
+	if (ns->ms && !blk_get_integrity(disk))
 		set_capacity(disk, 0);
 	else
 		set_capacity(disk, le64_to_cpup(&id->nsze) << (ns->lba_shift - 9));
@@ -2073,11 +2086,16 @@ static void nvme_alloc_ns(struct nvme_dev *dev, unsigned nsid)
 	 * requires it.
 	 */
 	set_capacity(disk, 0);
-	nvme_revalidate_disk(ns->disk);
+	if (nvme_revalidate_disk(ns->disk))
+		goto out_free_disk;
+
 	add_disk(ns->disk);
 	if (ns->ms)
 		revalidate_disk(ns->disk);
 	return;
+ out_free_disk:
+	kfree(disk);
+	list_del(&ns->list);
  out_free_queue:
 	blk_cleanup_queue(ns->queue);
  out_free_ns:
@@ -2194,6 +2212,99 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
 	return result;
 }
 
+static void nvme_free_namespace(struct nvme_ns *ns)
+{
+	list_del(&ns->list);
+
+	spin_lock(&dev_list_lock);
+	ns->disk->private_data = NULL;
+	spin_unlock(&dev_list_lock);
+
+	put_disk(ns->disk);
+	kfree(ns);
+}
+
+static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct nvme_ns *nsa = container_of(a, struct nvme_ns, list);
+	struct nvme_ns *nsb = container_of(b, struct nvme_ns, list);
+
+	return nsa->ns_id - nsb->ns_id;
+}
+
+static struct nvme_ns *nvme_find_ns(struct nvme_dev *dev, unsigned nsid)
+{
+	struct nvme_ns *ns;
+
+	list_for_each_entry(ns, &dev->namespaces, list) {
+		if (ns->ns_id == nsid)
+			return ns;
+		if (ns->ns_id > nsid)
+			break;
+	}
+	return NULL;
+}
+
+static inline bool nvme_io_incapable(struct nvme_dev *dev)
+{
+	return (!dev->bar || readl(&dev->bar->csts) & NVME_CSTS_CFS ||
+							dev->online_queues < 2);
+}
+
+static void nvme_ns_remove(struct nvme_ns *ns)
+{
+	bool kill = nvme_io_incapable(ns->dev) && !blk_queue_dying(ns->queue);
+
+	if (kill)
+		blk_set_queue_dying(ns->queue);
+	if (ns->disk->flags & GENHD_FL_UP) {
+		if (blk_get_integrity(ns->disk))
+			blk_integrity_unregister(ns->disk);
+		del_gendisk(ns->disk);
+	}
+	if (kill || !blk_queue_dying(ns->queue)) {
+		blk_mq_abort_requeue_list(ns->queue);
+		blk_cleanup_queue(ns->queue);
+        }
+}
+
+static void nvme_scan_namespaces(struct nvme_dev *dev, unsigned nn)
+{
+	struct nvme_ns *ns, *next;
+	unsigned i;
+
+	for (i = 1; i <= nn; i++) {
+		ns = nvme_find_ns(dev, i);
+		if (ns) {
+			if (revalidate_disk(ns->disk)) {
+				nvme_ns_remove(ns);
+				nvme_free_namespace(ns);
+			}
+		} else
+			nvme_alloc_ns(dev, i);
+	}
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
+		if (ns->ns_id > nn) {
+			nvme_ns_remove(ns);
+			nvme_free_namespace(ns);
+		}
+	}
+	list_sort(NULL, &dev->namespaces, ns_cmp);
+}
+
+static void nvme_dev_scan(struct work_struct *work)
+{
+	struct nvme_dev *dev = container_of(work, struct nvme_dev, scan_work);
+	struct nvme_id_ctrl *ctrl;
+
+	if (!dev->tagset.tags)
+		return;
+	if (nvme_identify_ctrl(dev, &ctrl))
+		return;
+	nvme_scan_namespaces(dev, le32_to_cpup(&ctrl->nn));
+	kfree(ctrl);
+}
+
 /*
  * Return: error value if an error occurred setting up the queues or calling
  * Identify Device.  0 if these succeeded, even if adding some of the
@@ -2204,7 +2315,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev->dev);
 	int res;
-	unsigned nn, i;
+	unsigned nn;
 	struct nvme_id_ctrl *ctrl;
 	int shift = NVME_CAP_MPSMIN(readq(&dev->bar->cap)) + 12;
 
@@ -2250,9 +2361,7 @@ static int nvme_dev_add(struct nvme_dev *dev)
 	if (blk_mq_alloc_tag_set(&dev->tagset))
 		return 0;
 
-	for (i = 1; i <= nn; i++)
-		nvme_alloc_ns(dev, i);
-
+	schedule_work(&dev->scan_work);
 	return 0;
 }
 
@@ -2552,17 +2661,8 @@ static void nvme_dev_remove(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns;
 
-	list_for_each_entry(ns, &dev->namespaces, list) {
-		if (ns->disk->flags & GENHD_FL_UP) {
-			if (blk_get_integrity(ns->disk))
-				blk_integrity_unregister(ns->disk);
-			del_gendisk(ns->disk);
-		}
-		if (!blk_queue_dying(ns->queue)) {
-			blk_mq_abort_requeue_list(ns->queue);
-			blk_cleanup_queue(ns->queue);
-		}
-	}
+	list_for_each_entry(ns, &dev->namespaces, list)
+		nvme_ns_remove(ns);
 }
 
 static int nvme_setup_prp_pools(struct nvme_dev *dev)
@@ -2621,16 +2721,8 @@ static void nvme_free_namespaces(struct nvme_dev *dev)
 {
 	struct nvme_ns *ns, *next;
 
-	list_for_each_entry_safe(ns, next, &dev->namespaces, list) {
-		list_del(&ns->list);
-
-		spin_lock(&dev_list_lock);
-		ns->disk->private_data = NULL;
-		spin_unlock(&dev_list_lock);
-
-		put_disk(ns->disk);
-		kfree(ns);
-	}
+	list_for_each_entry_safe(ns, next, &dev->namespaces, list)
+		nvme_free_namespace(ns);
 }
 
 static void nvme_free_dev(struct kref *kref)
@@ -2814,6 +2906,7 @@ static int nvme_dev_resume(struct nvme_dev *dev)
 		spin_unlock(&dev_list_lock);
 	} else {
 		nvme_unfreeze_queues(dev);
+		schedule_work(&dev->scan_work);
 		nvme_set_irq_hints(dev);
 	}
 	return 0;
@@ -2935,6 +3028,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 		goto put_dev;
 
 	INIT_LIST_HEAD(&dev->node);
+	INIT_WORK(&dev->scan_work, nvme_dev_scan);
 	INIT_WORK(&dev->probe_work, nvme_async_probe);
 	schedule_work(&dev->probe_work);
 	return 0;
@@ -3007,6 +3101,7 @@ static void nvme_remove(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, NULL);
 	flush_work(&dev->probe_work);
 	flush_work(&dev->reset_work);
+	flush_work(&dev->scan_work);
 	device_remove_file(dev->device, &dev_attr_reset_controller);
 	nvme_dev_shutdown(dev);
 	nvme_dev_remove(dev);
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index 986bf8ad8e93..c0d94ed8ce9a 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -92,6 +92,7 @@ struct nvme_dev {
 	work_func_t reset_workfn;
 	struct work_struct reset_work;
 	struct work_struct probe_work;
+	struct work_struct scan_work;
 	char name[12];
 	char serial[20];
 	char model[40];
diff --git a/include/uapi/linux/nvme.h b/include/uapi/linux/nvme.h
index b660dc2fadfb..732b32e92b02 100644
--- a/include/uapi/linux/nvme.h
+++ b/include/uapi/linux/nvme.h
@@ -179,6 +179,10 @@ enum {
 	NVME_SMART_CRIT_VOLATILE_MEMORY	= 1 << 4,
 };
 
+enum {
+	NVME_AER_NOTICE_NS_CHANGED	= 0x0002,
+};
+
 struct nvme_lba_range_type {
 	__u8			type;
 	__u8			attributes;
-- 
cgit 


From 90c337da1524863838658078ec34241f45d8394d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 6 Jun 2015 21:17:57 -0700
Subject: inet: add IP_BIND_ADDRESS_NO_PORT to overcome bind(0) limitations

When an application needs to force a source IP on an active TCP socket
it has to use bind(IP, port=x).

As most applications do not want to deal with already used ports, x is
often set to 0, meaning the kernel is in charge to find an available
port.
But kernel does not know yet if this socket is going to be a listener or
be connected.
It has very limited choices (no full knowledge of final 4-tuple for a
connect())

With limited ephemeral port range (about 32K ports), it is very easy to
fill the space.

This patch adds a new SOL_IP socket option, asking kernel to ignore
the 0 port provided by application in bind(IP, port=0) and only
remember the given IP address.

The port will be automatically chosen at connect() time, in a way
that allows sharing a source port as long as the 4-tuples are unique.

This new feature is available for both IPv4 and IPv6 (Thanks Neal)

Tested:

Wrote a test program and checked its behavior on IPv4 and IPv6.

strace(1) shows sequences of bind(IP=127.0.0.2, port=0) followed by
connect().
Also getsockname() show that the port is still 0 right after bind()
but properly allocated after connect().

socket(PF_INET, SOCK_STREAM, IPPROTO_IP) = 5
setsockopt(5, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0
bind(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, 16) = 0
getsockname(5, {sa_family=AF_INET, sin_port=htons(0), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0
connect(5, {sa_family=AF_INET, sin_port=htons(53174), sin_addr=inet_addr("127.0.0.3")}, 16) = 0
getsockname(5, {sa_family=AF_INET, sin_port=htons(38050), sin_addr=inet_addr("127.0.0.2")}, [16]) = 0

IPv6 test :

socket(PF_INET6, SOCK_STREAM, IPPROTO_IP) = 7
setsockopt(7, SOL_IP, IP_BIND_ADDRESS_NO_PORT, [1], 4) = 0
bind(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0
getsockname(7, {sa_family=AF_INET6, sin6_port=htons(0), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0
connect(7, {sa_family=AF_INET6, sin6_port=htons(57300), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, 28) = 0
getsockname(7, {sa_family=AF_INET6, sin6_port=htons(60964), inet_pton(AF_INET6, "::1", &sin6_addr), sin6_flowinfo=0, sin6_scope_id=0}, [28]) = 0

I was able to bind()/connect() a million concurrent IPv4 sockets,
instead of ~32000 before patch.

lpaa23:~# ulimit -n 1000010
lpaa23:~# ./bind --connect --num-flows=1000000 &
1000000 sockets

lpaa23:~# grep TCP /proc/net/sockstat
TCP: inuse 2000063 orphan 0 tw 47 alloc 2000157 mem 66

Check that a given source port is indeed used by many different
connections :

lpaa23:~# ss -t src :40000 | head -10
State      Recv-Q Send-Q   Local Address:Port          Peer Address:Port
ESTAB      0      0           127.0.0.2:40000         127.0.202.33:44983
ESTAB      0      0           127.0.0.2:40000         127.2.27.240:44983
ESTAB      0      0           127.0.0.2:40000           127.2.98.5:44983
ESTAB      0      0           127.0.0.2:40000        127.0.124.196:44983
ESTAB      0      0           127.0.0.2:40000         127.2.139.38:44983
ESTAB      0      0           127.0.0.2:40000          127.1.59.80:44983
ESTAB      0      0           127.0.0.2:40000          127.3.6.228:44983
ESTAB      0      0           127.0.0.2:40000          127.0.38.53:44983
ESTAB      0      0           127.0.0.2:40000         127.1.197.10:44983

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_sock.h | 1 +
 include/uapi/linux/in.h | 1 +
 net/ipv4/af_inet.c      | 3 ++-
 net/ipv4/ip_sockglue.c  | 7 +++++++
 net/ipv6/af_inet6.c     | 3 ++-
 5 files changed, 13 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index b6c3737da4e9..47eb67b08abd 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -187,6 +187,7 @@ struct inet_sock {
 				transparent:1,
 				mc_all:1,
 				nodefrag:1;
+	__u8			bind_address_no_port:1;
 	__u8			rcv_tos;
 	__u8			convert_csum;
 	int			uc_index;
diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 641338bef651..83d6236a2f08 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -112,6 +112,7 @@ struct in_addr {
 #define IP_MINTTL       21
 #define IP_NODEFRAG     22
 #define IP_CHECKSUM	23
+#define IP_BIND_ADDRESS_NO_PORT	24
 
 /* IP_MTU_DISCOVER values */
 #define IP_PMTUDISC_DONT		0	/* Never send DF frames */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6ad0f7a711c9..cc858ef44451 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -488,7 +488,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		inet->inet_saddr = 0;  /* Use device */
 
 	/* Make sure we are allowed to bind here. */
-	if (sk->sk_prot->get_port(sk, snum)) {
+	if ((snum || !inet->bind_address_no_port) &&
+	    sk->sk_prot->get_port(sk, snum)) {
 		inet->inet_saddr = inet->inet_rcv_saddr = 0;
 		err = -EADDRINUSE;
 		goto out_release_sock;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 7cfb0893f263..04ae2992a5cd 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -582,6 +582,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	case IP_TRANSPARENT:
 	case IP_MINTTL:
 	case IP_NODEFRAG:
+	case IP_BIND_ADDRESS_NO_PORT:
 	case IP_UNICAST_IF:
 	case IP_MULTICAST_TTL:
 	case IP_MULTICAST_ALL:
@@ -732,6 +733,9 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 		}
 		inet->nodefrag = val ? 1 : 0;
 		break;
+	case IP_BIND_ADDRESS_NO_PORT:
+		inet->bind_address_no_port = val ? 1 : 0;
+		break;
 	case IP_MTU_DISCOVER:
 		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
 			goto e_inval;
@@ -1324,6 +1328,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	case IP_NODEFRAG:
 		val = inet->nodefrag;
 		break;
+	case IP_BIND_ADDRESS_NO_PORT:
+		val = inet->bind_address_no_port;
+		break;
 	case IP_MTU_DISCOVER:
 		val = inet->pmtudisc;
 		break;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index f3866c0b6cfe..7de52b65173f 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 		np->saddr = addr->sin6_addr;
 
 	/* Make sure we are allowed to bind here. */
-	if (sk->sk_prot->get_port(sk, snum)) {
+	if ((snum || !inet->bind_address_no_port) &&
+	    sk->sk_prot->get_port(sk, snum)) {
 		inet_reset_saddr(sk);
 		err = -EADDRINUSE;
 		goto out;
-- 
cgit 


From d691f9e8d4405c334aa10d556e73c8bf44cb0e01 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Thu, 4 Jun 2015 10:11:54 -0700
Subject: bpf: allow programs to write to certain skb fields

allow programs read/write skb->mark, tc_index fields and
((struct qdisc_skb_cb *)cb)->data.

mark and tc_index are generically useful in TC.
cb[0]-cb[4] are primarily used to pass arguments from one
program to another called via bpf_tail_call() which can
be seen in sockex3_kern.c example.

All fields of 'struct __sk_buff' are readable to socket and tc_cls_act progs.
mark, tc_index are writeable from tc_cls_act only.
cb[0]-cb[4] are writeable by both sockets and tc_cls_act.

Add verifier tests and improve sample code.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h         |  3 +-
 include/uapi/linux/bpf.h    |  2 +
 kernel/bpf/verifier.c       | 37 +++++++++++++-----
 net/core/filter.c           | 94 +++++++++++++++++++++++++++++++++++++++------
 samples/bpf/sockex3_kern.c  | 35 ++++++-----------
 samples/bpf/test_verifier.c | 84 +++++++++++++++++++++++++++++++++++++++-
 6 files changed, 207 insertions(+), 48 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ca854e5bb2f7..2235aee8096a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -105,7 +105,8 @@ struct bpf_verifier_ops {
 	 */
 	bool (*is_valid_access)(int off, int size, enum bpf_access_type type);
 
-	u32 (*convert_ctx_access)(int dst_reg, int src_reg, int ctx_off,
+	u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg,
+				  int src_reg, int ctx_off,
 				  struct bpf_insn *insn);
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 42aa19abab86..602f05b7a275 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -248,6 +248,8 @@ struct __sk_buff {
 	__u32 priority;
 	__u32 ingress_ifindex;
 	__u32 ifindex;
+	__u32 tc_index;
+	__u32 cb[5];
 };
 
 #endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index cfd9a40b9a5a..039d866fd36a 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1692,6 +1692,8 @@ static int do_check(struct verifier_env *env)
 			}
 
 		} else if (class == BPF_STX) {
+			enum bpf_reg_type dst_reg_type;
+
 			if (BPF_MODE(insn->code) == BPF_XADD) {
 				err = check_xadd(env, insn);
 				if (err)
@@ -1700,11 +1702,6 @@ static int do_check(struct verifier_env *env)
 				continue;
 			}
 
-			if (BPF_MODE(insn->code) != BPF_MEM ||
-			    insn->imm != 0) {
-				verbose("BPF_STX uses reserved fields\n");
-				return -EINVAL;
-			}
 			/* check src1 operand */
 			err = check_reg_arg(regs, insn->src_reg, SRC_OP);
 			if (err)
@@ -1714,6 +1711,8 @@ static int do_check(struct verifier_env *env)
 			if (err)
 				return err;
 
+			dst_reg_type = regs[insn->dst_reg].type;
+
 			/* check that memory (dst_reg + off) is writeable */
 			err = check_mem_access(env, insn->dst_reg, insn->off,
 					       BPF_SIZE(insn->code), BPF_WRITE,
@@ -1721,6 +1720,15 @@ static int do_check(struct verifier_env *env)
 			if (err)
 				return err;
 
+			if (insn->imm == 0) {
+				insn->imm = dst_reg_type;
+			} else if (dst_reg_type != insn->imm &&
+				   (dst_reg_type == PTR_TO_CTX ||
+				    insn->imm == PTR_TO_CTX)) {
+				verbose("same insn cannot be used with different pointers\n");
+				return -EINVAL;
+			}
+
 		} else if (class == BPF_ST) {
 			if (BPF_MODE(insn->code) != BPF_MEM ||
 			    insn->src_reg != BPF_REG_0) {
@@ -1839,12 +1847,18 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env)
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		if (BPF_CLASS(insn->code) == BPF_LDX &&
-		    (BPF_MODE(insn->code) != BPF_MEM ||
-		     insn->imm != 0)) {
+		    (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
 			verbose("BPF_LDX uses reserved fields\n");
 			return -EINVAL;
 		}
 
+		if (BPF_CLASS(insn->code) == BPF_STX &&
+		    ((BPF_MODE(insn->code) != BPF_MEM &&
+		      BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
+			verbose("BPF_STX uses reserved fields\n");
+			return -EINVAL;
+		}
+
 		if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
 			struct bpf_map *map;
 			struct fd f;
@@ -1967,12 +1981,17 @@ static int convert_ctx_accesses(struct verifier_env *env)
 	struct bpf_prog *new_prog;
 	u32 cnt;
 	int i;
+	enum bpf_access_type type;
 
 	if (!env->prog->aux->ops->convert_ctx_access)
 		return 0;
 
 	for (i = 0; i < insn_cnt; i++, insn++) {
-		if (insn->code != (BPF_LDX | BPF_MEM | BPF_W))
+		if (insn->code == (BPF_LDX | BPF_MEM | BPF_W))
+			type = BPF_READ;
+		else if (insn->code == (BPF_STX | BPF_MEM | BPF_W))
+			type = BPF_WRITE;
+		else
 			continue;
 
 		if (insn->imm != PTR_TO_CTX) {
@@ -1982,7 +2001,7 @@ static int convert_ctx_accesses(struct verifier_env *env)
 		}
 
 		cnt = env->prog->aux->ops->
-			convert_ctx_access(insn->dst_reg, insn->src_reg,
+			convert_ctx_access(type, insn->dst_reg, insn->src_reg,
 					   insn->off, insn_buf);
 		if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
 			verbose("bpf verifier is misconfigured\n");
diff --git a/net/core/filter.c b/net/core/filter.c
index 36a69e33d76b..d271c06bf01f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -46,6 +46,7 @@
 #include <linux/seccomp.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
+#include <net/sch_generic.h>
 
 /**
  *	sk_filter - run a packet through a socket filter
@@ -1463,13 +1464,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 	}
 }
 
-static bool sk_filter_is_valid_access(int off, int size,
-				      enum bpf_access_type type)
+static bool __is_valid_access(int off, int size, enum bpf_access_type type)
 {
-	/* only read is allowed */
-	if (type != BPF_READ)
-		return false;
-
 	/* check bounds */
 	if (off < 0 || off >= sizeof(struct __sk_buff))
 		return false;
@@ -1485,8 +1481,42 @@ static bool sk_filter_is_valid_access(int off, int size,
 	return true;
 }
 
-static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
-					struct bpf_insn *insn_buf)
+static bool sk_filter_is_valid_access(int off, int size,
+				      enum bpf_access_type type)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct __sk_buff, cb[0]) ...
+			offsetof(struct __sk_buff, cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+
+	return __is_valid_access(off, size, type);
+}
+
+static bool tc_cls_act_is_valid_access(int off, int size,
+				       enum bpf_access_type type)
+{
+	if (type == BPF_WRITE) {
+		switch (off) {
+		case offsetof(struct __sk_buff, mark):
+		case offsetof(struct __sk_buff, tc_index):
+		case offsetof(struct __sk_buff, cb[0]) ...
+			offsetof(struct __sk_buff, cb[4]):
+			break;
+		default:
+			return false;
+		}
+	}
+	return __is_valid_access(off, size, type);
+}
+
+static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg,
+				      int src_reg, int ctx_off,
+				      struct bpf_insn *insn_buf)
 {
 	struct bpf_insn *insn = insn_buf;
 
@@ -1538,7 +1568,15 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 		break;
 
 	case offsetof(struct __sk_buff, mark):
-		return convert_skb_access(SKF_AD_MARK, dst_reg, src_reg, insn);
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, mark) != 4);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg,
+					      offsetof(struct sk_buff, mark));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg,
+					      offsetof(struct sk_buff, mark));
+		break;
 
 	case offsetof(struct __sk_buff, pkt_type):
 		return convert_skb_access(SKF_AD_PKTTYPE, dst_reg, src_reg, insn);
@@ -1553,6 +1591,38 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 	case offsetof(struct __sk_buff, vlan_tci):
 		return convert_skb_access(SKF_AD_VLAN_TAG,
 					  dst_reg, src_reg, insn);
+
+	case offsetof(struct __sk_buff, cb[0]) ...
+		offsetof(struct __sk_buff, cb[4]):
+		BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20);
+
+		ctx_off -= offsetof(struct __sk_buff, cb[0]);
+		ctx_off += offsetof(struct sk_buff, cb);
+		ctx_off += offsetof(struct qdisc_skb_cb, data);
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+		else
+			*insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, ctx_off);
+		break;
+
+	case offsetof(struct __sk_buff, tc_index):
+#ifdef CONFIG_NET_SCHED
+		BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, tc_index) != 2);
+
+		if (type == BPF_WRITE)
+			*insn++ = BPF_STX_MEM(BPF_H, dst_reg, src_reg,
+					      offsetof(struct sk_buff, tc_index));
+		else
+			*insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg,
+					      offsetof(struct sk_buff, tc_index));
+		break;
+#else
+		if (type == BPF_WRITE)
+			*insn++ = BPF_MOV64_REG(dst_reg, dst_reg);
+		else
+			*insn++ = BPF_MOV64_IMM(dst_reg, 0);
+		break;
+#endif
 	}
 
 	return insn - insn_buf;
@@ -1561,13 +1631,13 @@ static u32 sk_filter_convert_ctx_access(int dst_reg, int src_reg, int ctx_off,
 static const struct bpf_verifier_ops sk_filter_ops = {
 	.get_func_proto = sk_filter_func_proto,
 	.is_valid_access = sk_filter_is_valid_access,
-	.convert_ctx_access = sk_filter_convert_ctx_access,
+	.convert_ctx_access = bpf_net_convert_ctx_access,
 };
 
 static const struct bpf_verifier_ops tc_cls_act_ops = {
 	.get_func_proto = tc_cls_act_func_proto,
-	.is_valid_access = sk_filter_is_valid_access,
-	.convert_ctx_access = sk_filter_convert_ctx_access,
+	.is_valid_access = tc_cls_act_is_valid_access,
+	.convert_ctx_access = bpf_net_convert_ctx_access,
 };
 
 static struct bpf_prog_type_list sk_filter_type __read_mostly = {
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 2625b987944f..41ae2fd21b13 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -89,7 +89,6 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off)
 
 struct globals {
 	struct flow_keys flow;
-	__u32 nhoff;
 };
 
 struct bpf_map_def SEC("maps") percpu_map = {
@@ -139,7 +138,7 @@ static void update_stats(struct __sk_buff *skb, struct globals *g)
 static __always_inline void parse_ip_proto(struct __sk_buff *skb,
 					   struct globals *g, __u32 ip_proto)
 {
-	__u32 nhoff = g->nhoff;
+	__u32 nhoff = skb->cb[0];
 	int poff;
 
 	switch (ip_proto) {
@@ -165,7 +164,7 @@ static __always_inline void parse_ip_proto(struct __sk_buff *skb,
 		if (gre_flags & GRE_SEQ)
 			nhoff += 4;
 
-		g->nhoff = nhoff;
+		skb->cb[0] = nhoff;
 		parse_eth_proto(skb, gre_proto);
 		break;
 	}
@@ -195,7 +194,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
 	if (!g)
 		return 0;
 
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	if (unlikely(ip_is_fragment(skb, nhoff)))
 		return 0;
@@ -210,7 +209,7 @@ PROG(PARSE_IP)(struct __sk_buff *skb)
 	verlen = load_byte(skb, nhoff + 0/*offsetof(struct iphdr, ihl)*/);
 	nhoff += (verlen & 0xF) << 2;
 
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 	parse_ip_proto(skb, g, ip_proto);
 	return 0;
 }
@@ -223,7 +222,7 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
 	if (!g)
 		return 0;
 
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	ip_proto = load_byte(skb,
 			     nhoff + offsetof(struct ipv6hdr, nexthdr));
@@ -233,25 +232,21 @@ PROG(PARSE_IPV6)(struct __sk_buff *skb)
 				     nhoff + offsetof(struct ipv6hdr, daddr));
 	nhoff += sizeof(struct ipv6hdr);
 
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 	parse_ip_proto(skb, g, ip_proto);
 	return 0;
 }
 
 PROG(PARSE_VLAN)(struct __sk_buff *skb)
 {
-	struct globals *g = this_cpu_globals();
 	__u32 nhoff, proto;
 
-	if (!g)
-		return 0;
-
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	proto = load_half(skb, nhoff + offsetof(struct vlan_hdr,
 						h_vlan_encapsulated_proto));
 	nhoff += sizeof(struct vlan_hdr);
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 
 	parse_eth_proto(skb, proto);
 
@@ -260,17 +255,13 @@ PROG(PARSE_VLAN)(struct __sk_buff *skb)
 
 PROG(PARSE_MPLS)(struct __sk_buff *skb)
 {
-	struct globals *g = this_cpu_globals();
 	__u32 nhoff, label;
 
-	if (!g)
-		return 0;
-
-	nhoff = g->nhoff;
+	nhoff = skb->cb[0];
 
 	label = load_word(skb, nhoff);
 	nhoff += sizeof(struct mpls_label);
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 
 	if (label & MPLS_LS_S_MASK) {
 		__u8 verlen = load_byte(skb, nhoff);
@@ -288,14 +279,10 @@ PROG(PARSE_MPLS)(struct __sk_buff *skb)
 SEC("socket/0")
 int main_prog(struct __sk_buff *skb)
 {
-	struct globals *g = this_cpu_globals();
 	__u32 nhoff = ETH_HLEN;
 	__u32 proto = load_half(skb, 12);
 
-	if (!g)
-		return 0;
-
-	g->nhoff = nhoff;
+	skb->cb[0] = nhoff;
 	parse_eth_proto(skb, proto);
 	return 0;
 }
diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c
index 12f3780af73f..693605997abc 100644
--- a/samples/bpf/test_verifier.c
+++ b/samples/bpf/test_verifier.c
@@ -29,6 +29,7 @@ struct bpf_test {
 		ACCEPT,
 		REJECT
 	} result;
+	enum bpf_prog_type prog_type;
 };
 
 static struct bpf_test tests[] = {
@@ -743,6 +744,84 @@ static struct bpf_test tests[] = {
 		.errstr = "different pointers",
 		.result = REJECT,
 	},
+	{
+		"check skb->mark is not writeable by sockets",
+		.insns = {
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check skb->tc_index is not writeable by sockets",
+		.insns = {
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check non-u32 access to cb",
+		.insns = {
+			BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+	},
+	{
+		"check out of range skb->cb access",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[60])),
+			BPF_EXIT_INSN(),
+		},
+		.errstr = "invalid bpf_context access",
+		.result = REJECT,
+		.prog_type = BPF_PROG_TYPE_SCHED_ACT,
+	},
+	{
+		"write skb fields from socket prog",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[4])),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[2])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+	},
+	{
+		"write skb fields from tc_cls_act prog",
+		.insns = {
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, cb[0])),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, mark)),
+			BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, tc_index)),
+			BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+				    offsetof(struct __sk_buff, cb[3])),
+			BPF_EXIT_INSN(),
+		},
+		.result = ACCEPT,
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	},
 };
 
 static int probe_filter_length(struct bpf_insn *fp)
@@ -775,6 +854,7 @@ static int test(void)
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_insn *prog = tests[i].insns;
+		int prog_type = tests[i].prog_type;
 		int prog_len = probe_filter_length(prog);
 		int *fixup = tests[i].fixup;
 		int map_fd = -1;
@@ -789,8 +869,8 @@ static int test(void)
 		}
 		printf("#%d %s ", i, tests[i].descr);
 
-		prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, prog,
-					prog_len * sizeof(struct bpf_insn),
+		prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER,
+					prog, prog_len * sizeof(struct bpf_insn),
 					"GPL", 0);
 
 		if (tests[i].result == ACCEPT) {
-- 
cgit 


From c9fdfa14c3792c0160849c484e83aa57afd80ccc Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 14 May 2015 23:09:58 +0200
Subject: perf: add new PERF_SAMPLE_BRANCH_IND_JUMP branch sample type

This patch adds a new branch_sample_type flag to enable
filtering branch sampling to indirect jumps. The support
is subject to hardware or kernel software support on each
architecture.

Filtering on indirect jump is useful to study the targets
of the jump.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: dsahern@gmail.com
Cc: jolsa@redhat.com
Cc: kan.liang@intel.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/1431637800-31061-2-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 309211b3eb67..c4622f1ce046 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -167,6 +167,7 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_COND_SHIFT		= 10, /* conditional branches */
 
 	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT	= 11, /* call/ret stack */
+	PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT	= 12, /* indirect jumps */
 
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
@@ -186,6 +187,7 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_COND		= 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
 
 	PERF_SAMPLE_BRANCH_CALL_STACK	= 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+	PERF_SAMPLE_BRANCH_IND_JUMP	= 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
 
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };
-- 
cgit 


From f38b0dbb491a6987e198aa6b428db8692a6480f8 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Sun, 10 May 2015 15:13:14 -0400
Subject: perf/x86/intel: Introduce PERF_RECORD_LOST_SAMPLES

After enlarging the PEBS interrupt threshold, there may be some mixed up
PEBS samples which are discarded by the kernel.

This patch makes the kernel emit a PERF_RECORD_LOST_SAMPLES record with
the number of possible discarded records when it is impossible to demux
the samples.

It makes sure the user is not left in the dark about such discards.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1431285195-14269-8-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 20 ++++++++++++++++---
 include/linux/perf_event.h                |  3 +++
 include/uapi/linux/perf_event.h           | 12 +++++++++++
 kernel/events/core.c                      | 33 +++++++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 266079a3a646..34d0c4816141 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1126,6 +1126,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	void *base, *at, *top;
 	int bit;
 	short counts[MAX_PEBS_EVENTS] = {};
+	short error[MAX_PEBS_EVENTS] = {};
 
 	if (!x86_pmu.pebs_active)
 		return;
@@ -1169,20 +1170,33 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			/* slow path */
 			pebs_status = p->status & cpuc->pebs_enabled;
 			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
-			if (pebs_status != (1 << bit))
+			if (pebs_status != (1 << bit)) {
+				u8 i;
+
+				for_each_set_bit(i, (unsigned long *)&pebs_status,
+						 MAX_PEBS_EVENTS)
+					error[i]++;
 				continue;
+			}
 		}
 		counts[bit]++;
 	}
 
 	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
-		if (counts[bit] == 0)
+		if ((counts[bit] == 0) && (error[bit] == 0))
 			continue;
 		event = cpuc->events[bit];
 		WARN_ON_ONCE(!event);
 		WARN_ON_ONCE(!event->attr.precise_ip);
 
-		__intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]);
+		/* log dropped samples number */
+		if (error[bit])
+			perf_log_lost_samples(event, error[bit]);
+
+		if (counts[bit]) {
+			__intel_pmu_pebs_event(event, iregs, base,
+					       top, bit, counts[bit]);
+		}
 	}
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5f192e1bc98e..a204d5266f5f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -743,6 +743,9 @@ perf_event__output_id_sample(struct perf_event *event,
 			     struct perf_output_handle *handle,
 			     struct perf_sample_data *sample);
 
+extern void
+perf_log_lost_samples(struct perf_event *event, u64 lost);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c4622f1ce046..613ed9ad588f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -802,6 +802,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_ITRACE_START		= 12,
 
+	/*
+	 * Records the dropped/lost sample number.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u64				lost;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_LOST_SAMPLES		= 13,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e499b4e43aff..9e0773d5d110 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5974,6 +5974,39 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
 	perf_output_end(&handle);
 }
 
+/*
+ * Lost/dropped samples logging
+ */
+void perf_log_lost_samples(struct perf_event *event, u64 lost)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				lost;
+	} lost_samples_event = {
+		.header = {
+			.type = PERF_RECORD_LOST_SAMPLES,
+			.misc = 0,
+			.size = sizeof(lost_samples_event),
+		},
+		.lost		= lost,
+	};
+
+	perf_event_header__init_id(&lost_samples_event.header, &sample, event);
+
+	ret = perf_output_begin(&handle, event,
+				lost_samples_event.header.size);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, lost_samples_event);
+	perf_event__output_id_sample(event, &handle, &sample);
+	perf_output_end(&handle);
+}
+
 /*
  * IRQ throttle logging
  */
-- 
cgit 


From 9e58095f9660f88d6a2febe87d5073a6b2e9c399 Mon Sep 17 00:00:00 2001
From: Samuel Ortiz <sameo@linux.intel.com>
Date: Tue, 14 Oct 2014 02:19:46 +0200
Subject: NFC: netlink: Implement vendor command support

Vendor commands are passed from userspace through the
NFC_CMD_VENDOR netlink command, allowing driver and hardware
specific operations implementations like for example RF tuning
or production line calibration.

Drivers will associate a set of vendor commands to a vendor
id, which could typically be an OUI. The netlink kernel
implementation will try to match the received vendor id
and sub command attributes with the registered ones. When
such match is found, the driver defined sub command routine
is called.

Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/uapi/linux/nfc.h | 10 +++++++++
 net/nfc/netlink.c        | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h
index c1e2e63cf9b5..dd3f75389076 100644
--- a/include/uapi/linux/nfc.h
+++ b/include/uapi/linux/nfc.h
@@ -86,6 +86,8 @@
  *	for this event is the application ID (AID).
  * @NFC_CMD_GET_SE: Dump all discovered secure elements from an NFC controller.
  * @NFC_CMD_SE_IO: Send/Receive APDUs to/from the selected secure element.
+ * @NFC_CMD_VENDOR: Vendor specific command, to be implemented directly
+ *	from the driver in order to support hardware specific operations.
  */
 enum nfc_commands {
 	NFC_CMD_UNSPEC,
@@ -117,6 +119,7 @@ enum nfc_commands {
 	NFC_CMD_GET_SE,
 	NFC_CMD_SE_IO,
 	NFC_CMD_ACTIVATE_TARGET,
+	NFC_CMD_VENDOR,
 /* private: internal use only */
 	__NFC_CMD_AFTER_LAST
 };
@@ -153,6 +156,10 @@ enum nfc_commands {
  * @NFC_ATTR_APDU: Secure element APDU
  * @NFC_ATTR_TARGET_ISO15693_DSFID: ISO 15693 Data Storage Format Identifier
  * @NFC_ATTR_TARGET_ISO15693_UID: ISO 15693 Unique Identifier
+ * @NFC_ATTR_VENDOR_ID: NFC manufacturer unique ID, typically an OUI
+ * @NFC_ATTR_VENDOR_SUBCMD: Vendor specific sub command
+ * @NFC_ATTR_VENDOR_DATA: Vendor specific data, to be optionally passed
+ *	to a vendor specific command implementation
  */
 enum nfc_attrs {
 	NFC_ATTR_UNSPEC,
@@ -184,6 +191,9 @@ enum nfc_attrs {
 	NFC_ATTR_TARGET_ISO15693_DSFID,
 	NFC_ATTR_TARGET_ISO15693_UID,
 	NFC_ATTR_SE_PARAMS,
+	NFC_ATTR_VENDOR_ID,
+	NFC_ATTR_VENDOR_SUBCMD,
+	NFC_ATTR_VENDOR_DATA,
 /* private: internal use only */
 	__NFC_ATTR_AFTER_LAST
 };
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 3763036710ae..f85f37ed19b2 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -5,6 +5,12 @@
  *    Lauro Ramos Venancio <lauro.venancio@openbossa.org>
  *    Aloisio Almeida Jr <aloisio.almeida@openbossa.org>
  *
+ * Vendor commands implementation based on net/wireless/nl80211.c
+ * which is:
+ *
+ * Copyright 2006-2010	Johannes Berg <johannes@sipsolutions.net>
+ * Copyright 2013-2014  Intel Mobile Communications GmbH
+ *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
  * the Free Software Foundation; either version 2 of the License, or
@@ -1489,6 +1495,50 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
 	return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
 }
 
+static int nfc_genl_vendor_cmd(struct sk_buff *skb,
+			       struct genl_info *info)
+{
+	struct nfc_dev *dev;
+	struct nfc_vendor_cmd *cmd;
+	u32 dev_idx, vid, subcmd;
+	u8 *data;
+	size_t data_len;
+	int i;
+
+	if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
+	    !info->attrs[NFC_ATTR_VENDOR_ID] ||
+	    !info->attrs[NFC_ATTR_VENDOR_SUBCMD])
+		return -EINVAL;
+
+	dev_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]);
+	vid = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_ID]);
+	subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
+
+	dev = nfc_get_device(dev_idx);
+	if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
+		return -ENODEV;
+
+	data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
+	if (data) {
+		data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
+		if (data_len == 0)
+			return -EINVAL;
+	} else {
+		data_len = 0;
+	}
+
+	for (i = 0; i < dev->n_vendor_cmds; i++) {
+		cmd = &dev->vendor_cmds[i];
+
+		if (cmd->vendor_id != vid || cmd->subcmd != subcmd)
+			continue;
+
+		return cmd->doit(dev, data, data_len);
+	}
+
+	return -EOPNOTSUPP;
+}
+
 static const struct genl_ops nfc_genl_ops[] = {
 	{
 		.cmd = NFC_CMD_GET_DEVICE,
@@ -1579,6 +1629,11 @@ static const struct genl_ops nfc_genl_ops[] = {
 		.doit = nfc_genl_activate_target,
 		.policy = nfc_genl_policy,
 	},
+	{
+		.cmd = NFC_CMD_VENDOR,
+		.doit = nfc_genl_vendor_cmd,
+		.policy = nfc_genl_policy,
+	},
 };
 
 
-- 
cgit 


From dd895d7f21b244e7fd4c7477697e274de7e44ecb Mon Sep 17 00:00:00 2001
From: Oliver Hartkopp <socketcan@hartkopp.net>
Date: Tue, 9 Jun 2015 08:05:10 +0200
Subject: can: cangw: introduce optional uid to reference created routing jobs

Similar to referencing iptables rules by their line number this UID allows to
reference created routing jobs, e.g. to alter configured data modifications.

The UID is an optional non-zero value which can be provided at routing job
creation time. When the UID is set the UID replaces the data modification
configuration as job identification attribute e.g. at job removal time.

Signed-off-by: Oliver Hartkopp <socketcan@hartkopp.net>
Signed-off-by: Marc Kleine-Budde <mkl@pengutronix.de>
---
 include/uapi/linux/can/gw.h |  5 ++++
 net/can/gw.c                | 68 +++++++++++++++++++++++++++++++++++++--------
 2 files changed, 61 insertions(+), 12 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/can/gw.h b/include/uapi/linux/can/gw.h
index 3e6184cf2f6d..5079b9d57e31 100644
--- a/include/uapi/linux/can/gw.h
+++ b/include/uapi/linux/can/gw.h
@@ -78,6 +78,7 @@ enum {
 	CGW_FILTER,	/* specify struct can_filter on source CAN device */
 	CGW_DELETED,	/* number of deleted CAN frames (see max_hops param) */
 	CGW_LIM_HOPS,	/* limit the number of hops of this specific rule */
+	CGW_MOD_UID,	/* user defined identifier for modification updates */
 	__CGW_MAX
 };
 
@@ -162,6 +163,10 @@ enum {
  * load time of the can-gw module). This value is used to reduce the number of
  * possible hops for this gateway rule to a value smaller then max_hops.
  *
+ * CGW_MOD_UID (length 4 bytes):
+ * Optional non-zero user defined routing job identifier to alter existing
+ * modification settings at runtime.
+ *
  * CGW_CS_XOR (length 4 bytes):
  * Set a simple XOR checksum starting with an initial value into
  * data[result-idx] using data[start-idx] .. data[end-idx]
diff --git a/net/can/gw.c b/net/can/gw.c
index a6f448e18ea8..455168718c2e 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -110,6 +110,7 @@ struct cf_mod {
 		void (*xor)(struct can_frame *cf, struct cgw_csum_xor *xor);
 		void (*crc8)(struct can_frame *cf, struct cgw_csum_crc8 *crc8);
 	} csumfunc;
+	u32 uid;
 };
 
 
@@ -548,6 +549,11 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type,
 			goto cancel;
 	}
 
+	if (gwj->mod.uid) {
+		if (nla_put_u32(skb, CGW_MOD_UID, gwj->mod.uid) < 0)
+			goto cancel;
+	}
+
 	if (gwj->mod.csumfunc.crc8) {
 		if (nla_put(skb, CGW_CS_CRC8, CGW_CS_CRC8_LEN,
 			    &gwj->mod.csum.crc8) < 0)
@@ -619,6 +625,7 @@ static const struct nla_policy cgw_policy[CGW_MAX+1] = {
 	[CGW_DST_IF]	= { .type = NLA_U32 },
 	[CGW_FILTER]	= { .len = sizeof(struct can_filter) },
 	[CGW_LIM_HOPS]	= { .type = NLA_U8 },
+	[CGW_MOD_UID]	= { .type = NLA_U32 },
 };
 
 /* check for common and gwtype specific attributes */
@@ -761,6 +768,10 @@ static int cgw_parse_attr(struct nlmsghdr *nlh, struct cf_mod *mod,
 			else
 				mod->csumfunc.xor = cgw_csum_xor_neg;
 		}
+
+		if (tb[CGW_MOD_UID]) {
+			nla_memcpy(&mod->uid, tb[CGW_MOD_UID], sizeof(u32));
+		}
 	}
 
 	if (gwtype == CGW_TYPE_CAN_CAN) {
@@ -802,6 +813,8 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh)
 {
 	struct rtcanmsg *r;
 	struct cgw_job *gwj;
+	struct cf_mod mod;
+	struct can_can_gw ccgw;
 	u8 limhops = 0;
 	int err = 0;
 
@@ -819,6 +832,36 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh)
 	if (r->gwtype != CGW_TYPE_CAN_CAN)
 		return -EINVAL;
 
+	err = cgw_parse_attr(nlh, &mod, CGW_TYPE_CAN_CAN, &ccgw, &limhops);
+	if (err < 0)
+		return err;
+
+	if (mod.uid) {
+
+		ASSERT_RTNL();
+
+		/* check for updating an existing job with identical uid */
+		hlist_for_each_entry(gwj, &cgw_list, list) {
+
+			if (gwj->mod.uid != mod.uid)
+				continue;
+
+			/* interfaces & filters must be identical */
+			if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
+				return -EINVAL;
+
+			/* update modifications with disabled softirq & quit */
+			local_bh_disable();
+			memcpy(&gwj->mod, &mod, sizeof(mod));
+			local_bh_enable();
+			return 0;
+		}
+	}
+
+	/* ifindex == 0 is not allowed for job creation */
+	if (!ccgw.src_idx || !ccgw.dst_idx)
+		return -ENODEV;
+
 	gwj = kmem_cache_alloc(cgw_cache, GFP_KERNEL);
 	if (!gwj)
 		return -ENOMEM;
@@ -828,18 +871,14 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh)
 	gwj->deleted_frames = 0;
 	gwj->flags = r->flags;
 	gwj->gwtype = r->gwtype;
+	gwj->limit_hops = limhops;
 
-	err = cgw_parse_attr(nlh, &gwj->mod, CGW_TYPE_CAN_CAN, &gwj->ccgw,
-			     &limhops);
-	if (err < 0)
-		goto out;
+	/* insert already parsed information */
+	memcpy(&gwj->mod, &mod, sizeof(mod));
+	memcpy(&gwj->ccgw, &ccgw, sizeof(ccgw));
 
 	err = -ENODEV;
 
-	/* ifindex == 0 is not allowed for job creation */
-	if (!gwj->ccgw.src_idx || !gwj->ccgw.dst_idx)
-		goto out;
-
 	gwj->src.dev = __dev_get_by_index(&init_net, gwj->ccgw.src_idx);
 
 	if (!gwj->src.dev)
@@ -856,8 +895,6 @@ static int cgw_create_job(struct sk_buff *skb,  struct nlmsghdr *nlh)
 	if (gwj->dst.dev->type != ARPHRD_CAN)
 		goto out;
 
-	gwj->limit_hops = limhops;
-
 	ASSERT_RTNL();
 
 	err = cgw_register_filter(gwj);
@@ -931,8 +968,15 @@ static int cgw_remove_job(struct sk_buff *skb, struct nlmsghdr *nlh)
 		if (gwj->limit_hops != limhops)
 			continue;
 
-		if (memcmp(&gwj->mod, &mod, sizeof(mod)))
-			continue;
+		/* we have a match when uid is enabled and identical */
+		if (gwj->mod.uid || mod.uid) {
+			if (gwj->mod.uid != mod.uid)
+				continue;
+		} else {
+			/* no uid => check for identical modifications */
+			if (memcmp(&gwj->mod, &mod, sizeof(mod)))
+				continue;
+		}
 
 		/* if (r->gwtype == CGW_TYPE_CAN_CAN) - is made sure here */
 		if (memcmp(&gwj->ccgw, &ccgw, sizeof(ccgw)))
-- 
cgit 


From 448bac10318977e2c41548d7e6a1d87f9d48784d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 11:18:16 -0300
Subject: [media] DocBook: document DVB-S2 pilot in a table

Putting it into a table allows to comment each possible
values, with makes more clear what field means.

Also, it allows to do cross-references with the frontend.h.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml | 31 +++++++++++++++++++------
 include/uapi/linux/dvb/frontend.h               |  6 +++--
 2 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index 8d57f0c9b6aa..e31d9457671f 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -417,13 +417,30 @@ get/set up to 64 properties. The actual meaning of each property is described on
 	<para>Sets DVB-S2 pilot</para>
 	<section id="fe-pilot-t">
 		<title>fe_pilot type</title>
-		<programlisting>
-typedef enum fe_pilot {
-	PILOT_ON,
-	PILOT_OFF,
-	PILOT_AUTO,
-} fe_pilot_t;
-		</programlisting>
+<table pgwide="1" frame="none" id="fe-pilot">
+    <title>enum fe_pilot</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry align="char" id="PILOT-ON"><constant>PILOT_ON</constant></entry>
+	    <entry align="char">Pilot tones enabled</entry>
+	</row><row>
+	    <entry align="char" id="PILOT-OFF"><constant>PILOT_OFF</constant></entry>
+	    <entry align="char">Pilot tones disabled</entry>
+	</row><row>
+	    <entry align="char" id="PILOT-AUTO"><constant>PILOT_AUTO</constant></entry>
+	    <entry align="char">Autodetect pilot tones</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
 		</section>
 	</section>
 	<section id="DTV-ROLLOFF">
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 3a7ff9002654..bb222eb04627 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -399,11 +399,13 @@ struct dvb_frontend_event {
 
 #define DTV_MAX_COMMAND		DTV_STAT_TOTAL_BLOCK_COUNT
 
-typedef enum fe_pilot {
+enum fe_pilot {
 	PILOT_ON,
 	PILOT_OFF,
 	PILOT_AUTO,
-} fe_pilot_t;
+};
+
+typedef enum fe_pilot fe_pilot_t;
 
 typedef enum fe_rolloff {
 	ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */
-- 
cgit 


From b35f6ba97882ef4e00c1faae1d66232f7314fe91 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 11:59:27 -0300
Subject: [media] DocBook: better document the DVB-S2 rolloff factor

Instead of using a program listing, use a table and make clearer
what each define means.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml | 35 +++++++++++++++++++------
 include/uapi/linux/dvb/frontend.h               |  6 +++--
 2 files changed, 31 insertions(+), 10 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index e1d1e2469029..c0aa1ad9eccf 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -468,14 +468,33 @@ get/set up to 64 properties. The actual meaning of each property is described on
 
 	<section id="fe-rolloff-t">
 		<title>fe_rolloff type</title>
-		<programlisting>
-typedef enum fe_rolloff {
-	ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */
-	ROLLOFF_20,
-	ROLLOFF_25,
-	ROLLOFF_AUTO,
-} fe_rolloff_t;
-		</programlisting>
+<table pgwide="1" frame="none" id="fe-rolloff">
+    <title>enum fe_rolloff</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+	    <entry align="char" id="ROLLOFF-35"><constant>ROLLOFF_35</constant></entry>
+	    <entry align="char">Roloff factor: &alpha;=35%</entry>
+	</row><row>
+	    <entry align="char" id="ROLLOFF-20"><constant>ROLLOFF_20</constant></entry>
+	    <entry align="char">Roloff factor: &alpha;=20%</entry>
+	</row><row>
+	    <entry align="char" id="ROLLOFF-25"><constant>ROLLOFF_25</constant></entry>
+	    <entry align="char">Roloff factor: &alpha;=25%</entry>
+	</row><row>
+	    <entry align="char" id="ROLLOFF-AUTO"><constant>ROLLOFF_AUTO</constant></entry>
+	    <entry align="char">Auto-detect the roloff factor.</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
 		</section>
 	</section>
 	<section id="DTV-DISEQC-SLAVE-REPLY">
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index bb222eb04627..cdd9e2fc030d 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -407,12 +407,14 @@ enum fe_pilot {
 
 typedef enum fe_pilot fe_pilot_t;
 
-typedef enum fe_rolloff {
+enum fe_rolloff {
 	ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */
 	ROLLOFF_20,
 	ROLLOFF_25,
 	ROLLOFF_AUTO,
-} fe_rolloff_t;
+};
+
+typedef enum fe_rolloff fe_rolloff_t;
 
 typedef enum fe_delivery_system {
 	SYS_UNDEFINED,
-- 
cgit 


From d21ddba826d8704525fa637e69cf90b8e034d94a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 14:21:09 -0300
Subject: [media] DocBook: properly document the delivery systems

Use a table for the delivery systems. The table is organized
by the type (cable, satellite, terrestrial) and shows what
standards are not fully implemented.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/dvbproperty.xml | 99 +++++++++++++++++++------
 include/uapi/linux/dvb/frontend.h               |  6 +-
 2 files changed, 79 insertions(+), 26 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/dvbproperty.xml b/Documentation/DocBook/media/dvb/dvbproperty.xml
index c0aa1ad9eccf..08227d4e9150 100644
--- a/Documentation/DocBook/media/dvb/dvbproperty.xml
+++ b/Documentation/DocBook/media/dvb/dvbproperty.xml
@@ -515,31 +515,82 @@ get/set up to 64 properties. The actual meaning of each property is described on
 		<section id="fe-delivery-system-t">
 		<title>fe_delivery_system type</title>
 		<para>Possible values: </para>
-<programlisting>
 
-typedef enum fe_delivery_system {
-	SYS_UNDEFINED,
-	SYS_DVBC_ANNEX_A,
-	SYS_DVBC_ANNEX_B,
-	SYS_DVBT,
-	SYS_DSS,
-	SYS_DVBS,
-	SYS_DVBS2,
-	SYS_DVBH,
-	SYS_ISDBT,
-	SYS_ISDBS,
-	SYS_ISDBC,
-	SYS_ATSC,
-	SYS_ATSCMH,
-	SYS_DTMB,
-	SYS_CMMB,
-	SYS_DAB,
-	SYS_DVBT2,
-	SYS_TURBO,
-	SYS_DVBC_ANNEX_C,
-} fe_delivery_system_t;
-</programlisting>
-		</section>
+<table pgwide="1" frame="none" id="fe-delivery-system">
+    <title>enum fe_delivery_system</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+		<entry id="SYS-UNDEFINED"><constant>SYS_UNDEFINED</constant></entry>
+		<entry>Undefined standard. Generally, indicates an error</entry>
+	</row><row>
+		<entry id="SYS-DVBC-ANNEX-A"><constant>SYS_DVBC_ANNEX_A</constant></entry>
+		<entry>Cable TV: DVB-C following ITU-T J.83 Annex A spec</entry>
+	</row><row>
+		<entry id="SYS-DVBC-ANNEX-B"><constant>SYS_DVBC_ANNEX_B</constant></entry>
+		<entry>Cable TV: DVB-C following ITU-T J.83 Annex B spec (ClearQAM)</entry>
+	</row><row>
+		<entry id="SYS-DVBC-ANNEX-C"><constant>SYS_DVBC_ANNEX_C</constant></entry>
+		<entry>Cable TV: DVB-C following ITU-T J.83 Annex C spec</entry>
+	</row><row>
+		<entry id="SYS-ISDBC"><constant>SYS_ISDBC</constant></entry>
+		<entry>Cable TV: ISDB-C (no drivers yet)</entry>
+	</row><row>
+		<entry id="SYS-DVBT"><constant>SYS_DVBT</constant></entry>
+		<entry>Terrestral TV: DVB-T</entry>
+	</row><row>
+		<entry id="SYS-DVBT2"><constant>SYS_DVBT2</constant></entry>
+		<entry>Terrestral TV: DVB-T2</entry>
+	</row><row>
+		<entry id="SYS-ISDBT"><constant>SYS_ISDBT</constant></entry>
+		<entry>Terrestral TV: ISDB-T</entry>
+	</row><row>
+		<entry id="SYS-ATSC"><constant>SYS_ATSC</constant></entry>
+		<entry>Terrestral TV: ATSC</entry>
+	</row><row>
+		<entry id="SYS-ATSCMH"><constant>SYS_ATSCMH</constant></entry>
+		<entry>Terrestral TV (mobile): ATSC-M/H</entry>
+	</row><row>
+		<entry id="SYS-DTMB"><constant>SYS_DTMB</constant></entry>
+		<entry>Terrestrial TV: DTMB</entry>
+	</row><row>
+		<entry id="SYS-DVBS"><constant>SYS_DVBS</constant></entry>
+		<entry>Satellite TV: DVB-S</entry>
+	</row><row>
+		<entry id="SYS-DVBS2"><constant>SYS_DVBS2</constant></entry>
+		<entry>Satellite TV: DVB-S2</entry>
+	</row><row>
+		<entry id="SYS-TURBO"><constant>SYS_TURBO</constant></entry>
+		<entry>Satellite TV: DVB-S Turbo</entry>
+	</row><row>
+		<entry id="SYS-ISDBS"><constant>SYS_ISDBS</constant></entry>
+		<entry>Satellite TV: ISDB-S</entry>
+	</row><row>
+		<entry id="SYS-DAB"><constant>SYS_DAB</constant></entry>
+		<entry>Digital audio: DAB (not fully supported)</entry>
+	</row><row>
+		<entry id="SYS-DSS"><constant>SYS_DSS</constant></entry>
+		<entry>Satellite TV:"DSS (not fully supported)</entry>
+	</row><row>
+		<entry id="SYS-CMMB"><constant>SYS_CMMB</constant></entry>
+		<entry>Terrestral TV (mobile):CMMB (not fully supported)</entry>
+	</row><row>
+		<entry id="SYS-DVBH"><constant>SYS_DVBH</constant></entry>
+		<entry>Terrestral TV (mobile): DVB-H (standard deprecated)</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+
+
+</section>
 	</section>
 	<section id="DTV-ISDBT-PARTIAL-RECEPTION">
 		<title><constant>DTV_ISDBT_PARTIAL_RECEPTION</constant></title>
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index cdd9e2fc030d..66499f238204 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -416,7 +416,7 @@ enum fe_rolloff {
 
 typedef enum fe_rolloff fe_rolloff_t;
 
-typedef enum fe_delivery_system {
+enum fe_delivery_system {
 	SYS_UNDEFINED,
 	SYS_DVBC_ANNEX_A,
 	SYS_DVBC_ANNEX_B,
@@ -436,7 +436,9 @@ typedef enum fe_delivery_system {
 	SYS_DVBT2,
 	SYS_TURBO,
 	SYS_DVBC_ANNEX_C,
-} fe_delivery_system_t;
+};
+
+typedef enum fe_delivery_system fe_delivery_system_t;
 
 /* backward compatibility */
 #define SYS_DVBC_ANNEX_AC	SYS_DVBC_ANNEX_A
-- 
cgit 


From fe557e40f576741308d3546906eba7094e940de4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 14:31:13 -0300
Subject: [media] DocBook: add xrefs for enum fe_type

The only enum that was missing xrefs at frontend.h is fe_type.
Add xrefs for them.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/dvb/frontend_legacy_api.xml | 8 ++++----
 include/uapi/linux/dvb/frontend.h                       | 6 ++++--
 2 files changed, 8 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
index 8523caf91a2c..8fadf3a4ba44 100644
--- a/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
+++ b/Documentation/DocBook/media/dvb/frontend_legacy_api.xml
@@ -20,22 +20,22 @@
   </thead>
   <tbody valign="top">
   <row>
-     <entry id="FE_QPSK"><constant>FE_QPSK</constant></entry>
+     <entry id="FE-QPSK"><constant>FE_QPSK</constant></entry>
      <entry>For DVB-S standard</entry>
      <entry><constant>SYS_DVBS</constant></entry>
   </row>
   <row>
-     <entry id="FE_QAM"><constant>FE_QAM</constant></entry>
+     <entry id="FE-QAM"><constant>FE_QAM</constant></entry>
      <entry>For DVB-C annex A standard</entry>
      <entry><constant>SYS_DVBC_ANNEX_A</constant></entry>
   </row>
   <row>
-     <entry id="FE_OFDM"><constant>FE_OFDM</constant></entry>
+     <entry id="FE-OFDM"><constant>FE_OFDM</constant></entry>
      <entry>For DVB-T standard</entry>
      <entry><constant>SYS_DVBT</constant></entry>
   </row>
   <row>
-     <entry id="FE_ATSC"><constant>FE_ATSC</constant></entry>
+     <entry id="FE-ATSC"><constant>FE_ATSC</constant></entry>
      <entry>For ATSC standard (terrestrial) or for DVB-C Annex B (cable) used in US.</entry>
      <entry><constant>SYS_ATSC</constant> (terrestrial) or <constant>SYS_DVBC_ANNEX_B</constant> (cable)</entry>
   </row>
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 66499f238204..a36d802fae0c 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -28,12 +28,14 @@
 
 #include <linux/types.h>
 
-typedef enum fe_type {
+enum fe_type {
 	FE_QPSK,
 	FE_QAM,
 	FE_OFDM,
 	FE_ATSC
-} fe_type_t;
+};
+
+typedef enum fe_type fe_type_t;
 
 
 enum fe_caps {
-- 
cgit 


From 0df289a209e02f0926042ab07d7d2595ea2d2e9b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 14:53:52 -0300
Subject: [media] dvb: Get rid of typedev usage for enums

The DVB API was originally defined using typedefs. This is against
Kernel CodingStyle, and there's no good usage here. While we can't
remove its usage on userspace, we can avoid its usage in Kernelspace.

So, let's do it.

This patch was generated by this shell script:

	for j in $(grep typedef include/uapi/linux/dvb/frontend.h |cut -d' ' -f 3); do for i in $(find drivers/media -name '*.[ch]' -type f) $(find drivers/staging/media -name '*.[ch]' -type f); do sed "s,${j}_t,enum $j," <$i >a && mv a $i; done; done

While here, make CodingStyle fixes on the affected lines.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Acked-by: Stefan Richter <stefanr@s5r6.in-berlin.de> # for drivers/media/firewire/*
---
 drivers/media/common/b2c2/flexcop-fe-tuner.c      |  7 ++--
 drivers/media/common/siano/smsdvb-main.c          |  6 +--
 drivers/media/common/siano/smsdvb.h               |  2 +-
 drivers/media/dvb-core/dvb_frontend.c             | 27 +++++++-----
 drivers/media/dvb-core/dvb_frontend.h             | 42 ++++++++++---------
 drivers/media/dvb-frontends/a8293.c               |  2 +-
 drivers/media/dvb-frontends/af9013.c              |  4 +-
 drivers/media/dvb-frontends/af9033.c              |  4 +-
 drivers/media/dvb-frontends/as102_fe.c            |  4 +-
 drivers/media/dvb-frontends/atbm8830.c            |  3 +-
 drivers/media/dvb-frontends/au8522_dig.c          |  4 +-
 drivers/media/dvb-frontends/au8522_priv.h         |  2 +-
 drivers/media/dvb-frontends/bcm3510.c             |  2 +-
 drivers/media/dvb-frontends/cx22700.c             |  9 ++--
 drivers/media/dvb-frontends/cx22702.c             |  2 +-
 drivers/media/dvb-frontends/cx24110.c             | 19 +++++----
 drivers/media/dvb-frontends/cx24116.c             | 38 +++++++++--------
 drivers/media/dvb-frontends/cx24117.c             | 40 +++++++++---------
 drivers/media/dvb-frontends/cx24120.c             | 50 +++++++++++------------
 drivers/media/dvb-frontends/cx24123.c             | 18 ++++----
 drivers/media/dvb-frontends/cxd2820r_c.c          |  2 +-
 drivers/media/dvb-frontends/cxd2820r_core.c       |  5 ++-
 drivers/media/dvb-frontends/cxd2820r_priv.h       |  8 ++--
 drivers/media/dvb-frontends/cxd2820r_t.c          |  2 +-
 drivers/media/dvb-frontends/cxd2820r_t2.c         |  2 +-
 drivers/media/dvb-frontends/dib3000mb.c           |  7 ++--
 drivers/media/dvb-frontends/dib3000mc.c           |  2 +-
 drivers/media/dvb-frontends/dib7000m.c            |  2 +-
 drivers/media/dvb-frontends/dib7000p.c            |  6 +--
 drivers/media/dvb-frontends/dib8000.c             | 10 ++---
 drivers/media/dvb-frontends/dib9000.c             |  4 +-
 drivers/media/dvb-frontends/drx39xyj/drxj.c       |  2 +-
 drivers/media/dvb-frontends/drxd_hard.c           |  2 +-
 drivers/media/dvb-frontends/drxk_hard.c           |  2 +-
 drivers/media/dvb-frontends/drxk_hard.h           |  2 +-
 drivers/media/dvb-frontends/ds3000.c              | 13 +++---
 drivers/media/dvb-frontends/dvb_dummy_fe.c        |  9 ++--
 drivers/media/dvb-frontends/ec100.c               |  2 +-
 drivers/media/dvb-frontends/hd29l2.c              |  2 +-
 drivers/media/dvb-frontends/hd29l2_priv.h         |  2 +-
 drivers/media/dvb-frontends/isl6405.c             |  3 +-
 drivers/media/dvb-frontends/isl6421.c             |  6 ++-
 drivers/media/dvb-frontends/l64781.c              |  2 +-
 drivers/media/dvb-frontends/lg2160.c              |  2 +-
 drivers/media/dvb-frontends/lgdt3305.c            |  4 +-
 drivers/media/dvb-frontends/lgdt3306a.c           |  9 ++--
 drivers/media/dvb-frontends/lgdt330x.c            |  8 ++--
 drivers/media/dvb-frontends/lgs8gl5.c             |  2 +-
 drivers/media/dvb-frontends/lgs8gxx.c             |  3 +-
 drivers/media/dvb-frontends/lnbp21.c              |  4 +-
 drivers/media/dvb-frontends/lnbp22.c              |  3 +-
 drivers/media/dvb-frontends/m88ds3103.c           |  9 ++--
 drivers/media/dvb-frontends/m88ds3103_priv.h      |  4 +-
 drivers/media/dvb-frontends/m88rs2000.c           | 19 +++++----
 drivers/media/dvb-frontends/mb86a16.c             |  7 ++--
 drivers/media/dvb-frontends/mb86a16.h             |  3 +-
 drivers/media/dvb-frontends/mb86a20s.c            |  6 +--
 drivers/media/dvb-frontends/mt312.c               | 17 ++++----
 drivers/media/dvb-frontends/mt352.c               |  2 +-
 drivers/media/dvb-frontends/nxt200x.c             |  2 +-
 drivers/media/dvb-frontends/nxt6000.c             | 12 ++++--
 drivers/media/dvb-frontends/or51132.c             |  6 +--
 drivers/media/dvb-frontends/or51211.c             |  2 +-
 drivers/media/dvb-frontends/rtl2830.c             |  2 +-
 drivers/media/dvb-frontends/rtl2830_priv.h        |  2 +-
 drivers/media/dvb-frontends/rtl2832.c             |  2 +-
 drivers/media/dvb-frontends/rtl2832_priv.h        |  2 +-
 drivers/media/dvb-frontends/s5h1409.c             |  6 +--
 drivers/media/dvb-frontends/s5h1411.c             |  6 +--
 drivers/media/dvb-frontends/s5h1420.c             | 23 +++++++----
 drivers/media/dvb-frontends/s5h1432.c             |  4 +-
 drivers/media/dvb-frontends/s921.c                |  6 +--
 drivers/media/dvb-frontends/si2165.c              |  2 +-
 drivers/media/dvb-frontends/si2168.c              |  2 +-
 drivers/media/dvb-frontends/si2168_priv.h         |  4 +-
 drivers/media/dvb-frontends/si21xx.c              | 10 ++---
 drivers/media/dvb-frontends/sp8870.c              |  3 +-
 drivers/media/dvb-frontends/sp887x.c              |  2 +-
 drivers/media/dvb-frontends/stb0899_drv.c         |  8 ++--
 drivers/media/dvb-frontends/stv0288.c             | 11 ++---
 drivers/media/dvb-frontends/stv0297.c             | 11 +++--
 drivers/media/dvb-frontends/stv0299.c             | 22 ++++++----
 drivers/media/dvb-frontends/stv0367.c             | 12 +++---
 drivers/media/dvb-frontends/stv0367_priv.h        |  2 +-
 drivers/media/dvb-frontends/stv0900_core.c        |  6 ++-
 drivers/media/dvb-frontends/stv090x.c             |  5 ++-
 drivers/media/dvb-frontends/stv6110.c             |  2 +-
 drivers/media/dvb-frontends/tc90522.c             | 17 ++++----
 drivers/media/dvb-frontends/tda10021.c            |  7 ++--
 drivers/media/dvb-frontends/tda10023.c            |  3 +-
 drivers/media/dvb-frontends/tda10048.c            |  2 +-
 drivers/media/dvb-frontends/tda1004x.c            |  3 +-
 drivers/media/dvb-frontends/tda10071.c            | 10 ++---
 drivers/media/dvb-frontends/tda10071_priv.h       | 10 ++---
 drivers/media/dvb-frontends/tda10086.c            |  9 ++--
 drivers/media/dvb-frontends/tda8083.c             | 38 ++++++++++-------
 drivers/media/dvb-frontends/ves1820.c             |  6 ++-
 drivers/media/dvb-frontends/ves1x93.c             | 15 ++++---
 drivers/media/dvb-frontends/zl10353.c             |  2 +-
 drivers/media/firewire/firedtv-fe.c               |  8 ++--
 drivers/media/firewire/firedtv.h                  |  4 +-
 drivers/media/pci/bt8xx/dst.c                     | 25 +++++++-----
 drivers/media/pci/bt8xx/dst_common.h              | 12 +++---
 drivers/media/pci/cx23885/cx23885-dvb.c           | 10 +++--
 drivers/media/pci/cx23885/cx23885-f300.c          |  2 +-
 drivers/media/pci/cx23885/cx23885-f300.h          |  2 +-
 drivers/media/pci/cx23885/cx23885.h               |  2 +-
 drivers/media/pci/cx88/cx88-dvb.c                 | 12 +++---
 drivers/media/pci/cx88/cx88.h                     |  5 ++-
 drivers/media/pci/dm1105/dm1105.c                 |  3 +-
 drivers/media/pci/mantis/mantis_vp1034.c          |  2 +-
 drivers/media/pci/mantis/mantis_vp1034.h          |  3 +-
 drivers/media/pci/ngene/ngene.h                   |  2 +-
 drivers/media/pci/pt1/pt1.c                       |  6 +--
 drivers/media/pci/pt1/va1j5jf8007s.c              |  4 +-
 drivers/media/pci/pt1/va1j5jf8007t.c              |  4 +-
 drivers/media/pci/pt3/pt3.c                       |  2 +-
 drivers/media/pci/saa7134/saa7134-dvb.c           |  6 ++-
 drivers/media/pci/saa7134/saa7134.h               |  3 +-
 drivers/media/pci/ttpci/av7110.c                  | 18 ++++----
 drivers/media/pci/ttpci/av7110.h                  | 27 +++++++-----
 drivers/media/pci/ttpci/budget-core.c             |  3 +-
 drivers/media/pci/ttpci/budget-patch.c            | 15 ++++---
 drivers/media/pci/ttpci/budget.c                  | 12 ++++--
 drivers/media/pci/ttpci/budget.h                  |  2 +-
 drivers/media/usb/dvb-usb-v2/af9015.c             |  2 +-
 drivers/media/usb/dvb-usb-v2/af9015.h             |  2 +-
 drivers/media/usb/dvb-usb-v2/dvbsky.c             | 11 ++---
 drivers/media/usb/dvb-usb-v2/lmedm04.c            | 10 ++---
 drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c     | 14 +++----
 drivers/media/usb/dvb-usb/af9005-fe.c             |  5 ++-
 drivers/media/usb/dvb-usb/az6027.c                |  3 +-
 drivers/media/usb/dvb-usb/cinergyT2-fe.c          |  2 +-
 drivers/media/usb/dvb-usb/dib0700.h               |  2 +-
 drivers/media/usb/dvb-usb/dib0700_devices.c       |  2 +-
 drivers/media/usb/dvb-usb/dtt200u-fe.c            |  7 ++--
 drivers/media/usb/dvb-usb/dw2102.c                | 13 +++---
 drivers/media/usb/dvb-usb/friio-fe.c              |  3 +-
 drivers/media/usb/dvb-usb/gp8psk-fe.c             | 13 +++---
 drivers/media/usb/dvb-usb/opera1.c                |  3 +-
 drivers/media/usb/dvb-usb/technisat-usb2.c        |  2 +-
 drivers/media/usb/dvb-usb/vp702x-fe.c             | 17 ++++----
 drivers/media/usb/dvb-usb/vp7045-fe.c             |  3 +-
 drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c |  9 ++--
 drivers/media/usb/ttusb-dec/ttusbdecfe.c          | 10 +++--
 drivers/staging/media/mn88472/mn88472.c           |  2 +-
 drivers/staging/media/mn88472/mn88472_priv.h      |  2 +-
 drivers/staging/media/mn88473/mn88473.c           |  2 +-
 drivers/staging/media/mn88473/mn88473_priv.h      |  2 +-
 include/uapi/linux/dvb/frontend.h                 |  4 +-
 150 files changed, 630 insertions(+), 492 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/media/common/b2c2/flexcop-fe-tuner.c b/drivers/media/common/b2c2/flexcop-fe-tuner.c
index 2426062fcb3c..5e5696729eca 100644
--- a/drivers/media/common/b2c2/flexcop-fe-tuner.c
+++ b/drivers/media/common/b2c2/flexcop-fe-tuner.c
@@ -39,7 +39,8 @@ static int flexcop_fe_request_firmware(struct dvb_frontend *fe,
 
 /* lnb control */
 #if FE_SUPPORTED(MT312) || FE_SUPPORTED(STV0299)
-static int flexcop_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int flexcop_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct flexcop_device *fc = fe->dvb->priv;
 	flexcop_ibi_value v;
@@ -78,7 +79,7 @@ static int flexcop_sleep(struct dvb_frontend* fe)
 
 /* SkyStar2 DVB-S rev 2.3 */
 #if FE_SUPPORTED(MT312) && FE_SUPPORTED(PLL)
-static int flexcop_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int flexcop_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 /* u16 wz_half_period_for_45_mhz[] = { 0x01ff, 0x0154, 0x00ff, 0x00cc }; */
 	struct flexcop_device *fc = fe->dvb->priv;
@@ -157,7 +158,7 @@ static int flexcop_diseqc_send_master_cmd(struct dvb_frontend *fe,
 }
 
 static int flexcop_diseqc_send_burst(struct dvb_frontend *fe,
-	fe_sec_mini_cmd_t minicmd)
+				     enum fe_sec_mini_cmd minicmd)
 {
 	return flexcop_send_diseqc_msg(fe, 0, NULL, minicmd);
 }
diff --git a/drivers/media/common/siano/smsdvb-main.c b/drivers/media/common/siano/smsdvb-main.c
index 367b8e77feb8..f4305ae800f4 100644
--- a/drivers/media/common/siano/smsdvb-main.c
+++ b/drivers/media/common/siano/smsdvb-main.c
@@ -753,7 +753,7 @@ static inline int led_feedback(struct smsdvb_client_t *client)
 				     SMS_LED_HI : SMS_LED_LO);
 }
 
-static int smsdvb_read_status(struct dvb_frontend *fe, fe_status_t *stat)
+static int smsdvb_read_status(struct dvb_frontend *fe, enum fe_status *stat)
 {
 	int rc;
 	struct smsdvb_client_t *client;
@@ -900,7 +900,7 @@ static int smsdvb_dvbt_set_frontend(struct dvb_frontend *fe)
 	/* Disable LNA, if any. An error is returned if no LNA is present */
 	ret = sms_board_lna_control(client->coredev, 0);
 	if (ret == 0) {
-		fe_status_t status;
+		enum fe_status status;
 
 		/* tune with LNA off at first */
 		ret = smsdvb_sendrequest_and_wait(client, &msg, sizeof(msg),
@@ -971,7 +971,7 @@ static int smsdvb_isdbt_set_frontend(struct dvb_frontend *fe)
 	/* Disable LNA, if any. An error is returned if no LNA is present */
 	ret = sms_board_lna_control(client->coredev, 0);
 	if (ret == 0) {
-		fe_status_t status;
+		enum fe_status status;
 
 		/* tune with LNA off at first */
 		ret = smsdvb_sendrequest_and_wait(client, &msg, sizeof(msg),
diff --git a/drivers/media/common/siano/smsdvb.h b/drivers/media/common/siano/smsdvb.h
index ae36d0ae0fb1..b15754d95ec0 100644
--- a/drivers/media/common/siano/smsdvb.h
+++ b/drivers/media/common/siano/smsdvb.h
@@ -40,7 +40,7 @@ struct smsdvb_client_t {
 	struct dmxdev           dmxdev;
 	struct dvb_frontend     frontend;
 
-	fe_status_t             fe_status;
+	enum fe_status          fe_status;
 
 	struct completion       tune_done;
 	struct completion       stats_done;
diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c
index a894d4c99ee8..55a6b0500615 100644
--- a/drivers/media/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb-core/dvb_frontend.c
@@ -110,7 +110,7 @@ struct dvb_frontend_private {
 	struct task_struct *thread;
 	unsigned long release_jiffies;
 	unsigned int wakeup;
-	fe_status_t status;
+	enum fe_status status;
 	unsigned long tune_mode_flags;
 	unsigned int delay;
 	unsigned int reinitialise;
@@ -198,7 +198,8 @@ static enum dvbv3_emulation_type dvbv3_type(u32 delivery_system)
 	}
 }
 
-static void dvb_frontend_add_event(struct dvb_frontend *fe, fe_status_t status)
+static void dvb_frontend_add_event(struct dvb_frontend *fe,
+				   enum fe_status status)
 {
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
 	struct dvb_fe_events *events = &fepriv->events;
@@ -429,7 +430,7 @@ static int dvb_frontend_swzigzag_autotune(struct dvb_frontend *fe, int check_wra
 
 static void dvb_frontend_swzigzag(struct dvb_frontend *fe)
 {
-	fe_status_t s = 0;
+	enum fe_status s = 0;
 	int retval = 0;
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache, tmp;
@@ -690,7 +691,7 @@ static int dvb_frontend_thread(void *data)
 {
 	struct dvb_frontend *fe = data;
 	struct dvb_frontend_private *fepriv = fe->frontend_priv;
-	fe_status_t s;
+	enum fe_status s;
 	enum dvbfe_algo algo;
 #ifdef CONFIG_MEDIA_CONTROLLER_DVB
 	int ret;
@@ -2341,7 +2342,7 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 	}
 
 	case FE_READ_STATUS: {
-		fe_status_t* status = parg;
+		enum fe_status *status = parg;
 
 		/* if retune was requested but hasn't occurred yet, prevent
 		 * that user get signal state from previous tuning */
@@ -2411,7 +2412,8 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 
 	case FE_DISEQC_SEND_BURST:
 		if (fe->ops.diseqc_send_burst) {
-			err = fe->ops.diseqc_send_burst(fe, (fe_sec_mini_cmd_t) parg);
+			err = fe->ops.diseqc_send_burst(fe,
+						(enum fe_sec_mini_cmd)parg);
 			fepriv->state = FESTATE_DISEQC;
 			fepriv->status = 0;
 		}
@@ -2419,8 +2421,9 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 
 	case FE_SET_TONE:
 		if (fe->ops.set_tone) {
-			err = fe->ops.set_tone(fe, (fe_sec_tone_mode_t) parg);
-			fepriv->tone = (fe_sec_tone_mode_t) parg;
+			err = fe->ops.set_tone(fe,
+					       (enum fe_sec_tone_mode)parg);
+			fepriv->tone = (enum fe_sec_tone_mode)parg;
 			fepriv->state = FESTATE_DISEQC;
 			fepriv->status = 0;
 		}
@@ -2428,8 +2431,9 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 
 	case FE_SET_VOLTAGE:
 		if (fe->ops.set_voltage) {
-			err = fe->ops.set_voltage(fe, (fe_sec_voltage_t) parg);
-			fepriv->voltage = (fe_sec_voltage_t) parg;
+			err = fe->ops.set_voltage(fe,
+						  (enum fe_sec_voltage)parg);
+			fepriv->voltage = (enum fe_sec_voltage)parg;
 			fepriv->state = FESTATE_DISEQC;
 			fepriv->status = 0;
 		}
@@ -2437,7 +2441,8 @@ static int dvb_frontend_ioctl_legacy(struct file *file,
 
 	case FE_DISHNETWORK_SEND_LEGACY_CMD:
 		if (fe->ops.dishnetwork_send_legacy_command) {
-			err = fe->ops.dishnetwork_send_legacy_command(fe, (unsigned long) parg);
+			err = fe->ops.dishnetwork_send_legacy_command(fe,
+							 (unsigned long)parg);
 			fepriv->state = FESTATE_DISEQC;
 			fepriv->status = 0;
 		} else if (fe->ops.set_voltage) {
diff --git a/drivers/media/dvb-core/dvb_frontend.h b/drivers/media/dvb-core/dvb_frontend.h
index 816269e5f706..4ff82041fdfd 100644
--- a/drivers/media/dvb-core/dvb_frontend.h
+++ b/drivers/media/dvb-core/dvb_frontend.h
@@ -279,7 +279,7 @@ struct dvb_frontend_ops {
 		    bool re_tune,
 		    unsigned int mode_flags,
 		    unsigned int *delay,
-		    fe_status_t *status);
+		    enum fe_status *status);
 	/* get frontend tuning algorithm from the module */
 	enum dvbfe_algo (*get_frontend_algo)(struct dvb_frontend *fe);
 
@@ -289,7 +289,7 @@ struct dvb_frontend_ops {
 
 	int (*get_frontend)(struct dvb_frontend *fe);
 
-	int (*read_status)(struct dvb_frontend* fe, fe_status_t* status);
+	int (*read_status)(struct dvb_frontend *fe, enum fe_status *status);
 	int (*read_ber)(struct dvb_frontend* fe, u32* ber);
 	int (*read_signal_strength)(struct dvb_frontend* fe, u16* strength);
 	int (*read_snr)(struct dvb_frontend* fe, u16* snr);
@@ -298,9 +298,11 @@ struct dvb_frontend_ops {
 	int (*diseqc_reset_overload)(struct dvb_frontend* fe);
 	int (*diseqc_send_master_cmd)(struct dvb_frontend* fe, struct dvb_diseqc_master_cmd* cmd);
 	int (*diseqc_recv_slave_reply)(struct dvb_frontend* fe, struct dvb_diseqc_slave_reply* reply);
-	int (*diseqc_send_burst)(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd);
-	int (*set_tone)(struct dvb_frontend* fe, fe_sec_tone_mode_t tone);
-	int (*set_voltage)(struct dvb_frontend* fe, fe_sec_voltage_t voltage);
+	int (*diseqc_send_burst)(struct dvb_frontend *fe,
+				 enum fe_sec_mini_cmd minicmd);
+	int (*set_tone)(struct dvb_frontend *fe, enum fe_sec_tone_mode tone);
+	int (*set_voltage)(struct dvb_frontend *fe,
+			   enum fe_sec_voltage voltage);
 	int (*enable_high_lnb_voltage)(struct dvb_frontend* fe, long arg);
 	int (*dishnetwork_send_legacy_command)(struct dvb_frontend* fe, unsigned long cmd);
 	int (*i2c_gate_ctrl)(struct dvb_frontend* fe, int enable);
@@ -338,24 +340,24 @@ struct dtv_frontend_properties {
 	u32			state;
 
 	u32			frequency;
-	fe_modulation_t		modulation;
+	enum fe_modulation		modulation;
 
-	fe_sec_voltage_t	voltage;
-	fe_sec_tone_mode_t	sectone;
-	fe_spectral_inversion_t	inversion;
-	fe_code_rate_t		fec_inner;
-	fe_transmit_mode_t	transmission_mode;
+	enum fe_sec_voltage	voltage;
+	enum fe_sec_tone_mode	sectone;
+	enum fe_spectral_inversion	inversion;
+	enum fe_code_rate		fec_inner;
+	enum fe_transmit_mode	transmission_mode;
 	u32			bandwidth_hz;	/* 0 = AUTO */
-	fe_guard_interval_t	guard_interval;
-	fe_hierarchy_t		hierarchy;
+	enum fe_guard_interval	guard_interval;
+	enum fe_hierarchy		hierarchy;
 	u32			symbol_rate;
-	fe_code_rate_t		code_rate_HP;
-	fe_code_rate_t		code_rate_LP;
+	enum fe_code_rate		code_rate_HP;
+	enum fe_code_rate		code_rate_LP;
 
-	fe_pilot_t		pilot;
-	fe_rolloff_t		rolloff;
+	enum fe_pilot		pilot;
+	enum fe_rolloff		rolloff;
 
-	fe_delivery_system_t	delivery_system;
+	enum fe_delivery_system	delivery_system;
 
 	enum fe_interleaving	interleaving;
 
@@ -368,8 +370,8 @@ struct dtv_frontend_properties {
 	u8			isdbt_layer_enabled;
 	struct {
 	    u8			segment_count;
-	    fe_code_rate_t	fec;
-	    fe_modulation_t	modulation;
+	    enum fe_code_rate	fec;
+	    enum fe_modulation	modulation;
 	    u8			interleaving;
 	} layer[3];
 
diff --git a/drivers/media/dvb-frontends/a8293.c b/drivers/media/dvb-frontends/a8293.c
index 3f0cf9ee6672..97ecbe01034c 100644
--- a/drivers/media/dvb-frontends/a8293.c
+++ b/drivers/media/dvb-frontends/a8293.c
@@ -67,7 +67,7 @@ static int a8293_rd(struct a8293_priv *priv, u8 *val, int len)
 }
 
 static int a8293_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t fe_sec_voltage)
+	enum fe_sec_voltage fe_sec_voltage)
 {
 	struct a8293_priv *priv = fe->sec_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/af9013.c b/drivers/media/dvb-frontends/af9013.c
index ba6c8f6c42a1..e23197da84af 100644
--- a/drivers/media/dvb-frontends/af9013.c
+++ b/drivers/media/dvb-frontends/af9013.c
@@ -39,7 +39,7 @@ struct af9013_state {
 	u32 ucblocks;
 	u16 snr;
 	u32 bandwidth_hz;
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 	unsigned long set_frontend_jiffies;
 	unsigned long read_status_jiffies;
 	bool first_tune;
@@ -983,7 +983,7 @@ err:
 	return ret;
 }
 
-static int af9013_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int af9013_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct af9013_state *state = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/af9033.c b/drivers/media/dvb-frontends/af9033.c
index 82ce47bdf5dc..59018afaa95f 100644
--- a/drivers/media/dvb-frontends/af9033.c
+++ b/drivers/media/dvb-frontends/af9033.c
@@ -35,7 +35,7 @@ struct af9033_dev {
 	bool ts_mode_parallel;
 	bool ts_mode_serial;
 
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 	u64 post_bit_error_prev; /* for old read_ber we return (curr - prev) */
 	u64 post_bit_error;
 	u64 post_bit_count;
@@ -818,7 +818,7 @@ err:
 	return ret;
 }
 
-static int af9033_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int af9033_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct af9033_dev *dev = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/as102_fe.c b/drivers/media/dvb-frontends/as102_fe.c
index 493665899565..544c5f65d19a 100644
--- a/drivers/media/dvb-frontends/as102_fe.c
+++ b/drivers/media/dvb-frontends/as102_fe.c
@@ -32,7 +32,7 @@ struct as102_state {
 	uint32_t ber;
 };
 
-static uint8_t as102_fe_get_code_rate(fe_code_rate_t arg)
+static uint8_t as102_fe_get_code_rate(enum fe_code_rate arg)
 {
 	uint8_t c;
 
@@ -306,7 +306,7 @@ static int as102_fe_get_tune_settings(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int as102_fe_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int as102_fe_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	int ret = 0;
 	struct as102_state *state = fe->demodulator_priv;
diff --git a/drivers/media/dvb-frontends/atbm8830.c b/drivers/media/dvb-frontends/atbm8830.c
index 4e11dc4b1335..8fe552e293ed 100644
--- a/drivers/media/dvb-frontends/atbm8830.c
+++ b/drivers/media/dvb-frontends/atbm8830.c
@@ -335,7 +335,8 @@ static int atbm8830_get_tune_settings(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int atbm8830_read_status(struct dvb_frontend *fe, fe_status_t *fe_status)
+static int atbm8830_read_status(struct dvb_frontend *fe,
+				enum fe_status *fe_status)
 {
 	struct atbm_state *priv = fe->demodulator_priv;
 	u8 locked = 0;
diff --git a/drivers/media/dvb-frontends/au8522_dig.c b/drivers/media/dvb-frontends/au8522_dig.c
index 5d06c99b0e97..b744a3f8d467 100644
--- a/drivers/media/dvb-frontends/au8522_dig.c
+++ b/drivers/media/dvb-frontends/au8522_dig.c
@@ -552,7 +552,7 @@ static struct {
 };
 
 static int au8522_enable_modulation(struct dvb_frontend *fe,
-				    fe_modulation_t m)
+				    enum fe_modulation m)
 {
 	struct au8522_state *state = fe->demodulator_priv;
 	int i;
@@ -644,7 +644,7 @@ static int au8522_set_frontend(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int au8522_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int au8522_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct au8522_state *state = fe->demodulator_priv;
 	u8 reg;
diff --git a/drivers/media/dvb-frontends/au8522_priv.h b/drivers/media/dvb-frontends/au8522_priv.h
index b8aca1c84786..951b3847e6f6 100644
--- a/drivers/media/dvb-frontends/au8522_priv.h
+++ b/drivers/media/dvb-frontends/au8522_priv.h
@@ -55,7 +55,7 @@ struct au8522_state {
 	struct dvb_frontend frontend;
 
 	u32 current_frequency;
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 
 	u32 fe_status;
 	unsigned int led_state;
diff --git a/drivers/media/dvb-frontends/bcm3510.c b/drivers/media/dvb-frontends/bcm3510.c
index 23bfd00d42db..d30275f27644 100644
--- a/drivers/media/dvb-frontends/bcm3510.c
+++ b/drivers/media/dvb-frontends/bcm3510.c
@@ -289,7 +289,7 @@ static int bcm3510_refresh_state(struct bcm3510_state *st)
 	return 0;
 }
 
-static int bcm3510_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int bcm3510_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct bcm3510_state* st = fe->demodulator_priv;
 	bcm3510_refresh_state(st);
diff --git a/drivers/media/dvb-frontends/cx22700.c b/drivers/media/dvb-frontends/cx22700.c
index 86563260d0f2..fd033cca6e11 100644
--- a/drivers/media/dvb-frontends/cx22700.c
+++ b/drivers/media/dvb-frontends/cx22700.c
@@ -191,9 +191,10 @@ static int cx22700_set_tps(struct cx22700_state *state,
 static int cx22700_get_tps(struct cx22700_state *state,
 			   struct dtv_frontend_properties *p)
 {
-	static const fe_modulation_t qam_tab [3] = { QPSK, QAM_16, QAM_64 };
-	static const fe_code_rate_t fec_tab [5] = { FEC_1_2, FEC_2_3, FEC_3_4,
-						    FEC_5_6, FEC_7_8 };
+	static const enum fe_modulation qam_tab[3] = { QPSK, QAM_16, QAM_64 };
+	static const enum fe_code_rate fec_tab[5] = {
+		FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8
+	};
 	u8 val;
 
 	dprintk ("%s\n", __func__);
@@ -253,7 +254,7 @@ static int cx22700_init (struct dvb_frontend* fe)
 	return 0;
 }
 
-static int cx22700_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int cx22700_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cx22700_state* state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/cx22702.c b/drivers/media/dvb-frontends/cx22702.c
index edc8eafc5c09..d2d06dcd7683 100644
--- a/drivers/media/dvb-frontends/cx22702.c
+++ b/drivers/media/dvb-frontends/cx22702.c
@@ -452,7 +452,7 @@ static int cx22702_init(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int cx22702_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int cx22702_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cx22702_state *state = fe->demodulator_priv;
 	u8 reg0A;
diff --git a/drivers/media/dvb-frontends/cx24110.c b/drivers/media/dvb-frontends/cx24110.c
index 7b510f2ae20f..cb36475e322b 100644
--- a/drivers/media/dvb-frontends/cx24110.c
+++ b/drivers/media/dvb-frontends/cx24110.c
@@ -143,7 +143,8 @@ static int cx24110_readreg (struct cx24110_state* state, u8 reg)
 	return b1[0];
 }
 
-static int cx24110_set_inversion (struct cx24110_state* state, fe_spectral_inversion_t inversion)
+static int cx24110_set_inversion(struct cx24110_state *state,
+				 enum fe_spectral_inversion inversion)
 {
 /* fixme (low): error handling */
 
@@ -177,7 +178,7 @@ static int cx24110_set_inversion (struct cx24110_state* state, fe_spectral_inver
 	return 0;
 }
 
-static int cx24110_set_fec(struct cx24110_state* state, fe_code_rate_t fec)
+static int cx24110_set_fec(struct cx24110_state *state, enum fe_code_rate fec)
 {
 	static const int rate[FEC_AUTO] = {-1,    1,    2,    3,    5,    7, -1};
 	static const int g1[FEC_AUTO]   = {-1, 0x01, 0x02, 0x05, 0x15, 0x45, -1};
@@ -220,7 +221,7 @@ static int cx24110_set_fec(struct cx24110_state* state, fe_code_rate_t fec)
 	return 0;
 }
 
-static fe_code_rate_t cx24110_get_fec (struct cx24110_state* state)
+static enum fe_code_rate cx24110_get_fec(struct cx24110_state *state)
 {
 	int i;
 
@@ -365,7 +366,8 @@ static int cx24110_initfe(struct dvb_frontend* fe)
 	return 0;
 }
 
-static int cx24110_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int cx24110_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct cx24110_state *state = fe->demodulator_priv;
 
@@ -379,7 +381,8 @@ static int cx24110_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltag
 	}
 }
 
-static int cx24110_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t burst)
+static int cx24110_diseqc_send_burst(struct dvb_frontend *fe,
+				     enum fe_sec_mini_cmd burst)
 {
 	int rv, bit;
 	struct cx24110_state *state = fe->demodulator_priv;
@@ -434,7 +437,8 @@ static int cx24110_send_diseqc_msg(struct dvb_frontend* fe,
 	return 0;
 }
 
-static int cx24110_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int cx24110_read_status(struct dvb_frontend *fe,
+			       enum fe_status *status)
 {
 	struct cx24110_state *state = fe->demodulator_priv;
 
@@ -574,7 +578,8 @@ static int cx24110_get_frontend(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int cx24110_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int cx24110_set_tone(struct dvb_frontend *fe,
+			    enum fe_sec_tone_mode tone)
 {
 	struct cx24110_state *state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/cx24116.c b/drivers/media/dvb-frontends/cx24116.c
index 7bc68b355c0b..8814f36d53fb 100644
--- a/drivers/media/dvb-frontends/cx24116.c
+++ b/drivers/media/dvb-frontends/cx24116.c
@@ -160,13 +160,13 @@ enum cmds {
 struct cx24116_tuning {
 	u32 frequency;
 	u32 symbol_rate;
-	fe_spectral_inversion_t inversion;
-	fe_code_rate_t fec;
+	enum fe_spectral_inversion inversion;
+	enum fe_code_rate fec;
 
-	fe_delivery_system_t delsys;
-	fe_modulation_t modulation;
-	fe_pilot_t pilot;
-	fe_rolloff_t rolloff;
+	enum fe_delivery_system delsys;
+	enum fe_modulation modulation;
+	enum fe_pilot pilot;
+	enum fe_rolloff rolloff;
 
 	/* Demod values */
 	u8 fec_val;
@@ -285,7 +285,7 @@ static int cx24116_readreg(struct cx24116_state *state, u8 reg)
 }
 
 static int cx24116_set_inversion(struct cx24116_state *state,
-	fe_spectral_inversion_t inversion)
+	enum fe_spectral_inversion inversion)
 {
 	dprintk("%s(%d)\n", __func__, inversion);
 
@@ -373,9 +373,9 @@ static int cx24116_set_inversion(struct cx24116_state *state,
  * a scheme are support. Especially, no auto detect when in S2 mode.
  */
 static struct cx24116_modfec {
-	fe_delivery_system_t delivery_system;
-	fe_modulation_t modulation;
-	fe_code_rate_t fec;
+	enum fe_delivery_system delivery_system;
+	enum fe_modulation modulation;
+	enum fe_code_rate fec;
 	u8 mask;	/* In DVBS mode this is used to autodetect */
 	u8 val;		/* Passed to the firmware to indicate mode selection */
 } CX24116_MODFEC_MODES[] = {
@@ -415,7 +415,7 @@ static struct cx24116_modfec {
 };
 
 static int cx24116_lookup_fecmod(struct cx24116_state *state,
-	fe_delivery_system_t d, fe_modulation_t m, fe_code_rate_t f)
+	enum fe_delivery_system d, enum fe_modulation m, enum fe_code_rate f)
 {
 	int i, ret = -EOPNOTSUPP;
 
@@ -434,7 +434,9 @@ static int cx24116_lookup_fecmod(struct cx24116_state *state,
 }
 
 static int cx24116_set_fec(struct cx24116_state *state,
-	fe_delivery_system_t delsys, fe_modulation_t mod, fe_code_rate_t fec)
+			   enum fe_delivery_system delsys,
+			   enum fe_modulation mod,
+			   enum fe_code_rate fec)
 {
 	int ret = 0;
 
@@ -683,7 +685,7 @@ static int cx24116_load_firmware(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int cx24116_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int cx24116_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cx24116_state *state = fe->demodulator_priv;
 
@@ -844,7 +846,7 @@ static int cx24116_wait_for_lnb(struct dvb_frontend *fe)
 }
 
 static int cx24116_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t voltage)
+	enum fe_sec_voltage voltage)
 {
 	struct cx24116_cmd cmd;
 	int ret;
@@ -872,7 +874,7 @@ static int cx24116_set_voltage(struct dvb_frontend *fe,
 }
 
 static int cx24116_set_tone(struct dvb_frontend *fe,
-	fe_sec_tone_mode_t tone)
+	enum fe_sec_tone_mode tone)
 {
 	struct cx24116_cmd cmd;
 	int ret;
@@ -1055,7 +1057,7 @@ static int cx24116_send_diseqc_msg(struct dvb_frontend *fe,
 
 /* Send DiSEqC burst */
 static int cx24116_diseqc_send_burst(struct dvb_frontend *fe,
-	fe_sec_mini_cmd_t burst)
+	enum fe_sec_mini_cmd burst)
 {
 	struct cx24116_state *state = fe->demodulator_priv;
 	int ret;
@@ -1220,7 +1222,7 @@ static int cx24116_set_frontend(struct dvb_frontend *fe)
 	struct cx24116_state *state = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	struct cx24116_cmd cmd;
-	fe_status_t tunerstat;
+	enum fe_status tunerstat;
 	int i, status, ret, retune = 1;
 
 	dprintk("%s()\n", __func__);
@@ -1441,7 +1443,7 @@ tuned:  /* Set/Reset B/W */
 }
 
 static int cx24116_tune(struct dvb_frontend *fe, bool re_tune,
-	unsigned int mode_flags, unsigned int *delay, fe_status_t *status)
+	unsigned int mode_flags, unsigned int *delay, enum fe_status *status)
 {
 	/*
 	 * It is safe to discard "params" here, as the DVB core will sync
diff --git a/drivers/media/dvb-frontends/cx24117.c b/drivers/media/dvb-frontends/cx24117.c
index af6363573efd..5f77bc80a896 100644
--- a/drivers/media/dvb-frontends/cx24117.c
+++ b/drivers/media/dvb-frontends/cx24117.c
@@ -171,13 +171,13 @@ static DEFINE_MUTEX(cx24117_list_mutex);
 struct cx24117_tuning {
 	u32 frequency;
 	u32 symbol_rate;
-	fe_spectral_inversion_t inversion;
-	fe_code_rate_t fec;
+	enum fe_spectral_inversion inversion;
+	enum fe_code_rate fec;
 
-	fe_delivery_system_t delsys;
-	fe_modulation_t modulation;
-	fe_pilot_t pilot;
-	fe_rolloff_t rolloff;
+	enum fe_delivery_system delsys;
+	enum fe_modulation modulation;
+	enum fe_pilot pilot;
+	enum fe_rolloff rolloff;
 
 	/* Demod values */
 	u8 fec_val;
@@ -220,9 +220,9 @@ struct cx24117_state {
 /* modfec (modulation and FEC) lookup table */
 /* Check cx24116.c for a detailed description of each field */
 static struct cx24117_modfec {
-	fe_delivery_system_t delivery_system;
-	fe_modulation_t modulation;
-	fe_code_rate_t fec;
+	enum fe_delivery_system delivery_system;
+	enum fe_modulation modulation;
+	enum fe_code_rate fec;
 	u8 mask;	/* In DVBS mode this is used to autodetect */
 	u8 val;		/* Passed to the firmware to indicate mode selection */
 } cx24117_modfec_modes[] = {
@@ -362,7 +362,7 @@ static int cx24117_readregN(struct cx24117_state *state,
 }
 
 static int cx24117_set_inversion(struct cx24117_state *state,
-	fe_spectral_inversion_t inversion)
+	enum fe_spectral_inversion inversion)
 {
 	dev_dbg(&state->priv->i2c->dev, "%s(%d) demod%d\n",
 		__func__, inversion, state->demod);
@@ -387,7 +387,7 @@ static int cx24117_set_inversion(struct cx24117_state *state,
 }
 
 static int cx24117_lookup_fecmod(struct cx24117_state *state,
-	fe_delivery_system_t d, fe_modulation_t m, fe_code_rate_t f)
+	enum fe_delivery_system d, enum fe_modulation m, enum fe_code_rate f)
 {
 	int i, ret = -EINVAL;
 
@@ -408,7 +408,9 @@ static int cx24117_lookup_fecmod(struct cx24117_state *state,
 }
 
 static int cx24117_set_fec(struct cx24117_state *state,
-	fe_delivery_system_t delsys, fe_modulation_t mod, fe_code_rate_t fec)
+			   enum fe_delivery_system delsys,
+			   enum fe_modulation mod,
+			   enum fe_code_rate fec)
 {
 	int ret;
 
@@ -737,7 +739,7 @@ error:
 	return ret;
 }
 
-static int cx24117_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int cx24117_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cx24117_state *state = fe->demodulator_priv;
 	int lock;
@@ -843,7 +845,7 @@ static int cx24117_read_snr(struct dvb_frontend *fe, u16 *snr)
 static int cx24117_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks)
 {
 	struct cx24117_state *state = fe->demodulator_priv;
-	fe_delivery_system_t delsys = fe->dtv_property_cache.delivery_system;
+	enum fe_delivery_system delsys = fe->dtv_property_cache.delivery_system;
 	int ret;
 	u8 buf[2];
 	u8 reg = (state->demod == 0) ?
@@ -904,7 +906,7 @@ static int cx24117_wait_for_lnb(struct dvb_frontend *fe)
 }
 
 static int cx24117_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t voltage)
+			       enum fe_sec_voltage voltage)
 {
 	struct cx24117_state *state = fe->demodulator_priv;
 	struct cx24117_cmd cmd;
@@ -956,7 +958,7 @@ static int cx24117_set_voltage(struct dvb_frontend *fe,
 }
 
 static int cx24117_set_tone(struct dvb_frontend *fe,
-	fe_sec_tone_mode_t tone)
+			    enum fe_sec_tone_mode tone)
 {
 	struct cx24117_state *state = fe->demodulator_priv;
 	struct cx24117_cmd cmd;
@@ -1112,7 +1114,7 @@ static int cx24117_send_diseqc_msg(struct dvb_frontend *fe,
 
 /* Send DiSEqC burst */
 static int cx24117_diseqc_send_burst(struct dvb_frontend *fe,
-	fe_sec_mini_cmd_t burst)
+	enum fe_sec_mini_cmd burst)
 {
 	struct cx24117_state *state = fe->demodulator_priv;
 
@@ -1306,7 +1308,7 @@ static int cx24117_set_frontend(struct dvb_frontend *fe)
 	struct cx24117_state *state = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	struct cx24117_cmd cmd;
-	fe_status_t tunerstat;
+	enum fe_status tunerstat;
 	int i, status, ret, retune = 1;
 	u8 reg_clkdiv, reg_ratediv;
 
@@ -1537,7 +1539,7 @@ static int cx24117_set_frontend(struct dvb_frontend *fe)
 }
 
 static int cx24117_tune(struct dvb_frontend *fe, bool re_tune,
-	unsigned int mode_flags, unsigned int *delay, fe_status_t *status)
+	unsigned int mode_flags, unsigned int *delay, enum fe_status *status)
 {
 	struct cx24117_state *state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/cx24120.c b/drivers/media/dvb-frontends/cx24120.c
index a1d1b1c44b40..3b0ef52bb834 100644
--- a/drivers/media/dvb-frontends/cx24120.c
+++ b/drivers/media/dvb-frontends/cx24120.c
@@ -118,12 +118,12 @@ enum command_message_id {
 struct cx24120_tuning {
 	u32 frequency;
 	u32 symbol_rate;
-	fe_spectral_inversion_t inversion;
-	fe_code_rate_t fec;
+	enum fe_spectral_inversion inversion;
+	enum fe_code_rate fec;
 
-	fe_delivery_system_t delsys;
-	fe_modulation_t modulation;
-	fe_pilot_t pilot;
+	enum fe_delivery_system delsys;
+	enum fe_modulation modulation;
+	enum fe_pilot pilot;
 
 	/* Demod values */
 	u8 fec_val;
@@ -148,7 +148,7 @@ struct cx24120_state {
 	struct cx24120_tuning dcur;
 	struct cx24120_tuning dnxt;
 
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 
 	/* dvbv5 stats calculations */
 	u32 bitrate;
@@ -491,7 +491,7 @@ static int cx24120_msg_mpeg_output_config(struct cx24120_state *state, u8 seq)
 }
 
 static int cx24120_diseqc_send_burst(struct dvb_frontend *fe,
-				     fe_sec_mini_cmd_t burst)
+				     enum fe_sec_mini_cmd burst)
 {
 	struct cx24120_state *state = fe->demodulator_priv;
 	struct cx24120_cmd cmd;
@@ -513,7 +513,7 @@ static int cx24120_diseqc_send_burst(struct dvb_frontend *fe,
 	return cx24120_message_send(state, &cmd);
 }
 
-static int cx24120_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int cx24120_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct cx24120_state *state = fe->demodulator_priv;
 	struct cx24120_cmd cmd;
@@ -536,7 +536,7 @@ static int cx24120_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
 }
 
 static int cx24120_set_voltage(struct dvb_frontend *fe,
-			       fe_sec_voltage_t voltage)
+			       enum fe_sec_voltage voltage)
 {
 	struct cx24120_state *state = fe->demodulator_priv;
 	struct cx24120_cmd cmd;
@@ -713,7 +713,7 @@ static void cx24120_get_stats(struct cx24120_state *state)
 static void cx24120_set_clock_ratios(struct dvb_frontend *fe);
 
 /* Read current tuning status */
-static int cx24120_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int cx24120_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cx24120_state *state = fe->demodulator_priv;
 	int lock;
@@ -765,9 +765,9 @@ static int cx24120_read_status(struct dvb_frontend *fe, fe_status_t *status)
  * once tuned in.
  */
 struct cx24120_modfec {
-	fe_delivery_system_t delsys;
-	fe_modulation_t mod;
-	fe_code_rate_t fec;
+	enum fe_delivery_system delsys;
+	enum fe_modulation mod;
+	enum fe_code_rate fec;
 	u8 val;
 };
 
@@ -871,10 +871,10 @@ static void cx24120_calculate_ber_window(struct cx24120_state *state, u32 rate)
  * can't determine the pattern
  */
 struct cx24120_clock_ratios_table {
-	fe_delivery_system_t delsys;
-	fe_pilot_t pilot;
-	fe_modulation_t mod;
-	fe_code_rate_t fec;
+	enum fe_delivery_system delsys;
+	enum fe_pilot pilot;
+	enum fe_modulation mod;
+	enum fe_code_rate fec;
 	u32 m_rat;
 	u32 n_rat;
 	u32 rate;
@@ -988,7 +988,7 @@ static void cx24120_set_clock_ratios(struct dvb_frontend *fe)
 
 /* Set inversion value */
 static int cx24120_set_inversion(struct cx24120_state *state,
-				 fe_spectral_inversion_t inversion)
+				 enum fe_spectral_inversion inversion)
 {
 	dev_dbg(&state->i2c->dev, "(%d)\n", inversion);
 
@@ -1013,9 +1013,9 @@ static int cx24120_set_inversion(struct cx24120_state *state,
 
 /* FEC lookup table for tuning */
 struct cx24120_modfec_table {
-	fe_delivery_system_t delsys;
-	fe_modulation_t mod;
-	fe_code_rate_t fec;
+	enum fe_delivery_system delsys;
+	enum fe_modulation mod;
+	enum fe_code_rate fec;
 	u8 val;
 };
 
@@ -1046,8 +1046,8 @@ static const struct cx24120_modfec_table modfec_table[] = {
 };
 
 /* Set fec_val & fec_mask values from delsys, modulation & fec */
-static int cx24120_set_fec(struct cx24120_state *state, fe_modulation_t mod,
-			   fe_code_rate_t fec)
+static int cx24120_set_fec(struct cx24120_state *state, enum fe_modulation mod,
+			   enum fe_code_rate fec)
 {
 	int idx;
 
@@ -1084,7 +1084,7 @@ static int cx24120_set_fec(struct cx24120_state *state, fe_modulation_t mod,
 }
 
 /* Set pilot */
-static int cx24120_set_pilot(struct cx24120_state *state, fe_pilot_t pilot)
+static int cx24120_set_pilot(struct cx24120_state *state, enum fe_pilot pilot)
 {
 	dev_dbg(&state->i2c->dev, "(%d)\n", pilot);
 
@@ -1474,7 +1474,7 @@ static int cx24120_init(struct dvb_frontend *fe)
 
 static int cx24120_tune(struct dvb_frontend *fe, bool re_tune,
 			unsigned int mode_flags, unsigned int *delay,
-			fe_status_t *status)
+			enum fe_status *status)
 {
 	struct cx24120_state *state = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/cx24123.c b/drivers/media/dvb-frontends/cx24123.c
index 7975c6608e20..e18cf9e1185e 100644
--- a/drivers/media/dvb-frontends/cx24123.c
+++ b/drivers/media/dvb-frontends/cx24123.c
@@ -290,7 +290,7 @@ static int cx24123_i2c_readreg(struct cx24123_state *state, u8 i2c_addr, u8 reg)
 	cx24123_i2c_writereg(state, state->config->demod_address, reg, val)
 
 static int cx24123_set_inversion(struct cx24123_state *state,
-	fe_spectral_inversion_t inversion)
+				 enum fe_spectral_inversion inversion)
 {
 	u8 nom_reg = cx24123_readreg(state, 0x0e);
 	u8 auto_reg = cx24123_readreg(state, 0x10);
@@ -318,7 +318,7 @@ static int cx24123_set_inversion(struct cx24123_state *state,
 }
 
 static int cx24123_get_inversion(struct cx24123_state *state,
-	fe_spectral_inversion_t *inversion)
+				 enum fe_spectral_inversion *inversion)
 {
 	u8 val;
 
@@ -335,7 +335,7 @@ static int cx24123_get_inversion(struct cx24123_state *state,
 	return 0;
 }
 
-static int cx24123_set_fec(struct cx24123_state *state, fe_code_rate_t fec)
+static int cx24123_set_fec(struct cx24123_state *state, enum fe_code_rate fec)
 {
 	u8 nom_reg = cx24123_readreg(state, 0x0e) & ~0x07;
 
@@ -397,7 +397,7 @@ static int cx24123_set_fec(struct cx24123_state *state, fe_code_rate_t fec)
 	return 0;
 }
 
-static int cx24123_get_fec(struct cx24123_state *state, fe_code_rate_t *fec)
+static int cx24123_get_fec(struct cx24123_state *state, enum fe_code_rate *fec)
 {
 	int ret;
 
@@ -720,7 +720,7 @@ static int cx24123_initfe(struct dvb_frontend *fe)
 }
 
 static int cx24123_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t voltage)
+			       enum fe_sec_voltage voltage)
 {
 	struct cx24123_state *state = fe->demodulator_priv;
 	u8 val;
@@ -795,7 +795,7 @@ static int cx24123_send_diseqc_msg(struct dvb_frontend *fe,
 }
 
 static int cx24123_diseqc_send_burst(struct dvb_frontend *fe,
-	fe_sec_mini_cmd_t burst)
+				     enum fe_sec_mini_cmd burst)
 {
 	struct cx24123_state *state = fe->demodulator_priv;
 	int val, tone;
@@ -831,7 +831,7 @@ static int cx24123_diseqc_send_burst(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int cx24123_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int cx24123_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cx24123_state *state = fe->demodulator_priv;
 	int sync = cx24123_readreg(state, 0x14);
@@ -966,7 +966,7 @@ static int cx24123_get_frontend(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int cx24123_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int cx24123_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct cx24123_state *state = fe->demodulator_priv;
 	u8 val;
@@ -995,7 +995,7 @@ static int cx24123_tune(struct dvb_frontend *fe,
 			bool re_tune,
 			unsigned int mode_flags,
 			unsigned int *delay,
-			fe_status_t *status)
+			enum fe_status *status)
 {
 	int retval = 0;
 
diff --git a/drivers/media/dvb-frontends/cxd2820r_c.c b/drivers/media/dvb-frontends/cxd2820r_c.c
index 72b0e2db3aab..42fad6aa3958 100644
--- a/drivers/media/dvb-frontends/cxd2820r_c.c
+++ b/drivers/media/dvb-frontends/cxd2820r_c.c
@@ -259,7 +259,7 @@ int cxd2820r_read_ucblocks_c(struct dvb_frontend *fe, u32 *ucblocks)
 	return 0;
 }
 
-int cxd2820r_read_status_c(struct dvb_frontend *fe, fe_status_t *status)
+int cxd2820r_read_status_c(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cxd2820r_priv *priv = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/cxd2820r_core.c b/drivers/media/dvb-frontends/cxd2820r_core.c
index 490e090048ef..def6d21d1445 100644
--- a/drivers/media/dvb-frontends/cxd2820r_core.c
+++ b/drivers/media/dvb-frontends/cxd2820r_core.c
@@ -287,7 +287,8 @@ static int cxd2820r_set_frontend(struct dvb_frontend *fe)
 err:
 	return ret;
 }
-static int cxd2820r_read_status(struct dvb_frontend *fe, fe_status_t *status)
+
+static int cxd2820r_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cxd2820r_priv *priv = fe->demodulator_priv;
 	int ret;
@@ -501,7 +502,7 @@ static enum dvbfe_search cxd2820r_search(struct dvb_frontend *fe)
 	struct cxd2820r_priv *priv = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	int ret, i;
-	fe_status_t status = 0;
+	enum fe_status status = 0;
 
 	dev_dbg(&priv->i2c->dev, "%s: delsys=%d\n", __func__,
 			fe->dtv_property_cache.delivery_system);
diff --git a/drivers/media/dvb-frontends/cxd2820r_priv.h b/drivers/media/dvb-frontends/cxd2820r_priv.h
index 4b428959b16e..a0d53f01a8bf 100644
--- a/drivers/media/dvb-frontends/cxd2820r_priv.h
+++ b/drivers/media/dvb-frontends/cxd2820r_priv.h
@@ -48,7 +48,7 @@ struct cxd2820r_priv {
 	struct gpio_chip gpio_chip;
 #endif
 
-	fe_delivery_system_t delivery_system;
+	enum fe_delivery_system delivery_system;
 	bool last_tune_failed; /* for switch between T and T2 tune */
 };
 
@@ -80,7 +80,7 @@ int cxd2820r_get_frontend_c(struct dvb_frontend *fe);
 
 int cxd2820r_set_frontend_c(struct dvb_frontend *fe);
 
-int cxd2820r_read_status_c(struct dvb_frontend *fe, fe_status_t *status);
+int cxd2820r_read_status_c(struct dvb_frontend *fe, enum fe_status *status);
 
 int cxd2820r_read_ber_c(struct dvb_frontend *fe, u32 *ber);
 
@@ -103,7 +103,7 @@ int cxd2820r_get_frontend_t(struct dvb_frontend *fe);
 
 int cxd2820r_set_frontend_t(struct dvb_frontend *fe);
 
-int cxd2820r_read_status_t(struct dvb_frontend *fe, fe_status_t *status);
+int cxd2820r_read_status_t(struct dvb_frontend *fe, enum fe_status *status);
 
 int cxd2820r_read_ber_t(struct dvb_frontend *fe, u32 *ber);
 
@@ -126,7 +126,7 @@ int cxd2820r_get_frontend_t2(struct dvb_frontend *fe);
 
 int cxd2820r_set_frontend_t2(struct dvb_frontend *fe);
 
-int cxd2820r_read_status_t2(struct dvb_frontend *fe, fe_status_t *status);
+int cxd2820r_read_status_t2(struct dvb_frontend *fe, enum fe_status *status);
 
 int cxd2820r_read_ber_t2(struct dvb_frontend *fe, u32 *ber);
 
diff --git a/drivers/media/dvb-frontends/cxd2820r_t.c b/drivers/media/dvb-frontends/cxd2820r_t.c
index 008cb2ac8480..21abf1b4ed4d 100644
--- a/drivers/media/dvb-frontends/cxd2820r_t.c
+++ b/drivers/media/dvb-frontends/cxd2820r_t.c
@@ -349,7 +349,7 @@ int cxd2820r_read_ucblocks_t(struct dvb_frontend *fe, u32 *ucblocks)
 	return 0;
 }
 
-int cxd2820r_read_status_t(struct dvb_frontend *fe, fe_status_t *status)
+int cxd2820r_read_status_t(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cxd2820r_priv *priv = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/cxd2820r_t2.c b/drivers/media/dvb-frontends/cxd2820r_t2.c
index 35fe364c7182..4e028b41c0d5 100644
--- a/drivers/media/dvb-frontends/cxd2820r_t2.c
+++ b/drivers/media/dvb-frontends/cxd2820r_t2.c
@@ -284,7 +284,7 @@ error:
 	return ret;
 }
 
-int cxd2820r_read_status_t2(struct dvb_frontend *fe, fe_status_t *status)
+int cxd2820r_read_status_t2(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct cxd2820r_priv *priv = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/dib3000mb.c b/drivers/media/dvb-frontends/dib3000mb.c
index af91e0c92339..7a61172d0d45 100644
--- a/drivers/media/dvb-frontends/dib3000mb.c
+++ b/drivers/media/dvb-frontends/dib3000mb.c
@@ -118,7 +118,7 @@ static int dib3000mb_set_frontend(struct dvb_frontend *fe, int tuner)
 {
 	struct dib3000_state* state = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
-	fe_code_rate_t fe_cr = FEC_NONE;
+	enum fe_code_rate fe_cr = FEC_NONE;
 	int search_state, seq;
 
 	if (tuner && fe->ops.tuner_ops.set_params) {
@@ -454,7 +454,7 @@ static int dib3000mb_get_frontend(struct dvb_frontend* fe)
 {
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	struct dib3000_state* state = fe->demodulator_priv;
-	fe_code_rate_t *cr;
+	enum fe_code_rate *cr;
 	u16 tps_val;
 	int inv_test1,inv_test2;
 	u32 dds_val, threshold = 0x800000;
@@ -611,7 +611,8 @@ static int dib3000mb_get_frontend(struct dvb_frontend* fe)
 	return 0;
 }
 
-static int dib3000mb_read_status(struct dvb_frontend* fe, fe_status_t *stat)
+static int dib3000mb_read_status(struct dvb_frontend *fe,
+				 enum fe_status *stat)
 {
 	struct dib3000_state* state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/dib3000mc.c b/drivers/media/dvb-frontends/dib3000mc.c
index a9b8081a0fbc..583d6b7fabed 100644
--- a/drivers/media/dvb-frontends/dib3000mc.c
+++ b/drivers/media/dvb-frontends/dib3000mc.c
@@ -736,7 +736,7 @@ static int dib3000mc_set_frontend(struct dvb_frontend *fe)
 	return ret;
 }
 
-static int dib3000mc_read_status(struct dvb_frontend *fe, fe_status_t *stat)
+static int dib3000mc_read_status(struct dvb_frontend *fe, enum fe_status *stat)
 {
 	struct dib3000mc_state *state = fe->demodulator_priv;
 	u16 lock = dib3000mc_read_word(state, 509);
diff --git a/drivers/media/dvb-frontends/dib7000m.c b/drivers/media/dvb-frontends/dib7000m.c
index dcb9a15ef0c2..35eb71fe3c2b 100644
--- a/drivers/media/dvb-frontends/dib7000m.c
+++ b/drivers/media/dvb-frontends/dib7000m.c
@@ -1256,7 +1256,7 @@ static int dib7000m_set_frontend(struct dvb_frontend *fe)
 	return ret;
 }
 
-static int dib7000m_read_status(struct dvb_frontend *fe, fe_status_t *stat)
+static int dib7000m_read_status(struct dvb_frontend *fe, enum fe_status *stat)
 {
 	struct dib7000m_state *state = fe->demodulator_priv;
 	u16 lock = dib7000m_read_word(state, 535);
diff --git a/drivers/media/dvb-frontends/dib7000p.c b/drivers/media/dvb-frontends/dib7000p.c
index c505d696f92d..33be5d6b9e10 100644
--- a/drivers/media/dvb-frontends/dib7000p.c
+++ b/drivers/media/dvb-frontends/dib7000p.c
@@ -1558,9 +1558,9 @@ static int dib7000p_set_frontend(struct dvb_frontend *fe)
 	return ret;
 }
 
-static int dib7000p_get_stats(struct dvb_frontend *fe, fe_status_t stat);
+static int dib7000p_get_stats(struct dvb_frontend *fe, enum fe_status stat);
 
-static int dib7000p_read_status(struct dvb_frontend *fe, fe_status_t * stat)
+static int dib7000p_read_status(struct dvb_frontend *fe, enum fe_status *stat)
 {
 	struct dib7000p_state *state = fe->demodulator_priv;
 	u16 lock = dib7000p_read_word(state, 509);
@@ -1877,7 +1877,7 @@ static u32 dib7000p_get_time_us(struct dvb_frontend *demod)
 	return time_us;
 }
 
-static int dib7000p_get_stats(struct dvb_frontend *demod, fe_status_t stat)
+static int dib7000p_get_stats(struct dvb_frontend *demod, enum fe_status stat)
 {
 	struct dib7000p_state *state = demod->demodulator_priv;
 	struct dtv_frontend_properties *c = &demod->dtv_property_cache;
diff --git a/drivers/media/dvb-frontends/dib8000.c b/drivers/media/dvb-frontends/dib8000.c
index 8c6663b6399d..94c26270fff0 100644
--- a/drivers/media/dvb-frontends/dib8000.c
+++ b/drivers/media/dvb-frontends/dib8000.c
@@ -3380,13 +3380,13 @@ static int dib8000_sleep(struct dvb_frontend *fe)
 	return dib8000_set_adc_state(state, DIBX000_SLOW_ADC_OFF) | dib8000_set_adc_state(state, DIBX000_ADC_OFF);
 }
 
-static int dib8000_read_status(struct dvb_frontend *fe, fe_status_t * stat);
+static int dib8000_read_status(struct dvb_frontend *fe, enum fe_status *stat);
 
 static int dib8000_get_frontend(struct dvb_frontend *fe)
 {
 	struct dib8000_state *state = fe->demodulator_priv;
 	u16 i, val = 0;
-	fe_status_t stat = 0;
+	enum fe_status stat = 0;
 	u8 index_frontend, sub_index_frontend;
 
 	fe->dtv_property_cache.bandwidth_hz = 6000000;
@@ -3733,9 +3733,9 @@ static int dib8000_set_frontend(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int dib8000_get_stats(struct dvb_frontend *fe, fe_status_t stat);
+static int dib8000_get_stats(struct dvb_frontend *fe, enum fe_status stat);
 
-static int dib8000_read_status(struct dvb_frontend *fe, fe_status_t * stat)
+static int dib8000_read_status(struct dvb_frontend *fe, enum fe_status *stat)
 {
 	struct dib8000_state *state = fe->demodulator_priv;
 	u16 lock_slave = 0, lock;
@@ -4089,7 +4089,7 @@ static u32 dib8000_get_time_us(struct dvb_frontend *fe, int layer)
 	return time_us;
 }
 
-static int dib8000_get_stats(struct dvb_frontend *fe, fe_status_t stat)
+static int dib8000_get_stats(struct dvb_frontend *fe, enum fe_status stat)
 {
 	struct dib8000_state *state = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &state->fe[0]->dtv_property_cache;
diff --git a/drivers/media/dvb-frontends/dib9000.c b/drivers/media/dvb-frontends/dib9000.c
index f75dec443783..8f92aca0b073 100644
--- a/drivers/media/dvb-frontends/dib9000.c
+++ b/drivers/media/dvb-frontends/dib9000.c
@@ -1893,7 +1893,7 @@ static int dib9000_get_frontend(struct dvb_frontend *fe)
 {
 	struct dib9000_state *state = fe->demodulator_priv;
 	u8 index_frontend, sub_index_frontend;
-	fe_status_t stat;
+	enum fe_status stat;
 	int ret = 0;
 
 	if (state->get_frontend_internal == 0) {
@@ -2161,7 +2161,7 @@ static u16 dib9000_read_lock(struct dvb_frontend *fe)
 	return dib9000_read_word(state, 535);
 }
 
-static int dib9000_read_status(struct dvb_frontend *fe, fe_status_t * stat)
+static int dib9000_read_status(struct dvb_frontend *fe, enum fe_status *stat)
 {
 	struct dib9000_state *state = fe->demodulator_priv;
 	u8 index_frontend;
diff --git a/drivers/media/dvb-frontends/drx39xyj/drxj.c b/drivers/media/dvb-frontends/drx39xyj/drxj.c
index 52245354bf04..b28b5787b39a 100644
--- a/drivers/media/dvb-frontends/drx39xyj/drxj.c
+++ b/drivers/media/dvb-frontends/drx39xyj/drxj.c
@@ -11946,7 +11946,7 @@ static int drx39xxj_set_powerstate(struct dvb_frontend *fe, int enable)
 	return 0;
 }
 
-static int drx39xxj_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int drx39xxj_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct drx39xxj_state *state = fe->demodulator_priv;
 	struct drx_demod_instance *demod = state->demod;
diff --git a/drivers/media/dvb-frontends/drxd_hard.c b/drivers/media/dvb-frontends/drxd_hard.c
index 687e893d29fe..34b9441840da 100644
--- a/drivers/media/dvb-frontends/drxd_hard.c
+++ b/drivers/media/dvb-frontends/drxd_hard.c
@@ -2805,7 +2805,7 @@ static int drxd_read_signal_strength(struct dvb_frontend *fe, u16 * strength)
 	return 0;
 }
 
-static int drxd_read_status(struct dvb_frontend *fe, fe_status_t * status)
+static int drxd_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct drxd_state *state = fe->demodulator_priv;
 	u32 lock;
diff --git a/drivers/media/dvb-frontends/drxk_hard.c b/drivers/media/dvb-frontends/drxk_hard.c
index b1fc4bd44a2b..b975da099929 100644
--- a/drivers/media/dvb-frontends/drxk_hard.c
+++ b/drivers/media/dvb-frontends/drxk_hard.c
@@ -6640,7 +6640,7 @@ error:
 }
 
 
-static int drxk_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int drxk_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct drxk_state *state = fe->demodulator_priv;
 	int rc;
diff --git a/drivers/media/dvb-frontends/drxk_hard.h b/drivers/media/dvb-frontends/drxk_hard.h
index bae9c71dc3e9..9ed88e014942 100644
--- a/drivers/media/dvb-frontends/drxk_hard.h
+++ b/drivers/media/dvb-frontends/drxk_hard.h
@@ -350,7 +350,7 @@ struct drxk_state {
 	bool	antenna_dvbt;
 	u16	antenna_gpio;
 
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 
 	/* Firmware */
 	const char *microcode_name;
diff --git a/drivers/media/dvb-frontends/ds3000.c b/drivers/media/dvb-frontends/ds3000.c
index 9d0d0347758f..e8fc0329ea64 100644
--- a/drivers/media/dvb-frontends/ds3000.c
+++ b/drivers/media/dvb-frontends/ds3000.c
@@ -404,7 +404,8 @@ static int ds3000_load_firmware(struct dvb_frontend *fe,
 	return ret;
 }
 
-static int ds3000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int ds3000_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage)
 {
 	struct ds3000_state *state = fe->demodulator_priv;
 	u8 data;
@@ -431,7 +432,7 @@ static int ds3000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
 	return 0;
 }
 
-static int ds3000_read_status(struct dvb_frontend *fe, fe_status_t* status)
+static int ds3000_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct ds3000_state *state = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
@@ -666,7 +667,7 @@ static int ds3000_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks)
 	return 0;
 }
 
-static int ds3000_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int ds3000_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct ds3000_state *state = fe->demodulator_priv;
 	u8 data;
@@ -766,7 +767,7 @@ static int ds3000_send_diseqc_msg(struct dvb_frontend *fe,
 
 /* Send DiSEqC burst */
 static int ds3000_diseqc_send_burst(struct dvb_frontend *fe,
-					fe_sec_mini_cmd_t burst)
+				    enum fe_sec_mini_cmd burst)
 {
 	struct ds3000_state *state = fe->demodulator_priv;
 	int i;
@@ -905,7 +906,7 @@ static int ds3000_set_frontend(struct dvb_frontend *fe)
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 
 	int i;
-	fe_status_t status;
+	enum fe_status status;
 	s32 offset_khz;
 	u32 frequency;
 	u16 value;
@@ -1045,7 +1046,7 @@ static int ds3000_tune(struct dvb_frontend *fe,
 			bool re_tune,
 			unsigned int mode_flags,
 			unsigned int *delay,
-			fe_status_t *status)
+			enum fe_status *status)
 {
 	if (re_tune) {
 		int ret = ds3000_set_frontend(fe);
diff --git a/drivers/media/dvb-frontends/dvb_dummy_fe.c b/drivers/media/dvb-frontends/dvb_dummy_fe.c
index d5acc304786b..14e996d45fac 100644
--- a/drivers/media/dvb-frontends/dvb_dummy_fe.c
+++ b/drivers/media/dvb-frontends/dvb_dummy_fe.c
@@ -33,7 +33,8 @@ struct dvb_dummy_fe_state {
 };
 
 
-static int dvb_dummy_fe_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int dvb_dummy_fe_read_status(struct dvb_frontend *fe,
+				    enum fe_status *status)
 {
 	*status = FE_HAS_SIGNAL
 		| FE_HAS_CARRIER
@@ -97,12 +98,14 @@ static int dvb_dummy_fe_init(struct dvb_frontend* fe)
 	return 0;
 }
 
-static int dvb_dummy_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int dvb_dummy_fe_set_tone(struct dvb_frontend *fe,
+				 enum fe_sec_tone_mode tone)
 {
 	return 0;
 }
 
-static int dvb_dummy_fe_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int dvb_dummy_fe_set_voltage(struct dvb_frontend *fe,
+				    enum fe_sec_voltage voltage)
 {
 	return 0;
 }
diff --git a/drivers/media/dvb-frontends/ec100.c b/drivers/media/dvb-frontends/ec100.c
index 9d424809d06b..c9012e677cd1 100644
--- a/drivers/media/dvb-frontends/ec100.c
+++ b/drivers/media/dvb-frontends/ec100.c
@@ -174,7 +174,7 @@ static int ec100_get_tune_settings(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int ec100_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int ec100_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct ec100_state *state = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/hd29l2.c b/drivers/media/dvb-frontends/hd29l2.c
index 67c8e6df42e8..40e359f2d17d 100644
--- a/drivers/media/dvb-frontends/hd29l2.c
+++ b/drivers/media/dvb-frontends/hd29l2.c
@@ -211,7 +211,7 @@ err:
 	return ret;
 }
 
-static int hd29l2_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int hd29l2_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	int ret;
 	struct hd29l2_priv *priv = fe->demodulator_priv;
diff --git a/drivers/media/dvb-frontends/hd29l2_priv.h b/drivers/media/dvb-frontends/hd29l2_priv.h
index 4d571a2282d4..6dc225c4bc91 100644
--- a/drivers/media/dvb-frontends/hd29l2_priv.h
+++ b/drivers/media/dvb-frontends/hd29l2_priv.h
@@ -67,7 +67,7 @@ struct hd29l2_priv {
 	struct hd29l2_config cfg;
 	u8 tuner_i2c_addr_programmed:1;
 
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 };
 
 static const struct reg_mod_vals reg_mod_vals_tab[] = {
diff --git a/drivers/media/dvb-frontends/isl6405.c b/drivers/media/dvb-frontends/isl6405.c
index 0c642a5bf823..b46450a10b80 100644
--- a/drivers/media/dvb-frontends/isl6405.c
+++ b/drivers/media/dvb-frontends/isl6405.c
@@ -43,7 +43,8 @@ struct isl6405 {
 	u8			i2c_addr;
 };
 
-static int isl6405_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int isl6405_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct isl6405 *isl6405 = (struct isl6405 *) fe->sec_priv;
 	struct i2c_msg msg = {	.addr = isl6405->i2c_addr, .flags = 0,
diff --git a/drivers/media/dvb-frontends/isl6421.c b/drivers/media/dvb-frontends/isl6421.c
index c77002fcc8e2..3a4d4606a426 100644
--- a/drivers/media/dvb-frontends/isl6421.c
+++ b/drivers/media/dvb-frontends/isl6421.c
@@ -43,7 +43,8 @@ struct isl6421 {
 	u8			i2c_addr;
 };
 
-static int isl6421_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int isl6421_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct isl6421 *isl6421 = (struct isl6421 *) fe->sec_priv;
 	struct i2c_msg msg = {	.addr = isl6421->i2c_addr, .flags = 0,
@@ -89,7 +90,8 @@ static int isl6421_enable_high_lnb_voltage(struct dvb_frontend *fe, long arg)
 	return (i2c_transfer(isl6421->i2c, &msg, 1) == 1) ? 0 : -EIO;
 }
 
-static int isl6421_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int isl6421_set_tone(struct dvb_frontend *fe,
+			    enum fe_sec_tone_mode tone)
 {
 	struct isl6421 *isl6421 = (struct isl6421 *) fe->sec_priv;
 	struct i2c_msg msg = { .addr = isl6421->i2c_addr, .flags = 0,
diff --git a/drivers/media/dvb-frontends/l64781.c b/drivers/media/dvb-frontends/l64781.c
index ddf866c46f8b..0977871232a2 100644
--- a/drivers/media/dvb-frontends/l64781.c
+++ b/drivers/media/dvb-frontends/l64781.c
@@ -359,7 +359,7 @@ static int get_frontend(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int l64781_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int l64781_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct l64781_state* state = fe->demodulator_priv;
 	int sync = l64781_readreg (state, 0x32);
diff --git a/drivers/media/dvb-frontends/lg2160.c b/drivers/media/dvb-frontends/lg2160.c
index 99efeba3c31a..7880f71ccd8a 100644
--- a/drivers/media/dvb-frontends/lg2160.c
+++ b/drivers/media/dvb-frontends/lg2160.c
@@ -1203,7 +1203,7 @@ static int lg216x_read_lock_status(struct lg216x_state *state,
 #endif
 }
 
-static int lg216x_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int lg216x_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct lg216x_state *state = fe->demodulator_priv;
 	int ret, acq_lock, sync_lock;
diff --git a/drivers/media/dvb-frontends/lgdt3305.c b/drivers/media/dvb-frontends/lgdt3305.c
index d08570af1c10..47121866163d 100644
--- a/drivers/media/dvb-frontends/lgdt3305.c
+++ b/drivers/media/dvb-frontends/lgdt3305.c
@@ -60,7 +60,7 @@ struct lgdt3305_state {
 
 	struct dvb_frontend frontend;
 
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 	u32 current_frequency;
 	u32 snr;
 };
@@ -912,7 +912,7 @@ fail:
 	return ret;
 }
 
-static int lgdt3305_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int lgdt3305_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct lgdt3305_state *state = fe->demodulator_priv;
 	u8 val;
diff --git a/drivers/media/dvb-frontends/lgdt3306a.c b/drivers/media/dvb-frontends/lgdt3306a.c
index 0e2e43e9ede5..721fbc07e9ee 100644
--- a/drivers/media/dvb-frontends/lgdt3306a.c
+++ b/drivers/media/dvb-frontends/lgdt3306a.c
@@ -62,7 +62,7 @@ struct lgdt3306a_state {
 
 	struct dvb_frontend frontend;
 
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 	u32 current_frequency;
 	u32 snr;
 };
@@ -1558,7 +1558,8 @@ lgdt3306a_qam_lock_poll(struct lgdt3306a_state *state)
 	return LG3306_UNLOCK;
 }
 
-static int lgdt3306a_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int lgdt3306a_read_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct lgdt3306a_state *state = fe->demodulator_priv;
 	u16 strength = 0;
@@ -1705,7 +1706,7 @@ static int lgdt3306a_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks)
 
 static int lgdt3306a_tune(struct dvb_frontend *fe, bool re_tune,
 			  unsigned int mode_flags, unsigned int *delay,
-			  fe_status_t *status)
+			  enum fe_status *status)
 {
 	int ret = 0;
 	struct lgdt3306a_state *state = fe->demodulator_priv;
@@ -1735,7 +1736,7 @@ static int lgdt3306a_get_tune_settings(struct dvb_frontend *fe,
 
 static int lgdt3306a_search(struct dvb_frontend *fe)
 {
-	fe_status_t status = 0;
+	enum fe_status status = 0;
 	int i, ret;
 
 	/* set frontend */
diff --git a/drivers/media/dvb-frontends/lgdt330x.c b/drivers/media/dvb-frontends/lgdt330x.c
index 2e1a61893fc1..cf3cc20510da 100644
--- a/drivers/media/dvb-frontends/lgdt330x.c
+++ b/drivers/media/dvb-frontends/lgdt330x.c
@@ -67,7 +67,7 @@ struct lgdt330x_state
 	struct dvb_frontend frontend;
 
 	/* Demodulator private data */
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 	u32 snr; /* Result of last SNR calculation */
 
 	/* Tuner private data */
@@ -447,7 +447,8 @@ static int lgdt330x_get_frontend(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int lgdt3302_read_status(struct dvb_frontend *fe,
+				enum fe_status *status)
 {
 	struct lgdt330x_state* state = fe->demodulator_priv;
 	u8 buf[3];
@@ -505,7 +506,8 @@ static int lgdt3302_read_status(struct dvb_frontend* fe, fe_status_t* status)
 	return 0;
 }
 
-static int lgdt3303_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int lgdt3303_read_status(struct dvb_frontend *fe,
+				enum fe_status *status)
 {
 	struct lgdt330x_state* state = fe->demodulator_priv;
 	int err;
diff --git a/drivers/media/dvb-frontends/lgs8gl5.c b/drivers/media/dvb-frontends/lgs8gl5.c
index 416cce3fefc7..7bbb2c18c2dd 100644
--- a/drivers/media/dvb-frontends/lgs8gl5.c
+++ b/drivers/media/dvb-frontends/lgs8gl5.c
@@ -249,7 +249,7 @@ lgs8gl5_init(struct dvb_frontend *fe)
 
 
 static int
-lgs8gl5_read_status(struct dvb_frontend *fe, fe_status_t *status)
+lgs8gl5_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct lgs8gl5_state *state = fe->demodulator_priv;
 	u8 level = lgs8gl5_read_reg(state, REG_STRENGTH);
diff --git a/drivers/media/dvb-frontends/lgs8gxx.c b/drivers/media/dvb-frontends/lgs8gxx.c
index 3c92f36ea5c7..e2c191c8b196 100644
--- a/drivers/media/dvb-frontends/lgs8gxx.c
+++ b/drivers/media/dvb-frontends/lgs8gxx.c
@@ -732,7 +732,8 @@ int lgs8gxx_get_tune_settings(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int lgs8gxx_read_status(struct dvb_frontend *fe, fe_status_t *fe_status)
+static int lgs8gxx_read_status(struct dvb_frontend *fe,
+			       enum fe_status *fe_status)
 {
 	struct lgs8gxx_state *priv = fe->demodulator_priv;
 	s8 ret;
diff --git a/drivers/media/dvb-frontends/lnbp21.c b/drivers/media/dvb-frontends/lnbp21.c
index f3ba7b5faa2e..4aca0fb9a8a7 100644
--- a/drivers/media/dvb-frontends/lnbp21.c
+++ b/drivers/media/dvb-frontends/lnbp21.c
@@ -45,7 +45,7 @@ struct lnbp21 {
 };
 
 static int lnbp21_set_voltage(struct dvb_frontend *fe,
-					fe_sec_voltage_t voltage)
+			      enum fe_sec_voltage voltage)
 {
 	struct lnbp21 *lnbp21 = (struct lnbp21 *) fe->sec_priv;
 	struct i2c_msg msg = {	.addr = lnbp21->i2c_addr, .flags = 0,
@@ -92,7 +92,7 @@ static int lnbp21_enable_high_lnb_voltage(struct dvb_frontend *fe, long arg)
 }
 
 static int lnbp21_set_tone(struct dvb_frontend *fe,
-				fe_sec_tone_mode_t tone)
+			   enum fe_sec_tone_mode tone)
 {
 	struct lnbp21 *lnbp21 = (struct lnbp21 *) fe->sec_priv;
 	struct i2c_msg msg = {	.addr = lnbp21->i2c_addr, .flags = 0,
diff --git a/drivers/media/dvb-frontends/lnbp22.c b/drivers/media/dvb-frontends/lnbp22.c
index c463da7f6dcc..d7ca0fdd0084 100644
--- a/drivers/media/dvb-frontends/lnbp22.c
+++ b/drivers/media/dvb-frontends/lnbp22.c
@@ -48,7 +48,8 @@ struct lnbp22 {
 	struct i2c_adapter *i2c;
 };
 
-static int lnbp22_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int lnbp22_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage)
 {
 	struct lnbp22 *lnbp22 = (struct lnbp22 *)fe->sec_priv;
 	struct i2c_msg msg = {
diff --git a/drivers/media/dvb-frontends/m88ds3103.c b/drivers/media/dvb-frontends/m88ds3103.c
index 7b21f1ad4542..c24b15238a8e 100644
--- a/drivers/media/dvb-frontends/m88ds3103.c
+++ b/drivers/media/dvb-frontends/m88ds3103.c
@@ -186,7 +186,8 @@ err:
 	return ret;
 }
 
-static int m88ds3103_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int m88ds3103_read_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct m88ds3103_priv *priv = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
@@ -1094,7 +1095,7 @@ static int m88ds3103_read_ber(struct dvb_frontend *fe, u32 *ber)
 }
 
 static int m88ds3103_set_tone(struct dvb_frontend *fe,
-	fe_sec_tone_mode_t fe_sec_tone_mode)
+	enum fe_sec_tone_mode fe_sec_tone_mode)
 {
 	struct m88ds3103_priv *priv = fe->demodulator_priv;
 	int ret;
@@ -1141,7 +1142,7 @@ err:
 }
 
 static int m88ds3103_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t fe_sec_voltage)
+	enum fe_sec_voltage fe_sec_voltage)
 {
 	struct m88ds3103_priv *priv = fe->demodulator_priv;
 	int ret;
@@ -1268,7 +1269,7 @@ err:
 }
 
 static int m88ds3103_diseqc_send_burst(struct dvb_frontend *fe,
-	fe_sec_mini_cmd_t fe_sec_mini_cmd)
+	enum fe_sec_mini_cmd fe_sec_mini_cmd)
 {
 	struct m88ds3103_priv *priv = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/m88ds3103_priv.h b/drivers/media/dvb-frontends/m88ds3103_priv.h
index 6217d928c23e..9d7d33430e40 100644
--- a/drivers/media/dvb-frontends/m88ds3103_priv.h
+++ b/drivers/media/dvb-frontends/m88ds3103_priv.h
@@ -38,8 +38,8 @@ struct m88ds3103_priv {
 	struct m88ds3103_config config;
 	const struct m88ds3103_config *cfg;
 	struct dvb_frontend fe;
-	fe_delivery_system_t delivery_system;
-	fe_status_t fe_status;
+	enum fe_delivery_system delivery_system;
+	enum fe_status fe_status;
 	u32 dvbv3_ber; /* for old DVBv3 API read_ber */
 	bool warm; /* FW running */
 	struct i2c_adapter *i2c_adapter;
diff --git a/drivers/media/dvb-frontends/m88rs2000.c b/drivers/media/dvb-frontends/m88rs2000.c
index d63bc9c13dce..9b6f464c48bd 100644
--- a/drivers/media/dvb-frontends/m88rs2000.c
+++ b/drivers/media/dvb-frontends/m88rs2000.c
@@ -41,7 +41,7 @@ struct m88rs2000_state {
 	u8 no_lock_count;
 	u32 tuner_frequency;
 	u32 symbol_rate;
-	fe_code_rate_t fec_inner;
+	enum fe_code_rate fec_inner;
 	u8 tuner_level;
 	int errmode;
 };
@@ -247,7 +247,7 @@ static int m88rs2000_send_diseqc_msg(struct dvb_frontend *fe,
 }
 
 static int m88rs2000_send_diseqc_burst(struct dvb_frontend *fe,
-						fe_sec_mini_cmd_t burst)
+				       enum fe_sec_mini_cmd burst)
 {
 	struct m88rs2000_state *state = fe->demodulator_priv;
 	u8 reg0, reg1;
@@ -264,7 +264,8 @@ static int m88rs2000_send_diseqc_burst(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int m88rs2000_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int m88rs2000_set_tone(struct dvb_frontend *fe,
+			      enum fe_sec_tone_mode tone)
 {
 	struct m88rs2000_state *state = fe->demodulator_priv;
 	u8 reg0, reg1;
@@ -412,7 +413,8 @@ static int m88rs2000_tab_set(struct m88rs2000_state *state,
 	return 0;
 }
 
-static int m88rs2000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt)
+static int m88rs2000_set_voltage(struct dvb_frontend *fe,
+				 enum fe_sec_voltage volt)
 {
 	struct m88rs2000_state *state = fe->demodulator_priv;
 	u8 data;
@@ -462,7 +464,8 @@ static int m88rs2000_sleep(struct dvb_frontend *fe)
 	return ret;
 }
 
-static int m88rs2000_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int m88rs2000_read_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct m88rs2000_state *state = fe->demodulator_priv;
 	u8 reg = m88rs2000_readreg(state, 0x8c);
@@ -539,7 +542,7 @@ static int m88rs2000_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks)
 }
 
 static int m88rs2000_set_fec(struct m88rs2000_state *state,
-		fe_code_rate_t fec)
+			     enum fe_code_rate fec)
 {
 	u8 fec_set, reg;
 	int ret;
@@ -574,7 +577,7 @@ static int m88rs2000_set_fec(struct m88rs2000_state *state,
 	return ret;
 }
 
-static fe_code_rate_t m88rs2000_get_fec(struct m88rs2000_state *state)
+static enum fe_code_rate m88rs2000_get_fec(struct m88rs2000_state *state)
 {
 	u8 reg;
 	m88rs2000_writereg(state, 0x9a, 0x30);
@@ -606,7 +609,7 @@ static int m88rs2000_set_frontend(struct dvb_frontend *fe)
 {
 	struct m88rs2000_state *state = fe->demodulator_priv;
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
-	fe_status_t status;
+	enum fe_status status;
 	int i, ret = 0;
 	u32 tuner_freq;
 	s16 offset = 0;
diff --git a/drivers/media/dvb-frontends/mb86a16.c b/drivers/media/dvb-frontends/mb86a16.c
index 3ddea4471d2b..79bc671e8769 100644
--- a/drivers/media/dvb-frontends/mb86a16.c
+++ b/drivers/media/dvb-frontends/mb86a16.c
@@ -593,7 +593,7 @@ err:
 	return -EREMOTEIO;
 }
 
-static int mb86a16_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int mb86a16_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	u8 stat, stat2;
 	struct mb86a16_state *state = fe->demodulator_priv;
@@ -1562,7 +1562,8 @@ err:
 	return -EREMOTEIO;
 }
 
-static int mb86a16_send_diseqc_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst)
+static int mb86a16_send_diseqc_burst(struct dvb_frontend *fe,
+				     enum fe_sec_mini_cmd burst)
 {
 	struct mb86a16_state *state = fe->demodulator_priv;
 
@@ -1590,7 +1591,7 @@ err:
 	return -EREMOTEIO;
 }
 
-static int mb86a16_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int mb86a16_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct mb86a16_state *state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/mb86a16.h b/drivers/media/dvb-frontends/mb86a16.h
index e486dc0d8e60..dbd5f43fa128 100644
--- a/drivers/media/dvb-frontends/mb86a16.h
+++ b/drivers/media/dvb-frontends/mb86a16.h
@@ -28,7 +28,8 @@
 struct mb86a16_config {
 	u8 demod_address;
 
-	int (*set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage);
+	int (*set_voltage)(struct dvb_frontend *fe,
+			   enum fe_sec_voltage voltage);
 };
 
 
diff --git a/drivers/media/dvb-frontends/mb86a20s.c b/drivers/media/dvb-frontends/mb86a20s.c
index 8f54c39ca63f..cfc005ee11d8 100644
--- a/drivers/media/dvb-frontends/mb86a20s.c
+++ b/drivers/media/dvb-frontends/mb86a20s.c
@@ -294,7 +294,7 @@ static int mb86a20s_i2c_readreg(struct mb86a20s_state *state,
  * The functions below assume that gateway lock has already obtained
  */
 
-static int mb86a20s_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int mb86a20s_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct mb86a20s_state *state = fe->demodulator_priv;
 	int val;
@@ -1951,7 +1951,7 @@ static int mb86a20s_set_frontend(struct dvb_frontend *fe)
 }
 
 static int mb86a20s_read_status_and_stats(struct dvb_frontend *fe,
-					  fe_status_t *status)
+					  enum fe_status *status)
 {
 	struct mb86a20s_state *state = fe->demodulator_priv;
 	int rc, status_nr;
@@ -2042,7 +2042,7 @@ static int mb86a20s_tune(struct dvb_frontend *fe,
 			bool re_tune,
 			unsigned int mode_flags,
 			unsigned int *delay,
-			fe_status_t *status)
+			enum fe_status *status)
 {
 	struct mb86a20s_state *state = fe->demodulator_priv;
 	int rc = 0;
diff --git a/drivers/media/dvb-frontends/mt312.c b/drivers/media/dvb-frontends/mt312.c
index 2163490c1e6b..c36e6764eead 100644
--- a/drivers/media/dvb-frontends/mt312.c
+++ b/drivers/media/dvb-frontends/mt312.c
@@ -156,7 +156,7 @@ static int mt312_reset(struct mt312_state *state, const u8 full)
 }
 
 static int mt312_get_inversion(struct mt312_state *state,
-			       fe_spectral_inversion_t *i)
+			       enum fe_spectral_inversion *i)
 {
 	int ret;
 	u8 vit_mode;
@@ -225,9 +225,9 @@ static int mt312_get_symbol_rate(struct mt312_state *state, u32 *sr)
 	return 0;
 }
 
-static int mt312_get_code_rate(struct mt312_state *state, fe_code_rate_t *cr)
+static int mt312_get_code_rate(struct mt312_state *state, enum fe_code_rate *cr)
 {
-	const fe_code_rate_t fec_tab[8] =
+	const enum fe_code_rate fec_tab[8] =
 	    { FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_6_7, FEC_7_8,
 		FEC_AUTO, FEC_AUTO };
 
@@ -380,7 +380,8 @@ static int mt312_send_master_cmd(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int mt312_send_burst(struct dvb_frontend *fe, const fe_sec_mini_cmd_t c)
+static int mt312_send_burst(struct dvb_frontend *fe,
+			    const enum fe_sec_mini_cmd c)
 {
 	struct mt312_state *state = fe->demodulator_priv;
 	const u8 mini_tab[2] = { 0x02, 0x03 };
@@ -403,7 +404,8 @@ static int mt312_send_burst(struct dvb_frontend *fe, const fe_sec_mini_cmd_t c)
 	return 0;
 }
 
-static int mt312_set_tone(struct dvb_frontend *fe, const fe_sec_tone_mode_t t)
+static int mt312_set_tone(struct dvb_frontend *fe,
+			  const enum fe_sec_tone_mode t)
 {
 	struct mt312_state *state = fe->demodulator_priv;
 	const u8 tone_tab[2] = { 0x01, 0x00 };
@@ -426,7 +428,8 @@ static int mt312_set_tone(struct dvb_frontend *fe, const fe_sec_tone_mode_t t)
 	return 0;
 }
 
-static int mt312_set_voltage(struct dvb_frontend *fe, const fe_sec_voltage_t v)
+static int mt312_set_voltage(struct dvb_frontend *fe,
+			     const enum fe_sec_voltage v)
 {
 	struct mt312_state *state = fe->demodulator_priv;
 	const u8 volt_tab[3] = { 0x00, 0x40, 0x00 };
@@ -442,7 +445,7 @@ static int mt312_set_voltage(struct dvb_frontend *fe, const fe_sec_voltage_t v)
 	return mt312_writereg(state, DISEQC_MODE, val);
 }
 
-static int mt312_read_status(struct dvb_frontend *fe, fe_status_t *s)
+static int mt312_read_status(struct dvb_frontend *fe, enum fe_status *s)
 {
 	struct mt312_state *state = fe->demodulator_priv;
 	int ret;
diff --git a/drivers/media/dvb-frontends/mt352.c b/drivers/media/dvb-frontends/mt352.c
index 2c3b50e828d7..123bb2f8e4b6 100644
--- a/drivers/media/dvb-frontends/mt352.c
+++ b/drivers/media/dvb-frontends/mt352.c
@@ -417,7 +417,7 @@ static int mt352_get_parameters(struct dvb_frontend* fe)
 	return 0;
 }
 
-static int mt352_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int mt352_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct mt352_state* state = fe->demodulator_priv;
 	int s0, s1, s3;
diff --git a/drivers/media/dvb-frontends/nxt200x.c b/drivers/media/dvb-frontends/nxt200x.c
index 8a8e1ecb762d..79c3040912ab 100644
--- a/drivers/media/dvb-frontends/nxt200x.c
+++ b/drivers/media/dvb-frontends/nxt200x.c
@@ -781,7 +781,7 @@ static int nxt200x_setup_frontend_parameters(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int nxt200x_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int nxt200x_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct nxt200x_state* state = fe->demodulator_priv;
 	u8 lock;
diff --git a/drivers/media/dvb-frontends/nxt6000.c b/drivers/media/dvb-frontends/nxt6000.c
index 90ae6c72c0e3..73f9505367ac 100644
--- a/drivers/media/dvb-frontends/nxt6000.c
+++ b/drivers/media/dvb-frontends/nxt6000.c
@@ -109,7 +109,8 @@ static int nxt6000_set_bandwidth(struct nxt6000_state *state, u32 bandwidth)
 	return nxt6000_writereg(state, OFDM_TRL_NOMINALRATE_2, (nominal_rate >> 8) & 0xFF);
 }
 
-static int nxt6000_set_guard_interval(struct nxt6000_state* state, fe_guard_interval_t guard_interval)
+static int nxt6000_set_guard_interval(struct nxt6000_state *state,
+				      enum fe_guard_interval guard_interval)
 {
 	switch (guard_interval) {
 
@@ -131,7 +132,8 @@ static int nxt6000_set_guard_interval(struct nxt6000_state* state, fe_guard_inte
 	}
 }
 
-static int nxt6000_set_inversion(struct nxt6000_state* state, fe_spectral_inversion_t inversion)
+static int nxt6000_set_inversion(struct nxt6000_state *state,
+				 enum fe_spectral_inversion inversion)
 {
 	switch (inversion) {
 
@@ -147,7 +149,9 @@ static int nxt6000_set_inversion(struct nxt6000_state* state, fe_spectral_invers
 	}
 }
 
-static int nxt6000_set_transmission_mode(struct nxt6000_state* state, fe_transmit_mode_t transmission_mode)
+static int
+nxt6000_set_transmission_mode(struct nxt6000_state *state,
+			      enum fe_transmit_mode transmission_mode)
 {
 	int result;
 
@@ -416,7 +420,7 @@ static void nxt6000_dump_status(struct nxt6000_state *state)
 	printk("\n");
 }
 
-static int nxt6000_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int nxt6000_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	u8 core_status;
 	struct nxt6000_state* state = fe->demodulator_priv;
diff --git a/drivers/media/dvb-frontends/or51132.c b/drivers/media/dvb-frontends/or51132.c
index cbbd259eacfe..35b1053b3640 100644
--- a/drivers/media/dvb-frontends/or51132.c
+++ b/drivers/media/dvb-frontends/or51132.c
@@ -63,7 +63,7 @@ struct or51132_state
 	struct dvb_frontend frontend;
 
 	/* Demodulator private data */
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 	u32 snr; /* Result of last SNR calculation */
 
 	/* Tuner private data */
@@ -292,7 +292,7 @@ static int or51132_setmode(struct dvb_frontend* fe)
 #define MOD_FWCLASS_UNKNOWN	0
 #define MOD_FWCLASS_VSB		1
 #define MOD_FWCLASS_QAM		2
-static int modulation_fw_class(fe_modulation_t modulation)
+static int modulation_fw_class(enum fe_modulation modulation)
 {
 	switch(modulation) {
 	case VSB_8:
@@ -415,7 +415,7 @@ start:
 	return 0;
 }
 
-static int or51132_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int or51132_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct or51132_state* state = fe->demodulator_priv;
 	int reg;
diff --git a/drivers/media/dvb-frontends/or51211.c b/drivers/media/dvb-frontends/or51211.c
index 873ea1da844b..e82413b975e6 100644
--- a/drivers/media/dvb-frontends/or51211.c
+++ b/drivers/media/dvb-frontends/or51211.c
@@ -237,7 +237,7 @@ static int or51211_set_parameters(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int or51211_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int or51211_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct or51211_state* state = fe->demodulator_priv;
 	unsigned char rec_buf[2];
diff --git a/drivers/media/dvb-frontends/rtl2830.c b/drivers/media/dvb-frontends/rtl2830.c
index e1b8df62bd59..3d01f4f22aca 100644
--- a/drivers/media/dvb-frontends/rtl2830.c
+++ b/drivers/media/dvb-frontends/rtl2830.c
@@ -392,7 +392,7 @@ err:
 	return ret;
 }
 
-static int rtl2830_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int rtl2830_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct i2c_client *client = fe->demodulator_priv;
 	struct rtl2830_dev *dev = i2c_get_clientdata(client);
diff --git a/drivers/media/dvb-frontends/rtl2830_priv.h b/drivers/media/dvb-frontends/rtl2830_priv.h
index d50d5376c9c5..cf793f39a09b 100644
--- a/drivers/media/dvb-frontends/rtl2830_priv.h
+++ b/drivers/media/dvb-frontends/rtl2830_priv.h
@@ -34,7 +34,7 @@ struct rtl2830_dev {
 	bool sleeping;
 	unsigned long filters;
 	struct delayed_work stat_work;
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 	u64 post_bit_error_prev; /* for old DVBv3 read_ber() calculation */
 	u64 post_bit_error;
 	u64 post_bit_count;
diff --git a/drivers/media/dvb-frontends/rtl2832.c b/drivers/media/dvb-frontends/rtl2832.c
index a57c478e2306..822ea4b7a7ff 100644
--- a/drivers/media/dvb-frontends/rtl2832.c
+++ b/drivers/media/dvb-frontends/rtl2832.c
@@ -688,7 +688,7 @@ err:
 	return ret;
 }
 
-static int rtl2832_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int rtl2832_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct rtl2832_dev *dev = fe->demodulator_priv;
 	struct i2c_client *client = dev->client;
diff --git a/drivers/media/dvb-frontends/rtl2832_priv.h b/drivers/media/dvb-frontends/rtl2832_priv.h
index 6f3a49c63ab0..5dcd3a41d23f 100644
--- a/drivers/media/dvb-frontends/rtl2832_priv.h
+++ b/drivers/media/dvb-frontends/rtl2832_priv.h
@@ -39,7 +39,7 @@ struct rtl2832_dev {
 	struct i2c_adapter *i2c_adapter_tuner;
 	struct dvb_frontend fe;
 	struct delayed_work stat_work;
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 	u64 post_bit_error_prev; /* for old DVBv3 read_ber() calculation */
 	u64 post_bit_error;
 	u64 post_bit_count;
diff --git a/drivers/media/dvb-frontends/s5h1409.c b/drivers/media/dvb-frontends/s5h1409.c
index 5ff474a7ff29..10964848a2f1 100644
--- a/drivers/media/dvb-frontends/s5h1409.c
+++ b/drivers/media/dvb-frontends/s5h1409.c
@@ -38,7 +38,7 @@ struct s5h1409_state {
 	struct dvb_frontend frontend;
 
 	/* previous uncorrected block counter */
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 
 	u32 current_frequency;
 	int if_freq;
@@ -400,7 +400,7 @@ static int s5h1409_set_spectralinversion(struct dvb_frontend *fe, int inverted)
 }
 
 static int s5h1409_enable_modulation(struct dvb_frontend *fe,
-				     fe_modulation_t m)
+				     enum fe_modulation m)
 {
 	struct s5h1409_state *state = fe->demodulator_priv;
 
@@ -755,7 +755,7 @@ static int s5h1409_init(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int s5h1409_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int s5h1409_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct s5h1409_state *state = fe->demodulator_priv;
 	u16 reg;
diff --git a/drivers/media/dvb-frontends/s5h1411.c b/drivers/media/dvb-frontends/s5h1411.c
index 64f35fed7ae1..9afc3f42290e 100644
--- a/drivers/media/dvb-frontends/s5h1411.c
+++ b/drivers/media/dvb-frontends/s5h1411.c
@@ -37,7 +37,7 @@ struct s5h1411_state {
 
 	struct dvb_frontend frontend;
 
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 	unsigned int first_tune:1;
 
 	u32 current_frequency;
@@ -484,7 +484,7 @@ static int s5h1411_set_serialmode(struct dvb_frontend *fe, int serial)
 }
 
 static int s5h1411_enable_modulation(struct dvb_frontend *fe,
-				     fe_modulation_t m)
+				     enum fe_modulation m)
 {
 	struct s5h1411_state *state = fe->demodulator_priv;
 
@@ -659,7 +659,7 @@ static int s5h1411_init(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int s5h1411_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int s5h1411_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct s5h1411_state *state = fe->demodulator_priv;
 	u16 reg;
diff --git a/drivers/media/dvb-frontends/s5h1420.c b/drivers/media/dvb-frontends/s5h1420.c
index dfc20665e372..9c22a4c70d87 100644
--- a/drivers/media/dvb-frontends/s5h1420.c
+++ b/drivers/media/dvb-frontends/s5h1420.c
@@ -52,7 +52,7 @@ struct s5h1420_state {
 	u8 postlocked:1;
 	u32 fclk;
 	u32 tunedfreq;
-	fe_code_rate_t fec_inner;
+	enum fe_code_rate fec_inner;
 	u32 symbol_rate;
 
 	/* FIXME: ugly workaround for flexcop's incapable i2c-controller
@@ -124,7 +124,8 @@ static int s5h1420_writereg (struct s5h1420_state* state, u8 reg, u8 data)
 	return 0;
 }
 
-static int s5h1420_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int s5h1420_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct s5h1420_state* state = fe->demodulator_priv;
 
@@ -149,7 +150,8 @@ static int s5h1420_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltag
 	return 0;
 }
 
-static int s5h1420_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int s5h1420_set_tone(struct dvb_frontend *fe,
+			    enum fe_sec_tone_mode tone)
 {
 	struct s5h1420_state* state = fe->demodulator_priv;
 
@@ -270,7 +272,8 @@ exit:
 	return result;
 }
 
-static int s5h1420_send_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd)
+static int s5h1420_send_burst(struct dvb_frontend *fe,
+			      enum fe_sec_mini_cmd minicmd)
 {
 	struct s5h1420_state* state = fe->demodulator_priv;
 	u8 val;
@@ -307,10 +310,10 @@ static int s5h1420_send_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t minicm
 	return result;
 }
 
-static fe_status_t s5h1420_get_status_bits(struct s5h1420_state* state)
+static enum fe_status s5h1420_get_status_bits(struct s5h1420_state *state)
 {
 	u8 val;
-	fe_status_t status = 0;
+	enum fe_status status = 0;
 
 	val = s5h1420_readreg(state, 0x14);
 	if (val & 0x02)
@@ -328,7 +331,8 @@ static fe_status_t s5h1420_get_status_bits(struct s5h1420_state* state)
 	return status;
 }
 
-static int s5h1420_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int s5h1420_read_status(struct dvb_frontend *fe,
+			       enum fe_status *status)
 {
 	struct s5h1420_state* state = fe->demodulator_priv;
 	u8 val;
@@ -601,7 +605,7 @@ static void s5h1420_setfec_inversion(struct s5h1420_state* state,
 	dprintk("leave %s\n", __func__);
 }
 
-static fe_code_rate_t s5h1420_getfec(struct s5h1420_state* state)
+static enum fe_code_rate s5h1420_getfec(struct s5h1420_state *state)
 {
 	switch(s5h1420_readreg(state, 0x32) & 0x07) {
 	case 0:
@@ -626,7 +630,8 @@ static fe_code_rate_t s5h1420_getfec(struct s5h1420_state* state)
 	return FEC_NONE;
 }
 
-static fe_spectral_inversion_t s5h1420_getinversion(struct s5h1420_state* state)
+static enum fe_spectral_inversion
+s5h1420_getinversion(struct s5h1420_state *state)
 {
 	if (s5h1420_readreg(state, 0x32) & 0x08)
 		return INVERSION_ON;
diff --git a/drivers/media/dvb-frontends/s5h1432.c b/drivers/media/dvb-frontends/s5h1432.c
index 6ec16a243741..4215652f8eb7 100644
--- a/drivers/media/dvb-frontends/s5h1432.c
+++ b/drivers/media/dvb-frontends/s5h1432.c
@@ -36,7 +36,7 @@ struct s5h1432_state {
 
 	struct dvb_frontend frontend;
 
-	fe_modulation_t current_modulation;
+	enum fe_modulation current_modulation;
 	unsigned int first_tune:1;
 
 	u32 current_frequency;
@@ -302,7 +302,7 @@ static int s5h1432_init(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int s5h1432_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int s5h1432_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	return 0;
 }
diff --git a/drivers/media/dvb-frontends/s921.c b/drivers/media/dvb-frontends/s921.c
index 69862e1fd9e9..b2d9fe13e1a0 100644
--- a/drivers/media/dvb-frontends/s921.c
+++ b/drivers/media/dvb-frontends/s921.c
@@ -348,7 +348,7 @@ static int s921_initfe(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int s921_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int s921_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct s921_state *state = fe->demodulator_priv;
 	int regstatus, rc;
@@ -389,7 +389,7 @@ static int s921_read_status(struct dvb_frontend *fe, fe_status_t *status)
 
 static int s921_read_signal_strength(struct dvb_frontend *fe, u16 *strength)
 {
-	fe_status_t	status;
+	enum fe_status	status;
 	struct s921_state *state = fe->demodulator_priv;
 	int rc;
 
@@ -449,7 +449,7 @@ static int s921_tune(struct dvb_frontend *fe,
 			bool re_tune,
 			unsigned int mode_flags,
 			unsigned int *delay,
-			fe_status_t *status)
+			enum fe_status *status)
 {
 	int rc = 0;
 
diff --git a/drivers/media/dvb-frontends/si2165.c b/drivers/media/dvb-frontends/si2165.c
index 4cc5d10ed0d4..7c2eeee69757 100644
--- a/drivers/media/dvb-frontends/si2165.c
+++ b/drivers/media/dvb-frontends/si2165.c
@@ -698,7 +698,7 @@ static int si2165_sleep(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int si2165_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int si2165_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	int ret;
 	u8 fec_lock = 0;
diff --git a/drivers/media/dvb-frontends/si2168.c b/drivers/media/dvb-frontends/si2168.c
index d6a4cb0688c3..25e238c370e5 100644
--- a/drivers/media/dvb-frontends/si2168.c
+++ b/drivers/media/dvb-frontends/si2168.c
@@ -120,7 +120,7 @@ static int si2168_cmd_execute(struct i2c_client *client, struct si2168_cmd *cmd)
 	return ret;
 }
 
-static int si2168_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int si2168_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct i2c_client *client = fe->demodulator_priv;
 	struct si2168_dev *dev = i2c_get_clientdata(client);
diff --git a/drivers/media/dvb-frontends/si2168_priv.h b/drivers/media/dvb-frontends/si2168_priv.h
index 90b6b6eace24..c07e6fe2cb10 100644
--- a/drivers/media/dvb-frontends/si2168_priv.h
+++ b/drivers/media/dvb-frontends/si2168_priv.h
@@ -31,8 +31,8 @@
 struct si2168_dev {
 	struct i2c_adapter *adapter;
 	struct dvb_frontend fe;
-	fe_delivery_system_t delivery_system;
-	fe_status_t fe_status;
+	enum fe_delivery_system delivery_system;
+	enum fe_status fe_status;
 	bool active;
 	bool fw_loaded;
 	u8 ts_mode;
diff --git a/drivers/media/dvb-frontends/si21xx.c b/drivers/media/dvb-frontends/si21xx.c
index 16850e2bf02f..62ad7a7be9f8 100644
--- a/drivers/media/dvb-frontends/si21xx.c
+++ b/drivers/media/dvb-frontends/si21xx.c
@@ -410,7 +410,7 @@ static int si21xx_send_diseqc_msg(struct dvb_frontend *fe,
 }
 
 static int si21xx_send_diseqc_burst(struct dvb_frontend *fe,
-						fe_sec_mini_cmd_t burst)
+				    enum fe_sec_mini_cmd burst)
 {
 	struct si21xx_state *state = fe->demodulator_priv;
 	u8 val;
@@ -434,7 +434,7 @@ static int si21xx_send_diseqc_burst(struct dvb_frontend *fe,
 	return 0;
 }
 /*	30.06.2008 */
-static int si21xx_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int si21xx_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct si21xx_state *state = fe->demodulator_priv;
 	u8 val;
@@ -454,7 +454,7 @@ static int si21xx_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
 	}
 }
 
-static int si21xx_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt)
+static int si21xx_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage volt)
 {
 	struct si21xx_state *state = fe->demodulator_priv;
 
@@ -536,7 +536,7 @@ static int si21xx_init(struct dvb_frontend *fe)
 
 }
 
-static int si21_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int si21_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct si21xx_state *state = fe->demodulator_priv;
 	u8 regs_read[2];
@@ -641,7 +641,7 @@ static int si21_read_ucblocks(struct dvb_frontend *fe, u32 *ucblocks)
 /*	initiates a channel acquisition sequence
 	using the specified symbol rate and code rate */
 static int si21xx_setacquire(struct dvb_frontend *fe, int symbrate,
-						fe_code_rate_t crate)
+			     enum fe_code_rate crate)
 {
 
 	struct si21xx_state *state = fe->demodulator_priv;
diff --git a/drivers/media/dvb-frontends/sp8870.c b/drivers/media/dvb-frontends/sp8870.c
index 57dc2abaa87b..e87ac30d7fb8 100644
--- a/drivers/media/dvb-frontends/sp8870.c
+++ b/drivers/media/dvb-frontends/sp8870.c
@@ -350,7 +350,8 @@ static int sp8870_init (struct dvb_frontend* fe)
 	return 0;
 }
 
-static int sp8870_read_status (struct dvb_frontend* fe, fe_status_t * fe_status)
+static int sp8870_read_status(struct dvb_frontend *fe,
+			      enum fe_status *fe_status)
 {
 	struct sp8870_state* state = fe->demodulator_priv;
 	int status;
diff --git a/drivers/media/dvb-frontends/sp887x.c b/drivers/media/dvb-frontends/sp887x.c
index 1bb81b5ae6e0..4378fe1b978e 100644
--- a/drivers/media/dvb-frontends/sp887x.c
+++ b/drivers/media/dvb-frontends/sp887x.c
@@ -416,7 +416,7 @@ static int sp887x_setup_frontend_parameters(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int sp887x_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int sp887x_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct sp887x_state* state = fe->demodulator_priv;
 	u16 snr12 = sp887x_readreg(state, 0xf16);
diff --git a/drivers/media/dvb-frontends/stb0899_drv.c b/drivers/media/dvb-frontends/stb0899_drv.c
index c73899d3a53d..756650f154ab 100644
--- a/drivers/media/dvb-frontends/stb0899_drv.c
+++ b/drivers/media/dvb-frontends/stb0899_drv.c
@@ -792,7 +792,8 @@ static int stb0899_wait_diseqc_txidle(struct stb0899_state *state, int timeout)
 	return 0;
 }
 
-static int stb0899_send_diseqc_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst)
+static int stb0899_send_diseqc_burst(struct dvb_frontend *fe,
+				     enum fe_sec_mini_cmd burst)
 {
 	struct stb0899_state *state = fe->demodulator_priv;
 	u8 reg, old_state;
@@ -1178,7 +1179,8 @@ static int stb0899_read_ber(struct dvb_frontend *fe, u32 *ber)
 	return 0;
 }
 
-static int stb0899_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int stb0899_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct stb0899_state *state = fe->demodulator_priv;
 
@@ -1205,7 +1207,7 @@ static int stb0899_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage
 	return 0;
 }
 
-static int stb0899_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int stb0899_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct stb0899_state *state = fe->demodulator_priv;
 	struct stb0899_internal *internal = &state->internal;
diff --git a/drivers/media/dvb-frontends/stv0288.c b/drivers/media/dvb-frontends/stv0288.c
index ecf4bb3a3b6b..c93d9a45f7f7 100644
--- a/drivers/media/dvb-frontends/stv0288.c
+++ b/drivers/media/dvb-frontends/stv0288.c
@@ -44,7 +44,7 @@ struct stv0288_state {
 	u8 initialised:1;
 	u32 tuner_frequency;
 	u32 symbol_rate;
-	fe_code_rate_t fec_inner;
+	enum fe_code_rate fec_inner;
 	int errmode;
 };
 
@@ -174,7 +174,7 @@ static int stv0288_send_diseqc_msg(struct dvb_frontend *fe,
 }
 
 static int stv0288_send_diseqc_burst(struct dvb_frontend *fe,
-						fe_sec_mini_cmd_t burst)
+				     enum fe_sec_mini_cmd burst)
 {
 	struct stv0288_state *state = fe->demodulator_priv;
 
@@ -193,7 +193,7 @@ static int stv0288_send_diseqc_burst(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int stv0288_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int stv0288_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct stv0288_state *state = fe->demodulator_priv;
 
@@ -323,7 +323,8 @@ static u8 stv0288_inittab[] = {
 	0xff, 0xff,
 };
 
-static int stv0288_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt)
+static int stv0288_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage volt)
 {
 	dprintk("%s: %s\n", __func__,
 		volt == SEC_VOLTAGE_13 ? "SEC_VOLTAGE_13" :
@@ -361,7 +362,7 @@ static int stv0288_init(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int stv0288_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int stv0288_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct stv0288_state *state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/stv0297.c b/drivers/media/dvb-frontends/stv0297.c
index dfc14d5c3999..75b4d8b25657 100644
--- a/drivers/media/dvb-frontends/stv0297.c
+++ b/drivers/media/dvb-frontends/stv0297.c
@@ -233,7 +233,8 @@ static void stv0297_set_initialdemodfreq(struct stv0297_state *state, long freq)
 	stv0297_writereg(state, 0x20, tmp);
 }
 
-static int stv0297_set_qam(struct stv0297_state *state, fe_modulation_t modulation)
+static int stv0297_set_qam(struct stv0297_state *state,
+			   enum fe_modulation modulation)
 {
 	int val = 0;
 
@@ -267,7 +268,8 @@ static int stv0297_set_qam(struct stv0297_state *state, fe_modulation_t modulati
 	return 0;
 }
 
-static int stv0297_set_inversion(struct stv0297_state *state, fe_spectral_inversion_t inversion)
+static int stv0297_set_inversion(struct stv0297_state *state,
+				 enum fe_spectral_inversion inversion)
 {
 	int val = 0;
 
@@ -325,7 +327,8 @@ static int stv0297_sleep(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int stv0297_read_status(struct dvb_frontend *fe, fe_status_t * status)
+static int stv0297_read_status(struct dvb_frontend *fe,
+			       enum fe_status *status)
 {
 	struct stv0297_state *state = fe->demodulator_priv;
 
@@ -415,7 +418,7 @@ static int stv0297_set_frontend(struct dvb_frontend *fe)
 	int sweeprate;
 	int carrieroffset;
 	unsigned long timeout;
-	fe_spectral_inversion_t inversion;
+	enum fe_spectral_inversion inversion;
 
 	switch (p->modulation) {
 	case QAM_16:
diff --git a/drivers/media/dvb-frontends/stv0299.c b/drivers/media/dvb-frontends/stv0299.c
index b57ecf42e75a..0ca5d9f0d851 100644
--- a/drivers/media/dvb-frontends/stv0299.c
+++ b/drivers/media/dvb-frontends/stv0299.c
@@ -61,7 +61,7 @@ struct stv0299_state {
 	u8 initialised:1;
 	u32 tuner_frequency;
 	u32 symbol_rate;
-	fe_code_rate_t fec_inner;
+	enum fe_code_rate fec_inner;
 	int errmode;
 	u32 ucblocks;
 	u8 mcr_reg;
@@ -134,7 +134,7 @@ static int stv0299_readregs (struct stv0299_state* state, u8 reg1, u8 *b, u8 len
 	return ret == 2 ? 0 : ret;
 }
 
-static int stv0299_set_FEC (struct stv0299_state* state, fe_code_rate_t fec)
+static int stv0299_set_FEC(struct stv0299_state *state, enum fe_code_rate fec)
 {
 	dprintk ("%s\n", __func__);
 
@@ -170,10 +170,10 @@ static int stv0299_set_FEC (struct stv0299_state* state, fe_code_rate_t fec)
     }
 }
 
-static fe_code_rate_t stv0299_get_fec (struct stv0299_state* state)
+static enum fe_code_rate stv0299_get_fec(struct stv0299_state *state)
 {
-	static fe_code_rate_t fec_tab [] = { FEC_2_3, FEC_3_4, FEC_5_6,
-					     FEC_7_8, FEC_1_2 };
+	static enum fe_code_rate fec_tab[] = { FEC_2_3, FEC_3_4, FEC_5_6,
+					       FEC_7_8, FEC_1_2 };
 	u8 index;
 
 	dprintk ("%s\n", __func__);
@@ -302,7 +302,8 @@ static int stv0299_send_diseqc_msg (struct dvb_frontend* fe,
 	return 0;
 }
 
-static int stv0299_send_diseqc_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t burst)
+static int stv0299_send_diseqc_burst(struct dvb_frontend *fe,
+				     enum fe_sec_mini_cmd burst)
 {
 	struct stv0299_state* state = fe->demodulator_priv;
 	u8 val;
@@ -329,7 +330,8 @@ static int stv0299_send_diseqc_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t
 	return 0;
 }
 
-static int stv0299_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int stv0299_set_tone(struct dvb_frontend *fe,
+			    enum fe_sec_tone_mode tone)
 {
 	struct stv0299_state* state = fe->demodulator_priv;
 	u8 val;
@@ -351,7 +353,8 @@ static int stv0299_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
 	}
 }
 
-static int stv0299_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int stv0299_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct stv0299_state* state = fe->demodulator_priv;
 	u8 reg0x08;
@@ -476,7 +479,8 @@ static int stv0299_init (struct dvb_frontend* fe)
 	return 0;
 }
 
-static int stv0299_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int stv0299_read_status(struct dvb_frontend *fe,
+			       enum fe_status *status)
 {
 	struct stv0299_state* state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/stv0367.c b/drivers/media/dvb-frontends/stv0367.c
index b31ff265ff24..ec3e18e5ff50 100644
--- a/drivers/media/dvb-frontends/stv0367.c
+++ b/drivers/media/dvb-frontends/stv0367.c
@@ -59,7 +59,7 @@ struct stv0367cab_state {
 	int locked;			/* channel found		*/
 	u32 freq_khz;			/* found frequency (in kHz)	*/
 	u32 symbol_rate;		/* found symbol rate (in Bds)	*/
-	fe_spectral_inversion_t	spect_inv; /* Spectrum Inversion	*/
+	enum fe_spectral_inversion spect_inv; /* Spectrum Inversion	*/
 };
 
 struct stv0367ter_state {
@@ -67,10 +67,10 @@ struct stv0367ter_state {
 	enum stv0367_ter_signal_type state;
 	enum stv0367_ter_if_iq_mode if_iq_mode;
 	enum stv0367_ter_mode mode;/* mode 2K or 8K */
-	fe_guard_interval_t guard;
+	enum fe_guard_interval guard;
 	enum stv0367_ter_hierarchy hierarchy;
 	u32 frequency;
-	fe_spectral_inversion_t  sense; /*  current search spectrum */
+	enum fe_spectral_inversion sense; /*  current search spectrum */
 	u8  force; /* force mode/guard */
 	u8  bw; /* channel width 6, 7 or 8 in MHz */
 	u8  pBW; /* channel width used during previous lock */
@@ -2074,7 +2074,8 @@ static int stv0367ter_status(struct dvb_frontend *fe)
 	return locked;
 }
 #endif
-static int stv0367ter_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int stv0367ter_read_status(struct dvb_frontend *fe,
+				  enum fe_status *status)
 {
 	struct stv0367_state *state = fe->demodulator_priv;
 
@@ -2716,7 +2717,8 @@ static u32 stv0367cab_GetSymbolRate(struct stv0367_state *state, u32 mclk_hz)
 	return regsym;
 }
 
-static int stv0367cab_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int stv0367cab_read_status(struct dvb_frontend *fe,
+				  enum fe_status *status)
 {
 	struct stv0367_state *state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/stv0367_priv.h b/drivers/media/dvb-frontends/stv0367_priv.h
index 995db0689ddd..89bf6f64b078 100644
--- a/drivers/media/dvb-frontends/stv0367_priv.h
+++ b/drivers/media/dvb-frontends/stv0367_priv.h
@@ -188,7 +188,7 @@ struct stv0367_cab_signal_info {
 	u32 frequency; /* kHz */
 	u32 symbol_rate; /* Mbds */
 	enum stv0367cab_mod modulation;
-	fe_spectral_inversion_t spect_inv;
+	enum fe_spectral_inversion spect_inv;
 	s32 Power_dBmx10;	/* Power of the RF signal (dBm x 10) */
 	u32	CN_dBx10;	/* Carrier to noise ratio (dB x 10) */
 	u32	BER;		/* Bit error rate (x 10000000)	*/
diff --git a/drivers/media/dvb-frontends/stv0900_core.c b/drivers/media/dvb-frontends/stv0900_core.c
index 2c88abfab531..fe31dd541955 100644
--- a/drivers/media/dvb-frontends/stv0900_core.c
+++ b/drivers/media/dvb-frontends/stv0900_core.c
@@ -1744,7 +1744,8 @@ static int stv0900_send_master_cmd(struct dvb_frontend *fe,
 				state->demod);
 }
 
-static int stv0900_send_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst)
+static int stv0900_send_burst(struct dvb_frontend *fe,
+			      enum fe_sec_mini_cmd burst)
 {
 	struct stv0900_state *state = fe->demodulator_priv;
 	struct stv0900_internal *intp = state->internal;
@@ -1793,7 +1794,8 @@ static int stv0900_recv_slave_reply(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int stv0900_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t toneoff)
+static int stv0900_set_tone(struct dvb_frontend *fe,
+			    enum fe_sec_tone_mode toneoff)
 {
 	struct stv0900_state *state = fe->demodulator_priv;
 	struct stv0900_internal *intp = state->internal;
diff --git a/drivers/media/dvb-frontends/stv090x.c b/drivers/media/dvb-frontends/stv090x.c
index 0b2a934f53e5..25bdf6e0f963 100644
--- a/drivers/media/dvb-frontends/stv090x.c
+++ b/drivers/media/dvb-frontends/stv090x.c
@@ -3732,7 +3732,7 @@ static int stv090x_read_cnr(struct dvb_frontend *fe, u16 *cnr)
 	return 0;
 }
 
-static int stv090x_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int stv090x_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct stv090x_state *state = fe->demodulator_priv;
 	u32 reg;
@@ -3822,7 +3822,8 @@ err:
 	return -1;
 }
 
-static int stv090x_send_diseqc_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t burst)
+static int stv090x_send_diseqc_burst(struct dvb_frontend *fe,
+				     enum fe_sec_mini_cmd burst)
 {
 	struct stv090x_state *state = fe->demodulator_priv;
 	u32 reg, idle = 0, fifo_full = 1;
diff --git a/drivers/media/dvb-frontends/stv6110.c b/drivers/media/dvb-frontends/stv6110.c
index b1425830a24e..91c6dcf65d2a 100644
--- a/drivers/media/dvb-frontends/stv6110.c
+++ b/drivers/media/dvb-frontends/stv6110.c
@@ -158,7 +158,7 @@ static int stv6110_sleep(struct dvb_frontend *fe)
 	return 0;
 }
 
-static u32 carrier_width(u32 symbol_rate, fe_rolloff_t rolloff)
+static u32 carrier_width(u32 symbol_rate, enum fe_rolloff rolloff)
 {
 	u32 rlf;
 
diff --git a/drivers/media/dvb-frontends/tc90522.c b/drivers/media/dvb-frontends/tc90522.c
index dce22ce35d20..456cdc7fb1e7 100644
--- a/drivers/media/dvb-frontends/tc90522.c
+++ b/drivers/media/dvb-frontends/tc90522.c
@@ -130,7 +130,7 @@ static int tc90522t_set_layers(struct dvb_frontend *fe)
 
 /* frontend ops */
 
-static int tc90522s_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int tc90522s_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct tc90522_state *state;
 	int ret;
@@ -158,7 +158,7 @@ static int tc90522s_read_status(struct dvb_frontend *fe, fe_status_t *status)
 	return 0;
 }
 
-static int tc90522t_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int tc90522t_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct tc90522_state *state;
 	int ret;
@@ -194,7 +194,7 @@ static int tc90522t_read_status(struct dvb_frontend *fe, fe_status_t *status)
 	return 0;
 }
 
-static const fe_code_rate_t fec_conv_sat[] = {
+static const enum fe_code_rate fec_conv_sat[] = {
 	FEC_NONE, /* unused */
 	FEC_1_2, /* for BPSK */
 	FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8, /* for QPSK */
@@ -238,7 +238,10 @@ static int tc90522s_get_frontend(struct dvb_frontend *fe)
 			c->layer[1].segment_count = 0;
 		else
 			c->layer[1].segment_count = val[4] & 0x3f; /* slots */
-		/* actually, BPSK if v==1, but not defined in fe_modulation_t */
+		/*
+		 * actually, BPSK if v==1, but not defined in
+		 * enum fe_modulation
+		 */
 		c->layer[1].modulation = QPSK;
 		layers = (v > 0) ? 2 : 1;
 	}
@@ -319,18 +322,18 @@ static int tc90522s_get_frontend(struct dvb_frontend *fe)
 }
 
 
-static const fe_transmit_mode_t tm_conv[] = {
+static const enum fe_transmit_mode tm_conv[] = {
 	TRANSMISSION_MODE_2K,
 	TRANSMISSION_MODE_4K,
 	TRANSMISSION_MODE_8K,
 	0
 };
 
-static const fe_code_rate_t fec_conv_ter[] = {
+static const enum fe_code_rate fec_conv_ter[] = {
 	FEC_1_2, FEC_2_3, FEC_3_4, FEC_5_6, FEC_7_8, 0, 0, 0
 };
 
-static const fe_modulation_t mod_conv[] = {
+static const enum fe_modulation mod_conv[] = {
 	DQPSK, QPSK, QAM_16, QAM_64, 0, 0, 0, 0
 };
 
diff --git a/drivers/media/dvb-frontends/tda10021.c b/drivers/media/dvb-frontends/tda10021.c
index 28d987068048..a684424e665a 100644
--- a/drivers/media/dvb-frontends/tda10021.c
+++ b/drivers/media/dvb-frontends/tda10021.c
@@ -129,8 +129,8 @@ static int unlock_tuner(struct tda10021_state* state)
 	return 0;
 }
 
-static int tda10021_setup_reg0 (struct tda10021_state* state, u8 reg0,
-				fe_spectral_inversion_t inversion)
+static int tda10021_setup_reg0(struct tda10021_state *state, u8 reg0,
+			       enum fe_spectral_inversion inversion)
 {
 	reg0 |= state->reg0 & 0x63;
 
@@ -308,7 +308,8 @@ static int tda10021_set_parameters(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int tda10021_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int tda10021_read_status(struct dvb_frontend *fe,
+				enum fe_status *status)
 {
 	struct tda10021_state* state = fe->demodulator_priv;
 	int sync;
diff --git a/drivers/media/dvb-frontends/tda10023.c b/drivers/media/dvb-frontends/tda10023.c
index f92fbbbb4a71..44a55656093f 100644
--- a/drivers/media/dvb-frontends/tda10023.c
+++ b/drivers/media/dvb-frontends/tda10023.c
@@ -376,7 +376,8 @@ static int tda10023_set_parameters(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int tda10023_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int tda10023_read_status(struct dvb_frontend *fe,
+				enum fe_status *status)
 {
 	struct tda10023_state* state = fe->demodulator_priv;
 	int sync;
diff --git a/drivers/media/dvb-frontends/tda10048.c b/drivers/media/dvb-frontends/tda10048.c
index 71fb63299de7..8451086c563f 100644
--- a/drivers/media/dvb-frontends/tda10048.c
+++ b/drivers/media/dvb-frontends/tda10048.c
@@ -792,7 +792,7 @@ static int tda10048_init(struct dvb_frontend *fe)
 	return ret;
 }
 
-static int tda10048_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int tda10048_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct tda10048_state *state = fe->demodulator_priv;
 	u8 reg;
diff --git a/drivers/media/dvb-frontends/tda1004x.c b/drivers/media/dvb-frontends/tda1004x.c
index d2b8ecbea81e..0e209b56c76c 100644
--- a/drivers/media/dvb-frontends/tda1004x.c
+++ b/drivers/media/dvb-frontends/tda1004x.c
@@ -1005,7 +1005,8 @@ static int tda1004x_get_fe(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int tda1004x_read_status(struct dvb_frontend* fe, fe_status_t * fe_status)
+static int tda1004x_read_status(struct dvb_frontend *fe,
+				enum fe_status *fe_status)
 {
 	struct tda1004x_state* state = fe->demodulator_priv;
 	int status;
diff --git a/drivers/media/dvb-frontends/tda10071.c b/drivers/media/dvb-frontends/tda10071.c
index 1470a5d63f58..f6dc6307d35a 100644
--- a/drivers/media/dvb-frontends/tda10071.c
+++ b/drivers/media/dvb-frontends/tda10071.c
@@ -203,7 +203,7 @@ error:
 }
 
 static int tda10071_set_tone(struct dvb_frontend *fe,
-	fe_sec_tone_mode_t fe_sec_tone_mode)
+	enum fe_sec_tone_mode fe_sec_tone_mode)
 {
 	struct tda10071_priv *priv = fe->demodulator_priv;
 	struct tda10071_cmd cmd;
@@ -249,7 +249,7 @@ error:
 }
 
 static int tda10071_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t fe_sec_voltage)
+	enum fe_sec_voltage fe_sec_voltage)
 {
 	struct tda10071_priv *priv = fe->demodulator_priv;
 	struct tda10071_cmd cmd;
@@ -413,7 +413,7 @@ error:
 }
 
 static int tda10071_diseqc_send_burst(struct dvb_frontend *fe,
-	fe_sec_mini_cmd_t fe_sec_mini_cmd)
+	enum fe_sec_mini_cmd fe_sec_mini_cmd)
 {
 	struct tda10071_priv *priv = fe->demodulator_priv;
 	struct tda10071_cmd cmd;
@@ -476,7 +476,7 @@ error:
 	return ret;
 }
 
-static int tda10071_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int tda10071_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct tda10071_priv *priv = fe->demodulator_priv;
 	int ret;
@@ -668,7 +668,7 @@ static int tda10071_set_frontend(struct dvb_frontend *fe)
 	struct dtv_frontend_properties *c = &fe->dtv_property_cache;
 	int ret, i;
 	u8 mode, rolloff, pilot, inversion, div;
-	fe_modulation_t modulation;
+	enum fe_modulation modulation;
 
 	dev_dbg(&priv->i2c->dev,
 			"%s: delivery_system=%d modulation=%d frequency=%d symbol_rate=%d inversion=%d pilot=%d rolloff=%d\n",
diff --git a/drivers/media/dvb-frontends/tda10071_priv.h b/drivers/media/dvb-frontends/tda10071_priv.h
index 7ec69ac510d7..54d7c713eec8 100644
--- a/drivers/media/dvb-frontends/tda10071_priv.h
+++ b/drivers/media/dvb-frontends/tda10071_priv.h
@@ -34,15 +34,15 @@ struct tda10071_priv {
 	u8 meas_count[2];
 	u32 ber;
 	u32 ucb;
-	fe_status_t fe_status;
-	fe_delivery_system_t delivery_system;
+	enum fe_status fe_status;
+	enum fe_delivery_system delivery_system;
 	bool warm; /* FW running */
 };
 
 static struct tda10071_modcod {
-	fe_delivery_system_t delivery_system;
-	fe_modulation_t modulation;
-	fe_code_rate_t fec;
+	enum fe_delivery_system delivery_system;
+	enum fe_modulation modulation;
+	enum fe_code_rate fec;
 	u8 val;
 } TDA10071_MODCOD[] = {
 	/* NBC-QPSK */
diff --git a/drivers/media/dvb-frontends/tda10086.c b/drivers/media/dvb-frontends/tda10086.c
index f1a752187d08..95a33e187f8e 100644
--- a/drivers/media/dvb-frontends/tda10086.c
+++ b/drivers/media/dvb-frontends/tda10086.c
@@ -185,7 +185,8 @@ static void tda10086_diseqc_wait(struct tda10086_state *state)
 	}
 }
 
-static int tda10086_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int tda10086_set_tone(struct dvb_frontend *fe,
+			     enum fe_sec_tone_mode tone)
 {
 	struct tda10086_state* state = fe->demodulator_priv;
 	u8 t22k_off = 0x80;
@@ -238,7 +239,8 @@ static int tda10086_send_master_cmd (struct dvb_frontend* fe,
 	return 0;
 }
 
-static int tda10086_send_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd)
+static int tda10086_send_burst(struct dvb_frontend *fe,
+			       enum fe_sec_mini_cmd minicmd)
 {
 	struct tda10086_state* state = fe->demodulator_priv;
 	u8 oldval = tda10086_read_byte(state, 0x36);
@@ -551,7 +553,8 @@ static int tda10086_get_frontend(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int tda10086_read_status(struct dvb_frontend* fe, fe_status_t *fe_status)
+static int tda10086_read_status(struct dvb_frontend *fe,
+				enum fe_status *fe_status)
 {
 	struct tda10086_state* state = fe->demodulator_priv;
 	u8 val;
diff --git a/drivers/media/dvb-frontends/tda8083.c b/drivers/media/dvb-frontends/tda8083.c
index 69e62f42e2e1..796543fa2c8d 100644
--- a/drivers/media/dvb-frontends/tda8083.c
+++ b/drivers/media/dvb-frontends/tda8083.c
@@ -97,7 +97,8 @@ static inline u8 tda8083_readreg (struct tda8083_state* state, u8 reg)
 	return val;
 }
 
-static int tda8083_set_inversion (struct tda8083_state* state, fe_spectral_inversion_t inversion)
+static int tda8083_set_inversion(struct tda8083_state *state,
+				 enum fe_spectral_inversion inversion)
 {
 	/*  XXX FIXME: implement other modes than FEC_AUTO */
 	if (inversion == INVERSION_AUTO)
@@ -106,7 +107,7 @@ static int tda8083_set_inversion (struct tda8083_state* state, fe_spectral_inver
 	return -EINVAL;
 }
 
-static int tda8083_set_fec (struct tda8083_state* state, fe_code_rate_t fec)
+static int tda8083_set_fec(struct tda8083_state *state, enum fe_code_rate fec)
 {
 	if (fec == FEC_AUTO)
 		return tda8083_writereg (state, 0x07, 0xff);
@@ -117,11 +118,13 @@ static int tda8083_set_fec (struct tda8083_state* state, fe_code_rate_t fec)
 	return -EINVAL;
 }
 
-static fe_code_rate_t tda8083_get_fec (struct tda8083_state* state)
+static enum fe_code_rate tda8083_get_fec(struct tda8083_state *state)
 {
 	u8 index;
-	static fe_code_rate_t fec_tab [] = { FEC_8_9, FEC_1_2, FEC_2_3, FEC_3_4,
-				       FEC_4_5, FEC_5_6, FEC_6_7, FEC_7_8 };
+	static enum fe_code_rate fec_tab[] = {
+		FEC_8_9, FEC_1_2, FEC_2_3, FEC_3_4,
+		FEC_4_5, FEC_5_6, FEC_6_7, FEC_7_8
+	};
 
 	index = tda8083_readreg(state, 0x0e) & 0x07;
 
@@ -178,7 +181,8 @@ static void tda8083_wait_diseqc_fifo (struct tda8083_state* state, int timeout)
 	}
 }
 
-static int tda8083_set_tone (struct tda8083_state* state, fe_sec_tone_mode_t tone)
+static int tda8083_set_tone(struct tda8083_state *state,
+			    enum fe_sec_tone_mode tone)
 {
 	tda8083_writereg (state, 0x26, 0xf1);
 
@@ -192,7 +196,8 @@ static int tda8083_set_tone (struct tda8083_state* state, fe_sec_tone_mode_t ton
 	}
 }
 
-static int tda8083_set_voltage (struct tda8083_state* state, fe_sec_voltage_t voltage)
+static int tda8083_set_voltage(struct tda8083_state *state,
+			       enum fe_sec_voltage voltage)
 {
 	switch (voltage) {
 	case SEC_VOLTAGE_13:
@@ -204,7 +209,8 @@ static int tda8083_set_voltage (struct tda8083_state* state, fe_sec_voltage_t vo
 	}
 }
 
-static int tda8083_send_diseqc_burst (struct tda8083_state* state, fe_sec_mini_cmd_t burst)
+static int tda8083_send_diseqc_burst(struct tda8083_state *state,
+				     enum fe_sec_mini_cmd burst)
 {
 	switch (burst) {
 	case SEC_MINI_A:
@@ -222,8 +228,8 @@ static int tda8083_send_diseqc_burst (struct tda8083_state* state, fe_sec_mini_c
 	return 0;
 }
 
-static int tda8083_send_diseqc_msg (struct dvb_frontend* fe,
-				    struct dvb_diseqc_master_cmd *m)
+static int tda8083_send_diseqc_msg(struct dvb_frontend *fe,
+				   struct dvb_diseqc_master_cmd *m)
 {
 	struct tda8083_state* state = fe->demodulator_priv;
 	int i;
@@ -240,7 +246,8 @@ static int tda8083_send_diseqc_msg (struct dvb_frontend* fe,
 	return 0;
 }
 
-static int tda8083_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int tda8083_read_status(struct dvb_frontend *fe,
+			       enum fe_status *status)
 {
 	struct tda8083_state* state = fe->demodulator_priv;
 
@@ -372,7 +379,8 @@ static int tda8083_init(struct dvb_frontend* fe)
 	return 0;
 }
 
-static int tda8083_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t burst)
+static int tda8083_diseqc_send_burst(struct dvb_frontend *fe,
+				     enum fe_sec_mini_cmd burst)
 {
 	struct tda8083_state* state = fe->demodulator_priv;
 
@@ -383,7 +391,8 @@ static int tda8083_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t
 	return 0;
 }
 
-static int tda8083_diseqc_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int tda8083_diseqc_set_tone(struct dvb_frontend *fe,
+				   enum fe_sec_tone_mode tone)
 {
 	struct tda8083_state* state = fe->demodulator_priv;
 
@@ -394,7 +403,8 @@ static int tda8083_diseqc_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t t
 	return 0;
 }
 
-static int tda8083_diseqc_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int tda8083_diseqc_set_voltage(struct dvb_frontend *fe,
+				      enum fe_sec_voltage voltage)
 {
 	struct tda8083_state* state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/ves1820.c b/drivers/media/dvb-frontends/ves1820.c
index bb42b563c42d..aacfdda3e005 100644
--- a/drivers/media/dvb-frontends/ves1820.c
+++ b/drivers/media/dvb-frontends/ves1820.c
@@ -90,7 +90,8 @@ static u8 ves1820_readreg(struct ves1820_state *state, u8 reg)
 	return b1[0];
 }
 
-static int ves1820_setup_reg0(struct ves1820_state *state, u8 reg0, fe_spectral_inversion_t inversion)
+static int ves1820_setup_reg0(struct ves1820_state *state,
+			      u8 reg0, enum fe_spectral_inversion inversion)
 {
 	reg0 |= state->reg0 & 0x62;
 
@@ -237,7 +238,8 @@ static int ves1820_set_parameters(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int ves1820_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int ves1820_read_status(struct dvb_frontend *fe,
+			       enum fe_status *status)
 {
 	struct ves1820_state* state = fe->demodulator_priv;
 	int sync;
diff --git a/drivers/media/dvb-frontends/ves1x93.c b/drivers/media/dvb-frontends/ves1x93.c
index 9c17eacaec24..526952396422 100644
--- a/drivers/media/dvb-frontends/ves1x93.c
+++ b/drivers/media/dvb-frontends/ves1x93.c
@@ -41,7 +41,7 @@ struct ves1x93_state {
 	struct dvb_frontend frontend;
 
 	/* previous uncorrected block counter */
-	fe_spectral_inversion_t inversion;
+	enum fe_spectral_inversion inversion;
 	u8 *init_1x93_tab;
 	u8 *init_1x93_wtab;
 	u8 tab_size;
@@ -130,7 +130,8 @@ static int ves1x93_clr_bit (struct ves1x93_state* state)
 	return 0;
 }
 
-static int ves1x93_set_inversion (struct ves1x93_state* state, fe_spectral_inversion_t inversion)
+static int ves1x93_set_inversion(struct ves1x93_state *state,
+				 enum fe_spectral_inversion inversion)
 {
 	u8 val;
 
@@ -156,7 +157,7 @@ static int ves1x93_set_inversion (struct ves1x93_state* state, fe_spectral_inver
 	return ves1x93_writereg (state, 0x0c, (state->init_1x93_tab[0x0c] & 0x3f) | val);
 }
 
-static int ves1x93_set_fec (struct ves1x93_state* state, fe_code_rate_t fec)
+static int ves1x93_set_fec(struct ves1x93_state *state, enum fe_code_rate fec)
 {
 	if (fec == FEC_AUTO)
 		return ves1x93_writereg (state, 0x0d, 0x08);
@@ -166,7 +167,7 @@ static int ves1x93_set_fec (struct ves1x93_state* state, fe_code_rate_t fec)
 		return ves1x93_writereg (state, 0x0d, fec - FEC_1_2);
 }
 
-static fe_code_rate_t ves1x93_get_fec (struct ves1x93_state* state)
+static enum fe_code_rate ves1x93_get_fec(struct ves1x93_state *state)
 {
 	return FEC_1_2 + ((ves1x93_readreg (state, 0x0d) >> 4) & 0x7);
 }
@@ -281,7 +282,8 @@ static int ves1x93_init (struct dvb_frontend* fe)
 	return 0;
 }
 
-static int ves1x93_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int ves1x93_set_voltage(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct ves1x93_state* state = fe->demodulator_priv;
 
@@ -297,7 +299,8 @@ static int ves1x93_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltag
 	}
 }
 
-static int ves1x93_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int ves1x93_read_status(struct dvb_frontend *fe,
+			       enum fe_status *status)
 {
 	struct ves1x93_state* state = fe->demodulator_priv;
 
diff --git a/drivers/media/dvb-frontends/zl10353.c b/drivers/media/dvb-frontends/zl10353.c
index 4e62a6611847..ef9764a02d4c 100644
--- a/drivers/media/dvb-frontends/zl10353.c
+++ b/drivers/media/dvb-frontends/zl10353.c
@@ -462,7 +462,7 @@ static int zl10353_get_parameters(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int zl10353_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int zl10353_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct zl10353_state *state = fe->demodulator_priv;
 	int s6, s7, s8;
diff --git a/drivers/media/firewire/firedtv-fe.c b/drivers/media/firewire/firedtv-fe.c
index 6fe9793b98b3..17acda6bcb6e 100644
--- a/drivers/media/firewire/firedtv-fe.c
+++ b/drivers/media/firewire/firedtv-fe.c
@@ -61,12 +61,12 @@ static int fdtv_diseqc_send_master_cmd(struct dvb_frontend *fe,
 }
 
 static int fdtv_diseqc_send_burst(struct dvb_frontend *fe,
-				  fe_sec_mini_cmd_t minicmd)
+				  enum fe_sec_mini_cmd minicmd)
 {
 	return 0;
 }
 
-static int fdtv_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int fdtv_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct firedtv *fdtv = fe->sec_priv;
 
@@ -75,7 +75,7 @@ static int fdtv_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
 }
 
 static int fdtv_set_voltage(struct dvb_frontend *fe,
-			    fe_sec_voltage_t voltage)
+			    enum fe_sec_voltage voltage)
 {
 	struct firedtv *fdtv = fe->sec_priv;
 
@@ -83,7 +83,7 @@ static int fdtv_set_voltage(struct dvb_frontend *fe,
 	return 0;
 }
 
-static int fdtv_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int fdtv_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct firedtv *fdtv = fe->sec_priv;
 	struct firedtv_tuner_status stat;
diff --git a/drivers/media/firewire/firedtv.h b/drivers/media/firewire/firedtv.h
index 346a85be6de2..345d1eda8c05 100644
--- a/drivers/media/firewire/firedtv.h
+++ b/drivers/media/firewire/firedtv.h
@@ -99,8 +99,8 @@ struct firedtv {
 	s8			isochannel;
 	struct fdtv_ir_context	*ir_context;
 
-	fe_sec_voltage_t	voltage;
-	fe_sec_tone_mode_t	tone;
+	enum fe_sec_voltage	voltage;
+	enum fe_sec_tone_mode	tone;
 
 	struct mutex		demux_mutex;
 	unsigned long		channel_active;
diff --git a/drivers/media/pci/bt8xx/dst.c b/drivers/media/pci/bt8xx/dst.c
index f2261dfe5d1a..4a90eee5e3bb 100644
--- a/drivers/media/pci/bt8xx/dst.c
+++ b/drivers/media/pci/bt8xx/dst.c
@@ -425,7 +425,8 @@ static int dst_set_bandwidth(struct dst_state *state, u32 bandwidth)
 	return 0;
 }
 
-static int dst_set_inversion(struct dst_state *state, fe_spectral_inversion_t inversion)
+static int dst_set_inversion(struct dst_state *state,
+			     enum fe_spectral_inversion inversion)
 {
 	state->inversion = inversion;
 	switch (inversion) {
@@ -442,13 +443,13 @@ static int dst_set_inversion(struct dst_state *state, fe_spectral_inversion_t in
 	return 0;
 }
 
-static int dst_set_fec(struct dst_state *state, fe_code_rate_t fec)
+static int dst_set_fec(struct dst_state *state, enum fe_code_rate fec)
 {
 	state->fec = fec;
 	return 0;
 }
 
-static fe_code_rate_t dst_get_fec(struct dst_state *state)
+static enum fe_code_rate dst_get_fec(struct dst_state *state)
 {
 	return state->fec;
 }
@@ -499,7 +500,8 @@ static int dst_set_symbolrate(struct dst_state *state, u32 srate)
 	return 0;
 }
 
-static int dst_set_modulation(struct dst_state *state, fe_modulation_t modulation)
+static int dst_set_modulation(struct dst_state *state,
+			      enum fe_modulation modulation)
 {
 	if (state->dst_type != DST_TYPE_IS_CABLE)
 		return -EOPNOTSUPP;
@@ -536,7 +538,7 @@ static int dst_set_modulation(struct dst_state *state, fe_modulation_t modulatio
 	return 0;
 }
 
-static fe_modulation_t dst_get_modulation(struct dst_state *state)
+static enum fe_modulation dst_get_modulation(struct dst_state *state)
 {
 	return state->modulation;
 }
@@ -1376,7 +1378,8 @@ static int dst_get_tuna(struct dst_state *state)
 	return 1;
 }
 
-static int dst_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage);
+static int dst_set_voltage(struct dvb_frontend *fe,
+			   enum fe_sec_voltage voltage);
 
 static int dst_write_tuna(struct dvb_frontend *fe)
 {
@@ -1466,7 +1469,7 @@ static int dst_set_diseqc(struct dvb_frontend *fe, struct dvb_diseqc_master_cmd
 	return dst_command(state, paket, 8);
 }
 
-static int dst_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int dst_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage)
 {
 	int need_cmd, retval = 0;
 	struct dst_state *state = fe->demodulator_priv;
@@ -1500,7 +1503,7 @@ static int dst_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
 	return retval;
 }
 
-static int dst_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
+static int dst_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct dst_state *state = fe->demodulator_priv;
 
@@ -1525,7 +1528,7 @@ static int dst_set_tone(struct dvb_frontend *fe, fe_sec_tone_mode_t tone)
 	return dst_tone_power_cmd(state);
 }
 
-static int dst_send_burst(struct dvb_frontend *fe, fe_sec_mini_cmd_t minicmd)
+static int dst_send_burst(struct dvb_frontend *fe, enum fe_sec_mini_cmd minicmd)
 {
 	struct dst_state *state = fe->demodulator_priv;
 
@@ -1575,7 +1578,7 @@ static int bt8xx_dst_init(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int dst_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int dst_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct dst_state *state = fe->demodulator_priv;
 
@@ -1646,7 +1649,7 @@ static int dst_tune_frontend(struct dvb_frontend* fe,
 			    bool re_tune,
 			    unsigned int mode_flags,
 			    unsigned int *delay,
-			    fe_status_t *status)
+			    enum fe_status *status)
 {
 	struct dst_state *state = fe->demodulator_priv;
 	struct dtv_frontend_properties *p = &fe->dtv_property_cache;
diff --git a/drivers/media/pci/bt8xx/dst_common.h b/drivers/media/pci/bt8xx/dst_common.h
index d70d98f1a571..6a2cfdd44e3e 100644
--- a/drivers/media/pci/bt8xx/dst_common.h
+++ b/drivers/media/pci/bt8xx/dst_common.h
@@ -113,11 +113,11 @@ struct dst_state {
 	u8 dst_type;
 	u32 type_flags;
 	u32 frequency;		/* intermediate frequency in kHz for QPSK */
-	fe_spectral_inversion_t inversion;
+	enum fe_spectral_inversion inversion;
 	u32 symbol_rate;	/* symbol rate in Symbols per second */
-	fe_code_rate_t fec;
-	fe_sec_voltage_t voltage;
-	fe_sec_tone_mode_t tone;
+	enum fe_code_rate fec;
+	enum fe_sec_voltage voltage;
+	enum fe_sec_tone_mode tone;
 	u32 decode_freq;
 	u8 decode_lock;
 	u16 decode_strength;
@@ -127,8 +127,8 @@ struct dst_state {
 	u32 bandwidth;
 	u32 dst_hw_cap;
 	u8 dst_fw_version;
-	fe_sec_mini_cmd_t minicmd;
-	fe_modulation_t modulation;
+	enum fe_sec_mini_cmd minicmd;
+	enum fe_modulation modulation;
 	u8 messages[256];
 	u8 mac_address[8];
 	u8 fw_version[8];
diff --git a/drivers/media/pci/cx23885/cx23885-dvb.c b/drivers/media/pci/cx23885/cx23885-dvb.c
index 9f377ad5e845..a77c2d3b50fb 100644
--- a/drivers/media/pci/cx23885/cx23885-dvb.c
+++ b/drivers/media/pci/cx23885/cx23885-dvb.c
@@ -572,7 +572,8 @@ static struct stb6100_config prof_8000_stb6100_config = {
 	.refclock = 27000000,
 };
 
-static int p8000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int p8000_set_voltage(struct dvb_frontend *fe,
+			     enum fe_sec_voltage voltage)
 {
 	struct cx23885_tsport *port = fe->dvb->priv;
 	struct cx23885_dev *dev = port->dev;
@@ -587,7 +588,7 @@ static int p8000_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
 }
 
 static int dvbsky_t9580_set_voltage(struct dvb_frontend *fe,
-					fe_sec_voltage_t voltage)
+					enum fe_sec_voltage voltage)
 {
 	struct cx23885_tsport *port = fe->dvb->priv;
 	struct cx23885_dev *dev = port->dev;
@@ -616,7 +617,7 @@ static int dvbsky_t9580_set_voltage(struct dvb_frontend *fe,
 }
 
 static int dvbsky_s952_portc_set_voltage(struct dvb_frontend *fe,
-					fe_sec_voltage_t voltage)
+					enum fe_sec_voltage voltage)
 {
 	struct cx23885_tsport *port = fe->dvb->priv;
 	struct cx23885_dev *dev = port->dev;
@@ -1186,7 +1187,8 @@ static int dvb_register(struct cx23885_tsport *port)
 	struct i2c_client *client_demod = NULL, *client_tuner = NULL;
 	struct i2c_client *client_sec = NULL;
 	const struct m88ds3103_config *p_m88ds3103_config = NULL;
-	int (*p_set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage) = NULL;
+	int (*p_set_voltage)(struct dvb_frontend *fe,
+			     enum fe_sec_voltage voltage) = NULL;
 	int mfe_shared = 0; /* bus not shared by default */
 	int ret;
 
diff --git a/drivers/media/pci/cx23885/cx23885-f300.c b/drivers/media/pci/cx23885/cx23885-f300.c
index 6f817d8732da..a6c45eb0a105 100644
--- a/drivers/media/pci/cx23885/cx23885-f300.c
+++ b/drivers/media/pci/cx23885/cx23885-f300.c
@@ -144,7 +144,7 @@ static u8 f300_xfer(struct dvb_frontend *fe, u8 *buf)
 	return ret;
 }
 
-int f300_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+int f300_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage)
 {
 	u8 buf[16];
 
diff --git a/drivers/media/pci/cx23885/cx23885-f300.h b/drivers/media/pci/cx23885/cx23885-f300.h
index e73344c94963..be14d7de7cd8 100644
--- a/drivers/media/pci/cx23885/cx23885-f300.h
+++ b/drivers/media/pci/cx23885/cx23885-f300.h
@@ -1,2 +1,2 @@
 extern int f300_set_voltage(struct dvb_frontend *fe,
-				fe_sec_voltage_t voltage);
+			    enum fe_sec_voltage voltage);
diff --git a/drivers/media/pci/cx23885/cx23885.h b/drivers/media/pci/cx23885/cx23885.h
index 81e25194986b..027ead438194 100644
--- a/drivers/media/pci/cx23885/cx23885.h
+++ b/drivers/media/pci/cx23885/cx23885.h
@@ -309,7 +309,7 @@ struct cx23885_tsport {
 
 	int (*set_frontend)(struct dvb_frontend *fe);
 	int (*fe_set_voltage)(struct dvb_frontend *fe,
-				fe_sec_voltage_t voltage);
+			      enum fe_sec_voltage voltage);
 };
 
 struct cx23885_kernel_ir {
diff --git a/drivers/media/pci/cx88/cx88-dvb.c b/drivers/media/pci/cx88/cx88-dvb.c
index 1b2ed238cdb6..9dfa5ee32a8f 100644
--- a/drivers/media/pci/cx88/cx88-dvb.c
+++ b/drivers/media/pci/cx88/cx88-dvb.c
@@ -449,7 +449,7 @@ static int cx24123_set_ts_param(struct dvb_frontend* fe,
 }
 
 static int kworld_dvbs_100_set_voltage(struct dvb_frontend* fe,
-				       fe_sec_voltage_t voltage)
+				       enum fe_sec_voltage voltage)
 {
 	struct cx8802_dev *dev= fe->dvb->priv;
 	struct cx88_core *core = dev->core;
@@ -465,7 +465,7 @@ static int kworld_dvbs_100_set_voltage(struct dvb_frontend* fe,
 }
 
 static int geniatech_dvbs_set_voltage(struct dvb_frontend *fe,
-				      fe_sec_voltage_t voltage)
+				      enum fe_sec_voltage voltage)
 {
 	struct cx8802_dev *dev= fe->dvb->priv;
 	struct cx88_core *core = dev->core;
@@ -481,7 +481,7 @@ static int geniatech_dvbs_set_voltage(struct dvb_frontend *fe,
 }
 
 static int tevii_dvbs_set_voltage(struct dvb_frontend *fe,
-				      fe_sec_voltage_t voltage)
+				  enum fe_sec_voltage voltage)
 {
 	struct cx8802_dev *dev= fe->dvb->priv;
 	struct cx88_core *core = dev->core;
@@ -505,7 +505,7 @@ static int tevii_dvbs_set_voltage(struct dvb_frontend *fe,
 }
 
 static int vp1027_set_voltage(struct dvb_frontend *fe,
-				    fe_sec_voltage_t voltage)
+			      enum fe_sec_voltage voltage)
 {
 	struct cx8802_dev *dev = fe->dvb->priv;
 	struct cx88_core *core = dev->core;
@@ -897,7 +897,7 @@ static int samsung_smt_7020_tuner_set_params(struct dvb_frontend *fe)
 }
 
 static int samsung_smt_7020_set_tone(struct dvb_frontend *fe,
-	fe_sec_tone_mode_t tone)
+	enum fe_sec_tone_mode tone)
 {
 	struct cx8802_dev *dev = fe->dvb->priv;
 	struct cx88_core *core = dev->core;
@@ -919,7 +919,7 @@ static int samsung_smt_7020_set_tone(struct dvb_frontend *fe,
 }
 
 static int samsung_smt_7020_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t voltage)
+					enum fe_sec_voltage voltage)
 {
 	struct cx8802_dev *dev = fe->dvb->priv;
 	struct cx88_core *core = dev->core;
diff --git a/drivers/media/pci/cx88/cx88.h b/drivers/media/pci/cx88/cx88.h
index e75547827c52..785fe2e0d702 100644
--- a/drivers/media/pci/cx88/cx88.h
+++ b/drivers/media/pci/cx88/cx88.h
@@ -375,9 +375,10 @@ struct cx88_core {
 
 	/* config info -- dvb */
 #if IS_ENABLED(CONFIG_VIDEO_CX88_DVB)
-	int 			   (*prev_set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage);
+	int	(*prev_set_voltage)(struct dvb_frontend *fe,
+				    enum fe_sec_voltage voltage);
 #endif
-	void			   (*gate_ctrl)(struct cx88_core  *core, int open);
+	void	(*gate_ctrl)(struct cx88_core *core, int open);
 
 	/* state info */
 	struct task_struct         *kthread;
diff --git a/drivers/media/pci/dm1105/dm1105.c b/drivers/media/pci/dm1105/dm1105.c
index ed11716731e9..88915fb87e80 100644
--- a/drivers/media/pci/dm1105/dm1105.c
+++ b/drivers/media/pci/dm1105/dm1105.c
@@ -591,7 +591,8 @@ static inline struct dm1105_dev *frontend_to_dm1105_dev(struct dvb_frontend *fe)
 	return container_of(fe->dvb, struct dm1105_dev, dvb_adapter);
 }
 
-static int dm1105_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int dm1105_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage)
 {
 	struct dm1105_dev *dev = frontend_to_dm1105_dev(fe);
 
diff --git a/drivers/media/pci/mantis/mantis_vp1034.c b/drivers/media/pci/mantis/mantis_vp1034.c
index 7c1bd167225c..3b1928594b12 100644
--- a/drivers/media/pci/mantis/mantis_vp1034.c
+++ b/drivers/media/pci/mantis/mantis_vp1034.c
@@ -44,7 +44,7 @@ static struct mb86a16_config vp1034_mb86a16_config = {
 #define MANTIS_MODEL_NAME	"VP-1034"
 #define MANTIS_DEV_TYPE		"DVB-S/DSS"
 
-int vp1034_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+int vp1034_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage)
 {
 	struct mantis_pci *mantis = fe->dvb->priv;
 
diff --git a/drivers/media/pci/mantis/mantis_vp1034.h b/drivers/media/pci/mantis/mantis_vp1034.h
index 323f38ef8e3d..764b1c66ea1b 100644
--- a/drivers/media/pci/mantis/mantis_vp1034.h
+++ b/drivers/media/pci/mantis/mantis_vp1034.h
@@ -28,6 +28,7 @@
 #define MANTIS_VP_1034_DVB_S	0x0014
 
 extern struct mantis_hwconfig vp1034_config;
-extern int vp1034_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage);
+extern int vp1034_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage);
 
 #endif /* __MANTIS_VP1034_H */
diff --git a/drivers/media/pci/ngene/ngene.h b/drivers/media/pci/ngene/ngene.h
index 51e2fbd18b1b..fa30930d7047 100644
--- a/drivers/media/pci/ngene/ngene.h
+++ b/drivers/media/pci/ngene/ngene.h
@@ -682,7 +682,7 @@ struct ngene_channel {
 	int                   AudioDTOUpdated;
 	u32                   AudioDTOValue;
 
-	int (*set_tone)(struct dvb_frontend *, fe_sec_tone_mode_t);
+	int (*set_tone)(struct dvb_frontend *, enum fe_sec_tone_mode);
 	u8 lnbh;
 
 	/* stuff from analog driver */
diff --git a/drivers/media/pci/pt1/pt1.c b/drivers/media/pci/pt1/pt1.c
index acc35b42e53c..e7e4428109c3 100644
--- a/drivers/media/pci/pt1/pt1.c
+++ b/drivers/media/pci/pt1/pt1.c
@@ -101,11 +101,11 @@ struct pt1_adapter {
 	struct dmxdev dmxdev;
 	struct dvb_frontend *fe;
 	int (*orig_set_voltage)(struct dvb_frontend *fe,
-				fe_sec_voltage_t voltage);
+				enum fe_sec_voltage voltage);
 	int (*orig_sleep)(struct dvb_frontend *fe);
 	int (*orig_init)(struct dvb_frontend *fe);
 
-	fe_sec_voltage_t voltage;
+	enum fe_sec_voltage voltage;
 	int sleep;
 };
 
@@ -575,7 +575,7 @@ pt1_update_power(struct pt1 *pt1)
 	mutex_unlock(&pt1->lock);
 }
 
-static int pt1_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int pt1_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage voltage)
 {
 	struct pt1_adapter *adap;
 
diff --git a/drivers/media/pci/pt1/va1j5jf8007s.c b/drivers/media/pci/pt1/va1j5jf8007s.c
index 1b637b74ef58..d0e70dc0e16f 100644
--- a/drivers/media/pci/pt1/va1j5jf8007s.c
+++ b/drivers/media/pci/pt1/va1j5jf8007s.c
@@ -108,7 +108,7 @@ static int va1j5jf8007s_get_frontend_algo(struct dvb_frontend *fe)
 }
 
 static int
-va1j5jf8007s_read_status(struct dvb_frontend *fe, fe_status_t *status)
+va1j5jf8007s_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct va1j5jf8007s_state *state;
 
@@ -387,7 +387,7 @@ static int
 va1j5jf8007s_tune(struct dvb_frontend *fe,
 		  bool re_tune,
 		  unsigned int mode_flags,  unsigned int *delay,
-		  fe_status_t *status)
+		  enum fe_status *status)
 {
 	struct va1j5jf8007s_state *state;
 	int ret;
diff --git a/drivers/media/pci/pt1/va1j5jf8007t.c b/drivers/media/pci/pt1/va1j5jf8007t.c
index 2db15159d514..0268f20b8097 100644
--- a/drivers/media/pci/pt1/va1j5jf8007t.c
+++ b/drivers/media/pci/pt1/va1j5jf8007t.c
@@ -98,7 +98,7 @@ static int va1j5jf8007t_get_frontend_algo(struct dvb_frontend *fe)
 }
 
 static int
-va1j5jf8007t_read_status(struct dvb_frontend *fe, fe_status_t *status)
+va1j5jf8007t_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct va1j5jf8007t_state *state;
 
@@ -266,7 +266,7 @@ static int
 va1j5jf8007t_tune(struct dvb_frontend *fe,
 		  bool re_tune,
 		  unsigned int mode_flags,  unsigned int *delay,
-		  fe_status_t *status)
+		  enum fe_status *status)
 {
 	struct va1j5jf8007t_state *state;
 	int ret;
diff --git a/drivers/media/pci/pt3/pt3.c b/drivers/media/pci/pt3/pt3.c
index 7a37e8fe2ee2..0d2e2b217121 100644
--- a/drivers/media/pci/pt3/pt3.c
+++ b/drivers/media/pci/pt3/pt3.c
@@ -188,7 +188,7 @@ static int pt3_set_lna(struct dvb_frontend *fe)
 	return ret;
 }
 
-static int pt3_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t volt)
+static int pt3_set_voltage(struct dvb_frontend *fe, enum fe_sec_voltage volt)
 {
 	struct pt3_adapter *adap;
 	struct pt3_board *pt3;
diff --git a/drivers/media/pci/saa7134/saa7134-dvb.c b/drivers/media/pci/saa7134/saa7134-dvb.c
index d47fb22e12f2..101ba8729416 100644
--- a/drivers/media/pci/saa7134/saa7134-dvb.c
+++ b/drivers/media/pci/saa7134/saa7134-dvb.c
@@ -987,7 +987,8 @@ static struct tda10086_config sd1878_4m = {
  * special case: lnb supply is connected to the gated i2c
  */
 
-static int md8800_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int md8800_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage)
 {
 	int res = -EIO;
 	struct saa7134_dev *dev = fe->dvb->priv;
@@ -1013,7 +1014,8 @@ static int md8800_set_high_voltage(struct dvb_frontend *fe, long arg)
 	return res;
 };
 
-static int md8800_set_voltage2(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int md8800_set_voltage2(struct dvb_frontend *fe,
+			       enum fe_sec_voltage voltage)
 {
 	struct saa7134_dev *dev = fe->dvb->priv;
 	u8 wbuf[2] = { 0x1f, 00 };
diff --git a/drivers/media/pci/saa7134/saa7134.h b/drivers/media/pci/saa7134/saa7134.h
index 6fec01711680..f682ba9b34a7 100644
--- a/drivers/media/pci/saa7134/saa7134.h
+++ b/drivers/media/pci/saa7134/saa7134.h
@@ -656,7 +656,8 @@ struct saa7134_dev {
 	/* SAA7134_MPEG_DVB only */
 	struct vb2_dvb_frontends frontends;
 	int (*original_demod_sleep)(struct dvb_frontend *fe);
-	int (*original_set_voltage)(struct dvb_frontend *fe, fe_sec_voltage_t voltage);
+	int (*original_set_voltage)(struct dvb_frontend *fe,
+				    enum fe_sec_voltage voltage);
 	int (*original_set_high_voltage)(struct dvb_frontend *fe, long arg);
 #endif
 	void (*gate_ctrl)(struct saa7134_dev *dev, int open);
diff --git a/drivers/media/pci/ttpci/av7110.c b/drivers/media/pci/ttpci/av7110.c
index 45199a12b9d9..3f24fce74fc1 100644
--- a/drivers/media/pci/ttpci/av7110.c
+++ b/drivers/media/pci/ttpci/av7110.c
@@ -1172,7 +1172,7 @@ static int dvb_get_stc(struct dmx_demux *demux, unsigned int num,
  ******************************************************************************/
 
 
-static int av7110_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int av7110_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct av7110* av7110 = fe->dvb->priv;
 
@@ -1197,7 +1197,7 @@ static int av7110_diseqc_send_master_cmd(struct dvb_frontend* fe,
 }
 
 static int av7110_diseqc_send_burst(struct dvb_frontend* fe,
-				    fe_sec_mini_cmd_t minicmd)
+				    enum fe_sec_mini_cmd minicmd)
 {
 	struct av7110* av7110 = fe->dvb->priv;
 
@@ -1946,7 +1946,7 @@ static struct l64781_config grundig_29504_401_config = {
 
 
-static int av7110_fe_lock_fix(struct av7110* av7110, fe_status_t status)
+static int av7110_fe_lock_fix(struct av7110 *av7110, enum fe_status status)
 {
 	int ret = 0;
 	int synced = (status & FE_HAS_LOCK) ? 1 : 0;
@@ -2008,7 +2008,8 @@ static int av7110_fe_init(struct dvb_frontend* fe)
 	return ret;
 }
 
-static int av7110_fe_read_status(struct dvb_frontend* fe, fe_status_t* status)
+static int av7110_fe_read_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct av7110* av7110 = fe->dvb->priv;
 
@@ -2043,7 +2044,8 @@ static int av7110_fe_diseqc_send_master_cmd(struct dvb_frontend* fe,
 	return ret;
 }
 
-static int av7110_fe_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd)
+static int av7110_fe_diseqc_send_burst(struct dvb_frontend *fe,
+				       enum fe_sec_mini_cmd minicmd)
 {
 	struct av7110* av7110 = fe->dvb->priv;
 
@@ -2055,7 +2057,8 @@ static int av7110_fe_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_
 	return ret;
 }
 
-static int av7110_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int av7110_fe_set_tone(struct dvb_frontend *fe,
+			      enum fe_sec_tone_mode tone)
 {
 	struct av7110* av7110 = fe->dvb->priv;
 
@@ -2067,7 +2070,8 @@ static int av7110_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
 	return ret;
 }
 
-static int av7110_fe_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int av7110_fe_set_voltage(struct dvb_frontend *fe,
+				 enum fe_sec_voltage voltage)
 {
 	struct av7110* av7110 = fe->dvb->priv;
 
diff --git a/drivers/media/pci/ttpci/av7110.h b/drivers/media/pci/ttpci/av7110.h
index 835635b0c712..3a55927edb95 100644
--- a/drivers/media/pci/ttpci/av7110.h
+++ b/drivers/media/pci/ttpci/av7110.h
@@ -269,25 +269,30 @@ struct av7110 {
 	unsigned long size_root;
 
 	struct dvb_frontend* fe;
-	fe_status_t fe_status;
+	enum fe_status fe_status;
 
 	struct mutex ioctl_mutex;
 
 	/* crash recovery */
 	void				(*recover)(struct av7110* av7110);
-	fe_sec_voltage_t		saved_voltage;
-	fe_sec_tone_mode_t		saved_tone;
+	enum fe_sec_voltage		saved_voltage;
+	enum fe_sec_tone_mode		saved_tone;
 	struct dvb_diseqc_master_cmd	saved_master_cmd;
-	fe_sec_mini_cmd_t		saved_minicmd;
+	enum fe_sec_mini_cmd		saved_minicmd;
 
 	int (*fe_init)(struct dvb_frontend* fe);
-	int (*fe_read_status)(struct dvb_frontend* fe, fe_status_t* status);
-	int (*fe_diseqc_reset_overload)(struct dvb_frontend* fe);
-	int (*fe_diseqc_send_master_cmd)(struct dvb_frontend* fe, struct dvb_diseqc_master_cmd* cmd);
-	int (*fe_diseqc_send_burst)(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd);
-	int (*fe_set_tone)(struct dvb_frontend* fe, fe_sec_tone_mode_t tone);
-	int (*fe_set_voltage)(struct dvb_frontend* fe, fe_sec_voltage_t voltage);
-	int (*fe_dishnetwork_send_legacy_command)(struct dvb_frontend* fe, unsigned long cmd);
+	int (*fe_read_status)(struct dvb_frontend *fe, enum fe_status *status);
+	int (*fe_diseqc_reset_overload)(struct dvb_frontend *fe);
+	int (*fe_diseqc_send_master_cmd)(struct dvb_frontend *fe,
+					 struct dvb_diseqc_master_cmd *cmd);
+	int (*fe_diseqc_send_burst)(struct dvb_frontend *fe,
+				    enum fe_sec_mini_cmd minicmd);
+	int (*fe_set_tone)(struct dvb_frontend *fe,
+			   enum fe_sec_tone_mode tone);
+	int (*fe_set_voltage)(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage);
+	int (*fe_dishnetwork_send_legacy_command)(struct dvb_frontend *fe,
+						  unsigned long cmd);
 	int (*fe_set_frontend)(struct dvb_frontend *fe);
 };
 
diff --git a/drivers/media/pci/ttpci/budget-core.c b/drivers/media/pci/ttpci/budget-core.c
index 23e05499b509..e9674b40007c 100644
--- a/drivers/media/pci/ttpci/budget-core.c
+++ b/drivers/media/pci/ttpci/budget-core.c
@@ -161,7 +161,8 @@ static int start_ts_capture(struct budget *budget)
 	return 0;
 }
 
-static int budget_read_fe_status(struct dvb_frontend *fe, fe_status_t *status)
+static int budget_read_fe_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct budget *budget = (struct budget *) fe->dvb->priv;
 	int synced;
diff --git a/drivers/media/pci/ttpci/budget-patch.c b/drivers/media/pci/ttpci/budget-patch.c
index a4d8867e1d7b..b5b65962ce8f 100644
--- a/drivers/media/pci/ttpci/budget-patch.c
+++ b/drivers/media/pci/ttpci/budget-patch.c
@@ -128,9 +128,9 @@ static int SendDiSEqCMsg (struct budget *budget, int len, u8 *msg, unsigned long
 	return 0;
 }
 
-/* shamelessly copy/pasted from budget.c
-*/
-static int budget_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+/* shamelessly copy/pasted from budget.c */
+static int budget_set_tone(struct dvb_frontend *fe,
+			   enum fe_sec_tone_mode tone)
 {
 	struct budget* budget = (struct budget*) fe->dvb->priv;
 
@@ -159,7 +159,8 @@ static int budget_diseqc_send_master_cmd(struct dvb_frontend* fe, struct dvb_dis
 	return 0;
 }
 
-static int budget_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd)
+static int budget_diseqc_send_burst(struct dvb_frontend *fe,
+				    enum fe_sec_mini_cmd minicmd)
 {
 	struct budget* budget = (struct budget*) fe->dvb->priv;
 
@@ -223,7 +224,8 @@ static int av7110_send_diseqc_msg(struct budget_patch *budget, int len, u8 *msg,
 	return 0;
 }
 
-static int budget_patch_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int budget_patch_set_tone(struct dvb_frontend *fe,
+				 enum fe_sec_tone_mode tone)
 {
 	struct budget_patch* budget = (struct budget_patch*) fe->dvb->priv;
 
@@ -252,7 +254,8 @@ static int budget_patch_diseqc_send_master_cmd(struct dvb_frontend* fe, struct d
 	return 0;
 }
 
-static int budget_patch_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd)
+static int budget_patch_diseqc_send_burst(struct dvb_frontend *fe,
+					  enum fe_sec_mini_cmd minicmd)
 {
 	struct budget_patch* budget = (struct budget_patch*) fe->dvb->priv;
 
diff --git a/drivers/media/pci/ttpci/budget.c b/drivers/media/pci/ttpci/budget.c
index 6ccc48833fd8..99972beca262 100644
--- a/drivers/media/pci/ttpci/budget.c
+++ b/drivers/media/pci/ttpci/budget.c
@@ -132,7 +132,8 @@ static int SendDiSEqCMsg (struct budget *budget, int len, u8 *msg, unsigned long
  *   Voltage must be set here.
  *   GPIO 1: LNBP EN, GPIO 2: LNBP VSEL
  */
-static int SetVoltage_Activy (struct budget *budget, fe_sec_voltage_t voltage)
+static int SetVoltage_Activy(struct budget *budget,
+			     enum fe_sec_voltage voltage)
 {
 	struct saa7146_dev *dev=budget->dev;
 
@@ -157,14 +158,16 @@ static int SetVoltage_Activy (struct budget *budget, fe_sec_voltage_t voltage)
 	return 0;
 }
 
-static int siemens_budget_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int siemens_budget_set_voltage(struct dvb_frontend *fe,
+				      enum fe_sec_voltage voltage)
 {
 	struct budget* budget = (struct budget*) fe->dvb->priv;
 
 	return SetVoltage_Activy (budget, voltage);
 }
 
-static int budget_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int budget_set_tone(struct dvb_frontend *fe,
+			   enum fe_sec_tone_mode tone)
 {
 	struct budget* budget = (struct budget*) fe->dvb->priv;
 
@@ -193,7 +196,8 @@ static int budget_diseqc_send_master_cmd(struct dvb_frontend* fe, struct dvb_dis
 	return 0;
 }
 
-static int budget_diseqc_send_burst(struct dvb_frontend* fe, fe_sec_mini_cmd_t minicmd)
+static int budget_diseqc_send_burst(struct dvb_frontend *fe,
+				    enum fe_sec_mini_cmd minicmd)
 {
 	struct budget* budget = (struct budget*) fe->dvb->priv;
 
diff --git a/drivers/media/pci/ttpci/budget.h b/drivers/media/pci/ttpci/budget.h
index 3d8a806c20bb..1ccbe1a49a4b 100644
--- a/drivers/media/pci/ttpci/budget.h
+++ b/drivers/media/pci/ttpci/budget.h
@@ -72,7 +72,7 @@ struct budget {
 
 	struct dvb_adapter dvb_adapter;
 	struct dvb_frontend *dvb_frontend;
-	int (*read_fe_status)(struct dvb_frontend *fe, fe_status_t *status);
+	int (*read_fe_status)(struct dvb_frontend *fe, enum fe_status *status);
 	int fe_synced;
 
 	void *priv;
diff --git a/drivers/media/usb/dvb-usb-v2/af9015.c b/drivers/media/usb/dvb-usb-v2/af9015.c
index 16c0b7d4f8e7..95a7388e89d4 100644
--- a/drivers/media/usb/dvb-usb-v2/af9015.c
+++ b/drivers/media/usb/dvb-usb-v2/af9015.c
@@ -641,7 +641,7 @@ static int af9015_af9013_set_frontend(struct dvb_frontend *fe)
 
 /* override demod callbacks for resource locking */
 static int af9015_af9013_read_status(struct dvb_frontend *fe,
-	fe_status_t *status)
+	enum fe_status *status)
 {
 	int ret;
 	struct af9015_state *state = fe_to_priv(fe);
diff --git a/drivers/media/usb/dvb-usb-v2/af9015.h b/drivers/media/usb/dvb-usb-v2/af9015.h
index 3a6f3ad1eadb..1db1bb0d57bc 100644
--- a/drivers/media/usb/dvb-usb-v2/af9015.h
+++ b/drivers/media/usb/dvb-usb-v2/af9015.h
@@ -133,7 +133,7 @@ struct af9015_state {
 
 	/* for demod callback override */
 	int (*set_frontend[2]) (struct dvb_frontend *fe);
-	int (*read_status[2]) (struct dvb_frontend *fe, fe_status_t *status);
+	int (*read_status[2]) (struct dvb_frontend *fe, enum fe_status *status);
 	int (*init[2]) (struct dvb_frontend *fe);
 	int (*sleep[2]) (struct dvb_frontend *fe);
 	int (*tuner_init[2]) (struct dvb_frontend *fe);
diff --git a/drivers/media/usb/dvb-usb-v2/dvbsky.c b/drivers/media/usb/dvb-usb-v2/dvbsky.c
index 57c8c2db9f2d..5cc01bbdede9 100644
--- a/drivers/media/usb/dvb-usb-v2/dvbsky.c
+++ b/drivers/media/usb/dvb-usb-v2/dvbsky.c
@@ -45,9 +45,9 @@ struct dvbsky_state {
 
 	/* fe hook functions*/
 	int (*fe_set_voltage)(struct dvb_frontend *fe,
-		fe_sec_voltage_t voltage);
+		enum fe_sec_voltage voltage);
 	int (*fe_read_status)(struct dvb_frontend *fe,
-		fe_status_t *status);
+		enum fe_status *status);
 };
 
 static int dvbsky_usb_generic_rw(struct dvb_usb_device *d,
@@ -237,7 +237,7 @@ static int dvbsky_get_rc_config(struct dvb_usb_device *d, struct dvb_usb_rc *rc)
 #endif
 
 static int dvbsky_usb_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t voltage)
+	enum fe_sec_voltage voltage)
 {
 	struct dvb_usb_device *d = fe_to_d(fe);
 	struct dvbsky_state *state = d_to_priv(d);
@@ -277,7 +277,8 @@ static int dvbsky_read_mac_addr(struct dvb_usb_adapter *adap, u8 mac[6])
 	return 0;
 }
 
-static int dvbsky_usb_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int dvbsky_usb_read_status(struct dvb_frontend *fe,
+				  enum fe_status *status)
 {
 	struct dvb_usb_device *d = fe_to_d(fe);
 	struct dvbsky_state *state = d_to_priv(d);
@@ -368,7 +369,7 @@ fail_attach:
 }
 
 static int dvbsky_usb_ci_set_voltage(struct dvb_frontend *fe,
-	fe_sec_voltage_t voltage)
+	enum fe_sec_voltage voltage)
 {
 	struct dvb_usb_device *d = fe_to_d(fe);
 	struct dvbsky_state *state = d_to_priv(d);
diff --git a/drivers/media/usb/dvb-usb-v2/lmedm04.c b/drivers/media/usb/dvb-usb-v2/lmedm04.c
index 5de6f7c04d09..1b6ca42ad116 100644
--- a/drivers/media/usb/dvb-usb-v2/lmedm04.c
+++ b/drivers/media/usb/dvb-usb-v2/lmedm04.c
@@ -126,7 +126,7 @@ DVB_DEFINE_MOD_OPT_ADAPTER_NR(adapter_nr);
 
 struct lme2510_state {
 	unsigned long int_urb_due;
-	fe_status_t lock_status;
+	enum fe_status lock_status;
 	u8 id;
 	u8 tuner_config;
 	u8 signal_level;
@@ -144,12 +144,12 @@ struct lme2510_state {
 	struct urb *lme_urb;
 	void *usb_buffer;
 	/* Frontend original calls */
-	int (*fe_read_status)(struct dvb_frontend *, fe_status_t *);
+	int (*fe_read_status)(struct dvb_frontend *, enum fe_status *);
 	int (*fe_read_signal_strength)(struct dvb_frontend *, u16 *);
 	int (*fe_read_snr)(struct dvb_frontend *, u16 *);
 	int (*fe_read_ber)(struct dvb_frontend *, u32 *);
 	int (*fe_read_ucblocks)(struct dvb_frontend *, u32 *);
-	int (*fe_set_voltage)(struct dvb_frontend *, fe_sec_voltage_t);
+	int (*fe_set_voltage)(struct dvb_frontend *, enum fe_sec_voltage);
 	u8 dvb_usb_lme2510_firmware;
 };
 
@@ -802,7 +802,7 @@ static struct ts2020_config ts2020_config = {
 };
 
 static int dm04_lme2510_set_voltage(struct dvb_frontend *fe,
-					fe_sec_voltage_t voltage)
+				    enum fe_sec_voltage voltage)
 {
 	struct dvb_usb_device *d = fe_to_d(fe);
 	struct lme2510_state *st = fe_to_priv(fe);
@@ -837,7 +837,7 @@ static int dm04_lme2510_set_voltage(struct dvb_frontend *fe,
 	return (ret < 0) ? -ENODEV : 0;
 }
 
-static int dm04_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int dm04_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct dvb_usb_device *d = fe_to_d(fe);
 	struct lme2510_state *st = d->priv;
diff --git a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c
index ecefa5c477fa..ea3753653368 100644
--- a/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c
+++ b/drivers/media/usb/dvb-usb-v2/mxl111sf-demod.c
@@ -72,7 +72,7 @@ int mxl111sf_demod_program_regs(struct mxl111sf_demod_state *state,
 
 static
 int mxl1x1sf_demod_get_tps_code_rate(struct mxl111sf_demod_state *state,
-				     fe_code_rate_t *code_rate)
+				     enum fe_code_rate *code_rate)
 {
 	u8 val;
 	int ret = mxl111sf_demod_read_reg(state, V6_CODE_RATE_TPS_REG, &val);
@@ -103,7 +103,7 @@ fail:
 
 static
 int mxl1x1sf_demod_get_tps_modulation(struct mxl111sf_demod_state *state,
-					 fe_modulation_t *modulation)
+				      enum fe_modulation *modulation)
 {
 	u8 val;
 	int ret = mxl111sf_demod_read_reg(state, V6_MODORDER_TPS_REG, &val);
@@ -128,7 +128,7 @@ fail:
 
 static
 int mxl1x1sf_demod_get_tps_guard_fft_mode(struct mxl111sf_demod_state *state,
-					  fe_transmit_mode_t *fft_mode)
+					  enum fe_transmit_mode *fft_mode)
 {
 	u8 val;
 	int ret = mxl111sf_demod_read_reg(state, V6_MODE_TPS_REG, &val);
@@ -153,7 +153,7 @@ fail:
 
 static
 int mxl1x1sf_demod_get_tps_guard_interval(struct mxl111sf_demod_state *state,
-					  fe_guard_interval_t *guard)
+					  enum fe_guard_interval *guard)
 {
 	u8 val;
 	int ret = mxl111sf_demod_read_reg(state, V6_CP_TPS_REG, &val);
@@ -181,7 +181,7 @@ fail:
 
 static
 int mxl1x1sf_demod_get_tps_hierarchy(struct mxl111sf_demod_state *state,
-				     fe_hierarchy_t *hierarchy)
+				     enum fe_hierarchy *hierarchy)
 {
 	u8 val;
 	int ret = mxl111sf_demod_read_reg(state, V6_TPS_HIERACHY_REG, &val);
@@ -441,7 +441,7 @@ fail:
 }
 
 static int mxl111sf_demod_read_status(struct dvb_frontend *fe,
-				      fe_status_t *status)
+				      enum fe_status *status)
 {
 	struct mxl111sf_demod_state *state = fe->demodulator_priv;
 	int ret, locked, cr_lock, sync_lock, fec_lock;
@@ -480,7 +480,7 @@ static int mxl111sf_demod_read_signal_strength(struct dvb_frontend *fe,
 					       u16 *signal_strength)
 {
 	struct mxl111sf_demod_state *state = fe->demodulator_priv;
-	fe_modulation_t modulation;
+	enum fe_modulation modulation;
 	u16 snr;
 
 	mxl111sf_demod_calc_snr(state, &snr);
diff --git a/drivers/media/usb/dvb-usb/af9005-fe.c b/drivers/media/usb/dvb-usb/af9005-fe.c
index 6e84a546dfdc..ac97075d75f7 100644
--- a/drivers/media/usb/dvb-usb/af9005-fe.c
+++ b/drivers/media/usb/dvb-usb/af9005-fe.c
@@ -29,7 +29,7 @@
 
 struct af9005_fe_state {
 	struct dvb_usb_device *d;
-	fe_status_t stat;
+	enum fe_status stat;
 
 	/* retraining parameters */
 	u32 original_fcw;
@@ -437,7 +437,8 @@ static int af9005_fe_refresh_state(struct dvb_frontend *fe)
 	return 0;
 }
 
-static int af9005_fe_read_status(struct dvb_frontend *fe, fe_status_t * stat)
+static int af9005_fe_read_status(struct dvb_frontend *fe,
+				 enum fe_status *stat)
 {
 	struct af9005_fe_state *state = fe->demodulator_priv;
 	u8 temp;
diff --git a/drivers/media/usb/dvb-usb/az6027.c b/drivers/media/usb/dvb-usb/az6027.c
index 0df52ab32a7b..92e47d6c3ee3 100644
--- a/drivers/media/usb/dvb-usb/az6027.c
+++ b/drivers/media/usb/dvb-usb/az6027.c
@@ -778,7 +778,8 @@ static int az6027_read_mac_addr(struct dvb_usb_device *d, u8 mac[6])
 }
 */
 
-static int az6027_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int az6027_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage)
 {
 
 	u8 buf;
diff --git a/drivers/media/usb/dvb-usb/cinergyT2-fe.c b/drivers/media/usb/dvb-usb/cinergyT2-fe.c
index c890fe46acd3..b3ec743a7a2e 100644
--- a/drivers/media/usb/dvb-usb/cinergyT2-fe.c
+++ b/drivers/media/usb/dvb-usb/cinergyT2-fe.c
@@ -142,7 +142,7 @@ struct cinergyt2_fe_state {
 };
 
 static int cinergyt2_fe_read_status(struct dvb_frontend *fe,
-					fe_status_t *status)
+				    enum fe_status *status)
 {
 	struct cinergyt2_fe_state *state = fe->demodulator_priv;
 	struct dvbt_get_status_msg result;
diff --git a/drivers/media/usb/dvb-usb/dib0700.h b/drivers/media/usb/dvb-usb/dib0700.h
index 927617d95616..8fd8f5b489d2 100644
--- a/drivers/media/usb/dvb-usb/dib0700.h
+++ b/drivers/media/usb/dvb-usb/dib0700.h
@@ -48,7 +48,7 @@ struct dib0700_state {
 	u8 disable_streaming_master_mode;
 	u32 fw_version;
 	u32 nb_packet_buffer_size;
-	int (*read_status)(struct dvb_frontend *, fe_status_t *);
+	int (*read_status)(struct dvb_frontend *, enum fe_status *);
 	int (*sleep)(struct dvb_frontend* fe);
 	u8 buf[255];
 };
diff --git a/drivers/media/usb/dvb-usb/dib0700_devices.c b/drivers/media/usb/dvb-usb/dib0700_devices.c
index 5a3dbb8c7658..7ed49646a699 100644
--- a/drivers/media/usb/dvb-usb/dib0700_devices.c
+++ b/drivers/media/usb/dvb-usb/dib0700_devices.c
@@ -3309,7 +3309,7 @@ static int stk7070pd_frontend_attach1(struct dvb_usb_adapter *adap)
 }
 
 static int novatd_read_status_override(struct dvb_frontend *fe,
-		fe_status_t *stat)
+				       enum fe_status *stat)
 {
 	struct dvb_usb_adapter *adap = fe->dvb->priv;
 	struct dvb_usb_device *dev = adap->dev;
diff --git a/drivers/media/usb/dvb-usb/dtt200u-fe.c b/drivers/media/usb/dvb-usb/dtt200u-fe.c
index 3d81daa49172..8637ad1be6be 100644
--- a/drivers/media/usb/dvb-usb/dtt200u-fe.c
+++ b/drivers/media/usb/dvb-usb/dtt200u-fe.c
@@ -14,13 +14,14 @@
 struct dtt200u_fe_state {
 	struct dvb_usb_device *d;
 
-	fe_status_t stat;
+	enum fe_status stat;
 
 	struct dtv_frontend_properties fep;
 	struct dvb_frontend frontend;
 };
 
-static int dtt200u_fe_read_status(struct dvb_frontend* fe, fe_status_t *stat)
+static int dtt200u_fe_read_status(struct dvb_frontend *fe,
+				  enum fe_status *stat)
 {
 	struct dtt200u_fe_state *state = fe->demodulator_priv;
 	u8 st = GET_TUNE_STATUS, b[3];
@@ -105,7 +106,7 @@ static int dtt200u_fe_set_frontend(struct dvb_frontend *fe)
 	struct dtv_frontend_properties *fep = &fe->dtv_property_cache;
 	struct dtt200u_fe_state *state = fe->demodulator_priv;
 	int i;
-	fe_status_t st;
+	enum fe_status st;
 	u16 freq = fep->frequency / 250000;
 	u8 bwbuf[2] = { SET_BANDWIDTH, 0 },freqbuf[3] = { SET_RF_FREQ, 0, 0 };
 
diff --git a/drivers/media/usb/dvb-usb/dw2102.c b/drivers/media/usb/dvb-usb/dw2102.c
index c2632bc9e530..14ef25dc6cd3 100644
--- a/drivers/media/usb/dvb-usb/dw2102.c
+++ b/drivers/media/usb/dvb-usb/dw2102.c
@@ -121,9 +121,9 @@ struct dw2102_state {
 	struct i2c_client *i2c_client_tuner;
 
 	/* fe hook functions*/
-	int (*old_set_voltage)(struct dvb_frontend *f, fe_sec_voltage_t v);
+	int (*old_set_voltage)(struct dvb_frontend *f, enum fe_sec_voltage v);
 	int (*fe_read_status)(struct dvb_frontend *fe,
-		fe_status_t *status);
+			      enum fe_status *status);
 };
 
 /* debug */
@@ -949,7 +949,8 @@ static int su3000_identify_state(struct usb_device *udev,
 	return 0;
 }
 
-static int dw210x_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int dw210x_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage)
 {
 	static u8 command_13v[] = {0x00, 0x01};
 	static u8 command_18v[] = {0x01, 0x01};
@@ -973,7 +974,8 @@ static int dw210x_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
 	return 0;
 }
 
-static int s660_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int s660_set_voltage(struct dvb_frontend *fe,
+			    enum fe_sec_voltage voltage)
 {
 	struct dvb_usb_adapter *d =
 		(struct dvb_usb_adapter *)(fe->dvb->priv);
@@ -1004,7 +1006,8 @@ static void dw210x_led_ctrl(struct dvb_frontend *fe, int offon)
 	i2c_transfer(&udev_adap->dev->i2c_adap, &msg, 1);
 }
 
-static int tt_s2_4600_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int tt_s2_4600_read_status(struct dvb_frontend *fe,
+				  enum fe_status *status)
 {
 	struct dvb_usb_adapter *d =
 		(struct dvb_usb_adapter *)(fe->dvb->priv);
diff --git a/drivers/media/usb/dvb-usb/friio-fe.c b/drivers/media/usb/dvb-usb/friio-fe.c
index d56f927fc31a..8ec92fbeabad 100644
--- a/drivers/media/usb/dvb-usb/friio-fe.c
+++ b/drivers/media/usb/dvb-usb/friio-fe.c
@@ -210,7 +210,8 @@ error:
 	return -EREMOTEIO;
 }
 
-static int jdvbt90502_read_status(struct dvb_frontend *fe, fe_status_t *state)
+static int jdvbt90502_read_status(struct dvb_frontend *fe,
+				  enum fe_status *state)
 {
 	u8 result;
 	int ret;
diff --git a/drivers/media/usb/dvb-usb/gp8psk-fe.c b/drivers/media/usb/dvb-usb/gp8psk-fe.c
index 67957dd99ede..db6eb79cde07 100644
--- a/drivers/media/usb/dvb-usb/gp8psk-fe.c
+++ b/drivers/media/usb/dvb-usb/gp8psk-fe.c
@@ -51,7 +51,8 @@ static int gp8psk_fe_update_status(struct gp8psk_fe_state *st)
 	return 0;
 }
 
-static int gp8psk_fe_read_status(struct dvb_frontend* fe, fe_status_t *status)
+static int gp8psk_fe_read_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct gp8psk_fe_state *st = fe->demodulator_priv;
 	gp8psk_fe_update_status(st);
@@ -236,8 +237,8 @@ static int gp8psk_fe_send_diseqc_msg (struct dvb_frontend* fe,
 	return 0;
 }
 
-static int gp8psk_fe_send_diseqc_burst (struct dvb_frontend* fe,
-				    fe_sec_mini_cmd_t burst)
+static int gp8psk_fe_send_diseqc_burst(struct dvb_frontend *fe,
+				       enum fe_sec_mini_cmd burst)
 {
 	struct gp8psk_fe_state *st = fe->demodulator_priv;
 	u8 cmd;
@@ -254,7 +255,8 @@ static int gp8psk_fe_send_diseqc_burst (struct dvb_frontend* fe,
 	return 0;
 }
 
-static int gp8psk_fe_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int gp8psk_fe_set_tone(struct dvb_frontend *fe,
+			      enum fe_sec_tone_mode tone)
 {
 	struct gp8psk_fe_state* state = fe->demodulator_priv;
 
@@ -265,7 +267,8 @@ static int gp8psk_fe_set_tone (struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
 	return 0;
 }
 
-static int gp8psk_fe_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int gp8psk_fe_set_voltage(struct dvb_frontend *fe,
+				 enum fe_sec_voltage voltage)
 {
 	struct gp8psk_fe_state* state = fe->demodulator_priv;
 
diff --git a/drivers/media/usb/dvb-usb/opera1.c b/drivers/media/usb/dvb-usb/opera1.c
index 14a2119912ba..2566d2f1c2ad 100644
--- a/drivers/media/usb/dvb-usb/opera1.c
+++ b/drivers/media/usb/dvb-usb/opera1.c
@@ -167,7 +167,8 @@ static struct i2c_algorithm opera1_i2c_algo = {
 	.functionality = opera1_i2c_func,
 };
 
-static int opera1_set_voltage(struct dvb_frontend *fe, fe_sec_voltage_t voltage)
+static int opera1_set_voltage(struct dvb_frontend *fe,
+			      enum fe_sec_voltage voltage)
 {
 	static u8 command_13v[1]={0x00};
 	static u8 command_18v[1]={0x01};
diff --git a/drivers/media/usb/dvb-usb/technisat-usb2.c b/drivers/media/usb/dvb-usb/technisat-usb2.c
index 5801ae7f672a..03f334d3a8f4 100644
--- a/drivers/media/usb/dvb-usb/technisat-usb2.c
+++ b/drivers/media/usb/dvb-usb/technisat-usb2.c
@@ -453,7 +453,7 @@ static struct stv090x_config technisat_usb2_stv090x_config;
 
 /* frontend attach */
 static int technisat_usb2_set_voltage(struct dvb_frontend *fe,
-		fe_sec_voltage_t voltage)
+				      enum fe_sec_voltage voltage)
 {
 	int i;
 	u8 gpio[3] = { 0 }; /* 0 = 2, 1 = 3, 2 = 4 */
diff --git a/drivers/media/usb/dvb-usb/vp702x-fe.c b/drivers/media/usb/dvb-usb/vp702x-fe.c
index 5eab468dd904..d361a72ca0fa 100644
--- a/drivers/media/usb/dvb-usb/vp702x-fe.c
+++ b/drivers/media/usb/dvb-usb/vp702x-fe.c
@@ -26,8 +26,8 @@ struct vp702x_fe_state {
 
 	struct dvb_frontend_ops ops;
 
-	fe_sec_voltage_t voltage;
-	fe_sec_tone_mode_t tone_mode;
+	enum fe_sec_voltage voltage;
+	enum fe_sec_tone_mode tone_mode;
 
 	u8 lnb_buf[8];
 
@@ -72,7 +72,8 @@ static u8 vp702x_chksum(u8 *buf,int f, int count)
 	return ~s+1;
 }
 
-static int vp702x_fe_read_status(struct dvb_frontend* fe, fe_status_t *status)
+static int vp702x_fe_read_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct vp702x_fe_state *st = fe->demodulator_priv;
 	vp702x_fe_refresh_state(st);
@@ -243,13 +244,15 @@ static int vp702x_fe_send_diseqc_msg (struct dvb_frontend* fe,
 	return 0;
 }
 
-static int vp702x_fe_send_diseqc_burst (struct dvb_frontend* fe, fe_sec_mini_cmd_t burst)
+static int vp702x_fe_send_diseqc_burst(struct dvb_frontend *fe,
+				       enum fe_sec_mini_cmd burst)
 {
 	deb_fe("%s\n",__func__);
 	return 0;
 }
 
-static int vp702x_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int vp702x_fe_set_tone(struct dvb_frontend *fe,
+			      enum fe_sec_tone_mode tone)
 {
 	struct vp702x_fe_state *st = fe->demodulator_priv;
 	struct vp702x_device_state *dst = st->d->priv;
@@ -282,8 +285,8 @@ static int vp702x_fe_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
 	return 0;
 }
 
-static int vp702x_fe_set_voltage (struct dvb_frontend* fe, fe_sec_voltage_t
-		voltage)
+static int vp702x_fe_set_voltage(struct dvb_frontend *fe,
+				 enum fe_sec_voltage voltage)
 {
 	struct vp702x_fe_state *st = fe->demodulator_priv;
 	struct vp702x_device_state *dst = st->d->priv;
diff --git a/drivers/media/usb/dvb-usb/vp7045-fe.c b/drivers/media/usb/dvb-usb/vp7045-fe.c
index b8825b18c003..e708afc6a57f 100644
--- a/drivers/media/usb/dvb-usb/vp7045-fe.c
+++ b/drivers/media/usb/dvb-usb/vp7045-fe.c
@@ -26,7 +26,8 @@ struct vp7045_fe_state {
 	struct dvb_usb_device *d;
 };
 
-static int vp7045_fe_read_status(struct dvb_frontend* fe, fe_status_t *status)
+static int vp7045_fe_read_status(struct dvb_frontend *fe,
+				 enum fe_status *status)
 {
 	struct vp7045_fe_state *state = fe->demodulator_priv;
 	u8 s0 = vp7045_read_reg(state->d,0x00),
diff --git a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
index cef7a00099ea..d52d4a8d39ad 100644
--- a/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
+++ b/drivers/media/usb/ttusb-budget/dvb-ttusb-budget.c
@@ -111,8 +111,8 @@ struct ttusb {
 	int last_filter;
 
 	u8 c;			/* transaction counter, wraps around...  */
-	fe_sec_tone_mode_t tone;
-	fe_sec_voltage_t voltage;
+	enum fe_sec_tone_mode tone;
+	enum fe_sec_voltage voltage;
 
 	int mux_state;		// 0..2 - MuxSyncWord, 3 - nMuxPacks,    4 - muxpack
 	u8 mux_npacks;
@@ -511,7 +511,8 @@ static int ttusb_update_lnb(struct ttusb *ttusb)
 	return err;
 }
 
-static int ttusb_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int ttusb_set_voltage(struct dvb_frontend *fe,
+			     enum fe_sec_voltage voltage)
 {
 	struct ttusb* ttusb = (struct ttusb*) fe->dvb->priv;
 
@@ -520,7 +521,7 @@ static int ttusb_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage)
 }
 
 #ifdef TTUSB_TONE
-static int ttusb_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int ttusb_set_tone(struct dvb_frontend *fe, enum fe_sec_tone_mode tone)
 {
 	struct ttusb* ttusb = (struct ttusb*) fe->dvb->priv;
 
diff --git a/drivers/media/usb/ttusb-dec/ttusbdecfe.c b/drivers/media/usb/ttusb-dec/ttusbdecfe.c
index 9c29552aedec..8781335ab92f 100644
--- a/drivers/media/usb/ttusb-dec/ttusbdecfe.c
+++ b/drivers/media/usb/ttusb-dec/ttusbdecfe.c
@@ -39,7 +39,7 @@ struct ttusbdecfe_state {
 
 
 static int ttusbdecfe_dvbs_read_status(struct dvb_frontend *fe,
-	fe_status_t *status)
+				       enum fe_status *status)
 {
 	*status = FE_HAS_SIGNAL | FE_HAS_VITERBI |
 		FE_HAS_SYNC | FE_HAS_CARRIER | FE_HAS_LOCK;
@@ -48,7 +48,7 @@ static int ttusbdecfe_dvbs_read_status(struct dvb_frontend *fe,
 
 
 static int ttusbdecfe_dvbt_read_status(struct dvb_frontend *fe,
-	fe_status_t *status)
+				       enum fe_status *status)
 {
 	struct ttusbdecfe_state* state = fe->demodulator_priv;
 	u8 b[] = { 0x00, 0x00, 0x00, 0x00,
@@ -169,7 +169,8 @@ static int ttusbdecfe_dvbs_diseqc_send_master_cmd(struct dvb_frontend* fe, struc
 }
 
 
-static int ttusbdecfe_dvbs_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t tone)
+static int ttusbdecfe_dvbs_set_tone(struct dvb_frontend *fe,
+				    enum fe_sec_tone_mode tone)
 {
 	struct ttusbdecfe_state* state = (struct ttusbdecfe_state*) fe->demodulator_priv;
 
@@ -179,7 +180,8 @@ static int ttusbdecfe_dvbs_set_tone(struct dvb_frontend* fe, fe_sec_tone_mode_t
 }
 
 
-static int ttusbdecfe_dvbs_set_voltage(struct dvb_frontend* fe, fe_sec_voltage_t voltage)
+static int ttusbdecfe_dvbs_set_voltage(struct dvb_frontend *fe,
+				       enum fe_sec_voltage voltage)
 {
 	struct ttusbdecfe_state* state = (struct ttusbdecfe_state*) fe->demodulator_priv;
 
diff --git a/drivers/staging/media/mn88472/mn88472.c b/drivers/staging/media/mn88472/mn88472.c
index 6863c431c648..a8d45f44765c 100644
--- a/drivers/staging/media/mn88472/mn88472.c
+++ b/drivers/staging/media/mn88472/mn88472.c
@@ -218,7 +218,7 @@ err:
 	return ret;
 }
 
-static int mn88472_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int mn88472_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct i2c_client *client = fe->demodulator_priv;
 	struct mn88472_dev *dev = i2c_get_clientdata(client);
diff --git a/drivers/staging/media/mn88472/mn88472_priv.h b/drivers/staging/media/mn88472/mn88472_priv.h
index 9ba8c8b3823e..1a0de9e46b66 100644
--- a/drivers/staging/media/mn88472/mn88472_priv.h
+++ b/drivers/staging/media/mn88472/mn88472_priv.h
@@ -29,7 +29,7 @@ struct mn88472_dev {
 	struct regmap *regmap[3];
 	struct dvb_frontend fe;
 	u16 i2c_wr_max;
-	fe_delivery_system_t delivery_system;
+	enum fe_delivery_system delivery_system;
 	bool warm; /* FW running */
 	u32 xtal;
 	int ts_mode;
diff --git a/drivers/staging/media/mn88473/mn88473.c b/drivers/staging/media/mn88473/mn88473.c
index 8b6736c70057..f9146a146d07 100644
--- a/drivers/staging/media/mn88473/mn88473.c
+++ b/drivers/staging/media/mn88473/mn88473.c
@@ -167,7 +167,7 @@ err:
 	return ret;
 }
 
-static int mn88473_read_status(struct dvb_frontend *fe, fe_status_t *status)
+static int mn88473_read_status(struct dvb_frontend *fe, enum fe_status *status)
 {
 	struct i2c_client *client = fe->demodulator_priv;
 	struct mn88473_dev *dev = i2c_get_clientdata(client);
diff --git a/drivers/staging/media/mn88473/mn88473_priv.h b/drivers/staging/media/mn88473/mn88473_priv.h
index ef6f01323ac9..54beb4241ccf 100644
--- a/drivers/staging/media/mn88473/mn88473_priv.h
+++ b/drivers/staging/media/mn88473/mn88473_priv.h
@@ -29,7 +29,7 @@ struct mn88473_dev {
 	struct regmap *regmap[3];
 	struct dvb_frontend fe;
 	u16 i2c_wr_max;
-	fe_delivery_system_t delivery_system;
+	enum fe_delivery_system delivery_system;
 	bool warm; /* FW running */
 	u32 xtal;
 };
diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index a36d802fae0c..7f829c92dd64 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -77,7 +77,7 @@ typedef enum fe_caps fe_caps_t;
 
 struct dvb_frontend_info {
 	char       name[128];
-	fe_type_t  type;			/* DEPRECATED. Use DTV_ENUM_DELSYS instead */
+	enum fe_type type;	/* DEPRECATED. Use DTV_ENUM_DELSYS instead */
 	__u32      frequency_min;
 	__u32      frequency_max;
 	__u32      frequency_stepsize;
@@ -86,7 +86,7 @@ struct dvb_frontend_info {
 	__u32      symbol_rate_max;
 	__u32      symbol_rate_tolerance;	/* ppm */
 	__u32      notifier_delay;		/* DEPRECATED */
-	fe_caps_t  caps;
+	enum fe_caps caps;
 };
 
 
-- 
cgit 


From af6392dec9421ad9de973d1f0558813f52537eba Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 15:00:17 -0300
Subject: [media] frontend: Move legacy API enums/structs to the end

In order to better organize the header file, move the legacy
API (DVBv3) support to the end, just before the ioctl definitions.

This way, we can use just one #if for all of them.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/dvb/frontend.h | 116 +++++++++++++++++++-------------------
 1 file changed, 58 insertions(+), 58 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 7f829c92dd64..75605a7670a9 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -216,19 +216,6 @@ enum fe_transmit_mode {
 
 typedef enum fe_transmit_mode fe_transmit_mode_t;
 
-#if defined(__DVB_CORE__) || !defined (__KERNEL__)
-enum fe_bandwidth {
-	BANDWIDTH_8_MHZ,
-	BANDWIDTH_7_MHZ,
-	BANDWIDTH_6_MHZ,
-	BANDWIDTH_AUTO,
-	BANDWIDTH_5_MHZ,
-	BANDWIDTH_10_MHZ,
-	BANDWIDTH_1_712_MHZ,
-};
-
-typedef enum fe_bandwidth fe_bandwidth_t;
-#endif
 
 enum fe_guard_interval {
 	GUARD_INTERVAL_1_32,
@@ -263,51 +250,6 @@ enum fe_interleaving {
 	INTERLEAVING_720,
 };
 
-#if defined(__DVB_CORE__) || !defined (__KERNEL__)
-struct dvb_qpsk_parameters {
-	__u32		symbol_rate;  /* symbol rate in Symbols per second */
-	fe_code_rate_t	fec_inner;    /* forward error correction (see above) */
-};
-
-struct dvb_qam_parameters {
-	__u32		symbol_rate; /* symbol rate in Symbols per second */
-	fe_code_rate_t	fec_inner;   /* forward error correction (see above) */
-	fe_modulation_t	modulation;  /* modulation type (see above) */
-};
-
-struct dvb_vsb_parameters {
-	fe_modulation_t	modulation;  /* modulation type (see above) */
-};
-
-struct dvb_ofdm_parameters {
-	fe_bandwidth_t      bandwidth;
-	fe_code_rate_t      code_rate_HP;  /* high priority stream code rate */
-	fe_code_rate_t      code_rate_LP;  /* low priority stream code rate */
-	fe_modulation_t     constellation; /* modulation type (see above) */
-	fe_transmit_mode_t  transmission_mode;
-	fe_guard_interval_t guard_interval;
-	fe_hierarchy_t      hierarchy_information;
-};
-
-
-struct dvb_frontend_parameters {
-	__u32 frequency;     /* (absolute) frequency in Hz for QAM/OFDM/ATSC */
-			     /* intermediate frequency in kHz for QPSK */
-	fe_spectral_inversion_t inversion;
-	union {
-		struct dvb_qpsk_parameters qpsk;
-		struct dvb_qam_parameters  qam;
-		struct dvb_ofdm_parameters ofdm;
-		struct dvb_vsb_parameters vsb;
-	} u;
-};
-
-struct dvb_frontend_event {
-	fe_status_t status;
-	struct dvb_frontend_parameters parameters;
-};
-#endif
-
 /* S2API Commands */
 #define DTV_UNDEFINED		0
 #define DTV_TUNE		1
@@ -582,6 +524,64 @@ struct dtv_properties {
 	struct dtv_property *props;
 };
 
+#if defined(__DVB_CORE__) || !defined (__KERNEL__)
+
+enum fe_bandwidth {
+	BANDWIDTH_8_MHZ,
+	BANDWIDTH_7_MHZ,
+	BANDWIDTH_6_MHZ,
+	BANDWIDTH_AUTO,
+	BANDWIDTH_5_MHZ,
+	BANDWIDTH_10_MHZ,
+	BANDWIDTH_1_712_MHZ,
+};
+
+typedef enum fe_bandwidth fe_bandwidth_t;
+
+struct dvb_qpsk_parameters {
+	__u32		symbol_rate;  /* symbol rate in Symbols per second */
+	fe_code_rate_t	fec_inner;    /* forward error correction (see above) */
+};
+
+struct dvb_qam_parameters {
+	__u32		symbol_rate; /* symbol rate in Symbols per second */
+	fe_code_rate_t	fec_inner;   /* forward error correction (see above) */
+	fe_modulation_t	modulation;  /* modulation type (see above) */
+};
+
+struct dvb_vsb_parameters {
+	fe_modulation_t	modulation;  /* modulation type (see above) */
+};
+
+struct dvb_ofdm_parameters {
+	fe_bandwidth_t      bandwidth;
+	fe_code_rate_t      code_rate_HP;  /* high priority stream code rate */
+	fe_code_rate_t      code_rate_LP;  /* low priority stream code rate */
+	fe_modulation_t     constellation; /* modulation type (see above) */
+	fe_transmit_mode_t  transmission_mode;
+	fe_guard_interval_t guard_interval;
+	fe_hierarchy_t      hierarchy_information;
+};
+
+
+struct dvb_frontend_parameters {
+	__u32 frequency;     /* (absolute) frequency in Hz for QAM/OFDM/ATSC */
+			     /* intermediate frequency in kHz for QPSK */
+	fe_spectral_inversion_t inversion;
+	union {
+		struct dvb_qpsk_parameters qpsk;
+		struct dvb_qam_parameters  qam;
+		struct dvb_ofdm_parameters ofdm;
+		struct dvb_vsb_parameters vsb;
+	} u;
+};
+
+struct dvb_frontend_event {
+	fe_status_t status;
+	struct dvb_frontend_parameters parameters;
+};
+#endif
+
 #define FE_SET_PROPERTY		   _IOW('o', 82, struct dtv_properties)
 #define FE_GET_PROPERTY		   _IOR('o', 83, struct dtv_properties)
 
-- 
cgit 


From b1e28ffaebb96feb2d3a07be9468a52d0782d427 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 15:01:15 -0300
Subject: [media] frontend: move legacy typedefs to the end

Just userspace need those typedefs. So, put it in the compat part
of the header.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/dvb/frontend.h | 59 +++++++++++----------------------------
 1 file changed, 16 insertions(+), 43 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 75605a7670a9..46c7fd1143a5 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -35,9 +35,6 @@ enum fe_type {
 	FE_ATSC
 };
 
-typedef enum fe_type fe_type_t;
-
-
 enum fe_caps {
 	FE_IS_STUPID			= 0,
 	FE_CAN_INVERSION_AUTO		= 0x1,
@@ -72,9 +69,6 @@ enum fe_caps {
 	FE_CAN_MUTE_TS			= 0x80000000  /* frontend can stop spurious TS data output */
 };
 
-typedef enum fe_caps fe_caps_t;
-
-
 struct dvb_frontend_info {
 	char       name[128];
 	enum fe_type type;	/* DEPRECATED. Use DTV_ENUM_DELSYS instead */
@@ -99,39 +93,28 @@ struct dvb_diseqc_master_cmd {
 	__u8 msg_len;	/*  valid values are 3...6  */
 };
 
-
 struct dvb_diseqc_slave_reply {
 	__u8 msg [4];	/*  { framing, data [3] } */
 	__u8 msg_len;	/*  valid values are 0...4, 0 means no msg  */
 	int  timeout;	/*  return from ioctl after timeout ms with */
 };			/*  errorcode when no message was received  */
 
-
 enum fe_sec_voltage {
 	SEC_VOLTAGE_13,
 	SEC_VOLTAGE_18,
 	SEC_VOLTAGE_OFF
 };
 
-typedef enum fe_sec_voltage fe_sec_voltage_t;
-
-
 enum fe_sec_tone_mode {
 	SEC_TONE_ON,
 	SEC_TONE_OFF
 };
 
-typedef enum fe_sec_tone_mode fe_sec_tone_mode_t;
-
-
 enum fe_sec_mini_cmd {
 	SEC_MINI_A,
 	SEC_MINI_B
 };
 
-typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t;
-
-
 /**
  * enum fe_status - enumerates the possible frontend status
  * @FE_HAS_SIGNAL:	found something above the noise level
@@ -143,7 +126,6 @@ typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t;
  * @FE_REINIT:		frontend was reinitialized, application is recommended
  *			to reset DiSEqC, tone and parameters
  */
-
 enum fe_status {
 	FE_HAS_SIGNAL		= 0x01,
 	FE_HAS_CARRIER		= 0x02,
@@ -154,16 +136,12 @@ enum fe_status {
 	FE_REINIT		= 0x40,
 };
 
-typedef enum fe_status fe_status_t;
-
 enum fe_spectral_inversion {
 	INVERSION_OFF,
 	INVERSION_ON,
 	INVERSION_AUTO
 };
 
-typedef enum fe_spectral_inversion fe_spectral_inversion_t;
-
 enum fe_code_rate {
 	FEC_NONE = 0,
 	FEC_1_2,
@@ -180,9 +158,6 @@ enum fe_code_rate {
 	FEC_2_5,
 };
 
-typedef enum fe_code_rate fe_code_rate_t;
-
-
 enum fe_modulation {
 	QPSK,
 	QAM_16,
@@ -200,8 +175,6 @@ enum fe_modulation {
 	QAM_4_NR,
 };
 
-typedef enum fe_modulation fe_modulation_t;
-
 enum fe_transmit_mode {
 	TRANSMISSION_MODE_2K,
 	TRANSMISSION_MODE_8K,
@@ -214,9 +187,6 @@ enum fe_transmit_mode {
 	TRANSMISSION_MODE_C3780,
 };
 
-typedef enum fe_transmit_mode fe_transmit_mode_t;
-
-
 enum fe_guard_interval {
 	GUARD_INTERVAL_1_32,
 	GUARD_INTERVAL_1_16,
@@ -231,8 +201,6 @@ enum fe_guard_interval {
 	GUARD_INTERVAL_PN945,
 };
 
-typedef enum fe_guard_interval fe_guard_interval_t;
-
 enum fe_hierarchy {
 	HIERARCHY_NONE,
 	HIERARCHY_1,
@@ -241,8 +209,6 @@ enum fe_hierarchy {
 	HIERARCHY_AUTO
 };
 
-typedef enum fe_hierarchy fe_hierarchy_t;
-
 enum fe_interleaving {
 	INTERLEAVING_NONE,
 	INTERLEAVING_AUTO,
@@ -349,8 +315,6 @@ enum fe_pilot {
 	PILOT_AUTO,
 };
 
-typedef enum fe_pilot fe_pilot_t;
-
 enum fe_rolloff {
 	ROLLOFF_35, /* Implied value in DVB-S, default for DVB-S2 */
 	ROLLOFF_20,
@@ -358,8 +322,6 @@ enum fe_rolloff {
 	ROLLOFF_AUTO,
 };
 
-typedef enum fe_rolloff fe_rolloff_t;
-
 enum fe_delivery_system {
 	SYS_UNDEFINED,
 	SYS_DVBC_ANNEX_A,
@@ -382,8 +344,6 @@ enum fe_delivery_system {
 	SYS_DVBC_ANNEX_C,
 };
 
-typedef enum fe_delivery_system fe_delivery_system_t;
-
 /* backward compatibility */
 #define SYS_DVBC_ANNEX_AC	SYS_DVBC_ANNEX_A
 #define SYS_DMBTH SYS_DTMB /* DMB-TH is legacy name, use DTMB instead */
@@ -536,7 +496,23 @@ enum fe_bandwidth {
 	BANDWIDTH_1_712_MHZ,
 };
 
+/* This is needed for legacy userspace support */
+typedef enum fe_sec_voltage fe_sec_voltage_t;
+typedef enum fe_caps fe_caps_t;
+typedef enum fe_type fe_type_t;
+typedef enum fe_sec_tone_mode fe_sec_tone_mode_t;
+typedef enum fe_sec_mini_cmd fe_sec_mini_cmd_t;
+typedef enum fe_status fe_status_t;
+typedef enum fe_spectral_inversion fe_spectral_inversion_t;
+typedef enum fe_code_rate fe_code_rate_t;
+typedef enum fe_modulation fe_modulation_t;
+typedef enum fe_transmit_mode fe_transmit_mode_t;
 typedef enum fe_bandwidth fe_bandwidth_t;
+typedef enum fe_guard_interval fe_guard_interval_t;
+typedef enum fe_hierarchy fe_hierarchy_t;
+typedef enum fe_pilot fe_pilot_t;
+typedef enum fe_rolloff fe_rolloff_t;
+typedef enum fe_delivery_system fe_delivery_system_t;
 
 struct dvb_qpsk_parameters {
 	__u32		symbol_rate;  /* symbol rate in Symbols per second */
@@ -563,7 +539,6 @@ struct dvb_ofdm_parameters {
 	fe_hierarchy_t      hierarchy_information;
 };
 
-
 struct dvb_frontend_parameters {
 	__u32 frequency;     /* (absolute) frequency in Hz for QAM/OFDM/ATSC */
 			     /* intermediate frequency in kHz for QPSK */
@@ -585,7 +560,6 @@ struct dvb_frontend_event {
 #define FE_SET_PROPERTY		   _IOW('o', 82, struct dtv_properties)
 #define FE_GET_PROPERTY		   _IOR('o', 83, struct dtv_properties)
 
-
 /**
  * When set, this flag will disable any zigzagging or other "normal" tuning
  * behaviour. Additionally, there will be no automatic monitoring of the lock
@@ -595,7 +569,6 @@ struct dvb_frontend_event {
  */
 #define FE_TUNE_MODE_ONESHOT 0x01
 
-
 #define FE_GET_INFO		   _IOR('o', 61, struct dvb_frontend_info)
 
 #define FE_DISEQC_RESET_OVERLOAD   _IO('o', 62)
-- 
cgit 


From 76add03db97c11142e30d1e63c6f19e179b8b6c6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 16:28:33 -0300
Subject: [media] frontend: Fix a typo at the comments

The description of struct dtv_stats has a spmall typo:
	FE_SCALE_DECIBELS instead of FE_SCALE_DECIBEL

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/dvb/frontend.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 46c7fd1143a5..0380e62fc8b2 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -435,13 +435,13 @@ enum fecap_scale_params {
  *
  * In other words, for ISDB, those values should be filled like:
  *	u.st.stat.svalue[0] = global statistics;
- *	u.st.stat.scale[0] = FE_SCALE_DECIBELS;
+ *	u.st.stat.scale[0] = FE_SCALE_DECIBEL;
  *	u.st.stat.value[1] = layer A statistics;
  *	u.st.stat.scale[1] = FE_SCALE_NOT_AVAILABLE (if not available);
  *	u.st.stat.svalue[2] = layer B statistics;
- *	u.st.stat.scale[2] = FE_SCALE_DECIBELS;
+ *	u.st.stat.scale[2] = FE_SCALE_DECIBEL;
  *	u.st.stat.svalue[3] = layer C statistics;
- *	u.st.stat.scale[3] = FE_SCALE_DECIBELS;
+ *	u.st.stat.scale[3] = FE_SCALE_DECIBEL;
  *	u.st.len = 4;
  */
 struct dtv_stats {
-- 
cgit 


From 81a7c6d9bb14956d9feb619ae724aaa90ea55288 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 16:33:30 -0300
Subject: [media] dvb: frontend.h: improve dvb_frontent_parameters comment

The comment for struct dvb_frontend_parameters is weird, as it
mixes delivery system name (ATSC) with modulation names
(QPSK, QAM, OFDM).

Use delivery system names there on the frequency comment, as this
is clearer, specially after 2GEN delivery systems.

While here, add comments at the union, to make live easier for ones
that may try to understand the convention used by the legacy API.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/dvb/frontend.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index 0380e62fc8b2..e764fd8b7e35 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -540,14 +540,14 @@ struct dvb_ofdm_parameters {
 };
 
 struct dvb_frontend_parameters {
-	__u32 frequency;     /* (absolute) frequency in Hz for QAM/OFDM/ATSC */
-			     /* intermediate frequency in kHz for QPSK */
+	__u32 frequency;     /* (absolute) frequency in Hz for DVB-C/DVB-T/ATSC */
+			     /* intermediate frequency in kHz for DVB-S */
 	fe_spectral_inversion_t inversion;
 	union {
-		struct dvb_qpsk_parameters qpsk;
-		struct dvb_qam_parameters  qam;
-		struct dvb_ofdm_parameters ofdm;
-		struct dvb_vsb_parameters vsb;
+		struct dvb_qpsk_parameters qpsk;	/* DVB-S */
+		struct dvb_qam_parameters  qam;		/* DVB-C */
+		struct dvb_ofdm_parameters ofdm;	/* DVB-T */
+		struct dvb_vsb_parameters vsb;		/* ATSC */
 	} u;
 };
 
-- 
cgit 


From 486ef85e93a4829e8f49d3bfe3ee3a29379868d3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 16:44:21 -0300
Subject: [media] dvb: frontend.h: add a note for the deprecated enums/structs

Let be clear, at the header, about what got deprecated.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/dvb/frontend.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dvb/frontend.h b/include/uapi/linux/dvb/frontend.h
index e764fd8b7e35..00a20cd21ee2 100644
--- a/include/uapi/linux/dvb/frontend.h
+++ b/include/uapi/linux/dvb/frontend.h
@@ -486,6 +486,12 @@ struct dtv_properties {
 
 #if defined(__DVB_CORE__) || !defined (__KERNEL__)
 
+/*
+ * DEPRECATED: The DVBv3 ioctls, structs and enums should not be used on
+ * newer programs, as it doesn't support the second generation of digital
+ * TV standards, nor supports newer delivery systems.
+ */
+
 enum fe_bandwidth {
 	BANDWIDTH_8_MHZ,
 	BANDWIDTH_7_MHZ,
-- 
cgit 


From 6c72edab024757267f43de337f093203efea714b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 19:34:50 -0300
Subject: [media] dvb: dmx.h: don't use anonymous enums

There are several anonymous enums here, used via a typedef.

Well, we don't like typedefs on Kernel, so let's de-anonimize
those enums. Then, latter, we may be able to get rid of the
typedefs, at least from Kernelspace.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/dvb/dmx.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h
index b4fb650d9d4f..ece3661a3cac 100644
--- a/include/uapi/linux/dvb/dmx.h
+++ b/include/uapi/linux/dvb/dmx.h
@@ -32,7 +32,7 @@
 
 #define DMX_FILTER_SIZE 16
 
-typedef enum
+typedef enum dmx_output
 {
 	DMX_OUT_DECODER, /* Streaming directly to decoder. */
 	DMX_OUT_TAP,     /* Output going to a memory buffer */
@@ -44,7 +44,7 @@ typedef enum
 } dmx_output_t;
 
 
-typedef enum
+typedef enum dmx_input
 {
 	DMX_IN_FRONTEND, /* Input from a front-end device.  */
 	DMX_IN_DVR       /* Input from the logical DVR device.  */
@@ -122,7 +122,7 @@ typedef struct dmx_caps {
 	int num_decoders;
 } dmx_caps_t;
 
-typedef enum {
+typedef enum dmx_source {
 	DMX_SOURCE_FRONT0 = 0,
 	DMX_SOURCE_FRONT1,
 	DMX_SOURCE_FRONT2,
-- 
cgit 


From 6eab8043f9b91a998dc06ce0b770b9664e51bfc3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
Date: Sun, 7 Jun 2015 19:29:43 -0300
Subject: [media] DocBook: Change format for enum dmx_output documentation

Use a table for the Demux output. No new information added
here. They were all merged inside the table.

Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 Documentation/DocBook/media/Makefile      | 13 +++----
 Documentation/DocBook/media/dvb/demux.xml | 57 ++++++++++++++++++++-----------
 include/uapi/linux/dvb/dmx.h              |  6 ++--
 3 files changed, 47 insertions(+), 29 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/DocBook/media/Makefile b/Documentation/DocBook/media/Makefile
index e7d44a0b00d6..23996f88cd58 100644
--- a/Documentation/DocBook/media/Makefile
+++ b/Documentation/DocBook/media/Makefile
@@ -88,12 +88,8 @@ ENUMS = \
 
 ENUM_DEFS = \
 	$(shell perl -e 'open IN,"cat @ARGV| cpp -fpreprocessed |"; while (<IN>) { if ($$enum) {print "$$1\n" if (/\s*([A-Z]\S+)\b/); } $$enum = 0 if ($$enum && /^\}/); $$enum = 1 if(/^\s*enum\s/); }; close IN;' \
-		$(srctree)/include/uapi/linux/dvb/audio.h \
-		$(srctree)/include/uapi/linux/dvb/ca.h \
 		$(srctree)/include/uapi/linux/dvb/dmx.h \
-		$(srctree)/include/uapi/linux/dvb/frontend.h \
-		$(srctree)/include/uapi/linux/dvb/net.h \
-		$(srctree)/include/uapi/linux/dvb/video.h)
+		$(srctree)/include/uapi/linux/dvb/frontend.h)
 
 STRUCTS = \
 	$(shell perl -ne 'print "$$1 " if /^struct\s+([^\s]+)\s+/' $(srctree)/include/uapi/linux/videodev2.h) \
@@ -251,9 +247,14 @@ $(MEDIA_OBJ_DIR)/dmx.h.xml: $(srctree)/include/uapi/linux/dvb/dmx.h $(MEDIA_OBJ_
 	@(					\
 	echo "<programlisting>") > $@
 	@(					\
+	for ident in $(ENUM_DEFS) ; do		\
+	  entity=`echo $$ident | tr _ -` ;	\
+	  r="$$r s/([^\w\-])$$ident([^\w\-])/\1\&$$entity\;\2/g;";\
+	done;					\
 	expand --tabs=8 < $< |			\
 	  sed $(ESCAPE) $(DVB_DOCUMENTED) |	\
-	  sed 's/i\.e\./&ie;/') >> $@
+	  sed 's/i\.e\./&ie;/' |		\
+	  perl -ne "$$r print $$_;") >> $@
 	@(					\
 	echo "</programlisting>") >> $@
 
diff --git a/Documentation/DocBook/media/dvb/demux.xml b/Documentation/DocBook/media/dvb/demux.xml
index 11a831d58643..34f2fb1cd601 100644
--- a/Documentation/DocBook/media/dvb/demux.xml
+++ b/Documentation/DocBook/media/dvb/demux.xml
@@ -8,26 +8,43 @@ accessed by including <constant>linux/dvb/dmx.h</constant> in your application.
 <title>Demux Data Types</title>
 
 <section id="dmx-output-t">
-<title>dmx_output_t</title>
-<programlisting>
-typedef enum
-{
-	DMX_OUT_DECODER, /&#x22C6; Streaming directly to decoder. &#x22C6;/
-	DMX_OUT_TAP,     /&#x22C6; Output going to a memory buffer &#x22C6;/
-			 /&#x22C6; (to be retrieved via the read command).&#x22C6;/
-	DMX_OUT_TS_TAP,  /&#x22C6; Output multiplexed into a new TS  &#x22C6;/
-			 /&#x22C6; (to be retrieved by reading from the &#x22C6;/
-			 /&#x22C6; logical DVR device).                 &#x22C6;/
-	DMX_OUT_TSDEMUX_TAP /&#x22C6; Like TS_TAP but retrieved from the DMX device &#x22C6;/
-} dmx_output_t;
-</programlisting>
-<para><constant>DMX_OUT_TAP</constant> delivers the stream output to the demux device on which the ioctl is
-called.
-</para>
-<para><constant>DMX_OUT_TS_TAP</constant> routes output to the logical DVR device <constant>/dev/dvb/adapter?/dvr?</constant>,
-which delivers a TS multiplexed from all filters for which <constant>DMX_OUT_TS_TAP</constant> was
-specified.
-</para>
+<title>Output for the demux</title>
+
+<table pgwide="1" frame="none" id="dmx-output">
+    <title>enum dmx_output</title>
+    <tgroup cols="2">
+	&cs-def;
+	<thead>
+	<row>
+	    <entry>ID</entry>
+	    <entry>Description</entry>
+	</row>
+	</thead>
+	<tbody valign="top">
+	<row>
+		<entry align="char" id="DMX-OUT-DECODER">DMX_OUT_DECODER</entry>
+		<entry>Streaming directly to decoder.</entry>
+	</row><row>
+		<entry align="char" id="DMX-OUT-TAP">DMX_OUT_TAP</entry>
+		<entry>Output going to a memory buffer (to be retrieved via the
+		    read command). Delivers the stream output to the demux
+		    device on which the ioctl is called.</entry>
+	</row><row>
+		<entry align="char" id="DMX-OUT-TS-TAP">DMX_OUT_TS_TAP</entry>
+		<entry>Output multiplexed into a new TS (to be retrieved by
+		    reading from the logical DVR device). Routes output to the
+		    logical DVR device <constant>/dev/dvb/adapter?/dvr?</constant>,
+		    which delivers a TS multiplexed from all filters for which
+		    <constant>DMX_OUT_TS_TAP</constant> was specified.</entry>
+	</row><row>
+		<entry align="char" id="DMX-OUT-TSDEMUX-TAP">DMX_OUT_TSDEMUX_TAP</entry>
+		<entry>Like &DMX-OUT-TS-TAP; but retrieved from the DMX
+		    device.</entry>
+	</row>
+        </tbody>
+    </tgroup>
+</table>
+
 </section>
 
 <section id="dmx-input-t">
diff --git a/include/uapi/linux/dvb/dmx.h b/include/uapi/linux/dvb/dmx.h
index ece3661a3cac..427e4899ed69 100644
--- a/include/uapi/linux/dvb/dmx.h
+++ b/include/uapi/linux/dvb/dmx.h
@@ -32,7 +32,7 @@
 
 #define DMX_FILTER_SIZE 16
 
-typedef enum dmx_output
+enum dmx_output
 {
 	DMX_OUT_DECODER, /* Streaming directly to decoder. */
 	DMX_OUT_TAP,     /* Output going to a memory buffer */
@@ -41,8 +41,9 @@ typedef enum dmx_output
 			 /* (to be retrieved by reading from the */
 			 /* logical DVR device).                 */
 	DMX_OUT_TSDEMUX_TAP /* Like TS_TAP but retrieved from the DMX device */
-} dmx_output_t;
+};
 
+typedef enum dmx_output dmx_output_t;
 
 typedef enum dmx_input
 {
@@ -139,7 +140,6 @@ struct dmx_stc {
 	__u64 stc;		/* output: stc in 'base'*90 kHz units */
 };
 
-
 #define DMX_START                _IO('o', 41)
 #define DMX_STOP                 _IO('o', 42)
 #define DMX_SET_FILTER           _IOW('o', 43, struct dmx_sct_filter_params)
-- 
cgit 


From 48a6092fb41fab5b80064c3fac786f8ec86457a3 Mon Sep 17 00:00:00 2001
From: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Date: Wed, 10 Jun 2015 21:19:36 +0200
Subject: serial: stm32-usart: Add STM32 USART Driver

This drivers adds support to the STM32 USART controller, which is a
standard serial driver.

Tested-by: Chanwoo Choi <cw00.choi@samsung.com>
Reviewed-by: Peter Hurley <peter@hurleysoftware.com>
Reviewed-by: Vladimir Zapolskiy <vladimir_zapolskiy@mentor.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Maxime Coquelin <mcoquelin.stm32@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/tty/serial/Kconfig       |  17 +
 drivers/tty/serial/Makefile      |   1 +
 drivers/tty/serial/stm32-usart.c | 739 +++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/serial_core.h |   3 +
 4 files changed, 760 insertions(+)
 create mode 100644 drivers/tty/serial/stm32-usart.c

(limited to 'include/uapi/linux')

diff --git a/drivers/tty/serial/Kconfig b/drivers/tty/serial/Kconfig
index da45877e79fb..a74dabc8f108 100644
--- a/drivers/tty/serial/Kconfig
+++ b/drivers/tty/serial/Kconfig
@@ -1608,6 +1608,23 @@ config SERIAL_SPRD_CONSOLE
 	  with "earlycon" on the kernel command line. The console is
 	  enabled when early_param is processed.
 
+config SERIAL_STM32
+	tristate "STMicroelectronics STM32 serial port support"
+	select SERIAL_CORE
+	depends on ARM || COMPILE_TEST
+	help
+	  This driver is for the on-chip Serial Controller on
+	  STMicroelectronics STM32 MCUs.
+	  USART supports Rx & Tx functionality.
+	  It support all industry standard baud rates.
+
+	  If unsure, say N.
+
+config SERIAL_STM32_CONSOLE
+	bool "Support for console on STM32"
+	depends on SERIAL_STM32=y
+	select SERIAL_CORE_CONSOLE
+
 endmenu
 
 config SERIAL_MCTRL_GPIO
diff --git a/drivers/tty/serial/Makefile b/drivers/tty/serial/Makefile
index d296cee2e132..5ab41119b3dc 100644
--- a/drivers/tty/serial/Makefile
+++ b/drivers/tty/serial/Makefile
@@ -92,6 +92,7 @@ obj-$(CONFIG_SERIAL_FSL_LPUART)	+= fsl_lpuart.o
 obj-$(CONFIG_SERIAL_CONEXANT_DIGICOLOR)	+= digicolor-usart.o
 obj-$(CONFIG_SERIAL_MEN_Z135)	+= men_z135_uart.o
 obj-$(CONFIG_SERIAL_SPRD) += sprd_serial.o
+obj-$(CONFIG_SERIAL_STM32)	+= stm32-usart.o
 
 # GPIOLIB helpers for modem control lines
 obj-$(CONFIG_SERIAL_MCTRL_GPIO)	+= serial_mctrl_gpio.o
diff --git a/drivers/tty/serial/stm32-usart.c b/drivers/tty/serial/stm32-usart.c
new file mode 100644
index 000000000000..4a6eab6da63e
--- /dev/null
+++ b/drivers/tty/serial/stm32-usart.c
@@ -0,0 +1,739 @@
+/*
+ * Copyright (C) Maxime Coquelin 2015
+ * Author:  Maxime Coquelin <mcoquelin.stm32@gmail.com>
+ * License terms:  GNU General Public License (GPL), version 2
+ *
+ * Inspired by st-asc.c from STMicroelectronics (c)
+ */
+
+#if defined(CONFIG_SERIAL_STM32_USART_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
+#define SUPPORT_SYSRQ
+#endif
+
+#include <linux/module.h>
+#include <linux/serial.h>
+#include <linux/console.h>
+#include <linux/sysrq.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/delay.h>
+#include <linux/spinlock.h>
+#include <linux/pm_runtime.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/serial_core.h>
+#include <linux/clk.h>
+
+#define DRIVER_NAME "stm32-usart"
+
+/* Register offsets */
+#define USART_SR		0x00
+#define USART_DR		0x04
+#define USART_BRR		0x08
+#define USART_CR1		0x0c
+#define USART_CR2		0x10
+#define USART_CR3		0x14
+#define USART_GTPR		0x18
+
+/* USART_SR */
+#define USART_SR_PE		BIT(0)
+#define USART_SR_FE		BIT(1)
+#define USART_SR_NF		BIT(2)
+#define USART_SR_ORE		BIT(3)
+#define USART_SR_IDLE		BIT(4)
+#define USART_SR_RXNE		BIT(5)
+#define USART_SR_TC		BIT(6)
+#define USART_SR_TXE		BIT(7)
+#define USART_SR_LBD		BIT(8)
+#define USART_SR_CTS		BIT(9)
+#define USART_SR_ERR_MASK	(USART_SR_LBD | USART_SR_ORE | \
+				 USART_SR_FE | USART_SR_PE)
+/* Dummy bits */
+#define USART_SR_DUMMY_RX	BIT(16)
+
+/* USART_DR */
+#define USART_DR_MASK		GENMASK(8, 0)
+
+/* USART_BRR */
+#define USART_BRR_DIV_F_MASK	GENMASK(3, 0)
+#define USART_BRR_DIV_M_MASK	GENMASK(15, 4)
+#define USART_BRR_DIV_M_SHIFT	4
+
+/* USART_CR1 */
+#define USART_CR1_SBK		BIT(0)
+#define USART_CR1_RWU		BIT(1)
+#define USART_CR1_RE		BIT(2)
+#define USART_CR1_TE		BIT(3)
+#define USART_CR1_IDLEIE	BIT(4)
+#define USART_CR1_RXNEIE	BIT(5)
+#define USART_CR1_TCIE		BIT(6)
+#define USART_CR1_TXEIE		BIT(7)
+#define USART_CR1_PEIE		BIT(8)
+#define USART_CR1_PS		BIT(9)
+#define USART_CR1_PCE		BIT(10)
+#define USART_CR1_WAKE		BIT(11)
+#define USART_CR1_M		BIT(12)
+#define USART_CR1_UE		BIT(13)
+#define USART_CR1_OVER8		BIT(15)
+#define USART_CR1_IE_MASK	GENMASK(8, 4)
+
+/* USART_CR2 */
+#define USART_CR2_ADD_MASK	GENMASK(3, 0)
+#define USART_CR2_LBDL		BIT(5)
+#define USART_CR2_LBDIE		BIT(6)
+#define USART_CR2_LBCL		BIT(8)
+#define USART_CR2_CPHA		BIT(9)
+#define USART_CR2_CPOL		BIT(10)
+#define USART_CR2_CLKEN		BIT(11)
+#define USART_CR2_STOP_2B	BIT(13)
+#define USART_CR2_STOP_MASK	GENMASK(13, 12)
+#define USART_CR2_LINEN		BIT(14)
+
+/* USART_CR3 */
+#define USART_CR3_EIE		BIT(0)
+#define USART_CR3_IREN		BIT(1)
+#define USART_CR3_IRLP		BIT(2)
+#define USART_CR3_HDSEL		BIT(3)
+#define USART_CR3_NACK		BIT(4)
+#define USART_CR3_SCEN		BIT(5)
+#define USART_CR3_DMAR		BIT(6)
+#define USART_CR3_DMAT		BIT(7)
+#define USART_CR3_RTSE		BIT(8)
+#define USART_CR3_CTSE		BIT(9)
+#define USART_CR3_CTSIE		BIT(10)
+#define USART_CR3_ONEBIT	BIT(11)
+
+/* USART_GTPR */
+#define USART_GTPR_PSC_MASK	GENMASK(7, 0)
+#define USART_GTPR_GT_MASK	GENMASK(15, 8)
+
+#define DRIVER_NAME "stm32-usart"
+#define STM32_SERIAL_NAME "ttyS"
+#define STM32_MAX_PORTS 6
+
+struct stm32_port {
+	struct uart_port port;
+	struct clk *clk;
+	bool hw_flow_control;
+};
+
+static struct stm32_port stm32_ports[STM32_MAX_PORTS];
+static struct uart_driver stm32_usart_driver;
+
+static void stm32_stop_tx(struct uart_port *port);
+
+static inline struct stm32_port *to_stm32_port(struct uart_port *port)
+{
+	return container_of(port, struct stm32_port, port);
+}
+
+static void stm32_set_bits(struct uart_port *port, u32 reg, u32 bits)
+{
+	u32 val;
+
+	val = readl_relaxed(port->membase + reg);
+	val |= bits;
+	writel_relaxed(val, port->membase + reg);
+}
+
+static void stm32_clr_bits(struct uart_port *port, u32 reg, u32 bits)
+{
+	u32 val;
+
+	val = readl_relaxed(port->membase + reg);
+	val &= ~bits;
+	writel_relaxed(val, port->membase + reg);
+}
+
+static void stm32_receive_chars(struct uart_port *port)
+{
+	struct tty_port *tport = &port->state->port;
+	unsigned long c;
+	u32 sr;
+	char flag;
+
+	if (port->irq_wake)
+		pm_wakeup_event(tport->tty->dev, 0);
+
+	while ((sr = readl_relaxed(port->membase + USART_SR)) & USART_SR_RXNE) {
+		sr |= USART_SR_DUMMY_RX;
+		c = readl_relaxed(port->membase + USART_DR);
+		flag = TTY_NORMAL;
+		port->icount.rx++;
+
+		if (sr & USART_SR_ERR_MASK) {
+			if (sr & USART_SR_LBD) {
+				port->icount.brk++;
+				if (uart_handle_break(port))
+					continue;
+			} else if (sr & USART_SR_ORE) {
+				port->icount.overrun++;
+			} else if (sr & USART_SR_PE) {
+				port->icount.parity++;
+			} else if (sr & USART_SR_FE) {
+				port->icount.frame++;
+			}
+
+			sr &= port->read_status_mask;
+
+			if (sr & USART_SR_LBD)
+				flag = TTY_BREAK;
+			else if (sr & USART_SR_PE)
+				flag = TTY_PARITY;
+			else if (sr & USART_SR_FE)
+				flag = TTY_FRAME;
+		}
+
+		if (uart_handle_sysrq_char(port, c))
+			continue;
+		uart_insert_char(port, sr, USART_SR_ORE, c, flag);
+	}
+
+	spin_unlock(&port->lock);
+	tty_flip_buffer_push(tport);
+	spin_lock(&port->lock);
+}
+
+static void stm32_transmit_chars(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->state->xmit;
+
+	if (port->x_char) {
+		writel_relaxed(port->x_char, port->membase + USART_DR);
+		port->x_char = 0;
+		port->icount.tx++;
+		return;
+	}
+
+	if (uart_tx_stopped(port)) {
+		stm32_stop_tx(port);
+		return;
+	}
+
+	if (uart_circ_empty(xmit)) {
+		stm32_stop_tx(port);
+		return;
+	}
+
+	writel_relaxed(xmit->buf[xmit->tail], port->membase + USART_DR);
+	xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+	port->icount.tx++;
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (uart_circ_empty(xmit))
+		stm32_stop_tx(port);
+}
+
+static irqreturn_t stm32_interrupt(int irq, void *ptr)
+{
+	struct uart_port *port = ptr;
+	u32 sr;
+
+	spin_lock(&port->lock);
+
+	sr = readl_relaxed(port->membase + USART_SR);
+
+	if (sr & USART_SR_RXNE)
+		stm32_receive_chars(port);
+
+	if (sr & USART_SR_TXE)
+		stm32_transmit_chars(port);
+
+	spin_unlock(&port->lock);
+
+	return IRQ_HANDLED;
+}
+
+static unsigned int stm32_tx_empty(struct uart_port *port)
+{
+	return readl_relaxed(port->membase + USART_SR) & USART_SR_TXE;
+}
+
+static void stm32_set_mctrl(struct uart_port *port, unsigned int mctrl)
+{
+	if ((mctrl & TIOCM_RTS) && (port->status & UPSTAT_AUTORTS))
+		stm32_set_bits(port, USART_CR3, USART_CR3_RTSE);
+	else
+		stm32_clr_bits(port, USART_CR3, USART_CR3_RTSE);
+}
+
+static unsigned int stm32_get_mctrl(struct uart_port *port)
+{
+	/* This routine is used to get signals of: DCD, DSR, RI, and CTS */
+	return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
+}
+
+/* Transmit stop */
+static void stm32_stop_tx(struct uart_port *port)
+{
+	stm32_clr_bits(port, USART_CR1, USART_CR1_TXEIE);
+}
+
+/* There are probably characters waiting to be transmitted. */
+static void stm32_start_tx(struct uart_port *port)
+{
+	struct circ_buf *xmit = &port->state->xmit;
+
+	if (uart_circ_empty(xmit))
+		return;
+
+	stm32_set_bits(port, USART_CR1, USART_CR1_TXEIE | USART_CR1_TE);
+}
+
+/* Throttle the remote when input buffer is about to overflow. */
+static void stm32_throttle(struct uart_port *port)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/* Unthrottle the remote, the input buffer can now accept data. */
+static void stm32_unthrottle(struct uart_port *port)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	stm32_set_bits(port, USART_CR1, USART_CR1_RXNEIE);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+/* Receive stop */
+static void stm32_stop_rx(struct uart_port *port)
+{
+	stm32_clr_bits(port, USART_CR1, USART_CR1_RXNEIE);
+}
+
+/* Handle breaks - ignored by us */
+static void stm32_break_ctl(struct uart_port *port, int break_state)
+{
+}
+
+static int stm32_startup(struct uart_port *port)
+{
+	const char *name = to_platform_device(port->dev)->name;
+	u32 val;
+	int ret;
+
+	ret = request_irq(port->irq, stm32_interrupt, IRQF_NO_SUSPEND,
+			  name, port);
+	if (ret)
+		return ret;
+
+	val = USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE;
+	stm32_set_bits(port, USART_CR1, val);
+
+	return 0;
+}
+
+static void stm32_shutdown(struct uart_port *port)
+{
+	u32 val;
+
+	val = USART_CR1_TXEIE | USART_CR1_RXNEIE | USART_CR1_TE | USART_CR1_RE;
+	stm32_set_bits(port, USART_CR1, val);
+
+	free_irq(port->irq, port);
+}
+
+static void stm32_set_termios(struct uart_port *port, struct ktermios *termios,
+			    struct ktermios *old)
+{
+	struct stm32_port *stm32_port = to_stm32_port(port);
+	unsigned int baud;
+	u32 usartdiv, mantissa, fraction, oversampling;
+	tcflag_t cflag = termios->c_cflag;
+	u32 cr1, cr2, cr3;
+	unsigned long flags;
+
+	if (!stm32_port->hw_flow_control)
+		cflag &= ~CRTSCTS;
+
+	baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk / 8);
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* Stop serial port and reset value */
+	writel_relaxed(0, port->membase + USART_CR1);
+
+	cr1 = USART_CR1_TE | USART_CR1_RE | USART_CR1_UE | USART_CR1_RXNEIE;
+	cr2 = 0;
+	cr3 = 0;
+
+	if (cflag & CSTOPB)
+		cr2 |= USART_CR2_STOP_2B;
+
+	if (cflag & PARENB) {
+		cr1 |= USART_CR1_PCE;
+		if ((cflag & CSIZE) == CS8)
+			cr1 |= USART_CR1_M;
+	}
+
+	if (cflag & PARODD)
+		cr1 |= USART_CR1_PS;
+
+	port->status &= ~(UPSTAT_AUTOCTS | UPSTAT_AUTORTS);
+	if (cflag & CRTSCTS) {
+		port->status |= UPSTAT_AUTOCTS | UPSTAT_AUTORTS;
+		cr3 |= USART_CR3_CTSE;
+	}
+
+	usartdiv = DIV_ROUND_CLOSEST(port->uartclk, baud);
+
+	/*
+	 * The USART supports 16 or 8 times oversampling.
+	 * By default we prefer 16 times oversampling, so that the receiver
+	 * has a better tolerance to clock deviations.
+	 * 8 times oversampling is only used to achieve higher speeds.
+	 */
+	if (usartdiv < 16) {
+		oversampling = 8;
+		stm32_set_bits(port, USART_CR1, USART_CR1_OVER8);
+	} else {
+		oversampling = 16;
+		stm32_clr_bits(port, USART_CR1, USART_CR1_OVER8);
+	}
+
+	mantissa = (usartdiv / oversampling) << USART_BRR_DIV_M_SHIFT;
+	fraction = usartdiv % oversampling;
+	writel_relaxed(mantissa | fraction, port->membase + USART_BRR);
+
+	uart_update_timeout(port, cflag, baud);
+
+	port->read_status_mask = USART_SR_ORE;
+	if (termios->c_iflag & INPCK)
+		port->read_status_mask |= USART_SR_PE | USART_SR_FE;
+	if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
+		port->read_status_mask |= USART_SR_LBD;
+
+	/* Characters to ignore */
+	port->ignore_status_mask = 0;
+	if (termios->c_iflag & IGNPAR)
+		port->ignore_status_mask = USART_SR_PE | USART_SR_FE;
+	if (termios->c_iflag & IGNBRK) {
+		port->ignore_status_mask |= USART_SR_LBD;
+		/*
+		 * If we're ignoring parity and break indicators,
+		 * ignore overruns too (for real raw support).
+		 */
+		if (termios->c_iflag & IGNPAR)
+			port->ignore_status_mask |= USART_SR_ORE;
+	}
+
+	/* Ignore all characters if CREAD is not set */
+	if ((termios->c_cflag & CREAD) == 0)
+		port->ignore_status_mask |= USART_SR_DUMMY_RX;
+
+	writel_relaxed(cr3, port->membase + USART_CR3);
+	writel_relaxed(cr2, port->membase + USART_CR2);
+	writel_relaxed(cr1, port->membase + USART_CR1);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static const char *stm32_type(struct uart_port *port)
+{
+	return (port->type == PORT_STM32) ? DRIVER_NAME : NULL;
+}
+
+static void stm32_release_port(struct uart_port *port)
+{
+}
+
+static int stm32_request_port(struct uart_port *port)
+{
+	return 0;
+}
+
+static void stm32_config_port(struct uart_port *port, int flags)
+{
+	if (flags & UART_CONFIG_TYPE)
+		port->type = PORT_STM32;
+}
+
+static int
+stm32_verify_port(struct uart_port *port, struct serial_struct *ser)
+{
+	/* No user changeable parameters */
+	return -EINVAL;
+}
+
+static void stm32_pm(struct uart_port *port, unsigned int state,
+		unsigned int oldstate)
+{
+	struct stm32_port *stm32port = container_of(port,
+			struct stm32_port, port);
+	unsigned long flags = 0;
+
+	switch (state) {
+	case UART_PM_STATE_ON:
+		clk_prepare_enable(stm32port->clk);
+		break;
+	case UART_PM_STATE_OFF:
+		spin_lock_irqsave(&port->lock, flags);
+		stm32_clr_bits(port, USART_CR1, USART_CR1_UE);
+		spin_unlock_irqrestore(&port->lock, flags);
+		clk_disable_unprepare(stm32port->clk);
+		break;
+	}
+}
+
+static const struct uart_ops stm32_uart_ops = {
+	.tx_empty	= stm32_tx_empty,
+	.set_mctrl	= stm32_set_mctrl,
+	.get_mctrl	= stm32_get_mctrl,
+	.stop_tx	= stm32_stop_tx,
+	.start_tx	= stm32_start_tx,
+	.throttle	= stm32_throttle,
+	.unthrottle	= stm32_unthrottle,
+	.stop_rx	= stm32_stop_rx,
+	.break_ctl	= stm32_break_ctl,
+	.startup	= stm32_startup,
+	.shutdown	= stm32_shutdown,
+	.set_termios	= stm32_set_termios,
+	.pm		= stm32_pm,
+	.type		= stm32_type,
+	.release_port	= stm32_release_port,
+	.request_port	= stm32_request_port,
+	.config_port	= stm32_config_port,
+	.verify_port	= stm32_verify_port,
+};
+
+static int stm32_init_port(struct stm32_port *stm32port,
+			  struct platform_device *pdev)
+{
+	struct uart_port *port = &stm32port->port;
+	struct resource *res;
+	int ret;
+
+	port->iotype	= UPIO_MEM;
+	port->flags	= UPF_BOOT_AUTOCONF;
+	port->ops	= &stm32_uart_ops;
+	port->dev	= &pdev->dev;
+	port->irq	= platform_get_irq(pdev, 0);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	port->membase = devm_ioremap_resource(&pdev->dev, res);
+	if (IS_ERR(port->membase))
+		return PTR_ERR(port->membase);
+	port->mapbase = res->start;
+
+	spin_lock_init(&port->lock);
+
+	stm32port->clk = devm_clk_get(&pdev->dev, NULL);
+	if (IS_ERR(stm32port->clk))
+		return PTR_ERR(stm32port->clk);
+
+	/* Ensure that clk rate is correct by enabling the clk */
+	ret = clk_prepare_enable(stm32port->clk);
+	if (ret)
+		return ret;
+
+	stm32port->port.uartclk = clk_get_rate(stm32port->clk);
+	if (!stm32port->port.uartclk)
+		ret = -EINVAL;
+
+	clk_disable_unprepare(stm32port->clk);
+
+	return ret;
+}
+
+static struct stm32_port *stm32_of_get_stm32_port(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	int id;
+
+	if (!np)
+		return NULL;
+
+	id = of_alias_get_id(np, "serial");
+	if (id < 0)
+		id = 0;
+
+	if (WARN_ON(id >= STM32_MAX_PORTS))
+		return NULL;
+
+	stm32_ports[id].hw_flow_control = of_property_read_bool(np,
+							"auto-flow-control");
+	stm32_ports[id].port.line = id;
+	return &stm32_ports[id];
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id stm32_match[] = {
+	{ .compatible = "st,stm32-usart", },
+	{ .compatible = "st,stm32-uart", },
+	{},
+};
+
+MODULE_DEVICE_TABLE(of, stm32_match);
+#endif
+
+static int stm32_serial_probe(struct platform_device *pdev)
+{
+	int ret;
+	struct stm32_port *stm32port;
+
+	stm32port = stm32_of_get_stm32_port(pdev);
+	if (!stm32port)
+		return -ENODEV;
+
+	ret = stm32_init_port(stm32port, pdev);
+	if (ret)
+		return ret;
+
+	ret = uart_add_one_port(&stm32_usart_driver, &stm32port->port);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, &stm32port->port);
+
+	return 0;
+}
+
+static int stm32_serial_remove(struct platform_device *pdev)
+{
+	struct uart_port *port = platform_get_drvdata(pdev);
+
+	return uart_remove_one_port(&stm32_usart_driver, port);
+}
+
+
+#ifdef CONFIG_SERIAL_STM32_CONSOLE
+static void stm32_console_putchar(struct uart_port *port, int ch)
+{
+	while (!(readl_relaxed(port->membase + USART_SR) & USART_SR_TXE))
+		cpu_relax();
+
+	writel_relaxed(ch, port->membase + USART_DR);
+}
+
+static void stm32_console_write(struct console *co, const char *s, unsigned cnt)
+{
+	struct uart_port *port = &stm32_ports[co->index].port;
+	unsigned long flags;
+	u32 old_cr1, new_cr1;
+	int locked = 1;
+
+	local_irq_save(flags);
+	if (port->sysrq)
+		locked = 0;
+	else if (oops_in_progress)
+		locked = spin_trylock(&port->lock);
+	else
+		spin_lock(&port->lock);
+
+	/* Save and disable interrupts */
+	old_cr1 = readl_relaxed(port->membase + USART_CR1);
+	new_cr1 = old_cr1 & ~USART_CR1_IE_MASK;
+	writel_relaxed(new_cr1, port->membase + USART_CR1);
+
+	uart_console_write(port, s, cnt, stm32_console_putchar);
+
+	/* Restore interrupt state */
+	writel_relaxed(old_cr1, port->membase + USART_CR1);
+
+	if (locked)
+		spin_unlock(&port->lock);
+	local_irq_restore(flags);
+}
+
+static int stm32_console_setup(struct console *co, char *options)
+{
+	struct stm32_port *stm32port;
+	int baud = 9600;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index >= STM32_MAX_PORTS)
+		return -ENODEV;
+
+	stm32port = &stm32_ports[co->index];
+
+	/*
+	 * This driver does not support early console initialization
+	 * (use ARM early printk support instead), so we only expect
+	 * this to be called during the uart port registration when the
+	 * driver gets probed and the port should be mapped at that point.
+	 */
+	if (stm32port->port.mapbase == 0 || stm32port->port.membase == NULL)
+		return -ENXIO;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(&stm32port->port, co, baud, parity, bits, flow);
+}
+
+static struct console stm32_console = {
+	.name		= STM32_SERIAL_NAME,
+	.device		= uart_console_device,
+	.write		= stm32_console_write,
+	.setup		= stm32_console_setup,
+	.flags		= CON_PRINTBUFFER,
+	.index		= -1,
+	.data		= &stm32_usart_driver,
+};
+
+#define STM32_SERIAL_CONSOLE (&stm32_console)
+
+#else
+#define STM32_SERIAL_CONSOLE NULL
+#endif /* CONFIG_SERIAL_STM32_CONSOLE */
+
+static struct uart_driver stm32_usart_driver = {
+	.driver_name	= DRIVER_NAME,
+	.dev_name	= STM32_SERIAL_NAME,
+	.major		= 0,
+	.minor		= 0,
+	.nr		= STM32_MAX_PORTS,
+	.cons		= STM32_SERIAL_CONSOLE,
+};
+
+static struct platform_driver stm32_serial_driver = {
+	.probe		= stm32_serial_probe,
+	.remove		= stm32_serial_remove,
+	.driver	= {
+		.name	= DRIVER_NAME,
+		.of_match_table = of_match_ptr(stm32_match),
+	},
+};
+
+static int __init usart_init(void)
+{
+	static char banner[] __initdata = "STM32 USART driver initialized";
+	int ret;
+
+	pr_info("%s\n", banner);
+
+	ret = uart_register_driver(&stm32_usart_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&stm32_serial_driver);
+	if (ret)
+		uart_unregister_driver(&stm32_usart_driver);
+
+	return ret;
+}
+
+static void __exit usart_exit(void)
+{
+	platform_driver_unregister(&stm32_serial_driver);
+	uart_unregister_driver(&stm32_usart_driver);
+}
+
+module_init(usart_init);
+module_exit(usart_exit);
+
+MODULE_ALIAS("platform:" DRIVER_NAME);
+MODULE_DESCRIPTION("STMicroelectronics STM32 serial port driver");
+MODULE_LICENSE("GPL v2");
diff --git a/include/uapi/linux/serial_core.h b/include/uapi/linux/serial_core.h
index b2122813f18a..93ba148f923e 100644
--- a/include/uapi/linux/serial_core.h
+++ b/include/uapi/linux/serial_core.h
@@ -258,4 +258,7 @@
 /* Cris v10 / v32 SoC */
 #define PORT_CRIS	112
 
+/* STM32 USART */
+#define PORT_STM32	113
+
 #endif /* _UAPILINUX_SERIAL_CORE_H */
-- 
cgit 


From 2157e7b82f3b81f57bd80cd67cef09ef26e5f74c Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Fri, 5 Jun 2015 16:35:25 +1000
Subject: vfio: powerpc/spapr: Register memory and define IOMMU v2

The existing implementation accounts the whole DMA window in
the locked_vm counter. This is going to be worse with multiple
containers and huge DMA windows. Also, real-time accounting would requite
additional tracking of accounted pages due to the page size difference -
IOMMU uses 4K pages and system uses 4K or 64K pages.

Another issue is that actual pages pinning/unpinning happens on every
DMA map/unmap request. This does not affect the performance much now as
we spend way too much time now on switching context between
guest/userspace/host but this will start to matter when we add in-kernel
DMA map/unmap acceleration.

This introduces a new IOMMU type for SPAPR - VFIO_SPAPR_TCE_v2_IOMMU.
New IOMMU deprecates VFIO_IOMMU_ENABLE/VFIO_IOMMU_DISABLE and introduces
2 new ioctls to register/unregister DMA memory -
VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY -
which receive user space address and size of a memory region which
needs to be pinned/unpinned and counted in locked_vm.
New IOMMU splits physical pages pinning and TCE table update
into 2 different operations. It requires:
1) guest pages to be registered first
2) consequent map/unmap requests to work only with pre-registered memory.
For the default single window case this means that the entire guest
(instead of 2GB) needs to be pinned before using VFIO.
When a huge DMA window is added, no additional pinning will be
required, otherwise it would be guest RAM + 2GB.

The new memory registration ioctls are not supported by
VFIO_SPAPR_TCE_IOMMU. Dynamic DMA window and in-kernel acceleration
will require memory to be preregistered in order to work.

The accounting is done per the user process.

This advertises v2 SPAPR TCE IOMMU and restricts what the userspace
can do with v1 or v2 IOMMUs.

In order to support memory pre-registration, we need a way to track
the use of every registered memory region and only allow unregistration
if a region is not in use anymore. So we need a way to tell from what
region the just cleared TCE was from.

This adds a userspace view of the TCE table into iommu_table struct.
It contains userspace address, one per TCE entry. The table is only
allocated when the ownership over an IOMMU group is taken which means
it is only used from outside of the powernv code (such as VFIO).

As v2 IOMMU supports IODA2 and pre-IODA2 IOMMUs (which do not support
DDW API), this creates a default DMA window for IODA2 for consistency.

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/vfio.txt              |  31 ++-
 arch/powerpc/include/asm/iommu.h    |   6 +
 drivers/vfio/vfio_iommu_spapr_tce.c | 501 ++++++++++++++++++++++++++++++------
 include/uapi/linux/vfio.h           |  27 ++
 4 files changed, 482 insertions(+), 83 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index 4c746a7e717a..dcc37e109c68 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -289,10 +289,12 @@ PPC64 sPAPR implementation note
 
 This implementation has some specifics:
 
-1) Only one IOMMU group per container is supported as an IOMMU group
-represents the minimal entity which isolation can be guaranteed for and
-groups are allocated statically, one per a Partitionable Endpoint (PE)
+1) On older systems (POWER7 with P5IOC2/IODA1) only one IOMMU group per
+container is supported as an IOMMU table is allocated at the boot time,
+one table per a IOMMU group which is a Partitionable Endpoint (PE)
 (PE is often a PCI domain but not always).
+Newer systems (POWER8 with IODA2) have improved hardware design which allows
+to remove this limitation and have multiple IOMMU groups per a VFIO container.
 
 2) The hardware supports so called DMA windows - the PCI address range
 within which DMA transfer is allowed, any attempt to access address space
@@ -439,6 +441,29 @@ The code flow from the example above should be slightly changed:
 
 	....
 
+5) There is v2 of SPAPR TCE IOMMU. It deprecates VFIO_IOMMU_ENABLE/
+VFIO_IOMMU_DISABLE and implements 2 new ioctls:
+VFIO_IOMMU_SPAPR_REGISTER_MEMORY and VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY
+(which are unsupported in v1 IOMMU).
+
+PPC64 paravirtualized guests generate a lot of map/unmap requests,
+and the handling of those includes pinning/unpinning pages and updating
+mm::locked_vm counter to make sure we do not exceed the rlimit.
+The v2 IOMMU splits accounting and pinning into separate operations:
+
+- VFIO_IOMMU_SPAPR_REGISTER_MEMORY/VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY ioctls
+receive a user space address and size of the block to be pinned.
+Bisecting is not supported and VFIO_IOMMU_UNREGISTER_MEMORY is expected to
+be called with the exact address and size used for registering
+the memory block. The userspace is not expected to call these often.
+The ranges are stored in a linked list in a VFIO container.
+
+- VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA ioctls only update the actual
+IOMMU table and do not do pinning; instead these check that the userspace
+address is from pre-registered range.
+
+This separation helps in optimizing DMA for guests.
+
 -------------------------------------------------------------------------------
 
 [1] VFIO was originally an acronym for "Virtual Function I/O" in its
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 9d3749287689..f9957eb4c659 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -112,9 +112,15 @@ struct iommu_table {
 	unsigned long *it_map;       /* A simple allocation bitmap for now */
 	unsigned long  it_page_shift;/* table iommu page size */
 	struct list_head it_group_list;/* List of iommu_table_group_link */
+	unsigned long *it_userspace; /* userspace view of the table */
 	struct iommu_table_ops *it_ops;
 };
 
+#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
+		((tbl)->it_userspace ? \
+			&((tbl)->it_userspace[(entry) - (tbl)->it_offset]) : \
+			NULL)
+
 /* Pure 2^n version of get_order */
 static inline __attribute_const__
 int get_iommu_order(unsigned long size, struct iommu_table *tbl)
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 203caacf2242..91a32239bd0a 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -19,8 +19,10 @@
 #include <linux/uaccess.h>
 #include <linux/err.h>
 #include <linux/vfio.h>
+#include <linux/vmalloc.h>
 #include <asm/iommu.h>
 #include <asm/tce.h>
+#include <asm/mmu_context.h>
 
 #define DRIVER_VERSION  "0.1"
 #define DRIVER_AUTHOR   "aik@ozlabs.ru"
@@ -81,6 +83,11 @@ static void decrement_locked_vm(long npages)
  * into DMA'ble space using the IOMMU
  */
 
+struct tce_iommu_group {
+	struct list_head next;
+	struct iommu_group *grp;
+};
+
 /*
  * The container descriptor supports only a single group per container.
  * Required by the API as the container is not supplied with the IOMMU group
@@ -88,11 +95,84 @@ static void decrement_locked_vm(long npages)
  */
 struct tce_container {
 	struct mutex lock;
-	struct iommu_group *grp;
 	bool enabled;
+	bool v2;
 	unsigned long locked_pages;
+	struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES];
+	struct list_head group_list;
 };
 
+static long tce_iommu_unregister_pages(struct tce_container *container,
+		__u64 vaddr, __u64 size)
+{
+	struct mm_iommu_table_group_mem_t *mem;
+
+	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK))
+		return -EINVAL;
+
+	mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT);
+	if (!mem)
+		return -ENOENT;
+
+	return mm_iommu_put(mem);
+}
+
+static long tce_iommu_register_pages(struct tce_container *container,
+		__u64 vaddr, __u64 size)
+{
+	long ret = 0;
+	struct mm_iommu_table_group_mem_t *mem = NULL;
+	unsigned long entries = size >> PAGE_SHIFT;
+
+	if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) ||
+			((vaddr + size) < vaddr))
+		return -EINVAL;
+
+	ret = mm_iommu_get(vaddr, entries, &mem);
+	if (ret)
+		return ret;
+
+	container->enabled = true;
+
+	return 0;
+}
+
+static long tce_iommu_userspace_view_alloc(struct iommu_table *tbl)
+{
+	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+			tbl->it_size, PAGE_SIZE);
+	unsigned long *uas;
+	long ret;
+
+	BUG_ON(tbl->it_userspace);
+
+	ret = try_increment_locked_vm(cb >> PAGE_SHIFT);
+	if (ret)
+		return ret;
+
+	uas = vzalloc(cb);
+	if (!uas) {
+		decrement_locked_vm(cb >> PAGE_SHIFT);
+		return -ENOMEM;
+	}
+	tbl->it_userspace = uas;
+
+	return 0;
+}
+
+static void tce_iommu_userspace_view_free(struct iommu_table *tbl)
+{
+	unsigned long cb = _ALIGN_UP(sizeof(tbl->it_userspace[0]) *
+			tbl->it_size, PAGE_SIZE);
+
+	if (!tbl->it_userspace)
+		return;
+
+	vfree(tbl->it_userspace);
+	tbl->it_userspace = NULL;
+	decrement_locked_vm(cb >> PAGE_SHIFT);
+}
+
 static bool tce_page_is_contained(struct page *page, unsigned page_shift)
 {
 	/*
@@ -103,18 +183,18 @@ static bool tce_page_is_contained(struct page *page, unsigned page_shift)
 	return (PAGE_SHIFT + compound_order(compound_head(page))) >= page_shift;
 }
 
+static inline bool tce_groups_attached(struct tce_container *container)
+{
+	return !list_empty(&container->group_list);
+}
+
 static long tce_iommu_find_table(struct tce_container *container,
 		phys_addr_t ioba, struct iommu_table **ptbl)
 {
 	long i;
-	struct iommu_table_group *table_group;
-
-	table_group = iommu_group_get_iommudata(container->grp);
-	if (!table_group)
-		return -1;
 
 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-		struct iommu_table *tbl = table_group->tables[i];
+		struct iommu_table *tbl = container->tables[i];
 
 		if (tbl) {
 			unsigned long entry = ioba >> tbl->it_page_shift;
@@ -136,9 +216,7 @@ static int tce_iommu_enable(struct tce_container *container)
 	int ret = 0;
 	unsigned long locked;
 	struct iommu_table_group *table_group;
-
-	if (!container->grp)
-		return -ENXIO;
+	struct tce_iommu_group *tcegrp;
 
 	if (!current->mm)
 		return -ESRCH; /* process exited */
@@ -175,7 +253,12 @@ static int tce_iommu_enable(struct tce_container *container)
 	 * as there is no way to know how much we should increment
 	 * the locked_vm counter.
 	 */
-	table_group = iommu_group_get_iommudata(container->grp);
+	if (!tce_groups_attached(container))
+		return -ENODEV;
+
+	tcegrp = list_first_entry(&container->group_list,
+			struct tce_iommu_group, next);
+	table_group = iommu_group_get_iommudata(tcegrp->grp);
 	if (!table_group)
 		return -ENODEV;
 
@@ -211,7 +294,7 @@ static void *tce_iommu_open(unsigned long arg)
 {
 	struct tce_container *container;
 
-	if (arg != VFIO_SPAPR_TCE_IOMMU) {
+	if ((arg != VFIO_SPAPR_TCE_IOMMU) && (arg != VFIO_SPAPR_TCE_v2_IOMMU)) {
 		pr_err("tce_vfio: Wrong IOMMU type\n");
 		return ERR_PTR(-EINVAL);
 	}
@@ -221,18 +304,45 @@ static void *tce_iommu_open(unsigned long arg)
 		return ERR_PTR(-ENOMEM);
 
 	mutex_init(&container->lock);
+	INIT_LIST_HEAD_RCU(&container->group_list);
+
+	container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU;
 
 	return container;
 }
 
+static int tce_iommu_clear(struct tce_container *container,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long pages);
+static void tce_iommu_free_table(struct iommu_table *tbl);
+
 static void tce_iommu_release(void *iommu_data)
 {
 	struct tce_container *container = iommu_data;
+	struct iommu_table_group *table_group;
+	struct tce_iommu_group *tcegrp;
+	long i;
 
-	WARN_ON(container->grp);
+	while (tce_groups_attached(container)) {
+		tcegrp = list_first_entry(&container->group_list,
+				struct tce_iommu_group, next);
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+		tce_iommu_detach_group(iommu_data, tcegrp->grp);
+	}
 
-	if (container->grp)
-		tce_iommu_detach_group(iommu_data, container->grp);
+	/*
+	 * If VFIO created a table, it was not disposed
+	 * by tce_iommu_detach_group() so do it now.
+	 */
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		struct iommu_table *tbl = container->tables[i];
+
+		if (!tbl)
+			continue;
+
+		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+		tce_iommu_free_table(tbl);
+	}
 
 	tce_iommu_disable(container);
 	mutex_destroy(&container->lock);
@@ -249,6 +359,47 @@ static void tce_iommu_unuse_page(struct tce_container *container,
 	put_page(page);
 }
 
+static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size,
+		unsigned long *phpa, struct mm_iommu_table_group_mem_t **pmem)
+{
+	long ret = 0;
+	struct mm_iommu_table_group_mem_t *mem;
+
+	mem = mm_iommu_lookup(tce, size);
+	if (!mem)
+		return -EINVAL;
+
+	ret = mm_iommu_ua_to_hpa(mem, tce, phpa);
+	if (ret)
+		return -EINVAL;
+
+	*pmem = mem;
+
+	return 0;
+}
+
+static void tce_iommu_unuse_page_v2(struct iommu_table *tbl,
+		unsigned long entry)
+{
+	struct mm_iommu_table_group_mem_t *mem = NULL;
+	int ret;
+	unsigned long hpa = 0;
+	unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+	if (!pua || !current || !current->mm)
+		return;
+
+	ret = tce_iommu_prereg_ua_to_hpa(*pua, IOMMU_PAGE_SIZE(tbl),
+			&hpa, &mem);
+	if (ret)
+		pr_debug("%s: tce %lx at #%lx was not cached, ret=%d\n",
+				__func__, *pua, entry, ret);
+	if (mem)
+		mm_iommu_mapped_dec(mem);
+
+	*pua = 0;
+}
+
 static int tce_iommu_clear(struct tce_container *container,
 		struct iommu_table *tbl,
 		unsigned long entry, unsigned long pages)
@@ -267,6 +418,11 @@ static int tce_iommu_clear(struct tce_container *container,
 		if (direction == DMA_NONE)
 			continue;
 
+		if (container->v2) {
+			tce_iommu_unuse_page_v2(tbl, entry);
+			continue;
+		}
+
 		tce_iommu_unuse_page(container, oldhpa);
 	}
 
@@ -333,6 +489,64 @@ static long tce_iommu_build(struct tce_container *container,
 	return ret;
 }
 
+static long tce_iommu_build_v2(struct tce_container *container,
+		struct iommu_table *tbl,
+		unsigned long entry, unsigned long tce, unsigned long pages,
+		enum dma_data_direction direction)
+{
+	long i, ret = 0;
+	struct page *page;
+	unsigned long hpa;
+	enum dma_data_direction dirtmp;
+
+	for (i = 0; i < pages; ++i) {
+		struct mm_iommu_table_group_mem_t *mem = NULL;
+		unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl,
+				entry + i);
+
+		ret = tce_iommu_prereg_ua_to_hpa(tce, IOMMU_PAGE_SIZE(tbl),
+				&hpa, &mem);
+		if (ret)
+			break;
+
+		page = pfn_to_page(hpa >> PAGE_SHIFT);
+		if (!tce_page_is_contained(page, tbl->it_page_shift)) {
+			ret = -EPERM;
+			break;
+		}
+
+		/* Preserve offset within IOMMU page */
+		hpa |= tce & IOMMU_PAGE_MASK(tbl) & ~PAGE_MASK;
+		dirtmp = direction;
+
+		/* The registered region is being unregistered */
+		if (mm_iommu_mapped_inc(mem))
+			break;
+
+		ret = iommu_tce_xchg(tbl, entry + i, &hpa, &dirtmp);
+		if (ret) {
+			/* dirtmp cannot be DMA_NONE here */
+			tce_iommu_unuse_page_v2(tbl, entry + i);
+			pr_err("iommu_tce: %s failed ioba=%lx, tce=%lx, ret=%ld\n",
+					__func__, entry << tbl->it_page_shift,
+					tce, ret);
+			break;
+		}
+
+		if (dirtmp != DMA_NONE)
+			tce_iommu_unuse_page_v2(tbl, entry + i);
+
+		*pua = tce;
+
+		tce += IOMMU_PAGE_SIZE(tbl);
+	}
+
+	if (ret)
+		tce_iommu_clear(container, tbl, entry, i);
+
+	return ret;
+}
+
 static long tce_iommu_create_table(struct tce_container *container,
 			struct iommu_table_group *table_group,
 			int num,
@@ -358,6 +572,12 @@ static long tce_iommu_create_table(struct tce_container *container,
 	WARN_ON(!ret && !(*ptbl)->it_ops->free);
 	WARN_ON(!ret && ((*ptbl)->it_allocated_size != table_size));
 
+	if (!ret && container->v2) {
+		ret = tce_iommu_userspace_view_alloc(*ptbl);
+		if (ret)
+			(*ptbl)->it_ops->free(*ptbl);
+	}
+
 	if (ret)
 		decrement_locked_vm(table_size >> PAGE_SHIFT);
 
@@ -368,6 +588,7 @@ static void tce_iommu_free_table(struct iommu_table *tbl)
 {
 	unsigned long pages = tbl->it_allocated_size >> PAGE_SHIFT;
 
+	tce_iommu_userspace_view_free(tbl);
 	tbl->it_ops->free(tbl);
 	decrement_locked_vm(pages);
 }
@@ -383,6 +604,7 @@ static long tce_iommu_ioctl(void *iommu_data,
 	case VFIO_CHECK_EXTENSION:
 		switch (arg) {
 		case VFIO_SPAPR_TCE_IOMMU:
+		case VFIO_SPAPR_TCE_v2_IOMMU:
 			ret = 1;
 			break;
 		default:
@@ -394,12 +616,15 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 	case VFIO_IOMMU_SPAPR_TCE_GET_INFO: {
 		struct vfio_iommu_spapr_tce_info info;
+		struct tce_iommu_group *tcegrp;
 		struct iommu_table_group *table_group;
 
-		if (WARN_ON(!container->grp))
+		if (!tce_groups_attached(container))
 			return -ENXIO;
 
-		table_group = iommu_group_get_iommudata(container->grp);
+		tcegrp = list_first_entry(&container->group_list,
+				struct tce_iommu_group, next);
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
 
 		if (!table_group)
 			return -ENXIO;
@@ -468,11 +693,18 @@ static long tce_iommu_ioctl(void *iommu_data,
 		if (ret)
 			return ret;
 
-		ret = tce_iommu_build(container, tbl,
-				param.iova >> tbl->it_page_shift,
-				param.vaddr,
-				param.size >> tbl->it_page_shift,
-				direction);
+		if (container->v2)
+			ret = tce_iommu_build_v2(container, tbl,
+					param.iova >> tbl->it_page_shift,
+					param.vaddr,
+					param.size >> tbl->it_page_shift,
+					direction);
+		else
+			ret = tce_iommu_build(container, tbl,
+					param.iova >> tbl->it_page_shift,
+					param.vaddr,
+					param.size >> tbl->it_page_shift,
+					direction);
 
 		iommu_flush_tce(tbl);
 
@@ -518,7 +750,62 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 		return ret;
 	}
+	case VFIO_IOMMU_SPAPR_REGISTER_MEMORY: {
+		struct vfio_iommu_spapr_register_memory param;
+
+		if (!container->v2)
+			break;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
+				size);
+
+		if (copy_from_user(&param, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (param.argsz < minsz)
+			return -EINVAL;
+
+		/* No flag is supported now */
+		if (param.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+		ret = tce_iommu_register_pages(container, param.vaddr,
+				param.size);
+		mutex_unlock(&container->lock);
+
+		return ret;
+	}
+	case VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY: {
+		struct vfio_iommu_spapr_register_memory param;
+
+		if (!container->v2)
+			break;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_register_memory,
+				size);
+
+		if (copy_from_user(&param, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (param.argsz < minsz)
+			return -EINVAL;
+
+		/* No flag is supported now */
+		if (param.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+		ret = tce_iommu_unregister_pages(container, param.vaddr,
+				param.size);
+		mutex_unlock(&container->lock);
+
+		return ret;
+	}
 	case VFIO_IOMMU_ENABLE:
+		if (container->v2)
+			break;
+
 		mutex_lock(&container->lock);
 		ret = tce_iommu_enable(container);
 		mutex_unlock(&container->lock);
@@ -526,16 +813,27 @@ static long tce_iommu_ioctl(void *iommu_data,
 
 
 	case VFIO_IOMMU_DISABLE:
+		if (container->v2)
+			break;
+
 		mutex_lock(&container->lock);
 		tce_iommu_disable(container);
 		mutex_unlock(&container->lock);
 		return 0;
-	case VFIO_EEH_PE_OP:
-		if (!container->grp)
-			return -ENODEV;
 
-		return vfio_spapr_iommu_eeh_ioctl(container->grp,
-						  cmd, arg);
+	case VFIO_EEH_PE_OP: {
+		struct tce_iommu_group *tcegrp;
+
+		ret = 0;
+		list_for_each_entry(tcegrp, &container->group_list, next) {
+			ret = vfio_spapr_iommu_eeh_ioctl(tcegrp->grp,
+					cmd, arg);
+			if (ret)
+				return ret;
+		}
+		return ret;
+	}
+
 	}
 
 	return -ENOTTY;
@@ -547,14 +845,17 @@ static void tce_iommu_release_ownership(struct tce_container *container,
 	int i;
 
 	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-		struct iommu_table *tbl = table_group->tables[i];
+		struct iommu_table *tbl = container->tables[i];
 
 		if (!tbl)
 			continue;
 
 		tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+		tce_iommu_userspace_view_free(tbl);
 		if (tbl->it_map)
 			iommu_release_ownership(tbl);
+
+		container->tables[i] = NULL;
 	}
 }
 
@@ -569,7 +870,10 @@ static int tce_iommu_take_ownership(struct tce_container *container,
 		if (!tbl || !tbl->it_map)
 			continue;
 
-		rc = iommu_take_ownership(tbl);
+		rc = tce_iommu_userspace_view_alloc(tbl);
+		if (!rc)
+			rc = iommu_take_ownership(tbl);
+
 		if (rc) {
 			for (j = 0; j < i; ++j)
 				iommu_release_ownership(
@@ -579,6 +883,9 @@ static int tce_iommu_take_ownership(struct tce_container *container,
 		}
 	}
 
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+		container->tables[i] = table_group->tables[i];
+
 	return 0;
 }
 
@@ -592,18 +899,8 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
 		return;
 	}
 
-	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
-		/* Store table pointer as unset_window resets it */
-		struct iommu_table *tbl = table_group->tables[i];
-
-		if (!tbl)
-			continue;
-
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
 		table_group->ops->unset_window(table_group, i);
-		tce_iommu_clear(container, tbl,
-				tbl->it_offset, tbl->it_size);
-		tce_iommu_free_table(tbl);
-	}
 
 	table_group->ops->release_ownership(table_group);
 }
@@ -611,7 +908,7 @@ static void tce_iommu_release_ownership_ddw(struct tce_container *container,
 static long tce_iommu_take_ownership_ddw(struct tce_container *container,
 		struct iommu_table_group *table_group)
 {
-	long ret;
+	long i, ret = 0;
 	struct iommu_table *tbl = NULL;
 
 	if (!table_group->ops->create_table || !table_group->ops->set_window ||
@@ -622,23 +919,45 @@ static long tce_iommu_take_ownership_ddw(struct tce_container *container,
 
 	table_group->ops->take_ownership(table_group);
 
-	ret = tce_iommu_create_table(container,
-			table_group,
-			0, /* window number */
-			IOMMU_PAGE_SHIFT_4K,
-			table_group->tce32_size,
-			1, /* default levels */
-			&tbl);
-	if (!ret) {
-		ret = table_group->ops->set_window(table_group, 0, tbl);
+	/*
+	 * If it the first group attached, check if there is
+	 * a default DMA window and create one if none as
+	 * the userspace expects it to exist.
+	 */
+	if (!tce_groups_attached(container) && !container->tables[0]) {
+		ret = tce_iommu_create_table(container,
+				table_group,
+				0, /* window number */
+				IOMMU_PAGE_SHIFT_4K,
+				table_group->tce32_size,
+				1, /* default levels */
+				&tbl);
 		if (ret)
-			tce_iommu_free_table(tbl);
+			goto release_exit;
 		else
-			table_group->tables[0] = tbl;
+			container->tables[0] = tbl;
 	}
 
-	if (ret)
-		table_group->ops->release_ownership(table_group);
+	/* Set all windows to the new group */
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		tbl = container->tables[i];
+
+		if (!tbl)
+			continue;
+
+		/* Set the default window to a new group */
+		ret = table_group->ops->set_window(table_group, i, tbl);
+		if (ret)
+			goto release_exit;
+	}
+
+	return 0;
+
+release_exit:
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i)
+		table_group->ops->unset_window(table_group, i);
+
+	table_group->ops->release_ownership(table_group);
 
 	return ret;
 }
@@ -649,29 +968,44 @@ static int tce_iommu_attach_group(void *iommu_data,
 	int ret;
 	struct tce_container *container = iommu_data;
 	struct iommu_table_group *table_group;
+	struct tce_iommu_group *tcegrp = NULL;
 
 	mutex_lock(&container->lock);
 
 	/* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n",
 			iommu_group_id(iommu_group), iommu_group); */
-	if (container->grp) {
-		pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n",
-				iommu_group_id(container->grp),
-				iommu_group_id(iommu_group));
+	table_group = iommu_group_get_iommudata(iommu_group);
+
+	if (tce_groups_attached(container) && (!table_group->ops ||
+			!table_group->ops->take_ownership ||
+			!table_group->ops->release_ownership)) {
 		ret = -EBUSY;
 		goto unlock_exit;
 	}
 
-	if (container->enabled) {
-		pr_err("tce_vfio: attaching group #%u to enabled container\n",
-				iommu_group_id(iommu_group));
-		ret = -EBUSY;
-		goto unlock_exit;
+	/* Check if new group has the same iommu_ops (i.e. compatible) */
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		struct iommu_table_group *table_group_tmp;
+
+		if (tcegrp->grp == iommu_group) {
+			pr_warn("tce_vfio: Group %d is already attached\n",
+					iommu_group_id(iommu_group));
+			ret = -EBUSY;
+			goto unlock_exit;
+		}
+		table_group_tmp = iommu_group_get_iommudata(tcegrp->grp);
+		if (table_group_tmp->ops != table_group->ops) {
+			pr_warn("tce_vfio: Group %d is incompatible with group %d\n",
+					iommu_group_id(iommu_group),
+					iommu_group_id(tcegrp->grp));
+			ret = -EPERM;
+			goto unlock_exit;
+		}
 	}
 
-	table_group = iommu_group_get_iommudata(iommu_group);
-	if (!table_group) {
-		ret = -ENXIO;
+	tcegrp = kzalloc(sizeof(*tcegrp), GFP_KERNEL);
+	if (!tcegrp) {
+		ret = -ENOMEM;
 		goto unlock_exit;
 	}
 
@@ -681,10 +1015,15 @@ static int tce_iommu_attach_group(void *iommu_data,
 	else
 		ret = tce_iommu_take_ownership_ddw(container, table_group);
 
-	if (!ret)
-		container->grp = iommu_group;
+	if (!ret) {
+		tcegrp->grp = iommu_group;
+		list_add(&tcegrp->next, &container->group_list);
+	}
 
 unlock_exit:
+	if (ret && tcegrp)
+		kfree(tcegrp);
+
 	mutex_unlock(&container->lock);
 
 	return ret;
@@ -695,24 +1034,26 @@ static void tce_iommu_detach_group(void *iommu_data,
 {
 	struct tce_container *container = iommu_data;
 	struct iommu_table_group *table_group;
+	bool found = false;
+	struct tce_iommu_group *tcegrp;
 
 	mutex_lock(&container->lock);
-	if (iommu_group != container->grp) {
-		pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n",
-				iommu_group_id(iommu_group),
-				iommu_group_id(container->grp));
-		goto unlock_exit;
+
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		if (tcegrp->grp == iommu_group) {
+			found = true;
+			break;
+		}
 	}
 
-	if (container->enabled) {
-		pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n",
-				iommu_group_id(container->grp));
-		tce_iommu_disable(container);
+	if (!found) {
+		pr_warn("tce_vfio: detaching unattached group #%u\n",
+				iommu_group_id(iommu_group));
+		goto unlock_exit;
 	}
 
-	/* pr_debug("tce_vfio: detaching group #%u from iommu %p\n",
-	   iommu_group_id(iommu_group), iommu_group); */
-	container->grp = NULL;
+	list_del(&tcegrp->next);
+	kfree(tcegrp);
 
 	table_group = iommu_group_get_iommudata(iommu_group);
 	BUG_ON(!table_group);
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index e4fa1995f613..fa84391a0d00 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -36,6 +36,8 @@
 /* Two-stage IOMMU */
 #define VFIO_TYPE1_NESTING_IOMMU	6	/* Implies v2 */
 
+#define VFIO_SPAPR_TCE_v2_IOMMU		7
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -507,6 +509,31 @@ struct vfio_eeh_pe_op {
 
 #define VFIO_EEH_PE_OP			_IO(VFIO_TYPE, VFIO_BASE + 21)
 
+/**
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 17, struct vfio_iommu_spapr_register_memory)
+ *
+ * Registers user space memory where DMA is allowed. It pins
+ * user pages and does the locked memory accounting so
+ * subsequent VFIO_IOMMU_MAP_DMA/VFIO_IOMMU_UNMAP_DMA calls
+ * get faster.
+ */
+struct vfio_iommu_spapr_register_memory {
+	__u32	argsz;
+	__u32	flags;
+	__u64	vaddr;				/* Process virtual address */
+	__u64	size;				/* Size of mapping (bytes) */
+};
+#define VFIO_IOMMU_SPAPR_REGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 17)
+
+/**
+ * VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY - _IOW(VFIO_TYPE, VFIO_BASE + 18, struct vfio_iommu_spapr_register_memory)
+ *
+ * Unregisters user space memory registered with
+ * VFIO_IOMMU_SPAPR_REGISTER_MEMORY.
+ * Uses vfio_iommu_spapr_register_memory for parameters.
+ */
+#define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 18)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
-- 
cgit 


From e633bc86a922468a82300eef5b9802e17be5e23d Mon Sep 17 00:00:00 2001
From: Alexey Kardashevskiy <aik@ozlabs.ru>
Date: Fri, 5 Jun 2015 16:35:26 +1000
Subject: vfio: powerpc/spapr: Support Dynamic DMA windows

This adds create/remove window ioctls to create and remove DMA windows.
sPAPR defines a Dynamic DMA windows capability which allows
para-virtualized guests to create additional DMA windows on a PCI bus.
The existing linux kernels use this new window to map the entire guest
memory and switch to the direct DMA operations saving time on map/unmap
requests which would normally happen in a big amounts.

This adds 2 ioctl handlers - VFIO_IOMMU_SPAPR_TCE_CREATE and
VFIO_IOMMU_SPAPR_TCE_REMOVE - to create and remove windows.
Up to 2 windows are supported now by the hardware and by this driver.

This changes VFIO_IOMMU_SPAPR_TCE_GET_INFO handler to return additional
information such as a number of supported windows and maximum number
levels of TCE tables.

DDW is added as a capability, not as a SPAPR TCE IOMMU v2 unique feature
as we still want to support v2 on platforms which cannot do DDW for
the sake of TCE acceleration in KVM (coming soon).

Signed-off-by: Alexey Kardashevskiy <aik@ozlabs.ru>
[aw: for the vfio related changes]
Acked-by: Alex Williamson <alex.williamson@redhat.com>
Reviewed-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
---
 Documentation/vfio.txt              |  19 ++++
 arch/powerpc/include/asm/iommu.h    |   2 +-
 drivers/vfio/vfio_iommu_spapr_tce.c | 196 +++++++++++++++++++++++++++++++++++-
 include/uapi/linux/vfio.h           |  61 ++++++++++-
 4 files changed, 273 insertions(+), 5 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt
index dcc37e109c68..1dd3fddfd3a1 100644
--- a/Documentation/vfio.txt
+++ b/Documentation/vfio.txt
@@ -464,6 +464,25 @@ address is from pre-registered range.
 
 This separation helps in optimizing DMA for guests.
 
+6) sPAPR specification allows guests to have an additional DMA window(s) on
+a PCI bus with a variable page size. Two ioctls have been added to support
+this: VFIO_IOMMU_SPAPR_TCE_CREATE and VFIO_IOMMU_SPAPR_TCE_REMOVE.
+The platform has to support the functionality or error will be returned to
+the userspace. The existing hardware supports up to 2 DMA windows, one is
+2GB long, uses 4K pages and called "default 32bit window"; the other can
+be as big as entire RAM, use different page size, it is optional - guests
+create those in run-time if the guest driver supports 64bit DMA.
+
+VFIO_IOMMU_SPAPR_TCE_CREATE receives a page shift, a DMA window size and
+a number of TCE table levels (if a TCE table is going to be big enough and
+the kernel may not be able to allocate enough of physically contiguous memory).
+It creates a new window in the available slot and returns the bus address where
+the new window starts. Due to hardware limitation, the user space cannot choose
+the location of DMA windows.
+
+VFIO_IOMMU_SPAPR_TCE_REMOVE receives the bus start address of the window
+and removes it.
+
 -------------------------------------------------------------------------------
 
 [1] VFIO was originally an acronym for "Virtual Function I/O" in its
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index f9957eb4c659..ca18cff90900 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -149,7 +149,7 @@ extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
  */
 extern struct iommu_table *iommu_init_table(struct iommu_table * tbl,
 					    int nid);
-#define IOMMU_TABLE_GROUP_MAX_TABLES	1
+#define IOMMU_TABLE_GROUP_MAX_TABLES	2
 
 struct iommu_table_group;
 
diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c
index 91a32239bd0a..0582b72ef377 100644
--- a/drivers/vfio/vfio_iommu_spapr_tce.c
+++ b/drivers/vfio/vfio_iommu_spapr_tce.c
@@ -211,6 +211,18 @@ static long tce_iommu_find_table(struct tce_container *container,
 	return -1;
 }
 
+static int tce_iommu_find_free_table(struct tce_container *container)
+{
+	int i;
+
+	for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+		if (!container->tables[i])
+			return i;
+	}
+
+	return -ENOSPC;
+}
+
 static int tce_iommu_enable(struct tce_container *container)
 {
 	int ret = 0;
@@ -593,11 +605,115 @@ static void tce_iommu_free_table(struct iommu_table *tbl)
 	decrement_locked_vm(pages);
 }
 
+static long tce_iommu_create_window(struct tce_container *container,
+		__u32 page_shift, __u64 window_size, __u32 levels,
+		__u64 *start_addr)
+{
+	struct tce_iommu_group *tcegrp;
+	struct iommu_table_group *table_group;
+	struct iommu_table *tbl = NULL;
+	long ret, num;
+
+	num = tce_iommu_find_free_table(container);
+	if (num < 0)
+		return num;
+
+	/* Get the first group for ops::create_table */
+	tcegrp = list_first_entry(&container->group_list,
+			struct tce_iommu_group, next);
+	table_group = iommu_group_get_iommudata(tcegrp->grp);
+	if (!table_group)
+		return -EFAULT;
+
+	if (!(table_group->pgsizes & (1ULL << page_shift)))
+		return -EINVAL;
+
+	if (!table_group->ops->set_window || !table_group->ops->unset_window ||
+			!table_group->ops->get_table_size ||
+			!table_group->ops->create_table)
+		return -EPERM;
+
+	/* Create TCE table */
+	ret = tce_iommu_create_table(container, table_group, num,
+			page_shift, window_size, levels, &tbl);
+	if (ret)
+		return ret;
+
+	BUG_ON(!tbl->it_ops->free);
+
+	/*
+	 * Program the table to every group.
+	 * Groups have been tested for compatibility at the attach time.
+	 */
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+
+		ret = table_group->ops->set_window(table_group, num, tbl);
+		if (ret)
+			goto unset_exit;
+	}
+
+	container->tables[num] = tbl;
+
+	/* Return start address assigned by platform in create_table() */
+	*start_addr = tbl->it_offset << tbl->it_page_shift;
+
+	return 0;
+
+unset_exit:
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+		table_group->ops->unset_window(table_group, num);
+	}
+	tce_iommu_free_table(tbl);
+
+	return ret;
+}
+
+static long tce_iommu_remove_window(struct tce_container *container,
+		__u64 start_addr)
+{
+	struct iommu_table_group *table_group = NULL;
+	struct iommu_table *tbl;
+	struct tce_iommu_group *tcegrp;
+	int num;
+
+	num = tce_iommu_find_table(container, start_addr, &tbl);
+	if (num < 0)
+		return -EINVAL;
+
+	BUG_ON(!tbl->it_size);
+
+	/* Detach groups from IOMMUs */
+	list_for_each_entry(tcegrp, &container->group_list, next) {
+		table_group = iommu_group_get_iommudata(tcegrp->grp);
+
+		/*
+		 * SPAPR TCE IOMMU exposes the default DMA window to
+		 * the guest via dma32_window_start/size of
+		 * VFIO_IOMMU_SPAPR_TCE_GET_INFO. Some platforms allow
+		 * the userspace to remove this window, some do not so
+		 * here we check for the platform capability.
+		 */
+		if (!table_group->ops || !table_group->ops->unset_window)
+			return -EPERM;
+
+		table_group->ops->unset_window(table_group, num);
+	}
+
+	/* Free table */
+	tce_iommu_clear(container, tbl, tbl->it_offset, tbl->it_size);
+	tce_iommu_free_table(tbl);
+	container->tables[num] = NULL;
+
+	return 0;
+}
+
 static long tce_iommu_ioctl(void *iommu_data,
 				 unsigned int cmd, unsigned long arg)
 {
 	struct tce_container *container = iommu_data;
-	unsigned long minsz;
+	unsigned long minsz, ddwsz;
 	long ret;
 
 	switch (cmd) {
@@ -641,6 +757,21 @@ static long tce_iommu_ioctl(void *iommu_data,
 		info.dma32_window_start = table_group->tce32_start;
 		info.dma32_window_size = table_group->tce32_size;
 		info.flags = 0;
+		memset(&info.ddw, 0, sizeof(info.ddw));
+
+		if (table_group->max_dynamic_windows_supported &&
+				container->v2) {
+			info.flags |= VFIO_IOMMU_SPAPR_INFO_DDW;
+			info.ddw.pgsizes = table_group->pgsizes;
+			info.ddw.max_dynamic_windows_supported =
+				table_group->max_dynamic_windows_supported;
+			info.ddw.levels = table_group->max_levels;
+		}
+
+		ddwsz = offsetofend(struct vfio_iommu_spapr_tce_info, ddw);
+
+		if (info.argsz >= ddwsz)
+			minsz = ddwsz;
 
 		if (copy_to_user((void __user *)arg, &info, minsz))
 			return -EFAULT;
@@ -834,6 +965,69 @@ static long tce_iommu_ioctl(void *iommu_data,
 		return ret;
 	}
 
+	case VFIO_IOMMU_SPAPR_TCE_CREATE: {
+		struct vfio_iommu_spapr_tce_create create;
+
+		if (!container->v2)
+			break;
+
+		if (!tce_groups_attached(container))
+			return -ENXIO;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_tce_create,
+				start_addr);
+
+		if (copy_from_user(&create, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (create.argsz < minsz)
+			return -EINVAL;
+
+		if (create.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+
+		ret = tce_iommu_create_window(container, create.page_shift,
+				create.window_size, create.levels,
+				&create.start_addr);
+
+		mutex_unlock(&container->lock);
+
+		if (!ret && copy_to_user((void __user *)arg, &create, minsz))
+			ret = -EFAULT;
+
+		return ret;
+	}
+	case VFIO_IOMMU_SPAPR_TCE_REMOVE: {
+		struct vfio_iommu_spapr_tce_remove remove;
+
+		if (!container->v2)
+			break;
+
+		if (!tce_groups_attached(container))
+			return -ENXIO;
+
+		minsz = offsetofend(struct vfio_iommu_spapr_tce_remove,
+				start_addr);
+
+		if (copy_from_user(&remove, (void __user *)arg, minsz))
+			return -EFAULT;
+
+		if (remove.argsz < minsz)
+			return -EINVAL;
+
+		if (remove.flags)
+			return -EINVAL;
+
+		mutex_lock(&container->lock);
+
+		ret = tce_iommu_remove_window(container, remove.start_addr);
+
+		mutex_unlock(&container->lock);
+
+		return ret;
+	}
 	}
 
 	return -ENOTTY;
diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h
index fa84391a0d00..9fd7b5d8df2f 100644
--- a/include/uapi/linux/vfio.h
+++ b/include/uapi/linux/vfio.h
@@ -444,6 +444,23 @@ struct vfio_iommu_type1_dma_unmap {
 
 /* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */
 
+/*
+ * The SPAPR TCE DDW info struct provides the information about
+ * the details of Dynamic DMA window capability.
+ *
+ * @pgsizes contains a page size bitmask, 4K/64K/16M are supported.
+ * @max_dynamic_windows_supported tells the maximum number of windows
+ * which the platform can create.
+ * @levels tells the maximum number of levels in multi-level IOMMU tables;
+ * this allows splitting a table into smaller chunks which reduces
+ * the amount of physically contiguous memory required for the table.
+ */
+struct vfio_iommu_spapr_tce_ddw_info {
+	__u64 pgsizes;			/* Bitmap of supported page sizes */
+	__u32 max_dynamic_windows_supported;
+	__u32 levels;
+};
+
 /*
  * The SPAPR TCE info struct provides the information about the PCI bus
  * address ranges available for DMA, these values are programmed into
@@ -454,14 +471,17 @@ struct vfio_iommu_type1_dma_unmap {
  * addresses too so the window works as a filter rather than an offset
  * for IOVA addresses.
  *
- * A flag will need to be added if other page sizes are supported,
- * so as defined here, it is always 4k.
+ * Flags supported:
+ * - VFIO_IOMMU_SPAPR_INFO_DDW: informs the userspace that dynamic DMA windows
+ *   (DDW) support is present. @ddw is only supported when DDW is present.
  */
 struct vfio_iommu_spapr_tce_info {
 	__u32 argsz;
-	__u32 flags;			/* reserved for future use */
+	__u32 flags;
+#define VFIO_IOMMU_SPAPR_INFO_DDW	(1 << 0)	/* DDW supported */
 	__u32 dma32_window_start;	/* 32 bit window start (bytes) */
 	__u32 dma32_window_size;	/* 32 bit window size (bytes) */
+	struct vfio_iommu_spapr_tce_ddw_info ddw;
 };
 
 #define VFIO_IOMMU_SPAPR_TCE_GET_INFO	_IO(VFIO_TYPE, VFIO_BASE + 12)
@@ -534,6 +554,41 @@ struct vfio_iommu_spapr_register_memory {
  */
 #define VFIO_IOMMU_SPAPR_UNREGISTER_MEMORY	_IO(VFIO_TYPE, VFIO_BASE + 18)
 
+/**
+ * VFIO_IOMMU_SPAPR_TCE_CREATE - _IOWR(VFIO_TYPE, VFIO_BASE + 19, struct vfio_iommu_spapr_tce_create)
+ *
+ * Creates an additional TCE table and programs it (sets a new DMA window)
+ * to every IOMMU group in the container. It receives page shift, window
+ * size and number of levels in the TCE table being created.
+ *
+ * It allocates and returns an offset on a PCI bus of the new DMA window.
+ */
+struct vfio_iommu_spapr_tce_create {
+	__u32 argsz;
+	__u32 flags;
+	/* in */
+	__u32 page_shift;
+	__u64 window_size;
+	__u32 levels;
+	/* out */
+	__u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_CREATE	_IO(VFIO_TYPE, VFIO_BASE + 19)
+
+/**
+ * VFIO_IOMMU_SPAPR_TCE_REMOVE - _IOW(VFIO_TYPE, VFIO_BASE + 20, struct vfio_iommu_spapr_tce_remove)
+ *
+ * Unprograms a TCE table from all groups in the container and destroys it.
+ * It receives a PCI bus offset as a window id.
+ */
+struct vfio_iommu_spapr_tce_remove {
+	__u32 argsz;
+	__u32 flags;
+	/* in */
+	__u64 start_addr;
+};
+#define VFIO_IOMMU_SPAPR_TCE_REMOVE	_IO(VFIO_TYPE, VFIO_BASE + 20)
+
 /* ***************************************************************** */
 
 #endif /* _UAPIVFIO_H */
-- 
cgit 


From a4244b0cf58d56c171874e85228ba5deffeb017a Mon Sep 17 00:00:00 2001
From: Hadar Hen Zion <hadarh@mellanox.com>
Date: Thu, 11 Jun 2015 10:28:16 +0300
Subject: net/ethtool: Add current supported tunable options

Add strings array of the current supported tunable options.

Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Reviewed-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h |  6 ++++++
 net/core/ethtool.c           | 12 ++++++++++++
 2 files changed, 18 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 0594933cdf55..cd67aec187d9 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -215,6 +215,11 @@ enum tunable_id {
 	ETHTOOL_ID_UNSPEC,
 	ETHTOOL_RX_COPYBREAK,
 	ETHTOOL_TX_COPYBREAK,
+	/*
+	 * Add your fresh new tubale attribute above and remember to update
+	 * tunable_strings[] in net/core/ethtool.c
+	 */
+	__ETHTOOL_TUNABLE_COUNT,
 };
 
 enum tunable_type_id {
@@ -545,6 +550,7 @@ enum ethtool_stringset {
 	ETH_SS_NTUPLE_FILTERS,
 	ETH_SS_FEATURES,
 	ETH_SS_RSS_HASH_FUNCS,
+	ETH_SS_TUNABLES,
 };
 
 /**
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index eb0c3ace7458..b495ab1797fa 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -106,6 +106,13 @@ rss_hash_func_strings[ETH_RSS_HASH_FUNCS_COUNT][ETH_GSTRING_LEN] = {
 	[ETH_RSS_HASH_XOR_BIT] =	"xor",
 };
 
+static const char
+tunable_strings[__ETHTOOL_TUNABLE_COUNT][ETH_GSTRING_LEN] = {
+	[ETHTOOL_ID_UNSPEC]     = "Unspec",
+	[ETHTOOL_RX_COPYBREAK]	= "rx-copybreak",
+	[ETHTOOL_TX_COPYBREAK]	= "tx-copybreak",
+};
+
 static int ethtool_get_features(struct net_device *dev, void __user *useraddr)
 {
 	struct ethtool_gfeatures cmd = {
@@ -194,6 +201,9 @@ static int __ethtool_get_sset_count(struct net_device *dev, int sset)
 	if (sset == ETH_SS_RSS_HASH_FUNCS)
 		return ARRAY_SIZE(rss_hash_func_strings);
 
+	if (sset == ETH_SS_TUNABLES)
+		return ARRAY_SIZE(tunable_strings);
+
 	if (ops->get_sset_count && ops->get_strings)
 		return ops->get_sset_count(dev, sset);
 	else
@@ -211,6 +221,8 @@ static void __ethtool_get_strings(struct net_device *dev,
 	else if (stringset == ETH_SS_RSS_HASH_FUNCS)
 		memcpy(data, rss_hash_func_strings,
 		       sizeof(rss_hash_func_strings));
+	else if (stringset == ETH_SS_TUNABLES)
+		memcpy(data, tunable_strings, sizeof(tunable_strings));
 	else
 		/* ops->get_strings is valid because checked earlier */
 		ops->get_strings(dev, stringset, data);
-- 
cgit 


From 9961127d4bce6325e9a0b0fb105e0c85a6c62cb7 Mon Sep 17 00:00:00 2001
From: Vincent Cuissard <cuissard@marvell.com>
Date: Thu, 11 Jun 2015 11:25:47 +0200
Subject: NFC: nci: add generic uart support

Some NFC controller supports UART as host interface.
As with SPI, a lot of code can be shared between vendor
drivers. This patch add the generic support of UART and
provides some extension API for vendor specific needs.

This code is strongly inspired by the Bluetooth HCI ldisc
implementation. NCI UART vendor drivers will have to register
themselves to this layer via nci_uart_register.

Underlying tty will have to be configured from user land
thanks to an ioctl.

Signed-off-by: Vincent Cuissard <cuissard@marvell.com>
Signed-off-by: Samuel Ortiz <sameo@linux.intel.com>
---
 include/net/nfc/nci.h      |   1 +
 include/net/nfc/nci_core.h |  47 +++++
 include/uapi/linux/tty.h   |   1 +
 net/nfc/nci/Kconfig        |   7 +
 net/nfc/nci/Makefile       |   3 +
 net/nfc/nci/uart.c         | 495 +++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 554 insertions(+)
 create mode 100644 net/nfc/nci/uart.c

(limited to 'include/uapi/linux')

diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h
index a2f2f3d3196d..75d2e1880059 100644
--- a/include/net/nfc/nci.h
+++ b/include/net/nfc/nci.h
@@ -35,6 +35,7 @@
 #define NCI_MAX_NUM_RF_CONFIGS					10
 #define NCI_MAX_NUM_CONN					10
 #define NCI_MAX_PARAM_LEN					251
+#define NCI_MAX_PACKET_SIZE					258
 
 /* NCI Status Codes */
 #define NCI_STATUS_OK						0x00
diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 9d77ed556b78..01fc8c531115 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -31,6 +31,7 @@
 
 #include <linux/interrupt.h>
 #include <linux/skbuff.h>
+#include <linux/tty.h>
 
 #include <net/nfc/nfc.h>
 #include <net/nfc/nci.h>
@@ -391,4 +392,50 @@ int nci_spi_send(struct nci_spi *nspi,
 		 struct sk_buff *skb);
 struct sk_buff *nci_spi_read(struct nci_spi *nspi);
 
+/* ----- NCI UART ---- */
+
+/* Ioctl */
+#define NCIUARTSETDRIVER	_IOW('U', 0, char *)
+
+enum nci_uart_driver {
+	NCI_UART_DRIVER_MARVELL = 0,
+	NCI_UART_DRIVER_MAX
+};
+
+struct nci_uart;
+
+struct nci_uart_ops {
+	int (*open)(struct nci_uart *nci_uart);
+	void (*close)(struct nci_uart *nci_uart);
+	int (*recv)(struct nci_uart *nci_uart, struct sk_buff *skb);
+	int (*recv_buf)(struct nci_uart *nci_uart, const u8 *data, char *flags,
+			int count);
+	int (*send)(struct nci_uart *nci_uart, struct sk_buff *skb);
+	void (*tx_start)(struct nci_uart *nci_uart);
+	void (*tx_done)(struct nci_uart *nci_uart);
+};
+
+struct nci_uart {
+	struct module		*owner;
+	struct nci_uart_ops	ops;
+	const char		*name;
+	enum nci_uart_driver	driver;
+
+	/* Dynamic data */
+	struct nci_dev		*ndev;
+	spinlock_t		rx_lock;
+	struct work_struct	write_work;
+	struct tty_struct	*tty;
+	unsigned long		tx_state;
+	struct sk_buff_head	tx_q;
+	struct sk_buff		*tx_skb;
+	struct sk_buff		*rx_skb;
+	int			rx_packet_len;
+	void			*drv_data;
+};
+
+int nci_uart_register(struct nci_uart *nu);
+void nci_uart_unregister(struct nci_uart *nu);
+void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl);
+
 #endif /* __NCI_CORE_H */
diff --git a/include/uapi/linux/tty.h b/include/uapi/linux/tty.h
index dac199a2dba5..01c4410352ff 100644
--- a/include/uapi/linux/tty.h
+++ b/include/uapi/linux/tty.h
@@ -34,5 +34,6 @@
 #define N_TI_WL		22	/* for TI's WL BT, FM, GPS combo chips */
 #define N_TRACESINK	23	/* Trace data routing for MIPI P1149.7 */
 #define N_TRACEROUTER	24	/* Trace data routing for MIPI P1149.7 */
+#define N_NCI		25	/* NFC NCI UART */
 
 #endif /* _UAPI_LINUX_TTY_H */
diff --git a/net/nfc/nci/Kconfig b/net/nfc/nci/Kconfig
index a4f1e42e3481..901c1ddba841 100644
--- a/net/nfc/nci/Kconfig
+++ b/net/nfc/nci/Kconfig
@@ -19,3 +19,10 @@ config NFC_NCI_SPI
 	  an NFC Controller (NFCC) and a Device Host (DH).
 
 	  Say yes if you use an NCI driver that requires SPI link layer.
+
+config NFC_NCI_UART
+	depends on NFC_NCI && TTY
+	tristate "NCI over UART protocol support"
+	default n
+	help
+	  Say yes if you use an NCI driver that requires UART link layer.
diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile
index 7ed8949266cc..b4b85b82e988 100644
--- a/net/nfc/nci/Makefile
+++ b/net/nfc/nci/Makefile
@@ -7,3 +7,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o
 nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o
 
 nci-$(CONFIG_NFC_NCI_SPI) += spi.o
+
+nci_uart-y += uart.o
+obj-$(CONFIG_NFC_NCI_UART) += nci_uart.o
diff --git a/net/nfc/nci/uart.c b/net/nfc/nci/uart.c
new file mode 100644
index 000000000000..70c279543074
--- /dev/null
+++ b/net/nfc/nci/uart.c
@@ -0,0 +1,495 @@
+/*
+ * Copyright (C) 2015, Marvell International Ltd.
+ *
+ * This software file (the "File") is distributed by Marvell International
+ * Ltd. under the terms of the GNU General Public License Version 2, June 1991
+ * (the "License").  You may use, redistribute and/or modify this File in
+ * accordance with the terms and conditions of the License, a copy of which
+ * is available on the worldwide web at
+ * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
+ *
+ * THE FILE IS DISTRIBUTED AS-IS, WITHOUT WARRANTY OF ANY KIND, AND THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
+ * ARE EXPRESSLY DISCLAIMED.  The License provides additional details about
+ * this warranty disclaimer.
+
+ */
+
+/* Inspired (hugely) by HCI LDISC implementation in Bluetooth.
+ *
+ *  Copyright (C) 2000-2001  Qualcomm Incorporated
+ *  Copyright (C) 2002-2003  Maxim Krasnyansky <maxk@qualcomm.com>
+ *  Copyright (C) 2004-2005  Marcel Holtmann <marcel@holtmann.org>
+ */
+
+#include <linux/module.h>
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/fcntl.h>
+#include <linux/interrupt.h>
+#include <linux/ptrace.h>
+#include <linux/poll.h>
+
+#include <linux/slab.h>
+#include <linux/tty.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/signal.h>
+#include <linux/ioctl.h>
+#include <linux/skbuff.h>
+
+#include <net/nfc/nci.h>
+#include <net/nfc/nci_core.h>
+
+/* TX states  */
+#define NCI_UART_SENDING	1
+#define NCI_UART_TX_WAKEUP	2
+
+static struct nci_uart *nci_uart_drivers[NCI_UART_DRIVER_MAX];
+
+static inline struct sk_buff *nci_uart_dequeue(struct nci_uart *nu)
+{
+	struct sk_buff *skb = nu->tx_skb;
+
+	if (!skb)
+		skb = skb_dequeue(&nu->tx_q);
+	else
+		nu->tx_skb = NULL;
+
+	return skb;
+}
+
+static inline int nci_uart_queue_empty(struct nci_uart *nu)
+{
+	if (nu->tx_skb)
+		return 0;
+
+	return skb_queue_empty(&nu->tx_q);
+}
+
+static int nci_uart_tx_wakeup(struct nci_uart *nu)
+{
+	if (test_and_set_bit(NCI_UART_SENDING, &nu->tx_state)) {
+		set_bit(NCI_UART_TX_WAKEUP, &nu->tx_state);
+		return 0;
+	}
+
+	schedule_work(&nu->write_work);
+
+	return 0;
+}
+
+static void nci_uart_write_work(struct work_struct *work)
+{
+	struct nci_uart *nu = container_of(work, struct nci_uart, write_work);
+	struct tty_struct *tty = nu->tty;
+	struct sk_buff *skb;
+
+restart:
+	clear_bit(NCI_UART_TX_WAKEUP, &nu->tx_state);
+
+	if (nu->ops.tx_start)
+		nu->ops.tx_start(nu);
+
+	while ((skb = nci_uart_dequeue(nu))) {
+		int len;
+
+		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+		len = tty->ops->write(tty, skb->data, skb->len);
+		skb_pull(skb, len);
+		if (skb->len) {
+			nu->tx_skb = skb;
+			break;
+		}
+		kfree_skb(skb);
+	}
+
+	if (test_bit(NCI_UART_TX_WAKEUP, &nu->tx_state))
+		goto restart;
+
+	if (nu->ops.tx_done && nci_uart_queue_empty(nu))
+		nu->ops.tx_done(nu);
+
+	clear_bit(NCI_UART_SENDING, &nu->tx_state);
+}
+
+static int nci_uart_set_driver(struct tty_struct *tty, unsigned int driver)
+{
+	struct nci_uart *nu = NULL;
+	int ret;
+
+	if (driver >= NCI_UART_DRIVER_MAX)
+		return -EINVAL;
+
+	if (!nci_uart_drivers[driver])
+		return -ENOENT;
+
+	nu = kzalloc(sizeof(*nu), GFP_KERNEL);
+	if (!nu)
+		return -ENOMEM;
+
+	memcpy(nu, nci_uart_drivers[driver], sizeof(struct nci_uart));
+	nu->tty = tty;
+	tty->disc_data = nu;
+	skb_queue_head_init(&nu->tx_q);
+	INIT_WORK(&nu->write_work, nci_uart_write_work);
+	spin_lock_init(&nu->rx_lock);
+
+	ret = nu->ops.open(nu);
+	if (ret) {
+		tty->disc_data = NULL;
+		kfree(nu);
+	} else if (!try_module_get(nu->owner)) {
+		nu->ops.close(nu);
+		tty->disc_data = NULL;
+		kfree(nu);
+		return -ENOENT;
+	}
+	return ret;
+}
+
+/* ------ LDISC part ------ */
+
+/* nci_uart_tty_open
+ *
+ *     Called when line discipline changed to NCI_UART.
+ *
+ * Arguments:
+ *     tty    pointer to tty info structure
+ * Return Value:
+ *     0 if success, otherwise error code
+ */
+static int nci_uart_tty_open(struct tty_struct *tty)
+{
+	/* Error if the tty has no write op instead of leaving an exploitable
+	 * hole
+	 */
+	if (!tty->ops->write)
+		return -EOPNOTSUPP;
+
+	tty->disc_data = NULL;
+	tty->receive_room = 65536;
+
+	/* Flush any pending characters in the driver and line discipline. */
+
+	/* FIXME: why is this needed. Note don't use ldisc_ref here as the
+	 * open path is before the ldisc is referencable.
+	 */
+
+	if (tty->ldisc->ops->flush_buffer)
+		tty->ldisc->ops->flush_buffer(tty);
+	tty_driver_flush_buffer(tty);
+
+	return 0;
+}
+
+/* nci_uart_tty_close()
+ *
+ *    Called when the line discipline is changed to something
+ *    else, the tty is closed, or the tty detects a hangup.
+ */
+static void nci_uart_tty_close(struct tty_struct *tty)
+{
+	struct nci_uart *nu = (void *)tty->disc_data;
+
+	/* Detach from the tty */
+	tty->disc_data = NULL;
+
+	if (!nu)
+		return;
+
+	if (nu->tx_skb)
+		kfree_skb(nu->tx_skb);
+	if (nu->rx_skb)
+		kfree_skb(nu->rx_skb);
+
+	skb_queue_purge(&nu->tx_q);
+
+	nu->ops.close(nu);
+	nu->tty = NULL;
+	module_put(nu->owner);
+
+	cancel_work_sync(&nu->write_work);
+
+	kfree(nu);
+}
+
+/* nci_uart_tty_wakeup()
+ *
+ *    Callback for transmit wakeup. Called when low level
+ *    device driver can accept more send data.
+ *
+ * Arguments:        tty    pointer to associated tty instance data
+ * Return Value:    None
+ */
+static void nci_uart_tty_wakeup(struct tty_struct *tty)
+{
+	struct nci_uart *nu = (void *)tty->disc_data;
+
+	if (!nu)
+		return;
+
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+
+	if (tty != nu->tty)
+		return;
+
+	nci_uart_tx_wakeup(nu);
+}
+
+/* nci_uart_tty_receive()
+ *
+ *     Called by tty low level driver when receive data is
+ *     available.
+ *
+ * Arguments:  tty          pointer to tty isntance data
+ *             data         pointer to received data
+ *             flags        pointer to flags for data
+ *             count        count of received data in bytes
+ *
+ * Return Value:    None
+ */
+static void nci_uart_tty_receive(struct tty_struct *tty, const u8 *data,
+				 char *flags, int count)
+{
+	struct nci_uart *nu = (void *)tty->disc_data;
+
+	if (!nu || tty != nu->tty)
+		return;
+
+	spin_lock(&nu->rx_lock);
+	nu->ops.recv_buf(nu, (void *)data, flags, count);
+	spin_unlock(&nu->rx_lock);
+
+	tty_unthrottle(tty);
+}
+
+/* nci_uart_tty_ioctl()
+ *
+ *    Process IOCTL system call for the tty device.
+ *
+ * Arguments:
+ *
+ *    tty        pointer to tty instance data
+ *    file       pointer to open file object for device
+ *    cmd        IOCTL command code
+ *    arg        argument for IOCTL call (cmd dependent)
+ *
+ * Return Value:    Command dependent
+ */
+static int nci_uart_tty_ioctl(struct tty_struct *tty, struct file *file,
+			      unsigned int cmd, unsigned long arg)
+{
+	struct nci_uart *nu = (void *)tty->disc_data;
+	int err = 0;
+
+	switch (cmd) {
+	case NCIUARTSETDRIVER:
+		if (!nu)
+			return nci_uart_set_driver(tty, (unsigned int)arg);
+		else
+			return -EBUSY;
+		break;
+	default:
+		err = n_tty_ioctl_helper(tty, file, cmd, arg);
+		break;
+	}
+
+	return err;
+}
+
+/* We don't provide read/write/poll interface for user space. */
+static ssize_t nci_uart_tty_read(struct tty_struct *tty, struct file *file,
+				 unsigned char __user *buf, size_t nr)
+{
+	return 0;
+}
+
+static ssize_t nci_uart_tty_write(struct tty_struct *tty, struct file *file,
+				  const unsigned char *data, size_t count)
+{
+	return 0;
+}
+
+static unsigned int nci_uart_tty_poll(struct tty_struct *tty,
+				      struct file *filp, poll_table *wait)
+{
+	return 0;
+}
+
+static int nci_uart_send(struct nci_uart *nu, struct sk_buff *skb)
+{
+	/* Queue TX packet */
+	skb_queue_tail(&nu->tx_q, skb);
+
+	/* Try to start TX (if possible) */
+	nci_uart_tx_wakeup(nu);
+
+	return 0;
+}
+
+/* -- Default recv_buf handler --
+ *
+ * This handler supposes that NCI frames are sent over UART link without any
+ * framing. It reads NCI header, retrieve the packet size and once all packet
+ * bytes are received it passes it to nci_uart driver for processing.
+ */
+static int nci_uart_default_recv_buf(struct nci_uart *nu, const u8 *data,
+				     char *flags, int count)
+{
+	int chunk_len;
+
+	if (!nu->ndev) {
+		nfc_err(nu->tty->dev,
+			"receive data from tty but no NCI dev is attached yet, drop buffer\n");
+		return 0;
+	}
+
+	/* Decode all incoming data in packets
+	 * and enqueue then for processing.
+	 */
+	while (count > 0) {
+		/* If this is the first data of a packet, allocate a buffer */
+		if (!nu->rx_skb) {
+			nu->rx_packet_len = -1;
+			nu->rx_skb = nci_skb_alloc(nu->ndev,
+						   NCI_MAX_PACKET_SIZE,
+						   GFP_KERNEL);
+			if (!nu->rx_skb)
+				return -ENOMEM;
+		}
+
+		/* Eat byte after byte till full packet header is received */
+		if (nu->rx_skb->len < NCI_CTRL_HDR_SIZE) {
+			*skb_put(nu->rx_skb, 1) = *data++;
+			--count;
+			continue;
+		}
+
+		/* Header was received but packet len was not read */
+		if (nu->rx_packet_len < 0)
+			nu->rx_packet_len = NCI_CTRL_HDR_SIZE +
+				nci_plen(nu->rx_skb->data);
+
+		/* Compute how many bytes are missing and how many bytes can
+		 * be consumed.
+		 */
+		chunk_len = nu->rx_packet_len - nu->rx_skb->len;
+		if (count < chunk_len)
+			chunk_len = count;
+		memcpy(skb_put(nu->rx_skb, chunk_len), data, chunk_len);
+		data += chunk_len;
+		count -= chunk_len;
+
+		/* Chcek if packet is fully received */
+		if (nu->rx_packet_len == nu->rx_skb->len) {
+			/* Pass RX packet to driver */
+			if (nu->ops.recv(nu, nu->rx_skb) != 0)
+				nfc_err(nu->tty->dev, "corrupted RX packet\n");
+			/* Next packet will be a new one */
+			nu->rx_skb = NULL;
+		}
+	}
+
+	return 0;
+}
+
+/* -- Default recv handler -- */
+static int nci_uart_default_recv(struct nci_uart *nu, struct sk_buff *skb)
+{
+	return nci_recv_frame(nu->ndev, skb);
+}
+
+int nci_uart_register(struct nci_uart *nu)
+{
+	if (!nu || !nu->ops.open ||
+	    !nu->ops.recv || !nu->ops.close)
+		return -EINVAL;
+
+	/* Set the send callback */
+	nu->ops.send = nci_uart_send;
+
+	/* Install default handlers if not overridden */
+	if (!nu->ops.recv_buf)
+		nu->ops.recv_buf = nci_uart_default_recv_buf;
+	if (!nu->ops.recv)
+		nu->ops.recv = nci_uart_default_recv;
+
+	/* Add this driver in the driver list */
+	if (!nci_uart_drivers[nu->driver]) {
+		pr_err("driver %d is already registered\n", nu->driver);
+		return -EBUSY;
+	}
+	nci_uart_drivers[nu->driver] = nu;
+
+	pr_info("NCI uart driver '%s [%d]' registered\n", nu->name, nu->driver);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nci_uart_register);
+
+void nci_uart_unregister(struct nci_uart *nu)
+{
+	pr_info("NCI uart driver '%s [%d]' unregistered\n", nu->name,
+		nu->driver);
+
+	/* Remove this driver from the driver list */
+	nci_uart_drivers[nu->driver] = NULL;
+}
+EXPORT_SYMBOL_GPL(nci_uart_unregister);
+
+void nci_uart_set_config(struct nci_uart *nu, int baudrate, int flow_ctrl)
+{
+	struct ktermios new_termios;
+
+	if (!nu->tty)
+		return;
+
+	down_read(&nu->tty->termios_rwsem);
+	new_termios = nu->tty->termios;
+	up_read(&nu->tty->termios_rwsem);
+	tty_termios_encode_baud_rate(&new_termios, baudrate, baudrate);
+
+	if (flow_ctrl)
+		new_termios.c_cflag |= CRTSCTS;
+	else
+		new_termios.c_cflag &= ~CRTSCTS;
+
+	tty_set_termios(nu->tty, &new_termios);
+}
+EXPORT_SYMBOL_GPL(nci_uart_set_config);
+
+static struct tty_ldisc_ops nci_uart_ldisc = {
+	.magic		= TTY_LDISC_MAGIC,
+	.owner		= THIS_MODULE,
+	.name		= "n_nci",
+	.open		= nci_uart_tty_open,
+	.close		= nci_uart_tty_close,
+	.read		= nci_uart_tty_read,
+	.write		= nci_uart_tty_write,
+	.poll		= nci_uart_tty_poll,
+	.receive_buf	= nci_uart_tty_receive,
+	.write_wakeup	= nci_uart_tty_wakeup,
+	.ioctl		= nci_uart_tty_ioctl,
+};
+
+static int __init nci_uart_init(void)
+{
+	memset(nci_uart_drivers, 0, sizeof(nci_uart_drivers));
+	return tty_register_ldisc(N_NCI, &nci_uart_ldisc);
+}
+
+static void __exit nci_uart_exit(void)
+{
+	tty_unregister_ldisc(N_NCI);
+}
+
+module_init(nci_uart_init);
+module_exit(nci_uart_exit);
+
+MODULE_AUTHOR("Marvell International Ltd.");
+MODULE_DESCRIPTION("NFC NCI UART driver");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_LDISC(N_NCI);
-- 
cgit 


From ca0f6a5cd99e0c6ba4bb78dc402817f636370f26 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Sat, 13 Jun 2015 19:45:33 +0200
Subject: netfilter: ipset: Fix coding styles reported by checkpatch.pl

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
---
 include/linux/netfilter/ipset/ip_set.h       |   5 +-
 include/uapi/linux/netfilter/ipset/ip_set.h  |   6 +-
 net/netfilter/ipset/ip_set_bitmap_gen.h      |  11 +-
 net/netfilter/ipset/ip_set_bitmap_ip.c       |  12 +-
 net/netfilter/ipset/ip_set_bitmap_ipmac.c    |  21 +--
 net/netfilter/ipset/ip_set_bitmap_port.c     |   7 +-
 net/netfilter/ipset/ip_set_core.c            | 201 +++++++++++++--------------
 net/netfilter/ipset/ip_set_getport.c         |  13 +-
 net/netfilter/ipset/ip_set_hash_gen.h        |  55 ++++----
 net/netfilter/ipset/ip_set_hash_ip.c         |   4 +-
 net/netfilter/ipset/ip_set_hash_ipmark.c     |   9 +-
 net/netfilter/ipset/ip_set_hash_ipport.c     |  14 +-
 net/netfilter/ipset/ip_set_hash_ipportip.c   |  16 ++-
 net/netfilter/ipset/ip_set_hash_ipportnet.c  |  19 ++-
 net/netfilter/ipset/ip_set_hash_mac.c        |   6 +-
 net/netfilter/ipset/ip_set_hash_net.c        |   8 +-
 net/netfilter/ipset/ip_set_hash_netiface.c   |  25 ++--
 net/netfilter/ipset/ip_set_hash_netnet.c     |  46 +++---
 net/netfilter/ipset/ip_set_hash_netport.c    |  19 ++-
 net/netfilter/ipset/ip_set_hash_netportnet.c |  54 +++----
 net/netfilter/ipset/ip_set_list_set.c        |  11 +-
 net/netfilter/ipset/pfxlen.c                 |  16 +--
 net/netfilter/xt_set.c                       |  44 +++---
 23 files changed, 327 insertions(+), 295 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h
index 19b4969a25fe..48bb01edcf30 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -349,12 +349,11 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct ip_set_skbinfo *skbinfo)
 			      cpu_to_be64((u64)skbinfo->skbmark << 32 |
 					  skbinfo->skbmarkmask))) ||
 	       (skbinfo->skbprio &&
-	        nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
+		nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
 			      cpu_to_be32(skbinfo->skbprio))) ||
 	       (skbinfo->skbqueue &&
-	        nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
+		nla_put_net16(skb, IPSET_ATTR_SKBQUEUE,
 			     cpu_to_be16(skbinfo->skbqueue)));
-
 }
 
 static inline void
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h b/include/uapi/linux/netfilter/ipset/ip_set.h
index 5ab4e60894cf..63b2e34f1b60 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -15,12 +15,12 @@
 /* The protocol version */
 #define IPSET_PROTOCOL		6
 
-/* The maximum permissible comment length we will accept over netlink */
-#define IPSET_MAX_COMMENT_SIZE	255
-
 /* The max length of strings including NUL: set and type identifiers */
 #define IPSET_MAXNAMELEN	32
 
+/* The maximum permissible comment length we will accept over netlink */
+#define IPSET_MAX_COMMENT_SIZE	255
+
 /* Message types and commands */
 enum ipset_cmd {
 	IPSET_CMD_NONE,
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 86429f369128..d05e759ed0fa 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -41,7 +41,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	struct mtype *map = set->data;
 
 	init_timer(&map->gc);
-	map->gc.data = (unsigned long) set;
+	map->gc.data = (unsigned long)set;
 	map->gc.function = gc;
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
@@ -223,7 +223,7 @@ mtype_list(const struct ip_set *set,
 		if (!test_bit(id, map->members) ||
 		    (SET_WITH_TIMEOUT(set) &&
 #ifdef IP_SET_BITMAP_STORED_TIMEOUT
-		     mtype_is_filled((const struct mtype_elem *) x) &&
+		     mtype_is_filled((const struct mtype_elem *)x) &&
 #endif
 		     ip_set_timeout_expired(ext_timeout(x, set))))
 			continue;
@@ -240,7 +240,7 @@ mtype_list(const struct ip_set *set,
 		if (mtype_do_list(skb, map, id, set->dsize))
 			goto nla_put_failure;
 		if (ip_set_put_extensions(skb, set, x,
-		    mtype_is_filled((const struct mtype_elem *) x)))
+		    mtype_is_filled((const struct mtype_elem *)x)))
 			goto nla_put_failure;
 		ipset_nest_end(skb, nested);
 	}
@@ -266,13 +266,14 @@ out:
 static void
 mtype_gc(unsigned long ul_set)
 {
-	struct ip_set *set = (struct ip_set *) ul_set;
+	struct ip_set *set = (struct ip_set *)ul_set;
 	struct mtype *map = set->data;
 	void *x;
 	u32 id;
 
 	/* We run parallel with other readers (test element)
-	 * but adding/deleting new entries is locked out */
+	 * but adding/deleting new entries is locked out
+	 */
 	spin_lock_bh(&set->lock);
 	for (id = 0; id < map->elements; id++)
 		if (mtype_gc_test(id, map, set->dsize)) {
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index b8ce474c038d..64a564334418 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -59,7 +59,7 @@ struct bitmap_ip_adt_elem {
 static inline u32
 ip_to_id(const struct bitmap_ip *m, u32 ip)
 {
-	return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip)/m->hosts;
+	return ((ip & ip_set_hostmask(m->netmask)) - m->first_ip) / m->hosts;
 }
 
 /* Common functions */
@@ -175,8 +175,9 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 		if (!cidr || cidr > HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 		ip_set_mask_from_to(ip, ip_to, cidr);
-	} else
+	} else {
 		ip_to = ip;
+	}
 
 	if (ip_to > map->last_ip)
 		return -IPSET_ERR_BITMAP_RANGE;
@@ -187,8 +188,8 @@ bitmap_ip_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
@@ -278,8 +279,9 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		if (cidr >= HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 		ip_set_mask_from_to(first_ip, last_ip, cidr);
-	} else
+	} else {
 		return -IPSET_ERR_PROTOCOL;
+	}
 
 	if (tb[IPSET_ATTR_NETMASK]) {
 		netmask = nla_get_u8(tb[IPSET_ATTR_NETMASK]);
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index fe00e87decc8..1430535118fb 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -90,7 +90,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
 		return 0;
 	elem = get_elem(map->extensions, e->id, dsize);
 	if (elem->filled == MAC_FILLED)
-		return e->ether == NULL ||
+		return !e->ether ||
 		       ether_addr_equal(e->ether, elem->ether);
 	/* Trigger kernel to fill out the ethernet address */
 	return -EAGAIN;
@@ -131,7 +131,8 @@ bitmap_ipmac_add_timeout(unsigned long *timeout,
 		/* If MAC is unset yet, we store plain timeout value
 		 * because the timer is not activated yet
 		 * and we can reuse it later when MAC is filled out,
-		 * possibly by the kernel */
+		 * possibly by the kernel
+		 */
 		if (e->ether)
 			ip_set_timeout_set(timeout, t);
 		else
@@ -155,7 +156,7 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 				/* memcpy isn't atomic */
 				clear_bit(e->id, map->members);
 				smp_mb__after_atomic();
-				memcpy(elem->ether, e->ether, ETH_ALEN);
+				ether_addr_copy(elem->ether, e->ether);
 			}
 			return IPSET_ADD_FAILED;
 		} else if (!e->ether)
@@ -164,19 +165,18 @@ bitmap_ipmac_do_add(const struct bitmap_ipmac_adt_elem *e,
 		/* Fill the MAC address and trigger the timer activation */
 		clear_bit(e->id, map->members);
 		smp_mb__after_atomic();
-		memcpy(elem->ether, e->ether, ETH_ALEN);
+		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return IPSET_ADD_START_STORED_TIMEOUT;
 	} else if (e->ether) {
 		/* We can store MAC too */
-		memcpy(elem->ether, e->ether, ETH_ALEN);
+		ether_addr_copy(elem->ether, e->ether);
 		elem->filled = MAC_FILLED;
 		return 0;
-	} else {
-		elem->filled = MAC_UNSET;
-		/* MAC is not stored yet, don't start timer */
-		return IPSET_ADD_STORE_PLAIN_TIMEOUT;
 	}
+	elem->filled = MAC_UNSET;
+	/* MAC is not stored yet, don't start timer */
+	return IPSET_ADD_STORE_PLAIN_TIMEOUT;
 }
 
 static inline int
@@ -352,8 +352,9 @@ bitmap_ipmac_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
 		if (cidr >= HOST_MASK)
 			return -IPSET_ERR_INVALID_CIDR;
 		ip_set_mask_from_to(first_ip, last_ip, cidr);
-	} else
+	} else {
 		return -IPSET_ERR_PROTOCOL;
+	}
 
 	elements = (u64)last_ip - first_ip + 1;
 
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 2d360f951d18..5338ccd5da46 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -162,8 +162,9 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 			if (port < map->first_port)
 				return -IPSET_ERR_BITMAP_RANGE;
 		}
-	} else
+	} else {
 		port_to = port;
+	}
 
 	if (port_to > map->last_port)
 		return -IPSET_ERR_BITMAP_RANGE;
@@ -174,8 +175,8 @@ bitmap_port_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 2b21a1983a98..338b4047776f 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -35,6 +35,7 @@ struct ip_set_net {
 	bool		is_deleted;	/* deleted by ip_set_net_exit */
 	bool		is_destroyed;	/* all sets are destroyed */
 };
+
 static int ip_set_net_id __read_mostly;
 
 static inline struct ip_set_net *ip_set_pernet(struct net *net)
@@ -60,8 +61,7 @@ MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_IPSET);
 #define ip_set(inst, id)		\
 	ip_set_dereference((inst)->ip_set_list)[id]
 
-/*
- * The set types are implemented in modules and registered set types
+/* The set types are implemented in modules and registered set types
  * can be found in ip_set_type_list. Adding/deleting types is
  * serialized by ip_set_type_mutex.
  */
@@ -131,7 +131,8 @@ __find_set_type_get(const char *name, u8 family, u8 revision,
 		goto unlock;
 	}
 	/* Make sure the type is already loaded
-	 * but we don't support the revision */
+	 * but we don't support the revision
+	 */
 	list_for_each_entry_rcu(type, &ip_set_type_list, list)
 		if (STRNCMP(type->name, name)) {
 			err = -IPSET_ERR_FIND_TYPE;
@@ -290,7 +291,7 @@ static const struct nla_policy ipaddr_policy[IPSET_ATTR_IPADDR_MAX + 1] = {
 int
 ip_set_get_ipaddr4(struct nlattr *nla,  __be32 *ipaddr)
 {
-	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
 
 	if (unlikely(!flag_nested(nla)))
 		return -IPSET_ERR_PROTOCOL;
@@ -307,7 +308,7 @@ EXPORT_SYMBOL_GPL(ip_set_get_ipaddr4);
 int
 ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 {
-	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX+1];
+	struct nlattr *tb[IPSET_ATTR_IPADDR_MAX + 1];
 
 	if (unlikely(!flag_nested(nla)))
 		return -IPSET_ERR_PROTOCOL;
@@ -318,7 +319,7 @@ ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr)
 		return -IPSET_ERR_PROTOCOL;
 
 	memcpy(ipaddr, nla_data(tb[IPSET_ATTR_IPADDR_IPV6]),
-		sizeof(struct in6_addr));
+	       sizeof(struct in6_addr));
 	return 0;
 }
 EXPORT_SYMBOL_GPL(ip_set_get_ipaddr6);
@@ -467,8 +468,7 @@ ip_set_put_extensions(struct sk_buff *skb, const struct ip_set *set,
 }
 EXPORT_SYMBOL_GPL(ip_set_put_extensions);
 
-/*
- * Creating/destroying/renaming/swapping affect the existence and
+/* Creating/destroying/renaming/swapping affect the existence and
  * the properties of a set. All of these can be executed from userspace
  * only and serialized by the nfnl mutex indirectly from nfnetlink.
  *
@@ -495,8 +495,7 @@ __ip_set_put(struct ip_set *set)
 	write_unlock_bh(&ip_set_ref_lock);
 }
 
-/*
- * Add, del and test set entries from kernel.
+/* Add, del and test set entries from kernel.
  *
  * The set behind the index must exist and must be referenced
  * so it can't be destroyed (or changed) under our foot.
@@ -524,7 +523,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
 			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (opt->dim < set->type->dimension ||
@@ -563,7 +562,7 @@ ip_set_add(ip_set_id_t index, const struct sk_buff *skb,
 			dev_net(par->in ? par->in : par->out), index);
 	int ret;
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (opt->dim < set->type->dimension ||
@@ -586,7 +585,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 			dev_net(par->in ? par->in : par->out), index);
 	int ret = 0;
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	pr_debug("set %s, index %u\n", set->name, index);
 
 	if (opt->dim < set->type->dimension ||
@@ -601,8 +600,7 @@ ip_set_del(ip_set_id_t index, const struct sk_buff *skb,
 }
 EXPORT_SYMBOL_GPL(ip_set_del);
 
-/*
- * Find set by name, reference it once. The reference makes sure the
+/* Find set by name, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet.
  *
  */
@@ -616,7 +614,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 	rcu_read_lock();
 	for (i = 0; i < inst->ip_set_max; i++) {
 		s = rcu_dereference(inst->ip_set_list)[i];
-		if (s != NULL && STRNCMP(s->name, name)) {
+		if (s && STRNCMP(s->name, name)) {
 			__ip_set_get(s);
 			index = i;
 			*set = s;
@@ -629,8 +627,7 @@ ip_set_get_byname(struct net *net, const char *name, struct ip_set **set)
 }
 EXPORT_SYMBOL_GPL(ip_set_get_byname);
 
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
  * reference count by 1. The caller shall not assume the index
  * to be valid, after calling this function.
  *
@@ -643,7 +640,7 @@ __ip_set_put_byindex(struct ip_set_net *inst, ip_set_id_t index)
 
 	rcu_read_lock();
 	set = rcu_dereference(inst->ip_set_list)[index];
-	if (set != NULL)
+	if (set)
 		__ip_set_put(set);
 	rcu_read_unlock();
 }
@@ -657,8 +654,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
 }
 EXPORT_SYMBOL_GPL(ip_set_put_byindex);
 
-/*
- * Get the name of a set behind a set index.
+/* Get the name of a set behind a set index.
  * We assume the set is referenced, so it does exist and
  * can't be destroyed. The set cannot be renamed due to
  * the referencing either.
@@ -669,7 +665,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
 {
 	const struct ip_set *set = ip_set_rcu_get(net, index);
 
-	BUG_ON(set == NULL);
+	BUG_ON(!set);
 	BUG_ON(set->ref == 0);
 
 	/* Referenced, so it's safe */
@@ -677,13 +673,11 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
 }
 EXPORT_SYMBOL_GPL(ip_set_name_byindex);
 
-/*
- * Routines to call by external subsystems, which do not
+/* Routines to call by external subsystems, which do not
  * call nfnl_lock for us.
  */
 
-/*
- * Find set by index, reference it once. The reference makes sure the
+/* Find set by index, reference it once. The reference makes sure the
  * thing pointed to, does not go away under our feet.
  *
  * The nfnl mutex is used in the function.
@@ -709,8 +703,7 @@ ip_set_nfnl_get_byindex(struct net *net, ip_set_id_t index)
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_get_byindex);
 
-/*
- * If the given set pointer points to a valid set, decrement
+/* If the given set pointer points to a valid set, decrement
  * reference count by 1. The caller shall not assume the index
  * to be valid, after calling this function.
  *
@@ -725,15 +718,14 @@ ip_set_nfnl_put(struct net *net, ip_set_id_t index)
 	nfnl_lock(NFNL_SUBSYS_IPSET);
 	if (!inst->is_deleted) { /* already deleted from ip_set_net_exit() */
 		set = ip_set(inst, index);
-		if (set != NULL)
+		if (set)
 			__ip_set_put(set);
 	}
 	nfnl_unlock(NFNL_SUBSYS_IPSET);
 }
 EXPORT_SYMBOL_GPL(ip_set_nfnl_put);
 
-/*
- * Communication protocol with userspace over netlink.
+/* Communication protocol with userspace over netlink.
  *
  * The commands are serialized by the nfnl mutex.
  */
@@ -760,7 +752,7 @@ start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags,
 
 	nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8),
 			sizeof(*nfmsg), flags);
-	if (nlh == NULL)
+	if (!nlh)
 		return NULL;
 
 	nfmsg = nlmsg_data(nlh);
@@ -793,7 +785,7 @@ find_set_and_id(struct ip_set_net *inst, const char *name, ip_set_id_t *id)
 	*id = IPSET_INVALID_ID;
 	for (i = 0; i < inst->ip_set_max; i++) {
 		set = ip_set(inst, i);
-		if (set != NULL && STRNCMP(set->name, name)) {
+		if (set && STRNCMP(set->name, name)) {
 			*id = i;
 			break;
 		}
@@ -819,7 +811,7 @@ find_free_id(struct ip_set_net *inst, const char *name, ip_set_id_t *index,
 	*index = IPSET_INVALID_ID;
 	for (i = 0;  i < inst->ip_set_max; i++) {
 		s = ip_set(inst, i);
-		if (s == NULL) {
+		if (!s) {
 			if (*index == IPSET_INVALID_ID)
 				*index = i;
 		} else if (STRNCMP(name, s->name)) {
@@ -851,18 +843,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	struct ip_set_net *inst = ip_set_pernet(net);
 	struct ip_set *set, *clash = NULL;
 	ip_set_id_t index = IPSET_INVALID_ID;
-	struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
+	struct nlattr *tb[IPSET_ATTR_CREATE_MAX + 1] = {};
 	const char *name, *typename;
 	u8 family, revision;
 	u32 flags = flag_exist(nlh);
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL ||
-		     attr[IPSET_ATTR_TYPENAME] == NULL ||
-		     attr[IPSET_ATTR_REVISION] == NULL ||
-		     attr[IPSET_ATTR_FAMILY] == NULL ||
-		     (attr[IPSET_ATTR_DATA] != NULL &&
+		     !attr[IPSET_ATTR_SETNAME] ||
+		     !attr[IPSET_ATTR_TYPENAME] ||
+		     !attr[IPSET_ATTR_REVISION] ||
+		     !attr[IPSET_ATTR_FAMILY] ||
+		     (attr[IPSET_ATTR_DATA] &&
 		      !flag_nested(attr[IPSET_ATTR_DATA]))))
 		return -IPSET_ERR_PROTOCOL;
 
@@ -873,11 +865,10 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	pr_debug("setname: %s, typename: %s, family: %s, revision: %u\n",
 		 name, typename, family_name(family), revision);
 
-	/*
-	 * First, and without any locks, allocate and initialize
+	/* First, and without any locks, allocate and initialize
 	 * a normal base set structure.
 	 */
-	set = kzalloc(sizeof(struct ip_set), GFP_KERNEL);
+	set = kzalloc(sizeof(*set), GFP_KERNEL);
 	if (!set)
 		return -ENOMEM;
 	spin_lock_init(&set->lock);
@@ -885,21 +876,18 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 	set->family = family;
 	set->revision = revision;
 
-	/*
-	 * Next, check that we know the type, and take
+	/* Next, check that we know the type, and take
 	 * a reference on the type, to make sure it stays available
 	 * while constructing our new set.
 	 *
 	 * After referencing the type, we try to create the type
 	 * specific part of the set without holding any locks.
 	 */
-	ret = find_set_type_get(typename, family, revision, &(set->type));
+	ret = find_set_type_get(typename, family, revision, &set->type);
 	if (ret)
 		goto out;
 
-	/*
-	 * Without holding any locks, create private part.
-	 */
+	/* Without holding any locks, create private part. */
 	if (attr[IPSET_ATTR_DATA] &&
 	    nla_parse_nested(tb, IPSET_ATTR_CREATE_MAX, attr[IPSET_ATTR_DATA],
 			     set->type->create_policy)) {
@@ -913,8 +901,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 
 	/* BTW, ret==0 here. */
 
-	/*
-	 * Here, we have a valid, constructed set and we are protected
+	/* Here, we have a valid, constructed set and we are protected
 	 * by the nfnl mutex. Find the first free index in ip_set_list
 	 * and check clashing.
 	 */
@@ -937,7 +924,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 			/* Wraparound */
 			goto cleanup;
 
-		list = kzalloc(sizeof(struct ip_set *) * i, GFP_KERNEL);
+		list = kcalloc(i, sizeof(struct ip_set *), GFP_KERNEL);
 		if (!list)
 			goto cleanup;
 		/* nfnl mutex is held, both lists are valid */
@@ -951,12 +938,11 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
 		inst->ip_set_max = i;
 		kfree(tmp);
 		ret = 0;
-	} else if (ret)
+	} else if (ret) {
 		goto cleanup;
+	}
 
-	/*
-	 * Finally! Add our shiny new set to the list, and be done.
-	 */
+	/* Finally! Add our shiny new set to the list, and be done. */
 	pr_debug("create: '%s' created with index %u!\n", set->name, index);
 	ip_set(inst, index) = set;
 
@@ -1018,7 +1004,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < inst->ip_set_max; i++) {
 			s = ip_set(inst, i);
-			if (s != NULL && s->ref) {
+			if (s && s->ref) {
 				ret = -IPSET_ERR_BUSY;
 				goto out;
 			}
@@ -1037,7 +1023,7 @@ ip_set_destroy(struct sock *ctnl, struct sk_buff *skb,
 	} else {
 		s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
 				    &i);
-		if (s == NULL) {
+		if (!s) {
 			ret = -ENOENT;
 			goto out;
 		} else if (s->ref) {
@@ -1082,12 +1068,12 @@ ip_set_flush(struct sock *ctnl, struct sk_buff *skb,
 	if (!attr[IPSET_ATTR_SETNAME]) {
 		for (i = 0; i < inst->ip_set_max; i++) {
 			s = ip_set(inst, i);
-			if (s != NULL)
+			if (s)
 				ip_set_flush_set(s);
 		}
 	} else {
 		s = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
-		if (s == NULL)
+		if (!s)
 			return -ENOENT;
 
 		ip_set_flush_set(s);
@@ -1119,12 +1105,12 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL ||
-		     attr[IPSET_ATTR_SETNAME2] == NULL))
+		     !attr[IPSET_ATTR_SETNAME] ||
+		     !attr[IPSET_ATTR_SETNAME2]))
 		return -IPSET_ERR_PROTOCOL;
 
 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (set == NULL)
+	if (!set)
 		return -ENOENT;
 
 	read_lock_bh(&ip_set_ref_lock);
@@ -1136,7 +1122,7 @@ ip_set_rename(struct sock *ctnl, struct sk_buff *skb,
 	name2 = nla_data(attr[IPSET_ATTR_SETNAME2]);
 	for (i = 0; i < inst->ip_set_max; i++) {
 		s = ip_set(inst, i);
-		if (s != NULL && STRNCMP(s->name, name2)) {
+		if (s && STRNCMP(s->name, name2)) {
 			ret = -IPSET_ERR_EXIST_SETNAME2;
 			goto out;
 		}
@@ -1168,23 +1154,24 @@ ip_set_swap(struct sock *ctnl, struct sk_buff *skb,
 	char from_name[IPSET_MAXNAMELEN];
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL ||
-		     attr[IPSET_ATTR_SETNAME2] == NULL))
+		     !attr[IPSET_ATTR_SETNAME] ||
+		     !attr[IPSET_ATTR_SETNAME2]))
 		return -IPSET_ERR_PROTOCOL;
 
 	from = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
 			       &from_id);
-	if (from == NULL)
+	if (!from)
 		return -ENOENT;
 
 	to = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME2]),
 			     &to_id);
-	if (to == NULL)
+	if (!to)
 		return -IPSET_ERR_EXIST_SETNAME2;
 
 	/* Features must not change.
-	 * Not an artificial restriction anymore, as we must prevent
-	 * possible loops created by swapping in setlist type of sets. */
+	 * Not an artifical restriction anymore, as we must prevent
+	 * possible loops created by swapping in setlist type of sets.
+	 */
 	if (!(from->type->features == to->type->features &&
 	      from->family == to->family))
 		return -IPSET_ERR_TYPE_MISMATCH;
@@ -1246,7 +1233,7 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
 {
 	struct nlmsghdr *nlh = nlmsg_hdr(cb->skb);
 	int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
-	struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+	struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
 	struct nlattr *attr = (void *)nlh + min_len;
 	u32 dump_type;
 	ip_set_id_t index;
@@ -1260,16 +1247,18 @@ dump_init(struct netlink_callback *cb, struct ip_set_net *inst)
 
 		set = find_set_and_id(inst, nla_data(cda[IPSET_ATTR_SETNAME]),
 				      &index);
-		if (set == NULL)
+		if (!set)
 			return -ENOENT;
 
 		dump_type = DUMP_ONE;
 		cb->args[IPSET_CB_INDEX] = index;
-	} else
+	} else {
 		dump_type = DUMP_ALL;
+	}
 
 	if (cda[IPSET_ATTR_FLAGS]) {
 		u32 f = ip_set_get_h32(cda[IPSET_ATTR_FLAGS]);
+
 		dump_type |= (f << 16);
 	}
 	cb->args[IPSET_CB_NET] = (unsigned long)inst;
@@ -1295,7 +1284,8 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb)
 		if (ret < 0) {
 			nlh = nlmsg_hdr(cb->skb);
 			/* We have to create and send the error message
-			 * manually :-( */
+			 * manually :-(
+			 */
 			if (nlh->nlmsg_flags & NLM_F_ACK)
 				netlink_ack(cb->skb, nlh, ret);
 			return ret;
@@ -1313,7 +1303,7 @@ dump_last:
 	pr_debug("dump type, flag: %u %u index: %ld\n",
 		 dump_type, dump_flags, cb->args[IPSET_CB_INDEX]);
 	for (; cb->args[IPSET_CB_INDEX] < max; cb->args[IPSET_CB_INDEX]++) {
-		index = (ip_set_id_t) cb->args[IPSET_CB_INDEX];
+		index = (ip_set_id_t)cb->args[IPSET_CB_INDEX];
 		write_lock_bh(&ip_set_ref_lock);
 		set = ip_set(inst, index);
 		is_destroyed = inst->is_destroyed;
@@ -1480,12 +1470,12 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 		size_t payload = min(SIZE_MAX,
 				     sizeof(*errmsg) + nlmsg_len(nlh));
 		int min_len = nlmsg_total_size(sizeof(struct nfgenmsg));
-		struct nlattr *cda[IPSET_ATTR_CMD_MAX+1];
+		struct nlattr *cda[IPSET_ATTR_CMD_MAX + 1];
 		struct nlattr *cmdattr;
 		u32 *errline;
 
 		skb2 = nlmsg_new(payload, GFP_KERNEL);
-		if (skb2 == NULL)
+		if (!skb2)
 			return -ENOMEM;
 		rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid,
 				  nlh->nlmsg_seq, NLMSG_ERROR, payload, 0);
@@ -1502,7 +1492,8 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set,
 
 		*errline = lineno;
 
-		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT);
+		netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid,
+				MSG_DONTWAIT);
 		/* Signal netlink not to send its ACK/errmsg.  */
 		return -EINTR;
 	}
@@ -1517,25 +1508,25 @@ ip_set_uadd(struct sock *ctnl, struct sk_buff *skb,
 {
 	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
-	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
 	const struct nlattr *nla;
 	u32 flags = flag_exist(nlh);
 	bool use_lineno;
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     !attr[IPSET_ATTR_SETNAME] ||
 		     !((attr[IPSET_ATTR_DATA] != NULL) ^
 		       (attr[IPSET_ATTR_ADT] != NULL)) ||
-		     (attr[IPSET_ATTR_DATA] != NULL &&
+		     (attr[IPSET_ATTR_DATA] &&
 		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
-		     (attr[IPSET_ATTR_ADT] != NULL &&
+		     (attr[IPSET_ATTR_ADT] &&
 		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
-		       attr[IPSET_ATTR_LINENO] == NULL))))
+		       !attr[IPSET_ATTR_LINENO]))))
 		return -IPSET_ERR_PROTOCOL;
 
 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (set == NULL)
+	if (!set)
 		return -ENOENT;
 
 	use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1572,25 +1563,25 @@ ip_set_udel(struct sock *ctnl, struct sk_buff *skb,
 {
 	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
-	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
 	const struct nlattr *nla;
 	u32 flags = flag_exist(nlh);
 	bool use_lineno;
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL ||
+		     !attr[IPSET_ATTR_SETNAME] ||
 		     !((attr[IPSET_ATTR_DATA] != NULL) ^
 		       (attr[IPSET_ATTR_ADT] != NULL)) ||
-		     (attr[IPSET_ATTR_DATA] != NULL &&
+		     (attr[IPSET_ATTR_DATA] &&
 		      !flag_nested(attr[IPSET_ATTR_DATA])) ||
-		     (attr[IPSET_ATTR_ADT] != NULL &&
+		     (attr[IPSET_ATTR_ADT] &&
 		      (!flag_nested(attr[IPSET_ATTR_ADT]) ||
-		       attr[IPSET_ATTR_LINENO] == NULL))))
+		       !attr[IPSET_ATTR_LINENO]))))
 		return -IPSET_ERR_PROTOCOL;
 
 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (set == NULL)
+	if (!set)
 		return -ENOENT;
 
 	use_lineno = !!attr[IPSET_ATTR_LINENO];
@@ -1627,17 +1618,17 @@ ip_set_utest(struct sock *ctnl, struct sk_buff *skb,
 {
 	struct ip_set_net *inst = ip_set_pernet(sock_net(ctnl));
 	struct ip_set *set;
-	struct nlattr *tb[IPSET_ATTR_ADT_MAX+1] = {};
+	struct nlattr *tb[IPSET_ATTR_ADT_MAX + 1] = {};
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL ||
-		     attr[IPSET_ATTR_DATA] == NULL ||
+		     !attr[IPSET_ATTR_SETNAME] ||
+		     !attr[IPSET_ATTR_DATA] ||
 		     !flag_nested(attr[IPSET_ATTR_DATA])))
 		return -IPSET_ERR_PROTOCOL;
 
 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (set == NULL)
+	if (!set)
 		return -ENOENT;
 
 	if (nla_parse_nested(tb, IPSET_ATTR_ADT_MAX, attr[IPSET_ATTR_DATA],
@@ -1668,15 +1659,15 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb,
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_SETNAME] == NULL))
+		     !attr[IPSET_ATTR_SETNAME]))
 		return -IPSET_ERR_PROTOCOL;
 
 	set = find_set(inst, nla_data(attr[IPSET_ATTR_SETNAME]));
-	if (set == NULL)
+	if (!set)
 		return -ENOENT;
 
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (skb2 == NULL)
+	if (!skb2)
 		return -ENOMEM;
 
 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1725,8 +1716,8 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
 	int ret = 0;
 
 	if (unlikely(protocol_failed(attr) ||
-		     attr[IPSET_ATTR_TYPENAME] == NULL ||
-		     attr[IPSET_ATTR_FAMILY] == NULL))
+		     !attr[IPSET_ATTR_TYPENAME] ||
+		     !attr[IPSET_ATTR_FAMILY]))
 		return -IPSET_ERR_PROTOCOL;
 
 	family = nla_get_u8(attr[IPSET_ATTR_FAMILY]);
@@ -1736,7 +1727,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb,
 		return ret;
 
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (skb2 == NULL)
+	if (!skb2)
 		return -ENOMEM;
 
 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1781,11 +1772,11 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb,
 	struct nlmsghdr *nlh2;
 	int ret = 0;
 
-	if (unlikely(attr[IPSET_ATTR_PROTOCOL] == NULL))
+	if (unlikely(!attr[IPSET_ATTR_PROTOCOL]))
 		return -IPSET_ERR_PROTOCOL;
 
 	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
-	if (skb2 == NULL)
+	if (!skb2)
 		return -ENOMEM;
 
 	nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
@@ -1913,7 +1904,7 @@ ip_set_sockfn_get(struct sock *sk, int optval, void __user *user, int *len)
 		ret = -EFAULT;
 		goto done;
 	}
-	op = (unsigned int *) data;
+	op = (unsigned int *)data;
 
 	if (*op < IP_SET_OP_VERSION) {
 		/* Check the version at the beginning of operations */
@@ -2025,7 +2016,7 @@ ip_set_net_init(struct net *net)
 	if (inst->ip_set_max >= IPSET_INVALID_ID)
 		inst->ip_set_max = IPSET_INVALID_ID - 1;
 
-	list = kzalloc(sizeof(struct ip_set *) * inst->ip_set_max, GFP_KERNEL);
+	list = kcalloc(inst->ip_set_max, sizeof(struct ip_set *), GFP_KERNEL);
 	if (!list)
 		return -ENOMEM;
 	inst->is_deleted = false;
@@ -2061,11 +2052,11 @@ static struct pernet_operations ip_set_net_ops = {
 	.size	= sizeof(struct ip_set_net)
 };
 
-
 static int __init
 ip_set_init(void)
 {
 	int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys);
+
 	if (ret != 0) {
 		pr_err("ip_set: cannot register with nfnetlink.\n");
 		return ret;
diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c
index 1981f021cc60..42c3e3ba1b94 100644
--- a/net/netfilter/ipset/ip_set_getport.c
+++ b/net/netfilter/ipset/ip_set_getport.c
@@ -30,7 +30,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
 		const struct tcphdr *th;
 
 		th = skb_header_pointer(skb, protooff, sizeof(_tcph), &_tcph);
-		if (th == NULL)
+		if (!th)
 			/* No choice either */
 			return false;
 
@@ -42,7 +42,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
 		const sctp_sctphdr_t *sh;
 
 		sh = skb_header_pointer(skb, protooff, sizeof(_sh), &_sh);
-		if (sh == NULL)
+		if (!sh)
 			/* No choice either */
 			return false;
 
@@ -55,7 +55,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
 		const struct udphdr *uh;
 
 		uh = skb_header_pointer(skb, protooff, sizeof(_udph), &_udph);
-		if (uh == NULL)
+		if (!uh)
 			/* No choice either */
 			return false;
 
@@ -67,7 +67,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
 		const struct icmphdr *ic;
 
 		ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
-		if (ic == NULL)
+		if (!ic)
 			return false;
 
 		*port = (__force __be16)htons((ic->type << 8) | ic->code);
@@ -78,7 +78,7 @@ get_port(const struct sk_buff *skb, int protocol, unsigned int protooff,
 		const struct icmp6hdr *ic;
 
 		ic = skb_header_pointer(skb, protooff, sizeof(_ich), &_ich);
-		if (ic == NULL)
+		if (!ic)
 			return false;
 
 		*port = (__force __be16)
@@ -116,7 +116,8 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,
 			return false;
 		default:
 			/* Other protocols doesn't have ports,
-			   so we can match fragments */
+			 * so we can match fragments.
+			 */
 			*proto = protocol;
 			return true;
 		}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index f352cc022010..afe905c208af 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -35,7 +35,7 @@
 /* Number of elements to store in an initial array block */
 #define AHASH_INIT_SIZE			4
 /* Max number of elements to store in an array block */
-#define AHASH_MAX_SIZE			(3*AHASH_INIT_SIZE)
+#define AHASH_MAX_SIZE			(3 * AHASH_INIT_SIZE)
 /* Max muber of elements in the array block when tuned */
 #define AHASH_MAX_TUNED			64
 
@@ -57,6 +57,7 @@ tune_ahash_max(u8 curr, u32 multi)
 	 */
 	return n > curr && n <= AHASH_MAX_TUNED ? n : curr;
 }
+
 #define TUNE_AHASH_MAX(h, multi)	\
 	((h)->ahash_max = tune_ahash_max((h)->ahash_max, multi))
 #else
@@ -256,7 +257,7 @@ htable_bits(u32 hashsize)
 #endif
 
 #define HKEY(data, initval, htable_bits)			\
-(jhash2((u32 *)(data), HKEY_DATALEN/sizeof(u32), initval)	\
+(jhash2((u32 *)(data), HKEY_DATALEN / sizeof(u32), initval)	\
 	& jhash_mask(htable_bits))
 
 #ifndef htype
@@ -299,11 +300,11 @@ mtype_add_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 
 	/* Add in increasing prefix order, so larger cidr first */
 	for (i = 0, j = -1; i < nets_length && h->nets[i].cidr[n]; i++) {
-		if (j != -1)
+		if (j != -1) {
 			continue;
-		else if (h->nets[i].cidr[n] < cidr)
+		} else if (h->nets[i].cidr[n] < cidr) {
 			j = i;
-		else if (h->nets[i].cidr[n] == cidr) {
+		} else if (h->nets[i].cidr[n] == cidr) {
 			h->nets[cidr - 1].nets[n]++;
 			return;
 		}
@@ -322,15 +323,15 @@ mtype_del_cidr(struct htype *h, u8 cidr, u8 nets_length, u8 n)
 	u8 i, j, net_end = nets_length - 1;
 
 	for (i = 0; i < nets_length; i++) {
-	        if (h->nets[i].cidr[n] != cidr)
-	                continue;
+		if (h->nets[i].cidr[n] != cidr)
+			continue;
 		h->nets[cidr - 1].nets[n]--;
 		if (h->nets[cidr - 1].nets[n] > 0)
-                        return;
+			return;
 		for (j = i; j < net_end && h->nets[j].cidr[n]; j++)
-		        h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
+			h->nets[j].cidr[n] = h->nets[j + 1].cidr[n];
 		h->nets[j].cidr[n] = 0;
-                return;
+		return;
 	}
 }
 #endif
@@ -426,8 +427,8 @@ mtype_destroy(struct ip_set *set)
 	if (SET_WITH_TIMEOUT(set))
 		del_timer_sync(&h->gc);
 
-	mtype_ahash_destroy(set, __ipset_dereference_protected(h->table, 1),
-			    true);
+	mtype_ahash_destroy(set,
+			    __ipset_dereference_protected(h->table, 1), true);
 	kfree(h);
 
 	set->data = NULL;
@@ -439,7 +440,7 @@ mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	struct htype *h = set->data;
 
 	init_timer(&h->gc);
-	h->gc.data = (unsigned long) set;
+	h->gc.data = (unsigned long)set;
 	h->gc.function = gc;
 	h->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&h->gc);
@@ -530,7 +531,7 @@ mtype_expire(struct ip_set *set, struct htype *h, u8 nets_length, size_t dsize)
 static void
 mtype_gc(unsigned long ul_set)
 {
-	struct ip_set *set = (struct ip_set *) ul_set;
+	struct ip_set *set = (struct ip_set *)ul_set;
 	struct htype *h = set->data;
 
 	pr_debug("called\n");
@@ -544,7 +545,8 @@ mtype_gc(unsigned long ul_set)
 
 /* Resize a hash: create a new hash table with doubling the hashsize
  * and inserting the elements to it. Repeat until we succeed or
- * fail due to memory pressures. */
+ * fail due to memory pressures.
+ */
 static int
 mtype_resize(struct ip_set *set, bool retried)
 {
@@ -687,7 +689,8 @@ cleanup:
 }
 
 /* Add an element to a hash and update the internal counters when succeeded,
- * otherwise report the proper error code. */
+ * otherwise report the proper error code.
+ */
 static int
 mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	  struct ip_set_ext *mext, u32 flags)
@@ -926,7 +929,8 @@ mtype_data_match(struct mtype_elem *data, const struct ip_set_ext *ext,
 
 #ifdef IP_SET_HASH_WITH_NETS
 /* Special test function which takes into account the different network
- * sizes added to the set */
+ * sizes added to the set
+ */
 static int
 mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
 		 const struct ip_set_ext *ext,
@@ -1004,7 +1008,8 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 	t = rcu_dereference_bh(h->table);
 #ifdef IP_SET_HASH_WITH_NETS
 	/* If we test an IP address and not a network address,
-	 * try all possible network sizes */
+	 * try all possible network sizes
+	 */
 	for (i = 0; i < IPSET_NET_COUNT; i++)
 		if (DCIDR_GET(d->cidr, i) != SET_HOST_MASK(set->family))
 			break;
@@ -1148,8 +1153,8 @@ mtype_list(const struct ip_set *set,
 					nla_nest_cancel(skb, atd);
 					ret = -EMSGSIZE;
 					goto out;
-				} else
-					goto nla_put_failure;
+				}
+				goto nla_put_failure;
 			}
 			if (mtype_data_list(skb, e))
 				goto nla_put_failure;
@@ -1171,8 +1176,9 @@ nla_put_failure:
 			set->name);
 		cb->args[IPSET_CB_ARG0] = 0;
 		ret = -EMSGSIZE;
-	} else
+	} else {
 		ipset_nest_end(skb, atd);
+	}
 out:
 	rcu_read_unlock();
 	return ret;
@@ -1180,12 +1186,13 @@ out:
 
 static int
 IPSET_TOKEN(MTYPE, _kadt)(struct ip_set *set, const struct sk_buff *skb,
-	    const struct xt_action_param *par,
-	    enum ipset_adt adt, struct ip_set_adt_opt *opt);
+			  const struct xt_action_param *par,
+			  enum ipset_adt adt, struct ip_set_adt_opt *opt);
 
 static int
 IPSET_TOKEN(MTYPE, _uadt)(struct ip_set *set, struct nlattr *tb[],
-	    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried);
+			  enum ipset_adt adt, u32 *lineno, u32 flags,
+			  bool retried);
 
 static const struct ip_set_type_variant mtype_variant = {
 	.kadt	= mtype_kadt,
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index f54d7069d633..9d6bf19f7b78 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -158,8 +158,8 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index f8fbc325ad34..a0695a2ab585 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -155,8 +155,8 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
@@ -206,7 +206,6 @@ hash_ipmark6_data_next(struct hash_ipmark4_elem *next,
 #define IP_SET_EMIT_CREATE
 #include "ip_set_hash_gen.h"
 
-
 static int
 hash_ipmark6_kadt(struct ip_set *set, const struct sk_buff *skb,
 		  const struct xt_action_param *par,
@@ -268,10 +267,8 @@ hash_ipmark6_uadt(struct ip_set *set, struct nlattr *tb[],
 	ret = adtfn(set, &e, &ext, &ext, flags);
 	if (ret && !ip_set_eexist(ret, flags))
 		return ret;
-	else
-		ret = 0;
 
-	return ret;
+	return 0;
 }
 
 static struct ip_set_type hash_ipmark_type __read_mostly = {
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 9a31db8ccca6..9d84b3dff603 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -140,8 +140,9 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
@@ -187,8 +188,8 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-			else
-				ret = 0;
+
+			ret = 0;
 		}
 	}
 	return ret;
@@ -305,8 +306,9 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
@@ -329,8 +331,8 @@ hash_ipport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index fc42489f8795..215b7b942038 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -63,7 +63,7 @@ hash_ipportip4_data_equal(const struct hash_ipportip4_elem *ip1,
 
 static bool
 hash_ipportip4_data_list(struct sk_buff *skb,
-		       const struct hash_ipportip4_elem *data)
+			 const struct hash_ipportip4_elem *data)
 {
 	if (nla_put_ipaddr4(skb, IPSET_ATTR_IP, data->ip) ||
 	    nla_put_ipaddr4(skb, IPSET_ATTR_IP2, data->ip2) ||
@@ -147,8 +147,9 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
@@ -194,8 +195,8 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-			else
-				ret = 0;
+
+			ret = 0;
 		}
 	}
 	return ret;
@@ -320,8 +321,9 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
@@ -344,8 +346,8 @@ hash_ipportip6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 2a69b9bf66b8..9ca719625ea3 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -209,14 +209,16 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -263,8 +265,9 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip2_from, ip2_to);
 		if (ip2_from + UINT_MAX == ip2_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr + 1);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip);
@@ -287,8 +290,8 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 				if (ret && !ip_set_eexist(ret, flags))
 					return ret;
-				else
-					ret = 0;
+
+				ret = 0;
 				ip2 = ip2_last + 1;
 			}
 		}
@@ -466,14 +469,16 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -497,8 +502,8 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c
index 112aff3cda96..f1e7d2c0f685 100644
--- a/net/netfilter/ipset/ip_set_hash_mac.c
+++ b/net/netfilter/ipset/ip_set_hash_mac.c
@@ -89,10 +89,10 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb,
 		return 0;
 
 	if (skb_mac_header(skb) < skb->head ||
-	     (skb_mac_header(skb) + ETH_HLEN) > skb->data)
+	    (skb_mac_header(skb) + ETH_HLEN) > skb->data)
 		return -EINVAL;
 
-	memcpy(e.ether, eth_hdr(skb)->h_source, ETH_ALEN);
+	ether_addr_copy(e.ether, eth_hdr(skb)->h_source);
 	if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
 		return -EINVAL;
 	return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
@@ -116,7 +116,7 @@ hash_mac4_uadt(struct ip_set *set, struct nlattr *tb[],
 	ret = ip_set_get_extensions(set, tb, &ext);
 	if (ret)
 		return ret;
-	memcpy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]), ETH_ALEN);
+	ether_addr_copy(e.ether, nla_data(tb[IPSET_ATTR_ETHER]));
 	if (memcmp(e.ether, invalid_ether, ETH_ALEN) == 0)
 		return -IPSET_ERR_HASH_ELEM;
 
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index e49b1d010d30..3e4bffdc1cc0 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -169,6 +169,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -176,7 +177,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 	if (adt == IPSET_TEST || !tb[IPSET_ATTR_IP_TO]) {
 		e.ip = htonl(ip & ip_set_hostmask(e.cidr));
 		ret = adtfn(set, &e, &ext, &ext, flags);
-		return ip_set_enomatch(ret, flags, adt, set) ? -ret:
+		return ip_set_enomatch(ret, flags, adt, set) ? -ret :
 		       ip_set_eexist(ret, flags) ? 0 : ret;
 	}
 
@@ -198,8 +199,8 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
 		ret = adtfn(set, &e, &ext, &ext, flags);
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 		ip = last + 1;
 	}
 	return ret;
@@ -339,6 +340,7 @@ hash_net6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 42c893e08842..43d8c9896fa3 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -143,7 +143,7 @@ static const char *get_physindev_name(const struct sk_buff *skb)
 	return dev ? dev->name : NULL;
 }
 
-static const char *get_phyoutdev_name(const struct sk_buff *skb)
+static const char *get_physoutdev_name(const struct sk_buff *skb)
 {
 	struct net_device *dev = nf_bridge_get_physoutdev(skb);
 
@@ -178,15 +178,16 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
 	if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 		const char *eiface = SRCDIR ? get_physindev_name(skb) :
-					      get_phyoutdev_name(skb);
+					      get_physoutdev_name(skb);
 
 		if (!eiface)
 			return -EINVAL;
 		STRLCPY(e.iface, eiface);
 		e.physdev = 1;
 #endif
-	} else
+	} else {
 		STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+	}
 
 	if (strlen(e.iface) == 0)
 		return -EINVAL;
@@ -229,6 +230,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_PHYSDEV)
 			e.physdev = 1;
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
@@ -249,8 +251,9 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (ip + UINT_MAX == ip_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip);
@@ -261,8 +264,8 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 		ip = last + 1;
 	}
 	return ret;
@@ -385,15 +388,16 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	if (opt->cmdflags & IPSET_FLAG_PHYSDEV) {
 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
 		const char *eiface = SRCDIR ? get_physindev_name(skb) :
-					      get_phyoutdev_name(skb);
+					      get_physoutdev_name(skb);
+
 		if (!eiface)
 			return -EINVAL;
-
 		STRLCPY(e.iface, eiface);
 		e.physdev = 1;
 #endif
-	} else
+	} else {
 		STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+	}
 
 	if (strlen(e.iface) == 0)
 		return -EINVAL;
@@ -403,7 +407,7 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
-		   enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+		    enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netiface6_elem e = { .cidr = HOST_MASK, .elem = 1 };
@@ -440,6 +444,7 @@ hash_netiface6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_PHYSDEV)
 			e.physdev = 1;
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index b5428be1f159..3c862c0a76d1 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -57,8 +57,8 @@ struct hash_netnet4_elem {
 
 static inline bool
 hash_netnet4_data_equal(const struct hash_netnet4_elem *ip1,
-		     const struct hash_netnet4_elem *ip2,
-		     u32 *multi)
+			const struct hash_netnet4_elem *ip2,
+			u32 *multi)
 {
 	return ip1->ipcmp == ip2->ipcmp &&
 	       ip1->ccmp == ip2->ccmp;
@@ -84,7 +84,7 @@ hash_netnet4_data_reset_flags(struct hash_netnet4_elem *elem, u8 *flags)
 
 static inline void
 hash_netnet4_data_reset_elem(struct hash_netnet4_elem *elem,
-			  struct hash_netnet4_elem *orig)
+			     struct hash_netnet4_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -103,7 +103,7 @@ hash_netnet4_data_netmask(struct hash_netnet4_elem *elem, u8 cidr, bool inner)
 
 static bool
 hash_netnet4_data_list(struct sk_buff *skb,
-		    const struct hash_netnet4_elem *data)
+		       const struct hash_netnet4_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -122,7 +122,7 @@ nla_put_failure:
 
 static inline void
 hash_netnet4_data_next(struct hash_netnet4_elem *next,
-		    const struct hash_netnet4_elem *d)
+		       const struct hash_netnet4_elem *d)
 {
 	next->ipcmp = d->ipcmp;
 }
@@ -133,8 +133,8 @@ hash_netnet4_data_next(struct hash_netnet4_elem *next,
 
 static int
 hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
-	       const struct xt_action_param *par,
-	       enum ipset_adt adt, struct ip_set_adt_opt *opt)
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -156,7 +156,7 @@ hash_netnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
-	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct hash_netnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -199,6 +199,7 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -221,8 +222,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (unlikely(ip + UINT_MAX == ip_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+	}
 
 	ip2_to = ip2_from;
 	if (tb[IPSET_ATTR_IP2_TO]) {
@@ -233,8 +235,9 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip2_from, ip2_to);
 		if (unlikely(ip2_from + UINT_MAX == ip2_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip[0]);
@@ -251,8 +254,8 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			ret = adtfn(set, &e, &ext, &ext, flags);
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-			else
-				ret = 0;
+
+			ret = 0;
 			ip2 = last2 + 1;
 		}
 		ip = last + 1;
@@ -276,8 +279,8 @@ struct hash_netnet6_elem {
 
 static inline bool
 hash_netnet6_data_equal(const struct hash_netnet6_elem *ip1,
-		     const struct hash_netnet6_elem *ip2,
-		     u32 *multi)
+			const struct hash_netnet6_elem *ip2,
+			u32 *multi)
 {
 	return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
 	       ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -304,7 +307,7 @@ hash_netnet6_data_reset_flags(struct hash_netnet6_elem *elem, u8 *flags)
 
 static inline void
 hash_netnet6_data_reset_elem(struct hash_netnet6_elem *elem,
-			  struct hash_netnet6_elem *orig)
+			     struct hash_netnet6_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -323,7 +326,7 @@ hash_netnet6_data_netmask(struct hash_netnet6_elem *elem, u8 cidr, bool inner)
 
 static bool
 hash_netnet6_data_list(struct sk_buff *skb,
-		    const struct hash_netnet6_elem *data)
+		       const struct hash_netnet6_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -342,7 +345,7 @@ nla_put_failure:
 
 static inline void
 hash_netnet6_data_next(struct hash_netnet4_elem *next,
-		    const struct hash_netnet6_elem *d)
+		       const struct hash_netnet6_elem *d)
 {
 }
 
@@ -356,8 +359,8 @@ hash_netnet6_data_next(struct hash_netnet4_elem *next,
 
 static int
 hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
-	       const struct xt_action_param *par,
-	       enum ipset_adt adt, struct ip_set_adt_opt *opt)
+		  const struct xt_action_param *par,
+		  enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -367,7 +370,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 	e.cidr[0] = INIT_CIDR(h->nets[0].cidr[0], HOST_MASK);
 	e.cidr[1] = INIT_CIDR(h->nets[0].cidr[1], HOST_MASK);
 	if (adt == IPSET_TEST)
-		e.ccmp = (HOST_MASK << (sizeof(u8)*8)) | HOST_MASK;
+		e.ccmp = (HOST_MASK << (sizeof(u8) * 8)) | HOST_MASK;
 
 	ip6addrptr(skb, opt->flags & IPSET_DIM_ONE_SRC, &e.ip[0].in6);
 	ip6addrptr(skb, opt->flags & IPSET_DIM_TWO_SRC, &e.ip[1].in6);
@@ -379,7 +382,7 @@ hash_netnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
-	       enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+		  enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	ipset_adtfn adtfn = set->variant->adt[adt];
 	struct hash_netnet6_elem e = { .cidr = { HOST_MASK, HOST_MASK, }, };
@@ -424,6 +427,7 @@ hash_netnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 27307d0a8a5d..731813e0f08c 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -198,8 +198,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
@@ -208,6 +209,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -233,8 +235,9 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (ip + UINT_MAX == ip_to)
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip);
@@ -250,8 +253,8 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 			if (ret && !ip_set_eexist(ret, flags))
 				return ret;
-			else
-				ret = 0;
+
+			ret = 0;
 		}
 		ip = last + 1;
 	}
@@ -413,14 +416,16 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -444,8 +449,8 @@ hash_netport6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 1e0e47ae40a4..0c68734f5cc4 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -62,8 +62,8 @@ struct hash_netportnet4_elem {
 
 static inline bool
 hash_netportnet4_data_equal(const struct hash_netportnet4_elem *ip1,
-			   const struct hash_netportnet4_elem *ip2,
-			   u32 *multi)
+			    const struct hash_netportnet4_elem *ip2,
+			    u32 *multi)
 {
 	return ip1->ipcmp == ip2->ipcmp &&
 	       ip1->ccmp == ip2->ccmp &&
@@ -91,7 +91,7 @@ hash_netportnet4_data_reset_flags(struct hash_netportnet4_elem *elem, u8 *flags)
 
 static inline void
 hash_netportnet4_data_reset_elem(struct hash_netportnet4_elem *elem,
-				struct hash_netportnet4_elem *orig)
+				 struct hash_netportnet4_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -111,7 +111,7 @@ hash_netportnet4_data_netmask(struct hash_netportnet4_elem *elem,
 
 static bool
 hash_netportnet4_data_list(struct sk_buff *skb,
-			  const struct hash_netportnet4_elem *data)
+			   const struct hash_netportnet4_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -132,7 +132,7 @@ nla_put_failure:
 
 static inline void
 hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
-			  const struct hash_netportnet4_elem *d)
+			   const struct hash_netportnet4_elem *d)
 {
 	next->ipcmp = d->ipcmp;
 	next->port = d->port;
@@ -144,8 +144,8 @@ hash_netportnet4_data_next(struct hash_netportnet4_elem *next,
 
 static int
 hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
-		     const struct xt_action_param *par,
-		     enum ipset_adt adt, struct ip_set_adt_opt *opt)
+		      const struct xt_action_param *par,
+		      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -171,7 +171,7 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
-		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+		      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -223,14 +223,16 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMP))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -254,8 +256,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip, ip_to);
 		if (unlikely(ip + UINT_MAX == ip_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip, ip_to, e.cidr[0]);
+	}
 
 	port_to = port = ntohs(e.port);
 	if (tb[IPSET_ATTR_PORT_TO]) {
@@ -273,8 +276,9 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 			swap(ip2_from, ip2_to);
 		if (unlikely(ip2_from + UINT_MAX == ip2_to))
 			return -IPSET_ERR_HASH_RANGE;
-	} else
+	} else {
 		ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
+	}
 
 	if (retried)
 		ip = ntohl(h->next.ip[0]);
@@ -296,8 +300,8 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
 				ret = adtfn(set, &e, &ext, &ext, flags);
 				if (ret && !ip_set_eexist(ret, flags))
 					return ret;
-				else
-					ret = 0;
+
+				ret = 0;
 				ip2 = ip2_last + 1;
 			}
 		}
@@ -324,8 +328,8 @@ struct hash_netportnet6_elem {
 
 static inline bool
 hash_netportnet6_data_equal(const struct hash_netportnet6_elem *ip1,
-			   const struct hash_netportnet6_elem *ip2,
-			   u32 *multi)
+			    const struct hash_netportnet6_elem *ip2,
+			    u32 *multi)
 {
 	return ipv6_addr_equal(&ip1->ip[0].in6, &ip2->ip[0].in6) &&
 	       ipv6_addr_equal(&ip1->ip[1].in6, &ip2->ip[1].in6) &&
@@ -354,7 +358,7 @@ hash_netportnet6_data_reset_flags(struct hash_netportnet6_elem *elem, u8 *flags)
 
 static inline void
 hash_netportnet6_data_reset_elem(struct hash_netportnet6_elem *elem,
-				struct hash_netportnet6_elem *orig)
+				 struct hash_netportnet6_elem *orig)
 {
 	elem->ip[1] = orig->ip[1];
 }
@@ -374,7 +378,7 @@ hash_netportnet6_data_netmask(struct hash_netportnet6_elem *elem,
 
 static bool
 hash_netportnet6_data_list(struct sk_buff *skb,
-			  const struct hash_netportnet6_elem *data)
+			   const struct hash_netportnet6_elem *data)
 {
 	u32 flags = data->nomatch ? IPSET_FLAG_NOMATCH : 0;
 
@@ -395,7 +399,7 @@ nla_put_failure:
 
 static inline void
 hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
-			  const struct hash_netportnet6_elem *d)
+			   const struct hash_netportnet6_elem *d)
 {
 	next->port = d->port;
 }
@@ -410,8 +414,8 @@ hash_netportnet6_data_next(struct hash_netportnet4_elem *next,
 
 static int
 hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
-		     const struct xt_action_param *par,
-		     enum ipset_adt adt, struct ip_set_adt_opt *opt)
+		      const struct xt_action_param *par,
+		      enum ipset_adt adt, struct ip_set_adt_opt *opt)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -437,7 +441,7 @@ hash_netportnet6_kadt(struct ip_set *set, const struct sk_buff *skb,
 
 static int
 hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
-		     enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
+		      enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
 {
 	const struct hash_netportnet *h = set->data;
 	ipset_adtfn adtfn = set->variant->adt[adt];
@@ -493,14 +497,16 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (e.proto == 0)
 			return -IPSET_ERR_INVALID_PROTO;
-	} else
+	} else {
 		return -IPSET_ERR_MISSING_PROTO;
+	}
 
 	if (!(with_ports || e.proto == IPPROTO_ICMPV6))
 		e.port = 0;
 
 	if (tb[IPSET_ATTR_CADT_FLAGS]) {
 		u32 cadt_flags = ip_set_get_h32(tb[IPSET_ATTR_CADT_FLAGS]);
+
 		if (cadt_flags & IPSET_FLAG_NOMATCH)
 			flags |= (IPSET_FLAG_NOMATCH << 16);
 	}
@@ -524,8 +530,8 @@ hash_netportnet6_uadt(struct ip_set *set, struct nlattr *tb[],
 
 		if (ret && !ip_set_eexist(ret, flags))
 			return ret;
-		else
-			ret = 0;
+
+		ret = 0;
 	}
 	return ret;
 }
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index 9f624ee9a41e..a1fe5377a2b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -206,14 +206,15 @@ list_set_utest(struct ip_set *set, void *value, const struct ip_set_ext *ext,
 			continue;
 		}
 
-		if (d->before == 0)
+		if (d->before == 0) {
 			ret = 1;
-		else if (d->before > 0) {
+		} else if (d->before > 0) {
 			next = list_next_entry(e, list);
 			ret = !list_is_last(&e->list, &map->members) &&
 			      next->id == d->refid;
-		} else
+		} else {
 			ret = prev && prev->id == d->refid;
+		}
 		return ret;
 	}
 	return 0;
@@ -558,7 +559,7 @@ static const struct ip_set_type_variant set_variant = {
 static void
 list_set_gc(unsigned long ul_set)
 {
-	struct ip_set *set = (struct ip_set *) ul_set;
+	struct ip_set *set = (struct ip_set *)ul_set;
 	struct list_set *map = set->data;
 
 	spin_lock_bh(&set->lock);
@@ -575,7 +576,7 @@ list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set))
 	struct list_set *map = set->data;
 
 	init_timer(&map->gc);
-	map->gc.data = (unsigned long) set;
+	map->gc.data = (unsigned long)set;
 	map->gc.function = gc;
 	map->gc.expires = jiffies + IPSET_GC_PERIOD(set->timeout) * HZ;
 	add_timer(&map->gc);
diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c
index 04d15fdc99ee..1c8a42c1056c 100644
--- a/net/netfilter/ipset/pfxlen.c
+++ b/net/netfilter/ipset/pfxlen.c
@@ -1,9 +1,7 @@
 #include <linux/export.h>
 #include <linux/netfilter/ipset/pfxlen.h>
 
-/*
- * Prefixlen maps for fast conversions, by Jan Engelhardt.
- */
+/* Prefixlen maps for fast conversions, by Jan Engelhardt. */
 
 #define E(a, b, c, d) \
 	{.ip6 = { \
@@ -11,8 +9,7 @@
 		htonl(c), htonl(d), \
 	} }
 
-/*
- * This table works for both IPv4 and IPv6;
+/* This table works for both IPv4 and IPv6;
  * just use prefixlen_netmask_map[prefixlength].ip.
  */
 const union nf_inet_addr ip_set_netmask_map[] = {
@@ -149,13 +146,12 @@ const union nf_inet_addr ip_set_netmask_map[] = {
 EXPORT_SYMBOL_GPL(ip_set_netmask_map);
 
 #undef  E
-#define E(a, b, c, d)						\
-	{.ip6 = { (__force __be32) a, (__force __be32) b,	\
-		  (__force __be32) c, (__force __be32) d,	\
+#define E(a, b, c, d)					\
+	{.ip6 = { (__force __be32)a, (__force __be32)b,	\
+		  (__force __be32)c, (__force __be32)d,	\
 	} }
 
-/*
- * This table works for both IPv4 and IPv6;
+/* This table works for both IPv4 and IPv6;
  * just use prefixlen_hostmask_map[prefixlength].ip.
  */
 const union nf_inet_addr ip_set_hostmask_map[] = {
diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c
index b103e9627716..5669e5b453f4 100644
--- a/net/netfilter/xt_set.c
+++ b/net/netfilter/xt_set.c
@@ -9,7 +9,8 @@
  */
 
 /* Kernel module which implements the set match and SET target
- * for netfilter/iptables. */
+ * for netfilter/iptables.
+ */
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
@@ -53,6 +54,7 @@ static bool
 set_match_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_set_info_match_v0 *info = par->matchinfo;
+
 	ADT_OPT(opt, par->family, info->match_set.u.compat.dim,
 		info->match_set.u.compat.flags, 0, UINT_MAX);
 
@@ -69,10 +71,10 @@ compat_flags(struct xt_set_info_v0 *info)
 	info->u.compat.dim = IPSET_DIM_ZERO;
 	if (info->u.flags[0] & IPSET_MATCH_INV)
 		info->u.compat.flags |= IPSET_INV_MATCH;
-	for (i = 0; i < IPSET_DIM_MAX-1 && info->u.flags[i]; i++) {
+	for (i = 0; i < IPSET_DIM_MAX - 1 && info->u.flags[i]; i++) {
 		info->u.compat.dim++;
 		if (info->u.flags[i] & IPSET_SRC)
-			info->u.compat.flags |= (1<<info->u.compat.dim);
+			info->u.compat.flags |= (1 << info->u.compat.dim);
 	}
 }
 
@@ -89,7 +91,7 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par)
 			info->match_set.index);
 		return -ENOENT;
 	}
-	if (info->match_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+	if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
 		pr_warn("Protocol error: set match dimension is over the limit!\n");
 		ip_set_nfnl_put(par->net, info->match_set.index);
 		return -ERANGE;
@@ -115,6 +117,7 @@ static bool
 set_match_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_set_info_match_v1 *info = par->matchinfo;
+
 	ADT_OPT(opt, par->family, info->match_set.dim,
 		info->match_set.flags, 0, UINT_MAX);
 
@@ -179,9 +182,10 @@ static bool
 set_match_v3(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_set_info_match_v3 *info = par->matchinfo;
+	int ret;
+
 	ADT_OPT(opt, par->family, info->match_set.dim,
 		info->match_set.flags, info->flags, UINT_MAX);
-	int ret;
 
 	if (info->packets.op != IPSET_COUNTER_NONE ||
 	    info->bytes.op != IPSET_COUNTER_NONE)
@@ -225,9 +229,10 @@ static bool
 set_match_v4(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_set_info_match_v4 *info = par->matchinfo;
+	int ret;
+
 	ADT_OPT(opt, par->family, info->match_set.dim,
 		info->match_set.flags, info->flags, UINT_MAX);
-	int ret;
 
 	if (info->packets.op != IPSET_COUNTER_NONE ||
 	    info->bytes.op != IPSET_COUNTER_NONE)
@@ -253,6 +258,7 @@ static unsigned int
 set_target_v0(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v0 *info = par->targinfo;
+
 	ADT_OPT(add_opt, par->family, info->add_set.u.compat.dim,
 		info->add_set.u.compat.flags, 0, UINT_MAX);
 	ADT_OPT(del_opt, par->family, info->del_set.u.compat.dim,
@@ -291,8 +297,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par)
 			return -ENOENT;
 		}
 	}
-	if (info->add_set.u.flags[IPSET_DIM_MAX-1] != 0 ||
-	    info->del_set.u.flags[IPSET_DIM_MAX-1] != 0) {
+	if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 ||
+	    info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) {
 		pr_warn("Protocol error: SET target dimension is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
 			ip_set_nfnl_put(par->net, info->add_set.index);
@@ -325,6 +331,7 @@ static unsigned int
 set_target_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v1 *info = par->targinfo;
+
 	ADT_OPT(add_opt, par->family, info->add_set.dim,
 		info->add_set.flags, 0, UINT_MAX);
 	ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -393,6 +400,7 @@ static unsigned int
 set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v2 *info = par->targinfo;
+
 	ADT_OPT(add_opt, par->family, info->add_set.dim,
 		info->add_set.flags, info->flags, info->timeout);
 	ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -400,8 +408,8 @@ set_target_v2(struct sk_buff *skb, const struct xt_action_param *par)
 
 	/* Normalize to fit into jiffies */
 	if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
-	    add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
-		add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+	    add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
+		add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
 	if (info->add_set.index != IPSET_INVALID_ID)
 		ip_set_add(info->add_set.index, skb, par, &add_opt);
 	if (info->del_set.index != IPSET_INVALID_ID)
@@ -419,6 +427,8 @@ static unsigned int
 set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_set_info_target_v3 *info = par->targinfo;
+	int ret;
+
 	ADT_OPT(add_opt, par->family, info->add_set.dim,
 		info->add_set.flags, info->flags, info->timeout);
 	ADT_OPT(del_opt, par->family, info->del_set.dim,
@@ -426,12 +436,10 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 	ADT_OPT(map_opt, par->family, info->map_set.dim,
 		info->map_set.flags, 0, UINT_MAX);
 
-	int ret;
-
 	/* Normalize to fit into jiffies */
 	if (add_opt.ext.timeout != IPSET_NO_TIMEOUT &&
-	    add_opt.ext.timeout > UINT_MAX/MSEC_PER_SEC)
-		add_opt.ext.timeout = UINT_MAX/MSEC_PER_SEC;
+	    add_opt.ext.timeout > UINT_MAX / MSEC_PER_SEC)
+		add_opt.ext.timeout = UINT_MAX / MSEC_PER_SEC;
 	if (info->add_set.index != IPSET_INVALID_ID)
 		ip_set_add(info->add_set.index, skb, par, &add_opt);
 	if (info->del_set.index != IPSET_INVALID_ID)
@@ -457,7 +465,6 @@ set_target_v3(struct sk_buff *skb, const struct xt_action_param *par)
 	return XT_CONTINUE;
 }
 
-
 static int
 set_target_v3_checkentry(const struct xt_tgchk_param *par)
 {
@@ -497,8 +504,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
 		     !(par->hook_mask & (1 << NF_INET_FORWARD |
 					 1 << NF_INET_LOCAL_OUT |
 					 1 << NF_INET_POST_ROUTING))) {
-			pr_warn("mapping of prio or/and queue is allowed only"
-				"from OUTPUT/FORWARD/POSTROUTING chains\n");
+			pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n");
 			return -EINVAL;
 		}
 		index = ip_set_nfnl_get_byindex(par->net,
@@ -519,8 +525,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par)
 	if (info->add_set.dim > IPSET_DIM_MAX ||
 	    info->del_set.dim > IPSET_DIM_MAX ||
 	    info->map_set.dim > IPSET_DIM_MAX) {
-		pr_warn("Protocol error: SET target dimension "
-			"is over the limit!\n");
+		pr_warn("Protocol error: SET target dimension is over the limit!\n");
 		if (info->add_set.index != IPSET_INVALID_ID)
 			ip_set_nfnl_put(par->net, info->add_set.index);
 		if (info->del_set.index != IPSET_INVALID_ID)
@@ -546,7 +551,6 @@ set_target_v3_destroy(const struct xt_tgdtor_param *par)
 		ip_set_nfnl_put(par->net, info->map_set.index);
 }
 
-
 static struct xt_match set_matches[] __read_mostly = {
 	{
 		.name		= "set",
-- 
cgit 


From 2cbce139fc57bc2625f88add055d0b94f00c3352 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 12 Jun 2015 13:55:41 +0200
Subject: netfilter: nf_tables: attach net_device to basechain

The device is part of the hook configuration, so instead of a global
configuration per table, set it to each of the basechain that we create.

This patch reworks ebddf1a8d78a ("netfilter: nf_tables: allow to bind table to
net_device").

Note that this adds a dev_name field in the nft_base_chain structure which is
required the netdev notification subscription that follows up in a patch to
handle gone net_devices.

Suggested-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h        |  4 +-
 include/uapi/linux/netfilter/nf_tables.h |  4 +-
 net/netfilter/nf_tables_api.c            | 79 +++++++++++++++++---------------
 3 files changed, 46 insertions(+), 41 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 3d6f48ca40a7..09d6f8df60f0 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -791,6 +791,7 @@ struct nft_stats {
  *	@policy: default policy
  *	@stats: per-cpu chain stats
  *	@chain: the chain
+ *	@dev_name: device name that this base chain is attached to (if any)
  */
 struct nft_base_chain {
 	struct nf_hook_ops		ops[NFT_HOOK_OPS_MAX];
@@ -799,6 +800,7 @@ struct nft_base_chain {
 	u8				policy;
 	struct nft_stats __percpu	*stats;
 	struct nft_chain		chain;
+	char 				dev_name[IFNAMSIZ];
 };
 
 static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
@@ -819,7 +821,6 @@ unsigned int nft_do_chain(struct nft_pktinfo *pkt,
  *	@use: number of chain references to this table
  *	@flags: table flag (see enum nft_table_flags)
  *	@name: name of the table
- *	@dev: this table is bound to this device (if any)
  */
 struct nft_table {
 	struct list_head		list;
@@ -829,7 +830,6 @@ struct nft_table {
 	u32				use;
 	u16				flags;
 	char				name[NFT_TABLE_MAXNAMELEN];
-	struct net_device		*dev;
 };
 
 enum nft_af_flags {
diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h
index 89a671e0f5e7..a99e6a997140 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -122,11 +122,13 @@ enum nft_list_attributes {
  *
  * @NFTA_HOOK_HOOKNUM: netfilter hook number (NLA_U32)
  * @NFTA_HOOK_PRIORITY: netfilter hook priority (NLA_U32)
+ * @NFTA_HOOK_DEV: netdevice name (NLA_STRING)
  */
 enum nft_hook_attributes {
 	NFTA_HOOK_UNSPEC,
 	NFTA_HOOK_HOOKNUM,
 	NFTA_HOOK_PRIORITY,
+	NFTA_HOOK_DEV,
 	__NFTA_HOOK_MAX
 };
 #define NFTA_HOOK_MAX		(__NFTA_HOOK_MAX - 1)
@@ -146,14 +148,12 @@ enum nft_table_flags {
  * @NFTA_TABLE_NAME: name of the table (NLA_STRING)
  * @NFTA_TABLE_FLAGS: bitmask of enum nft_table_flags (NLA_U32)
  * @NFTA_TABLE_USE: number of chains in this table (NLA_U32)
- * @NFTA_TABLE_DEV: net device name (NLA_STRING)
  */
 enum nft_table_attributes {
 	NFTA_TABLE_UNSPEC,
 	NFTA_TABLE_NAME,
 	NFTA_TABLE_FLAGS,
 	NFTA_TABLE_USE,
-	NFTA_TABLE_DEV,
 	__NFTA_TABLE_MAX
 };
 #define NFTA_TABLE_MAX		(__NFTA_TABLE_MAX - 1)
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4528f122bcd2..900c81a2f89a 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -399,8 +399,6 @@ static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = {
 	[NFTA_TABLE_NAME]	= { .type = NLA_STRING,
 				    .len = NFT_TABLE_MAXNAMELEN - 1 },
 	[NFTA_TABLE_FLAGS]	= { .type = NLA_U32 },
-	[NFTA_TABLE_DEV]	= { .type = NLA_STRING,
-				    .len = IFNAMSIZ - 1 },
 };
 
 static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
@@ -425,10 +423,6 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net,
 	    nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use)))
 		goto nla_put_failure;
 
-	if (table->dev &&
-	    nla_put_string(skb, NFTA_TABLE_DEV, table->dev->name))
-		goto nla_put_failure;
-
 	nlmsg_end(skb, nlh);
 	return 0;
 
@@ -614,11 +608,6 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
 	if (flags == ctx->table->flags)
 		return 0;
 
-	if ((ctx->afi->flags & NFT_AF_NEEDS_DEV) &&
-	    ctx->nla[NFTA_TABLE_DEV] &&
-	    nla_strcmp(ctx->nla[NFTA_TABLE_DEV], ctx->table->dev->name))
-		return -EOPNOTSUPP;
-
 	trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
 				sizeof(struct nft_trans_table));
 	if (trans == NULL)
@@ -656,7 +645,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	struct nft_table *table;
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
-	struct net_device *dev = NULL;
 	u32 flags = 0;
 	struct nft_ctx ctx;
 	int err;
@@ -691,20 +679,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 			return -EINVAL;
 	}
 
-	if (afi->flags & NFT_AF_NEEDS_DEV) {
-		char ifname[IFNAMSIZ];
-
-		if (!nla[NFTA_TABLE_DEV])
-			return -EOPNOTSUPP;
-
-		nla_strlcpy(ifname, nla[NFTA_TABLE_DEV], IFNAMSIZ);
-		dev = dev_get_by_name(net, ifname);
-		if (!dev)
-			return -ENOENT;
-	} else if (nla[NFTA_TABLE_DEV]) {
-		return -EOPNOTSUPP;
-	}
-
 	err = -EAFNOSUPPORT;
 	if (!try_module_get(afi->owner))
 		goto err1;
@@ -718,7 +692,6 @@ static int nf_tables_newtable(struct sock *nlsk, struct sk_buff *skb,
 	INIT_LIST_HEAD(&table->chains);
 	INIT_LIST_HEAD(&table->sets);
 	table->flags = flags;
-	table->dev   = dev;
 
 	nft_ctx_init(&ctx, skb, nlh, afi, table, NULL, nla);
 	err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE);
@@ -732,9 +705,6 @@ err3:
 err2:
 	module_put(afi->owner);
 err1:
-	if (dev != NULL)
-		dev_put(dev);
-
 	return err;
 }
 
@@ -838,9 +808,6 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx)
 {
 	BUG_ON(ctx->table->use > 0);
 
-	if (ctx->table->dev)
-		dev_put(ctx->table->dev);
-
 	kfree(ctx->table);
 	module_put(ctx->afi->owner);
 }
@@ -916,6 +883,8 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = {
 static const struct nla_policy nft_hook_policy[NFTA_HOOK_MAX + 1] = {
 	[NFTA_HOOK_HOOKNUM]	= { .type = NLA_U32 },
 	[NFTA_HOOK_PRIORITY]	= { .type = NLA_U32 },
+	[NFTA_HOOK_DEV]		= { .type = NLA_STRING,
+				    .len = IFNAMSIZ - 1 },
 };
 
 static int nft_dump_stats(struct sk_buff *skb, struct nft_stats __percpu *stats)
@@ -989,6 +958,9 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net,
 			goto nla_put_failure;
 		if (nla_put_be32(skb, NFTA_HOOK_PRIORITY, htonl(ops->priority)))
 			goto nla_put_failure;
+		if (basechain->dev_name[0] &&
+		    nla_put_string(skb, NFTA_HOOK_DEV, basechain->dev_name))
+			goto nla_put_failure;
 		nla_nest_end(skb, nest);
 
 		if (nla_put_be32(skb, NFTA_CHAIN_POLICY,
@@ -1200,9 +1172,13 @@ static void nf_tables_chain_destroy(struct nft_chain *chain)
 	BUG_ON(chain->use > 0);
 
 	if (chain->flags & NFT_BASE_CHAIN) {
-		module_put(nft_base_chain(chain)->type->owner);
-		free_percpu(nft_base_chain(chain)->stats);
-		kfree(nft_base_chain(chain));
+		struct nft_base_chain *basechain = nft_base_chain(chain);
+
+		module_put(basechain->type->owner);
+		free_percpu(basechain->stats);
+		if (basechain->ops[0].dev != NULL)
+			dev_put(basechain->ops[0].dev);
+		kfree(basechain);
 	} else {
 		kfree(chain);
 	}
@@ -1221,6 +1197,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 	struct nlattr *ha[NFTA_HOOK_MAX + 1];
 	struct net *net = sock_net(skb->sk);
 	int family = nfmsg->nfgen_family;
+	struct net_device *dev = NULL;
 	u8 policy = NF_ACCEPT;
 	u64 handle = 0;
 	unsigned int i;
@@ -1360,17 +1337,43 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			return -ENOENT;
 		hookfn = type->hooks[hooknum];
 
+		if (afi->flags & NFT_AF_NEEDS_DEV) {
+			char ifname[IFNAMSIZ];
+
+			if (!ha[NFTA_HOOK_DEV]) {
+				module_put(type->owner);
+				return -EOPNOTSUPP;
+			}
+
+			nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ);
+			dev = dev_get_by_name(net, ifname);
+			if (!dev) {
+				module_put(type->owner);
+				return -ENOENT;
+			}
+		} else if (ha[NFTA_HOOK_DEV]) {
+			module_put(type->owner);
+			return -EOPNOTSUPP;
+		}
+
 		basechain = kzalloc(sizeof(*basechain), GFP_KERNEL);
 		if (basechain == NULL) {
 			module_put(type->owner);
+			if (dev != NULL)
+				dev_put(dev);
 			return -ENOMEM;
 		}
 
+		if (dev != NULL)
+			strncpy(basechain->dev_name, dev->name, IFNAMSIZ);
+
 		if (nla[NFTA_CHAIN_COUNTERS]) {
 			stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]);
 			if (IS_ERR(stats)) {
 				module_put(type->owner);
 				kfree(basechain);
+				if (dev != NULL)
+					dev_put(dev);
 				return PTR_ERR(stats);
 			}
 			basechain->stats = stats;
@@ -1379,6 +1382,8 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			if (stats == NULL) {
 				module_put(type->owner);
 				kfree(basechain);
+				if (dev != NULL)
+					dev_put(dev);
 				return -ENOMEM;
 			}
 			rcu_assign_pointer(basechain->stats, stats);
@@ -1396,7 +1401,7 @@ static int nf_tables_newchain(struct sock *nlsk, struct sk_buff *skb,
 			ops->priority	= priority;
 			ops->priv	= chain;
 			ops->hook	= afi->hooks[ops->hooknum];
-			ops->dev	= table->dev;
+			ops->dev	= dev;
 			if (hookfn)
 				ops->hook = hookfn;
 			if (afi->hook_ops_init)
-- 
cgit 


From ffeedafbf0236f03aeb2e8db273b3e5ae5f5bc89 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@plumgrid.com>
Date: Fri, 12 Jun 2015 19:39:12 -0700
Subject: bpf: introduce current->pid, tgid, uid, gid, comm accessors

eBPF programs attached to kprobes need to filter based on
current->pid, uid and other fields, so introduce helper functions:

u64 bpf_get_current_pid_tgid(void)
Return: current->tgid << 32 | current->pid

u64 bpf_get_current_uid_gid(void)
Return: current_gid << 32 | current_uid

bpf_get_current_comm(char *buf, int size_of_buf)
stores current->comm into buf

They can be used from the programs attached to TC as well to classify packets
based on current task fields.

Update tracex2 example to print histogram of write syscalls for each process
instead of aggregated for all.

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/bpf.h        |  3 +++
 include/uapi/linux/bpf.h   | 19 +++++++++++++
 kernel/bpf/core.c          |  3 +++
 kernel/bpf/helpers.c       | 58 +++++++++++++++++++++++++++++++++++++++
 kernel/trace/bpf_trace.c   |  6 +++++
 net/core/filter.c          |  6 +++++
 samples/bpf/bpf_helpers.h  |  6 +++++
 samples/bpf/tracex2_kern.c | 24 +++++++++++++----
 samples/bpf/tracex2_user.c | 67 +++++++++++++++++++++++++++++++++++++++-------
 9 files changed, 178 insertions(+), 14 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2235aee8096a..1b9a3f5b27f6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -188,5 +188,8 @@ extern const struct bpf_func_proto bpf_get_prandom_u32_proto;
 extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
 extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
+extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
+extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
+extern const struct bpf_func_proto bpf_get_current_comm_proto;
 
 #endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 602f05b7a275..29ef6f99e43d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -230,6 +230,25 @@ enum bpf_func_id {
 	 * Return: 0 on success
 	 */
 	BPF_FUNC_clone_redirect,
+
+	/**
+	 * u64 bpf_get_current_pid_tgid(void)
+	 * Return: current->tgid << 32 | current->pid
+	 */
+	BPF_FUNC_get_current_pid_tgid,
+
+	/**
+	 * u64 bpf_get_current_uid_gid(void)
+	 * Return: current_gid << 32 | current_uid
+	 */
+	BPF_FUNC_get_current_uid_gid,
+
+	/**
+	 * bpf_get_current_comm(char *buf, int size_of_buf)
+	 * stores current->comm into buf
+	 * Return: 0 on success
+	 */
+	BPF_FUNC_get_current_comm,
 	__BPF_FUNC_MAX_ID,
 };
 
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 1e00aa3316dc..1fc45cc83076 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -730,6 +730,9 @@ const struct bpf_func_proto bpf_map_delete_elem_proto __weak;
 const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
+const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
+const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
+const struct bpf_func_proto bpf_get_current_comm_proto __weak;
 
 /* Always built-in helper functions. */
 const struct bpf_func_proto bpf_tail_call_proto = {
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 7ad5d8842d5b..1447ec09421e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -14,6 +14,8 @@
 #include <linux/random.h>
 #include <linux/smp.h>
 #include <linux/ktime.h>
+#include <linux/sched.h>
+#include <linux/uidgid.h>
 
 /* If kernel subsystem is allowing eBPF programs to call this function,
  * inside its own verifier_ops->get_func_proto() callback it should return
@@ -124,3 +126,59 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
 };
+
+static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct task_struct *task = current;
+
+	if (!task)
+		return -EINVAL;
+
+	return (u64) task->tgid << 32 | task->pid;
+}
+
+const struct bpf_func_proto bpf_get_current_pid_tgid_proto = {
+	.func		= bpf_get_current_pid_tgid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
+static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
+{
+	struct task_struct *task = current;
+	kuid_t uid;
+	kgid_t gid;
+
+	if (!task)
+		return -EINVAL;
+
+	current_uid_gid(&uid, &gid);
+	return (u64) from_kgid(&init_user_ns, gid) << 32 |
+		from_kuid(&init_user_ns, uid);
+}
+
+const struct bpf_func_proto bpf_get_current_uid_gid_proto = {
+	.func		= bpf_get_current_uid_gid,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
+static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5)
+{
+	struct task_struct *task = current;
+	char *buf = (char *) (long) r1;
+
+	if (!task)
+		return -EINVAL;
+
+	memcpy(buf, task->comm, min_t(size_t, size, sizeof(task->comm)));
+	return 0;
+}
+
+const struct bpf_func_proto bpf_get_current_comm_proto = {
+	.func		= bpf_get_current_comm,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_STACK,
+	.arg2_type	= ARG_CONST_STACK_SIZE,
+};
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 50c4015a8ad3..3a17638cdf46 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -162,6 +162,12 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
+	case BPF_FUNC_get_current_pid_tgid:
+		return &bpf_get_current_pid_tgid_proto;
+	case BPF_FUNC_get_current_uid_gid:
+		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_current_comm:
+		return &bpf_get_current_comm_proto;
 
 	case BPF_FUNC_trace_printk:
 		/*
diff --git a/net/core/filter.c b/net/core/filter.c
index d271c06bf01f..20aa51ccbf9d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1459,6 +1459,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id)
 		return &bpf_l4_csum_replace_proto;
 	case BPF_FUNC_clone_redirect:
 		return &bpf_clone_redirect_proto;
+	case BPF_FUNC_get_current_pid_tgid:
+		return &bpf_get_current_pid_tgid_proto;
+	case BPF_FUNC_get_current_uid_gid:
+		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_current_comm:
+		return &bpf_get_current_comm_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index f531a0b3282d..bdf1c1607b80 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -25,6 +25,12 @@ static void (*bpf_tail_call)(void *ctx, void *map, int index) =
 	(void *) BPF_FUNC_tail_call;
 static unsigned long long (*bpf_get_smp_processor_id)(void) =
 	(void *) BPF_FUNC_get_smp_processor_id;
+static unsigned long long (*bpf_get_current_pid_tgid)(void) =
+	(void *) BPF_FUNC_get_current_pid_tgid;
+static unsigned long long (*bpf_get_current_uid_gid)(void) =
+	(void *) BPF_FUNC_get_current_uid_gid;
+static int (*bpf_get_current_comm)(void *buf, int buf_size) =
+	(void *) BPF_FUNC_get_current_comm;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index 19ec1cfc45db..dc50f4f2943f 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -62,11 +62,18 @@ static unsigned int log2l(unsigned long v)
 		return log2(v);
 }
 
+struct hist_key {
+	char comm[16];
+	u64 pid_tgid;
+	u64 uid_gid;
+	u32 index;
+};
+
 struct bpf_map_def SEC("maps") my_hist_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(u32),
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct hist_key),
 	.value_size = sizeof(long),
-	.max_entries = 64,
+	.max_entries = 1024,
 };
 
 SEC("kprobe/sys_write")
@@ -75,11 +82,18 @@ int bpf_prog3(struct pt_regs *ctx)
 	long write_size = ctx->dx; /* arg3 */
 	long init_val = 1;
 	long *value;
-	u32 index = log2l(write_size);
+	struct hist_key key = {};
+
+	key.index = log2l(write_size);
+	key.pid_tgid = bpf_get_current_pid_tgid();
+	key.uid_gid = bpf_get_current_uid_gid();
+	bpf_get_current_comm(&key.comm, sizeof(key.comm));
 
-	value = bpf_map_lookup_elem(&my_hist_map, &index);
+	value = bpf_map_lookup_elem(&my_hist_map, &key);
 	if (value)
 		__sync_fetch_and_add(value, 1);
+	else
+		bpf_map_update_elem(&my_hist_map, &key, &init_val, BPF_ANY);
 	return 0;
 }
 char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index 91b8d0896fbb..cd0241c1447a 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -3,6 +3,7 @@
 #include <stdlib.h>
 #include <signal.h>
 #include <linux/bpf.h>
+#include <string.h>
 #include "libbpf.h"
 #include "bpf_load.h"
 
@@ -20,23 +21,42 @@ static void stars(char *str, long val, long max, int width)
 	str[i] = '\0';
 }
 
-static void print_hist(int fd)
+struct task {
+	char comm[16];
+	__u64 pid_tgid;
+	__u64 uid_gid;
+};
+
+struct hist_key {
+	struct task t;
+	__u32 index;
+};
+
+#define SIZE sizeof(struct task)
+
+static void print_hist_for_pid(int fd, void *task)
 {
-	int key;
+	struct hist_key key = {}, next_key;
+	char starstr[MAX_STARS];
 	long value;
 	long data[MAX_INDEX] = {};
-	char starstr[MAX_STARS];
-	int i;
 	int max_ind = -1;
 	long max_value = 0;
+	int i, ind;
 
-	for (key = 0; key < MAX_INDEX; key++) {
-		bpf_lookup_elem(fd, &key, &value);
-		data[key] = value;
-		if (value && key > max_ind)
-			max_ind = key;
+	while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+		if (memcmp(&next_key, task, SIZE)) {
+			key = next_key;
+			continue;
+		}
+		bpf_lookup_elem(fd, &next_key, &value);
+		ind = next_key.index;
+		data[ind] = value;
+		if (value && ind > max_ind)
+			max_ind = ind;
 		if (value > max_value)
 			max_value = value;
+		key = next_key;
 	}
 
 	printf("           syscall write() stats\n");
@@ -48,6 +68,35 @@ static void print_hist(int fd)
 		       MAX_STARS, starstr);
 	}
 }
+
+static void print_hist(int fd)
+{
+	struct hist_key key = {}, next_key;
+	static struct task tasks[1024];
+	int task_cnt = 0;
+	int i;
+
+	while (bpf_get_next_key(fd, &key, &next_key) == 0) {
+		int found = 0;
+
+		for (i = 0; i < task_cnt; i++)
+			if (memcmp(&tasks[i], &next_key, SIZE) == 0)
+				found = 1;
+		if (!found)
+			memcpy(&tasks[task_cnt++], &next_key, SIZE);
+		key = next_key;
+	}
+
+	for (i = 0; i < task_cnt; i++) {
+		printf("\npid %d cmd %s uid %d\n",
+		       (__u32) tasks[i].pid_tgid,
+		       tasks[i].comm,
+		       (__u32) tasks[i].uid_gid);
+		print_hist_for_pid(fd, &tasks[i]);
+	}
+
+}
+
 static void int_exit(int sig)
 {
 	print_hist(map_fd[1]);
-- 
cgit 


From 254cb6dbfd8894743fbf814ec856ccd0874af691 Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Sun, 14 Jun 2015 16:36:34 +0300
Subject: bonding: export slave's actor_oper_port_state via sysfs and netlink

Export the actor_oper_port_state of each port via sysfs and netlink.
In 802.3ad mode it is valuable for the user to be able to check the
actor_oper state, it is already exported via bond's proc entry.

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c     | 10 +++++++++-
 drivers/net/bonding/bond_sysfs_slave.c | 16 ++++++++++++++++
 include/uapi/linux/if_link.h           |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index f7015eb4f8db..a0e600db4236 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -28,6 +28,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev,
 		nla_total_size(MAX_ADDR_LEN) +	/* IFLA_BOND_SLAVE_PERM_HWADDR */
 		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_QUEUE_ID */
 		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */
+		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */
 		0;
 }
 
@@ -56,12 +57,19 @@ static int bond_fill_slave_info(struct sk_buff *skb,
 
 	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
 		const struct aggregator *agg;
+		const struct port *ad_port;
 
+		ad_port = &SLAVE_AD_INFO(slave)->port;
 		agg = SLAVE_AD_INFO(slave)->port.aggregator;
-		if (agg)
+		if (agg) {
 			if (nla_put_u16(skb, IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
 					agg->aggregator_identifier))
 				goto nla_put_failure;
+			if (nla_put_u8(skb,
+				       IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+				       ad_port->actor_oper_port_state))
+				goto nla_put_failure;
+		}
 	}
 
 	return 0;
diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c
index 23618a831612..f6c197cee669 100644
--- a/drivers/net/bonding/bond_sysfs_slave.c
+++ b/drivers/net/bonding/bond_sysfs_slave.c
@@ -80,6 +80,21 @@ static ssize_t ad_aggregator_id_show(struct slave *slave, char *buf)
 }
 static SLAVE_ATTR_RO(ad_aggregator_id);
 
+static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf)
+{
+	const struct port *ad_port;
+
+	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
+		ad_port = &SLAVE_AD_INFO(slave)->port;
+		if (ad_port->aggregator)
+			return sprintf(buf, "%u\n",
+				       ad_port->actor_oper_port_state);
+	}
+
+	return sprintf(buf, "N/A\n");
+}
+static SLAVE_ATTR_RO(ad_actor_oper_port_state);
+
 static const struct slave_attribute *slave_attrs[] = {
 	&slave_attr_state,
 	&slave_attr_mii_status,
@@ -87,6 +102,7 @@ static const struct slave_attribute *slave_attrs[] = {
 	&slave_attr_perm_hwaddr,
 	&slave_attr_queue_id,
 	&slave_attr_ad_aggregator_id,
+	&slave_attr_ad_actor_oper_port_state,
 	NULL
 };
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 1737b7a8272b..1b3e357223f2 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -456,6 +456,7 @@ enum {
 	IFLA_BOND_SLAVE_PERM_HWADDR,
 	IFLA_BOND_SLAVE_QUEUE_ID,
 	IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
+	IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
 	__IFLA_BOND_SLAVE_MAX,
 };
 
-- 
cgit 


From 46ea297ed67cdeeb0142244873458b911037d0ba Mon Sep 17 00:00:00 2001
From: Nikolay Aleksandrov <razor@blackwall.org>
Date: Sun, 14 Jun 2015 16:36:35 +0300
Subject: bonding: export slave's partner_oper_port_state via sysfs and netlink

Export the partner_oper_port_state of each port via sysfs and netlink.
In 802.3ad mode it is valuable for the user to be able to check the
partner_oper state, it is already exported via bond's proc entry.

Signed-off-by: Nikolay Aleksandrov <razor@blackwall.org>
Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_netlink.c     |  5 +++++
 drivers/net/bonding/bond_sysfs_slave.c | 16 ++++++++++++++++
 include/uapi/linux/if_link.h           |  1 +
 3 files changed, 22 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/drivers/net/bonding/bond_netlink.c b/drivers/net/bonding/bond_netlink.c
index a0e600db4236..5580fcde738f 100644
--- a/drivers/net/bonding/bond_netlink.c
+++ b/drivers/net/bonding/bond_netlink.c
@@ -29,6 +29,7 @@ static size_t bond_get_slave_size(const struct net_device *bond_dev,
 		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_QUEUE_ID */
 		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_AD_AGGREGATOR_ID */
 		nla_total_size(sizeof(u8)) +	/* IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE */
+		nla_total_size(sizeof(u16)) +	/* IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE */
 		0;
 }
 
@@ -69,6 +70,10 @@ static int bond_fill_slave_info(struct sk_buff *skb,
 				       IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
 				       ad_port->actor_oper_port_state))
 				goto nla_put_failure;
+			if (nla_put_u16(skb,
+					IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
+					ad_port->partner_oper.port_state))
+				goto nla_put_failure;
 		}
 	}
 
diff --git a/drivers/net/bonding/bond_sysfs_slave.c b/drivers/net/bonding/bond_sysfs_slave.c
index f6c197cee669..7d16c51e6913 100644
--- a/drivers/net/bonding/bond_sysfs_slave.c
+++ b/drivers/net/bonding/bond_sysfs_slave.c
@@ -95,6 +95,21 @@ static ssize_t ad_actor_oper_port_state_show(struct slave *slave, char *buf)
 }
 static SLAVE_ATTR_RO(ad_actor_oper_port_state);
 
+static ssize_t ad_partner_oper_port_state_show(struct slave *slave, char *buf)
+{
+	const struct port *ad_port;
+
+	if (BOND_MODE(slave->bond) == BOND_MODE_8023AD) {
+		ad_port = &SLAVE_AD_INFO(slave)->port;
+		if (ad_port->aggregator)
+			return sprintf(buf, "%u\n",
+				       ad_port->partner_oper.port_state);
+	}
+
+	return sprintf(buf, "N/A\n");
+}
+static SLAVE_ATTR_RO(ad_partner_oper_port_state);
+
 static const struct slave_attribute *slave_attrs[] = {
 	&slave_attr_state,
 	&slave_attr_mii_status,
@@ -103,6 +118,7 @@ static const struct slave_attribute *slave_attrs[] = {
 	&slave_attr_queue_id,
 	&slave_attr_ad_aggregator_id,
 	&slave_attr_ad_actor_oper_port_state,
+	&slave_attr_ad_partner_oper_port_state,
 	NULL
 };
 
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 1b3e357223f2..510efb360580 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -457,6 +457,7 @@ enum {
 	IFLA_BOND_SLAVE_QUEUE_ID,
 	IFLA_BOND_SLAVE_AD_AGGREGATOR_ID,
 	IFLA_BOND_SLAVE_AD_ACTOR_OPER_PORT_STATE,
+	IFLA_BOND_SLAVE_AD_PARTNER_OPER_PORT_STATE,
 	__IFLA_BOND_SLAVE_MAX,
 };
 
-- 
cgit 


From 3b766cd832328fcb87db3507e7b98cf42f21689d Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 15 Jun 2015 17:59:07 +0300
Subject: net/core: Add reading VF statistics through the PF netdevice

Add ndo_get_vf_stats where the PF retrieves and fills the VFs traffic
statistics. We encode the VF stats in a nested manner to allow for
future extensions.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Hadar Hen Zion <hadarh@mellanox.com>
Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h      |  9 ++++++++
 include/linux/netdevice.h    |  4 ++++
 include/uapi/linux/if_link.h | 13 +++++++++++
 net/core/rtnetlink.c         | 51 ++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 75 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index da4929927f69..ae5d0d22955d 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -5,6 +5,15 @@
 
 
 /* We don't want this structure exposed to user space */
+struct ifla_vf_stats {
+	__u64 rx_packets;
+	__u64 tx_packets;
+	__u64 rx_bytes;
+	__u64 tx_bytes;
+	__u64 broadcast;
+	__u64 multicast;
+};
+
 struct ifla_vf_info {
 	__u32 vf;
 	__u8 mac[32];
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6f5f71ff5169..e20979dfd6a9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1100,6 +1100,10 @@ struct net_device_ops {
 						     struct ifla_vf_info *ivf);
 	int			(*ndo_set_vf_link_state)(struct net_device *dev,
 							 int vf, int link_state);
+	int			(*ndo_get_vf_stats)(struct net_device *dev,
+						    int vf,
+						    struct ifla_vf_stats
+						    *vf_stats);
 	int			(*ndo_set_vf_port)(struct net_device *dev,
 						   int vf,
 						   struct nlattr *port[]);
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 510efb360580..2c7e8e3d3981 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -484,6 +484,7 @@ enum {
 	IFLA_VF_RSS_QUERY_EN,	/* RSS Redirection Table and Hash Key query
 				 * on/off switch
 				 */
+	IFLA_VF_STATS,		/* network device statistics */
 	__IFLA_VF_MAX,
 };
 
@@ -533,6 +534,18 @@ struct ifla_vf_rss_query_en {
 	__u32 setting;
 };
 
+enum {
+	IFLA_VF_STATS_RX_PACKETS,
+	IFLA_VF_STATS_TX_PACKETS,
+	IFLA_VF_STATS_RX_BYTES,
+	IFLA_VF_STATS_TX_BYTES,
+	IFLA_VF_STATS_BROADCAST,
+	IFLA_VF_STATS_MULTICAST,
+	__IFLA_VF_STATS_MAX,
+};
+
+#define IFLA_VF_STATS_MAX (__IFLA_VF_STATS_MAX - 1)
+
 /* VF ports management section
  *
  *	Nested layout of set/get msg is:
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 077b6d280371..2d102ce1474f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -819,7 +819,19 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev,
 			 nla_total_size(sizeof(struct ifla_vf_spoofchk)) +
 			 nla_total_size(sizeof(struct ifla_vf_rate)) +
 			 nla_total_size(sizeof(struct ifla_vf_link_state)) +
-			 nla_total_size(sizeof(struct ifla_vf_rss_query_en)));
+			 nla_total_size(sizeof(struct ifla_vf_rss_query_en)) +
+			 /* IFLA_VF_STATS_RX_PACKETS */
+			 nla_total_size(sizeof(__u64)) +
+			 /* IFLA_VF_STATS_TX_PACKETS */
+			 nla_total_size(sizeof(__u64)) +
+			 /* IFLA_VF_STATS_RX_BYTES */
+			 nla_total_size(sizeof(__u64)) +
+			 /* IFLA_VF_STATS_TX_BYTES */
+			 nla_total_size(sizeof(__u64)) +
+			 /* IFLA_VF_STATS_BROADCAST */
+			 nla_total_size(sizeof(__u64)) +
+			 /* IFLA_VF_STATS_MULTICAST */
+			 nla_total_size(sizeof(__u64)));
 		return size;
 	} else
 		return 0;
@@ -1123,7 +1135,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	    && (ext_filter_mask & RTEXT_FILTER_VF)) {
 		int i;
 
-		struct nlattr *vfinfo, *vf;
+		struct nlattr *vfinfo, *vf, *vfstats;
 		int num_vfs = dev_num_vf(dev->dev.parent);
 
 		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
@@ -1138,6 +1150,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			struct ifla_vf_spoofchk vf_spoofchk;
 			struct ifla_vf_link_state vf_linkstate;
 			struct ifla_vf_rss_query_en vf_rss_query_en;
+			struct ifla_vf_stats vf_stats;
 
 			/*
 			 * Not all SR-IOV capable drivers support the
@@ -1190,6 +1203,30 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 				    sizeof(vf_rss_query_en),
 				    &vf_rss_query_en))
 				goto nla_put_failure;
+			memset(&vf_stats, 0, sizeof(vf_stats));
+			if (dev->netdev_ops->ndo_get_vf_stats)
+				dev->netdev_ops->ndo_get_vf_stats(dev, i,
+								  &vf_stats);
+			vfstats = nla_nest_start(skb, IFLA_VF_STATS);
+			if (!vfstats) {
+				nla_nest_cancel(skb, vf);
+				nla_nest_cancel(skb, vfinfo);
+				goto nla_put_failure;
+			}
+			if (nla_put_u64(skb, IFLA_VF_STATS_RX_PACKETS,
+					vf_stats.rx_packets) ||
+			    nla_put_u64(skb, IFLA_VF_STATS_TX_PACKETS,
+					vf_stats.tx_packets) ||
+			    nla_put_u64(skb, IFLA_VF_STATS_RX_BYTES,
+					vf_stats.rx_bytes) ||
+			    nla_put_u64(skb, IFLA_VF_STATS_TX_BYTES,
+					vf_stats.tx_bytes) ||
+			    nla_put_u64(skb, IFLA_VF_STATS_BROADCAST,
+					vf_stats.broadcast) ||
+			    nla_put_u64(skb, IFLA_VF_STATS_MULTICAST,
+					vf_stats.multicast))
+				goto nla_put_failure;
+			nla_nest_end(skb, vfstats);
 			nla_nest_end(skb, vf);
 		}
 		nla_nest_end(skb, vfinfo);
@@ -1303,6 +1340,16 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 	[IFLA_VF_RATE]		= { .len = sizeof(struct ifla_vf_rate) },
 	[IFLA_VF_LINK_STATE]	= { .len = sizeof(struct ifla_vf_link_state) },
 	[IFLA_VF_RSS_QUERY_EN]	= { .len = sizeof(struct ifla_vf_rss_query_en) },
+	[IFLA_VF_STATS]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_stats_policy[IFLA_VF_STATS_MAX + 1] = {
+	[IFLA_VF_STATS_RX_PACKETS]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_TX_PACKETS]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_RX_BYTES]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_TX_BYTES]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_BROADCAST]	= { .type = NLA_U64 },
+	[IFLA_VF_STATS_MULTICAST]	= { .type = NLA_U64 },
 };
 
 static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
-- 
cgit 


From eb4cb008529ca08e0d8c0fa54e8f739520197a65 Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Mon, 15 Jun 2015 11:26:18 -0400
Subject: sock_diag: define destruction multicast groups

These groups will contain socket-destruction events for
AF_INET/AF_INET6, IPPROTO_TCP/IPPROTO_UDP.

Near the end of socket destruction, a check for listeners is
performed.  In the presence of a listener, rather than completely
cleanup the socket, a unit of work will be added to a private
work queue which will first broadcast information about the socket
and then finish the cleanup operation.

Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sock_diag.h      | 42 +++++++++++++++++++++
 include/net/sock.h             |  1 +
 include/uapi/linux/sock_diag.h | 10 +++++
 net/core/sock.c                | 11 +++++-
 net/core/sock_diag.c           | 85 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 148 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h
index 083ac388098e..fddebc617469 100644
--- a/include/linux/sock_diag.h
+++ b/include/linux/sock_diag.h
@@ -1,7 +1,10 @@
 #ifndef __SOCK_DIAG_H__
 #define __SOCK_DIAG_H__
 
+#include <linux/netlink.h>
 #include <linux/user_namespace.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
 #include <uapi/linux/sock_diag.h>
 
 struct sk_buff;
@@ -11,6 +14,7 @@ struct sock;
 struct sock_diag_handler {
 	__u8 family;
 	int (*dump)(struct sk_buff *skb, struct nlmsghdr *nlh);
+	int (*get_info)(struct sk_buff *skb, struct sock *sk);
 };
 
 int sock_diag_register(const struct sock_diag_handler *h);
@@ -26,4 +30,42 @@ int sock_diag_put_meminfo(struct sock *sk, struct sk_buff *skb, int attr);
 int sock_diag_put_filterinfo(bool may_report_filterinfo, struct sock *sk,
 			     struct sk_buff *skb, int attrtype);
 
+static inline
+enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk)
+{
+	switch (sk->sk_family) {
+	case AF_INET:
+		switch (sk->sk_protocol) {
+		case IPPROTO_TCP:
+			return SKNLGRP_INET_TCP_DESTROY;
+		case IPPROTO_UDP:
+			return SKNLGRP_INET_UDP_DESTROY;
+		default:
+			return SKNLGRP_NONE;
+		}
+	case AF_INET6:
+		switch (sk->sk_protocol) {
+		case IPPROTO_TCP:
+			return SKNLGRP_INET6_TCP_DESTROY;
+		case IPPROTO_UDP:
+			return SKNLGRP_INET6_UDP_DESTROY;
+		default:
+			return SKNLGRP_NONE;
+		}
+	default:
+		return SKNLGRP_NONE;
+	}
+}
+
+static inline
+bool sock_diag_has_destroy_listeners(const struct sock *sk)
+{
+	const struct net *n = sock_net(sk);
+	const enum sknetlink_groups group = sock_diag_destroy_group(sk);
+
+	return group != SKNLGRP_NONE && n->diag_nlsk &&
+		netlink_has_listeners(n->diag_nlsk, group);
+}
+void sock_diag_broadcast_destroy(struct sock *sk);
+
 #endif
diff --git a/include/net/sock.h b/include/net/sock.h
index 26c1c3171e00..3e8258699270 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1518,6 +1518,7 @@ static inline void unlock_sock_fast(struct sock *sk, bool slow)
 struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 		      struct proto *prot, int kern);
 void sk_free(struct sock *sk);
+void sk_destruct(struct sock *sk);
 struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority);
 
 struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force,
diff --git a/include/uapi/linux/sock_diag.h b/include/uapi/linux/sock_diag.h
index b00e29efb161..49230d36f9ce 100644
--- a/include/uapi/linux/sock_diag.h
+++ b/include/uapi/linux/sock_diag.h
@@ -23,4 +23,14 @@ enum {
 	SK_MEMINFO_VARS,
 };
 
+enum sknetlink_groups {
+	SKNLGRP_NONE,
+	SKNLGRP_INET_TCP_DESTROY,
+	SKNLGRP_INET_UDP_DESTROY,
+	SKNLGRP_INET6_TCP_DESTROY,
+	SKNLGRP_INET6_UDP_DESTROY,
+	__SKNLGRP_MAX,
+};
+#define SKNLGRP_MAX	(__SKNLGRP_MAX - 1)
+
 #endif /* _UAPI__SOCK_DIAG_H__ */
diff --git a/net/core/sock.c b/net/core/sock.c
index 7063c329c1b6..1e1fe9a68d83 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -131,6 +131,7 @@
 #include <linux/ipsec.h>
 #include <net/cls_cgroup.h>
 #include <net/netprio_cgroup.h>
+#include <linux/sock_diag.h>
 
 #include <linux/filter.h>
 
@@ -1423,7 +1424,7 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
 }
 EXPORT_SYMBOL(sk_alloc);
 
-static void __sk_free(struct sock *sk)
+void sk_destruct(struct sock *sk)
 {
 	struct sk_filter *filter;
 
@@ -1451,6 +1452,14 @@ static void __sk_free(struct sock *sk)
 	sk_prot_free(sk->sk_prot_creator, sk);
 }
 
+static void __sk_free(struct sock *sk)
+{
+	if (unlikely(sock_diag_has_destroy_listeners(sk)))
+		sock_diag_broadcast_destroy(sk);
+	else
+		sk_destruct(sk);
+}
+
 void sk_free(struct sock *sk)
 {
 	/*
diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c
index 74dddf84adcd..d79866c5f8bc 100644
--- a/net/core/sock_diag.c
+++ b/net/core/sock_diag.c
@@ -5,6 +5,9 @@
 #include <net/net_namespace.h>
 #include <linux/module.h>
 #include <net/sock.h>
+#include <linux/kernel.h>
+#include <linux/tcp.h>
+#include <linux/workqueue.h>
 
 #include <linux/inet_diag.h>
 #include <linux/sock_diag.h>
@@ -12,6 +15,7 @@
 static const struct sock_diag_handler *sock_diag_handlers[AF_MAX];
 static int (*inet_rcv_compat)(struct sk_buff *skb, struct nlmsghdr *nlh);
 static DEFINE_MUTEX(sock_diag_table_mutex);
+static struct workqueue_struct *broadcast_wq;
 
 static u64 sock_gen_cookie(struct sock *sk)
 {
@@ -101,6 +105,62 @@ out:
 }
 EXPORT_SYMBOL(sock_diag_put_filterinfo);
 
+struct broadcast_sk {
+	struct sock *sk;
+	struct work_struct work;
+};
+
+static size_t sock_diag_nlmsg_size(void)
+{
+	return NLMSG_ALIGN(sizeof(struct inet_diag_msg)
+	       + nla_total_size(sizeof(u8)) /* INET_DIAG_PROTOCOL */
+	       + nla_total_size(sizeof(struct tcp_info))); /* INET_DIAG_INFO */
+}
+
+static void sock_diag_broadcast_destroy_work(struct work_struct *work)
+{
+	struct broadcast_sk *bsk =
+		container_of(work, struct broadcast_sk, work);
+	struct sock *sk = bsk->sk;
+	const struct sock_diag_handler *hndl;
+	struct sk_buff *skb;
+	const enum sknetlink_groups group = sock_diag_destroy_group(sk);
+	int err = -1;
+
+	WARN_ON(group == SKNLGRP_NONE);
+
+	skb = nlmsg_new(sock_diag_nlmsg_size(), GFP_KERNEL);
+	if (!skb)
+		goto out;
+
+	mutex_lock(&sock_diag_table_mutex);
+	hndl = sock_diag_handlers[sk->sk_family];
+	if (hndl && hndl->get_info)
+		err = hndl->get_info(skb, sk);
+	mutex_unlock(&sock_diag_table_mutex);
+
+	if (!err)
+		nlmsg_multicast(sock_net(sk)->diag_nlsk, skb, 0, group,
+				GFP_KERNEL);
+	else
+		kfree_skb(skb);
+out:
+	sk_destruct(sk);
+	kfree(bsk);
+}
+
+void sock_diag_broadcast_destroy(struct sock *sk)
+{
+	/* Note, this function is often called from an interrupt context. */
+	struct broadcast_sk *bsk =
+		kmalloc(sizeof(struct broadcast_sk), GFP_ATOMIC);
+	if (!bsk)
+		return sk_destruct(sk);
+	bsk->sk = sk;
+	INIT_WORK(&bsk->work, sock_diag_broadcast_destroy_work);
+	queue_work(broadcast_wq, &bsk->work);
+}
+
 void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh))
 {
 	mutex_lock(&sock_diag_table_mutex);
@@ -211,10 +271,32 @@ static void sock_diag_rcv(struct sk_buff *skb)
 	mutex_unlock(&sock_diag_mutex);
 }
 
+static int sock_diag_bind(struct net *net, int group)
+{
+	switch (group) {
+	case SKNLGRP_INET_TCP_DESTROY:
+	case SKNLGRP_INET_UDP_DESTROY:
+		if (!sock_diag_handlers[AF_INET])
+			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+				       NETLINK_SOCK_DIAG, AF_INET);
+		break;
+	case SKNLGRP_INET6_TCP_DESTROY:
+	case SKNLGRP_INET6_UDP_DESTROY:
+		if (!sock_diag_handlers[AF_INET6])
+			request_module("net-pf-%d-proto-%d-type-%d", PF_NETLINK,
+				       NETLINK_SOCK_DIAG, AF_INET);
+		break;
+	}
+	return 0;
+}
+
 static int __net_init diag_net_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
+		.groups	= SKNLGRP_MAX,
 		.input	= sock_diag_rcv,
+		.bind	= sock_diag_bind,
+		.flags	= NL_CFG_F_NONROOT_RECV,
 	};
 
 	net->diag_nlsk = netlink_kernel_create(net, NETLINK_SOCK_DIAG, &cfg);
@@ -234,12 +316,15 @@ static struct pernet_operations diag_net_ops = {
 
 static int __init sock_diag_init(void)
 {
+	broadcast_wq = alloc_workqueue("sock_diag_events", 0, 0);
+	BUG_ON(!broadcast_wq);
 	return register_pernet_subsys(&diag_net_ops);
 }
 
 static void __exit sock_diag_exit(void)
 {
 	unregister_pernet_subsys(&diag_net_ops);
+	destroy_workqueue(broadcast_wq);
 }
 
 module_init(sock_diag_init);
-- 
cgit 


From 35ac838a9b96470f999db04320f53a2033642bfb Mon Sep 17 00:00:00 2001
From: Craig Gallek <kraig@google.com>
Date: Mon, 15 Jun 2015 11:26:20 -0400
Subject: sock_diag: implement a get_info handler for inet

This get_info handler will simply dispatch to the appropriate
existing inet protocol handler.

This patch also includes a new netlink attribute
(INET_DIAG_PROTOCOL).  This attribute is currently only used
for multicast messages.  Without this attribute, there is no
way of knowing the IP protocol used by the socket information
being broadcast.  This attribute is not necessary in the 'dump'
variant of this protocol (though it could easily be added)
because dump requests are issued for specific family/protocol
pairs.

Tested: ss -E (note, the -E option has not yet been merged into
the upstream version of ss).

Signed-off-by: Craig Gallek <kraig@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h |  3 ++-
 net/ipv4/inet_diag.c           | 46 ++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp.c                 |  4 +++-
 net/ipv4/tcp_diag.c            |  5 +++--
 4 files changed, 54 insertions(+), 4 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index c7093c75bdd6..b629fc53b109 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -111,9 +111,10 @@ enum {
 	INET_DIAG_SKMEMINFO,
 	INET_DIAG_SHUTDOWN,
 	INET_DIAG_DCTCPINFO,
+	INET_DIAG_PROTOCOL,  /* response attribute only */
 };
 
-#define INET_DIAG_MAX INET_DIAG_DCTCPINFO
+#define INET_DIAG_MAX INET_DIAG_PROTOCOL
 
 /* INET_DIAG_MEM */
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b1f01174bf32..21985d8d41e7 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -1078,14 +1078,60 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h)
 	return inet_diag_get_exact(skb, h, nlmsg_data(h));
 }
 
+static
+int inet_diag_handler_get_info(struct sk_buff *skb, struct sock *sk)
+{
+	const struct inet_diag_handler *handler;
+	struct nlmsghdr *nlh;
+	struct nlattr *attr;
+	struct inet_diag_msg *r;
+	void *info = NULL;
+	int err = 0;
+
+	nlh = nlmsg_put(skb, 0, 0, SOCK_DIAG_BY_FAMILY, sizeof(*r), 0);
+	if (!nlh)
+		return -ENOMEM;
+
+	r = nlmsg_data(nlh);
+	memset(r, 0, sizeof(*r));
+	inet_diag_msg_common_fill(r, sk);
+	r->idiag_state = sk->sk_state;
+
+	if ((err = nla_put_u8(skb, INET_DIAG_PROTOCOL, sk->sk_protocol))) {
+		nlmsg_cancel(skb, nlh);
+		return err;
+	}
+
+	handler = inet_diag_lock_handler(sk->sk_protocol);
+	if (IS_ERR(handler)) {
+		inet_diag_unlock_handler(handler);
+		nlmsg_cancel(skb, nlh);
+		return PTR_ERR(handler);
+	}
+
+	attr = handler->idiag_info_size
+		? nla_reserve(skb, INET_DIAG_INFO, handler->idiag_info_size)
+		: NULL;
+	if (attr)
+		info = nla_data(attr);
+
+	handler->idiag_get_info(sk, r, info);
+	inet_diag_unlock_handler(handler);
+
+	nlmsg_end(skb, nlh);
+	return 0;
+}
+
 static const struct sock_diag_handler inet_diag_handler = {
 	.family = AF_INET,
 	.dump = inet_diag_handler_dump,
+	.get_info = inet_diag_handler_get_info,
 };
 
 static const struct sock_diag_handler inet6_diag_handler = {
 	.family = AF_INET6,
 	.dump = inet_diag_handler_dump,
+	.get_info = inet_diag_handler_get_info,
 };
 
 int inet_diag_register(const struct inet_diag_handler *h)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 65f791f74845..697b86dd45b3 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2624,13 +2624,15 @@ EXPORT_SYMBOL(compat_tcp_setsockopt);
 /* Return information about state of tcp endpoint in API format. */
 void tcp_get_info(struct sock *sk, struct tcp_info *info)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
+	const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	u32 now = tcp_time_stamp;
 	unsigned int start;
 	u32 rate;
 
 	memset(info, 0, sizeof(*info));
+	if (sk->sk_type != SOCK_STREAM)
+		return;
 
 	info->tcpi_state = sk->sk_state;
 	info->tcpi_ca_state = icsk->icsk_ca_state;
diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c
index 423e3881a40b..479f34946177 100644
--- a/net/ipv4/tcp_diag.c
+++ b/net/ipv4/tcp_diag.c
@@ -19,13 +19,14 @@
 static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r,
 			      void *_info)
 {
-	const struct tcp_sock *tp = tcp_sk(sk);
 	struct tcp_info *info = _info;
 
 	if (sk->sk_state == TCP_LISTEN) {
 		r->idiag_rqueue = sk->sk_ack_backlog;
 		r->idiag_wqueue = sk->sk_max_ack_backlog;
-	} else {
+	} else if (sk->sk_type == SOCK_STREAM) {
+		const struct tcp_sock *tp = tcp_sk(sk);
+
 		r->idiag_rqueue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
 		r->idiag_wqueue = tp->write_seq - tp->snd_una;
 	}
-- 
cgit 


From ea2c6d9745c6698d9f820bc230aa1a80d9e908ac Mon Sep 17 00:00:00 2001
From: Tiejun Chen <tiejun.chen@intel.com>
Date: Mon, 4 May 2015 10:48:49 +0800
Subject: kvm: remove one useless check extension

We already check KVM_CAP_IRQFD in generic once enable CONFIG_HAVE_KVM_IRQFD,

kvm_vm_ioctl_check_extension_generic()
    |
    + switch (arg) {
    +   ...
    +   #ifdef CONFIG_HAVE_KVM_IRQFD
    +       case KVM_CAP_IRQFD:
    +   #endif
    +   ...
    +   return 1;
    +   ...
    + }
    |
    + kvm_vm_ioctl_check_extension()

So its not necessary to check this in arch again, and also fix one typo,
s/emlation/emulation.

Signed-off-by: Tiejun Chen <tiejun.chen@intel.com>
Acked-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c       | 1 -
 include/uapi/linux/kvm.h | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e41cb11f71b2..7e8233015ad8 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -171,7 +171,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
 	int r;
 	switch (ext) {
 	case KVM_CAP_IRQCHIP:
-	case KVM_CAP_IRQFD:
 	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 75bd9f7fd846..cbfd1acdeda7 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -895,7 +895,7 @@ struct kvm_xen_hvm_config {
  *
  * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
  * the irqfd to operate in resampling mode for level triggered interrupt
- * emlation.  See Documentation/virtual/kvm/api.txt.
+ * emulation.  See Documentation/virtual/kvm/api.txt.
  */
 #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
 
-- 
cgit 


From ef493bd930ae482c608c5999b40d79dda3f2a674 Mon Sep 17 00:00:00 2001
From: Roman Kubiak <r.kubiak@samsung.com>
Date: Fri, 12 Jun 2015 12:32:57 +0200
Subject: netfilter: nfnetlink_queue: add security context information

This patch adds an additional attribute when sending
packet information via netlink in netfilter_queue module.
It will send additional security context data, so that
userspace applications can verify this context against
their own security databases.

Signed-off-by: Roman Kubiak <r.kubiak@samsung.com>
Acked-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nfnetlink_queue.h |  4 ++-
 net/netfilter/nfnetlink_queue_core.c           | 35 +++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/nfnetlink_queue.h b/include/uapi/linux/netfilter/nfnetlink_queue.h
index 8dd819e2b5fe..b67a853638ff 100644
--- a/include/uapi/linux/netfilter/nfnetlink_queue.h
+++ b/include/uapi/linux/netfilter/nfnetlink_queue.h
@@ -49,6 +49,7 @@ enum nfqnl_attr_type {
 	NFQA_EXP,			/* nf_conntrack_netlink.h */
 	NFQA_UID,			/* __u32 sk uid */
 	NFQA_GID,			/* __u32 sk gid */
+	NFQA_SECCTX,			/* security context string */
 
 	__NFQA_MAX
 };
@@ -102,7 +103,8 @@ enum nfqnl_attr_config {
 #define NFQA_CFG_F_CONNTRACK			(1 << 1)
 #define NFQA_CFG_F_GSO				(1 << 2)
 #define NFQA_CFG_F_UID_GID			(1 << 3)
-#define NFQA_CFG_F_MAX				(1 << 4)
+#define NFQA_CFG_F_SECCTX			(1 << 4)
+#define NFQA_CFG_F_MAX				(1 << 5)
 
 /* flags for NFQA_SKB_INFO */
 /* packet appears to have wrong checksums, but they are ok */
diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c
index 22a5ac76683e..6eccf0fcdc63 100644
--- a/net/netfilter/nfnetlink_queue_core.c
+++ b/net/netfilter/nfnetlink_queue_core.c
@@ -278,6 +278,23 @@ nla_put_failure:
 	return -1;
 }
 
+static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
+{
+	u32 seclen = 0;
+#if IS_ENABLED(CONFIG_NETWORK_SECMARK)
+	if (!skb || !sk_fullsock(skb->sk))
+		return 0;
+
+	read_lock_bh(&skb->sk->sk_callback_lock);
+
+	if (skb->secmark)
+		security_secid_to_secctx(skb->secmark, secdata, &seclen);
+
+	read_unlock_bh(&skb->sk->sk_callback_lock);
+#endif
+	return seclen;
+}
+
 static struct sk_buff *
 nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			   struct nf_queue_entry *entry,
@@ -297,6 +314,8 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	struct nf_conn *ct = NULL;
 	enum ip_conntrack_info uninitialized_var(ctinfo);
 	bool csum_verify;
+	char *secdata = NULL;
+	u32 seclen = 0;
 
 	size =    nlmsg_total_size(sizeof(struct nfgenmsg))
 		+ nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
@@ -352,6 +371,12 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 			+ nla_total_size(sizeof(u_int32_t)));	/* gid */
 	}
 
+	if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
+		seclen = nfqnl_get_sk_secctx(entskb, &secdata);
+		if (seclen)
+			size += nla_total_size(seclen);
+	}
+
 	skb = nfnetlink_alloc_skb(net, size, queue->peer_portid,
 				  GFP_ATOMIC);
 	if (!skb) {
@@ -479,6 +504,9 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
 	    nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
 		goto nla_put_failure;
 
+	if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
+		goto nla_put_failure;
+
 	if (ct && nfqnl_ct_put(skb, ct, ctinfo) < 0)
 		goto nla_put_failure;
 
@@ -1142,7 +1170,12 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb,
 			ret = -EOPNOTSUPP;
 			goto err_out_unlock;
 		}
-
+#if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
+		if (flags & mask & NFQA_CFG_F_SECCTX) {
+			ret = -EOPNOTSUPP;
+			goto err_out_unlock;
+		}
+#endif
 		spin_lock_bh(&queue->lock);
 		queue->flags &= ~mask;
 		queue->flags |= flags & mask;
-- 
cgit 


From 01555e74bde51444c6898ef1800fb2bc697d479e Mon Sep 17 00:00:00 2001
From: Harout Hedeshian <harouth@codeaurora.org>
Date: Mon, 15 Jun 2015 18:40:43 -0600
Subject: netfilter: xt_socket: add XT_SOCKET_RESTORESKMARK flag

xt_socket is useful for matching sockets with IP_TRANSPARENT and
taking some action on the matching packets. However, it lacks the
ability to match only a small subset of transparent sockets.

Suppose there are 2 applications, each with its own set of transparent
sockets. The first application wants all matching packets dropped,
while the second application wants them forwarded somewhere else.

Add the ability to retore the skb->mark from the sk_mark. The mark
is only restored if a matching socket is found and the transparent /
nowildcard conditions are satisfied.

Now the 2 hypothetical applications can differentiate their sockets
based on a mark value set with SO_MARK.

iptables -t mangle -I PREROUTING -m socket --transparent \
                                           --restore-skmark -j action
iptables -t mangle -A action -m mark --mark 10 -j action2
iptables -t mangle -A action -m mark --mark 11 -j action3

Signed-off-by: Harout Hedeshian <harouth@codeaurora.org>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/xt_socket.h |  8 +++++
 net/netfilter/xt_socket.c                | 59 ++++++++++++++++++++++++++++----
 2 files changed, 61 insertions(+), 6 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netfilter/xt_socket.h b/include/uapi/linux/netfilter/xt_socket.h
index 6315e2ac3474..87644f832494 100644
--- a/include/uapi/linux/netfilter/xt_socket.h
+++ b/include/uapi/linux/netfilter/xt_socket.h
@@ -6,6 +6,7 @@
 enum {
 	XT_SOCKET_TRANSPARENT = 1 << 0,
 	XT_SOCKET_NOWILDCARD = 1 << 1,
+	XT_SOCKET_RESTORESKMARK = 1 << 2,
 };
 
 struct xt_socket_mtinfo1 {
@@ -18,4 +19,11 @@ struct xt_socket_mtinfo2 {
 };
 #define XT_SOCKET_FLAGS_V2 (XT_SOCKET_TRANSPARENT | XT_SOCKET_NOWILDCARD)
 
+struct xt_socket_mtinfo3 {
+	__u8 flags;
+};
+#define XT_SOCKET_FLAGS_V3 (XT_SOCKET_TRANSPARENT \
+			   | XT_SOCKET_NOWILDCARD \
+			   | XT_SOCKET_RESTORESKMARK)
+
 #endif /* _XT_SOCKET_H */
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index e092cb046326..43e26c881100 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -205,6 +205,7 @@ static bool
 socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	     const struct xt_socket_mtinfo1 *info)
 {
+	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
 	if (!sk)
@@ -226,6 +227,10 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = xt_socket_sk_is_transparent(sk);
 
+		if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+		    transparent)
+			pskb->mark = sk->sk_mark;
+
 		if (sk != skb->sk)
 			sock_gen_put(sk);
 
@@ -247,7 +252,7 @@ socket_mt4_v0(const struct sk_buff *skb, struct xt_action_param *par)
 }
 
 static bool
-socket_mt4_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt4_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	return socket_match(skb, par, par->matchinfo);
 }
@@ -371,9 +376,10 @@ static struct sock *xt_socket_lookup_slow_v6(const struct sk_buff *skb,
 }
 
 static bool
-socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
+socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo;
+	struct sk_buff *pskb = (struct sk_buff *)skb;
 	struct sock *sk = skb->sk;
 
 	if (!sk)
@@ -395,6 +401,10 @@ socket_mt6_v1_v2(const struct sk_buff *skb, struct xt_action_param *par)
 		if (info->flags & XT_SOCKET_TRANSPARENT)
 			transparent = xt_socket_sk_is_transparent(sk);
 
+		if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard &&
+		    transparent)
+			pskb->mark = sk->sk_mark;
+
 		if (sk != skb->sk)
 			sock_gen_put(sk);
 
@@ -428,6 +438,19 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
+static int socket_mt_v3_check(const struct xt_mtchk_param *par)
+{
+	const struct xt_socket_mtinfo3 *info =
+				    (struct xt_socket_mtinfo3 *)par->matchinfo;
+
+	if (info->flags & ~XT_SOCKET_FLAGS_V3) {
+		pr_info("unknown flags 0x%x\n",
+			info->flags & ~XT_SOCKET_FLAGS_V3);
+		return -EINVAL;
+	}
+	return 0;
+}
+
 static struct xt_match socket_mt_reg[] __read_mostly = {
 	{
 		.name		= "socket",
@@ -442,7 +465,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.name		= "socket",
 		.revision	= 1,
 		.family		= NFPROTO_IPV4,
-		.match		= socket_mt4_v1_v2,
+		.match		= socket_mt4_v1_v2_v3,
 		.checkentry	= socket_mt_v1_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
@@ -454,7 +477,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.name		= "socket",
 		.revision	= 1,
 		.family		= NFPROTO_IPV6,
-		.match		= socket_mt6_v1_v2,
+		.match		= socket_mt6_v1_v2_v3,
 		.checkentry	= socket_mt_v1_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
@@ -466,7 +489,7 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.name		= "socket",
 		.revision	= 2,
 		.family		= NFPROTO_IPV4,
-		.match		= socket_mt4_v1_v2,
+		.match		= socket_mt4_v1_v2_v3,
 		.checkentry	= socket_mt_v2_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
@@ -478,13 +501,37 @@ static struct xt_match socket_mt_reg[] __read_mostly = {
 		.name		= "socket",
 		.revision	= 2,
 		.family		= NFPROTO_IPV6,
-		.match		= socket_mt6_v1_v2,
+		.match		= socket_mt6_v1_v2_v3,
 		.checkentry	= socket_mt_v2_check,
 		.matchsize	= sizeof(struct xt_socket_mtinfo1),
 		.hooks		= (1 << NF_INET_PRE_ROUTING) |
 				  (1 << NF_INET_LOCAL_IN),
 		.me		= THIS_MODULE,
 	},
+#endif
+	{
+		.name		= "socket",
+		.revision	= 3,
+		.family		= NFPROTO_IPV4,
+		.match		= socket_mt4_v1_v2_v3,
+		.checkentry	= socket_mt_v3_check,
+		.matchsize	= sizeof(struct xt_socket_mtinfo1),
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+		.me		= THIS_MODULE,
+	},
+#ifdef XT_SOCKET_HAVE_IPV6
+	{
+		.name		= "socket",
+		.revision	= 3,
+		.family		= NFPROTO_IPV6,
+		.match		= socket_mt6_v1_v2_v3,
+		.checkentry	= socket_mt_v3_check,
+		.matchsize	= sizeof(struct xt_socket_mtinfo1),
+		.hooks		= (1 << NF_INET_PRE_ROUTING) |
+				  (1 << NF_INET_LOCAL_IN),
+		.me		= THIS_MODULE,
+	},
 #endif
 };
 
-- 
cgit 


From f8d5556fa9dbf6b88e1a8fe88e47ad1b8ddb4742 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 15 Jun 2015 10:52:40 -0300
Subject: [media] videodev2.h: fix copy-and-paste error in
 V4L2_MAP_XFER_FUNC_DEFAULT

The colorspace argument was compared against a V4L2_XFER_FUNC define instead
of against a V4L2_COLORSPACE define, returning the wrong answer.

Signed-off-by: Hans Verkuil <hans.verkuil@cisco.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com>
---
 include/uapi/linux/videodev2.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/videodev2.h b/include/uapi/linux/videodev2.h
index 3d5fc72d53a7..3228fbebcd63 100644
--- a/include/uapi/linux/videodev2.h
+++ b/include/uapi/linux/videodev2.h
@@ -270,7 +270,7 @@ enum v4l2_xfer_func {
  * This depends on the colorspace.
  */
 #define V4L2_MAP_XFER_FUNC_DEFAULT(colsp) \
-	((colsp) == V4L2_XFER_FUNC_ADOBERGB ? V4L2_XFER_FUNC_ADOBERGB : \
+	((colsp) == V4L2_COLORSPACE_ADOBERGB ? V4L2_XFER_FUNC_ADOBERGB : \
 	 ((colsp) == V4L2_COLORSPACE_SMPTE240M ? V4L2_XFER_FUNC_SMPTE240M : \
 	  ((colsp) == V4L2_COLORSPACE_RAW ? V4L2_XFER_FUNC_NONE : \
 	   ((colsp) == V4L2_COLORSPACE_SRGB || (colsp) == V4L2_COLORSPACE_JPEG ? \
-- 
cgit 


From a263653ed798216c0069922d7b5237ca49436007 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 17 Jun 2015 10:28:27 -0500
Subject: netfilter: don't pull include/linux/netfilter.h from netns headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This pulls the full hook netfilter definitions from all those that include
net_namespace.h.

Instead let's just include the bare minimum required in the new
linux/netfilter_defs.h file, and use it from the netfilter netns header files.

I also needed to include in.h and in6.h from linux/netfilter.h otherwise we hit
this compilation error:

In file included from include/linux/netfilter_defs.h:4:0,
                 from include/net/netns/netfilter.h:4,
                 from include/net/net_namespace.h:22,
                 from include/linux/netdevice.h:43,
                 from net/netfilter/nfnetlink_queue_core.c:23:
include/uapi/linux/netfilter.h:76:17: error: field ‘in’ has incomplete type struct in_addr in;

And also explicit include linux/netfilter.h in several spots.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
---
 include/linux/netfilter.h      | 6 ++----
 include/linux/netfilter_defs.h | 9 +++++++++
 include/net/netns/netfilter.h  | 2 +-
 include/net/netns/x_tables.h   | 2 +-
 include/uapi/linux/netfilter.h | 3 ++-
 net/ipv6/output_core.c         | 1 +
 6 files changed, 16 insertions(+), 7 deletions(-)
 create mode 100644 include/linux/netfilter_defs.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index f5ff5d156da8..00050dfd9f23 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -10,7 +10,8 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/static_key.h>
-#include <uapi/linux/netfilter.h>
+#include <linux/netfilter_defs.h>
+
 #ifdef CONFIG_NETFILTER
 static inline int NF_DROP_GETERR(int verdict)
 {
@@ -38,9 +39,6 @@ static inline void nf_inet_addr_mask(const union nf_inet_addr *a1,
 
 int netfilter_init(void);
 
-/* Largest hook number + 1 */
-#define NF_MAX_HOOKS 8
-
 struct sk_buff;
 
 struct nf_hook_ops;
diff --git a/include/linux/netfilter_defs.h b/include/linux/netfilter_defs.h
new file mode 100644
index 000000000000..d3a7f8597e82
--- /dev/null
+++ b/include/linux/netfilter_defs.h
@@ -0,0 +1,9 @@
+#ifndef __LINUX_NETFILTER_CORE_H_
+#define __LINUX_NETFILTER_CORE_H_
+
+#include <uapi/linux/netfilter.h>
+
+/* Largest hook number + 1, see uapi/linux/netfilter_decnet.h */
+#define NF_MAX_HOOKS 8
+
+#endif
diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h
index cf25b5e35f3c..532e4ba64f49 100644
--- a/include/net/netns/netfilter.h
+++ b/include/net/netns/netfilter.h
@@ -1,7 +1,7 @@
 #ifndef __NETNS_NETFILTER_H
 #define __NETNS_NETFILTER_H
 
-#include <linux/netfilter.h>
+#include <linux/netfilter_defs.h>
 
 struct proc_dir_entry;
 struct nf_logger;
diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h
index 4d6597ad6067..c8a7681efa6a 100644
--- a/include/net/netns/x_tables.h
+++ b/include/net/netns/x_tables.h
@@ -2,7 +2,7 @@
 #define __NETNS_X_TABLES_H
 
 #include <linux/list.h>
-#include <linux/netfilter.h>
+#include <linux/netfilter_defs.h>
 
 struct ebt_table;
 
diff --git a/include/uapi/linux/netfilter.h b/include/uapi/linux/netfilter.h
index 177027cce6b3..d93f949d1d9a 100644
--- a/include/uapi/linux/netfilter.h
+++ b/include/uapi/linux/netfilter.h
@@ -4,7 +4,8 @@
 #include <linux/types.h>
 #include <linux/compiler.h>
 #include <linux/sysctl.h>
-
+#include <linux/in.h>
+#include <linux/in6.h>
 
 /* Responses from hook functions. */
 #define NF_DROP 0
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index 21678acd4521..928a0fb0b744 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -8,6 +8,7 @@
 #include <net/ip6_fib.h>
 #include <net/addrconf.h>
 #include <net/secure_seq.h>
+#include <linux/netfilter.h>
 
 static u32 __ipv6_select_ident(struct net *net, u32 hashrnd,
 			       const struct in6_addr *dst,
-- 
cgit 


From 930e6fcd2bcce9bcd9d4aa7e755678d33f3fe6f4 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Wed, 17 Jun 2015 09:51:10 -0400
Subject: perf tools: Add time out to force stop proc map processing

System wide sampling like 'perf top' or 'perf record -a' read all
threads /proc/xxx/maps before sampling. If there are any threads which
generating a keeping growing huge maps, perf will do infinite loop
during synthesizing. Nothing will be sampled.

This patch fixes this issue by adding per-thread timeout to force stop
this kind of endless proc map processing.

PERF_RECORD_MISC_PROC_MAP_PARSE_TIME_OUT is introduced to indicate that
the mmap record are truncated by time out. User will get warning
notification when truncated mmap records are detected.

Reported-by: Ying Huang <ying.huang@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ying Huang <ying.huang@intel.com>
Link: http://lkml.kernel.org/r/1434549071-25611-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/uapi/linux/perf_event.h |  4 ++++
 tools/perf/util/event.c         | 18 ++++++++++++++++++
 tools/perf/util/event.h         |  1 +
 tools/perf/util/session.c       | 11 +++++++++++
 4 files changed, 34 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 613ed9ad588f..d97f84c080da 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -565,6 +565,10 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+/*
+ * Indicates that /proc/PID/maps parsing are truncated by time out.
+ */
+#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)
 /*
  * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
  * different events so can reuse the same bit position.
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 793b1503d437..416ba80c628f 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -213,6 +213,8 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	return 0;
 }
 
+#define PROC_MAP_PARSE_TIMEOUT	(500 * 1000000ULL)
+
 int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       union perf_event *event,
 				       pid_t pid, pid_t tgid,
@@ -222,6 +224,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 {
 	char filename[PATH_MAX];
 	FILE *fp;
+	unsigned long long t;
+	bool truncation = false;
 	int rc = 0;
 
 	if (machine__is_default_guest(machine))
@@ -240,6 +244,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 	}
 
 	event->header.type = PERF_RECORD_MMAP2;
+	t = rdclock();
 
 	while (1) {
 		char bf[BUFSIZ];
@@ -253,6 +258,12 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
+		if ((rdclock() - t) > PROC_MAP_PARSE_TIMEOUT) {
+			pr_warning("Reading %s time out.\n", filename);
+			truncation = true;
+			goto out;
+		}
+
 		/* ensure null termination since stack will be reused. */
 		strcpy(execname, "");
 
@@ -301,6 +312,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
 		}
 
+out:
+		if (truncation)
+			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
 		if (!strcmp(execname, ""))
 			strcpy(execname, anonstr);
 
@@ -319,6 +334,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			rc = -1;
 			break;
 		}
+
+		if (truncation)
+			break;
 	}
 
 	fclose(fp);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 5dc51ada05df..39868f529cab 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -265,6 +265,7 @@ struct events_stats {
 	u32 nr_unknown_id;
 	u32 nr_unprocessable_samples;
 	u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+	u32 nr_proc_map_timeout;
 };
 
 struct attr_event {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c371336d1eb2..2d882fd1f1b9 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1064,6 +1064,8 @@ static int machines__deliver_event(struct machines *machines,
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
 	case PERF_RECORD_MMAP2:
+		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+			++evlist->stats.nr_proc_map_timeout;
 		return tool->mmap2(tool, event, sample, machine);
 	case PERF_RECORD_COMM:
 		return tool->comm(tool, event, sample, machine);
@@ -1360,6 +1362,15 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
 
 	events_stats__auxtrace_error_warn(stats);
+
+	if (stats->nr_proc_map_timeout != 0) {
+		ui__warning("%d map information files for pre-existing threads were\n"
+			    "not processed, if there are samples for addresses they\n"
+			    "will not be resolved, you may find out which are these\n"
+			    "threads by running with -v and redirecting the output\n"
+			    "to a file.\n",
+			    stats->nr_proc_map_timeout);
+	}
 }
 
 static int perf_session__flush_thread_stack(struct thread *thread,
-- 
cgit 


From b42be38b2778eda2237fc759e55e3b698b05b315 Mon Sep 17 00:00:00 2001
From: David Herrmann <dh.herrmann@gmail.com>
Date: Wed, 17 Jun 2015 17:14:33 +0200
Subject: netlink: add API to retrieve all group memberships

This patch adds getsockopt(SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS) to
retrieve all groups a socket is a member of. Currently, we have to use
getsockname() and look at the nl.nl_groups bitmask. However, this mask is
limited to 32 groups. Hence, similar to NETLINK_ADD_MEMBERSHIP and
NETLINK_DROP_MEMBERSHIP, this adds a separate sockopt to manager higher
groups IDs than 32.

This new NETLINK_LIST_MEMBERSHIPS option takes a pointer to __u32 and the
size of the array. The array is filled with the full membership-set of the
socket, and the required array size is returned in optlen. Hence,
user-space can retry with a properly sized array in case it was too small.

Signed-off-by: David Herrmann <dh.herrmann@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netlink.h | 17 +++++++++--------
 net/netlink/af_netlink.c     | 22 ++++++++++++++++++++++
 2 files changed, 31 insertions(+), 8 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 3e34b7d702f8..cf6a65cccbdf 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -101,14 +101,15 @@ struct nlmsgerr {
 	struct nlmsghdr msg;
 };
 
-#define NETLINK_ADD_MEMBERSHIP	1
-#define NETLINK_DROP_MEMBERSHIP	2
-#define NETLINK_PKTINFO		3
-#define NETLINK_BROADCAST_ERROR	4
-#define NETLINK_NO_ENOBUFS	5
-#define NETLINK_RX_RING		6
-#define NETLINK_TX_RING		7
-#define NETLINK_LISTEN_ALL_NSID	8
+#define NETLINK_ADD_MEMBERSHIP		1
+#define NETLINK_DROP_MEMBERSHIP		2
+#define NETLINK_PKTINFO			3
+#define NETLINK_BROADCAST_ERROR		4
+#define NETLINK_NO_ENOBUFS		5
+#define NETLINK_RX_RING			6
+#define NETLINK_TX_RING			7
+#define NETLINK_LISTEN_ALL_NSID		8
+#define NETLINK_LIST_MEMBERSHIPS	9
 
 struct nl_pktinfo {
 	__u32	group;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 69d67c300b80..dea925388a5b 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2290,6 +2290,28 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
 			return -EFAULT;
 		err = 0;
 		break;
+	case NETLINK_LIST_MEMBERSHIPS: {
+		int pos, idx, shift;
+
+		err = 0;
+		netlink_table_grab();
+		for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
+			if (len - pos < sizeof(u32))
+				break;
+
+			idx = pos / sizeof(unsigned long);
+			shift = (pos % sizeof(unsigned long)) * 8;
+			if (put_user((u32)(nlk->groups[idx] >> shift),
+				     (u32 __user *)(optval + pos))) {
+				err = -EFAULT;
+				break;
+			}
+		}
+		if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
+			err = -EFAULT;
+		netlink_table_ungrab();
+		break;
+	}
 	default:
 		err = -ENOPROTOOPT;
 	}
-- 
cgit 


From d0497524658e37956737d7dbee73cc42120255dc Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Jun 2015 19:11:44 +0800
Subject: crypto: user - Move cryptouser.h to uapi

The header file cryptouser.h only contains information that is
exported to user-space.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/cryptouser.h      | 110 ----------------------------------------
 include/uapi/linux/cryptouser.h | 110 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 110 deletions(-)
 delete mode 100644 include/linux/cryptouser.h
 create mode 100644 include/uapi/linux/cryptouser.h

(limited to 'include/uapi/linux')

diff --git a/include/linux/cryptouser.h b/include/linux/cryptouser.h
deleted file mode 100644
index 36efbbbf2f83..000000000000
--- a/include/linux/cryptouser.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/*
- * Crypto user configuration API.
- *
- * Copyright (C) 2011 secunet Security Networks AG
- * Copyright (C) 2011 Steffen Klassert <steffen.klassert@secunet.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- */
-
-/* Netlink configuration messages.  */
-enum {
-	CRYPTO_MSG_BASE = 0x10,
-	CRYPTO_MSG_NEWALG = 0x10,
-	CRYPTO_MSG_DELALG,
-	CRYPTO_MSG_UPDATEALG,
-	CRYPTO_MSG_GETALG,
-	__CRYPTO_MSG_MAX
-};
-#define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1)
-#define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE)
-
-#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME
-
-/* Netlink message attributes.  */
-enum crypto_attr_type_t {
-	CRYPTOCFGA_UNSPEC,
-	CRYPTOCFGA_PRIORITY_VAL,	/* __u32 */
-	CRYPTOCFGA_REPORT_LARVAL,	/* struct crypto_report_larval */
-	CRYPTOCFGA_REPORT_HASH,		/* struct crypto_report_hash */
-	CRYPTOCFGA_REPORT_BLKCIPHER,	/* struct crypto_report_blkcipher */
-	CRYPTOCFGA_REPORT_AEAD,		/* struct crypto_report_aead */
-	CRYPTOCFGA_REPORT_COMPRESS,	/* struct crypto_report_comp */
-	CRYPTOCFGA_REPORT_RNG,		/* struct crypto_report_rng */
-	CRYPTOCFGA_REPORT_CIPHER,	/* struct crypto_report_cipher */
-	CRYPTOCFGA_REPORT_AKCIPHER,	/* struct crypto_report_akcipher */
-	__CRYPTOCFGA_MAX
-
-#define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
-};
-
-struct crypto_user_alg {
-	char cru_name[CRYPTO_MAX_ALG_NAME];
-	char cru_driver_name[CRYPTO_MAX_ALG_NAME];
-	char cru_module_name[CRYPTO_MAX_ALG_NAME];
-	__u32 cru_type;
-	__u32 cru_mask;
-	__u32 cru_refcnt;
-	__u32 cru_flags;
-};
-
-struct crypto_report_larval {
-	char type[CRYPTO_MAX_NAME];
-};
-
-struct crypto_report_hash {
-	char type[CRYPTO_MAX_NAME];
-	unsigned int blocksize;
-	unsigned int digestsize;
-};
-
-struct crypto_report_cipher {
-	char type[CRYPTO_MAX_ALG_NAME];
-	unsigned int blocksize;
-	unsigned int min_keysize;
-	unsigned int max_keysize;
-};
-
-struct crypto_report_blkcipher {
-	char type[CRYPTO_MAX_NAME];
-	char geniv[CRYPTO_MAX_NAME];
-	unsigned int blocksize;
-	unsigned int min_keysize;
-	unsigned int max_keysize;
-	unsigned int ivsize;
-};
-
-struct crypto_report_aead {
-	char type[CRYPTO_MAX_NAME];
-	char geniv[CRYPTO_MAX_NAME];
-	unsigned int blocksize;
-	unsigned int maxauthsize;
-	unsigned int ivsize;
-};
-
-struct crypto_report_comp {
-	char type[CRYPTO_MAX_NAME];
-};
-
-struct crypto_report_rng {
-	char type[CRYPTO_MAX_NAME];
-	unsigned int seedsize;
-};
-
-struct crypto_report_akcipher {
-	char type[CRYPTO_MAX_NAME];
-};
-
-#define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
-			       sizeof(struct crypto_report_blkcipher))
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
new file mode 100644
index 000000000000..36efbbbf2f83
--- /dev/null
+++ b/include/uapi/linux/cryptouser.h
@@ -0,0 +1,110 @@
+/*
+ * Crypto user configuration API.
+ *
+ * Copyright (C) 2011 secunet Security Networks AG
+ * Copyright (C) 2011 Steffen Klassert <steffen.klassert@secunet.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* Netlink configuration messages.  */
+enum {
+	CRYPTO_MSG_BASE = 0x10,
+	CRYPTO_MSG_NEWALG = 0x10,
+	CRYPTO_MSG_DELALG,
+	CRYPTO_MSG_UPDATEALG,
+	CRYPTO_MSG_GETALG,
+	__CRYPTO_MSG_MAX
+};
+#define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1)
+#define CRYPTO_NR_MSGTYPES (CRYPTO_MSG_MAX + 1 - CRYPTO_MSG_BASE)
+
+#define CRYPTO_MAX_NAME CRYPTO_MAX_ALG_NAME
+
+/* Netlink message attributes.  */
+enum crypto_attr_type_t {
+	CRYPTOCFGA_UNSPEC,
+	CRYPTOCFGA_PRIORITY_VAL,	/* __u32 */
+	CRYPTOCFGA_REPORT_LARVAL,	/* struct crypto_report_larval */
+	CRYPTOCFGA_REPORT_HASH,		/* struct crypto_report_hash */
+	CRYPTOCFGA_REPORT_BLKCIPHER,	/* struct crypto_report_blkcipher */
+	CRYPTOCFGA_REPORT_AEAD,		/* struct crypto_report_aead */
+	CRYPTOCFGA_REPORT_COMPRESS,	/* struct crypto_report_comp */
+	CRYPTOCFGA_REPORT_RNG,		/* struct crypto_report_rng */
+	CRYPTOCFGA_REPORT_CIPHER,	/* struct crypto_report_cipher */
+	CRYPTOCFGA_REPORT_AKCIPHER,	/* struct crypto_report_akcipher */
+	__CRYPTOCFGA_MAX
+
+#define CRYPTOCFGA_MAX (__CRYPTOCFGA_MAX - 1)
+};
+
+struct crypto_user_alg {
+	char cru_name[CRYPTO_MAX_ALG_NAME];
+	char cru_driver_name[CRYPTO_MAX_ALG_NAME];
+	char cru_module_name[CRYPTO_MAX_ALG_NAME];
+	__u32 cru_type;
+	__u32 cru_mask;
+	__u32 cru_refcnt;
+	__u32 cru_flags;
+};
+
+struct crypto_report_larval {
+	char type[CRYPTO_MAX_NAME];
+};
+
+struct crypto_report_hash {
+	char type[CRYPTO_MAX_NAME];
+	unsigned int blocksize;
+	unsigned int digestsize;
+};
+
+struct crypto_report_cipher {
+	char type[CRYPTO_MAX_ALG_NAME];
+	unsigned int blocksize;
+	unsigned int min_keysize;
+	unsigned int max_keysize;
+};
+
+struct crypto_report_blkcipher {
+	char type[CRYPTO_MAX_NAME];
+	char geniv[CRYPTO_MAX_NAME];
+	unsigned int blocksize;
+	unsigned int min_keysize;
+	unsigned int max_keysize;
+	unsigned int ivsize;
+};
+
+struct crypto_report_aead {
+	char type[CRYPTO_MAX_NAME];
+	char geniv[CRYPTO_MAX_NAME];
+	unsigned int blocksize;
+	unsigned int maxauthsize;
+	unsigned int ivsize;
+};
+
+struct crypto_report_comp {
+	char type[CRYPTO_MAX_NAME];
+};
+
+struct crypto_report_rng {
+	char type[CRYPTO_MAX_NAME];
+	unsigned int seedsize;
+};
+
+struct crypto_report_akcipher {
+	char type[CRYPTO_MAX_NAME];
+};
+
+#define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
+			       sizeof(struct crypto_report_blkcipher))
-- 
cgit 


From 9aa867e46565d61491f884c793e4988678fbffa3 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Sun, 21 Jun 2015 19:11:45 +0800
Subject: crypto: user - Add CRYPTO_MSG_DELRNG

This patch adds a new crypto_user command that allows the admin to
delete the crypto system RNG.  Note that this can only be done if
the RNG is currently not in use.  The next time it is used a new
system RNG will be allocated.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 crypto/crypto_user.c            | 12 +++++++++++-
 include/uapi/linux/cryptouser.h |  1 +
 2 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c
index 11dbd5a81c72..08ea2867fc8a 100644
--- a/crypto/crypto_user.c
+++ b/crypto/crypto_user.c
@@ -27,6 +27,7 @@
 #include <net/net_namespace.h>
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/internal/rng.h>
 #include <crypto/akcipher.h>
 
 #include "internal.h"
@@ -472,13 +473,21 @@ static int crypto_add_alg(struct sk_buff *skb, struct nlmsghdr *nlh,
 	return 0;
 }
 
+static int crypto_del_rng(struct sk_buff *skb, struct nlmsghdr *nlh,
+			  struct nlattr **attrs)
+{
+	if (!netlink_capable(skb, CAP_NET_ADMIN))
+		return -EPERM;
+	return crypto_del_default_rng();
+}
+
 #define MSGSIZE(type) sizeof(struct type)
 
 static const int crypto_msg_min[CRYPTO_NR_MSGTYPES] = {
 	[CRYPTO_MSG_NEWALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_DELALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
 	[CRYPTO_MSG_UPDATEALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
-	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = MSGSIZE(crypto_user_alg),
+	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = 0,
 };
 
 static const struct nla_policy crypto_policy[CRYPTOCFGA_MAX+1] = {
@@ -498,6 +507,7 @@ static const struct crypto_link {
 	[CRYPTO_MSG_GETALG	- CRYPTO_MSG_BASE] = { .doit = crypto_report,
 						       .dump = crypto_dump_report,
 						       .done = crypto_dump_report_done},
+	[CRYPTO_MSG_DELRNG	- CRYPTO_MSG_BASE] = { .doit = crypto_del_rng },
 };
 
 static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index 36efbbbf2f83..2e67bb64c1da 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -25,6 +25,7 @@ enum {
 	CRYPTO_MSG_DELALG,
 	CRYPTO_MSG_UPDATEALG,
 	CRYPTO_MSG_GETALG,
+	CRYPTO_MSG_DELRNG,
 	__CRYPTO_MSG_MAX
 };
 #define CRYPTO_MSG_MAX (__CRYPTO_MSG_MAX - 1)
-- 
cgit 


From 5023a5ca8e144846ec0646554336000abb11e04f Mon Sep 17 00:00:00 2001
From: Sebastian Reichel <sre@kernel.org>
Date: Thu, 16 Apr 2015 19:05:18 +0200
Subject: HSI: cmt_speech: fix timestamp interface

The user interface for timestamps in the new cmt_speech
driver is broken in multiple ways:

- The layout is incompatible between 32-bit and 64-bit user
  space, because of the size differences in 'struct timespec'.
  This means that the driver can not work when used with 32-bit
  user space on a 64-bit kernel.

- As there are plans to change 32-bit user space to use
  a 64-bit time_t type in the future, it will also be
  incompatible with new 32-bit user space.

- It is using ktime_get_ts under it's deprecated alias
  (do_posix_clock_monotonic_gettime).

To keep support for the user space tools written for this driver (which
have lived many years out-of-tree), the interface has been hardened to
unsigned 32-bit values.

Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Sebastian Reichel <sre@kernel.org>
---
 drivers/hsi/clients/cmt_speech.c     |  9 +++++++--
 include/uapi/linux/hsi/cs-protocol.h | 16 +++++++++++-----
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c
index 4983529a9c6c..d04643f9548b 100644
--- a/drivers/hsi/clients/cmt_speech.c
+++ b/drivers/hsi/clients/cmt_speech.c
@@ -451,9 +451,14 @@ static void cs_hsi_read_on_control_complete(struct hsi_msg *msg)
 	dev_dbg(&hi->cl->device, "Read on control: %08X\n", cmd);
 	cs_release_cmd(msg);
 	if (hi->flags & CS_FEAT_TSTAMP_RX_CTRL) {
-		struct timespec *tstamp =
+		struct timespec tspec;
+		struct cs_timestamp *tstamp =
 			&hi->mmap_cfg->tstamp_rx_ctrl;
-		do_posix_clock_monotonic_gettime(tstamp);
+
+		ktime_get_ts(&tspec);
+
+		tstamp->tv_sec = (__u32) tspec.tv_sec;
+		tstamp->tv_nsec = (__u32) tspec.tv_nsec;
 	}
 	spin_unlock(&hi->lock);
 
diff --git a/include/uapi/linux/hsi/cs-protocol.h b/include/uapi/linux/hsi/cs-protocol.h
index 4957bba57cbe..f153d6ea7c62 100644
--- a/include/uapi/linux/hsi/cs-protocol.h
+++ b/include/uapi/linux/hsi/cs-protocol.h
@@ -75,6 +75,15 @@ struct cs_buffer_config {
 	__u32 reserved[4];
 };
 
+/*
+ * struct for monotonic timestamp taken when the
+ * last control command was received
+ */
+struct cs_timestamp {
+	__u32 tv_sec;  /* seconds */
+	__u32 tv_nsec; /* nanoseconds */
+};
+
 /*
  * Struct describing the layout and contents of the driver mmap area.
  * This information is meant as read-only information for the application.
@@ -91,11 +100,8 @@ struct cs_mmap_config_block {
 	__u32 rx_ptr;
 	__u32 rx_ptr_boundary;
 	__u32 reserved3[2];
-	/*
-	 * if enabled with CS_FEAT_TSTAMP_RX_CTRL, monotonic
-	 * timestamp taken when the last control command was received
-	 */
-	struct timespec tstamp_rx_ctrl;
+	/* enabled with CS_FEAT_TSTAMP_RX_CTRL */
+	struct cs_timestamp tstamp_rx_ctrl;
 };
 
 #define CS_IO_MAGIC		'C'
-- 
cgit 


From 2600896d659b638f5d4981dac4cd9aa03c8213da Mon Sep 17 00:00:00 2001
From: Yoshinori Sato <ysato@users.sourceforge.jp>
Date: Wed, 28 Jan 2015 02:54:39 +0900
Subject: Add ELF machine

Signed-off-by: Yoshinori Sato <ysato@users.sourceforge.jp>
---
 include/uapi/linux/elf-em.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h
index ae99f7743cf4..b08829667ed7 100644
--- a/include/uapi/linux/elf-em.h
+++ b/include/uapi/linux/elf-em.h
@@ -25,6 +25,7 @@
 #define EM_ARM		40	/* ARM 32 bit */
 #define EM_SH		42	/* SuperH */
 #define EM_SPARCV9	43	/* SPARC v9 64-bit */
+#define EM_H8_300	46	/* Renesas H8/300 */
 #define EM_IA_64	50	/* HP/Intel IA-64 */
 #define EM_X86_64	62	/* AMD x86-64 */
 #define EM_S390		22	/* IBM S/390 */
-- 
cgit 


From 42bcce87d763b4d22dc6d3a0c0b60c6b49820de8 Mon Sep 17 00:00:00 2001
From: Anish Bhatt <anish@chelsio.com>
Date: Mon, 22 Jun 2015 17:44:35 -0700
Subject: dcb : Fix incorrect documentation for struct dcb_app

While IEEE and CEE use the same structure to store apps, the selector
and priority fields for both are different. Only the priority field is
explained, add documentation explaining how the selector field differs
for both.

cgdcbxd code shows an example of how selector fields differ.

Signed-off-by: Anish Bhatt <anish@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/dcbnl.h | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dcbnl.h b/include/uapi/linux/dcbnl.h
index 6497d7933d5b..3ea470f35e40 100644
--- a/include/uapi/linux/dcbnl.h
+++ b/include/uapi/linux/dcbnl.h
@@ -207,8 +207,7 @@ struct cee_pfc {
 #define IEEE_8021QAZ_APP_SEL_ANY	4
 
 /* This structure contains the IEEE 802.1Qaz APP managed object. This
- * object is also used for the CEE std as well. There is no difference
- * between the objects.
+ * object is also used for the CEE std as well.
  *
  * @selector: protocol identifier type
  * @protocol: protocol of type indicated
@@ -216,13 +215,18 @@ struct cee_pfc {
  *            8-bit 802.1p user priority bitmap for CEE
  *
  * ----
- *  Selector field values
+ *  Selector field values for IEEE 802.1Qaz
  *	0	Reserved
  *	1	Ethertype
  *	2	Well known port number over TCP or SCTP
  *	3	Well known port number over UDP or DCCP
  *	4	Well known port number over TCP, SCTP, UDP, or DCCP
  *	5-7	Reserved
+ *
+ *  Selector field values for CEE
+ *	0	Ethertype
+ *	1	Well known port number over TCP or UDP
+ *	2-3	Reserved
  */
 struct dcb_app {
 	__u8	selector;
-- 
cgit 


From 8a3d03166f19329b46c6f9e900f93a89f446077b Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Tue, 23 Jun 2015 13:45:36 -0400
Subject: net: track link-status of ipv4 nexthops

Add a fib flag called RTNH_F_LINKDOWN to any ipv4 nexthops that are
reachable via an interface where carrier is off.  No action is taken,
but additional flags are passed to userspace to indicate carrier status.

This also includes a cleanup to fib_disable_ip to more clearly indicate
what event made the function call to replace the more cryptic force
option previously used.

v2: Split out kernel functionality into 2 patches, this patch simply
sets and clears new nexthop flag RTNH_F_LINKDOWN.

v3: Cleanups suggested by Alex as well as a bug noticed in
fib_sync_down_dev and fib_sync_up when multipath was not enabled.

v5: Whitespace and variable declaration fixups suggested by Dave.

v6: Style fixups noticed by Dave; ran checkpatch to be sure I got them
all.

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip_fib.h           |  4 +--
 include/uapi/linux/rtnetlink.h |  3 +++
 net/ipv4/fib_frontend.c        | 23 ++++++++++------
 net/ipv4/fib_semantics.c       | 60 +++++++++++++++++++++++++++++++++---------
 4 files changed, 67 insertions(+), 23 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 54271ed0ed45..f73d27c5575a 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -305,9 +305,9 @@ void fib_flush_external(struct net *net);
 
 /* Exported by fib_semantics.c */
 int ip_fib_check_default(__be32 gw, struct net_device *dev);
-int fib_sync_down_dev(struct net_device *dev, int force);
+int fib_sync_down_dev(struct net_device *dev, unsigned long event);
 int fib_sync_down_addr(struct net *net, __be32 local);
-int fib_sync_up(struct net_device *dev);
+int fib_sync_up(struct net_device *dev, unsigned int nh_flags);
 void fib_select_multipath(struct fib_result *res);
 
 /* Exported by fib_trie.c */
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 17fb02f488da..fdd8f07f1d34 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -338,6 +338,9 @@ struct rtnexthop {
 #define RTNH_F_PERVASIVE	2	/* Do recursive gateway lookup	*/
 #define RTNH_F_ONLINK		4	/* Gateway is forced on link	*/
 #define RTNH_F_OFFLOAD		8	/* offloaded route */
+#define RTNH_F_LINKDOWN		16	/* carrier-down on nexthop */
+
+#define RTNH_COMPARE_MASK	(RTNH_F_DEAD | RTNH_F_LINKDOWN)
 
 /* Macros to handle hexthops */
 
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 872494e6e6eb..534eb1485045 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1063,9 +1063,9 @@ static void nl_fib_lookup_exit(struct net *net)
 	net->ipv4.fibnl = NULL;
 }
 
-static void fib_disable_ip(struct net_device *dev, int force)
+static void fib_disable_ip(struct net_device *dev, unsigned long event)
 {
-	if (fib_sync_down_dev(dev, force))
+	if (fib_sync_down_dev(dev, event))
 		fib_flush(dev_net(dev));
 	rt_cache_flush(dev_net(dev));
 	arp_ifdown(dev);
@@ -1081,7 +1081,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 	case NETDEV_UP:
 		fib_add_ifaddr(ifa);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		fib_sync_up(dev);
+		fib_sync_up(dev, RTNH_F_DEAD);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
 		rt_cache_flush(dev_net(dev));
@@ -1093,7 +1093,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event,
 			/* Last address was deleted from this interface.
 			 * Disable IP.
 			 */
-			fib_disable_ip(dev, 1);
+			fib_disable_ip(dev, event);
 		} else {
 			rt_cache_flush(dev_net(dev));
 		}
@@ -1107,9 +1107,10 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 	struct net_device *dev = netdev_notifier_info_to_dev(ptr);
 	struct in_device *in_dev;
 	struct net *net = dev_net(dev);
+	unsigned int flags;
 
 	if (event == NETDEV_UNREGISTER) {
-		fib_disable_ip(dev, 2);
+		fib_disable_ip(dev, event);
 		rt_flush_dev(dev);
 		return NOTIFY_DONE;
 	}
@@ -1124,16 +1125,22 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo
 			fib_add_ifaddr(ifa);
 		} endfor_ifa(in_dev);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-		fib_sync_up(dev);
+		fib_sync_up(dev, RTNH_F_DEAD);
 #endif
 		atomic_inc(&net->ipv4.dev_addr_genid);
 		rt_cache_flush(net);
 		break;
 	case NETDEV_DOWN:
-		fib_disable_ip(dev, 0);
+		fib_disable_ip(dev, event);
 		break;
-	case NETDEV_CHANGEMTU:
 	case NETDEV_CHANGE:
+		flags = dev_get_flags(dev);
+		if (flags & (IFF_RUNNING | IFF_LOWER_UP))
+			fib_sync_up(dev, RTNH_F_LINKDOWN);
+		else
+			fib_sync_down_dev(dev, event);
+		/* fall through */
+	case NETDEV_CHANGEMTU:
 		rt_cache_flush(net);
 		break;
 	}
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 28ec3c1823bf..b1b305b1e340 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -266,7 +266,7 @@ static inline int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
-		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_F_DEAD))
+		    ((nh->nh_flags ^ onh->nh_flags) & ~RTNH_COMPARE_MASK))
 			return -1;
 		onh++;
 	} endfor_nexthops(fi);
@@ -318,7 +318,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi)
 		    nfi->fib_type == fi->fib_type &&
 		    memcmp(nfi->fib_metrics, fi->fib_metrics,
 			   sizeof(u32) * RTAX_MAX) == 0 &&
-		    ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 &&
+		    !((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_COMPARE_MASK) &&
 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
 			return fi;
 	}
@@ -604,6 +604,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 				return -ENODEV;
 			if (!(dev->flags & IFF_UP))
 				return -ENETDOWN;
+			if (!netif_carrier_ok(dev))
+				nh->nh_flags |= RTNH_F_LINKDOWN;
 			nh->nh_dev = dev;
 			dev_hold(dev);
 			nh->nh_scope = RT_SCOPE_LINK;
@@ -636,6 +638,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 		if (!dev)
 			goto out;
 		dev_hold(dev);
+		if (!netif_carrier_ok(dev))
+			nh->nh_flags |= RTNH_F_LINKDOWN;
 		err = (dev->flags & IFF_UP) ? 0 : -ENETDOWN;
 	} else {
 		struct in_device *in_dev;
@@ -654,6 +658,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 		nh->nh_dev = in_dev->dev;
 		dev_hold(nh->nh_dev);
 		nh->nh_scope = RT_SCOPE_HOST;
+		if (!netif_carrier_ok(nh->nh_dev))
+			nh->nh_flags |= RTNH_F_LINKDOWN;
 		err = 0;
 	}
 out:
@@ -920,11 +926,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg)
 		if (!nh->nh_dev)
 			goto failure;
 	} else {
+		int linkdown = 0;
+
 		change_nexthops(fi) {
 			err = fib_check_nh(cfg, fi, nexthop_nh);
 			if (err != 0)
 				goto failure;
+			if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+				linkdown++;
 		} endfor_nexthops(fi)
+		if (linkdown == fi->fib_nhs)
+			fi->fib_flags |= RTNH_F_LINKDOWN;
 	}
 
 	if (fi->fib_prefsrc) {
@@ -1103,7 +1115,7 @@ int fib_sync_down_addr(struct net *net, __be32 local)
 	return ret;
 }
 
-int fib_sync_down_dev(struct net_device *dev, int force)
+int fib_sync_down_dev(struct net_device *dev, unsigned long event)
 {
 	int ret = 0;
 	int scope = RT_SCOPE_NOWHERE;
@@ -1112,7 +1124,8 @@ int fib_sync_down_dev(struct net_device *dev, int force)
 	struct hlist_head *head = &fib_info_devhash[hash];
 	struct fib_nh *nh;
 
-	if (force)
+	if (event == NETDEV_UNREGISTER ||
+	    event == NETDEV_DOWN)
 		scope = -1;
 
 	hlist_for_each_entry(nh, head, nh_hash) {
@@ -1129,7 +1142,15 @@ int fib_sync_down_dev(struct net_device *dev, int force)
 				dead++;
 			else if (nexthop_nh->nh_dev == dev &&
 				 nexthop_nh->nh_scope != scope) {
-				nexthop_nh->nh_flags |= RTNH_F_DEAD;
+				switch (event) {
+				case NETDEV_DOWN:
+				case NETDEV_UNREGISTER:
+					nexthop_nh->nh_flags |= RTNH_F_DEAD;
+					/* fall through */
+				case NETDEV_CHANGE:
+					nexthop_nh->nh_flags |= RTNH_F_LINKDOWN;
+					break;
+				}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 				spin_lock_bh(&fib_multipath_lock);
 				fi->fib_power -= nexthop_nh->nh_power;
@@ -1139,14 +1160,23 @@ int fib_sync_down_dev(struct net_device *dev, int force)
 				dead++;
 			}
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
-			if (force > 1 && nexthop_nh->nh_dev == dev) {
+			if (event == NETDEV_UNREGISTER &&
+			    nexthop_nh->nh_dev == dev) {
 				dead = fi->fib_nhs;
 				break;
 			}
 #endif
 		} endfor_nexthops(fi)
 		if (dead == fi->fib_nhs) {
-			fi->fib_flags |= RTNH_F_DEAD;
+			switch (event) {
+			case NETDEV_DOWN:
+			case NETDEV_UNREGISTER:
+				fi->fib_flags |= RTNH_F_DEAD;
+				/* fall through */
+			case NETDEV_CHANGE:
+				fi->fib_flags |= RTNH_F_LINKDOWN;
+				break;
+			}
 			ret++;
 		}
 	}
@@ -1210,13 +1240,11 @@ out:
 	return;
 }
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
 /*
  * Dead device goes up. We wake up dead nexthops.
  * It takes sense only on multipath routes.
  */
-int fib_sync_up(struct net_device *dev)
+int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
 {
 	struct fib_info *prev_fi;
 	unsigned int hash;
@@ -1243,7 +1271,7 @@ int fib_sync_up(struct net_device *dev)
 		prev_fi = fi;
 		alive = 0;
 		change_nexthops(fi) {
-			if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
+			if (!(nexthop_nh->nh_flags & nh_flags)) {
 				alive++;
 				continue;
 			}
@@ -1254,14 +1282,18 @@ int fib_sync_up(struct net_device *dev)
 			    !__in_dev_get_rtnl(dev))
 				continue;
 			alive++;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
 			spin_lock_bh(&fib_multipath_lock);
 			nexthop_nh->nh_power = 0;
-			nexthop_nh->nh_flags &= ~RTNH_F_DEAD;
+			nexthop_nh->nh_flags &= ~nh_flags;
 			spin_unlock_bh(&fib_multipath_lock);
+#else
+			nexthop_nh->nh_flags &= ~nh_flags;
+#endif
 		} endfor_nexthops(fi)
 
 		if (alive > 0) {
-			fi->fib_flags &= ~RTNH_F_DEAD;
+			fi->fib_flags &= ~nh_flags;
 			ret++;
 		}
 	}
@@ -1269,6 +1301,8 @@ int fib_sync_up(struct net_device *dev)
 	return ret;
 }
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
 /*
  * The algorithm is suboptimal, but it provides really
  * fair weighted route distribution.
-- 
cgit 


From 0eeb075fad736fb92620af995c47c204bbb5e829 Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Tue, 23 Jun 2015 13:45:37 -0400
Subject: net: ipv4 sysctl option to ignore routes when nexthop link is down

This feature is only enabled with the new per-interface or ipv4 global
sysctls called 'ignore_routes_with_linkdown'.

net.ipv4.conf.all.ignore_routes_with_linkdown = 0
net.ipv4.conf.default.ignore_routes_with_linkdown = 0
net.ipv4.conf.lo.ignore_routes_with_linkdown = 0
...

When the above sysctls are set, will report to userspace that a route is
dead and will no longer resolve to this nexthop when performing a fib
lookup.  This will signal to userspace that the route will not be
selected.  The signalling of a RTNH_F_DEAD is only passed to userspace
if the sysctl is enabled and link is down.  This was done as without it
the netlink listeners would have no idea whether or not a nexthop would
be selected.   The kernel only sets RTNH_F_DEAD internally if the
interface has IFF_UP cleared.

With the new sysctl set, the following behavior can be observed
(interface p8p1 is link-down):

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1  proto kernel  scope link  src 10.0.5.15
70.0.0.0/24 dev p7p1  proto kernel  scope link  src 70.0.0.1
80.0.0.0/24 dev p8p1  proto kernel  scope link  src 80.0.0.1 dead linkdown
90.0.0.0/24 via 80.0.0.2 dev p8p1  metric 1 dead linkdown
90.0.0.0/24 via 70.0.0.2 dev p7p1  metric 2
90.0.0.1 via 70.0.0.2 dev p7p1  src 70.0.0.1
    cache
local 80.0.0.1 dev lo  src 80.0.0.1
    cache <local>
80.0.0.2 via 10.0.5.2 dev p9p1  src 10.0.5.15
    cache

While the route does remain in the table (so it can be modified if
needed rather than being wiped away as it would be if IFF_UP was
cleared), the proper next-hop is chosen automatically when the link is
down.  Now interface p8p1 is linked-up:

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1  proto kernel  scope link  src 10.0.5.15
70.0.0.0/24 dev p7p1  proto kernel  scope link  src 70.0.0.1
80.0.0.0/24 dev p8p1  proto kernel  scope link  src 80.0.0.1
90.0.0.0/24 via 80.0.0.2 dev p8p1  metric 1
90.0.0.0/24 via 70.0.0.2 dev p7p1  metric 2
192.168.56.0/24 dev p2p1  proto kernel  scope link  src 192.168.56.2
90.0.0.1 via 80.0.0.2 dev p8p1  src 80.0.0.1
    cache
local 80.0.0.1 dev lo  src 80.0.0.1
    cache <local>
80.0.0.2 dev p8p1  src 80.0.0.1
    cache

and the output changes to what one would expect.

If the sysctl is not set, the following output would be expected when
p8p1 is down:

default via 10.0.5.2 dev p9p1
10.0.5.0/24 dev p9p1  proto kernel  scope link  src 10.0.5.15
70.0.0.0/24 dev p7p1  proto kernel  scope link  src 70.0.0.1
80.0.0.0/24 dev p8p1  proto kernel  scope link  src 80.0.0.1 linkdown
90.0.0.0/24 via 80.0.0.2 dev p8p1  metric 1 linkdown
90.0.0.0/24 via 70.0.0.2 dev p7p1  metric 2

Since the dead flag does not appear, there should be no expectation that
the kernel would skip using this route due to link being down.

v2: Split kernel changes into 2 patches, this actually makes a
behavioral change if the sysctl is set.  Also took suggestion from Alex
to simplify code by only checking sysctl during fib lookup and
suggestion from Scott to add a per-interface sysctl.

v3: Code clean-ups to make it more readable and efficient as well as a
reverse path check fix.

v4: Drop binary sysctl

v5: Whitespace fixups from Dave

v6: Style changes from Dave and checkpatch suggestions

v7: One more checkpatch fixup

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Signed-off-by: Dinesh Dutt <ddutt@cumulusnetworks.com>
Acked-by: Scott Feldman <sfeldma@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inetdevice.h        |  3 +++
 include/net/fib_rules.h           |  3 ++-
 include/net/ip_fib.h              | 16 +++++++++-------
 include/uapi/linux/ip.h           |  1 +
 net/ipv4/devinet.c                |  2 ++
 net/ipv4/fib_frontend.c           |  6 +++---
 net/ipv4/fib_rules.c              |  5 +++--
 net/ipv4/fib_semantics.c          | 33 ++++++++++++++++++++++++++++-----
 net/ipv4/fib_trie.c               |  6 ++++++
 net/ipv4/netfilter/ipt_rpfilter.c |  2 +-
 net/ipv4/route.c                  | 10 +++++-----
 11 files changed, 63 insertions(+), 24 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h
index 0a21fbefdfbe..a4328cea376a 100644
--- a/include/linux/inetdevice.h
+++ b/include/linux/inetdevice.h
@@ -120,6 +120,9 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev)
 	 || (!IN_DEV_FORWARD(in_dev) && \
 	  IN_DEV_ORCONF((in_dev), ACCEPT_REDIRECTS)))
 
+#define IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) \
+	IN_DEV_CONF_GET((in_dev), IGNORE_ROUTES_WITH_LINKDOWN)
+
 #define IN_DEV_ARPFILTER(in_dev)	IN_DEV_ORCONF((in_dev), ARPFILTER)
 #define IN_DEV_ARP_ACCEPT(in_dev)	IN_DEV_ORCONF((in_dev), ARP_ACCEPT)
 #define IN_DEV_ARP_ANNOUNCE(in_dev)	IN_DEV_MAXCONF((in_dev), ARP_ANNOUNCE)
diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h
index 6d67383a5114..903a55efbffe 100644
--- a/include/net/fib_rules.h
+++ b/include/net/fib_rules.h
@@ -36,7 +36,8 @@ struct fib_lookup_arg {
 	void			*result;
 	struct fib_rule		*rule;
 	int			flags;
-#define FIB_LOOKUP_NOREF	1
+#define FIB_LOOKUP_NOREF		1
+#define FIB_LOOKUP_IGNORE_LINKSTATE	2
 };
 
 struct fib_rules_ops {
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index f73d27c5575a..49c142bdf01e 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -226,7 +226,7 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id)
 }
 
 static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
-			     struct fib_result *res)
+			     struct fib_result *res, unsigned int flags)
 {
 	struct fib_table *tb;
 	int err = -ENETUNREACH;
@@ -234,7 +234,7 @@ static inline int fib_lookup(struct net *net, const struct flowi4 *flp,
 	rcu_read_lock();
 
 	tb = fib_get_table(net, RT_TABLE_MAIN);
-	if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+	if (tb && !fib_table_lookup(tb, flp, res, flags | FIB_LOOKUP_NOREF))
 		err = 0;
 
 	rcu_read_unlock();
@@ -249,16 +249,18 @@ void __net_exit fib4_rules_exit(struct net *net);
 struct fib_table *fib_new_table(struct net *net, u32 id);
 struct fib_table *fib_get_table(struct net *net, u32 id);
 
-int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res);
+int __fib_lookup(struct net *net, struct flowi4 *flp,
+		 struct fib_result *res, unsigned int flags);
 
 static inline int fib_lookup(struct net *net, struct flowi4 *flp,
-			     struct fib_result *res)
+			     struct fib_result *res, unsigned int flags)
 {
 	struct fib_table *tb;
 	int err;
 
+	flags |= FIB_LOOKUP_NOREF;
 	if (net->ipv4.fib_has_custom_rules)
-		return __fib_lookup(net, flp, res);
+		return __fib_lookup(net, flp, res, flags);
 
 	rcu_read_lock();
 
@@ -266,11 +268,11 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp,
 
 	for (err = 0; !err; err = -ENETUNREACH) {
 		tb = rcu_dereference_rtnl(net->ipv4.fib_main);
-		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+		if (tb && !fib_table_lookup(tb, flp, res, flags))
 			break;
 
 		tb = rcu_dereference_rtnl(net->ipv4.fib_default);
-		if (tb && !fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF))
+		if (tb && !fib_table_lookup(tb, flp, res, flags))
 			break;
 	}
 
diff --git a/include/uapi/linux/ip.h b/include/uapi/linux/ip.h
index 411959405ab6..08f894d2ddbd 100644
--- a/include/uapi/linux/ip.h
+++ b/include/uapi/linux/ip.h
@@ -164,6 +164,7 @@ enum
 	IPV4_DEVCONF_ROUTE_LOCALNET,
 	IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL,
 	IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL,
+	IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN,
 	__IPV4_DEVCONF_MAX
 };
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 419d23c53ec7..7498716e8f54 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2169,6 +2169,8 @@ static struct devinet_sysctl_table {
 					"igmpv2_unsolicited_report_interval"),
 		DEVINET_SYSCTL_RW_ENTRY(IGMPV3_UNSOLICITED_REPORT_INTERVAL,
 					"igmpv3_unsolicited_report_interval"),
+		DEVINET_SYSCTL_RW_ENTRY(IGNORE_ROUTES_WITH_LINKDOWN,
+					"ignore_routes_with_linkdown"),
 
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
 		DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 534eb1485045..6bbc54940eb4 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -280,7 +280,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb)
 		fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos);
 		fl4.flowi4_scope = scope;
 		fl4.flowi4_mark = IN_DEV_SRC_VMARK(in_dev) ? skb->mark : 0;
-		if (!fib_lookup(net, &fl4, &res))
+		if (!fib_lookup(net, &fl4, &res, 0))
 			return FIB_RES_PREFSRC(net, res);
 	} else {
 		scope = RT_SCOPE_LINK;
@@ -319,7 +319,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0;
 
 	net = dev_net(dev);
-	if (fib_lookup(net, &fl4, &res))
+	if (fib_lookup(net, &fl4, &res, 0))
 		goto last_resort;
 	if (res.type != RTN_UNICAST &&
 	    (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev)))
@@ -354,7 +354,7 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst,
 	fl4.flowi4_oif = dev->ifindex;
 
 	ret = 0;
-	if (fib_lookup(net, &fl4, &res) == 0) {
+	if (fib_lookup(net, &fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE) == 0) {
 		if (res.type == RTN_UNICAST)
 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 	}
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 56151982f74e..18123d50f576 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -47,11 +47,12 @@ struct fib4_rule {
 #endif
 };
 
-int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res)
+int __fib_lookup(struct net *net, struct flowi4 *flp,
+		 struct fib_result *res, unsigned int flags)
 {
 	struct fib_lookup_arg arg = {
 		.result = res,
-		.flags = FIB_LOOKUP_NOREF,
+		.flags = flags,
 	};
 	int err;
 
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index b1b305b1e340..3bfccd83551c 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -623,7 +623,8 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
 			/* It is not necessary, but requires a bit of thinking */
 			if (fl4.flowi4_scope < RT_SCOPE_LINK)
 				fl4.flowi4_scope = RT_SCOPE_LINK;
-			err = fib_lookup(net, &fl4, &res);
+			err = fib_lookup(net, &fl4, &res,
+					 FIB_LOOKUP_IGNORE_LINKSTATE);
 			if (err) {
 				rcu_read_unlock();
 				return err;
@@ -1035,12 +1036,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 	    nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc))
 		goto nla_put_failure;
 	if (fi->fib_nhs == 1) {
+		struct in_device *in_dev;
+
 		if (fi->fib_nh->nh_gw &&
 		    nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw))
 			goto nla_put_failure;
 		if (fi->fib_nh->nh_oif &&
 		    nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif))
 			goto nla_put_failure;
+		if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) {
+			in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev);
+			if (in_dev &&
+			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+				rtm->rtm_flags |= RTNH_F_DEAD;
+		}
 #ifdef CONFIG_IP_ROUTE_CLASSID
 		if (fi->fib_nh[0].nh_tclassid &&
 		    nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid))
@@ -1057,11 +1066,19 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			goto nla_put_failure;
 
 		for_nexthops(fi) {
+			struct in_device *in_dev;
+
 			rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
 			if (!rtnh)
 				goto nla_put_failure;
 
 			rtnh->rtnh_flags = nh->nh_flags & 0xFF;
+			if (nh->nh_flags & RTNH_F_LINKDOWN) {
+				in_dev = __in_dev_get_rcu(nh->nh_dev);
+				if (in_dev &&
+				    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev))
+					rtnh->rtnh_flags |= RTNH_F_DEAD;
+			}
 			rtnh->rtnh_hops = nh->nh_weight - 1;
 			rtnh->rtnh_ifindex = nh->nh_oif;
 
@@ -1310,16 +1327,22 @@ int fib_sync_up(struct net_device *dev, unsigned int nh_flags)
 void fib_select_multipath(struct fib_result *res)
 {
 	struct fib_info *fi = res->fi;
+	struct in_device *in_dev;
 	int w;
 
 	spin_lock_bh(&fib_multipath_lock);
 	if (fi->fib_power <= 0) {
 		int power = 0;
 		change_nexthops(fi) {
-			if (!(nexthop_nh->nh_flags & RTNH_F_DEAD)) {
-				power += nexthop_nh->nh_weight;
-				nexthop_nh->nh_power = nexthop_nh->nh_weight;
-			}
+			in_dev = __in_dev_get_rcu(nexthop_nh->nh_dev);
+			if (nexthop_nh->nh_flags & RTNH_F_DEAD)
+				continue;
+			if (in_dev &&
+			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+			    nexthop_nh->nh_flags & RTNH_F_LINKDOWN)
+				continue;
+			power += nexthop_nh->nh_weight;
+			nexthop_nh->nh_power = nexthop_nh->nh_weight;
 		} endfor_nexthops(fi);
 		fi->fib_power = power;
 		if (power <= 0) {
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 6c666a9f1bd5..15d32612e3c6 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1412,9 +1412,15 @@ found:
 			continue;
 		for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) {
 			const struct fib_nh *nh = &fi->fib_nh[nhsel];
+			struct in_device *in_dev = __in_dev_get_rcu(nh->nh_dev);
 
 			if (nh->nh_flags & RTNH_F_DEAD)
 				continue;
+			if (in_dev &&
+			    IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+			    nh->nh_flags & RTNH_F_LINKDOWN &&
+			    !(fib_flags & FIB_LOOKUP_IGNORE_LINKSTATE))
+				continue;
 			if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif)
 				continue;
 
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index 4bfaedf9b34e..8618fd150c96 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -40,7 +40,7 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
 	struct net *net = dev_net(dev);
 	int ret __maybe_unused;
 
-	if (fib_lookup(net, fl4, &res))
+	if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE))
 		return false;
 
 	if (res.type != RTN_UNICAST) {
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index f6055984c307..d0362a2de3d3 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -747,7 +747,7 @@ static void __ip_do_redirect(struct rtable *rt, struct sk_buff *skb, struct flow
 		if (!(n->nud_state & NUD_VALID)) {
 			neigh_event_send(n, NULL);
 		} else {
-			if (fib_lookup(net, fl4, &res) == 0) {
+			if (fib_lookup(net, fl4, &res, 0) == 0) {
 				struct fib_nh *nh = &FIB_RES_NH(res);
 
 				update_or_create_fnhe(nh, fl4->daddr, new_gw,
@@ -975,7 +975,7 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu)
 		return;
 
 	rcu_read_lock();
-	if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) {
+	if (fib_lookup(dev_net(dst->dev), fl4, &res, 0) == 0) {
 		struct fib_nh *nh = &FIB_RES_NH(res);
 
 		update_or_create_fnhe(nh, fl4->daddr, 0, mtu,
@@ -1186,7 +1186,7 @@ void ip_rt_get_source(u8 *addr, struct sk_buff *skb, struct rtable *rt)
 		fl4.flowi4_mark = skb->mark;
 
 		rcu_read_lock();
-		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res) == 0)
+		if (fib_lookup(dev_net(rt->dst.dev), &fl4, &res, 0) == 0)
 			src = FIB_RES_PREFSRC(dev_net(rt->dst.dev), res);
 		else
 			src = inet_select_addr(rt->dst.dev,
@@ -1716,7 +1716,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 	fl4.flowi4_scope = RT_SCOPE_UNIVERSE;
 	fl4.daddr = daddr;
 	fl4.saddr = saddr;
-	err = fib_lookup(net, &fl4, &res);
+	err = fib_lookup(net, &fl4, &res, 0);
 	if (err != 0) {
 		if (!IN_DEV_FORWARD(in_dev))
 			err = -EHOSTUNREACH;
@@ -2123,7 +2123,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4)
 		goto make_route;
 	}
 
-	if (fib_lookup(net, fl4, &res)) {
+	if (fib_lookup(net, fl4, &res, 0)) {
 		res.fi = NULL;
 		res.table = NULL;
 		if (fl4->flowi4_oif) {
-- 
cgit 


From 204621551b2a0060a013b92f7add4d5c452fa7cb Mon Sep 17 00:00:00 2001
From: Phil Sutter <phil@nwl.cc>
Date: Wed, 24 Jun 2015 11:02:51 +0200
Subject: net: inet_diag: export IPV6_V6ONLY sockopt

For AF_INET6 sockets, the value of struct ipv6_pinfo.ipv6only is
exported to userspace. It indicates whether a socket bound to in6addr_any
listens on IPv4 as well as IPv6. Since the socket is natively IPv6, it is not
listed by e.g. 'ss -l -4'.

This patch is accompanied by an appropriate one for iproute2 to enable
the additional information in 'ss -e'.

Signed-off-by: Phil Sutter <phil@nwl.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h | 3 ++-
 net/ipv4/inet_diag.c           | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index b629fc53b109..68a1f71fde9f 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -112,9 +112,10 @@ enum {
 	INET_DIAG_SHUTDOWN,
 	INET_DIAG_DCTCPINFO,
 	INET_DIAG_PROTOCOL,  /* response attribute only */
+	INET_DIAG_SKV6ONLY,
 };
 
-#define INET_DIAG_MAX INET_DIAG_PROTOCOL
+#define INET_DIAG_MAX INET_DIAG_SKV6ONLY
 
 /* INET_DIAG_MEM */
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 516a1f6fbdd3..9bc26677058e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -151,6 +151,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
 			if (nla_put_u8(skb, INET_DIAG_TCLASS,
 				       inet6_sk(sk)->tclass) < 0)
 				goto errout;
+
+		if (ipv6_only_sock(sk) &&
+		    nla_put_u8(skb, INET_DIAG_SKV6ONLY, 1))
+			goto errout;
 	}
 #endif
 
-- 
cgit 


From 62232e45f4a265abb43f0acf16e58f5d0b6e1ec9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 8 Jun 2015 14:27:06 -0400
Subject: libnvdimm: control (ioctl) messages for nvdimm_bus and nvdimm devices

Most discovery/configuration of the nvdimm-subsystem is done via sysfs
attributes.  However, some nvdimm_bus instances, particularly the
ACPI.NFIT bus, define a small set of messages that can be passed to the
platform.  For convenience we derive the initial libnvdimm-ioctl command
formats directly from the NFIT DSM Interface Example formats.

    ND_CMD_SMART: media health and diagnostics
    ND_CMD_GET_CONFIG_SIZE: size of the label space
    ND_CMD_GET_CONFIG_DATA: read label space
    ND_CMD_SET_CONFIG_DATA: write label space
    ND_CMD_VENDOR: vendor-specific command passthrough
    ND_CMD_ARS_CAP: report address-range-scrubbing capabilities
    ND_CMD_ARS_START: initiate scrubbing
    ND_CMD_ARS_STATUS: report on scrubbing state
    ND_CMD_SMART_THRESHOLD: configure alarm thresholds for smart events

If a platform later defines different commands than this set it is
straightforward to extend support to those formats.

Most of the commands target a specific dimm.  However, the
address-range-scrubbing commands target the bus.  The 'commands'
attribute in sysfs of an nvdimm_bus, or nvdimm, enumerate the supported
commands for that object.

Cc: <linux-acpi@vger.kernel.org>
Cc: Robert Moore <robert.moore@intel.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reported-by: Nicholas Moulin <nicholas.w.moulin@linux.intel.com>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/Kconfig       |  12 ++
 drivers/acpi/nfit.c        | 216 +++++++++++++++++++++++++++++-
 drivers/acpi/nfit.h        |   3 +
 drivers/nvdimm/bus.c       | 326 ++++++++++++++++++++++++++++++++++++++++++++-
 drivers/nvdimm/core.c      |  16 +++
 drivers/nvdimm/dimm_devs.c |  38 +++++-
 drivers/nvdimm/nd-core.h   |   3 +
 include/linux/libnvdimm.h  |  27 +++-
 include/uapi/linux/Kbuild  |   1 +
 include/uapi/linux/ndctl.h | 178 +++++++++++++++++++++++++
 10 files changed, 810 insertions(+), 10 deletions(-)
 create mode 100644 include/uapi/linux/ndctl.h

(limited to 'include/uapi/linux')

diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig
index 300b4ef3712b..9c43ae301300 100644
--- a/drivers/acpi/Kconfig
+++ b/drivers/acpi/Kconfig
@@ -397,6 +397,18 @@ config ACPI_NFIT
 	  To compile this driver as a module, choose M here:
 	  the module will be called nfit.
 
+config ACPI_NFIT_DEBUG
+	bool "NFIT DSM debug"
+	depends on ACPI_NFIT
+	depends on DYNAMIC_DEBUG
+	default n
+	help
+	  Enabling this option causes the nfit driver to dump the
+	  input and output buffers of _DSM operations on the ACPI0012
+	  device and its children.  This can be very verbose, so leave
+	  it disabled unless you are debugging a hardware / firmware
+	  issue.
+
 source "drivers/acpi/apei/Kconfig"
 
 config ACPI_EXTLOG
diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 9fd7781f966e..9112a6210a4b 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -13,6 +13,7 @@
 #include <linux/list_sort.h>
 #include <linux/libnvdimm.h>
 #include <linux/module.h>
+#include <linux/ndctl.h>
 #include <linux/list.h>
 #include <linux/acpi.h>
 #include "nfit.h"
@@ -24,11 +25,153 @@ static const u8 *to_nfit_uuid(enum nfit_uuids id)
 	return nfit_uuid[id];
 }
 
+static struct acpi_nfit_desc *to_acpi_nfit_desc(
+		struct nvdimm_bus_descriptor *nd_desc)
+{
+	return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
+}
+
+static struct acpi_device *to_acpi_dev(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+
+	/*
+	 * If provider == 'ACPI.NFIT' we can assume 'dev' is a struct
+	 * acpi_device.
+	 */
+	if (!nd_desc->provider_name
+			|| strcmp(nd_desc->provider_name, "ACPI.NFIT") != 0)
+		return NULL;
+
+	return to_acpi_device(acpi_desc->dev);
+}
+
 static int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len)
 {
-	return -ENOTTY;
+	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
+	const struct nd_cmd_desc *desc = NULL;
+	union acpi_object in_obj, in_buf, *out_obj;
+	struct device *dev = acpi_desc->dev;
+	const char *cmd_name, *dimm_name;
+	unsigned long dsm_mask;
+	acpi_handle handle;
+	const u8 *uuid;
+	u32 offset;
+	int rc, i;
+
+	if (nvdimm) {
+		struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
+		struct acpi_device *adev = nfit_mem->adev;
+
+		if (!adev)
+			return -ENOTTY;
+		dimm_name = dev_name(&adev->dev);
+		cmd_name = nvdimm_cmd_name(cmd);
+		dsm_mask = nfit_mem->dsm_mask;
+		desc = nd_cmd_dimm_desc(cmd);
+		uuid = to_nfit_uuid(NFIT_DEV_DIMM);
+		handle = adev->handle;
+	} else {
+		struct acpi_device *adev = to_acpi_dev(acpi_desc);
+
+		cmd_name = nvdimm_bus_cmd_name(cmd);
+		dsm_mask = nd_desc->dsm_mask;
+		desc = nd_cmd_bus_desc(cmd);
+		uuid = to_nfit_uuid(NFIT_DEV_BUS);
+		handle = adev->handle;
+		dimm_name = "bus";
+	}
+
+	if (!desc || (cmd && (desc->out_num + desc->in_num == 0)))
+		return -ENOTTY;
+
+	if (!test_bit(cmd, &dsm_mask))
+		return -ENOTTY;
+
+	in_obj.type = ACPI_TYPE_PACKAGE;
+	in_obj.package.count = 1;
+	in_obj.package.elements = &in_buf;
+	in_buf.type = ACPI_TYPE_BUFFER;
+	in_buf.buffer.pointer = buf;
+	in_buf.buffer.length = 0;
+
+	/* libnvdimm has already validated the input envelope */
+	for (i = 0; i < desc->in_num; i++)
+		in_buf.buffer.length += nd_cmd_in_size(nvdimm, cmd, desc,
+				i, buf);
+
+	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+		dev_dbg(dev, "%s:%s cmd: %s input length: %d\n", __func__,
+				dimm_name, cmd_name, in_buf.buffer.length);
+		print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
+				4, in_buf.buffer.pointer, min_t(u32, 128,
+					in_buf.buffer.length), true);
+	}
+
+	out_obj = acpi_evaluate_dsm(handle, uuid, 1, cmd, &in_obj);
+	if (!out_obj) {
+		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
+				cmd_name);
+		return -EINVAL;
+	}
+
+	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
+		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
+				__func__, dimm_name, cmd_name, out_obj->type);
+		rc = -EINVAL;
+		goto out;
+	}
+
+	if (IS_ENABLED(CONFIG_ACPI_NFIT_DEBUG)) {
+		dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__,
+				dimm_name, cmd_name, out_obj->buffer.length);
+		print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4,
+				4, out_obj->buffer.pointer, min_t(u32, 128,
+					out_obj->buffer.length), true);
+	}
+
+	for (i = 0, offset = 0; i < desc->out_num; i++) {
+		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i, buf,
+				(u32 *) out_obj->buffer.pointer);
+
+		if (offset + out_size > out_obj->buffer.length) {
+			dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			break;
+		}
+
+		if (in_buf.buffer.length + offset + out_size > buf_len) {
+			dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			rc = -ENXIO;
+			goto out;
+		}
+		memcpy(buf + in_buf.buffer.length + offset,
+				out_obj->buffer.pointer + offset, out_size);
+		offset += out_size;
+	}
+	if (offset + in_buf.buffer.length < buf_len) {
+		if (i >= 1) {
+			/*
+			 * status valid, return the number of bytes left
+			 * unfilled in the output buffer
+			 */
+			rc = buf_len - offset - in_buf.buffer.length;
+		} else {
+			dev_err(dev, "%s:%s underrun cmd: %s buf_len: %d out_len: %d\n",
+					__func__, dimm_name, cmd_name, buf_len,
+					offset);
+			rc = -ENXIO;
+		}
+	} else
+		rc = 0;
+
+ out:
+	ACPI_FREE(out_obj);
+
+	return rc;
 }
 
 static const char *spa_type_name(u16 type)
@@ -489,6 +632,7 @@ static struct attribute_group acpi_nfit_dimm_attribute_group = {
 };
 
 static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
+	&nvdimm_attribute_group,
 	&acpi_nfit_dimm_attribute_group,
 	NULL,
 };
@@ -505,6 +649,50 @@ static struct nvdimm *acpi_nfit_dimm_by_handle(struct acpi_nfit_desc *acpi_desc,
 	return NULL;
 }
 
+static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_mem *nfit_mem, u32 device_handle)
+{
+	struct acpi_device *adev, *adev_dimm;
+	struct device *dev = acpi_desc->dev;
+	const u8 *uuid = to_nfit_uuid(NFIT_DEV_DIMM);
+	unsigned long long sta;
+	int i, rc = -ENODEV;
+	acpi_status status;
+
+	nfit_mem->dsm_mask = acpi_desc->dimm_dsm_force_en;
+	adev = to_acpi_dev(acpi_desc);
+	if (!adev)
+		return 0;
+
+	adev_dimm = acpi_find_child_device(adev, device_handle, false);
+	nfit_mem->adev = adev_dimm;
+	if (!adev_dimm) {
+		dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
+				device_handle);
+		return -ENODEV;
+	}
+
+	status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta);
+	if (status == AE_NOT_FOUND) {
+		dev_dbg(dev, "%s missing _STA, assuming enabled...\n",
+				dev_name(&adev_dimm->dev));
+		rc = 0;
+	} else if (ACPI_FAILURE(status))
+		dev_err(dev, "%s failed to retrieve_STA, disabling...\n",
+				dev_name(&adev_dimm->dev));
+	else if ((sta & ACPI_STA_DEVICE_ENABLED) == 0)
+		dev_info(dev, "%s disabled by firmware\n",
+				dev_name(&adev_dimm->dev));
+	else
+		rc = 0;
+
+	for (i = ND_CMD_SMART; i <= ND_CMD_VENDOR; i++)
+		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
+			set_bit(i, &nfit_mem->dsm_mask);
+
+	return rc;
+}
+
 static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_mem *nfit_mem;
@@ -513,6 +701,7 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 		struct nvdimm *nvdimm;
 		unsigned long flags = 0;
 		u32 device_handle;
+		int rc;
 
 		device_handle = __to_nfit_memdev(nfit_mem)->device_handle;
 		nvdimm = acpi_nfit_dimm_by_handle(acpi_desc, device_handle);
@@ -529,8 +718,13 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 		if (nfit_mem->bdw && nfit_mem->memdev_pmem)
 			flags |= NDD_ALIASING;
 
+		rc = acpi_nfit_add_dimm(acpi_desc, nfit_mem, device_handle);
+		if (rc)
+			continue;
+
 		nvdimm = nvdimm_create(acpi_desc->nvdimm_bus, nfit_mem,
-				acpi_nfit_dimm_attribute_groups, flags);
+				acpi_nfit_dimm_attribute_groups,
+				flags, &nfit_mem->dsm_mask);
 		if (!nvdimm)
 			return -ENOMEM;
 
@@ -540,6 +734,22 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 	return 0;
 }
 
+static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
+{
+	struct nvdimm_bus_descriptor *nd_desc = &acpi_desc->nd_desc;
+	const u8 *uuid = to_nfit_uuid(NFIT_DEV_BUS);
+	struct acpi_device *adev;
+	int i;
+
+	adev = to_acpi_dev(acpi_desc);
+	if (!adev)
+		return;
+
+	for (i = ND_CMD_ARS_CAP; i <= ND_CMD_ARS_STATUS; i++)
+		if (acpi_check_dsm(adev->handle, uuid, 1, 1ULL << i))
+			set_bit(i, &nd_desc->dsm_mask);
+}
+
 static int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
 {
 	struct device *dev = acpi_desc->dev;
@@ -567,6 +777,8 @@ static int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, acpi_size sz)
 	if (nfit_mem_init(acpi_desc) != 0)
 		return -ENOMEM;
 
+	acpi_nfit_init_dsms(acpi_desc);
+
 	return acpi_nfit_register_dimms(acpi_desc);
 }
 
diff --git a/drivers/acpi/nfit.h b/drivers/acpi/nfit.h
index 9dd437fe5563..b76e33629098 100644
--- a/drivers/acpi/nfit.h
+++ b/drivers/acpi/nfit.h
@@ -67,6 +67,8 @@ struct nfit_mem {
 	struct acpi_nfit_system_address *spa_dcr;
 	struct acpi_nfit_system_address *spa_bdw;
 	struct list_head list;
+	struct acpi_device *adev;
+	unsigned long dsm_mask;
 };
 
 struct acpi_nfit_desc {
@@ -79,6 +81,7 @@ struct acpi_nfit_desc {
 	struct list_head bdws;
 	struct nvdimm_bus *nvdimm_bus;
 	struct device *dev;
+	unsigned long dimm_dsm_force_en;
 };
 
 static inline struct acpi_nfit_memory_map *__to_nfit_memdev(
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index a8802577fb55..15f3a3ddc225 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -11,14 +11,18 @@
  * General Public License for more details.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/vmalloc.h>
 #include <linux/uaccess.h>
 #include <linux/fcntl.h>
 #include <linux/async.h>
+#include <linux/ndctl.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/io.h>
+#include <linux/mm.h>
 #include "nd-core.h"
 
+int nvdimm_major;
 static int nvdimm_bus_major;
 static struct class *nd_class;
 
@@ -47,19 +51,325 @@ void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus)
 	device_destroy(nd_class, MKDEV(nvdimm_bus_major, nvdimm_bus->id));
 }
 
+static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
+	[ND_CMD_IMPLEMENTED] = { },
+	[ND_CMD_SMART] = {
+		.out_num = 2,
+		.out_sizes = { 4, 8, },
+	},
+	[ND_CMD_SMART_THRESHOLD] = {
+		.out_num = 2,
+		.out_sizes = { 4, 8, },
+	},
+	[ND_CMD_DIMM_FLAGS] = {
+		.out_num = 2,
+		.out_sizes = { 4, 4 },
+	},
+	[ND_CMD_GET_CONFIG_SIZE] = {
+		.out_num = 3,
+		.out_sizes = { 4, 4, 4, },
+	},
+	[ND_CMD_GET_CONFIG_DATA] = {
+		.in_num = 2,
+		.in_sizes = { 4, 4, },
+		.out_num = 2,
+		.out_sizes = { 4, UINT_MAX, },
+	},
+	[ND_CMD_SET_CONFIG_DATA] = {
+		.in_num = 3,
+		.in_sizes = { 4, 4, UINT_MAX, },
+		.out_num = 1,
+		.out_sizes = { 4, },
+	},
+	[ND_CMD_VENDOR] = {
+		.in_num = 3,
+		.in_sizes = { 4, 4, UINT_MAX, },
+		.out_num = 3,
+		.out_sizes = { 4, 4, UINT_MAX, },
+	},
+};
+
+const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
+{
+	if (cmd < ARRAY_SIZE(__nd_cmd_dimm_descs))
+		return &__nd_cmd_dimm_descs[cmd];
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_dimm_desc);
+
+static const struct nd_cmd_desc __nd_cmd_bus_descs[] = {
+	[ND_CMD_IMPLEMENTED] = { },
+	[ND_CMD_ARS_CAP] = {
+		.in_num = 2,
+		.in_sizes = { 8, 8, },
+		.out_num = 2,
+		.out_sizes = { 4, 4, },
+	},
+	[ND_CMD_ARS_START] = {
+		.in_num = 4,
+		.in_sizes = { 8, 8, 2, 6, },
+		.out_num = 1,
+		.out_sizes = { 4, },
+	},
+	[ND_CMD_ARS_STATUS] = {
+		.out_num = 2,
+		.out_sizes = { 4, UINT_MAX, },
+	},
+};
+
+const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd)
+{
+	if (cmd < ARRAY_SIZE(__nd_cmd_bus_descs))
+		return &__nd_cmd_bus_descs[cmd];
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_bus_desc);
+
+u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, void *buf)
+{
+	if (idx >= desc->in_num)
+		return UINT_MAX;
+
+	if (desc->in_sizes[idx] < UINT_MAX)
+		return desc->in_sizes[idx];
+
+	if (nvdimm && cmd == ND_CMD_SET_CONFIG_DATA && idx == 2) {
+		struct nd_cmd_set_config_hdr *hdr = buf;
+
+		return hdr->in_length;
+	} else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2) {
+		struct nd_cmd_vendor_hdr *hdr = buf;
+
+		return hdr->in_length;
+	}
+
+	return UINT_MAX;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_in_size);
+
+u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
+		const u32 *out_field)
+{
+	if (idx >= desc->out_num)
+		return UINT_MAX;
+
+	if (desc->out_sizes[idx] < UINT_MAX)
+		return desc->out_sizes[idx];
+
+	if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && idx == 1)
+		return in_field[1];
+	else if (nvdimm && cmd == ND_CMD_VENDOR && idx == 2)
+		return out_field[1];
+	else if (!nvdimm && cmd == ND_CMD_ARS_STATUS && idx == 1)
+		return ND_CMD_ARS_STATUS_MAX;
+
+	return UINT_MAX;
+}
+EXPORT_SYMBOL_GPL(nd_cmd_out_size);
+
+static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
+		int read_only, unsigned int ioctl_cmd, unsigned long arg)
+{
+	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+	size_t buf_len = 0, in_len = 0, out_len = 0;
+	static char out_env[ND_CMD_MAX_ENVELOPE];
+	static char in_env[ND_CMD_MAX_ENVELOPE];
+	const struct nd_cmd_desc *desc = NULL;
+	unsigned int cmd = _IOC_NR(ioctl_cmd);
+	void __user *p = (void __user *) arg;
+	struct device *dev = &nvdimm_bus->dev;
+	const char *cmd_name, *dimm_name;
+	unsigned long dsm_mask;
+	void *buf;
+	int rc, i;
+
+	if (nvdimm) {
+		desc = nd_cmd_dimm_desc(cmd);
+		cmd_name = nvdimm_cmd_name(cmd);
+		dsm_mask = nvdimm->dsm_mask ? *(nvdimm->dsm_mask) : 0;
+		dimm_name = dev_name(&nvdimm->dev);
+	} else {
+		desc = nd_cmd_bus_desc(cmd);
+		cmd_name = nvdimm_bus_cmd_name(cmd);
+		dsm_mask = nd_desc->dsm_mask;
+		dimm_name = "bus";
+	}
+
+	if (!desc || (desc->out_num + desc->in_num == 0) ||
+			!test_bit(cmd, &dsm_mask))
+		return -ENOTTY;
+
+	/* fail write commands (when read-only) */
+	if (read_only)
+		switch (ioctl_cmd) {
+		case ND_IOCTL_VENDOR:
+		case ND_IOCTL_SET_CONFIG_DATA:
+		case ND_IOCTL_ARS_START:
+			dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
+					nvdimm ? nvdimm_cmd_name(cmd)
+					: nvdimm_bus_cmd_name(cmd));
+			return -EPERM;
+		default:
+			break;
+		}
+
+	/* process an input envelope */
+	for (i = 0; i < desc->in_num; i++) {
+		u32 in_size, copy;
+
+		in_size = nd_cmd_in_size(nvdimm, cmd, desc, i, in_env);
+		if (in_size == UINT_MAX) {
+			dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			return -ENXIO;
+		}
+		if (!access_ok(VERIFY_READ, p + in_len, in_size))
+			return -EFAULT;
+		if (in_len < sizeof(in_env))
+			copy = min_t(u32, sizeof(in_env) - in_len, in_size);
+		else
+			copy = 0;
+		if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
+			return -EFAULT;
+		in_len += in_size;
+	}
+
+	/* process an output envelope */
+	for (i = 0; i < desc->out_num; i++) {
+		u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
+				(u32 *) in_env, (u32 *) out_env);
+		u32 copy;
+
+		if (out_size == UINT_MAX) {
+			dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
+					__func__, dimm_name, cmd_name, i);
+			return -EFAULT;
+		}
+		if (!access_ok(VERIFY_WRITE, p + in_len + out_len, out_size))
+			return -EFAULT;
+		if (out_len < sizeof(out_env))
+			copy = min_t(u32, sizeof(out_env) - out_len, out_size);
+		else
+			copy = 0;
+		if (copy && copy_from_user(&out_env[out_len],
+					p + in_len + out_len, copy))
+			return -EFAULT;
+		out_len += out_size;
+	}
+
+	buf_len = out_len + in_len;
+	if (!access_ok(VERIFY_WRITE, p, sizeof(buf_len)))
+		return -EFAULT;
+
+	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
+		dev_dbg(dev, "%s:%s cmd: %s buf_len: %zu > %d\n", __func__,
+				dimm_name, cmd_name, buf_len,
+				ND_IOCTL_MAX_BUFLEN);
+		return -EINVAL;
+	}
+
+	buf = vmalloc(buf_len);
+	if (!buf)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, p, buf_len)) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	rc = nd_desc->ndctl(nd_desc, nvdimm, cmd, buf, buf_len);
+	if (rc < 0)
+		goto out;
+	if (copy_to_user(p, buf, buf_len))
+		rc = -EFAULT;
+ out:
+	vfree(buf);
+	return rc;
+}
+
 static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
-	return -ENXIO;
+	long id = (long) file->private_data;
+	int rc = -ENXIO, read_only;
+	struct nvdimm_bus *nvdimm_bus;
+
+	read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
+		if (nvdimm_bus->id == id) {
+			rc = __nd_ioctl(nvdimm_bus, NULL, read_only, cmd, arg);
+			break;
+		}
+	}
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	return rc;
+}
+
+static int match_dimm(struct device *dev, void *data)
+{
+	long id = (long) data;
+
+	if (is_nvdimm(dev)) {
+		struct nvdimm *nvdimm = to_nvdimm(dev);
+
+		return nvdimm->id == id;
+	}
+
+	return 0;
+}
+
+static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+	int rc = -ENXIO, read_only;
+	struct nvdimm_bus *nvdimm_bus;
+
+	read_only = (O_RDWR != (file->f_flags & O_ACCMODE));
+	mutex_lock(&nvdimm_bus_list_mutex);
+	list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
+		struct device *dev = device_find_child(&nvdimm_bus->dev,
+				file->private_data, match_dimm);
+		struct nvdimm *nvdimm;
+
+		if (!dev)
+			continue;
+
+		nvdimm = to_nvdimm(dev);
+		rc = __nd_ioctl(nvdimm_bus, nvdimm, read_only, cmd, arg);
+		put_device(dev);
+		break;
+	}
+	mutex_unlock(&nvdimm_bus_list_mutex);
+
+	return rc;
+}
+
+static int nd_open(struct inode *inode, struct file *file)
+{
+	long minor = iminor(inode);
+
+	file->private_data = (void *) minor;
+	return 0;
 }
 
 static const struct file_operations nvdimm_bus_fops = {
 	.owner = THIS_MODULE,
-	.open = nonseekable_open,
+	.open = nd_open,
 	.unlocked_ioctl = nd_ioctl,
 	.compat_ioctl = nd_ioctl,
 	.llseek = noop_llseek,
 };
 
+static const struct file_operations nvdimm_fops = {
+	.owner = THIS_MODULE,
+	.open = nd_open,
+	.unlocked_ioctl = nvdimm_ioctl,
+	.compat_ioctl = nvdimm_ioctl,
+	.llseek = noop_llseek,
+};
+
 int __init nvdimm_bus_init(void)
 {
 	int rc;
@@ -70,9 +380,14 @@ int __init nvdimm_bus_init(void)
 
 	rc = register_chrdev(0, "ndctl", &nvdimm_bus_fops);
 	if (rc < 0)
-		goto err_chrdev;
+		goto err_bus_chrdev;
 	nvdimm_bus_major = rc;
 
+	rc = register_chrdev(0, "dimmctl", &nvdimm_fops);
+	if (rc < 0)
+		goto err_dimm_chrdev;
+	nvdimm_major = rc;
+
 	nd_class = class_create(THIS_MODULE, "nd");
 	if (IS_ERR(nd_class))
 		goto err_class;
@@ -80,8 +395,10 @@ int __init nvdimm_bus_init(void)
 	return 0;
 
  err_class:
+	unregister_chrdev(nvdimm_major, "dimmctl");
+ err_dimm_chrdev:
 	unregister_chrdev(nvdimm_bus_major, "ndctl");
- err_chrdev:
+ err_bus_chrdev:
 	bus_unregister(&nvdimm_bus_type);
 
 	return rc;
@@ -91,5 +408,6 @@ void __exit nvdimm_bus_exit(void)
 {
 	class_destroy(nd_class);
 	unregister_chrdev(nvdimm_bus_major, "ndctl");
+	unregister_chrdev(nvdimm_major, "dimmctl");
 	bus_unregister(&nvdimm_bus_type);
 }
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index ef957eb37c90..1ce159095c52 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -14,6 +14,7 @@
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/ndctl.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include "nd-core.h"
@@ -61,6 +62,20 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
 	return NULL;
 }
 
+static ssize_t commands_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	int cmd, len = 0;
+	struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
+	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
+
+	for_each_set_bit(cmd, &nd_desc->dsm_mask, BITS_PER_LONG)
+		len += sprintf(buf + len, "%s ", nvdimm_bus_cmd_name(cmd));
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+static DEVICE_ATTR_RO(commands);
+
 static const char *nvdimm_bus_provider(struct nvdimm_bus *nvdimm_bus)
 {
 	struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
@@ -84,6 +99,7 @@ static ssize_t provider_show(struct device *dev,
 static DEVICE_ATTR_RO(provider);
 
 static struct attribute *nvdimm_bus_attributes[] = {
+	&dev_attr_commands.attr,
 	&dev_attr_provider.attr,
 	NULL,
 };
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 51ea52cc2079..c3dd7227d1bb 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -12,6 +12,7 @@
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/device.h>
+#include <linux/ndctl.h>
 #include <linux/slab.h>
 #include <linux/io.h>
 #include <linux/fs.h>
@@ -33,7 +34,7 @@ static struct device_type nvdimm_device_type = {
 	.release = nvdimm_release,
 };
 
-static bool is_nvdimm(struct device *dev)
+bool is_nvdimm(struct device *dev)
 {
 	return dev->type == &nvdimm_device_type;
 }
@@ -55,12 +56,41 @@ EXPORT_SYMBOL_GPL(nvdimm_name);
 
 void *nvdimm_provider_data(struct nvdimm *nvdimm)
 {
-	return nvdimm->provider_data;
+	if (nvdimm)
+		return nvdimm->provider_data;
+	return NULL;
 }
 EXPORT_SYMBOL_GPL(nvdimm_provider_data);
 
+static ssize_t commands_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nvdimm *nvdimm = to_nvdimm(dev);
+	int cmd, len = 0;
+
+	if (!nvdimm->dsm_mask)
+		return sprintf(buf, "\n");
+
+	for_each_set_bit(cmd, nvdimm->dsm_mask, BITS_PER_LONG)
+		len += sprintf(buf + len, "%s ", nvdimm_cmd_name(cmd));
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+static DEVICE_ATTR_RO(commands);
+
+static struct attribute *nvdimm_attributes[] = {
+	&dev_attr_commands.attr,
+	NULL,
+};
+
+struct attribute_group nvdimm_attribute_group = {
+	.attrs = nvdimm_attributes,
+};
+EXPORT_SYMBOL_GPL(nvdimm_attribute_group);
+
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
-		const struct attribute_group **groups, unsigned long flags)
+		const struct attribute_group **groups, unsigned long flags,
+		unsigned long *dsm_mask)
 {
 	struct nvdimm *nvdimm = kzalloc(sizeof(*nvdimm), GFP_KERNEL);
 	struct device *dev;
@@ -75,12 +105,14 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 	}
 	nvdimm->provider_data = provider_data;
 	nvdimm->flags = flags;
+	nvdimm->dsm_mask = dsm_mask;
 
 	dev = &nvdimm->dev;
 	dev_set_name(dev, "nmem%d", nvdimm->id);
 	dev->parent = &nvdimm_bus->dev;
 	dev->type = &nvdimm_device_type;
 	dev->bus = &nvdimm_bus_type;
+	dev->devt = MKDEV(nvdimm_major, nvdimm->id);
 	dev->groups = groups;
 	if (device_register(dev) != 0) {
 		put_device(dev);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 9b8303413b60..59528b3c9de8 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -18,6 +18,7 @@
 extern struct list_head nvdimm_bus_list;
 extern struct mutex nvdimm_bus_list_mutex;
 extern struct bus_type nvdimm_bus_type;
+extern int nvdimm_major;
 
 struct nvdimm_bus {
 	struct nvdimm_bus_descriptor *nd_desc;
@@ -29,10 +30,12 @@ struct nvdimm_bus {
 struct nvdimm {
 	unsigned long flags;
 	void *provider_data;
+	unsigned long *dsm_mask;
 	struct device dev;
 	int id;
 };
 
+bool is_nvdimm(struct device *dev);
 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
 int __init nvdimm_bus_init(void);
 void __exit nvdimm_bus_exit(void);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 07787f0dd7de..a39235819af3 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -14,13 +14,22 @@
  */
 #ifndef __LIBNVDIMM_H__
 #define __LIBNVDIMM_H__
+#include <linux/sizes.h>
+#include <linux/types.h>
 
 enum {
 	/* when a dimm supports both PMEM and BLK access a label is required */
 	NDD_ALIASING = 1 << 0,
+
+	/* need to set a limit somewhere, but yes, this is likely overkill */
+	ND_IOCTL_MAX_BUFLEN = SZ_4M,
+	ND_CMD_MAX_ELEM = 4,
+	ND_CMD_MAX_ENVELOPE = 16,
+	ND_CMD_ARS_STATUS_MAX = SZ_4K,
 };
 
 extern struct attribute_group nvdimm_bus_attribute_group;
+extern struct attribute_group nvdimm_attribute_group;
 
 struct nvdimm;
 struct nvdimm_bus_descriptor;
@@ -35,6 +44,14 @@ struct nvdimm_bus_descriptor {
 	ndctl_fn ndctl;
 };
 
+struct nd_cmd_desc {
+	int in_num;
+	int out_num;
+	u32 in_sizes[ND_CMD_MAX_ELEM];
+	int out_sizes[ND_CMD_MAX_ELEM];
+};
+
+struct nvdimm_bus;
 struct device;
 struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		struct nvdimm_bus_descriptor *nfit_desc);
@@ -45,5 +62,13 @@ struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 const char *nvdimm_name(struct nvdimm *nvdimm);
 void *nvdimm_provider_data(struct nvdimm *nvdimm);
 struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
-		const struct attribute_group **groups, unsigned long flags);
+		const struct attribute_group **groups, unsigned long flags,
+		unsigned long *dsm_mask);
+const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd);
+const struct nd_cmd_desc *nd_cmd_bus_desc(int cmd);
+u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, void *buf);
+u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
+		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
+		const u32 *out_field);
 #endif /* __LIBNVDIMM_H__ */
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 1a0006a76b00..200cc5ea2998 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -271,6 +271,7 @@ header-y += ncp_fs.h
 header-y += ncp.h
 header-y += ncp_mount.h
 header-y += ncp_no.h
+header-y += ndctl.h
 header-y += neighbour.h
 header-y += netconf.h
 header-y += netdevice.h
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
new file mode 100644
index 000000000000..ff13c23b26df
--- /dev/null
+++ b/include/uapi/linux/ndctl.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 2014-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU Lesser General Public License,
+ * version 2.1, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License for
+ * more details.
+ */
+#ifndef __NDCTL_H__
+#define __NDCTL_H__
+
+#include <linux/types.h>
+
+struct nd_cmd_smart {
+	__u32 status;
+	__u8 data[128];
+} __packed;
+
+struct nd_cmd_smart_threshold {
+	__u32 status;
+	__u8 data[8];
+} __packed;
+
+struct nd_cmd_dimm_flags {
+	__u32 status;
+	__u32 flags;
+} __packed;
+
+struct nd_cmd_get_config_size {
+	__u32 status;
+	__u32 config_size;
+	__u32 max_xfer;
+} __packed;
+
+struct nd_cmd_get_config_data_hdr {
+	__u32 in_offset;
+	__u32 in_length;
+	__u32 status;
+	__u8 out_buf[0];
+} __packed;
+
+struct nd_cmd_set_config_hdr {
+	__u32 in_offset;
+	__u32 in_length;
+	__u8 in_buf[0];
+} __packed;
+
+struct nd_cmd_vendor_hdr {
+	__u32 opcode;
+	__u32 in_length;
+	__u8 in_buf[0];
+} __packed;
+
+struct nd_cmd_vendor_tail {
+	__u32 status;
+	__u32 out_length;
+	__u8 out_buf[0];
+} __packed;
+
+struct nd_cmd_ars_cap {
+	__u64 address;
+	__u64 length;
+	__u32 status;
+	__u32 max_ars_out;
+} __packed;
+
+struct nd_cmd_ars_start {
+	__u64 address;
+	__u64 length;
+	__u16 type;
+	__u8 reserved[6];
+	__u32 status;
+} __packed;
+
+struct nd_cmd_ars_status {
+	__u32 status;
+	__u32 out_length;
+	__u64 address;
+	__u64 length;
+	__u16 type;
+	__u32 num_records;
+	struct nd_ars_record {
+		__u32 handle;
+		__u32 flags;
+		__u64 err_address;
+		__u64 mask;
+	} __packed records[0];
+} __packed;
+
+enum {
+	ND_CMD_IMPLEMENTED = 0,
+
+	/* bus commands */
+	ND_CMD_ARS_CAP = 1,
+	ND_CMD_ARS_START = 2,
+	ND_CMD_ARS_STATUS = 3,
+
+	/* per-dimm commands */
+	ND_CMD_SMART = 1,
+	ND_CMD_SMART_THRESHOLD = 2,
+	ND_CMD_DIMM_FLAGS = 3,
+	ND_CMD_GET_CONFIG_SIZE = 4,
+	ND_CMD_GET_CONFIG_DATA = 5,
+	ND_CMD_SET_CONFIG_DATA = 6,
+	ND_CMD_VENDOR_EFFECT_LOG_SIZE = 7,
+	ND_CMD_VENDOR_EFFECT_LOG = 8,
+	ND_CMD_VENDOR = 9,
+};
+
+static inline const char *nvdimm_bus_cmd_name(unsigned cmd)
+{
+	static const char * const names[] = {
+		[ND_CMD_ARS_CAP] = "ars_cap",
+		[ND_CMD_ARS_START] = "ars_start",
+		[ND_CMD_ARS_STATUS] = "ars_status",
+	};
+
+	if (cmd < ARRAY_SIZE(names) && names[cmd])
+		return names[cmd];
+	return "unknown";
+}
+
+static inline const char *nvdimm_cmd_name(unsigned cmd)
+{
+	static const char * const names[] = {
+		[ND_CMD_SMART] = "smart",
+		[ND_CMD_SMART_THRESHOLD] = "smart_thresh",
+		[ND_CMD_DIMM_FLAGS] = "flags",
+		[ND_CMD_GET_CONFIG_SIZE] = "get_size",
+		[ND_CMD_GET_CONFIG_DATA] = "get_data",
+		[ND_CMD_SET_CONFIG_DATA] = "set_data",
+		[ND_CMD_VENDOR_EFFECT_LOG_SIZE] = "effect_size",
+		[ND_CMD_VENDOR_EFFECT_LOG] = "effect_log",
+		[ND_CMD_VENDOR] = "vendor",
+	};
+
+	if (cmd < ARRAY_SIZE(names) && names[cmd])
+		return names[cmd];
+	return "unknown";
+}
+
+#define ND_IOCTL 'N'
+
+#define ND_IOCTL_SMART			_IOWR(ND_IOCTL, ND_CMD_SMART,\
+					struct nd_cmd_smart)
+
+#define ND_IOCTL_SMART_THRESHOLD	_IOWR(ND_IOCTL, ND_CMD_SMART_THRESHOLD,\
+					struct nd_cmd_smart_threshold)
+
+#define ND_IOCTL_DIMM_FLAGS		_IOWR(ND_IOCTL, ND_CMD_DIMM_FLAGS,\
+					struct nd_cmd_dimm_flags)
+
+#define ND_IOCTL_GET_CONFIG_SIZE	_IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_SIZE,\
+					struct nd_cmd_get_config_size)
+
+#define ND_IOCTL_GET_CONFIG_DATA	_IOWR(ND_IOCTL, ND_CMD_GET_CONFIG_DATA,\
+					struct nd_cmd_get_config_data_hdr)
+
+#define ND_IOCTL_SET_CONFIG_DATA	_IOWR(ND_IOCTL, ND_CMD_SET_CONFIG_DATA,\
+					struct nd_cmd_set_config_hdr)
+
+#define ND_IOCTL_VENDOR			_IOWR(ND_IOCTL, ND_CMD_VENDOR,\
+					struct nd_cmd_vendor_hdr)
+
+#define ND_IOCTL_ARS_CAP		_IOWR(ND_IOCTL, ND_CMD_ARS_CAP,\
+					struct nd_cmd_ars_cap)
+
+#define ND_IOCTL_ARS_START		_IOWR(ND_IOCTL, ND_CMD_ARS_START,\
+					struct nd_cmd_ars_start)
+
+#define ND_IOCTL_ARS_STATUS		_IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\
+					struct nd_cmd_ars_status)
+
+#endif /* __NDCTL_H__ */
-- 
cgit 


From 4d88a97aa9e8cfa6460aab119c5da60ad2267423 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 31 May 2015 14:41:48 -0400
Subject: libnvdimm, nvdimm: dimm driver and base libnvdimm device-driver
 infrastructure

* Implement the device-model infrastructure for loading modules and
  attaching drivers to nvdimm devices.  This is a simple association of a
  nd-device-type number with a driver that has a bitmask of supported
  device types.  To facilitate userspace bind/unbind operations 'modalias'
  and 'devtype', that also appear in the uevent, are added as generic
  sysfs attributes for all nvdimm devices.  The reason for the device-type
  number is to support sub-types within a given parent devtype, be it a
  vendor-specific sub-type or otherwise.

* The first consumer of this infrastructure is the driver
  for dimm devices.  It simply uses control messages to retrieve and
  store the configuration-data image (label set) from each dimm.

Note: nd_device_register() arranges for asynchronous registration of
      nvdimm bus devices by default.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit.c        |  13 +++-
 drivers/nvdimm/Makefile    |   1 +
 drivers/nvdimm/bus.c       | 168 ++++++++++++++++++++++++++++++++++++++++++++-
 drivers/nvdimm/core.c      |  43 +++++++++++-
 drivers/nvdimm/dimm.c      |  92 +++++++++++++++++++++++++
 drivers/nvdimm/dimm_devs.c | 136 ++++++++++++++++++++++++++++++++++--
 drivers/nvdimm/nd-core.h   |   6 +-
 drivers/nvdimm/nd.h        |  36 ++++++++++
 include/linux/libnvdimm.h  |   2 +
 include/linux/nd.h         |  39 +++++++++++
 include/uapi/linux/ndctl.h |   6 ++
 11 files changed, 527 insertions(+), 15 deletions(-)
 create mode 100644 drivers/nvdimm/dimm.c
 create mode 100644 drivers/nvdimm/nd.h
 create mode 100644 include/linux/nd.h

(limited to 'include/uapi/linux')

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 9112a6210a4b..c4ccec1bc60b 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -18,6 +18,10 @@
 #include <linux/acpi.h>
 #include "nfit.h"
 
+static bool force_enable_dimms;
+module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
+
 static u8 nfit_uuid[NFIT_UUID_MAX][16];
 
 static const u8 *to_nfit_uuid(enum nfit_uuids id)
@@ -633,6 +637,7 @@ static struct attribute_group acpi_nfit_dimm_attribute_group = {
 
 static const struct attribute_group *acpi_nfit_dimm_attribute_groups[] = {
 	&nvdimm_attribute_group,
+	&nd_device_attribute_group,
 	&acpi_nfit_dimm_attribute_group,
 	NULL,
 };
@@ -669,7 +674,7 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 	if (!adev_dimm) {
 		dev_err(dev, "no ACPI.NFIT device with _ADR %#x, disabling...\n",
 				device_handle);
-		return -ENODEV;
+		return force_enable_dimms ? 0 : -ENODEV;
 	}
 
 	status = acpi_evaluate_integer(adev_dimm->handle, "_STA", NULL, &sta);
@@ -690,12 +695,13 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		if (acpi_check_dsm(adev_dimm->handle, uuid, 1, 1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
-	return rc;
+	return force_enable_dimms ? 0 : rc;
 }
 
 static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_mem *nfit_mem;
+	int dimm_count = 0;
 
 	list_for_each_entry(nfit_mem, &acpi_desc->dimms, list) {
 		struct nvdimm *nvdimm;
@@ -729,9 +735,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			return -ENOMEM;
 
 		nfit_mem->nvdimm = nvdimm;
+		dimm_count++;
 	}
 
-	return 0;
+	return nvdimm_bus_check_dimm_count(acpi_desc->nvdimm_bus, dimm_count);
 }
 
 static void acpi_nfit_init_dsms(struct acpi_nfit_desc *acpi_desc)
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 5b68738ba406..d44b5c1fcd3b 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -3,3 +3,4 @@ obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o
 libnvdimm-y := core.o
 libnvdimm-y += bus.o
 libnvdimm-y += dimm_devs.o
+libnvdimm-y += dimm.o
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 15f3a3ddc225..a0308f1872bf 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -16,19 +16,183 @@
 #include <linux/fcntl.h>
 #include <linux/async.h>
 #include <linux/ndctl.h>
+#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/fs.h>
 #include <linux/io.h>
 #include <linux/mm.h>
+#include <linux/nd.h>
 #include "nd-core.h"
+#include "nd.h"
 
 int nvdimm_major;
 static int nvdimm_bus_major;
 static struct class *nd_class;
 
-struct bus_type nvdimm_bus_type = {
+static int to_nd_device_type(struct device *dev)
+{
+	if (is_nvdimm(dev))
+		return ND_DEVICE_DIMM;
+
+	return 0;
+}
+
+static int nvdimm_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
+{
+	return add_uevent_var(env, "MODALIAS=" ND_DEVICE_MODALIAS_FMT,
+			to_nd_device_type(dev));
+}
+
+static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(drv);
+
+	return test_bit(to_nd_device_type(dev), &nd_drv->type);
+}
+
+static int nvdimm_bus_probe(struct device *dev)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	int rc;
+
+	rc = nd_drv->probe(dev);
+	dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
+			dev_name(dev), rc);
+	return rc;
+}
+
+static int nvdimm_bus_remove(struct device *dev)
+{
+	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+	int rc;
+
+	rc = nd_drv->remove(dev);
+	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
+			dev_name(dev), rc);
+	return rc;
+}
+
+static struct bus_type nvdimm_bus_type = {
 	.name = "nd",
+	.uevent = nvdimm_bus_uevent,
+	.match = nvdimm_bus_match,
+	.probe = nvdimm_bus_probe,
+	.remove = nvdimm_bus_remove,
+};
+
+static ASYNC_DOMAIN_EXCLUSIVE(nd_async_domain);
+
+void nd_synchronize(void)
+{
+	async_synchronize_full_domain(&nd_async_domain);
+}
+EXPORT_SYMBOL_GPL(nd_synchronize);
+
+static void nd_async_device_register(void *d, async_cookie_t cookie)
+{
+	struct device *dev = d;
+
+	if (device_add(dev) != 0) {
+		dev_err(dev, "%s: failed\n", __func__);
+		put_device(dev);
+	}
+	put_device(dev);
+}
+
+static void nd_async_device_unregister(void *d, async_cookie_t cookie)
+{
+	struct device *dev = d;
+
+	device_unregister(dev);
+	put_device(dev);
+}
+
+void nd_device_register(struct device *dev)
+{
+	dev->bus = &nvdimm_bus_type;
+	device_initialize(dev);
+	get_device(dev);
+	async_schedule_domain(nd_async_device_register, dev,
+			&nd_async_domain);
+}
+EXPORT_SYMBOL(nd_device_register);
+
+void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
+{
+	switch (mode) {
+	case ND_ASYNC:
+		get_device(dev);
+		async_schedule_domain(nd_async_device_unregister, dev,
+				&nd_async_domain);
+		break;
+	case ND_SYNC:
+		nd_synchronize();
+		device_unregister(dev);
+		break;
+	}
+}
+EXPORT_SYMBOL(nd_device_unregister);
+
+/**
+ * __nd_driver_register() - register a region or a namespace driver
+ * @nd_drv: driver to register
+ * @owner: automatically set by nd_driver_register() macro
+ * @mod_name: automatically set by nd_driver_register() macro
+ */
+int __nd_driver_register(struct nd_device_driver *nd_drv, struct module *owner,
+		const char *mod_name)
+{
+	struct device_driver *drv = &nd_drv->drv;
+
+	if (!nd_drv->type) {
+		pr_debug("driver type bitmask not set (%pf)\n",
+				__builtin_return_address(0));
+		return -EINVAL;
+	}
+
+	if (!nd_drv->probe || !nd_drv->remove) {
+		pr_debug("->probe() and ->remove() must be specified\n");
+		return -EINVAL;
+	}
+
+	drv->bus = &nvdimm_bus_type;
+	drv->owner = owner;
+	drv->mod_name = mod_name;
+
+	return driver_register(drv);
+}
+EXPORT_SYMBOL(__nd_driver_register);
+
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, ND_DEVICE_MODALIAS_FMT "\n",
+			to_nd_device_type(dev));
+}
+static DEVICE_ATTR_RO(modalias);
+
+static ssize_t devtype_show(struct device *dev, struct device_attribute *attr,
+		char *buf)
+{
+	return sprintf(buf, "%s\n", dev->type->name);
+}
+static DEVICE_ATTR_RO(devtype);
+
+static struct attribute *nd_device_attributes[] = {
+	&dev_attr_modalias.attr,
+	&dev_attr_devtype.attr,
+	NULL,
+};
+
+/**
+ * nd_device_attribute_group - generic attributes for all devices on an nd bus
+ */
+struct attribute_group nd_device_attribute_group = {
+	.attrs = nd_device_attributes,
 };
+EXPORT_SYMBOL_GPL(nd_device_attribute_group);
 
 int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus)
 {
@@ -404,7 +568,7 @@ int __init nvdimm_bus_init(void)
 	return rc;
 }
 
-void __exit nvdimm_bus_exit(void)
+void nvdimm_bus_exit(void)
 {
 	class_destroy(nd_class);
 	unregister_chrdev(nvdimm_bus_major, "ndctl");
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 1ce159095c52..50ab880f0dc0 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -18,6 +18,7 @@
 #include <linux/mutex.h>
 #include <linux/slab.h>
 #include "nd-core.h"
+#include "nd.h"
 
 LIST_HEAD(nvdimm_bus_list);
 DEFINE_MUTEX(nvdimm_bus_list_mutex);
@@ -98,8 +99,33 @@ static ssize_t provider_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(provider);
 
+static int flush_namespaces(struct device *dev, void *data)
+{
+	device_lock(dev);
+	device_unlock(dev);
+	return 0;
+}
+
+static int flush_regions_dimms(struct device *dev, void *data)
+{
+	device_lock(dev);
+	device_unlock(dev);
+	device_for_each_child(dev, NULL, flush_namespaces);
+	return 0;
+}
+
+static ssize_t wait_probe_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	nd_synchronize();
+	device_for_each_child(dev, NULL, flush_regions_dimms);
+	return sprintf(buf, "1\n");
+}
+static DEVICE_ATTR_RO(wait_probe);
+
 static struct attribute *nvdimm_bus_attributes[] = {
 	&dev_attr_commands.attr,
+	&dev_attr_wait_probe.attr,
 	&dev_attr_provider.attr,
 	NULL,
 };
@@ -161,7 +187,7 @@ static int child_unregister(struct device *dev, void *data)
 	if (dev->class)
 		/* pass */;
 	else
-		device_unregister(dev);
+		nd_device_unregister(dev, ND_SYNC);
 	return 0;
 }
 
@@ -174,6 +200,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus)
 	list_del_init(&nvdimm_bus->list);
 	mutex_unlock(&nvdimm_bus_list_mutex);
 
+	nd_synchronize();
 	device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
 	nvdimm_bus_destroy_ndctl(nvdimm_bus);
 
@@ -183,12 +210,24 @@ EXPORT_SYMBOL_GPL(nvdimm_bus_unregister);
 
 static __init int libnvdimm_init(void)
 {
-	return nvdimm_bus_init();
+	int rc;
+
+	rc = nvdimm_bus_init();
+	if (rc)
+		return rc;
+	rc = nvdimm_init();
+	if (rc)
+		goto err_dimm;
+	return 0;
+ err_dimm:
+	nvdimm_bus_exit();
+	return rc;
 }
 
 static __exit void libnvdimm_exit(void)
 {
 	WARN_ON(!list_empty(&nvdimm_bus_list));
+	nvdimm_exit();
 	nvdimm_bus_exit();
 }
 
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
new file mode 100644
index 000000000000..28001a6ccd4e
--- /dev/null
+++ b/drivers/nvdimm/dimm.c
@@ -0,0 +1,92 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/sizes.h>
+#include <linux/ndctl.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/nd.h>
+#include "nd.h"
+
+static void free_data(struct nvdimm_drvdata *ndd)
+{
+	if (!ndd)
+		return;
+
+	if (ndd->data && is_vmalloc_addr(ndd->data))
+		vfree(ndd->data);
+	else
+		kfree(ndd->data);
+	kfree(ndd);
+}
+
+static int nvdimm_probe(struct device *dev)
+{
+	struct nvdimm_drvdata *ndd;
+	int rc;
+
+	ndd = kzalloc(sizeof(*ndd), GFP_KERNEL);
+	if (!ndd)
+		return -ENOMEM;
+
+	dev_set_drvdata(dev, ndd);
+	ndd->dev = dev;
+
+	rc = nvdimm_init_nsarea(ndd);
+	if (rc)
+		goto err;
+
+	rc = nvdimm_init_config_data(ndd);
+	if (rc)
+		goto err;
+
+	dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
+
+	return 0;
+
+ err:
+	free_data(ndd);
+	return rc;
+}
+
+static int nvdimm_remove(struct device *dev)
+{
+	struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
+
+	free_data(ndd);
+
+	return 0;
+}
+
+static struct nd_device_driver nvdimm_driver = {
+	.probe = nvdimm_probe,
+	.remove = nvdimm_remove,
+	.drv = {
+		.name = "nvdimm",
+	},
+	.type = ND_DRIVER_DIMM,
+};
+
+int __init nvdimm_init(void)
+{
+	return nd_driver_register(&nvdimm_driver);
+}
+
+void __exit nvdimm_exit(void)
+{
+	driver_unregister(&nvdimm_driver.drv);
+}
+
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_DIMM);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index c3dd7227d1bb..b3ae86f2e1da 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -11,6 +11,7 @@
  * General Public License for more details.
  */
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/vmalloc.h>
 #include <linux/device.h>
 #include <linux/ndctl.h>
 #include <linux/slab.h>
@@ -18,9 +19,115 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include "nd-core.h"
+#include "nd.h"
 
 static DEFINE_IDA(dimm_ida);
 
+/*
+ * Retrieve bus and dimm handle and return if this bus supports
+ * get_config_data commands
+ */
+static int __validate_dimm(struct nvdimm_drvdata *ndd)
+{
+	struct nvdimm *nvdimm;
+
+	if (!ndd)
+		return -EINVAL;
+
+	nvdimm = to_nvdimm(ndd->dev);
+
+	if (!nvdimm->dsm_mask)
+		return -ENXIO;
+	if (!test_bit(ND_CMD_GET_CONFIG_DATA, nvdimm->dsm_mask))
+		return -ENXIO;
+
+	return 0;
+}
+
+static int validate_dimm(struct nvdimm_drvdata *ndd)
+{
+	int rc = __validate_dimm(ndd);
+
+	if (rc && ndd)
+		dev_dbg(ndd->dev, "%pf: %s error: %d\n",
+				__builtin_return_address(0), __func__, rc);
+	return rc;
+}
+
+/**
+ * nvdimm_init_nsarea - determine the geometry of a dimm's namespace area
+ * @nvdimm: dimm to initialize
+ */
+int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd)
+{
+	struct nd_cmd_get_config_size *cmd = &ndd->nsarea;
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+	struct nvdimm_bus_descriptor *nd_desc;
+	int rc = validate_dimm(ndd);
+
+	if (rc)
+		return rc;
+
+	if (cmd->config_size)
+		return 0; /* already valid */
+
+	memset(cmd, 0, sizeof(*cmd));
+	nd_desc = nvdimm_bus->nd_desc;
+	return nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
+			ND_CMD_GET_CONFIG_SIZE, cmd, sizeof(*cmd));
+}
+
+int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
+	struct nd_cmd_get_config_data_hdr *cmd;
+	struct nvdimm_bus_descriptor *nd_desc;
+	int rc = validate_dimm(ndd);
+	u32 max_cmd_size, config_size;
+	size_t offset;
+
+	if (rc)
+		return rc;
+
+	if (ndd->data)
+		return 0;
+
+	if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0)
+		return -ENXIO;
+
+	ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL);
+	if (!ndd->data)
+		ndd->data = vmalloc(ndd->nsarea.config_size);
+
+	if (!ndd->data)
+		return -ENOMEM;
+
+	max_cmd_size = min_t(u32, PAGE_SIZE, ndd->nsarea.max_xfer);
+	cmd = kzalloc(max_cmd_size + sizeof(*cmd), GFP_KERNEL);
+	if (!cmd)
+		return -ENOMEM;
+
+	nd_desc = nvdimm_bus->nd_desc;
+	for (config_size = ndd->nsarea.config_size, offset = 0;
+			config_size; config_size -= cmd->in_length,
+			offset += cmd->in_length) {
+		cmd->in_length = min(config_size, max_cmd_size);
+		cmd->in_offset = offset;
+		rc = nd_desc->ndctl(nd_desc, to_nvdimm(ndd->dev),
+				ND_CMD_GET_CONFIG_DATA, cmd,
+				cmd->in_length + sizeof(*cmd));
+		if (rc || cmd->status) {
+			rc = -ENXIO;
+			break;
+		}
+		memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
+	}
+	dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
+	kfree(cmd);
+
+	return rc;
+}
+
 static void nvdimm_release(struct device *dev)
 {
 	struct nvdimm *nvdimm = to_nvdimm(dev);
@@ -111,14 +218,33 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 	dev_set_name(dev, "nmem%d", nvdimm->id);
 	dev->parent = &nvdimm_bus->dev;
 	dev->type = &nvdimm_device_type;
-	dev->bus = &nvdimm_bus_type;
 	dev->devt = MKDEV(nvdimm_major, nvdimm->id);
 	dev->groups = groups;
-	if (device_register(dev) != 0) {
-		put_device(dev);
-		return NULL;
-	}
+	nd_device_register(dev);
 
 	return nvdimm;
 }
 EXPORT_SYMBOL_GPL(nvdimm_create);
+
+static int count_dimms(struct device *dev, void *c)
+{
+	int *count = c;
+
+	if (is_nvdimm(dev))
+		(*count)++;
+	return 0;
+}
+
+int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
+{
+	int count = 0;
+	/* Flush any possible dimm registration failures */
+	nd_synchronize();
+
+	device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
+	dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
+	if (count != dimm_count)
+		return -ENXIO;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(nvdimm_bus_check_dimm_count);
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 59528b3c9de8..f2004b790874 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -17,7 +17,6 @@
 
 extern struct list_head nvdimm_bus_list;
 extern struct mutex nvdimm_bus_list_mutex;
-extern struct bus_type nvdimm_bus_type;
 extern int nvdimm_major;
 
 struct nvdimm_bus {
@@ -35,10 +34,11 @@ struct nvdimm {
 	int id;
 };
 
-bool is_nvdimm(struct device *dev);
 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
 int __init nvdimm_bus_init(void);
-void __exit nvdimm_bus_exit(void);
+void nvdimm_bus_exit(void);
 int nvdimm_bus_create_ndctl(struct nvdimm_bus *nvdimm_bus);
 void nvdimm_bus_destroy_ndctl(struct nvdimm_bus *nvdimm_bus);
+void nd_synchronize(void);
+bool is_nvdimm(struct device *dev);
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
new file mode 100644
index 000000000000..1f7f6ecab0fc
--- /dev/null
+++ b/drivers/nvdimm/nd.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __ND_H__
+#define __ND_H__
+#include <linux/device.h>
+#include <linux/mutex.h>
+#include <linux/ndctl.h>
+
+struct nvdimm_drvdata {
+	struct device *dev;
+	struct nd_cmd_get_config_size nsarea;
+	void *data;
+};
+
+enum nd_async_mode {
+	ND_SYNC,
+	ND_ASYNC,
+};
+
+void nd_device_register(struct device *dev);
+void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
+int __init nvdimm_init(void);
+void nvdimm_exit(void);
+int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
+int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
+#endif /* __ND_H__ */
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index a39235819af3..d3ebccf4ea8b 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -30,6 +30,7 @@ enum {
 
 extern struct attribute_group nvdimm_bus_attribute_group;
 extern struct attribute_group nvdimm_attribute_group;
+extern struct attribute_group nd_device_attribute_group;
 
 struct nvdimm;
 struct nvdimm_bus_descriptor;
@@ -71,4 +72,5 @@ u32 nd_cmd_in_size(struct nvdimm *nvdimm, int cmd,
 u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
 		const struct nd_cmd_desc *desc, int idx, const u32 *in_field,
 		const u32 *out_field);
+int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count);
 #endif /* __LIBNVDIMM_H__ */
diff --git a/include/linux/nd.h b/include/linux/nd.h
new file mode 100644
index 000000000000..e074f67e53a3
--- /dev/null
+++ b/include/linux/nd.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __LINUX_ND_H__
+#define __LINUX_ND_H__
+#include <linux/ndctl.h>
+#include <linux/device.h>
+
+struct nd_device_driver {
+	struct device_driver drv;
+	unsigned long type;
+	int (*probe)(struct device *dev);
+	int (*remove)(struct device *dev);
+};
+
+static inline struct nd_device_driver *to_nd_device_driver(
+		struct device_driver *drv)
+{
+	return container_of(drv, struct nd_device_driver, drv);
+}
+
+#define MODULE_ALIAS_ND_DEVICE(type) \
+	MODULE_ALIAS("nd:t" __stringify(type) "*")
+#define ND_DEVICE_MODALIAS_FMT "nd:t%d"
+
+int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
+		struct module *module, const char *mod_name);
+#define nd_driver_register(driver) \
+	__nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#endif /* __LINUX_ND_H__ */
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index ff13c23b26df..37640916d146 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -175,4 +175,10 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 #define ND_IOCTL_ARS_STATUS		_IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\
 					struct nd_cmd_ars_status)
 
+
+#define ND_DEVICE_DIMM 1            /* nd_dimm: container for "config data" */
+
+enum nd_driver_flags {
+	ND_DRIVER_DIMM            = 1 << ND_DEVICE_DIMM,
+};
 #endif /* __NDCTL_H__ */
-- 
cgit 


From 3d88002e4a7bd40f355550284c6cd140e6fe29dc Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 31 May 2015 15:02:11 -0400
Subject: libnvdimm: support for legacy (non-aliasing) nvdimms

The libnvdimm region driver is an intermediary driver that translates
non-volatile "region"s into "namespace" sub-devices that are surfaced by
persistent memory block-device drivers (PMEM and BLK).

ACPI 6 introduces the concept that a given nvdimm may simultaneously
offer multiple access modes to its media through direct PMEM load/store
access, or windowed BLK mode.  Existing nvdimms mostly implement a PMEM
interface, some offer a BLK-like mode, but never both as ACPI 6 defines.
If an nvdimm is single interfaced, then there is no need for dimm
metadata labels.  For these devices we can take the region boundaries
directly to create a child namespace device (nd_namespace_io).

Acked-by: Christoph Hellwig <hch@lst.de>
Tested-by: Toshi Kani <toshi.kani@hp.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit.c             |   1 +
 drivers/nvdimm/Makefile         |   2 +
 drivers/nvdimm/bus.c            |  26 ++++++++++
 drivers/nvdimm/core.c           |  44 ++++++++++++++--
 drivers/nvdimm/dimm.c           |   2 +-
 drivers/nvdimm/namespace_devs.c | 111 ++++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/nd-core.h        |   6 ++-
 drivers/nvdimm/nd.h             |  13 +++++
 drivers/nvdimm/region.c         |  93 +++++++++++++++++++++++++++++++++
 drivers/nvdimm/region_devs.c    |  66 +++++++++++++++++++++++-
 include/linux/libnvdimm.h       |   7 ++-
 include/linux/nd.h              |  10 ++++
 include/uapi/linux/ndctl.h      |  10 ++++
 13 files changed, 383 insertions(+), 8 deletions(-)
 create mode 100644 drivers/nvdimm/namespace_devs.c
 create mode 100644 drivers/nvdimm/region.c

(limited to 'include/uapi/linux')

diff --git a/drivers/acpi/nfit.c b/drivers/acpi/nfit.c
index 068f69d70c9e..ce290748fe36 100644
--- a/drivers/acpi/nfit.c
+++ b/drivers/acpi/nfit.c
@@ -780,6 +780,7 @@ static struct attribute_group acpi_nfit_region_attribute_group = {
 static const struct attribute_group *acpi_nfit_region_attribute_groups[] = {
 	&nd_region_attribute_group,
 	&nd_mapping_attribute_group,
+	&nd_device_attribute_group,
 	&acpi_nfit_region_attribute_group,
 	NULL,
 };
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 88afd0d849c3..af5e2760ddbd 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -5,3 +5,5 @@ libnvdimm-y += bus.o
 libnvdimm-y += dimm_devs.o
 libnvdimm-y += dimm.o
 libnvdimm-y += region_devs.o
+libnvdimm-y += region.o
+libnvdimm-y += namespace_devs.o
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index a0308f1872bf..4b77665a6cc8 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -13,6 +13,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 #include <linux/vmalloc.h>
 #include <linux/uaccess.h>
+#include <linux/module.h>
 #include <linux/fcntl.h>
 #include <linux/async.h>
 #include <linux/ndctl.h>
@@ -33,6 +34,12 @@ static int to_nd_device_type(struct device *dev)
 {
 	if (is_nvdimm(dev))
 		return ND_DEVICE_DIMM;
+	else if (is_nd_pmem(dev))
+		return ND_DEVICE_REGION_PMEM;
+	else if (is_nd_blk(dev))
+		return ND_DEVICE_REGION_BLK;
+	else if (is_nd_pmem(dev->parent) || is_nd_blk(dev->parent))
+		return nd_region_to_nstype(to_nd_region(dev->parent));
 
 	return 0;
 }
@@ -50,27 +57,46 @@ static int nvdimm_bus_match(struct device *dev, struct device_driver *drv)
 	return test_bit(to_nd_device_type(dev), &nd_drv->type);
 }
 
+static struct module *to_bus_provider(struct device *dev)
+{
+	/* pin bus providers while regions are enabled */
+	if (is_nd_pmem(dev) || is_nd_blk(dev)) {
+		struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+		return nvdimm_bus->module;
+	}
+	return NULL;
+}
+
 static int nvdimm_bus_probe(struct device *dev)
 {
 	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
+	struct module *provider = to_bus_provider(dev);
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 	int rc;
 
+	if (!try_module_get(provider))
+		return -ENXIO;
+
 	rc = nd_drv->probe(dev);
 	dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
 			dev_name(dev), rc);
+	if (rc != 0)
+		module_put(provider);
 	return rc;
 }
 
 static int nvdimm_bus_remove(struct device *dev)
 {
 	struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
+	struct module *provider = to_bus_provider(dev);
 	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
 	int rc;
 
 	rc = nd_drv->remove(dev);
 	dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
 			dev_name(dev), rc);
+	module_put(provider);
 	return rc;
 }
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 50ab880f0dc0..1b6b15d11f54 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -24,6 +24,36 @@ LIST_HEAD(nvdimm_bus_list);
 DEFINE_MUTEX(nvdimm_bus_list_mutex);
 static DEFINE_IDA(nd_ida);
 
+void nvdimm_bus_lock(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return;
+	mutex_lock(&nvdimm_bus->reconfig_mutex);
+}
+EXPORT_SYMBOL(nvdimm_bus_lock);
+
+void nvdimm_bus_unlock(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return;
+	mutex_unlock(&nvdimm_bus->reconfig_mutex);
+}
+EXPORT_SYMBOL(nvdimm_bus_unlock);
+
+bool is_nvdimm_bus_locked(struct device *dev)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return false;
+	return mutex_is_locked(&nvdimm_bus->reconfig_mutex);
+}
+EXPORT_SYMBOL(is_nvdimm_bus_locked);
+
 static void nvdimm_bus_release(struct device *dev)
 {
 	struct nvdimm_bus *nvdimm_bus;
@@ -135,8 +165,8 @@ struct attribute_group nvdimm_bus_attribute_group = {
 };
 EXPORT_SYMBOL_GPL(nvdimm_bus_attribute_group);
 
-struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
-		struct nvdimm_bus_descriptor *nd_desc)
+struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nd_desc, struct module *module)
 {
 	struct nvdimm_bus *nvdimm_bus;
 	int rc;
@@ -146,11 +176,13 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 		return NULL;
 	INIT_LIST_HEAD(&nvdimm_bus->list);
 	nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
+	mutex_init(&nvdimm_bus->reconfig_mutex);
 	if (nvdimm_bus->id < 0) {
 		kfree(nvdimm_bus);
 		return NULL;
 	}
 	nvdimm_bus->nd_desc = nd_desc;
+	nvdimm_bus->module = module;
 	nvdimm_bus->dev.parent = parent;
 	nvdimm_bus->dev.release = nvdimm_bus_release;
 	nvdimm_bus->dev.groups = nd_desc->attr_groups;
@@ -174,7 +206,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 	put_device(&nvdimm_bus->dev);
 	return NULL;
 }
-EXPORT_SYMBOL_GPL(nvdimm_bus_register);
+EXPORT_SYMBOL_GPL(__nvdimm_bus_register);
 
 static int child_unregister(struct device *dev, void *data)
 {
@@ -218,7 +250,12 @@ static __init int libnvdimm_init(void)
 	rc = nvdimm_init();
 	if (rc)
 		goto err_dimm;
+	rc = nd_region_init();
+	if (rc)
+		goto err_region;
 	return 0;
+ err_region:
+	nvdimm_exit();
  err_dimm:
 	nvdimm_bus_exit();
 	return rc;
@@ -227,6 +264,7 @@ static __init int libnvdimm_init(void)
 static __exit void libnvdimm_exit(void)
 {
 	WARN_ON(!list_empty(&nvdimm_bus_list));
+	nd_region_exit();
 	nvdimm_exit();
 	nvdimm_bus_exit();
 }
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 28001a6ccd4e..eb20fc2df32b 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -84,7 +84,7 @@ int __init nvdimm_init(void)
 	return nd_driver_register(&nvdimm_driver);
 }
 
-void __exit nvdimm_exit(void)
+void nvdimm_exit(void)
 {
 	driver_unregister(&nvdimm_driver.drv);
 }
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
new file mode 100644
index 000000000000..4f653d1e61ad
--- /dev/null
+++ b/drivers/nvdimm/namespace_devs.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/slab.h>
+#include <linux/nd.h>
+#include "nd.h"
+
+static void namespace_io_release(struct device *dev)
+{
+	struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+	kfree(nsio);
+}
+
+static struct device_type namespace_io_device_type = {
+	.name = "nd_namespace_io",
+	.release = namespace_io_release,
+};
+
+static ssize_t nstype_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+
+	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
+}
+static DEVICE_ATTR_RO(nstype);
+
+static struct attribute *nd_namespace_attributes[] = {
+	&dev_attr_nstype.attr,
+	NULL,
+};
+
+static struct attribute_group nd_namespace_attribute_group = {
+	.attrs = nd_namespace_attributes,
+};
+
+static const struct attribute_group *nd_namespace_attribute_groups[] = {
+	&nd_device_attribute_group,
+	&nd_namespace_attribute_group,
+	NULL,
+};
+
+static struct device **create_namespace_io(struct nd_region *nd_region)
+{
+	struct nd_namespace_io *nsio;
+	struct device *dev, **devs;
+	struct resource *res;
+
+	nsio = kzalloc(sizeof(*nsio), GFP_KERNEL);
+	if (!nsio)
+		return NULL;
+
+	devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
+	if (!devs) {
+		kfree(nsio);
+		return NULL;
+	}
+
+	dev = &nsio->dev;
+	dev->type = &namespace_io_device_type;
+	dev->parent = &nd_region->dev;
+	res = &nsio->res;
+	res->name = dev_name(&nd_region->dev);
+	res->flags = IORESOURCE_MEM;
+	res->start = nd_region->ndr_start;
+	res->end = res->start + nd_region->ndr_size - 1;
+
+	devs[0] = dev;
+	return devs;
+}
+
+int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
+{
+	struct device **devs = NULL;
+	int i;
+
+	*err = 0;
+	switch (nd_region_to_nstype(nd_region)) {
+	case ND_DEVICE_NAMESPACE_IO:
+		devs = create_namespace_io(nd_region);
+		break;
+	default:
+		break;
+	}
+
+	if (!devs)
+		return -ENODEV;
+
+	for (i = 0; devs[i]; i++) {
+		struct device *dev = devs[i];
+
+		dev_set_name(dev, "namespace%d.%d", nd_region->id, i);
+		dev->groups = nd_namespace_attribute_groups;
+		nd_device_register(dev);
+	}
+	kfree(devs);
+
+	return i;
+}
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 1d760bf24857..0e9b41fd2546 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -21,9 +21,11 @@ extern int nvdimm_major;
 
 struct nvdimm_bus {
 	struct nvdimm_bus_descriptor *nd_desc;
+	struct module *module;
 	struct list_head list;
 	struct device dev;
 	int id;
+	struct mutex reconfig_mutex;
 };
 
 struct nvdimm {
@@ -34,6 +36,9 @@ struct nvdimm {
 	int id;
 };
 
+bool is_nvdimm(struct device *dev);
+bool is_nd_blk(struct device *dev);
+bool is_nd_pmem(struct device *dev);
 struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev);
 int __init nvdimm_bus_init(void);
 void nvdimm_bus_exit(void);
@@ -43,5 +48,4 @@ void nd_synchronize(void);
 int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
 int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
 int nd_match_dimm(struct device *dev, void *data);
-bool is_nvdimm(struct device *dev);
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index ea0cca337aa6..bc5a08e36a25 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -23,6 +23,11 @@ struct nvdimm_drvdata {
 	void *data;
 };
 
+struct nd_region_namespaces {
+	int count;
+	int active;
+};
+
 struct nd_region {
 	struct device dev;
 	u16 ndr_mappings;
@@ -41,7 +46,15 @@ enum nd_async_mode {
 void nd_device_register(struct device *dev);
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
 int __init nvdimm_init(void);
+int __init nd_region_init(void);
 void nvdimm_exit(void);
+void nd_region_exit(void);
 int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
+struct nd_region *to_nd_region(struct device *dev);
+int nd_region_to_nstype(struct nd_region *nd_region);
+int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
+void nvdimm_bus_lock(struct device *dev);
+void nvdimm_bus_unlock(struct device *dev);
+bool is_nvdimm_bus_locked(struct device *dev);
 #endif /* __ND_H__ */
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
new file mode 100644
index 000000000000..ade3dba81afd
--- /dev/null
+++ b/drivers/nvdimm/region.c
@@ -0,0 +1,93 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/nd.h>
+#include "nd.h"
+
+static int nd_region_probe(struct device *dev)
+{
+	int err;
+	struct nd_region_namespaces *num_ns;
+	struct nd_region *nd_region = to_nd_region(dev);
+	int rc = nd_region_register_namespaces(nd_region, &err);
+
+	num_ns = devm_kzalloc(dev, sizeof(*num_ns), GFP_KERNEL);
+	if (!num_ns)
+		return -ENOMEM;
+
+	if (rc < 0)
+		return rc;
+
+	num_ns->active = rc;
+	num_ns->count = rc + err;
+	dev_set_drvdata(dev, num_ns);
+
+	if (err == 0)
+		return 0;
+
+	if (rc == err)
+		return -ENODEV;
+
+	/*
+	 * Given multiple namespaces per region, we do not want to
+	 * disable all the successfully registered peer namespaces upon
+	 * a single registration failure.  If userspace is missing a
+	 * namespace that it expects it can disable/re-enable the region
+	 * to retry discovery after correcting the failure.
+	 * <regionX>/namespaces returns the current
+	 * "<async-registered>/<total>" namespace count.
+	 */
+	dev_err(dev, "failed to register %d namespace%s, continuing...\n",
+			err, err == 1 ? "" : "s");
+	return 0;
+}
+
+static int child_unregister(struct device *dev, void *data)
+{
+	nd_device_unregister(dev, ND_SYNC);
+	return 0;
+}
+
+static int nd_region_remove(struct device *dev)
+{
+	/* flush attribute readers and disable */
+	nvdimm_bus_lock(dev);
+	dev_set_drvdata(dev, NULL);
+	nvdimm_bus_unlock(dev);
+
+	device_for_each_child(dev, NULL, child_unregister);
+	return 0;
+}
+
+static struct nd_device_driver nd_region_driver = {
+	.probe = nd_region_probe,
+	.remove = nd_region_remove,
+	.drv = {
+		.name = "nd_region",
+	},
+	.type = ND_DRIVER_REGION_BLK | ND_DRIVER_REGION_PMEM,
+};
+
+int __init nd_region_init(void)
+{
+	return nd_driver_register(&nd_region_driver);
+}
+
+void nd_region_exit(void)
+{
+	driver_unregister(&nd_region_driver.drv);
+}
+
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_PMEM);
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_REGION_BLK);
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4bda2e0df8f7..b5c5b9095b28 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -47,11 +47,16 @@ static struct device_type nd_volatile_device_type = {
 	.release = nd_region_release,
 };
 
-static bool is_nd_pmem(struct device *dev)
+bool is_nd_pmem(struct device *dev)
 {
 	return dev ? dev->type == &nd_pmem_device_type : false;
 }
 
+bool is_nd_blk(struct device *dev)
+{
+	return dev ? dev->type == &nd_blk_device_type : false;
+}
+
 struct nd_region *to_nd_region(struct device *dev)
 {
 	struct nd_region *nd_region = container_of(dev, struct nd_region, dev);
@@ -61,6 +66,37 @@ struct nd_region *to_nd_region(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(to_nd_region);
 
+/**
+ * nd_region_to_nstype() - region to an integer namespace type
+ * @nd_region: region-device to interrogate
+ *
+ * This is the 'nstype' attribute of a region as well, an input to the
+ * MODALIAS for namespace devices, and bit number for a nvdimm_bus to match
+ * namespace devices with namespace drivers.
+ */
+int nd_region_to_nstype(struct nd_region *nd_region)
+{
+	if (is_nd_pmem(&nd_region->dev)) {
+		u16 i, alias;
+
+		for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
+			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+			struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+			if (nvdimm->flags & NDD_ALIASING)
+				alias++;
+		}
+		if (alias)
+			return ND_DEVICE_NAMESPACE_PMEM;
+		else
+			return ND_DEVICE_NAMESPACE_IO;
+	} else if (is_nd_blk(&nd_region->dev)) {
+		return ND_DEVICE_NAMESPACE_BLK;
+	}
+
+	return 0;
+}
+
 static ssize_t size_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -88,9 +124,37 @@ static ssize_t mappings_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(mappings);
 
+static ssize_t nstype_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+
+	return sprintf(buf, "%d\n", nd_region_to_nstype(nd_region));
+}
+static DEVICE_ATTR_RO(nstype);
+
+static ssize_t init_namespaces_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region_namespaces *num_ns = dev_get_drvdata(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (num_ns)
+		rc = sprintf(buf, "%d/%d\n", num_ns->active, num_ns->count);
+	else
+		rc = -ENXIO;
+	nvdimm_bus_unlock(dev);
+
+	return rc;
+}
+static DEVICE_ATTR_RO(init_namespaces);
+
 static struct attribute *nd_region_attributes[] = {
 	&dev_attr_size.attr,
+	&dev_attr_nstype.attr,
 	&dev_attr_mappings.attr,
+	&dev_attr_init_namespaces.attr,
 	NULL,
 };
 
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 39e7e606092a..37f966aff386 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -71,8 +71,11 @@ struct nd_region_desc {
 
 struct nvdimm_bus;
 struct device;
-struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
-		struct nvdimm_bus_descriptor *nfit_desc);
+struct module;
+struct nvdimm_bus *__nvdimm_bus_register(struct device *parent,
+		struct nvdimm_bus_descriptor *nfit_desc, struct module *module);
+#define nvdimm_bus_register(parent, desc) \
+	__nvdimm_bus_register(parent, desc, THIS_MODULE)
 void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
diff --git a/include/linux/nd.h b/include/linux/nd.h
index e074f67e53a3..da70e9962197 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -26,6 +26,16 @@ static inline struct nd_device_driver *to_nd_device_driver(
 		struct device_driver *drv)
 {
 	return container_of(drv, struct nd_device_driver, drv);
+};
+
+struct nd_namespace_io {
+	struct device dev;
+	struct resource res;
+};
+
+static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
+{
+	return container_of(dev, struct nd_namespace_io, dev);
 }
 
 #define MODULE_ALIAS_ND_DEVICE(type) \
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 37640916d146..174b6371dcc1 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -177,8 +177,18 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 
 
 #define ND_DEVICE_DIMM 1            /* nd_dimm: container for "config data" */
+#define ND_DEVICE_REGION_PMEM 2     /* nd_region: (parent of PMEM namespaces) */
+#define ND_DEVICE_REGION_BLK 3      /* nd_region: (parent of BLK namespaces) */
+#define ND_DEVICE_NAMESPACE_IO 4    /* legacy persistent memory */
+#define ND_DEVICE_NAMESPACE_PMEM 5  /* PMEM namespace (may alias with BLK) */
+#define ND_DEVICE_NAMESPACE_BLK 6   /* BLK namespace (may alias with PMEM) */
 
 enum nd_driver_flags {
 	ND_DRIVER_DIMM            = 1 << ND_DEVICE_DIMM,
+	ND_DRIVER_REGION_PMEM     = 1 << ND_DEVICE_REGION_PMEM,
+	ND_DRIVER_REGION_BLK      = 1 << ND_DEVICE_REGION_BLK,
+	ND_DRIVER_NAMESPACE_IO    = 1 << ND_DEVICE_NAMESPACE_IO,
+	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
+	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
 };
 #endif /* __NDCTL_H__ */
-- 
cgit 


From 4a826c83db4edc040da3a66dbefd53f0cfcf457d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 9 Jun 2015 16:09:36 -0400
Subject: libnvdimm: namespace indices: read and validate

This on media label format [1] consists of two index blocks followed by
an array of labels.  None of these structures are ever updated in place.
A sequence number tracks the current active index and the next one to
write, while labels are written to free slots.

    +------------+
    |            |
    |  nsindex0  |
    |            |
    +------------+
    |            |
    |  nsindex1  |
    |            |
    +------------+
    |   label0   |
    +------------+
    |   label1   |
    +------------+
    |            |
     ....nslot...
    |            |
    +------------+
    |   labelN   |
    +------------+

After reading valid labels, store the dpa ranges they claim into
per-dimm resource trees.

[1]: http://pmem.io/documents/NVDIMM_Namespace_Spec.pdf

Cc: Neil Brown <neilb@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/Makefile    |   1 +
 drivers/nvdimm/dimm.c      |  23 ++++
 drivers/nvdimm/dimm_devs.c |  30 ++++-
 drivers/nvdimm/label.c     | 290 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/label.h     | 128 ++++++++++++++++++++
 drivers/nvdimm/nd.h        |  49 ++++++++
 include/uapi/linux/ndctl.h |   1 -
 7 files changed, 520 insertions(+), 2 deletions(-)
 create mode 100644 drivers/nvdimm/label.c
 create mode 100644 drivers/nvdimm/label.h

(limited to 'include/uapi/linux')

diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 4d2a27f52faa..abce98f87f16 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -10,3 +10,4 @@ libnvdimm-y += dimm.o
 libnvdimm-y += region_devs.o
 libnvdimm-y += region.o
 libnvdimm-y += namespace_devs.o
+libnvdimm-y += label.o
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index eb20fc2df32b..2df97c3c3b34 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/nd.h>
+#include "label.h"
 #include "nd.h"
 
 static void free_data(struct nvdimm_drvdata *ndd)
@@ -42,6 +43,11 @@ static int nvdimm_probe(struct device *dev)
 		return -ENOMEM;
 
 	dev_set_drvdata(dev, ndd);
+	ndd->dpa.name = dev_name(dev);
+	ndd->ns_current = -1;
+	ndd->ns_next = -1;
+	ndd->dpa.start = 0;
+	ndd->dpa.end = -1;
 	ndd->dev = dev;
 
 	rc = nvdimm_init_nsarea(ndd);
@@ -54,6 +60,17 @@ static int nvdimm_probe(struct device *dev)
 
 	dev_dbg(dev, "config data size: %d\n", ndd->nsarea.config_size);
 
+	nvdimm_bus_lock(dev);
+	ndd->ns_current = nd_label_validate(ndd);
+	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
+	nd_label_copy(ndd, to_next_namespace_index(ndd),
+			to_current_namespace_index(ndd));
+	rc = nd_label_reserve_dpa(ndd);
+	nvdimm_bus_unlock(dev);
+
+	if (rc)
+		goto err;
+
 	return 0;
 
  err:
@@ -64,7 +81,13 @@ static int nvdimm_probe(struct device *dev)
 static int nvdimm_remove(struct device *dev)
 {
 	struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
+	struct resource *res, *_r;
 
+	nvdimm_bus_lock(dev);
+	dev_set_drvdata(dev, NULL);
+	for_each_dpa_resource_safe(ndd, res, _r)
+		nvdimm_free_dpa(ndd, res);
+	nvdimm_bus_unlock(dev);
 	free_data(ndd);
 
 	return 0;
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index bdf8241b6525..d2ef02e4be6c 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -92,8 +92,12 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
 	if (ndd->data)
 		return 0;
 
-	if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0)
+	if (ndd->nsarea.status || ndd->nsarea.max_xfer == 0
+			|| ndd->nsarea.config_size < ND_LABEL_MIN_SIZE) {
+		dev_dbg(ndd->dev, "failed to init config data area: (%d:%d)\n",
+				ndd->nsarea.max_xfer, ndd->nsarea.config_size);
 		return -ENXIO;
+	}
 
 	ndd->data = kmalloc(ndd->nsarea.config_size, GFP_KERNEL);
 	if (!ndd->data)
@@ -243,6 +247,30 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 }
 EXPORT_SYMBOL_GPL(nvdimm_create);
 
+void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res)
+{
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
+	kfree(res->name);
+	__release_region(&ndd->dpa, res->start, resource_size(res));
+}
+
+struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id, resource_size_t start,
+		resource_size_t n)
+{
+	char *name = kmemdup(label_id, sizeof(*label_id), GFP_KERNEL);
+	struct resource *res;
+
+	if (!name)
+		return NULL;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
+	res = __request_region(&ndd->dpa, start, n, name, 0);
+	if (!res)
+		kfree(name);
+	return res;
+}
+
 static int count_dimms(struct device *dev, void *c)
 {
 	int *count = c;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
new file mode 100644
index 000000000000..db5d7492dc8d
--- /dev/null
+++ b/drivers/nvdimm/label.c
@@ -0,0 +1,290 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#include <linux/device.h>
+#include <linux/ndctl.h>
+#include <linux/io.h>
+#include <linux/nd.h>
+#include "nd-core.h"
+#include "label.h"
+#include "nd.h"
+
+static u32 best_seq(u32 a, u32 b)
+{
+	a &= NSINDEX_SEQ_MASK;
+	b &= NSINDEX_SEQ_MASK;
+
+	if (a == 0 || a == b)
+		return b;
+	else if (b == 0)
+		return a;
+	else if (nd_inc_seq(a) == b)
+		return b;
+	else
+		return a;
+}
+
+size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
+{
+	u32 index_span;
+
+	if (ndd->nsindex_size)
+		return ndd->nsindex_size;
+
+	/*
+	 * The minimum index space is 512 bytes, with that amount of
+	 * index we can describe ~1400 labels which is less than a byte
+	 * of overhead per label.  Round up to a byte of overhead per
+	 * label and determine the size of the index region.  Yes, this
+	 * starts to waste space at larger config_sizes, but it's
+	 * unlikely we'll ever see anything but 128K.
+	 */
+	index_span = ndd->nsarea.config_size / 129;
+	index_span /= NSINDEX_ALIGN * 2;
+	ndd->nsindex_size = index_span * NSINDEX_ALIGN;
+
+	return ndd->nsindex_size;
+}
+
+int nd_label_validate(struct nvdimm_drvdata *ndd)
+{
+	/*
+	 * On media label format consists of two index blocks followed
+	 * by an array of labels.  None of these structures are ever
+	 * updated in place.  A sequence number tracks the current
+	 * active index and the next one to write, while labels are
+	 * written to free slots.
+	 *
+	 *     +------------+
+	 *     |            |
+	 *     |  nsindex0  |
+	 *     |            |
+	 *     +------------+
+	 *     |            |
+	 *     |  nsindex1  |
+	 *     |            |
+	 *     +------------+
+	 *     |   label0   |
+	 *     +------------+
+	 *     |   label1   |
+	 *     +------------+
+	 *     |            |
+	 *      ....nslot...
+	 *     |            |
+	 *     +------------+
+	 *     |   labelN   |
+	 *     +------------+
+	 */
+	struct nd_namespace_index *nsindex[] = {
+		to_namespace_index(ndd, 0),
+		to_namespace_index(ndd, 1),
+	};
+	const int num_index = ARRAY_SIZE(nsindex);
+	struct device *dev = ndd->dev;
+	bool valid[2] = { 0 };
+	int i, num_valid = 0;
+	u32 seq;
+
+	for (i = 0; i < num_index; i++) {
+		u32 nslot;
+		u8 sig[NSINDEX_SIG_LEN];
+		u64 sum_save, sum, size;
+
+		memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
+		if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
+			dev_dbg(dev, "%s: nsindex%d signature invalid\n",
+					__func__, i);
+			continue;
+		}
+		sum_save = __le64_to_cpu(nsindex[i]->checksum);
+		nsindex[i]->checksum = __cpu_to_le64(0);
+		sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
+		nsindex[i]->checksum = __cpu_to_le64(sum_save);
+		if (sum != sum_save) {
+			dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
+					__func__, i);
+			continue;
+		}
+
+		seq = __le32_to_cpu(nsindex[i]->seq);
+		if ((seq & NSINDEX_SEQ_MASK) == 0) {
+			dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
+					__func__, i, seq);
+			continue;
+		}
+
+		/* sanity check the index against expected values */
+		if (__le64_to_cpu(nsindex[i]->myoff)
+				!= i * sizeof_namespace_index(ndd)) {
+			dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
+					__func__, i, (unsigned long long)
+					__le64_to_cpu(nsindex[i]->myoff));
+			continue;
+		}
+		if (__le64_to_cpu(nsindex[i]->otheroff)
+				!= (!i) * sizeof_namespace_index(ndd)) {
+			dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
+					__func__, i, (unsigned long long)
+					__le64_to_cpu(nsindex[i]->otheroff));
+			continue;
+		}
+
+		size = __le64_to_cpu(nsindex[i]->mysize);
+		if (size > sizeof_namespace_index(ndd)
+				|| size < sizeof(struct nd_namespace_index)) {
+			dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
+					__func__, i, size);
+			continue;
+		}
+
+		nslot = __le32_to_cpu(nsindex[i]->nslot);
+		if (nslot * sizeof(struct nd_namespace_label)
+				+ 2 * sizeof_namespace_index(ndd)
+				> ndd->nsarea.config_size) {
+			dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
+					__func__, i, nslot,
+					ndd->nsarea.config_size);
+			continue;
+		}
+		valid[i] = true;
+		num_valid++;
+	}
+
+	switch (num_valid) {
+	case 0:
+		break;
+	case 1:
+		for (i = 0; i < num_index; i++)
+			if (valid[i])
+				return i;
+		/* can't have num_valid > 0 but valid[] = { false, false } */
+		WARN_ON(1);
+		break;
+	default:
+		/* pick the best index... */
+		seq = best_seq(__le32_to_cpu(nsindex[0]->seq),
+				__le32_to_cpu(nsindex[1]->seq));
+		if (seq == (__le32_to_cpu(nsindex[1]->seq) & NSINDEX_SEQ_MASK))
+			return 1;
+		else
+			return 0;
+		break;
+	}
+
+	return -1;
+}
+
+void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
+		struct nd_namespace_index *src)
+{
+	if (dst && src)
+		/* pass */;
+	else
+		return;
+
+	memcpy(dst, src, sizeof_namespace_index(ndd));
+}
+
+static struct nd_namespace_label *nd_label_base(struct nvdimm_drvdata *ndd)
+{
+	void *base = to_namespace_index(ndd, 0);
+
+	return base + 2 * sizeof_namespace_index(ndd);
+}
+
+#define for_each_clear_bit_le(bit, addr, size) \
+	for ((bit) = find_next_zero_bit_le((addr), (size), 0);  \
+	     (bit) < (size);                                    \
+	     (bit) = find_next_zero_bit_le((addr), (size), (bit) + 1))
+
+/**
+ * preamble_current - common variable initialization for nd_label_* routines
+ * @ndd: dimm container for the relevant label set
+ * @nsindex_out: on return set to the currently active namespace index
+ * @free: on return set to the free label bitmap in the index
+ * @nslot: on return set to the number of slots in the label space
+ */
+static bool preamble_current(struct nvdimm_drvdata *ndd,
+		struct nd_namespace_index **nsindex_out,
+		unsigned long **free, u32 *nslot)
+{
+	struct nd_namespace_index *nsindex;
+
+	nsindex = to_current_namespace_index(ndd);
+	if (nsindex == NULL)
+		return false;
+
+	*free = (unsigned long *) nsindex->free;
+	*nslot = __le32_to_cpu(nsindex->nslot);
+	*nsindex_out = nsindex;
+
+	return true;
+}
+
+static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
+{
+	if (!label_id || !uuid)
+		return NULL;
+	snprintf(label_id->id, ND_LABEL_ID_SIZE, "%s-%pUb",
+			flags & NSLABEL_FLAG_LOCAL ? "blk" : "pmem", uuid);
+	return label_id->id;
+}
+
+static bool slot_valid(struct nd_namespace_label *nd_label, u32 slot)
+{
+	/* check that we are written where we expect to be written */
+	if (slot != __le32_to_cpu(nd_label->slot))
+		return false;
+
+	/* check that DPA allocations are page aligned */
+	if ((__le64_to_cpu(nd_label->dpa)
+				| __le64_to_cpu(nd_label->rawsize)) % SZ_4K)
+		return false;
+
+	return true;
+}
+
+int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+
+	if (!preamble_current(ndd, &nsindex, &free, &nslot))
+		return 0; /* no label, nothing to reserve */
+
+	for_each_clear_bit_le(slot, free, nslot) {
+		struct nd_namespace_label *nd_label;
+		struct nd_region *nd_region = NULL;
+		u8 label_uuid[NSLABEL_UUID_LEN];
+		struct nd_label_id label_id;
+		struct resource *res;
+		u32 flags;
+
+		nd_label = nd_label_base(ndd) + slot;
+
+		if (!slot_valid(nd_label, slot))
+			continue;
+
+		memcpy(label_uuid, nd_label->uuid, NSLABEL_UUID_LEN);
+		flags = __le32_to_cpu(nd_label->flags);
+		nd_label_gen_id(&label_id, label_uuid, flags);
+		res = nvdimm_allocate_dpa(ndd, &label_id,
+				__le64_to_cpu(nd_label->dpa),
+				__le64_to_cpu(nd_label->rawsize));
+		nd_dbg_dpa(nd_region, ndd, res, "reserve\n");
+		if (!res)
+			return -EBUSY;
+	}
+
+	return 0;
+}
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
new file mode 100644
index 000000000000..d6aa0d5c6b4e
--- /dev/null
+++ b/drivers/nvdimm/label.h
@@ -0,0 +1,128 @@
+/*
+ * Copyright(c) 2013-2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+#ifndef __LABEL_H__
+#define __LABEL_H__
+
+#include <linux/ndctl.h>
+#include <linux/sizes.h>
+#include <linux/io.h>
+
+enum {
+	NSINDEX_SIG_LEN = 16,
+	NSINDEX_ALIGN = 256,
+	NSINDEX_SEQ_MASK = 0x3,
+	NSLABEL_UUID_LEN = 16,
+	NSLABEL_NAME_LEN = 64,
+	NSLABEL_FLAG_ROLABEL = 0x1,  /* read-only label */
+	NSLABEL_FLAG_LOCAL = 0x2,    /* DIMM-local namespace */
+	NSLABEL_FLAG_BTT = 0x4,      /* namespace contains a BTT */
+	NSLABEL_FLAG_UPDATING = 0x8, /* label being updated */
+	BTT_ALIGN = 4096,            /* all btt structures */
+	BTTINFO_SIG_LEN = 16,
+	BTTINFO_UUID_LEN = 16,
+	BTTINFO_FLAG_ERROR = 0x1,    /* error state (read-only) */
+	BTTINFO_MAJOR_VERSION = 1,
+	ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
+	ND_LABEL_ID_SIZE = 50,
+};
+
+static const char NSINDEX_SIGNATURE[] = "NAMESPACE_INDEX\0";
+
+/**
+ * struct nd_namespace_index - label set superblock
+ * @sig: NAMESPACE_INDEX\0
+ * @flags: placeholder
+ * @seq: sequence number for this index
+ * @myoff: offset of this index in label area
+ * @mysize: size of this index struct
+ * @otheroff: offset of other index
+ * @labeloff: offset of first label slot
+ * @nslot: total number of label slots
+ * @major: label area major version
+ * @minor: label area minor version
+ * @checksum: fletcher64 of all fields
+ * @free[0]: bitmap, nlabel bits
+ *
+ * The size of free[] is rounded up so the total struct size is a
+ * multiple of NSINDEX_ALIGN bytes.  Any bits this allocates beyond
+ * nlabel bits must be zero.
+ */
+struct nd_namespace_index {
+	u8 sig[NSINDEX_SIG_LEN];
+	__le32 flags;
+	__le32 seq;
+	__le64 myoff;
+	__le64 mysize;
+	__le64 otheroff;
+	__le64 labeloff;
+	__le32 nslot;
+	__le16 major;
+	__le16 minor;
+	__le64 checksum;
+	u8 free[0];
+};
+
+/**
+ * struct nd_namespace_label - namespace superblock
+ * @uuid: UUID per RFC 4122
+ * @name: optional name (NULL-terminated)
+ * @flags: see NSLABEL_FLAG_*
+ * @nlabel: num labels to describe this ns
+ * @position: labels position in set
+ * @isetcookie: interleave set cookie
+ * @lbasize: LBA size in bytes or 0 for pmem
+ * @dpa: DPA of NVM range on this DIMM
+ * @rawsize: size of namespace
+ * @slot: slot of this label in label area
+ * @unused: must be zero
+ */
+struct nd_namespace_label {
+	u8 uuid[NSLABEL_UUID_LEN];
+	u8 name[NSLABEL_NAME_LEN];
+	__le32 flags;
+	__le16 nlabel;
+	__le16 position;
+	__le64 isetcookie;
+	__le64 lbasize;
+	__le64 dpa;
+	__le64 rawsize;
+	__le32 slot;
+	__le32 unused;
+};
+
+/**
+ * struct nd_label_id - identifier string for dpa allocation
+ * @id: "{blk|pmem}-<namespace uuid>"
+ */
+struct nd_label_id {
+	char id[ND_LABEL_ID_SIZE];
+};
+
+/*
+ * If the 'best' index is invalid, so is the 'next' index.  Otherwise,
+ * the next index is MOD(index+1, 2)
+ */
+static inline int nd_label_next_nsindex(int index)
+{
+	if (index < 0)
+		return -1;
+
+	return (index + 1) % 2;
+}
+
+struct nvdimm_drvdata;
+int nd_label_validate(struct nvdimm_drvdata *ndd);
+void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
+		struct nd_namespace_index *src);
+size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
+#endif /* __LABEL_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 0285e4588b03..401fa0d5b6ea 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -16,11 +16,15 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
+#include "label.h"
 
 struct nvdimm_drvdata {
 	struct device *dev;
+	int nsindex_size;
 	struct nd_cmd_get_config_size nsarea;
 	void *data;
+	int ns_current, ns_next;
+	struct resource dpa;
 };
 
 struct nd_region_namespaces {
@@ -28,6 +32,37 @@ struct nd_region_namespaces {
 	int active;
 };
 
+static inline struct nd_namespace_index *to_namespace_index(
+		struct nvdimm_drvdata *ndd, int i)
+{
+	if (i < 0)
+		return NULL;
+
+	return ndd->data + sizeof_namespace_index(ndd) * i;
+}
+
+static inline struct nd_namespace_index *to_current_namespace_index(
+		struct nvdimm_drvdata *ndd)
+{
+	return to_namespace_index(ndd, ndd->ns_current);
+}
+
+static inline struct nd_namespace_index *to_next_namespace_index(
+		struct nvdimm_drvdata *ndd)
+{
+	return to_namespace_index(ndd, ndd->ns_next);
+}
+
+#define nd_dbg_dpa(r, d, res, fmt, arg...) \
+	dev_dbg((r) ? &(r)->dev : (d)->dev, "%s: %.13s: %#llx @ %#llx " fmt, \
+		(r) ? dev_name((d)->dev) : "", res ? res->name : "null", \
+		(unsigned long long) (res ? resource_size(res) : 0), \
+		(unsigned long long) (res ? res->start : 0), ##arg)
+
+#define for_each_dpa_resource_safe(ndd, res, next) \
+	for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \
+			res; res = next, next = next ? next->sibling : NULL)
+
 struct nd_region {
 	struct device dev;
 	u16 ndr_mappings;
@@ -39,6 +74,15 @@ struct nd_region {
 	struct nd_mapping mapping[0];
 };
 
+/*
+ * Lookup next in the repeating sequence of 01, 10, and 11.
+ */
+static inline unsigned nd_inc_seq(unsigned seq)
+{
+	static const unsigned next[] = { 0, 2, 3, 1 };
+
+	return next[seq & 3];
+}
 enum nd_async_mode {
 	ND_SYNC,
 	ND_ASYNC,
@@ -58,4 +102,9 @@ int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
 void nvdimm_bus_lock(struct device *dev);
 void nvdimm_bus_unlock(struct device *dev);
 bool is_nvdimm_bus_locked(struct device *dev);
+int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
+void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res);
+struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id, resource_size_t start,
+		resource_size_t n);
 #endif /* __ND_H__ */
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 174b6371dcc1..1357a87b8714 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -175,7 +175,6 @@ static inline const char *nvdimm_cmd_name(unsigned cmd)
 #define ND_IOCTL_ARS_STATUS		_IOWR(ND_IOCTL, ND_CMD_ARS_STATUS,\
 					struct nd_cmd_ars_status)
 
-
 #define ND_DEVICE_DIMM 1            /* nd_dimm: container for "config data" */
 #define ND_DEVICE_REGION_PMEM 2     /* nd_region: (parent of PMEM namespaces) */
 #define ND_DEVICE_REGION_BLK 3      /* nd_region: (parent of BLK namespaces) */
-- 
cgit 


From bf9bccc14c05dae8caba29df6187c731710f5380 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 17 Jun 2015 17:14:46 -0400
Subject: libnvdimm: pmem label sets and namespace instantiation.

A complete label set is a PMEM-label per-dimm per-interleave-set where
all the UUIDs match and the interleave set cookie matches the hosting
interleave set.

Present sysfs attributes for manipulation of a PMEM-namespace's
'alt_name', 'uuid', and 'size' attributes.  A later patch will make
these settings persistent by writing back the label.

Note that PMEM allocations grow forwards from the start of an interleave
set (lowest dimm-physical-address (DPA)).  BLK-namespaces that alias
with a PMEM interleave set will grow allocations backward from the
highest DPA.

Cc: Greg KH <gregkh@linuxfoundation.org>
Cc: Neil Brown <neilb@suse.de>
Acked-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/bus.c            |    8 +-
 drivers/nvdimm/core.c           |   64 +++
 drivers/nvdimm/dimm.c           |   21 +-
 drivers/nvdimm/dimm_devs.c      |  137 ++++++
 drivers/nvdimm/label.c          |   55 ++-
 drivers/nvdimm/label.h          |    2 +
 drivers/nvdimm/namespace_devs.c | 1002 ++++++++++++++++++++++++++++++++++++++-
 drivers/nvdimm/nd-core.h        |   12 +
 drivers/nvdimm/nd.h             |   17 +
 drivers/nvdimm/pmem.c           |   20 +-
 drivers/nvdimm/region.c         |    3 +
 drivers/nvdimm/region_devs.c    |  158 +++++-
 include/linux/libnvdimm.h       |   10 +
 include/linux/nd.h              |   24 +
 include/uapi/linux/ndctl.h      |    4 +
 15 files changed, 1506 insertions(+), 31 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index ffb43cada625..fddc3f2a8f80 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -97,6 +97,8 @@ static int nvdimm_bus_probe(struct device *dev)
 	rc = nd_drv->probe(dev);
 	if (rc == 0)
 		nd_region_probe_success(nvdimm_bus, dev);
+	else
+		nd_region_disable(nvdimm_bus, dev);
 	nvdimm_bus_probe_end(nvdimm_bus);
 
 	dev_dbg(&nvdimm_bus->dev, "%s.probe(%s) = %d\n", dev->driver->name,
@@ -381,8 +383,10 @@ u32 nd_cmd_out_size(struct nvdimm *nvdimm, int cmd,
 }
 EXPORT_SYMBOL_GPL(nd_cmd_out_size);
 
-static void wait_nvdimm_bus_probe_idle(struct nvdimm_bus *nvdimm_bus)
+void wait_nvdimm_bus_probe_idle(struct device *dev)
 {
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
 	do {
 		if (nvdimm_bus->probe_active == 0)
 			break;
@@ -402,7 +406,7 @@ static int nd_cmd_clear_to_send(struct nvdimm *nvdimm, unsigned int cmd)
 		return 0;
 
 	nvdimm_bus = walk_to_nvdimm_bus(&nvdimm->dev);
-	wait_nvdimm_bus_probe_idle(nvdimm_bus);
+	wait_nvdimm_bus_probe_idle(&nvdimm_bus->dev);
 
 	if (atomic_read(&nvdimm->busy))
 		return -EBUSY;
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 7806eaaf4707..cf99cce8ef33 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -14,6 +14,7 @@
 #include <linux/export.h>
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/ctype.h>
 #include <linux/ndctl.h>
 #include <linux/mutex.h>
 #include <linux/slab.h>
@@ -109,6 +110,69 @@ struct nvdimm_bus *walk_to_nvdimm_bus(struct device *nd_dev)
 	return NULL;
 }
 
+static bool is_uuid_sep(char sep)
+{
+	if (sep == '\n' || sep == '-' || sep == ':' || sep == '\0')
+		return true;
+	return false;
+}
+
+static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
+		size_t len)
+{
+	const char *str = buf;
+	u8 uuid[16];
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
+			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
+					__func__, i, str - buf, str[0],
+					str + 1 - buf, str[1]);
+			return -EINVAL;
+		}
+
+		uuid[i] = (hex_to_bin(str[0]) << 4) | hex_to_bin(str[1]);
+		str += 2;
+		if (is_uuid_sep(*str))
+			str++;
+	}
+
+	memcpy(uuid_out, uuid, sizeof(uuid));
+	return 0;
+}
+
+/**
+ * nd_uuid_store: common implementation for writing 'uuid' sysfs attributes
+ * @dev: container device for the uuid property
+ * @uuid_out: uuid buffer to replace
+ * @buf: raw sysfs buffer to parse
+ *
+ * Enforce that uuids can only be changed while the device is disabled
+ * (driver detached)
+ * LOCKING: expects device_lock() is held on entry
+ */
+int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+		size_t len)
+{
+	u8 uuid[16];
+	int rc;
+
+	if (dev->driver)
+		return -EBUSY;
+
+	rc = nd_uuid_parse(dev, uuid, buf, len);
+	if (rc)
+		return rc;
+
+	kfree(*uuid_out);
+	*uuid_out = kmemdup(uuid, sizeof(uuid), GFP_KERNEL);
+	if (!(*uuid_out))
+		return -ENOMEM;
+
+	return 0;
+}
+
 static ssize_t commands_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 2df97c3c3b34..71d12bb67339 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -21,18 +21,6 @@
 #include "label.h"
 #include "nd.h"
 
-static void free_data(struct nvdimm_drvdata *ndd)
-{
-	if (!ndd)
-		return;
-
-	if (ndd->data && is_vmalloc_addr(ndd->data))
-		vfree(ndd->data);
-	else
-		kfree(ndd->data);
-	kfree(ndd);
-}
-
 static int nvdimm_probe(struct device *dev)
 {
 	struct nvdimm_drvdata *ndd;
@@ -49,6 +37,8 @@ static int nvdimm_probe(struct device *dev)
 	ndd->dpa.start = 0;
 	ndd->dpa.end = -1;
 	ndd->dev = dev;
+	get_device(dev);
+	kref_init(&ndd->kref);
 
 	rc = nvdimm_init_nsarea(ndd);
 	if (rc)
@@ -74,21 +64,18 @@ static int nvdimm_probe(struct device *dev)
 	return 0;
 
  err:
-	free_data(ndd);
+	put_ndd(ndd);
 	return rc;
 }
 
 static int nvdimm_remove(struct device *dev)
 {
 	struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
-	struct resource *res, *_r;
 
 	nvdimm_bus_lock(dev);
 	dev_set_drvdata(dev, NULL);
-	for_each_dpa_resource_safe(ndd, res, _r)
-		nvdimm_free_dpa(ndd, res);
 	nvdimm_bus_unlock(dev);
-	free_data(ndd);
+	put_ndd(ndd);
 
 	return 0;
 }
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index d2ef02e4be6c..b55acef179ba 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -159,6 +159,48 @@ struct nvdimm *to_nvdimm(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(to_nvdimm);
 
+struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping)
+{
+	struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm->dev));
+
+	return dev_get_drvdata(&nvdimm->dev);
+}
+EXPORT_SYMBOL(to_ndd);
+
+void nvdimm_drvdata_release(struct kref *kref)
+{
+	struct nvdimm_drvdata *ndd = container_of(kref, typeof(*ndd), kref);
+	struct device *dev = ndd->dev;
+	struct resource *res, *_r;
+
+	dev_dbg(dev, "%s\n", __func__);
+
+	nvdimm_bus_lock(dev);
+	for_each_dpa_resource_safe(ndd, res, _r)
+		nvdimm_free_dpa(ndd, res);
+	nvdimm_bus_unlock(dev);
+
+	if (ndd->data && is_vmalloc_addr(ndd->data))
+		vfree(ndd->data);
+	else
+		kfree(ndd->data);
+	kfree(ndd);
+	put_device(dev);
+}
+
+void get_ndd(struct nvdimm_drvdata *ndd)
+{
+	kref_get(&ndd->kref);
+}
+
+void put_ndd(struct nvdimm_drvdata *ndd)
+{
+	if (ndd)
+		kref_put(&ndd->kref, nvdimm_drvdata_release);
+}
+
 const char *nvdimm_name(struct nvdimm *nvdimm)
 {
 	return dev_name(&nvdimm->dev);
@@ -247,6 +289,83 @@ struct nvdimm *nvdimm_create(struct nvdimm_bus *nvdimm_bus, void *provider_data,
 }
 EXPORT_SYMBOL_GPL(nvdimm_create);
 
+/**
+ * nd_pmem_available_dpa - for the given dimm+region account unallocated dpa
+ * @nd_mapping: container of dpa-resource-root + labels
+ * @nd_region: constrain available space check to this reference region
+ * @overlap: calculate available space assuming this level of overlap
+ *
+ * Validate that a PMEM label, if present, aligns with the start of an
+ * interleave set and truncate the available size at the lowest BLK
+ * overlap point.
+ *
+ * The expectation is that this routine is called multiple times as it
+ * probes for the largest BLK encroachment for any single member DIMM of
+ * the interleave set.  Once that value is determined the PMEM-limit for
+ * the set can be established.
+ */
+resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, resource_size_t *overlap)
+{
+	resource_size_t map_start, map_end, busy = 0, available, blk_start;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	struct resource *res;
+	const char *reason;
+
+	if (!ndd)
+		return 0;
+
+	map_start = nd_mapping->start;
+	map_end = map_start + nd_mapping->size - 1;
+	blk_start = max(map_start, map_end + 1 - *overlap);
+	for_each_dpa_resource(ndd, res)
+		if (res->start >= map_start && res->start < map_end) {
+			if (strncmp(res->name, "blk", 3) == 0)
+				blk_start = min(blk_start, res->start);
+			else if (res->start != map_start) {
+				reason = "misaligned to iset";
+				goto err;
+			} else {
+				if (busy) {
+					reason = "duplicate overlapping PMEM reservations?";
+					goto err;
+				}
+				busy += resource_size(res);
+				continue;
+			}
+		} else if (res->end >= map_start && res->end <= map_end) {
+			if (strncmp(res->name, "blk", 3) == 0) {
+				/*
+				 * If a BLK allocation overlaps the start of
+				 * PMEM the entire interleave set may now only
+				 * be used for BLK.
+				 */
+				blk_start = map_start;
+			} else {
+				reason = "misaligned to iset";
+				goto err;
+			}
+		} else if (map_start > res->start && map_start < res->end) {
+			/* total eclipse of the mapping */
+			busy += nd_mapping->size;
+			blk_start = map_start;
+		}
+
+	*overlap = map_end + 1 - blk_start;
+	available = blk_start - map_start;
+	if (busy < available)
+		return available - busy;
+	return 0;
+
+ err:
+	/*
+	 * Something is wrong, PMEM must align with the start of the
+	 * interleave set, and there can only be one allocation per set.
+	 */
+	nd_dbg_dpa(nd_region, ndd, res, "%s\n", reason);
+	return 0;
+}
+
 void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res)
 {
 	WARN_ON_ONCE(!is_nvdimm_bus_locked(ndd->dev));
@@ -271,6 +390,24 @@ struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
 	return res;
 }
 
+/**
+ * nvdimm_allocated_dpa - sum up the dpa currently allocated to this label_id
+ * @nvdimm: container of dpa-resource-root + labels
+ * @label_id: dpa resource name of the form {pmem|blk}-<human readable uuid>
+ */
+resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id)
+{
+	resource_size_t allocated = 0;
+	struct resource *res;
+
+	for_each_dpa_resource(ndd, res)
+		if (strcmp(res->name, label_id->id) == 0)
+			allocated += resource_size(res);
+
+	return allocated;
+}
+
 static int count_dimms(struct device *dev, void *c)
 {
 	int *count = c;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index db5d7492dc8d..1a3bcd27a57a 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -230,7 +230,7 @@ static bool preamble_current(struct nvdimm_drvdata *ndd,
 	return true;
 }
 
-static char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
+char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags)
 {
 	if (!label_id || !uuid)
 		return NULL;
@@ -288,3 +288,56 @@ int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd)
 
 	return 0;
 }
+
+int nd_label_active_count(struct nvdimm_drvdata *ndd)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+	int count = 0;
+
+	if (!preamble_current(ndd, &nsindex, &free, &nslot))
+		return 0;
+
+	for_each_clear_bit_le(slot, free, nslot) {
+		struct nd_namespace_label *nd_label;
+
+		nd_label = nd_label_base(ndd) + slot;
+
+		if (!slot_valid(nd_label, slot)) {
+			u32 label_slot = __le32_to_cpu(nd_label->slot);
+			u64 size = __le64_to_cpu(nd_label->rawsize);
+			u64 dpa = __le64_to_cpu(nd_label->dpa);
+
+			dev_dbg(ndd->dev,
+				"%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
+					__func__, slot, label_slot, dpa, size);
+			continue;
+		}
+		count++;
+	}
+	return count;
+}
+
+struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n)
+{
+	struct nd_namespace_index *nsindex;
+	unsigned long *free;
+	u32 nslot, slot;
+
+	if (!preamble_current(ndd, &nsindex, &free, &nslot))
+		return NULL;
+
+	for_each_clear_bit_le(slot, free, nslot) {
+		struct nd_namespace_label *nd_label;
+
+		nd_label = nd_label_base(ndd) + slot;
+		if (!slot_valid(nd_label, slot))
+			continue;
+
+		if (n-- == 0)
+			return nd_label_base(ndd) + slot;
+	}
+
+	return NULL;
+}
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index d6aa0d5c6b4e..8ee1376526c7 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -125,4 +125,6 @@ int nd_label_validate(struct nvdimm_drvdata *ndd);
 void nd_label_copy(struct nvdimm_drvdata *ndd, struct nd_namespace_index *dst,
 		struct nd_namespace_index *src);
 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd);
+int nd_label_active_count(struct nvdimm_drvdata *ndd);
+struct nd_namespace_label *nd_label_active(struct nvdimm_drvdata *ndd, int n);
 #endif /* __LABEL_H__ */
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 4f653d1e61ad..5d81032fcfc5 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -14,6 +14,7 @@
 #include <linux/device.h>
 #include <linux/slab.h>
 #include <linux/nd.h>
+#include "nd-core.h"
 #include "nd.h"
 
 static void namespace_io_release(struct device *dev)
@@ -23,11 +24,50 @@ static void namespace_io_release(struct device *dev)
 	kfree(nsio);
 }
 
+static void namespace_pmem_release(struct device *dev)
+{
+	struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+	kfree(nspm->alt_name);
+	kfree(nspm->uuid);
+	kfree(nspm);
+}
+
+static void namespace_blk_release(struct device *dev)
+{
+	/* TODO: blk namespace support */
+}
+
 static struct device_type namespace_io_device_type = {
 	.name = "nd_namespace_io",
 	.release = namespace_io_release,
 };
 
+static struct device_type namespace_pmem_device_type = {
+	.name = "nd_namespace_pmem",
+	.release = namespace_pmem_release,
+};
+
+static struct device_type namespace_blk_device_type = {
+	.name = "nd_namespace_blk",
+	.release = namespace_blk_release,
+};
+
+static bool is_namespace_pmem(struct device *dev)
+{
+	return dev ? dev->type == &namespace_pmem_device_type : false;
+}
+
+static bool is_namespace_blk(struct device *dev)
+{
+	return dev ? dev->type == &namespace_blk_device_type : false;
+}
+
+static bool is_namespace_io(struct device *dev)
+{
+	return dev ? dev->type == &namespace_io_device_type : false;
+}
+
 static ssize_t nstype_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -37,13 +77,676 @@ static ssize_t nstype_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(nstype);
 
+static ssize_t __alt_name_store(struct device *dev, const char *buf,
+		const size_t len)
+{
+	char *input, *pos, *alt_name, **ns_altname;
+	ssize_t rc;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		ns_altname = &nspm->alt_name;
+	} else if (is_namespace_blk(dev)) {
+		/* TODO: blk namespace support */
+		return -ENXIO;
+	} else
+		return -ENXIO;
+
+	if (dev->driver)
+		return -EBUSY;
+
+	input = kmemdup(buf, len + 1, GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	input[len] = '\0';
+	pos = strim(input);
+	if (strlen(pos) + 1 > NSLABEL_NAME_LEN) {
+		rc = -EINVAL;
+		goto out;
+	}
+
+	alt_name = kzalloc(NSLABEL_NAME_LEN, GFP_KERNEL);
+	if (!alt_name) {
+		rc = -ENOMEM;
+		goto out;
+	}
+	kfree(*ns_altname);
+	*ns_altname = alt_name;
+	sprintf(*ns_altname, "%s", pos);
+	rc = len;
+
+out:
+	kfree(input);
+	return rc;
+}
+
+static ssize_t alt_name_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	ssize_t rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	rc = __alt_name_store(dev, buf, len);
+	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc;
+}
+
+static ssize_t alt_name_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	char *ns_altname;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		ns_altname = nspm->alt_name;
+	} else if (is_namespace_blk(dev)) {
+		/* TODO: blk namespace support */
+		return -ENXIO;
+	} else
+		return -ENXIO;
+
+	return sprintf(buf, "%s\n", ns_altname ? ns_altname : "");
+}
+static DEVICE_ATTR_RW(alt_name);
+
+static int scan_free(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
+		resource_size_t n)
+{
+	bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	int rc = 0;
+
+	while (n) {
+		struct resource *res, *last;
+		resource_size_t new_start;
+
+		last = NULL;
+		for_each_dpa_resource(ndd, res)
+			if (strcmp(res->name, label_id->id) == 0)
+				last = res;
+		res = last;
+		if (!res)
+			return 0;
+
+		if (n >= resource_size(res)) {
+			n -= resource_size(res);
+			nd_dbg_dpa(nd_region, ndd, res, "delete %d\n", rc);
+			nvdimm_free_dpa(ndd, res);
+			/* retry with last resource deleted */
+			continue;
+		}
+
+		/*
+		 * Keep BLK allocations relegated to high DPA as much as
+		 * possible
+		 */
+		if (is_blk)
+			new_start = res->start + n;
+		else
+			new_start = res->start;
+
+		rc = adjust_resource(res, new_start, resource_size(res) - n);
+		nd_dbg_dpa(nd_region, ndd, res, "shrink %d\n", rc);
+		break;
+	}
+
+	return rc;
+}
+
+/**
+ * shrink_dpa_allocation - for each dimm in region free n bytes for label_id
+ * @nd_region: the set of dimms to reclaim @n bytes from
+ * @label_id: unique identifier for the namespace consuming this dpa range
+ * @n: number of bytes per-dimm to release
+ *
+ * Assumes resources are ordered.  Starting from the end try to
+ * adjust_resource() the allocation to @n, but if @n is larger than the
+ * allocation delete it and find the 'new' last allocation in the label
+ * set.
+ */
+static int shrink_dpa_allocation(struct nd_region *nd_region,
+		struct nd_label_id *label_id, resource_size_t n)
+{
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		int rc;
+
+		rc = scan_free(nd_region, nd_mapping, label_id, n);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static resource_size_t init_dpa_allocation(struct nd_label_id *label_id,
+		struct nd_region *nd_region, struct nd_mapping *nd_mapping,
+		resource_size_t n)
+{
+	bool is_blk = strncmp(label_id->id, "blk", 3) == 0;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	resource_size_t first_dpa;
+	struct resource *res;
+	int rc = 0;
+
+	/* allocate blk from highest dpa first */
+	if (is_blk)
+		first_dpa = nd_mapping->start + nd_mapping->size - n;
+	else
+		first_dpa = nd_mapping->start;
+
+	/* first resource allocation for this label-id or dimm */
+	res = nvdimm_allocate_dpa(ndd, label_id, first_dpa, n);
+	if (!res)
+		rc = -EBUSY;
+
+	nd_dbg_dpa(nd_region, ndd, res, "init %d\n", rc);
+	return rc ? n : 0;
+}
+
+static bool space_valid(bool is_pmem, struct nd_label_id *label_id,
+		struct resource *res)
+{
+	/*
+	 * For BLK-space any space is valid, for PMEM-space, it must be
+	 * contiguous with an existing allocation.
+	 */
+	if (!is_pmem)
+		return true;
+	if (!res || strcmp(res->name, label_id->id) == 0)
+		return true;
+	return false;
+}
+
+enum alloc_loc {
+	ALLOC_ERR = 0, ALLOC_BEFORE, ALLOC_MID, ALLOC_AFTER,
+};
+
+static resource_size_t scan_allocate(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, struct nd_label_id *label_id,
+		resource_size_t n)
+{
+	resource_size_t mapping_end = nd_mapping->start + nd_mapping->size - 1;
+	bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+	struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+	const resource_size_t to_allocate = n;
+	struct resource *res;
+	int first;
+
+ retry:
+	first = 0;
+	for_each_dpa_resource(ndd, res) {
+		resource_size_t allocate, available = 0, free_start, free_end;
+		struct resource *next = res->sibling, *new_res = NULL;
+		enum alloc_loc loc = ALLOC_ERR;
+		const char *action;
+		int rc = 0;
+
+		/* ignore resources outside this nd_mapping */
+		if (res->start > mapping_end)
+			continue;
+		if (res->end < nd_mapping->start)
+			continue;
+
+		/* space at the beginning of the mapping */
+		if (!first++ && res->start > nd_mapping->start) {
+			free_start = nd_mapping->start;
+			available = res->start - free_start;
+			if (space_valid(is_pmem, label_id, NULL))
+				loc = ALLOC_BEFORE;
+		}
+
+		/* space between allocations */
+		if (!loc && next) {
+			free_start = res->start + resource_size(res);
+			free_end = min(mapping_end, next->start - 1);
+			if (space_valid(is_pmem, label_id, res)
+					&& free_start < free_end) {
+				available = free_end + 1 - free_start;
+				loc = ALLOC_MID;
+			}
+		}
+
+		/* space at the end of the mapping */
+		if (!loc && !next) {
+			free_start = res->start + resource_size(res);
+			free_end = mapping_end;
+			if (space_valid(is_pmem, label_id, res)
+					&& free_start < free_end) {
+				available = free_end + 1 - free_start;
+				loc = ALLOC_AFTER;
+			}
+		}
+
+		if (!loc || !available)
+			continue;
+		allocate = min(available, n);
+		switch (loc) {
+		case ALLOC_BEFORE:
+			if (strcmp(res->name, label_id->id) == 0) {
+				/* adjust current resource up */
+				if (is_pmem)
+					return n;
+				rc = adjust_resource(res, res->start - allocate,
+						resource_size(res) + allocate);
+				action = "cur grow up";
+			} else
+				action = "allocate";
+			break;
+		case ALLOC_MID:
+			if (strcmp(next->name, label_id->id) == 0) {
+				/* adjust next resource up */
+				if (is_pmem)
+					return n;
+				rc = adjust_resource(next, next->start
+						- allocate, resource_size(next)
+						+ allocate);
+				new_res = next;
+				action = "next grow up";
+			} else if (strcmp(res->name, label_id->id) == 0) {
+				action = "grow down";
+			} else
+				action = "allocate";
+			break;
+		case ALLOC_AFTER:
+			if (strcmp(res->name, label_id->id) == 0)
+				action = "grow down";
+			else
+				action = "allocate";
+			break;
+		default:
+			return n;
+		}
+
+		if (strcmp(action, "allocate") == 0) {
+			/* BLK allocate bottom up */
+			if (!is_pmem)
+				free_start += available - allocate;
+			else if (free_start != nd_mapping->start)
+				return n;
+
+			new_res = nvdimm_allocate_dpa(ndd, label_id,
+					free_start, allocate);
+			if (!new_res)
+				rc = -EBUSY;
+		} else if (strcmp(action, "grow down") == 0) {
+			/* adjust current resource down */
+			rc = adjust_resource(res, res->start, resource_size(res)
+					+ allocate);
+		}
+
+		if (!new_res)
+			new_res = res;
+
+		nd_dbg_dpa(nd_region, ndd, new_res, "%s(%d) %d\n",
+				action, loc, rc);
+
+		if (rc)
+			return n;
+
+		n -= allocate;
+		if (n) {
+			/*
+			 * Retry scan with newly inserted resources.
+			 * For example, if we did an ALLOC_BEFORE
+			 * insertion there may also have been space
+			 * available for an ALLOC_AFTER insertion, so we
+			 * need to check this same resource again
+			 */
+			goto retry;
+		} else
+			return 0;
+	}
+
+	if (is_pmem && n == to_allocate)
+		return init_dpa_allocation(label_id, nd_region, nd_mapping, n);
+	return n;
+}
+
+/**
+ * grow_dpa_allocation - for each dimm allocate n bytes for @label_id
+ * @nd_region: the set of dimms to allocate @n more bytes from
+ * @label_id: unique identifier for the namespace consuming this dpa range
+ * @n: number of bytes per-dimm to add to the existing allocation
+ *
+ * Assumes resources are ordered.  For BLK regions, first consume
+ * BLK-only available DPA free space, then consume PMEM-aliased DPA
+ * space starting at the highest DPA.  For PMEM regions start
+ * allocations from the start of an interleave set and end at the first
+ * BLK allocation or the end of the interleave set, whichever comes
+ * first.
+ */
+static int grow_dpa_allocation(struct nd_region *nd_region,
+		struct nd_label_id *label_id, resource_size_t n)
+{
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		int rc;
+
+		rc = scan_allocate(nd_region, nd_mapping, label_id, n);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static void nd_namespace_pmem_set_size(struct nd_region *nd_region,
+		struct nd_namespace_pmem *nspm, resource_size_t size)
+{
+	struct resource *res = &nspm->nsio.res;
+
+	res->start = nd_region->ndr_start;
+	res->end = nd_region->ndr_start + size - 1;
+}
+
+static ssize_t __size_store(struct device *dev, unsigned long long val)
+{
+	resource_size_t allocated = 0, available = 0;
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	struct nd_mapping *nd_mapping;
+	struct nvdimm_drvdata *ndd;
+	struct nd_label_id label_id;
+	u32 flags = 0, remainder;
+	u8 *uuid = NULL;
+	int rc, i;
+
+	if (dev->driver)
+		return -EBUSY;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		uuid = nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		/* TODO: blk namespace support */
+		return -ENXIO;
+	}
+
+	/*
+	 * We need a uuid for the allocation-label and dimm(s) on which
+	 * to store the label.
+	 */
+	if (!uuid || nd_region->ndr_mappings == 0)
+		return -ENXIO;
+
+	div_u64_rem(val, SZ_4K * nd_region->ndr_mappings, &remainder);
+	if (remainder) {
+		dev_dbg(dev, "%llu is not %dK aligned\n", val,
+				(SZ_4K * nd_region->ndr_mappings) / SZ_1K);
+		return -EINVAL;
+	}
+
+	nd_label_gen_id(&label_id, uuid, flags);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		nd_mapping = &nd_region->mapping[i];
+		ndd = to_ndd(nd_mapping);
+
+		/*
+		 * All dimms in an interleave set, or the base dimm for a blk
+		 * region, need to be enabled for the size to be changed.
+		 */
+		if (!ndd)
+			return -ENXIO;
+
+		allocated += nvdimm_allocated_dpa(ndd, &label_id);
+	}
+	available = nd_region_available_dpa(nd_region);
+
+	if (val > available + allocated)
+		return -ENOSPC;
+
+	if (val == allocated)
+		return 0;
+
+	val = div_u64(val, nd_region->ndr_mappings);
+	allocated = div_u64(allocated, nd_region->ndr_mappings);
+	if (val < allocated)
+		rc = shrink_dpa_allocation(nd_region, &label_id,
+				allocated - val);
+	else
+		rc = grow_dpa_allocation(nd_region, &label_id, val - allocated);
+
+	if (rc)
+		return rc;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		nd_namespace_pmem_set_size(nd_region, nspm,
+				val * nd_region->ndr_mappings);
+	}
+
+	return rc;
+}
+
+static ssize_t size_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	unsigned long long val;
+	u8 **uuid = NULL;
+	int rc;
+
+	rc = kstrtoull(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	rc = __size_store(dev, val);
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		uuid = &nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		/* TODO: blk namespace support */
+		rc = -ENXIO;
+	}
+
+	if (rc == 0 && val == 0 && uuid) {
+		/* setting size zero == 'delete namespace' */
+		kfree(*uuid);
+		*uuid = NULL;
+	}
+
+	dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
+			? "fail" : "success", rc);
+
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+
+static ssize_t size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		return sprintf(buf, "%llu\n", (unsigned long long)
+				resource_size(&nspm->nsio.res));
+	} else if (is_namespace_blk(dev)) {
+		/* TODO: blk namespace support */
+		return -ENXIO;
+	} else if (is_namespace_io(dev)) {
+		struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+		return sprintf(buf, "%llu\n", (unsigned long long)
+				resource_size(&nsio->res));
+	} else
+		return -ENXIO;
+}
+static DEVICE_ATTR(size, S_IRUGO, size_show, size_store);
+
+static ssize_t uuid_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	u8 *uuid;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		uuid = nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		/* TODO: blk namespace support */
+		return -ENXIO;
+	} else
+		return -ENXIO;
+
+	if (uuid)
+		return sprintf(buf, "%pUb\n", uuid);
+	return sprintf(buf, "\n");
+}
+
+/**
+ * namespace_update_uuid - check for a unique uuid and whether we're "renaming"
+ * @nd_region: parent region so we can updates all dimms in the set
+ * @dev: namespace type for generating label_id
+ * @new_uuid: incoming uuid
+ * @old_uuid: reference to the uuid storage location in the namespace object
+ */
+static int namespace_update_uuid(struct nd_region *nd_region,
+		struct device *dev, u8 *new_uuid, u8 **old_uuid)
+{
+	u32 flags = is_namespace_blk(dev) ? NSLABEL_FLAG_LOCAL : 0;
+	struct nd_label_id old_label_id;
+	struct nd_label_id new_label_id;
+	int i, rc;
+
+	rc = nd_is_uuid_unique(dev, new_uuid) ? 0 : -EINVAL;
+	if (rc) {
+		kfree(new_uuid);
+		return rc;
+	}
+
+	if (*old_uuid == NULL)
+		goto out;
+
+	nd_label_gen_id(&old_label_id, *old_uuid, flags);
+	nd_label_gen_id(&new_label_id, new_uuid, flags);
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+		struct resource *res;
+
+		for_each_dpa_resource(ndd, res)
+			if (strcmp(res->name, old_label_id.id) == 0)
+				sprintf((void *) res->name, "%s",
+						new_label_id.id);
+	}
+	kfree(*old_uuid);
+ out:
+	*old_uuid = new_uuid;
+	return 0;
+}
+
+static ssize_t uuid_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t len)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	u8 *uuid = NULL;
+	u8 **ns_uuid;
+	ssize_t rc;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		ns_uuid = &nspm->uuid;
+	} else if (is_namespace_blk(dev)) {
+		/* TODO: blk namespace support */
+		return -ENXIO;
+	} else
+		return -ENXIO;
+
+	device_lock(dev);
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	rc = nd_uuid_store(dev, &uuid, buf, len);
+	if (rc >= 0)
+		rc = namespace_update_uuid(nd_region, dev, uuid, ns_uuid);
+	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
+			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	nvdimm_bus_unlock(dev);
+	device_unlock(dev);
+
+	return rc ? rc : len;
+}
+static DEVICE_ATTR_RW(uuid);
+
+static ssize_t resource_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct resource *res;
+
+	if (is_namespace_pmem(dev)) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		res = &nspm->nsio.res;
+	} else if (is_namespace_io(dev)) {
+		struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+		res = &nsio->res;
+	} else
+		return -ENXIO;
+
+	/* no address to convey if the namespace has no allocation */
+	if (resource_size(res) == 0)
+		return -ENXIO;
+	return sprintf(buf, "%#llx\n", (unsigned long long) res->start);
+}
+static DEVICE_ATTR_RO(resource);
+
 static struct attribute *nd_namespace_attributes[] = {
 	&dev_attr_nstype.attr,
+	&dev_attr_size.attr,
+	&dev_attr_uuid.attr,
+	&dev_attr_resource.attr,
+	&dev_attr_alt_name.attr,
 	NULL,
 };
 
+static umode_t namespace_visible(struct kobject *kobj,
+		struct attribute *a, int n)
+{
+	struct device *dev = container_of(kobj, struct device, kobj);
+
+	if (a == &dev_attr_resource.attr) {
+		if (is_namespace_blk(dev))
+			return 0;
+		return a->mode;
+	}
+
+	if (is_namespace_pmem(dev) || is_namespace_blk(dev)) {
+		if (a == &dev_attr_size.attr)
+			return S_IWUSR | S_IRUGO;
+		return a->mode;
+	}
+
+	if (a == &dev_attr_nstype.attr || a == &dev_attr_size.attr)
+		return a->mode;
+
+	return 0;
+}
+
 static struct attribute_group nd_namespace_attribute_group = {
 	.attrs = nd_namespace_attributes,
+	.is_visible = namespace_visible,
 };
 
 static const struct attribute_group *nd_namespace_attribute_groups[] = {
@@ -81,23 +784,318 @@ static struct device **create_namespace_io(struct nd_region *nd_region)
 	return devs;
 }
 
+static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
+		u64 cookie, u16 pos)
+{
+	struct nd_namespace_label *found = NULL;
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nd_namespace_label *nd_label;
+		bool found_uuid = false;
+		int l;
+
+		for_each_label(l, nd_label, nd_mapping->labels) {
+			u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
+			u16 position = __le16_to_cpu(nd_label->position);
+			u16 nlabel = __le16_to_cpu(nd_label->nlabel);
+
+			if (isetcookie != cookie)
+				continue;
+
+			if (memcmp(nd_label->uuid, uuid, NSLABEL_UUID_LEN) != 0)
+				continue;
+
+			if (found_uuid) {
+				dev_dbg(to_ndd(nd_mapping)->dev,
+						"%s duplicate entry for uuid\n",
+						__func__);
+				return false;
+			}
+			found_uuid = true;
+			if (nlabel != nd_region->ndr_mappings)
+				continue;
+			if (position != pos)
+				continue;
+			found = nd_label;
+			break;
+		}
+		if (found)
+			break;
+	}
+	return found != NULL;
+}
+
+static int select_pmem_id(struct nd_region *nd_region, u8 *pmem_id)
+{
+	struct nd_namespace_label *select = NULL;
+	int i;
+
+	if (!pmem_id)
+		return -ENODEV;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nd_namespace_label *nd_label;
+		u64 hw_start, hw_end, pmem_start, pmem_end;
+		int l;
+
+		for_each_label(l, nd_label, nd_mapping->labels)
+			if (memcmp(nd_label->uuid, pmem_id, NSLABEL_UUID_LEN) == 0)
+				break;
+
+		if (!nd_label) {
+			WARN_ON(1);
+			return -EINVAL;
+		}
+
+		select = nd_label;
+		/*
+		 * Check that this label is compliant with the dpa
+		 * range published in NFIT
+		 */
+		hw_start = nd_mapping->start;
+		hw_end = hw_start + nd_mapping->size;
+		pmem_start = __le64_to_cpu(select->dpa);
+		pmem_end = pmem_start + __le64_to_cpu(select->rawsize);
+		if (pmem_start == hw_start && pmem_end <= hw_end)
+			/* pass */;
+		else
+			return -EINVAL;
+
+		nd_mapping->labels[0] = select;
+		nd_mapping->labels[1] = NULL;
+	}
+	return 0;
+}
+
+/**
+ * find_pmem_label_set - validate interleave set labelling, retrieve label0
+ * @nd_region: region with mappings to validate
+ */
+static int find_pmem_label_set(struct nd_region *nd_region,
+		struct nd_namespace_pmem *nspm)
+{
+	u64 cookie = nd_region_interleave_set_cookie(nd_region);
+	struct nd_namespace_label *nd_label;
+	u8 select_id[NSLABEL_UUID_LEN];
+	resource_size_t size = 0;
+	u8 *pmem_id = NULL;
+	int rc = -ENODEV, l;
+	u16 i;
+
+	if (cookie == 0)
+		return -ENXIO;
+
+	/*
+	 * Find a complete set of labels by uuid.  By definition we can start
+	 * with any mapping as the reference label
+	 */
+	for_each_label(l, nd_label, nd_region->mapping[0].labels) {
+		u64 isetcookie = __le64_to_cpu(nd_label->isetcookie);
+
+		if (isetcookie != cookie)
+			continue;
+
+		for (i = 0; nd_region->ndr_mappings; i++)
+			if (!has_uuid_at_pos(nd_region, nd_label->uuid,
+						cookie, i))
+				break;
+		if (i < nd_region->ndr_mappings) {
+			/*
+			 * Give up if we don't find an instance of a
+			 * uuid at each position (from 0 to
+			 * nd_region->ndr_mappings - 1), or if we find a
+			 * dimm with two instances of the same uuid.
+			 */
+			rc = -EINVAL;
+			goto err;
+		} else if (pmem_id) {
+			/*
+			 * If there is more than one valid uuid set, we
+			 * need userspace to clean this up.
+			 */
+			rc = -EBUSY;
+			goto err;
+		}
+		memcpy(select_id, nd_label->uuid, NSLABEL_UUID_LEN);
+		pmem_id = select_id;
+	}
+
+	/*
+	 * Fix up each mapping's 'labels' to have the validated pmem label for
+	 * that position at labels[0], and NULL at labels[1].  In the process,
+	 * check that the namespace aligns with interleave-set.  We know
+	 * that it does not overlap with any blk namespaces by virtue of
+	 * the dimm being enabled (i.e. nd_label_reserve_dpa()
+	 * succeeded).
+	 */
+	rc = select_pmem_id(nd_region, pmem_id);
+	if (rc)
+		goto err;
+
+	/* Calculate total size and populate namespace properties from label0 */
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nd_namespace_label *label0 = nd_mapping->labels[0];
+
+		size += __le64_to_cpu(label0->rawsize);
+		if (__le16_to_cpu(label0->position) != 0)
+			continue;
+		WARN_ON(nspm->alt_name || nspm->uuid);
+		nspm->alt_name = kmemdup((void __force *) label0->name,
+				NSLABEL_NAME_LEN, GFP_KERNEL);
+		nspm->uuid = kmemdup((void __force *) label0->uuid,
+				NSLABEL_UUID_LEN, GFP_KERNEL);
+	}
+
+	if (!nspm->alt_name || !nspm->uuid) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	nd_namespace_pmem_set_size(nd_region, nspm, size);
+
+	return 0;
+ err:
+	switch (rc) {
+	case -EINVAL:
+		dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
+		break;
+	case -ENODEV:
+		dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
+		break;
+	default:
+		dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
+				__func__, rc);
+		break;
+	}
+	return rc;
+}
+
+static struct device **create_namespace_pmem(struct nd_region *nd_region)
+{
+	struct nd_namespace_pmem *nspm;
+	struct device *dev, **devs;
+	struct resource *res;
+	int rc;
+
+	nspm = kzalloc(sizeof(*nspm), GFP_KERNEL);
+	if (!nspm)
+		return NULL;
+
+	dev = &nspm->nsio.dev;
+	dev->type = &namespace_pmem_device_type;
+	dev->parent = &nd_region->dev;
+	res = &nspm->nsio.res;
+	res->name = dev_name(&nd_region->dev);
+	res->flags = IORESOURCE_MEM;
+	rc = find_pmem_label_set(nd_region, nspm);
+	if (rc == -ENODEV) {
+		int i;
+
+		/* Pass, try to permit namespace creation... */
+		for (i = 0; i < nd_region->ndr_mappings; i++) {
+			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+
+			kfree(nd_mapping->labels);
+			nd_mapping->labels = NULL;
+		}
+
+		/* Publish a zero-sized namespace for userspace to configure. */
+		nd_namespace_pmem_set_size(nd_region, nspm, 0);
+
+		rc = 0;
+	} else if (rc)
+		goto err;
+
+	devs = kcalloc(2, sizeof(struct device *), GFP_KERNEL);
+	if (!devs)
+		goto err;
+
+	devs[0] = dev;
+	return devs;
+
+ err:
+	namespace_pmem_release(&nspm->nsio.dev);
+	return NULL;
+}
+
+static int init_active_labels(struct nd_region *nd_region)
+{
+	int i;
+
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+		struct nvdimm *nvdimm = nd_mapping->nvdimm;
+		int count, j;
+
+		/*
+		 * If the dimm is disabled then prevent the region from
+		 * being activated if it aliases DPA.
+		 */
+		if (!ndd) {
+			if ((nvdimm->flags & NDD_ALIASING) == 0)
+				return 0;
+			dev_dbg(&nd_region->dev, "%s: is disabled, failing probe\n",
+					dev_name(&nd_mapping->nvdimm->dev));
+			return -ENXIO;
+		}
+		nd_mapping->ndd = ndd;
+		atomic_inc(&nvdimm->busy);
+		get_ndd(ndd);
+
+		count = nd_label_active_count(ndd);
+		dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
+		if (!count)
+			continue;
+		nd_mapping->labels = kcalloc(count + 1, sizeof(void *),
+				GFP_KERNEL);
+		if (!nd_mapping->labels)
+			return -ENOMEM;
+		for (j = 0; j < count; j++) {
+			struct nd_namespace_label *label;
+
+			label = nd_label_active(ndd, j);
+			nd_mapping->labels[j] = label;
+		}
+	}
+
+	return 0;
+}
+
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err)
 {
 	struct device **devs = NULL;
-	int i;
+	int i, rc = 0, type;
 
 	*err = 0;
-	switch (nd_region_to_nstype(nd_region)) {
+	nvdimm_bus_lock(&nd_region->dev);
+	rc = init_active_labels(nd_region);
+	if (rc) {
+		nvdimm_bus_unlock(&nd_region->dev);
+		return rc;
+	}
+
+	type = nd_region_to_nstype(nd_region);
+	switch (type) {
 	case ND_DEVICE_NAMESPACE_IO:
 		devs = create_namespace_io(nd_region);
 		break;
+	case ND_DEVICE_NAMESPACE_PMEM:
+		devs = create_namespace_pmem(nd_region);
+		break;
 	default:
 		break;
 	}
+	nvdimm_bus_unlock(&nd_region->dev);
 
 	if (!devs)
 		return -ENODEV;
 
+	nd_region->ns_seed = devs[0];
 	for (i = 0; devs[i]; i++) {
 		struct device *dev = devs[i];
 
diff --git a/drivers/nvdimm/nd-core.h b/drivers/nvdimm/nd-core.h
index 6a4b2c066ee7..c6c889292bab 100644
--- a/drivers/nvdimm/nd-core.h
+++ b/drivers/nvdimm/nd-core.h
@@ -56,4 +56,16 @@ int nvdimm_bus_register_dimms(struct nvdimm_bus *nvdimm_bus);
 int nvdimm_bus_register_regions(struct nvdimm_bus *nvdimm_bus);
 int nvdimm_bus_init_interleave_sets(struct nvdimm_bus *nvdimm_bus);
 int nd_match_dimm(struct device *dev, void *data);
+struct nd_label_id;
+char *nd_label_gen_id(struct nd_label_id *label_id, u8 *uuid, u32 flags);
+bool nd_is_uuid_unique(struct device *dev, u8 *uuid);
+struct nd_region;
+struct nvdimm_drvdata;
+struct nd_mapping;
+resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
+		struct nd_mapping *nd_mapping, resource_size_t *overlap);
+resource_size_t nd_region_available_dpa(struct nd_region *nd_region);
+resource_size_t nvdimm_allocated_dpa(struct nvdimm_drvdata *ndd,
+		struct nd_label_id *label_id);
+void get_ndd(struct nvdimm_drvdata *ndd);
 #endif /* __ND_CORE_H__ */
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 401fa0d5b6ea..03e610cd9f43 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -16,6 +16,7 @@
 #include <linux/device.h>
 #include <linux/mutex.h>
 #include <linux/ndctl.h>
+#include <linux/types.h>
 #include "label.h"
 
 struct nvdimm_drvdata {
@@ -25,6 +26,7 @@ struct nvdimm_drvdata {
 	void *data;
 	int ns_current, ns_next;
 	struct resource dpa;
+	struct kref kref;
 };
 
 struct nd_region_namespaces {
@@ -59,12 +61,19 @@ static inline struct nd_namespace_index *to_next_namespace_index(
 		(unsigned long long) (res ? resource_size(res) : 0), \
 		(unsigned long long) (res ? res->start : 0), ##arg)
 
+#define for_each_label(l, label, labels) \
+	for (l = 0; (label = labels ? labels[l] : NULL); l++)
+
+#define for_each_dpa_resource(ndd, res) \
+	for (res = (ndd)->dpa.child; res; res = res->sibling)
+
 #define for_each_dpa_resource_safe(ndd, res, next) \
 	for (res = (ndd)->dpa.child, next = res ? res->sibling : NULL; \
 			res; res = next, next = next ? next->sibling : NULL)
 
 struct nd_region {
 	struct device dev;
+	struct device *ns_seed;
 	u16 ndr_mappings;
 	u64 ndr_size;
 	u64 ndr_start;
@@ -88,20 +97,28 @@ enum nd_async_mode {
 	ND_ASYNC,
 };
 
+void wait_nvdimm_bus_probe_idle(struct device *dev);
 void nd_device_register(struct device *dev);
 void nd_device_unregister(struct device *dev, enum nd_async_mode mode);
+int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
+		size_t len);
 int __init nvdimm_init(void);
 int __init nd_region_init(void);
 void nvdimm_exit(void);
 void nd_region_exit(void);
+struct nvdimm;
+struct nvdimm_drvdata *to_ndd(struct nd_mapping *nd_mapping);
 int nvdimm_init_nsarea(struct nvdimm_drvdata *ndd);
 int nvdimm_init_config_data(struct nvdimm_drvdata *ndd);
 struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region);
 void nvdimm_bus_lock(struct device *dev);
 void nvdimm_bus_unlock(struct device *dev);
 bool is_nvdimm_bus_locked(struct device *dev);
+void nvdimm_drvdata_release(struct kref *kref);
+void put_ndd(struct nvdimm_drvdata *ndd);
 int nd_label_reserve_dpa(struct nvdimm_drvdata *ndd);
 void nvdimm_free_dpa(struct nvdimm_drvdata *ndd, struct resource *res);
 struct resource *nvdimm_allocate_dpa(struct nvdimm_drvdata *ndd,
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d46975ed9e40..90902a142e35 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -203,6 +203,23 @@ static int nd_pmem_probe(struct device *dev)
 	struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
 	struct pmem_device *pmem;
 
+	if (resource_size(&nsio->res) < ND_MIN_NAMESPACE_SIZE) {
+		resource_size_t size = resource_size(&nsio->res);
+
+		dev_dbg(dev, "%s: size: %pa, too small must be at least %#x\n",
+				__func__, &size, ND_MIN_NAMESPACE_SIZE);
+		return -ENODEV;
+	}
+
+	if (nd_region_to_nstype(nd_region) == ND_DEVICE_NAMESPACE_PMEM) {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		if (!nspm->uuid) {
+			dev_dbg(dev, "%s: uuid not set\n", __func__);
+			return -ENODEV;
+		}
+	}
+
 	pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
 	if (IS_ERR(pmem))
 		return PTR_ERR(pmem);
@@ -222,13 +239,14 @@ static int nd_pmem_remove(struct device *dev)
 
 MODULE_ALIAS("pmem");
 MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
+MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
 static struct nd_device_driver nd_pmem_driver = {
 	.probe = nd_pmem_probe,
 	.remove = nd_pmem_remove,
 	.drv = {
 		.name = "nd_pmem",
 	},
-	.type = ND_DRIVER_NAMESPACE_IO,
+	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
 };
 
 static int __init pmem_init(void)
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index ade3dba81afd..9aba44e483e0 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -61,8 +61,11 @@ static int child_unregister(struct device *dev, void *data)
 
 static int nd_region_remove(struct device *dev)
 {
+	struct nd_region *nd_region = to_nd_region(dev);
+
 	/* flush attribute readers and disable */
 	nvdimm_bus_lock(dev);
+	nd_region->ns_seed = NULL;
 	dev_set_drvdata(dev, NULL);
 	nvdimm_bus_unlock(dev);
 
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 1571424578f0..b45806f7176d 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/sort.h>
 #include <linux/io.h>
+#include <linux/nd.h>
 #include "nd-core.h"
 #include "nd.h"
 
@@ -99,6 +100,58 @@ int nd_region_to_nstype(struct nd_region *nd_region)
 
 	return 0;
 }
+EXPORT_SYMBOL(nd_region_to_nstype);
+
+static int is_uuid_busy(struct device *dev, void *data)
+{
+	struct nd_region *nd_region = to_nd_region(dev->parent);
+	u8 *uuid = data;
+
+	switch (nd_region_to_nstype(nd_region)) {
+	case ND_DEVICE_NAMESPACE_PMEM: {
+		struct nd_namespace_pmem *nspm = to_nd_namespace_pmem(dev);
+
+		if (!nspm->uuid)
+			break;
+		if (memcmp(uuid, nspm->uuid, NSLABEL_UUID_LEN) == 0)
+			return -EBUSY;
+		break;
+	}
+	case ND_DEVICE_NAMESPACE_BLK: {
+		/* TODO: blk namespace support */
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int is_namespace_uuid_busy(struct device *dev, void *data)
+{
+	if (is_nd_pmem(dev) || is_nd_blk(dev))
+		return device_for_each_child(dev, data, is_uuid_busy);
+	return 0;
+}
+
+/**
+ * nd_is_uuid_unique - verify that no other namespace has @uuid
+ * @dev: any device on a nvdimm_bus
+ * @uuid: uuid to check
+ */
+bool nd_is_uuid_unique(struct device *dev, u8 *uuid)
+{
+	struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
+
+	if (!nvdimm_bus)
+		return false;
+	WARN_ON_ONCE(!is_nvdimm_bus_locked(&nvdimm_bus->dev));
+	if (device_for_each_child(&nvdimm_bus->dev, uuid,
+				is_namespace_uuid_busy) != 0)
+		return false;
+	return true;
+}
 
 static ssize_t size_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
@@ -151,6 +204,60 @@ static ssize_t set_cookie_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(set_cookie);
 
+resource_size_t nd_region_available_dpa(struct nd_region *nd_region)
+{
+	resource_size_t blk_max_overlap = 0, available, overlap;
+	int i;
+
+	WARN_ON(!is_nvdimm_bus_locked(&nd_region->dev));
+
+ retry:
+	available = 0;
+	overlap = blk_max_overlap;
+	for (i = 0; i < nd_region->ndr_mappings; i++) {
+		struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+		struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
+
+		/* if a dimm is disabled the available capacity is zero */
+		if (!ndd)
+			return 0;
+
+		if (is_nd_pmem(&nd_region->dev)) {
+			available += nd_pmem_available_dpa(nd_region,
+					nd_mapping, &overlap);
+			if (overlap > blk_max_overlap) {
+				blk_max_overlap = overlap;
+				goto retry;
+			}
+		} else if (is_nd_blk(&nd_region->dev)) {
+			/* TODO: BLK Namespace support */
+		}
+	}
+
+	return available;
+}
+
+static ssize_t available_size_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	unsigned long long available = 0;
+
+	/*
+	 * Flush in-flight updates and grab a snapshot of the available
+	 * size.  Of course, this value is potentially invalidated the
+	 * memory nvdimm_bus_lock() is dropped, but that's userspace's
+	 * problem to not race itself.
+	 */
+	nvdimm_bus_lock(dev);
+	wait_nvdimm_bus_probe_idle(dev);
+	available = nd_region_available_dpa(nd_region);
+	nvdimm_bus_unlock(dev);
+
+	return sprintf(buf, "%llu\n", available);
+}
+static DEVICE_ATTR_RO(available_size);
+
 static ssize_t init_namespaces_show(struct device *dev,
 		struct device_attribute *attr, char *buf)
 {
@@ -168,11 +275,29 @@ static ssize_t init_namespaces_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(init_namespaces);
 
+static ssize_t namespace_seed_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct nd_region *nd_region = to_nd_region(dev);
+	ssize_t rc;
+
+	nvdimm_bus_lock(dev);
+	if (nd_region->ns_seed)
+		rc = sprintf(buf, "%s\n", dev_name(nd_region->ns_seed));
+	else
+		rc = sprintf(buf, "\n");
+	nvdimm_bus_unlock(dev);
+	return rc;
+}
+static DEVICE_ATTR_RO(namespace_seed);
+
 static struct attribute *nd_region_attributes[] = {
 	&dev_attr_size.attr,
 	&dev_attr_nstype.attr,
 	&dev_attr_mappings.attr,
 	&dev_attr_set_cookie.attr,
+	&dev_attr_available_size.attr,
+	&dev_attr_namespace_seed.attr,
 	&dev_attr_init_namespaces.attr,
 	NULL,
 };
@@ -182,12 +307,18 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
 	struct device *dev = container_of(kobj, typeof(*dev), kobj);
 	struct nd_region *nd_region = to_nd_region(dev);
 	struct nd_interleave_set *nd_set = nd_region->nd_set;
+	int type = nd_region_to_nstype(nd_region);
 
-	if (a != &dev_attr_set_cookie.attr)
+	if (a != &dev_attr_set_cookie.attr
+			&& a != &dev_attr_available_size.attr)
 		return a->mode;
 
-	if (is_nd_pmem(dev) && nd_set)
-			return a->mode;
+	if ((type == ND_DEVICE_NAMESPACE_PMEM
+				|| type == ND_DEVICE_NAMESPACE_BLK)
+			&& a == &dev_attr_available_size.attr)
+		return a->mode;
+	else if (is_nd_pmem(dev) && nd_set)
+		return a->mode;
 
 	return 0;
 }
@@ -198,6 +329,15 @@ struct attribute_group nd_region_attribute_group = {
 };
 EXPORT_SYMBOL_GPL(nd_region_attribute_group);
 
+u64 nd_region_interleave_set_cookie(struct nd_region *nd_region)
+{
+	struct nd_interleave_set *nd_set = nd_region->nd_set;
+
+	if (nd_set)
+		return nd_set->cookie;
+	return 0;
+}
+
 /*
  * Upon successful probe/remove, take/release a reference on the
  * associated interleave set (if present)
@@ -205,18 +345,20 @@ EXPORT_SYMBOL_GPL(nd_region_attribute_group);
 static void nd_region_notify_driver_action(struct nvdimm_bus *nvdimm_bus,
 		struct device *dev, bool probe)
 {
-	if (is_nd_pmem(dev) || is_nd_blk(dev)) {
+	if (!probe && (is_nd_pmem(dev) || is_nd_blk(dev))) {
 		struct nd_region *nd_region = to_nd_region(dev);
 		int i;
 
 		for (i = 0; i < nd_region->ndr_mappings; i++) {
 			struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+			struct nvdimm_drvdata *ndd = nd_mapping->ndd;
 			struct nvdimm *nvdimm = nd_mapping->nvdimm;
 
-			if (probe)
-				atomic_inc(&nvdimm->busy);
-			else
-				atomic_dec(&nvdimm->busy);
+			kfree(nd_mapping->labels);
+			nd_mapping->labels = NULL;
+			put_ndd(ndd);
+			nd_mapping->ndd = NULL;
+			atomic_dec(&nvdimm->busy);
 		}
 	}
 }
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 1b627b109360..c130972e08c4 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -41,10 +41,20 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len);
 
+struct nd_namespace_label;
+struct nvdimm_drvdata;
 struct nd_mapping {
 	struct nvdimm *nvdimm;
+	struct nd_namespace_label **labels;
 	u64 start;
 	u64 size;
+	/*
+	 * @ndd is for private use at region enable / disable time for
+	 * get_ndd() + put_ndd(), all other nd_mapping to ndd
+	 * conversions use to_ndd() which respects enabled state of the
+	 * nvdimm.
+	 */
+	struct nvdimm_drvdata *ndd;
 };
 
 struct nvdimm_bus_descriptor {
diff --git a/include/linux/nd.h b/include/linux/nd.h
index da70e9962197..255c38a83083 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -28,16 +28,40 @@ static inline struct nd_device_driver *to_nd_device_driver(
 	return container_of(drv, struct nd_device_driver, drv);
 };
 
+/**
+ * struct nd_namespace_io - infrastructure for loading an nd_pmem instance
+ * @dev: namespace device created by the nd region driver
+ * @res: struct resource conversion of a NFIT SPA table
+ */
 struct nd_namespace_io {
 	struct device dev;
 	struct resource res;
 };
 
+/**
+ * struct nd_namespace_pmem - namespace device for dimm-backed interleaved memory
+ * @nsio: device and system physical address range to drive
+ * @alt_name: namespace name supplied in the dimm label
+ * @uuid: namespace name supplied in the dimm label
+ */
+struct nd_namespace_pmem {
+	struct nd_namespace_io nsio;
+	char *alt_name;
+	u8 *uuid;
+};
+
 static inline struct nd_namespace_io *to_nd_namespace_io(struct device *dev)
 {
 	return container_of(dev, struct nd_namespace_io, dev);
 }
 
+static inline struct nd_namespace_pmem *to_nd_namespace_pmem(struct device *dev)
+{
+	struct nd_namespace_io *nsio = to_nd_namespace_io(dev);
+
+	return container_of(nsio, struct nd_namespace_pmem, nsio);
+}
+
 #define MODULE_ALIAS_ND_DEVICE(type) \
 	MODULE_ALIAS("nd:t" __stringify(type) "*")
 #define ND_DEVICE_MODALIAS_FMT "nd:t%d"
diff --git a/include/uapi/linux/ndctl.h b/include/uapi/linux/ndctl.h
index 1357a87b8714..2b94ea2287bb 100644
--- a/include/uapi/linux/ndctl.h
+++ b/include/uapi/linux/ndctl.h
@@ -190,4 +190,8 @@ enum nd_driver_flags {
 	ND_DRIVER_NAMESPACE_PMEM  = 1 << ND_DEVICE_NAMESPACE_PMEM,
 	ND_DRIVER_NAMESPACE_BLK   = 1 << ND_DEVICE_NAMESPACE_BLK,
 };
+
+enum {
+	ND_MIN_NAMESPACE_SIZE = 0x00400000,
+};
 #endif /* __NDCTL_H__ */
-- 
cgit 


From 5532fba2061a203fa05dab87ae3f4213cd52bb8e Mon Sep 17 00:00:00 2001
From: Mike Snitzer <snitzer@redhat.com>
Date: Fri, 26 Jun 2015 10:03:14 -0400
Subject: dm: bump the ioctl version to 4.32.0

This fix enables userspace to detect that the dm-stats changes from the
4.2 merge are in place.

Reported-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 include/uapi/linux/dm-ioctl.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
index eac8c3641f39..061aca3a962d 100644
--- a/include/uapi/linux/dm-ioctl.h
+++ b/include/uapi/linux/dm-ioctl.h
@@ -267,9 +267,9 @@ enum {
 #define DM_DEV_SET_GEOMETRY	_IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	31
+#define DM_VERSION_MINOR	32
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2015-3-12)"
+#define DM_VERSION_EXTRA	"-ioctl (2015-6-26)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
-- 
cgit 


From 279c6c7fa64f5763e6b9f05e7ab3840092e702e7 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <stephen@networkplumber.org>
Date: Mon, 29 Jun 2015 14:57:48 -1000
Subject: api: fix compatibility of linux/in.h with netinet/in.h

u
This fixes breakage to iproute2 build with recent kernel headers
caused by:
   commit a263653ed798216c0069922d7b5237ca49436007
   Author: Pablo Neira Ayuso <pablo@netfilter.org>
   Date:   Wed Jun 17 10:28:27 2015 -0500

   netfilter: don't pull include/linux/netfilter.h from netns headers

The issue is that definitions in linux/in.h overlap with those
in netinet/in.h. This patch solves this by introducing the same
mechanism as was used to solve the same problem with linux/in6.h

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/in.h          | 16 +++++++++++++---
 include/uapi/linux/libc-compat.h | 22 ++++++++++++++++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h
index 83d6236a2f08..eaf94919291a 100644
--- a/include/uapi/linux/in.h
+++ b/include/uapi/linux/in.h
@@ -19,8 +19,10 @@
 #define _UAPI_LINUX_IN_H
 
 #include <linux/types.h>
+#include <linux/libc-compat.h>
 #include <linux/socket.h>
 
+#if __UAPI_DEF_IN_IPPROTO
 /* Standard well-defined IP protocols.  */
 enum {
   IPPROTO_IP = 0,		/* Dummy protocol for TCP		*/
@@ -75,12 +77,14 @@ enum {
 #define IPPROTO_RAW		IPPROTO_RAW
   IPPROTO_MAX
 };
+#endif
 
-
+#if __UAPI_DEF_IN_ADDR
 /* Internet address. */
 struct in_addr {
 	__be32	s_addr;
 };
+#endif
 
 #define IP_TOS		1
 #define IP_TTL		2
@@ -158,6 +162,7 @@ struct in_addr {
 
 /* Request struct for multicast socket ops */
 
+#if __UAPI_DEF_IP_MREQ
 struct ip_mreq  {
 	struct in_addr imr_multiaddr;	/* IP multicast address of group */
 	struct in_addr imr_interface;	/* local IP address of interface */
@@ -209,14 +214,18 @@ struct group_filter {
 #define GROUP_FILTER_SIZE(numsrc) \
 	(sizeof(struct group_filter) - sizeof(struct __kernel_sockaddr_storage) \
 	+ (numsrc) * sizeof(struct __kernel_sockaddr_storage))
+#endif
 
+#if __UAPI_DEF_IN_PKTINFO
 struct in_pktinfo {
 	int		ipi_ifindex;
 	struct in_addr	ipi_spec_dst;
 	struct in_addr	ipi_addr;
 };
+#endif
 
 /* Structure describing an Internet (IP) socket address. */
+#if  __UAPI_DEF_SOCKADDR_IN
 #define __SOCK_SIZE__	16		/* sizeof(struct sockaddr)	*/
 struct sockaddr_in {
   __kernel_sa_family_t	sin_family;	/* Address family		*/
@@ -228,8 +237,9 @@ struct sockaddr_in {
 			sizeof(unsigned short int) - sizeof(struct in_addr)];
 };
 #define sin_zero	__pad		/* for BSD UNIX comp. -FvK	*/
+#endif
 
-
+#if __UAPI_DEF_IN_CLASS
 /*
  * Definitions of the bits in an Internet address integer.
  * On subnets, host and network parts are found according
@@ -280,7 +290,7 @@ struct sockaddr_in {
 #define INADDR_ALLHOSTS_GROUP 	0xe0000001U	/* 224.0.0.1   */
 #define INADDR_ALLRTRS_GROUP    0xe0000002U	/* 224.0.0.2 */
 #define INADDR_MAX_LOCAL_GROUP  0xe00000ffU	/* 224.0.0.255 */
-
+#endif
 
 /* <asm/byteorder.h> contains the htonl type stuff.. */
 #include <asm/byteorder.h> 
diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h
index fa673e9cc040..7d024ceb075d 100644
--- a/include/uapi/linux/libc-compat.h
+++ b/include/uapi/linux/libc-compat.h
@@ -56,6 +56,13 @@
 
 /* GLIBC headers included first so don't define anything
  * that would already be defined. */
+#define __UAPI_DEF_IN_ADDR		0
+#define __UAPI_DEF_IN_IPPROTO		0
+#define __UAPI_DEF_IN_PKTINFO		0
+#define __UAPI_DEF_IP_MREQ		0
+#define __UAPI_DEF_SOCKADDR_IN		0
+#define __UAPI_DEF_IN_CLASS		0
+
 #define __UAPI_DEF_IN6_ADDR		0
 /* The exception is the in6_addr macros which must be defined
  * if the glibc code didn't define them. This guard matches
@@ -78,6 +85,13 @@
 /* Linux headers included first, and we must define everything
  * we need. The expectation is that glibc will check the
  * __UAPI_DEF_* defines and adjust appropriately. */
+#define __UAPI_DEF_IN_ADDR		1
+#define __UAPI_DEF_IN_IPPROTO		1
+#define __UAPI_DEF_IN_PKTINFO		1
+#define __UAPI_DEF_IP_MREQ		1
+#define __UAPI_DEF_SOCKADDR_IN		1
+#define __UAPI_DEF_IN_CLASS		1
+
 #define __UAPI_DEF_IN6_ADDR		1
 /* We unconditionally define the in6_addr macros and glibc must
  * coordinate. */
@@ -103,6 +117,14 @@
  * that we need. */
 #else /* !defined(__GLIBC__) */
 
+/* Definitions for in.h */
+#define __UAPI_DEF_IN_ADDR		1
+#define __UAPI_DEF_IN_IPPROTO		1
+#define __UAPI_DEF_IN_PKTINFO		1
+#define __UAPI_DEF_IP_MREQ		1
+#define __UAPI_DEF_SOCKADDR_IN		1
+#define __UAPI_DEF_IN_CLASS		1
+
 /* Definitions for in6.h */
 #define __UAPI_DEF_IN6_ADDR		1
 #define __UAPI_DEF_IN6_ADDR_ALT		1
-- 
cgit 


From 00c570f4ba43ae73b41fa0a2269c3b0ac20386ef Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Wed, 1 Jul 2015 16:26:08 +0200
Subject: fuse: device fd clone

Allow an open fuse device to be "cloned".  Userspace can create a clone by:

      newfd = open("/dev/fuse", O_RDWR)
      ioctl(newfd, FUSE_DEV_IOC_CLONE, &oldfd);

At this point newfd will refer to the same fuse connection as oldfd.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Reviewed-by: Ashish Samant <ashish.samant@oracle.com>
---
 Documentation/ioctl/ioctl-number.txt |  1 +
 fs/fuse/dev.c                        | 40 ++++++++++++++++++++++++++++++++++++
 include/uapi/linux/fuse.h            |  3 +++
 3 files changed, 44 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/Documentation/ioctl/ioctl-number.txt b/Documentation/ioctl/ioctl-number.txt
index 51f4221657bf..611c52267d24 100644
--- a/Documentation/ioctl/ioctl-number.txt
+++ b/Documentation/ioctl/ioctl-number.txt
@@ -321,6 +321,7 @@ Code  Seq#(hex)	Include File		Comments
 0xDB	00-0F	drivers/char/mwave/mwavepub.h
 0xDD	00-3F	ZFCP device driver	see drivers/s390/scsi/
 					<mailto:aherrman@de.ibm.com>
+0xE5	00-3F	linux/fuse.h
 0xEC	00-01	drivers/platform/chrome/cros_ec_dev.h	ChromeOS EC driver
 0xF3	00-3F	drivers/usb/misc/sisusbvga/sisusb.h	sisfb (in development)
 					<mailto:thomas@winischhofer.net>
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index dcfc87172a47..d405c1fa4618 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2197,6 +2197,44 @@ static int fuse_dev_fasync(int fd, struct file *file, int on)
 	return fasync_helper(fd, file, on, &fc->iq.fasync);
 }
 
+static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
+{
+	if (new->private_data)
+		return -EINVAL;
+
+	new->private_data = fuse_conn_get(fc);
+
+	return 0;
+}
+
+static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
+			   unsigned long arg)
+{
+	int err = -ENOTTY;
+
+	if (cmd == FUSE_DEV_IOC_CLONE) {
+		int oldfd;
+
+		err = -EFAULT;
+		if (!get_user(oldfd, (__u32 __user *) arg)) {
+			struct file *old = fget(oldfd);
+
+			err = -EINVAL;
+			if (old) {
+				struct fuse_conn *fc = fuse_get_conn(old);
+
+				if (fc) {
+					mutex_lock(&fuse_mutex);
+					err = fuse_device_clone(fc, file);
+					mutex_unlock(&fuse_mutex);
+				}
+				fput(old);
+			}
+		}
+	}
+	return err;
+}
+
 const struct file_operations fuse_dev_operations = {
 	.owner		= THIS_MODULE,
 	.open		= fuse_dev_open,
@@ -2208,6 +2246,8 @@ const struct file_operations fuse_dev_operations = {
 	.poll		= fuse_dev_poll,
 	.release	= fuse_dev_release,
 	.fasync		= fuse_dev_fasync,
+	.unlocked_ioctl = fuse_dev_ioctl,
+	.compat_ioctl   = fuse_dev_ioctl,
 };
 EXPORT_SYMBOL_GPL(fuse_dev_operations);
 
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 25084a052a1e..c9aca042e61d 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -755,4 +755,7 @@ struct fuse_notify_retrieve_in {
 	uint64_t	dummy4;
 };
 
+/* Device ioctls: */
+#define FUSE_DEV_IOC_CLONE	_IOR(229, 0, uint32_t)
+
 #endif /* _LINUX_FUSE_H */
-- 
cgit 


From 974d7af5fcc295dcf8315255142b2fe44fd74b0c Mon Sep 17 00:00:00 2001
From: Andy Gospodarek <gospo@cumulusnetworks.com>
Date: Tue, 7 Jul 2015 13:56:57 -0400
Subject: ipv4: add support for linkdown sysctl to netconf

This kernel patch exports the value of the new
ignore_routes_with_linkdown via netconf.

v2: changes to notify userspace via netlink when sysctl values change
and proposed for 'net' since this could be considered a bugfix

Signed-off-by: Andy Gospodarek <gospo@cumulusnetworks.com>
Suggested-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/netconf.h |  1 +
 net/ipv4/devinet.c           | 13 +++++++++++++
 2 files changed, 14 insertions(+)

(limited to 'include/uapi/linux')

diff --git a/include/uapi/linux/netconf.h b/include/uapi/linux/netconf.h
index 669a1f0b1d97..23cbd34e4ac7 100644
--- a/include/uapi/linux/netconf.h
+++ b/include/uapi/linux/netconf.h
@@ -15,6 +15,7 @@ enum {
 	NETCONFA_RP_FILTER,
 	NETCONFA_MC_FORWARDING,
 	NETCONFA_PROXY_NEIGH,
+	NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
 	__NETCONFA_MAX
 };
 #define NETCONFA_MAX	(__NETCONFA_MAX - 1)
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 7498716e8f54..e813196c91c7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1740,6 +1740,8 @@ static int inet_netconf_msgsize_devconf(int type)
 		size += nla_total_size(4);
 	if (type == -1 || type == NETCONFA_PROXY_NEIGH)
 		size += nla_total_size(4);
+	if (type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN)
+		size += nla_total_size(4);
 
 	return size;
 }
@@ -1780,6 +1782,10 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex,
 	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH,
 			IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0)
 		goto nla_put_failure;
+	if ((type == -1 || type == NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN) &&
+	    nla_put_s32(skb, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+			IPV4_DEVCONF(*devconf, IGNORE_ROUTES_WITH_LINKDOWN)) < 0)
+		goto nla_put_failure;
 
 	nlmsg_end(skb, nlh);
 	return 0;
@@ -1819,6 +1825,7 @@ static const struct nla_policy devconf_ipv4_policy[NETCONFA_MAX+1] = {
 	[NETCONFA_FORWARDING]	= { .len = sizeof(int) },
 	[NETCONFA_RP_FILTER]	= { .len = sizeof(int) },
 	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },
+	[NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN]	= { .len = sizeof(int) },
 };
 
 static int inet_netconf_get_devconf(struct sk_buff *in_skb,
@@ -2048,6 +2055,12 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 			inet_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH,
 						    ifindex, cnf);
 		}
+		if (i == IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN - 1 &&
+		    new_value != old_value) {
+			ifindex = devinet_conf_ifindex(net, cnf);
+			inet_netconf_notify_devconf(net, NETCONFA_IGNORE_ROUTES_WITH_LINKDOWN,
+						    ifindex, cnf);
+		}
 	}
 
 	return ret;
-- 
cgit