From a9aa53df6e6c768fc0f25a7c80ba586b0290720a Mon Sep 17 00:00:00 2001
From: Simo Sorce <simo@redhat.com>
Date: Thu, 29 Mar 2012 19:18:19 -0400
Subject: svcauth: remove unused define

Signed-off-by: Simo Sorce <simo@redhat.com>
---
 include/linux/sunrpc/svcauth.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 548790e9113b..2e2af101b59c 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -16,7 +16,6 @@
 #include <linux/sunrpc/cache.h>
 #include <linux/hash.h>
 
-#define SVC_CRED_NGROUPS	32
 struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
-- 
cgit 


From db3a35326362624dd4d8473e676d63afa52bedcc Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 28 Mar 2012 19:09:08 +0400
Subject: nfsd: add link to owner cache detail to svc_export structure

Without info about owner cache datail it won't be able to find out, which
per-net cache detail have to be.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/export.c            | 10 +++++-----
 include/linux/nfsd/export.h |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 71c5ce35a1a5..99ea4c00240c 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -525,6 +525,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
 		goto out1;
 
 	exp.ex_client = dom;
+	exp.cd = cd;
 
 	/* expiry */
 	err = -EINVAL;
@@ -672,6 +673,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
 	new->ex_fslocs.locations = NULL;
 	new->ex_fslocs.locations_count = 0;
 	new->ex_fslocs.migrated = 0;
+	new->cd = item->cd;
 }
 
 static void export_update(struct cache_head *cnew, struct cache_head *citem)
@@ -739,8 +741,7 @@ svc_export_lookup(struct svc_export *exp)
 	struct cache_head *ch;
 	int hash = svc_export_hash(exp);
 
-	ch = sunrpc_cache_lookup(&svc_export_cache, &exp->h,
-				 hash);
+	ch = sunrpc_cache_lookup(exp->cd, &exp->h, hash);
 	if (ch)
 		return container_of(ch, struct svc_export, h);
 	else
@@ -753,9 +754,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old)
 	struct cache_head *ch;
 	int hash = svc_export_hash(old);
 
-	ch = sunrpc_cache_update(&svc_export_cache, &new->h,
-				 &old->h,
-				 hash);
+	ch = sunrpc_cache_update(old->cd, &new->h, &old->h, hash);
 	if (ch)
 		return container_of(ch, struct svc_export, h);
 	else
@@ -797,6 +796,7 @@ static svc_export *exp_get_by_name(svc_client *clp, const struct path *path,
 
 	key.ex_client = clp;
 	key.ex_path = *path;
+	key.cd = &svc_export_cache;
 
 	exp = svc_export_lookup(&key);
 	if (exp == NULL)
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index f85308e688fd..64455292bbba 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -103,6 +103,7 @@ struct svc_export {
 	struct nfsd4_fs_locations ex_fslocs;
 	int			ex_nflavors;
 	struct exp_flavor_info	ex_flavors[MAX_SECINFO_LIST];
+	struct cache_detail	*cd;
 };
 
 /* an "export key" (expkey) maps a filehandlefragement to an
-- 
cgit 


From 71234978e81ee515c8025d087a197561b311c183 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 28 Mar 2012 19:09:15 +0400
Subject: nfsd: use cache detail pointer from svc_export structure on cache put

Hard-coded pointer is redundant now and can be replaced.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 include/linux/nfsd/export.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 64455292bbba..485c2afa96f7 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -147,7 +147,7 @@ extern struct cache_detail svc_export_cache;
 
 static inline void exp_put(struct svc_export *exp)
 {
-	cache_put(&exp->h, &svc_export_cache);
+	cache_put(&exp->h, exp->cd);
 }
 
 static inline void exp_get(struct svc_export *exp)
-- 
cgit 


From e3f70eadb7dddfb5a2bb9afff7abfc6ee17a29d0 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Thu, 29 Mar 2012 18:54:33 +0400
Subject: Lockd: pass network namespace to creation and destruction routines

v2: dereference of most probably already released nlm_host removed in
nlmclnt_done() and reclaimer().

These routines are called from locks reclaimer() kernel thread. This thread
works in "init_net" network context and currently relays on persence on lockd
thread and it's per-net resources. Thus lockd_up() and lockd_down() can't relay
on current network context. So let's pass corrent one into them.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/clntlock.c        | 13 ++++++++-----
 fs/lockd/svc.c             |  7 +++----
 fs/nfsd/nfssvc.c           |  6 +++---
 include/linux/lockd/bind.h |  4 ++--
 4 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index ba1dc2eebd1e..ca0a08001449 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -56,7 +56,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 	u32 nlm_version = (nlm_init->nfs_version == 2) ? 1 : 4;
 	int status;
 
-	status = lockd_up();
+	status = lockd_up(nlm_init->net);
 	if (status < 0)
 		return ERR_PTR(status);
 
@@ -65,7 +65,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
 				   nlm_init->hostname, nlm_init->noresvport,
 				   nlm_init->net);
 	if (host == NULL) {
-		lockd_down();
+		lockd_down(nlm_init->net);
 		return ERR_PTR(-ENOLCK);
 	}
 
@@ -80,8 +80,10 @@ EXPORT_SYMBOL_GPL(nlmclnt_init);
  */
 void nlmclnt_done(struct nlm_host *host)
 {
+	struct net *net = host->net;
+
 	nlmclnt_release_host(host);
-	lockd_down();
+	lockd_down(net);
 }
 EXPORT_SYMBOL_GPL(nlmclnt_done);
 
@@ -220,11 +222,12 @@ reclaimer(void *ptr)
 	struct nlm_wait	  *block;
 	struct file_lock *fl, *next;
 	u32 nsmstate;
+	struct net *net = host->net;
 
 	allow_signal(SIGKILL);
 
 	down_write(&host->h_rwsem);
-	lockd_up();	/* note: this cannot fail as lockd is already running */
+	lockd_up(net);	/* note: this cannot fail as lockd is already running */
 
 	dprintk("lockd: reclaiming locks for host %s\n", host->h_name);
 
@@ -275,6 +278,6 @@ restart:
 
 	/* Release host handle after use */
 	nlmclnt_release_host(host);
-	lockd_down();
+	lockd_down(net);
 	return 0;
 }
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index f49b9afc4436..1ead0750cdbb 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -295,11 +295,10 @@ static void lockd_down_net(struct net *net)
 /*
  * Bring up the lockd process if it's not already up.
  */
-int lockd_up(void)
+int lockd_up(struct net *net)
 {
 	struct svc_serv *serv;
 	int		error = 0;
-	struct net *net = current->nsproxy->net_ns;
 
 	mutex_lock(&nlmsvc_mutex);
 	/*
@@ -378,12 +377,12 @@ EXPORT_SYMBOL_GPL(lockd_up);
  * Decrement the user count and bring down lockd if we're the last.
  */
 void
-lockd_down(void)
+lockd_down(struct net *net)
 {
 	mutex_lock(&nlmsvc_mutex);
 	if (nlmsvc_users) {
 		if (--nlmsvc_users) {
-			lockd_down_net(current->nsproxy->net_ns);
+			lockd_down_net(net);
 			goto out;
 		}
 	} else {
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 28dfad39f0c5..78e521392df1 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -220,7 +220,7 @@ static int nfsd_startup(unsigned short port, int nrservs)
 	ret = nfsd_init_socks(port);
 	if (ret)
 		goto out_racache;
-	ret = lockd_up();
+	ret = lockd_up(&init_net);
 	if (ret)
 		goto out_racache;
 	ret = nfs4_state_start();
@@ -229,7 +229,7 @@ static int nfsd_startup(unsigned short port, int nrservs)
 	nfsd_up = true;
 	return 0;
 out_lockd:
-	lockd_down();
+	lockd_down(&init_net);
 out_racache:
 	nfsd_racache_shutdown();
 	return ret;
@@ -246,7 +246,7 @@ static void nfsd_shutdown(void)
 	if (!nfsd_up)
 		return;
 	nfs4_state_shutdown();
-	lockd_down();
+	lockd_down(&init_net);
 	nfsd_racache_shutdown();
 	nfsd_up = false;
 }
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 11a966e5f829..4d24d64578c4 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -54,7 +54,7 @@ extern void	nlmclnt_done(struct nlm_host *host);
 
 extern int	nlmclnt_proc(struct nlm_host *host, int cmd,
 					struct file_lock *fl);
-extern int	lockd_up(void);
-extern void	lockd_down(void);
+extern int	lockd_up(struct net *net);
+extern void	lockd_down(struct net *net);
 
 #endif /* LINUX_LOCKD_BIND_H */
-- 
cgit 


From b89109bef4a6a4a8ab5788778ee0addca0787870 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 11 Apr 2012 15:13:14 +0400
Subject: nfsd: pass network context to export caches init/shutdown routines

These functions will be called from per-net operations.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/export.c            | 20 ++++++++++----------
 fs/nfsd/nfsctl.c            |  6 +++---
 include/linux/nfsd/export.h |  4 ++--
 3 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 84723bc37c59..6453669dcef7 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1228,17 +1228,17 @@ const struct seq_operations nfs_exports_op = {
  * Initialize the exports module.
  */
 int
-nfsd_export_init(void)
+nfsd_export_init(struct net *net)
 {
 	int rv;
-	dprintk("nfsd: initializing export module.\n");
+	dprintk("nfsd: initializing export module (net: %p).\n", net);
 
-	rv = cache_register_net(&svc_export_cache, &init_net);
+	rv = cache_register_net(&svc_export_cache, net);
 	if (rv)
 		return rv;
-	rv = cache_register_net(&svc_expkey_cache, &init_net);
+	rv = cache_register_net(&svc_expkey_cache, net);
 	if (rv)
-		cache_unregister_net(&svc_export_cache, &init_net);
+		cache_unregister_net(&svc_export_cache, net);
 	return rv;
 
 }
@@ -1257,14 +1257,14 @@ nfsd_export_flush(void)
  * Shutdown the exports module.
  */
 void
-nfsd_export_shutdown(void)
+nfsd_export_shutdown(struct net *net)
 {
 
-	dprintk("nfsd: shutting down export module.\n");
+	dprintk("nfsd: shutting down export module (net: %p).\n", net);
 
-	cache_unregister_net(&svc_expkey_cache, &init_net);
-	cache_unregister_net(&svc_export_cache, &init_net);
+	cache_unregister_net(&svc_expkey_cache, net);
+	cache_unregister_net(&svc_export_cache, net);
 	svcauth_unix_purge();
 
-	dprintk("nfsd: export shutdown complete.\n");
+	dprintk("nfsd: export shutdown complete (net: %p).\n", net);
 }
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ae19293e68df..bc76f8ebbe5e 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1163,7 +1163,7 @@ static int __init init_nfsd(void)
 	retval = nfsd_reply_cache_init();
 	if (retval)
 		goto out_free_stat;
-	retval = nfsd_export_init();
+	retval = nfsd_export_init(&init_net);
 	if (retval)
 		goto out_free_cache;
 	nfsd_lockd_init();	/* lockd->nfsd callbacks */
@@ -1184,7 +1184,7 @@ out_free_idmap:
 	nfsd_idmap_shutdown();
 out_free_lockd:
 	nfsd_lockd_shutdown();
-	nfsd_export_shutdown();
+	nfsd_export_shutdown(&init_net);
 out_free_cache:
 	nfsd_reply_cache_shutdown();
 out_free_stat:
@@ -1201,7 +1201,7 @@ out_unregister_notifier:
 
 static void __exit exit_nfsd(void)
 {
-	nfsd_export_shutdown();
+	nfsd_export_shutdown(&init_net);
 	nfsd_reply_cache_shutdown();
 	remove_proc_entry("fs/nfs/exports", NULL);
 	remove_proc_entry("fs/nfs", NULL);
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 485c2afa96f7..375096c083d3 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -130,8 +130,8 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
 /*
  * Function declarations
  */
-int			nfsd_export_init(void);
-void			nfsd_export_shutdown(void);
+int			nfsd_export_init(struct net *);
+void			nfsd_export_shutdown(struct net *);
 void			nfsd_export_flush(void);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
 					     struct path *);
-- 
cgit 


From b3853e0ea1f2ef58f7e7c03e47819e2ae3766dea Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 11 Apr 2012 15:13:21 +0400
Subject: nfsd: make export cache allocated per network namespace context

This patch also changes prototypes of nfsd_export_flush() and exp_rootfh():
network namespace parameter added.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/export.c            | 47 ++++++++++++++++++++++++++++++---------------
 fs/nfsd/netns.h             |  2 ++
 fs/nfsd/nfsctl.c            |  2 +-
 fs/nfsd/nfssvc.c            |  2 +-
 include/linux/nfsd/export.h |  4 ++--
 5 files changed, 38 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 688264b55a3a..84d020fc0e37 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -15,11 +15,13 @@
 #include <linux/namei.h>
 #include <linux/module.h>
 #include <linux/exportfs.h>
+#include <linux/sunrpc/svc_xprt.h>
 
 #include <net/ipv6.h>
 
 #include "nfsd.h"
 #include "nfsfh.h"
+#include "netns.h"
 
 #define NFSDDBG_FACILITY	NFSDDBG_EXPORT
 
@@ -298,8 +300,6 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new,
 #define	EXPORT_HASHBITS		8
 #define	EXPORT_HASHMAX		(1<< EXPORT_HASHBITS)
 
-static struct cache_head *export_table[EXPORT_HASHMAX];
-
 static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
 {
 	int i;
@@ -708,10 +708,9 @@ static struct cache_head *svc_export_alloc(void)
 		return NULL;
 }
 
-struct cache_detail svc_export_cache = {
+struct cache_detail svc_export_cache_template = {
 	.owner		= THIS_MODULE,
 	.hash_size	= EXPORT_HASHMAX,
-	.hash_table	= export_table,
 	.name		= "nfsd.export",
 	.cache_put	= svc_export_put,
 	.cache_upcall	= svc_export_upcall,
@@ -835,7 +834,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,
  * since its harder to fool a kernel module than a user space program.
  */
 int
-exp_rootfh(svc_client *clp, char *name,
+exp_rootfh(struct net *net, svc_client *clp, char *name,
 	   struct knfsd_fh *f, int maxsize)
 {
 	struct svc_export	*exp;
@@ -843,7 +842,8 @@ exp_rootfh(svc_client *clp, char *name,
 	struct inode		*inode;
 	struct svc_fh		fh;
 	int			err;
-	struct cache_detail	*cd = &svc_export_cache;
+	struct nfsd_net		*nn = net_generic(net, nfsd_net_id);
+	struct cache_detail	*cd = nn->svc_export_cache;
 
 	err = -EPERM;
 	/* NB: we probably ought to check that it's NUL-terminated */
@@ -930,7 +930,8 @@ struct svc_export *
 rqst_exp_get_by_name(struct svc_rqst *rqstp, struct path *path)
 {
 	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
-	struct cache_detail *cd = &svc_export_cache;
+	struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id);
+	struct cache_detail *cd = nn->svc_export_cache;
 
 	if (rqstp->rq_client == NULL)
 		goto gss;
@@ -960,7 +961,8 @@ struct svc_export *
 rqst_exp_find(struct svc_rqst *rqstp, int fsid_type, u32 *fsidv)
 {
 	struct svc_export *gssexp, *exp = ERR_PTR(-ENOENT);
-	struct cache_detail *cd = &svc_export_cache;
+	struct nfsd_net *nn = net_generic(rqstp->rq_xprt->xpt_net, nfsd_net_id);
+	struct cache_detail *cd = nn->svc_export_cache;
 
 	if (rqstp->rq_client == NULL)
 		goto gss;
@@ -1238,26 +1240,39 @@ int
 nfsd_export_init(struct net *net)
 {
 	int rv;
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	dprintk("nfsd: initializing export module (net: %p).\n", net);
 
-	rv = cache_register_net(&svc_export_cache, net);
+	nn->svc_export_cache = cache_create_net(&svc_export_cache_template, net);
+	if (IS_ERR(nn->svc_export_cache))
+		return PTR_ERR(nn->svc_export_cache);
+	rv = cache_register_net(nn->svc_export_cache, net);
 	if (rv)
-		return rv;
+		goto destroy_export_cache;
+
 	rv = cache_register_net(&svc_expkey_cache, net);
 	if (rv)
-		cache_unregister_net(&svc_export_cache, net);
-	return rv;
+		goto unregister_export_cache;
+	return 0;
 
+unregister_export_cache:
+	cache_unregister_net(nn->svc_export_cache, net);
+destroy_export_cache:
+	cache_destroy_net(nn->svc_export_cache, net);
+	return rv;
 }
 
 /*
  * Flush exports table - called when last nfsd thread is killed
  */
 void
-nfsd_export_flush(void)
+nfsd_export_flush(struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+
 	cache_purge(&svc_expkey_cache);
-	cache_purge(&svc_export_cache);
+	cache_purge(nn->svc_export_cache);
 }
 
 /*
@@ -1266,11 +1281,13 @@ nfsd_export_flush(void)
 void
 nfsd_export_shutdown(struct net *net)
 {
+	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
 	dprintk("nfsd: shutting down export module (net: %p).\n", net);
 
 	cache_unregister_net(&svc_expkey_cache, net);
-	cache_unregister_net(&svc_export_cache, net);
+	cache_unregister_net(nn->svc_export_cache, net);
+	cache_destroy_net(nn->svc_export_cache, net);
 	svcauth_unix_purge();
 
 	dprintk("nfsd: export shutdown complete (net: %p).\n", net);
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index 12e0cff435b4..c1c6242942a9 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -28,6 +28,8 @@ struct cld_net;
 
 struct nfsd_net {
 	struct cld_net *cld_net;
+
+	struct cache_detail *svc_export_cache;
 };
 
 extern int nfsd_net_id;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index bc76f8ebbe5e..ddb9f8787379 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -354,7 +354,7 @@ static ssize_t write_filehandle(struct file *file, char *buf, size_t size)
 	if (!dom)
 		return -ENOMEM;
 
-	len = exp_rootfh(dom, path, &fh,  maxsize);
+	len = exp_rootfh(&init_net, dom, path, &fh,  maxsize);
 	auth_domain_put(dom);
 	if (len)
 		return len;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 78e521392df1..cb4d51d8cbdb 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -261,7 +261,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
 
 	printk(KERN_WARNING "nfsd: last server has exited, flushing export "
 			    "cache\n");
-	nfsd_export_flush();
+	nfsd_export_flush(net);
 }
 
 void nfsd_reset_versions(void)
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 375096c083d3..565c2122993f 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -132,13 +132,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp);
  */
 int			nfsd_export_init(struct net *);
 void			nfsd_export_shutdown(struct net *);
-void			nfsd_export_flush(void);
+void			nfsd_export_flush(struct net *);
 struct svc_export *	rqst_exp_get_by_name(struct svc_rqst *,
 					     struct path *);
 struct svc_export *	rqst_exp_parent(struct svc_rqst *,
 					struct path *);
 struct svc_export *	rqst_find_fsidzero_export(struct svc_rqst *);
-int			exp_rootfh(struct auth_domain *, 
+int			exp_rootfh(struct net *, struct auth_domain *,
 					char *path, struct knfsd_fh *, int maxsize);
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
 __be32			nfserrno(int errno);
-- 
cgit 


From e5f06f720eff24e32f1cc08ec03bcc8c4b2d2934 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 11 Apr 2012 15:13:28 +0400
Subject: nfsd: make expkey cache allocated per network namespace context

This patch also changes svcauth_unix_purge() function: added network namespace
as a parameter and thus loop over all networks was replaced by only one call
for ip map cache purge.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/export.c               | 28 +++++++++++++++++-----------
 fs/nfsd/netns.h                |  1 +
 fs/nfsd/nfsctl.c               |  3 ++-
 include/linux/nfsd/export.h    |  2 --
 include/linux/sunrpc/svcauth.h |  2 +-
 net/sunrpc/svcauth_unix.c      | 13 ++++---------
 6 files changed, 25 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 84d020fc0e37..dcb52b884519 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -40,7 +40,6 @@ typedef struct svc_export	svc_export;
 #define	EXPKEY_HASHBITS		8
 #define	EXPKEY_HASHMAX		(1 << EXPKEY_HASHBITS)
 #define	EXPKEY_HASHMASK		(EXPKEY_HASHMAX -1)
-static struct cache_head *expkey_table[EXPKEY_HASHMAX];
 
 static void expkey_put(struct kref *ref)
 {
@@ -241,10 +240,9 @@ static struct cache_head *expkey_alloc(void)
 		return NULL;
 }
 
-static struct cache_detail svc_expkey_cache = {
+static struct cache_detail svc_expkey_cache_template = {
 	.owner		= THIS_MODULE,
 	.hash_size	= EXPKEY_HASHMAX,
-	.hash_table	= expkey_table,
 	.name		= "nfsd.fh",
 	.cache_put	= expkey_put,
 	.cache_upcall	= expkey_upcall,
@@ -883,12 +881,13 @@ static struct svc_export *exp_find(struct cache_detail *cd,
 				   u32 *fsidv, struct cache_req *reqp)
 {
 	struct svc_export *exp;
-	struct svc_expkey *ek = exp_find_key(&svc_expkey_cache, clp, fsid_type, fsidv, reqp);
+	struct nfsd_net *nn = net_generic(cd->net, nfsd_net_id);
+	struct svc_expkey *ek = exp_find_key(nn->svc_expkey_cache, clp, fsid_type, fsidv, reqp);
 	if (IS_ERR(ek))
 		return ERR_CAST(ek);
 
 	exp = exp_get_by_name(cd, clp, &ek->ek_path, reqp);
-	cache_put(&ek->h, &svc_expkey_cache);
+	cache_put(&ek->h, nn->svc_expkey_cache);
 
 	if (IS_ERR(exp))
 		return ERR_CAST(exp);
@@ -1232,7 +1231,6 @@ const struct seq_operations nfs_exports_op = {
 	.show	= e_show,
 };
 
-
 /*
  * Initialize the exports module.
  */
@@ -1251,11 +1249,18 @@ nfsd_export_init(struct net *net)
 	if (rv)
 		goto destroy_export_cache;
 
-	rv = cache_register_net(&svc_expkey_cache, net);
-	if (rv)
+	nn->svc_expkey_cache = cache_create_net(&svc_expkey_cache_template, net);
+	if (IS_ERR(nn->svc_expkey_cache)) {
+		rv = PTR_ERR(nn->svc_expkey_cache);
 		goto unregister_export_cache;
+	}
+	rv = cache_register_net(nn->svc_expkey_cache, net);
+	if (rv)
+		goto destroy_expkey_cache;
 	return 0;
 
+destroy_expkey_cache:
+	cache_destroy_net(nn->svc_expkey_cache, net);
 unregister_export_cache:
 	cache_unregister_net(nn->svc_export_cache, net);
 destroy_export_cache:
@@ -1271,7 +1276,7 @@ nfsd_export_flush(struct net *net)
 {
 	struct nfsd_net *nn = net_generic(net, nfsd_net_id);
 
-	cache_purge(&svc_expkey_cache);
+	cache_purge(nn->svc_expkey_cache);
 	cache_purge(nn->svc_export_cache);
 }
 
@@ -1285,10 +1290,11 @@ nfsd_export_shutdown(struct net *net)
 
 	dprintk("nfsd: shutting down export module (net: %p).\n", net);
 
-	cache_unregister_net(&svc_expkey_cache, net);
+	cache_unregister_net(nn->svc_expkey_cache, net);
 	cache_unregister_net(nn->svc_export_cache, net);
+	cache_destroy_net(nn->svc_expkey_cache, net);
 	cache_destroy_net(nn->svc_export_cache, net);
-	svcauth_unix_purge();
+	svcauth_unix_purge(net);
 
 	dprintk("nfsd: export shutdown complete (net: %p).\n", net);
 }
diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h
index c1c6242942a9..9794c6c7d133 100644
--- a/fs/nfsd/netns.h
+++ b/fs/nfsd/netns.h
@@ -29,6 +29,7 @@ struct cld_net;
 struct nfsd_net {
 	struct cld_net *cld_net;
 
+	struct cache_detail *svc_expkey_cache;
 	struct cache_detail *svc_export_cache;
 };
 
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index ddb9f8787379..b14417740816 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -129,13 +129,14 @@ static int exports_open(struct inode *inode, struct file *file)
 {
 	int err;
 	struct seq_file *seq;
+	struct nfsd_net *nn = net_generic(&init_net, nfsd_net_id);
 
 	err = seq_open(file, &nfs_exports_op);
 	if (err)
 		return err;
 
 	seq = file->private_data;
-	seq->private = &svc_export_cache;
+	seq->private = nn->svc_export_cache;
 	return 0;
 }
 
diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h
index 565c2122993f..e33f747b173c 100644
--- a/include/linux/nfsd/export.h
+++ b/include/linux/nfsd/export.h
@@ -143,8 +143,6 @@ int			exp_rootfh(struct net *, struct auth_domain *,
 __be32			exp_pseudoroot(struct svc_rqst *, struct svc_fh *);
 __be32			nfserrno(int errno);
 
-extern struct cache_detail svc_export_cache;
-
 static inline void exp_put(struct svc_export *exp)
 {
 	cache_put(&exp->h, exp->cd);
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 2e2af101b59c..2c54683b91de 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -130,7 +130,7 @@ extern struct auth_domain *auth_domain_lookup(char *name, struct auth_domain *ne
 extern struct auth_domain *auth_domain_find(char *name);
 extern struct auth_domain *auth_unix_lookup(struct net *net, struct in6_addr *addr);
 extern int auth_unix_forget_old(struct auth_domain *dom);
-extern void svcauth_unix_purge(void);
+extern void svcauth_unix_purge(struct net *net);
 extern void svcauth_unix_info_release(struct svc_xprt *xpt);
 extern int svcauth_unix_set_client(struct svc_rqst *rqstp);
 
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 521d8f7dc833..9c3b9f014468 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -346,17 +346,12 @@ static inline int ip_map_update(struct net *net, struct ip_map *ipm,
 	return __ip_map_update(sn->ip_map_cache, ipm, udom, expiry);
 }
 
-
-void svcauth_unix_purge(void)
+void svcauth_unix_purge(struct net *net)
 {
-	struct net *net;
-
-	for_each_net(net) {
-		struct sunrpc_net *sn;
+	struct sunrpc_net *sn;
 
-		sn = net_generic(net, sunrpc_net_id);
-		cache_purge(sn->ip_map_cache);
-	}
+	sn = net_generic(net, sunrpc_net_id);
+	cache_purge(sn->ip_map_cache);
 }
 EXPORT_SYMBOL_GPL(svcauth_unix_purge);
 
-- 
cgit 


From 05ba1f0823004e947748523782e9c2f07f3bff0d Mon Sep 17 00:00:00 2001
From: Anatol Pomozov <anatol.pomozov@gmail.com>
Date: Sun, 22 Apr 2012 18:45:24 -0700
Subject: fuse: add FALLOCATE operation

fallocate filesystem operation preallocates media space for the given file.
If fallocate returns success then any subsequent write to the given range
never fails with 'not enough space' error.

Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
---
 fs/fuse/file.c       | 33 +++++++++++++++++++++++++++++++++
 include/linux/fuse.h | 14 +++++++++++++-
 2 files changed, 46 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 504e61b7fd75..e3fee88831d4 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2171,6 +2171,37 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
 	return ret;
 }
 
+long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
+			    loff_t length)
+{
+	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = ff->fc;
+	struct fuse_req *req;
+	struct fuse_fallocate_in inarg = {
+		.fh = ff->fh,
+		.offset = offset,
+		.length = length,
+		.mode = mode
+	};
+	int err;
+
+	req = fuse_get_req(fc);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	req->in.h.opcode = FUSE_FALLOCATE;
+	req->in.h.nodeid = ff->nodeid;
+	req->in.numargs = 1;
+	req->in.args[0].size = sizeof(inarg);
+	req->in.args[0].value = &inarg;
+	fuse_request_send(fc, req);
+	err = req->out.h.error;
+	fuse_put_request(fc, req);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(fuse_file_fallocate);
+
 static const struct file_operations fuse_file_operations = {
 	.llseek		= fuse_file_llseek,
 	.read		= do_sync_read,
@@ -2188,6 +2219,7 @@ static const struct file_operations fuse_file_operations = {
 	.unlocked_ioctl	= fuse_file_ioctl,
 	.compat_ioctl	= fuse_file_compat_ioctl,
 	.poll		= fuse_file_poll,
+	.fallocate	= fuse_file_fallocate,
 };
 
 static const struct file_operations fuse_direct_io_file_operations = {
@@ -2204,6 +2236,7 @@ static const struct file_operations fuse_direct_io_file_operations = {
 	.unlocked_ioctl	= fuse_file_ioctl,
 	.compat_ioctl	= fuse_file_compat_ioctl,
 	.poll		= fuse_file_poll,
+	.fallocate	= fuse_file_fallocate,
 	/* no splice_read */
 };
 
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 8f2ab8fef929..9303348965fb 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -54,6 +54,9 @@
  * 7.18
  *  - add FUSE_IOCTL_DIR flag
  *  - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ *  - add FUSE_FALLOCATE
  */
 
 #ifndef _LINUX_FUSE_H
@@ -85,7 +88,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 18
+#define FUSE_KERNEL_MINOR_VERSION 19
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -278,6 +281,7 @@ enum fuse_opcode {
 	FUSE_POLL          = 40,
 	FUSE_NOTIFY_REPLY  = 41,
 	FUSE_BATCH_FORGET  = 42,
+	FUSE_FALLOCATE     = 43,
 
 	/* CUSE specific operations */
 	CUSE_INIT          = 4096,
@@ -571,6 +575,14 @@ struct fuse_notify_poll_wakeup_out {
 	__u64	kh;
 };
 
+struct fuse_fallocate_in {
+	__u64	fh;
+	__u64	offset;
+	__u64	length;
+	__u32	mode;
+	__u32	padding;
+};
+
 struct fuse_in_header {
 	__u32	len;
 	__u32	opcode;
-- 
cgit 


From 1f45f9dbb392f9ca0919e9cd2370ab66ae752ec8 Mon Sep 17 00:00:00 2001
From: Heiko Stübner <heiko@sntech.de>
Date: Sat, 28 Apr 2012 12:19:10 +0200
Subject: fb_defio: add first_io callback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With this optional callback the driver is notified when the first page
is entered into the pagelist and a new deferred_io call is scheduled.

A possible use-case for this is runtime-pm. In the first_io call
	pm_runtime_get()
could be called, which starts an asynchronous runtime_resume of the
device. In the deferred_io callback a call to
	pm_runtime_barrier()
makes the sure, the device is resumed by then and a
	pm_runtime_put()
may put the device back to sleep.

Also, some SoCs may use the runtime-pm system to determine if they
are able to enter deeper idle states. Therefore it is necessary to
keep the use-count from the first written page until the conclusion
of the screen update, to prevent the system from going to sleep before
completing the pending update.

Two users of defio were using kmalloc to allocate the structure.
These allocations are changed to kzalloc, to prevent uninitialised
.first_io members in those drivers.

Signed-off-by: Heiko Stübner <heiko@sntech.de>
Signed-off-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
---
 drivers/video/fb_defio.c | 4 ++++
 drivers/video/smscufx.c  | 2 +-
 drivers/video/udlfb.c    | 2 +-
 include/linux/fb.h       | 1 +
 4 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c
index c27e153d8882..070f26f0bf99 100644
--- a/drivers/video/fb_defio.c
+++ b/drivers/video/fb_defio.c
@@ -107,6 +107,10 @@ static int fb_deferred_io_mkwrite(struct vm_area_struct *vma,
 	/* protect against the workqueue changing the page list */
 	mutex_lock(&fbdefio->lock);
 
+	/* first write in this cycle, notify the driver */
+	if (fbdefio->first_io && list_empty(&fbdefio->pagelist))
+		fbdefio->first_io(info);
+
 	/*
 	 * We want the page to remain locked from ->page_mkwrite until
 	 * the PTE is marked dirty to avoid page_mkclean() being called
diff --git a/drivers/video/smscufx.c b/drivers/video/smscufx.c
index ccbfef5e828f..99857857f6fa 100644
--- a/drivers/video/smscufx.c
+++ b/drivers/video/smscufx.c
@@ -1083,7 +1083,7 @@ static int ufx_ops_open(struct fb_info *info, int user)
 
 		struct fb_deferred_io *fbdefio;
 
-		fbdefio = kmalloc(sizeof(struct fb_deferred_io), GFP_KERNEL);
+		fbdefio = kzalloc(sizeof(struct fb_deferred_io), GFP_KERNEL);
 
 		if (fbdefio) {
 			fbdefio->delay = UFX_DEFIO_WRITE_DELAY;
diff --git a/drivers/video/udlfb.c b/drivers/video/udlfb.c
index a159b63e18b9..e9d43b403432 100644
--- a/drivers/video/udlfb.c
+++ b/drivers/video/udlfb.c
@@ -893,7 +893,7 @@ static int dlfb_ops_open(struct fb_info *info, int user)
 
 		struct fb_deferred_io *fbdefio;
 
-		fbdefio = kmalloc(sizeof(struct fb_deferred_io), GFP_KERNEL);
+		fbdefio = kzalloc(sizeof(struct fb_deferred_io), GFP_KERNEL);
 
 		if (fbdefio) {
 			fbdefio->delay = DL_DEFIO_WRITE_DELAY;
diff --git a/include/linux/fb.h b/include/linux/fb.h
index d31cb682e173..c10e71efb8f5 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -607,6 +607,7 @@ struct fb_deferred_io {
 	struct mutex lock; /* mutex that protects the page list */
 	struct list_head pagelist; /* list of touched pages */
 	/* callback */
+	void (*first_io)(struct fb_info *info);
 	void (*deferred_io)(struct fb_info *info, struct list_head *pagelist);
 };
 #endif
-- 
cgit 


From f84891289e62a74e9b4942eaad80617368b2d778 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 29 Apr 2012 18:21:10 -0400
Subject: ext4: create a new BH_Verified flag to avoid unnecessary metadata
 validation

Create a new BH_Verified flag to indicate that we've verified all the
data in a buffer_head for correctness.  This allows us to bypass
expensive verification steps when they are not necessary without
missing them when they are.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/extents.c          | 35 ++++++++++++++++++++++++++---------
 include/linux/jbd_common.h |  2 ++
 2 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index abcdeab67f52..8c1334ee8c7f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -412,6 +412,26 @@ int ext4_ext_check_inode(struct inode *inode)
 	return ext4_ext_check(inode, ext_inode_hdr(inode), ext_depth(inode));
 }
 
+static int __ext4_ext_check_block(const char *function, unsigned int line,
+				  struct inode *inode,
+				  struct ext4_extent_header *eh,
+				  int depth,
+				  struct buffer_head *bh)
+{
+	int ret;
+
+	if (buffer_verified(bh))
+		return 0;
+	ret = ext4_ext_check(inode, eh, depth);
+	if (ret)
+		return ret;
+	set_buffer_verified(bh);
+	return ret;
+}
+
+#define ext4_ext_check_block(inode, eh, depth, bh)	\
+	__ext4_ext_check_block(__func__, __LINE__, inode, eh, depth, bh)
+
 #ifdef EXT_DEBUG
 static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 {
@@ -668,8 +688,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 	i = depth;
 	/* walk through the tree */
 	while (i) {
-		int need_to_validate = 0;
-
 		ext_debug("depth %d: num %d, max %d\n",
 			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 
@@ -688,8 +706,6 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 				put_bh(bh);
 				goto err;
 			}
-			/* validate the extent entries */
-			need_to_validate = 1;
 		}
 		eh = ext_block_hdr(bh);
 		ppos++;
@@ -703,7 +719,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 		path[ppos].p_hdr = eh;
 		i--;
 
-		if (need_to_validate && ext4_ext_check(inode, eh, i))
+		if (ext4_ext_check_block(inode, eh, i, bh))
 			goto err;
 	}
 
@@ -1344,7 +1360,8 @@ got_index:
 			return -EIO;
 		eh = ext_block_hdr(bh);
 		/* subtract from p_depth to get proper eh_depth */
-		if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
+		if (ext4_ext_check_block(inode, eh,
+					 path->p_depth - depth, bh)) {
 			put_bh(bh);
 			return -EIO;
 		}
@@ -1357,7 +1374,7 @@ got_index:
 	if (bh == NULL)
 		return -EIO;
 	eh = ext_block_hdr(bh);
-	if (ext4_ext_check(inode, eh, path->p_depth - depth)) {
+	if (ext4_ext_check_block(inode, eh, path->p_depth - depth, bh)) {
 		put_bh(bh);
 		return -EIO;
 	}
@@ -2644,8 +2661,8 @@ cont:
 				err = -EIO;
 				break;
 			}
-			if (ext4_ext_check(inode, ext_block_hdr(bh),
-							depth - i - 1)) {
+			if (ext4_ext_check_block(inode, ext_block_hdr(bh),
+							depth - i - 1, bh)) {
 				err = -EIO;
 				break;
 			}
diff --git a/include/linux/jbd_common.h b/include/linux/jbd_common.h
index 6230f8556a4e..6133679bc4c0 100644
--- a/include/linux/jbd_common.h
+++ b/include/linux/jbd_common.h
@@ -12,6 +12,7 @@ enum jbd_state_bits {
 	BH_State,		/* Pins most journal_head state */
 	BH_JournalHead,		/* Pins bh->b_private and jh->b_bh */
 	BH_Unshadow,		/* Dummy bit, for BJ_Shadow wakeup filtering */
+	BH_Verified,		/* Metadata block has been verified ok */
 	BH_JBDPrivateStart,	/* First bit available for private use by FS */
 };
 
@@ -24,6 +25,7 @@ TAS_BUFFER_FNS(Revoked, revoked)
 BUFFER_FNS(RevokeValid, revokevalid)
 TAS_BUFFER_FNS(RevokeValid, revokevalid)
 BUFFER_FNS(Freed, freed)
+BUFFER_FNS(Verified, verified)
 
 static inline struct buffer_head *jh2bh(struct journal_head *jh)
 {
-- 
cgit 


From a2ebfe2fc6e088a70d06cd15a5bc9bcb621cc195 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Tue, 10 Apr 2012 16:21:20 +0530
Subject: power_supply: Add voltage_ocv property and use it for max17042 driver

This adds a new sysfs file called 'voltage_ocv' which gives the
Open Circuit Voltage of the battery.

This property can be used for platform shutdown policies and
can be useful for initial capacity estimations.

Note: This patch is generated against linux-next branch.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 Documentation/power/power_supply_class.txt | 2 ++
 drivers/power/max17042_battery.c           | 8 ++++++++
 drivers/power/power_supply_sysfs.c         | 1 +
 include/linux/power_supply.h               | 2 ++
 4 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt
index 9f16c5178b66..211831d4095f 100644
--- a/Documentation/power/power_supply_class.txt
+++ b/Documentation/power/power_supply_class.txt
@@ -84,6 +84,8 @@ are already charged or discharging, 'n/a' can be displayed (or
 HEALTH - represents health of the battery, values corresponds to
 POWER_SUPPLY_HEALTH_*, defined in battery.h.
 
+VOLTAGE_OCV - open circuit voltage of the battery.
+
 VOLTAGE_MAX_DESIGN, VOLTAGE_MIN_DESIGN - design values for maximal and
 minimal power supply voltages. Maximal/minimal means values of voltages
 when battery considered "full"/"empty" at normal conditions. Yes, there is
diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c
index 738648d1d9fc..42c4be9a664b 100644
--- a/drivers/power/max17042_battery.c
+++ b/drivers/power/max17042_battery.c
@@ -106,6 +106,7 @@ static enum power_supply_property max17042_battery_props[] = {
 	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
+	POWER_SUPPLY_PROP_VOLTAGE_OCV,
 	POWER_SUPPLY_PROP_CAPACITY,
 	POWER_SUPPLY_PROP_CHARGE_FULL,
 	POWER_SUPPLY_PROP_TEMP,
@@ -170,6 +171,13 @@ static int max17042_get_property(struct power_supply *psy,
 		if (ret < 0)
 			return ret;
 
+		val->intval = ret * 625 / 8;
+		break;
+	case POWER_SUPPLY_PROP_VOLTAGE_OCV:
+		ret = max17042_read_reg(chip->client, MAX17042_OCVInternal);
+		if (ret < 0)
+			return ret;
+
 		val->intval = ret * 625 / 8;
 		break;
 	case POWER_SUPPLY_PROP_CAPACITY:
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c
index 4368e7d61316..4150747f9186 100644
--- a/drivers/power/power_supply_sysfs.c
+++ b/drivers/power/power_supply_sysfs.c
@@ -146,6 +146,7 @@ static struct device_attribute power_supply_attrs[] = {
 	POWER_SUPPLY_ATTR(voltage_min_design),
 	POWER_SUPPLY_ATTR(voltage_now),
 	POWER_SUPPLY_ATTR(voltage_avg),
+	POWER_SUPPLY_ATTR(voltage_ocv),
 	POWER_SUPPLY_ATTR(current_max),
 	POWER_SUPPLY_ATTR(current_now),
 	POWER_SUPPLY_ATTR(current_avg),
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index c38c13db8832..fd17ae0f9c20 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -96,6 +96,7 @@ enum power_supply_property {
 	POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN,
 	POWER_SUPPLY_PROP_VOLTAGE_NOW,
 	POWER_SUPPLY_PROP_VOLTAGE_AVG,
+	POWER_SUPPLY_PROP_VOLTAGE_OCV,
 	POWER_SUPPLY_PROP_CURRENT_MAX,
 	POWER_SUPPLY_PROP_CURRENT_NOW,
 	POWER_SUPPLY_PROP_CURRENT_AVG,
@@ -261,6 +262,7 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp)
 	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
 	case POWER_SUPPLY_PROP_VOLTAGE_NOW:
 	case POWER_SUPPLY_PROP_VOLTAGE_AVG:
+	case POWER_SUPPLY_PROP_VOLTAGE_OCV:
 	case POWER_SUPPLY_PROP_POWER_NOW:
 		return 1;
 	default:
-- 
cgit 


From 0d4ed4e27a4cb180af395fa3d7aa98d79f3d3015 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <cbouatmailru@gmail.com>
Date: Fri, 4 May 2012 21:06:19 -0700
Subject: power_supply: Make the core a boolean instead of a tristate

On Mon, Apr 02, 2012 at 01:53:23PM +1000, Benjamin Herrenschmidt wrote:
> > drivers/built-in.o: In function `.nouveau_pm_trigger':
> > (.text+0xa56e8): undefined reference to `.power_supply_is_system_supplied'
> >
> > nouveau probably needs to depends on CONFIG_POWER_SUPPLY to force a module
> > build with the latter is =m
>
> Ok, not that trivial...
>
> The problem is more like POWER_SUPPLY should be a bool, not a tristate.
>
> If you think about it: you don't want things like nouveau to depend on a
> random subsystem like that, people will never get it. In fact,
> POWER_SUPPLY provides empty inline stubs when not enabled, so that's
> really designed to not have depends...
>
> However that -cannot- work if POWER_SUPPLY is modular and the drivers
> who use it are not.
>
> The only fixes here that make sense I can think of
> that don't also involve Kconfig horrors are:
>
>  - Ugly: in power_supply.h, use the extern variant if
>
>       defined(CONFIG_POWER_SUPPLY) ||
>        (defined(CONFIG_POWER_SUPPLY_MODULE) && defined(MODULE))
>
> IE. use the stub if power supply is a module and what is being built is
> built-in. Of course that's not only ugly, it somewhat sucks from a user
> perspective as the subsystem now exists but can't be used by some
> drivers...
>
>  - Better: Just make the bloody thing a bool :-) The power supply
> framework itself is small enough, just make it a boolean option and
> avoid the problem entirely. The actual power supply sub drivers can
> remain modular of course.

Suggested-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Anton Vorontsov <cbouatmailru@gmail.com>
---
 drivers/power/Kconfig        | 2 +-
 include/linux/power_supply.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index 99dc29f2f2f2..0c52a4079124 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -1,5 +1,5 @@
 menuconfig POWER_SUPPLY
-	tristate "Power supply class support"
+	bool "Power supply class support"
 	help
 	  Say Y here to enable power supply class support. This allows
 	  power supply (batteries, AC, USB) monitoring by userspace
diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h
index fd17ae0f9c20..3b912bee28d1 100644
--- a/include/linux/power_supply.h
+++ b/include/linux/power_supply.h
@@ -212,7 +212,7 @@ extern void power_supply_changed(struct power_supply *psy);
 extern int power_supply_am_i_supplied(struct power_supply *psy);
 extern int power_supply_set_battery_charged(struct power_supply *psy);
 
-#if defined(CONFIG_POWER_SUPPLY) || defined(CONFIG_POWER_SUPPLY_MODULE)
+#ifdef CONFIG_POWER_SUPPLY
 extern int power_supply_is_system_supplied(void);
 #else
 static inline int power_supply_is_system_supplied(void) { return -ENOSYS; }
-- 
cgit 


From d829dc75bafb10754f35fb8895e5143d20267b04 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Sat, 5 May 2012 06:24:10 -0700
Subject: charger-manager: Poll battery health in normal state

Charger-Manager needs to check battery health in normal state
as well as suspend-to-RAM state. When the battery is fully charged,
Charger-Manager needs to determine when the chargers restart charging.

This patch allows Charger-Manager to monitor battery health in normal
state and handle operation for chargers after battery is fully charged.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 Documentation/power/charger-manager.txt |  25 +++-
 drivers/power/charger-manager.c         | 229 ++++++++++++++++++++++++++++++++
 include/linux/power/charger-manager.h   |  25 ++++
 3 files changed, 278 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
index fdcca991df30..9b3863386e54 100644
--- a/Documentation/power/charger-manager.txt
+++ b/Documentation/power/charger-manager.txt
@@ -44,6 +44,12 @@ Charger Manager supports the following:
 	Normally, the platform will need to resume and suspend some devices
 	that are used by Charger Manager.
 
+* Support for premature full-battery event handling
+	If the battery voltage drops by "fullbatt_vchkdrop_uV" after
+	"fullbatt_vchkdrop_ms" from the full-battery event, the framework
+	restarts charging. This check is also performed while suspended by
+	setting wakeup time accordingly and using suspend_again.
+
 2. Global Charger-Manager Data related with suspend_again
 ========================================================
 In order to setup Charger Manager with suspend-again feature
@@ -55,7 +61,7 @@ if there are multiple batteries. If there are multiple batteries, the
 multiple instances of Charger Manager share the same charger_global_desc
 and it will manage in-suspend monitoring for all instances of Charger Manager.
 
-The user needs to provide all the two entries properly in order to activate
+The user needs to provide all the three entries properly in order to activate
 in-suspend monitoring:
 
 struct charger_global_desc {
@@ -74,6 +80,11 @@ bool (*rtc_only_wakeup)(void);
 	same struct. If there is any other wakeup source triggered the
 	wakeup, it should return false. If the "rtc" is the only wakeup
 	reason, it should return true.
+
+bool assume_timer_stops_in_suspend;
+	: if true, Charger Manager assumes that
+	the timer (CM uses jiffies as timer) stops during suspend. Then, CM
+	assumes that the suspend-duration is same as the alarm length.
 };
 
 3. How to setup suspend_again
@@ -111,6 +122,16 @@ enum polling_modes polling_mode;
 	  CM_POLL_CHARGING_ONLY: poll this battery if and only if the
 				 battery is being charged.
 
+unsigned int fullbatt_vchkdrop_ms;
+unsigned int fullbatt_vchkdrop_uV;
+	: If both have non-zero values, Charger Manager will check the
+	battery voltage drop fullbatt_vchkdrop_ms after the battery is fully
+	charged. If the voltage drop is over fullbatt_vchkdrop_uV, Charger
+	Manager will try to recharge the battery by disabling and enabling
+	chargers. Recharge with voltage drop condition only (without delay
+	condition) is needed to be implemented with hardware interrupts from
+	fuel gauges or charger devices/chips.
+
 unsigned int fullbatt_uV;
 	: If specified with a non-zero value, Charger Manager assumes
 	that the battery is full (capacity = 100) if the battery is not being
@@ -122,6 +143,8 @@ unsigned int polling_interval_ms;
 	this battery every polling_interval_ms or more frequently.
 
 enum data_source battery_present;
+	: CM_BATTERY_PRESENT: assume that the battery exists.
+	CM_NO_BATTERY: assume that the battery does not exists.
 	CM_FUEL_GAUGE: get battery presence information from fuel gauge.
 	CM_CHARGER_STAT: get battery presence from chargers.
 
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 9eca9f1ff0ea..959062d16bac 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -57,6 +57,12 @@ static bool cm_suspended;
 static bool cm_rtc_set;
 static unsigned long cm_suspend_duration_ms;
 
+/* About normal (not suspended) monitoring */
+static unsigned long polling_jiffy = ULONG_MAX; /* ULONG_MAX: no polling */
+static unsigned long next_polling; /* Next appointed polling time */
+static struct workqueue_struct *cm_wq; /* init at driver add */
+static struct delayed_work cm_monitor_work; /* init at driver add */
+
 /* Global charger-manager description */
 static struct charger_global_desc *g_desc; /* init with setup_charger_manager */
 
@@ -71,6 +77,11 @@ static bool is_batt_present(struct charger_manager *cm)
 	int i, ret;
 
 	switch (cm->desc->battery_present) {
+	case CM_BATTERY_PRESENT:
+		present = true;
+		break;
+	case CM_NO_BATTERY:
+		break;
 	case CM_FUEL_GAUGE:
 		ret = cm->fuel_gauge->get_property(cm->fuel_gauge,
 				POWER_SUPPLY_PROP_PRESENT, &val);
@@ -278,6 +289,26 @@ static int try_charger_enable(struct charger_manager *cm, bool enable)
 	return err;
 }
 
+/**
+ * try_charger_restart - Restart charging.
+ * @cm: the Charger Manager representing the battery.
+ *
+ * Restart charging by turning off and on the charger.
+ */
+static int try_charger_restart(struct charger_manager *cm)
+{
+	int err;
+
+	if (cm->emergency_stop)
+		return -EAGAIN;
+
+	err = try_charger_enable(cm, false);
+	if (err)
+		return err;
+
+	return try_charger_enable(cm, true);
+}
+
 /**
  * uevent_notify - Let users know something has changed.
  * @cm: the Charger Manager representing the battery.
@@ -333,6 +364,46 @@ static void uevent_notify(struct charger_manager *cm, const char *event)
 	dev_info(cm->dev, event);
 }
 
+/**
+ * fullbatt_vchk - Check voltage drop some times after "FULL" event.
+ * @work: the work_struct appointing the function
+ *
+ * If a user has designated "fullbatt_vchkdrop_ms/uV" values with
+ * charger_desc, Charger Manager checks voltage drop after the battery
+ * "FULL" event. It checks whether the voltage has dropped more than
+ * fullbatt_vchkdrop_uV by calling this function after fullbatt_vchkrop_ms.
+ */
+static void fullbatt_vchk(struct work_struct *work)
+{
+	struct delayed_work *dwork = to_delayed_work(work);
+	struct charger_manager *cm = container_of(dwork,
+			struct charger_manager, fullbatt_vchk_work);
+	struct charger_desc *desc = cm->desc;
+	int batt_uV, err, diff;
+
+	/* remove the appointment for fullbatt_vchk */
+	cm->fullbatt_vchk_jiffies_at = 0;
+
+	if (!desc->fullbatt_vchkdrop_uV || !desc->fullbatt_vchkdrop_ms)
+		return;
+
+	err = get_batt_uV(cm, &batt_uV);
+	if (err) {
+		dev_err(cm->dev, "%s: get_batt_uV error(%d).\n", __func__, err);
+		return;
+	}
+
+	diff = cm->fullbatt_vchk_uV;
+	diff -= batt_uV;
+
+	dev_dbg(cm->dev, "VBATT dropped %duV after full-batt.\n", diff);
+
+	if (diff > desc->fullbatt_vchkdrop_uV) {
+		try_charger_restart(cm);
+		uevent_notify(cm, "Recharge");
+	}
+}
+
 /**
  * _cm_monitor - Monitor the temperature and return true for exceptions.
  * @cm: the Charger Manager representing the battery.
@@ -392,6 +463,68 @@ static bool cm_monitor(void)
 	return stop;
 }
 
+/**
+ * _setup_polling - Setup the next instance of polling.
+ * @work: work_struct of the function _setup_polling.
+ */
+static void _setup_polling(struct work_struct *work)
+{
+	unsigned long min = ULONG_MAX;
+	struct charger_manager *cm;
+	bool keep_polling = false;
+	unsigned long _next_polling;
+
+	mutex_lock(&cm_list_mtx);
+
+	list_for_each_entry(cm, &cm_list, entry) {
+		if (is_polling_required(cm) && cm->desc->polling_interval_ms) {
+			keep_polling = true;
+
+			if (min > cm->desc->polling_interval_ms)
+				min = cm->desc->polling_interval_ms;
+		}
+	}
+
+	polling_jiffy = msecs_to_jiffies(min);
+	if (polling_jiffy <= CM_JIFFIES_SMALL)
+		polling_jiffy = CM_JIFFIES_SMALL + 1;
+
+	if (!keep_polling)
+		polling_jiffy = ULONG_MAX;
+	if (polling_jiffy == ULONG_MAX)
+		goto out;
+
+	WARN(cm_wq == NULL, "charger-manager: workqueue not initialized"
+			    ". try it later. %s\n", __func__);
+
+	_next_polling = jiffies + polling_jiffy;
+
+	if (!delayed_work_pending(&cm_monitor_work) ||
+	    (delayed_work_pending(&cm_monitor_work) &&
+	     time_after(next_polling, _next_polling))) {
+		cancel_delayed_work_sync(&cm_monitor_work);
+		next_polling = jiffies + polling_jiffy;
+		queue_delayed_work(cm_wq, &cm_monitor_work, polling_jiffy);
+	}
+
+out:
+	mutex_unlock(&cm_list_mtx);
+}
+static DECLARE_WORK(setup_polling, _setup_polling);
+
+/**
+ * cm_monitor_poller - The Monitor / Poller.
+ * @work: work_struct of the function cm_monitor_poller
+ *
+ * During non-suspended state, cm_monitor_poller is used to poll and monitor
+ * the batteries.
+ */
+static void cm_monitor_poller(struct work_struct *work)
+{
+	cm_monitor();
+	schedule_work(&setup_polling);
+}
+
 static int charger_get_property(struct power_supply *psy,
 		enum power_supply_property psp,
 		union power_supply_propval *val)
@@ -613,6 +746,21 @@ static bool cm_setup_timer(void)
 	mutex_lock(&cm_list_mtx);
 
 	list_for_each_entry(cm, &cm_list, entry) {
+		unsigned int fbchk_ms = 0;
+
+		/* fullbatt_vchk is required. setup timer for that */
+		if (cm->fullbatt_vchk_jiffies_at) {
+			fbchk_ms = jiffies_to_msecs(cm->fullbatt_vchk_jiffies_at
+						    - jiffies);
+			if (time_is_before_eq_jiffies(
+				cm->fullbatt_vchk_jiffies_at) ||
+				msecs_to_jiffies(fbchk_ms) < CM_JIFFIES_SMALL) {
+				fullbatt_vchk(&cm->fullbatt_vchk_work.work);
+				fbchk_ms = 0;
+			}
+		}
+		CM_MIN_VALID(wakeup_ms, fbchk_ms);
+
 		/* Skip if polling is not required for this CM */
 		if (!is_polling_required(cm) && !cm->emergency_stop)
 			continue;
@@ -672,6 +820,23 @@ static bool cm_setup_timer(void)
 	return false;
 }
 
+static void _cm_fbchk_in_suspend(struct charger_manager *cm)
+{
+	unsigned long jiffy_now = jiffies;
+
+	if (!cm->fullbatt_vchk_jiffies_at)
+		return;
+
+	if (g_desc && g_desc->assume_timer_stops_in_suspend)
+		jiffy_now += msecs_to_jiffies(cm_suspend_duration_ms);
+
+	/* Execute now if it's going to be executed not too long after */
+	jiffy_now += CM_JIFFIES_SMALL;
+
+	if (time_after_eq(jiffy_now, cm->fullbatt_vchk_jiffies_at))
+		fullbatt_vchk(&cm->fullbatt_vchk_work.work);
+}
+
 /**
  * cm_suspend_again - Determine whether suspend again or not
  *
@@ -693,6 +858,8 @@ bool cm_suspend_again(void)
 	ret = true;
 	mutex_lock(&cm_list_mtx);
 	list_for_each_entry(cm, &cm_list, entry) {
+		_cm_fbchk_in_suspend(cm);
+
 		if (cm->status_save_ext_pwr_inserted != is_ext_pwr_online(cm) ||
 		    cm->status_save_batt != is_batt_present(cm)) {
 			ret = false;
@@ -796,6 +963,21 @@ static int charger_manager_probe(struct platform_device *pdev)
 	memcpy(cm->desc, desc, sizeof(struct charger_desc));
 	cm->last_temp_mC = INT_MIN; /* denotes "unmeasured, yet" */
 
+	/*
+	 * The following two do not need to be errors.
+	 * Users may intentionally ignore those two features.
+	 */
+	if (desc->fullbatt_uV == 0) {
+		dev_info(&pdev->dev, "Ignoring full-battery voltage threshold"
+					" as it is not supplied.");
+	}
+	if (!desc->fullbatt_vchkdrop_ms || !desc->fullbatt_vchkdrop_uV) {
+		dev_info(&pdev->dev, "Disabling full-battery voltage drop "
+				"checking mechanism as it is not supplied.");
+		desc->fullbatt_vchkdrop_ms = 0;
+		desc->fullbatt_vchkdrop_uV = 0;
+	}
+
 	if (!desc->charger_regulators || desc->num_charger_regulators < 1) {
 		ret = -EINVAL;
 		dev_err(&pdev->dev, "charger_regulators undefined.\n");
@@ -903,6 +1085,8 @@ static int charger_manager_probe(struct platform_device *pdev)
 		cm->charger_psy.num_properties++;
 	}
 
+	INIT_DELAYED_WORK(&cm->fullbatt_vchk_work, fullbatt_vchk);
+
 	ret = power_supply_register(NULL, &cm->charger_psy);
 	if (ret) {
 		dev_err(&pdev->dev, "Cannot register charger-manager with"
@@ -928,6 +1112,8 @@ static int charger_manager_probe(struct platform_device *pdev)
 	list_add(&cm->entry, &cm_list);
 	mutex_unlock(&cm_list_mtx);
 
+	schedule_work(&setup_polling);
+
 	return 0;
 
 err_chg_enable:
@@ -958,9 +1144,17 @@ static int __devexit charger_manager_remove(struct platform_device *pdev)
 	list_del(&cm->entry);
 	mutex_unlock(&cm_list_mtx);
 
+	if (work_pending(&setup_polling))
+		cancel_work_sync(&setup_polling);
+	if (delayed_work_pending(&cm_monitor_work))
+		cancel_delayed_work_sync(&cm_monitor_work);
+
 	regulator_bulk_free(desc->num_charger_regulators,
 			    desc->charger_regulators);
 	power_supply_unregister(&cm->charger_psy);
+
+	try_charger_enable(cm, false);
+
 	kfree(cm->charger_psy.properties);
 	kfree(cm->charger_stat);
 	kfree(cm->desc);
@@ -1000,6 +1194,8 @@ static int cm_suspend_prepare(struct device *dev)
 		cm_suspended = true;
 	}
 
+	if (delayed_work_pending(&cm->fullbatt_vchk_work))
+		cancel_delayed_work(&cm->fullbatt_vchk_work);
 	cm->status_save_ext_pwr_inserted = is_ext_pwr_online(cm);
 	cm->status_save_batt = is_batt_present(cm);
 
@@ -1027,6 +1223,33 @@ static void cm_suspend_complete(struct device *dev)
 		cm_rtc_set = false;
 	}
 
+	/* Re-enqueue delayed work (fullbatt_vchk_work) */
+	if (cm->fullbatt_vchk_jiffies_at) {
+		unsigned long delay = 0;
+		unsigned long now = jiffies + CM_JIFFIES_SMALL;
+
+		if (time_after_eq(now, cm->fullbatt_vchk_jiffies_at)) {
+			delay = (unsigned long)((long)now
+				- (long)(cm->fullbatt_vchk_jiffies_at));
+			delay = jiffies_to_msecs(delay);
+		} else {
+			delay = 0;
+		}
+
+		/*
+		 * Account for cm_suspend_duration_ms if
+		 * assume_timer_stops_in_suspend is active
+		 */
+		if (g_desc && g_desc->assume_timer_stops_in_suspend) {
+			if (delay > cm_suspend_duration_ms)
+				delay -= cm_suspend_duration_ms;
+			else
+				delay = 0;
+		}
+
+		queue_delayed_work(cm_wq, &cm->fullbatt_vchk_work,
+				   msecs_to_jiffies(delay));
+	}
 	uevent_notify(cm, NULL);
 }
 
@@ -1048,12 +1271,18 @@ static struct platform_driver charger_manager_driver = {
 
 static int __init charger_manager_init(void)
 {
+	cm_wq = create_freezable_workqueue("charger_manager");
+	INIT_DELAYED_WORK(&cm_monitor_work, cm_monitor_poller);
+
 	return platform_driver_register(&charger_manager_driver);
 }
 late_initcall(charger_manager_init);
 
 static void __exit charger_manager_cleanup(void)
 {
+	destroy_workqueue(cm_wq);
+	cm_wq = NULL;
+
 	platform_driver_unregister(&charger_manager_driver);
 }
 module_exit(charger_manager_cleanup);
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index 4f75e531c112..baa299a95e13 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -18,6 +18,8 @@
 #include <linux/power_supply.h>
 
 enum data_source {
+	CM_BATTERY_PRESENT,
+	CM_NO_BATTERY,
 	CM_FUEL_GAUGE,
 	CM_CHARGER_STAT,
 };
@@ -38,11 +40,18 @@ enum polling_modes {
  *	rtc_only_wakeup() returning false.
  *	If the RTC given to CM is the only wakeup reason,
  *	rtc_only_wakeup should return true.
+ * @assume_timer_stops_in_suspend:
+ *	Assume that the jiffy timer stops in suspend-to-RAM.
+ *	When enabled, CM does not rely on jiffies value in
+ *	suspend_again and assumes that jiffies value does not
+ *	change during suspend.
  */
 struct charger_global_desc {
 	char *rtc_name;
 
 	bool (*rtc_only_wakeup)(void);
+
+	bool assume_timer_stops_in_suspend;
 };
 
 /**
@@ -50,6 +59,11 @@ struct charger_global_desc {
  * @psy_name: the name of power-supply-class for charger manager
  * @polling_mode:
  *	Determine which polling mode will be used
+ * @fullbatt_vchkdrop_ms:
+ * @fullbatt_vchkdrop_uV:
+ *	Check voltage drop after the battery is fully charged.
+ *	If it has dropped more than fullbatt_vchkdrop_uV after
+ *	fullbatt_vchkdrop_ms, CM will restart charging.
  * @fullbatt_uV: voltage in microvolt
  *	If it is not being charged and VBATT >= fullbatt_uV,
  *	it is assumed to be full.
@@ -76,6 +90,8 @@ struct charger_desc {
 	enum polling_modes polling_mode;
 	unsigned int polling_interval_ms;
 
+	unsigned int fullbatt_vchkdrop_ms;
+	unsigned int fullbatt_vchkdrop_uV;
 	unsigned int fullbatt_uV;
 
 	enum data_source battery_present;
@@ -101,6 +117,11 @@ struct charger_desc {
  * @fuel_gauge: power_supply for fuel gauge
  * @charger_stat: array of power_supply for chargers
  * @charger_enabled: the state of charger
+ * @fullbatt_vchk_jiffies_at:
+ *	jiffies at the time full battery check will occur.
+ * @fullbatt_vchk_uV: voltage in microvolt
+ *	criteria for full battery
+ * @fullbatt_vchk_work: work queue for full battery check
  * @emergency_stop:
  *	When setting true, stop charging
  * @last_temp_mC: the measured temperature in milli-Celsius
@@ -121,6 +142,10 @@ struct charger_manager {
 
 	bool charger_enabled;
 
+	unsigned long fullbatt_vchk_jiffies_at;
+	unsigned int fullbatt_vchk_uV;
+	struct delayed_work fullbatt_vchk_work;
+
 	int emergency_stop;
 	int last_temp_mC;
 
-- 
cgit 


From dfeccb12b4614befc49a92eb121c2211294ca669 Mon Sep 17 00:00:00 2001
From: Chanwoo Choi <cw00.choi@samsung.com>
Date: Sat, 5 May 2012 06:26:47 -0700
Subject: charger-manager: Provide cm_notify_event function for in-kernel use

By using cm_notify_event function, charger driver can report several
charger events (e.g. battery full and external power in/out, etc) to
Charger-Manager. Charger-Manager can properly and immediately control
chargers by the reported event.

Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
Signed-off-by: Donggeun Kim <dg77.kim@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 Documentation/power/charger-manager.txt |  16 +++-
 drivers/power/charger-manager.c         | 163 ++++++++++++++++++++++++++++++++
 include/linux/power/charger-manager.h   |  25 +++--
 3 files changed, 195 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/charger-manager.txt b/Documentation/power/charger-manager.txt
index 9b3863386e54..b4f7f4b23f64 100644
--- a/Documentation/power/charger-manager.txt
+++ b/Documentation/power/charger-manager.txt
@@ -50,6 +50,10 @@ Charger Manager supports the following:
 	restarts charging. This check is also performed while suspended by
 	setting wakeup time accordingly and using suspend_again.
 
+* Support for uevent-notify
+	With the charger-related events, the device sends
+	notification to users with UEVENT.
+
 2. Global Charger-Manager Data related with suspend_again
 ========================================================
 In order to setup Charger Manager with suspend-again feature
@@ -174,7 +178,17 @@ bool measure_battery_temp;
 	the value of measure_battery_temp.
 };
 
-5. Other Considerations
+5. Notify Charger-Manager of charger events: cm_notify_event()
+=========================================================
+If there is an charger event is required to notify
+Charger Manager, a charger device driver that triggers the event can call
+cm_notify_event(psy, type, msg) to notify the corresponding Charger Manager.
+In the function, psy is the charger driver's power_supply pointer, which is
+associated with Charger-Manager. The parameter "type"
+is the same as irq's type (enum cm_event_types). The event message "msg" is
+optional and is effective only if the event type is "UNDESCRIBED" or "OTHERS".
+
+6. Other Considerations
 =======================
 
 At the charger/battery-related events such as battery-pulled-out,
diff --git a/drivers/power/charger-manager.c b/drivers/power/charger-manager.c
index 959062d16bac..86935ec18954 100644
--- a/drivers/power/charger-manager.c
+++ b/drivers/power/charger-manager.c
@@ -23,6 +23,16 @@
 #include <linux/power/charger-manager.h>
 #include <linux/regulator/consumer.h>
 
+static const char * const default_event_names[] = {
+	[CM_EVENT_UNKNOWN] = "Unknown",
+	[CM_EVENT_BATT_FULL] = "Battery Full",
+	[CM_EVENT_BATT_IN] = "Battery Inserted",
+	[CM_EVENT_BATT_OUT] = "Battery Pulled Out",
+	[CM_EVENT_EXT_PWR_IN_OUT] = "External Power Attach/Detach",
+	[CM_EVENT_CHG_START_STOP] = "Charging Start/Stop",
+	[CM_EVENT_OTHERS] = "Other battery events"
+};
+
 /*
  * Regard CM_JIFFIES_SMALL jiffies is small enough to ignore for
  * delayed works so that we can run delayed works with CM_JIFFIES_SMALL
@@ -525,6 +535,69 @@ static void cm_monitor_poller(struct work_struct *work)
 	schedule_work(&setup_polling);
 }
 
+/**
+ * fullbatt_handler - Event handler for CM_EVENT_BATT_FULL
+ * @cm: the Charger Manager representing the battery.
+ */
+static void fullbatt_handler(struct charger_manager *cm)
+{
+	struct charger_desc *desc = cm->desc;
+
+	if (!desc->fullbatt_vchkdrop_uV || !desc->fullbatt_vchkdrop_ms)
+		goto out;
+
+	if (cm_suspended)
+		device_set_wakeup_capable(cm->dev, true);
+
+	if (delayed_work_pending(&cm->fullbatt_vchk_work))
+		cancel_delayed_work(&cm->fullbatt_vchk_work);
+	queue_delayed_work(cm_wq, &cm->fullbatt_vchk_work,
+			   msecs_to_jiffies(desc->fullbatt_vchkdrop_ms));
+	cm->fullbatt_vchk_jiffies_at = jiffies + msecs_to_jiffies(
+				       desc->fullbatt_vchkdrop_ms);
+
+	if (cm->fullbatt_vchk_jiffies_at == 0)
+		cm->fullbatt_vchk_jiffies_at = 1;
+
+out:
+	dev_info(cm->dev, "EVENT_HANDLE: Battery Fully Charged.\n");
+	uevent_notify(cm, default_event_names[CM_EVENT_BATT_FULL]);
+}
+
+/**
+ * battout_handler - Event handler for CM_EVENT_BATT_OUT
+ * @cm: the Charger Manager representing the battery.
+ */
+static void battout_handler(struct charger_manager *cm)
+{
+	if (cm_suspended)
+		device_set_wakeup_capable(cm->dev, true);
+
+	if (!is_batt_present(cm)) {
+		dev_emerg(cm->dev, "Battery Pulled Out!\n");
+		uevent_notify(cm, default_event_names[CM_EVENT_BATT_OUT]);
+	} else {
+		uevent_notify(cm, "Battery Reinserted?");
+	}
+}
+
+/**
+ * misc_event_handler - Handler for other evnets
+ * @cm: the Charger Manager representing the battery.
+ * @type: the Charger Manager representing the battery.
+ */
+static void misc_event_handler(struct charger_manager *cm,
+			enum cm_event_types type)
+{
+	if (cm_suspended)
+		device_set_wakeup_capable(cm->dev, true);
+
+	if (!delayed_work_pending(&cm_monitor_work) &&
+	    is_polling_required(cm) && cm->desc->polling_interval_ms)
+		schedule_work(&setup_polling);
+	uevent_notify(cm, default_event_names[type]);
+}
+
 static int charger_get_property(struct power_supply *psy,
 		enum power_supply_property psp,
 		union power_supply_propval *val)
@@ -1112,6 +1185,13 @@ static int charger_manager_probe(struct platform_device *pdev)
 	list_add(&cm->entry, &cm_list);
 	mutex_unlock(&cm_list_mtx);
 
+	/*
+	 * Charger-manager is capable of waking up the systme from sleep
+	 * when event is happend through cm_notify_event()
+	 */
+	device_init_wakeup(&pdev->dev, true);
+	device_set_wakeup_capable(&pdev->dev, false);
+
 	schedule_work(&setup_polling);
 
 	return 0;
@@ -1169,6 +1249,18 @@ static const struct platform_device_id charger_manager_id[] = {
 };
 MODULE_DEVICE_TABLE(platform, charger_manager_id);
 
+static int cm_suspend_noirq(struct device *dev)
+{
+	int ret = 0;
+
+	if (device_may_wakeup(dev)) {
+		device_set_wakeup_capable(dev, false);
+		ret = -EAGAIN;
+	}
+
+	return ret;
+}
+
 static int cm_suspend_prepare(struct device *dev)
 {
 	struct charger_manager *cm = dev_get_drvdata(dev);
@@ -1250,11 +1342,13 @@ static void cm_suspend_complete(struct device *dev)
 		queue_delayed_work(cm_wq, &cm->fullbatt_vchk_work,
 				   msecs_to_jiffies(delay));
 	}
+	device_set_wakeup_capable(cm->dev, false);
 	uevent_notify(cm, NULL);
 }
 
 static const struct dev_pm_ops charger_manager_pm = {
 	.prepare	= cm_suspend_prepare,
+	.suspend_noirq	= cm_suspend_noirq,
 	.complete	= cm_suspend_complete,
 };
 
@@ -1287,6 +1381,75 @@ static void __exit charger_manager_cleanup(void)
 }
 module_exit(charger_manager_cleanup);
 
+/**
+ * find_power_supply - find the associated power_supply of charger
+ * @cm: the Charger Manager representing the battery
+ * @psy: pointer to instance of charger's power_supply
+ */
+static bool find_power_supply(struct charger_manager *cm,
+			struct power_supply *psy)
+{
+	int i;
+	bool found = false;
+
+	for (i = 0; cm->charger_stat[i]; i++) {
+		if (psy == cm->charger_stat[i]) {
+			found = true;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/**
+ * cm_notify_event - charger driver notify Charger Manager of charger event
+ * @psy: pointer to instance of charger's power_supply
+ * @type: type of charger event
+ * @msg: optional message passed to uevent_notify fuction
+ */
+void cm_notify_event(struct power_supply *psy, enum cm_event_types type,
+		     char *msg)
+{
+	struct charger_manager *cm;
+	bool found_power_supply = false;
+
+	if (psy == NULL)
+		return;
+
+	mutex_lock(&cm_list_mtx);
+	list_for_each_entry(cm, &cm_list, entry) {
+		found_power_supply = find_power_supply(cm, psy);
+		if (found_power_supply)
+			break;
+	}
+	mutex_unlock(&cm_list_mtx);
+
+	if (!found_power_supply)
+		return;
+
+	switch (type) {
+	case CM_EVENT_BATT_FULL:
+		fullbatt_handler(cm);
+		break;
+	case CM_EVENT_BATT_OUT:
+		battout_handler(cm);
+		break;
+	case CM_EVENT_BATT_IN:
+	case CM_EVENT_EXT_PWR_IN_OUT ... CM_EVENT_CHG_START_STOP:
+		misc_event_handler(cm, type);
+		break;
+	case CM_EVENT_UNKNOWN:
+	case CM_EVENT_OTHERS:
+		uevent_notify(cm, msg ? msg : default_event_names[type]);
+		break;
+	default:
+		dev_err(cm->dev, "%s type not specified.\n", __func__);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(cm_notify_event);
+
 MODULE_AUTHOR("MyungJoo Ham <myungjoo.ham@samsung.com>");
 MODULE_DESCRIPTION("Charger Manager");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h
index baa299a95e13..241065c9ce51 100644
--- a/include/linux/power/charger-manager.h
+++ b/include/linux/power/charger-manager.h
@@ -31,6 +31,16 @@ enum polling_modes {
 	CM_POLL_CHARGING_ONLY,
 };
 
+enum cm_event_types {
+	CM_EVENT_UNKNOWN = 0,
+	CM_EVENT_BATT_FULL,
+	CM_EVENT_BATT_IN,
+	CM_EVENT_BATT_OUT,
+	CM_EVENT_EXT_PWR_IN_OUT,
+	CM_EVENT_CHG_START_STOP,
+	CM_EVENT_OTHERS,
+};
+
 /**
  * struct charger_global_desc
  * @rtc_name: the name of RTC used to wake up the system from suspend.
@@ -159,14 +169,13 @@ struct charger_manager {
 #ifdef CONFIG_CHARGER_MANAGER
 extern int setup_charger_manager(struct charger_global_desc *gd);
 extern bool cm_suspend_again(void);
+extern void cm_notify_event(struct power_supply *psy,
+				enum cm_event_types type, char *msg);
 #else
-static void __maybe_unused setup_charger_manager(struct charger_global_desc *gd)
-{ }
-
-static bool __maybe_unused cm_suspend_again(void)
-{
-	return false;
-}
+static inline int setup_charger_manager(struct charger_global_desc *gd)
+{ return 0; }
+static inline bool cm_suspend_again(void) { return false; }
+static inline void cm_notify_event(struct power_supply *psy,
+				enum cm_event_types type, char *msg) { }
 #endif
-
 #endif /* _CHARGER_MANAGER_H */
-- 
cgit 


From 9a8422d205ea142a27c2573e5ca3d2cc87d75260 Mon Sep 17 00:00:00 2001
From: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Date: Sat, 5 May 2012 14:34:26 +0530
Subject: max17042_battery: Add support for max17047/50 chip

max17047 is improved version of max17042 chip. It has few HW bug
fixes with minor changes in register set.

max17050 is same as max17047 chip except its silicon packging. So from
driver's point of view there is no difference btw max1047 and max1050.

This patch adds the support to dynamically detect the chip type and
adds steps to initialize the max17047 chip.

Signed-off-by: Ramakrishna Pallala <ramakrishna.pallala@intel.com>
Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
---
 drivers/power/Kconfig                  |  5 ++-
 drivers/power/max17042_battery.c       | 69 ++++++++++++++++++++++++++++------
 include/linux/power/max17042_battery.h | 17 +++++++++
 3 files changed, 78 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/power/Kconfig b/drivers/power/Kconfig
index b711795c6664..e3a3b4956f08 100644
--- a/drivers/power/Kconfig
+++ b/drivers/power/Kconfig
@@ -181,14 +181,15 @@ config BATTERY_MAX17040
 	  to operate with a single lithium cell
 
 config BATTERY_MAX17042
-	tristate "Maxim MAX17042/8997/8966 Fuel Gauge"
+	tristate "Maxim MAX17042/17047/17050/8997/8966 Fuel Gauge"
 	depends on I2C
 	help
 	  MAX17042 is fuel-gauge systems for lithium-ion (Li+) batteries
 	  in handheld and portable equipment. The MAX17042 is configured
 	  to operate with a single lithium cell. MAX8997 and MAX8966 are
 	  multi-function devices that include fuel gauages that are compatible
-	  with MAX17042.
+	  with MAX17042. This driver also supports max17047/50 chips which are
+	  improved version of max17042.
 
 config BATTERY_Z2
 	tristate "Z2 battery driver"
diff --git a/drivers/power/max17042_battery.c b/drivers/power/max17042_battery.c
index 8d28006322e9..140788b309f8 100644
--- a/drivers/power/max17042_battery.c
+++ b/drivers/power/max17042_battery.c
@@ -62,9 +62,13 @@
 #define dP_ACC_100	0x1900
 #define dP_ACC_200	0x3200
 
+#define MAX17042_IC_VERSION	0x0092
+#define MAX17047_IC_VERSION	0x00AC	/* same for max17050 */
+
 struct max17042_chip {
 	struct i2c_client *client;
 	struct power_supply battery;
+	enum max170xx_chip_type chip_type;
 	struct max17042_platform_data *pdata;
 	struct work_struct work;
 	int    init_complete;
@@ -152,7 +156,10 @@ static int max17042_get_property(struct power_supply *psy,
 		val->intval *= 20000; /* Units of LSB = 20mV */
 		break;
 	case POWER_SUPPLY_PROP_VOLTAGE_MIN_DESIGN:
-		ret = max17042_read_reg(chip->client, MAX17042_V_empty);
+		if (chip->chip_type == MAX17042)
+			ret = max17042_read_reg(chip->client, MAX17042_V_empty);
+		else
+			ret = max17042_read_reg(chip->client, MAX17047_V_empty);
 		if (ret < 0)
 			return ret;
 
@@ -389,6 +396,9 @@ static void max17042_write_config_regs(struct max17042_chip *chip)
 	max17042_write_reg(chip->client, MAX17042_FilterCFG,
 			config->filter_cfg);
 	max17042_write_reg(chip->client, MAX17042_RelaxCFG, config->relax_cfg);
+	if (chip->chip_type == MAX17047)
+		max17042_write_reg(chip->client, MAX17047_FullSOCThr,
+						config->full_soc_thresh);
 }
 
 static void  max17042_write_custom_regs(struct max17042_chip *chip)
@@ -399,12 +409,23 @@ static void  max17042_write_custom_regs(struct max17042_chip *chip)
 				config->rcomp0);
 	max17042_write_verify_reg(chip->client, MAX17042_TempCo,
 				config->tcompc0);
-	max17042_write_reg(chip->client, MAX17042_EmptyTempCo,
-			config->empty_tempco);
-	max17042_write_verify_reg(chip->client, MAX17042_K_empty0,
-				config->kempty0);
 	max17042_write_verify_reg(chip->client, MAX17042_ICHGTerm,
 				config->ichgt_term);
+	if (chip->chip_type == MAX17042) {
+		max17042_write_reg(chip->client, MAX17042_EmptyTempCo,
+					config->empty_tempco);
+		max17042_write_verify_reg(chip->client, MAX17042_K_empty0,
+					config->kempty0);
+	} else {
+		max17042_write_verify_reg(chip->client, MAX17047_QRTbl00,
+						config->qrtbl00);
+		max17042_write_verify_reg(chip->client, MAX17047_QRTbl10,
+						config->qrtbl10);
+		max17042_write_verify_reg(chip->client, MAX17047_QRTbl20,
+						config->qrtbl20);
+		max17042_write_verify_reg(chip->client, MAX17047_QRTbl30,
+						config->qrtbl30);
+	}
 }
 
 static void max17042_update_capacity_regs(struct max17042_chip *chip)
@@ -460,6 +481,8 @@ static void max17042_load_new_capacity_params(struct max17042_chip *chip)
 			config->design_cap);
 	max17042_write_verify_reg(chip->client, MAX17042_FullCAPNom,
 			config->fullcapnom);
+	/* Update SOC register with new SOC */
+	max17042_write_reg(chip->client, MAX17042_RepSOC, vfSoc);
 }
 
 /*
@@ -496,20 +519,28 @@ static inline void max17042_override_por_values(struct max17042_chip *chip)
 
 	max17042_override_por(client, MAX17042_FullCAP, config->fullcap);
 	max17042_override_por(client, MAX17042_FullCAPNom, config->fullcapnom);
-	max17042_override_por(client, MAX17042_SOC_empty, config->socempty);
+	if (chip->chip_type == MAX17042)
+		max17042_override_por(client, MAX17042_SOC_empty,
+						config->socempty);
 	max17042_override_por(client, MAX17042_LAvg_empty, config->lavg_empty);
 	max17042_override_por(client, MAX17042_dQacc, config->dqacc);
 	max17042_override_por(client, MAX17042_dPacc, config->dpacc);
 
-	max17042_override_por(client, MAX17042_V_empty, config->vempty);
+	if (chip->chip_type == MAX17042)
+		max17042_override_por(client, MAX17042_V_empty, config->vempty);
+	else
+		max17042_override_por(client, MAX17047_V_empty, config->vempty);
 	max17042_override_por(client, MAX17042_TempNom, config->temp_nom);
 	max17042_override_por(client, MAX17042_TempLim, config->temp_lim);
 	max17042_override_por(client, MAX17042_FCTC, config->fctc);
 	max17042_override_por(client, MAX17042_RCOMP0, config->rcomp0);
 	max17042_override_por(client, MAX17042_TempCo, config->tcompc0);
-	max17042_override_por(client, MAX17042_EmptyTempCo,
-			config->empty_tempco);
-	max17042_override_por(client, MAX17042_K_empty0, config->kempty0);
+	if (chip->chip_type) {
+		max17042_override_por(client, MAX17042_EmptyTempCo,
+					config->empty_tempco);
+		max17042_override_por(client, MAX17042_K_empty0,
+					config->kempty0);
+	}
 }
 
 static int max17042_init_chip(struct max17042_chip *chip)
@@ -666,7 +697,19 @@ static int __devinit max17042_probe(struct i2c_client *client,
 
 	i2c_set_clientdata(client, chip);
 
-	chip->battery.name		= "max17042_battery";
+	ret = max17042_read_reg(chip->client, MAX17042_DevName);
+	if (ret == MAX17042_IC_VERSION) {
+		dev_dbg(&client->dev, "chip type max17042 detected\n");
+		chip->chip_type = MAX17042;
+	} else if (ret == MAX17047_IC_VERSION) {
+		dev_dbg(&client->dev, "chip type max17047/50 detected\n");
+		chip->chip_type = MAX17047;
+	} else {
+		dev_err(&client->dev, "device version mismatch: %x\n", ret);
+		return -EIO;
+	}
+
+	chip->battery.name		= "max170xx_battery";
 	chip->battery.type		= POWER_SUPPLY_TYPE_BATTERY;
 	chip->battery.get_property	= max17042_get_property;
 	chip->battery.properties	= max17042_battery_props;
@@ -778,6 +821,8 @@ static const struct dev_pm_ops max17042_pm_ops = {
 #ifdef CONFIG_OF
 static const struct of_device_id max17042_dt_match[] = {
 	{ .compatible = "maxim,max17042" },
+	{ .compatible = "maxim,max17047" },
+	{ .compatible = "maxim,max17050" },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, max17042_dt_match);
@@ -785,6 +830,8 @@ MODULE_DEVICE_TABLE(of, max17042_dt_match);
 
 static const struct i2c_device_id max17042_id[] = {
 	{ "max17042", 0 },
+	{ "max17047", 1 },
+	{ "max17050", 2 },
 	{ }
 };
 MODULE_DEVICE_TABLE(i2c, max17042_id);
diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h
index e01b167e66f0..89dd84f47c6e 100644
--- a/include/linux/power/max17042_battery.h
+++ b/include/linux/power/max17042_battery.h
@@ -116,6 +116,18 @@ enum max17042_register {
 	MAX17042_VFSOC		= 0xFF,
 };
 
+/* Registers specific to max17047/50 */
+enum max17047_register {
+	MAX17047_QRTbl00	= 0x12,
+	MAX17047_FullSOCThr	= 0x13,
+	MAX17047_QRTbl10	= 0x22,
+	MAX17047_QRTbl20	= 0x32,
+	MAX17047_V_empty	= 0x3A,
+	MAX17047_QRTbl30	= 0x42,
+};
+
+enum max170xx_chip_type {MAX17042, MAX17047};
+
 /*
  * used for setting a register to a desired value
  * addr : address for a register
@@ -144,6 +156,7 @@ struct max17042_config_data {
 	u16	shdntimer;	/* 0x03F */
 
 	/* App data */
+	u16	full_soc_thresh;	/* 0x13 */
 	u16	design_cap;	/* 0x18 */
 	u16	ichgt_term;	/* 0x1E */
 
@@ -162,6 +175,10 @@ struct max17042_config_data {
 	u16	lavg_empty;	/* 0x36 */
 	u16	dqacc;		/* 0x45 */
 	u16	dpacc;		/* 0x46 */
+	u16	qrtbl00;	/* 0x12 */
+	u16	qrtbl10;	/* 0x22 */
+	u16	qrtbl20;	/* 0x32 */
+	u16	qrtbl30;	/* 0x42 */
 
 	/* Cell technology from power_supply.h */
 	u16	cell_technology;
-- 
cgit 


From 86c2072be6f3c2150cc35f00233f2c31bdba2745 Mon Sep 17 00:00:00 2001
From: Mike Dunn <mikedunn@newsguy.com>
Date: Wed, 25 Apr 2012 12:06:05 -0700
Subject: mtd: ecc_strength is at ecc step granularity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ecc_strength element of mtd_info will be the strength of one ecc step, not of
the entire writesize, as was previously planned.  This is the appropriate way
because, as was pointed out¹, bit errors in excess of the strength of one
step can cause a hard error if they all occur within the same ecc region.

¹ http://lists.infradead.org/pipermail/linux-mtd/2012-March/040313.html

Signed-off-by: Mike Dunn <mikedunn@newsguy.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 2 +-
 include/linux/mtd/mtd.h      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 512066e3d143..671f228d4c7c 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -3487,7 +3487,7 @@ int nand_scan_tail(struct mtd_info *mtd)
 
 	/* propagate ecc info to mtd_info */
 	mtd->ecclayout = chip->ecc.layout;
-	mtd->ecc_strength = chip->ecc.strength * chip->ecc.steps;
+	mtd->ecc_strength = chip->ecc.strength;
 
 	/* Check, if we should skip the bad block table scan */
 	if (chip->options & NAND_SKIP_BBTSCAN)
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index cf5ea8cdcf8e..cd0119d19cd9 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -164,7 +164,7 @@ struct mtd_info {
 	/* ECC layout structure pointer - read only! */
 	struct nand_ecclayout *ecclayout;
 
-	/* max number of correctible bit errors per writesize */
+	/* max number of correctible bit errors per ecc step */
 	unsigned int ecc_strength;
 
 	/* Data for variable erase regions. If numeraseregions is zero,
-- 
cgit 


From d062d4ede877fcd2ecc4c6262abad09a6f32950a Mon Sep 17 00:00:00 2001
From: Mike Dunn <mikedunn@newsguy.com>
Date: Wed, 25 Apr 2012 12:06:08 -0700
Subject: mtd: bitflip_threshold added to mtd_info and sysfs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An element 'bitflip_threshold' is added to struct mtd_info, and also exposed as
a read/write variable in sysfs.  This will be used to determine whether or not
mtd_read() returns -EUCLEAN or 0 (absent a hard error).  If the driver leaves it
as zero, mtd will set it to a default value of ecc_strength.

This v2 adds the line that propagates bitflip_threshold from the master to the
partitions - thanks Ivan¹.

¹ http://lists.infradead.org/pipermail/linux-mtd/2012-April/040900.html

Signed-off-by: Mike Dunn <mikedunn@newsguy.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Documentation/ABI/testing/sysfs-class-mtd | 36 +++++++++++++++++++++++++++++++
 drivers/mtd/mtdcore.c                     | 33 ++++++++++++++++++++++++++++
 drivers/mtd/mtdpart.c                     |  2 ++
 include/linux/mtd/mtd.h                   |  9 ++++++++
 4 files changed, 80 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
index 43d18180b46e..78835080856a 100644
--- a/Documentation/ABI/testing/sysfs-class-mtd
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -135,3 +135,39 @@ Description:
 		have multiple ecc steps within each writesize region.
 
 		In the case of devices lacking any ECC capability, it is 0.
+
+What:		/sys/class/mtd/mtdX/bitflip_threshold
+Date:		April 2012
+KernelVersion:	3.4
+Contact:	linux-mtd@lists.infradead.org
+Description:
+		This allows the user to examine and adjust the criteria by which
+		mtd returns -EUCLEAN from mtd_read().  If the maximum number of
+		bit errors that were corrected on any single region comprising
+		an ecc step (as reported by the driver) equals or exceeds this
+		value, -EUCLEAN is returned.  Otherwise, absent an error, 0 is
+		returned.  Higher layers (e.g., UBI) use this return code as an
+		indication that an erase block may be degrading and should be
+		scrutinized as a candidate for being marked as bad.
+
+		The initial value may be specified by the flash device driver.
+		If not, then the default value is ecc_strength.
+
+		The introduction of this feature brings a subtle change to the
+		meaning of the -EUCLEAN return code.  Previously, it was
+		interpreted to mean simply "one or more bit errors were
+		corrected".  Its new interpretation can be phrased as "a
+		dangerously high number of bit errors were corrected on one or
+		more regions comprising an ecc step".  The precise definition of
+		"dangerously high" can be adjusted by the user with
+		bitflip_threshold.  Users are discouraged from doing this,
+		however, unless they know what they are doing and have intimate
+		knowledge of the properties of their device.  Broadly speaking,
+		bitflip_threshold should be low enough to detect genuine erase
+		block degradation, but high enough to avoid the consequences of
+		a persistent return value of -EUCLEAN on devices where sticky
+		bitflips occur.  Note that if bitflip_threshold exceeds
+		ecc_strength, -EUCLEAN is never returned by the read functions.
+
+		This is generally applicable only to NAND flash devices with ECC
+		capability.  It is ignored on devices lacking ECC capability.
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 090e849d3dcd..6a7cba1e24e6 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -259,6 +259,34 @@ static ssize_t mtd_ecc_strength_show(struct device *dev,
 }
 static DEVICE_ATTR(ecc_strength, S_IRUGO, mtd_ecc_strength_show, NULL);
 
+static ssize_t mtd_bitflip_threshold_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct mtd_info *mtd = dev_get_drvdata(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", mtd->bitflip_threshold);
+}
+
+static ssize_t mtd_bitflip_threshold_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct mtd_info *mtd = dev_get_drvdata(dev);
+	unsigned int bitflip_threshold;
+	int retval;
+
+	retval = kstrtouint(buf, 0, &bitflip_threshold);
+	if (retval)
+		return retval;
+
+	mtd->bitflip_threshold = bitflip_threshold;
+	return count;
+}
+static DEVICE_ATTR(bitflip_threshold, S_IRUGO | S_IWUSR,
+		   mtd_bitflip_threshold_show,
+		   mtd_bitflip_threshold_store);
+
 static struct attribute *mtd_attrs[] = {
 	&dev_attr_type.attr,
 	&dev_attr_flags.attr,
@@ -270,6 +298,7 @@ static struct attribute *mtd_attrs[] = {
 	&dev_attr_numeraseregions.attr,
 	&dev_attr_name.attr,
 	&dev_attr_ecc_strength.attr,
+	&dev_attr_bitflip_threshold.attr,
 	NULL,
 };
 
@@ -332,6 +361,10 @@ int add_mtd_device(struct mtd_info *mtd)
 	mtd->index = i;
 	mtd->usecount = 0;
 
+	/* default value if not set by driver */
+	if (mtd->bitflip_threshold == 0)
+		mtd->bitflip_threshold = mtd->ecc_strength;
+
 	if (is_power_of_2(mtd->erasesize))
 		mtd->erasesize_shift = ffs(mtd->erasesize) - 1;
 	else
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index 9651c06de0a9..ec75d44e0253 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -517,6 +517,8 @@ static struct mtd_part *allocate_partition(struct mtd_info *master,
 
 	slave->mtd.ecclayout = master->ecclayout;
 	slave->mtd.ecc_strength = master->ecc_strength;
+	slave->mtd.bitflip_threshold = master->bitflip_threshold;
+
 	if (master->_block_isbad) {
 		uint64_t offs = 0;
 
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index cd0119d19cd9..63dadc0dfb62 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -157,6 +157,15 @@ struct mtd_info {
 	unsigned int erasesize_mask;
 	unsigned int writesize_mask;
 
+	/*
+	 * read ops return -EUCLEAN if max number of bitflips corrected on any
+	 * one region comprising an ecc step equals or exceeds this value.
+	 * Settable by driver, else defaults to ecc_strength.  User can override
+	 * in sysfs.  N.B. The meaning of the -EUCLEAN return code has changed;
+	 * see Documentation/ABI/testing/sysfs-class-mtd for more detail.
+	 */
+	unsigned int bitflip_threshold;
+
 	// Kernel-only stuff starts here.
 	const char *name;
 	int index;
-- 
cgit 


From edbc4540e02c201bdd4f4d498ebb6ed517fd36e2 Mon Sep 17 00:00:00 2001
From: Mike Dunn <mikedunn@newsguy.com>
Date: Wed, 25 Apr 2012 12:06:11 -0700
Subject: mtd: driver _read() returns max_bitflips; mtd_read() returns -EUCLEAN
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The drivers' _read() method, absent an error, returns a non-negative integer
indicating the maximum number of bit errors that were corrected in any one
region comprising an ecc step.  MTD returns -EUCLEAN if this is >=
bitflip_threshold, 0 otherwise.  If bitflip_threshold is zero, the comparison is
not made since these devices lack ECC and always return zero in the non-error
case (thanks Brian)¹.  Note that this is a subtle change to the driver
interface.

This and the preceding patches in this set were tested with ubi on top of the
nandsim and docg4 devices, running the ubi test io_basic from mtd-utils.

¹ http://lists.infradead.org/pipermail/linux-mtd/2012-March/040468.html

Signed-off-by: Mike Dunn <mikedunn@newsguy.com>
Acked-by: Robert Jarzmik <robert.jarzmik@free.fr>
Acked-by: Brian Norris <computersforpeace@gmail.com>
Ivan Djelic <ivan.djelic@parrot.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Documentation/ABI/testing/sysfs-class-mtd |  7 +++++--
 drivers/mtd/devices/docg3.c               |  6 ++++--
 drivers/mtd/mtdcore.c                     | 14 +++++++++++++-
 drivers/mtd/mtdpart.c                     | 12 ++++++------
 drivers/mtd/nand/alauda.c                 |  4 ++--
 drivers/mtd/nand/nand_base.c              | 18 ++++++++++++++----
 drivers/mtd/onenand/onenand_base.c        |  6 ++++--
 include/linux/mtd/nand.h                  |  3 +++
 8 files changed, 51 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-class-mtd b/Documentation/ABI/testing/sysfs-class-mtd
index 78835080856a..db1ad7e34fc3 100644
--- a/Documentation/ABI/testing/sysfs-class-mtd
+++ b/Documentation/ABI/testing/sysfs-class-mtd
@@ -167,7 +167,10 @@ Description:
 		block degradation, but high enough to avoid the consequences of
 		a persistent return value of -EUCLEAN on devices where sticky
 		bitflips occur.  Note that if bitflip_threshold exceeds
-		ecc_strength, -EUCLEAN is never returned by the read functions.
+		ecc_strength, -EUCLEAN is never returned by mtd_read().
+		Conversely, if bitflip_threshold is zero, -EUCLEAN is always
+		returned, absent a hard error.
 
 		This is generally applicable only to NAND flash devices with ECC
-		capability.  It is ignored on devices lacking ECC capability.
+		capability.  It is ignored on devices lacking ECC capability;
+		i.e., devices for which ecc_strength is zero.
diff --git a/drivers/mtd/devices/docg3.c b/drivers/mtd/devices/docg3.c
index 8272c02668d6..65d22a0439c6 100644
--- a/drivers/mtd/devices/docg3.c
+++ b/drivers/mtd/devices/docg3.c
@@ -850,6 +850,7 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t from,
 	u8 *buf = ops->datbuf;
 	size_t len, ooblen, nbdata, nboob;
 	u8 hwecc[DOC_ECC_BCH_SIZE], eccconf1;
+	int max_bitflips = 0;
 
 	if (buf)
 		len = ops->len;
@@ -876,7 +877,7 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t from,
 	ret = 0;
 	skip = from % DOC_LAYOUT_PAGE_SIZE;
 	mutex_lock(&docg3->cascade->lock);
-	while (!ret && (len > 0 || ooblen > 0)) {
+	while (ret >= 0 && (len > 0 || ooblen > 0)) {
 		calc_block_sector(from - skip, &block0, &block1, &page, &ofs,
 			docg3->reliable);
 		nbdata = min_t(size_t, len, DOC_LAYOUT_PAGE_SIZE - skip);
@@ -936,7 +937,8 @@ static int doc_read_oob(struct mtd_info *mtd, loff_t from,
 			}
 			if (ret > 0) {
 				mtd->ecc_stats.corrected += ret;
-				ret = -EUCLEAN;
+				max_bitflips = max(max_bitflips, ret);
+				ret = max_bitflips;
 			}
 		}
 
diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 6a7cba1e24e6..575730744fdb 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -800,12 +800,24 @@ EXPORT_SYMBOL_GPL(mtd_get_unmapped_area);
 int mtd_read(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen,
 	     u_char *buf)
 {
+	int ret_code;
 	*retlen = 0;
 	if (from < 0 || from > mtd->size || len > mtd->size - from)
 		return -EINVAL;
 	if (!len)
 		return 0;
-	return mtd->_read(mtd, from, len, retlen, buf);
+
+	/*
+	 * In the absence of an error, drivers return a non-negative integer
+	 * representing the maximum number of bitflips that were corrected on
+	 * any one ecc region (if applicable; zero otherwise).
+	 */
+	ret_code = mtd->_read(mtd, from, len, retlen, buf);
+	if (unlikely(ret_code < 0))
+		return ret_code;
+	if (mtd->ecc_strength == 0)
+		return 0;	/* device lacks ecc */
+	return ret_code >= mtd->bitflip_threshold ? -EUCLEAN : 0;
 }
 EXPORT_SYMBOL_GPL(mtd_read);
 
diff --git a/drivers/mtd/mtdpart.c b/drivers/mtd/mtdpart.c
index ec75d44e0253..d518e4db8a0b 100644
--- a/drivers/mtd/mtdpart.c
+++ b/drivers/mtd/mtdpart.c
@@ -67,12 +67,12 @@ static int part_read(struct mtd_info *mtd, loff_t from, size_t len,
 	stats = part->master->ecc_stats;
 	res = part->master->_read(part->master, from + part->offset, len,
 				  retlen, buf);
-	if (unlikely(res)) {
-		if (mtd_is_bitflip(res))
-			mtd->ecc_stats.corrected += part->master->ecc_stats.corrected - stats.corrected;
-		if (mtd_is_eccerr(res))
-			mtd->ecc_stats.failed += part->master->ecc_stats.failed - stats.failed;
-	}
+	if (unlikely(mtd_is_eccerr(res)))
+		mtd->ecc_stats.failed +=
+			part->master->ecc_stats.failed - stats.failed;
+	else
+		mtd->ecc_stats.corrected +=
+			part->master->ecc_stats.corrected - stats.corrected;
 	return res;
 }
 
diff --git a/drivers/mtd/nand/alauda.c b/drivers/mtd/nand/alauda.c
index 4f20e1d8bef1..60a0dfdb0808 100644
--- a/drivers/mtd/nand/alauda.c
+++ b/drivers/mtd/nand/alauda.c
@@ -414,7 +414,7 @@ static int alauda_bounce_read(struct mtd_info *mtd, loff_t from, size_t len,
 	}
 	err = 0;
 	if (corrected)
-		err = -EUCLEAN;
+		err = 1;	/* return max_bitflips per ecc step */
 	if (uncorrected)
 		err = -EBADMSG;
 out:
@@ -446,7 +446,7 @@ static int alauda_read(struct mtd_info *mtd, loff_t from, size_t len,
 	}
 	err = 0;
 	if (corrected)
-		err = -EUCLEAN;
+		err = 1;	/* return max_bitflips per ecc step */
 	if (uncorrected)
 		err = -EBADMSG;
 	return err;
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 9f5d339a3610..640f1f8159e8 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1486,6 +1486,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		mtd->oobavail : mtd->oobsize;
 
 	uint8_t *bufpoi, *oob, *buf;
+	unsigned int max_bitflips = 0;
 
 	stats = mtd->ecc_stats;
 
@@ -1513,7 +1514,10 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 				sndcmd = 0;
 			}
 
-			/* Now read the page into the buffer */
+			/*
+			 * Now read the page into the buffer.  Absent an error,
+			 * the read methods return max bitflips per ecc step.
+			 */
 			if (unlikely(ops->mode == MTD_OPS_RAW))
 				ret = chip->ecc.read_page_raw(mtd, chip,
 							      bufpoi, page);
@@ -1530,15 +1534,19 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 				break;
 			}
 
+			max_bitflips = max_t(unsigned int, max_bitflips, ret);
+
 			/* Transfer not aligned data */
 			if (!aligned) {
 				if (!NAND_SUBPAGE_READ(chip) && !oob &&
 				    !(mtd->ecc_stats.failed - stats.failed) &&
-				    (ops->mode != MTD_OPS_RAW))
+				    (ops->mode != MTD_OPS_RAW)) {
 					chip->pagebuf = realpage;
-				else
+					chip->pagebuf_bitflips = ret;
+				} else {
 					/* Invalidate page cache */
 					chip->pagebuf = -1;
+				}
 				memcpy(buf, chip->buffers->databuf + col, bytes);
 			}
 
@@ -1571,6 +1579,8 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 		} else {
 			memcpy(buf, chip->buffers->databuf + col, bytes);
 			buf += bytes;
+			max_bitflips = max_t(unsigned int, max_bitflips,
+					     chip->pagebuf_bitflips);
 		}
 
 		readlen -= bytes;
@@ -1609,7 +1619,7 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 	if (mtd->ecc_stats.failed - stats.failed)
 		return -EBADMSG;
 
-	return  mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0;
+	return max_bitflips;
 }
 
 /**
diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index b3ce12ef359e..7153e0d27101 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1201,7 +1201,8 @@ static int onenand_mlc_read_ops_nolock(struct mtd_info *mtd, loff_t from,
 	if (mtd->ecc_stats.failed - stats.failed)
 		return -EBADMSG;
 
-	return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0;
+	/* return max bitflips per ecc step; ONENANDs correct 1 bit only */
+	return mtd->ecc_stats.corrected != stats.corrected ? 1 : 0;
 }
 
 /**
@@ -1333,7 +1334,8 @@ static int onenand_read_ops_nolock(struct mtd_info *mtd, loff_t from,
 	if (mtd->ecc_stats.failed - stats.failed)
 		return -EBADMSG;
 
-	return mtd->ecc_stats.corrected - stats.corrected ? -EUCLEAN : 0;
+	/* return max bitflips per ecc step; ONENANDs correct 1 bit only */
+	return mtd->ecc_stats.corrected != stats.corrected ? 1 : 0;
 }
 
 /**
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 1482340d3d9f..2829e8be3a62 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -459,6 +459,8 @@ struct nand_buffers {
  * @pagemask:		[INTERN] page number mask = number of (pages / chip) - 1
  * @pagebuf:		[INTERN] holds the pagenumber which is currently in
  *			data_buf.
+ * @pagebuf_bitflips:	[INTERN] holds the bitflip count for the page which is
+ *			currently in data_buf.
  * @subpagesize:	[INTERN] holds the subpagesize
  * @onfi_version:	[INTERN] holds the chip ONFI version (BCD encoded),
  *			non 0 if ONFI supported.
@@ -519,6 +521,7 @@ struct nand_chip {
 	uint64_t chipsize;
 	int pagemask;
 	int pagebuf;
+	unsigned int pagebuf_bitflips;
 	int subpagesize;
 	uint8_t cellinfo;
 	int badblockpos;
-- 
cgit 


From 1826dbccebc9a58a0b0c0a9b7c09e47b19d97398 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Tue, 1 May 2012 17:12:55 -0700
Subject: mtd: nand: kill NAND_NO_AUTOINCR option

No drivers use auto-increment NAND, so kill the NO_AUTOINCR option entirely.

Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 Documentation/DocBook/mtdnand.tmpl    | 2 --
 arch/arm/mach-ep93xx/snappercl15.c    | 1 -
 arch/arm/mach-ixp4xx/ixdp425-setup.c  | 1 -
 arch/arm/mach-nomadik/board-nhk8815.c | 2 +-
 arch/mips/rb532/devices.c             | 1 -
 drivers/mtd/nand/au1550nd.c           | 2 --
 drivers/mtd/nand/cafe_nand.c          | 2 +-
 drivers/mtd/nand/cs553x_nand.c        | 1 -
 drivers/mtd/nand/docg4.c              | 3 +--
 drivers/mtd/nand/fsl_elbc_nand.c      | 2 +-
 drivers/mtd/nand/fsl_ifc_nand.c       | 2 +-
 drivers/mtd/nand/h1910.c              | 1 -
 drivers/mtd/nand/mpc5121_nfc.c        | 1 -
 drivers/mtd/nand/nand_base.c          | 8 +-------
 drivers/mtd/nand/nand_ids.c           | 6 ++----
 drivers/mtd/nand/pasemi_nand.c        | 1 -
 drivers/mtd/nand/pxa3xx_nand.c        | 1 -
 drivers/mtd/nand/sh_flctl.c           | 2 --
 drivers/mtd/nand/sm_common.c          | 9 ++++-----
 include/linux/mtd/nand.h              | 5 +----
 20 files changed, 13 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/mtdnand.tmpl b/Documentation/DocBook/mtdnand.tmpl
index 0c674be0d3c6..e0aedb7a7827 100644
--- a/Documentation/DocBook/mtdnand.tmpl
+++ b/Documentation/DocBook/mtdnand.tmpl
@@ -1119,8 +1119,6 @@ in this page</entry>
 		These constants are defined in nand.h. They are ored together to describe
 		the chip functionality.
      		<programlisting>
-/* Chip can not auto increment pages */
-#define NAND_NO_AUTOINCR	0x00000001
 /* Buswitdh is 16 bit */
 #define NAND_BUSWIDTH_16	0x00000002
 /* Device supports partial programming without padding */
diff --git a/arch/arm/mach-ep93xx/snappercl15.c b/arch/arm/mach-ep93xx/snappercl15.c
index 5df9092f1fcc..376b8f5ecfa3 100644
--- a/arch/arm/mach-ep93xx/snappercl15.c
+++ b/arch/arm/mach-ep93xx/snappercl15.c
@@ -100,7 +100,6 @@ static struct platform_nand_data snappercl15_nand_data = {
 		.nr_chips		= 1,
 		.partitions		= snappercl15_nand_parts,
 		.nr_partitions		= ARRAY_SIZE(snappercl15_nand_parts),
-		.options		= NAND_NO_AUTOINCR,
 		.chip_delay		= 25,
 	},
 	.ctrl = {
diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c
index fccfc73e69f1..108a9d3f382d 100644
--- a/arch/arm/mach-ixp4xx/ixdp425-setup.c
+++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c
@@ -98,7 +98,6 @@ static struct platform_nand_data ixdp425_flash_nand_data = {
 	.chip = {
 		.nr_chips		= 1,
 		.chip_delay		= 30,
-		.options		= NAND_NO_AUTOINCR,
 		.partitions	 	= ixdp425_partitions,
 		.nr_partitions	 	= ARRAY_SIZE(ixdp425_partitions),
 	},
diff --git a/arch/arm/mach-nomadik/board-nhk8815.c b/arch/arm/mach-nomadik/board-nhk8815.c
index 58cacafcf662..2e8d3e176bc7 100644
--- a/arch/arm/mach-nomadik/board-nhk8815.c
+++ b/arch/arm/mach-nomadik/board-nhk8815.c
@@ -111,7 +111,7 @@ static struct nomadik_nand_platform_data nhk8815_nand_data = {
 	.parts		= nhk8815_partitions,
 	.nparts		= ARRAY_SIZE(nhk8815_partitions),
 	.options	= NAND_COPYBACK | NAND_CACHEPRG | NAND_NO_PADDING \
-			| NAND_NO_READRDY | NAND_NO_AUTOINCR,
+			| NAND_NO_READRDY,
 	.init		= nhk8815_nand_init,
 };
 
diff --git a/arch/mips/rb532/devices.c b/arch/mips/rb532/devices.c
index a969eb826634..08c4f8e03ffe 100644
--- a/arch/mips/rb532/devices.c
+++ b/arch/mips/rb532/devices.c
@@ -292,7 +292,6 @@ static void __init rb532_nand_setup(void)
 	rb532_nand_data.chip.nr_partitions = ARRAY_SIZE(rb532_partition_info);
 	rb532_nand_data.chip.partitions = rb532_partition_info;
 	rb532_nand_data.chip.chip_delay = NAND_CHIP_DELAY;
-	rb532_nand_data.chip.options = NAND_NO_AUTOINCR;
 }
 
 
diff --git a/drivers/mtd/nand/au1550nd.c b/drivers/mtd/nand/au1550nd.c
index 73abbc3e093e..9f609d2dcf62 100644
--- a/drivers/mtd/nand/au1550nd.c
+++ b/drivers/mtd/nand/au1550nd.c
@@ -508,8 +508,6 @@ static int __devinit au1550nd_probe(struct platform_device *pdev)
 	this->chip_delay = 30;
 	this->ecc.mode = NAND_ECC_SOFT;
 
-	this->options = NAND_NO_AUTOINCR;
-
 	if (pd->devwidth)
 		this->options |= NAND_BUSWIDTH_16;
 
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 886ff3a064c2..75fb77b96efb 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -687,7 +687,7 @@ static int __devinit cafe_nand_probe(struct pci_dev *pdev,
 
 	/* Enable the following for a flash based bad block table */
 	cafe->nand.bbt_options = NAND_BBT_USE_FLASH;
-	cafe->nand.options = NAND_NO_AUTOINCR | NAND_OWN_BUFFERS;
+	cafe->nand.options = NAND_OWN_BUFFERS;
 
 	if (skipbbt) {
 		cafe->nand.options |= NAND_SKIP_BBTSCAN;
diff --git a/drivers/mtd/nand/cs553x_nand.c b/drivers/mtd/nand/cs553x_nand.c
index 821c34c62500..adb6c3ef37fb 100644
--- a/drivers/mtd/nand/cs553x_nand.c
+++ b/drivers/mtd/nand/cs553x_nand.c
@@ -240,7 +240,6 @@ static int __init cs553x_init_one(int cs, int mmio, unsigned long adr)
 
 	/* Enable the following for a flash based bad block table */
 	this->bbt_options = NAND_BBT_USE_FLASH;
-	this->options = NAND_NO_AUTOINCR;
 
 	/* Scan to find existence of the device */
 	if (nand_scan(new_mtd, 1)) {
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 8e7da0170a2b..1540de2ad39a 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -1193,8 +1193,7 @@ static void __init init_mtd_structs(struct mtd_info *mtd)
 	nand->ecc.prepad = 8;
 	nand->ecc.bytes	= 8;
 	nand->ecc.strength = DOCG4_T;
-	nand->options =
-		NAND_BUSWIDTH_16 | NAND_NO_SUBPAGE_WRITE | NAND_NO_AUTOINCR;
+	nand->options = NAND_BUSWIDTH_16 | NAND_NO_SUBPAGE_WRITE;
 	nand->IO_ADDR_R = nand->IO_ADDR_W = doc->virtadr + DOC_IOSPACE_DATA;
 	nand->controller = &nand->hwcontrol;
 	spin_lock_init(&nand->controller->lock);
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 8638b5e734cc..4d995875d366 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -807,7 +807,7 @@ static int fsl_elbc_chip_init(struct fsl_elbc_mtd *priv)
 	chip->bbt_md = &bbt_mirror_descr;
 
 	/* set up nand options */
-	chip->options = NAND_NO_READRDY | NAND_NO_AUTOINCR;
+	chip->options = NAND_NO_READRDY;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 
 	chip->controller = &elbc_fcm_ctrl->controller;
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index 0adde9670418..dffd2fa353ae 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -806,7 +806,7 @@ static int fsl_ifc_chip_init(struct fsl_ifc_mtd *priv)
 	out_be32(&ifc->ifc_nand.ncfgr, 0x0);
 
 	/* set up nand options */
-	chip->options = NAND_NO_READRDY | NAND_NO_AUTOINCR;
+	chip->options = NAND_NO_READRDY;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 
 
diff --git a/drivers/mtd/nand/h1910.c b/drivers/mtd/nand/h1910.c
index 11e487813428..85e55a5eb2ff 100644
--- a/drivers/mtd/nand/h1910.c
+++ b/drivers/mtd/nand/h1910.c
@@ -124,7 +124,6 @@ static int __init h1910_init(void)
 	/* 15 us command delay time */
 	this->chip_delay = 50;
 	this->ecc.mode = NAND_ECC_SOFT;
-	this->options = NAND_NO_AUTOINCR;
 
 	/* Scan to find existence of the device */
 	if (nand_scan(h1910_nand_mtd, 1)) {
diff --git a/drivers/mtd/nand/mpc5121_nfc.c b/drivers/mtd/nand/mpc5121_nfc.c
index c240cf1af961..c259c24d7986 100644
--- a/drivers/mtd/nand/mpc5121_nfc.c
+++ b/drivers/mtd/nand/mpc5121_nfc.c
@@ -734,7 +734,6 @@ static int __devinit mpc5121_nfc_probe(struct platform_device *op)
 	chip->write_buf = mpc5121_nfc_write_buf;
 	chip->verify_buf = mpc5121_nfc_verify_buf;
 	chip->select_chip = mpc5121_nfc_select_chip;
-	chip->options = NAND_NO_AUTOINCR;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 	chip->ecc.mode = NAND_ECC_SOFT;
 
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 6246b0fc2fcd..5ec4d2c01b87 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2895,8 +2895,7 @@ static int nand_flash_detect_onfi(struct mtd_info *mtd, struct nand_chip *chip,
 		*busw = NAND_BUSWIDTH_16;
 
 	chip->options &= ~NAND_CHIPOPTIONS_MSK;
-	chip->options |= (NAND_NO_READRDY |
-			NAND_NO_AUTOINCR) & NAND_CHIPOPTIONS_MSK;
+	chip->options |= NAND_NO_READRDY & NAND_CHIPOPTIONS_MSK;
 
 	pr_info("ONFI flash detected\n");
 	return 1;
@@ -3073,11 +3072,6 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 		chip->options &= ~NAND_SAMSUNG_LP_OPTIONS;
 ident_done:
 
-	/*
-	 * Set chip as a default. Board drivers can override it, if necessary.
-	 */
-	chip->options |= NAND_NO_AUTOINCR;
-
 	/* Try to identify manufacturer */
 	for (maf_idx = 0; nand_manuf_ids[maf_idx].id != 0x0; maf_idx++) {
 		if (nand_manuf_ids[maf_idx].id == *maf_id)
diff --git a/drivers/mtd/nand/nand_ids.c b/drivers/mtd/nand/nand_ids.c
index af4fe8ca7b5e..621b70b7a159 100644
--- a/drivers/mtd/nand/nand_ids.c
+++ b/drivers/mtd/nand/nand_ids.c
@@ -70,7 +70,7 @@ struct nand_flash_dev nand_flash_ids[] = {
 	 * These are the new chips with large page size. The pagesize and the
 	 * erasesize is determined from the extended id bytes
 	 */
-#define LP_OPTIONS (NAND_SAMSUNG_LP_OPTIONS | NAND_NO_READRDY | NAND_NO_AUTOINCR)
+#define LP_OPTIONS (NAND_SAMSUNG_LP_OPTIONS | NAND_NO_READRDY)
 #define LP_OPTIONS16 (LP_OPTIONS | NAND_BUSWIDTH_16)
 
 	/* 512 Megabit */
@@ -157,9 +157,7 @@ struct nand_flash_dev nand_flash_ids[] = {
 	 * writes possible, but not implemented now
 	 */
 	{"AND 128MiB 3,3V 8-bit",	0x01, 2048, 128, 0x4000,
-	 NAND_IS_AND | NAND_NO_AUTOINCR |NAND_NO_READRDY | NAND_4PAGE_ARRAY |
-	 BBT_AUTO_REFRESH
-	},
+	 NAND_IS_AND | NAND_NO_READRDY | NAND_4PAGE_ARRAY | BBT_AUTO_REFRESH},
 
 	{NULL,}
 };
diff --git a/drivers/mtd/nand/pasemi_nand.c b/drivers/mtd/nand/pasemi_nand.c
index 974dbf8251c9..1440e51cedcc 100644
--- a/drivers/mtd/nand/pasemi_nand.c
+++ b/drivers/mtd/nand/pasemi_nand.c
@@ -155,7 +155,6 @@ static int __devinit pasemi_nand_probe(struct platform_device *ofdev)
 	chip->ecc.mode = NAND_ECC_SOFT;
 
 	/* Enable the following for a flash based bad block table */
-	chip->options = NAND_NO_AUTOINCR;
 	chip->bbt_options = NAND_BBT_USE_FLASH;
 
 	/* Scan to find existence of the device */
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index def50caa6f84..36a32a0d3b9f 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -1004,7 +1004,6 @@ KEEP_CONFIG:
 	chip->ecc.size = host->page_size;
 	chip->ecc.strength = 1;
 
-	chip->options = NAND_NO_AUTOINCR;
 	chip->options |= NAND_NO_READRDY;
 	if (host->reg_ndcr & NDCR_DWIDTH_M)
 		chip->options |= NAND_BUSWIDTH_16;
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 4ea3e20169ba..3f0788fad66f 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -881,8 +881,6 @@ static int __devinit flctl_probe(struct platform_device *pdev)
 	flctl->hwecc = pdata->has_hwecc;
 	flctl->holden = pdata->use_holden;
 
-	nand->options = NAND_NO_AUTOINCR;
-
 	/* Set address of hardware control function */
 	/* 20 us command delay time */
 	nand->chip_delay = 20;
diff --git a/drivers/mtd/nand/sm_common.c b/drivers/mtd/nand/sm_common.c
index 774c3c266713..082bcdcd6bcf 100644
--- a/drivers/mtd/nand/sm_common.c
+++ b/drivers/mtd/nand/sm_common.c
@@ -94,17 +94,16 @@ static struct nand_flash_dev nand_smartmedia_flash_ids[] = {
 	{NULL,}
 };
 
-#define XD_TYPEM       (NAND_NO_AUTOINCR | NAND_BROKEN_XD)
 static struct nand_flash_dev nand_xd_flash_ids[] = {
 
 	{"xD 16MiB 3,3V",    0x73, 512, 16, 0x4000, 0},
 	{"xD 32MiB 3,3V",    0x75, 512, 32, 0x4000, 0},
 	{"xD 64MiB 3,3V",    0x76, 512, 64, 0x4000, 0},
 	{"xD 128MiB 3,3V",   0x79, 512, 128, 0x4000, 0},
-	{"xD 256MiB 3,3V",   0x71, 512, 256, 0x4000, XD_TYPEM},
-	{"xD 512MiB 3,3V",   0xdc, 512, 512, 0x4000, XD_TYPEM},
-	{"xD 1GiB 3,3V",     0xd3, 512, 1024, 0x4000, XD_TYPEM},
-	{"xD 2GiB 3,3V",     0xd5, 512, 2048, 0x4000, XD_TYPEM},
+	{"xD 256MiB 3,3V",   0x71, 512, 256, 0x4000, NAND_BROKEN_XD},
+	{"xD 512MiB 3,3V",   0xdc, 512, 512, 0x4000, NAND_BROKEN_XD},
+	{"xD 1GiB 3,3V",     0xd3, 512, 1024, 0x4000, NAND_BROKEN_XD},
+	{"xD 2GiB 3,3V",     0xd5, 512, 2048, 0x4000, NAND_BROKEN_XD},
 	{NULL,}
 };
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 2829e8be3a62..627f0c575ac4 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -161,8 +161,6 @@ typedef enum {
  * Option constants for bizarre disfunctionality and real
  * features.
  */
-/* Chip can not auto increment pages */
-#define NAND_NO_AUTOINCR	0x00000001
 /* Buswidth is 16 bit */
 #define NAND_BUSWIDTH_16	0x00000002
 /* Device supports partial programming without padding */
@@ -207,7 +205,6 @@ typedef enum {
 	(NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK)
 
 /* Macros to identify the above */
-#define NAND_CANAUTOINCR(chip) (!(chip->options & NAND_NO_AUTOINCR))
 #define NAND_MUST_PAD(chip) (!(chip->options & NAND_NO_PADDING))
 #define NAND_HAS_CACHEPROG(chip) ((chip->options & NAND_CACHEPRG))
 #define NAND_HAS_COPYBACK(chip) ((chip->options & NAND_COPYBACK))
@@ -216,7 +213,7 @@ typedef enum {
 					&& (chip->page_shift > 9))
 
 /* Mask to zero out the chip options, which come from the id table */
-#define NAND_CHIPOPTIONS_MSK	(0x0000ffff & ~NAND_NO_AUTOINCR)
+#define NAND_CHIPOPTIONS_MSK	0x0000ffff
 
 /* Non chip related options */
 /* This option skips the bbt scan during initialization. */
-- 
cgit 


From b4f7aa84d6ff44327ab91a2973ebf0c2a7797d24 Mon Sep 17 00:00:00 2001
From: John Crispin <blogic@openwrt.org>
Date: Mon, 30 Apr 2012 19:30:47 +0200
Subject: mtd: add read_byte support to plat_nand

Lantiq SoCs have a External Bus Unit (EBU) that is used to attach MTD media.
As we need to co-exist with PCI on the same bus, certain swapping settings must
be applied. Similar to the NOR map driver we need to apply a fix to make NAND
work. The easiest way is to use byte reads.

Signed-off-by: John Crispin <blogic@openwrt.org>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/plat_nand.c | 1 +
 include/linux/mtd/nand.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/plat_nand.c b/drivers/mtd/nand/plat_nand.c
index fd2d2a1b78ba..1bcb52040422 100644
--- a/drivers/mtd/nand/plat_nand.c
+++ b/drivers/mtd/nand/plat_nand.c
@@ -79,6 +79,7 @@ static int __devinit plat_nand_probe(struct platform_device *pdev)
 	data->chip.select_chip = pdata->ctrl.select_chip;
 	data->chip.write_buf = pdata->ctrl.write_buf;
 	data->chip.read_buf = pdata->ctrl.read_buf;
+	data->chip.read_byte = pdata->ctrl.read_byte;
 	data->chip.chip_delay = pdata->chip.chip_delay;
 	data->chip.options |= pdata->chip.options;
 	data->chip.bbt_options |= pdata->chip.bbt_options;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 627f0c575ac4..94a6679bfc2e 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -654,6 +654,7 @@ struct platform_nand_ctrl {
 	void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl);
 	void (*write_buf)(struct mtd_info *mtd, const uint8_t *buf, int len);
 	void (*read_buf)(struct mtd_info *mtd, uint8_t *buf, int len);
+	unsigned char (*read_byte)(struct mtd_info *mtd);
 	void *priv;
 };
 
-- 
cgit 


From 1fbb938dff5b6bb4514a4e7600276b03c7f08e25 Mon Sep 17 00:00:00 2001
From: Brian Norris <computersforpeace@gmail.com>
Date: Wed, 2 May 2012 10:14:55 -0700
Subject: mtd: nand: add 'oob_required' argument to NAND {read,write}_page
 interfaces

New NAND controllers can perform read/write via HW engines which don't expose
OOB data in their DMA mode. To reflect this, we should rework the nand_chip /
nand_ecc_ctrl interfaces that assume that drivers will always read/write OOB
data in the nand_chip.oob_poi buffer. A better interface includes a boolean
argument that explicitly tells the callee when OOB data is requested by the
calling layer (for reading/writing to/from nand_chip.oob_poi).

This patch adds the 'oob_required' parameter to each relevant {read,write}_page
interface; all 'oob_required' parameters are left unused for now. The next
patch will set the parameter properly in the nand_base.c callers, and follow-up
patches will make use of 'oob_required' in some of the callee functions.

Note that currently, there is no harm in ignoring the 'oob_required' parameter
and *always* utilizing nand_chip.oob_poi, but there can be
performance/complexity/design benefits from avoiding filling oob_poi in the
common case. I will try to implement this for some drivers which can be ported
easily.

Note: I couldn't compile-test all of these easily, as some had ARCH
dependencies.

[dwmw2: Merge later 1/0 vs. true/false cleanup]
Signed-off-by: Brian Norris <computersforpeace@gmail.com>
Reviewed-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Acked-by: Jiandong Zheng <jdzheng@broadcom.com>
Acked-by: Mike Dunn <mikedunn@newsguy.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/atmel_nand.c          |  5 +--
 drivers/mtd/nand/bcm_umi_bch.c         | 10 +++---
 drivers/mtd/nand/bcm_umi_nand.c        |  2 +-
 drivers/mtd/nand/bf5xx_nand.c          |  4 +--
 drivers/mtd/nand/cafe_nand.c           | 13 +++++---
 drivers/mtd/nand/denali.c              |  8 ++---
 drivers/mtd/nand/docg4.c               | 12 ++++----
 drivers/mtd/nand/fsl_elbc_nand.c       | 11 +++----
 drivers/mtd/nand/fsl_ifc_nand.c        | 10 +++---
 drivers/mtd/nand/fsmc_nand.c           |  3 +-
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c |  8 ++---
 drivers/mtd/nand/nand_base.c           | 56 +++++++++++++++++++++-------------
 drivers/mtd/nand/pxa3xx_nand.c         |  5 +--
 drivers/mtd/nand/sh_flctl.c            |  4 +--
 include/linux/mtd/nand.h               | 11 ++++---
 15 files changed, 90 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 9a7876e8de42..97ac6712bb19 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -324,9 +324,10 @@ static int atmel_nand_calculate(struct mtd_info *mtd,
  * mtd:        mtd info structure
  * chip:       nand chip info structure
  * buf:        buffer to store read data
+ * oob_required:    caller expects OOB data read to chip->oob_poi
  */
-static int atmel_nand_read_page(struct mtd_info *mtd,
-		struct nand_chip *chip, uint8_t *buf, int page)
+static int atmel_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+				uint8_t *buf, int oob_required, int page)
 {
 	int eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
diff --git a/drivers/mtd/nand/bcm_umi_bch.c b/drivers/mtd/nand/bcm_umi_bch.c
index f8472b49567a..5914bb32e001 100644
--- a/drivers/mtd/nand/bcm_umi_bch.c
+++ b/drivers/mtd/nand/bcm_umi_bch.c
@@ -22,9 +22,9 @@
 
 /* ---- Private Function Prototypes -------------------------------------- */
 static int bcm_umi_bch_read_page_hwecc(struct mtd_info *mtd,
-	struct nand_chip *chip, uint8_t *buf, int page);
+	struct nand_chip *chip, uint8_t *buf, int oob_required, int page);
 static void bcm_umi_bch_write_page_hwecc(struct mtd_info *mtd,
-	struct nand_chip *chip, const uint8_t *buf);
+	struct nand_chip *chip, const uint8_t *buf, int oob_required);
 
 /* ---- Private Variables ------------------------------------------------ */
 
@@ -103,11 +103,12 @@ static struct nand_ecclayout nand_hw_eccoob_4096 = {
 *  @mtd:	mtd info structure
 *  @chip:	nand chip info structure
 *  @buf:	buffer to store read data
+*  @oob_required:	caller expects OOB data read to chip->oob_poi
 *
 ***************************************************************************/
 static int bcm_umi_bch_read_page_hwecc(struct mtd_info *mtd,
 				       struct nand_chip *chip, uint8_t * buf,
-						 int page)
+				       int oob_required, int page)
 {
 	int sectorIdx = 0;
 	int eccsize = chip->ecc.size;
@@ -190,10 +191,11 @@ static int bcm_umi_bch_read_page_hwecc(struct mtd_info *mtd,
 *  @mtd:	mtd info structure
 *  @chip:	nand chip info structure
 *  @buf:	data buffer
+*  @oob_required:	must write chip->oob_poi to OOB
 *
 ***************************************************************************/
 static void bcm_umi_bch_write_page_hwecc(struct mtd_info *mtd,
-	struct nand_chip *chip, const uint8_t *buf)
+	struct nand_chip *chip, const uint8_t *buf, int oob_required)
 {
 	int sectorIdx = 0;
 	int eccsize = chip->ecc.size;
diff --git a/drivers/mtd/nand/bcm_umi_nand.c b/drivers/mtd/nand/bcm_umi_nand.c
index 7134adfa1089..c855e7cd337b 100644
--- a/drivers/mtd/nand/bcm_umi_nand.c
+++ b/drivers/mtd/nand/bcm_umi_nand.c
@@ -341,7 +341,7 @@ static int bcm_umi_nand_verify_buf(struct mtd_info *mtd, const u_char * buf,
 	 * for MLC parts which may have permanently stuck bits.
 	 */
 	struct nand_chip *chip = mtd->priv;
-	int ret = chip->ecc.read_page(mtd, chip, readbackbuf, 0);
+	int ret = chip->ecc.read_page(mtd, chip, readbackbuf, 0, 0);
 	if (ret < 0)
 		return -EFAULT;
 	else {
diff --git a/drivers/mtd/nand/bf5xx_nand.c b/drivers/mtd/nand/bf5xx_nand.c
index d7b86b925de5..3f1c18599cbd 100644
--- a/drivers/mtd/nand/bf5xx_nand.c
+++ b/drivers/mtd/nand/bf5xx_nand.c
@@ -558,7 +558,7 @@ static void bf5xx_nand_dma_write_buf(struct mtd_info *mtd,
 }
 
 static int bf5xx_nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-		uint8_t *buf, int page)
+		uint8_t *buf, int oob_required, int page)
 {
 	bf5xx_nand_read_buf(mtd, buf, mtd->writesize);
 	bf5xx_nand_read_buf(mtd, chip->oob_poi, mtd->oobsize);
@@ -567,7 +567,7 @@ static int bf5xx_nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip
 }
 
 static void bf5xx_nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-		const uint8_t *buf)
+		const uint8_t *buf, int oob_required)
 {
 	bf5xx_nand_write_buf(mtd, buf, mtd->writesize);
 	bf5xx_nand_write_buf(mtd, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 75fb77b96efb..3a6c88d69bc7 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -375,12 +375,13 @@ static int cafe_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd:	mtd info structure
  * @chip:	nand chip info structure
  * @buf:	buffer to store read data
+ * @oob_required:	caller expects OOB data read to chip->oob_poi
  *
  * The hw generator calculates the error syndrome automatically. Therefor
  * we need a special oob layout and handling.
  */
 static int cafe_nand_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			       uint8_t *buf, int page)
+			       uint8_t *buf, int oob_required, int page)
 {
 	struct cafe_priv *cafe = mtd->priv;
 	unsigned int max_bitflips = 0;
@@ -520,7 +521,8 @@ static struct nand_bbt_descr cafe_bbt_mirror_descr_512 = {
 
 
 static void cafe_nand_write_page_lowlevel(struct mtd_info *mtd,
-					  struct nand_chip *chip, const uint8_t *buf)
+					  struct nand_chip *chip,
+					  const uint8_t *buf, int oob_required)
 {
 	struct cafe_priv *cafe = mtd->priv;
 
@@ -532,16 +534,17 @@ static void cafe_nand_write_page_lowlevel(struct mtd_info *mtd,
 }
 
 static int cafe_nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf, int page, int cached, int raw)
+				const uint8_t *buf, int oob_required, int page,
+				int cached, int raw)
 {
 	int status;
 
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
 
 	if (unlikely(raw))
-		chip->ecc.write_page_raw(mtd, chip, buf);
+		chip->ecc.write_page_raw(mtd, chip, buf, oob_required);
 	else
-		chip->ecc.write_page(mtd, chip, buf);
+		chip->ecc.write_page(mtd, chip, buf, oob_required);
 
 	/*
 	 * Cached progamming disabled for now, Not sure if its worth the
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index 1b346474dba8..a54c18611945 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1087,7 +1087,7 @@ static void write_page(struct mtd_info *mtd, struct nand_chip *chip,
  * by write_page above.
  * */
 static void denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf)
+				const uint8_t *buf, int oob_required)
 {
 	/* for regular page writes, we let HW handle all the ECC
 	 * data written to the device. */
@@ -1099,7 +1099,7 @@ static void denali_write_page(struct mtd_info *mtd, struct nand_chip *chip,
  * write_page() function above.
  */
 static void denali_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-					const uint8_t *buf)
+					const uint8_t *buf, int oob_required)
 {
 	/* for raw page writes, we want to disable ECC and simply write
 	   whatever data is in the buffer. */
@@ -1122,7 +1122,7 @@ static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-			    uint8_t *buf, int page)
+			    uint8_t *buf, int oob_required, int page)
 {
 	unsigned int max_bitflips;
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
@@ -1175,7 +1175,7 @@ static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 static int denali_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int page)
+				uint8_t *buf, int oob_required, int page)
 {
 	struct denali_nand_info *denali = mtd_to_denali(mtd);
 
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 1540de2ad39a..1f8485d7500c 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -787,13 +787,13 @@ static int read_page(struct mtd_info *mtd, struct nand_chip *nand,
 
 
 static int docg4_read_page_raw(struct mtd_info *mtd, struct nand_chip *nand,
-			       uint8_t *buf, int page)
+			       uint8_t *buf, int oob_required, int page)
 {
 	return read_page(mtd, nand, buf, page, false);
 }
 
 static int docg4_read_page(struct mtd_info *mtd, struct nand_chip *nand,
-			   uint8_t *buf, int page)
+			   uint8_t *buf, int oob_required, int page)
 {
 	return read_page(mtd, nand, buf, page, true);
 }
@@ -953,13 +953,13 @@ static void write_page(struct mtd_info *mtd, struct nand_chip *nand,
 }
 
 static void docg4_write_page_raw(struct mtd_info *mtd, struct nand_chip *nand,
-				 const uint8_t *buf)
+				 const uint8_t *buf, int oob_required)
 {
 	return write_page(mtd, nand, buf, false);
 }
 
 static void docg4_write_page(struct mtd_info *mtd, struct nand_chip *nand,
-			     const uint8_t *buf)
+			     const uint8_t *buf, int oob_required)
 {
 	return write_page(mtd, nand, buf, true);
 }
@@ -1003,7 +1003,7 @@ static int __init read_factory_bbt(struct mtd_info *mtd)
 		return -ENOMEM;
 
 	read_page_prologue(mtd, g4_addr);
-	status = docg4_read_page(mtd, nand, buf, DOCG4_FACTORY_BBT_PAGE);
+	status = docg4_read_page(mtd, nand, buf, 0, DOCG4_FACTORY_BBT_PAGE);
 	if (status)
 		goto exit;
 
@@ -1080,7 +1080,7 @@ static int docg4_block_markbad(struct mtd_info *mtd, loff_t ofs)
 
 	/* write first page of block */
 	write_page_prologue(mtd, g4_addr);
-	docg4_write_page(mtd, nand, buf);
+	docg4_write_page(mtd, nand, buf, 1);
 	ret = pageprog(mtd);
 	if (!ret)
 		mtd->ecc_stats.badblocks++;
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 4d995875d366..11e34010272f 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -746,10 +746,8 @@ static int fsl_elbc_chip_init_tail(struct mtd_info *mtd)
 	return 0;
 }
 
-static int fsl_elbc_read_page(struct mtd_info *mtd,
-                              struct nand_chip *chip,
-			      uint8_t *buf,
-			      int page)
+static int fsl_elbc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+			      uint8_t *buf, int oob_required, int page)
 {
 	struct fsl_elbc_mtd *priv = chip->priv;
 	struct fsl_lbc_ctrl *ctrl = priv->ctrl;
@@ -767,9 +765,8 @@ static int fsl_elbc_read_page(struct mtd_info *mtd,
 /* ECC will be calculated automatically, and errors will be detected in
  * waitfunc.
  */
-static void fsl_elbc_write_page(struct mtd_info *mtd,
-                                struct nand_chip *chip,
-                                const uint8_t *buf)
+static void fsl_elbc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+				const uint8_t *buf, int oob_required)
 {
 	fsl_elbc_write_buf(mtd, buf, mtd->writesize);
 	fsl_elbc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/fsl_ifc_nand.c b/drivers/mtd/nand/fsl_ifc_nand.c
index dffd2fa353ae..c085df3d816a 100644
--- a/drivers/mtd/nand/fsl_ifc_nand.c
+++ b/drivers/mtd/nand/fsl_ifc_nand.c
@@ -698,9 +698,8 @@ static int fsl_ifc_wait(struct mtd_info *mtd, struct nand_chip *chip)
 	return nand_fsr | NAND_STATUS_WP;
 }
 
-static int fsl_ifc_read_page(struct mtd_info *mtd,
-			      struct nand_chip *chip,
-			      uint8_t *buf, int page)
+static int fsl_ifc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
+			     uint8_t *buf, int oob_required, int page)
 {
 	struct fsl_ifc_mtd *priv = chip->priv;
 	struct fsl_ifc_ctrl *ctrl = priv->ctrl;
@@ -721,9 +720,8 @@ static int fsl_ifc_read_page(struct mtd_info *mtd,
 /* ECC will be calculated automatically, and errors will be detected in
  * waitfunc.
  */
-static void fsl_ifc_write_page(struct mtd_info *mtd,
-				struct nand_chip *chip,
-				const uint8_t *buf)
+static void fsl_ifc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+			       const uint8_t *buf, int oob_required)
 {
 	fsl_ifc_write_buf(mtd, buf, mtd->writesize);
 	fsl_ifc_write_buf(mtd, chip->oob_poi, mtd->oobsize);
diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c
index 6bf59fdde263..38d26240d8b1 100644
--- a/drivers/mtd/nand/fsmc_nand.c
+++ b/drivers/mtd/nand/fsmc_nand.c
@@ -692,6 +692,7 @@ static void fsmc_write_buf_dma(struct mtd_info *mtd, const uint8_t *buf,
  * @mtd:	mtd info structure
  * @chip:	nand chip info structure
  * @buf:	buffer to store read data
+ * @oob_required:	caller expects OOB data read to chip->oob_poi
  * @page:	page number to read
  *
  * This routine is needed for fsmc version 8 as reading from NAND chip has to be
@@ -701,7 +702,7 @@ static void fsmc_write_buf_dma(struct mtd_info *mtd, const uint8_t *buf,
  * max of 8 bits)
  */
 static int fsmc_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				 uint8_t *buf, int page)
+				 uint8_t *buf, int oob_required, int page)
 {
 	struct fsmc_nand_data *host = container_of(mtd,
 					struct fsmc_nand_data, mtd);
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 9ec51cec2e14..d85a2c1fad54 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -842,7 +842,7 @@ static void block_mark_swapping(struct gpmi_nand_data *this,
 }
 
 static int gpmi_ecc_read_page(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int page)
+				uint8_t *buf, int oob_required, int page)
 {
 	struct gpmi_nand_data *this = chip->priv;
 	struct bch_geometry *nfc_geo = &this->bch_geometry;
@@ -928,8 +928,8 @@ exit_nfc:
 	return ret;
 }
 
-static void gpmi_ecc_write_page(struct mtd_info *mtd,
-				struct nand_chip *chip, const uint8_t *buf)
+static void gpmi_ecc_write_page(struct mtd_info *mtd, struct nand_chip *chip,
+				const uint8_t *buf, int oob_required)
 {
 	struct gpmi_nand_data *this = chip->priv;
 	struct bch_geometry *nfc_geo = &this->bch_geometry;
@@ -1309,7 +1309,7 @@ static int mx23_write_transcription_stamp(struct gpmi_nand_data *this)
 		/* Write the first page of the current stride. */
 		dev_dbg(dev, "Writing an NCB fingerprint in page 0x%x\n", page);
 		chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
-		chip->ecc.write_page_raw(mtd, chip, buffer);
+		chip->ecc.write_page_raw(mtd, chip, buffer, 0);
 		chip->cmdfunc(mtd, NAND_CMD_PAGEPROG, -1, -1);
 
 		/* Wait for the write to finish. */
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 5ec4d2c01b87..6d4894acb567 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1066,12 +1066,13 @@ EXPORT_SYMBOL(nand_lock);
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
  * @page: page number to read
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
 static int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-			      uint8_t *buf, int page)
+			      uint8_t *buf, int oob_required, int page)
 {
 	chip->read_buf(mtd, buf, mtd->writesize);
 	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
@@ -1083,13 +1084,14 @@ static int nand_read_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
  * @page: page number to read
  *
  * We need a special oob layout and handling even when OOB isn't used.
  */
 static int nand_read_page_raw_syndrome(struct mtd_info *mtd,
-					struct nand_chip *chip,
-					uint8_t *buf, int page)
+				       struct nand_chip *chip, uint8_t *buf,
+				       int oob_required, int page)
 {
 	int eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1126,10 +1128,11 @@ static int nand_read_page_raw_syndrome(struct mtd_info *mtd,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
  * @page: page number to read
  */
 static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int page)
+				uint8_t *buf, int oob_required, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1140,7 +1143,7 @@ static int nand_read_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 	uint32_t *eccpos = chip->ecc.layout->eccpos;
 	unsigned int max_bitflips = 0;
 
-	chip->ecc.read_page_raw(mtd, chip, buf, page);
+	chip->ecc.read_page_raw(mtd, chip, buf, 1, page);
 
 	for (i = 0; eccsteps; eccsteps--, i += eccbytes, p += eccsize)
 		chip->ecc.calculate(mtd, p, &ecc_calc[i]);
@@ -1263,12 +1266,13 @@ static int nand_read_subpage(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
  * @page: page number to read
  *
  * Not for syndrome calculating ECC controllers which need a special oob layout.
  */
 static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int page)
+				uint8_t *buf, int oob_required, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1311,6 +1315,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
  * @page: page number to read
  *
  * Hardware ECC for large page chips, require OOB to be read first. For this
@@ -1320,7 +1325,7 @@ static int nand_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  * the data area, by overwriting the NAND manufacturer bad block markings.
  */
 static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
-	struct nand_chip *chip, uint8_t *buf, int page)
+	struct nand_chip *chip, uint8_t *buf, int oob_required, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1362,13 +1367,14 @@ static int nand_read_page_hwecc_oob_first(struct mtd_info *mtd,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: buffer to store read data
+ * @oob_required: caller requires OOB data read to chip->oob_poi
  * @page: page number to read
  *
  * The hw generator calculates the error syndrome automatically. Therefore we
  * need a special oob layout and handling.
  */
 static int nand_read_page_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-				   uint8_t *buf, int page)
+				   uint8_t *buf, int oob_required, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1514,14 +1520,14 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 			 * the read methods return max bitflips per ecc step.
 			 */
 			if (unlikely(ops->mode == MTD_OPS_RAW))
-				ret = chip->ecc.read_page_raw(mtd, chip,
-							      bufpoi, page);
+				ret = chip->ecc.read_page_raw(mtd, chip, bufpoi,
+							      1, page);
 			else if (!aligned && NAND_SUBPAGE_READ(chip) && !oob)
 				ret = chip->ecc.read_subpage(mtd, chip,
 							col, bytes, bufpoi);
 			else
 				ret = chip->ecc.read_page(mtd, chip, bufpoi,
-							  page);
+							  1, page);
 			if (ret < 0) {
 				if (!aligned)
 					/* Invalidate page cache */
@@ -1913,11 +1919,12 @@ out:
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
+ * @oob_required: must write chip->oob_poi to OOB
  *
  * Not for syndrome calculating ECC controllers, which use a special oob layout.
  */
 static void nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
-				const uint8_t *buf)
+				const uint8_t *buf, int oob_required)
 {
 	chip->write_buf(mtd, buf, mtd->writesize);
 	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
@@ -1928,12 +1935,13 @@ static void nand_write_page_raw(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
+ * @oob_required: must write chip->oob_poi to OOB
  *
  * We need a special oob layout and handling even when ECC isn't checked.
  */
 static void nand_write_page_raw_syndrome(struct mtd_info *mtd,
 					struct nand_chip *chip,
-					const uint8_t *buf)
+					const uint8_t *buf, int oob_required)
 {
 	int eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1967,9 +1975,10 @@ static void nand_write_page_raw_syndrome(struct mtd_info *mtd,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
+ * @oob_required: must write chip->oob_poi to OOB
  */
 static void nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
-				  const uint8_t *buf)
+				  const uint8_t *buf, int oob_required)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -1985,7 +1994,7 @@ static void nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
 	for (i = 0; i < chip->ecc.total; i++)
 		chip->oob_poi[eccpos[i]] = ecc_calc[i];
 
-	chip->ecc.write_page_raw(mtd, chip, buf);
+	chip->ecc.write_page_raw(mtd, chip, buf, 1);
 }
 
 /**
@@ -1993,9 +2002,10 @@ static void nand_write_page_swecc(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
+ * @oob_required: must write chip->oob_poi to OOB
  */
 static void nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				  const uint8_t *buf)
+				  const uint8_t *buf, int oob_required)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -2021,12 +2031,14 @@ static void nand_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @buf: data buffer
+ * @oob_required: must write chip->oob_poi to OOB
  *
  * The hw generator calculates the error syndrome automatically. Therefore we
  * need a special oob layout and handling.
  */
 static void nand_write_page_syndrome(struct mtd_info *mtd,
-				    struct nand_chip *chip, const uint8_t *buf)
+				    struct nand_chip *chip,
+				    const uint8_t *buf, int oob_required)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -2065,21 +2077,23 @@ static void nand_write_page_syndrome(struct mtd_info *mtd,
  * @mtd: MTD device structure
  * @chip: NAND chip descriptor
  * @buf: the data to write
+ * @oob_required: must write chip->oob_poi to OOB
  * @page: page number to write
  * @cached: cached programming
  * @raw: use _raw version of write_page
  */
 static int nand_write_page(struct mtd_info *mtd, struct nand_chip *chip,
-			   const uint8_t *buf, int page, int cached, int raw)
+			   const uint8_t *buf, int oob_required, int page,
+			   int cached, int raw)
 {
 	int status;
 
 	chip->cmdfunc(mtd, NAND_CMD_SEQIN, 0x00, page);
 
 	if (unlikely(raw))
-		chip->ecc.write_page_raw(mtd, chip, buf);
+		chip->ecc.write_page_raw(mtd, chip, buf, oob_required);
 	else
-		chip->ecc.write_page(mtd, chip, buf);
+		chip->ecc.write_page(mtd, chip, buf, oob_required);
 
 	/*
 	 * Cached progamming disabled for now. Not sure if it's worth the
@@ -2261,7 +2275,7 @@ static int nand_do_write_ops(struct mtd_info *mtd, loff_t to,
 			memset(chip->oob_poi, 0xff, mtd->oobsize);
 		}
 
-		ret = chip->write_page(mtd, chip, wbuf, page, cached,
+		ret = chip->write_page(mtd, chip, wbuf, 1, page, cached,
 				       (ops->mode == MTD_OPS_RAW));
 		if (ret)
 			break;
diff --git a/drivers/mtd/nand/pxa3xx_nand.c b/drivers/mtd/nand/pxa3xx_nand.c
index 36a32a0d3b9f..252aaefcacfa 100644
--- a/drivers/mtd/nand/pxa3xx_nand.c
+++ b/drivers/mtd/nand/pxa3xx_nand.c
@@ -682,14 +682,15 @@ static void pxa3xx_nand_cmdfunc(struct mtd_info *mtd, unsigned command,
 }
 
 static void pxa3xx_nand_write_page_hwecc(struct mtd_info *mtd,
-		struct nand_chip *chip, const uint8_t *buf)
+		struct nand_chip *chip, const uint8_t *buf, int oob_required)
 {
 	chip->write_buf(mtd, buf, mtd->writesize);
 	chip->write_buf(mtd, chip->oob_poi, mtd->oobsize);
 }
 
 static int pxa3xx_nand_read_page_hwecc(struct mtd_info *mtd,
-		struct nand_chip *chip, uint8_t *buf, int page)
+		struct nand_chip *chip, uint8_t *buf, int oob_required,
+		int page)
 {
 	struct pxa3xx_nand_host *host = mtd->priv;
 	struct pxa3xx_nand_info *info = host->info_data;
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 3f0788fad66f..aa9b8a5e0b8f 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -344,7 +344,7 @@ static void set_cmd_regs(struct mtd_info *mtd, uint32_t cmd, uint32_t flcmcdr_va
 }
 
 static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				uint8_t *buf, int page)
+				uint8_t *buf, int oob_required, int page)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
@@ -366,7 +366,7 @@ static int flctl_read_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 static void flctl_write_page_hwecc(struct mtd_info *mtd, struct nand_chip *chip,
-				   const uint8_t *buf)
+				   const uint8_t *buf, int oob_required)
 {
 	int i, eccsize = chip->ecc.size;
 	int eccbytes = chip->ecc.bytes;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 94a6679bfc2e..c7755f455c81 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -360,15 +360,15 @@ struct nand_ecc_ctrl {
 	int (*correct)(struct mtd_info *mtd, uint8_t *dat, uint8_t *read_ecc,
 			uint8_t *calc_ecc);
 	int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint8_t *buf, int page);
+			uint8_t *buf, int oob_required, int page);
 	void (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf);
+			const uint8_t *buf, int oob_required);
 	int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			uint8_t *buf, int page);
+			uint8_t *buf, int oob_required, int page);
 	int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip,
 			uint32_t offs, uint32_t len, uint8_t *buf);
 	void (*write_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf);
+			const uint8_t *buf, int oob_required);
 	int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
 	int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
@@ -504,7 +504,8 @@ struct nand_chip {
 	int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state,
 			int status, int page);
 	int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip,
-			const uint8_t *buf, int page, int cached, int raw);
+			const uint8_t *buf, int oob_required, int page,
+			int cached, int raw);
 
 	int chip_delay;
 	unsigned int options;
-- 
cgit 


From e10db1f00a5e3c2ec04d7fe26c7444dc55a59b19 Mon Sep 17 00:00:00 2001
From: Huang Shijie <b32955@freescale.com>
Date: Fri, 4 May 2012 21:42:05 -0400
Subject: mtd: gpmi: add device tree support to gpmi-nand

This patch just adds the DT support to gpmi-nand.

Signed-off-by: Huang Shijie <b32955@freescale.com>
Signed-off-by: Huang Shijie <shijie8@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 .../devicetree/bindings/mtd/gpmi-nand.txt          |  33 ++++++
 drivers/mtd/nand/gpmi-nand/gpmi-lib.c              |   7 +-
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c             | 126 ++++++++++-----------
 drivers/mtd/nand/gpmi-nand/gpmi-nand.h             |   4 +-
 include/linux/mtd/gpmi-nand.h                      |   8 +-
 5 files changed, 105 insertions(+), 73 deletions(-)
 create mode 100644 Documentation/devicetree/bindings/mtd/gpmi-nand.txt

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/mtd/gpmi-nand.txt b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
new file mode 100644
index 000000000000..1a5bbd346d22
--- /dev/null
+++ b/Documentation/devicetree/bindings/mtd/gpmi-nand.txt
@@ -0,0 +1,33 @@
+* Freescale General-Purpose Media Interface (GPMI)
+
+The GPMI nand controller provides an interface to control the
+NAND flash chips. We support only one NAND chip now.
+
+Required properties:
+  - compatible : should be "fsl,<chip>-gpmi-nand"
+  - reg : should contain registers location and length for gpmi and bch.
+  - reg-names: Should contain the reg names "gpmi-nand" and "bch"
+  - interrupts : The first is the DMA interrupt number for GPMI.
+                 The second is the BCH interrupt number.
+  - interrupt-names : The interrupt names "gpmi-dma", "bch";
+  - fsl,gpmi-dma-channel : Should contain the dma channel it uses.
+
+The device tree may optionally contain sub-nodes describing partitions of the
+address space. See partition.txt for more detail.
+
+Examples:
+
+gpmi-nand@8000c000 {
+	compatible = "fsl,imx28-gpmi-nand";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	reg = <0x8000c000 2000>, <0x8000a000 2000>;
+	reg-names = "gpmi-nand", "bch";
+	interrupts = <88>, <41>;
+	interrupt-names = "gpmi-dma", "bch";
+	fsl,gpmi-dma-channel = <4>;
+
+	partition@0 {
+	...
+	};
+};
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
index fa5200b8c895..5c55c717fe14 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-lib.c
@@ -256,11 +256,12 @@ static unsigned int ns_to_cycles(unsigned int time,
 	return max(k, min);
 }
 
+#define DEF_MIN_PROP_DELAY	5
+#define DEF_MAX_PROP_DELAY	9
 /* Apply timing to current hardware conditions. */
 static int gpmi_nfc_compute_hardware_timing(struct gpmi_nand_data *this,
 					struct gpmi_nfc_hardware_timing *hw)
 {
-	struct gpmi_nand_platform_data *pdata = this->pdata;
 	struct timing_threshod *nfc = &timing_default_threshold;
 	struct nand_chip *nand = &this->nand;
 	struct nand_timing target = this->timing;
@@ -277,8 +278,8 @@ static int gpmi_nfc_compute_hardware_timing(struct gpmi_nand_data *this,
 	int ideal_sample_delay_in_ns;
 	unsigned int sample_delay_factor;
 	int tEYE;
-	unsigned int min_prop_delay_in_ns = pdata->min_prop_delay_in_ns;
-	unsigned int max_prop_delay_in_ns = pdata->max_prop_delay_in_ns;
+	unsigned int min_prop_delay_in_ns = DEF_MIN_PROP_DELAY;
+	unsigned int max_prop_delay_in_ns = DEF_MAX_PROP_DELAY;
 
 	/*
 	 * If there are multiple chips, we need to relax the timings to allow
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 7e8a035d444a..d9dc4c874ffa 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -24,6 +24,8 @@
 #include <linux/module.h>
 #include <linux/mtd/gpmi-nand.h>
 #include <linux/mtd/partitions.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
 #include "gpmi-nand.h"
 
 /* add our owner bbt descriptor */
@@ -386,7 +388,7 @@ static void release_bch_irq(struct gpmi_nand_data *this)
 static bool gpmi_dma_filter(struct dma_chan *chan, void *param)
 {
 	struct gpmi_nand_data *this = param;
-	struct resource *r = this->private;
+	int dma_channel = (int)this->private;
 
 	if (!mxs_dma_is_apbh(chan))
 		return false;
@@ -398,7 +400,7 @@ static bool gpmi_dma_filter(struct dma_chan *chan, void *param)
 	 *	for mx28 :	MX28_DMA_GPMI0 ~ MX28_DMA_GPMI7
 	 *		(These eight channels share the same IRQ!)
 	 */
-	if (r->start <= chan->chan_id && chan->chan_id <= r->end) {
+	if (dma_channel == chan->chan_id) {
 		chan->private = &this->dma_data;
 		return true;
 	}
@@ -418,57 +420,45 @@ static void release_dma_channels(struct gpmi_nand_data *this)
 static int __devinit acquire_dma_channels(struct gpmi_nand_data *this)
 {
 	struct platform_device *pdev = this->pdev;
-	struct gpmi_nand_platform_data *pdata = this->pdata;
-	struct resources *res = &this->resources;
-	struct resource *r, *r_dma;
-	unsigned int i;
+	struct resource *r_dma;
+	struct device_node *dn;
+	int dma_channel;
+	unsigned int ret;
+	struct dma_chan *dma_chan;
+	dma_cap_mask_t mask;
+
+	/* dma channel, we only use the first one. */
+	dn = pdev->dev.of_node;
+	ret = of_property_read_u32(dn, "fsl,gpmi-dma-channel", &dma_channel);
+	if (ret) {
+		pr_err("unable to get DMA channel from dt.\n");
+		goto acquire_err;
+	}
+	this->private = (void *)dma_channel;
 
-	r = platform_get_resource_byname(pdev, IORESOURCE_DMA,
-					GPMI_NAND_DMA_CHANNELS_RES_NAME);
+	/* gpmi dma interrupt */
 	r_dma = platform_get_resource_byname(pdev, IORESOURCE_IRQ,
 					GPMI_NAND_DMA_INTERRUPT_RES_NAME);
-	if (!r || !r_dma) {
+	if (!r_dma) {
 		pr_err("Can't get resource for DMA\n");
-		return -ENXIO;
+		goto acquire_err;
 	}
+	this->dma_data.chan_irq = r_dma->start;
 
-	/* used in gpmi_dma_filter() */
-	this->private = r;
-
-	for (i = r->start; i <= r->end; i++) {
-		struct dma_chan *dma_chan;
-		dma_cap_mask_t mask;
+	/* request dma channel */
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
 
-		if (i - r->start >= pdata->max_chip_count)
-			break;
-
-		dma_cap_zero(mask);
-		dma_cap_set(DMA_SLAVE, mask);
-
-		/* get the DMA interrupt */
-		if (r_dma->start == r_dma->end) {
-			/* only register the first. */
-			if (i == r->start)
-				this->dma_data.chan_irq = r_dma->start;
-			else
-				this->dma_data.chan_irq = NO_IRQ;
-		} else
-			this->dma_data.chan_irq = r_dma->start + (i - r->start);
-
-		dma_chan = dma_request_channel(mask, gpmi_dma_filter, this);
-		if (!dma_chan)
-			goto acquire_err;
-
-		/* fill the first empty item */
-		this->dma_chans[i - r->start] = dma_chan;
+	dma_chan = dma_request_channel(mask, gpmi_dma_filter, this);
+	if (!dma_chan) {
+		pr_err("dma_request_channel failed.\n");
+		goto acquire_err;
 	}
 
-	res->dma_low_channel = r->start;
-	res->dma_high_channel = i;
+	this->dma_chans[0] = dma_chan;
 	return 0;
 
 acquire_err:
-	pr_err("Can't acquire DMA channel %u\n", i);
 	release_dma_channels(this);
 	return -EINVAL;
 }
@@ -1465,9 +1455,9 @@ void gpmi_nfc_exit(struct gpmi_nand_data *this)
 
 static int __devinit gpmi_nfc_init(struct gpmi_nand_data *this)
 {
-	struct gpmi_nand_platform_data *pdata = this->pdata;
 	struct mtd_info  *mtd = &this->mtd;
 	struct nand_chip *chip = &this->nand;
+	struct mtd_part_parser_data ppdata = {};
 	int ret;
 
 	/* init current chip */
@@ -1505,14 +1495,14 @@ static int __devinit gpmi_nfc_init(struct gpmi_nand_data *this)
 	if (ret)
 		goto err_out;
 
-	ret = nand_scan(mtd, pdata->max_chip_count);
+	ret = nand_scan(mtd, 1);
 	if (ret) {
 		pr_err("Chip scan failed\n");
 		goto err_out;
 	}
 
-	ret = mtd_device_parse_register(mtd, NULL, NULL,
-			pdata->partitions, pdata->partition_count);
+	ppdata.of_node = this->pdev->dev.of_node;
+	ret = mtd_device_parse_register(mtd, NULL, &ppdata, NULL, 0);
 	if (ret)
 		goto err_out;
 	return 0;
@@ -1522,12 +1512,37 @@ err_out:
 	return ret;
 }
 
+static const struct platform_device_id gpmi_ids[] = {
+	{ .name = "imx23-gpmi-nand", .driver_data = IS_MX23, },
+	{ .name = "imx28-gpmi-nand", .driver_data = IS_MX28, },
+	{},
+};
+
+static const struct of_device_id gpmi_nand_id_table[] = {
+	{
+		.compatible = "fsl,imx23-gpmi-nand",
+		.data = (void *)&gpmi_ids[IS_MX23]
+	}, {
+		.compatible = "fsl,imx28-gpmi-nand",
+		.data = (void *)&gpmi_ids[IS_MX28]
+	}, {}
+};
+MODULE_DEVICE_TABLE(of, gpmi_nand_id_table);
+
 static int __devinit gpmi_nand_probe(struct platform_device *pdev)
 {
-	struct gpmi_nand_platform_data *pdata = pdev->dev.platform_data;
 	struct gpmi_nand_data *this;
+	const struct of_device_id *of_id;
 	int ret;
 
+	of_id = of_match_device(gpmi_nand_id_table, &pdev->dev);
+	if (of_id) {
+		pdev->id_entry = of_id->data;
+	} else {
+		pr_err("Failed to find the right device id.\n");
+		return -ENOMEM;
+	}
+
 	this = kzalloc(sizeof(*this), GFP_KERNEL);
 	if (!this) {
 		pr_err("Failed to allocate per-device memory\n");
@@ -1537,13 +1552,6 @@ static int __devinit gpmi_nand_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, this);
 	this->pdev  = pdev;
 	this->dev   = &pdev->dev;
-	this->pdata = pdata;
-
-	if (pdata->platform_init) {
-		ret = pdata->platform_init();
-		if (ret)
-			goto platform_init_error;
-	}
 
 	ret = acquire_resources(this);
 	if (ret)
@@ -1561,7 +1569,6 @@ static int __devinit gpmi_nand_probe(struct platform_device *pdev)
 
 exit_nfc_init:
 	release_resources(this);
-platform_init_error:
 exit_acquire_resources:
 	platform_set_drvdata(pdev, NULL);
 	kfree(this);
@@ -1579,19 +1586,10 @@ static int __exit gpmi_nand_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static const struct platform_device_id gpmi_ids[] = {
-	{
-		.name = "imx23-gpmi-nand",
-		.driver_data = IS_MX23,
-	}, {
-		.name = "imx28-gpmi-nand",
-		.driver_data = IS_MX28,
-	}, {},
-};
-
 static struct platform_driver gpmi_nand_driver = {
 	.driver = {
 		.name = "gpmi-nand",
+		.of_match_table = gpmi_nand_id_table,
 	},
 	.probe   = gpmi_nand_probe,
 	.remove  = __exit_p(gpmi_nand_remove),
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
index ec6180d4ff8f..12fdd7767c1c 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.h
@@ -266,8 +266,8 @@ extern int gpmi_read_page(struct gpmi_nand_data *,
 #define STATUS_UNCORRECTABLE	0xfe
 
 /* Use the platform_id to distinguish different Archs. */
-#define IS_MX23			0x1
-#define IS_MX28			0x2
+#define IS_MX23			0x0
+#define IS_MX28			0x1
 #define GPMI_IS_MX23(x)		((x)->pdev->id_entry->driver_data == IS_MX23)
 #define GPMI_IS_MX28(x)		((x)->pdev->id_entry->driver_data == IS_MX28)
 #endif
diff --git a/include/linux/mtd/gpmi-nand.h b/include/linux/mtd/gpmi-nand.h
index 69b6dbf46b5e..ed3c4e09f3d1 100644
--- a/include/linux/mtd/gpmi-nand.h
+++ b/include/linux/mtd/gpmi-nand.h
@@ -23,12 +23,12 @@
 #define GPMI_NAND_RES_SIZE	6
 
 /* Resource names for the GPMI NAND driver. */
-#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "GPMI NAND GPMI Registers"
+#define GPMI_NAND_GPMI_REGS_ADDR_RES_NAME  "gpmi-nand"
 #define GPMI_NAND_GPMI_INTERRUPT_RES_NAME  "GPMI NAND GPMI Interrupt"
-#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "GPMI NAND BCH Registers"
-#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "GPMI NAND BCH Interrupt"
+#define GPMI_NAND_BCH_REGS_ADDR_RES_NAME   "bch"
+#define GPMI_NAND_BCH_INTERRUPT_RES_NAME   "bch"
 #define GPMI_NAND_DMA_CHANNELS_RES_NAME    "GPMI NAND DMA Channels"
-#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "GPMI NAND DMA Interrupt"
+#define GPMI_NAND_DMA_INTERRUPT_RES_NAME   "gpmi-dma"
 
 /**
  * struct gpmi_nand_platform_data - GPMI NAND driver platform data.
-- 
cgit 


From 5c2ffb11d40dd967eecb45b8570a871746ba124b Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Wed, 9 May 2012 13:06:35 +0300
Subject: mtd: nand: remove 'sndcmd' parameter of 'read_oob/read_oob_raw'

As of [mtd: nand: remove autoincrement 'sndcmd' code], the
NAND_CMD_READ0 command is issued unconditionally.

Thus, read_oob/read_oob_raw's 'sndcmd' argument is no longer needed, as
well as their return code.

Remove the 'sndcmd' parameter, and set the return code to 0.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/cafe_nand.c           |  4 ++--
 drivers/mtd/nand/denali.c              |  5 ++---
 drivers/mtd/nand/docg4.c               |  2 +-
 drivers/mtd/nand/gpmi-nand/gpmi-nand.c |  8 ++------
 drivers/mtd/nand/nand_base.c           | 19 +++++++------------
 drivers/mtd/nand/r852.c                |  9 +++------
 include/linux/mtd/nand.h               |  5 ++---
 7 files changed, 19 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 3a6c88d69bc7..41371ba1a811 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -364,11 +364,11 @@ static int cafe_nand_write_oob(struct mtd_info *mtd,
 
 /* Don't use -- use nand_read_oob_std for now */
 static int cafe_nand_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			      int page, int sndcmd)
+			      int page)
 {
 	chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
 	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-	return 1;
+	return 0;
 }
 /**
  * cafe_nand_read_page_syndrome - [REPLACEABLE] hardware ecc syndrome based page read
diff --git a/drivers/mtd/nand/denali.c b/drivers/mtd/nand/denali.c
index a54c18611945..0650aafa0dd2 100644
--- a/drivers/mtd/nand/denali.c
+++ b/drivers/mtd/nand/denali.c
@@ -1113,12 +1113,11 @@ static int denali_write_oob(struct mtd_info *mtd, struct nand_chip *chip,
 }
 
 static int denali_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			   int page, int sndcmd)
+			   int page)
 {
 	read_oob_data(mtd, chip->oob_poi, page);
 
-	return 0; /* notify NAND core to send command to
-			   NAND device. */
+	return 0;
 }
 
 static int denali_read_page(struct mtd_info *mtd, struct nand_chip *chip,
diff --git a/drivers/mtd/nand/docg4.c b/drivers/mtd/nand/docg4.c
index 1f8485d7500c..a225e49a5623 100644
--- a/drivers/mtd/nand/docg4.c
+++ b/drivers/mtd/nand/docg4.c
@@ -799,7 +799,7 @@ static int docg4_read_page(struct mtd_info *mtd, struct nand_chip *nand,
 }
 
 static int docg4_read_oob(struct mtd_info *mtd, struct nand_chip *nand,
-			  int page, int sndcmd)
+			  int page)
 {
 	struct docg4_priv *doc = nand->priv;
 	void __iomem *docptr = doc->virtadr;
diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
index 519e4306e352..95f2af848ff0 100644
--- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
+++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c
@@ -1061,7 +1061,7 @@ exit_auxiliary:
  * this driver.
  */
 static int gpmi_ecc_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-				int page, int sndcmd)
+				int page)
 {
 	struct gpmi_nand_data *this = chip->priv;
 
@@ -1084,11 +1084,7 @@ static int gpmi_ecc_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
 		chip->oob_poi[0] = chip->read_byte(mtd);
 	}
 
-	/*
-	 * Return true, indicating that the next call to this function must send
-	 * a command.
-	 */
-	return true;
+	return 0;
 }
 
 static int
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 14e9b32ed8c7..4047d7c2fee6 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1645,17 +1645,13 @@ static int nand_read(struct mtd_info *mtd, loff_t from, size_t len,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @page: page number to read
- * @sndcmd: flag whether to issue read command or not
  */
 static int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip,
-			     int page, int sndcmd)
+			     int page)
 {
-	if (sndcmd) {
-		chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
-		sndcmd = 0;
-	}
+	chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
 	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-	return sndcmd;
+	return 0;
 }
 
 /**
@@ -1664,10 +1660,9 @@ static int nand_read_oob_std(struct mtd_info *mtd, struct nand_chip *chip,
  * @mtd: mtd info structure
  * @chip: nand chip info structure
  * @page: page number to read
- * @sndcmd: flag whether to issue read command or not
  */
 static int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
-				  int page, int sndcmd)
+				  int page)
 {
 	uint8_t *buf = chip->oob_poi;
 	int length = mtd->oobsize;
@@ -1694,7 +1689,7 @@ static int nand_read_oob_syndrome(struct mtd_info *mtd, struct nand_chip *chip,
 	if (length > 0)
 		chip->read_buf(mtd, bufpoi, length);
 
-	return 1;
+	return 0;
 }
 
 /**
@@ -1831,9 +1826,9 @@ static int nand_do_read_oob(struct mtd_info *mtd, loff_t from,
 
 	while (1) {
 		if (ops->mode == MTD_OPS_RAW)
-			chip->ecc.read_oob_raw(mtd, chip, page, 1);
+			chip->ecc.read_oob_raw(mtd, chip, page);
 		else
-			chip->ecc.read_oob(mtd, chip, page, 1);
+			chip->ecc.read_oob(mtd, chip, page);
 
 		len = min(len, readlen);
 		buf = nand_transfer_oob(chip, buf, ops, len);
diff --git a/drivers/mtd/nand/r852.c b/drivers/mtd/nand/r852.c
index 859ef318e5dc..8cb627751c9c 100644
--- a/drivers/mtd/nand/r852.c
+++ b/drivers/mtd/nand/r852.c
@@ -539,14 +539,11 @@ exit:
  * nand_read_oob_syndrome assumes we can send column address - we can't
  */
 static int r852_read_oob(struct mtd_info *mtd, struct nand_chip *chip,
-			     int page, int sndcmd)
+			     int page)
 {
-	if (sndcmd) {
-		chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
-		sndcmd = 0;
-	}
+	chip->cmdfunc(mtd, NAND_CMD_READOOB, 0, page);
 	chip->read_buf(mtd, chip->oob_poi, mtd->oobsize);
-	return sndcmd;
+	return 0;
 }
 
 /*
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index c7755f455c81..57977c640529 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -372,9 +372,8 @@ struct nand_ecc_ctrl {
 	int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
 	int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip,
-			int page, int sndcmd);
-	int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page,
-			int sndcmd);
+			int page);
+	int (*read_oob)(struct mtd_info *mtd, struct nand_chip *chip, int page);
 	int (*write_oob)(struct mtd_info *mtd, struct nand_chip *chip,
 			int page);
 };
-- 
cgit 


From c3ba9698152b17fdc2c7cd0f7cbeb571e3367e9d Mon Sep 17 00:00:00 2001
From: Dan Magenheimer <dan.magenheimer@oracle.com>
Date: Mon, 9 Apr 2012 17:06:54 -0600
Subject: mm: frontswap: add frontswap header file

Frontswap is the alter ego of cleancache, the "yang" to cleancache's
"yin"... and more precisely frontswap is the provider of anonymous
pages to transcendent memory to nicely complement cleancache's providing
of clean pagecache pages to transcendent memory.  For optimal use
of transcendent memory, both are necessary... because a kernel
under memory pressure first reclaims clean pagecache pages and,
when under more memory pressure, starts swapping anonymous pages.

Frontswap and cleancache (which was merged at 3.0) are the "frontends"
and the only necessary changes to the core kernel for transcendent memory;
all other supporting code -- the "backends" -- is implemented as drivers.
See the LWN.net article "Transcendent memory in a nutshell" for a detailed
overview of frontswap and related kernel parts:
https://lwn.net/Articles/454795/

Frontswap code was first posted publicly in January 2009 and on LKML in
May 2009, and has remained functionally stable for nearly three years now.
It is barely invasive, touching only the swap subsystem and adds less
than 100 lines of code to existing swap subsystem code files.
It has improved syntactically substantially between V1 and this posting
of V14, thanks to the review of a few kernel developers, and has adapted
easily to at least one major swap subsystem change.  As of 3.4, there are
three in-tree users of frontswap patiently waiting for this patchset and
for CONFIG_FRONTSWAP to be enabled: zcache (staging driver merged at
2.6.39), Xen tmem (merged at 3.0 and 3.1) and RAMster (staging driver
merged at 3.4).  In addition, a RFC has been posted for a KVM backend.
The frontswap patchset has been in linux-next since next-110603.  Earlier
versions of frontswap already ship in the Oracle Unbreakable Enterprise Kernel
and SuSE SLES.

This patch, 1of4, provides the header file for the core code for frontswap
that interfaces between the hooks in the swap subsystem and a frontswap
backend via frontswap_ops.
---
New file added: include/linux/frontswap.h

[v14: add support for writethrough, per suggestion by aarcange@redhat.com]
[v14: rebase to 3.4-rc2]
[v11: konrad.wilk@oracle.com: squashed s/flush/invalidate/ in]
[v10: no change]
[v9: akpm@linux-foundation.org: change "flush" to "invalidate", part 1]
[v8: rebase to 3.0-rc4]
[v7: rebase to 3.0-rc3]
[v7: JBeulich@novell.com: new static inlines resolve to no-ops if not config'd]
[v7: JBeulich@novell.com: avoid redundant shifts/divides for *_bit lib calls]
[v6: rebase to 3.1-rc1]
[v5: no change from v4]
[v4: rebase to 2.6.39]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Jan Beulich <JBeulich@novell.com>
Acked-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Rik Riel <riel@redhat.com>
[v15: int/bool on some functions]
Signed-off-by: Konrad Wilk <konrad.wilk@oracle.com>
---
 include/linux/frontswap.h | 127 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)
 create mode 100644 include/linux/frontswap.h

(limited to 'include/linux')

diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
new file mode 100644
index 000000000000..68ff7af5c5fb
--- /dev/null
+++ b/include/linux/frontswap.h
@@ -0,0 +1,127 @@
+#ifndef _LINUX_FRONTSWAP_H
+#define _LINUX_FRONTSWAP_H
+
+#include <linux/swap.h>
+#include <linux/mm.h>
+#include <linux/bitops.h>
+
+struct frontswap_ops {
+	void (*init)(unsigned);
+	int (*put_page)(unsigned, pgoff_t, struct page *);
+	int (*get_page)(unsigned, pgoff_t, struct page *);
+	void (*invalidate_page)(unsigned, pgoff_t);
+	void (*invalidate_area)(unsigned);
+};
+
+extern bool frontswap_enabled;
+extern struct frontswap_ops
+	frontswap_register_ops(struct frontswap_ops *ops);
+extern void frontswap_shrink(unsigned long);
+extern unsigned long frontswap_curr_pages(void);
+extern void frontswap_writethrough(bool);
+
+extern void __frontswap_init(unsigned type);
+extern int __frontswap_put_page(struct page *page);
+extern int __frontswap_get_page(struct page *page);
+extern void __frontswap_invalidate_page(unsigned, pgoff_t);
+extern void __frontswap_invalidate_area(unsigned);
+
+#ifdef CONFIG_FRONTSWAP
+
+static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
+{
+	bool ret = false;
+
+	if (frontswap_enabled && sis->frontswap_map)
+		ret = test_bit(offset, sis->frontswap_map);
+	return ret;
+}
+
+static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
+{
+	if (frontswap_enabled && sis->frontswap_map)
+		set_bit(offset, sis->frontswap_map);
+}
+
+static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
+{
+	if (frontswap_enabled && sis->frontswap_map)
+		clear_bit(offset, sis->frontswap_map);
+}
+
+static inline void frontswap_map_set(struct swap_info_struct *p,
+				     unsigned long *map)
+{
+	p->frontswap_map = map;
+}
+
+static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
+{
+	return p->frontswap_map;
+}
+#else
+/* all inline routines become no-ops and all externs are ignored */
+
+#define frontswap_enabled (0)
+
+static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
+{
+	return false;
+}
+
+static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
+{
+}
+
+static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
+{
+}
+
+static inline void frontswap_map_set(struct swap_info_struct *p,
+				     unsigned long *map)
+{
+}
+
+static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
+{
+	return NULL;
+}
+#endif
+
+static inline int frontswap_put_page(struct page *page)
+{
+	int ret = -1;
+
+	if (frontswap_enabled)
+		ret = __frontswap_put_page(page);
+	return ret;
+}
+
+static inline int frontswap_get_page(struct page *page)
+{
+	int ret = -1;
+
+	if (frontswap_enabled)
+		ret = __frontswap_get_page(page);
+	return ret;
+}
+
+static inline void frontswap_invalidate_page(unsigned type, pgoff_t offset)
+{
+	if (frontswap_enabled)
+		__frontswap_invalidate_page(type, offset);
+}
+
+static inline void frontswap_invalidate_area(unsigned type)
+{
+	if (frontswap_enabled)
+		__frontswap_invalidate_area(type);
+}
+
+static inline void frontswap_init(unsigned type)
+{
+	if (frontswap_enabled)
+		__frontswap_init(type);
+}
+
+#endif /* _LINUX_FRONTSWAP_H */
-- 
cgit 


From 38b5faf4b178d5279b1fca5d7dadc68881342660 Mon Sep 17 00:00:00 2001
From: Dan Magenheimer <dan.magenheimer@oracle.com>
Date: Mon, 9 Apr 2012 17:08:06 -0600
Subject: mm: frontswap: core swap subsystem hooks and headers

This patch, 2of4, contains the changes to the core swap subsystem.
This includes:

(1) makes available core swap data structures (swap_lock, swap_list and
swap_info) that are needed by frontswap.c but we don't need to expose them
to the dozens of files that include swap.h so we create a new swapfile.h
just to extern-ify these and modify their declarations to non-static

(2) adds frontswap-related elements to swap_info_struct.  Frontswap_map
points to vzalloc'ed one-bit-per-swap-page metadata that indicates
whether the swap page is in frontswap or in the device and frontswap_pages
counts how many pages are in frontswap.

(3) adds hooks in the swap subsystem and extends try_to_unuse so that
frontswap_shrink can do a "partial swapoff".

Note that a failed frontswap_map allocation is safe... failure is noted
by lack of "FS" in the subsequent printk.

---

[v14: rebase to 3.4-rc2]
[v10: no change]
[v9: akpm@linux-foundation.org: mark some statics __read_mostly]
[v9: akpm@linux-foundation.org: add clarifying comments]
[v9: akpm@linux-foundation.org: no need to loop repeating try_to_unuse]
[v9: error27@gmail.com: remove superfluous check for NULL]
[v8: rebase to 3.0-rc4]
[v8: kamezawa.hiroyu@jp.fujitsu.com: change counter to atomic_t to avoid races]
[v8: kamezawa.hiroyu@jp.fujitsu.com: comment to clarify informational counters]
[v7: rebase to 3.0-rc3]
[v7: JBeulich@novell.com: add new swap struct elements only if config'd]
[v6: rebase to 3.0-rc1]
[v6: lliubbo@gmail.com: fix null pointer deref if vzalloc fails]
[v6: konrad.wilk@oracl.com: various checks and code clarifications/comments]
[v5: no change from v4]
[v4: rebase to 2.6.39]
Signed-off-by: Dan Magenheimer <dan.magenheimer@oracle.com>
Reviewed-by: Kamezawa Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Jan Beulich <JBeulich@novell.com>
Acked-by: Seth Jennings <sjenning@linux.vnet.ibm.com>
Cc: Jeremy Fitzhardinge <jeremy@goop.org>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Nitin Gupta <ngupta@vflare.org>
Cc: Matthew Wilcox <matthew@wil.cx>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Rik Riel <riel@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
[v11: Rebased, fixed mm/swapfile.c context change]
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 include/linux/swap.h     |  4 ++++
 include/linux/swapfile.h | 13 ++++++++++++
 mm/page_io.c             | 12 +++++++++++
 mm/swapfile.c            | 54 ++++++++++++++++++++++++++++++++++++------------
 4 files changed, 70 insertions(+), 13 deletions(-)
 create mode 100644 include/linux/swapfile.h

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index b1fd5c7925fe..50a55e2d58ec 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -197,6 +197,10 @@ struct swap_info_struct {
 	struct block_device *bdev;	/* swap device or bdev of swap file */
 	struct file *swap_file;		/* seldom referenced */
 	unsigned int old_block_size;	/* seldom referenced */
+#ifdef CONFIG_FRONTSWAP
+	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
+	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
+#endif
 };
 
 struct swap_list_t {
diff --git a/include/linux/swapfile.h b/include/linux/swapfile.h
new file mode 100644
index 000000000000..e282624e8c10
--- /dev/null
+++ b/include/linux/swapfile.h
@@ -0,0 +1,13 @@
+#ifndef _LINUX_SWAPFILE_H
+#define _LINUX_SWAPFILE_H
+
+/*
+ * these were static in swapfile.c but frontswap.c needs them and we don't
+ * want to expose them to the dozens of source files that include swap.h
+ */
+extern spinlock_t swap_lock;
+extern struct swap_list_t swap_list;
+extern struct swap_info_struct *swap_info[];
+extern int try_to_unuse(unsigned int, bool, unsigned long);
+
+#endif /* _LINUX_SWAPFILE_H */
diff --git a/mm/page_io.c b/mm/page_io.c
index dc76b4d0611e..651a91259317 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -18,6 +18,7 @@
 #include <linux/bio.h>
 #include <linux/swapops.h>
 #include <linux/writeback.h>
+#include <linux/frontswap.h>
 #include <asm/pgtable.h>
 
 static struct bio *get_swap_bio(gfp_t gfp_flags,
@@ -98,6 +99,12 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		unlock_page(page);
 		goto out;
 	}
+	if (frontswap_put_page(page) == 0) {
+		set_page_writeback(page);
+		unlock_page(page);
+		end_page_writeback(page);
+		goto out;
+	}
 	bio = get_swap_bio(GFP_NOIO, page, end_swap_bio_write);
 	if (bio == NULL) {
 		set_page_dirty(page);
@@ -122,6 +129,11 @@ int swap_readpage(struct page *page)
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageUptodate(page));
+	if (frontswap_get_page(page) == 0) {
+		SetPageUptodate(page);
+		unlock_page(page);
+		goto out;
+	}
 	bio = get_swap_bio(GFP_KERNEL, page, end_swap_bio_read);
 	if (bio == NULL) {
 		unlock_page(page);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index fafc26d1b1dc..9c7be87175c5 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -31,6 +31,8 @@
 #include <linux/memcontrol.h>
 #include <linux/poll.h>
 #include <linux/oom.h>
+#include <linux/frontswap.h>
+#include <linux/swapfile.h>
 
 #include <asm/pgtable.h>
 #include <asm/tlbflush.h>
@@ -42,7 +44,7 @@ static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
 static void free_swap_count_continuations(struct swap_info_struct *);
 static sector_t map_swap_entry(swp_entry_t, struct block_device**);
 
-static DEFINE_SPINLOCK(swap_lock);
+DEFINE_SPINLOCK(swap_lock);
 static unsigned int nr_swapfiles;
 long nr_swap_pages;
 long total_swap_pages;
@@ -53,9 +55,9 @@ static const char Unused_file[] = "Unused swap file entry ";
 static const char Bad_offset[] = "Bad swap offset entry ";
 static const char Unused_offset[] = "Unused swap offset entry ";
 
-static struct swap_list_t swap_list = {-1, -1};
+struct swap_list_t swap_list = {-1, -1};
 
-static struct swap_info_struct *swap_info[MAX_SWAPFILES];
+struct swap_info_struct *swap_info[MAX_SWAPFILES];
 
 static DEFINE_MUTEX(swapon_mutex);
 
@@ -556,6 +558,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 			swap_list.next = p->type;
 		nr_swap_pages++;
 		p->inuse_pages--;
+		frontswap_invalidate_page(p->type, offset);
 		if ((p->flags & SWP_BLKDEV) &&
 				disk->fops->swap_slot_free_notify)
 			disk->fops->swap_slot_free_notify(p->bdev, offset);
@@ -1016,11 +1019,12 @@ static int unuse_mm(struct mm_struct *mm,
 }
 
 /*
- * Scan swap_map from current position to next entry still in use.
+ * Scan swap_map (or frontswap_map if frontswap parameter is true)
+ * from current position to next entry still in use.
  * Recycle to start on reaching the end, returning 0 when empty.
  */
 static unsigned int find_next_to_unuse(struct swap_info_struct *si,
-					unsigned int prev)
+					unsigned int prev, bool frontswap)
 {
 	unsigned int max = si->max;
 	unsigned int i = prev;
@@ -1046,6 +1050,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
 			prev = 0;
 			i = 1;
 		}
+		if (frontswap) {
+			if (frontswap_test(si, i))
+				break;
+			else
+				continue;
+		}
 		count = si->swap_map[i];
 		if (count && swap_count(count) != SWAP_MAP_BAD)
 			break;
@@ -1057,8 +1067,12 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
  * We completely avoid races by reading each swap page in advance,
  * and then search for the process using it.  All the necessary
  * page table adjustments can then be made atomically.
+ *
+ * if the boolean frontswap is true, only unuse pages_to_unuse pages;
+ * pages_to_unuse==0 means all pages; ignored if frontswap is false
  */
-static int try_to_unuse(unsigned int type)
+int try_to_unuse(unsigned int type, bool frontswap,
+		 unsigned long pages_to_unuse)
 {
 	struct swap_info_struct *si = swap_info[type];
 	struct mm_struct *start_mm;
@@ -1091,7 +1105,7 @@ static int try_to_unuse(unsigned int type)
 	 * one pass through swap_map is enough, but not necessarily:
 	 * there are races when an instance of an entry might be missed.
 	 */
-	while ((i = find_next_to_unuse(si, i)) != 0) {
+	while ((i = find_next_to_unuse(si, i, frontswap)) != 0) {
 		if (signal_pending(current)) {
 			retval = -EINTR;
 			break;
@@ -1258,6 +1272,10 @@ static int try_to_unuse(unsigned int type)
 		 * interactive performance.
 		 */
 		cond_resched();
+		if (frontswap && pages_to_unuse > 0) {
+			if (!--pages_to_unuse)
+				break;
+		}
 	}
 
 	mmput(start_mm);
@@ -1517,7 +1535,8 @@ bad_bmap:
 }
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
-				unsigned char *swap_map)
+				unsigned char *swap_map,
+				unsigned long *frontswap_map)
 {
 	int i, prev;
 
@@ -1527,6 +1546,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
 	else
 		p->prio = --least_priority;
 	p->swap_map = swap_map;
+	frontswap_map_set(p, frontswap_map);
 	p->flags |= SWP_WRITEOK;
 	nr_swap_pages += p->pages;
 	total_swap_pages += p->pages;
@@ -1543,6 +1563,7 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
 		swap_list.head = swap_list.next = p->type;
 	else
 		swap_info[prev]->next = p->type;
+	frontswap_init(p->type);
 	spin_unlock(&swap_lock);
 }
 
@@ -1616,7 +1637,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	spin_unlock(&swap_lock);
 
 	oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
-	err = try_to_unuse(type);
+	err = try_to_unuse(type, false, 0); /* force all pages to be unused */
 	compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX, oom_score_adj);
 
 	if (err) {
@@ -1627,7 +1648,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 		 * sys_swapoff for this swap_info_struct at this point.
 		 */
 		/* re-insert swap space back into swap_list */
-		enable_swap_info(p, p->prio, p->swap_map);
+		enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
 		goto out_dput;
 	}
 
@@ -1653,9 +1674,11 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	swap_map = p->swap_map;
 	p->swap_map = NULL;
 	p->flags = 0;
+	frontswap_invalidate_area(type);
 	spin_unlock(&swap_lock);
 	mutex_unlock(&swapon_mutex);
 	vfree(swap_map);
+	vfree(frontswap_map_get(p));
 	/* Destroy swap account informatin */
 	swap_cgroup_swapoff(type);
 
@@ -2019,6 +2042,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	sector_t span;
 	unsigned long maxpages;
 	unsigned char *swap_map = NULL;
+	unsigned long *frontswap_map = NULL;
 	struct page *page = NULL;
 	struct inode *inode = NULL;
 
@@ -2102,6 +2126,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 		error = nr_extents;
 		goto bad_swap;
 	}
+	/* frontswap enabled? set up bit-per-page map for frontswap */
+	if (frontswap_enabled)
+		frontswap_map = vzalloc(maxpages / sizeof(long));
 
 	if (p->bdev) {
 		if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
@@ -2117,14 +2144,15 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
 	if (swap_flags & SWAP_FLAG_PREFER)
 		prio =
 		  (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
-	enable_swap_info(p, prio, swap_map);
+	enable_swap_info(p, prio, swap_map, frontswap_map);
 
 	printk(KERN_INFO "Adding %uk swap on %s.  "
-			"Priority:%d extents:%d across:%lluk %s%s\n",
+			"Priority:%d extents:%d across:%lluk %s%s%s\n",
 		p->pages<<(PAGE_SHIFT-10), name, p->prio,
 		nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
 		(p->flags & SWP_SOLIDSTATE) ? "SS" : "",
-		(p->flags & SWP_DISCARDABLE) ? "D" : "");
+		(p->flags & SWP_DISCARDABLE) ? "D" : "",
+		(frontswap_map) ? "FS" : "");
 
 	mutex_unlock(&swapon_mutex);
 	atomic_inc(&proc_poll_event);
-- 
cgit 


From 165c8aed5bbc6bdddbccae0ba9db451732558ff9 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 15 May 2012 11:32:15 -0400
Subject: frontswap: s/put_page/store/g s/get_page/load

Sounds so much more natural.

Suggested-by: Andrea Arcangeli <aarcange@redhat.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 Documentation/vm/frontswap.txt        | 50 +++++++++++++++----------------
 drivers/staging/ramster/zcache-main.c |  8 ++---
 drivers/staging/zcache/zcache-main.c  | 10 +++----
 drivers/xen/tmem.c                    |  8 ++---
 include/linux/frontswap.h             | 16 +++++-----
 mm/frontswap.c                        | 56 +++++++++++++++++------------------
 mm/page_io.c                          |  4 +--
 7 files changed, 76 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/vm/frontswap.txt b/Documentation/vm/frontswap.txt
index a9f731af0fac..37067cf455f4 100644
--- a/Documentation/vm/frontswap.txt
+++ b/Documentation/vm/frontswap.txt
@@ -21,21 +21,21 @@ frontswap_ops funcs appropriately and the functions it provides must
 conform to certain policies as follows:
 
 An "init" prepares the device to receive frontswap pages associated
-with the specified swap device number (aka "type").  A "put_page" will
+with the specified swap device number (aka "type").  A "store" will
 copy the page to transcendent memory and associate it with the type and
-offset associated with the page. A "get_page" will copy the page, if found,
+offset associated with the page. A "load" will copy the page, if found,
 from transcendent memory into kernel memory, but will NOT remove the page
 from from transcendent memory.  An "invalidate_page" will remove the page
 from transcendent memory and an "invalidate_area" will remove ALL pages
 associated with the swap type (e.g., like swapoff) and notify the "device"
-to refuse further puts with that swap type.
+to refuse further stores with that swap type.
 
-Once a page is successfully put, a matching get on the page will normally
+Once a page is successfully stored, a matching load on the page will normally
 succeed.  So when the kernel finds itself in a situation where it needs
-to swap out a page, it first attempts to use frontswap.  If the put returns
+to swap out a page, it first attempts to use frontswap.  If the store returns
 success, the data has been successfully saved to transcendent memory and
 a disk write and, if the data is later read back, a disk read are avoided.
-If a put returns failure, transcendent memory has rejected the data, and the
+If a store returns failure, transcendent memory has rejected the data, and the
 page can be written to swap as usual.
 
 If a backend chooses, frontswap can be configured as a "writethrough
@@ -44,18 +44,18 @@ in swap device writes is lost (and also a non-trivial performance advantage)
 in order to allow the backend to arbitrarily "reclaim" space used to
 store frontswap pages to more completely manage its memory usage.
 
-Note that if a page is put and the page already exists in transcendent memory
-(a "duplicate" put), either the put succeeds and the data is overwritten,
-or the put fails AND the page is invalidated.  This ensures stale data may
+Note that if a page is stored and the page already exists in transcendent memory
+(a "duplicate" store), either the store succeeds and the data is overwritten,
+or the store fails AND the page is invalidated.  This ensures stale data may
 never be obtained from frontswap.
 
 If properly configured, monitoring of frontswap is done via debugfs in
 the /sys/kernel/debug/frontswap directory.  The effectiveness of
 frontswap can be measured (across all swap devices) with:
 
-failed_puts	- how many put attempts have failed
-gets		- how many gets were attempted (all should succeed)
-succ_puts	- how many put attempts have succeeded
+failed_stores	- how many store attempts have failed
+loads		- how many loads were attempted (all should succeed)
+succ_stores	- how many store attempts have succeeded
 invalidates	- how many invalidates were attempted
 
 A backend implementation may provide additional metrics.
@@ -125,7 +125,7 @@ nothingness and the only overhead is a few extra bytes per swapon'ed
 swap device.  If CONFIG_FRONTSWAP is enabled but no frontswap "backend"
 registers, there is one extra global variable compared to zero for
 every swap page read or written.  If CONFIG_FRONTSWAP is enabled
-AND a frontswap backend registers AND the backend fails every "put"
+AND a frontswap backend registers AND the backend fails every "store"
 request (i.e. provides no memory despite claiming it might),
 CPU overhead is still negligible -- and since every frontswap fail
 precedes a swap page write-to-disk, the system is highly likely
@@ -159,13 +159,13 @@ entirely dynamic and random.
 
 Whenever a swap-device is swapon'd frontswap_init() is called,
 passing the swap device number (aka "type") as a parameter.
-This notifies frontswap to expect attempts to "put" swap pages
+This notifies frontswap to expect attempts to "store" swap pages
 associated with that number.
 
 Whenever the swap subsystem is readying a page to write to a swap
-device (c.f swap_writepage()), frontswap_put_page is called.  Frontswap
+device (c.f swap_writepage()), frontswap_store is called.  Frontswap
 consults with the frontswap backend and if the backend says it does NOT
-have room, frontswap_put_page returns -1 and the kernel swaps the page
+have room, frontswap_store returns -1 and the kernel swaps the page
 to the swap device as normal.  Note that the response from the frontswap
 backend is unpredictable to the kernel; it may choose to never accept a
 page, it could accept every ninth page, or it might accept every
@@ -177,7 +177,7 @@ corresponding to the page offset on the swap device to which it would
 otherwise have written the data.
 
 When the swap subsystem needs to swap-in a page (swap_readpage()),
-it first calls frontswap_get_page() which checks the frontswap_map to
+it first calls frontswap_load() which checks the frontswap_map to
 see if the page was earlier accepted by the frontswap backend.  If
 it was, the page of data is filled from the frontswap backend and
 the swap-in is complete.  If not, the normal swap-in code is
@@ -185,7 +185,7 @@ executed to obtain the page of data from the real swap device.
 
 So every time the frontswap backend accepts a page, a swap device read
 and (potentially) a swap device write are replaced by a "frontswap backend
-put" and (possibly) a "frontswap backend get", which are presumably much
+store" and (possibly) a "frontswap backend loads", which are presumably much
 faster.
 
 4) Can't frontswap be configured as a "special" swap device that is
@@ -215,8 +215,8 @@ that are inappropriate for a RAM-oriented device including delaying
 the write of some pages for a significant amount of time.  Synchrony is
 required to ensure the dynamicity of the backend and to avoid thorny race
 conditions that would unnecessarily and greatly complicate frontswap
-and/or the block I/O subsystem.  That said, only the initial "put"
-and "get" operations need be synchronous.  A separate asynchronous thread
+and/or the block I/O subsystem.  That said, only the initial "store"
+and "load" operations need be synchronous.  A separate asynchronous thread
 is free to manipulate the pages stored by frontswap.  For example,
 the "remotification" thread in RAMster uses standard asynchronous
 kernel sockets to move compressed frontswap pages to a remote machine.
@@ -229,7 +229,7 @@ choose to accept pages only until host-swapping might be imminent,
 then force guests to do their own swapping.
 
 There is a downside to the transcendent memory specifications for
-frontswap:  Since any "put" might fail, there must always be a real
+frontswap:  Since any "store" might fail, there must always be a real
 slot on a real swap device to swap the page.  Thus frontswap must be
 implemented as a "shadow" to every swapon'd device with the potential
 capability of holding every page that the swap device might have held
@@ -240,16 +240,16 @@ installation, frontswap is useless.  Swapless portable devices
 can still use frontswap but a backend for such devices must configure
 some kind of "ghost" swap device and ensure that it is never used.
 
-5) Why this weird definition about "duplicate puts"?  If a page
-   has been previously successfully put, can't it always be
+5) Why this weird definition about "duplicate stores"?  If a page
+   has been previously successfully stored, can't it always be
    successfully overwritten?
 
 Nearly always it can, but no, sometimes it cannot.  Consider an example
 where data is compressed and the original 4K page has been compressed
 to 1K.  Now an attempt is made to overwrite the page with data that
 is non-compressible and so would take the entire 4K.  But the backend
-has no more space.  In this case, the put must be rejected.  Whenever
-frontswap rejects a put that would overwrite, it also must invalidate
+has no more space.  In this case, the store must be rejected.  Whenever
+frontswap rejects a store that would overwrite, it also must invalidate
 the old data and ensure that it is no longer accessible.  Since the
 swap subsystem then writes the new data to the read swap device,
 this is the correct course of action to ensure coherency.
diff --git a/drivers/staging/ramster/zcache-main.c b/drivers/staging/ramster/zcache-main.c
index 68b2e053a0e6..2627b3de0d21 100644
--- a/drivers/staging/ramster/zcache-main.c
+++ b/drivers/staging/ramster/zcache-main.c
@@ -3002,7 +3002,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
 	return oid;
 }
 
-static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
+static int zcache_frontswap_store(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -3025,7 +3025,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
 
 /* returns 0 if the page was successfully gotten from frontswap, -1 if
  * was not present (should never happen!) */
-static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
+static int zcache_frontswap_load(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -3080,8 +3080,8 @@ static void zcache_frontswap_init(unsigned ignored)
 }
 
 static struct frontswap_ops zcache_frontswap_ops = {
-	.put_page = zcache_frontswap_put_page,
-	.get_page = zcache_frontswap_get_page,
+	.store = zcache_frontswap_store,
+	.load = zcache_frontswap_load,
 	.invalidate_page = zcache_frontswap_flush_page,
 	.invalidate_area = zcache_frontswap_flush_area,
 	.init = zcache_frontswap_init
diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index 2734dacacbaf..784c796b9848 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -1835,7 +1835,7 @@ static int zcache_frontswap_poolid = -1;
  * Swizzling increases objects per swaptype, increasing tmem concurrency
  * for heavy swaploads.  Later, larger nr_cpus -> larger SWIZ_BITS
  * Setting SWIZ_BITS to 27 basically reconstructs the swap entry from
- * frontswap_get_page(), but has side-effects. Hence using 8.
+ * frontswap_load(), but has side-effects. Hence using 8.
  */
 #define SWIZ_BITS		8
 #define SWIZ_MASK		((1 << SWIZ_BITS) - 1)
@@ -1849,7 +1849,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
 	return oid;
 }
 
-static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
+static int zcache_frontswap_store(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -1870,7 +1870,7 @@ static int zcache_frontswap_put_page(unsigned type, pgoff_t offset,
 
 /* returns 0 if the page was successfully gotten from frontswap, -1 if
  * was not present (should never happen!) */
-static int zcache_frontswap_get_page(unsigned type, pgoff_t offset,
+static int zcache_frontswap_load(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -1919,8 +1919,8 @@ static void zcache_frontswap_init(unsigned ignored)
 }
 
 static struct frontswap_ops zcache_frontswap_ops = {
-	.put_page = zcache_frontswap_put_page,
-	.get_page = zcache_frontswap_get_page,
+	.store = zcache_frontswap_store,
+	.load = zcache_frontswap_load,
 	.invalidate_page = zcache_frontswap_flush_page,
 	.invalidate_area = zcache_frontswap_flush_area,
 	.init = zcache_frontswap_init
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index dcb79521e6c8..89f264c67420 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -269,7 +269,7 @@ static inline struct tmem_oid oswiz(unsigned type, u32 ind)
 }
 
 /* returns 0 if the page was successfully put into frontswap, -1 if not */
-static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
+static int tmem_frontswap_store(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -295,7 +295,7 @@ static int tmem_frontswap_put_page(unsigned type, pgoff_t offset,
  * returns 0 if the page was successfully gotten from frontswap, -1 if
  * was not present (should never happen!)
  */
-static int tmem_frontswap_get_page(unsigned type, pgoff_t offset,
+static int tmem_frontswap_load(unsigned type, pgoff_t offset,
 				   struct page *page)
 {
 	u64 ind64 = (u64)offset;
@@ -362,8 +362,8 @@ static int __init no_frontswap(char *s)
 __setup("nofrontswap", no_frontswap);
 
 static struct frontswap_ops __initdata tmem_frontswap_ops = {
-	.put_page = tmem_frontswap_put_page,
-	.get_page = tmem_frontswap_get_page,
+	.store = tmem_frontswap_store,
+	.load = tmem_frontswap_load,
 	.invalidate_page = tmem_frontswap_flush_page,
 	.invalidate_area = tmem_frontswap_flush_area,
 	.init = tmem_frontswap_init
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 68ff7af5c5fb..0e4e2eec5c1d 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -7,8 +7,8 @@
 
 struct frontswap_ops {
 	void (*init)(unsigned);
-	int (*put_page)(unsigned, pgoff_t, struct page *);
-	int (*get_page)(unsigned, pgoff_t, struct page *);
+	int (*store)(unsigned, pgoff_t, struct page *);
+	int (*load)(unsigned, pgoff_t, struct page *);
 	void (*invalidate_page)(unsigned, pgoff_t);
 	void (*invalidate_area)(unsigned);
 };
@@ -21,8 +21,8 @@ extern unsigned long frontswap_curr_pages(void);
 extern void frontswap_writethrough(bool);
 
 extern void __frontswap_init(unsigned type);
-extern int __frontswap_put_page(struct page *page);
-extern int __frontswap_get_page(struct page *page);
+extern int __frontswap_store(struct page *page);
+extern int __frontswap_load(struct page *page);
 extern void __frontswap_invalidate_page(unsigned, pgoff_t);
 extern void __frontswap_invalidate_area(unsigned);
 
@@ -88,21 +88,21 @@ static inline unsigned long *frontswap_map_get(struct swap_info_struct *p)
 }
 #endif
 
-static inline int frontswap_put_page(struct page *page)
+static inline int frontswap_store(struct page *page)
 {
 	int ret = -1;
 
 	if (frontswap_enabled)
-		ret = __frontswap_put_page(page);
+		ret = __frontswap_store(page);
 	return ret;
 }
 
-static inline int frontswap_get_page(struct page *page)
+static inline int frontswap_load(struct page *page)
 {
 	int ret = -1;
 
 	if (frontswap_enabled)
-		ret = __frontswap_get_page(page);
+		ret = __frontswap_load(page);
 	return ret;
 }
 
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 8c0a5f8683f0..e25025574a02 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -39,7 +39,7 @@ bool frontswap_enabled __read_mostly;
 EXPORT_SYMBOL(frontswap_enabled);
 
 /*
- * If enabled, frontswap_put will return failure even on success.  As
+ * If enabled, frontswap_store will return failure even on success.  As
  * a result, the swap subsystem will always write the page to swap, in
  * effect converting frontswap into a writethrough cache.  In this mode,
  * there is no direct reduction in swap writes, but a frontswap backend
@@ -54,27 +54,27 @@ static bool frontswap_writethrough_enabled __read_mostly;
  * properly configured).  These are for information only so are not protected
  * against increment races.
  */
-static u64 frontswap_gets;
-static u64 frontswap_succ_puts;
-static u64 frontswap_failed_puts;
+static u64 frontswap_loads;
+static u64 frontswap_succ_stores;
+static u64 frontswap_failed_stores;
 static u64 frontswap_invalidates;
 
-static inline void inc_frontswap_gets(void) {
-	frontswap_gets++;
+static inline void inc_frontswap_loads(void) {
+	frontswap_loads++;
 }
-static inline void inc_frontswap_succ_puts(void) {
-	frontswap_succ_puts++;
+static inline void inc_frontswap_succ_stores(void) {
+	frontswap_succ_stores++;
 }
-static inline void inc_frontswap_failed_puts(void) {
-	frontswap_failed_puts++;
+static inline void inc_frontswap_failed_stores(void) {
+	frontswap_failed_stores++;
 }
 static inline void inc_frontswap_invalidates(void) {
 	frontswap_invalidates++;
 }
 #else
-static inline void inc_frontswap_gets(void) { }
-static inline void inc_frontswap_succ_puts(void) { }
-static inline void inc_frontswap_failed_puts(void) { }
+static inline void inc_frontswap_loads(void) { }
+static inline void inc_frontswap_succ_stores(void) { }
+static inline void inc_frontswap_failed_stores(void) { }
 static inline void inc_frontswap_invalidates(void) { }
 #endif
 /*
@@ -116,13 +116,13 @@ void __frontswap_init(unsigned type)
 EXPORT_SYMBOL(__frontswap_init);
 
 /*
- * "Put" data from a page to frontswap and associate it with the page's
+ * "Store" data from a page to frontswap and associate it with the page's
  * swaptype and offset.  Page must be locked and in the swap cache.
  * If frontswap already contains a page with matching swaptype and
  * offset, the frontswap implmentation may either overwrite the data and
  * return success or invalidate the page from frontswap and return failure.
  */
-int __frontswap_put_page(struct page *page)
+int __frontswap_store(struct page *page)
 {
 	int ret = -1, dup = 0;
 	swp_entry_t entry = { .val = page_private(page), };
@@ -134,10 +134,10 @@ int __frontswap_put_page(struct page *page)
 	BUG_ON(sis == NULL);
 	if (frontswap_test(sis, offset))
 		dup = 1;
-	ret = (*frontswap_ops.put_page)(type, offset, page);
+	ret = (*frontswap_ops.store)(type, offset, page);
 	if (ret == 0) {
 		frontswap_set(sis, offset);
-		inc_frontswap_succ_puts();
+		inc_frontswap_succ_stores();
 		if (!dup)
 			atomic_inc(&sis->frontswap_pages);
 	} else if (dup) {
@@ -147,22 +147,22 @@ int __frontswap_put_page(struct page *page)
 		 */
 		frontswap_clear(sis, offset);
 		atomic_dec(&sis->frontswap_pages);
-		inc_frontswap_failed_puts();
+		inc_frontswap_failed_stores();
 	} else
-		inc_frontswap_failed_puts();
+		inc_frontswap_failed_stores();
 	if (frontswap_writethrough_enabled)
 		/* report failure so swap also writes to swap device */
 		ret = -1;
 	return ret;
 }
-EXPORT_SYMBOL(__frontswap_put_page);
+EXPORT_SYMBOL(__frontswap_store);
 
 /*
  * "Get" data from frontswap associated with swaptype and offset that were
  * specified when the data was put to frontswap and use it to fill the
  * specified page with data. Page must be locked and in the swap cache.
  */
-int __frontswap_get_page(struct page *page)
+int __frontswap_load(struct page *page)
 {
 	int ret = -1;
 	swp_entry_t entry = { .val = page_private(page), };
@@ -173,12 +173,12 @@ int __frontswap_get_page(struct page *page)
 	BUG_ON(!PageLocked(page));
 	BUG_ON(sis == NULL);
 	if (frontswap_test(sis, offset))
-		ret = (*frontswap_ops.get_page)(type, offset, page);
+		ret = (*frontswap_ops.load)(type, offset, page);
 	if (ret == 0)
-		inc_frontswap_gets();
+		inc_frontswap_loads();
 	return ret;
 }
-EXPORT_SYMBOL(__frontswap_get_page);
+EXPORT_SYMBOL(__frontswap_load);
 
 /*
  * Invalidate any data from frontswap associated with the specified swaptype
@@ -301,10 +301,10 @@ static int __init init_frontswap(void)
 	struct dentry *root = debugfs_create_dir("frontswap", NULL);
 	if (root == NULL)
 		return -ENXIO;
-	debugfs_create_u64("gets", S_IRUGO, root, &frontswap_gets);
-	debugfs_create_u64("succ_puts", S_IRUGO, root, &frontswap_succ_puts);
-	debugfs_create_u64("failed_puts", S_IRUGO, root,
-				&frontswap_failed_puts);
+	debugfs_create_u64("loads", S_IRUGO, root, &frontswap_loads);
+	debugfs_create_u64("succ_stores", S_IRUGO, root, &frontswap_succ_stores);
+	debugfs_create_u64("failed_stores", S_IRUGO, root,
+				&frontswap_failed_stores);
 	debugfs_create_u64("invalidates", S_IRUGO,
 				root, &frontswap_invalidates);
 #endif
diff --git a/mm/page_io.c b/mm/page_io.c
index 651a91259317..34f02923744c 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -99,7 +99,7 @@ int swap_writepage(struct page *page, struct writeback_control *wbc)
 		unlock_page(page);
 		goto out;
 	}
-	if (frontswap_put_page(page) == 0) {
+	if (frontswap_store(page) == 0) {
 		set_page_writeback(page);
 		unlock_page(page);
 		end_page_writeback(page);
@@ -129,7 +129,7 @@ int swap_readpage(struct page *page)
 
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(PageUptodate(page));
-	if (frontswap_get_page(page) == 0) {
+	if (frontswap_load(page) == 0) {
 		SetPageUptodate(page);
 		unlock_page(page);
 		goto out;
-- 
cgit 


From 8f888ef846d4481e24c74b4a91ece771d2bcbcb5 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Tue, 22 May 2012 22:43:41 -0400
Subject: jbd2: change disk layout for metadata checksumming

Define flags and allocate space in on-disk journal structures to support
checksumming of journal metadata.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/commit.c     |  4 ++--
 fs/jbd2/recovery.c   |  6 +++---
 include/linux/jbd2.h | 30 ++++++++++++++++++++++++++++--
 3 files changed, 33 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 840f70f50792..69d780310aea 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -627,7 +627,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
 
 		tag = (journal_block_tag_t *) tagp;
 		write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr);
-		tag->t_flags = cpu_to_be32(tag_flag);
+		tag->t_flags = cpu_to_be16(tag_flag);
 		tagp += tag_bytes;
 		space_left -= tag_bytes;
 
@@ -651,7 +651,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
                            submitting the IOs.  "tag" still points to
                            the last tag we set up. */
 
-			tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG);
+			tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
 
 start_journal_io:
 			for (i = 0; i < bufs; i++) {
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index c1a03354a22f..980f3d6b5f88 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -193,10 +193,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
 
 		nr++;
 		tagp += tag_bytes;
-		if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
+		if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
 			tagp += 16;
 
-		if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
+		if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
 			break;
 	}
 
@@ -485,7 +485,7 @@ static int do_one_pass(journal_t *journal,
 				unsigned long io_block;
 
 				tag = (journal_block_tag_t *) tagp;
-				flags = be32_to_cpu(tag->t_flags);
+				flags = be16_to_cpu(tag->t_flags);
 
 				io_block = next_log_block++;
 				wrap(journal, next_log_block);
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 912c30a8ddb1..809c439066c5 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -147,12 +147,24 @@ typedef struct journal_header_s
 #define JBD2_CRC32_CHKSUM   1
 #define JBD2_MD5_CHKSUM     2
 #define JBD2_SHA1_CHKSUM    3
+#define JBD2_CRC32C_CHKSUM  4
 
 #define JBD2_CRC32_CHKSUM_SIZE 4
 
 #define JBD2_CHECKSUM_BYTES (32 / sizeof(u32))
 /*
  * Commit block header for storing transactional checksums:
+ *
+ * NOTE: If FEATURE_COMPAT_CHECKSUM (checksum v1) is set, the h_chksum*
+ * fields are used to store a checksum of the descriptor and data blocks.
+ *
+ * If FEATURE_INCOMPAT_CSUM_V2 (checksum v2) is set, then the h_chksum
+ * field is used to store crc32c(uuid+commit_block).  Each journal metadata
+ * block gets its own checksum, and data block checksums are stored in
+ * journal_block_tag (in the descriptor).  The other h_chksum* fields are
+ * not used.
+ *
+ * Checksum v1 and v2 are mutually exclusive features.
  */
 struct commit_header {
 	__be32		h_magic;
@@ -175,13 +187,19 @@ struct commit_header {
 typedef struct journal_block_tag_s
 {
 	__be32		t_blocknr;	/* The on-disk block number */
-	__be32		t_flags;	/* See below */
+	__be16		t_checksum;	/* truncated crc32c(uuid+seq+block) */
+	__be16		t_flags;	/* See below */
 	__be32		t_blocknr_high; /* most-significant high 32bits. */
 } journal_block_tag_t;
 
 #define JBD2_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high))
 #define JBD2_TAG_SIZE64 (sizeof(journal_block_tag_t))
 
+/* Tail of descriptor block, for checksumming */
+struct jbd2_journal_block_tail {
+	__be32		t_checksum;	/* crc32c(uuid+descr_block) */
+};
+
 /*
  * The revoke descriptor: used on disk to describe a series of blocks to
  * be revoked from the log
@@ -192,6 +210,10 @@ typedef struct jbd2_journal_revoke_header_s
 	__be32		 r_count;	/* Count of bytes used in the block */
 } jbd2_journal_revoke_header_t;
 
+/* Tail of revoke block, for checksumming */
+struct jbd2_journal_revoke_tail {
+	__be32		r_checksum;	/* crc32c(uuid+revoke_block) */
+};
 
 /* Definitions for the journal tag flags word: */
 #define JBD2_FLAG_ESCAPE		1	/* on-disk block is escaped */
@@ -241,7 +263,10 @@ typedef struct journal_superblock_s
 	__be32	s_max_trans_data;	/* Limit of data blocks per trans. */
 
 /* 0x0050 */
-	__u32	s_padding[44];
+	__u8	s_checksum_type;	/* checksum type */
+	__u8	s_padding2[3];
+	__u32	s_padding[42];
+	__be32	s_checksum;		/* crc32c(superblock) */
 
 /* 0x0100 */
 	__u8	s_users[16*48];		/* ids of all fs'es sharing the log */
@@ -263,6 +288,7 @@ typedef struct journal_superblock_s
 #define JBD2_FEATURE_INCOMPAT_REVOKE		0x00000001
 #define JBD2_FEATURE_INCOMPAT_64BIT		0x00000002
 #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT	0x00000004
+#define JBD2_FEATURE_INCOMPAT_CSUM_V2		0x00000008
 
 /* Features known to this kernel version: */
 #define JBD2_KNOWN_COMPAT_FEATURES	JBD2_FEATURE_COMPAT_CHECKSUM
-- 
cgit 


From 1227dd773d8d4e3983b4b751f9ffa0f41402fb7c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 24 Apr 2012 02:44:49 -0400
Subject: TIF_NOTIFY_RESUME is defined on all targets now

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/tracehook.h |  2 --
 security/keys/keyctl.c    | 10 ----------
 2 files changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 51bd91d911c3..8a2a3fc9bd05 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -153,7 +153,6 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info,
 		ptrace_notify(SIGTRAP);
 }
 
-#ifdef TIF_NOTIFY_RESUME
 /**
  * set_notify_resume - cause tracehook_notify_resume() to be called
  * @task:		task that will call tracehook_notify_resume()
@@ -185,6 +184,5 @@ static inline void set_notify_resume(struct task_struct *task)
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
 }
-#endif	/* TIF_NOTIFY_RESUME */
 
 #endif	/* <linux/tracehook.h> */
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index ddb3e05bc5fc..534a634283a4 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1454,7 +1454,6 @@ long keyctl_get_security(key_serial_t keyid,
  */
 long keyctl_session_to_parent(void)
 {
-#ifdef TIF_NOTIFY_RESUME
 	struct task_struct *me, *parent;
 	const struct cred *mycred, *pcred;
 	struct cred *cred, *oldcred;
@@ -1542,15 +1541,6 @@ not_permitted:
 error_keyring:
 	key_ref_put(keyring_r);
 	return ret;
-
-#else /* !TIF_NOTIFY_RESUME */
-	/*
-	 * To be removed when TIF_NOTIFY_RESUME has been implemented on
-	 * m68k/xtensa
-	 */
-#warning TIF_NOTIFY_RESUME not implemented
-	return -EOPNOTSUPP;
-#endif /* !TIF_NOTIFY_RESUME */
 }
 
 /*
-- 
cgit 


From a42c6ded827dbd396d2efde7530620be029a72d1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 23 May 2012 14:44:37 -0400
Subject: move key_repace_session_keyring() into tracehook_notify_resume()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/signal.c      | 2 --
 arch/arm/kernel/signal.c        | 2 --
 arch/avr32/kernel/signal.c      | 2 --
 arch/blackfin/kernel/signal.c   | 2 --
 arch/c6x/kernel/signal.c        | 2 --
 arch/cris/kernel/ptrace.c       | 2 --
 arch/frv/kernel/signal.c        | 2 --
 arch/h8300/kernel/signal.c      | 2 --
 arch/hexagon/kernel/signal.c    | 2 --
 arch/ia64/kernel/process.c      | 2 --
 arch/m32r/kernel/signal.c       | 2 --
 arch/m68k/kernel/signal.c       | 5 +----
 arch/microblaze/kernel/signal.c | 5 +----
 arch/mips/kernel/signal.c       | 2 --
 arch/mn10300/kernel/signal.c    | 2 --
 arch/openrisc/kernel/signal.c   | 2 --
 arch/parisc/kernel/signal.c     | 2 --
 arch/powerpc/kernel/signal.c    | 2 --
 arch/s390/kernel/signal.c       | 2 --
 arch/score/kernel/signal.c      | 2 --
 arch/sh/kernel/signal_32.c      | 2 --
 arch/sh/kernel/signal_64.c      | 2 --
 arch/sparc/kernel/signal_32.c   | 2 --
 arch/sparc/kernel/signal_64.c   | 2 --
 arch/tile/kernel/process.c      | 2 --
 arch/um/kernel/process.c        | 5 +----
 arch/unicore32/kernel/signal.c  | 2 --
 arch/x86/kernel/signal.c        | 2 --
 arch/xtensa/kernel/signal.c     | 5 +----
 include/linux/tracehook.h       | 2 ++
 30 files changed, 6 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 10ab2d74ecbb..f6db3032ddf0 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -590,7 +590,5 @@ do_notify_resume(struct pt_regs *regs, struct switch_stack *sw,
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 4e5fdd9bd9e3..ec640412aed0 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -728,7 +728,5 @@ do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 	if (thread_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index ae386c304bee..e7595ef74f51 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -321,7 +321,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
 	if (ti->flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index e5bbc1a5edc2..fc9ecce8b6ce 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -336,8 +336,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs)
 	if (test_thread_flag(TIF_NOTIFY_RESUME)) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index cf37478c1169..9493f0bbf0a6 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -364,7 +364,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags,
 	if (thread_info_flags & (1 << TIF_NOTIFY_RESUME)) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/cris/kernel/ptrace.c b/arch/cris/kernel/ptrace.c
index d114ad3da9b1..58d44ee1a71f 100644
--- a/arch/cris/kernel/ptrace.c
+++ b/arch/cris/kernel/ptrace.c
@@ -40,7 +40,5 @@ void do_notify_resume(int canrestart, struct pt_regs *regs,
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 8cf5dca01758..595bf1e5a5dc 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -562,8 +562,6 @@ asmlinkage void do_notify_resume(__u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(__frame);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 
 } /* end do_notify_resume() */
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index d4b0555d2904..e58992ad789e 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -513,7 +513,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index 434866eb0f1c..21a3018cb9bf 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -273,8 +273,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
index 5e0e86ddb12f..dd6fc1449741 100644
--- a/arch/ia64/kernel/process.c
+++ b/arch/ia64/kernel/process.c
@@ -199,8 +199,6 @@ do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall)
 	if (test_thread_flag(TIF_NOTIFY_RESUME)) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(&scr->pt);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 
 	/* copy user rbs to kernel rbs */
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index f54d96993ea1..64804f1f5141 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -383,8 +383,6 @@ void do_notify_resume(struct pt_regs *regs, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 
 	clear_thread_flag(TIF_IRET);
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index d9f3d1900eed..973eec60cad4 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -1193,9 +1193,6 @@ void do_notify_resume(struct pt_regs *regs)
 	if (test_thread_flag(TIF_SIGPENDING))
 		do_signal(regs);
 
-	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
+	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
-	}
 }
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 7f4c7bef1642..5d796e32786e 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -401,9 +401,6 @@ void do_notify_resume(struct pt_regs *regs, int in_syscall)
 	if (test_thread_flag(TIF_SIGPENDING))
 		do_signal(regs, in_syscall);
 
-	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
+	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
-	}
 }
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 17f6ee30ad0d..8a6e6d116ab0 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -636,8 +636,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 890cf91767cc..b8b6aa1a6837 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -554,7 +554,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(current_frame());
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index e970743251ae..9ae611522953 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -376,7 +376,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs)
 	if (current_thread_info()->flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 4b9cb0d546d1..e7a7cd3e1120 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -638,7 +638,5 @@ void do_notify_resume(struct pt_regs *regs, long in_syscall)
 	if (test_thread_flag(TIF_NOTIFY_RESUME)) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 651c5963662b..bfc3ec1382fb 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -193,8 +193,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index f626232e216c..42a6e8b47f06 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -494,6 +494,4 @@ void do_notify_resume(struct pt_regs *regs)
 {
 	clear_thread_flag(TIF_NOTIFY_RESUME);
 	tracehook_notify_resume(regs);
-	if (current->replacement_session_keyring)
-		key_replace_session_keyring();
 }
diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c
index d4a49011c48a..302838d3acf6 100644
--- a/arch/score/kernel/signal.c
+++ b/arch/score/kernel/signal.c
@@ -356,7 +356,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, void *unused,
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index cb4172c8af7d..9d7bfd66f189 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -626,7 +626,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0,
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index b589a354c069..aa6428430842 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -685,7 +685,5 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned long thread_info
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 2b7e849f7c65..6b42e8622d12 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -590,8 +590,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0,
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index eafaab486b2d..c82cf1cc3965 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -607,8 +607,6 @@ void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c
index f572c19c4082..32817ab6062a 100644
--- a/arch/tile/kernel/process.c
+++ b/arch/tile/kernel/process.c
@@ -569,8 +569,6 @@ int do_work_pending(struct pt_regs *regs, u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 		return 1;
 	}
 	if (thread_info_flags & _TIF_SINGLESTEP) {
diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
index 3a2235e0abc3..ccb9a9d283f1 100644
--- a/arch/um/kernel/process.c
+++ b/arch/um/kernel/process.c
@@ -117,11 +117,8 @@ void interrupt_end(void)
 		schedule();
 	if (test_thread_flag(TIF_SIGPENDING))
 		do_signal();
-	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
+	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
 		tracehook_notify_resume(&current->thread.regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
-	}
 }
 
 void exit_thread(void)
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index 7754df6ef7d4..28782ad47b93 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -464,8 +464,6 @@ asmlinkage void do_notify_resume(struct pt_regs *regs,
 	if (thread_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 }
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index b68ccadd2ff4..9363b58b967c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -821,8 +821,6 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
 	}
 	if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
 		fire_user_return_notifiers();
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index c5e4ec0598d2..ea7e17778a75 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -548,9 +548,6 @@ void do_notify_resume(struct pt_regs *regs)
 	if (test_thread_flag(TIF_SIGPENDING))
 		do_signal(regs);
 
-	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME)) {
+	if (test_and_clear_thread_flag(TIF_NOTIFY_RESUME))
 		tracehook_notify_resume(regs);
-		if (current->replacement_session_keyring)
-			key_replace_session_keyring();
-	}
 }
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index 8a2a3fc9bd05..b9ca903bb553 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -183,6 +183,8 @@ static inline void set_notify_resume(struct task_struct *task)
  */
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
+	if (current->replacement_session_keyring)
+		key_replace_session_keyring();
 }
 
 #endif	/* <linux/tracehook.h> */
-- 
cgit 


From e73f8959af0439d114847eab5a8a5ce48f1217c4 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:07 +1000
Subject: task_work_add: generic process-context callbacks

Provide a simple mechanism that allows running code in the (nonatomic)
context of the arbitrary task.

The caller does task_work_add(task, task_work) and this task executes
task_work->func() either from do_notify_resume() or from do_exit().  The
callback can rely on PF_EXITING to detect the latter case.

"struct task_work" can be embedded in another struct, still it has "void
*data" to handle the most common/simple case.

This allows us to kill the ->replacement_session_keyring hack, and
potentially this can have more users.

Performance-wise, this adds 2 "unlikely(!hlist_empty())" checks into
tracehook_notify_resume() and do_exit().  But at the same time we can
remove the "replacement_session_keyring != NULL" checks from
arch/*/signal.c and exit_creds().

Note: task_work_add/task_work_run abuses ->pi_lock.  This is only because
this lock is already used by lookup_pi_state() to synchronize with
do_exit() setting PF_EXITING.  Fortunately the scope of this lock in
task_work.c is really tiny, and the code is unlikely anyway.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h     |  2 ++
 include/linux/task_work.h | 33 +++++++++++++++++++
 include/linux/tracehook.h | 11 +++++++
 kernel/Makefile           |  2 +-
 kernel/exit.c             |  5 ++-
 kernel/fork.c             |  1 +
 kernel/task_work.c        | 84 +++++++++++++++++++++++++++++++++++++++++++++++
 7 files changed, 136 insertions(+), 2 deletions(-)
 create mode 100644 include/linux/task_work.h
 create mode 100644 kernel/task_work.c

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5ea8baea9387..7930131abc1a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1400,6 +1400,8 @@ struct task_struct {
 	int (*notifier)(void *priv);
 	void *notifier_data;
 	sigset_t *notifier_mask;
+	struct hlist_head task_works;
+
 	struct audit_context *audit_context;
 #ifdef CONFIG_AUDITSYSCALL
 	uid_t loginuid;
diff --git a/include/linux/task_work.h b/include/linux/task_work.h
new file mode 100644
index 000000000000..294d5d5e90b1
--- /dev/null
+++ b/include/linux/task_work.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_TASK_WORK_H
+#define _LINUX_TASK_WORK_H
+
+#include <linux/list.h>
+#include <linux/sched.h>
+
+struct task_work;
+typedef void (*task_work_func_t)(struct task_work *);
+
+struct task_work {
+	struct hlist_node hlist;
+	task_work_func_t func;
+	void *data;
+};
+
+static inline void
+init_task_work(struct task_work *twork, task_work_func_t func, void *data)
+{
+	twork->func = func;
+	twork->data = data;
+}
+
+int task_work_add(struct task_struct *task, struct task_work *twork, bool);
+struct task_work *task_work_cancel(struct task_struct *, task_work_func_t);
+void task_work_run(void);
+
+static inline void exit_task_work(struct task_struct *task)
+{
+	if (unlikely(!hlist_empty(&task->task_works)))
+		task_work_run();
+}
+
+#endif	/* _LINUX_TASK_WORK_H */
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b9ca903bb553..b2dd0917ca0d 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -49,6 +49,7 @@
 #include <linux/sched.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
+#include <linux/task_work.h>
 struct linux_binprm;
 
 /*
@@ -164,8 +165,10 @@ static inline void tracehook_signal_handler(int sig, siginfo_t *info,
  */
 static inline void set_notify_resume(struct task_struct *task)
 {
+#ifdef TIF_NOTIFY_RESUME
 	if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_RESUME))
 		kick_process(task);
+#endif
 }
 
 /**
@@ -185,6 +188,14 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
 	if (current->replacement_session_keyring)
 		key_replace_session_keyring();
+	/*
+	 * The caller just cleared TIF_NOTIFY_RESUME. This barrier
+	 * pairs with task_work_add()->set_notify_resume() after
+	 * hlist_add_head(task->task_works);
+	 */
+	smp_mb__after_clear_bit();
+	if (unlikely(!hlist_empty(&current->task_works)))
+		task_work_run();
 }
 
 #endif	/* <linux/tracehook.h> */
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c07f30fa9b7..bf1034008aca 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -5,7 +5,7 @@
 obj-y     = fork.o exec_domain.o panic.o printk.o \
 	    cpu.o exit.o itimer.o time.o softirq.o resource.o \
 	    sysctl.o sysctl_binary.o capability.o ptrace.o timer.o user.o \
-	    signal.o sys.o kmod.o workqueue.o pid.o \
+	    signal.o sys.o kmod.o workqueue.o pid.o task_work.o \
 	    rcupdate.o extable.o params.o posix-timers.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..3d93325e0b1a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -946,11 +946,14 @@ void do_exit(long code)
 	exit_signals(tsk);  /* sets PF_EXITING */
 	/*
 	 * tsk->flags are checked in the futex code to protect against
-	 * an exiting task cleaning up the robust pi futexes.
+	 * an exiting task cleaning up the robust pi futexes, and in
+	 * task_work_add() to avoid the race with exit_task_work().
 	 */
 	smp_mb();
 	raw_spin_unlock_wait(&tsk->pi_lock);
 
+	exit_task_work(tsk);
+
 	exit_irq_thread();
 
 	if (unlikely(in_atomic()))
diff --git a/kernel/fork.c b/kernel/fork.c
index 05c813dc9ecc..a46db217a589 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1411,6 +1411,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	 */
 	p->group_leader = p;
 	INIT_LIST_HEAD(&p->thread_group);
+	INIT_HLIST_HEAD(&p->task_works);
 
 	/* Now that the task is set up, run cgroup callbacks if
 	 * necessary. We need to run them before the task is visible
diff --git a/kernel/task_work.c b/kernel/task_work.c
new file mode 100644
index 000000000000..82d1c794066d
--- /dev/null
+++ b/kernel/task_work.c
@@ -0,0 +1,84 @@
+#include <linux/spinlock.h>
+#include <linux/task_work.h>
+#include <linux/tracehook.h>
+
+int
+task_work_add(struct task_struct *task, struct task_work *twork, bool notify)
+{
+	unsigned long flags;
+	int err = -ESRCH;
+
+#ifndef TIF_NOTIFY_RESUME
+	if (notify)
+		return -ENOTSUPP;
+#endif
+	/*
+	 * We must not insert the new work if the task has already passed
+	 * exit_task_work(). We rely on do_exit()->raw_spin_unlock_wait()
+	 * and check PF_EXITING under pi_lock.
+	 */
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	if (likely(!(task->flags & PF_EXITING))) {
+		hlist_add_head(&twork->hlist, &task->task_works);
+		err = 0;
+	}
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	/* test_and_set_bit() implies mb(), see tracehook_notify_resume(). */
+	if (likely(!err) && notify)
+		set_notify_resume(task);
+	return err;
+}
+
+struct task_work *
+task_work_cancel(struct task_struct *task, task_work_func_t func)
+{
+	unsigned long flags;
+	struct task_work *twork;
+	struct hlist_node *pos;
+
+	raw_spin_lock_irqsave(&task->pi_lock, flags);
+	hlist_for_each_entry(twork, pos, &task->task_works, hlist) {
+		if (twork->func == func) {
+			hlist_del(&twork->hlist);
+			goto found;
+		}
+	}
+	twork = NULL;
+ found:
+	raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+
+	return twork;
+}
+
+void task_work_run(void)
+{
+	struct task_struct *task = current;
+	struct hlist_head task_works;
+	struct hlist_node *pos;
+
+	raw_spin_lock_irq(&task->pi_lock);
+	hlist_move_list(&task->task_works, &task_works);
+	raw_spin_unlock_irq(&task->pi_lock);
+
+	if (unlikely(hlist_empty(&task_works)))
+		return;
+	/*
+	 * We use hlist to save the space in task_struct, but we want fifo.
+	 * Find the last entry, the list should be short, then process them
+	 * in reverse order.
+	 */
+	for (pos = task_works.first; pos->next; pos = pos->next)
+		;
+
+	for (;;) {
+		struct hlist_node **pprev = pos->pprev;
+		struct task_work *twork = container_of(pos, struct task_work,
+							hlist);
+		twork->func(twork);
+
+		if (pprev == &task_works.first)
+			break;
+		pos = container_of(pprev, struct hlist_node, next);
+	}
+}
-- 
cgit 


From 4d1d61a6b203d957777d73fcebf19d90b038b5b2 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:08 +1000
Subject: genirq: reimplement exit_irq_thread() hook via task_work_add()

exit_irq_thread() and task->irq_thread are needed to handle the unexpected
(and unlikely) exit of irq-thread.

We can use task_work instead and make this all private to
kernel/irq/manage.c, cleanup plus micro-optimization.

1. rename exit_irq_thread() to irq_thread_dtor(), make it
   static, and move it up before irq_thread().

2. change irq_thread() to do task_work_add(irq_thread_dtor)
   at the start and task_work_cancel() before return.

   tracehook_notify_resume() can never play with kthreads,
   only do_exit()->exit_task_work() can call the callback
   and this is what we want.

3. remove task_struct->irq_thread and the special hook
   in do_exit().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/interrupt.h |  4 ---
 include/linux/sched.h     | 10 ++-----
 kernel/exit.c             |  2 --
 kernel/irq/manage.c       | 68 +++++++++++++++++++++++------------------------
 4 files changed, 35 insertions(+), 49 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index c91171599cb6..e68a8e53bb59 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -142,8 +142,6 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler,
 extern int __must_check
 request_percpu_irq(unsigned int irq, irq_handler_t handler,
 		   const char *devname, void __percpu *percpu_dev_id);
-
-extern void exit_irq_thread(void);
 #else
 
 extern int __must_check
@@ -177,8 +175,6 @@ request_percpu_irq(unsigned int irq, irq_handler_t handler,
 {
 	return request_irq(irq, handler, 0, devname, percpu_dev_id);
 }
-
-static inline void exit_irq_thread(void) { }
 #endif
 
 extern void free_irq(unsigned int, void *);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7930131abc1a..da013853a622 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1301,11 +1301,6 @@ struct task_struct {
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-	/* IRQ handler threads */
-	unsigned irq_thread:1;
-#endif
-
 	pid_t pid;
 	pid_t tgid;
 
@@ -1313,10 +1308,9 @@ struct task_struct {
 	/* Canary value for the -fstack-protector gcc feature */
 	unsigned long stack_canary;
 #endif
-
-	/* 
+	/*
 	 * pointers to (original) parent process, youngest child, younger sibling,
-	 * older sibling, respectively.  (p->father can be replaced with 
+	 * older sibling, respectively.  (p->father can be replaced with
 	 * p->real_parent->pid)
 	 */
 	struct task_struct __rcu *real_parent; /* real parent process */
diff --git a/kernel/exit.c b/kernel/exit.c
index 3d93325e0b1a..3ecd096e5d4d 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -954,8 +954,6 @@ void do_exit(long code)
 
 	exit_task_work(tsk);
 
-	exit_irq_thread();
-
 	if (unlikely(in_atomic()))
 		printk(KERN_INFO "note: %s[%d] exited with preempt_count %d\n",
 				current->comm, task_pid_nr(current),
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index bb32326afe87..4d1f8f897414 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -14,6 +14,7 @@
 #include <linux/interrupt.h>
 #include <linux/slab.h>
 #include <linux/sched.h>
+#include <linux/task_work.h>
 
 #include "internals.h"
 
@@ -773,11 +774,39 @@ static void wake_threads_waitq(struct irq_desc *desc)
 		wake_up(&desc->wait_for_threads);
 }
 
+static void irq_thread_dtor(struct task_work *unused)
+{
+	struct task_struct *tsk = current;
+	struct irq_desc *desc;
+	struct irqaction *action;
+
+	if (WARN_ON_ONCE(!(current->flags & PF_EXITING)))
+		return;
+
+	action = kthread_data(tsk);
+
+	pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
+	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
+
+
+	desc = irq_to_desc(action->irq);
+	/*
+	 * If IRQTF_RUNTHREAD is set, we need to decrement
+	 * desc->threads_active and wake possible waiters.
+	 */
+	if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
+		wake_threads_waitq(desc);
+
+	/* Prevent a stale desc->threads_oneshot */
+	irq_finalize_oneshot(desc, action);
+}
+
 /*
  * Interrupt handler thread
  */
 static int irq_thread(void *data)
 {
+	struct task_work on_exit_work;
 	static const struct sched_param param = {
 		.sched_priority = MAX_USER_RT_PRIO/2,
 	};
@@ -793,7 +822,9 @@ static int irq_thread(void *data)
 		handler_fn = irq_thread_fn;
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
-	current->irq_thread = 1;
+
+	init_task_work(&on_exit_work, irq_thread_dtor, NULL);
+	task_work_add(current, &on_exit_work, false);
 
 	while (!irq_wait_for_interrupt(action)) {
 		irqreturn_t action_ret;
@@ -815,44 +846,11 @@ static int irq_thread(void *data)
 	 * cannot touch the oneshot mask at this point anymore as
 	 * __setup_irq() might have given out currents thread_mask
 	 * again.
-	 *
-	 * Clear irq_thread. Otherwise exit_irq_thread() would make
-	 * fuzz about an active irq thread going into nirvana.
 	 */
-	current->irq_thread = 0;
+	task_work_cancel(current, irq_thread_dtor);
 	return 0;
 }
 
-/*
- * Called from do_exit()
- */
-void exit_irq_thread(void)
-{
-	struct task_struct *tsk = current;
-	struct irq_desc *desc;
-	struct irqaction *action;
-
-	if (!tsk->irq_thread)
-		return;
-
-	action = kthread_data(tsk);
-
-	pr_err("genirq: exiting task \"%s\" (%d) is an active IRQ thread (irq %d)\n",
-	       tsk->comm ? tsk->comm : "", tsk->pid, action->irq);
-
-	desc = irq_to_desc(action->irq);
-
-	/*
-	 * If IRQTF_RUNTHREAD is set, we need to decrement
-	 * desc->threads_active and wake possible waiters.
-	 */
-	if (test_and_clear_bit(IRQTF_RUNTHREAD, &action->thread_flags))
-		wake_threads_waitq(desc);
-
-	/* Prevent a stale desc->threads_oneshot */
-	irq_finalize_oneshot(desc, action);
-}
-
 static void irq_setup_forced_threading(struct irqaction *new)
 {
 	if (!force_irqthreads)
-- 
cgit 


From 413cd3d9abeaef590e5ce00564f7a443165db238 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:08 +1000
Subject: keys: change keyctl_session_to_parent() to use task_work_add()

Change keyctl_session_to_parent() to use task_work_add() and move
key_replace_session_keyring() logic into task_work->func().

Note that we do task_work_cancel() before task_work_add() to ensure that
only one work can be pending at any time.  This is important, we must not
allow user-space to abuse the parent's ->task_works list.

The callback, replace_session_keyring(), checks PF_EXITING.  I guess this
is not really needed but looks better.

As a side effect, this fixes the (unlikely) race.  The callers of
key_replace_session_keyring() and keyctl_session_to_parent() lack the
necessary barriers, the parent can miss the request.

Now we can remove task_struct->replacement_session_keyring and related
code.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/key.h          |  6 ++---
 security/keys/internal.h     |  2 ++
 security/keys/keyctl.c       | 63 ++++++++++++++++++++++++--------------------
 security/keys/process_keys.c | 20 +++++---------
 4 files changed, 46 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 5231800770e1..2a0ee11584e9 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -33,6 +33,8 @@ typedef uint32_t key_perm_t;
 
 struct key;
 
+#define key_replace_session_keyring()	do { } while (0)
+
 #ifdef CONFIG_KEYS
 
 #undef KEY_DEBUGGING
@@ -308,9 +310,6 @@ static inline bool key_is_instantiated(const struct key *key)
 #ifdef CONFIG_SYSCTL
 extern ctl_table key_sysctls[];
 #endif
-
-extern void key_replace_session_keyring(void);
-
 /*
  * the userspace interface
  */
@@ -334,7 +333,6 @@ extern void key_init(void);
 #define key_fsuid_changed(t)		do { } while(0)
 #define key_fsgid_changed(t)		do { } while(0)
 #define key_init()			do { } while(0)
-#define key_replace_session_keyring()	do { } while(0)
 
 #endif /* CONFIG_KEYS */
 #endif /* __KERNEL__ */
diff --git a/security/keys/internal.h b/security/keys/internal.h
index f711b094ed41..3dcbf86b0d31 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -14,6 +14,7 @@
 
 #include <linux/sched.h>
 #include <linux/key-type.h>
+#include <linux/task_work.h>
 
 #ifdef __KDEBUG
 #define kenter(FMT, ...) \
@@ -148,6 +149,7 @@ extern key_ref_t lookup_user_key(key_serial_t id, unsigned long flags,
 #define KEY_LOOKUP_FOR_UNLINK	0x04
 
 extern long join_session_keyring(const char *name);
+extern void key_change_session_keyring(struct task_work *twork);
 
 extern struct work_struct key_gc_work;
 extern unsigned key_gc_delay;
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 534a634283a4..2f28126215a2 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1456,47 +1456,55 @@ long keyctl_session_to_parent(void)
 {
 	struct task_struct *me, *parent;
 	const struct cred *mycred, *pcred;
-	struct cred *cred, *oldcred;
+	struct task_work *newwork, *oldwork;
 	key_ref_t keyring_r;
+	struct cred *cred;
 	int ret;
 
 	keyring_r = lookup_user_key(KEY_SPEC_SESSION_KEYRING, 0, KEY_LINK);
 	if (IS_ERR(keyring_r))
 		return PTR_ERR(keyring_r);
 
+	ret = -ENOMEM;
+	newwork = kmalloc(sizeof(struct task_work), GFP_KERNEL);
+	if (!newwork)
+		goto error_keyring;
+
 	/* our parent is going to need a new cred struct, a new tgcred struct
 	 * and new security data, so we allocate them here to prevent ENOMEM in
 	 * our parent */
-	ret = -ENOMEM;
 	cred = cred_alloc_blank();
 	if (!cred)
-		goto error_keyring;
+		goto error_newwork;
 
 	cred->tgcred->session_keyring = key_ref_to_ptr(keyring_r);
-	keyring_r = NULL;
+	init_task_work(newwork, key_change_session_keyring, cred);
 
 	me = current;
 	rcu_read_lock();
 	write_lock_irq(&tasklist_lock);
 
-	parent = me->real_parent;
 	ret = -EPERM;
+	oldwork = NULL;
+	parent = me->real_parent;
 
 	/* the parent mustn't be init and mustn't be a kernel thread */
 	if (parent->pid <= 1 || !parent->mm)
-		goto not_permitted;
+		goto unlock;
 
 	/* the parent must be single threaded */
 	if (!thread_group_empty(parent))
-		goto not_permitted;
+		goto unlock;
 
 	/* the parent and the child must have different session keyrings or
 	 * there's no point */
 	mycred = current_cred();
 	pcred = __task_cred(parent);
 	if (mycred == pcred ||
-	    mycred->tgcred->session_keyring == pcred->tgcred->session_keyring)
-		goto already_same;
+	    mycred->tgcred->session_keyring == pcred->tgcred->session_keyring) {
+		ret = 0;
+		goto unlock;
+	}
 
 	/* the parent must have the same effective ownership and mustn't be
 	 * SUID/SGID */
@@ -1506,38 +1514,37 @@ long keyctl_session_to_parent(void)
 	    pcred->gid	!= mycred->egid	||
 	    pcred->egid	!= mycred->egid	||
 	    pcred->sgid	!= mycred->egid)
-		goto not_permitted;
+		goto unlock;
 
 	/* the keyrings must have the same UID */
 	if ((pcred->tgcred->session_keyring &&
 	     pcred->tgcred->session_keyring->uid != mycred->euid) ||
 	    mycred->tgcred->session_keyring->uid != mycred->euid)
-		goto not_permitted;
+		goto unlock;
 
-	/* if there's an already pending keyring replacement, then we replace
-	 * that */
-	oldcred = parent->replacement_session_keyring;
+	/* cancel an already pending keyring replacement */
+	oldwork = task_work_cancel(parent, key_change_session_keyring);
 
 	/* the replacement session keyring is applied just prior to userspace
 	 * restarting */
-	parent->replacement_session_keyring = cred;
-	cred = NULL;
-	set_ti_thread_flag(task_thread_info(parent), TIF_NOTIFY_RESUME);
-
-	write_unlock_irq(&tasklist_lock);
-	rcu_read_unlock();
-	if (oldcred)
-		put_cred(oldcred);
-	return 0;
-
-already_same:
-	ret = 0;
-not_permitted:
+	ret = task_work_add(parent, newwork, true);
+	if (!ret)
+		newwork = NULL;
+unlock:
 	write_unlock_irq(&tasklist_lock);
 	rcu_read_unlock();
-	put_cred(cred);
+	if (oldwork) {
+		put_cred(oldwork->data);
+		kfree(oldwork);
+	}
+	if (newwork) {
+		put_cred(newwork->data);
+		kfree(newwork);
+	}
 	return ret;
 
+error_newwork:
+	kfree(newwork);
 error_keyring:
 	key_ref_put(keyring_r);
 	return ret;
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index d71056db7b67..4ad54eea1ea4 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -834,23 +834,17 @@ error:
  * Replace a process's session keyring on behalf of one of its children when
  * the target  process is about to resume userspace execution.
  */
-void key_replace_session_keyring(void)
+void key_change_session_keyring(struct task_work *twork)
 {
-	const struct cred *old;
-	struct cred *new;
-
-	if (!current->replacement_session_keyring)
-		return;
+	const struct cred *old = current_cred();
+	struct cred *new = twork->data;
 
-	write_lock_irq(&tasklist_lock);
-	new = current->replacement_session_keyring;
-	current->replacement_session_keyring = NULL;
-	write_unlock_irq(&tasklist_lock);
-
-	if (!new)
+	kfree(twork);
+	if (unlikely(current->flags & PF_EXITING)) {
+		put_cred(new);
 		return;
+	}
 
-	old = current_cred();
 	new->  uid	= old->  uid;
 	new-> euid	= old-> euid;
 	new-> suid	= old-> suid;
-- 
cgit 


From dea649b8ac1861107c5d91e1a71121434fc64193 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:09 +1000
Subject: keys: kill the dummy key_replace_session_keyring()

After the previouse change key_replace_session_keyring() becomes a nop.
Remove the dummy definition in key.h and update the callers in
arch/*/kernel/signal.c.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/key.h       | 2 --
 include/linux/tracehook.h | 2 --
 2 files changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/key.h b/include/linux/key.h
index 2a0ee11584e9..4cd22ed627ef 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -33,8 +33,6 @@ typedef uint32_t key_perm_t;
 
 struct key;
 
-#define key_replace_session_keyring()	do { } while (0)
-
 #ifdef CONFIG_KEYS
 
 #undef KEY_DEBUGGING
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b2dd0917ca0d..6a4d82bedb03 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -186,8 +186,6 @@ static inline void set_notify_resume(struct task_struct *task)
  */
 static inline void tracehook_notify_resume(struct pt_regs *regs)
 {
-	if (current->replacement_session_keyring)
-		key_replace_session_keyring();
 	/*
 	 * The caller just cleared TIF_NOTIFY_RESUME. This barrier
 	 * pairs with task_work_add()->set_notify_resume() after
-- 
cgit 


From f23ca335462e3c84f13270b9e65f83936068ec2c Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Fri, 11 May 2012 10:59:09 +1000
Subject: keys: kill task_struct->replacement_session_keyring

Kill the no longer used task_struct->replacement_session_keyring, update
copy_creds() and exit_creds().

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Richard Kuo <rkuo@codeaurora.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Alexander Gordeev <agordeev@redhat.com>
Cc: Chris Zankel <chris@zankel.net>
Cc: David Smith <dsmith@redhat.com>
Cc: "Frank Ch. Eigler" <fche@redhat.com>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Larry Woodman <lwoodman@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/sched.h | 2 --
 kernel/cred.c         | 9 ---------
 2 files changed, 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index da013853a622..17c6c929ee94 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1357,8 +1357,6 @@ struct task_struct {
 					 * credentials (COW) */
 	const struct cred __rcu *cred;	/* effective (overridable) subjective task
 					 * credentials (COW) */
-	struct cred *replacement_session_keyring; /* for KEYCTL_SESSION_TO_PARENT */
-
 	char comm[TASK_COMM_LEN]; /* executable name excluding path
 				     - access with [gs]et_task_comm (which lock
 				       it with task_lock())
diff --git a/kernel/cred.c b/kernel/cred.c
index 430557ea488f..de728ac50d82 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -207,13 +207,6 @@ void exit_creds(struct task_struct *tsk)
 	validate_creds(cred);
 	alter_cred_subscribers(cred, -1);
 	put_cred(cred);
-
-	cred = (struct cred *) tsk->replacement_session_keyring;
-	if (cred) {
-		tsk->replacement_session_keyring = NULL;
-		validate_creds(cred);
-		put_cred(cred);
-	}
 }
 
 /**
@@ -396,8 +389,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags)
 	struct cred *new;
 	int ret;
 
-	p->replacement_session_keyring = NULL;
-
 	if (
 #ifdef CONFIG_KEYS
 		!p->cred->thread_keyring &&
-- 
cgit 


From e5400321a6f15ce0fe77c8455954f213ef7dcc54 Mon Sep 17 00:00:00 2001
From: Magnus Damm <magnus.damm@gmail.com>
Date: Wed, 9 May 2012 23:39:34 +0900
Subject: clockevents: Make clockevents_config() a global symbol

Make clockevents_config() into a global symbol to allow it to be used
by compiled-in clockevent drivers. This is needed by drivers that want
to update the timer frequency after registration time.

Signed-off-by: Magnus Damm <damm@opensource.se>
Tested-by: Simon Horman <horms@verge.net.au>
Cc: arnd@arndb.de
Cc: johnstul@us.ibm.com
Cc: rjw@sisk.pl
Cc: lethal@linux-sh.org
Cc: gregkh@linuxfoundation.org
Cc: olof@lixom.net
Cc: Magnus Damm <magnus.damm@gmail.com>
Link: http://lkml.kernel.org/r/20120509143934.27521.46553.sendpatchset@w520
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clockchips.h | 1 +
 kernel/time/clockevents.c  | 3 +--
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h
index 81e803e90aa4..acba894374a1 100644
--- a/include/linux/clockchips.h
+++ b/include/linux/clockchips.h
@@ -132,6 +132,7 @@ extern u64 clockevent_delta2ns(unsigned long latch,
 			       struct clock_event_device *evt);
 extern void clockevents_register_device(struct clock_event_device *dev);
 
+extern void clockevents_config(struct clock_event_device *dev, u32 freq);
 extern void clockevents_config_and_register(struct clock_event_device *dev,
 					    u32 freq, unsigned long min_delta,
 					    unsigned long max_delta);
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 9cd928f7a7c6..7e1ce012a851 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -297,8 +297,7 @@ void clockevents_register_device(struct clock_event_device *dev)
 }
 EXPORT_SYMBOL_GPL(clockevents_register_device);
 
-static void clockevents_config(struct clock_event_device *dev,
-			       u32 freq)
+void clockevents_config(struct clock_event_device *dev, u32 freq)
 {
 	u64 sec;
 
-- 
cgit 


From 01b5adcebb977bc61b64167adce6d8260c9da33c Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 27 May 2012 07:50:56 -0400
Subject: jbd2: Grab a reference to the crc32c driver if necessary

Obtain a reference to the crc32c driver if needed for the v2 checksum.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/Kconfig      |  2 ++
 fs/jbd2/journal.c    | 25 +++++++++++++++++++++++++
 include/linux/jbd2.h | 23 +++++++++++++++++++++++
 3 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index f32f346f4b0a..69a48c2944da 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,6 +1,8 @@
 config JBD2
 	tristate
 	select CRC32
+	select CRYPTO
+	select CRYPTO_CRC32C
 	help
 	  This is a generic journaling layer for block devices that support
 	  both 32-bit and 64-bit block numbers.  It is currently used by
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 63175f9391ab..f04ab6c4b428 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -1438,6 +1438,17 @@ static int journal_get_superblock(journal_t *journal)
 		goto out;
 	}
 
+	/* Load the checksum driver */
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) {
+		journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
+		if (IS_ERR(journal->j_chksum_driver)) {
+			printk(KERN_ERR "JBD: Cannot load crc32c driver.\n");
+			err = PTR_ERR(journal->j_chksum_driver);
+			journal->j_chksum_driver = NULL;
+			goto out;
+		}
+	}
+
 	set_buffer_verified(bh);
 
 	return 0;
@@ -1591,6 +1602,8 @@ int jbd2_journal_destroy(journal_t *journal)
 		iput(journal->j_inode);
 	if (journal->j_revoke)
 		jbd2_journal_destroy_revoke(journal);
+	if (journal->j_chksum_driver)
+		crypto_free_shash(journal->j_chksum_driver);
 	kfree(journal->j_wbuf);
 	kfree(journal);
 
@@ -1707,6 +1720,18 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 		sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
 		sb->s_feature_compat &=
 			~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
+
+		/* Load the checksum driver */
+		if (journal->j_chksum_driver == NULL) {
+			journal->j_chksum_driver = crypto_alloc_shash("crc32c",
+								      0, 0);
+			if (IS_ERR(journal->j_chksum_driver)) {
+				printk(KERN_ERR "JBD: Cannot load crc32c "
+				       "driver.\n");
+				journal->j_chksum_driver = NULL;
+				return 0;
+			}
+		}
 	}
 
 	/* If enabling v1 checksums, downgrade superblock */
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 809c439066c5..71e77dddebf1 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -31,6 +31,7 @@
 #include <linux/mutex.h>
 #include <linux/timer.h>
 #include <linux/slab.h>
+#include <crypto/hash.h>
 #endif
 
 #define journal_oom_retry 1
@@ -965,6 +966,9 @@ struct journal_s
 	 * superblock pointer here
 	 */
 	void *j_private;
+
+	/* Reference to checksum algorithm driver via cryptoapi */
+	struct crypto_shash *j_chksum_driver;
 };
 
 /*
@@ -1294,6 +1298,25 @@ static inline int jbd_space_needed(journal_t *journal)
 
 extern int jbd_blocks_per_page(struct inode *inode);
 
+static inline u32 jbd2_chksum(journal_t *journal, u32 crc,
+			      const void *address, unsigned int length)
+{
+	struct {
+		struct shash_desc shash;
+		char ctx[crypto_shash_descsize(journal->j_chksum_driver)];
+	} desc;
+	int err;
+
+	desc.shash.tfm = journal->j_chksum_driver;
+	desc.shash.flags = 0;
+	*(u32 *)desc.ctx = crc;
+
+	err = crypto_shash_update(&desc.shash, address, length);
+	BUG_ON(err);
+
+	return *(u32 *)desc.ctx;
+}
+
 #ifdef __KERNEL__
 
 #define buffer_trace_init(bh)	do {} while (0)
-- 
cgit 


From 4fd5ea43bc11602bfabe2c8f5378586d34bd2b0a Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 27 May 2012 08:08:22 -0400
Subject: jbd2: checksum journal superblock

Calculate and verify a checksum covering the journal superblock.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/jbd2/journal.c    | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/jbd2.h |  3 +++
 2 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index f04ab6c4b428..9072f03d30b5 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -106,6 +106,34 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
 	return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
 }
 
+static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
+{
+	__u32 csum, old_csum;
+
+	old_csum = sb->s_checksum;
+	sb->s_checksum = 0;
+	csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
+	sb->s_checksum = old_csum;
+
+	return cpu_to_be32(csum);
+}
+
+int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb)
+{
+	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+		return 1;
+
+	return sb->s_checksum == jbd2_superblock_csum(j, sb);
+}
+
+void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb)
+{
+	if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+		return;
+
+	sb->s_checksum = jbd2_superblock_csum(j, sb);
+}
+
 /*
  * Helper function used to manage commit timeouts
  */
@@ -1357,6 +1385,7 @@ static void jbd2_journal_update_sb_errno(journal_t *journal)
 	jbd_debug(1, "JBD2: updating superblock error (errno %d)\n",
 		  journal->j_errno);
 	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	jbd2_superblock_csum_set(journal, sb);
 	read_unlock(&journal->j_state_lock);
 
 	jbd2_write_superblock(journal, WRITE_SYNC);
@@ -1449,6 +1478,17 @@ static int journal_get_superblock(journal_t *journal)
 		}
 	}
 
+	/* Check superblock checksum */
+	if (!jbd2_superblock_csum_verify(journal, sb)) {
+		printk(KERN_ERR "JBD: journal checksum error\n");
+		goto out;
+	}
+
+	/* Precompute checksum seed for all metadata */
+	if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+		journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
+						   sizeof(sb->s_uuid));
+
 	set_buffer_verified(bh);
 
 	return 0;
@@ -1732,6 +1772,13 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat,
 				return 0;
 			}
 		}
+
+		/* Precompute checksum seed for all metadata */
+		if (JBD2_HAS_INCOMPAT_FEATURE(journal,
+					      JBD2_FEATURE_INCOMPAT_CSUM_V2))
+			journal->j_csum_seed = jbd2_chksum(journal, ~0,
+							   sb->s_uuid,
+							   sizeof(sb->s_uuid));
 	}
 
 	/* If enabling v1 checksums, downgrade superblock */
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 71e77dddebf1..a9632bc55d97 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -969,6 +969,9 @@ struct journal_s
 
 	/* Reference to checksum algorithm driver via cryptoapi */
 	struct crypto_shash *j_chksum_driver;
+
+	/* Precomputed journal UUID checksum for seeding other checksums */
+	__u32 j_csum_seed;
 };
 
 /*
-- 
cgit 


From e93376c20b70d1e62bb3246acd1bbe21fe58859f Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@us.ibm.com>
Date: Sun, 27 May 2012 08:12:42 -0400
Subject: ext4/jbd2: add metadata checksumming to the list of supported
 features

Activate the metadata checksumming feature by adding it to ext4 and
jbd2's lists of supported features.

Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
---
 fs/ext4/ext4.h       | 3 ++-
 include/linux/jbd2.h | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0ee9669394f5..d1e2735ac22f 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1494,7 +1494,8 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 					 EXT4_FEATURE_RO_COMPAT_EXTRA_ISIZE | \
 					 EXT4_FEATURE_RO_COMPAT_BTREE_DIR |\
 					 EXT4_FEATURE_RO_COMPAT_HUGE_FILE |\
-					 EXT4_FEATURE_RO_COMPAT_BIGALLOC)
+					 EXT4_FEATURE_RO_COMPAT_BIGALLOC |\
+					 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)
 
 /*
  * Default values for user and/or group using reserved blocks
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index a9632bc55d97..f334c7fab967 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -296,7 +296,8 @@ typedef struct journal_superblock_s
 #define JBD2_KNOWN_ROCOMPAT_FEATURES	0
 #define JBD2_KNOWN_INCOMPAT_FEATURES	(JBD2_FEATURE_INCOMPAT_REVOKE | \
 					JBD2_FEATURE_INCOMPAT_64BIT | \
-					JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
+					JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT | \
+					JBD2_FEATURE_INCOMPAT_CSUM_V2)
 
 #ifdef __KERNEL__
 
-- 
cgit 


From 617c8c11236716dcbda877e764b7bf37c6fd8063 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 29 May 2012 03:35:08 +0000
Subject: skb: avoid unnecessary reallocations in __skb_cow

At the beginning of __skb_cow, headroom gets set to a minimum of
NET_SKB_PAD. This causes unnecessary reallocations if the buffer was not
cloned and the headroom is just below NET_SKB_PAD, but still more than the
amount requested by the caller.
This was showing up frequently in my tests on VLAN tx, where
vlan_insert_tag calls skb_cow_head(skb, VLAN_HLEN).

Locally generated packets should have enough headroom, and for forward
paths, we already have NET_SKB_PAD bytes of headroom, so we don't need to
add any extra space here.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0e501714d47f..b534a1be540a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1896,8 +1896,6 @@ static inline int __skb_cow(struct sk_buff *skb, unsigned int headroom,
 {
 	int delta = 0;
 
-	if (headroom < NET_SKB_PAD)
-		headroom = NET_SKB_PAD;
 	if (headroom > skb_headroom(skb))
 		delta = headroom - skb_headroom(skb);
 
-- 
cgit 


From 2033e9bf06f07e049bbc77e9452856df846714cc Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 29 May 2012 09:30:40 +0000
Subject: net: add MODULE_ALIAS_NET_PF_PROTO_NAME

The MODULE_ALAIS_NET_PF macro set is missing a variant that allows for the
appending of an arbitrary string to the net-pf-<x>-proto-<y> base.  while
MODULE_ALIAS_NET_PF_PROTO_NAME_TYPE allows an appending of a numerical type, we
need to be able to append a generic string to support generic netlink families
that have neither a fix numberical protocol nor type number

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index 2d7510f38934..e9ac2df079ba 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -313,5 +313,8 @@ extern int kernel_sock_shutdown(struct socket *sock,
 	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
 		     "-type-" __stringify(type))
 
+#define MODULE_ALIAS_NET_PF_PROTO_NAME(pf, proto, name) \
+	MODULE_ALIAS("net-pf-" __stringify(pf) "-proto-" __stringify(proto) \
+		     name)
 #endif /* __KERNEL__ */
 #endif	/* _LINUX_NET_H */
-- 
cgit 


From e9412c37082b5c932e83364aaed0c38c2ce33acb Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@tuxdriver.com>
Date: Tue, 29 May 2012 09:30:41 +0000
Subject: genetlink: Build a generic netlink family module alias

Generic netlink searches for -type- formatted aliases when requesting a module to
fulfill a protocol request (i.e. net-pf-16-proto-16-type-<x>, where x is a type
value).  However generic netlink protocols have no well defined type numbers,
they have string names.  Modify genl_ctrl_getfamily to request an alias in the
format net-pf-16-proto-16-family-<x> instead, where x is a generic string, and
add a macro that builds on the previously added MODULE_ALIAS_NET_PF_PROTO_NAME
macro to allow modules to specifify those generic strings.

Note, l2tp previously hacked together an net-pf-16-proto-16-type-l2tp alias
using the MODULE_ALIAS macro, with these updates we can convert that to use the
PROTO_NAME macro.

Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
CC: Eric Dumazet <eric.dumazet@gmail.com>
CC: James Chapman <jchapman@katalix.com>
CC: David Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genetlink.h | 3 +++
 net/l2tp/l2tp_netlink.c   | 3 +--
 net/netlink/genetlink.c   | 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index 73c28dea10ae..7a114016ac7d 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -110,6 +110,9 @@ extern int lockdep_genl_is_held(void);
 #define genl_dereference(p)					\
 	rcu_dereference_protected(p, lockdep_genl_is_held())
 
+#define MODULE_ALIAS_GENL_FAMILY(family)\
+ MODULE_ALIAS_NET_PF_PROTO_NAME(PF_NETLINK, NETLINK_GENERIC, "-family-" family)
+
 #endif /* __KERNEL__ */
 
 #endif	/* __LINUX_GENERIC_NETLINK_H */
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 8577264378fe..ddc553e76671 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -923,5 +923,4 @@ MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
 MODULE_DESCRIPTION("L2TP netlink");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.0");
-MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
-	     __stringify(NETLINK_GENERIC) "-type-" "l2tp");
+MODULE_ALIAS_GENL_FAMILY("l2tp");
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 8340ace837f2..2cc7c1ee7690 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -836,7 +836,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info)
 #ifdef CONFIG_MODULES
 		if (res == NULL) {
 			genl_unlock();
-			request_module("net-pf-%d-proto-%d-type-%s",
+			request_module("net-pf-%d-proto-%d-family-%s",
 				       PF_NETLINK, NETLINK_GENERIC, name);
 			genl_lock();
 			res = genl_family_find_byname(name);
-- 
cgit 


From b0b0382bb4904965a9e9fca77ad87514dfda0d1c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 2 Apr 2012 14:34:06 -0400
Subject: ->encode_fh() API change

pass inode + parent's inode or NULL instead of dentry + bool saying
whether we want the parent or not.

NOTE: that needs ceph fix folded in.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/btrfs/export.c        | 15 ++++-----------
 fs/ceph/export.c         |  2 ++
 fs/exportfs/expfs.c      | 33 +++++++++++++++++++--------------
 fs/fat/inode.c           |  9 ++++-----
 fs/fuse/inode.c          | 17 +++++------------
 fs/gfs2/export.c         | 17 +++++------------
 fs/isofs/export.c        | 13 ++++---------
 fs/nilfs2/namei.c        | 22 ++++++++++------------
 fs/ocfs2/export.c        | 19 +++++++------------
 fs/reiserfs/inode.c      | 30 ++++++++++++------------------
 fs/reiserfs/reiserfs.h   |  4 ++--
 fs/udf/namei.c           | 14 +++++---------
 fs/xfs/xfs_export.c      | 23 +++++++++--------------
 include/linux/exportfs.h |  4 ++--
 mm/cleancache.c          |  6 ++----
 mm/shmem.c               |  6 ++----
 16 files changed, 94 insertions(+), 140 deletions(-)

(limited to 'include/linux')

diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index e887ee62b6d4..614f34a899c2 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -13,15 +13,14 @@
 					     parent_root_objectid) / 4)
 #define BTRFS_FID_SIZE_CONNECTABLE_ROOT (sizeof(struct btrfs_fid) / 4)
 
-static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
-			   int connectable)
+static int btrfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+			   struct inode *parent)
 {
 	struct btrfs_fid *fid = (struct btrfs_fid *)fh;
-	struct inode *inode = dentry->d_inode;
 	int len = *max_len;
 	int type;
 
-	if (connectable && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
+	if (parent && (len < BTRFS_FID_SIZE_CONNECTABLE)) {
 		*max_len = BTRFS_FID_SIZE_CONNECTABLE;
 		return 255;
 	} else if (len < BTRFS_FID_SIZE_NON_CONNECTABLE) {
@@ -36,19 +35,13 @@ static int btrfs_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 	fid->root_objectid = BTRFS_I(inode)->root->objectid;
 	fid->gen = inode->i_generation;
 
-	if (connectable && !S_ISDIR(inode->i_mode)) {
-		struct inode *parent;
+	if (parent) {
 		u64 parent_root_id;
 
-		spin_lock(&dentry->d_lock);
-
-		parent = dentry->d_parent->d_inode;
 		fid->parent_objectid = BTRFS_I(parent)->location.objectid;
 		fid->parent_gen = parent->i_generation;
 		parent_root_id = BTRFS_I(parent)->root->objectid;
 
-		spin_unlock(&dentry->d_lock);
-
 		if (parent_root_id != fid->root_objectid) {
 			fid->parent_root_objectid = parent_root_id;
 			len = BTRFS_FID_SIZE_CONNECTABLE_ROOT;
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index fbb2a643ef10..4f9234c6da61 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -247,7 +247,9 @@ static struct dentry *ceph_fh_to_parent(struct super_block *sb,
 }
 
 const struct export_operations ceph_export_ops = {
+#ifdef CEPH_BREAKAGE_FIXED
 	.encode_fh = ceph_encode_fh,
+#endif
 	.fh_to_dentry = ceph_fh_to_dentry,
 	.fh_to_parent = ceph_fh_to_parent,
 };
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index b05acb796135..b0201ca6e9c6 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -304,24 +304,23 @@ out:
 
 /**
  * export_encode_fh - default export_operations->encode_fh function
- * @dentry:  the dentry to encode
+ * @inode:   the object to encode
  * @fh:      where to store the file handle fragment
  * @max_len: maximum length to store there
- * @connectable: whether to store parent information
+ * @parent:  parent directory inode, if wanted
  *
  * This default encode_fh function assumes that the 32 inode number
  * is suitable for locating an inode, and that the generation number
  * can be used to check that it is still valid.  It places them in the
  * filehandle fragment where export_decode_fh expects to find them.
  */
-static int export_encode_fh(struct dentry *dentry, struct fid *fid,
-		int *max_len, int connectable)
+static int export_encode_fh(struct inode *inode, struct fid *fid,
+		int *max_len, struct inode *parent)
 {
-	struct inode * inode = dentry->d_inode;
 	int len = *max_len;
 	int type = FILEID_INO32_GEN;
 
-	if (connectable && (len < 4)) {
+	if (parent && (len < 4)) {
 		*max_len = 4;
 		return 255;
 	} else if (len < 2) {
@@ -332,14 +331,9 @@ static int export_encode_fh(struct dentry *dentry, struct fid *fid,
 	len = 2;
 	fid->i32.ino = inode->i_ino;
 	fid->i32.gen = inode->i_generation;
-	if (connectable && !S_ISDIR(inode->i_mode)) {
-		struct inode *parent;
-
-		spin_lock(&dentry->d_lock);
-		parent = dentry->d_parent->d_inode;
+	if (parent) {
 		fid->i32.parent_ino = parent->i_ino;
 		fid->i32.parent_gen = parent->i_generation;
-		spin_unlock(&dentry->d_lock);
 		len = 4;
 		type = FILEID_INO32_GEN_PARENT;
 	}
@@ -352,11 +346,22 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
 {
 	const struct export_operations *nop = dentry->d_sb->s_export_op;
 	int error;
+	struct dentry *p = NULL;
+	struct inode *inode = dentry->d_inode, *parent = NULL;
 
+	if (connectable && !S_ISDIR(inode->i_mode)) {
+		p = dget_parent(dentry);
+		/*
+		 * note that while p might've ceased to be our parent already,
+		 * it's still pinned by and still positive.
+		 */
+		parent = p->d_inode;
+	}
 	if (nop->encode_fh)
-		error = nop->encode_fh(dentry, fid->raw, max_len, connectable);
+		error = nop->encode_fh(inode, fid->raw, max_len, parent);
 	else
-		error = export_encode_fh(dentry, fid, max_len, connectable);
+		error = export_encode_fh(inode, fid, max_len, parent);
+	dput(p);
 
 	return error;
 }
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index b3d290c1b513..7edfaadc0787 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -752,10 +752,9 @@ static struct dentry *fat_fh_to_dentry(struct super_block *sb,
 }
 
 static int
-fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
+fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent)
 {
 	int len = *lenp;
-	struct inode *inode =  de->d_inode;
 	u32 ipos_h, ipos_m, ipos_l;
 
 	if (len < 5) {
@@ -771,9 +770,9 @@ fat_encode_fh(struct dentry *de, __u32 *fh, int *lenp, int connectable)
 	fh[1] = inode->i_generation;
 	fh[2] = ipos_h;
 	fh[3] = ipos_m | MSDOS_I(inode)->i_logstart;
-	spin_lock(&de->d_lock);
-	fh[4] = ipos_l | MSDOS_I(de->d_parent->d_inode)->i_logstart;
-	spin_unlock(&de->d_lock);
+	fh[4] = ipos_l;
+	if (parent)
+		fh[4] |= MSDOS_I(parent)->i_logstart;
 	return 3;
 }
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 56f6dcf30768..42678a33b7bb 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -627,12 +627,10 @@ static struct dentry *fuse_get_dentry(struct super_block *sb,
 	return ERR_PTR(err);
 }
 
-static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
-			   int connectable)
+static int fuse_encode_fh(struct inode *inode, u32 *fh, int *max_len,
+			   struct inode *parent)
 {
-	struct inode *inode = dentry->d_inode;
-	bool encode_parent = connectable && !S_ISDIR(inode->i_mode);
-	int len = encode_parent ? 6 : 3;
+	int len = parent ? 6 : 3;
 	u64 nodeid;
 	u32 generation;
 
@@ -648,14 +646,9 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 	fh[1] = (u32)(nodeid & 0xffffffff);
 	fh[2] = generation;
 
-	if (encode_parent) {
-		struct inode *parent;
-
-		spin_lock(&dentry->d_lock);
-		parent = dentry->d_parent->d_inode;
+	if (parent) {
 		nodeid = get_fuse_inode(parent)->nodeid;
 		generation = parent->i_generation;
-		spin_unlock(&dentry->d_lock);
 
 		fh[3] = (u32)(nodeid >> 32);
 		fh[4] = (u32)(nodeid & 0xffffffff);
@@ -663,7 +656,7 @@ static int fuse_encode_fh(struct dentry *dentry, u32 *fh, int *max_len,
 	}
 
 	*max_len = len;
-	return encode_parent ? 0x82 : 0x81;
+	return parent ? 0x82 : 0x81;
 }
 
 static struct dentry *fuse_fh_to_dentry(struct super_block *sb,
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 70ba891654f8..e8ed6d4a6181 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -28,15 +28,14 @@
 #define GFS2_LARGE_FH_SIZE 8
 #define GFS2_OLD_FH_SIZE 10
 
-static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
-			  int connectable)
+static int gfs2_encode_fh(struct inode *inode, __u32 *p, int *len,
+			  struct inode *parent)
 {
 	__be32 *fh = (__force __be32 *)p;
-	struct inode *inode = dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
 	struct gfs2_inode *ip = GFS2_I(inode);
 
-	if (connectable && (*len < GFS2_LARGE_FH_SIZE)) {
+	if (parent && (*len < GFS2_LARGE_FH_SIZE)) {
 		*len = GFS2_LARGE_FH_SIZE;
 		return 255;
 	} else if (*len < GFS2_SMALL_FH_SIZE) {
@@ -50,14 +49,10 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 	fh[3] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
 	*len = GFS2_SMALL_FH_SIZE;
 
-	if (!connectable || inode == sb->s_root->d_inode)
+	if (!parent || inode == sb->s_root->d_inode)
 		return *len;
 
-	spin_lock(&dentry->d_lock);
-	inode = dentry->d_parent->d_inode;
-	ip = GFS2_I(inode);
-	igrab(inode);
-	spin_unlock(&dentry->d_lock);
+	ip = GFS2_I(parent);
 
 	fh[4] = cpu_to_be32(ip->i_no_formal_ino >> 32);
 	fh[5] = cpu_to_be32(ip->i_no_formal_ino & 0xFFFFFFFF);
@@ -65,8 +60,6 @@ static int gfs2_encode_fh(struct dentry *dentry, __u32 *p, int *len,
 	fh[7] = cpu_to_be32(ip->i_no_addr & 0xFFFFFFFF);
 	*len = GFS2_LARGE_FH_SIZE;
 
-	iput(inode);
-
 	return *len;
 }
 
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index dd4687ff30d0..aa4356d09eee 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -107,12 +107,11 @@ static struct dentry *isofs_export_get_parent(struct dentry *child)
 }
 
 static int
-isofs_export_encode_fh(struct dentry *dentry,
+isofs_export_encode_fh(struct inode *inode,
 		       __u32 *fh32,
 		       int *max_len,
-		       int connectable)
+		       struct inode *parent)
 {
-	struct inode * inode = dentry->d_inode;
 	struct iso_inode_info * ei = ISOFS_I(inode);
 	int len = *max_len;
 	int type = 1;
@@ -124,7 +123,7 @@ isofs_export_encode_fh(struct dentry *dentry,
 	 * offset of the inode and the upper 16 bits of fh32[1] to
 	 * hold the offset of the parent.
 	 */
-	if (connectable && (len < 5)) {
+	if (parent && (len < 5)) {
 		*max_len = 5;
 		return 255;
 	} else if (len < 3) {
@@ -136,16 +135,12 @@ isofs_export_encode_fh(struct dentry *dentry,
 	fh32[0] = ei->i_iget5_block;
  	fh16[2] = (__u16)ei->i_iget5_offset;  /* fh16 [sic] */
 	fh32[2] = inode->i_generation;
-	if (connectable && !S_ISDIR(inode->i_mode)) {
-		struct inode *parent;
+	if (parent) {
 		struct iso_inode_info *eparent;
-		spin_lock(&dentry->d_lock);
-		parent = dentry->d_parent->d_inode;
 		eparent = ISOFS_I(parent);
 		fh32[3] = eparent->i_iget5_block;
 		fh16[3] = (__u16)eparent->i_iget5_offset;  /* fh16 [sic] */
 		fh32[4] = parent->i_generation;
-		spin_unlock(&dentry->d_lock);
 		len = 5;
 		type = 2;
 	}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 0bb2c2010b95..b72847988b78 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -508,31 +508,29 @@ static struct dentry *nilfs_fh_to_parent(struct super_block *sb, struct fid *fh,
 	return nilfs_get_dentry(sb, fid->cno, fid->parent_ino, fid->parent_gen);
 }
 
-static int nilfs_encode_fh(struct dentry *dentry, __u32 *fh, int *lenp,
-			   int connectable)
+static int nilfs_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
+			   struct inode *parent)
 {
 	struct nilfs_fid *fid = (struct nilfs_fid *)fh;
-	struct inode *inode = dentry->d_inode;
 	struct nilfs_root *root = NILFS_I(inode)->i_root;
 	int type;
 
-	if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE ||
-	    (connectable && *lenp < NILFS_FID_SIZE_CONNECTABLE))
+	if (parent && *lenp < NILFS_FID_SIZE_CONNECTABLE) {
+		*lenp = NILFS_FID_SIZE_CONNECTABLE;
+		return 255;
+	}
+	if (*lenp < NILFS_FID_SIZE_NON_CONNECTABLE) {
+		*lenp = NILFS_FID_SIZE_NON_CONNECTABLE;
 		return 255;
+	}
 
 	fid->cno = root->cno;
 	fid->ino = inode->i_ino;
 	fid->gen = inode->i_generation;
 
-	if (connectable && !S_ISDIR(inode->i_mode)) {
-		struct inode *parent;
-
-		spin_lock(&dentry->d_lock);
-		parent = dentry->d_parent->d_inode;
+	if (parent) {
 		fid->parent_ino = parent->i_ino;
 		fid->parent_gen = parent->i_generation;
-		spin_unlock(&dentry->d_lock);
-
 		type = FILEID_NILFS_WITH_PARENT;
 		*lenp = NILFS_FID_SIZE_CONNECTABLE;
 	} else {
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 745db42528d5..322216a5f0dd 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -177,21 +177,23 @@ bail:
 	return parent;
 }
 
-static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
-			   int connectable)
+static int ocfs2_encode_fh(struct inode *inode, u32 *fh_in, int *max_len,
+			   struct inode *parent)
 {
-	struct inode *inode = dentry->d_inode;
 	int len = *max_len;
 	int type = 1;
 	u64 blkno;
 	u32 generation;
 	__le32 *fh = (__force __le32 *) fh_in;
 
+#ifdef TRACE_HOOKS_ARE_NOT_BRAINDEAD_IN_YOUR_OPINION
+#error "You go ahead and fix that mess, then.  Somehow"
 	trace_ocfs2_encode_fh_begin(dentry, dentry->d_name.len,
 				    dentry->d_name.name,
 				    fh, len, connectable);
+#endif
 
-	if (connectable && (len < 6)) {
+	if (parent && (len < 6)) {
 		*max_len = 6;
 		type = 255;
 		goto bail;
@@ -211,12 +213,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 	fh[1] = cpu_to_le32((u32)(blkno & 0xffffffff));
 	fh[2] = cpu_to_le32(generation);
 
-	if (connectable && !S_ISDIR(inode->i_mode)) {
-		struct inode *parent;
-
-		spin_lock(&dentry->d_lock);
-
-		parent = dentry->d_parent->d_inode;
+	if (parent) {
 		blkno = OCFS2_I(parent)->ip_blkno;
 		generation = parent->i_generation;
 
@@ -224,8 +221,6 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
 		fh[4] = cpu_to_le32((u32)(blkno & 0xffffffff));
 		fh[5] = cpu_to_le32(generation);
 
-		spin_unlock(&dentry->d_lock);
-
 		len = 6;
 		type = 2;
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 59d06871a850..a6d4268fb6c1 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1592,13 +1592,12 @@ struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
 		(fh_type == 6) ? fid->raw[5] : 0);
 }
 
-int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
-		       int need_parent)
+int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
+		       struct inode *parent)
 {
-	struct inode *inode = dentry->d_inode;
 	int maxlen = *lenp;
 
-	if (need_parent && (maxlen < 5)) {
+	if (parent && (maxlen < 5)) {
 		*lenp = 5;
 		return 255;
 	} else if (maxlen < 3) {
@@ -1610,20 +1609,15 @@ int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
 	data[1] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
 	data[2] = inode->i_generation;
 	*lenp = 3;
-	/* no room for directory info? return what we've stored so far */
-	if (maxlen < 5 || !need_parent)
-		return 3;
-
-	spin_lock(&dentry->d_lock);
-	inode = dentry->d_parent->d_inode;
-	data[3] = inode->i_ino;
-	data[4] = le32_to_cpu(INODE_PKEY(inode)->k_dir_id);
-	*lenp = 5;
-	if (maxlen >= 6) {
-		data[5] = inode->i_generation;
-		*lenp = 6;
-	}
-	spin_unlock(&dentry->d_lock);
+	if (parent) {
+		data[3] = parent->i_ino;
+		data[4] = le32_to_cpu(INODE_PKEY(parent)->k_dir_id);
+		*lenp = 5;
+		if (maxlen >= 6) {
+			data[5] = parent->i_generation;
+			*lenp = 6;
+		}
+	}
 	return *lenp;
 }
 
diff --git a/fs/reiserfs/reiserfs.h b/fs/reiserfs/reiserfs.h
index a59d27126338..14a4f9dfb171 100644
--- a/fs/reiserfs/reiserfs.h
+++ b/fs/reiserfs/reiserfs.h
@@ -2611,8 +2611,8 @@ struct dentry *reiserfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
 				     int fh_len, int fh_type);
 struct dentry *reiserfs_fh_to_parent(struct super_block *sb, struct fid *fid,
 				     int fh_len, int fh_type);
-int reiserfs_encode_fh(struct dentry *dentry, __u32 * data, int *lenp,
-		       int connectable);
+int reiserfs_encode_fh(struct inode *inode, __u32 * data, int *lenp,
+		       struct inode *parent);
 
 int reiserfs_truncate_file(struct inode *, int update_timestamps);
 void make_cpu_key(struct cpu_key *cpu_key, struct inode *inode, loff_t offset,
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index a165c66e3eef..18024178ac4c 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1260,16 +1260,15 @@ static struct dentry *udf_fh_to_parent(struct super_block *sb,
 				 fid->udf.parent_partref,
 				 fid->udf.parent_generation);
 }
-static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
-			 int connectable)
+static int udf_encode_fh(struct inode *inode, __u32 *fh, int *lenp,
+			 struct inode *parent)
 {
 	int len = *lenp;
-	struct inode *inode =  de->d_inode;
 	struct kernel_lb_addr location = UDF_I(inode)->i_location;
 	struct fid *fid = (struct fid *)fh;
 	int type = FILEID_UDF_WITHOUT_PARENT;
 
-	if (connectable && (len < 5)) {
+	if (parent && (len < 5)) {
 		*lenp = 5;
 		return 255;
 	} else if (len < 3) {
@@ -1282,14 +1281,11 @@ static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
 	fid->udf.partref = location.partitionReferenceNum;
 	fid->udf.generation = inode->i_generation;
 
-	if (connectable && !S_ISDIR(inode->i_mode)) {
-		spin_lock(&de->d_lock);
-		inode = de->d_parent->d_inode;
-		location = UDF_I(inode)->i_location;
+	if (parent) {
+		location = UDF_I(parent)->i_location;
 		fid->udf.parent_block = location.logicalBlockNum;
 		fid->udf.parent_partref = location.partitionReferenceNum;
 		fid->udf.parent_generation = inode->i_generation;
-		spin_unlock(&de->d_lock);
 		*lenp = 5;
 		type = FILEID_UDF_WITH_PARENT;
 	}
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 2d25d19c4ea1..42679223a0fd 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -52,19 +52,18 @@ static int xfs_fileid_length(int fileid_type)
 
 STATIC int
 xfs_fs_encode_fh(
-	struct dentry		*dentry,
-	__u32			*fh,
-	int			*max_len,
-	int			connectable)
+	struct inode	*inode,
+	__u32		*fh,
+	int		*max_len,
+	struct inode	*parent)
 {
 	struct fid		*fid = (struct fid *)fh;
 	struct xfs_fid64	*fid64 = (struct xfs_fid64 *)fh;
-	struct inode		*inode = dentry->d_inode;
 	int			fileid_type;
 	int			len;
 
 	/* Directories don't need their parent encoded, they have ".." */
-	if (S_ISDIR(inode->i_mode) || !connectable)
+	if (!parent)
 		fileid_type = FILEID_INO32_GEN;
 	else
 		fileid_type = FILEID_INO32_GEN_PARENT;
@@ -96,20 +95,16 @@ xfs_fs_encode_fh(
 
 	switch (fileid_type) {
 	case FILEID_INO32_GEN_PARENT:
-		spin_lock(&dentry->d_lock);
-		fid->i32.parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
-		fid->i32.parent_gen = dentry->d_parent->d_inode->i_generation;
-		spin_unlock(&dentry->d_lock);
+		fid->i32.parent_ino = XFS_I(parent)->i_ino;
+		fid->i32.parent_gen = parent->i_generation;
 		/*FALLTHRU*/
 	case FILEID_INO32_GEN:
 		fid->i32.ino = XFS_I(inode)->i_ino;
 		fid->i32.gen = inode->i_generation;
 		break;
 	case FILEID_INO32_GEN_PARENT | XFS_FILEID_TYPE_64FLAG:
-		spin_lock(&dentry->d_lock);
-		fid64->parent_ino = XFS_I(dentry->d_parent->d_inode)->i_ino;
-		fid64->parent_gen = dentry->d_parent->d_inode->i_generation;
-		spin_unlock(&dentry->d_lock);
+		fid64->parent_ino = XFS_I(parent)->i_ino;
+		fid64->parent_gen = parent->i_generation;
 		/*FALLTHRU*/
 	case FILEID_INO32_GEN | XFS_FILEID_TYPE_64FLAG:
 		fid64->ino = XFS_I(inode)->i_ino;
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 3a4cef5322dc..12291a7ee275 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -165,8 +165,8 @@ struct fid {
  */
 
 struct export_operations {
-	int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len,
-			int connectable);
+	int (*encode_fh)(struct inode *inode, __u32 *fh, int *max_len,
+			struct inode *parent);
 	struct dentry * (*fh_to_dentry)(struct super_block *sb, struct fid *fid,
 			int fh_len, int fh_type);
 	struct dentry * (*fh_to_parent)(struct super_block *sb, struct fid *fid,
diff --git a/mm/cleancache.c b/mm/cleancache.c
index 5646c740f613..32e6f4136fa2 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -80,7 +80,7 @@ EXPORT_SYMBOL(__cleancache_init_shared_fs);
 static int cleancache_get_key(struct inode *inode,
 			      struct cleancache_filekey *key)
 {
-	int (*fhfn)(struct dentry *, __u32 *fh, int *, int);
+	int (*fhfn)(struct inode *, __u32 *fh, int *, struct inode *);
 	int len = 0, maxlen = CLEANCACHE_KEY_MAX;
 	struct super_block *sb = inode->i_sb;
 
@@ -88,9 +88,7 @@ static int cleancache_get_key(struct inode *inode,
 	if (sb->s_export_op != NULL) {
 		fhfn = sb->s_export_op->encode_fh;
 		if  (fhfn) {
-			struct dentry d;
-			d.d_inode = inode;
-			len = (*fhfn)(&d, &key->u.fh[0], &maxlen, 0);
+			len = (*fhfn)(inode, &key->u.fh[0], &maxlen, NULL);
 			if (len <= 0 || len == 255)
 				return -1;
 			if (maxlen > CLEANCACHE_KEY_MAX)
diff --git a/mm/shmem.c b/mm/shmem.c
index be5af34a070d..3711422c3172 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2033,11 +2033,9 @@ static struct dentry *shmem_fh_to_dentry(struct super_block *sb,
 	return dentry;
 }
 
-static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
-				int connectable)
+static int shmem_encode_fh(struct inode *inode, __u32 *fh, int *len,
+				struct inode *parent)
 {
-	struct inode *inode = dentry->d_inode;
-
 	if (*len < 3) {
 		*len = 3;
 		return 255;
-- 
cgit 


From 9dd6fa03ab31bb57cee4623a689d058d222fbe68 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Tue, 8 May 2012 13:29:45 +0930
Subject: lglock: remove online variants of lock

Optimizing the slow paths adds a lot of complexity.  If you need to
grab every lock often, you have other problems.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Nick Piggin <npiggin@kernel.dk>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/lglock.h | 58 ++------------------------------------------------
 1 file changed, 2 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 87f402ccec55..0fdd821e77b7 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -28,8 +28,8 @@
 #define br_lock_init(name)	name##_lock_init()
 #define br_read_lock(name)	name##_local_lock()
 #define br_read_unlock(name)	name##_local_unlock()
-#define br_write_lock(name)	name##_global_lock_online()
-#define br_write_unlock(name)	name##_global_unlock_online()
+#define br_write_lock(name)	name##_global_lock()
+#define br_write_unlock(name)	name##_global_unlock()
 
 #define DECLARE_BRLOCK(name)	DECLARE_LGLOCK(name)
 #define DEFINE_BRLOCK(name)	DEFINE_LGLOCK(name)
@@ -42,8 +42,6 @@
 #define lg_local_unlock_cpu(name, cpu)	name##_local_unlock_cpu(cpu)
 #define lg_global_lock(name)	name##_global_lock()
 #define lg_global_unlock(name)	name##_global_unlock()
-#define lg_global_lock_online(name) name##_global_lock_online()
-#define lg_global_unlock_online(name) name##_global_unlock_online()
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define LOCKDEP_INIT_MAP lockdep_init_map
@@ -68,36 +66,13 @@
  extern void name##_local_unlock_cpu(int cpu);				\
  extern void name##_global_lock(void);					\
  extern void name##_global_unlock(void);				\
- extern void name##_global_lock_online(void);				\
- extern void name##_global_unlock_online(void);				\
 
 #define DEFINE_LGLOCK(name)						\
 									\
  DEFINE_SPINLOCK(name##_cpu_lock);					\
- cpumask_t name##_cpus __read_mostly;					\
  DEFINE_PER_CPU(arch_spinlock_t, name##_lock);				\
  DEFINE_LGLOCK_LOCKDEP(name);						\
 									\
- static int								\
- name##_lg_cpu_callback(struct notifier_block *nb,			\
-				unsigned long action, void *hcpu)	\
- {									\
-	switch (action & ~CPU_TASKS_FROZEN) {				\
-	case CPU_UP_PREPARE:						\
-		spin_lock(&name##_cpu_lock);				\
-		cpu_set((unsigned long)hcpu, name##_cpus);		\
-		spin_unlock(&name##_cpu_lock);				\
-		break;							\
-	case CPU_UP_CANCELED: case CPU_DEAD:				\
-		spin_lock(&name##_cpu_lock);				\
-		cpu_clear((unsigned long)hcpu, name##_cpus);		\
-		spin_unlock(&name##_cpu_lock);				\
-	}								\
-	return NOTIFY_OK;						\
- }									\
- static struct notifier_block name##_lg_cpu_notifier = {		\
-	.notifier_call = name##_lg_cpu_callback,			\
- };									\
  void name##_lock_init(void) {						\
 	int i;								\
 	LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
@@ -106,11 +81,6 @@
 		lock = &per_cpu(name##_lock, i);			\
 		*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;	\
 	}								\
-	register_hotcpu_notifier(&name##_lg_cpu_notifier);		\
-	get_online_cpus();						\
-	for_each_online_cpu(i)						\
-		cpu_set(i, name##_cpus);				\
-	put_online_cpus();						\
  }									\
  EXPORT_SYMBOL(name##_lock_init);					\
 									\
@@ -150,30 +120,6 @@
  }									\
  EXPORT_SYMBOL(name##_local_unlock_cpu);				\
 									\
- void name##_global_lock_online(void) {					\
-	int i;								\
-	spin_lock(&name##_cpu_lock);					\
-	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
-	for_each_cpu(i, &name##_cpus) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_lock(lock);					\
-	}								\
- }									\
- EXPORT_SYMBOL(name##_global_lock_online);				\
-									\
- void name##_global_unlock_online(void) {				\
-	int i;								\
-	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
-	for_each_cpu(i, &name##_cpus) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_unlock(lock);					\
-	}								\
-	spin_unlock(&name##_cpu_lock);					\
- }									\
- EXPORT_SYMBOL(name##_global_unlock_online);				\
-									\
  void name##_global_lock(void) {					\
 	int i;								\
 	preempt_disable();						\
-- 
cgit 


From eea62f831b8030b0eeea8314eed73b6132d1de26 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 8 May 2012 13:32:24 +0930
Subject: brlocks/lglocks: turn into functions

lglocks and brlocks are currently generated with some complicated macros
in lglock.h.  But there's no reason to not just use common utility
functions and put all the data into a common data structure.

Since there are at least two users it makes sense to share this code in a
library.  This is also easier maintainable than a macro forest.

This will also make it later possible to dynamically allocate lglocks and
also use them in modules (this would both still need some additional, but
now straightforward, code)

[akpm@linux-foundation.org: checkpatch fixes]
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/file_table.c        |   1 -
 fs/internal.h          |   2 +-
 include/linux/lglock.h | 125 ++++++++++---------------------------------------
 kernel/Makefile        |   2 +-
 kernel/lglock.c        |  89 +++++++++++++++++++++++++++++++++++
 5 files changed, 117 insertions(+), 102 deletions(-)
 create mode 100644 kernel/lglock.c

(limited to 'include/linux')

diff --git a/fs/file_table.c b/fs/file_table.c
index 70f2a0fd6aec..f5c67c59ec10 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -34,7 +34,6 @@ struct files_stat_struct files_stat = {
 	.max_files = NR_FILE
 };
 
-DECLARE_LGLOCK(files_lglock);
 DEFINE_LGLOCK(files_lglock);
 
 /* SLAB cache for file structures */
diff --git a/fs/internal.h b/fs/internal.h
index 9962c59ba280..8040af489c78 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -56,7 +56,7 @@ extern int sb_prepare_remount_readonly(struct super_block *);
 
 extern void __init mnt_init(void);
 
-DECLARE_BRLOCK(vfsmount_lock);
+extern struct lglock vfsmount_lock;
 
 
 /*
diff --git a/include/linux/lglock.h b/include/linux/lglock.h
index 0fdd821e77b7..f01e5f6d1f07 100644
--- a/include/linux/lglock.h
+++ b/include/linux/lglock.h
@@ -23,26 +23,17 @@
 #include <linux/lockdep.h>
 #include <linux/percpu.h>
 #include <linux/cpu.h>
+#include <linux/notifier.h>
 
 /* can make br locks by using local lock for read side, global lock for write */
-#define br_lock_init(name)	name##_lock_init()
-#define br_read_lock(name)	name##_local_lock()
-#define br_read_unlock(name)	name##_local_unlock()
-#define br_write_lock(name)	name##_global_lock()
-#define br_write_unlock(name)	name##_global_unlock()
+#define br_lock_init(name)	lg_lock_init(name, #name)
+#define br_read_lock(name)	lg_local_lock(name)
+#define br_read_unlock(name)	lg_local_unlock(name)
+#define br_write_lock(name)	lg_global_lock(name)
+#define br_write_unlock(name)	lg_global_unlock(name)
 
-#define DECLARE_BRLOCK(name)	DECLARE_LGLOCK(name)
 #define DEFINE_BRLOCK(name)	DEFINE_LGLOCK(name)
 
-
-#define lg_lock_init(name)	name##_lock_init()
-#define lg_local_lock(name)	name##_local_lock()
-#define lg_local_unlock(name)	name##_local_unlock()
-#define lg_local_lock_cpu(name, cpu)	name##_local_lock_cpu(cpu)
-#define lg_local_unlock_cpu(name, cpu)	name##_local_unlock_cpu(cpu)
-#define lg_global_lock(name)	name##_global_lock()
-#define lg_global_unlock(name)	name##_global_unlock()
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define LOCKDEP_INIT_MAP lockdep_init_map
 
@@ -57,90 +48,26 @@
 #define DEFINE_LGLOCK_LOCKDEP(name)
 #endif
 
-
-#define DECLARE_LGLOCK(name)						\
- extern void name##_lock_init(void);					\
- extern void name##_local_lock(void);					\
- extern void name##_local_unlock(void);					\
- extern void name##_local_lock_cpu(int cpu);				\
- extern void name##_local_unlock_cpu(int cpu);				\
- extern void name##_global_lock(void);					\
- extern void name##_global_unlock(void);				\
+struct lglock {
+	arch_spinlock_t __percpu *lock;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+	struct lock_class_key lock_key;
+	struct lockdep_map    lock_dep_map;
+#endif
+};
 
 #define DEFINE_LGLOCK(name)						\
-									\
- DEFINE_SPINLOCK(name##_cpu_lock);					\
- DEFINE_PER_CPU(arch_spinlock_t, name##_lock);				\
- DEFINE_LGLOCK_LOCKDEP(name);						\
-									\
- void name##_lock_init(void) {						\
-	int i;								\
-	LOCKDEP_INIT_MAP(&name##_lock_dep_map, #name, &name##_lock_key, 0); \
-	for_each_possible_cpu(i) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		*lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;	\
-	}								\
- }									\
- EXPORT_SYMBOL(name##_lock_init);					\
-									\
- void name##_local_lock(void) {						\
-	arch_spinlock_t *lock;						\
-	preempt_disable();						\
-	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
-	lock = &__get_cpu_var(name##_lock);				\
-	arch_spin_lock(lock);						\
- }									\
- EXPORT_SYMBOL(name##_local_lock);					\
-									\
- void name##_local_unlock(void) {					\
-	arch_spinlock_t *lock;						\
-	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
-	lock = &__get_cpu_var(name##_lock);				\
-	arch_spin_unlock(lock);						\
-	preempt_enable();						\
- }									\
- EXPORT_SYMBOL(name##_local_unlock);					\
-									\
- void name##_local_lock_cpu(int cpu) {					\
-	arch_spinlock_t *lock;						\
-	preempt_disable();						\
-	rwlock_acquire_read(&name##_lock_dep_map, 0, 0, _THIS_IP_);	\
-	lock = &per_cpu(name##_lock, cpu);				\
-	arch_spin_lock(lock);						\
- }									\
- EXPORT_SYMBOL(name##_local_lock_cpu);					\
-									\
- void name##_local_unlock_cpu(int cpu) {				\
-	arch_spinlock_t *lock;						\
-	rwlock_release(&name##_lock_dep_map, 1, _THIS_IP_);		\
-	lock = &per_cpu(name##_lock, cpu);				\
-	arch_spin_unlock(lock);						\
-	preempt_enable();						\
- }									\
- EXPORT_SYMBOL(name##_local_unlock_cpu);				\
-									\
- void name##_global_lock(void) {					\
-	int i;								\
-	preempt_disable();						\
-	rwlock_acquire(&name##_lock_dep_map, 0, 0, _RET_IP_);		\
-	for_each_possible_cpu(i) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_lock(lock);					\
-	}								\
- }									\
- EXPORT_SYMBOL(name##_global_lock);					\
-									\
- void name##_global_unlock(void) {					\
-	int i;								\
-	rwlock_release(&name##_lock_dep_map, 1, _RET_IP_);		\
-	for_each_possible_cpu(i) {					\
-		arch_spinlock_t *lock;					\
-		lock = &per_cpu(name##_lock, i);			\
-		arch_spin_unlock(lock);					\
-	}								\
-	preempt_enable();						\
- }									\
- EXPORT_SYMBOL(name##_global_unlock);
+	DEFINE_LGLOCK_LOCKDEP(name);					\
+	DEFINE_PER_CPU(arch_spinlock_t, name ## _lock)			\
+	= __ARCH_SPIN_LOCK_UNLOCKED;					\
+	struct lglock name = { .lock = &name ## _lock }
+
+void lg_lock_init(struct lglock *lg, char *name);
+void lg_local_lock(struct lglock *lg);
+void lg_local_unlock(struct lglock *lg);
+void lg_local_lock_cpu(struct lglock *lg, int cpu);
+void lg_local_unlock_cpu(struct lglock *lg, int cpu);
+void lg_global_lock(struct lglock *lg);
+void lg_global_unlock(struct lglock *lg);
+
 #endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c07f30fa9b7..296132c19a57 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -10,7 +10,7 @@ obj-y     = fork.o exec_domain.o panic.o printk.o \
 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
 	    notifier.o ksysfs.o cred.o \
-	    async.o range.o groups.o
+	    async.o range.o groups.o lglock.o
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace debug files and internal ftrace files
diff --git a/kernel/lglock.c b/kernel/lglock.c
new file mode 100644
index 000000000000..6535a667a5a7
--- /dev/null
+++ b/kernel/lglock.c
@@ -0,0 +1,89 @@
+/* See include/linux/lglock.h for description */
+#include <linux/module.h>
+#include <linux/lglock.h>
+#include <linux/cpu.h>
+#include <linux/string.h>
+
+/*
+ * Note there is no uninit, so lglocks cannot be defined in
+ * modules (but it's fine to use them from there)
+ * Could be added though, just undo lg_lock_init
+ */
+
+void lg_lock_init(struct lglock *lg, char *name)
+{
+	LOCKDEP_INIT_MAP(&lg->lock_dep_map, name, &lg->lock_key, 0);
+}
+EXPORT_SYMBOL(lg_lock_init);
+
+void lg_local_lock(struct lglock *lg)
+{
+	arch_spinlock_t *lock;
+
+	preempt_disable();
+	rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+	lock = this_cpu_ptr(lg->lock);
+	arch_spin_lock(lock);
+}
+EXPORT_SYMBOL(lg_local_lock);
+
+void lg_local_unlock(struct lglock *lg)
+{
+	arch_spinlock_t *lock;
+
+	rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+	lock = this_cpu_ptr(lg->lock);
+	arch_spin_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(lg_local_unlock);
+
+void lg_local_lock_cpu(struct lglock *lg, int cpu)
+{
+	arch_spinlock_t *lock;
+
+	preempt_disable();
+	rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+	lock = per_cpu_ptr(lg->lock, cpu);
+	arch_spin_lock(lock);
+}
+EXPORT_SYMBOL(lg_local_lock_cpu);
+
+void lg_local_unlock_cpu(struct lglock *lg, int cpu)
+{
+	arch_spinlock_t *lock;
+
+	rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+	lock = per_cpu_ptr(lg->lock, cpu);
+	arch_spin_unlock(lock);
+	preempt_enable();
+}
+EXPORT_SYMBOL(lg_local_unlock_cpu);
+
+void lg_global_lock(struct lglock *lg)
+{
+	int i;
+
+	preempt_disable();
+	rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_);
+	for_each_possible_cpu(i) {
+		arch_spinlock_t *lock;
+		lock = per_cpu_ptr(lg->lock, i);
+		arch_spin_lock(lock);
+	}
+}
+EXPORT_SYMBOL(lg_global_lock);
+
+void lg_global_unlock(struct lglock *lg)
+{
+	int i;
+
+	rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+	for_each_possible_cpu(i) {
+		arch_spinlock_t *lock;
+		lock = per_cpu_ptr(lg->lock, i);
+		arch_spin_unlock(lock);
+	}
+	preempt_enable();
+}
+EXPORT_SYMBOL(lg_global_unlock);
-- 
cgit 


From 5aaa0b7a2ed5b12692c9ffb5222182bd558d3146 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 17 May 2012 17:15:29 +0200
Subject: sched/nohz: Fix rq->cpu_load calculations some more

Follow up on commit 556061b00 ("sched/nohz: Fix rq->cpu_load[]
calculations") since while that fixed the busy case it regressed the
mostly idle case.

Add a callback from the nohz exit to also age the rq->cpu_load[]
array. This closes the hole where either there was no nohz load
balance pass during the nohz, or there was a 'significant' amount of
idle time between the last nohz balance and the nohz exit.

So we'll update unconditionally from the tick to not insert any
accidental 0 load periods while busy, and we try and catch up from
nohz idle balance and nohz exit. Both these are still prone to missing
a jiffy, but that has always been the case.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pjt@google.com
Cc: Venkatesh Pallipadi <venki@google.com>
Link: http://lkml.kernel.org/n/tip-kt0trz0apodbf84ucjfdbr1a@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h    |  1 +
 kernel/sched/core.c      | 53 +++++++++++++++++++++++++++++++++++++++---------
 kernel/time/tick-sched.c |  1 +
 3 files changed, 45 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index f45c0b280b5d..d61e5977e517 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void);
 
 
 extern void calc_global_load(unsigned long ticks);
+extern void update_cpu_load_nohz(void);
 
 extern unsigned long get_parent_ip(unsigned long addr);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 39eb6011bc38..75844a8f9aeb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2517,25 +2517,32 @@ static void __update_cpu_load(struct rq *this_rq, unsigned long this_load,
 	sched_avg_update(this_rq);
 }
 
+#ifdef CONFIG_NO_HZ
+/*
+ * There is no sane way to deal with nohz on smp when using jiffies because the
+ * cpu doing the jiffies update might drift wrt the cpu doing the jiffy reading
+ * causing off-by-one errors in observed deltas; {0,2} instead of {1,1}.
+ *
+ * Therefore we cannot use the delta approach from the regular tick since that
+ * would seriously skew the load calculation. However we'll make do for those
+ * updates happening while idle (nohz_idle_balance) or coming out of idle
+ * (tick_nohz_idle_exit).
+ *
+ * This means we might still be one tick off for nohz periods.
+ */
+
 /*
  * Called from nohz_idle_balance() to update the load ratings before doing the
  * idle balance.
  */
 void update_idle_cpu_load(struct rq *this_rq)
 {
-	unsigned long curr_jiffies = jiffies;
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
 	unsigned long load = this_rq->load.weight;
 	unsigned long pending_updates;
 
 	/*
-	 * Bloody broken means of dealing with nohz, but better than nothing..
-	 * jiffies is updated by one cpu, another cpu can drift wrt the jiffy
-	 * update and see 0 difference the one time and 2 the next, even though
-	 * we ticked at roughtly the same rate.
-	 *
-	 * Hence we only use this from nohz_idle_balance() and skip this
-	 * nonsense when called from the scheduler_tick() since that's
-	 * guaranteed a stable rate.
+	 * bail if there's load or we're actually up-to-date.
 	 */
 	if (load || curr_jiffies == this_rq->last_load_update_tick)
 		return;
@@ -2546,13 +2553,39 @@ void update_idle_cpu_load(struct rq *this_rq)
 	__update_cpu_load(this_rq, load, pending_updates);
 }
 
+/*
+ * Called from tick_nohz_idle_exit() -- try and fix up the ticks we missed.
+ */
+void update_cpu_load_nohz(void)
+{
+	struct rq *this_rq = this_rq();
+	unsigned long curr_jiffies = ACCESS_ONCE(jiffies);
+	unsigned long pending_updates;
+
+	if (curr_jiffies == this_rq->last_load_update_tick)
+		return;
+
+	raw_spin_lock(&this_rq->lock);
+	pending_updates = curr_jiffies - this_rq->last_load_update_tick;
+	if (pending_updates) {
+		this_rq->last_load_update_tick = curr_jiffies;
+		/*
+		 * We were idle, this means load 0, the current load might be
+		 * !0 due to remote wakeups and the sort.
+		 */
+		__update_cpu_load(this_rq, 0, pending_updates);
+	}
+	raw_spin_unlock(&this_rq->lock);
+}
+#endif /* CONFIG_NO_HZ */
+
 /*
  * Called from scheduler_tick()
  */
 static void update_cpu_load_active(struct rq *this_rq)
 {
 	/*
-	 * See the mess in update_idle_cpu_load().
+	 * See the mess around update_idle_cpu_load() / update_cpu_load_nohz().
 	 */
 	this_rq->last_load_update_tick = jiffies;
 	__update_cpu_load(this_rq, this_rq->load.weight, 1);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6a3a5b9ff561..0c927cd85345 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -576,6 +576,7 @@ void tick_nohz_idle_exit(void)
 	/* Update jiffies first */
 	select_nohz_load_balancer(0);
 	tick_do_update_jiffies64(now);
+	update_cpu_load_nohz();
 
 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 	/*
-- 
cgit 


From 29baa7478ba47d746e3625c91d3b2afbf46b4312 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 23 Apr 2012 12:11:21 +0200
Subject: sched: Move nr_cpus_allowed out of 'struct sched_rt_entity'

Since nr_cpus_allowed is used outside of sched/rt.c and wants to be
used outside of there more, move it to a more natural site.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-kr61f02y9brwzkh6x53pdptm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/blackfin/kernel/process.c |  2 +-
 include/linux/init_task.h      |  2 +-
 include/linux/sched.h          |  2 +-
 kernel/sched/core.c            |  2 +-
 kernel/sched/fair.c            |  2 +-
 kernel/sched/rt.c              | 36 +++++++++++++++++++++---------------
 6 files changed, 26 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c
index 2e3994b20169..62bcea7dcc6d 100644
--- a/arch/blackfin/kernel/process.c
+++ b/arch/blackfin/kernel/process.c
@@ -173,7 +173,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs)
 	unsigned long newsp;
 
 #ifdef __ARCH_SYNC_CORE_DCACHE
-	if (current->rt.nr_cpus_allowed == num_possible_cpus())
+	if (current->nr_cpus_allowed == num_possible_cpus())
 		set_cpus_allowed_ptr(current, cpumask_of(smp_processor_id()));
 #endif
 
diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index e4baff5f7ff4..9e65eff6af3b 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -149,6 +149,7 @@ extern struct cred init_cred;
 	.normal_prio	= MAX_PRIO-20,					\
 	.policy		= SCHED_NORMAL,					\
 	.cpus_allowed	= CPU_MASK_ALL,					\
+	.nr_cpus_allowed= NR_CPUS,					\
 	.mm		= NULL,						\
 	.active_mm	= &init_mm,					\
 	.se		= {						\
@@ -157,7 +158,6 @@ extern struct cred init_cred;
 	.rt		= {						\
 		.run_list	= LIST_HEAD_INIT(tsk.rt.run_list),	\
 		.time_slice	= RR_TIMESLICE,				\
-		.nr_cpus_allowed = NR_CPUS,				\
 	},								\
 	.tasks		= LIST_HEAD_INIT(tsk.tasks),			\
 	INIT_PUSHABLE_TASKS(tsk)					\
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d61e5977e517..0f50e78f7f44 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1188,7 +1188,6 @@ struct sched_rt_entity {
 	struct list_head run_list;
 	unsigned long timeout;
 	unsigned int time_slice;
-	int nr_cpus_allowed;
 
 	struct sched_rt_entity *back;
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -1253,6 +1252,7 @@ struct task_struct {
 #endif
 
 	unsigned int policy;
+	int nr_cpus_allowed;
 	cpumask_t cpus_allowed;
 
 #ifdef CONFIG_PREEMPT_RCU
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3a69374fb427..70cc36a6073f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5015,7 +5015,7 @@ void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
 		p->sched_class->set_cpus_allowed(p, new_mask);
 
 	cpumask_copy(&p->cpus_allowed, new_mask);
-	p->rt.nr_cpus_allowed = cpumask_weight(new_mask);
+	p->nr_cpus_allowed = cpumask_weight(new_mask);
 }
 
 /*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 2b449a762074..b2a2d236f27b 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2703,7 +2703,7 @@ select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
 	int want_sd = 1;
 	int sync = wake_flags & WF_SYNC;
 
-	if (p->rt.nr_cpus_allowed == 1)
+	if (p->nr_cpus_allowed == 1)
 		return prev_cpu;
 
 	if (sd_flag & SD_BALANCE_WAKE) {
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index c5565c3c515f..295da737b6fe 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -274,13 +274,16 @@ static void update_rt_migration(struct rt_rq *rt_rq)
 
 static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+	struct task_struct *p;
+
 	if (!rt_entity_is_task(rt_se))
 		return;
 
+	p = rt_task_of(rt_se);
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
 	rt_rq->rt_nr_total++;
-	if (rt_se->nr_cpus_allowed > 1)
+	if (p->nr_cpus_allowed > 1)
 		rt_rq->rt_nr_migratory++;
 
 	update_rt_migration(rt_rq);
@@ -288,13 +291,16 @@ static void inc_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 
 static void dec_rt_migration(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+	struct task_struct *p;
+
 	if (!rt_entity_is_task(rt_se))
 		return;
 
+	p = rt_task_of(rt_se);
 	rt_rq = &rq_of_rt_rq(rt_rq)->rt;
 
 	rt_rq->rt_nr_total--;
-	if (rt_se->nr_cpus_allowed > 1)
+	if (p->nr_cpus_allowed > 1)
 		rt_rq->rt_nr_migratory--;
 
 	update_rt_migration(rt_rq);
@@ -1161,7 +1167,7 @@ enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 
 	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
-	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 
 	inc_nr_running(rq);
@@ -1225,7 +1231,7 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 
 	cpu = task_cpu(p);
 
-	if (p->rt.nr_cpus_allowed == 1)
+	if (p->nr_cpus_allowed == 1)
 		goto out;
 
 	/* For anything but wake ups, just return the task_cpu */
@@ -1260,9 +1266,9 @@ select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
 	 * will have to sort it out.
 	 */
 	if (curr && unlikely(rt_task(curr)) &&
-	    (curr->rt.nr_cpus_allowed < 2 ||
+	    (curr->nr_cpus_allowed < 2 ||
 	     curr->prio <= p->prio) &&
-	    (p->rt.nr_cpus_allowed > 1)) {
+	    (p->nr_cpus_allowed > 1)) {
 		int target = find_lowest_rq(p);
 
 		if (target != -1)
@@ -1276,10 +1282,10 @@ out:
 
 static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 {
-	if (rq->curr->rt.nr_cpus_allowed == 1)
+	if (rq->curr->nr_cpus_allowed == 1)
 		return;
 
-	if (p->rt.nr_cpus_allowed != 1
+	if (p->nr_cpus_allowed != 1
 	    && cpupri_find(&rq->rd->cpupri, p, NULL))
 		return;
 
@@ -1395,7 +1401,7 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 	 * The previous task needs to be made eligible for pushing
 	 * if it is still active
 	 */
-	if (on_rt_rq(&p->rt) && p->rt.nr_cpus_allowed > 1)
+	if (on_rt_rq(&p->rt) && p->nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
 
@@ -1408,7 +1414,7 @@ static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
 {
 	if (!task_running(rq, p) &&
 	    (cpu < 0 || cpumask_test_cpu(cpu, tsk_cpus_allowed(p))) &&
-	    (p->rt.nr_cpus_allowed > 1))
+	    (p->nr_cpus_allowed > 1))
 		return 1;
 	return 0;
 }
@@ -1464,7 +1470,7 @@ static int find_lowest_rq(struct task_struct *task)
 	if (unlikely(!lowest_mask))
 		return -1;
 
-	if (task->rt.nr_cpus_allowed == 1)
+	if (task->nr_cpus_allowed == 1)
 		return -1; /* No other targets possible */
 
 	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
@@ -1586,7 +1592,7 @@ static struct task_struct *pick_next_pushable_task(struct rq *rq)
 
 	BUG_ON(rq->cpu != task_cpu(p));
 	BUG_ON(task_current(rq, p));
-	BUG_ON(p->rt.nr_cpus_allowed <= 1);
+	BUG_ON(p->nr_cpus_allowed <= 1);
 
 	BUG_ON(!p->on_rq);
 	BUG_ON(!rt_task(p));
@@ -1793,9 +1799,9 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
 	if (!task_running(rq, p) &&
 	    !test_tsk_need_resched(rq->curr) &&
 	    has_pushable_tasks(rq) &&
-	    p->rt.nr_cpus_allowed > 1 &&
+	    p->nr_cpus_allowed > 1 &&
 	    rt_task(rq->curr) &&
-	    (rq->curr->rt.nr_cpus_allowed < 2 ||
+	    (rq->curr->nr_cpus_allowed < 2 ||
 	     rq->curr->prio <= p->prio))
 		push_rt_tasks(rq);
 }
@@ -1817,7 +1823,7 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 	 * Only update if the process changes its state from whether it
 	 * can migrate or not.
 	 */
-	if ((p->rt.nr_cpus_allowed > 1) == (weight > 1))
+	if ((p->nr_cpus_allowed > 1) == (weight > 1))
 		return;
 
 	rq = task_rq(p);
-- 
cgit 


From 0053ea9c34a41865ec89ffbf3d3033f9a503bccc Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Wed, 30 May 2012 07:43:34 +0000
Subject: netdevice: Update netif_dbg for CONFIG_DYNAMIC_DEBUG

Make netif_dbg use dynamic debugging whenever
CONFIG_DYNAMIC_DEBUG is enabled.

commit b558c96ffa53
("dynamic_debug: make dynamic-debug supersede DEBUG ccflag")
missed updating the netif_dbg variant.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e7fd468f7126..d94cb1431519 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2795,15 +2795,15 @@ do {								\
 #define netif_info(priv, type, dev, fmt, args...)		\
 	netif_level(info, priv, type, dev, fmt, ##args)
 
-#if defined(DEBUG)
-#define netif_dbg(priv, type, dev, format, args...)		\
-	netif_printk(priv, type, KERN_DEBUG, dev, format, ##args)
-#elif defined(CONFIG_DYNAMIC_DEBUG)
+#if defined(CONFIG_DYNAMIC_DEBUG)
 #define netif_dbg(priv, type, netdev, format, args...)		\
 do {								\
 	if (netif_msg_##type(priv))				\
 		dynamic_netdev_dbg(netdev, format, ##args);	\
 } while (0)
+#elif defined(DEBUG)
+#define netif_dbg(priv, type, dev, format, args...)		\
+	netif_printk(priv, type, KERN_DEBUG, dev, format, ##args)
 #else
 #define netif_dbg(priv, type, dev, format, args...)			\
 ({									\
-- 
cgit 


From bb8ac181a5cf50458a0d83b4460790badc9fdc16 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 19 May 2012 10:25:23 -0400
Subject: bury __kernel_nlink_t, make internal nlink_t consistent

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/posix_types.h    | 3 ---
 arch/arm/include/asm/posix_types.h      | 3 ---
 arch/avr32/include/asm/posix_types.h    | 3 ---
 arch/blackfin/include/asm/posix_types.h | 3 ---
 arch/cris/include/asm/posix_types.h     | 3 ---
 arch/frv/include/asm/posix_types.h      | 3 ---
 arch/h8300/include/asm/posix_types.h    | 3 ---
 arch/ia64/include/asm/posix_types.h     | 3 ---
 arch/m32r/include/asm/posix_types.h     | 3 ---
 arch/m68k/include/asm/posix_types.h     | 3 ---
 arch/mips/include/asm/posix_types.h     | 5 -----
 arch/mn10300/include/asm/posix_types.h  | 3 ---
 arch/parisc/include/asm/posix_types.h   | 3 ---
 arch/powerpc/include/asm/posix_types.h  | 3 ---
 arch/s390/include/asm/posix_types.h     | 3 ---
 arch/sh/include/asm/posix_types_32.h    | 2 --
 arch/sh/include/asm/posix_types_64.h    | 2 --
 arch/sparc/include/asm/posix_types.h    | 5 -----
 arch/tile/include/asm/compat.h          | 1 -
 arch/x86/include/asm/posix_types_32.h   | 3 ---
 include/asm-generic/posix_types.h       | 4 ----
 include/linux/types.h                   | 2 +-
 22 files changed, 1 insertion(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/posix_types.h b/arch/alpha/include/asm/posix_types.h
index 24779fc95994..5a8a48320efe 100644
--- a/arch/alpha/include/asm/posix_types.h
+++ b/arch/alpha/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned int	__kernel_ino_t;
 #define __kernel_ino_t __kernel_ino_t
 
-typedef unsigned int	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
 
 #include <asm-generic/posix_types.h>
diff --git a/arch/arm/include/asm/posix_types.h b/arch/arm/include/asm/posix_types.h
index efdf99045d87..d2de9cbbcd9b 100644
--- a/arch/arm/include/asm/posix_types.h
+++ b/arch/arm/include/asm/posix_types.h
@@ -22,9 +22,6 @@
 typedef unsigned short		__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short		__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short		__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/avr32/include/asm/posix_types.h b/arch/avr32/include/asm/posix_types.h
index 74667bfc88cc..9ba9e749b3f3 100644
--- a/arch/avr32/include/asm/posix_types.h
+++ b/arch/avr32/include/asm/posix_types.h
@@ -17,9 +17,6 @@
 typedef unsigned short  __kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short  __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short  __kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/blackfin/include/asm/posix_types.h b/arch/blackfin/include/asm/posix_types.h
index 41bc1875c4d7..1bd3436db6a7 100644
--- a/arch/blackfin/include/asm/posix_types.h
+++ b/arch/blackfin/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short __kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned int __kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/cris/include/asm/posix_types.h b/arch/cris/include/asm/posix_types.h
index 234891c74e2b..ce4e51793151 100644
--- a/arch/cris/include/asm/posix_types.h
+++ b/arch/cris/include/asm/posix_types.h
@@ -15,9 +15,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short  __kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/frv/include/asm/posix_types.h b/arch/frv/include/asm/posix_types.h
index 3f34cb45fbb3..fe512af74a5a 100644
--- a/arch/frv/include/asm/posix_types.h
+++ b/arch/frv/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/h8300/include/asm/posix_types.h b/arch/h8300/include/asm/posix_types.h
index bc4c34efb1ad..91e62ba4c7b0 100644
--- a/arch/h8300/include/asm/posix_types.h
+++ b/arch/h8300/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/ia64/include/asm/posix_types.h b/arch/ia64/include/asm/posix_types.h
index 7323ab9467eb..99ee1d6510cf 100644
--- a/arch/ia64/include/asm/posix_types.h
+++ b/arch/ia64/include/asm/posix_types.h
@@ -1,9 +1,6 @@
 #ifndef _ASM_IA64_POSIX_TYPES_H
 #define _ASM_IA64_POSIX_TYPES_H
 
-typedef unsigned int	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned long	__kernel_sigset_t;	/* at least 32 bits */
 
 #include <asm-generic/posix_types.h>
diff --git a/arch/m32r/include/asm/posix_types.h b/arch/m32r/include/asm/posix_types.h
index 0195850e1f88..236de26a409b 100644
--- a/arch/m32r/include/asm/posix_types.h
+++ b/arch/m32r/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/m68k/include/asm/posix_types.h b/arch/m68k/include/asm/posix_types.h
index 6373093be72b..cf4dbf70fdc7 100644
--- a/arch/m68k/include/asm/posix_types.h
+++ b/arch/m68k/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/mips/include/asm/posix_types.h b/arch/mips/include/asm/posix_types.h
index e0308dcca135..fa03ec3fbf89 100644
--- a/arch/mips/include/asm/posix_types.h
+++ b/arch/mips/include/asm/posix_types.h
@@ -17,11 +17,6 @@
  * assume GCC is being used.
  */
 
-#if (_MIPS_SZLONG == 64)
-typedef unsigned int	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-#endif
-
 typedef long		__kernel_daddr_t;
 #define __kernel_daddr_t __kernel_daddr_t
 
diff --git a/arch/mn10300/include/asm/posix_types.h b/arch/mn10300/include/asm/posix_types.h
index ab506181ec31..d31eeea480cf 100644
--- a/arch/mn10300/include/asm/posix_types.h
+++ b/arch/mn10300/include/asm/posix_types.h
@@ -20,9 +20,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/parisc/include/asm/posix_types.h b/arch/parisc/include/asm/posix_types.h
index 5212b0357daf..b9344256f76b 100644
--- a/arch/parisc/include/asm/posix_types.h
+++ b/arch/parisc/include/asm/posix_types.h
@@ -10,9 +10,6 @@
 typedef unsigned short		__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short		__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short		__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/arch/powerpc/include/asm/posix_types.h b/arch/powerpc/include/asm/posix_types.h
index f1393252bbda..2958c5b97b2d 100644
--- a/arch/powerpc/include/asm/posix_types.h
+++ b/arch/powerpc/include/asm/posix_types.h
@@ -16,9 +16,6 @@ typedef int		__kernel_ssize_t;
 typedef long		__kernel_ptrdiff_t;
 #define __kernel_size_t __kernel_size_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef short		__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 #endif
diff --git a/arch/s390/include/asm/posix_types.h b/arch/s390/include/asm/posix_types.h
index edf8527ff08d..7be104c0f192 100644
--- a/arch/s390/include/asm/posix_types.h
+++ b/arch/s390/include/asm/posix_types.h
@@ -24,7 +24,6 @@ typedef unsigned short	__kernel_old_dev_t;
 
 typedef unsigned long   __kernel_ino_t;
 typedef unsigned short  __kernel_mode_t;
-typedef unsigned short  __kernel_nlink_t;
 typedef unsigned short  __kernel_ipc_pid_t;
 typedef unsigned short  __kernel_uid_t;
 typedef unsigned short  __kernel_gid_t;
@@ -35,7 +34,6 @@ typedef int             __kernel_ptrdiff_t;
 
 typedef unsigned int    __kernel_ino_t;
 typedef unsigned int    __kernel_mode_t;
-typedef unsigned int    __kernel_nlink_t;
 typedef int             __kernel_ipc_pid_t;
 typedef unsigned int    __kernel_uid_t;
 typedef unsigned int    __kernel_gid_t;
@@ -47,7 +45,6 @@ typedef unsigned long   __kernel_sigset_t;      /* at least 32 bits */
 
 #define __kernel_ino_t  __kernel_ino_t
 #define __kernel_mode_t __kernel_mode_t
-#define __kernel_nlink_t __kernel_nlink_t
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 #define __kernel_uid_t __kernel_uid_t
 #define __kernel_gid_t __kernel_gid_t
diff --git a/arch/sh/include/asm/posix_types_32.h b/arch/sh/include/asm/posix_types_32.h
index abda58467ece..ba0bdc423b07 100644
--- a/arch/sh/include/asm/posix_types_32.h
+++ b/arch/sh/include/asm/posix_types_32.h
@@ -3,8 +3,6 @@
 
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 typedef unsigned short	__kernel_uid_t;
diff --git a/arch/sh/include/asm/posix_types_64.h b/arch/sh/include/asm/posix_types_64.h
index fcda07b4a616..244f7e950e17 100644
--- a/arch/sh/include/asm/posix_types_64.h
+++ b/arch/sh/include/asm/posix_types_64.h
@@ -3,8 +3,6 @@
 
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 typedef unsigned short	__kernel_uid_t;
diff --git a/arch/sparc/include/asm/posix_types.h b/arch/sparc/include/asm/posix_types.h
index 3070f25ae90a..156220ed99eb 100644
--- a/arch/sparc/include/asm/posix_types.h
+++ b/arch/sparc/include/asm/posix_types.h
@@ -9,8 +9,6 @@
 
 #if defined(__sparc__) && defined(__arch64__)
 /* sparc 64 bit */
-typedef unsigned int           __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
 
 typedef unsigned short 	       __kernel_old_uid_t;
 typedef unsigned short         __kernel_old_gid_t;
@@ -38,9 +36,6 @@ typedef unsigned short         __kernel_gid_t;
 typedef unsigned short         __kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef short                  __kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef long                   __kernel_daddr_t;
 #define __kernel_daddr_t __kernel_daddr_t
 
diff --git a/arch/tile/include/asm/compat.h b/arch/tile/include/asm/compat.h
index 69adc08d36a5..6e74450ff0a1 100644
--- a/arch/tile/include/asm/compat.h
+++ b/arch/tile/include/asm/compat.h
@@ -44,7 +44,6 @@ typedef __kernel_uid32_t __compat_gid32_t;
 typedef __kernel_mode_t compat_mode_t;
 typedef __kernel_dev_t compat_dev_t;
 typedef __kernel_loff_t compat_loff_t;
-typedef __kernel_nlink_t compat_nlink_t;
 typedef __kernel_ipc_pid_t compat_ipc_pid_t;
 typedef __kernel_daddr_t compat_daddr_t;
 typedef __kernel_fsid_t	compat_fsid_t;
diff --git a/arch/x86/include/asm/posix_types_32.h b/arch/x86/include/asm/posix_types_32.h
index 99f262e04b91..8e525059e7d8 100644
--- a/arch/x86/include/asm/posix_types_32.h
+++ b/arch/x86/include/asm/posix_types_32.h
@@ -10,9 +10,6 @@
 typedef unsigned short	__kernel_mode_t;
 #define __kernel_mode_t __kernel_mode_t
 
-typedef unsigned short	__kernel_nlink_t;
-#define __kernel_nlink_t __kernel_nlink_t
-
 typedef unsigned short	__kernel_ipc_pid_t;
 #define __kernel_ipc_pid_t __kernel_ipc_pid_t
 
diff --git a/include/asm-generic/posix_types.h b/include/asm-generic/posix_types.h
index 91d44bd4dde3..fe74fccf18db 100644
--- a/include/asm-generic/posix_types.h
+++ b/include/asm-generic/posix_types.h
@@ -23,10 +23,6 @@ typedef __kernel_ulong_t __kernel_ino_t;
 typedef unsigned int	__kernel_mode_t;
 #endif
 
-#ifndef __kernel_nlink_t
-typedef __kernel_ulong_t __kernel_nlink_t;
-#endif
-
 #ifndef __kernel_pid_t
 typedef int		__kernel_pid_t;
 #endif
diff --git a/include/linux/types.h b/include/linux/types.h
index 7f480db60231..9c1bd539ea70 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -25,7 +25,7 @@ typedef __kernel_dev_t		dev_t;
 typedef __kernel_ino_t		ino_t;
 typedef __kernel_mode_t		mode_t;
 typedef unsigned short		umode_t;
-typedef __kernel_nlink_t	nlink_t;
+typedef __u32			nlink_t;
 typedef __kernel_off_t		off_t;
 typedef __kernel_pid_t		pid_t;
 typedef __kernel_daddr_t	daddr_t;
-- 
cgit 


From a4f9a9a635e4d54ac93df4b861ed8792e17bd4a2 Mon Sep 17 00:00:00 2001
From: Naohiro Aota <naota@elisp.net>
Date: Tue, 29 May 2012 11:02:24 -0700
Subject: fsnotify: handle subfiles' perm events

Recently I'm working on fanotify and found the following strange
behaviors.

I wrote a program to set fanotify_mark on "/tmp/block" and FAN_DENY
all events notified.

fanotify_mask = FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD:
$ cd /tmp/block; cat foo
cat: foo: Operation not permitted

Operation on the file is blocked as expected.

But,

fanotify_mask = FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD:
$ cd /tmp/block; cat foo
aaa

It's not blocked anymore.  This is confusing behavior.  Also reading
commit "fsnotify: call fsnotify_parent in perm events", it seems like
fsnotify should handle subfiles' perm events as well as the other notify
events.

With this patch, regardless of FAN_ALL_EVENTS set or not:
$ cd /tmp/block; cat foo
cat: foo: Operation not permitted

Operation on the file is now blocked properly.

FS_OPEN_PERM and FS_ACCESS_PERM are not listed on FS_EVENTS_POSS_ON_CHILD.
 Due to fsnotify_inode_watches_children() check, if you only specify only
these events as fsnotify_mask, you don't get subfiles' perm events
notified.

This patch add the events to FS_EVENTS_POSS_ON_CHILD to get them notified
even if only these events are specified to fsnotify_mask.

Signed-off-by: Naohiro Aota <naota@elisp.net>
Cc: Eric Paris <eparis@redhat.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/fsnotify_backend.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h
index 91d0e0a34ef3..63d966d5c2ea 100644
--- a/include/linux/fsnotify_backend.h
+++ b/include/linux/fsnotify_backend.h
@@ -60,7 +60,7 @@
 #define FS_EVENTS_POSS_ON_CHILD   (FS_ACCESS | FS_MODIFY | FS_ATTRIB |\
 				   FS_CLOSE_WRITE | FS_CLOSE_NOWRITE | FS_OPEN |\
 				   FS_MOVED_FROM | FS_MOVED_TO | FS_CREATE |\
-				   FS_DELETE)
+				   FS_DELETE | FS_OPEN_PERM | FS_ACCESS_PERM)
 
 #define FS_MOVE			(FS_MOVED_FROM | FS_MOVED_TO)
 
-- 
cgit 


From 114067b69e7b2c691faace0e33db2f04096f668d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung.kim@lge.com>
Date: Thu, 31 May 2012 14:43:27 +0900
Subject: perf tools: Check if callchain is corrupted

We faced segmentation fault on perf top -G at very high sampling rate
due to a corrupted callchain. While the root cause was not revealed (I
failed to figure it out), this patch tries to protect us from the
segfault on such cases.

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Namhyung Kim <namhyung.kim@lge.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sunjin Yang <fan4326@gmail.com>
Link: http://lkml.kernel.org/r/1338443007-24857-2-git-send-email-namhyung.kim@lge.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/linux/perf_event.h |  4 ++--
 tools/perf/util/session.c  | 14 +++++++++++++-
 2 files changed, 15 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f32578634d9d..1817d4015e5f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -555,6 +555,8 @@ enum perf_event_type {
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
+#define PERF_MAX_STACK_DEPTH		255
+
 enum perf_callchain_context {
 	PERF_CONTEXT_HV			= (__u64)-32,
 	PERF_CONTEXT_KERNEL		= (__u64)-128,
@@ -609,8 +611,6 @@ struct perf_guest_info_callbacks {
 #include <linux/sysfs.h>
 #include <asm/local.h>
 
-#define PERF_MAX_STACK_DEPTH		255
-
 struct perf_callchain_entry {
 	__u64				nr;
 	__u64				ip[PERF_MAX_STACK_DEPTH];
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 3b6f8e460a31..04d1e33f4592 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -300,6 +300,11 @@ int machine__resolve_callchain(struct machine *self,
 
 	callchain_cursor_reset(&callchain_cursor);
 
+	if (chain->nr > PERF_MAX_STACK_DEPTH) {
+		pr_warning("corrupted callchain. skipping...\n");
+		return 0;
+	}
+
 	for (i = 0; i < chain->nr; i++) {
 		u64 ip;
 		struct addr_location al;
@@ -318,7 +323,14 @@ int machine__resolve_callchain(struct machine *self,
 			case PERF_CONTEXT_USER:
 				cpumode = PERF_RECORD_MISC_USER;	break;
 			default:
-				break;
+				pr_debug("invalid callchain context: "
+					 "%"PRId64"\n", (s64) ip);
+				/*
+				 * It seems the callchain is corrupted.
+				 * Discard all.
+				 */
+				callchain_cursor_reset(&callchain_cursor);
+				return 0;
 			}
 			continue;
 		}
-- 
cgit 


From d007794a182bc072a7b7479909dbd0d67ba341be Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 13:11:37 -0400
Subject: split cap_mmap_addr() out of cap_file_mmap()

... switch callers.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h   |  3 ++-
 security/apparmor/lsm.c    |  2 +-
 security/commoncap.c       | 32 +++++++++++++++++++++++---------
 security/selinux/hooks.c   |  2 +-
 security/smack/smack_lsm.c |  2 +-
 5 files changed, 28 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ab0e091ce5fa..4ad59c9fa731 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -86,6 +86,7 @@ extern int cap_inode_setxattr(struct dentry *dentry, const char *name,
 extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
 extern int cap_inode_need_killpriv(struct dentry *dentry);
 extern int cap_inode_killpriv(struct dentry *dentry);
+extern int cap_mmap_addr(unsigned long addr);
 extern int cap_file_mmap(struct file *file, unsigned long reqprot,
 			 unsigned long prot, unsigned long flags,
 			 unsigned long addr, unsigned long addr_only);
@@ -2187,7 +2188,7 @@ static inline int security_file_mmap(struct file *file, unsigned long reqprot,
 				     unsigned long addr,
 				     unsigned long addr_only)
 {
-	return cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
+	return cap_mmap_addr(addr);
 }
 
 static inline int security_file_mprotect(struct vm_area_struct *vma,
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 032daab449b0..8430d8937afb 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -497,7 +497,7 @@ static int apparmor_file_mmap(struct file *file, unsigned long reqprot,
 	int rc = 0;
 
 	/* do DAC check */
-	rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
+	rc = cap_mmap_addr(addr);
 	if (rc || addr_only)
 		return rc;
 
diff --git a/security/commoncap.c b/security/commoncap.c
index e771cb1b2d79..ebac3618896e 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -958,22 +958,15 @@ int cap_vm_enough_memory(struct mm_struct *mm, long pages)
 }
 
 /*
- * cap_file_mmap - check if able to map given addr
- * @file: unused
- * @reqprot: unused
- * @prot: unused
- * @flags: unused
+ * cap_mmap_addr - check if able to map given addr
  * @addr: address attempting to be mapped
- * @addr_only: unused
  *
  * If the process is attempting to map memory below dac_mmap_min_addr they need
  * CAP_SYS_RAWIO.  The other parameters to this function are unused by the
  * capability security module.  Returns 0 if this mapping should be allowed
  * -EPERM if not.
  */
-int cap_file_mmap(struct file *file, unsigned long reqprot,
-		  unsigned long prot, unsigned long flags,
-		  unsigned long addr, unsigned long addr_only)
+int cap_mmap_addr(unsigned long addr)
 {
 	int ret = 0;
 
@@ -986,3 +979,24 @@ int cap_file_mmap(struct file *file, unsigned long reqprot,
 	}
 	return ret;
 }
+
+/*
+ * cap_file_mmap - check if able to map given addr
+ * @file: unused
+ * @reqprot: unused
+ * @prot: unused
+ * @flags: unused
+ * @addr: address attempting to be mapped
+ * @addr_only: unused
+ *
+ * If the process is attempting to map memory below dac_mmap_min_addr they need
+ * CAP_SYS_RAWIO.  The other parameters to this function are unused by the
+ * capability security module.  Returns 0 if this mapping should be allowed
+ * -EPERM if not.
+ */
+int cap_file_mmap(struct file *file, unsigned long reqprot,
+		  unsigned long prot, unsigned long flags,
+		  unsigned long addr, unsigned long addr_only)
+{
+	return cap_mmap_addr(addr);
+}
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index fa2341b68331..25c125eaa3d8 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3104,7 +3104,7 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot,
 	}
 
 	/* do DAC check on address space usage */
-	rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
+	rc = cap_mmap_addr(addr);
 	if (rc || addr_only)
 		return rc;
 
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index d583c0545808..a62197718768 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1199,7 +1199,7 @@ static int smack_file_mmap(struct file *file,
 	int rc;
 
 	/* do DAC check on address space usage */
-	rc = cap_file_mmap(file, reqprot, prot, flags, addr, addr_only);
+	rc = cap_mmap_addr(addr);
 	if (rc || addr_only)
 		return rc;
 
-- 
cgit 


From e5467859f7f79b69fc49004403009dfdba3bec53 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 13:30:51 -0400
Subject: split ->file_mmap() into ->mmap_addr()/->mmap_file()

... i.e. file-dependent and address-dependent checks.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/exec.c                  |  4 ----
 include/linux/security.h   | 36 ++++++++++++++++++++----------------
 mm/mmap.c                  | 12 ++++++++----
 mm/mremap.c                |  4 ++--
 mm/nommu.c                 |  5 ++++-
 security/apparmor/lsm.c    | 15 ++++-----------
 security/capability.c      |  3 ++-
 security/commoncap.c       | 21 +++------------------
 security/security.c        | 12 ++++++++----
 security/selinux/hooks.c   | 15 ++++++++-------
 security/smack/smack_lsm.c | 15 +++++----------
 11 files changed, 64 insertions(+), 78 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 52c9e2ff6e6b..a79786a8d2c8 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -280,10 +280,6 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
-	err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
-	if (err)
-		goto err;
-
 	err = insert_vm_struct(mm, vma);
 	if (err)
 		goto err;
diff --git a/include/linux/security.h b/include/linux/security.h
index 4ad59c9fa731..f1bae0963ddc 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -87,9 +87,8 @@ extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
 extern int cap_inode_need_killpriv(struct dentry *dentry);
 extern int cap_inode_killpriv(struct dentry *dentry);
 extern int cap_mmap_addr(unsigned long addr);
-extern int cap_file_mmap(struct file *file, unsigned long reqprot,
-			 unsigned long prot, unsigned long flags,
-			 unsigned long addr, unsigned long addr_only);
+extern int cap_mmap_file(struct file *file, unsigned long reqprot,
+			 unsigned long prot, unsigned long flags);
 extern int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags);
 extern int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
 			  unsigned long arg4, unsigned long arg5);
@@ -587,15 +586,17 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	simple integer value.  When @arg represents a user space pointer, it
  *	should never be used by the security module.
  *	Return 0 if permission is granted.
- * @file_mmap :
+ * @mmap_addr :
+ *	Check permissions for a mmap operation at @addr.
+ *	@addr contains virtual address that will be used for the operation.
+ *	Return 0 if permission is granted.
+ * @mmap_file :
  *	Check permissions for a mmap operation.  The @file may be NULL, e.g.
  *	if mapping anonymous memory.
  *	@file contains the file structure for file to map (may be NULL).
  *	@reqprot contains the protection requested by the application.
  *	@prot contains the protection that will be applied by the kernel.
  *	@flags contains the operational flags.
- *	@addr contains virtual address that will be used for the operation.
- *	@addr_only contains a boolean: 0 if file-backed VMA, otherwise 1.
  *	Return 0 if permission is granted.
  * @file_mprotect:
  *	Check permissions before changing memory access permissions.
@@ -1482,10 +1483,10 @@ struct security_operations {
 	void (*file_free_security) (struct file *file);
 	int (*file_ioctl) (struct file *file, unsigned int cmd,
 			   unsigned long arg);
-	int (*file_mmap) (struct file *file,
+	int (*mmap_addr) (unsigned long addr);
+	int (*mmap_file) (struct file *file,
 			  unsigned long reqprot, unsigned long prot,
-			  unsigned long flags, unsigned long addr,
-			  unsigned long addr_only);
+			  unsigned long flags);
 	int (*file_mprotect) (struct vm_area_struct *vma,
 			      unsigned long reqprot,
 			      unsigned long prot);
@@ -1744,9 +1745,9 @@ int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_free(struct file *file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-int security_file_mmap(struct file *file, unsigned long reqprot,
-			unsigned long prot, unsigned long flags,
-			unsigned long addr, unsigned long addr_only);
+int security_mmap_file(struct file *file, unsigned long reqprot,
+			unsigned long prot, unsigned long flags);
+int security_mmap_addr(unsigned long addr);
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
 int security_file_lock(struct file *file, unsigned int cmd);
@@ -2182,11 +2183,14 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 	return 0;
 }
 
-static inline int security_file_mmap(struct file *file, unsigned long reqprot,
+static inline int security_mmap_file(struct file *file, unsigned long reqprot,
 				     unsigned long prot,
-				     unsigned long flags,
-				     unsigned long addr,
-				     unsigned long addr_only)
+				     unsigned long flags)
+{
+	return 0;
+}
+
+static inline int security_mmap_addr(unsigned long addr)
 {
 	return cap_mmap_addr(addr);
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 83c56624f1f6..49283da9a2ae 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1101,7 +1101,11 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
-	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+	error = security_mmap_addr(addr);
+	if (error)
+		return error;
+
+	error = security_mmap_file(file, reqprot, prot, flags);
 	if (error)
 		return error;
 
@@ -1817,7 +1821,7 @@ int expand_downwards(struct vm_area_struct *vma,
 		return -ENOMEM;
 
 	address &= PAGE_MASK;
-	error = security_file_mmap(NULL, 0, 0, 0, address, 1);
+	error = security_mmap_addr(address);
 	if (error)
 		return error;
 
@@ -2205,7 +2209,7 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 	if (!len)
 		return addr;
 
-	error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
+	error = security_mmap_addr(addr);
 	if (error)
 		return error;
 
@@ -2561,7 +2565,7 @@ int install_special_mapping(struct mm_struct *mm,
 	vma->vm_ops = &special_mapping_vmops;
 	vma->vm_private_data = pages;
 
-	ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
+	ret = security_mmap_addr(vma->vm_start);
 	if (ret)
 		goto out;
 
diff --git a/mm/mremap.c b/mm/mremap.c
index 169c53b87749..ebf10892b63d 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -371,7 +371,7 @@ static unsigned long mremap_to(unsigned long addr,
 	if ((addr <= new_addr) && (addr+old_len) > new_addr)
 		goto out;
 
-	ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+	ret = security_mmap_addr(new_addr);
 	if (ret)
 		goto out;
 
@@ -532,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len,
 			goto out;
 		}
 
-		ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+		ret = security_mmap_addr(new_addr);
 		if (ret)
 			goto out;
 		ret = move_vma(vma, addr, old_len, new_len, new_addr);
diff --git a/mm/nommu.c b/mm/nommu.c
index de6084e3a046..acfe419785db 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1047,7 +1047,10 @@ static int validate_mmap_request(struct file *file,
 	}
 
 	/* allow the security API to have its say */
-	ret = security_file_mmap(file, reqprot, prot, flags, addr, 0);
+	ret = security_mmap_addr(addr);
+	if (ret < 0)
+		return ret;
+	ret = security_mmap_file(file, reqprot, prot, flags);
 	if (ret < 0)
 		return ret;
 
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index 8430d8937afb..8ea39aabe948 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -490,17 +490,9 @@ static int common_mmap(int op, struct file *file, unsigned long prot,
 	return common_file_perm(op, file, mask);
 }
 
-static int apparmor_file_mmap(struct file *file, unsigned long reqprot,
-			      unsigned long prot, unsigned long flags,
-			      unsigned long addr, unsigned long addr_only)
+static int apparmor_mmap_file(struct file *file, unsigned long reqprot,
+			      unsigned long prot, unsigned long flags)
 {
-	int rc = 0;
-
-	/* do DAC check */
-	rc = cap_mmap_addr(addr);
-	if (rc || addr_only)
-		return rc;
-
 	return common_mmap(OP_FMMAP, file, prot, flags);
 }
 
@@ -646,7 +638,8 @@ static struct security_operations apparmor_ops = {
 	.file_permission =		apparmor_file_permission,
 	.file_alloc_security =		apparmor_file_alloc_security,
 	.file_free_security =		apparmor_file_free_security,
-	.file_mmap =			apparmor_file_mmap,
+	.mmap_file =			apparmor_mmap_file,
+	.mmap_addr =			cap_mmap_addr,
 	.file_mprotect =		apparmor_file_mprotect,
 	.file_lock =			apparmor_file_lock,
 
diff --git a/security/capability.c b/security/capability.c
index fca889676c5e..61095df8b89a 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -949,7 +949,8 @@ void __init security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, file_alloc_security);
 	set_to_cap_if_null(ops, file_free_security);
 	set_to_cap_if_null(ops, file_ioctl);
-	set_to_cap_if_null(ops, file_mmap);
+	set_to_cap_if_null(ops, mmap_addr);
+	set_to_cap_if_null(ops, mmap_file);
 	set_to_cap_if_null(ops, file_mprotect);
 	set_to_cap_if_null(ops, file_lock);
 	set_to_cap_if_null(ops, file_fcntl);
diff --git a/security/commoncap.c b/security/commoncap.c
index ebac3618896e..6dbae4650abe 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -980,23 +980,8 @@ int cap_mmap_addr(unsigned long addr)
 	return ret;
 }
 
-/*
- * cap_file_mmap - check if able to map given addr
- * @file: unused
- * @reqprot: unused
- * @prot: unused
- * @flags: unused
- * @addr: address attempting to be mapped
- * @addr_only: unused
- *
- * If the process is attempting to map memory below dac_mmap_min_addr they need
- * CAP_SYS_RAWIO.  The other parameters to this function are unused by the
- * capability security module.  Returns 0 if this mapping should be allowed
- * -EPERM if not.
- */
-int cap_file_mmap(struct file *file, unsigned long reqprot,
-		  unsigned long prot, unsigned long flags,
-		  unsigned long addr, unsigned long addr_only)
+int cap_mmap_file(struct file *file, unsigned long reqprot,
+		  unsigned long prot, unsigned long flags)
 {
-	return cap_mmap_addr(addr);
+	return 0;
 }
diff --git a/security/security.c b/security/security.c
index 5497a57fba01..d91c66d3956b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -657,18 +657,22 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return security_ops->file_ioctl(file, cmd, arg);
 }
 
-int security_file_mmap(struct file *file, unsigned long reqprot,
-			unsigned long prot, unsigned long flags,
-			unsigned long addr, unsigned long addr_only)
+int security_mmap_file(struct file *file, unsigned long reqprot,
+			unsigned long prot, unsigned long flags)
 {
 	int ret;
 
-	ret = security_ops->file_mmap(file, reqprot, prot, flags, addr, addr_only);
+	ret = security_ops->mmap_file(file, reqprot, prot, flags);
 	if (ret)
 		return ret;
 	return ima_file_mmap(file, prot);
 }
 
+int security_mmap_addr(unsigned long addr)
+{
+	return security_ops->mmap_addr(addr);
+}
+
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			    unsigned long prot)
 {
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 25c125eaa3d8..372ec6502aa8 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -3083,9 +3083,7 @@ error:
 	return rc;
 }
 
-static int selinux_file_mmap(struct file *file, unsigned long reqprot,
-			     unsigned long prot, unsigned long flags,
-			     unsigned long addr, unsigned long addr_only)
+static int selinux_mmap_addr(unsigned long addr)
 {
 	int rc = 0;
 	u32 sid = current_sid();
@@ -3104,10 +3102,12 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot,
 	}
 
 	/* do DAC check on address space usage */
-	rc = cap_mmap_addr(addr);
-	if (rc || addr_only)
-		return rc;
+	return cap_mmap_addr(addr);
+}
 
+static int selinux_mmap_file(struct file *file, unsigned long reqprot,
+			     unsigned long prot, unsigned long flags)
+{
 	if (selinux_checkreqprot)
 		prot = reqprot;
 
@@ -5570,7 +5570,8 @@ static struct security_operations selinux_ops = {
 	.file_alloc_security =		selinux_file_alloc_security,
 	.file_free_security =		selinux_file_free_security,
 	.file_ioctl =			selinux_file_ioctl,
-	.file_mmap =			selinux_file_mmap,
+	.mmap_file =			selinux_mmap_file,
+	.mmap_addr =			selinux_mmap_addr,
 	.file_mprotect =		selinux_file_mprotect,
 	.file_lock =			selinux_file_lock,
 	.file_fcntl =			selinux_file_fcntl,
diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c
index a62197718768..ee0bb5735f35 100644
--- a/security/smack/smack_lsm.c
+++ b/security/smack/smack_lsm.c
@@ -1171,7 +1171,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
 }
 
 /**
- * smack_file_mmap :
+ * smack_mmap_file :
  * Check permissions for a mmap operation.  The @file may be NULL, e.g.
  * if mapping anonymous memory.
  * @file contains the file structure for file to map (may be NULL).
@@ -1180,10 +1180,9 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd,
  * @flags contains the operational flags.
  * Return 0 if permission is granted.
  */
-static int smack_file_mmap(struct file *file,
+static int smack_mmap_file(struct file *file,
 			   unsigned long reqprot, unsigned long prot,
-			   unsigned long flags, unsigned long addr,
-			   unsigned long addr_only)
+			   unsigned long flags)
 {
 	struct smack_known *skp;
 	struct smack_rule *srp;
@@ -1198,11 +1197,6 @@ static int smack_file_mmap(struct file *file,
 	int tmay;
 	int rc;
 
-	/* do DAC check on address space usage */
-	rc = cap_mmap_addr(addr);
-	if (rc || addr_only)
-		return rc;
-
 	if (file == NULL || file->f_dentry == NULL)
 		return 0;
 
@@ -3482,7 +3476,8 @@ struct security_operations smack_ops = {
 	.file_ioctl = 			smack_file_ioctl,
 	.file_lock = 			smack_file_lock,
 	.file_fcntl = 			smack_file_fcntl,
-	.file_mmap =			smack_file_mmap,
+	.mmap_file =			smack_mmap_file,
+	.mmap_addr =			cap_mmap_addr,
 	.file_set_fowner = 		smack_file_set_fowner,
 	.file_send_sigiotask = 		smack_file_send_sigiotask,
 	.file_receive = 		smack_file_receive,
-- 
cgit 


From 1d59d61f606547f0712aa6971f91f71154071c99 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 31 May 2012 12:22:33 -0400
Subject: NFS: Ensure that setattr and getattr wait for O_DIRECT write
 completion

Use the same mechanism as the block devices are using, but move the
helper functions from fs/direct-io.c into fs/inode.c to remove the
dependency on CONFIG_BLOCK.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Fred Isaman <iisaman@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/direct-io.c     | 44 --------------------------------------------
 fs/inode.c         | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/nfs/direct.c    | 15 ++++++++++++---
 fs/nfs/inode.c     |  5 ++++-
 fs/nfs/internal.h  |  4 ++++
 include/linux/fs.h |  9 +++------
 6 files changed, 70 insertions(+), 54 deletions(-)

(limited to 'include/linux')

diff --git a/fs/direct-io.c b/fs/direct-io.c
index f4aadd15b613..0c85fae37666 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -145,50 +145,6 @@ struct dio {
 
 static struct kmem_cache *dio_cache __read_mostly;
 
-static void __inode_dio_wait(struct inode *inode)
-{
-	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
-	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
-	do {
-		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&inode->i_dio_count))
-			schedule();
-	} while (atomic_read(&inode->i_dio_count));
-	finish_wait(wq, &q.wait);
-}
-
-/**
- * inode_dio_wait - wait for outstanding DIO requests to finish
- * @inode: inode to wait for
- *
- * Waits for all pending direct I/O requests to finish so that we can
- * proceed with a truncate or equivalent operation.
- *
- * Must be called under a lock that serializes taking new references
- * to i_dio_count, usually by inode->i_mutex.
- */
-void inode_dio_wait(struct inode *inode)
-{
-	if (atomic_read(&inode->i_dio_count))
-		__inode_dio_wait(inode);
-}
-EXPORT_SYMBOL(inode_dio_wait);
-
-/*
- * inode_dio_done - signal finish of a direct I/O requests
- * @inode: inode the direct I/O happens on
- *
- * This is called once we've finished processing a direct I/O request,
- * and is used to wake up callers waiting for direct I/O to be quiesced.
- */
-void inode_dio_done(struct inode *inode)
-{
-	if (atomic_dec_and_test(&inode->i_dio_count))
-		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
-}
-EXPORT_SYMBOL(inode_dio_done);
-
 /*
  * How many pages are in the queue?
  */
diff --git a/fs/inode.c b/fs/inode.c
index 6bc8761cc333..c474c1d7062b 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1748,3 +1748,50 @@ bool inode_owner_or_capable(const struct inode *inode)
 	return false;
 }
 EXPORT_SYMBOL(inode_owner_or_capable);
+
+/*
+ * Direct i/o helper functions
+ */
+static void __inode_dio_wait(struct inode *inode)
+{
+	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
+	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
+
+	do {
+		prepare_to_wait(wq, &q.wait, TASK_UNINTERRUPTIBLE);
+		if (atomic_read(&inode->i_dio_count))
+			schedule();
+	} while (atomic_read(&inode->i_dio_count));
+	finish_wait(wq, &q.wait);
+}
+
+/**
+ * inode_dio_wait - wait for outstanding DIO requests to finish
+ * @inode: inode to wait for
+ *
+ * Waits for all pending direct I/O requests to finish so that we can
+ * proceed with a truncate or equivalent operation.
+ *
+ * Must be called under a lock that serializes taking new references
+ * to i_dio_count, usually by inode->i_mutex.
+ */
+void inode_dio_wait(struct inode *inode)
+{
+	if (atomic_read(&inode->i_dio_count))
+		__inode_dio_wait(inode);
+}
+EXPORT_SYMBOL(inode_dio_wait);
+
+/*
+ * inode_dio_done - signal finish of a direct I/O requests
+ * @inode: inode the direct I/O happens on
+ *
+ * This is called once we've finished processing a direct I/O request,
+ * and is used to wake up callers waiting for direct I/O to be quiesced.
+ */
+void inode_dio_done(struct inode *inode)
+{
+	if (atomic_dec_and_test(&inode->i_dio_count))
+		wake_up_bit(&inode->i_state, __I_DIO_WAKEUP);
+}
+EXPORT_SYMBOL(inode_dio_done);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 23d170bc44f4..ad2775d3e219 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -454,6 +454,12 @@ out:
 	return result;
 }
 
+static void nfs_inode_dio_write_done(struct inode *inode)
+{
+	nfs_zap_mapping(inode, inode->i_mapping);
+	inode_dio_done(inode);
+}
+
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
 {
@@ -564,7 +570,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
 			nfs_direct_write_reschedule(dreq);
 			break;
 		default:
-			nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
+			nfs_inode_dio_write_done(dreq->inode);
 			nfs_direct_complete(dreq);
 	}
 }
@@ -581,7 +587,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
 {
-	nfs_zap_mapping(inode, inode->i_mapping);
+	nfs_inode_dio_write_done(inode);
 	nfs_direct_complete(dreq);
 }
 #endif
@@ -766,14 +772,16 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
 	unsigned long seg;
 
-	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_COND_STABLE,
+	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE,
 			      &nfs_direct_write_completion_ops);
 	desc.pg_dreq = dreq;
 	get_dreq(dreq);
+	atomic_inc(&inode->i_dio_count);
 
 	for (seg = 0; seg < nr_segs; seg++) {
 		const struct iovec *vec = &iov[seg];
@@ -793,6 +801,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	 * generic layer handle the completion.
 	 */
 	if (requested_bytes == 0) {
+		inode_dio_done(inode);
 		nfs_direct_req_release(dreq);
 		return result < 0 ? result : -EIO;
 	}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 2f6f78c4b42d..e605d695dbcb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -418,8 +418,10 @@ nfs_setattr(struct dentry *dentry, struct iattr *attr)
 		return 0;
 
 	/* Write all dirty data */
-	if (S_ISREG(inode->i_mode))
+	if (S_ISREG(inode->i_mode)) {
+		nfs_inode_dio_wait(inode);
 		nfs_wb_all(inode);
+	}
 
 	fattr = nfs_alloc_fattr();
 	if (fattr == NULL)
@@ -503,6 +505,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 
 	/* Flush out writes to the server in order to update c/mtime.  */
 	if (S_ISREG(inode->i_mode)) {
+		nfs_inode_dio_wait(inode);
 		err = filemap_write_and_wait(inode->i_mapping);
 		if (err)
 			goto out;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 1848a7275592..18f99ef71343 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -369,6 +369,10 @@ extern int nfs_migrate_page(struct address_space *,
 /* direct.c */
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
 			      struct nfs_direct_req *dreq);
+static inline void nfs_inode_dio_wait(struct inode *inode)
+{
+	inode_dio_wait(inode);
+}
 
 /* nfs4proc.c */
 extern void __nfs4_read_done_cb(struct nfs_read_data *);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 038076b27ea4..598a5892ff2b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2453,8 +2453,6 @@ enum {
 };
 
 void dio_end_io(struct bio *bio, int error);
-void inode_dio_wait(struct inode *inode);
-void inode_dio_done(struct inode *inode);
 
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
@@ -2469,12 +2467,11 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
 				    offset, nr_segs, get_block, NULL, NULL,
 				    DIO_LOCKING | DIO_SKIP_HOLES);
 }
-#else
-static inline void inode_dio_wait(struct inode *inode)
-{
-}
 #endif
 
+void inode_dio_wait(struct inode *inode);
+void inode_dio_done(struct inode *inode);
+
 extern const struct file_operations generic_ro_fops;
 
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
-- 
cgit 


From 3fc929e2d693185aac2686e5e64e24eae10642a4 Mon Sep 17 00:00:00 2001
From: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Date: Wed, 30 May 2012 09:14:51 +0000
Subject: net/mlx4_core: Fix number of EQs used in ICM initialisation

In SRIOV mode, the number of EQs used when computing the total ICM size
was incorrect.

To fix this, we do the following:
1. We add a new structure to mlx4_dev, mlx4_phys_caps, to contain physical HCA
   capabilities.  The PPF uses the phys capabilities when it computes things
   like ICM size.

   The dev_caps structure will then contain the paravirtualized values, making
   bookkeeping much easier in SRIOV mode. We add a structure rather than a
   single parameter because there will be other fields in the phys_caps.

   The first field we add to the mlx4_phys_caps structure is num_phys_eqs.

2. In INIT_HCA, when running in SRIOV mode, the "log_num_eqs" parameter
   passed to the FW is the number of EQs per VF/PF; each function (PF or VF)
   has this number of EQs available.

   However, the total number of EQs which must be allowed for in the ICM is
   (1 << log_num_eqs) * (#VFs + #PFs).  Rather than compute this quantity,
   we allocate ICM space for 1024 EQs (which is the device maximum
   number of EQs, and which is the value we place in the mlx4_phys_caps structure).

   For INIT_HCA, however, we use the per-function number of EQs as described
   above.

Signed-off-by: Marcel Apfelbaum <marcela@dev.mellanox.co.il>
Signed-off-by: Jack Morgenstein <jackm@dev.mellanox.co.il>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/main.c    | 17 +++++------------
 drivers/net/ethernet/mellanox/mlx4/profile.c |  9 ++++++---
 include/linux/mlx4/device.h                  |  6 ++++++
 3 files changed, 17 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c
index 2e024a68fa81..2fe9fe5f7cfb 100644
--- a/drivers/net/ethernet/mellanox/mlx4/main.c
+++ b/drivers/net/ethernet/mellanox/mlx4/main.c
@@ -142,12 +142,6 @@ struct mlx4_port_config {
 	struct pci_dev *pdev;
 };
 
-static inline int mlx4_master_get_num_eqs(struct mlx4_dev *dev)
-{
-	return dev->caps.reserved_eqs +
-		MLX4_MFUNC_EQ_NUM * (dev->num_slaves + 1);
-}
-
 int mlx4_check_port_params(struct mlx4_dev *dev,
 			   enum mlx4_port_type *port_type)
 {
@@ -217,6 +211,7 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap)
 	}
 
 	dev->caps.num_ports	     = dev_cap->num_ports;
+	dev->phys_caps.num_phys_eqs  = MLX4_MAX_EQ_NUM;
 	for (i = 1; i <= dev->caps.num_ports; ++i) {
 		dev->caps.vl_cap[i]	    = dev_cap->max_vl[i];
 		dev->caps.ib_mtu_cap[i]	    = dev_cap->ib_mtu[i];
@@ -810,9 +805,8 @@ static int mlx4_init_cmpt_table(struct mlx4_dev *dev, u64 cmpt_base,
 	if (err)
 		goto err_srq;
 
-	num_eqs = (mlx4_is_master(dev)) ?
-		roundup_pow_of_two(mlx4_master_get_num_eqs(dev)) :
-		dev->caps.num_eqs;
+	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+		  dev->caps.num_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.cmpt_table,
 				  cmpt_base +
 				  ((u64) (MLX4_CMPT_TYPE_EQ *
@@ -874,9 +868,8 @@ static int mlx4_init_icm(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap,
 	}
 
 
-	num_eqs = (mlx4_is_master(dev)) ?
-		roundup_pow_of_two(mlx4_master_get_num_eqs(dev)) :
-		dev->caps.num_eqs;
+	num_eqs = (mlx4_is_master(dev)) ? dev->phys_caps.num_phys_eqs :
+		   dev->caps.num_eqs;
 	err = mlx4_init_icm_table(dev, &priv->eq_table.table,
 				  init_hca->eqc_base, dev_cap->eqc_entry_sz,
 				  num_eqs, num_eqs, 0, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx4/profile.c b/drivers/net/ethernet/mellanox/mlx4/profile.c
index 06e5adeb76f7..b83bc928d52a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/profile.c
+++ b/drivers/net/ethernet/mellanox/mlx4/profile.c
@@ -126,7 +126,9 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 	profile[MLX4_RES_AUXC].num    = request->num_qp;
 	profile[MLX4_RES_SRQ].num     = request->num_srq;
 	profile[MLX4_RES_CQ].num      = request->num_cq;
-	profile[MLX4_RES_EQ].num      = min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
+	profile[MLX4_RES_EQ].num      = mlx4_is_mfunc(dev) ?
+					dev->phys_caps.num_phys_eqs :
+					min_t(unsigned, dev_cap->max_eqs, MAX_MSIX);
 	profile[MLX4_RES_DMPT].num    = request->num_mpt;
 	profile[MLX4_RES_CMPT].num    = MLX4_NUM_CMPTS;
 	profile[MLX4_RES_MTT].num     = request->num_mtt * (1 << log_mtts_per_seg);
@@ -215,9 +217,10 @@ u64 mlx4_make_profile(struct mlx4_dev *dev,
 			init_hca->log_num_cqs = profile[i].log_num;
 			break;
 		case MLX4_RES_EQ:
-			dev->caps.num_eqs     = profile[i].num;
+			dev->caps.num_eqs     = roundup_pow_of_two(min_t(unsigned, dev_cap->max_eqs,
+									 MAX_MSIX));
 			init_hca->eqc_base    = profile[i].start;
-			init_hca->log_num_eqs = profile[i].log_num;
+			init_hca->log_num_eqs = ilog2(dev->caps.num_eqs);
 			break;
 		case MLX4_RES_DMPT:
 			dev->caps.num_mpts	= profile[i].num;
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index 6e27fa99e8b9..6a8f002b8ed3 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -64,6 +64,7 @@ enum {
 	MLX4_MAX_NUM_PF		= 16,
 	MLX4_MAX_NUM_VF		= 64,
 	MLX4_MFUNC_MAX		= 80,
+	MLX4_MAX_EQ_NUM		= 1024,
 	MLX4_MFUNC_EQ_NUM	= 4,
 	MLX4_MFUNC_MAX_EQES     = 8,
 	MLX4_MFUNC_EQE_MASK     = (MLX4_MFUNC_MAX_EQES - 1)
@@ -239,6 +240,10 @@ static inline u64 mlx4_fw_ver(u64 major, u64 minor, u64 subminor)
 	return (major << 32) | (minor << 16) | subminor;
 }
 
+struct mlx4_phys_caps {
+	u32			num_phys_eqs;
+};
+
 struct mlx4_caps {
 	u64			fw_ver;
 	u32			function;
@@ -499,6 +504,7 @@ struct mlx4_dev {
 	unsigned long		flags;
 	unsigned long		num_slaves;
 	struct mlx4_caps	caps;
+	struct mlx4_phys_caps	phys_caps;
 	struct radix_tree_root	qp_table_tree;
 	u8			rev_id;
 	char			board_id[MLX4_BOARD_ID_LEN];
-- 
cgit 


From 9793f7c88937e7ac07305ab1af1a519225836823 Mon Sep 17 00:00:00 2001
From: Stanislav Kinsbursky <skinsbursky@parallels.com>
Date: Wed, 2 May 2012 16:08:38 +0400
Subject: SUNRPC: new svc_bind() routine introduced

This new routine is responsible for service registration in a specified
network context.

The idea is to separate service creation from per-net operations.

Note also: since registering service with svc_bind() can fail, the
service will be destroyed and during destruction it will try to
unregister itself from rpcbind. In this case unregistration has to be
skipped.

Signed-off-by: Stanislav Kinsbursky <skinsbursky@parallels.com>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/lockd/svc.c             |  6 ++++++
 fs/nfs/callback.c          |  8 ++++++++
 fs/nfsd/nfssvc.c           |  9 +++++++++
 include/linux/sunrpc/svc.h |  1 +
 net/sunrpc/rpcb_clnt.c     | 12 +++++++-----
 net/sunrpc/svc.c           | 19 ++++++++++---------
 6 files changed, 41 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1ead0750cdbb..b7e92ed56885 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -324,6 +324,12 @@ int lockd_up(struct net *net)
 		goto out;
 	}
 
+	error = svc_bind(serv, net);
+	if (error < 0) {
+		printk(KERN_WARNING "lockd_up: bind service failed\n");
+		goto destroy_and_out;
+	}
+
 	error = make_socks(serv, net);
 	if (error < 0)
 		goto destroy_and_out;
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index eb95f5091c1a..26b38fb8102e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -17,6 +17,7 @@
 #include <linux/kthread.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/bc_xprt.h>
+#include <linux/nsproxy.h>
 
 #include <net/inet_sock.h>
 
@@ -253,6 +254,7 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
 	char svc_name[12];
 	int ret = 0;
 	int minorversion_setup;
+	struct net *net = current->nsproxy->net_ns;
 
 	mutex_lock(&nfs_callback_mutex);
 	if (cb_info->users++ || cb_info->task != NULL) {
@@ -265,6 +267,12 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
 		goto out_err;
 	}
 
+	ret = svc_bind(serv, net);
+	if (ret < 0) {
+		printk(KERN_WARNING "NFS: bind callback service failed\n");
+		goto out_err;
+	}
+
 	minorversion_setup =  nfs_minorversion_callback_svc_setup(minorversion,
 					serv, xprt, &rqstp, &callback_svc);
 	if (!minorversion_setup) {
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index cb4d51d8cbdb..0762f3c9e0fb 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/fs_struct.h>
 #include <linux/swap.h>
+#include <linux/nsproxy.h>
 
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svcsock.h>
@@ -330,6 +331,8 @@ static int nfsd_get_default_max_blksize(void)
 
 int nfsd_create_serv(void)
 {
+	int error;
+
 	WARN_ON(!mutex_is_locked(&nfsd_mutex));
 	if (nfsd_serv) {
 		svc_get(nfsd_serv);
@@ -343,6 +346,12 @@ int nfsd_create_serv(void)
 	if (nfsd_serv == NULL)
 		return -ENOMEM;
 
+	error = svc_bind(nfsd_serv, current->nsproxy->net_ns);
+	if (error < 0) {
+		svc_destroy(nfsd_serv);
+		return error;
+	}
+
 	set_max_drc();
 	do_gettimeofday(&nfssvc_boot);		/* record boot time */
 	return 0;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 51b29ac45a8e..2b43e0214261 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -416,6 +416,7 @@ struct svc_procedure {
  */
 int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
 void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
+int svc_bind(struct svc_serv *serv, struct net *net);
 struct svc_serv *svc_create(struct svc_program *, unsigned int,
 			    void (*shutdown)(struct svc_serv *, struct net *net));
 struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 78ac39fd9fe7..4c38b33ab8a8 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -180,14 +180,16 @@ void rpcb_put_local(struct net *net)
 	struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
 	struct rpc_clnt *clnt = sn->rpcb_local_clnt;
 	struct rpc_clnt *clnt4 = sn->rpcb_local_clnt4;
-	int shutdown;
+	int shutdown = 0;
 
 	spin_lock(&sn->rpcb_clnt_lock);
-	if (--sn->rpcb_users == 0) {
-		sn->rpcb_local_clnt = NULL;
-		sn->rpcb_local_clnt4 = NULL;
+	if (sn->rpcb_users) {
+		if (--sn->rpcb_users == 0) {
+			sn->rpcb_local_clnt = NULL;
+			sn->rpcb_local_clnt4 = NULL;
+		}
+		shutdown = !sn->rpcb_users;
 	}
-	shutdown = !sn->rpcb_users;
 	spin_unlock(&sn->rpcb_clnt_lock);
 
 	if (shutdown) {
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 4153846984ac..e6d542cee0f3 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -407,6 +407,14 @@ static int svc_uses_rpcbind(struct svc_serv *serv)
 	return 0;
 }
 
+int svc_bind(struct svc_serv *serv, struct net *net)
+{
+	if (!svc_uses_rpcbind(serv))
+		return 0;
+	return svc_rpcb_setup(serv, net);
+}
+EXPORT_SYMBOL_GPL(svc_bind);
+
 /*
  * Create an RPC service
  */
@@ -471,15 +479,8 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
 		spin_lock_init(&pool->sp_lock);
 	}
 
-	if (svc_uses_rpcbind(serv)) {
-		if (svc_rpcb_setup(serv, current->nsproxy->net_ns) < 0) {
-			kfree(serv->sv_pools);
-			kfree(serv);
-			return NULL;
-		}
-		if (!serv->sv_shutdown)
-			serv->sv_shutdown = svc_rpcb_cleanup;
-	}
+	if (svc_uses_rpcbind(serv) && (!serv->sv_shutdown))
+		serv->sv_shutdown = svc_rpcb_cleanup;
 
 	return serv;
 }
-- 
cgit 


From 03a4e1f6ddf25f48848e1bddcffc0ad489648331 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 14 May 2012 19:55:22 -0400
Subject: nfsd4: move principal name into svc_cred

Instead of keeping the principal name associated with a request in a
structure that's private to auth_gss and using an accessor function,
move it to svc_cred.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfs/callback.c                  |  2 +-
 fs/nfsd/nfs4callback.c             |  5 +++--
 fs/nfsd/nfs4state.c                | 34 +++++++++++++++++-----------------
 fs/nfsd/state.h                    |  1 -
 include/linux/sunrpc/svcauth.h     |  9 +++++++++
 include/linux/sunrpc/svcauth_gss.h |  1 -
 net/sunrpc/auth_gss/svcauth_gss.c  | 25 ++++++-------------------
 net/sunrpc/svcauth_unix.c          |  2 ++
 8 files changed, 38 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index cff39406f965..970659daa323 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -343,7 +343,7 @@ void nfs_callback_down(int minorversion)
 int
 check_gss_callback_principal(struct nfs_client *clp, struct svc_rqst *rqstp)
 {
-	char *p = svc_gss_principal(rqstp);
+	char *p = rqstp->rq_cred.cr_principal;
 
 	if (rqstp->rq_authop->flavour != RPC_AUTH_GSS)
 		return 1;
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c8e9f637153a..a5fd6b982f27 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -650,9 +650,10 @@ static int setup_callback_client(struct nfs4_client *clp, struct nfs4_cb_conn *c
 	struct rpc_clnt *client;
 
 	if (clp->cl_minorversion == 0) {
-		if (!clp->cl_principal && (clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
+		if (!clp->cl_cred.cr_principal &&
+				(clp->cl_flavor >= RPC_AUTH_GSS_KRB5))
 			return -EINVAL;
-		args.client_name = clp->cl_principal;
+		args.client_name = clp->cl_cred.cr_principal;
 		args.prognumber	= conn->cb_prog,
 		args.protocol = XPRT_TRANSPORT_TCP;
 		args.authflavor = clp->cl_flavor;
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 5415550a63a9..37bafb290c11 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1087,9 +1087,7 @@ free_client(struct nfs4_client *clp)
 		list_del(&ses->se_perclnt);
 		nfsd4_put_session_locked(ses);
 	}
-	if (clp->cl_cred.cr_group_info)
-		put_group_info(clp->cl_cred.cr_group_info);
-	kfree(clp->cl_principal);
+	free_svc_cred(&clp->cl_cred);
 	kfree(clp->cl_name.data);
 	kfree(clp);
 }
@@ -1170,12 +1168,20 @@ static void copy_clid(struct nfs4_client *target, struct nfs4_client *source)
 	target->cl_clientid.cl_id = source->cl_clientid.cl_id; 
 }
 
-static void copy_cred(struct svc_cred *target, struct svc_cred *source)
+static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 {
+	if (source->cr_principal) {
+		target->cr_principal =
+				kstrdup(source->cr_principal, GFP_KERNEL);
+		if (target->cr_principal == NULL)
+			return -ENOMEM;
+	} else
+		target->cr_principal = NULL;
 	target->cr_uid = source->cr_uid;
 	target->cr_gid = source->cr_gid;
 	target->cr_group_info = source->cr_group_info;
 	get_group_info(target->cr_group_info);
+	return 0;
 }
 
 static int same_name(const char *n1, const char *n2)
@@ -1242,25 +1248,20 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 {
 	struct nfs4_client *clp;
 	struct sockaddr *sa = svc_addr(rqstp);
-	char *princ;
+	int ret;
 
 	clp = alloc_client(name);
 	if (clp == NULL)
 		return NULL;
 
 	INIT_LIST_HEAD(&clp->cl_sessions);
-
-	princ = svc_gss_principal(rqstp);
-	if (princ) {
-		clp->cl_principal = kstrdup(princ, GFP_KERNEL);
-		if (clp->cl_principal == NULL) {
-			spin_lock(&client_lock);
-			free_client(clp);
-			spin_unlock(&client_lock);
-			return NULL;
-		}
+	ret = copy_cred(&clp->cl_cred, &rqstp->rq_cred);
+	if (ret) {
+		spin_lock(&client_lock);
+		free_client(clp);
+		spin_unlock(&client_lock);
+		return NULL;
 	}
-
 	idr_init(&clp->cl_stateids);
 	memcpy(clp->cl_recdir, recdir, HEXDIR_LEN);
 	atomic_set(&clp->cl_refcount, 0);
@@ -1279,7 +1280,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	copy_verf(clp, verf);
 	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
 	clp->cl_flavor = rqstp->rq_flavor;
-	copy_cred(&clp->cl_cred, &rqstp->rq_cred);
 	gen_confirm(clp);
 	clp->cl_cb_session = NULL;
 	return clp;
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 89ab137d379a..849091e16ea6 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -232,7 +232,6 @@ struct nfs4_client {
 	time_t                  cl_time;        /* time of last lease renewal */
 	struct sockaddr_storage	cl_addr; 	/* client ipaddress */
 	u32			cl_flavor;	/* setclientid pseudoflavor */
-	char			*cl_principal;	/* setclientid principal name */
 	struct svc_cred		cl_cred; 	/* setclientid principal */
 	clientid_t		cl_clientid;	/* generated by server */
 	nfs4_verifier		cl_confirm;	/* generated by server */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 2c54683b91de..16fe477a96e0 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -15,13 +15,22 @@
 #include <linux/sunrpc/msg_prot.h>
 #include <linux/sunrpc/cache.h>
 #include <linux/hash.h>
+#include <linux/cred.h>
 
 struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
 	struct group_info	*cr_group_info;
+	char			*cr_principal; /* for gss */
 };
 
+static inline void free_svc_cred(struct svc_cred *cred)
+{
+	if (cred->cr_group_info)
+		put_group_info(cred->cr_group_info);
+	kfree(cred->cr_principal);
+}
+
 struct svc_rqst;		/* forward decl */
 struct in6_addr;
 
diff --git a/include/linux/sunrpc/svcauth_gss.h b/include/linux/sunrpc/svcauth_gss.h
index 7c32daa025eb..726aff1a5201 100644
--- a/include/linux/sunrpc/svcauth_gss.h
+++ b/include/linux/sunrpc/svcauth_gss.h
@@ -22,7 +22,6 @@ int gss_svc_init_net(struct net *net);
 void gss_svc_shutdown_net(struct net *net);
 int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
 u32 svcauth_gss_flavor(struct auth_domain *dom);
-char *svc_gss_principal(struct svc_rqst *);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index f0a0cd4470b7..d091d7d09bea 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -335,7 +335,6 @@ struct rsc {
 	struct svc_cred		cred;
 	struct gss_svc_seq_data	seqdata;
 	struct gss_ctx		*mechctx;
-	char			*client_name;
 };
 
 static struct rsc *rsc_update(struct cache_detail *cd, struct rsc *new, struct rsc *old);
@@ -346,9 +345,7 @@ static void rsc_free(struct rsc *rsci)
 	kfree(rsci->handle.data);
 	if (rsci->mechctx)
 		gss_delete_sec_context(&rsci->mechctx);
-	if (rsci->cred.cr_group_info)
-		put_group_info(rsci->cred.cr_group_info);
-	kfree(rsci->client_name);
+	free_svc_cred(&rsci->cred);
 }
 
 static void rsc_put(struct kref *ref)
@@ -386,7 +383,7 @@ rsc_init(struct cache_head *cnew, struct cache_head *ctmp)
 	tmp->handle.data = NULL;
 	new->mechctx = NULL;
 	new->cred.cr_group_info = NULL;
-	new->client_name = NULL;
+	new->cred.cr_principal = NULL;
 }
 
 static void
@@ -401,8 +398,8 @@ update_rsc(struct cache_head *cnew, struct cache_head *ctmp)
 	spin_lock_init(&new->seqdata.sd_lock);
 	new->cred = tmp->cred;
 	tmp->cred.cr_group_info = NULL;
-	new->client_name = tmp->client_name;
-	tmp->client_name = NULL;
+	new->cred.cr_principal = tmp->cred.cr_principal;
+	tmp->cred.cr_principal = NULL;
 }
 
 static struct cache_head *
@@ -496,8 +493,8 @@ static int rsc_parse(struct cache_detail *cd,
 		/* get client name */
 		len = qword_get(&mesg, buf, mlen);
 		if (len > 0) {
-			rsci.client_name = kstrdup(buf, GFP_KERNEL);
-			if (!rsci.client_name)
+			rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL);
+			if (!rsci.cred.cr_principal)
 				goto out;
 		}
 
@@ -927,16 +924,6 @@ struct gss_svc_data {
 	struct rsc			*rsci;
 };
 
-char *svc_gss_principal(struct svc_rqst *rqstp)
-{
-	struct gss_svc_data *gd = (struct gss_svc_data *)rqstp->rq_auth_data;
-
-	if (gd && gd->rsci)
-		return gd->rsci->client_name;
-	return NULL;
-}
-EXPORT_SYMBOL_GPL(svc_gss_principal);
-
 static int
 svcauth_gss_set_client(struct svc_rqst *rqstp)
 {
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 9c3b9f014468..12e4897d0bf3 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -740,6 +740,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
 	struct svc_cred	*cred = &rqstp->rq_cred;
 
 	cred->cr_group_info = NULL;
+	cred->cr_principal = NULL;
 	rqstp->rq_client = NULL;
 
 	if (argv->iov_len < 3*4)
@@ -805,6 +806,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	int		len   = argv->iov_len;
 
 	cred->cr_group_info = NULL;
+	cred->cr_principal = NULL;
 	rqstp->rq_client = NULL;
 
 	if ((len -= 3*4) < 0)
-- 
cgit 


From d5497fc693a446ce9100fcf4117c3f795ddfd0d2 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Mon, 14 May 2012 22:06:49 -0400
Subject: nfsd4: move rq_flavor into svc_cred

Move the rq_flavor into struct svc_cred, and use it in setclientid and
exchange_id comparisons as well.

Signed-off-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/nfsd/auth.c                    | 2 +-
 fs/nfsd/export.c                  | 6 +++---
 fs/nfsd/nfs4idmap.c               | 4 ++--
 fs/nfsd/nfs4state.c               | 6 +++---
 include/linux/sunrpc/svc.h        | 1 -
 include/linux/sunrpc/svcauth.h    | 1 +
 net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
 net/sunrpc/svcauth_unix.c         | 4 ++--
 8 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 79717a40daba..b42eaf3aac16 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -10,7 +10,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
 	struct exp_flavor_info *end = exp->ex_flavors + exp->ex_nflavors;
 
 	for (f = exp->ex_flavors; f < end; f++) {
-		if (f->pseudoflavor == rqstp->rq_flavor)
+		if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
 			return f->flags;
 	}
 	return exp->ex_flags;
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index ec16364f782e..ba233499b9a5 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -904,13 +904,13 @@ __be32 check_nfsd_access(struct svc_export *exp, struct svc_rqst *rqstp)
 		return 0;
 	/* ip-address based client; check sec= export option: */
 	for (f = exp->ex_flavors; f < end; f++) {
-		if (f->pseudoflavor == rqstp->rq_flavor)
+		if (f->pseudoflavor == rqstp->rq_cred.cr_flavor)
 			return 0;
 	}
 	/* defaults in absence of sec= options: */
 	if (exp->ex_nflavors == 0) {
-		if (rqstp->rq_flavor == RPC_AUTH_NULL ||
-		    rqstp->rq_flavor == RPC_AUTH_UNIX)
+		if (rqstp->rq_cred.cr_flavor == RPC_AUTH_NULL ||
+		    rqstp->rq_cred.cr_flavor == RPC_AUTH_UNIX)
 			return 0;
 	}
 	return nfserr_wrongsec;
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index 286a7f8f2024..dae36f1dee95 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -605,7 +605,7 @@ numeric_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namel
 static __be32
 do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, uid_t *id)
 {
-	if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
+	if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
 		if (numeric_name_to_id(rqstp, type, name, namelen, id))
 			return 0;
 		/*
@@ -618,7 +618,7 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
 static int
 do_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
 {
-	if (nfs4_disable_idmapping && rqstp->rq_flavor < RPC_AUTH_GSS)
+	if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
 		return sprintf(name, "%u", id);
 	return idmap_id_to_name(rqstp, type, id, name);
 }
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6dc0cfb37541..c743cdf51ebc 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1177,6 +1177,7 @@ static int copy_cred(struct svc_cred *target, struct svc_cred *source)
 			return -ENOMEM;
 	} else
 		target->cr_principal = NULL;
+	target->cr_flavor = source->cr_flavor;
 	target->cr_uid = source->cr_uid;
 	target->cr_gid = source->cr_gid;
 	target->cr_group_info = source->cr_group_info;
@@ -1213,11 +1214,11 @@ static bool groups_equal(struct group_info *g1, struct group_info *g2)
 	return true;
 }
 
-/* XXX what about NGROUP */
 static int
 same_creds(struct svc_cred *cr1, struct svc_cred *cr2)
 {
-	if ((cr1->cr_uid != cr2->cr_uid)
+	if ((cr1->cr_flavor != cr2->cr_flavor)
+		|| (cr1->cr_uid != cr2->cr_uid)
 		|| (cr1->cr_gid != cr2->cr_gid)
 		|| !groups_equal(cr1->cr_group_info, cr2->cr_group_info))
 		return false;
@@ -1299,7 +1300,6 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir,
 	rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
 	copy_verf(clp, verf);
 	rpc_copy_addr((struct sockaddr *) &clp->cl_addr, sa);
-	clp->cl_flavor = rqstp->rq_flavor;
 	gen_confirm(clp);
 	clp->cl_cb_session = NULL;
 	return clp;
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 2b43e0214261..40e0a273faea 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -232,7 +232,6 @@ struct svc_rqst {
 	struct svc_pool *	rq_pool;	/* thread pool */
 	struct svc_procedure *	rq_procinfo;	/* procedure info */
 	struct auth_ops *	rq_authop;	/* authentication flavour */
-	u32			rq_flavor;	/* pseudoflavor */
 	struct svc_cred		rq_cred;	/* auth info */
 	void *			rq_xprt_ctxt;	/* transport specific context ptr */
 	struct svc_deferred_req*rq_deferred;	/* deferred request we are replaying */
diff --git a/include/linux/sunrpc/svcauth.h b/include/linux/sunrpc/svcauth.h
index 16fe477a96e0..dd74084a9799 100644
--- a/include/linux/sunrpc/svcauth.h
+++ b/include/linux/sunrpc/svcauth.h
@@ -21,6 +21,7 @@ struct svc_cred {
 	uid_t			cr_uid;
 	gid_t			cr_gid;
 	struct group_info	*cr_group_info;
+	u32			cr_flavor; /* pseudoflavor */
 	char			*cr_principal; /* for gss */
 };
 
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index d091d7d09bea..bcb773781ec0 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1202,7 +1202,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
 		}
 		svcdata->rsci = rsci;
 		cache_get(&rsci->h);
-		rqstp->rq_flavor = gss_svc_to_pseudoflavor(
+		rqstp->rq_cred.cr_flavor = gss_svc_to_pseudoflavor(
 					rsci->mechctx->mech_type, gc->gc_svc);
 		ret = SVC_OK;
 		goto out;
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index 12e4897d0bf3..88962cf34377 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -768,7 +768,7 @@ svcauth_null_accept(struct svc_rqst *rqstp, __be32 *authp)
 	svc_putnl(resv, RPC_AUTH_NULL);
 	svc_putnl(resv, 0);
 
-	rqstp->rq_flavor = RPC_AUTH_NULL;
+	rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
 	return SVC_OK;
 }
 
@@ -839,7 +839,7 @@ svcauth_unix_accept(struct svc_rqst *rqstp, __be32 *authp)
 	svc_putnl(resv, RPC_AUTH_NULL);
 	svc_putnl(resv, 0);
 
-	rqstp->rq_flavor = RPC_AUTH_UNIX;
+	rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
 	return SVC_OK;
 
 badcred:
-- 
cgit 


From a3860c1c5dd1137db23d7786d284939c5761d517 Mon Sep 17 00:00:00 2001
From: Xi Wang <xi.wang@gmail.com>
Date: Thu, 31 May 2012 16:26:04 -0700
Subject: introduce SIZE_MAX

ULONG_MAX is often used to check for integer overflow when calculating
allocation size.  While ULONG_MAX happens to work on most systems, there
is no guarantee that `size_t' must be the same size as `long'.

This patch introduces SIZE_MAX, the maximum value of `size_t', to improve
portability and readability for allocation size validation.

Signed-off-by: Xi Wang <xi.wang@gmail.com>
Acked-by: Alex Elder <elder@dreamhost.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ceph/snap.c             | 2 +-
 include/drm/drm_mem_util.h | 4 ++--
 include/linux/kernel.h     | 1 +
 include/linux/slab.h       | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index f04c0961f993..e5206fc76562 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -331,7 +331,7 @@ static int build_snap_context(struct ceph_snap_realm *realm)
 
 	/* alloc new snap context */
 	err = -ENOMEM;
-	if (num > (ULONG_MAX - sizeof(*snapc)) / sizeof(u64))
+	if (num > (SIZE_MAX - sizeof(*snapc)) / sizeof(u64))
 		goto fail;
 	snapc = kzalloc(sizeof(*snapc) + num*sizeof(u64), GFP_NOFS);
 	if (!snapc)
diff --git a/include/drm/drm_mem_util.h b/include/drm/drm_mem_util.h
index 6bd325fedc87..19a240446fca 100644
--- a/include/drm/drm_mem_util.h
+++ b/include/drm/drm_mem_util.h
@@ -31,7 +31,7 @@
 
 static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
 {
-	if (size != 0 && nmemb > ULONG_MAX / size)
+	if (size != 0 && nmemb > SIZE_MAX / size)
 		return NULL;
 
 	if (size * nmemb <= PAGE_SIZE)
@@ -44,7 +44,7 @@ static __inline__ void *drm_calloc_large(size_t nmemb, size_t size)
 /* Modeled after cairo's malloc_ab, it's like calloc but without the zeroing. */
 static __inline__ void *drm_malloc_ab(size_t nmemb, size_t size)
 {
-	if (size != 0 && nmemb > ULONG_MAX / size)
+	if (size != 0 && nmemb > SIZE_MAX / size)
 		return NULL;
 
 	if (size * nmemb <= PAGE_SIZE)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index ec55a3c8ba77..e07f5e0c5df4 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -35,6 +35,7 @@
 #define LLONG_MAX	((long long)(~0ULL>>1))
 #define LLONG_MIN	(-LLONG_MAX - 1)
 #define ULLONG_MAX	(~0ULL)
+#define SIZE_MAX	(~(size_t)0)
 
 #define STACK_MAGIC	0xdeadbeef
 
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a595dce6b0c7..67d5d94b783a 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -242,7 +242,7 @@ size_t ksize(const void *);
  */
 static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
 {
-	if (size != 0 && n > ULONG_MAX / size)
+	if (size != 0 && n > SIZE_MAX / size)
 		return NULL;
 	return __kmalloc(n * size, flags);
 }
-- 
cgit 


From 020ac5b6bef15785f9dde9de89d2734ff97da733 Mon Sep 17 00:00:00 2001
From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Date: Thu, 31 May 2012 16:26:12 -0700
Subject: fat: introduce special inode for managing the FSINFO block

This is patchset makes fatfs stop using the VFS '->write_super()' method
for writing out the FSINFO block.

The final goal is to get rid of the 'sync_supers()' kernel thread.  This
kernel thread wakes up every 5 seconds (by default) and calls
'->write_super()' for all mounted file-systems.  And the bad thing is that
this is done even if all the superblocks are clean.  Moreover, some
file-systems do not even need this end they do not register the
'->write_super()' method at all (e.g., btrfs).

So 'sync_supers()' most often just generates useless wake-ups and wastes
power.  I am trying to make all file-systems independent of
'->write_super()' and plan to remove 'sync_supers()' and '->write_super'
completely once there are no more users.

The '->write_supers()' method is mostly used by baroque file-systems like
hfs, udf, etc.  Modern file-systems like btrfs and xfs do not use it.
This justifies removing this stuff from VFS completely and make every FS
self-manage own superblock.

Tested with xfstests.

This patch:

Preparation for further changes.  It introduces a special inode
('fsinfo_inode') in FAT file-system which we'll later use for managing the
FSINFO block.  Note, this there is already one special inode ('fat_inode')
which is used for managing the FAT tables.

Introduce new 'MSDOS_FSINFO_INO' constant for this special inode.  It is
safe to do because FAT file-system does not store inode numbers on the
media but generates them run-time.

I've also cleaned up the comment to existing 'MSDOS_ROOT_INO' constant,
while on it.

Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fat/fat.h             |  1 +
 fs/fat/inode.c           | 12 ++++++++++++
 include/linux/msdos_fs.h |  3 ++-
 3 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 66994f316e18..951d12b61b58 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -82,6 +82,7 @@ struct msdos_sb_info {
 	int fatent_shift;
 	struct fatent_operations *fatent_ops;
 	struct inode *fat_inode;
+	struct inode *fsinfo_inode;
 
 	struct ratelimit_state ratelimit;
 
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index b3d290c1b513..84c602b4bb25 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -490,6 +490,7 @@ static void fat_put_super(struct super_block *sb)
 	if (sb->s_dirt)
 		fat_write_super(sb);
 
+	iput(sbi->fsinfo_inode);
 	iput(sbi->fat_inode);
 
 	unload_nls(sbi->nls_disk);
@@ -1244,6 +1245,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 		   void (*setup)(struct super_block *))
 {
 	struct inode *root_inode = NULL, *fat_inode = NULL;
+	struct inode *fsinfo_inode = NULL;
 	struct buffer_head *bh;
 	struct fat_boot_sector *b;
 	struct msdos_sb_info *sbi;
@@ -1490,6 +1492,14 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
 		goto out_fail;
 	MSDOS_I(fat_inode)->i_pos = 0;
 	sbi->fat_inode = fat_inode;
+
+	fsinfo_inode = new_inode(sb);
+	if (!fsinfo_inode)
+		goto out_fail;
+	fsinfo_inode->i_ino = MSDOS_FSINFO_INO;
+	sbi->fsinfo_inode = fsinfo_inode;
+	insert_inode_hash(fsinfo_inode);
+
 	root_inode = new_inode(sb);
 	if (!root_inode)
 		goto out_fail;
@@ -1516,6 +1526,8 @@ out_invalid:
 		fat_msg(sb, KERN_INFO, "Can't find a valid FAT filesystem");
 
 out_fail:
+	if (fsinfo_inode)
+		iput(fsinfo_inode);
 	if (fat_inode)
 		iput(fat_inode);
 	unload_nls(sbi->nls_io);
diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h
index 34066e65fdeb..11cc2ac67e75 100644
--- a/include/linux/msdos_fs.h
+++ b/include/linux/msdos_fs.h
@@ -21,8 +21,9 @@
 #define CT_LE_W(v)	cpu_to_le16(v)
 #define CT_LE_L(v)	cpu_to_le32(v)
 
+#define MSDOS_ROOT_INO	 1	/* The root inode number */
+#define MSDOS_FSINFO_INO 2	/* Used for managing the FSINFO block */
 
-#define MSDOS_ROOT_INO	1	/* == MINIX_ROOT_INO */
 #define MSDOS_DIR_BITS	5	/* log2(sizeof(struct msdos_dir_entry)) */
 
 /* directory limit */
-- 
cgit 


From ae3cef7300e9fddc35ad251dd5f27c5b88c8594a Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 31 May 2012 16:26:14 -0700
Subject: kmod: unexport call_usermodehelper_freeinfo()

call_usermodehelper_freeinfo() is not used outside of kmod.c.  So unexport
it, and make it static to kmod.c

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h | 4 ----
 kernel/kmod.c        | 3 +--
 2 files changed, 1 insertion(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index dd99c329e161..f07f9a4e10ff 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -79,10 +79,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
 /* Actually execute the sub-process */
 int call_usermodehelper_exec(struct subprocess_info *info, int wait);
 
-/* Free the subprocess_info. This is only needed if you're not going
-   to call call_usermodehelper_exec */
-void call_usermodehelper_freeinfo(struct subprocess_info *info);
-
 static inline int
 call_usermodehelper_fns(char *path, char **argv, char **envp, int wait,
 			int (*init)(struct subprocess_info *info, struct cred *new),
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 05698a7415fe..21a0f8e99102 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -221,13 +221,12 @@ fail:
 	return 0;
 }
 
-void call_usermodehelper_freeinfo(struct subprocess_info *info)
+static void call_usermodehelper_freeinfo(struct subprocess_info *info)
 {
 	if (info->cleanup)
 		(*info->cleanup)(info);
 	kfree(info);
 }
-EXPORT_SYMBOL(call_usermodehelper_freeinfo);
 
 static void umh_complete(struct subprocess_info *sub_info)
 {
-- 
cgit 


From 785042f2e275089e22c36b462f6495ce8d91732d Mon Sep 17 00:00:00 2001
From: Boaz Harrosh <bharrosh@panasas.com>
Date: Thu, 31 May 2012 16:26:15 -0700
Subject: kmod: move call_usermodehelper_fns() to .c file and unexport all it's
 helpers

If we move call_usermodehelper_fns() to kmod.c file and EXPORT_SYMBOL it
we can avoid exporting all it's helper functions:
	call_usermodehelper_setup
	call_usermodehelper_setfns
	call_usermodehelper_exec
And make all of them static to kmod.c

Since the optimizer will see all these as a single call site it will
inline them inside call_usermodehelper_fns().  So we loose the call to
_fns but gain 3 calls to the helpers.  (Not that it matters)

Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/kmod.h | 30 ++----------------------------
 kernel/kmod.c        | 25 ++++++++++++++++++++++---
 2 files changed, 24 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index f07f9a4e10ff..5398d5807075 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -66,36 +66,10 @@ struct subprocess_info {
 	void *data;
 };
 
-/* Allocate a subprocess_info structure */
-struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
-						  char **envp, gfp_t gfp_mask);
-
-/* Set various pieces of state into the subprocess_info structure */
-void call_usermodehelper_setfns(struct subprocess_info *info,
-		    int (*init)(struct subprocess_info *info, struct cred *new),
-		    void (*cleanup)(struct subprocess_info *info),
-		    void *data);
-
-/* Actually execute the sub-process */
-int call_usermodehelper_exec(struct subprocess_info *info, int wait);
-
-static inline int
+extern int
 call_usermodehelper_fns(char *path, char **argv, char **envp, int wait,
 			int (*init)(struct subprocess_info *info, struct cred *new),
-			void (*cleanup)(struct subprocess_info *), void *data)
-{
-	struct subprocess_info *info;
-	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
-
-	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
-
-	if (info == NULL)
-		return -ENOMEM;
-
-	call_usermodehelper_setfns(info, init, cleanup, data);
-
-	return call_usermodehelper_exec(info, wait);
-}
+			void (*cleanup)(struct subprocess_info *), void *data);
 
 static inline int
 call_usermodehelper(char *path, char **argv, char **envp, int wait)
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 21a0f8e99102..1f596e4de306 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -478,6 +478,7 @@ static void helper_unlock(void)
  * structure.  This should be passed to call_usermodehelper_exec to
  * exec the process and free the structure.
  */
+static
 struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
 						  char **envp, gfp_t gfp_mask)
 {
@@ -493,7 +494,6 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
   out:
 	return sub_info;
 }
-EXPORT_SYMBOL(call_usermodehelper_setup);
 
 /**
  * call_usermodehelper_setfns - set a cleanup/init function
@@ -511,6 +511,7 @@ EXPORT_SYMBOL(call_usermodehelper_setup);
  * Function must be runnable in either a process context or the
  * context in which call_usermodehelper_exec is called.
  */
+static
 void call_usermodehelper_setfns(struct subprocess_info *info,
 		    int (*init)(struct subprocess_info *info, struct cred *new),
 		    void (*cleanup)(struct subprocess_info *info),
@@ -520,7 +521,6 @@ void call_usermodehelper_setfns(struct subprocess_info *info,
 	info->init = init;
 	info->data = data;
 }
-EXPORT_SYMBOL(call_usermodehelper_setfns);
 
 /**
  * call_usermodehelper_exec - start a usermode application
@@ -534,6 +534,7 @@ EXPORT_SYMBOL(call_usermodehelper_setfns);
  * asynchronously if wait is not set, and runs as a child of keventd.
  * (ie. it runs with full root capabilities).
  */
+static
 int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
 {
 	DECLARE_COMPLETION_ONSTACK(done);
@@ -575,7 +576,25 @@ unlock:
 	helper_unlock();
 	return retval;
 }
-EXPORT_SYMBOL(call_usermodehelper_exec);
+
+int call_usermodehelper_fns(
+	char *path, char **argv, char **envp, int wait,
+	int (*init)(struct subprocess_info *info, struct cred *new),
+	void (*cleanup)(struct subprocess_info *), void *data)
+{
+	struct subprocess_info *info;
+	gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
+
+	info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
+
+	if (info == NULL)
+		return -ENOMEM;
+
+	call_usermodehelper_setfns(info, init, cleanup, data);
+
+	return call_usermodehelper_exec(info, wait);
+}
+EXPORT_SYMBOL(call_usermodehelper_fns);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
 			 void __user *buffer, size_t *lenp, loff_t *ppos)
-- 
cgit 


From 43e13cc107cf6cd3c15fbe1cef849435c2223d50 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Thu, 31 May 2012 16:26:16 -0700
Subject: cred: remove task_is_dead() from __task_cred() validation

Commit 8f92054e7ca1 ("CRED: Fix __task_cred()'s lockdep check and banner
comment"):

    add the following validation condition:

        task->exit_state >= 0

    to permit the access if the target task is dead and therefore
    unable to change its own credentials.

OK, but afaics currently this can only help wait_task_zombie() which calls
__task_cred() without rcu lock.

Remove this validation and change wait_task_zombie() to use task_uid()
instead.  This means we do rcu_read_lock() only to shut up the lockdep,
but we already do the same in, say, wait_task_stopped().

task_is_dead() should die, task->exit_state != 0 means that this task has
passed exit_notify(), only do_wait-like code paths should use this.

Unfortunately, we can't kill task_is_dead() right now, it has already
acquired buggy users in drivers/staging.  The fix already exists.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Reviewed-by: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: David Howells <dhowells@redhat.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: James Morris <jmorris@namei.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cred.h | 10 +++-------
 kernel/exit.c        |  2 +-
 2 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cred.h b/include/linux/cred.h
index 917dc5aeb1d4..ebbed2ce6637 100644
--- a/include/linux/cred.h
+++ b/include/linux/cred.h
@@ -277,17 +277,13 @@ static inline void put_cred(const struct cred *_cred)
  * @task: The task to query
  *
  * Access the objective credentials of a task.  The caller must hold the RCU
- * readlock or the task must be dead and unable to change its own credentials.
+ * readlock.
  *
  * The result of this function should not be passed directly to get_cred();
  * rather get_task_cred() should be used instead.
  */
-#define __task_cred(task)						\
-	({								\
-		const struct task_struct *__t = (task);			\
-		rcu_dereference_check(__t->real_cred,			\
-				      task_is_dead(__t));		\
-	})
+#define __task_cred(task)	\
+	rcu_dereference((task)->real_cred)
 
 /**
  * get_current_cred - Get the current task's subjective credentials
diff --git a/kernel/exit.c b/kernel/exit.c
index 910a0716e17a..3281493ce7ad 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -1214,7 +1214,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
 	unsigned long state;
 	int retval, status, traced;
 	pid_t pid = task_pid_vnr(p);
-	uid_t uid = from_kuid_munged(current_user_ns(), __task_cred(p)->uid);
+	uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p));
 	struct siginfo __user *infop;
 
 	if (!likely(wo->wo_flags & WEXITED))
-- 
cgit 


From cb79295e20a8088a2fd6a9b3cb5f2d889ec36b4d Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Thu, 31 May 2012 16:26:22 -0700
Subject: cpu: introduce clear_tasks_mm_cpumask() helper

Many architectures clear tasks' mm_cpumask like this:

	read_lock(&tasklist_lock);
	for_each_process(p) {
		if (p->mm)
			cpumask_clear_cpu(cpu, mm_cpumask(p->mm));
	}
	read_unlock(&tasklist_lock);

Depending on the context, the code above may have several problems,
such as:

1. Working with task->mm w/o getting mm or grabing the task lock is
   dangerous as ->mm might disappear (exit_mm() assigns NULL under
   task_lock(), so tasklist lock is not enough).

2. Checking for process->mm is not enough because process' main
   thread may exit or detach its mm via use_mm(), but other threads
   may still have a valid mm.

This patch implements a small helper function that does things
correctly, i.e.:

1. We take the task's lock while whe handle its mm (we can't use
   get_task_mm()/mmput() pair as mmput() might sleep);

2. To catch exited main thread case, we use find_lock_task_mm(),
   which walks up all threads and returns an appropriate task
   (with task lock held).

Also, Per Peter Zijlstra's idea, now we don't grab tasklist_lock in
the new helper, instead we take the rcu read lock. We can do this
because the function is called after the cpu is taken down and marked
offline, so no new tasks will get this cpu set in their mm mask.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Cc: Richard Weinberger <richard@nod.at>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Russell King <rmk@arm.linux.org.uk>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Mike Frysinger <vapier@gentoo.org>
Cc: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/cpu.h |  1 +
 kernel/cpu.c        | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 7230bb59a06f..2e9b9ebbeb78 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -177,6 +177,7 @@ extern void put_online_cpus(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
+void clear_tasks_mm_cpumask(int cpu);
 int cpu_down(unsigned int cpu);
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 0e6353cf147a..0575197deb4a 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -10,6 +10,8 @@
 #include <linux/sched.h>
 #include <linux/unistd.h>
 #include <linux/cpu.h>
+#include <linux/oom.h>
+#include <linux/rcupdate.h>
 #include <linux/export.h>
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
@@ -173,6 +175,30 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb)
 }
 EXPORT_SYMBOL(unregister_cpu_notifier);
 
+void clear_tasks_mm_cpumask(int cpu)
+{
+	struct task_struct *p;
+
+	/*
+	 * This function is called after the cpu is taken down and marked
+	 * offline, so its not like new tasks will ever get this cpu set in
+	 * their mm mask. -- Peter Zijlstra
+	 * Thus, we may use rcu_read_lock() here, instead of grabbing
+	 * full-fledged tasklist_lock.
+	 */
+	rcu_read_lock();
+	for_each_process(p) {
+		struct task_struct *t;
+
+		t = find_lock_task_mm(p);
+		if (!t)
+			continue;
+		cpumask_clear_cpu(cpu, mm_cpumask(t->mm));
+		task_unlock(t);
+	}
+	rcu_read_unlock();
+}
+
 static inline void check_for_tasks(int cpu)
 {
 	struct task_struct *p;
-- 
cgit 


From 29a5c67e7a78815fda0567a867adce467f6e6e5a Mon Sep 17 00:00:00 2001
From: maximilian attems <max@stro.at>
Date: Thu, 31 May 2012 16:26:27 -0700
Subject: kexec: export kexec.h to user space

Add userspace definitions, guard all relevant kernel structures.  While at
it document stuff and remove now useless userspace hint.

It is easy to add the relevant system call to respective libc's, but it
seems pointless to have to duplicate the data structures.

This is based on the kexec-tools headers, with the exception of just using
int on return (succes or failure) and using size_t instead of 'unsigned
long int' for the number of segments argument of kexec_load().

Signed-off-by: maximilian attems <max@stro.at>
Cc: Simon Horman <horms@verge.net.au>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Haren Myneni <hbabu@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/Kbuild  |  1 +
 include/linux/kexec.h | 75 ++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 54 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index 7185b8f15ced..8760be30b375 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -226,6 +226,7 @@ header-y += kdev_t.h
 header-y += kernel.h
 header-y += kernelcapi.h
 header-y += kernel-page-flags.h
+header-y += kexec.h
 header-y += keyboard.h
 header-y += keyctl.h
 header-y += l2tp.h
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index 0d7d6a1b172f..37c5f7261142 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -1,8 +1,58 @@
 #ifndef LINUX_KEXEC_H
 #define LINUX_KEXEC_H
 
-#ifdef CONFIG_KEXEC
+/* kexec system call -  It loads the new kernel to boot into.
+ * kexec does not sync, or unmount filesystems so if you need
+ * that to happen you need to do that yourself.
+ */
+
 #include <linux/types.h>
+
+/* kexec flags for different usage scenarios */
+#define KEXEC_ON_CRASH		0x00000001
+#define KEXEC_PRESERVE_CONTEXT	0x00000002
+#define KEXEC_ARCH_MASK		0xffff0000
+
+/* These values match the ELF architecture values.
+ * Unless there is a good reason that should continue to be the case.
+ */
+#define KEXEC_ARCH_DEFAULT ( 0 << 16)
+#define KEXEC_ARCH_386     ( 3 << 16)
+#define KEXEC_ARCH_X86_64  (62 << 16)
+#define KEXEC_ARCH_PPC     (20 << 16)
+#define KEXEC_ARCH_PPC64   (21 << 16)
+#define KEXEC_ARCH_IA_64   (50 << 16)
+#define KEXEC_ARCH_ARM     (40 << 16)
+#define KEXEC_ARCH_S390    (22 << 16)
+#define KEXEC_ARCH_SH      (42 << 16)
+#define KEXEC_ARCH_MIPS_LE (10 << 16)
+#define KEXEC_ARCH_MIPS    ( 8 << 16)
+
+/* The artificial cap on the number of segments passed to kexec_load. */
+#define KEXEC_SEGMENT_MAX 16
+
+#ifndef __KERNEL__
+/*
+ * This structure is used to hold the arguments that are used when
+ * loading  kernel binaries.
+ */
+struct kexec_segment {
+	const void *buf;
+	size_t bufsz;
+	const void *mem;
+	size_t memsz;
+};
+
+/* Load a new kernel image as described by the kexec_segment array
+ * consisting of passed number of segments at the entry-point address.
+ * The flags allow different useage types.
+ */
+extern int kexec_load(void *, size_t, struct kexec_segment *,
+		unsigned long int);
+#endif /* __KERNEL__ */
+
+#ifdef __KERNEL__
+#ifdef CONFIG_KEXEC
 #include <linux/list.h>
 #include <linux/linkage.h>
 #include <linux/compat.h>
@@ -67,11 +117,10 @@ typedef unsigned long kimage_entry_t;
 #define IND_DONE         0x4
 #define IND_SOURCE       0x8
 
-#define KEXEC_SEGMENT_MAX 16
 struct kexec_segment {
 	void __user *buf;
 	size_t bufsz;
-	unsigned long mem;	/* User space sees this as a (void *) ... */
+	unsigned long mem;
 	size_t memsz;
 };
 
@@ -175,25 +224,6 @@ extern struct kimage *kexec_crash_image;
 #define kexec_flush_icache_page(page)
 #endif
 
-#define KEXEC_ON_CRASH		0x00000001
-#define KEXEC_PRESERVE_CONTEXT	0x00000002
-#define KEXEC_ARCH_MASK		0xffff0000
-
-/* These values match the ELF architecture values.
- * Unless there is a good reason that should continue to be the case.
- */
-#define KEXEC_ARCH_DEFAULT ( 0 << 16)
-#define KEXEC_ARCH_386     ( 3 << 16)
-#define KEXEC_ARCH_X86_64  (62 << 16)
-#define KEXEC_ARCH_PPC     (20 << 16)
-#define KEXEC_ARCH_PPC64   (21 << 16)
-#define KEXEC_ARCH_IA_64   (50 << 16)
-#define KEXEC_ARCH_ARM     (40 << 16)
-#define KEXEC_ARCH_S390    (22 << 16)
-#define KEXEC_ARCH_SH      (42 << 16)
-#define KEXEC_ARCH_MIPS_LE (10 << 16)
-#define KEXEC_ARCH_MIPS    ( 8 << 16)
-
 /* List of defined/legal kexec flags */
 #ifndef CONFIG_KEXEC_JUMP
 #define KEXEC_FLAGS    KEXEC_ON_CRASH
@@ -228,4 +258,5 @@ struct task_struct;
 static inline void crash_kexec(struct pt_regs *regs) { }
 static inline int kexec_should_crash(struct task_struct *p) { return 0; }
 #endif /* CONFIG_KEXEC */
+#endif /* __KERNEL__ */
 #endif /* LINUX_KEXEC_H */
-- 
cgit 


From 93e6f119c0ce8a1bba6e81dc8dd97d67be360844 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Thu, 31 May 2012 16:26:28 -0700
Subject: ipc/mqueue: cleanup definition names and locations

Since commit b231cca4381e ("message queues: increase range limits") on
Oct 18, 2008, calls to mq_open() that did not pass in an attribute
struct and expected to get default values for the size of the queue and
the max message size now get the system wide maximums instead of
hardwired defaults like they used to get.

This was uncovered when one of the earlier patches in this patch set
increased the default system wide maximums at the same time it increased
the hard ceiling on the system wide maximums (a customer specifically
needed the hard ceiling brought back up, the new ceiling that commit
b231cca4381e introduced was too low for their production systems).  By
increasing the default maximums and not realising they were tied to any
attempt to create a message queue without an attribute struct, I had
inadvertently made it such that all message queue creation attempts
without an attribute struct were failing because the new default
maximums would create a queue that exceeded the default rlimit for
message queue bytes.

As a result, the system wide defaults were brought back down to their
previous levels, and the system wide ceilings on the maximums were
raised to meet the customer's needs.  However, the fact that the no
attribute struct behavior of mq_open() could be broken by changing the
system wide maximums for message queues was seen as fundamentally broken
itself.  So we hardwired the no attribute case back like it used to be.
But, then we realized that on the very off chance that some piece of
software in the wild depended on that behavior, we could work around
that issue by adding two new knobs to /proc that allowed setting the
defaults for message queues created without an attr struct separately
from the system wide maximums.

What is not an option IMO is to leave the current behavior in place.  No
piece of software should ever rely on setting the system wide maximums
in order to get a desired message queue.  Such a reliance would be so
fundamentally multitasking OS unfriendly as to not really be tolerable.
Fortunately, we don't know of any software in the wild that uses this
except for a regression test program that caught the issue in the first
place.  If there is though, we have made accommodations with the two new
/proc knobs (and that's all the accommodations such fundamentally broken
software can be allowed)..

This patch:

The various defines for minimums and maximums of the sysctl controllable
mqueue values are scattered amongst different files and named
inconsistently.  Move them all into ipc_namespace.h and make them have
consistent names.  Additionally, make the number of queues per namespace
also have a minimum and maximum and use the same sysctl function as the
other two settable variables.

Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Korty <joe.korty@ccur.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h |  5 +++++
 ipc/mq_sysctl.c               | 31 ++++++++-----------------------
 2 files changed, 13 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 8a297a5e794c..1372b566e1e1 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -91,10 +91,15 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 #ifdef CONFIG_POSIX_MQUEUE
 extern int mq_init_ns(struct ipc_namespace *ns);
 /* default values */
+#define MIN_QUEUESMAX  1
 #define DFLT_QUEUESMAX 256     /* max number of message queues */
+#define HARD_QUEUESMAX 1024
+#define MIN_MSGMAX     1
 #define DFLT_MSGMAX    10      /* max number of messages in each queue */
 #define HARD_MSGMAX    (32768*sizeof(void *)/4)
+#define MIN_MSGSIZEMAX  128
 #define DFLT_MSGSIZEMAX 8192   /* max message size */
+#define HARD_MSGSIZEMAX (8192*128)
 #else
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #endif
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 0c09366b96f3..e22336a09b4a 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -13,15 +13,6 @@
 #include <linux/ipc_namespace.h>
 #include <linux/sysctl.h>
 
-/*
- * Define the ranges various user-specified maximum values can
- * be set to.
- */
-#define MIN_MSGMAX	1		/* min value for msg_max */
-#define MAX_MSGMAX	HARD_MSGMAX	/* max value for msg_max */
-#define MIN_MSGSIZEMAX	128		/* min value for msgsize_max */
-#define MAX_MSGSIZEMAX	(8192*128)	/* max value for msgsize_max */
-
 #ifdef CONFIG_PROC_SYSCTL
 static void *get_mq(ctl_table *table)
 {
@@ -31,16 +22,6 @@ static void *get_mq(ctl_table *table)
 	return which;
 }
 
-static int proc_mq_dointvec(ctl_table *table, int write,
-	void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	struct ctl_table mq_table;
-	memcpy(&mq_table, table, sizeof(mq_table));
-	mq_table.data = get_mq(table);
-
-	return proc_dointvec(&mq_table, write, buffer, lenp, ppos);
-}
-
 static int proc_mq_dointvec_minmax(ctl_table *table, int write,
 	void __user *buffer, size_t *lenp, loff_t *ppos)
 {
@@ -52,15 +33,17 @@ static int proc_mq_dointvec_minmax(ctl_table *table, int write,
 					lenp, ppos);
 }
 #else
-#define proc_mq_dointvec NULL
 #define proc_mq_dointvec_minmax NULL
 #endif
 
+static int msg_queues_limit_min = MIN_QUEUESMAX;
+static int msg_queues_limit_max = HARD_QUEUESMAX;
+
 static int msg_max_limit_min = MIN_MSGMAX;
-static int msg_max_limit_max = MAX_MSGMAX;
+static int msg_max_limit_max = HARD_MSGMAX;
 
 static int msg_maxsize_limit_min = MIN_MSGSIZEMAX;
-static int msg_maxsize_limit_max = MAX_MSGSIZEMAX;
+static int msg_maxsize_limit_max = HARD_MSGSIZEMAX;
 
 static ctl_table mq_sysctls[] = {
 	{
@@ -68,7 +51,9 @@ static ctl_table mq_sysctls[] = {
 		.data		= &init_ipc_ns.mq_queues_max,
 		.maxlen		= sizeof(int),
 		.mode		= 0644,
-		.proc_handler	= proc_mq_dointvec,
+		.proc_handler	= proc_mq_dointvec_minmax,
+		.extra1		= &msg_queues_limit_min,
+		.extra2		= &msg_queues_limit_max,
 	},
 	{
 		.procname	= "msg_max",
-- 
cgit 


From 858ee3784e8105467f1f3017f4ece51cb51d4830 Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Thu, 31 May 2012 16:26:29 -0700
Subject: ipc/mqueue: switch back to using non-max values on create

Commit b231cca4381e ("message queues: increase range limits") changed
how we create a queue that does not include an attr struct passed to
open so that it creates the queue with whatever the maximum values are.
However, if the admin has set the maximums to allow flexibility in
creating a queue (aka, both a large size and large queue are allowed,
but combined they create a queue too large for the RLIMIT_MSGQUEUE of
the user), then attempts to create a queue without an attr struct will
fail.  Switch back to using acceptable defaults regardless of what the
maximums are.

Note: so far, we only know of a few applications that rely on this
behavior (specifically, set the maximums in /proc, then run the
application which calls mq_open() without passing in an attr struct, and
the application expects the newly created message queue to have the
maximum sizes that were set in /proc used on the mq_open() call, and all
of those applications that we know of are actually part of regression
test suites that were coded to do something like this:

for size in 4096 65536 $((1024 * 1024)) $((16 * 1024 * 1024)); do
	echo $size > /proc/sys/fs/mqueue/msgsize_max
	mq_open || echo "Error opening mq with size $size"
done

These test suites that depend on any behavior like this are broken.  The
concept that programs should rely upon the system wide maximum in order
to get their desired results instead of simply using a attr struct to
specify what they want is fundamentally unfriendly programming practice
for any multi-tasking OS.

Fixing this will break those few apps that we know of (and those app
authors recognize the brokenness of their code and the need to fix it).
However, the following patch "mqueue: separate mqueue default value"
allows a workaround in the form of new knobs for the default msg queue
creation parameters for any software out there that we don't already
know about that might rely on this behavior at the moment.

Signed-off-by: Doug Ledford <dledford@redhat.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Korty <joe.korty@ccur.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 2 ++
 ipc/mqueue.c                  | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 1372b566e1e1..bde094ee7b0e 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -95,9 +95,11 @@ extern int mq_init_ns(struct ipc_namespace *ns);
 #define DFLT_QUEUESMAX 256     /* max number of message queues */
 #define HARD_QUEUESMAX 1024
 #define MIN_MSGMAX     1
+#define DFLT_MSG       10U
 #define DFLT_MSGMAX    10      /* max number of messages in each queue */
 #define HARD_MSGMAX    (32768*sizeof(void *)/4)
 #define MIN_MSGSIZEMAX  128
+#define DFLT_MSGSIZE    8192U
 #define DFLT_MSGSIZEMAX 8192   /* max message size */
 #define HARD_MSGSIZEMAX (8192*128)
 #else
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index a2757d4ab773..b103022179a3 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -144,8 +144,9 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		info->qsize = 0;
 		info->user = NULL;	/* set when all is ok */
 		memset(&info->attr, 0, sizeof(info->attr));
-		info->attr.mq_maxmsg = ipc_ns->mq_msg_max;
-		info->attr.mq_msgsize = ipc_ns->mq_msgsize_max;
+		info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, DFLT_MSG);
+		info->attr.mq_msgsize =
+			min(ipc_ns->mq_msgsize_max, DFLT_MSGSIZE);
 		if (attr) {
 			info->attr.mq_maxmsg = attr->mq_maxmsg;
 			info->attr.mq_msgsize = attr->mq_msgsize;
-- 
cgit 


From 5b5c4d1a1440e94994c73dddbad7be0676cd8b9a Mon Sep 17 00:00:00 2001
From: Doug Ledford <dledford@redhat.com>
Date: Thu, 31 May 2012 16:26:30 -0700
Subject: ipc/mqueue: update maximums for the mqueue subsystem

Commit b231cca4381e ("message queues: increase range limits") changed the
maximum size of a message in a message queue from INT_MAX to 8192*128.
Unfortunately, we had customers that relied on a size much larger than
8192*128 on their production systems.  After reviewing POSIX, we found
that it is silent on the maximum message size.  We did find a couple other
areas in which it was not silent.  Fix up the mqueue maximums so that the
customer's system can continue to work, and document both the POSIX and
real world requirements in ipc_namespace.h so that we don't have this
issue crop back up.

Also, commit 9cf18e1dd74cd0 ("ipc: HARD_MSGMAX should be higher not lower
on 64bit") fiddled with HARD_MSGMAX without realizing that the number was
intentionally in place to limit the msg queue depth to one that was small
enough to kmalloc an array of pointers (hence why we divided 128k by
sizeof(long)).  If we wish to meet POSIX requirements, we have no choice
but to change our allocation to a vmalloc instead (at least for the large
queue size case).  With that, it's possible to increase our allowed
maximum to the POSIX requirements (or more if we choose).

[sfr@canb.auug.org.au: using vmalloc requires including vmalloc.h]
Signed-off-by: Doug Ledford <dledford@redhat.com>
Cc: Serge E. Hallyn <serue@us.ibm.com>
Cc: Amerigo Wang <amwang@redhat.com>
Cc: Joe Korty <joe.korty@ccur.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 47 ++++++++++++++++++++++++++++++++-----------
 ipc/mqueue.c                  | 11 ++++++++--
 2 files changed, 44 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index bde094ee7b0e..6e1dd08194fd 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -90,18 +90,41 @@ static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {}
 
 #ifdef CONFIG_POSIX_MQUEUE
 extern int mq_init_ns(struct ipc_namespace *ns);
-/* default values */
-#define MIN_QUEUESMAX  1
-#define DFLT_QUEUESMAX 256     /* max number of message queues */
-#define HARD_QUEUESMAX 1024
-#define MIN_MSGMAX     1
-#define DFLT_MSG       10U
-#define DFLT_MSGMAX    10      /* max number of messages in each queue */
-#define HARD_MSGMAX    (32768*sizeof(void *)/4)
-#define MIN_MSGSIZEMAX  128
-#define DFLT_MSGSIZE    8192U
-#define DFLT_MSGSIZEMAX 8192   /* max message size */
-#define HARD_MSGSIZEMAX (8192*128)
+/*
+ * POSIX Message Queue default values:
+ *
+ * MIN_*: Lowest value an admin can set the maximum unprivileged limit to
+ * DFLT_*MAX: Default values for the maximum unprivileged limits
+ * DFLT_{MSG,MSGSIZE}: Default values used when the user doesn't supply
+ *   an attribute to the open call and the queue must be created
+ * HARD_*: Highest value the maximums can be set to.  These are enforced
+ *   on CAP_SYS_RESOURCE apps as well making them inviolate (so make them
+ *   suitably high)
+ *
+ * POSIX Requirements:
+ *   Per app minimum openable message queues - 8.  This does not map well
+ *     to the fact that we limit the number of queues on a per namespace
+ *     basis instead of a per app basis.  So, make the default high enough
+ *     that no given app should have a hard time opening 8 queues.
+ *   Minimum maximum for HARD_MSGMAX - 32767.  I bumped this to 65536.
+ *   Minimum maximum for HARD_MSGSIZEMAX - POSIX is silent on this.  However,
+ *     we have run into a situation where running applications in the wild
+ *     require this to be at least 5MB, and preferably 10MB, so I set the
+ *     value to 16MB in hopes that this user is the worst of the bunch and
+ *     the new maximum will handle anyone else.  I may have to revisit this
+ *     in the future.
+ */
+#define MIN_QUEUESMAX			1
+#define DFLT_QUEUESMAX		      256
+#define HARD_QUEUESMAX		     1024
+#define MIN_MSGMAX			1
+#define DFLT_MSG		       64U
+#define DFLT_MSGMAX		     1024
+#define HARD_MSGMAX		    65536
+#define MIN_MSGSIZEMAX		      128
+#define DFLT_MSGSIZE		     8192U
+#define DFLT_MSGSIZEMAX	       (1024*1024)
+#define HARD_MSGSIZEMAX	    (16*1024*1024)
 #else
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
 #endif
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 6e10a55a78c5..f8eba5e46c5a 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -24,6 +24,7 @@
 #include <linux/mqueue.h>
 #include <linux/msg.h>
 #include <linux/skbuff.h>
+#include <linux/vmalloc.h>
 #include <linux/netlink.h>
 #include <linux/syscalls.h>
 #include <linux/audit.h>
@@ -152,7 +153,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 			info->attr.mq_msgsize = attr->mq_msgsize;
 		}
 		mq_msg_tblsz = info->attr.mq_maxmsg * sizeof(struct msg_msg *);
-		info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
+		if (mq_msg_tblsz > KMALLOC_MAX_SIZE)
+			info->messages = vmalloc(mq_msg_tblsz);
+		else
+			info->messages = kmalloc(mq_msg_tblsz, GFP_KERNEL);
 		if (!info->messages)
 			goto out_inode;
 
@@ -262,7 +266,10 @@ static void mqueue_evict_inode(struct inode *inode)
 	spin_lock(&info->lock);
 	for (i = 0; i < info->attr.mq_curmsgs; i++)
 		free_msg(info->messages[i]);
-	kfree(info->messages);
+	if (info->attr.mq_maxmsg * sizeof(struct msg_msg *) > KMALLOC_MAX_SIZE)
+		vfree(info->messages);
+	else
+		kfree(info->messages);
 	spin_unlock(&info->lock);
 
 	/* Total amount of bytes accounted for the mqueue */
-- 
cgit 


From e6315bb154e778391ce64b194756bd3d108dadf6 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Thu, 31 May 2012 16:26:31 -0700
Subject: mqueue: revert bump up DFLT_*MAX

Mqueue limitation is slightly naieve parameter likes other ipcs because
unprivileged user can consume kernel memory by using ipcs.

Thus, too aggressive raise bring us security issue.  Example, current
setting allow evil unprivileged user use 256GB (= 256 * 1024 * 1024*1024)
and it's enough large to system will belome unresponsive.  Don't do that.

Instead, every admin should adjust the knobs for their own systems.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Acked-by: Doug Ledford <dledford@redhat.com>
Acked-by: Joe Korty <joe.korty@ccur.com>
Cc: Amerigo Wang <amwang@redhat.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Dave Hansen <haveblue@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc_namespace.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 6e1dd08194fd..2488535a32a3 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -118,12 +118,12 @@ extern int mq_init_ns(struct ipc_namespace *ns);
 #define DFLT_QUEUESMAX		      256
 #define HARD_QUEUESMAX		     1024
 #define MIN_MSGMAX			1
-#define DFLT_MSG		       64U
-#define DFLT_MSGMAX		     1024
+#define DFLT_MSG		       10U
+#define DFLT_MSGMAX		       10
 #define HARD_MSGMAX		    65536
 #define MIN_MSGSIZEMAX		      128
 #define DFLT_MSGSIZE		     8192U
-#define DFLT_MSGSIZEMAX	       (1024*1024)
+#define DFLT_MSGSIZEMAX		     8192
 #define HARD_MSGSIZEMAX	    (16*1024*1024)
 #else
 static inline int mq_init_ns(struct ipc_namespace *ns) { return 0; }
-- 
cgit 


From cef0184c115e5e4e10498f6548d9526465e72478 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Date: Thu, 31 May 2012 16:26:33 -0700
Subject: mqueue: separate mqueue default value from maximum value

Commit b231cca4381e ("message queues: increase range limits") changed
mqueue default value when attr parameter is specified NULL from hard
coded value to fs.mqueue.{msg,msgsize}_max sysctl value.

This made large side effect.  When user need to use two mqueue
applications 1) using !NULL attr parameter and it require big message
size and 2) using NULL attr parameter and only need small size message,
app (1) require to raise fs.mqueue.msgsize_max and app (2) consume large
memory size even though it doesn't need.

Doug Ledford propsed to switch back it to static hard coded value.
However it also has a compatibility problem.  Some applications might
started depend on the default value is tunable.

The solution is to separate default value from maximum value.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Doug Ledford <dledford@redhat.com>
Acked-by: Doug Ledford <dledford@redhat.com>
Acked-by: Joe Korty <joe.korty@ccur.com>
Cc: Amerigo Wang <amwang@redhat.com>
Acked-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Jiri Slaby <jslaby@suse.cz>
Cc: Manfred Spraul <manfred@colorfullife.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/sysctl/fs.txt   |  7 +++++++
 include/linux/ipc_namespace.h |  2 ++
 ipc/mq_sysctl.c               | 18 ++++++++++++++++++
 ipc/mqueue.c                  |  9 ++++++---
 4 files changed, 33 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/fs.txt b/Documentation/sysctl/fs.txt
index 88fd7f5c8dcd..13d6166d7a27 100644
--- a/Documentation/sysctl/fs.txt
+++ b/Documentation/sysctl/fs.txt
@@ -225,6 +225,13 @@ a queue must be less or equal then msg_max.
 maximum  message size value (it is every  message queue's attribute set during
 its creation).
 
+/proc/sys/fs/mqueue/msg_default is  a read/write  file for setting/getting the
+default number of messages in a queue value if attr parameter of mq_open(2) is
+NULL. If it exceed msg_max, the default value is initialized msg_max.
+
+/proc/sys/fs/mqueue/msgsize_default is a read/write file for setting/getting
+the default message size value if attr parameter of mq_open(2) is NULL. If it
+exceed msgsize_max, the default value is initialized msgsize_max.
 
 4. /proc/sys/fs/epoll - Configuration options for the epoll interface
 --------------------------------------------------------
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index 2488535a32a3..5499c92a9153 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -62,6 +62,8 @@ struct ipc_namespace {
 	unsigned int    mq_queues_max;   /* initialized to DFLT_QUEUESMAX */
 	unsigned int    mq_msg_max;      /* initialized to DFLT_MSGMAX */
 	unsigned int    mq_msgsize_max;  /* initialized to DFLT_MSGSIZEMAX */
+	unsigned int    mq_msg_default;
+	unsigned int    mq_msgsize_default;
 
 	/* user_ns which owns the ipc ns */
 	struct user_namespace *user_ns;
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index e22336a09b4a..383d638340b8 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -73,6 +73,24 @@ static ctl_table mq_sysctls[] = {
 		.extra1		= &msg_maxsize_limit_min,
 		.extra2		= &msg_maxsize_limit_max,
 	},
+	{
+		.procname	= "msg_default",
+		.data		= &init_ipc_ns.mq_msg_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_mq_dointvec_minmax,
+		.extra1		= &msg_max_limit_min,
+		.extra2		= &msg_max_limit_max,
+	},
+	{
+		.procname	= "msgsize_default",
+		.data		= &init_ipc_ns.mq_msgsize_default,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_mq_dointvec_minmax,
+		.extra1		= &msg_maxsize_limit_min,
+		.extra2		= &msg_maxsize_limit_max,
+	},
 	{}
 };
 
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 6828e2c93cef..609d53f7a915 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -145,9 +145,10 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
 		info->qsize = 0;
 		info->user = NULL;	/* set when all is ok */
 		memset(&info->attr, 0, sizeof(info->attr));
-		info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max, DFLT_MSG);
-		info->attr.mq_msgsize =
-			min(ipc_ns->mq_msgsize_max, DFLT_MSGSIZE);
+		info->attr.mq_maxmsg = min(ipc_ns->mq_msg_max,
+					   ipc_ns->mq_msg_default);
+		info->attr.mq_msgsize = min(ipc_ns->mq_msgsize_max,
+					    ipc_ns->mq_msgsize_default);
 		if (attr) {
 			info->attr.mq_maxmsg = attr->mq_maxmsg;
 			info->attr.mq_msgsize = attr->mq_msgsize;
@@ -1261,6 +1262,8 @@ int mq_init_ns(struct ipc_namespace *ns)
 	ns->mq_queues_max    = DFLT_QUEUESMAX;
 	ns->mq_msg_max       = DFLT_MSGMAX;
 	ns->mq_msgsize_max   = DFLT_MSGSIZEMAX;
+	ns->mq_msg_default   = DFLT_MSG;
+	ns->mq_msgsize_default  = DFLT_MSGSIZE;
 
 	ns->mq_mnt = kern_mount_data(&mqueue_fs_type, ns);
 	if (IS_ERR(ns->mq_mnt)) {
-- 
cgit 


From e42d98ebe7d754a2c9fbccd6186721d3ca8679f6 Mon Sep 17 00:00:00 2001
From: Alexandre Bounine <alexandre.bounine@idt.com>
Date: Thu, 31 May 2012 16:26:38 -0700
Subject: rapidio: add DMA engine support for RIO data transfers

Adds DMA Engine framework support into RapidIO subsystem.

Uses DMA Engine DMA_SLAVE interface to generate data transfers to/from
remote RapidIO target devices.

Introduces RapidIO-specific wrapper for prep_slave_sg() interface with an
extra parameter to pass target specific information.

Uses scatterlist to describe local data buffer.  Address flat data buffer
on a remote side.

Signed-off-by: Alexandre Bounine <alexandre.bounine@idt.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Acked-by: Vinod Koul <vinod.koul@linux.intel.com>
Cc: Li Yang <leoli@freescale.com>
Cc: Matt Porter <mporter@kernel.crashing.org>
Cc: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/rapidio/Kconfig   | 14 ++++++++
 drivers/rapidio/rio.c     | 81 +++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/dmaengine.h | 12 +++++++
 include/linux/rio.h       | 47 +++++++++++++++++++++++++++
 include/linux/rio_drv.h   |  9 ++++++
 5 files changed, 163 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/rapidio/Kconfig b/drivers/rapidio/Kconfig
index bc8719238793..6194d35ebb97 100644
--- a/drivers/rapidio/Kconfig
+++ b/drivers/rapidio/Kconfig
@@ -22,6 +22,20 @@ config RAPIDIO_ENABLE_RX_TX_PORTS
 	  ports for Input/Output direction to allow other traffic
 	  than Maintenance transfers.
 
+config RAPIDIO_DMA_ENGINE
+	bool "DMA Engine support for RapidIO"
+	depends on RAPIDIO
+	select DMADEVICES
+	select DMA_ENGINE
+	help
+	  Say Y here if you want to use DMA Engine frameork for RapidIO data
+	  transfers to/from target RIO devices. RapidIO uses NREAD and
+	  NWRITE (NWRITE_R, SWRITE) requests to transfer data between local
+	  memory and memory on remote target device. You need a DMA controller
+	  capable to perform data transfers to/from RapidIO.
+
+	  If you are unsure about this, say Y here.
+
 config RAPIDIO_DEBUG
 	bool "RapidIO subsystem debug messages"
 	depends on RAPIDIO
diff --git a/drivers/rapidio/rio.c b/drivers/rapidio/rio.c
index 86c9a091a2ff..c40665a4fa33 100644
--- a/drivers/rapidio/rio.c
+++ b/drivers/rapidio/rio.c
@@ -1121,6 +1121,87 @@ int rio_std_route_clr_table(struct rio_mport *mport, u16 destid, u8 hopcount,
 	return 0;
 }
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+
+static bool rio_chan_filter(struct dma_chan *chan, void *arg)
+{
+	struct rio_dev *rdev = arg;
+
+	/* Check that DMA device belongs to the right MPORT */
+	return (rdev->net->hport ==
+		container_of(chan->device, struct rio_mport, dma));
+}
+
+/**
+ * rio_request_dma - request RapidIO capable DMA channel that supports
+ *   specified target RapidIO device.
+ * @rdev: RIO device control structure
+ *
+ * Returns pointer to allocated DMA channel or NULL if failed.
+ */
+struct dma_chan *rio_request_dma(struct rio_dev *rdev)
+{
+	dma_cap_mask_t mask;
+	struct dma_chan *dchan;
+
+	dma_cap_zero(mask);
+	dma_cap_set(DMA_SLAVE, mask);
+	dchan = dma_request_channel(mask, rio_chan_filter, rdev);
+
+	return dchan;
+}
+EXPORT_SYMBOL_GPL(rio_request_dma);
+
+/**
+ * rio_release_dma - release specified DMA channel
+ * @dchan: DMA channel to release
+ */
+void rio_release_dma(struct dma_chan *dchan)
+{
+	dma_release_channel(dchan);
+}
+EXPORT_SYMBOL_GPL(rio_release_dma);
+
+/**
+ * rio_dma_prep_slave_sg - RapidIO specific wrapper
+ *   for device_prep_slave_sg callback defined by DMAENGINE.
+ * @rdev: RIO device control structure
+ * @dchan: DMA channel to configure
+ * @data: RIO specific data descriptor
+ * @direction: DMA data transfer direction (TO or FROM the device)
+ * @flags: dmaengine defined flags
+ *
+ * Initializes RapidIO capable DMA channel for the specified data transfer.
+ * Uses DMA channel private extension to pass information related to remote
+ * target RIO device.
+ * Returns pointer to DMA transaction descriptor or NULL if failed.
+ */
+struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(struct rio_dev *rdev,
+	struct dma_chan *dchan, struct rio_dma_data *data,
+	enum dma_transfer_direction direction, unsigned long flags)
+{
+	struct dma_async_tx_descriptor *txd = NULL;
+	struct rio_dma_ext rio_ext;
+
+	if (dchan->device->device_prep_slave_sg == NULL) {
+		pr_err("%s: prep_rio_sg == NULL\n", __func__);
+		return NULL;
+	}
+
+	rio_ext.destid = rdev->destid;
+	rio_ext.rio_addr_u = data->rio_addr_u;
+	rio_ext.rio_addr = data->rio_addr;
+	rio_ext.wr_type = data->wr_type;
+
+	txd = dmaengine_prep_rio_sg(dchan, data->sg, data->sg_len,
+					direction, flags, &rio_ext);
+
+	return txd;
+}
+EXPORT_SYMBOL_GPL(rio_dma_prep_slave_sg);
+
+#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
+
 static void rio_fixup_device(struct rio_dev *dev)
 {
 }
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index d3fec584e8c3..56377df39124 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -635,6 +635,18 @@ static inline struct dma_async_tx_descriptor *dmaengine_prep_slave_sg(
 						  dir, flags, NULL);
 }
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+struct rio_dma_ext;
+static inline struct dma_async_tx_descriptor *dmaengine_prep_rio_sg(
+	struct dma_chan *chan, struct scatterlist *sgl,	unsigned int sg_len,
+	enum dma_transfer_direction dir, unsigned long flags,
+	struct rio_dma_ext *rio_ext)
+{
+	return chan->device->device_prep_slave_sg(chan, sgl, sg_len,
+						  dir, flags, rio_ext);
+}
+#endif
+
 static inline struct dma_async_tx_descriptor *dmaengine_prep_dma_cyclic(
 		struct dma_chan *chan, dma_addr_t buf_addr, size_t buf_len,
 		size_t period_len, enum dma_transfer_direction dir)
diff --git a/include/linux/rio.h b/include/linux/rio.h
index 4d50611112ba..a90ebadd9da0 100644
--- a/include/linux/rio.h
+++ b/include/linux/rio.h
@@ -20,6 +20,9 @@
 #include <linux/errno.h>
 #include <linux/device.h>
 #include <linux/rio_regs.h>
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+#include <linux/dmaengine.h>
+#endif
 
 #define RIO_NO_HOPCOUNT		-1
 #define RIO_INVALID_DESTID	0xffff
@@ -254,6 +257,9 @@ struct rio_mport {
 	u32 phys_efptr;
 	unsigned char name[40];
 	void *priv;		/* Master port private data */
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+	struct dma_device	dma;
+#endif
 };
 
 /**
@@ -395,6 +401,47 @@ union rio_pw_msg {
 	u32 raw[RIO_PW_MSG_SIZE/sizeof(u32)];
 };
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+
+/**
+ * enum rio_write_type - RIO write transaction types used in DMA transfers
+ *
+ * Note: RapidIO specification defines write (NWRITE) and
+ * write-with-response (NWRITE_R) data transfer operations.
+ * Existing DMA controllers that service RapidIO may use one of these operations
+ * for entire data transfer or their combination with only the last data packet
+ * requires response.
+ */
+enum rio_write_type {
+	RDW_DEFAULT,		/* default method used by DMA driver */
+	RDW_ALL_NWRITE,		/* all packets use NWRITE */
+	RDW_ALL_NWRITE_R,	/* all packets use NWRITE_R */
+	RDW_LAST_NWRITE_R,	/* last packet uses NWRITE_R, others - NWRITE */
+};
+
+struct rio_dma_ext {
+	u16 destid;
+	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
+	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
+	enum rio_write_type wr_type; /* preferred RIO write operation type */
+};
+
+struct rio_dma_data {
+	/* Local data (as scatterlist) */
+	struct scatterlist	*sg;	/* I/O scatter list */
+	unsigned int		sg_len;	/* size of scatter list */
+	/* Remote device address (flat buffer) */
+	u64 rio_addr;	/* low 64-bits of 66-bit RapidIO address */
+	u8  rio_addr_u;  /* upper 2-bits of 66-bit RapidIO address */
+	enum rio_write_type wr_type; /* preferred RIO write operation type */
+};
+
+static inline struct rio_mport *dma_to_mport(struct dma_device *ddev)
+{
+	return container_of(ddev, struct rio_mport, dma);
+}
+#endif /* CONFIG_RAPIDIO_DMA_ENGINE */
+
 /* Architecture and hardware-specific functions */
 extern int rio_register_mport(struct rio_mport *);
 extern int rio_open_inb_mbox(struct rio_mport *, void *, int, int);
diff --git a/include/linux/rio_drv.h b/include/linux/rio_drv.h
index 7f07470e1ed9..31ad146be316 100644
--- a/include/linux/rio_drv.h
+++ b/include/linux/rio_drv.h
@@ -377,6 +377,15 @@ void rio_unregister_driver(struct rio_driver *);
 struct rio_dev *rio_dev_get(struct rio_dev *);
 void rio_dev_put(struct rio_dev *);
 
+#ifdef CONFIG_RAPIDIO_DMA_ENGINE
+extern struct dma_chan *rio_request_dma(struct rio_dev *rdev);
+extern void rio_release_dma(struct dma_chan *dchan);
+extern struct dma_async_tx_descriptor *rio_dma_prep_slave_sg(
+		struct rio_dev *rdev, struct dma_chan *dchan,
+		struct rio_dma_data *data,
+		enum dma_transfer_direction direction, unsigned long flags);
+#endif
+
 /**
  * rio_name - Get the unique RIO device identifier
  * @rdev: RIO device
-- 
cgit 


From ee62c6b2dc93c09585b51fad18449dc5edb9977f Mon Sep 17 00:00:00 2001
From: Sha Zhengju <handai.szj@taobao.com>
Date: Thu, 31 May 2012 16:26:41 -0700
Subject: eventfd: change int to __u64 in eventfd_signal()

eventfd_ctx->count is an __u64 counter which is allowed to reach
ULLONG_MAX.  eventfd_write() adds a __u64 value to "count", but the kernel
side eventfd_signal() only adds an int value to it.  Make them consistent.

[akpm@linux-foundation.org: update interface documentation]
Signed-off-by: Sha Zhengju <handai.szj@taobao.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/eventfd.c            | 12 ++++--------
 include/linux/eventfd.h |  2 +-
 2 files changed, 5 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventfd.c b/fs/eventfd.c
index dba15fecf23e..d81b9f654086 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -46,20 +46,16 @@ struct eventfd_ctx {
  * value, and we signal this as overflow condition by returining a POLLERR
  * to poll(2).
  *
- * Returns @n in case of success, a non-negative number lower than @n in case
- * of overflow, or the following error codes:
- *
- * -EINVAL    : The value of @n is negative.
+ * Returns the amount by which the counter was incrememnted.  This will be less
+ * than @n if the counter has overflowed.
  */
-int eventfd_signal(struct eventfd_ctx *ctx, int n)
+__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n)
 {
 	unsigned long flags;
 
-	if (n < 0)
-		return -EINVAL;
 	spin_lock_irqsave(&ctx->wqh.lock, flags);
 	if (ULLONG_MAX - ctx->count < n)
-		n = (int) (ULLONG_MAX - ctx->count);
+		n = ULLONG_MAX - ctx->count;
 	ctx->count += n;
 	if (waitqueue_active(&ctx->wqh))
 		wake_up_locked_poll(&ctx->wqh, POLLIN);
diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h
index 91bb4f27238c..3c3ef19a625a 100644
--- a/include/linux/eventfd.h
+++ b/include/linux/eventfd.h
@@ -34,7 +34,7 @@ void eventfd_ctx_put(struct eventfd_ctx *ctx);
 struct file *eventfd_fget(int fd);
 struct eventfd_ctx *eventfd_ctx_fdget(int fd);
 struct eventfd_ctx *eventfd_ctx_fileget(struct file *file);
-int eventfd_signal(struct eventfd_ctx *ctx, int n);
+__u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n);
 ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt);
 int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
 				  __u64 *cnt);
-- 
cgit 


From ac34ebb3a67e699edcb5ac72f19d31679369dfaa Mon Sep 17 00:00:00 2001
From: Christopher Yeoh <cyeoh@au1.ibm.com>
Date: Thu, 31 May 2012 16:26:42 -0700
Subject: aio/vfs: cleanup of rw_copy_check_uvector() and
 compat_rw_copy_check_uvector()

A cleanup of rw_copy_check_uvector and compat_rw_copy_check_uvector after
changes made to support CMA in an earlier patch.

Rather than having an additional check_access parameter to these
functions, the first paramater type is overloaded to allow the caller to
specify CHECK_IOVEC_ONLY which means check that the contents of the iovec
are valid, but do not check the memory that they point to.  This is used
by process_vm_readv/writev where we need to validate that a iovec passed
to the syscall is valid but do not want to check the memory that it points
to at this point because it refers to an address space in another process.

Signed-off-by: Chris Yeoh <yeohc@au1.ibm.com>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/aio.c               |  4 ++--
 fs/compat.c            |  6 +++---
 fs/read_write.c        |  7 +++----
 include/linux/compat.h |  3 +--
 include/linux/fs.h     | 12 ++++++++++--
 mm/process_vm_access.c | 16 ++++++++--------
 security/keys/compat.c |  2 +-
 security/keys/keyctl.c |  2 +-
 8 files changed, 29 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index e7f2fad7b4ce..8c7c8b805372 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1446,13 +1446,13 @@ static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
 		ret = compat_rw_copy_check_uvector(type,
 				(struct compat_iovec __user *)kiocb->ki_buf,
 				kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
-				&kiocb->ki_iovec, 1);
+				&kiocb->ki_iovec);
 	else
 #endif
 		ret = rw_copy_check_uvector(type,
 				(struct iovec __user *)kiocb->ki_buf,
 				kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
-				&kiocb->ki_iovec, 1);
+				&kiocb->ki_iovec);
 	if (ret < 0)
 		goto out;
 
diff --git a/fs/compat.c b/fs/compat.c
index 0781e619a62a..6556a9ce8a28 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -532,7 +532,7 @@ out:
 ssize_t compat_rw_copy_check_uvector(int type,
 		const struct compat_iovec __user *uvector, unsigned long nr_segs,
 		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer, int check_access)
+		struct iovec **ret_pointer)
 {
 	compat_ssize_t tot_len;
 	struct iovec *iov = *ret_pointer = fast_pointer;
@@ -579,7 +579,7 @@ ssize_t compat_rw_copy_check_uvector(int type,
 		}
 		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
 			goto out;
-		if (check_access &&
+		if (type >= 0 &&
 		    !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
 			ret = -EFAULT;
 			goto out;
@@ -1094,7 +1094,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
 		goto out;
 
 	tot_len = compat_rw_copy_check_uvector(type, uvector, nr_segs,
-					       UIO_FASTIOV, iovstack, &iov, 1);
+					       UIO_FASTIOV, iovstack, &iov);
 	if (tot_len == 0) {
 		ret = 0;
 		goto out;
diff --git a/fs/read_write.c b/fs/read_write.c
index ffc99d22e0a3..c20614f86c01 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -633,8 +633,7 @@ ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer,
-			      int check_access)
+			      struct iovec **ret_pointer)
 {
 	unsigned long seg;
 	ssize_t ret;
@@ -690,7 +689,7 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			ret = -EINVAL;
 			goto out;
 		}
-		if (check_access
+		if (type >= 0
 		    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 			ret = -EFAULT;
 			goto out;
@@ -723,7 +722,7 @@ static ssize_t do_readv_writev(int type, struct file *file,
 	}
 
 	ret = rw_copy_check_uvector(type, uvector, nr_segs,
-				    ARRAY_SIZE(iovstack), iovstack, &iov, 1);
+				    ARRAY_SIZE(iovstack), iovstack, &iov);
 	if (ret <= 0)
 		goto out;
 
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 5d46217f84ad..4e890394ef99 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -577,8 +577,7 @@ extern ssize_t compat_rw_copy_check_uvector(int type,
 		const struct compat_iovec __user *uvector,
 		unsigned long nr_segs,
 		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer,
-		int check_access);
+		struct iovec **ret_pointer);
 
 extern void __user *compat_alloc_user_space(unsigned long len);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 038076b27ea4..cf2c5611b19b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -173,6 +173,15 @@ struct inodes_stat_t {
 #define WRITE_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA)
 #define WRITE_FLUSH_FUA		(WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA)
 
+
+/*
+ * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
+ * that indicates that they should check the contents of the iovec are
+ * valid, but not check the memory that the iovec elements
+ * points too.
+ */
+#define CHECK_IOVEC_ONLY -1
+
 #define SEL_IN		1
 #define SEL_OUT		2
 #define SEL_EX		4
@@ -1690,8 +1699,7 @@ struct seq_file;
 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer,
-			      int check_access);
+			      struct iovec **ret_pointer);
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index c20ff48994c2..926b46649749 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -371,15 +371,15 @@ static ssize_t process_vm_rw(pid_t pid,
 	/* Check iovecs */
 	if (vm_write)
 		rc = rw_copy_check_uvector(WRITE, lvec, liovcnt, UIO_FASTIOV,
-					   iovstack_l, &iov_l, 1);
+					   iovstack_l, &iov_l);
 	else
 		rc = rw_copy_check_uvector(READ, lvec, liovcnt, UIO_FASTIOV,
-					   iovstack_l, &iov_l, 1);
+					   iovstack_l, &iov_l);
 	if (rc <= 0)
 		goto free_iovecs;
 
-	rc = rw_copy_check_uvector(READ, rvec, riovcnt, UIO_FASTIOV,
-				   iovstack_r, &iov_r, 0);
+	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
+				   iovstack_r, &iov_r);
 	if (rc <= 0)
 		goto free_iovecs;
 
@@ -438,16 +438,16 @@ compat_process_vm_rw(compat_pid_t pid,
 	if (vm_write)
 		rc = compat_rw_copy_check_uvector(WRITE, lvec, liovcnt,
 						  UIO_FASTIOV, iovstack_l,
-						  &iov_l, 1);
+						  &iov_l);
 	else
 		rc = compat_rw_copy_check_uvector(READ, lvec, liovcnt,
 						  UIO_FASTIOV, iovstack_l,
-						  &iov_l, 1);
+						  &iov_l);
 	if (rc <= 0)
 		goto free_iovecs;
-	rc = compat_rw_copy_check_uvector(READ, rvec, riovcnt,
+	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
 					  UIO_FASTIOV, iovstack_r,
-					  &iov_r, 0);
+					  &iov_r);
 	if (rc <= 0)
 		goto free_iovecs;
 
diff --git a/security/keys/compat.c b/security/keys/compat.c
index fab4f8dda6c6..c92d42b021aa 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -38,7 +38,7 @@ long compat_keyctl_instantiate_key_iov(
 
 	ret = compat_rw_copy_check_uvector(WRITE, _payload_iov, ioc,
 					   ARRAY_SIZE(iovstack),
-					   iovstack, &iov, 1);
+					   iovstack, &iov);
 	if (ret < 0)
 		return ret;
 	if (ret == 0)
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 18f29de88fda..21907ea35b15 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1110,7 +1110,7 @@ long keyctl_instantiate_key_iov(key_serial_t id,
 		goto no_payload;
 
 	ret = rw_copy_check_uvector(WRITE, _payload_iov, ioc,
-				    ARRAY_SIZE(iovstack), iovstack, &iov, 1);
+				    ARRAY_SIZE(iovstack), iovstack, &iov);
 	if (ret < 0)
 		return ret;
 	if (ret == 0)
-- 
cgit 


From d97b46a64674a267bc41c9e16132ee2a98c3347d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 31 May 2012 16:26:44 -0700
Subject: syscalls, x86: add __NR_kcmp syscall

While doing the checkpoint-restore in the user space one need to determine
whether various kernel objects (like mm_struct-s of file_struct-s) are
shared between tasks and restore this state.

The 2nd step can be solved by using appropriate CLONE_ flags and the
unshare syscall, while there's currently no ways for solving the 1st one.

One of the ways for checking whether two tasks share e.g.  mm_struct is to
provide some mm_struct ID of a task to its proc file, but showing such
info considered to be not that good for security reasons.

Thus after some debates we end up in conclusion that using that named
'comparison' syscall might be the best candidate.  So here is it --
__NR_kcmp.

It takes up to 5 arguments - the pids of the two tasks (which
characteristics should be compared), the comparison type and (in case of
comparison of files) two file descriptors.

Lookups for pids are done in the caller's PID namespace only.

At moment only x86 is supported and tested.

[akpm@linux-foundation.org: fix up selftests, warnings]
[akpm@linux-foundation.org: include errno.h]
[akpm@linux-foundation.org: tweak comment text]
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Andrey Vagin <avagin@openvz.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Glauber Costa <glommer@parallels.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Valdis.Kletnieks@vt.edu
Cc: Michal Marek <mmarek@suse.cz>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/syscalls/syscall_32.tbl         |   1 +
 arch/x86/syscalls/syscall_64.tbl         |   2 +
 include/linux/kcmp.h                     |  17 +++
 include/linux/syscalls.h                 |   2 +
 kernel/Makefile                          |   3 +
 kernel/kcmp.c                            | 196 +++++++++++++++++++++++++++++++
 kernel/sys_ni.c                          |   3 +
 tools/testing/selftests/Makefile         |   2 +-
 tools/testing/selftests/kcmp/Makefile    |  29 +++++
 tools/testing/selftests/kcmp/kcmp_test.c |  94 +++++++++++++++
 10 files changed, 348 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/kcmp.h
 create mode 100644 kernel/kcmp.c
 create mode 100644 tools/testing/selftests/kcmp/Makefile
 create mode 100644 tools/testing/selftests/kcmp/kcmp_test.c

(limited to 'include/linux')

diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl
index 29f9f0554f7d..7a35a6e71d44 100644
--- a/arch/x86/syscalls/syscall_32.tbl
+++ b/arch/x86/syscalls/syscall_32.tbl
@@ -355,3 +355,4 @@
 346	i386	setns			sys_setns
 347	i386	process_vm_readv	sys_process_vm_readv		compat_sys_process_vm_readv
 348	i386	process_vm_writev	sys_process_vm_writev		compat_sys_process_vm_writev
+349	i386	kcmp			sys_kcmp
diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dd29a9ea27c5..51171aeff0dc 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -318,6 +318,8 @@
 309	common	getcpu			sys_getcpu
 310	64	process_vm_readv	sys_process_vm_readv
 311	64	process_vm_writev	sys_process_vm_writev
+312	64	kcmp			sys_kcmp
+
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
 # for native 64-bit operation.
diff --git a/include/linux/kcmp.h b/include/linux/kcmp.h
new file mode 100644
index 000000000000..2dcd1b3aafc8
--- /dev/null
+++ b/include/linux/kcmp.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_KCMP_H
+#define _LINUX_KCMP_H
+
+/* Comparison type */
+enum kcmp_type {
+	KCMP_FILE,
+	KCMP_VM,
+	KCMP_FILES,
+	KCMP_FS,
+	KCMP_SIGHAND,
+	KCMP_IO,
+	KCMP_SYSVSEM,
+
+	KCMP_TYPES,
+};
+
+#endif /* _LINUX_KCMP_H */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 3de3acb84a95..19439c75c5b2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -858,4 +858,6 @@ asmlinkage long sys_process_vm_writev(pid_t pid,
 				      unsigned long riovcnt,
 				      unsigned long flags);
 
+asmlinkage long sys_kcmp(pid_t pid1, pid_t pid2, int type,
+			 unsigned long idx1, unsigned long idx2);
 #endif
diff --git a/kernel/Makefile b/kernel/Makefile
index 6c07f30fa9b7..80be6ca0cc75 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -25,6 +25,9 @@ endif
 obj-y += sched/
 obj-y += power/
 
+ifeq ($(CONFIG_CHECKPOINT_RESTORE),y)
+obj-$(CONFIG_X86) += kcmp.o
+endif
 obj-$(CONFIG_FREEZER) += freezer.o
 obj-$(CONFIG_PROFILING) += profile.o
 obj-$(CONFIG_STACKTRACE) += stacktrace.o
diff --git a/kernel/kcmp.c b/kernel/kcmp.c
new file mode 100644
index 000000000000..30b7b225306c
--- /dev/null
+++ b/kernel/kcmp.c
@@ -0,0 +1,196 @@
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/fdtable.h>
+#include <linux/string.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/cache.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/kcmp.h>
+
+#include <asm/unistd.h>
+
+/*
+ * We don't expose the real in-memory order of objects for security reasons.
+ * But still the comparison results should be suitable for sorting. So we
+ * obfuscate kernel pointers values and compare the production instead.
+ *
+ * The obfuscation is done in two steps. First we xor the kernel pointer with
+ * a random value, which puts pointer into a new position in a reordered space.
+ * Secondly we multiply the xor production with a large odd random number to
+ * permute its bits even more (the odd multiplier guarantees that the product
+ * is unique ever after the high bits are truncated, since any odd number is
+ * relative prime to 2^n).
+ *
+ * Note also that the obfuscation itself is invisible to userspace and if needed
+ * it can be changed to an alternate scheme.
+ */
+static unsigned long cookies[KCMP_TYPES][2] __read_mostly;
+
+static long kptr_obfuscate(long v, int type)
+{
+	return (v ^ cookies[type][0]) * cookies[type][1];
+}
+
+/*
+ * 0 - equal, i.e. v1 = v2
+ * 1 - less than, i.e. v1 < v2
+ * 2 - greater than, i.e. v1 > v2
+ * 3 - not equal but ordering unavailable (reserved for future)
+ */
+static int kcmp_ptr(void *v1, void *v2, enum kcmp_type type)
+{
+	long ret;
+
+	ret = kptr_obfuscate((long)v1, type) - kptr_obfuscate((long)v2, type);
+
+	return (ret < 0) | ((ret > 0) << 1);
+}
+
+/* The caller must have pinned the task */
+static struct file *
+get_file_raw_ptr(struct task_struct *task, unsigned int idx)
+{
+	struct file *file = NULL;
+
+	task_lock(task);
+	rcu_read_lock();
+
+	if (task->files)
+		file = fcheck_files(task->files, idx);
+
+	rcu_read_unlock();
+	task_unlock(task);
+
+	return file;
+}
+
+static void kcmp_unlock(struct mutex *m1, struct mutex *m2)
+{
+	if (likely(m2 != m1))
+		mutex_unlock(m2);
+	mutex_unlock(m1);
+}
+
+static int kcmp_lock(struct mutex *m1, struct mutex *m2)
+{
+	int err;
+
+	if (m2 > m1)
+		swap(m1, m2);
+
+	err = mutex_lock_killable(m1);
+	if (!err && likely(m1 != m2)) {
+		err = mutex_lock_killable_nested(m2, SINGLE_DEPTH_NESTING);
+		if (err)
+			mutex_unlock(m1);
+	}
+
+	return err;
+}
+
+SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t, pid2, int, type,
+		unsigned long, idx1, unsigned long, idx2)
+{
+	struct task_struct *task1, *task2;
+	int ret;
+
+	rcu_read_lock();
+
+	/*
+	 * Tasks are looked up in caller's PID namespace only.
+	 */
+	task1 = find_task_by_vpid(pid1);
+	task2 = find_task_by_vpid(pid2);
+	if (!task1 || !task2)
+		goto err_no_task;
+
+	get_task_struct(task1);
+	get_task_struct(task2);
+
+	rcu_read_unlock();
+
+	/*
+	 * One should have enough rights to inspect task details.
+	 */
+	ret = kcmp_lock(&task1->signal->cred_guard_mutex,
+			&task2->signal->cred_guard_mutex);
+	if (ret)
+		goto err;
+	if (!ptrace_may_access(task1, PTRACE_MODE_READ) ||
+	    !ptrace_may_access(task2, PTRACE_MODE_READ)) {
+		ret = -EPERM;
+		goto err_unlock;
+	}
+
+	switch (type) {
+	case KCMP_FILE: {
+		struct file *filp1, *filp2;
+
+		filp1 = get_file_raw_ptr(task1, idx1);
+		filp2 = get_file_raw_ptr(task2, idx2);
+
+		if (filp1 && filp2)
+			ret = kcmp_ptr(filp1, filp2, KCMP_FILE);
+		else
+			ret = -EBADF;
+		break;
+	}
+	case KCMP_VM:
+		ret = kcmp_ptr(task1->mm, task2->mm, KCMP_VM);
+		break;
+	case KCMP_FILES:
+		ret = kcmp_ptr(task1->files, task2->files, KCMP_FILES);
+		break;
+	case KCMP_FS:
+		ret = kcmp_ptr(task1->fs, task2->fs, KCMP_FS);
+		break;
+	case KCMP_SIGHAND:
+		ret = kcmp_ptr(task1->sighand, task2->sighand, KCMP_SIGHAND);
+		break;
+	case KCMP_IO:
+		ret = kcmp_ptr(task1->io_context, task2->io_context, KCMP_IO);
+		break;
+	case KCMP_SYSVSEM:
+#ifdef CONFIG_SYSVIPC
+		ret = kcmp_ptr(task1->sysvsem.undo_list,
+			       task2->sysvsem.undo_list,
+			       KCMP_SYSVSEM);
+#else
+		ret = -EOPNOTSUPP;
+#endif
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+err_unlock:
+	kcmp_unlock(&task1->signal->cred_guard_mutex,
+		    &task2->signal->cred_guard_mutex);
+err:
+	put_task_struct(task1);
+	put_task_struct(task2);
+
+	return ret;
+
+err_no_task:
+	rcu_read_unlock();
+	return -ESRCH;
+}
+
+static __init int kcmp_cookies_init(void)
+{
+	int i;
+
+	get_random_bytes(cookies, sizeof(cookies));
+
+	for (i = 0; i < KCMP_TYPES; i++)
+		cookies[i][1] |= (~(~0UL >>  1) | 1);
+
+	return 0;
+}
+arch_initcall(kcmp_cookies_init);
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 47bfa16430d7..dbff751e4086 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -203,3 +203,6 @@ cond_syscall(sys_fanotify_mark);
 cond_syscall(sys_name_to_handle_at);
 cond_syscall(sys_open_by_handle_at);
 cond_syscall(compat_sys_open_by_handle_at);
+
+/* compare kernel pointers */
+cond_syscall(sys_kcmp);
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 14972017a43e..a4162e15c25f 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,4 +1,4 @@
-TARGETS = breakpoints mqueue vm
+TARGETS = breakpoints kcmp mqueue vm
 
 all:
 	for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
new file mode 100644
index 000000000000..dc79b86ea65c
--- /dev/null
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -0,0 +1,29 @@
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/)
+ifeq ($(ARCH),i386)
+        ARCH := X86
+	CFLAGS := -DCONFIG_X86_32 -D__i386__
+endif
+ifeq ($(ARCH),x86_64)
+	ARCH := X86
+	CFLAGS := -DCONFIG_X86_64 -D__x86_64__
+endif
+
+CFLAGS += -I../../../../arch/x86/include/generated/
+CFLAGS += -I../../../../include/
+CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../arch/x86/include/
+
+all:
+ifeq ($(ARCH),X86)
+	gcc $(CFLAGS) kcmp_test.c -o run_test
+else
+	echo "Not an x86 target, can't build kcmp selftest"
+endif
+
+run-tests: all
+	./kcmp_test
+
+clean:
+	rm -fr ./run_test
+	rm -fr ./test-file
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
new file mode 100644
index 000000000000..358cc6bfa35d
--- /dev/null
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -0,0 +1,94 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <limits.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include <linux/unistd.h>
+#include <linux/kcmp.h>
+
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+
+static long sys_kcmp(int pid1, int pid2, int type, int fd1, int fd2)
+{
+	return syscall(__NR_kcmp, pid1, pid2, type, fd1, fd2);
+}
+
+int main(int argc, char **argv)
+{
+	const char kpath[] = "kcmp-test-file";
+	int pid1, pid2;
+	int fd1, fd2;
+	int status;
+
+	fd1 = open(kpath, O_RDWR | O_CREAT | O_TRUNC, 0644);
+	pid1 = getpid();
+
+	if (fd1 < 0) {
+		perror("Can't create file");
+		exit(1);
+	}
+
+	pid2 = fork();
+	if (pid2 < 0) {
+		perror("fork failed");
+		exit(1);
+	}
+
+	if (!pid2) {
+		int pid2 = getpid();
+		int ret;
+
+		fd2 = open(kpath, O_RDWR, 0644);
+		if (fd2 < 0) {
+			perror("Can't open file");
+			exit(1);
+		}
+
+		/* An example of output and arguments */
+		printf("pid1: %6d pid2: %6d FD: %2ld FILES: %2ld VM: %2ld "
+		       "FS: %2ld SIGHAND: %2ld IO: %2ld SYSVSEM: %2ld "
+		       "INV: %2ld\n",
+		       pid1, pid2,
+		       sys_kcmp(pid1, pid2, KCMP_FILE,		fd1, fd2),
+		       sys_kcmp(pid1, pid2, KCMP_FILES,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_VM,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_FS,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_SIGHAND,	0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_IO,		0, 0),
+		       sys_kcmp(pid1, pid2, KCMP_SYSVSEM,	0, 0),
+
+			/* This one should fail */
+		       sys_kcmp(pid1, pid2, KCMP_TYPES + 1,	0, 0));
+
+		/* This one should return same fd */
+		ret = sys_kcmp(pid1, pid2, KCMP_FILE, fd1, fd1);
+		if (ret) {
+			printf("FAIL: 0 expected but %d returned\n", ret);
+			ret = -1;
+		} else
+			printf("PASS: 0 returned as expected\n");
+
+		/* Compare with self */
+		ret = sys_kcmp(pid1, pid1, KCMP_VM, 0, 0);
+		if (ret) {
+			printf("FAIL: 0 expected but %li returned\n", ret);
+			ret = -1;
+		} else
+			printf("PASS: 0 returned as expected\n");
+
+		exit(ret);
+	}
+
+	waitpid(pid2, &status, P_ALL);
+
+	return 0;
+}
-- 
cgit 


From fe8c7f5cbf91124987106faa3bdf0c8b955c4cf7 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 31 May 2012 16:26:45 -0700
Subject: c/r: prctl: extend PR_SET_MM to set up more mm_struct entries

During checkpoint we dump whole process memory to a file and the dump
includes process stack memory.  But among stack data itself, the stack
carries additional parameters such as command line arguments, environment
data and auxiliary vector.

So when we do restore procedure and once we've restored stack data itself
we need to setup mm_struct::arg_start/end, env_start/end, so restored
process would be able to find command line arguments and environment data
it had at checkpoint time.  The same applies to auxiliary vector.

For this reason additional PR_SET_MM_(ARG_START | ARG_END | ENV_START |
ENV_END | AUXV) codes are introduced.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Andrew Vagin <avagin@openvz.org>
Cc: Serge Hallyn <serge.hallyn@canonical.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Vasiliy Kulikov <segoon@openwall.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h |   5 ++
 kernel/sys.c          | 134 +++++++++++++++++++++++++++++++-------------------
 2 files changed, 88 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 78b76e24cc7e..18d84c4b42d8 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -113,6 +113,11 @@
 # define PR_SET_MM_START_STACK		5
 # define PR_SET_MM_START_BRK		6
 # define PR_SET_MM_BRK			7
+# define PR_SET_MM_ARG_START		8
+# define PR_SET_MM_ARG_END		9
+# define PR_SET_MM_ENV_START		10
+# define PR_SET_MM_ENV_END		11
+# define PR_SET_MM_AUXV			12
 
 /*
  * Set specific pid that is allowed to ptrace the current task.
diff --git a/kernel/sys.c b/kernel/sys.c
index 6e81aa7e4688..8b544972e46e 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1784,17 +1784,23 @@ SYSCALL_DEFINE1(umask, int, mask)
 }
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
+static bool vma_flags_mismatch(struct vm_area_struct *vma,
+			       unsigned long required,
+			       unsigned long banned)
+{
+	return (vma->vm_flags & required) != required ||
+		(vma->vm_flags & banned);
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	unsigned long rlim = rlimit(RLIMIT_DATA);
-	unsigned long vm_req_flags;
-	unsigned long vm_bad_flags;
-	struct vm_area_struct *vma;
-	int error = 0;
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	int error;
 
-	if (arg4 | arg5)
+	if (arg5 || (arg4 && opt != PR_SET_MM_AUXV))
 		return -EINVAL;
 
 	if (!capable(CAP_SYS_RESOURCE))
@@ -1803,58 +1809,23 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	if (addr >= TASK_SIZE)
 		return -EINVAL;
 
+	error = -EINVAL;
+
 	down_read(&mm->mmap_sem);
 	vma = find_vma(mm, addr);
 
-	if (opt != PR_SET_MM_START_BRK && opt != PR_SET_MM_BRK) {
-		/* It must be existing VMA */
-		if (!vma || vma->vm_start > addr)
-			goto out;
-	}
-
-	error = -EINVAL;
 	switch (opt) {
 	case PR_SET_MM_START_CODE:
+		mm->start_code = addr;
+		break;
 	case PR_SET_MM_END_CODE:
-		vm_req_flags = VM_READ | VM_EXEC;
-		vm_bad_flags = VM_WRITE | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_CODE)
-			mm->start_code = addr;
-		else
-			mm->end_code = addr;
+		mm->end_code = addr;
 		break;
-
 	case PR_SET_MM_START_DATA:
-	case PR_SET_MM_END_DATA:
-		vm_req_flags = VM_READ | VM_WRITE;
-		vm_bad_flags = VM_EXEC | VM_MAYSHARE;
-
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags ||
-		    (vma->vm_flags & vm_bad_flags))
-			goto out;
-
-		if (opt == PR_SET_MM_START_DATA)
-			mm->start_data = addr;
-		else
-			mm->end_data = addr;
+		mm->start_data = addr;
 		break;
-
-	case PR_SET_MM_START_STACK:
-
-#ifdef CONFIG_STACK_GROWSUP
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSUP;
-#else
-		vm_req_flags = VM_READ | VM_WRITE | VM_GROWSDOWN;
-#endif
-		if ((vma->vm_flags & vm_req_flags) != vm_req_flags)
-			goto out;
-
-		mm->start_stack = addr;
+	case PR_SET_MM_END_DATA:
+		mm->end_data = addr;
 		break;
 
 	case PR_SET_MM_START_BRK:
@@ -1881,16 +1852,77 @@ static int prctl_set_mm(int opt, unsigned long addr,
 		mm->brk = addr;
 		break;
 
+	/*
+	 * If command line arguments and environment
+	 * are placed somewhere else on stack, we can
+	 * set them up here, ARG_START/END to setup
+	 * command line argumets and ENV_START/END
+	 * for environment.
+	 */
+	case PR_SET_MM_START_STACK:
+	case PR_SET_MM_ARG_START:
+	case PR_SET_MM_ARG_END:
+	case PR_SET_MM_ENV_START:
+	case PR_SET_MM_ENV_END:
+		if (!vma) {
+			error = -EFAULT;
+			goto out;
+		}
+#ifdef CONFIG_STACK_GROWSUP
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSUP, 0))
+#else
+		if (vma_flags_mismatch(vma, VM_READ | VM_WRITE | VM_GROWSDOWN, 0))
+#endif
+			goto out;
+		if (opt == PR_SET_MM_START_STACK)
+			mm->start_stack = addr;
+		else if (opt == PR_SET_MM_ARG_START)
+			mm->arg_start = addr;
+		else if (opt == PR_SET_MM_ARG_END)
+			mm->arg_end = addr;
+		else if (opt == PR_SET_MM_ENV_START)
+			mm->env_start = addr;
+		else if (opt == PR_SET_MM_ENV_END)
+			mm->env_end = addr;
+		break;
+
+	/*
+	 * This doesn't move auxiliary vector itself
+	 * since it's pinned to mm_struct, but allow
+	 * to fill vector with new values. It's up
+	 * to a caller to provide sane values here
+	 * otherwise user space tools which use this
+	 * vector might be unhappy.
+	 */
+	case PR_SET_MM_AUXV: {
+		unsigned long user_auxv[AT_VECTOR_SIZE];
+
+		if (arg4 > sizeof(user_auxv))
+			goto out;
+		up_read(&mm->mmap_sem);
+
+		if (copy_from_user(user_auxv, (const void __user *)addr, arg4))
+			return -EFAULT;
+
+		/* Make sure the last entry is always AT_NULL */
+		user_auxv[AT_VECTOR_SIZE - 2] = 0;
+		user_auxv[AT_VECTOR_SIZE - 1] = 0;
+
+		BUILD_BUG_ON(sizeof(user_auxv) != sizeof(mm->saved_auxv));
+
+		task_lock(current);
+		memcpy(mm->saved_auxv, user_auxv, arg4);
+		task_unlock(current);
+
+		return 0;
+	}
 	default:
-		error = -EINVAL;
 		goto out;
 	}
 
 	error = 0;
-
 out:
 	up_read(&mm->mmap_sem);
-
 	return error;
 }
 #else /* CONFIG_CHECKPOINT_RESTORE */
-- 
cgit 


From b32dfe377102ce668775f8b6b1461f7ad428f8b6 Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 31 May 2012 16:26:46 -0700
Subject: c/r: prctl: add ability to set new mm_struct::exe_file

When we do restore we would like to have a way to setup a former
mm_struct::exe_file so that /proc/pid/exe would point to the original
executable file a process had at checkpoint time.

For this the PR_SET_MM_EXE_FILE code is introduced.  This option takes a
file descriptor which will be set as a source for new /proc/$pid/exe
symlink.

Note it allows to change /proc/$pid/exe if there are no VM_EXECUTABLE
vmas present for current process, simply because this feature is a special
to C/R and mm::num_exe_file_vmas become meaningless after that.

To minimize the amount of transition the /proc/pid/exe symlink might have,
this feature is implemented in one-shot manner.  Thus once changed the
symlink can't be changed again.  This should help sysadmins to monitor the
symlinks over all process running in a system.

In particular one could make a snapshot of processes and ring alarm if
there unexpected changes of /proc/pid/exe's in a system.

Note -- this feature is available iif CONFIG_CHECKPOINT_RESTORE is set and
the caller must have CAP_SYS_RESOURCE capability granted, otherwise the
request to change symlink will be rejected.

Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Reviewed-by: Oleg Nesterov <oleg@redhat.com>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tejun Heo <tj@kernel.org>
Cc: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h |  1 +
 kernel/sys.c          | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 57 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 18d84c4b42d8..711e0a30aacc 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -118,6 +118,7 @@
 # define PR_SET_MM_ENV_START		10
 # define PR_SET_MM_ENV_END		11
 # define PR_SET_MM_AUXV			12
+# define PR_SET_MM_EXE_FILE		13
 
 /*
  * Set specific pid that is allowed to ptrace the current task.
diff --git a/kernel/sys.c b/kernel/sys.c
index 8b544972e46e..9ff89cb9657a 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -36,6 +36,8 @@
 #include <linux/personality.h>
 #include <linux/ptrace.h>
 #include <linux/fs_struct.h>
+#include <linux/file.h>
+#include <linux/mount.h>
 #include <linux/gfp.h>
 #include <linux/syscore_ops.h>
 #include <linux/version.h>
@@ -1792,6 +1794,57 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma,
 		(vma->vm_flags & banned);
 }
 
+static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
+{
+	struct file *exe_file;
+	struct dentry *dentry;
+	int err;
+
+	/*
+	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
+	 * remain. So perform a quick test first.
+	 */
+	if (mm->num_exe_file_vmas)
+		return -EBUSY;
+
+	exe_file = fget(fd);
+	if (!exe_file)
+		return -EBADF;
+
+	dentry = exe_file->f_path.dentry;
+
+	/*
+	 * Because the original mm->exe_file points to executable file, make
+	 * sure that this one is executable as well, to avoid breaking an
+	 * overall picture.
+	 */
+	err = -EACCES;
+	if (!S_ISREG(dentry->d_inode->i_mode)	||
+	    exe_file->f_path.mnt->mnt_flags & MNT_NOEXEC)
+		goto exit;
+
+	err = inode_permission(dentry->d_inode, MAY_EXEC);
+	if (err)
+		goto exit;
+
+	/*
+	 * The symlink can be changed only once, just to disallow arbitrary
+	 * transitions malicious software might bring in. This means one
+	 * could make a snapshot over all processes running and monitor
+	 * /proc/pid/exe changes to notice unusual activity if needed.
+	 */
+	down_write(&mm->mmap_sem);
+	if (likely(!mm->exe_file))
+		set_mm_exe_file(mm, exe_file);
+	else
+		err = -EBUSY;
+	up_write(&mm->mmap_sem);
+
+exit:
+	fput(exe_file);
+	return err;
+}
+
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
@@ -1806,6 +1859,9 @@ static int prctl_set_mm(int opt, unsigned long addr,
 	if (!capable(CAP_SYS_RESOURCE))
 		return -EPERM;
 
+	if (opt == PR_SET_MM_EXE_FILE)
+		return prctl_set_mm_exe_file(mm, (unsigned int)addr);
+
 	if (addr >= TASK_SIZE)
 		return -EINVAL;
 
-- 
cgit 


From 8b3ec6814c83d76b85bd13badc48552836c24839 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 17:11:23 -0400
Subject: take security_mmap_file() outside of ->mmap_sem

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/security.h |  7 +++----
 ipc/shm.c                |  5 +++++
 mm/mmap.c                | 23 ++++++++++++-----------
 mm/nommu.c               | 22 ++++++++++++----------
 security/security.c      | 33 ++++++++++++++++++++++++++++++---
 5 files changed, 62 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index f1bae0963ddc..4e5a73cdbbef 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1745,8 +1745,8 @@ int security_file_permission(struct file *file, int mask);
 int security_file_alloc(struct file *file);
 void security_file_free(struct file *file);
 int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
-int security_mmap_file(struct file *file, unsigned long reqprot,
-			unsigned long prot, unsigned long flags);
+int security_mmap_file(struct file *file, unsigned long prot,
+			unsigned long flags);
 int security_mmap_addr(unsigned long addr);
 int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot,
 			   unsigned long prot);
@@ -2183,8 +2183,7 @@ static inline int security_file_ioctl(struct file *file, unsigned int cmd,
 	return 0;
 }
 
-static inline int security_mmap_file(struct file *file, unsigned long reqprot,
-				     unsigned long prot,
+static inline int security_mmap_file(struct file *file, unsigned long prot,
 				     unsigned long flags)
 {
 	return 0;
diff --git a/ipc/shm.c b/ipc/shm.c
index 406c5b208193..e3a8063b1768 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1036,6 +1036,10 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 	sfd->file = shp->shm_file;
 	sfd->vm_ops = NULL;
 
+	err = security_mmap_file(file, prot, flags);
+	if (err)
+		goto out_fput;
+
 	down_write(&current->mm->mmap_sem);
 	if (addr && !(shmflg & SHM_REMAP)) {
 		err = -EINVAL;
@@ -1058,6 +1062,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 invalid:
 	up_write(&current->mm->mmap_sem);
 
+out_fput:
 	fput(file);
 
 out_nattch:
diff --git a/mm/mmap.c b/mm/mmap.c
index 49283da9a2ae..34b280f4238d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -979,7 +979,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	struct inode *inode;
 	vm_flags_t vm_flags;
 	int error;
-	unsigned long reqprot = prot;
 
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
@@ -1105,10 +1104,6 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (error)
 		return error;
 
-	error = security_mmap_file(file, reqprot, prot, flags);
-	if (error)
-		return error;
-
 	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
 
@@ -1130,9 +1125,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
 
-	down_write(&mm->mmap_sem);
-	ret = do_mmap(file, addr, len, prot, flag, offset);
-	up_write(&mm->mmap_sem);
+	ret = security_mmap_file(file, prot, flag);
+	if (!ret) {
+		down_write(&mm->mmap_sem);
+		ret = do_mmap(file, addr, len, prot, flag, offset);
+		up_write(&mm->mmap_sem);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(vm_mmap);
@@ -1168,9 +1166,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 
 	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
+	retval = security_mmap_file(file, prot, flags);
+	if (!retval) {
+		down_write(&current->mm->mmap_sem);
+		retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+		up_write(&current->mm->mmap_sem);
+	}
 
 	if (file)
 		fput(file);
diff --git a/mm/nommu.c b/mm/nommu.c
index acfe419785db..8cbfd623b04a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -889,7 +889,6 @@ static int validate_mmap_request(struct file *file,
 				 unsigned long *_capabilities)
 {
 	unsigned long capabilities, rlen;
-	unsigned long reqprot = prot;
 	int ret;
 
 	/* do the simple checks first */
@@ -1048,9 +1047,6 @@ static int validate_mmap_request(struct file *file,
 
 	/* allow the security API to have its say */
 	ret = security_mmap_addr(addr);
-	if (ret < 0)
-		return ret;
-	ret = security_mmap_file(file, reqprot, prot, flags);
 	if (ret < 0)
 		return ret;
 
@@ -1492,9 +1488,12 @@ unsigned long vm_mmap(struct file *file, unsigned long addr,
 	unsigned long ret;
 	struct mm_struct *mm = current->mm;
 
-	down_write(&mm->mmap_sem);
-	ret = do_mmap(file, addr, len, prot, flag, offset);
-	up_write(&mm->mmap_sem);
+	ret = security_mmap_file(file, prot, flag);
+	if (!ret) {
+		down_write(&mm->mmap_sem);
+		ret = do_mmap(file, addr, len, prot, flag, offset);
+		up_write(&mm->mmap_sem);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(vm_mmap);
@@ -1515,9 +1514,12 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
 
 	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
 
-	down_write(&current->mm->mmap_sem);
-	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
-	up_write(&current->mm->mmap_sem);
+	ret = security_mmap_file(file, prot, flags);
+	if (!ret) {
+		down_write(&current->mm->mmap_sem);
+		retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+		up_write(&current->mm->mmap_sem);
+	}
 
 	if (file)
 		fput(file);
diff --git a/security/security.c b/security/security.c
index d91c66d3956b..3b11b3b72fe2 100644
--- a/security/security.c
+++ b/security/security.c
@@ -20,6 +20,9 @@
 #include <linux/ima.h>
 #include <linux/evm.h>
 #include <linux/fsnotify.h>
+#include <linux/mman.h>
+#include <linux/mount.h>
+#include <linux/personality.h>
 #include <net/flow.h>
 
 #define MAX_LSM_EVM_XATTR	2
@@ -657,11 +660,35 @@ int security_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	return security_ops->file_ioctl(file, cmd, arg);
 }
 
-int security_mmap_file(struct file *file, unsigned long reqprot,
-			unsigned long prot, unsigned long flags)
+int security_mmap_file(struct file *file, unsigned long prot,
+			unsigned long flags)
 {
+	unsigned long reqprot = prot;
 	int ret;
-
+	/*
+	 * Does the application expect PROT_READ to imply PROT_EXEC?
+	 *
+	 * (the exception is when the underlying filesystem is noexec
+	 *  mounted, in which case we dont add PROT_EXEC.)
+	 */
+	if (!(reqprot & PROT_READ))
+		goto out;
+	if (!(current->personality & READ_IMPLIES_EXEC))
+		goto out;
+	if (!file) {
+		prot |= PROT_EXEC;
+	} else if (!(file->f_path.mnt->mnt_flags & MNT_NOEXEC)) {
+#ifndef CONFIG_MMU
+		unsigned long caps = 0;
+		struct address_space *mapping = file->f_mapping;
+		if (mapping && mapping->backing_dev_info)
+			caps = mapping->backing_dev_info->capabilities;
+		if (!(caps & BDI_CAP_EXEC_MAP))
+			goto out;
+#endif
+		prot |= PROT_EXEC;
+	}
+out:
 	ret = security_ops->mmap_file(file, reqprot, prot, flags);
 	if (ret)
 		return ret;
-- 
cgit 


From e3fc629d7bb70848fbf479688a66d4e76dff46ac Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 30 May 2012 20:08:42 -0400
Subject: switch aio and shm to do_mmap_pgoff(), make do_mmap() static

after all, 0 bytes and 0 pages is the same thing...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/aio.c           | 6 +++---
 include/linux/mm.h | 2 +-
 ipc/shm.c          | 2 +-
 mm/mmap.c          | 4 ++--
 mm/nommu.c         | 4 ++--
 5 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/fs/aio.c b/fs/aio.c
index e7f2fad7b4ce..07154d99cc67 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -134,9 +134,9 @@ static int aio_setup_ring(struct kioctx *ctx)
 	info->mmap_size = nr_pages * PAGE_SIZE;
 	dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
 	down_write(&ctx->mm->mmap_sem);
-	info->mmap_base = do_mmap(NULL, 0, info->mmap_size, 
-				  PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE,
-				  0);
+	info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size, 
+					PROT_READ|PROT_WRITE,
+					MAP_ANONYMOUS|MAP_PRIVATE, 0);
 	if (IS_ERR((void *)info->mmap_base)) {
 		up_write(&ctx->mm->mmap_sem);
 		info->mmap_size = 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7d5c37f24c63..4189e0d0ac05 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1394,7 +1394,7 @@ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned lo
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long flags,
 	vm_flags_t vm_flags, unsigned long pgoff);
-extern unsigned long do_mmap(struct file *, unsigned long,
+extern unsigned long do_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long,
         unsigned long, unsigned long);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t);
diff --git a/ipc/shm.c b/ipc/shm.c
index e3a8063b1768..5e2cbfdab6fc 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1054,7 +1054,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr)
 			goto invalid;
 	}
 		
-	user_addr = do_mmap (file, addr, size, prot, flags, 0);
+	user_addr = do_mmap_pgoff(file, addr, size, prot, flags, 0);
 	*raddr = user_addr;
 	err = 0;
 	if (IS_ERR_VALUE(user_addr))
diff --git a/mm/mmap.c b/mm/mmap.c
index 131521e12f13..f7786542c59d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -971,7 +971,7 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
  * The caller must hold down_write(&current->mm->mmap_sem).
  */
 
-static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
 			unsigned long flags, unsigned long pgoff)
 {
@@ -1102,7 +1102,7 @@ static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
 
-unsigned long do_mmap(struct file *file, unsigned long addr,
+static unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long offset)
 {
diff --git a/mm/nommu.c b/mm/nommu.c
index 8cbfd623b04a..a1792ed2cb1a 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1232,7 +1232,7 @@ enomem:
 /*
  * handle mapping creation for uClinux
  */
-static unsigned long do_mmap_pgoff(struct file *file,
+unsigned long do_mmap_pgoff(struct file *file,
 			    unsigned long addr,
 			    unsigned long len,
 			    unsigned long prot,
@@ -1470,7 +1470,7 @@ error_getting_region:
 	return -ENOMEM;
 }
 
-unsigned long do_mmap(struct file *file, unsigned long addr,
+static unsigned long do_mmap(struct file *file, unsigned long addr,
 	unsigned long len, unsigned long prot,
 	unsigned long flag, unsigned long offset)
 {
-- 
cgit 


From c3b2da314834499f34cba94f7053e55f6d6f92d8 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@redhat.com>
Date: Mon, 26 Mar 2012 09:59:21 -0400
Subject: fs: introduce inode operation ->update_time

Btrfs has to make sure we have space to allocate new blocks in order to modify
the inode, so updating time can fail.  We've gotten around this by having our
own file_update_time but this is kind of a pain, and Christoph has indicated he
would like to make xfs do something different with atime updates.  So introduce
->update_time, where we will deal with i_version an a/m/c time updates and
indicate which changes need to be made.  The normal version just does what it
has always done, updates the time and marks the inode dirty, and then
filesystems can choose to do something different.

I've gone through all of the users of file_update_time and made them check for
errors with the exception of the fault code since it's complicated and I wasn't
quite sure what to do there, also Jan is going to be pushing the file time
updates into page_mkwrite for those who have it so that should satisfy btrfs and
make it not a big deal to check the file_update_time() return code in the
generic fault path. Thanks,

Signed-off-by: Josef Bacik <josef@redhat.com>
---
 Documentation/filesystems/Locking |  3 +++
 Documentation/filesystems/vfs.txt |  4 +++
 fs/fuse/file.c                    |  4 ++-
 fs/inode.c                        | 56 ++++++++++++++++++++++++++++-----------
 fs/ncpfs/file.c                   |  6 +++--
 fs/ntfs/file.c                    |  4 ++-
 fs/pipe.c                         |  7 +++--
 fs/splice.c                       |  6 +++--
 fs/xfs/xfs_file.c                 |  7 +++--
 include/linux/fs.h                | 10 ++++++-
 mm/filemap.c                      |  4 ++-
 mm/filemap_xip.c                  |  4 ++-
 12 files changed, 86 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 4fca82e5276e..d5a269a51a9e 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -62,6 +62,7 @@ ata *);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
+	void (*update_time)(struct inode *, struct timespec *, int);
 
 locking rules:
 	all may block
@@ -89,6 +90,8 @@ listxattr:	no
 removexattr:	yes
 truncate_range:	yes
 fiemap:		no
+update_time:	no
+
 	Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
 	cross-directory ->rename() has (per-superblock) ->s_vfs_rename_sem.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 0d0492028082..b2aa722e5ea2 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,7 @@ struct inode_operations {
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
+	void (*update_time)(struct inode *, struct timespec *, int);
 };
 
 Again, all methods are called without any locks being held, unless
@@ -475,6 +476,9 @@ otherwise noted.
   truncate_range: a method provided by the underlying filesystem to truncate a
   	range of blocks , i.e. punch a hole somewhere in a file.
 
+  update_time: called by the VFS to update a specific time or the i_version of
+  	an inode.  If this is not defined the VFS will update the inode itself
+  	and call mark_inode_dirty_sync.
 
 The Address Space Object
 ========================
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 504e61b7fd75..9562109d3a87 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -962,7 +962,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (err)
 		goto out;
 
-	file_update_time(file);
+	err = file_update_time(file);
+	if (err)
+		goto out;
 
 	if (file->f_flags & O_DIRECT) {
 		written = generic_file_direct_write(iocb, iov, &nr_segs,
diff --git a/fs/inode.c b/fs/inode.c
index a79555e492e0..f0335fc315ed 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1487,6 +1487,27 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
 	return 0;
 }
 
+/*
+ * This does the actual work of updating an inodes time or version.  Must have
+ * had called mnt_want_write() before calling this.
+ */
+static int update_time(struct inode *inode, struct timespec *time, int flags)
+{
+	if (inode->i_op->update_time)
+		return inode->i_op->update_time(inode, time, flags);
+
+	if (flags & S_ATIME)
+		inode->i_atime = *time;
+	if (flags & S_VERSION)
+		inode_inc_iversion(inode);
+	if (flags & S_CTIME)
+		inode->i_ctime = *time;
+	if (flags & S_MTIME)
+		inode->i_mtime = *time;
+	mark_inode_dirty_sync(inode);
+	return 0;
+}
+
 /**
  *	touch_atime	-	update the access time
  *	@path: the &struct path to update
@@ -1524,8 +1545,14 @@ void touch_atime(struct path *path)
 	if (mnt_want_write(mnt))
 		return;
 
-	inode->i_atime = now;
-	mark_inode_dirty_sync(inode);
+	/*
+	 * File systems can error out when updating inodes if they need to
+	 * allocate new space to modify an inode (such is the case for
+	 * Btrfs), but since we touch atime while walking down the path we
+	 * really don't care if we failed to update the atime of the file,
+	 * so just ignore the return value.
+	 */
+	update_time(inode, &now, S_ATIME);
 	mnt_drop_write(mnt);
 }
 EXPORT_SYMBOL(touch_atime);
@@ -1604,18 +1631,20 @@ EXPORT_SYMBOL(file_remove_suid);
  *	usage in the file write path of filesystems, and filesystems may
  *	choose to explicitly ignore update via this function with the
  *	S_NOCMTIME inode flag, e.g. for network filesystem where these
- *	timestamps are handled by the server.
+ *	timestamps are handled by the server.  This can return an error for
+ *	file systems who need to allocate space in order to update an inode.
  */
 
-void file_update_time(struct file *file)
+int file_update_time(struct file *file)
 {
 	struct inode *inode = file->f_path.dentry->d_inode;
 	struct timespec now;
-	enum { S_MTIME = 1, S_CTIME = 2, S_VERSION = 4 } sync_it = 0;
+	int sync_it = 0;
+	int ret;
 
 	/* First try to exhaust all avenues to not sync */
 	if (IS_NOCMTIME(inode))
-		return;
+		return 0;
 
 	now = current_fs_time(inode->i_sb);
 	if (!timespec_equal(&inode->i_mtime, &now))
@@ -1628,21 +1657,16 @@ void file_update_time(struct file *file)
 		sync_it |= S_VERSION;
 
 	if (!sync_it)
-		return;
+		return 0;
 
 	/* Finally allowed to write? Takes lock. */
 	if (mnt_want_write_file(file))
-		return;
+		return 0;
 
-	/* Only change inode inside the lock region */
-	if (sync_it & S_VERSION)
-		inode_inc_iversion(inode);
-	if (sync_it & S_CTIME)
-		inode->i_ctime = now;
-	if (sync_it & S_MTIME)
-		inode->i_mtime = now;
-	mark_inode_dirty_sync(inode);
+	ret = update_time(inode, &now, sync_it);
 	mnt_drop_write_file(file);
+
+	return ret;
 }
 EXPORT_SYMBOL(file_update_time);
 
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 3ff5fcc1528f..122e260247f5 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -221,6 +221,10 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 
 	already_written = 0;
 
+	errno = file_update_time(file);
+	if (errno)
+		goto outrel;
+
 	bouncebuffer = vmalloc(bufsize);
 	if (!bouncebuffer) {
 		errno = -EIO;	/* -ENOMEM */
@@ -252,8 +256,6 @@ ncp_file_write(struct file *file, const char __user *buf, size_t count, loff_t *
 	}
 	vfree(bouncebuffer);
 
-	file_update_time(file);
-
 	*ppos = pos;
 
 	if (pos > i_size_read(inode)) {
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 8639169221c7..7389d2d5e51d 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -2096,7 +2096,9 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb,
 	err = file_remove_suid(file);
 	if (err)
 		goto out;
-	file_update_time(file);
+	err = file_update_time(file);
+	if (err)
+		goto out;
 	written = ntfs_file_buffered_write(iocb, iov, nr_segs, pos, ppos,
 			count);
 out:
diff --git a/fs/pipe.c b/fs/pipe.c
index 95ebb56de494..49c1065256fd 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -654,8 +654,11 @@ out:
 		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);
 		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 	}
-	if (ret > 0)
-		file_update_time(filp);
+	if (ret > 0) {
+		int err = file_update_time(filp);
+		if (err)
+			ret = err;
+	}
 	return ret;
 }
 
diff --git a/fs/splice.c b/fs/splice.c
index f8476841eb04..47c4c1ad0c04 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1003,8 +1003,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
 		ret = file_remove_suid(out);
 		if (!ret) {
-			file_update_time(out);
-			ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file);
+			ret = file_update_time(out);
+			if (!ret)
+				ret = splice_from_pipe_feed(pipe, &sd,
+							    pipe_to_file);
 		}
 		mutex_unlock(&inode->i_mutex);
 	} while (ret > 0);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 8d214b87f6bb..9f7ec15a6522 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -586,8 +586,11 @@ restart:
 	 * lock above.  Eventually we should look into a way to avoid
 	 * the pointless lock roundtrip.
 	 */
-	if (likely(!(file->f_mode & FMODE_NOCMTIME)))
-		file_update_time(file);
+	if (likely(!(file->f_mode & FMODE_NOCMTIME))) {
+		error = file_update_time(file);
+		if (error)
+			return error;
+	}
 
 	/*
 	 * If we're writing the file then make sure to clear the setuid and
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cdc1a9630948..57fc70574d20 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1684,6 +1684,7 @@ struct inode_operations {
 	void (*truncate_range)(struct inode *, loff_t, loff_t);
 	int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
 		      u64 len);
+	int (*update_time)(struct inode *, struct timespec *, int);
 } ____cacheline_aligned;
 
 struct seq_file;
@@ -1843,6 +1844,13 @@ static inline void inode_inc_iversion(struct inode *inode)
        spin_unlock(&inode->i_lock);
 }
 
+enum file_time_flags {
+	S_ATIME = 1,
+	S_MTIME = 2,
+	S_CTIME = 4,
+	S_VERSION = 8,
+};
+
 extern void touch_atime(struct path *);
 static inline void file_accessed(struct file *file)
 {
@@ -2579,7 +2587,7 @@ extern int inode_change_ok(const struct inode *, struct iattr *);
 extern int inode_newsize_ok(const struct inode *, loff_t offset);
 extern void setattr_copy(struct inode *inode, const struct iattr *attr);
 
-extern void file_update_time(struct file *file);
+extern int file_update_time(struct file *file);
 
 extern int generic_show_options(struct seq_file *m, struct dentry *root);
 extern void save_mount_options(struct super_block *sb, char *options);
diff --git a/mm/filemap.c b/mm/filemap.c
index 21e5abfbcdf6..51070f1f1b52 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2463,7 +2463,9 @@ ssize_t __generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 	if (err)
 		goto out;
 
-	file_update_time(file);
+	err = file_update_time(file);
+	if (err)
+		goto out;
 
 	/* coalesce the iovecs and go direct-to-BIO for O_DIRECT */
 	if (unlikely(file->f_flags & O_DIRECT)) {
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index a4eb31132229..213ca1f53409 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -426,7 +426,9 @@ xip_file_write(struct file *filp, const char __user *buf, size_t len,
 	if (ret)
 		goto out_backing;
 
-	file_update_time(filp);
+	ret = file_update_time(filp);
+	if (ret)
+		goto out_backing;
 
 	ret = __xip_file_write (filp, buf, count, pos, ppos);
 
-- 
cgit 


From 16b1c1cd71176ab0a76b26818fbf12db9183ed57 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@suse.cz>
Date: Mon, 21 May 2012 17:30:19 +0200
Subject: vfs: retry last component if opening stale dentry

NFS optimizes away d_revalidates for last component of open.  This means that
open itself can find the dentry stale.

This patch allows the filesystem to return EOPENSTALE and the VFS will retry the
lookup on just the last component if possible.

If the lookup was done using RCU mode, including the last component, then this
is not possible since the parent dentry is lost.  In this case fall back to
non-RCU lookup.  Currently this is not used since NFS will always leave RCU
mode.

Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/namei.c            | 37 +++++++++++++++++++++++++++++++++++--
 include/linux/errno.h |  1 +
 2 files changed, 36 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namei.c b/fs/namei.c
index 998d5316921a..7d694194024a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2202,6 +2202,8 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	struct file *filp;
 	struct inode *inode;
 	int symlink_ok = 0;
+	struct path save_parent = { .dentry = NULL, .mnt = NULL };
+	bool retried = false;
 	int error;
 
 	nd->flags &= ~LOOKUP_PARENT;
@@ -2267,6 +2269,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 	if (nd->last.name[nd->last.len])
 		goto exit;
 
+retry_lookup:
 	mutex_lock(&dir->d_inode->i_mutex);
 
 	dentry = lookup_hash(nd);
@@ -2349,12 +2352,21 @@ finish_lookup:
 		return NULL;
 	}
 
-	path_to_nameidata(path, nd);
+	if ((nd->flags & LOOKUP_RCU) || nd->path.mnt != path->mnt) {
+		path_to_nameidata(path, nd);
+	} else {
+		save_parent.dentry = nd->path.dentry;
+		save_parent.mnt = mntget(path->mnt);
+		nd->path.dentry = path->dentry;
+
+	}
 	nd->inode = inode;
 	/* Why this, you ask?  _Now_ we might have grown LOOKUP_JUMPED... */
 	error = complete_walk(nd);
-	if (error)
+	if (error) {
+		path_put(&save_parent);
 		return ERR_PTR(error);
+	}
 	error = -EISDIR;
 	if ((open_flag & O_CREAT) && S_ISDIR(nd->inode->i_mode))
 		goto exit;
@@ -2377,6 +2389,20 @@ common:
 	if (error)
 		goto exit;
 	filp = nameidata_to_filp(nd);
+	if (filp == ERR_PTR(-EOPENSTALE) && save_parent.dentry && !retried) {
+		BUG_ON(save_parent.dentry != dir);
+		path_put(&nd->path);
+		nd->path = save_parent;
+		nd->inode = dir->d_inode;
+		save_parent.mnt = NULL;
+		save_parent.dentry = NULL;
+		if (want_write) {
+			mnt_drop_write(nd->path.mnt);
+			want_write = 0;
+		}
+		retried = true;
+		goto retry_lookup;
+	}
 	if (!IS_ERR(filp)) {
 		error = ima_file_check(filp, op->acc_mode);
 		if (error) {
@@ -2396,6 +2422,7 @@ common:
 out:
 	if (want_write)
 		mnt_drop_write(nd->path.mnt);
+	path_put(&save_parent);
 	terminate_walk(nd);
 	return filp;
 
@@ -2459,6 +2486,12 @@ out:
 	if (base)
 		fput(base);
 	release_open_intent(nd);
+	if (filp == ERR_PTR(-EOPENSTALE)) {
+		if (flags & LOOKUP_RCU)
+			filp = ERR_PTR(-ECHILD);
+		else
+			filp = ERR_PTR(-ESTALE);
+	}
 	return filp;
 
 out_filp:
diff --git a/include/linux/errno.h b/include/linux/errno.h
index 2d09bfa5c262..e0de516374da 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -17,6 +17,7 @@
 #define ENOIOCTLCMD	515	/* No ioctl command */
 #define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
 #define EPROBE_DEFER	517	/* Driver requests probe retry */
+#define EOPENSTALE	518	/* open found a stale dentry */
 
 /* Defined for the NFSv3 protocol */
 #define EBADHANDLE	521	/* Illegal NFS file handle */
-- 
cgit 


From 754421c8cab1a568be844a7069fe04c1cf6391b8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 26 Apr 2012 18:31:00 -0400
Subject: HAVE_RESTORE_SIGMASK is defined on all architectures now

Everyone either defines it in arch thread_info.h or has TIF_RESTORE_SIGMASK
and picks default set_restore_sigmask() in linux/thread_info.h.  Kill the
ifdefs, slap #error in linux/thread_info.h to catch breakage when new ones
get merged.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/compat.c                 | 4 ----
 fs/eventpoll.c              | 4 ----
 fs/select.c                 | 4 ----
 include/linux/thread_info.h | 4 ++++
 kernel/signal.c             | 2 --
 5 files changed, 4 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/fs/compat.c b/fs/compat.c
index 6556a9ce8a28..3adf3d4c2cd9 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1547,7 +1547,6 @@ asmlinkage long compat_sys_old_select(struct compat_sel_arg_struct __user *arg)
 				 compat_ptr(a.exp), compat_ptr(a.tvp));
 }
 
-#ifdef HAVE_SET_RESTORE_SIGMASK
 static long do_compat_pselect(int n, compat_ulong_t __user *inp,
 	compat_ulong_t __user *outp, compat_ulong_t __user *exp,
 	struct compat_timespec __user *tsp, compat_sigset_t __user *sigmask,
@@ -1670,11 +1669,9 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds,
 
 	return ret;
 }
-#endif /* HAVE_SET_RESTORE_SIGMASK */
 
 #ifdef CONFIG_EPOLL
 
-#ifdef HAVE_SET_RESTORE_SIGMASK
 asmlinkage long compat_sys_epoll_pwait(int epfd,
 			struct compat_epoll_event __user *events,
 			int maxevents, int timeout,
@@ -1718,7 +1715,6 @@ asmlinkage long compat_sys_epoll_pwait(int epfd,
 
 	return err;
 }
-#endif /* HAVE_SET_RESTORE_SIGMASK */
 
 #endif /* CONFIG_EPOLL */
 
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 079d1be65ba9..74598f67efeb 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1853,8 +1853,6 @@ error_return:
 	return error;
 }
 
-#ifdef HAVE_SET_RESTORE_SIGMASK
-
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_pwait(2).
@@ -1899,8 +1897,6 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 	return error;
 }
 
-#endif /* HAVE_SET_RESTORE_SIGMASK */
-
 static int __init eventpoll_init(void)
 {
 	struct sysinfo si;
diff --git a/fs/select.c b/fs/select.c
index 17d33d09fc16..bae321569dfa 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -614,7 +614,6 @@ SYSCALL_DEFINE5(select, int, n, fd_set __user *, inp, fd_set __user *, outp,
 	return ret;
 }
 
-#ifdef HAVE_SET_RESTORE_SIGMASK
 static long do_pselect(int n, fd_set __user *inp, fd_set __user *outp,
 		       fd_set __user *exp, struct timespec __user *tsp,
 		       const sigset_t __user *sigmask, size_t sigsetsize)
@@ -686,7 +685,6 @@ SYSCALL_DEFINE6(pselect6, int, n, fd_set __user *, inp, fd_set __user *, outp,
 
 	return do_pselect(n, inp, outp, exp, tsp, up, sigsetsize);
 }
-#endif /* HAVE_SET_RESTORE_SIGMASK */
 
 #ifdef __ARCH_WANT_SYS_OLD_SELECT
 struct sel_arg_struct {
@@ -941,7 +939,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
 	return ret;
 }
 
-#ifdef HAVE_SET_RESTORE_SIGMASK
 SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
 		struct timespec __user *, tsp, const sigset_t __user *, sigmask,
 		size_t, sigsetsize)
@@ -992,4 +989,3 @@ SYSCALL_DEFINE5(ppoll, struct pollfd __user *, ufds, unsigned int, nfds,
 
 	return ret;
 }
-#endif /* HAVE_SET_RESTORE_SIGMASK */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index db78775eff3b..eee729428683 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -129,6 +129,10 @@ static inline void set_restore_sigmask(void)
 }
 #endif	/* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */
 
+#ifndef HAVE_SET_RESTORE_SIGMASK
+#error "no set_restore_sigmask() provided and default one won't work"
+#endif
+
 #endif	/* __KERNEL__ */
 
 #endif /* _LINUX_THREAD_INFO_H */
diff --git a/kernel/signal.c b/kernel/signal.c
index 08dfbd748cd2..95a9d9d8122b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -3235,7 +3235,6 @@ SYSCALL_DEFINE0(pause)
 
 #endif
 
-#ifdef HAVE_SET_RESTORE_SIGMASK
 int sigsuspend(sigset_t *set)
 {
 	sigdelsetmask(set, sigmask(SIGKILL)|sigmask(SIGSTOP));
@@ -3248,7 +3247,6 @@ int sigsuspend(sigset_t *set)
 	set_restore_sigmask();
 	return -ERESTARTNOHAND;
 }
-#endif
 
 #ifdef __ARCH_WANT_SYS_RT_SIGSUSPEND
 /**
-- 
cgit 


From 4ebefe3ec729003443daf153ed6fad1739271283 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 26 Apr 2012 22:29:20 -0400
Subject: new helpers: {clear,test,test_and_clear}_restore_sigmask()

helpers parallel to set_restore_sigmask(), used in the next commits

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/include/asm/thread_info.h       | 16 ++++++++++++++++
 arch/microblaze/include/asm/thread_info.h | 16 ++++++++++++++++
 arch/powerpc/include/asm/thread_info.h    | 16 ++++++++++++++++
 arch/sh/include/asm/thread_info.h         | 17 +++++++++++++++++
 arch/sparc/include/asm/thread_info_64.h   | 16 ++++++++++++++++
 arch/tile/include/asm/thread_info.h       | 16 ++++++++++++++++
 arch/x86/include/asm/thread_info.h        | 16 ++++++++++++++++
 include/linux/thread_info.h               | 12 ++++++++++++
 8 files changed, 125 insertions(+)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 310d9734f02d..8d600363fa57 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -143,6 +143,22 @@ static inline void set_restore_sigmask(void)
 	ti->status |= TS_RESTORE_SIGMASK;
 	set_bit(TIF_SIGPENDING, &ti->flags);
 }
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
+}
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_IA64_THREAD_INFO_H */
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index 1a8ab6a5c03f..12e39206b3ef 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -168,6 +168,22 @@ static inline void set_restore_sigmask(void)
 	ti->status |= TS_RESTORE_SIGMASK;
 	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
 }
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
+}
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index a556ccc16b58..85d50a93a92f 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -142,6 +142,22 @@ static inline void set_restore_sigmask(void)
 	ti->local_flags |= _TLF_RESTORE_SIGMASK;
 	set_bit(TIF_SIGPENDING, &ti->flags);
 }
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->local_flags &= ~_TLF_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->local_flags & _TLF_RESTORE_SIGMASK))
+		return false;
+	ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
+	return true;
+}
 
 static inline bool test_thread_local_flags(unsigned int flags)
 {
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index 0c04ffc4f12c..a109157c6b8f 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -189,6 +189,23 @@ static inline unsigned int get_thread_fault_code(void)
 	struct thread_info *ti = current_thread_info();
 	return ti->flags >> TI_FLAG_FAULT_CODE_SHIFT;
 }
+
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
+}
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index 7f0981b09451..cb9b7a9f5fc1 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -240,6 +240,22 @@ static inline void set_restore_sigmask(void)
 	ti->status |= TS_RESTORE_SIGMASK;
 	set_bit(TIF_SIGPENDING, &ti->flags);
 }
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
+}
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* __KERNEL__ */
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 656c486e64fa..5aef371921e4 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -168,6 +168,22 @@ static inline void set_restore_sigmask(void)
 	ti->status |= TS_RESTORE_SIGMASK;
 	set_bit(TIF_SIGPENDING, &ti->flags);
 }
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
+}
 #endif	/* !__ASSEMBLY__ */
 
 #endif /* _ASM_TILE_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5c25de07cba8..8f3f1ff69fa9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -250,6 +250,22 @@ static inline void set_restore_sigmask(void)
 	ti->status |= TS_RESTORE_SIGMASK;
 	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
 }
+static inline void clear_restore_sigmask(void)
+{
+	current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
+}
+static inline bool test_restore_sigmask(void)
+{
+	return current_thread_info()->status & TS_RESTORE_SIGMASK;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	struct thread_info *ti = current_thread_info();
+	if (!(ti->status & TS_RESTORE_SIGMASK))
+		return false;
+	ti->status &= ~TS_RESTORE_SIGMASK;
+	return true;
+}
 
 static inline bool is_ia32_task(void)
 {
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index eee729428683..ed279701ac79 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -127,6 +127,18 @@ static inline void set_restore_sigmask(void)
 	set_thread_flag(TIF_RESTORE_SIGMASK);
 	set_thread_flag(TIF_SIGPENDING);
 }
+static inline void clear_restore_sigmask(void)
+{
+	clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_restore_sigmask(void)
+{
+	return test_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+	return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
 #endif	/* TIF_RESTORE_SIGMASK && !HAVE_SET_RESTORE_SIGMASK */
 
 #ifndef HAVE_SET_RESTORE_SIGMASK
-- 
cgit 


From 51a7b448d4134e3e8eec633435e3e8faee14a828 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 21 May 2012 23:33:55 -0400
Subject: new helper: restore_saved_sigmask()

first fruits of ..._restore_sigmask() helpers: now we can take
boilerplate "signal didn't have a handler, clear RESTORE_SIGMASK
and restore the blocked mask from ->saved_mask" into a common
helper.  Open-coded instances switched...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/signal.c         | 4 +---
 arch/arm/kernel/signal.c           | 6 +-----
 arch/avr32/kernel/signal.c         | 5 +----
 arch/blackfin/kernel/signal.c      | 5 +----
 arch/c6x/kernel/signal.c           | 5 +----
 arch/cris/arch-v10/kernel/signal.c | 5 +----
 arch/cris/arch-v32/kernel/signal.c | 5 +----
 arch/frv/kernel/signal.c           | 6 +-----
 arch/h8300/kernel/signal.c         | 3 +--
 arch/hexagon/kernel/signal.c       | 5 +----
 arch/ia64/kernel/signal.c          | 5 +----
 arch/m32r/kernel/signal.c          | 5 +----
 arch/m68k/kernel/signal.c          | 5 +----
 arch/microblaze/kernel/signal.c    | 5 +----
 arch/mips/kernel/signal.c          | 5 +----
 arch/mn10300/kernel/signal.c       | 5 +----
 arch/openrisc/kernel/signal.c      | 6 +-----
 arch/parisc/kernel/signal.c        | 7 +------
 arch/powerpc/kernel/signal.c       | 6 +-----
 arch/s390/kernel/signal.c          | 5 +----
 arch/score/kernel/signal.c         | 5 +----
 arch/sh/kernel/signal_32.c         | 5 +----
 arch/sh/kernel/signal_64.c         | 7 +------
 arch/sparc/kernel/signal32.c       | 5 +----
 arch/sparc/kernel/signal_32.c      | 5 +----
 arch/sparc/kernel/signal_64.c      | 5 +----
 arch/tile/kernel/signal.c          | 5 +----
 arch/um/kernel/signal.c            | 6 ++----
 arch/unicore32/kernel/signal.c     | 3 +--
 arch/x86/kernel/signal.c           | 5 +----
 arch/xtensa/kernel/signal.c        | 3 +--
 include/linux/sched.h              | 6 ++++++
 32 files changed, 38 insertions(+), 125 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index f6db3032ddf0..cadf4571ca31 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -572,9 +572,7 @@ do_signal(struct pt_regs * regs, struct switch_stack * sw,
 	}
 
 	/* If there's no signal to deliver, we just restore the saved mask.  */
-	if (test_and_clear_thread_flag(TIF_RESTORE_SIGMASK))
-		set_current_blocked(&current->saved_sigmask);
-
+	restore_saved_sigmask();
 	if (single_stepping)
 		ptrace_set_bpt(current);	/* re-set breakpoint */
 }
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 63f327dd5198..3d1daac8ea04 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -663,11 +663,7 @@ static void do_signal(struct pt_regs *regs, int syscall)
 			set_thread_flag(TIF_SYSCALL_RESTARTSYS);
 	}
 
-	/* If there's no signal to deliver, we just put the saved sigmask
-	 * back.
-	 */
-	if (test_and_clear_thread_flag(TIF_RESTORE_SIGMASK))
-		set_current_blocked(&current->saved_sigmask);
+	restore_saved_sigmask();
 }
 
 asmlinkage void
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index e7595ef74f51..8b12c3046137 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -297,10 +297,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
 
 	if (signr == 0) {
 		/* No signal to deliver -- put the saved sigmask back */
-		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-			clear_thread_flag(TIF_RESTORE_SIGMASK);
-			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-		}
+		restore_saved_sigmask();
 		return 0;
 	}
 
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index fc9ecce8b6ce..9d692a1277b3 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -319,10 +319,7 @@ asmlinkage void do_signal(struct pt_regs *regs)
 
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index 9493f0bbf0a6..bfbcc958bbb4 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -343,10 +343,7 @@ static void do_signal(struct pt_regs *regs, int syscall)
 
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index e16f8f297f61..06885e94e455 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -525,8 +525,5 @@ void do_signal(int canrestart, struct pt_regs *regs)
 
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index b338d8fc0c12..fe12cdca0bac 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -560,10 +560,7 @@ do_signal(int canrestart, struct pt_regs *regs)
 
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 asmlinkage void
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 595bf1e5a5dc..16351cc8c36c 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -536,11 +536,7 @@ no_signal:
 
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
-
+	restore_saved_sigmask();
 } /* end do_signal() */
 
 /*****************************************************************************/
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index e58992ad789e..63623dabab32 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -501,8 +501,7 @@ statis void do_signal(struct pt_regs *regs)
 	}
 
 	/* If there's no signal to deliver, we just restore the saved mask.  */
-	if (test_and_clear_thread_flag(TIF_RESTORE_SIGMASK))
-		set_current_blocked(&current->saved_sigmask);
+	restore_saved_sigmask();
 }
 
 asmlinkage void do_notify_resume(struct pt_regs *regs, u32 thread_info_flags)
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index 21a3018cb9bf..acd6272913b3 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -259,10 +259,7 @@ no_signal:
 
 no_restart:
 	/* If there's no signal to deliver, put the saved sigmask back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 7523501d3bc0..39d8f3afff49 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -538,8 +538,5 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
 
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 64804f1f5141..2ad7c4587669 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -360,10 +360,7 @@ static void do_signal(struct pt_regs *regs)
 			prev_insn(regs);
 		}
 	}
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 973eec60cad4..685cbe84f33f 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -1182,10 +1182,7 @@ static void do_signal(struct pt_regs *regs)
 		handle_restart(regs, NULL, 0);
 
 	/* If there's no signal to deliver, we just restore the saved mask.  */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 void do_notify_resume(struct pt_regs *regs)
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 5d796e32786e..8e644dfaba4f 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -381,10 +381,7 @@ static void do_signal(struct pt_regs *regs, int in_syscall)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back.
 	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 void do_notify_resume(struct pt_regs *regs, int in_syscall)
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 8a6e6d116ab0..aad2d2da5eec 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -614,10 +614,7 @@ static void do_signal(struct pt_regs *regs)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back
 	 */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index b8b6aa1a6837..b7994c38eacc 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -525,10 +525,7 @@ static void do_signal(struct pt_regs *regs)
 
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 9ae611522953..266c6fd2eb5c 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -339,11 +339,7 @@ void do_signal(struct pt_regs *regs)
 	if (signr <= 0) {
 		/* no signal to deliver so we just put the saved sigmask
 		 * back */
-		if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-			clear_thread_flag(TIF_RESTORE_SIGMASK);
-			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-		}
-
+		restore_saved_sigmask();
 	} else {		/* signr > 0 */
 		sigset_t *oldset;
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index e7a7cd3e1120..277cacadf653 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -621,12 +621,7 @@ do_signal(struct pt_regs *regs, long in_syscall)
 	DBG(1,"do_signal: Exit (not delivered), regs->gr[28] = %ld\n", 
 		regs->gr[28]);
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
-
-	return;
+	restore_saved_sigmask();
 }
 
 void do_notify_resume(struct pt_regs *regs, long in_syscall)
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index bfc3ec1382fb..0f4cc67f4268 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -132,12 +132,8 @@ static int do_signal(struct pt_regs *regs)
 	check_syscall_restart(regs, &ka, signr > 0);
 
 	if (signr <= 0) {
-		struct thread_info *ti = current_thread_info();
 		/* No signal to deliver -- put the saved sigmask back */
-		if (ti->local_flags & _TLF_RESTORE_SIGMASK) {
-			ti->local_flags &= ~_TLF_RESTORE_SIGMASK;
-			sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-		}
+		restore_saved_sigmask();
 		regs->trap = 0;
 		return 0;               /* no signals delivered */
 	}
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 42a6e8b47f06..37799089c38e 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -484,10 +484,7 @@ void do_signal(struct pt_regs *regs)
 	/*
 	 * If there's no signal to deliver, we just put the saved sigmask back.
 	 */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 void do_notify_resume(struct pt_regs *regs)
diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c
index 302838d3acf6..9e751559375b 100644
--- a/arch/score/kernel/signal.c
+++ b/arch/score/kernel/signal.c
@@ -337,10 +337,7 @@ static void do_signal(struct pt_regs *regs)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back
 	 */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 9d7bfd66f189..92f4173ad29a 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -610,10 +610,7 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back.
 	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 }
 
 asmlinkage void do_notify_resume(struct pt_regs *regs, unsigned int save_r0,
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index aa6428430842..6e191ef0aa62 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -143,12 +143,7 @@ static void do_signal(struct pt_regs *regs)
 	}
 
 	/* No signal to deliver -- put the saved sigmask back */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
-
-	return;
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index bb1513e45f1a..88e0d8122d2c 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -872,10 +872,7 @@ void do_signal32(sigset_t *oldset, struct pt_regs * regs)
 	/* If there's no signal to deliver, we just put the saved sigmask
 	 * back
 	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		set_current_blocked(&current->saved_sigmask);
-	}
+	restore_saved_sigmask();
 }
 
 struct sigstack32 {
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 6b42e8622d12..9dd97d2e171e 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -576,10 +576,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	/* if there's no signal to deliver, we just put the saved sigmask
 	 * back
 	 */
-	if (test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		set_current_blocked(&current->saved_sigmask);
-	}
+	restore_saved_sigmask();
 }
 
 void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0,
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index c82cf1cc3965..55b820ee0ac9 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -594,10 +594,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	/* If there's no signal to deliver, we just put the saved sigmask
 	 * back
 	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		set_current_blocked(&current->saved_sigmask);
-	}
+	restore_saved_sigmask();
 }
 
 void do_notify_resume(struct pt_regs *regs, unsigned long orig_i0, unsigned long thread_info_flags)
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index f79d4b88c747..62b3493ea77d 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -350,10 +350,7 @@ void do_signal(struct pt_regs *regs)
 	}
 
 	/* If there's no signal to deliver, just put the saved sigmask back. */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	restore_saved_sigmask();
 
 done:
 	/* Avoid double syscall restart if there are nested signals. */
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 292e706016c5..6acf13c1740b 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -130,10 +130,8 @@ static int kern_do_signal(struct pt_regs *regs)
 	 * if there's no signal to deliver, we just put the saved sigmask
 	 * back
 	 */
-	if (!handled_sig && test_thread_flag(TIF_RESTORE_SIGMASK)) {
-		clear_thread_flag(TIF_RESTORE_SIGMASK);
-		sigprocmask(SIG_SETMASK, &current->saved_sigmask, NULL);
-	}
+	if (!handled_sig)
+		restore_saved_sigmask();
 	return handled_sig;
 }
 
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index 28782ad47b93..65a5ed3b6f2a 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -451,8 +451,7 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	/* If there's no signal to deliver, we just put the saved
 	 * sigmask back.
 	 */
-	if (test_and_clear_thread_flag(TIF_RESTORE_SIGMASK))
-		set_current_blocked(&current->saved_sigmask);
+	restore_saved_sigmask();
 }
 
 asmlinkage void do_notify_resume(struct pt_regs *regs,
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 2e937a5ad531..25a4a81a51aa 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -796,10 +796,7 @@ static void do_signal(struct pt_regs *regs)
 	 * If there's no signal to deliver, we just put the saved sigmask
 	 * back.
 	 */
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK) {
-		current_thread_info()->status &= ~TS_RESTORE_SIGMASK;
-		set_current_blocked(&current->saved_sigmask);
-	}
+	restore_saved_sigmask();
 }
 
 /*
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index ea7e17778a75..8c4e751e3b83 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -532,8 +532,7 @@ no_signal:
 	}
 
 	/* If there's no signal to deliver, we just restore the saved mask.  */
-	if (test_and_clear_thread_flag(TIF_RESTORE_SIGMASK))
-		set_current_blocked(&current->saved_sigmask);
+	restore_saved_sigmask();
 
 	if (current->ptrace & PT_SINGLESTEP)
 		task_pt_regs(current)->icountlevel = 1;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 660c8ae93471..f1b46b88f6f5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2207,6 +2207,12 @@ extern int send_sigqueue(struct sigqueue *,  struct task_struct *, int group);
 extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
 extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned long);
 
+static inline void restore_saved_sigmask(void)
+{
+	if (test_and_clear_restore_sigmask())
+		set_current_blocked(&current->saved_sigmask);
+}
+
 static inline int kill_cad_pid(int sig, int priv)
 {
 	return kill_pid(cad_pid, sig, priv);
-- 
cgit 


From b7f9a11a6cf1ea9ee6be3eb2b90d91327a09ad14 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 2 May 2012 09:59:21 -0400
Subject: new helper: sigmask_to_save()

replace boilerplate "should we use ->saved_sigmask or ->blocked?"
with calls of obvious inlined helper...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/signal.c         |  5 +----
 arch/arm/kernel/signal.c           | 12 +++---------
 arch/avr32/kernel/signal.c         | 20 +++++++-------------
 arch/blackfin/kernel/signal.c      | 12 +++---------
 arch/c6x/kernel/signal.c           | 14 +++-----------
 arch/cris/arch-v10/kernel/signal.c | 11 +++--------
 arch/cris/arch-v32/kernel/signal.c | 11 +++--------
 arch/frv/kernel/signal.c           | 10 +++-------
 arch/h8300/kernel/signal.c         | 11 +++--------
 arch/hexagon/kernel/signal.c       | 13 +++----------
 arch/ia64/kernel/signal.c          | 12 +++---------
 arch/m32r/kernel/signal.c          | 12 +++---------
 arch/m68k/kernel/signal.c          | 11 +++--------
 arch/microblaze/kernel/signal.c    |  9 ++-------
 arch/mips/kernel/signal.c          | 11 +++--------
 arch/mn10300/kernel/signal.c       | 11 +++--------
 arch/openrisc/kernel/signal.c      | 12 +++---------
 arch/parisc/kernel/signal.c        | 21 +++++----------------
 arch/powerpc/kernel/signal.c       |  7 +------
 arch/s390/kernel/signal.c          |  7 +------
 arch/score/kernel/signal.c         | 12 +++---------
 arch/sh/kernel/signal_32.c         | 11 +++--------
 arch/sh/kernel/signal_64.c         | 13 ++++---------
 arch/sparc/kernel/signal_32.c      | 11 +++--------
 arch/sparc/kernel/signal_64.c      |  7 +------
 arch/tile/kernel/signal.c          | 11 +++--------
 arch/um/kernel/signal.c            | 11 +++--------
 arch/unicore32/kernel/signal.c     | 13 +++----------
 arch/x86/kernel/signal.c           |  5 +----
 arch/xtensa/kernel/signal.c        |  8 +-------
 include/linux/sched.h              |  8 ++++++++
 31 files changed, 92 insertions(+), 250 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index cadf4571ca31..f1e7d2aa2586 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -468,12 +468,9 @@ static inline void
 handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
 	      struct pt_regs * regs, struct switch_stack *sw)
 {
-	sigset_t *oldset = &current->blocked;
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-
 	if (ka->sa.sa_flags & SA_SIGINFO)
 		ret = setup_rt_frame(sig, ka, info, oldset, regs, sw);
 	else
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 3d1daac8ea04..2e66c93973c3 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -530,11 +530,11 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info,
  */	
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka,
-	      siginfo_t *info, sigset_t *oldset,
-	      struct pt_regs * regs)
+	      siginfo_t *info, struct pt_regs *regs)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
+	sigset_t *oldset = sigmask_to_save();
 	int usig = sig;
 	int ret;
 
@@ -617,8 +617,6 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	 */
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		sigset_t *oldset;
-
 		/*
 		 * Depending on the signal settings we may need to revert the
 		 * decision to restart the system call.  But skip this if a
@@ -635,11 +633,7 @@ static void do_signal(struct pt_regs *regs, int syscall)
 			clear_thread_flag(TIF_SYSCALL_RESTARTSYS);
 		}
 
-		if (test_thread_flag(TIF_RESTORE_SIGMASK))
-			oldset = &current->saved_sigmask;
-		else
-			oldset = &current->blocked;
-		if (handle_signal(signr, &ka, &info, oldset, regs) == 0) {
+		if (handle_signal(signr, &ka, &info, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index 8b12c3046137..0e2c0527c9fe 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -224,14 +224,14 @@ static inline void setup_syscall_restart(struct pt_regs *regs)
 
 static inline void
 handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
-	      sigset_t *oldset, struct pt_regs *regs, int syscall)
+	      struct pt_regs *regs, int syscall)
 {
 	int ret;
 
 	/*
 	 * Set up the stack frame
 	 */
-	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+	ret = setup_rt_frame(sig, ka, info, sigmask_to_save(), regs);
 
 	/*
 	 * Check that the resulting registers are sane
@@ -255,7 +255,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
  * doesn't want to handle. Thus you cannot kill init even with a
  * SIGKILL even by mistake.
  */
-int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
+static void do_signal(struct pt_regs *regs, int syscall)
 {
 	siginfo_t info;
 	int signr;
@@ -267,12 +267,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
 	 * without doing anything if so.
 	 */
 	if (!user_mode(regs))
-		return 0;
-
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else if (!oldset)
-		oldset = &current->blocked;
+		return;
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (syscall) {
@@ -298,11 +293,10 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset, int syscall)
 	if (signr == 0) {
 		/* No signal to deliver -- put the saved sigmask back */
 		restore_saved_sigmask();
-		return 0;
+		return;
 	}
 
-	handle_signal(signr, &ka, &info, oldset, regs, syscall);
-	return 1;
+	handle_signal(signr, &ka, &info, regs, syscall);
 }
 
 asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
@@ -313,7 +307,7 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, struct thread_info *ti)
 		syscall = 1;
 
 	if (ti->flags & (_TIF_SIGPENDING | _TIF_RESTORE_SIGMASK))
-		do_signal(regs, &current->blocked, syscall);
+		do_signal(regs, syscall);
 
 	if (ti->flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index 9d692a1277b3..7f4205ddfa4d 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -249,7 +249,7 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
  */
 static int
 handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka,
-	      sigset_t *oldset, struct pt_regs *regs)
+	      struct pt_regs *regs)
 {
 	int ret;
 
@@ -259,7 +259,7 @@ handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka,
 		handle_restart(regs, ka, 1);
 
 	/* set up the stack frame */
-	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+	ret = setup_rt_frame(sig, ka, info, sigmask_to_save(), regs);
 
 	if (ret == 0)
 		block_sigmask(ka, sig);
@@ -281,22 +281,16 @@ asmlinkage void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
 
 	current->thread.esp0 = (unsigned long)regs;
 
 	if (try_to_freeze())
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+		if (handle_signal(signr, &info, &ka, regs) == 0) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index bfbcc958bbb4..38bb501eb117 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -250,8 +250,7 @@ do_restart:
  */
 static int handle_signal(int sig,
 			 siginfo_t *info, struct k_sigaction *ka,
-			 sigset_t *oldset, struct pt_regs *regs,
-			 int syscall)
+			 struct pt_regs *regs, int syscall)
 {
 	int ret;
 
@@ -278,7 +277,7 @@ static int handle_signal(int sig,
 	}
 
 	/* Set up the stack frame */
-	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+	ret = setup_rt_frame(sig, ka, info, sigmask_to_save(), regs);
 	if (ret == 0)
 		block_sigmask(ka, sig);
 
@@ -292,7 +291,6 @@ static void do_signal(struct pt_regs *regs, int syscall)
 {
 	struct k_sigaction ka;
 	siginfo_t info;
-	sigset_t *oldset;
 	int signr;
 
 	/* we want the common case to go fast, which is why we may in certain
@@ -300,15 +298,9 @@ static void do_signal(struct pt_regs *regs, int syscall)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		if (handle_signal(signr, &info, &ka, oldset,
-				  regs, syscall) == 0) {
+		if (handle_signal(signr, &info, &ka, regs, syscall) == 0) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index 06885e94e455..09a4cf4eb08a 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -417,8 +417,9 @@ give_sigsegv:
 
 static inline int handle_signal(int canrestart, unsigned long sig,
 	siginfo_t *info, struct k_sigaction *ka,
-	sigset_t *oldset, struct pt_regs *regs)
+	struct pt_regs *regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Are we from a system call? */
@@ -478,7 +479,6 @@ void do_signal(int canrestart, struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
         struct k_sigaction ka;
-	sigset_t *oldset;
 
 	/*
 	 * We want the common case to go fast, which
@@ -489,16 +489,11 @@ void do_signal(int canrestart, struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(canrestart, signr, &info, &ka,
-				oldset, regs)) {
+				regs)) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index fe12cdca0bac..d52276ddae4b 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -437,8 +437,9 @@ give_sigsegv:
 static inline int
 handle_signal(int canrestart, unsigned long sig,
 	      siginfo_t *info, struct k_sigaction *ka,
-              sigset_t *oldset, struct pt_regs * regs)
+              struct pt_regs * regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Check if this got called from a system call. */
@@ -511,7 +512,6 @@ do_signal(int canrestart, struct pt_regs *regs)
 	int signr;
 	siginfo_t info;
         struct k_sigaction ka;
-	sigset_t *oldset;
 
 	/*
 	 * The common case should go fast, which is why this point is
@@ -521,17 +521,12 @@ do_signal(int canrestart, struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
 		if (handle_signal(canrestart, signr, &info, &ka,
-				oldset, regs)) {
+				regs)) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 16351cc8c36c..22efe8d25038 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -427,8 +427,9 @@ give_sigsegv:
  * OK, we're invoking a handler
  */
 static int handle_signal(unsigned long sig, siginfo_t *info,
-			 struct k_sigaction *ka, sigset_t *oldset)
+			 struct k_sigaction *ka)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Are we from a system call? */
@@ -492,14 +493,9 @@ static void do_signal(void)
 	if (try_to_freeze())
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, __frame, NULL);
 	if (signr > 0) {
-		if (handle_signal(signr, &info, &ka, oldset) == 0) {
+		if (handle_signal(signr, &info, &ka) == 0) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index 63623dabab32..d4d2f72672ad 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -412,8 +412,9 @@ give_sigsegv:
  */
 static void
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-	      sigset_t *oldset,	struct pt_regs * regs)
+	      struct pt_regs * regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 	/* are we from a system call? */
 	if (regs->orig_er0 >= 0) {
@@ -457,7 +458,6 @@ statis void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
 
 	/*
 	 * We want the common case to go fast, which
@@ -473,15 +473,10 @@ statis void do_signal(struct pt_regs *regs)
 
 	current->thread.esp0 = (unsigned long) regs;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
-		handle_signal(signr, &info, &ka, oldset, regs);
+		handle_signal(signr, &info, &ka, regs);
 		return;
 	}
  no_signal:
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index acd6272913b3..f73fcee09bac 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -150,7 +150,7 @@ sigsegv:
  * Setup invocation of signal handler
  */
 static int handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka,
-			 sigset_t *oldset, struct pt_regs *regs)
+			 struct pt_regs *regs)
 {
 	int rc;
 
@@ -186,7 +186,7 @@ static int handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka,
 	 * Set up the stack frame; not doing the SA_SIGINFO thing.  We
 	 * only set up the rt_frame flavor.
 	 */
-	rc = setup_rt_frame(sig, ka, info, oldset, regs);
+	rc = setup_rt_frame(sig, ka, info, sigmask_to_save(), regs);
 
 	/* If there was an error on setup, no signal was delivered. */
 	if (rc)
@@ -215,14 +215,7 @@ static void do_signal(struct pt_regs *regs)
 	signo = get_signal_to_deliver(&info, &sigact, regs, NULL);
 
 	if (signo > 0) {
-		sigset_t *oldset;
-
-		if (test_thread_flag(TIF_RESTORE_SIGMASK))
-			oldset = &current->saved_sigmask;
-		else
-			oldset = &current->blocked;
-
-		if (handle_signal(signo, &info, &sigact, oldset, regs) == 0) {
+		if (handle_signal(signo, &info, &sigact, regs) == 0) {
 			/*
 			 * Successful delivery case.  The saved sigmask is
 			 * stored in the signal frame, and will be restored
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index 39d8f3afff49..9fee6d6a3f21 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -415,10 +415,10 @@ setup_frame (int sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set,
 }
 
 static long
-handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info, sigset_t *oldset,
+handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	       struct sigscratch *scr)
 {
-	if (!setup_frame(sig, ka, info, oldset, scr))
+	if (!setup_frame(sig, ka, info, sigmask_to_save(), scr))
 		return 0;
 
 	block_sigmask(ka, sig);
@@ -440,7 +440,6 @@ void
 ia64_do_signal (struct sigscratch *scr, long in_syscall)
 {
 	struct k_sigaction ka;
-	sigset_t *oldset;
 	siginfo_t info;
 	long restart = in_syscall;
 	long errno = scr->pt.r8;
@@ -453,11 +452,6 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
 	if (!user_mode(&scr->pt))
 		return;
 
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	/*
 	 * This only loops in the rare cases of handle_signal() failing, in which case we
 	 * need to push through a forced SIGSEGV.
@@ -507,7 +501,7 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
 		 * Whee!  Actually deliver the signal.  If the delivery failed, we need to
 		 * continue to iterate in this loop so we can deliver the SIGSEGV...
 		 */
-		if (handle_signal(signr, &ka, &info, oldset, scr)) {
+		if (handle_signal(signr, &ka, &info, scr)) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 2ad7c4587669..e0d6d1079f33 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -269,7 +269,7 @@ static int prev_insn(struct pt_regs *regs)
 
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
-	      sigset_t *oldset, struct pt_regs *regs)
+	      struct pt_regs *regs)
 {
 	/* Are we from a system call? */
 	if (regs->syscall_nr >= 0) {
@@ -294,7 +294,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	}
 
 	/* Set up the stack frame */
-	if (setup_rt_frame(sig, ka, info, oldset, regs))
+	if (setup_rt_frame(sig, ka, info, sigmask_to_save(), regs))
 		return -EFAULT;
 
 	block_sigmask(ka, sig);
@@ -311,7 +311,6 @@ static void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
 
 	/*
 	 * We want the common case to go fast, which
@@ -325,11 +324,6 @@ static void do_signal(struct pt_regs *regs)
 	if (try_to_freeze()) 
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Re-enable any watchpoints before delivering the
@@ -339,7 +333,7 @@ static void do_signal(struct pt_regs *regs)
 		 */
 
 		/* Whee!  Actually deliver the signal.  */
-		if (handle_signal(signr, &ka, &info, oldset, regs) == 0)
+		if (handle_signal(signr, &ka, &info, regs) == 0)
 			clear_thread_flag(TIF_RESTORE_SIGMASK);
 
 		return;
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 685cbe84f33f..c83eb5a8ed8b 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -1123,8 +1123,9 @@ handle_restart(struct pt_regs *regs, struct k_sigaction *ka, int has_handler)
  */
 static void
 handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
-	      sigset_t *oldset, struct pt_regs *regs)
+	      struct pt_regs *regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int err;
 	/* are we from a system call? */
 	if (regs->orig_d0 >= 0)
@@ -1160,19 +1161,13 @@ static void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	struct k_sigaction ka;
 	int signr;
-	sigset_t *oldset;
 
 	current->thread.esp0 = (unsigned long) regs;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
-		handle_signal(signr, &ka, &info, oldset, regs);
+		handle_signal(signr, &ka, &info, regs);
 		return;
 	}
 
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 8e644dfaba4f..fd2de5718a4e 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -312,8 +312,9 @@ do_restart:
 
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka,
-		siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+		siginfo_t *info, struct pt_regs *regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Set up the stack frame */
@@ -344,18 +345,12 @@ static void do_signal(struct pt_regs *regs, int in_syscall)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
 #ifdef DEBUG_SIG
 	printk(KERN_INFO "do signal: %p %d\n", regs, in_syscall);
 	printk(KERN_INFO "do signal2: %lx %lx %ld [%lx]\n", regs->pc, regs->r1,
 			regs->r12, current_thread_info()->flags);
 #endif
 
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee! Actually deliver the signal. */
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index aad2d2da5eec..18355060f241 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -515,8 +515,9 @@ struct mips_abi mips_abi = {
 };
 
 static int handle_signal(unsigned long sig, siginfo_t *info,
-	struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs)
+	struct k_sigaction *ka, struct pt_regs *regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 	struct mips_abi *abi = current->thread.abi;
 	void *vdso = current->mm->context.vdso;
@@ -560,7 +561,6 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
 static void do_signal(struct pt_regs *regs)
 {
 	struct k_sigaction ka;
-	sigset_t *oldset;
 	siginfo_t info;
 	int signr;
 
@@ -572,15 +572,10 @@ static void do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+		if (handle_signal(signr, &info, &ka, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index b7994c38eacc..26a1d98c62a1 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -430,8 +430,9 @@ static inline void stepback(struct pt_regs *regs)
  */
 static int handle_signal(int sig,
 			 siginfo_t *info, struct k_sigaction *ka,
-			 sigset_t *oldset, struct pt_regs *regs)
+			 struct pt_regs *regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Are we from a system call? */
@@ -475,7 +476,6 @@ static void do_signal(struct pt_regs *regs)
 {
 	struct k_sigaction ka;
 	siginfo_t info;
-	sigset_t *oldset;
 	int signr;
 
 	/* we want the common case to go fast, which is why we may in certain
@@ -483,14 +483,9 @@ static void do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+		if (handle_signal(signr, &info, &ka, regs) == 0) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 266c6fd2eb5c..721c584ff44a 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -254,11 +254,11 @@ give_sigsegv:
 static inline int
 handle_signal(unsigned long sig,
 	      siginfo_t *info, struct k_sigaction *ka,
-	      sigset_t *oldset, struct pt_regs *regs)
+	      struct pt_regs *regs)
 {
 	int ret;
 
-	ret = setup_rt_frame(sig, ka, info, oldset, regs);
+	ret = setup_rt_frame(sig, ka, info, sigmask_to_save(), regs);
 	if (ret)
 		return ret;
 
@@ -341,15 +341,9 @@ void do_signal(struct pt_regs *regs)
 		 * back */
 		restore_saved_sigmask();
 	} else {		/* signr > 0 */
-		sigset_t *oldset;
-
-		if (current_thread_info()->flags & _TIF_RESTORE_SIGMASK)
-			oldset = &current->saved_sigmask;
-		else
-			oldset = &current->blocked;
 
 		/* Whee!  Actually deliver the signal.  */
-		if (!handle_signal(signr, &info, &ka, oldset, regs)) {
+		if (!handle_signal(signr, &info, &ka, regs)) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 277cacadf653..441b25992846 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -443,8 +443,9 @@ give_sigsegv:
 
 static long
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-		sigset_t *oldset, struct pt_regs *regs, int in_syscall)
+		struct pt_regs *regs, int in_syscall)
 {
+	sigset_t *oldset = sigmask_to_save();
 	DBG(1,"handle_signal: sig=%ld, ka=%p, info=%p, oldset=%p, regs=%p\n",
 	       sig, ka, info, oldset, regs);
 	
@@ -568,28 +569,17 @@ do_signal(struct pt_regs *regs, long in_syscall)
 	siginfo_t info;
 	struct k_sigaction ka;
 	int signr;
-	sigset_t *oldset;
 
-	DBG(1,"\ndo_signal: oldset=0x%p, regs=0x%p, sr7 %#lx, in_syscall=%d\n",
-	       oldset, regs, regs->sr[7], in_syscall);
+	DBG(1,"\ndo_signal: regs=0x%p, sr7 %#lx, in_syscall=%d\n",
+	       regs, regs->sr[7], in_syscall);
 
 	/* Everyone else checks to see if they are in kernel mode at
 	   this point and exits if that's the case.  I'm not sure why
 	   we would be called in that case, but for some reason we
 	   are. */
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
-	DBG(1,"do_signal: oldset %08lx / %08lx\n", 
-		oldset->sig[0], oldset->sig[1]);
-
-
 	/* May need to force signal if handle_signal failed to deliver */
 	while (1) {
-	  
 		signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 		DBG(3,"do_signal: signr = %d, regs->gr[28] = %ld\n", signr, regs->gr[28]); 
 	
@@ -603,8 +593,7 @@ do_signal(struct pt_regs *regs, long in_syscall)
 		/* Whee!  Actually deliver the signal.  If the
 		   delivery failed, we need to continue to iterate in
 		   this loop so we can deliver the SIGSEGV... */
-		if (handle_signal(signr, &info, &ka, oldset,
-				  regs, in_syscall)) {
+		if (handle_signal(signr, &info, &ka, regs, in_syscall)) {
 			DBG(1,KERN_DEBUG "do_signal: Exit (success), regs->gr[28] = %ld\n",
 				regs->gr[28]);
 			if (test_thread_flag(TIF_RESTORE_SIGMASK))
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 0f4cc67f4268..8e9ddab7ade6 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -114,18 +114,13 @@ static void check_syscall_restart(struct pt_regs *regs, struct k_sigaction *ka,
 
 static int do_signal(struct pt_regs *regs)
 {
-	sigset_t *oldset;
+	sigset_t *oldset = sigmask_to_save();
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
 	int ret;
 	int is32 = is_32bit_task();
 
-	if (current_thread_info()->local_flags & _TLF_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
 	/* Is there any syscall restart business here ? */
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 37799089c38e..c880c48a09f3 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -398,12 +398,7 @@ void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
-
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
+	sigset_t *oldset = sigmask_to_save();
 
 	/*
 	 * Get signal to deliver. When running under ptrace, at this point
diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c
index 9e751559375b..b24dfaf2462f 100644
--- a/arch/score/kernel/signal.c
+++ b/arch/score/kernel/signal.c
@@ -242,7 +242,7 @@ give_sigsegv:
 }
 
 static int handle_signal(unsigned long sig, siginfo_t *info,
-	struct k_sigaction *ka, sigset_t *oldset, struct pt_regs *regs)
+	struct k_sigaction *ka, struct pt_regs *regs)
 {
 	int ret;
 
@@ -269,7 +269,7 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
 	/*
 	 * Set up the stack frame
 	 */
-	ret = setup_rt_frame(ka, regs, sig, oldset, info);
+	ret = setup_rt_frame(ka, regs, sig, sigmask_to_save(), info);
 
 	if (ret == 0)
 		block_sigmask(ka, sig);
@@ -280,7 +280,6 @@ static int handle_signal(unsigned long sig, siginfo_t *info,
 static void do_signal(struct pt_regs *regs)
 {
 	struct k_sigaction ka;
-	sigset_t *oldset;
 	siginfo_t info;
 	int signr;
 
@@ -292,15 +291,10 @@ static void do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Actually deliver the signal.  */
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+		if (handle_signal(signr, &info, &ka, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 92f4173ad29a..bfb3d599f032 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -524,8 +524,9 @@ handle_syscall_restart(unsigned long save_r0, struct pt_regs *regs,
  */
 static int
 handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
-	      sigset_t *oldset, struct pt_regs *regs, unsigned int save_r0)
+	      struct pt_regs *regs, unsigned int save_r0)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Set up the stack frame */
@@ -554,7 +555,6 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
 
 	/*
 	 * We want the common case to go fast, which
@@ -565,17 +565,12 @@ static void do_signal(struct pt_regs *regs, unsigned int save_r0)
 	if (!user_mode(regs))
 		return;
 
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		handle_syscall_restart(save_r0, regs, &ka.sa);
 
 		/* Whee!  Actually deliver the signal.  */
-		if (handle_signal(signr, &ka, &info, oldset,
+		if (handle_signal(signr, &ka, &info,
 				  regs, save_r0) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 6e191ef0aa62..cc22d2b2e3f2 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -45,7 +45,7 @@
 
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-		sigset_t *oldset, struct pt_regs * regs);
+		struct pt_regs * regs);
 
 static inline void
 handle_syscall_restart(struct pt_regs *regs, struct sigaction *sa)
@@ -88,7 +88,6 @@ static void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
 
 	/*
 	 * We want the common case to go fast, which
@@ -99,17 +98,12 @@ static void do_signal(struct pt_regs *regs)
 	if (!user_mode(regs))
 		return;
 
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, 0);
 	if (signr > 0) {
 		handle_syscall_restart(regs, &ka.sa);
 
 		/* Whee!  Actually deliver the signal.  */
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+		if (handle_signal(signr, &info, &ka, regs) == 0) {
 			/*
 			 * If a signal was successfully delivered, the
 			 * saved sigmask is in its frame, and we can
@@ -656,8 +650,9 @@ give_sigsegv:
  */
 static int
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
-		sigset_t *oldset, struct pt_regs * regs)
+		struct pt_regs * regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Set up the stack frame */
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 9dd97d2e171e..5d74410c787b 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -451,8 +451,9 @@ sigsegv:
 
 static inline int
 handle_signal(unsigned long signr, struct k_sigaction *ka,
-	      siginfo_t *info, sigset_t *oldset, struct pt_regs *regs)
+	      siginfo_t *info, struct pt_regs *regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int err;
 
 	if (ka->sa.sa_flags & SA_SIGINFO)
@@ -498,7 +499,6 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 {
 	struct k_sigaction ka;
 	int restart_syscall;
-	sigset_t *oldset;
 	siginfo_t info;
 	int signr;
 
@@ -523,11 +523,6 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	if (pt_regs_is_syscall(regs) && (regs->psr & PSR_C))
 		regs->u_regs[UREG_G6] = orig_i0;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 
 	/* If the debugger messes with the program counter, it clears
@@ -544,7 +539,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	if (signr > 0) {
 		if (restart_syscall)
 			syscall_restart(orig_i0, regs, &ka.sa);
-		if (handle_signal(signr, &ka, &info, oldset, regs) == 0) {
+		if (handle_signal(signr, &ka, &info, regs) == 0) {
 			/* a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
 			 * and will be restored by sigreturn, so we can simply
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 55b820ee0ac9..088a733f83f9 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -512,7 +512,7 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 {
 	struct k_sigaction ka;
 	int restart_syscall;
-	sigset_t *oldset;
+	sigset_t *oldset = sigmask_to_save();
 	siginfo_t info;
 	int signr;
 	
@@ -538,11 +538,6 @@ static void do_signal(struct pt_regs *regs, unsigned long orig_i0)
 	    (regs->tstate & (TSTATE_XCARRY | TSTATE_ICARRY)))
 		regs->u_regs[UREG_G6] = orig_i0;
 
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 #ifdef CONFIG_COMPAT
 	if (test_thread_flag(TIF_32BIT)) {
 		extern void do_signal32(sigset_t *, struct pt_regs *);
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 62b3493ea77d..588c28b2db58 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -243,9 +243,10 @@ give_sigsegv:
  */
 
 static int handle_signal(unsigned long sig, siginfo_t *info,
-			 struct k_sigaction *ka, sigset_t *oldset,
+			 struct k_sigaction *ka,
 			 struct pt_regs *regs)
 {
+	sigset_t *oldset = sigmask_to_save();
 	int ret;
 
 	/* Are we from a system call? */
@@ -299,7 +300,6 @@ void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t *oldset;
 
 	/*
 	 * i386 will check if we're coming from kernel mode and bail out
@@ -308,15 +308,10 @@ void do_signal(struct pt_regs *regs)
 	 * helpful, we can reinstate the check on "!user_mode(regs)".
 	 */
 
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
 		/* Whee! Actually deliver the signal.  */
-		if (handle_signal(signr, &info, &ka, oldset, regs) == 0) {
+		if (handle_signal(signr, &info, &ka, regs) == 0) {
 			/*
 			 * A signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 6acf13c1740b..909e9b8d6612 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -23,9 +23,9 @@ EXPORT_SYMBOL(unblock_signals);
  * OK, we're invoking a handler
  */
 static int handle_signal(struct pt_regs *regs, unsigned long signr,
-			 struct k_sigaction *ka, siginfo_t *info,
-			 sigset_t *oldset)
+			 struct k_sigaction *ka, siginfo_t *info)
 {
+	sigset_t *oldset = sigmask_to_save();
 	unsigned long sp;
 	int err;
 
@@ -77,14 +77,9 @@ static int kern_do_signal(struct pt_regs *regs)
 	int sig, handled_sig = 0;
 
 	while ((sig = get_signal_to_deliver(&info, &ka_copy, regs, NULL)) > 0) {
-		sigset_t *oldset;
-		if (test_thread_flag(TIF_RESTORE_SIGMASK))
-			oldset = &current->saved_sigmask;
-		else
-			oldset = &current->blocked;
 		handled_sig = 1;
 		/* Whee!  Actually deliver the signal.  */
-		if (!handle_signal(regs, sig, &ka_copy, &info, oldset)) {
+		if (!handle_signal(regs, sig, &ka_copy, &info)) {
 			/*
 			 * a signal was successfully delivered; the saved
 			 * sigmask will have been stored in the signal frame,
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index 65a5ed3b6f2a..bf23194dc74d 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -313,12 +313,11 @@ static inline void setup_syscall_restart(struct pt_regs *regs)
  * OK, we're invoking a handler
  */
 static int handle_signal(unsigned long sig, struct k_sigaction *ka,
-	      siginfo_t *info, sigset_t *oldset,
-	      struct pt_regs *regs, int syscall)
+	      siginfo_t *info, struct pt_regs *regs, int syscall)
 {
 	struct thread_info *thread = current_thread_info();
 	struct task_struct *tsk = current;
-	sigset_t blocked;
+	sigset_t *oldset = sigmask_to_save();
 	int usig = sig;
 	int ret;
 
@@ -404,13 +403,7 @@ static void do_signal(struct pt_regs *regs, int syscall)
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
 	if (signr > 0) {
-		sigset_t *oldset;
-
-		if (test_thread_flag(TIF_RESTORE_SIGMASK))
-			oldset = &current->saved_sigmask;
-		else
-			oldset = &current->blocked;
-		if (handle_signal(signr, &ka, &info, oldset, regs, syscall)
+		if (handle_signal(signr, &ka, &info, regs, syscall)
 				== 0) {
 			/*
 			 * A signal was successfully delivered; the saved
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 25a4a81a51aa..56f3062c5111 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -647,12 +647,9 @@ setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 		struct pt_regs *regs)
 {
 	int usig = signr_convert(sig);
-	sigset_t *set = &current->blocked;
+	sigset_t *set = sigmask_to_save();
 	int ret;
 
-	if (current_thread_info()->status & TS_RESTORE_SIGMASK)
-		set = &current->saved_sigmask;
-
 	/* Set up the stack frame */
 	if (is_ia32) {
 		if (ka->sa.sa_flags & SA_SIGINFO)
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 8c4e751e3b83..e4b06e2d4eb9 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -452,16 +452,10 @@ static void do_signal(struct pt_regs *regs)
 	siginfo_t info;
 	int signr;
 	struct k_sigaction ka;
-	sigset_t oldset;
 
 	if (try_to_freeze())
 		goto no_signal;
 
-	if (test_thread_flag(TIF_RESTORE_SIGMASK))
-		oldset = &current->saved_sigmask;
-	else
-		oldset = &current->blocked;
-
 	task_pt_regs(current)->icountlevel = 0;
 
 	signr = get_signal_to_deliver(&info, &ka, regs, NULL);
@@ -501,7 +495,7 @@ static void do_signal(struct pt_regs *regs)
 
 		/* Whee!  Actually deliver the signal.  */
 		/* Set up the stack frame */
-		ret = setup_frame(signr, &ka, &info, oldset, regs);
+		ret = setup_frame(signr, &ka, &info, sigmask_to_save(), regs);
 		if (ret)
 			return;
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f1b46b88f6f5..ded3fb63fb06 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2213,6 +2213,14 @@ static inline void restore_saved_sigmask(void)
 		set_current_blocked(&current->saved_sigmask);
 }
 
+static inline sigset_t *sigmask_to_save(void)
+{
+	sigset_t *res = &current->blocked;
+	if (unlikely(test_restore_sigmask()))
+		res = &current->saved_sigmask;
+	return res;
+}
+
 static inline int kill_cad_pid(int sig, int priv)
 {
 	return kill_pid(cad_pid, sig, priv);
-- 
cgit 


From edd63a2763bdae0daa4f0a4d4c5d61d1154352a5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 27 Apr 2012 13:42:45 -0400
Subject: set_restore_sigmask() is never called without SIGPENDING (and never
 should be)

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/include/asm/thread_info.h       | 2 +-
 arch/microblaze/include/asm/thread_info.h | 2 +-
 arch/powerpc/include/asm/thread_info.h    | 2 +-
 arch/sh/include/asm/thread_info.h         | 2 +-
 arch/sparc/include/asm/thread_info_64.h   | 2 +-
 arch/tile/include/asm/thread_info.h       | 2 +-
 arch/x86/include/asm/thread_info.h        | 2 +-
 include/linux/thread_info.h               | 3 ++-
 8 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h
index 8d600363fa57..f7ee85378311 100644
--- a/arch/ia64/include/asm/thread_info.h
+++ b/arch/ia64/include/asm/thread_info.h
@@ -141,7 +141,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, &ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
 }
 static inline void clear_restore_sigmask(void)
 {
diff --git a/arch/microblaze/include/asm/thread_info.h b/arch/microblaze/include/asm/thread_info.h
index 12e39206b3ef..6c610234ffab 100644
--- a/arch/microblaze/include/asm/thread_info.h
+++ b/arch/microblaze/include/asm/thread_info.h
@@ -166,7 +166,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
 }
 static inline void clear_restore_sigmask(void)
 {
diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h
index 85d50a93a92f..68831e9cf82f 100644
--- a/arch/powerpc/include/asm/thread_info.h
+++ b/arch/powerpc/include/asm/thread_info.h
@@ -140,7 +140,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->local_flags |= _TLF_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, &ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
 }
 static inline void clear_restore_sigmask(void)
 {
diff --git a/arch/sh/include/asm/thread_info.h b/arch/sh/include/asm/thread_info.h
index a109157c6b8f..bc13b57cdc83 100644
--- a/arch/sh/include/asm/thread_info.h
+++ b/arch/sh/include/asm/thread_info.h
@@ -169,7 +169,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
 }
 
 #define TI_FLAG_FAULT_CODE_SHIFT	24
diff --git a/arch/sparc/include/asm/thread_info_64.h b/arch/sparc/include/asm/thread_info_64.h
index cb9b7a9f5fc1..cfa8c38fb9c8 100644
--- a/arch/sparc/include/asm/thread_info_64.h
+++ b/arch/sparc/include/asm/thread_info_64.h
@@ -238,7 +238,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, &ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
 }
 static inline void clear_restore_sigmask(void)
 {
diff --git a/arch/tile/include/asm/thread_info.h b/arch/tile/include/asm/thread_info.h
index 5aef371921e4..7e1fef36bde6 100644
--- a/arch/tile/include/asm/thread_info.h
+++ b/arch/tile/include/asm/thread_info.h
@@ -166,7 +166,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, &ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, &ti->flags));
 }
 static inline void clear_restore_sigmask(void)
 {
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 8f3f1ff69fa9..89f794f007ec 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -248,7 +248,7 @@ static inline void set_restore_sigmask(void)
 {
 	struct thread_info *ti = current_thread_info();
 	ti->status |= TS_RESTORE_SIGMASK;
-	set_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags);
+	WARN_ON(!test_bit(TIF_SIGPENDING, (unsigned long *)&ti->flags));
 }
 static inline void clear_restore_sigmask(void)
 {
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index ed279701ac79..ccc1899bd62e 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -8,6 +8,7 @@
 #define _LINUX_THREAD_INFO_H
 
 #include <linux/types.h>
+#include <linux/bug.h>
 
 struct timespec;
 struct compat_timespec;
@@ -125,7 +126,7 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
 static inline void set_restore_sigmask(void)
 {
 	set_thread_flag(TIF_RESTORE_SIGMASK);
-	set_thread_flag(TIF_SIGPENDING);
+	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
 }
 static inline void clear_restore_sigmask(void)
 {
-- 
cgit 


From 77097ae503b170120ab66dd1d547f8577193f91f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 27 Apr 2012 13:58:59 -0400
Subject: most of set_current_blocked() callers want SIGKILL/SIGSTOP removed
 from set

Only 3 out of 63 do not.  Renamed the current variant to __set_current_blocked(),
added set_current_blocked() that will exclude unblockable signals, switched
open-coded instances to it.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/signal.c          |  2 --
 arch/arm/kernel/signal.c            |  6 +-----
 arch/avr32/kernel/signal.c          |  3 ---
 arch/blackfin/kernel/signal.c       |  3 ---
 arch/c6x/kernel/signal.c            |  3 ---
 arch/cris/arch-v10/kernel/signal.c  |  4 ----
 arch/cris/arch-v32/kernel/signal.c  |  5 -----
 arch/frv/kernel/signal.c            |  4 ----
 arch/h8300/kernel/signal.c          |  4 ----
 arch/hexagon/kernel/signal.c        |  3 ---
 arch/ia64/kernel/signal.c           |  2 --
 arch/m32r/kernel/signal.c           |  3 ---
 arch/m68k/kernel/signal.c           |  4 ----
 arch/microblaze/kernel/signal.c     |  3 ---
 arch/mips/kernel/signal-common.h    |  2 --
 arch/mips/kernel/signal.c           |  2 --
 arch/mips/kernel/signal32.c         |  2 --
 arch/mips/kernel/signal_n32.c       |  1 -
 arch/mn10300/kernel/signal.c        |  4 ----
 arch/openrisc/kernel/signal.c       |  3 ---
 arch/parisc/kernel/signal.c         |  4 ----
 arch/parisc/kernel/signal32.c       |  2 --
 arch/powerpc/kernel/signal.c        |  1 -
 arch/powerpc/kernel/signal.h        |  2 --
 arch/s390/kernel/compat_signal.c    |  4 ----
 arch/s390/kernel/signal.c           |  5 -----
 arch/score/kernel/signal.c          |  3 ---
 arch/sh/kernel/signal_32.c          |  4 ----
 arch/sh/kernel/signal_64.c          |  4 ----
 arch/sparc/kernel/signal32.c        |  4 ----
 arch/sparc/kernel/signal_32.c       |  4 ----
 arch/sparc/kernel/signal_64.c       |  4 ----
 arch/tile/kernel/compat_signal.c    |  3 ---
 arch/tile/kernel/signal.c           |  3 ---
 arch/um/include/shared/frame_kern.h |  3 ---
 arch/um/kernel/signal.c             |  4 ----
 arch/unicore32/kernel/signal.c      |  6 +-----
 arch/x86/ia32/ia32_signal.c         |  2 --
 arch/x86/include/asm/sighandling.h  |  2 --
 arch/x86/kernel/signal.c            |  3 ---
 arch/x86/um/signal.c                |  2 --
 arch/xtensa/kernel/signal.c         |  3 ---
 include/linux/sched.h               |  2 +-
 include/linux/signal.h              |  3 ++-
 kernel/signal.c                     | 18 ++++++++++++------
 45 files changed, 17 insertions(+), 141 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index bb45a8813393..48c4df2389ac 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -226,7 +226,6 @@ do_sigreturn(struct sigcontext __user *sc, struct pt_regs *regs,
 	if (__get_user(set.sig[0], &sc->sc_mask))
 		goto give_sigsegv;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(sc, regs, sw))
@@ -261,7 +260,6 @@ do_rt_sigreturn(struct rt_sigframe __user *frame, struct pt_regs *regs,
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto give_sigsegv;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(&frame->uc.uc_mcontext, regs, sw))
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index 7f9abd75fc2e..c126eba8411d 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -22,8 +22,6 @@
 
 #include "signal.h"
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /*
  * For ARM syscalls, we encode the syscall number into the instruction.
  */
@@ -210,10 +208,8 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf)
 	int err;
 
 	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
-	if (err == 0) {
-		sigdelsetmask(&set, ~_BLOCKABLE);
+	if (err == 0)
 		set_current_blocked(&set);
-	}
 
 	__get_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err);
 	__get_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err);
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index 3ac1a60f9eb6..e883fa5eb845 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -22,8 +22,6 @@
 #include <asm/ucontext.h>
 #include <asm/syscalls.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 asmlinkage int sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 			       struct pt_regs *regs)
 {
@@ -89,7 +87,6 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index b20d435d084a..463612643821 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -19,8 +19,6 @@
 #include <asm/fixed_code.h>
 #include <asm/syscall.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /* Location of the trace bit in SYSCFG. */
 #define TRACE_BITS 0x0001
 
@@ -98,7 +96,6 @@ asmlinkage int do_rt_sigreturn(unsigned long __unused)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (rt_restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index d599a7fb5d24..eb1b3086ae00 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -20,8 +20,6 @@
 #include <asm/cacheflush.h>
 
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /*
  * Do a signal return, undo the signal stack.
  */
@@ -87,7 +85,6 @@ asmlinkage int do_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index 46c8ca605e4d..cf6380cb9a57 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -31,8 +31,6 @@
 
 #define DEBUG_SIG 0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /* a syscall in Linux/CRIS is a break 13 instruction which is 2 bytes */
 /* manipulate regs so that upon return, it will be re-executed */
 
@@ -176,7 +174,6 @@ asmlinkage int sys_sigreturn(long r10, long r11, long r12, long r13, long mof,
 				    sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->sc))
@@ -212,7 +209,6 @@ asmlinkage int sys_rt_sigreturn(long r10, long r11, long r12, long r13,
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index e0431328b7cd..07b81ee09f65 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -24,9 +24,6 @@
 
 extern unsigned long cris_signal_return_page;
 
-/* Flag to check if a signal is blockable. */
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /*
  * A syscall in CRIS is really a "break 13" instruction, which is 2
  * bytes. The registers is manipulated so upon return the instruction
@@ -167,7 +164,6 @@ sys_sigreturn(long r10, long r11, long r12, long r13, long mof, long srp,
 						 sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->sc))
@@ -208,7 +204,6 @@ sys_rt_sigreturn(long r10, long r11, long r12, long r13, long mof, long srp,
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 9ec3d2e27b4c..511285fa2461 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -28,8 +28,6 @@
 
 #define DEBUG_SIG 0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 struct fdpic_func_descriptor {
 	unsigned long	text;
 	unsigned long	GOT;
@@ -149,7 +147,6 @@ asmlinkage int sys_sigreturn(void)
 	    __copy_from_user(&set.sig[1], &frame->extramask, sizeof(frame->extramask)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(&frame->sc, &gr8))
@@ -172,7 +169,6 @@ asmlinkage int sys_rt_sigreturn(void)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(&frame->uc.uc_mcontext, &gr8))
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index 8fbfc39574f5..aa6f09666915 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -47,8 +47,6 @@
 #include <asm/traps.h>
 #include <asm/ucontext.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /*
  * Atomically swap in the new signal mask, and wait for a signal.
  */
@@ -186,7 +184,6 @@ asmlinkage int do_sigreturn(unsigned long __unused,...)
 			      sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	
 	if (restore_sigcontext(regs, &frame->sc, &er0))
@@ -211,7 +208,6 @@ asmlinkage int do_rt_sigreturn(unsigned long __unused,...)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &er0))
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index c9caf7401191..439f11a3a8ef 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -31,8 +31,6 @@
 #include <asm/signal.h>
 #include <asm/vdso.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 struct rt_sigframe {
 	unsigned long tramp[2];
 	struct siginfo info;
@@ -273,7 +271,6 @@ asmlinkage int sys_rt_sigreturn(void)
 	if (__copy_from_user(&blocked, &frame->uc.uc_sigmask, sizeof(blocked)))
 		goto badframe;
 
-	sigdelsetmask(&blocked, ~_BLOCKABLE);
 	set_current_blocked(&blocked);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index dc6fe6573465..c4041c76c07d 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -30,7 +30,6 @@
 
 #define DEBUG_SIG	0
 #define STACK_ALIGN	16		/* minimal alignment for stack pointer */
-#define _BLOCKABLE	(~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
 
 #if _NSIG_WORDS > 1
 # define PUT_SIGSET(k,u)	__copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t))
@@ -200,7 +199,6 @@ ia64_rt_sigreturn (struct sigscratch *scr)
 	if (GET_SIGSET(&set, &sc->sc_mask))
 		goto give_sigsegv;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(sc, scr))
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 7cbfa639fbfa..07f9032576c0 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -28,8 +28,6 @@
 
 #define DEBUG_SIG 0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 asmlinkage int
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		unsigned long r2, unsigned long r3, unsigned long r4,
@@ -111,7 +109,6 @@ sys_rt_sigreturn(unsigned long r0, unsigned long r1,
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &result))
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index 6dbee8a167a5..c00caad215a6 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -51,8 +51,6 @@
 #include <asm/traps.h>
 #include <asm/ucontext.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 #ifdef CONFIG_MMU
 
 /*
@@ -795,7 +793,6 @@ asmlinkage int do_sigreturn(unsigned long __unused)
 			      sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->sc, frame + 1))
@@ -820,7 +817,6 @@ asmlinkage int do_rt_sigreturn(unsigned long __unused)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (rt_restore_ucontext(regs, sw, &frame->uc))
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index 03641199666e..c662e68671a2 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -41,8 +41,6 @@
 #include <asm/cacheflush.h>
 #include <asm/syscalls.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 asmlinkage long
 sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
 		struct pt_regs *regs)
@@ -106,7 +104,6 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &rval))
diff --git a/arch/mips/kernel/signal-common.h b/arch/mips/kernel/signal-common.h
index 10263b405981..9c60d09e62a7 100644
--- a/arch/mips/kernel/signal-common.h
+++ b/arch/mips/kernel/signal-common.h
@@ -19,8 +19,6 @@
 #  define DEBUGP(fmt, args...)
 #endif
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /*
  * Determine which stack to use..
  */
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 896165757e6f..02e0cba24f82 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -339,7 +339,6 @@ asmlinkage void sys_sigreturn(nabi_no_regargs struct pt_regs regs)
 	if (__copy_from_user(&blocked, &frame->sf_mask, sizeof(blocked)))
 		goto badframe;
 
-	sigdelsetmask(&blocked, ~_BLOCKABLE);
 	set_current_blocked(&blocked);
 
 	sig = restore_sigcontext(&regs, &frame->sf_sc);
@@ -375,7 +374,6 @@ asmlinkage void sys_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
 	if (__copy_from_user(&set, &frame->rs_uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	sig = restore_sigcontext(&regs, &frame->rs_uc.uc_mcontext);
diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c
index b4fe2eacbd5d..da1b56a39ac7 100644
--- a/arch/mips/kernel/signal32.c
+++ b/arch/mips/kernel/signal32.c
@@ -465,7 +465,6 @@ asmlinkage void sys32_sigreturn(nabi_no_regargs struct pt_regs regs)
 	if (__copy_conv_sigset_from_user(&blocked, &frame->sf_mask))
 		goto badframe;
 
-	sigdelsetmask(&blocked, ~_BLOCKABLE);
 	set_current_blocked(&blocked);
 
 	sig = restore_sigcontext32(&regs, &frame->sf_sc);
@@ -503,7 +502,6 @@ asmlinkage void sys32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
 	if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	sig = restore_sigcontext32(&regs, &frame->rs_uc.uc_mcontext);
diff --git a/arch/mips/kernel/signal_n32.c b/arch/mips/kernel/signal_n32.c
index 63ffac9af7c5..3574c145511b 100644
--- a/arch/mips/kernel/signal_n32.c
+++ b/arch/mips/kernel/signal_n32.c
@@ -109,7 +109,6 @@ asmlinkage void sysn32_rt_sigreturn(nabi_no_regargs struct pt_regs regs)
 	if (__copy_conv_sigset_from_user(&set, &frame->rs_uc.uc_sigmask))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	sig = restore_sigcontext(&regs, &frame->rs_uc.uc_mcontext);
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index d57013e06ea0..4f6d20763061 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -31,8 +31,6 @@
 
 #define DEBUG_SIG 0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /*
  * atomically swap in the new signal mask, and wait for a signal.
  */
@@ -163,7 +161,6 @@ asmlinkage long sys_sigreturn(void)
 			     sizeof(frame->extramask)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(current_frame(), &frame->sc, &d0))
@@ -191,7 +188,6 @@ asmlinkage long sys_rt_sigreturn(void)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(current_frame(), &frame->uc.uc_mcontext, &d0))
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index aa1105c1618f..53972b7260b7 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -33,8 +33,6 @@
 
 #define DEBUG_SIG 0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 asmlinkage long
 _sys_sigaltstack(const stack_t *uss, stack_t *uoss, struct pt_regs *regs)
 {
@@ -101,7 +99,6 @@ asmlinkage long _sys_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 7f3c8f2c962d..25161eaf720d 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -48,9 +48,6 @@
 #define DBG(LEVEL, ...)
 #endif
 	
-
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /* gcc will complain if a pointer is cast to an integer of different
  * size.  If you really need to do this (and we do for an ELF32 user
  * application in an ELF64 kernel) then you have to do a cast to an
@@ -131,7 +128,6 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall)
 			goto give_sigsegv;
 	}
 		
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	/* Good thing we saved the old gr[30], eh? */
diff --git a/arch/parisc/kernel/signal32.c b/arch/parisc/kernel/signal32.c
index e14132430762..fd49aeda9eb8 100644
--- a/arch/parisc/kernel/signal32.c
+++ b/arch/parisc/kernel/signal32.c
@@ -47,8 +47,6 @@
 #define DBG(LEVEL, ...)
 #endif
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 inline void
 sigset_32to64(sigset_t *s64, compat_sigset_t *s32)
 {
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index d926d2e4611a..3a3413c049c3 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -57,7 +57,6 @@ void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
  */
 void restore_sigmask(sigset_t *set)
 {
-	sigdelsetmask(set, ~_BLOCKABLE);
 	set_current_blocked(set);
 }
 
diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h
index 8dde973aaaf5..11439ea18ed4 100644
--- a/arch/powerpc/kernel/signal.h
+++ b/arch/powerpc/kernel/signal.h
@@ -10,8 +10,6 @@
 #ifndef _POWERPC_ARCH_SIGNAL_H
 #define _POWERPC_ARCH_SIGNAL_H
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags);
 
 extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 233db1d68eee..923baa96c0b0 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -32,8 +32,6 @@
 #include "compat_ptrace.h"
 #include "entry.h"
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 typedef struct 
 {
 	__u8 callee_used_stack[__SIGNAL_FRAMESIZE32];
@@ -364,7 +362,6 @@ asmlinkage long sys32_sigreturn(void)
 		goto badframe;
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32))
 		goto badframe;
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	if (restore_sigregs32(regs, &frame->sregs))
 		goto badframe;
@@ -390,7 +387,6 @@ asmlinkage long sys32_rt_sigreturn(void)
 		goto badframe;
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	if (restore_sigregs32(regs, &frame->uc.uc_mcontext))
 		goto badframe;
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 7f9a862a161a..8332a6943384 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -33,9 +33,6 @@
 #include <asm/switch_to.h>
 #include "entry.h"
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
-
 typedef struct 
 {
 	__u8 callee_used_stack[__SIGNAL_FRAMESIZE];
@@ -169,7 +166,6 @@ SYSCALL_DEFINE0(sigreturn)
 		goto badframe;
 	if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE))
 		goto badframe;
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	if (restore_sigregs(regs, &frame->sregs))
 		goto badframe;
@@ -189,7 +185,6 @@ SYSCALL_DEFINE0(rt_sigreturn)
 		goto badframe;
 	if (__copy_from_user(&set.sig, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	if (restore_sigregs(regs, &frame->uc.uc_mcontext))
 		goto badframe;
diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c
index 13e0eed0e301..f1b3fef0907b 100644
--- a/arch/score/kernel/signal.c
+++ b/arch/score/kernel/signal.c
@@ -34,8 +34,6 @@
 #include <asm/syscalls.h>
 #include <asm/ucontext.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 struct rt_sigframe {
 	u32 rs_ass[4];		/* argument save space */
 	u32 rs_code[2];		/* signal trampoline */
@@ -162,7 +160,6 @@ score_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->rs_uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	sig = restore_sigcontext(regs, &frame->rs_uc.uc_mcontext);
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index 2675a97f374f..e4a531414e19 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -32,8 +32,6 @@
 #include <asm/syscalls.h>
 #include <asm/fpu.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 struct fdpic_func_descriptor {
 	unsigned long	text;
 	unsigned long	GOT;
@@ -226,7 +224,6 @@ asmlinkage int sys_sigreturn(unsigned long r4, unsigned long r5,
 				    sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->sc, &r0))
@@ -256,7 +253,6 @@ asmlinkage int sys_rt_sigreturn(unsigned long r4, unsigned long r5,
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &r0))
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 7075c63bfc6f..75960ef6c1d1 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -41,8 +41,6 @@
 
 #define DEBUG_SIG 0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 static void
 handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 		struct pt_regs * regs);
@@ -330,7 +328,6 @@ asmlinkage int sys_sigreturn(unsigned long r2, unsigned long r3,
 				    sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->sc, &ret))
@@ -363,7 +360,6 @@ asmlinkage int sys_rt_sigreturn(unsigned long r2, unsigned long r3,
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ret))
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index 8c93c00922a7..ba3dbfcdb28e 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -32,8 +32,6 @@
 
 #include "sigutil.h"
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /* This magic should be in g_upper[0] for all upper parts
  * to be valid.
  */
@@ -274,7 +272,6 @@ void do_sigreturn32(struct pt_regs *regs)
 		case 2: set.sig[1] = seta[2] + (((long)seta[3]) << 32);
 		case 1: set.sig[0] = seta[0] + (((long)seta[1]) << 32);
 	}
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	return;
 
@@ -376,7 +373,6 @@ asmlinkage void do_rt_sigreturn32(struct pt_regs *regs)
 		case 2: set.sig[1] = seta.sig[2] + (((long)seta.sig[3]) << 32);
 		case 1: set.sig[0] = seta.sig[0] + (((long)seta.sig[1]) << 32);
 	}
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	return;
 segv:
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index f6722427203d..1bfa854be602 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -29,8 +29,6 @@
 
 #include "sigutil.h"
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 extern void fpsave(unsigned long *fpregs, unsigned long *fsr,
 		   void *fpqueue, unsigned long *fpqdepth);
 extern void fpload(unsigned long *fpregs, unsigned long *fsr);
@@ -130,7 +128,6 @@ asmlinkage void do_sigreturn(struct pt_regs *regs)
 	if (err)
 		goto segv_and_exit;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	return;
 
@@ -197,7 +194,6 @@ asmlinkage void do_rt_sigreturn(struct pt_regs *regs)
 			goto segv;
 	}
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	return;
 segv:
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index febbc4b697ba..23b60caa6c43 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -38,8 +38,6 @@
 #include "systbls.h"
 #include "sigutil.h"
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /* {set, get}context() needed for 64-bit SparcLinux userland. */
 asmlinkage void sparc64_set_context(struct pt_regs *regs)
 {
@@ -71,7 +69,6 @@ asmlinkage void sparc64_set_context(struct pt_regs *regs)
 			if (__copy_from_user(&set, &ucp->uc_sigmask, sizeof(sigset_t)))
 				goto do_sigsegv;
 		}
-		sigdelsetmask(&set, ~_BLOCKABLE);
 		set_current_blocked(&set);
 	}
 	if (test_thread_flag(TIF_32BIT)) {
@@ -315,7 +312,6 @@ void do_rt_sigreturn(struct pt_regs *regs)
 	/* Prevent syscall restart.  */
 	pt_regs_clear_syscall(regs);
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 	return;
 segv:
diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c
index cdef6e5ec022..474571b84085 100644
--- a/arch/tile/kernel/compat_signal.c
+++ b/arch/tile/kernel/compat_signal.c
@@ -118,8 +118,6 @@ struct compat_rt_sigframe {
 	struct compat_ucontext uc;
 };
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 long compat_sys_rt_sigaction(int sig, struct compat_sigaction __user *act,
 			     struct compat_sigaction __user *oact,
 			     size_t sigsetsize)
@@ -302,7 +300,6 @@ long compat_sys_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index 9b71bfd4913d..e068aa0c6dfc 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -37,8 +37,6 @@
 
 #define DEBUG_SIG 0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 SYSCALL_DEFINE3(sigaltstack, const stack_t __user *, uss,
 		stack_t __user *, uoss, struct pt_regs *, regs)
 {
@@ -96,7 +94,6 @@ SYSCALL_DEFINE1(rt_sigreturn, struct pt_regs *, regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
diff --git a/arch/um/include/shared/frame_kern.h b/arch/um/include/shared/frame_kern.h
index 76078490c258..e584e40ee832 100644
--- a/arch/um/include/shared/frame_kern.h
+++ b/arch/um/include/shared/frame_kern.h
@@ -6,9 +6,6 @@
 #ifndef __FRAME_KERN_H_
 #define __FRAME_KERN_H_
 
-#define _S(nr) (1<<((nr)-1))
-#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
-
 extern int setup_signal_stack_sc(unsigned long stack_top, int sig, 
 				 struct k_sigaction *ka,
 				 struct pt_regs *regs, 
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 549a51c8e54f..4ce6ab2d2996 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -15,10 +15,6 @@
 EXPORT_SYMBOL(block_signals);
 EXPORT_SYMBOL(unblock_signals);
 
-#define _S(nr) (1<<((nr)-1))
-
-#define _BLOCKABLE (~(_S(SIGKILL) | _S(SIGSTOP)))
-
 /*
  * OK, we're invoking a handler
  */
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index af962e57efb2..4d9c4841989d 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -21,8 +21,6 @@
 #include <asm/cacheflush.h>
 #include <asm/ucontext.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 /*
  * For UniCore syscalls, we encode the syscall number into the instruction.
  */
@@ -61,10 +59,8 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf)
 	int err;
 
 	err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set));
-	if (err == 0) {
-		sigdelsetmask(&set, ~_BLOCKABLE);
+	if (err == 0)
 		set_current_blocked(&set);
-	}
 
 	err |= __get_user(regs->UCreg_00, &sf->uc.uc_mcontext.regs.UCreg_00);
 	err |= __get_user(regs->UCreg_01, &sf->uc.uc_mcontext.regs.UCreg_01);
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index 98bd70faccc5..daeca56211e3 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -273,7 +273,6 @@ asmlinkage long sys32_sigreturn(struct pt_regs *regs)
 				    sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (ia32_restore_sigcontext(regs, &frame->sc, &ax))
@@ -299,7 +298,6 @@ asmlinkage long sys32_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (ia32_restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index ada93b3b8c66..beff97f7df37 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -7,8 +7,6 @@
 
 #include <asm/processor-flags.h>
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 #define __FIX_EFLAGS	(X86_EFLAGS_AC | X86_EFLAGS_OF | \
 			 X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \
 			 X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 700c49dcd84e..11e206f0f45a 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -555,7 +555,6 @@ unsigned long sys_sigreturn(struct pt_regs *regs)
 				    sizeof(frame->extramask))))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->sc, &ax))
@@ -581,7 +580,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
@@ -915,7 +913,6 @@ asmlinkage long sys32_x32_rt_sigreturn(struct pt_regs *regs)
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, &frame->uc.uc_mcontext, &ax))
diff --git a/arch/x86/um/signal.c b/arch/x86/um/signal.c
index bb0fb03b9f85..a508cea13503 100644
--- a/arch/x86/um/signal.c
+++ b/arch/x86/um/signal.c
@@ -486,7 +486,6 @@ long sys_sigreturn(struct pt_regs *regs)
 	    copy_from_user(&set.sig[1], extramask, sig_size))
 		goto segfault;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (copy_sc_from_user(&current->thread.regs, sc))
@@ -600,7 +599,6 @@ long sys_rt_sigreturn(struct pt_regs *regs)
 	if (copy_from_user(&set, &uc->uc_sigmask, sizeof(set)))
 		goto segfault;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext))
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index ca98b86ef9a7..4da3c6f6d929 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -30,8 +30,6 @@
 
 #define DEBUG_SIG  0
 
-#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP)))
-
 extern struct task_struct *coproc_owners[];
 
 struct rt_sigframe
@@ -261,7 +259,6 @@ asmlinkage long xtensa_rt_sigreturn(long a0, long a1, long a2, long a3,
 	if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
 		goto badframe;
 
-	sigdelsetmask(&set, ~_BLOCKABLE);
 	set_current_blocked(&set);
 
 	if (restore_sigcontext(regs, frame))
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ded3fb63fb06..f34437e835a7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2210,7 +2210,7 @@ extern int do_sigaltstack(const stack_t __user *, stack_t __user *, unsigned lon
 static inline void restore_saved_sigmask(void)
 {
 	if (test_and_clear_restore_sigmask())
-		set_current_blocked(&current->saved_sigmask);
+		__set_current_blocked(&current->saved_sigmask);
 }
 
 static inline sigset_t *sigmask_to_save(void)
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 17046cc484bc..065e76330398 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -250,7 +250,8 @@ extern long do_sigpending(void __user *, unsigned long);
 extern int do_sigtimedwait(const sigset_t *, siginfo_t *,
 				const struct timespec *);
 extern int sigprocmask(int, sigset_t *, sigset_t *);
-extern void set_current_blocked(const sigset_t *);
+extern void set_current_blocked(sigset_t *);
+extern void __set_current_blocked(const sigset_t *);
 extern int show_unhandled_signals;
 extern int sigsuspend(sigset_t *);
 
diff --git a/kernel/signal.c b/kernel/signal.c
index b9be7e0fe41a..df8d721a9e6f 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2524,7 +2524,16 @@ static void __set_task_blocked(struct task_struct *tsk, const sigset_t *newset)
  * It is wrong to change ->blocked directly, this helper should be used
  * to ensure the process can't miss a shared signal we are going to block.
  */
-void set_current_blocked(const sigset_t *newset)
+void set_current_blocked(sigset_t *newset)
+{
+	struct task_struct *tsk = current;
+	sigdelsetmask(newset, sigmask(SIGKILL) | sigmask(SIGSTOP));
+	spin_lock_irq(&tsk->sighand->siglock);
+	__set_task_blocked(tsk, newset);
+	spin_unlock_irq(&tsk->sighand->siglock);
+}
+
+void __set_current_blocked(const sigset_t *newset)
 {
 	struct task_struct *tsk = current;
 
@@ -2564,7 +2573,7 @@ int sigprocmask(int how, sigset_t *set, sigset_t *oldset)
 		return -EINVAL;
 	}
 
-	set_current_blocked(&newset);
+	__set_current_blocked(&newset);
 	return 0;
 }
 
@@ -3138,7 +3147,7 @@ SYSCALL_DEFINE3(sigprocmask, int, how, old_sigset_t __user *, nset,
 			return -EINVAL;
 		}
 
-		set_current_blocked(&new_blocked);
+		__set_current_blocked(&new_blocked);
 	}
 
 	if (oset) {
@@ -3202,7 +3211,6 @@ SYSCALL_DEFINE1(ssetmask, int, newmask)
 	int old = current->blocked.sig[0];
 	sigset_t newset;
 
-	siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP)));
 	set_current_blocked(&newset);
 
 	return old;
@@ -3243,8 +3251,6 @@ SYSCALL_DEFINE0(pause)
 
 int sigsuspend(sigset_t *set)
 {
-	sigdelsetmask(set, sigmask(SIGKILL)|sigmask(SIGSTOP));
-
 	current->saved_sigmask = current->blocked;
 	set_current_blocked(set);
 
-- 
cgit 


From efee984c27b67e3ebef40410f35671997441b57c Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 28 Apr 2012 02:04:15 -0400
Subject: new helper: signal_delivered()

Does block_sigmask() + tracehook_signal_handler();  called when
sigframe has been successfully built.  All architectures converted
to it; block_sigmask() itself is gone now (merged into this one).

I'm still not too happy with the signature, but that's a separate
story (IMO we need a structure that would contain signal number +
siginfo + k_sigaction, so that get_signal_to_deliver() would fill one,
signal_delivered(), handle_signal() and probably setup...frame() -
take one).

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/kernel/signal.c         |  2 +-
 arch/arm/kernel/signal.c           |  7 +------
 arch/avr32/kernel/signal.c         |  2 +-
 arch/blackfin/kernel/signal.c      |  3 +--
 arch/c6x/kernel/signal.c           |  3 +--
 arch/cris/arch-v10/kernel/signal.c |  2 +-
 arch/cris/arch-v32/kernel/signal.c |  2 +-
 arch/frv/kernel/signal.c           |  3 +--
 arch/h8300/kernel/signal.c         |  2 +-
 arch/hexagon/kernel/signal.c       |  3 +--
 arch/ia64/kernel/signal.c          |  7 +------
 arch/m32r/kernel/signal.c          |  2 +-
 arch/m68k/kernel/signal.c          |  2 +-
 arch/microblaze/kernel/signal.c    |  2 +-
 arch/mips/kernel/signal.c          |  2 +-
 arch/mn10300/kernel/signal.c       |  3 +--
 arch/openrisc/kernel/signal.c      |  3 +--
 arch/parisc/kernel/signal.c        |  4 +---
 arch/powerpc/kernel/signal.c       |  6 +-----
 arch/s390/kernel/compat_signal.c   |  6 +-----
 arch/s390/kernel/signal.c          |  6 +-----
 arch/score/kernel/signal.c         |  2 +-
 arch/sh/kernel/signal_32.c         |  3 +--
 arch/sh/kernel/signal_64.c         |  3 +--
 arch/sparc/kernel/signal32.c       |  3 +--
 arch/sparc/kernel/signal_32.c      |  3 +--
 arch/sparc/kernel/signal_64.c      |  3 +--
 arch/tile/kernel/signal.c          |  2 +-
 arch/um/kernel/signal.c            |  2 +-
 arch/unicore32/kernel/signal.c     |  5 +----
 arch/x86/kernel/signal.c           |  6 ++----
 arch/xtensa/kernel/signal.c        |  2 +-
 include/linux/signal.h             |  2 +-
 kernel/signal.c                    | 22 +++++++++++++---------
 34 files changed, 47 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c
index 48c4df2389ac..a8c97d42ec8e 100644
--- a/arch/alpha/kernel/signal.c
+++ b/arch/alpha/kernel/signal.c
@@ -478,7 +478,7 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
 		force_sigsegv(sig, current);
 		return;
 	}
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 static inline void
diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index c126eba8411d..fd2392a17ac1 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -557,12 +557,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 		force_sigsegv(sig, tsk);
 		return;
 	}
-
-	/*
-	 * Block the signal if we were successful.
-	 */
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs, 0);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/avr32/kernel/signal.c b/arch/avr32/kernel/signal.c
index e883fa5eb845..c140f9b41dce 100644
--- a/arch/avr32/kernel/signal.c
+++ b/arch/avr32/kernel/signal.c
@@ -241,7 +241,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	if (ret != 0)
 		force_sigsegv(sig, current);
 	else
-		block_sigmask(ka, sig);
+		signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c
index 463612643821..35459e681483 100644
--- a/arch/blackfin/kernel/signal.c
+++ b/arch/blackfin/kernel/signal.c
@@ -260,8 +260,7 @@ handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka,
 	if (ret)
 		return;
 
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 			test_thread_flag(TIF_SINGLESTEP));
 }
 
diff --git a/arch/c6x/kernel/signal.c b/arch/c6x/kernel/signal.c
index eb1b3086ae00..3d8f3c22a94f 100644
--- a/arch/c6x/kernel/signal.c
+++ b/arch/c6x/kernel/signal.c
@@ -276,8 +276,7 @@ static void handle_signal(int sig,
 	/* Set up the stack frame */
 	if (setup_rt_frame(sig, ka, info, sigmask_to_save(), regs) < 0)
 		return;
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs, 0);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c
index cf6380cb9a57..0bb477c13a4e 100644
--- a/arch/cris/arch-v10/kernel/signal.c
+++ b/arch/cris/arch-v10/kernel/signal.c
@@ -453,7 +453,7 @@ static inline void handle_signal(int canrestart, unsigned long sig,
 		ret = setup_frame(sig, ka, oldset, regs);
 
 	if (ret == 0)
-		block_sigmask(ka, sig);
+		signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c
index 07b81ee09f65..b60d1b65a426 100644
--- a/arch/cris/arch-v32/kernel/signal.c
+++ b/arch/cris/arch-v32/kernel/signal.c
@@ -485,7 +485,7 @@ handle_signal(int canrestart, unsigned long sig,
 		ret = setup_frame(sig, ka, oldset, regs);
 
 	if (ret == 0)
-		block_sigmask(ka, sig);
+		signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/frv/kernel/signal.c b/arch/frv/kernel/signal.c
index 511285fa2461..4e134c7eceea 100644
--- a/arch/frv/kernel/signal.c
+++ b/arch/frv/kernel/signal.c
@@ -460,8 +460,7 @@ static void handle_signal(unsigned long sig, siginfo_t *info,
 	if (ret)
 		return;
 
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, __frame,
+	signal_delivered(sig, info, ka, __frame,
 				 test_thread_flag(TIF_SINGLESTEP));
 } /* end handle_signal() */
 
diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c
index aa6f09666915..fca10378701b 100644
--- a/arch/h8300/kernel/signal.c
+++ b/arch/h8300/kernel/signal.c
@@ -439,7 +439,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 		ret = setup_frame(sig, ka, oldset, regs);
 
 	if (!ret)
-		block_sigmask(ka, sig);
+		signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c
index 439f11a3a8ef..304b0808d072 100644
--- a/arch/hexagon/kernel/signal.c
+++ b/arch/hexagon/kernel/signal.c
@@ -186,8 +186,7 @@ static void handle_signal(int sig, siginfo_t *info, struct k_sigaction *ka,
 	if (setup_rt_frame(sig, ka, info, sigmask_to_save(), regs) < 0)
 		return;
 
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 			test_thread_flag(TIF_SINGLESTEP));
 }
 
diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c
index c4041c76c07d..a199be1fe619 100644
--- a/arch/ia64/kernel/signal.c
+++ b/arch/ia64/kernel/signal.c
@@ -419,12 +419,7 @@ handle_signal (unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	if (!setup_frame(sig, ka, info, sigmask_to_save(), scr))
 		return 0;
 
-	block_sigmask(ka, sig);
-
-	/*
-	 * Let tracing know that we've done the handler setup.
-	 */
-	tracehook_signal_handler(sig, info, ka, &scr->pt,
+	signal_delivered(sig, info, ka, &scr->pt,
 				 test_thread_flag(TIF_SINGLESTEP));
 
 	return 1;
diff --git a/arch/m32r/kernel/signal.c b/arch/m32r/kernel/signal.c
index 07f9032576c0..f3fb2c029cfc 100644
--- a/arch/m32r/kernel/signal.c
+++ b/arch/m32r/kernel/signal.c
@@ -294,7 +294,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 	if (setup_rt_frame(sig, ka, info, sigmask_to_save(), regs))
 		return;
 
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c
index c00caad215a6..710a528b928b 100644
--- a/arch/m68k/kernel/signal.c
+++ b/arch/m68k/kernel/signal.c
@@ -1137,7 +1137,7 @@ handle_signal(int sig, struct k_sigaction *ka, siginfo_t *info,
 	if (err)
 		return;
 
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 
 	if (test_thread_flag(TIF_DELAYED_TRACE)) {
 		regs->sr &= ~0x8000;
diff --git a/arch/microblaze/kernel/signal.c b/arch/microblaze/kernel/signal.c
index c662e68671a2..76b9722557db 100644
--- a/arch/microblaze/kernel/signal.c
+++ b/arch/microblaze/kernel/signal.c
@@ -323,7 +323,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka,
 	if (ret)
 		return;
 
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c
index 02e0cba24f82..f2c09cfc60ac 100644
--- a/arch/mips/kernel/signal.c
+++ b/arch/mips/kernel/signal.c
@@ -551,7 +551,7 @@ static void handle_signal(unsigned long sig, siginfo_t *info,
 	if (ret)
 		return;
 
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 static void do_signal(struct pt_regs *regs)
diff --git a/arch/mn10300/kernel/signal.c b/arch/mn10300/kernel/signal.c
index 4f6d20763061..6ab0bee2a54f 100644
--- a/arch/mn10300/kernel/signal.c
+++ b/arch/mn10300/kernel/signal.c
@@ -461,8 +461,7 @@ static int handle_signal(int sig,
 	if (ret)
 		return;
 
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
 }
 
diff --git a/arch/openrisc/kernel/signal.c b/arch/openrisc/kernel/signal.c
index 53972b7260b7..30110297f4f9 100644
--- a/arch/openrisc/kernel/signal.c
+++ b/arch/openrisc/kernel/signal.c
@@ -259,8 +259,7 @@ handle_signal(unsigned long sig,
 	if (ret)
 		return;
 
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLESTEP));
 }
 
diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c
index 25161eaf720d..594459bde14e 100644
--- a/arch/parisc/kernel/signal.c
+++ b/arch/parisc/kernel/signal.c
@@ -449,9 +449,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	if (!setup_rt_frame(sig, ka, info, oldset, regs, in_syscall))
 		return 0;
 
-	block_sigmask(ka, sig);
-
-	tracehook_signal_handler(sig, info, ka, regs, 
+	signal_delivered(sig, info, ka, regs, 
 		test_thread_flag(TIF_SINGLESTEP) ||
 		test_thread_flag(TIF_BLOCKSTEP));
 
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 129bdffc6daf..5c023c9cf16e 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -148,11 +148,7 @@ static int do_signal(struct pt_regs *regs)
 
 	regs->trap = 0;
 	if (ret) {
-		block_sigmask(&ka, signr);
-		/*
-		 * Let tracing know that we've done the handler setup.
-		 */
-		tracehook_signal_handler(signr, &info, &ka, regs,
+		signal_delivered(signr, &info, &ka, regs,
 					 test_thread_flag(TIF_SINGLESTEP));
 	}
 
diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c
index 923baa96c0b0..3c0c19830c37 100644
--- a/arch/s390/kernel/compat_signal.c
+++ b/arch/s390/kernel/compat_signal.c
@@ -580,11 +580,7 @@ void handle_signal32(unsigned long sig, struct k_sigaction *ka,
 		ret = setup_frame32(sig, ka, oldset, regs);
 	if (ret)
 		return;
-	block_sigmask(ka, sig);
-	/*
-	 * Let tracing know that we've done the handler setup.
-	 */
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLE_STEP));
 }
 
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 8332a6943384..ac565b44aabb 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -375,11 +375,7 @@ static void handle_signal(unsigned long sig, struct k_sigaction *ka,
 		ret = setup_frame(sig, ka, oldset, regs);
 	if (ret)
 		return;
-	block_sigmask(ka, sig);
-	/*
-	 * Let tracing know that we've done the handler setup.
-	 */
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 				 test_thread_flag(TIF_SINGLE_STEP));
 }
 
diff --git a/arch/score/kernel/signal.c b/arch/score/kernel/signal.c
index f1b3fef0907b..e382c52ca0d9 100644
--- a/arch/score/kernel/signal.c
+++ b/arch/score/kernel/signal.c
@@ -267,7 +267,7 @@ static void handle_signal(unsigned long sig, siginfo_t *info,
 	if (setup_rt_frame(ka, regs, sig, sigmask_to_save(), info) < 0)
 		return;
 
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 static void do_signal(struct pt_regs *regs)
diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c
index e4a531414e19..d6b7b6154f87 100644
--- a/arch/sh/kernel/signal_32.c
+++ b/arch/sh/kernel/signal_32.c
@@ -533,8 +533,7 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, siginfo_t *info,
 
 	if (ret)
 		return;
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 			test_thread_flag(TIF_SINGLESTEP));
 }
 
diff --git a/arch/sh/kernel/signal_64.c b/arch/sh/kernel/signal_64.c
index 75960ef6c1d1..6b5b3dfe886b 100644
--- a/arch/sh/kernel/signal_64.c
+++ b/arch/sh/kernel/signal_64.c
@@ -650,8 +650,7 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	if (ret)
 		return;
 
-	block_sigmask(ka, sig);
-	tracehook_signal_handler(sig, info, ka, regs,
+	signal_delivered(sig, info, ka, regs,
 			test_thread_flag(TIF_SINGLESTEP));
 }
 
diff --git a/arch/sparc/kernel/signal32.c b/arch/sparc/kernel/signal32.c
index ba3dbfcdb28e..a53e0a5fd3a3 100644
--- a/arch/sparc/kernel/signal32.c
+++ b/arch/sparc/kernel/signal32.c
@@ -785,8 +785,7 @@ static inline void handle_signal32(unsigned long signr, struct k_sigaction *ka,
 	if (err)
 		return;
 
-	block_sigmask(ka, signr);
-	tracehook_signal_handler(signr, info, ka, regs, 0);
+	signal_delivered(signr, info, ka, regs, 0);
 }
 
 static inline void syscall_restart32(unsigned long orig_i0, struct pt_regs *regs,
diff --git a/arch/sparc/kernel/signal_32.c b/arch/sparc/kernel/signal_32.c
index 1bfa854be602..68f9c8650af4 100644
--- a/arch/sparc/kernel/signal_32.c
+++ b/arch/sparc/kernel/signal_32.c
@@ -460,8 +460,7 @@ handle_signal(unsigned long signr, struct k_sigaction *ka,
 	if (err)
 		return;
 
-	block_sigmask(ka, signr);
-	tracehook_signal_handler(signr, info, ka, regs, 0);
+	signal_delivered(signr, info, ka, regs, 0);
 }
 
 static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
diff --git a/arch/sparc/kernel/signal_64.c b/arch/sparc/kernel/signal_64.c
index 23b60caa6c43..867de2f8189c 100644
--- a/arch/sparc/kernel/signal_64.c
+++ b/arch/sparc/kernel/signal_64.c
@@ -473,8 +473,7 @@ static inline void handle_signal(unsigned long signr, struct k_sigaction *ka,
 	if (err)
 		return;
 
-	block_sigmask(ka, signr);
-	tracehook_signal_handler(signr, info, ka, regs, 0);
+	signal_delivered(signr, info, ka, regs, 0);
 }
 
 static inline void syscall_restart(unsigned long orig_i0, struct pt_regs *regs,
diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c
index e068aa0c6dfc..e29b0553211d 100644
--- a/arch/tile/kernel/signal.c
+++ b/arch/tile/kernel/signal.c
@@ -278,7 +278,7 @@ static void handle_signal(unsigned long sig, siginfo_t *info,
 		ret = setup_rt_frame(sig, ka, info, oldset, regs);
 	if (ret)
 		return;
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/um/kernel/signal.c b/arch/um/kernel/signal.c
index 4ce6ab2d2996..7362d58efc29 100644
--- a/arch/um/kernel/signal.c
+++ b/arch/um/kernel/signal.c
@@ -61,7 +61,7 @@ static void handle_signal(struct pt_regs *regs, unsigned long signr,
 	if (err)
 		force_sigsegv(signr, current);
 	else
-		block_sigmask(ka, signr);
+		signal_delivered(signr, info, ka, regs, 0);
 }
 
 static int kern_do_signal(struct pt_regs *regs)
diff --git a/arch/unicore32/kernel/signal.c b/arch/unicore32/kernel/signal.c
index 4d9c4841989d..8adedb37720a 100644
--- a/arch/unicore32/kernel/signal.c
+++ b/arch/unicore32/kernel/signal.c
@@ -362,10 +362,7 @@ static void handle_signal(unsigned long sig, struct k_sigaction *ka,
 		return;
 	}
 
-	/*
-	 * Block the signal if we were successful.
-	 */
-	block_sigmask(ka, sig);
+	signal_delivered(sig, info, ka, regs, 0);
 }
 
 /*
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 11e206f0f45a..e8a89374d356 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -715,10 +715,8 @@ handle_signal(unsigned long sig, siginfo_t *info, struct k_sigaction *ka,
 	 */
 	regs->flags &= ~X86_EFLAGS_TF;
 
-	block_sigmask(ka, sig);
-
-	tracehook_signal_handler(sig, info, ka, regs,
-				 test_thread_flag(TIF_SINGLESTEP));
+	signal_delivered(sig, info, ka, regs,
+			 test_thread_flag(TIF_SINGLESTEP));
 }
 
 #ifdef CONFIG_X86_32
diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c
index 4da3c6f6d929..b9f8e5850d3a 100644
--- a/arch/xtensa/kernel/signal.c
+++ b/arch/xtensa/kernel/signal.c
@@ -493,7 +493,7 @@ static void do_signal(struct pt_regs *regs)
 		if (ret)
 			return;
 
-		block_sigmask(&ka, signr);
+		signal_delivered(signr, info, ka, regs, 0);
 		if (current->ptrace & PT_SINGLESTEP)
 			task_pt_regs(current)->icountlevel = 1;
 
diff --git a/include/linux/signal.h b/include/linux/signal.h
index 065e76330398..26b424adc842 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -256,7 +256,7 @@ extern int show_unhandled_signals;
 extern int sigsuspend(sigset_t *);
 
 extern int get_signal_to_deliver(siginfo_t *info, struct k_sigaction *return_ka, struct pt_regs *regs, void *cookie);
-extern void block_sigmask(struct k_sigaction *ka, int signr);
+extern void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka, struct pt_regs *regs, int stepping);
 extern void exit_signals(struct task_struct *tsk);
 
 extern struct kmem_cache *sighand_cachep;
diff --git a/kernel/signal.c b/kernel/signal.c
index df8d721a9e6f..677102789cf2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -2368,17 +2368,20 @@ relock:
 }
 
 /**
- * block_sigmask - add @ka's signal mask to current->blocked
- * @ka: action for @signr
- * @signr: signal that has been successfully delivered
+ * signal_delivered - 
+ * @sig:		number of signal being delivered
+ * @info:		siginfo_t of signal being delivered
+ * @ka:			sigaction setting that chose the handler
+ * @regs:		user register state
+ * @stepping:		nonzero if debugger single-step or block-step in use
  *
  * This function should be called when a signal has succesfully been
- * delivered. It adds the mask of signals for @ka to current->blocked
- * so that they are blocked during the execution of the signal
- * handler. In addition, @signr will be blocked unless %SA_NODEFER is
- * set in @ka->sa.sa_flags.
+ * delivered. It updates the blocked signals accordingly (@ka->sa.sa_mask
+ * is always blocked, and the signal itself is blocked unless %SA_NODEFER
+ * is set in @ka->sa.sa_flags.  Tracing is notified.
  */
-void block_sigmask(struct k_sigaction *ka, int signr)
+void signal_delivered(int sig, siginfo_t *info, struct k_sigaction *ka,
+			struct pt_regs *regs, int stepping)
 {
 	sigset_t blocked;
 
@@ -2390,8 +2393,9 @@ void block_sigmask(struct k_sigaction *ka, int signr)
 
 	sigorsets(&blocked, &current->blocked, &ka->sa.sa_mask);
 	if (!(ka->sa.sa_flags & SA_NODEFER))
-		sigaddset(&blocked, signr);
+		sigaddset(&blocked, sig);
 	set_current_blocked(&blocked);
+	tracehook_signal_handler(sig, info, ka, regs, stepping);
 }
 
 /*
-- 
cgit 


From f309532bf3e1cc1b787403d84e3039812a7dbe50 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sat, 2 Jun 2012 15:21:43 -0700
Subject: tty: Revert the tty locking series, it needs more work

This reverts the tty layer change to use per-tty locking, because it's
not correct yet, and fixing it will require some more deep surgery.

The main revert is d29f3ef39be4 ("tty_lock: Localise the lock"), but
there are several smaller commits that built upon it, they also get
reverted here. The list of reverted commits is:

  fde86d310886 - tty: add lockdep annotations
  8f6576ad476b - tty: fix ldisc lock inversion trace
  d3ca8b64b97e - pty: Fix lock inversion
  b1d679afd766 - tty: drop the pty lock during hangup
  abcefe5fc357 - tty/amiserial: Add missing argument for tty_unlock()
  fd11b42e3598 - cris: fix missing tty arg in wait_event_interruptible_tty call
  d29f3ef39be4 - tty_lock: Localise the lock

The revert had a trivial conflict in the 68360serial.c staging driver
that got removed in the meantime.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/tty/amiserial.c      | 14 ++++-----
 drivers/tty/cyclades.c       |  2 +-
 drivers/tty/n_r3964.c        | 11 ++++---
 drivers/tty/pty.c            | 25 +++++++---------
 drivers/tty/serial/crisv10.c |  8 ++---
 drivers/tty/synclink.c       |  4 +--
 drivers/tty/synclink_gt.c    |  4 +--
 drivers/tty/synclinkmp.c     |  4 +--
 drivers/tty/tty_io.c         | 67 +++++++++++++++++------------------------
 drivers/tty/tty_ldisc.c      | 67 ++++++++++++++++++-----------------------
 drivers/tty/tty_mutex.c      | 71 ++++++++++----------------------------------
 drivers/tty/tty_port.c       |  6 ++--
 include/linux/tty.h          | 23 ++++++--------
 net/bluetooth/rfcomm/tty.c   |  4 +--
 14 files changed, 119 insertions(+), 191 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/tty/amiserial.c b/drivers/tty/amiserial.c
index 35819e312624..6cc4358f68c1 100644
--- a/drivers/tty/amiserial.c
+++ b/drivers/tty/amiserial.c
@@ -1033,7 +1033,7 @@ static int get_serial_info(struct tty_struct *tty, struct serial_state *state,
 	if (!retinfo)
 		return -EFAULT;
 	memset(&tmp, 0, sizeof(tmp));
-	tty_lock(tty);
+	tty_lock();
 	tmp.line = tty->index;
 	tmp.port = state->port;
 	tmp.flags = state->tport.flags;
@@ -1042,7 +1042,7 @@ static int get_serial_info(struct tty_struct *tty, struct serial_state *state,
 	tmp.close_delay = state->tport.close_delay;
 	tmp.closing_wait = state->tport.closing_wait;
 	tmp.custom_divisor = state->custom_divisor;
-	tty_unlock(tty);
+	tty_unlock();
 	if (copy_to_user(retinfo,&tmp,sizeof(*retinfo)))
 		return -EFAULT;
 	return 0;
@@ -1059,12 +1059,12 @@ static int set_serial_info(struct tty_struct *tty, struct serial_state *state,
 	if (copy_from_user(&new_serial,new_info,sizeof(new_serial)))
 		return -EFAULT;
 
-	tty_lock(tty);
+	tty_lock();
 	change_spd = ((new_serial.flags ^ port->flags) & ASYNC_SPD_MASK) ||
 		new_serial.custom_divisor != state->custom_divisor;
 	if (new_serial.irq || new_serial.port != state->port ||
 			new_serial.xmit_fifo_size != state->xmit_fifo_size) {
-		tty_unlock(tty);
+		tty_unlock();
 		return -EINVAL;
 	}
   
@@ -1074,7 +1074,7 @@ static int set_serial_info(struct tty_struct *tty, struct serial_state *state,
 		    (new_serial.xmit_fifo_size != state->xmit_fifo_size) ||
 		    ((new_serial.flags & ~ASYNC_USR_MASK) !=
 		     (port->flags & ~ASYNC_USR_MASK))) {
-			tty_unlock(tty);
+			tty_unlock();
 			return -EPERM;
 		}
 		port->flags = ((port->flags & ~ASYNC_USR_MASK) |
@@ -1084,7 +1084,7 @@ static int set_serial_info(struct tty_struct *tty, struct serial_state *state,
 	}
 
 	if (new_serial.baud_base < 9600) {
-		tty_unlock(tty);
+		tty_unlock();
 		return -EINVAL;
 	}
 
@@ -1116,7 +1116,7 @@ check_and_exit:
 		}
 	} else
 		retval = startup(tty, state);
-	tty_unlock(tty);
+	tty_unlock();
 	return retval;
 }
 
diff --git a/drivers/tty/cyclades.c b/drivers/tty/cyclades.c
index 6984e1a2686a..e61cabdd69df 100644
--- a/drivers/tty/cyclades.c
+++ b/drivers/tty/cyclades.c
@@ -1599,7 +1599,7 @@ static int cy_open(struct tty_struct *tty, struct file *filp)
 	 * If the port is the middle of closing, bail out now
 	 */
 	if (tty_hung_up_p(filp) || (info->port.flags & ASYNC_CLOSING)) {
-		wait_event_interruptible_tty(tty, info->port.close_wait,
+		wait_event_interruptible_tty(info->port.close_wait,
 				!(info->port.flags & ASYNC_CLOSING));
 		return (info->port.flags & ASYNC_HUP_NOTIFY) ? -EAGAIN: -ERESTARTSYS;
 	}
diff --git a/drivers/tty/n_r3964.c b/drivers/tty/n_r3964.c
index 656ad93bbc96..5c6c31459a2f 100644
--- a/drivers/tty/n_r3964.c
+++ b/drivers/tty/n_r3964.c
@@ -1065,8 +1065,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 
 	TRACE_L("read()");
 
-	/* FIXME: should use a private lock */
-	tty_lock(tty);
+	tty_lock();
 
 	pClient = findClient(pInfo, task_pid(current));
 	if (pClient) {
@@ -1078,7 +1077,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 				goto unlock;
 			}
 			/* block until there is a message: */
-			wait_event_interruptible_tty(tty, pInfo->read_wait,
+			wait_event_interruptible_tty(pInfo->read_wait,
 					(pMsg = remove_msg(pInfo, pClient)));
 		}
 
@@ -1108,7 +1107,7 @@ static ssize_t r3964_read(struct tty_struct *tty, struct file *file,
 	}
 	ret = -EPERM;
 unlock:
-	tty_unlock(tty);
+	tty_unlock();
 	return ret;
 }
 
@@ -1157,7 +1156,7 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 	pHeader->locks = 0;
 	pHeader->owner = NULL;
 
-	tty_lock(tty);
+	tty_lock();
 
 	pClient = findClient(pInfo, task_pid(current));
 	if (pClient) {
@@ -1176,7 +1175,7 @@ static ssize_t r3964_write(struct tty_struct *tty, struct file *file,
 	add_tx_queue(pInfo, pHeader);
 	trigger_transmit(pInfo);
 
-	tty_unlock(tty);
+	tty_unlock();
 
 	return 0;
 }
diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c
index 65c7c62c7aae..5505ffc91da4 100644
--- a/drivers/tty/pty.c
+++ b/drivers/tty/pty.c
@@ -47,7 +47,6 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
 	wake_up_interruptible(&tty->read_wait);
 	wake_up_interruptible(&tty->write_wait);
 	tty->packet = 0;
-	/* Review - krefs on tty_link ?? */
 	if (!tty->link)
 		return;
 	tty->link->packet = 0;
@@ -63,9 +62,9 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
 		        mutex_unlock(&devpts_mutex);
 		}
 #endif
-		tty_unlock(tty);
+		tty_unlock();
 		tty_vhangup(tty->link);
-		tty_lock(tty);
+		tty_lock();
 	}
 }
 
@@ -623,27 +622,26 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 		return retval;
 
 	/* find a device that is not in use. */
-	mutex_lock(&devpts_mutex);
+	tty_lock();
 	index = devpts_new_index(inode);
+	tty_unlock();
 	if (index < 0) {
 		retval = index;
 		goto err_file;
 	}
 
-	mutex_unlock(&devpts_mutex);
-
 	mutex_lock(&tty_mutex);
+	mutex_lock(&devpts_mutex);
 	tty = tty_init_dev(ptm_driver, index);
+	mutex_unlock(&devpts_mutex);
+	tty_lock();
+	mutex_unlock(&tty_mutex);
 
 	if (IS_ERR(tty)) {
 		retval = PTR_ERR(tty);
 		goto out;
 	}
 
-	/* The tty returned here is locked so we can safely
-	   drop the mutex */
-	mutex_unlock(&tty_mutex);
-
 	set_bit(TTY_PTY_LOCK, &tty->flags); /* LOCK THE SLAVE */
 
 	tty_add_file(tty, filp);
@@ -656,17 +654,16 @@ static int ptmx_open(struct inode *inode, struct file *filp)
 	if (retval)
 		goto err_release;
 
-	tty_unlock(tty);
+	tty_unlock();
 	return 0;
 err_release:
-	tty_unlock(tty);
+	tty_unlock();
 	tty_release(inode, filp);
 	return retval;
 out:
-	mutex_unlock(&tty_mutex);
 	devpts_kill_index(inode, index);
+	tty_unlock();
 err_file:
-        mutex_unlock(&devpts_mutex);
 	tty_free_file(filp);
 	return retval;
 }
diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c
index 7264d4d26717..80b6b1b1f725 100644
--- a/drivers/tty/serial/crisv10.c
+++ b/drivers/tty/serial/crisv10.c
@@ -3976,7 +3976,7 @@ block_til_ready(struct tty_struct *tty, struct file * filp,
 	 */
 	if (tty_hung_up_p(filp) ||
 	    (info->flags & ASYNC_CLOSING)) {
-		wait_event_interruptible_tty(tty, info->close_wait,
+		wait_event_interruptible_tty(info->close_wait,
 			!(info->flags & ASYNC_CLOSING));
 #ifdef SERIAL_DO_RESTART
 		if (info->flags & ASYNC_HUP_NOTIFY)
@@ -4052,9 +4052,9 @@ block_til_ready(struct tty_struct *tty, struct file * filp,
 		printk("block_til_ready blocking: ttyS%d, count = %d\n",
 		       info->line, info->count);
 #endif
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&info->open_wait, &wait);
@@ -4115,7 +4115,7 @@ rs_open(struct tty_struct *tty, struct file * filp)
 	 */
 	if (tty_hung_up_p(filp) ||
 	    (info->flags & ASYNC_CLOSING)) {
-		wait_event_interruptible_tty(tty, info->close_wait,
+		wait_event_interruptible_tty(info->close_wait,
 			!(info->flags & ASYNC_CLOSING));
 #ifdef SERIAL_DO_RESTART
 		return ((info->flags & ASYNC_HUP_NOTIFY) ?
diff --git a/drivers/tty/synclink.c b/drivers/tty/synclink.c
index 5ed0daae6564..593d40ad0a6b 100644
--- a/drivers/tty/synclink.c
+++ b/drivers/tty/synclink.c
@@ -3338,9 +3338,9 @@ static int block_til_ready(struct tty_struct *tty, struct file * filp,
 			printk("%s(%d):block_til_ready blocking on %s count=%d\n",
 				 __FILE__,__LINE__, tty->driver->name, port->count );
 				 
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	
 	set_current_state(TASK_RUNNING);
diff --git a/drivers/tty/synclink_gt.c b/drivers/tty/synclink_gt.c
index 45b43f11ca39..aa1debf97cc7 100644
--- a/drivers/tty/synclink_gt.c
+++ b/drivers/tty/synclink_gt.c
@@ -3336,9 +3336,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 		}
 
 		DBGINFO(("%s block_til_ready wait\n", tty->driver->name));
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 
 	set_current_state(TASK_RUNNING);
diff --git a/drivers/tty/synclinkmp.c b/drivers/tty/synclinkmp.c
index 4a1e4f07765b..a3dddc12d2fe 100644
--- a/drivers/tty/synclinkmp.c
+++ b/drivers/tty/synclinkmp.c
@@ -3357,9 +3357,9 @@ static int block_til_ready(struct tty_struct *tty, struct file *filp,
 			printk("%s(%d):%s block_til_ready() count=%d\n",
 				 __FILE__,__LINE__, tty->driver->name, port->count );
 
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 
 	set_current_state(TASK_RUNNING);
diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c
index 9e930c009bf2..b425c79675ad 100644
--- a/drivers/tty/tty_io.c
+++ b/drivers/tty/tty_io.c
@@ -185,7 +185,6 @@ void free_tty_struct(struct tty_struct *tty)
 		put_device(tty->dev);
 	kfree(tty->write_buf);
 	tty_buffer_free_all(tty);
-	tty->magic = 0xDEADDEAD;
 	kfree(tty);
 }
 
@@ -574,7 +573,7 @@ void __tty_hangup(struct tty_struct *tty)
 	}
 	spin_unlock(&redirect_lock);
 
-	tty_lock(tty);
+	tty_lock();
 
 	/* some functions below drop BTM, so we need this bit */
 	set_bit(TTY_HUPPING, &tty->flags);
@@ -667,7 +666,7 @@ void __tty_hangup(struct tty_struct *tty)
 	clear_bit(TTY_HUPPING, &tty->flags);
 	tty_ldisc_enable(tty);
 
-	tty_unlock(tty);
+	tty_unlock();
 
 	if (f)
 		fput(f);
@@ -1104,12 +1103,12 @@ void tty_write_message(struct tty_struct *tty, char *msg)
 {
 	if (tty) {
 		mutex_lock(&tty->atomic_write_lock);
-		tty_lock(tty);
+		tty_lock();
 		if (tty->ops->write && !test_bit(TTY_CLOSING, &tty->flags)) {
-			tty_unlock(tty);
+			tty_unlock();
 			tty->ops->write(tty, msg, strlen(msg));
 		} else
-			tty_unlock(tty);
+			tty_unlock();
 		tty_write_unlock(tty);
 	}
 	return;
@@ -1404,7 +1403,6 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx)
 	}
 	initialize_tty_struct(tty, driver, idx);
 
-	tty_lock(tty);
 	retval = tty_driver_install_tty(driver, tty);
 	if (retval < 0)
 		goto err_deinit_tty;
@@ -1417,11 +1415,9 @@ struct tty_struct *tty_init_dev(struct tty_driver *driver, int idx)
 	retval = tty_ldisc_setup(tty, tty->link);
 	if (retval)
 		goto err_release_tty;
-	/* Return the tty locked so that it cannot vanish under the caller */
 	return tty;
 
 err_deinit_tty:
-	tty_unlock(tty);
 	deinitialize_tty_struct(tty);
 	free_tty_struct(tty);
 err_module_put:
@@ -1430,7 +1426,6 @@ err_module_put:
 
 	/* call the tty release_tty routine to clean out this slot */
 err_release_tty:
-	tty_unlock(tty);
 	printk_ratelimited(KERN_INFO "tty_init_dev: ldisc open failed, "
 				 "clearing slot %d\n", idx);
 	release_tty(tty, idx);
@@ -1633,7 +1628,7 @@ int tty_release(struct inode *inode, struct file *filp)
 	if (tty_paranoia_check(tty, inode, __func__))
 		return 0;
 
-	tty_lock(tty);
+	tty_lock();
 	check_tty_count(tty, __func__);
 
 	__tty_fasync(-1, filp, 0);
@@ -1642,11 +1637,10 @@ int tty_release(struct inode *inode, struct file *filp)
 	pty_master = (tty->driver->type == TTY_DRIVER_TYPE_PTY &&
 		      tty->driver->subtype == PTY_TYPE_MASTER);
 	devpts = (tty->driver->flags & TTY_DRIVER_DEVPTS_MEM) != 0;
-	/* Review: parallel close */
 	o_tty = tty->link;
 
 	if (tty_release_checks(tty, o_tty, idx)) {
-		tty_unlock(tty);
+		tty_unlock();
 		return 0;
 	}
 
@@ -1658,7 +1652,7 @@ int tty_release(struct inode *inode, struct file *filp)
 	if (tty->ops->close)
 		tty->ops->close(tty, filp);
 
-	tty_unlock(tty);
+	tty_unlock();
 	/*
 	 * Sanity check: if tty->count is going to zero, there shouldn't be
 	 * any waiters on tty->read_wait or tty->write_wait.  We test the
@@ -1681,7 +1675,7 @@ int tty_release(struct inode *inode, struct file *filp)
 		   opens on /dev/tty */
 
 		mutex_lock(&tty_mutex);
-		tty_lock_pair(tty, o_tty);
+		tty_lock();
 		tty_closing = tty->count <= 1;
 		o_tty_closing = o_tty &&
 			(o_tty->count <= (pty_master ? 1 : 0));
@@ -1712,7 +1706,7 @@ int tty_release(struct inode *inode, struct file *filp)
 
 		printk(KERN_WARNING "%s: %s: read/write wait queue active!\n",
 				__func__, tty_name(tty, buf));
-		tty_unlock_pair(tty, o_tty);
+		tty_unlock();
 		mutex_unlock(&tty_mutex);
 		schedule();
 	}
@@ -1775,7 +1769,7 @@ int tty_release(struct inode *inode, struct file *filp)
 
 	/* check whether both sides are closing ... */
 	if (!tty_closing || (o_tty && !o_tty_closing)) {
-		tty_unlock_pair(tty, o_tty);
+		tty_unlock();
 		return 0;
 	}
 
@@ -1788,16 +1782,14 @@ int tty_release(struct inode *inode, struct file *filp)
 	tty_ldisc_release(tty, o_tty);
 	/*
 	 * The release_tty function takes care of the details of clearing
-	 * the slots and preserving the termios structure. The tty_unlock_pair
-	 * should be safe as we keep a kref while the tty is locked (so the
-	 * unlock never unlocks a freed tty).
+	 * the slots and preserving the termios structure.
 	 */
 	release_tty(tty, idx);
-	tty_unlock_pair(tty, o_tty);
 
 	/* Make this pty number available for reallocation */
 	if (devpts)
 		devpts_kill_index(inode, idx);
+	tty_unlock();
 	return 0;
 }
 
@@ -1901,9 +1893,6 @@ static struct tty_driver *tty_lookup_driver(dev_t device, struct file *filp,
  *	Locking: tty_mutex protects tty, tty_lookup_driver and tty_init_dev.
  *		 tty->count should protect the rest.
  *		 ->siglock protects ->signal/->sighand
- *
- *	Note: the tty_unlock/lock cases without a ref are only safe due to
- *	tty_mutex
  */
 
 static int tty_open(struct inode *inode, struct file *filp)
@@ -1927,7 +1916,8 @@ retry_open:
 	retval = 0;
 
 	mutex_lock(&tty_mutex);
-	/* This is protected by the tty_mutex */
+	tty_lock();
+
 	tty = tty_open_current_tty(device, filp);
 	if (IS_ERR(tty)) {
 		retval = PTR_ERR(tty);
@@ -1948,19 +1938,17 @@ retry_open:
 	}
 
 	if (tty) {
-		tty_lock(tty);
 		retval = tty_reopen(tty);
-		if (retval < 0) {
-			tty_unlock(tty);
+		if (retval)
 			tty = ERR_PTR(retval);
-		}
-	} else	/* Returns with the tty_lock held for now */
+	} else
 		tty = tty_init_dev(driver, index);
 
 	mutex_unlock(&tty_mutex);
 	if (driver)
 		tty_driver_kref_put(driver);
 	if (IS_ERR(tty)) {
+		tty_unlock();
 		retval = PTR_ERR(tty);
 		goto err_file;
 	}
@@ -1989,7 +1977,7 @@ retry_open:
 		printk(KERN_DEBUG "%s: error %d in opening %s...\n", __func__,
 				retval, tty->name);
 #endif
-		tty_unlock(tty); /* need to call tty_release without BTM */
+		tty_unlock(); /* need to call tty_release without BTM */
 		tty_release(inode, filp);
 		if (retval != -ERESTARTSYS)
 			return retval;
@@ -2001,15 +1989,17 @@ retry_open:
 		/*
 		 * Need to reset f_op in case a hangup happened.
 		 */
+		tty_lock();
 		if (filp->f_op == &hung_up_tty_fops)
 			filp->f_op = &tty_fops;
+		tty_unlock();
 		goto retry_open;
 	}
-	tty_unlock(tty);
+	tty_unlock();
 
 
 	mutex_lock(&tty_mutex);
-	tty_lock(tty);
+	tty_lock();
 	spin_lock_irq(&current->sighand->siglock);
 	if (!noctty &&
 	    current->signal->leader &&
@@ -2017,10 +2007,11 @@ retry_open:
 	    tty->session == NULL)
 		__proc_set_tty(current, tty);
 	spin_unlock_irq(&current->sighand->siglock);
-	tty_unlock(tty);
+	tty_unlock();
 	mutex_unlock(&tty_mutex);
 	return 0;
 err_unlock:
+	tty_unlock();
 	mutex_unlock(&tty_mutex);
 	/* after locks to avoid deadlock */
 	if (!IS_ERR_OR_NULL(driver))
@@ -2103,13 +2094,10 @@ out:
 
 static int tty_fasync(int fd, struct file *filp, int on)
 {
-	struct tty_struct *tty = file_tty(filp);
 	int retval;
-
-	tty_lock(tty);
+	tty_lock();
 	retval = __tty_fasync(fd, filp, on);
-	tty_unlock(tty);
-
+	tty_unlock();
 	return retval;
 }
 
@@ -2946,7 +2934,6 @@ void initialize_tty_struct(struct tty_struct *tty,
 	tty->pgrp = NULL;
 	tty->overrun_time = jiffies;
 	tty_buffer_init(tty);
-	mutex_init(&tty->legacy_mutex);
 	mutex_init(&tty->termios_mutex);
 	mutex_init(&tty->ldisc_mutex);
 	init_waitqueue_head(&tty->write_wait);
diff --git a/drivers/tty/tty_ldisc.c b/drivers/tty/tty_ldisc.c
index ba8be396a621..9911eb6b34cd 100644
--- a/drivers/tty/tty_ldisc.c
+++ b/drivers/tty/tty_ldisc.c
@@ -568,7 +568,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	if (IS_ERR(new_ldisc))
 		return PTR_ERR(new_ldisc);
 
-	tty_lock(tty);
+	tty_lock();
 	/*
 	 *	We need to look at the tty locking here for pty/tty pairs
 	 *	when both sides try to change in parallel.
@@ -582,12 +582,12 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 	 */
 
 	if (tty->ldisc->ops->num == ldisc) {
-		tty_unlock(tty);
+		tty_unlock();
 		tty_ldisc_put(new_ldisc);
 		return 0;
 	}
 
-	tty_unlock(tty);
+	tty_unlock();
 	/*
 	 *	Problem: What do we do if this blocks ?
 	 *	We could deadlock here
@@ -595,7 +595,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	tty_wait_until_sent(tty, 0);
 
-	tty_lock(tty);
+	tty_lock();
 	mutex_lock(&tty->ldisc_mutex);
 
 	/*
@@ -605,10 +605,10 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	while (test_bit(TTY_LDISC_CHANGING, &tty->flags)) {
 		mutex_unlock(&tty->ldisc_mutex);
-		tty_unlock(tty);
+		tty_unlock();
 		wait_event(tty_ldisc_wait,
 			test_bit(TTY_LDISC_CHANGING, &tty->flags) == 0);
-		tty_lock(tty);
+		tty_lock();
 		mutex_lock(&tty->ldisc_mutex);
 	}
 
@@ -623,7 +623,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	o_ldisc = tty->ldisc;
 
-	tty_unlock(tty);
+	tty_unlock();
 	/*
 	 *	Make sure we don't change while someone holds a
 	 *	reference to the line discipline. The TTY_LDISC bit
@@ -650,7 +650,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 
 	retval = tty_ldisc_wait_idle(tty, 5 * HZ);
 
-	tty_lock(tty);
+	tty_lock();
 	mutex_lock(&tty->ldisc_mutex);
 
 	/* handle wait idle failure locked */
@@ -665,7 +665,7 @@ int tty_set_ldisc(struct tty_struct *tty, int ldisc)
 		clear_bit(TTY_LDISC_CHANGING, &tty->flags);
 		mutex_unlock(&tty->ldisc_mutex);
 		tty_ldisc_put(new_ldisc);
-		tty_unlock(tty);
+		tty_unlock();
 		return -EIO;
 	}
 
@@ -708,7 +708,7 @@ enable:
 	if (o_work)
 		schedule_work(&o_tty->buf.work);
 	mutex_unlock(&tty->ldisc_mutex);
-	tty_unlock(tty);
+	tty_unlock();
 	return retval;
 }
 
@@ -816,11 +816,11 @@ void tty_ldisc_hangup(struct tty_struct *tty)
 	 * need to wait for another function taking the BTM
 	 */
 	clear_bit(TTY_LDISC, &tty->flags);
-	tty_unlock(tty);
+	tty_unlock();
 	cancel_work_sync(&tty->buf.work);
 	mutex_unlock(&tty->ldisc_mutex);
 retry:
-	tty_lock(tty);
+	tty_lock();
 	mutex_lock(&tty->ldisc_mutex);
 
 	/* At this point we have a closed ldisc and we want to
@@ -831,7 +831,7 @@ retry:
 		if (atomic_read(&tty->ldisc->users) != 1) {
 			char cur_n[TASK_COMM_LEN], tty_n[64];
 			long timeout = 3 * HZ;
-			tty_unlock(tty);
+			tty_unlock();
 
 			while (tty_ldisc_wait_idle(tty, timeout) == -EBUSY) {
 				timeout = MAX_SCHEDULE_TIMEOUT;
@@ -894,23 +894,6 @@ int tty_ldisc_setup(struct tty_struct *tty, struct tty_struct *o_tty)
 	tty_ldisc_enable(tty);
 	return 0;
 }
-
-static void tty_ldisc_kill(struct tty_struct *tty)
-{
-	mutex_lock(&tty->ldisc_mutex);
-	/*
-	 * Now kill off the ldisc
-	 */
-	tty_ldisc_close(tty, tty->ldisc);
-	tty_ldisc_put(tty->ldisc);
-	/* Force an oops if we mess this up */
-	tty->ldisc = NULL;
-
-	/* Ensure the next open requests the N_TTY ldisc */
-	tty_set_termios_ldisc(tty, N_TTY);
-	mutex_unlock(&tty->ldisc_mutex);
-}
-
 /**
  *	tty_ldisc_release		-	release line discipline
  *	@tty: tty being shut down
@@ -929,19 +912,27 @@ void tty_ldisc_release(struct tty_struct *tty, struct tty_struct *o_tty)
 	 * race with the set_ldisc code path.
 	 */
 
-	tty_unlock_pair(tty, o_tty);
+	tty_unlock();
 	tty_ldisc_halt(tty);
 	tty_ldisc_flush_works(tty);
-	if (o_tty) {
-		tty_ldisc_halt(o_tty);
-		tty_ldisc_flush_works(o_tty);
-	}
-	tty_lock_pair(tty, o_tty);
+	tty_lock();
 
+	mutex_lock(&tty->ldisc_mutex);
+	/*
+	 * Now kill off the ldisc
+	 */
+	tty_ldisc_close(tty, tty->ldisc);
+	tty_ldisc_put(tty->ldisc);
+	/* Force an oops if we mess this up */
+	tty->ldisc = NULL;
+
+	/* Ensure the next open requests the N_TTY ldisc */
+	tty_set_termios_ldisc(tty, N_TTY);
+	mutex_unlock(&tty->ldisc_mutex);
 
-	tty_ldisc_kill(tty);
+	/* This will need doing differently if we need to lock */
 	if (o_tty)
-		tty_ldisc_kill(o_tty);
+		tty_ldisc_release(o_tty, NULL);
 
 	/* And the memory resources remaining (buffers, termios) will be
 	   disposed of when the kref hits zero */
diff --git a/drivers/tty/tty_mutex.c b/drivers/tty/tty_mutex.c
index 67feac9e6ebb..9ff986c32a21 100644
--- a/drivers/tty/tty_mutex.c
+++ b/drivers/tty/tty_mutex.c
@@ -4,70 +4,29 @@
 #include <linux/semaphore.h>
 #include <linux/sched.h>
 
-/* Legacy tty mutex glue */
-
-enum {
-	TTY_MUTEX_NORMAL,
-	TTY_MUTEX_NESTED,
-};
+/*
+ * The 'big tty mutex'
+ *
+ * This mutex is taken and released by tty_lock() and tty_unlock(),
+ * replacing the older big kernel lock.
+ * It can no longer be taken recursively, and does not get
+ * released implicitly while sleeping.
+ *
+ * Don't use in new code.
+ */
+static DEFINE_MUTEX(big_tty_mutex);
 
 /*
  * Getting the big tty mutex.
  */
-
-static void __lockfunc tty_lock_nested(struct tty_struct *tty,
-				       unsigned int subclass)
+void __lockfunc tty_lock(void)
 {
-	if (tty->magic != TTY_MAGIC) {
-		printk(KERN_ERR "L Bad %p\n", tty);
-		WARN_ON(1);
-		return;
-	}
-	tty_kref_get(tty);
-	mutex_lock_nested(&tty->legacy_mutex, subclass);
-}
-
-void __lockfunc tty_lock(struct tty_struct *tty)
-{
-	return tty_lock_nested(tty, TTY_MUTEX_NORMAL);
+	mutex_lock(&big_tty_mutex);
 }
 EXPORT_SYMBOL(tty_lock);
 
-void __lockfunc tty_unlock(struct tty_struct *tty)
+void __lockfunc tty_unlock(void)
 {
-	if (tty->magic != TTY_MAGIC) {
-		printk(KERN_ERR "U Bad %p\n", tty);
-		WARN_ON(1);
-		return;
-	}
-	mutex_unlock(&tty->legacy_mutex);
-	tty_kref_put(tty);
+	mutex_unlock(&big_tty_mutex);
 }
 EXPORT_SYMBOL(tty_unlock);
-
-/*
- * Getting the big tty mutex for a pair of ttys with lock ordering
- * On a non pty/tty pair tty2 can be NULL which is just fine.
- */
-void __lockfunc tty_lock_pair(struct tty_struct *tty,
-					struct tty_struct *tty2)
-{
-	if (tty < tty2) {
-		tty_lock(tty);
-		tty_lock_nested(tty2, TTY_MUTEX_NESTED);
-	} else {
-		if (tty2 && tty2 != tty)
-			tty_lock(tty2);
-		tty_lock_nested(tty, TTY_MUTEX_NESTED);
-	}
-}
-EXPORT_SYMBOL(tty_lock_pair);
-
-void __lockfunc tty_unlock_pair(struct tty_struct *tty,
-						struct tty_struct *tty2)
-{
-	tty_unlock(tty);
-	if (tty2 && tty2 != tty)
-		tty_unlock(tty2);
-}
-EXPORT_SYMBOL(tty_unlock_pair);
diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c
index d9cca95a5452..bf6e238146ae 100644
--- a/drivers/tty/tty_port.c
+++ b/drivers/tty/tty_port.c
@@ -230,7 +230,7 @@ int tty_port_block_til_ready(struct tty_port *port,
 
 	/* block if port is in the process of being closed */
 	if (tty_hung_up_p(filp) || port->flags & ASYNC_CLOSING) {
-		wait_event_interruptible_tty(tty, port->close_wait,
+		wait_event_interruptible_tty(port->close_wait,
 				!(port->flags & ASYNC_CLOSING));
 		if (port->flags & ASYNC_HUP_NOTIFY)
 			return -EAGAIN;
@@ -296,9 +296,9 @@ int tty_port_block_til_ready(struct tty_port *port,
 			retval = -ERESTARTSYS;
 			break;
 		}
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	finish_wait(&port->open_wait, &wait);
 
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 4990ef2b1fb7..9f47ab540f65 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -268,7 +268,6 @@ struct tty_struct {
 	struct mutex ldisc_mutex;
 	struct tty_ldisc *ldisc;
 
-	struct mutex legacy_mutex;
 	struct mutex termios_mutex;
 	spinlock_t ctrl_lock;
 	/* Termios values are protected by the termios mutex */
@@ -606,12 +605,8 @@ extern long vt_compat_ioctl(struct tty_struct *tty,
 
 /* tty_mutex.c */
 /* functions for preparation of BKL removal */
-extern void __lockfunc tty_lock(struct tty_struct *tty);
-extern void __lockfunc tty_unlock(struct tty_struct *tty);
-extern void __lockfunc tty_lock_pair(struct tty_struct *tty,
-				struct tty_struct *tty2);
-extern void __lockfunc tty_unlock_pair(struct tty_struct *tty,
-				struct tty_struct *tty2);
+extern void __lockfunc tty_lock(void) __acquires(tty_lock);
+extern void __lockfunc tty_unlock(void) __releases(tty_lock);
 
 /*
  * this shall be called only from where BTM is held (like close)
@@ -626,9 +621,9 @@ extern void __lockfunc tty_unlock_pair(struct tty_struct *tty,
 static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
 		long timeout)
 {
-	tty_unlock(tty); /* tty->ops->close holds the BTM, drop it while waiting */
+	tty_unlock(); /* tty->ops->close holds the BTM, drop it while waiting */
 	tty_wait_until_sent(tty, timeout);
-	tty_lock(tty);
+	tty_lock();
 }
 
 /*
@@ -643,16 +638,16 @@ static inline void tty_wait_until_sent_from_close(struct tty_struct *tty,
  *
  * Do not use in new code.
  */
-#define wait_event_interruptible_tty(tty, wq, condition)		\
+#define wait_event_interruptible_tty(wq, condition)			\
 ({									\
 	int __ret = 0;							\
 	if (!(condition)) {						\
-		__wait_event_interruptible_tty(tty, wq, condition, __ret);	\
+		__wait_event_interruptible_tty(wq, condition, __ret);	\
 	}								\
 	__ret;								\
 })
 
-#define __wait_event_interruptible_tty(tty, wq, condition, ret)		\
+#define __wait_event_interruptible_tty(wq, condition, ret)		\
 do {									\
 	DEFINE_WAIT(__wait);						\
 									\
@@ -661,9 +656,9 @@ do {									\
 		if (condition)						\
 			break;						\
 		if (!signal_pending(current)) {				\
-			tty_unlock(tty);					\
+			tty_unlock();					\
 			schedule();					\
-			tty_lock(tty);					\
+			tty_lock();					\
 			continue;					\
 		}							\
 		ret = -ERESTARTSYS;					\
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index aa5d73b786ac..d1820ff14aee 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -710,9 +710,9 @@ static int rfcomm_tty_open(struct tty_struct *tty, struct file *filp)
 			break;
 		}
 
-		tty_unlock(tty);
+		tty_unlock();
 		schedule();
-		tty_lock(tty);
+		tty_lock();
 	}
 	set_current_state(TASK_RUNNING);
 	remove_wait_queue(&dev->wait, &wait);
-- 
cgit 


From 2f9d3df8aa1cc3c6db5cfa0bad3f0745e04cc27d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Jun 2012 14:50:19 -0700
Subject: vfs: move inode stat information closer together

The comment above it says "Stat data, not accessed from path walking",
but in fact some of inode fields we use for the common stat data was way
down at the end of the inode, causing unnecessary cache misses for the
common stat operations.

The inode structure is pretty big, and this can change padding depending
on field width, but at least on the common 64-bit configurations this
doesn't change the size.  Some of our inode layout has historically been
to tro to avoid unnecessary padding fields, but cache locality is at
least as important for layout, if not more.

Noticed by looking at kernel profiles, and noticing that the "i_blkbits"
access stood out like a sore thumb.

Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/fs.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 51978ed43e97..17fd887c798f 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -802,13 +802,14 @@ struct inode {
 		unsigned int __i_nlink;
 	};
 	dev_t			i_rdev;
+	loff_t			i_size;
 	struct timespec		i_atime;
 	struct timespec		i_mtime;
 	struct timespec		i_ctime;
 	spinlock_t		i_lock;	/* i_blocks, i_bytes, maybe i_size */
 	unsigned short          i_bytes;
+	unsigned int		i_blkbits;
 	blkcnt_t		i_blocks;
-	loff_t			i_size;
 
 #ifdef __NEED_I_SIZE_ORDERED
 	seqcount_t		i_size_seqcount;
@@ -828,9 +829,8 @@ struct inode {
 		struct list_head	i_dentry;
 		struct rcu_head		i_rcu;
 	};
-	atomic_t		i_count;
-	unsigned int		i_blkbits;
 	u64			i_version;
+	atomic_t		i_count;
 	atomic_t		i_dio_count;
 	atomic_t		i_writecount;
 	const struct file_operations	*i_fop;	/* former ->i_op->default_file_ops */
-- 
cgit 


From 68e3e92620c323703bc7db75c2ba15239ee85c39 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Sun, 3 Jun 2012 20:05:57 -0700
Subject: Revert "mm: compaction: handle incorrect MIGRATE_UNMOVABLE type
 pageblocks"

This reverts commit 5ceb9ce6fe9462a298bb2cd5c9f1ca6cb80a0199.

That commit seems to be the cause of the mm compation list corruption
issues that Dave Jones reported.  The locking (or rather, absense
there-of) is dubious, as is the use of the 'page' variable once it has
been found to be outside the pageblock range.

So revert it for now, we can re-visit this for 3.6.  If we even need to:
as Minchan Kim says, "The patch wasn't a bug fix and even test workload
was very theoretical".

Reported-and-tested-by: Dave Jones <davej@redhat.com>
Acked-by: Hugh Dickins <hughd@google.com>
Acked-by: KOSAKI Motohiro <kosaki.motohiro@gmail.com>
Acked-by: Minchan Kim <minchan@kernel.org>
Cc: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compaction.h |  19 ------
 mm/compaction.c            | 142 ++++++++-------------------------------------
 mm/internal.h              |   9 +--
 mm/page_alloc.c            |   8 +--
 4 files changed, 28 insertions(+), 150 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index e988037abd2a..51a90b7f2d60 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -1,8 +1,6 @@
 #ifndef _LINUX_COMPACTION_H
 #define _LINUX_COMPACTION_H
 
-#include <linux/node.h>
-
 /* Return values for compact_zone() and try_to_compact_pages() */
 /* compaction didn't start as it was not possible or direct reclaim was more suitable */
 #define COMPACT_SKIPPED		0
@@ -13,23 +11,6 @@
 /* The full zone was compacted */
 #define COMPACT_COMPLETE	3
 
-/*
- * compaction supports three modes
- *
- * COMPACT_ASYNC_MOVABLE uses asynchronous migration and only scans
- *    MIGRATE_MOVABLE pageblocks as migration sources and targets.
- * COMPACT_ASYNC_UNMOVABLE uses asynchronous migration and only scans
- *    MIGRATE_MOVABLE pageblocks as migration sources.
- *    MIGRATE_UNMOVABLE pageblocks are scanned as potential migration
- *    targets and convers them to MIGRATE_MOVABLE if possible
- * COMPACT_SYNC uses synchronous migration and scans all pageblocks
- */
-enum compact_mode {
-	COMPACT_ASYNC_MOVABLE,
-	COMPACT_ASYNC_UNMOVABLE,
-	COMPACT_SYNC,
-};
-
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
diff --git a/mm/compaction.c b/mm/compaction.c
index 4ac338af5120..7ea259d82a99 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -236,7 +236,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 	 */
 	while (unlikely(too_many_isolated(zone))) {
 		/* async migration should just abort */
-		if (cc->mode != COMPACT_SYNC)
+		if (!cc->sync)
 			return 0;
 
 		congestion_wait(BLK_RW_ASYNC, HZ/10);
@@ -304,8 +304,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 		 * satisfies the allocation
 		 */
 		pageblock_nr = low_pfn >> pageblock_order;
-		if (cc->mode != COMPACT_SYNC &&
-		    last_pageblock_nr != pageblock_nr &&
+		if (!cc->sync && last_pageblock_nr != pageblock_nr &&
 		    !migrate_async_suitable(get_pageblock_migratetype(page))) {
 			low_pfn += pageblock_nr_pages;
 			low_pfn = ALIGN(low_pfn, pageblock_nr_pages) - 1;
@@ -326,7 +325,7 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 			continue;
 		}
 
-		if (cc->mode != COMPACT_SYNC)
+		if (!cc->sync)
 			mode |= ISOLATE_ASYNC_MIGRATE;
 
 		lruvec = mem_cgroup_page_lruvec(page, zone);
@@ -361,90 +360,27 @@ isolate_migratepages_range(struct zone *zone, struct compact_control *cc,
 
 #endif /* CONFIG_COMPACTION || CONFIG_CMA */
 #ifdef CONFIG_COMPACTION
-/*
- * Returns true if MIGRATE_UNMOVABLE pageblock was successfully
- * converted to MIGRATE_MOVABLE type, false otherwise.
- */
-static bool rescue_unmovable_pageblock(struct page *page)
-{
-	unsigned long pfn, start_pfn, end_pfn;
-	struct page *start_page, *end_page;
-
-	pfn = page_to_pfn(page);
-	start_pfn = pfn & ~(pageblock_nr_pages - 1);
-	end_pfn = start_pfn + pageblock_nr_pages;
-
-	start_page = pfn_to_page(start_pfn);
-	end_page = pfn_to_page(end_pfn);
-
-	/* Do not deal with pageblocks that overlap zones */
-	if (page_zone(start_page) != page_zone(end_page))
-		return false;
-
-	for (page = start_page, pfn = start_pfn; page < end_page; pfn++,
-								  page++) {
-		if (!pfn_valid_within(pfn))
-			continue;
-
-		if (PageBuddy(page)) {
-			int order = page_order(page);
-
-			pfn += (1 << order) - 1;
-			page += (1 << order) - 1;
-
-			continue;
-		} else if (page_count(page) == 0 || PageLRU(page))
-			continue;
-
-		return false;
-	}
-
-	set_pageblock_migratetype(page, MIGRATE_MOVABLE);
-	move_freepages_block(page_zone(page), page, MIGRATE_MOVABLE);
-	return true;
-}
 
-enum smt_result {
-	GOOD_AS_MIGRATION_TARGET,
-	FAIL_UNMOVABLE_TARGET,
-	FAIL_BAD_TARGET,
-};
-
-/*
- * Returns GOOD_AS_MIGRATION_TARGET if the page is within a block
- * suitable for migration to, FAIL_UNMOVABLE_TARGET if the page
- * is within a MIGRATE_UNMOVABLE block, FAIL_BAD_TARGET otherwise.
- */
-static enum smt_result suitable_migration_target(struct page *page,
-				      struct compact_control *cc)
+/* Returns true if the page is within a block suitable for migration to */
+static bool suitable_migration_target(struct page *page)
 {
 
 	int migratetype = get_pageblock_migratetype(page);
 
 	/* Don't interfere with memory hot-remove or the min_free_kbytes blocks */
 	if (migratetype == MIGRATE_ISOLATE || migratetype == MIGRATE_RESERVE)
-		return FAIL_BAD_TARGET;
+		return false;
 
 	/* If the page is a large free page, then allow migration */
 	if (PageBuddy(page) && page_order(page) >= pageblock_order)
-		return GOOD_AS_MIGRATION_TARGET;
+		return true;
 
 	/* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */
-	if (cc->mode != COMPACT_ASYNC_UNMOVABLE &&
-	    migrate_async_suitable(migratetype))
-		return GOOD_AS_MIGRATION_TARGET;
-
-	if (cc->mode == COMPACT_ASYNC_MOVABLE &&
-	    migratetype == MIGRATE_UNMOVABLE)
-		return FAIL_UNMOVABLE_TARGET;
-
-	if (cc->mode != COMPACT_ASYNC_MOVABLE &&
-	    migratetype == MIGRATE_UNMOVABLE &&
-	    rescue_unmovable_pageblock(page))
-		return GOOD_AS_MIGRATION_TARGET;
+	if (migrate_async_suitable(migratetype))
+		return true;
 
 	/* Otherwise skip the block */
-	return FAIL_BAD_TARGET;
+	return false;
 }
 
 /*
@@ -477,13 +413,6 @@ static void isolate_freepages(struct zone *zone,
 
 	zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
 
-	/*
-	 * isolate_freepages() may be called more than once during
-	 * compact_zone_order() run and we want only the most recent
-	 * count.
-	 */
-	cc->nr_pageblocks_skipped = 0;
-
 	/*
 	 * Isolate free pages until enough are available to migrate the
 	 * pages on cc->migratepages. We stop searching if the migrate
@@ -492,7 +421,6 @@ static void isolate_freepages(struct zone *zone,
 	for (; pfn > low_pfn && cc->nr_migratepages > nr_freepages;
 					pfn -= pageblock_nr_pages) {
 		unsigned long isolated;
-		enum smt_result ret;
 
 		if (!pfn_valid(pfn))
 			continue;
@@ -509,12 +437,9 @@ static void isolate_freepages(struct zone *zone,
 			continue;
 
 		/* Check the block is suitable for migration */
-		ret = suitable_migration_target(page, cc);
-		if (ret != GOOD_AS_MIGRATION_TARGET) {
-			if (ret == FAIL_UNMOVABLE_TARGET)
-				cc->nr_pageblocks_skipped++;
+		if (!suitable_migration_target(page))
 			continue;
-		}
+
 		/*
 		 * Found a block suitable for isolating free pages from. Now
 		 * we disabled interrupts, double check things are ok and
@@ -523,14 +448,12 @@ static void isolate_freepages(struct zone *zone,
 		 */
 		isolated = 0;
 		spin_lock_irqsave(&zone->lock, flags);
-		ret = suitable_migration_target(page, cc);
-		if (ret == GOOD_AS_MIGRATION_TARGET) {
+		if (suitable_migration_target(page)) {
 			end_pfn = min(pfn + pageblock_nr_pages, zone_end_pfn);
 			isolated = isolate_freepages_block(pfn, end_pfn,
 							   freelist, false);
 			nr_freepages += isolated;
-		} else if (ret == FAIL_UNMOVABLE_TARGET)
-			cc->nr_pageblocks_skipped++;
+		}
 		spin_unlock_irqrestore(&zone->lock, flags);
 
 		/*
@@ -762,9 +685,8 @@ static int compact_zone(struct zone *zone, struct compact_control *cc)
 
 		nr_migrate = cc->nr_migratepages;
 		err = migrate_pages(&cc->migratepages, compaction_alloc,
-			(unsigned long)&cc->freepages, false,
-			(cc->mode == COMPACT_SYNC) ? MIGRATE_SYNC_LIGHT
-						      : MIGRATE_ASYNC);
+				(unsigned long)cc, false,
+				cc->sync ? MIGRATE_SYNC_LIGHT : MIGRATE_ASYNC);
 		update_nr_listpages(cc);
 		nr_remaining = cc->nr_migratepages;
 
@@ -793,8 +715,7 @@ out:
 
 static unsigned long compact_zone_order(struct zone *zone,
 				 int order, gfp_t gfp_mask,
-				 enum compact_mode mode,
-				 unsigned long *nr_pageblocks_skipped)
+				 bool sync)
 {
 	struct compact_control cc = {
 		.nr_freepages = 0,
@@ -802,17 +723,12 @@ static unsigned long compact_zone_order(struct zone *zone,
 		.order = order,
 		.migratetype = allocflags_to_migratetype(gfp_mask),
 		.zone = zone,
-		.mode = mode,
+		.sync = sync,
 	};
-	unsigned long rc;
-
 	INIT_LIST_HEAD(&cc.freepages);
 	INIT_LIST_HEAD(&cc.migratepages);
 
-	rc = compact_zone(zone, &cc);
-	*nr_pageblocks_skipped = cc.nr_pageblocks_skipped;
-
-	return rc;
+	return compact_zone(zone, &cc);
 }
 
 int sysctl_extfrag_threshold = 500;
@@ -837,8 +753,6 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 	struct zoneref *z;
 	struct zone *zone;
 	int rc = COMPACT_SKIPPED;
-	unsigned long nr_pageblocks_skipped;
-	enum compact_mode mode;
 
 	/*
 	 * Check whether it is worth even starting compaction. The order check is
@@ -855,22 +769,12 @@ unsigned long try_to_compact_pages(struct zonelist *zonelist,
 								nodemask) {
 		int status;
 
-		mode = sync ? COMPACT_SYNC : COMPACT_ASYNC_MOVABLE;
-retry:
-		status = compact_zone_order(zone, order, gfp_mask, mode,
-						&nr_pageblocks_skipped);
+		status = compact_zone_order(zone, order, gfp_mask, sync);
 		rc = max(status, rc);
 
 		/* If a normal allocation would succeed, stop compacting */
 		if (zone_watermark_ok(zone, order, low_wmark_pages(zone), 0, 0))
 			break;
-
-		if (rc == COMPACT_COMPLETE && mode == COMPACT_ASYNC_MOVABLE) {
-			if (nr_pageblocks_skipped) {
-				mode = COMPACT_ASYNC_UNMOVABLE;
-				goto retry;
-			}
-		}
 	}
 
 	return rc;
@@ -904,7 +808,7 @@ static int __compact_pgdat(pg_data_t *pgdat, struct compact_control *cc)
 			if (ok && cc->order > zone->compact_order_failed)
 				zone->compact_order_failed = cc->order + 1;
 			/* Currently async compaction is never deferred. */
-			else if (!ok && cc->mode == COMPACT_SYNC)
+			else if (!ok && cc->sync)
 				defer_compaction(zone, cc->order);
 		}
 
@@ -919,7 +823,7 @@ int compact_pgdat(pg_data_t *pgdat, int order)
 {
 	struct compact_control cc = {
 		.order = order,
-		.mode = COMPACT_ASYNC_MOVABLE,
+		.sync = false,
 	};
 
 	return __compact_pgdat(pgdat, &cc);
@@ -929,7 +833,7 @@ static int compact_node(int nid)
 {
 	struct compact_control cc = {
 		.order = -1,
-		.mode = COMPACT_SYNC,
+		.sync = true,
 	};
 
 	return __compact_pgdat(NODE_DATA(nid), &cc);
diff --git a/mm/internal.h b/mm/internal.h
index 5cbb78190041..2ba87fbfb75b 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -94,9 +94,6 @@ extern void putback_lru_page(struct page *page);
 /*
  * in mm/page_alloc.c
  */
-extern void set_pageblock_migratetype(struct page *page, int migratetype);
-extern int move_freepages_block(struct zone *zone, struct page *page,
-				int migratetype);
 extern void __free_pages_bootmem(struct page *page, unsigned int order);
 extern void prep_compound_page(struct page *page, unsigned long order);
 #ifdef CONFIG_MEMORY_FAILURE
@@ -104,7 +101,6 @@ extern bool is_free_buddy_page(struct page *page);
 #endif
 
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
-#include <linux/compaction.h>
 
 /*
  * in mm/compaction.c
@@ -123,14 +119,11 @@ struct compact_control {
 	unsigned long nr_migratepages;	/* Number of pages to migrate */
 	unsigned long free_pfn;		/* isolate_freepages search base */
 	unsigned long migrate_pfn;	/* isolate_migratepages search base */
-	enum compact_mode mode;		/* Compaction mode */
+	bool sync;			/* Synchronous migration */
 
 	int order;			/* order a direct compactor needs */
 	int migratetype;		/* MOVABLE, RECLAIMABLE etc */
 	struct zone *zone;
-
-	/* Number of UNMOVABLE destination pageblocks skipped during scan */
-	unsigned long nr_pageblocks_skipped;
 };
 
 unsigned long
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6092f331b32e..44030096da63 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -219,7 +219,7 @@ EXPORT_SYMBOL(nr_online_nodes);
 
 int page_group_by_mobility_disabled __read_mostly;
 
-void set_pageblock_migratetype(struct page *page, int migratetype)
+static void set_pageblock_migratetype(struct page *page, int migratetype)
 {
 
 	if (unlikely(page_group_by_mobility_disabled))
@@ -954,8 +954,8 @@ static int move_freepages(struct zone *zone,
 	return pages_moved;
 }
 
-int move_freepages_block(struct zone *zone, struct page *page,
-			 int migratetype)
+static int move_freepages_block(struct zone *zone, struct page *page,
+				int migratetype)
 {
 	unsigned long start_pfn, end_pfn;
 	struct page *start_page, *end_page;
@@ -5651,7 +5651,7 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
 		.nr_migratepages = 0,
 		.order = -1,
 		.zone = page_zone(pfn_to_page(start)),
-		.mode = COMPACT_SYNC,
+		.sync = true,
 	};
 	INIT_LIST_HEAD(&cc.migratepages);
 
-- 
cgit 


From ae58d1e406986f31d1e88b32f5ac601506c196d8 Mon Sep 17 00:00:00 2001
From: Stephen Warren <swarren@nvidia.com>
Date: Fri, 18 May 2012 09:29:34 -0600
Subject: i2c: Add generic I2C multiplexer using pinctrl API

This is useful for SoCs whose I2C module's signals can be routed to
different sets of pins at run-time, using the pinctrl API.

                                 +-----+  +-----+
                                 | dev |  | dev |
    +------------------------+   +-----+  +-----+
    | SoC                    |      |        |
    |                   /----|------+--------+
    |   +---+   +------+     | child bus A, on first set of pins
    |   |I2C|---|Pinmux|     |
    |   +---+   +------+     | child bus B, on second set of pins
    |                   \----|------+--------+--------+
    |                        |      |        |        |
    +------------------------+  +-----+  +-----+  +-----+
                                | dev |  | dev |  | dev |
                                +-----+  +-----+  +-----+

Signed-off-by: Stephen Warren <swarren@nvidia.com>
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Rob Herring <rob.herring@calxeda.com>
Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
---
 .../devicetree/bindings/i2c/i2c-mux-pinctrl.txt    |  93 +++++++
 drivers/i2c/muxes/Kconfig                          |  12 +
 drivers/i2c/muxes/Makefile                         |   1 +
 drivers/i2c/muxes/i2c-mux-pinctrl.c                | 279 +++++++++++++++++++++
 include/linux/i2c-mux-pinctrl.h                    |  41 +++
 5 files changed, 426 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
 create mode 100644 drivers/i2c/muxes/i2c-mux-pinctrl.c
 create mode 100644 include/linux/i2c-mux-pinctrl.h

(limited to 'include/linux')

diff --git a/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
new file mode 100644
index 000000000000..ae8af1694e95
--- /dev/null
+++ b/Documentation/devicetree/bindings/i2c/i2c-mux-pinctrl.txt
@@ -0,0 +1,93 @@
+Pinctrl-based I2C Bus Mux
+
+This binding describes an I2C bus multiplexer that uses pin multiplexing to
+route the I2C signals, and represents the pin multiplexing configuration
+using the pinctrl device tree bindings.
+
+                                 +-----+  +-----+
+                                 | dev |  | dev |
+    +------------------------+   +-----+  +-----+
+    | SoC                    |      |        |
+    |                   /----|------+--------+
+    |   +---+   +------+     | child bus A, on first set of pins
+    |   |I2C|---|Pinmux|     |
+    |   +---+   +------+     | child bus B, on second set of pins
+    |                   \----|------+--------+--------+
+    |                        |      |        |        |
+    +------------------------+  +-----+  +-----+  +-----+
+                                | dev |  | dev |  | dev |
+                                +-----+  +-----+  +-----+
+
+Required properties:
+- compatible: i2c-mux-pinctrl
+- i2c-parent: The phandle of the I2C bus that this multiplexer's master-side
+  port is connected to.
+
+Also required are:
+
+* Standard pinctrl properties that specify the pin mux state for each child
+  bus. See ../pinctrl/pinctrl-bindings.txt.
+
+* Standard I2C mux properties. See mux.txt in this directory.
+
+* I2C child bus nodes. See mux.txt in this directory.
+
+For each named state defined in the pinctrl-names property, an I2C child bus
+will be created. I2C child bus numbers are assigned based on the index into
+the pinctrl-names property.
+
+The only exception is that no bus will be created for a state named "idle". If
+such a state is defined, it must be the last entry in pinctrl-names. For
+example:
+
+	pinctrl-names = "ddc", "pta", "idle"  ->  ddc = bus 0, pta = bus 1
+	pinctrl-names = "ddc", "idle", "pta"  ->  Invalid ("idle" not last)
+	pinctrl-names = "idle", "ddc", "pta"  ->  Invalid ("idle" not last)
+
+Whenever an access is made to a device on a child bus, the relevant pinctrl
+state will be programmed into hardware.
+
+If an idle state is defined, whenever an access is not being made to a device
+on a child bus, the idle pinctrl state will be programmed into hardware.
+
+If an idle state is not defined, the most recently used pinctrl state will be
+left programmed into hardware whenever no access is being made of a device on
+a child bus.
+
+Example:
+
+	i2cmux {
+		compatible = "i2c-mux-pinctrl";
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		i2c-parent = <&i2c1>;
+
+		pinctrl-names = "ddc", "pta", "idle";
+		pinctrl-0 = <&state_i2cmux_ddc>;
+		pinctrl-1 = <&state_i2cmux_pta>;
+		pinctrl-2 = <&state_i2cmux_idle>;
+
+		i2c@0 {
+			reg = <0>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			eeprom {
+				compatible = "eeprom";
+				reg = <0x50>;
+			};
+		};
+
+		i2c@1 {
+			reg = <1>;
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			eeprom {
+				compatible = "eeprom";
+				reg = <0x50>;
+			};
+		};
+	};
+
diff --git a/drivers/i2c/muxes/Kconfig b/drivers/i2c/muxes/Kconfig
index beb2491db274..a0edd9854218 100644
--- a/drivers/i2c/muxes/Kconfig
+++ b/drivers/i2c/muxes/Kconfig
@@ -37,4 +37,16 @@ config I2C_MUX_PCA954x
 	  This driver can also be built as a module.  If so, the module
 	  will be called i2c-mux-pca954x.
 
+config I2C_MUX_PINCTRL
+	tristate "pinctrl-based I2C multiplexer"
+	depends on PINCTRL
+	help
+	  If you say yes to this option, support will be included for an I2C
+	  multiplexer that uses the pinctrl subsystem, i.e. pin multiplexing.
+	  This is useful for SoCs whose I2C module's signals can be routed to
+	  different sets of pins at run-time.
+
+	  This driver can also be built as a module. If so, the module will be
+	  called pinctrl-i2cmux.
+
 endmenu
diff --git a/drivers/i2c/muxes/Makefile b/drivers/i2c/muxes/Makefile
index 5826249b29ca..76da8692afff 100644
--- a/drivers/i2c/muxes/Makefile
+++ b/drivers/i2c/muxes/Makefile
@@ -4,5 +4,6 @@
 obj-$(CONFIG_I2C_MUX_GPIO)	+= i2c-mux-gpio.o
 obj-$(CONFIG_I2C_MUX_PCA9541)	+= i2c-mux-pca9541.o
 obj-$(CONFIG_I2C_MUX_PCA954x)	+= i2c-mux-pca954x.o
+obj-$(CONFIG_I2C_MUX_PINCTRL)	+= i2c-mux-pinctrl.o
 
 ccflags-$(CONFIG_I2C_DEBUG_BUS) := -DDEBUG
diff --git a/drivers/i2c/muxes/i2c-mux-pinctrl.c b/drivers/i2c/muxes/i2c-mux-pinctrl.c
new file mode 100644
index 000000000000..46a669763476
--- /dev/null
+++ b/drivers/i2c/muxes/i2c-mux-pinctrl.c
@@ -0,0 +1,279 @@
+/*
+ * I2C multiplexer using pinctrl API
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/i2c.h>
+#include <linux/i2c-mux.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/of_i2c.h>
+#include <linux/pinctrl/consumer.h>
+#include <linux/i2c-mux-pinctrl.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+struct i2c_mux_pinctrl {
+	struct device *dev;
+	struct i2c_mux_pinctrl_platform_data *pdata;
+	struct pinctrl *pinctrl;
+	struct pinctrl_state **states;
+	struct pinctrl_state *state_idle;
+	struct i2c_adapter *parent;
+	struct i2c_adapter **busses;
+};
+
+static int i2c_mux_pinctrl_select(struct i2c_adapter *adap, void *data,
+				  u32 chan)
+{
+	struct i2c_mux_pinctrl *mux = data;
+
+	return pinctrl_select_state(mux->pinctrl, mux->states[chan]);
+}
+
+static int i2c_mux_pinctrl_deselect(struct i2c_adapter *adap, void *data,
+				    u32 chan)
+{
+	struct i2c_mux_pinctrl *mux = data;
+
+	return pinctrl_select_state(mux->pinctrl, mux->state_idle);
+}
+
+#ifdef CONFIG_OF
+static int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
+				struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	int num_names, i, ret;
+	struct device_node *adapter_np;
+	struct i2c_adapter *adapter;
+
+	if (!np)
+		return 0;
+
+	mux->pdata = devm_kzalloc(&pdev->dev, sizeof(*mux->pdata), GFP_KERNEL);
+	if (!mux->pdata) {
+		dev_err(mux->dev,
+			"Cannot allocate i2c_mux_pinctrl_platform_data\n");
+		return -ENOMEM;
+	}
+
+	num_names = of_property_count_strings(np, "pinctrl-names");
+	if (num_names < 0) {
+		dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
+			num_names);
+		return num_names;
+	}
+
+	mux->pdata->pinctrl_states = devm_kzalloc(&pdev->dev,
+		sizeof(*mux->pdata->pinctrl_states) * num_names,
+		GFP_KERNEL);
+	if (!mux->pdata->pinctrl_states) {
+		dev_err(mux->dev, "Cannot allocate pinctrl_states\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < num_names; i++) {
+		ret = of_property_read_string_index(np, "pinctrl-names", i,
+			&mux->pdata->pinctrl_states[mux->pdata->bus_count]);
+		if (ret < 0) {
+			dev_err(mux->dev, "Cannot parse pinctrl-names: %d\n",
+				ret);
+			return ret;
+		}
+		if (!strcmp(mux->pdata->pinctrl_states[mux->pdata->bus_count],
+			    "idle")) {
+			if (i != num_names - 1) {
+				dev_err(mux->dev, "idle state must be last\n");
+				return -EINVAL;
+			}
+			mux->pdata->pinctrl_state_idle = "idle";
+		} else {
+			mux->pdata->bus_count++;
+		}
+	}
+
+	adapter_np = of_parse_phandle(np, "i2c-parent", 0);
+	if (!adapter_np) {
+		dev_err(mux->dev, "Cannot parse i2c-parent\n");
+		return -ENODEV;
+	}
+	adapter = of_find_i2c_adapter_by_node(adapter_np);
+	if (!adapter) {
+		dev_err(mux->dev, "Cannot find parent bus\n");
+		return -ENODEV;
+	}
+	mux->pdata->parent_bus_num = i2c_adapter_id(adapter);
+	put_device(&adapter->dev);
+
+	return 0;
+}
+#else
+static inline int i2c_mux_pinctrl_parse_dt(struct i2c_mux_pinctrl *mux,
+					   struct platform_device *pdev)
+{
+	return 0;
+}
+#endif
+
+static int __devinit i2c_mux_pinctrl_probe(struct platform_device *pdev)
+{
+	struct i2c_mux_pinctrl *mux;
+	int (*deselect)(struct i2c_adapter *, void *, u32);
+	int i, ret;
+
+	mux = devm_kzalloc(&pdev->dev, sizeof(*mux), GFP_KERNEL);
+	if (!mux) {
+		dev_err(&pdev->dev, "Cannot allocate i2c_mux_pinctrl\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+	platform_set_drvdata(pdev, mux);
+
+	mux->dev = &pdev->dev;
+
+	mux->pdata = pdev->dev.platform_data;
+	if (!mux->pdata) {
+		ret = i2c_mux_pinctrl_parse_dt(mux, pdev);
+		if (ret < 0)
+			goto err;
+	}
+	if (!mux->pdata) {
+		dev_err(&pdev->dev, "Missing platform data\n");
+		ret = -ENODEV;
+		goto err;
+	}
+
+	mux->states = devm_kzalloc(&pdev->dev,
+				   sizeof(*mux->states) * mux->pdata->bus_count,
+				   GFP_KERNEL);
+	if (!mux->states) {
+		dev_err(&pdev->dev, "Cannot allocate states\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mux->busses = devm_kzalloc(&pdev->dev,
+				   sizeof(mux->busses) * mux->pdata->bus_count,
+				   GFP_KERNEL);
+	if (!mux->states) {
+		dev_err(&pdev->dev, "Cannot allocate busses\n");
+		ret = -ENOMEM;
+		goto err;
+	}
+
+	mux->pinctrl = devm_pinctrl_get(&pdev->dev);
+	if (IS_ERR(mux->pinctrl)) {
+		ret = PTR_ERR(mux->pinctrl);
+		dev_err(&pdev->dev, "Cannot get pinctrl: %d\n", ret);
+		goto err;
+	}
+	for (i = 0; i < mux->pdata->bus_count; i++) {
+		mux->states[i] = pinctrl_lookup_state(mux->pinctrl,
+						mux->pdata->pinctrl_states[i]);
+			if (IS_ERR(mux->states[i])) {
+				ret = PTR_ERR(mux->states[i]);
+				dev_err(&pdev->dev,
+					"Cannot look up pinctrl state %s: %d\n",
+					mux->pdata->pinctrl_states[i], ret);
+				goto err;
+			}
+	}
+	if (mux->pdata->pinctrl_state_idle) {
+		mux->state_idle = pinctrl_lookup_state(mux->pinctrl,
+						mux->pdata->pinctrl_state_idle);
+		if (IS_ERR(mux->state_idle)) {
+			ret = PTR_ERR(mux->state_idle);
+			dev_err(&pdev->dev,
+				"Cannot look up pinctrl state %s: %d\n",
+				mux->pdata->pinctrl_state_idle, ret);
+			goto err;
+		}
+
+		deselect = i2c_mux_pinctrl_deselect;
+	} else {
+		deselect = NULL;
+	}
+
+	mux->parent = i2c_get_adapter(mux->pdata->parent_bus_num);
+	if (!mux->parent) {
+		dev_err(&pdev->dev, "Parent adapter (%d) not found\n",
+			mux->pdata->parent_bus_num);
+		ret = -ENODEV;
+		goto err;
+	}
+
+	for (i = 0; i < mux->pdata->bus_count; i++) {
+		u32 bus = mux->pdata->base_bus_num ?
+				(mux->pdata->base_bus_num + i) : 0;
+
+		mux->busses[i] = i2c_add_mux_adapter(mux->parent, &pdev->dev,
+						     mux, bus, i,
+						     i2c_mux_pinctrl_select,
+						     deselect);
+		if (!mux->busses[i]) {
+			ret = -ENODEV;
+			dev_err(&pdev->dev, "Failed to add adapter %d\n", i);
+			goto err_del_adapter;
+		}
+	}
+
+	return 0;
+
+err_del_adapter:
+	for (; i > 0; i--)
+		i2c_del_mux_adapter(mux->busses[i - 1]);
+	i2c_put_adapter(mux->parent);
+err:
+	return ret;
+}
+
+static int __devexit i2c_mux_pinctrl_remove(struct platform_device *pdev)
+{
+	struct i2c_mux_pinctrl *mux = platform_get_drvdata(pdev);
+	int i;
+
+	for (i = 0; i < mux->pdata->bus_count; i++)
+		i2c_del_mux_adapter(mux->busses[i]);
+
+	i2c_put_adapter(mux->parent);
+
+	return 0;
+}
+
+#ifdef CONFIG_OF
+static const struct of_device_id i2c_mux_pinctrl_of_match[] __devinitconst = {
+	{ .compatible = "i2c-mux-pinctrl", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, i2c_mux_pinctrl_of_match);
+#endif
+
+static struct platform_driver i2c_mux_pinctrl_driver = {
+	.driver	= {
+		.name	= "i2c-mux-pinctrl",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_match_ptr(i2c_mux_pinctrl_of_match),
+	},
+	.probe	= i2c_mux_pinctrl_probe,
+	.remove	= __devexit_p(i2c_mux_pinctrl_remove),
+};
+module_platform_driver(i2c_mux_pinctrl_driver);
+
+MODULE_DESCRIPTION("pinctrl-based I2C multiplexer driver");
+MODULE_AUTHOR("Stephen Warren <swarren@nvidia.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:i2c-mux-pinctrl");
diff --git a/include/linux/i2c-mux-pinctrl.h b/include/linux/i2c-mux-pinctrl.h
new file mode 100644
index 000000000000..a65c86429e84
--- /dev/null
+++ b/include/linux/i2c-mux-pinctrl.h
@@ -0,0 +1,41 @@
+/*
+ * i2c-mux-pinctrl platform data
+ *
+ * Copyright (c) 2012, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _LINUX_I2C_MUX_PINCTRL_H
+#define _LINUX_I2C_MUX_PINCTRL_H
+
+/**
+ * struct i2c_mux_pinctrl_platform_data - Platform data for i2c-mux-pinctrl
+ * @parent_bus_num: Parent I2C bus number
+ * @base_bus_num: Base I2C bus number for the child busses. 0 for dynamic.
+ * @bus_count: Number of child busses. Also the number of elements in
+ *	@pinctrl_states
+ * @pinctrl_states: The names of the pinctrl state to select for each child bus
+ * @pinctrl_state_idle: The pinctrl state to select when no child bus is being
+ *	accessed. If NULL, the most recently used pinctrl state will be left
+ *	selected.
+ */
+struct i2c_mux_pinctrl_platform_data {
+	int parent_bus_num;
+	int base_bus_num;
+	int bus_count;
+	const char **pinctrl_states;
+	const char *pinctrl_state_idle;
+};
+
+#endif
-- 
cgit 


From 1549210fcc17e9ae20c09ac8cd4c48a8dfd431bd Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Jun 2012 09:16:47 -0400
Subject: NFSv4: Fix an Oops in the open recovery code

The open recovery code does not need to request a new value for the
mdsthreshold, and so does not allocate a struct nfs4_threshold.
The problem is that encode_getfattr_open() will still request an
mdsthreshold, and so we end up Oopsing in decode_attr_mdsthreshold.

This patch fixes encode_getfattr_open so that it doesn't request an
mdsthreshold when the caller isn't asking for one. It also fixes
decode_attr_mdsthreshold so that it errors if the server returns
an mdsthreshold that we didn't ask for (instead of Oopsing).

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Andy Adamson <andros@netapp.com>
---
 fs/nfs/nfs4_fs.h        |  2 +-
 fs/nfs/nfs4proc.c       | 22 +++++++++++++++++++++-
 fs/nfs/nfs4xdr.c        | 12 ++++++++----
 include/linux/nfs_xdr.h |  1 +
 4 files changed, 31 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index c6827f93ab57..cc5900ac61b5 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -295,7 +295,7 @@ is_ds_client(struct nfs_client *clp)
 
 extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
 
-extern const u32 nfs4_fattr_bitmap[2];
+extern const u32 nfs4_fattr_bitmap[3];
 extern const u32 nfs4_statfs_bitmap[2];
 extern const u32 nfs4_pathconf_bitmap[2];
 extern const u32 nfs4_fsinfo_bitmap[3];
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d48dbefa0e71..ad1515521a6a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -116,7 +116,7 @@ static int nfs4_map_errors(int err)
 /*
  * This is our standard bitmap for GETATTR requests.
  */
-const u32 nfs4_fattr_bitmap[2] = {
+const u32 nfs4_fattr_bitmap[3] = {
 	FATTR4_WORD0_TYPE
 	| FATTR4_WORD0_CHANGE
 	| FATTR4_WORD0_SIZE
@@ -133,6 +133,24 @@ const u32 nfs4_fattr_bitmap[2] = {
 	| FATTR4_WORD1_TIME_MODIFY
 };
 
+static const u32 nfs4_pnfs_open_bitmap[3] = {
+	FATTR4_WORD0_TYPE
+	| FATTR4_WORD0_CHANGE
+	| FATTR4_WORD0_SIZE
+	| FATTR4_WORD0_FSID
+	| FATTR4_WORD0_FILEID,
+	FATTR4_WORD1_MODE
+	| FATTR4_WORD1_NUMLINKS
+	| FATTR4_WORD1_OWNER
+	| FATTR4_WORD1_OWNER_GROUP
+	| FATTR4_WORD1_RAWDEV
+	| FATTR4_WORD1_SPACE_USED
+	| FATTR4_WORD1_TIME_ACCESS
+	| FATTR4_WORD1_TIME_METADATA
+	| FATTR4_WORD1_TIME_MODIFY,
+	FATTR4_WORD2_MDSTHRESHOLD
+};
+
 const u32 nfs4_statfs_bitmap[2] = {
 	FATTR4_WORD0_FILES_AVAIL
 	| FATTR4_WORD0_FILES_FREE
@@ -844,6 +862,7 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 	p->o_arg.name = &dentry->d_name;
 	p->o_arg.server = server;
 	p->o_arg.bitmask = server->attr_bitmask;
+	p->o_arg.open_bitmap = &nfs4_fattr_bitmap[0];
 	p->o_arg.claim = NFS4_OPEN_CLAIM_NULL;
 	if (attrs != NULL && attrs->ia_valid != 0) {
 		__be32 verf[2];
@@ -1820,6 +1839,7 @@ static int _nfs4_do_open(struct inode *dir,
 		opendata->f_attr.mdsthreshold = pnfs_mdsthreshold_alloc();
 		if (!opendata->f_attr.mdsthreshold)
 			goto err_opendata_put;
+		opendata->o_arg.open_bitmap = &nfs4_pnfs_open_bitmap[0];
 	}
 	if (dentry->d_inode != NULL)
 		opendata->state = nfs4_get_open_state(dentry->d_inode, sp);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index ee4a74db95d0..9ca1428da9d9 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1198,12 +1198,13 @@ static void encode_getfattr(struct xdr_stream *xdr, const u32* bitmask, struct c
 }
 
 static void encode_getfattr_open(struct xdr_stream *xdr, const u32 *bitmask,
+				 const u32 *open_bitmap,
 				 struct compound_hdr *hdr)
 {
 	encode_getattr_three(xdr,
-			     bitmask[0] & nfs4_fattr_bitmap[0],
-			     bitmask[1] & nfs4_fattr_bitmap[1],
-			     bitmask[2] & FATTR4_WORD2_MDSTHRESHOLD,
+			     bitmask[0] & open_bitmap[0],
+			     bitmask[1] & open_bitmap[1],
+			     bitmask[2] & open_bitmap[2],
 			     hdr);
 }
 
@@ -2221,7 +2222,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_putfh(xdr, args->fh, &hdr);
 	encode_open(xdr, args, &hdr);
 	encode_getfh(xdr, &hdr);
-	encode_getfattr_open(xdr, args->bitmask, &hdr);
+	encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
 	encode_nops(&hdr);
 }
 
@@ -4360,6 +4361,9 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
 	if (unlikely(bitmap[2] & (FATTR4_WORD2_MDSTHRESHOLD - 1U)))
 		return -EIO;
 	if (likely(bitmap[2] & FATTR4_WORD2_MDSTHRESHOLD)) {
+		/* Did the server return an unrequested attribute? */
+		if (unlikely(res == NULL))
+			return -EREMOTEIO;
 		p = xdr_inline_decode(xdr, 4);
 		if (unlikely(!p))
 			goto out_overflow;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d1a7bf51c326..7519baef025b 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -348,6 +348,7 @@ struct nfs_openargs {
 	const struct qstr *	name;
 	const struct nfs_server *server;	 /* Needed for ID mapping */
 	const u32 *		bitmask;
+	const u32 *		open_bitmap;
 	__u32			claim;
 	struct nfs4_sequence_args	seq_args;
 };
-- 
cgit 


From fffaee365fded09f9ebf2db19066065fa54323c3 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Tue, 5 Jun 2012 21:36:33 +0400
Subject: radix-tree: fix contiguous iterator
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch fixes bug in macro radix_tree_for_each_contig().

If radix_tree_next_slot() sees NULL in next slot it returns NULL, but following
radix_tree_next_chunk() switches iterating into next chunk. As result iterating
becomes non-contiguous and breaks vfs "splice" and all its users.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Reported-and-bisected-by: Hans de Bruin <jmdebruin@xmsnet.nl>
Reported-and-bisected-by: Ondrej Zary <linux@rainbow-software.org>
Reported-bisected-and-tested-by: Toralf Förster <toralf.foerster@gmx.de>
Link: https://lkml.org/lkml/2012/6/5/64
Cc: stable <stable@vger.kernel.org> # 3.4.x
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/radix-tree.h | 5 ++++-
 lib/radix-tree.c           | 3 +++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 0d04cd69ab9b..ffc444c38b0a 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -368,8 +368,11 @@ radix_tree_next_slot(void **slot, struct radix_tree_iter *iter, unsigned flags)
 			iter->index++;
 			if (likely(*slot))
 				return slot;
-			if (flags & RADIX_TREE_ITER_CONTIG)
+			if (flags & RADIX_TREE_ITER_CONTIG) {
+				/* forbid switching to the next chunk */
+				iter->next_index = 0;
 				break;
+			}
 		}
 	}
 	return NULL;
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d7c878cc006c..e7964296fd50 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -686,6 +686,9 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
 	 * during iterating; it can be zero only at the beginning.
 	 * And we cannot overflow iter->next_index in a single step,
 	 * because RADIX_TREE_MAP_SHIFT < BITS_PER_LONG.
+	 *
+	 * This condition also used by radix_tree_next_slot() to stop
+	 * contiguous iterating, and forbid swithing to the next chunk.
 	 */
 	index = iter->next_index;
 	if (!index && iter->index)
-- 
cgit 


From 9bce008bae8b57bc7b007bcc2071d1247a527120 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 5 Jun 2012 18:32:03 -0400
Subject: NFS: Fix a commit bug

The new commit code fails to copy the verifier into the wb_verf field
of _all_ the nfs_page structures; it only copies it into the first entry.
The consequence is that most requests end up failing to match in
nfs_commit_release.

Fix is to copy the verifier into the req->wb_verf field in
nfs_write_completion.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Fred Isaman <iisaman@netapp.com>
---
 fs/nfs/direct.c         | 4 ++--
 fs/nfs/write.c          | 7 ++++---
 include/linux/nfs_xdr.h | 2 ++
 3 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 23d170bc44f4..b5385a7efd56 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -710,12 +710,12 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
 				bit = NFS_IOHDR_NEED_RESCHED;
 			else if (dreq->flags == 0) {
-				memcpy(&dreq->verf, &req->wb_verf,
+				memcpy(&dreq->verf, hdr->verf,
 				       sizeof(dreq->verf));
 				bit = NFS_IOHDR_NEED_COMMIT;
 				dreq->flags = NFS_ODIRECT_DO_COMMIT;
 			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
-				if (memcmp(&dreq->verf, &req->wb_verf, sizeof(dreq->verf))) {
+				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
 					dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
 					bit = NFS_IOHDR_NEED_RESCHED;
 				} else
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e6fe3d69d14c..4d6861c0dc14 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -80,6 +80,7 @@ struct nfs_write_header *nfs_writehdr_alloc(void)
 		INIT_LIST_HEAD(&hdr->rpc_list);
 		spin_lock_init(&hdr->lock);
 		atomic_set(&hdr->refcnt, 0);
+		hdr->verf = &p->verf;
 	}
 	return p;
 }
@@ -619,6 +620,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
 			goto next;
 		}
 		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
+			memcpy(&req->wb_verf, hdr->verf, sizeof(req->wb_verf));
 			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
 			goto next;
 		}
@@ -1255,15 +1257,14 @@ static void nfs_writeback_release_common(void *calldata)
 	struct nfs_write_data	*data = calldata;
 	struct nfs_pgio_header *hdr = data->header;
 	int status = data->task.tk_status;
-	struct nfs_page *req = hdr->req;
 
 	if ((status >= 0) && nfs_write_need_commit(data)) {
 		spin_lock(&hdr->lock);
 		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
 			; /* Do nothing */
 		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
-			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-		else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
+			memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
+		else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
 			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
 		spin_unlock(&hdr->lock);
 	}
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 7519baef025b..8aadd90b808a 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1237,6 +1237,7 @@ struct nfs_pgio_header {
 	struct list_head	rpc_list;
 	atomic_t		refcnt;
 	struct nfs_page		*req;
+	struct nfs_writeverf	*verf;
 	struct pnfs_layout_segment *lseg;
 	loff_t			io_start;
 	const struct rpc_call_ops *mds_ops;
@@ -1274,6 +1275,7 @@ struct nfs_write_data {
 struct nfs_write_header {
 	struct nfs_pgio_header	header;
 	struct nfs_write_data	rpc_data;
+	struct nfs_writeverf	verf;
 };
 
 struct nfs_mds_commit_info {
-- 
cgit 


From 2a0fe914a38745f5b03534c4e4f4056cbd6978b8 Mon Sep 17 00:00:00 2001
From: Yong Ding <yongd@marvell.com>
Date: Tue, 15 May 2012 13:09:43 +0800
Subject: mmc: sdio: fix setting card data bus width as 4-bit

SDIO_CCCR_IF[1:0] in SDIO card is used for card data bus width
setting as below:

     00b: 1-bit bus
     01b: Reserved
     10b: 4-bit bus
     11b: 8-bit bus (only for embedded SDIO)

And sdio_enable_wide is for setting data bus width as 4-bit.
But currently, it first reads the register, second OR' 1b with
SDIO_CCCR_IF[1], and then writes it back.

As we can see, this is based on such assumption that the
SDIO_CCCR_IF[0] is always 0. Apparently, this is not right.

Signed-off-by: Yong Ding <yongd@marvell.com>
Acked-by: Philip Rakity <prakity@marvell.com>
Signed-off-by: Chris Ball <cjb@laptop.org>
---
 drivers/mmc/core/sdio.c  | 6 ++++++
 include/linux/mmc/sdio.h | 2 ++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 13d0e95380ab..41c5fd8848f4 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -218,6 +218,12 @@ static int sdio_enable_wide(struct mmc_card *card)
 	if (ret)
 		return ret;
 
+	if ((ctrl & SDIO_BUS_WIDTH_MASK) == SDIO_BUS_WIDTH_RESERVED)
+		pr_warning("%s: SDIO_CCCR_IF is invalid: 0x%02x\n",
+			   mmc_hostname(card->host), ctrl);
+
+	/* set as 4-bit bus width */
+	ctrl &= ~SDIO_BUS_WIDTH_MASK;
 	ctrl |= SDIO_BUS_WIDTH_4BIT;
 
 	ret = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL);
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index c9fe66c58f8f..17446d3c3602 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -98,7 +98,9 @@
 
 #define SDIO_CCCR_IF		0x07	/* bus interface controls */
 
+#define  SDIO_BUS_WIDTH_MASK	0x03	/* data bus width setting */
 #define  SDIO_BUS_WIDTH_1BIT	0x00
+#define  SDIO_BUS_WIDTH_RESERVED 0x01
 #define  SDIO_BUS_WIDTH_4BIT	0x02
 #define  SDIO_BUS_ECSI		0x20	/* Enable continuous SPI interrupt */
 #define  SDIO_BUS_SCSI		0x40	/* Support continuous SPI interrupt */
-- 
cgit 


From c1174876874dcf8986806e4dad3d7d07af20b439 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 31 May 2012 14:47:33 +0200
Subject: sched: Fix domain iteration

Weird topologies can lead to asymmetric domain setups. This needs
further consideration since these setups are typically non-minimal
too.

For now, make it work by adding an extra mask selecting which CPUs
are allowed to iterate up.

The topology that triggered it is the one from David Rientjes:

	10 20 20 30
	20 10 20 20
	20 20 10 20
	30 20 20 10

resulting in boxes that wouldn't even boot.

Reported-by: David Rientjes <rientjes@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/n/tip-3p86l9cuaqnxz7uxsojmz5rm@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/sched.h | 11 +++++++++
 kernel/sched/core.c   | 64 ++++++++++++++++++++++++++++++++++++++++++++-------
 kernel/sched/fair.c   |  5 ++--
 kernel/sched/sched.h  |  2 ++
 4 files changed, 72 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6029d8c54476..ac321d753470 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -876,6 +876,8 @@ struct sched_group_power {
 	 * Number of busy cpus in this group.
 	 */
 	atomic_t nr_busy_cpus;
+
+	unsigned long cpumask[0]; /* iteration mask */
 };
 
 struct sched_group {
@@ -900,6 +902,15 @@ static inline struct cpumask *sched_group_cpus(struct sched_group *sg)
 	return to_cpumask(sg->cpumask);
 }
 
+/*
+ * cpumask masking which cpus in the group are allowed to iterate up the domain
+ * tree.
+ */
+static inline struct cpumask *sched_group_mask(struct sched_group *sg)
+{
+	return to_cpumask(sg->sgp->cpumask);
+}
+
 /**
  * group_first_cpu - Returns the first cpu in the cpumask of a sched_group.
  * @group: The group whose first cpu is to be returned.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6546083af3e0..781acb91a50a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5994,6 +5994,44 @@ struct sched_domain_topology_level {
 	struct sd_data      data;
 };
 
+/*
+ * Build an iteration mask that can exclude certain CPUs from the upwards
+ * domain traversal.
+ *
+ * Asymmetric node setups can result in situations where the domain tree is of
+ * unequal depth, make sure to skip domains that already cover the entire
+ * range.
+ *
+ * In that case build_sched_domains() will have terminated the iteration early
+ * and our sibling sd spans will be empty. Domains should always include the
+ * cpu they're built on, so check that.
+ *
+ */
+static void build_group_mask(struct sched_domain *sd, struct sched_group *sg)
+{
+	const struct cpumask *span = sched_domain_span(sd);
+	struct sd_data *sdd = sd->private;
+	struct sched_domain *sibling;
+	int i;
+
+	for_each_cpu(i, span) {
+		sibling = *per_cpu_ptr(sdd->sd, i);
+		if (!cpumask_test_cpu(i, sched_domain_span(sibling)))
+			continue;
+
+		cpumask_set_cpu(i, sched_group_mask(sg));
+	}
+}
+
+/*
+ * Return the canonical balance cpu for this group, this is the first cpu
+ * of this group that's also in the iteration mask.
+ */
+int group_balance_cpu(struct sched_group *sg)
+{
+	return cpumask_first_and(sched_group_cpus(sg), sched_group_mask(sg));
+}
+
 static int
 build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 {
@@ -6012,6 +6050,12 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		if (cpumask_test_cpu(i, covered))
 			continue;
 
+		child = *per_cpu_ptr(sdd->sd, i);
+
+		/* See the comment near build_group_mask(). */
+		if (!cpumask_test_cpu(i, sched_domain_span(child)))
+			continue;
+
 		sg = kzalloc_node(sizeof(struct sched_group) + cpumask_size(),
 				GFP_KERNEL, cpu_to_node(cpu));
 
@@ -6019,8 +6063,6 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 			goto fail;
 
 		sg_span = sched_group_cpus(sg);
-
-		child = *per_cpu_ptr(sdd->sd, i);
 		if (child->child) {
 			child = child->child;
 			cpumask_copy(sg_span, sched_domain_span(child));
@@ -6030,13 +6072,18 @@ build_overlap_sched_groups(struct sched_domain *sd, int cpu)
 		cpumask_or(covered, covered, sg_span);
 
 		sg->sgp = *per_cpu_ptr(sdd->sgp, i);
-		atomic_inc(&sg->sgp->ref);
+		if (atomic_inc_return(&sg->sgp->ref) == 1)
+			build_group_mask(sd, sg);
+
 
+		/*
+		 * Make sure the first group of this domain contains the
+		 * canonical balance cpu. Otherwise the sched_domain iteration
+		 * breaks. See update_sg_lb_stats().
+		 */
 		if ((!groups && cpumask_test_cpu(cpu, sg_span)) ||
-			       cpumask_first(sg_span) == cpu) {
-			WARN_ON_ONCE(!cpumask_test_cpu(cpu, sg_span));
+		    group_balance_cpu(sg) == cpu)
 			groups = sg;
-		}
 
 		if (!first)
 			first = sg;
@@ -6109,6 +6156,7 @@ build_sched_groups(struct sched_domain *sd, int cpu)
 
 		cpumask_clear(sched_group_cpus(sg));
 		sg->sgp->power = 0;
+		cpumask_setall(sched_group_mask(sg));
 
 		for_each_cpu(j, span) {
 			if (get_group(j, sdd, NULL) != group)
@@ -6150,7 +6198,7 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 		sg = sg->next;
 	} while (sg != sd->groups);
 
-	if (cpu != group_first_cpu(sg))
+	if (cpu != group_balance_cpu(sg))
 		return;
 
 	update_group_power(sd, cpu);
@@ -6525,7 +6573,7 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
 
 			*per_cpu_ptr(sdd->sg, j) = sg;
 
-			sgp = kzalloc_node(sizeof(struct sched_group_power),
+			sgp = kzalloc_node(sizeof(struct sched_group_power) + cpumask_size(),
 					GFP_KERNEL, cpu_to_node(j));
 			if (!sgp)
 				return -ENOMEM;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index b2a2d236f27b..54cbaa4e7b37 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3652,7 +3652,7 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 	int i;
 
 	if (local_group)
-		balance_cpu = group_first_cpu(group);
+		balance_cpu = group_balance_cpu(group);
 
 	/* Tally up the load of all CPUs in the group */
 	max_cpu_load = 0;
@@ -3667,7 +3667,8 @@ static inline void update_sg_lb_stats(struct lb_env *env,
 
 		/* Bias balancing toward cpus of our domain */
 		if (local_group) {
-			if (idle_cpu(i) && !first_idle_cpu) {
+			if (idle_cpu(i) && !first_idle_cpu &&
+					cpumask_test_cpu(i, sched_group_mask(group))) {
 				first_idle_cpu = 1;
 				balance_cpu = i;
 			}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ba9dccfd24ce..6d52cea7f33d 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -526,6 +526,8 @@ static inline struct sched_domain *highest_flag_domain(int cpu, int flag)
 DECLARE_PER_CPU(struct sched_domain *, sd_llc);
 DECLARE_PER_CPU(int, sd_llc_id);
 
+extern int group_balance_cpu(struct sched_group *sg);
+
 #endif /* CONFIG_SMP */
 
 #include "stats.h"
-- 
cgit 


From 0b0d9cf6ec7bab91977da2d71c09157f110f7c2e Mon Sep 17 00:00:00 2001
From: Arun Sharma <asharma@fb.com>
Date: Fri, 20 Apr 2012 15:41:34 -0700
Subject: perf: Limit callchains to 127

Stack depth of 255 seems excessive, given that copy_from_user_nmi()
could be slow.

Signed-off-by: Arun Sharma <asharma@fb.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1334961696-19580-3-git-send-email-asharma@fb.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1817d4015e5f..45db49f64bb4 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -555,7 +555,7 @@ enum perf_event_type {
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
-#define PERF_MAX_STACK_DEPTH		255
+#define PERF_MAX_STACK_DEPTH		127
 
 enum perf_callchain_context {
 	PERF_CONTEXT_HV			= (__u64)-32,
-- 
cgit 


From aa9b16306e3243229580ff889cc59fd66bf77973 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paul.mckenney@linaro.org>
Date: Thu, 10 May 2012 16:41:44 -0700
Subject: rcu: Precompute RCU_FAST_NO_HZ timer offsets

When a CPU is entering dyntick-idle mode, tick_nohz_stop_sched_tick()
calls rcu_needs_cpu() see if RCU needs that CPU, and, if not, computes the
next wakeup time based on the timer wheels.  Only later, when actually
entering the idle loop, rcu_prepare_for_idle() will be invoked.  In some
cases, rcu_prepare_for_idle() will post timers to wake the CPU back up.
But all for naught: The next wakeup time for the CPU has already been
computed, and posting a timer afterwards does not force that wakeup
time to be recomputed.  This means that rcu_prepare_for_idle()'s have
no effect.

This is not a problem on a busy system because something else will wake
up the CPU soon enough.  However, on lightly loaded systems, the CPU
might stay asleep for a considerable length of time.  If that CPU has
a callback that the rest of the system is waiting on, the system might
run very slowly or (in theory) even hang.

This commit avoids this problem by having rcu_needs_cpu() give
tick_nohz_stop_sched_tick() an estimate of when RCU will need the CPU
to wake back up, which tick_nohz_stop_sched_tick() takes into account
when programming the CPU's wakeup time.  An alternative approach is
for rcu_prepare_for_idle() to use hrtimers instead of normal timers,
but timers are much more efficient than are hrtimers for frequently
and repeatedly posting and cancelling a given timer, which is exactly
what RCU_FAST_NO_HZ does.

Reported-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
Reported-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Tested-by: Pascal Chapperon <pascal.chapperon@wanadoo.fr>
---
 include/linux/rcutiny.h  |  6 +++--
 include/linux/rcutree.h  |  2 +-
 kernel/rcutree_plugin.h  | 66 +++++++++++++++++++++++++++++++-----------------
 kernel/time/tick-sched.c |  7 ++++-
 4 files changed, 54 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index adb5e5a38cae..854dc4c5c271 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -87,8 +87,9 @@ static inline void kfree_call_rcu(struct rcu_head *head,
 
 #ifdef CONFIG_TINY_RCU
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return 0;
 }
 
@@ -96,8 +97,9 @@ static inline int rcu_needs_cpu(int cpu)
 
 int rcu_preempt_needs_cpu(void);
 
-static inline int rcu_needs_cpu(int cpu)
+static inline int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return rcu_preempt_needs_cpu();
 }
 
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 3c6083cde4fc..952b79339304 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -32,7 +32,7 @@
 
 extern void rcu_init(void);
 extern void rcu_note_context_switch(int cpu);
-extern int rcu_needs_cpu(int cpu);
+extern int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies);
 extern void rcu_cpu_stall_reset(void);
 
 /*
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 6bd9637d5d83..5271a020887e 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1886,8 +1886,9 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
  * Because we not have RCU_FAST_NO_HZ, just check whether this CPU needs
  * any flavor of RCU.
  */
-int rcu_needs_cpu(int cpu)
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
+	*delta_jiffies = ULONG_MAX;
 	return rcu_cpu_has_callbacks(cpu);
 }
 
@@ -1962,28 +1963,6 @@ static void rcu_idle_count_callbacks_posted(void)
 #define RCU_IDLE_GP_DELAY 6		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
-/*
- * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
- * callbacks on this CPU, (2) this CPU has not yet attempted to enter
- * dyntick-idle mode, or (3) this CPU is in the process of attempting to
- * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
- * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
- * it is better to incur scheduling-clock interrupts than to spin
- * continuously for the same time duration!
- */
-int rcu_needs_cpu(int cpu)
-{
-	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-	/* Flag a new idle sojourn to the idle-entry state machine. */
-	rdtp->idle_first_pass = 1;
-	/* If no callbacks, RCU doesn't need the CPU. */
-	if (!rcu_cpu_has_callbacks(cpu))
-		return 0;
-	/* Otherwise, RCU needs the CPU only if it recently tried and failed. */
-	return rdtp->dyntick_holdoff == jiffies;
-}
-
 /*
  * Does the specified flavor of RCU have non-lazy callbacks pending on
  * the specified CPU?  Both RCU flavor and CPU are specified by the
@@ -2026,6 +2005,47 @@ static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
 	       rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
 }
 
+/*
+ * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
+ * callbacks on this CPU, (2) this CPU has not yet attempted to enter
+ * dyntick-idle mode, or (3) this CPU is in the process of attempting to
+ * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
+ * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
+ * it is better to incur scheduling-clock interrupts than to spin
+ * continuously for the same time duration!
+ *
+ * The delta_jiffies argument is used to store the time when RCU is
+ * going to need the CPU again if it still has callbacks.  The reason
+ * for this is that rcu_prepare_for_idle() might need to post a timer,
+ * but if so, it will do so after tick_nohz_stop_sched_tick() has set
+ * the wakeup time for this CPU.  This means that RCU's timer can be
+ * delayed until the wakeup time, which defeats the purpose of posting
+ * a timer.
+ */
+int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
+{
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	/* Flag a new idle sojourn to the idle-entry state machine. */
+	rdtp->idle_first_pass = 1;
+	/* If no callbacks, RCU doesn't need the CPU. */
+	if (!rcu_cpu_has_callbacks(cpu)) {
+		*delta_jiffies = ULONG_MAX;
+		return 0;
+	}
+	if (rdtp->dyntick_holdoff == jiffies) {
+		/* RCU recently tried and failed, so don't try again. */
+		*delta_jiffies = 1;
+		return 1;
+	}
+	/* Set up for the possibility that RCU will post a timer. */
+	if (rcu_cpu_has_nonlazy_callbacks(cpu))
+		*delta_jiffies = RCU_IDLE_GP_DELAY;
+	else
+		*delta_jiffies = RCU_IDLE_LAZY_GP_DELAY;
+	return 0;
+}
+
 /*
  * Handler for smp_call_function_single().  The only point of this
  * handler is to wake the CPU up, so the handler does only tracing.
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6a3a5b9ff561..52f5ebbd443b 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -274,6 +274,7 @@ EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
 static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 {
 	unsigned long seq, last_jiffies, next_jiffies, delta_jiffies;
+	unsigned long rcu_delta_jiffies;
 	ktime_t last_update, expires, now;
 	struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev;
 	u64 time_delta;
@@ -322,7 +323,7 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 		time_delta = timekeeping_max_deferment();
 	} while (read_seqretry(&xtime_lock, seq));
 
-	if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
+	if (rcu_needs_cpu(cpu, &rcu_delta_jiffies) || printk_needs_cpu(cpu) ||
 	    arch_needs_cpu(cpu)) {
 		next_jiffies = last_jiffies + 1;
 		delta_jiffies = 1;
@@ -330,6 +331,10 @@ static void tick_nohz_stop_sched_tick(struct tick_sched *ts)
 		/* Get the next timer wheel timer */
 		next_jiffies = get_next_timer_interrupt(last_jiffies);
 		delta_jiffies = next_jiffies - last_jiffies;
+		if (rcu_delta_jiffies < delta_jiffies) {
+			next_jiffies = last_jiffies + rcu_delta_jiffies;
+			delta_jiffies = rcu_delta_jiffies;
+		}
 	}
 	/*
 	 * Do not stop the tick, if we are only one off
-- 
cgit 


From d1992b169d31f339dc5ea4e9f312567c8cf322a3 Mon Sep 17 00:00:00 2001
From: Hans Schillstrom <hans@schillstrom.com>
Date: Thu, 17 May 2012 22:35:46 +0000
Subject: netfilter: xt_HMARK: fix endianness and provide consistent hashing

This patch addresses two issues:

a) Fix usage of u32 and __be32 that causes endianess warnings via sparse.
b) Ensure consistent hashing in a cluster that is composed of big and
   little endian systems. Thus, we obtain the same hash mark in an
   heterogeneous cluster.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Hans Schillstrom <hans@schillstrom.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/linux/netfilter/xt_HMARK.h |  5 +++
 net/netfilter/xt_HMARK.c           | 72 ++++++++++++++++++++++----------------
 2 files changed, 46 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_HMARK.h b/include/linux/netfilter/xt_HMARK.h
index abb1650940d2..826fc5807577 100644
--- a/include/linux/netfilter/xt_HMARK.h
+++ b/include/linux/netfilter/xt_HMARK.h
@@ -27,7 +27,12 @@ union hmark_ports {
 		__u16	src;
 		__u16	dst;
 	} p16;
+	struct {
+		__be16	src;
+		__be16	dst;
+	} b16;
 	__u32	v32;
+	__be32	b32;
 };
 
 struct xt_hmark_info {
diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c
index 0a96a43108ed..1686ca1b53a1 100644
--- a/net/netfilter/xt_HMARK.c
+++ b/net/netfilter/xt_HMARK.c
@@ -32,13 +32,13 @@ MODULE_ALIAS("ipt_HMARK");
 MODULE_ALIAS("ip6t_HMARK");
 
 struct hmark_tuple {
-	u32			src;
-	u32			dst;
+	__be32			src;
+	__be32			dst;
 	union hmark_ports	uports;
-	uint8_t			proto;
+	u8			proto;
 };
 
-static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
+static inline __be32 hmark_addr6_mask(const __be32 *addr32, const __be32 *mask)
 {
 	return (addr32[0] & mask[0]) ^
 	       (addr32[1] & mask[1]) ^
@@ -46,8 +46,8 @@ static inline u32 hmark_addr6_mask(const __u32 *addr32, const __u32 *mask)
 	       (addr32[3] & mask[3]);
 }
 
-static inline u32
-hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
+static inline __be32
+hmark_addr_mask(int l3num, const __be32 *addr32, const __be32 *mask)
 {
 	switch (l3num) {
 	case AF_INET:
@@ -58,6 +58,22 @@ hmark_addr_mask(int l3num, const __u32 *addr32, const __u32 *mask)
 	return 0;
 }
 
+static inline void hmark_swap_ports(union hmark_ports *uports,
+				    const struct xt_hmark_info *info)
+{
+	union hmark_ports hp;
+	u16 src, dst;
+
+	hp.b32 = (uports->b32 & info->port_mask.b32) | info->port_set.b32;
+	src = ntohs(hp.b16.src);
+	dst = ntohs(hp.b16.dst);
+
+	if (dst > src)
+		uports->v32 = (dst << 16) | src;
+	else
+		uports->v32 = (src << 16) | dst;
+}
+
 static int
 hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 		    const struct xt_hmark_info *info)
@@ -74,22 +90,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 	otuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 	rtuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 
-	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.all,
-				 info->src_mask.all);
-	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.all,
-				 info->dst_mask.all);
+	t->src = hmark_addr_mask(otuple->src.l3num, otuple->src.u3.ip6,
+				 info->src_mask.ip6);
+	t->dst = hmark_addr_mask(otuple->src.l3num, rtuple->src.u3.ip6,
+				 info->dst_mask.ip6);
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
 
 	t->proto = nf_ct_protonum(ct);
 	if (t->proto != IPPROTO_ICMP) {
-		t->uports.p16.src = otuple->src.u.all;
-		t->uports.p16.dst = rtuple->src.u.all;
-		t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
-				info->port_set.v32;
-		if (t->uports.p16.dst < t->uports.p16.src)
-			swap(t->uports.p16.dst, t->uports.p16.src);
+		t->uports.b16.src = otuple->src.u.all;
+		t->uports.b16.dst = rtuple->src.u.all;
+		hmark_swap_ports(&t->uports, info);
 	}
 
 	return 0;
@@ -98,15 +111,19 @@ hmark_ct_set_htuple(const struct sk_buff *skb, struct hmark_tuple *t,
 #endif
 }
 
+/* This hash function is endian independent, to ensure consistent hashing if
+ * the cluster is composed of big and little endian systems. */
 static inline u32
 hmark_hash(struct hmark_tuple *t, const struct xt_hmark_info *info)
 {
 	u32 hash;
+	u32 src = ntohl(t->src);
+	u32 dst = ntohl(t->dst);
 
-	if (t->dst < t->src)
-		swap(t->src, t->dst);
+	if (dst < src)
+		swap(src, dst);
 
-	hash = jhash_3words(t->src, t->dst, t->uports.v32, info->hashrnd);
+	hash = jhash_3words(src, dst, t->uports.v32, info->hashrnd);
 	hash = hash ^ (t->proto & info->proto_mask);
 
 	return (((u64)hash * info->hmodulus) >> 32) + info->hoffset;
@@ -126,11 +143,7 @@ hmark_set_tuple_ports(const struct sk_buff *skb, unsigned int nhoff,
 	if (skb_copy_bits(skb, nhoff, &t->uports, sizeof(t->uports)) < 0)
 		return;
 
-	t->uports.v32 = (t->uports.v32 & info->port_mask.v32) |
-			info->port_set.v32;
-
-	if (t->uports.p16.dst < t->uports.p16.src)
-		swap(t->uports.p16.dst, t->uports.p16.src);
+	hmark_swap_ports(&t->uports, info);
 }
 
 #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
@@ -178,8 +191,8 @@ hmark_pkt_set_htuple_ipv6(const struct sk_buff *skb, struct hmark_tuple *t,
 			return -1;
 	}
 noicmp:
-	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.all);
-	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.all);
+	t->src = hmark_addr6_mask(ip6->saddr.s6_addr32, info->src_mask.ip6);
+	t->dst = hmark_addr6_mask(ip6->daddr.s6_addr32, info->dst_mask.ip6);
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
@@ -255,11 +268,8 @@ hmark_pkt_set_htuple_ipv4(const struct sk_buff *skb, struct hmark_tuple *t,
 		}
 	}
 
-	t->src = (__force u32) ip->saddr;
-	t->dst = (__force u32) ip->daddr;
-
-	t->src &= info->src_mask.ip;
-	t->dst &= info->dst_mask.ip;
+	t->src = ip->saddr & info->src_mask.ip;
+	t->dst = ip->daddr & info->dst_mask.ip;
 
 	if (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))
 		return 0;
-- 
cgit 


From bafb282df29c1524b1617019adebd6d0c3eb7a47 Mon Sep 17 00:00:00 2001
From: Konstantin Khlebnikov <khlebnikov@openvz.org>
Date: Thu, 7 Jun 2012 14:21:11 -0700
Subject: c/r: prctl: update prctl_set_mm_exe_file() after
 mm->num_exe_file_vmas removal

A fix for commit b32dfe377102 ("c/r: prctl: add ability to set new
mm_struct::exe_file").

After removing mm->num_exe_file_vmas kernel keeps mm->exe_file until
final mmput(), it never becomes NULL while task is alive.

We can check for other mapped files in mm instead of checking
mm->num_exe_file_vmas, and mark mm with flag MMF_EXE_FILE_CHANGED in
order to forbid second changing of mm->exe_file.

Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Reviewed-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Matt Helsley <matthltc@us.ibm.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Pavel Emelyanov <xemul@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched.h |  1 +
 kernel/sys.c          | 31 +++++++++++++++++++------------
 2 files changed, 20 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6029d8c54476..c688d4cc2e40 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -439,6 +439,7 @@ extern int get_dumpable(struct mm_struct *mm);
 					/* leave room for more dump flags */
 #define MMF_VM_MERGEABLE	16	/* KSM may merge identical pages */
 #define MMF_VM_HUGEPAGE		17	/* set when VM_HUGEPAGE is set on vma */
+#define MMF_EXE_FILE_CHANGED	18	/* see prctl_set_mm_exe_file() */
 
 #define MMF_INIT_MASK		(MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
 
diff --git a/kernel/sys.c b/kernel/sys.c
index 9ff89cb9657a..54f20fdee93c 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1796,17 +1796,11 @@ static bool vma_flags_mismatch(struct vm_area_struct *vma,
 
 static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 {
+	struct vm_area_struct *vma;
 	struct file *exe_file;
 	struct dentry *dentry;
 	int err;
 
-	/*
-	 * Setting new mm::exe_file is only allowed when no VM_EXECUTABLE vma's
-	 * remain. So perform a quick test first.
-	 */
-	if (mm->num_exe_file_vmas)
-		return -EBUSY;
-
 	exe_file = fget(fd);
 	if (!exe_file)
 		return -EBADF;
@@ -1827,17 +1821,30 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
 	if (err)
 		goto exit;
 
+	down_write(&mm->mmap_sem);
+
+	/*
+	 * Forbid mm->exe_file change if there are mapped other files.
+	 */
+	err = -EBUSY;
+	for (vma = mm->mmap; vma; vma = vma->vm_next) {
+		if (vma->vm_file && !path_equal(&vma->vm_file->f_path,
+						&exe_file->f_path))
+			goto exit_unlock;
+	}
+
 	/*
 	 * The symlink can be changed only once, just to disallow arbitrary
 	 * transitions malicious software might bring in. This means one
 	 * could make a snapshot over all processes running and monitor
 	 * /proc/pid/exe changes to notice unusual activity if needed.
 	 */
-	down_write(&mm->mmap_sem);
-	if (likely(!mm->exe_file))
-		set_mm_exe_file(mm, exe_file);
-	else
-		err = -EBUSY;
+	err = -EPERM;
+	if (test_and_set_bit(MMF_EXE_FILE_CHANGED, &mm->flags))
+		goto exit_unlock;
+
+	set_mm_exe_file(mm, exe_file);
+exit_unlock:
 	up_write(&mm->mmap_sem);
 
 exit:
-- 
cgit 


From 300f786b2683f8bb1ec0afb6e1851183a479c86d Mon Sep 17 00:00:00 2001
From: Cyrill Gorcunov <gorcunov@openvz.org>
Date: Thu, 7 Jun 2012 14:21:12 -0700
Subject: c/r: prctl: add ability to get clear_tid_address

Zero is written at clear_tid_address when the process exits.  This
functionality is used by pthread_join().

We already have sys_set_tid_address() to change this address for the
current task but there is no way to obtain it from user space.

Without the ability to find this address and dump it we can't restore
pthread'ed apps which call pthread_join() once they have been restored.

This patch introduces the PR_GET_TID_ADDRESS prctl option which allows
the current process to obtain own clear_tid_address.

This feature is available iif CONFIG_CHECKPOINT_RESTORE is set.

[akpm@linux-foundation.org: fix prctl numbering]
Signed-off-by: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Cyrill Gorcunov <gorcunov@openvz.org>
Cc: Pedro Alves <palves@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Pavel Emelyanov <xemul@parallels.com>
Cc: Tejun Heo <tj@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/prctl.h | 10 ++++++----
 kernel/sys.c          | 13 +++++++++++++
 2 files changed, 19 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/prctl.h b/include/linux/prctl.h
index 711e0a30aacc..3988012255dc 100644
--- a/include/linux/prctl.h
+++ b/include/linux/prctl.h
@@ -127,8 +127,8 @@
 #define PR_SET_PTRACER 0x59616d61
 # define PR_SET_PTRACER_ANY ((unsigned long)-1)
 
-#define PR_SET_CHILD_SUBREAPER 36
-#define PR_GET_CHILD_SUBREAPER 37
+#define PR_SET_CHILD_SUBREAPER	36
+#define PR_GET_CHILD_SUBREAPER	37
 
 /*
  * If no_new_privs is set, then operations that grant new privileges (i.e.
@@ -142,7 +142,9 @@
  * asking selinux for a specific new context (e.g. with runcon) will result
  * in execve returning -EPERM.
  */
-#define PR_SET_NO_NEW_PRIVS 38
-#define PR_GET_NO_NEW_PRIVS 39
+#define PR_SET_NO_NEW_PRIVS	38
+#define PR_GET_NO_NEW_PRIVS	39
+
+#define PR_GET_TID_ADDRESS	40
 
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 19a2c7139960..0ec1942ba7ea 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1988,12 +1988,22 @@ out:
 	up_read(&mm->mmap_sem);
 	return error;
 }
+
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return put_user(me->clear_child_tid, tid_addr);
+}
+
 #else /* CONFIG_CHECKPOINT_RESTORE */
 static int prctl_set_mm(int opt, unsigned long addr,
 			unsigned long arg4, unsigned long arg5)
 {
 	return -EINVAL;
 }
+static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
+{
+	return -EINVAL;
+}
 #endif
 
 SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
@@ -2131,6 +2141,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
 				else
 					return -EINVAL;
 				break;
+		case PR_GET_TID_ADDRESS:
+			error = prctl_get_tid_address(me, (int __user **)arg2);
+			break;
 			default:
 				return -EINVAL;
 			}
-- 
cgit 


From ae82fdb1406ad41d68f07027fe31f2d35ba22a90 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Fri, 8 Jun 2012 14:58:13 +0930
Subject: module_param: stop double-calling parameters.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 026cee0086fe1df4cf74691cf273062cc769617d "params:
<level>_initcall-like kernel parameters" set old-style module
parameters to level 0.  And we call those level 0 calls where we used
to, early in start_kernel().

We also loop through the initcall levels and call the levelled
module_params before the corresponding initcall.  Unfortunately level
0 is early_init(), so we call the standard module_param calls twice.

(Turns out most things don't care, but at least ubi.mtd does).

Change the level to -1 for standard module_param calls.

Reported-by: Benoît Thébaudeau <benoit.thebaudeau@advansee.com>
Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Cc: stable@kernel.org
---
 include/linux/moduleparam.h | 10 +++++-----
 init/main.c                 |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 1b14d25162cb..d6a58065c09c 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -128,7 +128,7 @@ struct kparam_array
  * The ops can have NULL set or get functions.
  */
 #define module_param_cb(name, ops, arg, perm)				      \
-	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, 0)
+	__module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1)
 
 /**
  * <level>_param_cb - general callback for a module/cmdline parameter
@@ -192,7 +192,7 @@ struct kparam_array
 		 { (void *)set, (void *)get };				\
 	__module_param_call(MODULE_PARAM_PREFIX,			\
 			    name, &__param_ops_##name, arg,		\
-			    (perm) + sizeof(__check_old_set_param(set))*0, 0)
+			    (perm) + sizeof(__check_old_set_param(set))*0, -1)
 
 /* We don't get oldget: it's often a new-style param_get_uint, etc. */
 static inline int
@@ -272,7 +272,7 @@ static inline void __kernel_param_unlock(void)
  */
 #define core_param(name, var, type, perm)				\
 	param_check_##type(name, &(var));				\
-	__module_param_call("", name, &param_ops_##type, &var, perm, 0)
+	__module_param_call("", name, &param_ops_##type, &var, perm, -1)
 #endif /* !MODULE */
 
 /**
@@ -290,7 +290,7 @@ static inline void __kernel_param_unlock(void)
 		= { len, string };					\
 	__module_param_call(MODULE_PARAM_PREFIX, name,			\
 			    &param_ops_string,				\
-			    .str = &__param_string_##name, perm, 0);	\
+			    .str = &__param_string_##name, perm, -1);	\
 	__MODULE_PARM_TYPE(name, "string")
 
 /**
@@ -432,7 +432,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp);
 	__module_param_call(MODULE_PARAM_PREFIX, name,			\
 			    &param_array_ops,				\
 			    .arr = &__param_arr_##name,			\
-			    perm, 0);					\
+			    perm, -1);					\
 	__MODULE_PARM_TYPE(name, "array of " #type)
 
 extern struct kernel_param_ops param_array_ops;
diff --git a/init/main.c b/init/main.c
index 1ca6b32c4828..37e12098eac1 100644
--- a/init/main.c
+++ b/init/main.c
@@ -508,7 +508,7 @@ asmlinkage void __init start_kernel(void)
 	parse_early_param();
 	parse_args("Booting kernel", static_command_line, __start___param,
 		   __stop___param - __start___param,
-		   0, 0, &unknown_bootoption);
+		   -1, -1, &unknown_bootoption);
 
 	jump_label_init();
 
-- 
cgit 


From c8e9cf7bb240049117d2fa64d1540476c289396d Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Thu, 7 Jun 2012 12:15:15 +0200
Subject: vga_switcheroo: Add a helper function to get the client state

Add vga_switcheroo_get_client_state() to get the current state of the
client.  This is necessary to determine the proper initial state of
audio clients in HD-audio driver.

Acked-by: Dave Airlie <airlied@redhat.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/gpu/vga/vga_switcheroo.c | 13 +++++++++++++
 include/linux/vga_switcheroo.h   |  7 +++++++
 2 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 38f9534ac513..eb4f64f0a565 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -190,6 +190,19 @@ find_active_client(struct list_head *head)
 	return NULL;
 }
 
+int vga_switcheroo_get_client_state(struct pci_dev *pdev)
+{
+	struct vga_switcheroo_client *client;
+
+	client = find_client_from_pci(&vgasr_priv.clients, pdev);
+	if (!client)
+		return VGA_SWITCHEROO_NOT_FOUND;
+	if (!vgasr_priv.active)
+		return VGA_SWITCHEROO_INIT;
+	return client->pwr_state;
+}
+EXPORT_SYMBOL(vga_switcheroo_get_client_state);
+
 void vga_switcheroo_unregister_client(struct pci_dev *pdev)
 {
 	struct vga_switcheroo_client *client;
diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index b455c7c212eb..b176342ca031 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -12,6 +12,9 @@
 enum vga_switcheroo_state {
 	VGA_SWITCHEROO_OFF,
 	VGA_SWITCHEROO_ON,
+	/* below are referred only from vga_switcheroo_get_client_state() */
+	VGA_SWITCHEROO_INIT,
+	VGA_SWITCHEROO_NOT_FOUND,
 };
 
 enum vga_switcheroo_client_id {
@@ -50,6 +53,8 @@ void vga_switcheroo_unregister_handler(void);
 
 int vga_switcheroo_process_delayed_switch(void);
 
+int vga_switcheroo_get_client_state(struct pci_dev *dev);
+
 #else
 
 static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {}
@@ -62,5 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	int id, bool active) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
+static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; }
+
 
 #endif
-- 
cgit 


From 505cff00de9c303b95c204eb4544066e3e707911 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Fri, 8 Jun 2012 12:49:17 +0200
Subject: vga_switcheroo: Fix error without CONFIG_VGA_SWITCHEROO

Fix a typo that is built only when CONFIG_VGA_SWITCHEROO=n.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/vga_switcheroo.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index b176342ca031..60da41fe9dc2 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -67,7 +67,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev,
 	int id, bool active) { return 0; }
 static inline void vga_switcheroo_unregister_handler(void) {}
 static inline int vga_switcheroo_process_delayed_switch(void) { return 0; }
-static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_CLIENT_ON; }
+static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; }
 
 
 #endif
-- 
cgit 


From 8876d6b5f81f4e242a6660da22bbd92f17a8d058 Mon Sep 17 00:00:00 2001
From: Paul Pluzhnikov <ppluzhnikov@google.com>
Date: Sat, 9 Jun 2012 07:53:03 -0700
Subject: net: Make linux/tcp.h C++ friendly (trivial)

I originally sent this patch to <trivial@kernel.org>, but Jiri Kosina did
not feel that this is fully appropriate for the trivial tree.

Using linux/tcp.h from C++ results in:

cat t.cc
#include <linux/tcp.h>
int main() { }

g++ -c t.cc

In file included from t.cc:1:
/usr/include/linux/tcp.h:72: error: '__u32 __fswab32(__u32)' cannot appear in a constant-expression
/usr/include/linux/tcp.h:72: error: a function call cannot appear in a constant-expression
...

Attached trivial patch fixes this problem.

Tested:
- the t.cc above compiles with g++ and
- the following program generates the same output before/after
  the patch:

#include <linux/tcp.h>
#include <stdio.h>

int main ()
{
#define P(a) printf("%s: %08x\n", #a, (int)a)
 P(TCP_FLAG_CWR);
 P(TCP_FLAG_ECE);
 P(TCP_FLAG_URG);
 P(TCP_FLAG_ACK);
 P(TCP_FLAG_PSH);
 P(TCP_FLAG_RST);
 P(TCP_FLAG_SYN);
 P(TCP_FLAG_FIN);
 P(TCP_RESERVED_BITS);
 P(TCP_DATA_OFFSET);
#undef P
 return 0;
}

Signed-off-by: Paul Pluzhnikov <ppluzhnikov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4c5b63283377..5f359dbfcdce 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -69,16 +69,16 @@ union tcp_word_hdr {
 #define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3]) 
 
 enum { 
-	TCP_FLAG_CWR = __cpu_to_be32(0x00800000),
-	TCP_FLAG_ECE = __cpu_to_be32(0x00400000),
-	TCP_FLAG_URG = __cpu_to_be32(0x00200000),
-	TCP_FLAG_ACK = __cpu_to_be32(0x00100000),
-	TCP_FLAG_PSH = __cpu_to_be32(0x00080000),
-	TCP_FLAG_RST = __cpu_to_be32(0x00040000),
-	TCP_FLAG_SYN = __cpu_to_be32(0x00020000),
-	TCP_FLAG_FIN = __cpu_to_be32(0x00010000),
-	TCP_RESERVED_BITS = __cpu_to_be32(0x0F000000),
-	TCP_DATA_OFFSET = __cpu_to_be32(0xF0000000)
+	TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000),
+	TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000),
+	TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000),
+	TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000),
+	TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000),
+	TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000),
+	TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000),
+	TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000),
+	TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000),
+	TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000)
 }; 
 
 /*
-- 
cgit 


From 601722157b3f6be73623644eeae6f14940f0bd8f Mon Sep 17 00:00:00 2001
From: Qiao Zhou <zhouqiao@marvell.com>
Date: Mon, 4 Jun 2012 10:41:03 +0800
Subject: ARM: MMP: add pxa910-ssp into ssp_id_table

add pxa910-ssp into ssp_id_table, and fix pxa-ssp compiling issue
under mach-mmp architect.

Signed-off-by: Qiao Zhou <zhouqiao@marvell.com>
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 arch/arm/plat-pxa/ssp.c        | 1 +
 include/linux/pxa2xx_ssp.h     | 1 +
 include/linux/spi/pxa2xx_spi.h | 2 +-
 3 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/arm/plat-pxa/ssp.c b/arch/arm/plat-pxa/ssp.c
index 58b79809d20c..584c9bf8ed2d 100644
--- a/arch/arm/plat-pxa/ssp.c
+++ b/arch/arm/plat-pxa/ssp.c
@@ -193,6 +193,7 @@ static const struct platform_device_id ssp_id_table[] = {
 	{ "pxa25x-nssp",	PXA25x_NSSP },
 	{ "pxa27x-ssp",		PXA27x_SSP },
 	{ "pxa168-ssp",		PXA168_SSP },
+	{ "pxa910-ssp",		PXA910_SSP },
 	{ },
 };
 
diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index 44835fb39793..f9fe15ec2b51 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -161,6 +161,7 @@ enum pxa_ssp_type {
 	PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */
 	PXA27x_SSP,
 	PXA168_SSP,
+	PXA910_SSP,
 	CE4100_SSP,
 };
 
diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h
index d3e1075f7b60..c73d1445c77e 100644
--- a/include/linux/spi/pxa2xx_spi.h
+++ b/include/linux/spi/pxa2xx_spi.h
@@ -43,7 +43,7 @@ struct pxa2xx_spi_chip {
 	void (*cs_control)(u32 command);
 };
 
-#ifdef CONFIG_ARCH_PXA
+#if defined(CONFIG_ARCH_PXA) || defined(CONFIG_ARCH_MMP)
 
 #include <linux/clk.h>
 #include <mach/dma.h>
-- 
cgit 


From 972a55b62d592cfcd6d73577df8a52f1251ea9a7 Mon Sep 17 00:00:00 2001
From: Qiao Zhou <zhouqiao@marvell.com>
Date: Mon, 4 Jun 2012 10:41:04 +0800
Subject: ASoC: fix pxa-ssp compiling issue under mach-mmp

pxa-ssp.c uses API like cpu_is_pxa3xx(), cpu_is_pxa2xx(), which is
defined under arch-pxa architecture, and drivers under mach-mmp
can't find it. so just use ssp->type to replace that API.

Signed-off-by: Qiao Zhou <zhouqiao@marvell.com>
Acked-by: Haojian Zhuang <haojian.zhuang@gmail.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 include/linux/pxa2xx_ssp.h |  1 +
 sound/soc/pxa/pxa-ssp.c    | 38 +++++++++++---------------------------
 2 files changed, 12 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h
index f9fe15ec2b51..f36632061c66 100644
--- a/include/linux/pxa2xx_ssp.h
+++ b/include/linux/pxa2xx_ssp.h
@@ -160,6 +160,7 @@ enum pxa_ssp_type {
 	PXA25x_SSP,  /* pxa 210, 250, 255, 26x */
 	PXA25x_NSSP, /* pxa 255, 26x (including ASSP) */
 	PXA27x_SSP,
+	PXA3xx_SSP,
 	PXA168_SSP,
 	PXA910_SSP,
 	CE4100_SSP,
diff --git a/sound/soc/pxa/pxa-ssp.c b/sound/soc/pxa/pxa-ssp.c
index 1c2aa7fab3fd..4da5fc55c7ee 100644
--- a/sound/soc/pxa/pxa-ssp.c
+++ b/sound/soc/pxa/pxa-ssp.c
@@ -33,7 +33,6 @@
 
 #include <mach/hardware.h>
 #include <mach/dma.h>
-#include <mach/audio.h>
 
 #include "../../arm/pxa2xx-pcm.h"
 #include "pxa-ssp.h"
@@ -194,7 +193,7 @@ static void pxa_ssp_set_scr(struct ssp_device *ssp, u32 div)
 {
 	u32 sscr0 = pxa_ssp_read_reg(ssp, SSCR0);
 
-	if (cpu_is_pxa25x() && ssp->type == PXA25x_SSP) {
+	if (ssp->type == PXA25x_SSP) {
 		sscr0 &= ~0x0000ff00;
 		sscr0 |= ((div - 2)/2) << 8; /* 2..512 */
 	} else {
@@ -212,7 +211,7 @@ static u32 pxa_ssp_get_scr(struct ssp_device *ssp)
 	u32 sscr0 = pxa_ssp_read_reg(ssp, SSCR0);
 	u32 div;
 
-	if (cpu_is_pxa25x() && ssp->type == PXA25x_SSP)
+	if (ssp->type == PXA25x_SSP)
 		div = ((sscr0 >> 8) & 0xff) * 2 + 2;
 	else
 		div = ((sscr0 >> 8) & 0xfff) + 1;
@@ -242,7 +241,7 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 		break;
 	case PXA_SSP_CLK_PLL:
 		/* Internal PLL is fixed */
-		if (cpu_is_pxa25x())
+		if (ssp->type == PXA25x_SSP)
 			priv->sysclk = 1843200;
 		else
 			priv->sysclk = 13000000;
@@ -266,11 +265,11 @@ static int pxa_ssp_set_dai_sysclk(struct snd_soc_dai *cpu_dai,
 
 	/* The SSP clock must be disabled when changing SSP clock mode
 	 * on PXA2xx.  On PXA3xx it must be enabled when doing so. */
-	if (!cpu_is_pxa3xx())
+	if (ssp->type != PXA3xx_SSP)
 		clk_disable(ssp->clk);
 	val = pxa_ssp_read_reg(ssp, SSCR0) | sscr0;
 	pxa_ssp_write_reg(ssp, SSCR0, val);
-	if (!cpu_is_pxa3xx())
+	if (ssp->type != PXA3xx_SSP)
 		clk_enable(ssp->clk);
 
 	return 0;
@@ -294,24 +293,20 @@ static int pxa_ssp_set_dai_clkdiv(struct snd_soc_dai *cpu_dai,
 	case PXA_SSP_AUDIO_DIV_SCDB:
 		val = pxa_ssp_read_reg(ssp, SSACD);
 		val &= ~SSACD_SCDB;
-#if defined(CONFIG_PXA3xx)
-		if (cpu_is_pxa3xx())
+		if (ssp->type == PXA3xx_SSP)
 			val &= ~SSACD_SCDX8;
-#endif
 		switch (div) {
 		case PXA_SSP_CLK_SCDB_1:
 			val |= SSACD_SCDB;
 			break;
 		case PXA_SSP_CLK_SCDB_4:
 			break;
-#if defined(CONFIG_PXA3xx)
 		case PXA_SSP_CLK_SCDB_8:
-			if (cpu_is_pxa3xx())
+			if (ssp->type == PXA3xx_SSP)
 				val |= SSACD_SCDX8;
 			else
 				return -EINVAL;
 			break;
-#endif
 		default:
 			return -EINVAL;
 		}
@@ -337,10 +332,8 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
 	struct ssp_device *ssp = priv->ssp;
 	u32 ssacd = pxa_ssp_read_reg(ssp, SSACD) & ~0x70;
 
-#if defined(CONFIG_PXA3xx)
-	if (cpu_is_pxa3xx())
+	if (ssp->type == PXA3xx_SSP)
 		pxa_ssp_write_reg(ssp, SSACDD, 0);
-#endif
 
 	switch (freq_out) {
 	case 5622000:
@@ -365,11 +358,10 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
 		break;
 
 	default:
-#ifdef CONFIG_PXA3xx
 		/* PXA3xx has a clock ditherer which can be used to generate
 		 * a wider range of frequencies - calculate a value for it.
 		 */
-		if (cpu_is_pxa3xx()) {
+		if (ssp->type == PXA3xx_SSP) {
 			u32 val;
 			u64 tmp = 19968;
 			tmp *= 1000000;
@@ -386,7 +378,6 @@ static int pxa_ssp_set_dai_pll(struct snd_soc_dai *cpu_dai, int pll_id,
 				val, freq_out);
 			break;
 		}
-#endif
 
 		return -EINVAL;
 	}
@@ -590,10 +581,8 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 	/* bit size */
 	switch (params_format(params)) {
 	case SNDRV_PCM_FORMAT_S16_LE:
-#ifdef CONFIG_PXA3xx
-		if (cpu_is_pxa3xx())
+		if (ssp->type == PXA3xx_SSP)
 			sscr0 |= SSCR0_FPCKE;
-#endif
 		sscr0 |= SSCR0_DataSize(16);
 		break;
 	case SNDRV_PCM_FORMAT_S24_LE:
@@ -618,9 +607,7 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 			* trying and failing a lot; some of the registers
 			* needed for that mode are only available on PXA3xx.
 			*/
-
-#ifdef CONFIG_PXA3xx
-			if (!cpu_is_pxa3xx())
+			if (ssp->type != PXA3xx_SSP)
 				return -EINVAL;
 
 			sspsp |= SSPSP_SFRMWDTH(width * 2);
@@ -628,9 +615,6 @@ static int pxa_ssp_hw_params(struct snd_pcm_substream *substream,
 			sspsp |= SSPSP_EDMYSTOP(3);
 			sspsp |= SSPSP_DMYSTOP(3);
 			sspsp |= SSPSP_DMYSTRT(1);
-#else
-			return -EINVAL;
-#endif
 		} else {
 			/* The frame width is the width the LRCLK is
 			 * asserted for; the delay is expressed in
-- 
cgit 


From c2fb8a3fa25513de8fedb38509b1f15a5bbee47b Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 13 Jun 2012 11:20:19 -0400
Subject: USB: add NO_D3_DURING_SLEEP flag and revert 151b61284776be2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch (as1558) fixes a problem affecting several ASUS computers:
The machine crashes or corrupts memory when going into suspend if the
ehci-hcd driver is bound to any controllers.  Users have been forced
to unbind or unload ehci-hcd before putting their systems to sleep.

After extensive testing, it was determined that the machines don't
like going into suspend when any EHCI controllers are in the PCI D3
power state.  Presumably this is a firmware bug, but there's nothing
we can do about it except to avoid putting the controllers in D3
during system sleep.

The patch adds a new flag to indicate whether the problem is present,
and avoids changing the controller's power state if the flag is set.
Runtime suspend is unaffected; this matters only for system suspend.
However as a side effect, the controller will not respond to remote
wakeup requests while the system is asleep.  Hence USB wakeup is not
functional -- but of course, this is already true in the current state
of affairs.

A similar patch has already been applied as commit
151b61284776be2d6f02d48c23c3625678960b97 (USB: EHCI: fix crash during
suspend on ASUS computers).  The patch supersedes that one and reverts
it.  There are two differences:

	The old patch added the flag at the USB level; this patch
	adds it at the PCI level.

	The old patch applied to all chipsets with the same vendor,
	subsystem vendor, and product IDs; this patch makes an
	exception for a known-good system (based on DMI information).

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Dâniel Fraga <fragabr@gmail.com>
Tested-by: Andrey Rahmatullin <wrar@wrar.name>
Tested-by: Steven Rostedt <rostedt@goodmis.org>
Cc: stable <stable@vger.kernel.org>
Reviewed-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/pci/pci.c           |  5 +++++
 drivers/pci/quirks.c        | 26 ++++++++++++++++++++++++++
 drivers/usb/core/hcd-pci.c  |  9 ---------
 drivers/usb/host/ehci-pci.c |  8 --------
 include/linux/pci.h         |  2 ++
 include/linux/usb/hcd.h     |  2 --
 6 files changed, 33 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 447e83472c01..77cb54a65cde 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1744,6 +1744,11 @@ int pci_prepare_to_sleep(struct pci_dev *dev)
 	if (target_state == PCI_POWER_ERROR)
 		return -EIO;
 
+	/* Some devices mustn't be in D3 during system sleep */
+	if (target_state == PCI_D3hot &&
+			(dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP))
+		return 0;
+
 	pci_enable_wake(dev, target_state, device_may_wakeup(&dev->dev));
 
 	error = pci_set_power_state(dev, target_state);
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 2a7521677541..194b243a2817 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2929,6 +2929,32 @@ static void __devinit disable_igfx_irq(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x0102, disable_igfx_irq);
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x010a, disable_igfx_irq);
 
+/*
+ * The Intel 6 Series/C200 Series chipset's EHCI controllers on many
+ * ASUS motherboards will cause memory corruption or a system crash
+ * if they are in D3 while the system is put into S3 sleep.
+ */
+static void __devinit asus_ehci_no_d3(struct pci_dev *dev)
+{
+	const char *sys_info;
+	static const char good_Asus_board[] = "P8Z68-V";
+
+	if (dev->dev_flags & PCI_DEV_FLAGS_NO_D3_DURING_SLEEP)
+		return;
+	if (dev->subsystem_vendor != PCI_VENDOR_ID_ASUSTEK)
+		return;
+	sys_info = dmi_get_system_info(DMI_BOARD_NAME);
+	if (sys_info && memcmp(sys_info, good_Asus_board,
+			sizeof(good_Asus_board) - 1) == 0)
+		return;
+
+	dev_info(&dev->dev, "broken D3 during system sleep on ASUS\n");
+	dev->dev_flags |= PCI_DEV_FLAGS_NO_D3_DURING_SLEEP;
+	device_set_wakeup_capable(&dev->dev, false);
+}
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c26, asus_ehci_no_d3);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, 0x1c2d, asus_ehci_no_d3);
+
 static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
 			  struct pci_fixup *end)
 {
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 57ed9e400c06..622b4a48e732 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -493,15 +493,6 @@ static int hcd_pci_suspend_noirq(struct device *dev)
 
 	pci_save_state(pci_dev);
 
-	/*
-	 * Some systems crash if an EHCI controller is in D3 during
-	 * a sleep transition.  We have to leave such controllers in D0.
-	 */
-	if (hcd->broken_pci_sleep) {
-		dev_dbg(dev, "Staying in PCI D0\n");
-		return retval;
-	}
-
 	/* If the root hub is dead rather than suspended, disallow remote
 	 * wakeup.  usb_hc_died() should ensure that both hosts are marked as
 	 * dying, so we only need to check the primary roothub.
diff --git a/drivers/usb/host/ehci-pci.c b/drivers/usb/host/ehci-pci.c
index bc94d7bf072d..123481793a47 100644
--- a/drivers/usb/host/ehci-pci.c
+++ b/drivers/usb/host/ehci-pci.c
@@ -144,14 +144,6 @@ static int ehci_pci_setup(struct usb_hcd *hcd)
 			hcd->has_tt = 1;
 			tdi_reset(ehci);
 		}
-		if (pdev->subsystem_vendor == PCI_VENDOR_ID_ASUSTEK) {
-			/* EHCI #1 or #2 on 6 Series/C200 Series chipset */
-			if (pdev->device == 0x1c26 || pdev->device == 0x1c2d) {
-				ehci_info(ehci, "broken D3 during system sleep on ASUS\n");
-				hcd->broken_pci_sleep = 1;
-				device_set_wakeup_capable(&pdev->dev, false);
-			}
-		}
 		break;
 	case PCI_VENDOR_ID_TDI:
 		if (pdev->device == PCI_DEVICE_ID_TDI_EHCI) {
diff --git a/include/linux/pci.h b/include/linux/pci.h
index d8c379dba6ad..fefb4e19bf6a 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -176,6 +176,8 @@ enum pci_dev_flags {
 	PCI_DEV_FLAGS_NO_D3 = (__force pci_dev_flags_t) 2,
 	/* Provide indication device is assigned by a Virtual Machine Manager */
 	PCI_DEV_FLAGS_ASSIGNED = (__force pci_dev_flags_t) 4,
+	/* Device causes system crash if in D3 during S3 sleep */
+	PCI_DEV_FLAGS_NO_D3_DURING_SLEEP = (__force pci_dev_flags_t) 8,
 };
 
 enum pci_irq_reroute_variant {
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index 7f855d50cdf5..49b3ac29726a 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -126,8 +126,6 @@ struct usb_hcd {
 	unsigned		wireless:1;	/* Wireless USB HCD */
 	unsigned		authorized_default:1;
 	unsigned		has_tt:1;	/* Integrated TT in root hub */
-	unsigned		broken_pci_sleep:1;	/* Don't put the
-			controller in PCI-D3 for system sleep */
 
 	unsigned int		irq;		/* irq allocated */
 	void __iomem		*regs;		/* device memory/io */
-- 
cgit 


From 201e4aca5aa179e6c69a4dcd36a3562e56b8d670 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Sat, 26 May 2012 06:07:49 -0700
Subject: pstore/ram: Should update old dmesg buffer before reading

Without the update, we'll only see the new dmesg buffer after the
reboot, but previously we could see it right away. Making an oops
visible in pstore filesystem before reboot is a somewhat dubious
feature, but removing it wasn't an intentional change, so let's
restore it.

For this we have to make persistent_ram_save_old() safe for calling
multiple times, and also extern it.

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pstore/ram.c            |  2 ++
 fs/pstore/ram_core.c       | 15 ++++++++-------
 include/linux/pstore_ram.h |  1 +
 3 files changed, 11 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 9123cce28c1e..16ff7332eae0 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -106,6 +106,8 @@ static ssize_t ramoops_pstore_read(u64 *id, enum pstore_type_id *type,
 	time->tv_sec = 0;
 	time->tv_nsec = 0;
 
+	/* Update old/shadowed buffer. */
+	persistent_ram_save_old(prz);
 	size = persistent_ram_old_size(prz);
 	*buf = kmalloc(size, GFP_KERNEL);
 	if (*buf == NULL)
diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index 31f8d184f3a0..235513c46aaf 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -250,23 +250,24 @@ static void notrace persistent_ram_update(struct persistent_ram_zone *prz,
 	persistent_ram_update_ecc(prz, start, count);
 }
 
-static void __init
-persistent_ram_save_old(struct persistent_ram_zone *prz)
+void persistent_ram_save_old(struct persistent_ram_zone *prz)
 {
 	struct persistent_ram_buffer *buffer = prz->buffer;
 	size_t size = buffer_size(prz);
 	size_t start = buffer_start(prz);
-	char *dest;
 
-	persistent_ram_ecc_old(prz);
+	if (!size)
+		return;
 
-	dest = kmalloc(size, GFP_KERNEL);
-	if (dest == NULL) {
+	if (!prz->old_log) {
+		persistent_ram_ecc_old(prz);
+		prz->old_log = kmalloc(size, GFP_KERNEL);
+	}
+	if (!prz->old_log) {
 		pr_err("persistent_ram: failed to allocate buffer\n");
 		return;
 	}
 
-	prz->old_log = dest;
 	prz->old_log_size = size;
 	memcpy(prz->old_log, &buffer->data[start], size - start);
 	memcpy(prz->old_log + size - start, &buffer->data[0], start);
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 7ed7fd4dba49..4491e8ff36e6 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -75,6 +75,7 @@ struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev,
 int persistent_ram_write(struct persistent_ram_zone *prz, const void *s,
 	unsigned int count);
 
+void persistent_ram_save_old(struct persistent_ram_zone *prz);
 size_t persistent_ram_old_size(struct persistent_ram_zone *prz);
 void *persistent_ram_old(struct persistent_ram_zone *prz);
 void persistent_ram_free_old(struct persistent_ram_zone *prz);
-- 
cgit 


From fce397930475f7efc712a1345dc0dad269a10544 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <anton.vorontsov@linaro.org>
Date: Sat, 26 May 2012 06:07:51 -0700
Subject: pstore/ram_core: Factor persistent_ram_zap() out of post_init()

A handy function that we will use outside of ram_core soon. But
so far just factor it out and start using it in post_init().

Signed-off-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/pstore/ram_core.c       | 11 ++++++++---
 include/linux/pstore_ram.h |  1 +
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/pstore/ram_core.c b/fs/pstore/ram_core.c
index f6650d12c0c1..c5fbdbbf81ac 100644
--- a/fs/pstore/ram_core.c
+++ b/fs/pstore/ram_core.c
@@ -320,6 +320,13 @@ void persistent_ram_free_old(struct persistent_ram_zone *prz)
 	prz->old_log_size = 0;
 }
 
+void persistent_ram_zap(struct persistent_ram_zone *prz)
+{
+	atomic_set(&prz->buffer->start, 0);
+	atomic_set(&prz->buffer->size, 0);
+	persistent_ram_update_header_ecc(prz);
+}
+
 static void *persistent_ram_vmap(phys_addr_t start, size_t size)
 {
 	struct page **pages;
@@ -414,8 +421,7 @@ static int __init persistent_ram_post_init(struct persistent_ram_zone *prz, bool
 	}
 
 	prz->buffer->sig = PERSISTENT_RAM_SIG;
-	atomic_set(&prz->buffer->start, 0);
-	atomic_set(&prz->buffer->size, 0);
+	persistent_ram_zap(prz);
 
 	return 0;
 }
@@ -450,7 +456,6 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start,
 		goto err;
 
 	persistent_ram_post_init(prz, ecc);
-	persistent_ram_update_header_ecc(prz);
 
 	return prz;
 err:
diff --git a/include/linux/pstore_ram.h b/include/linux/pstore_ram.h
index 4491e8ff36e6..3b823d49a85a 100644
--- a/include/linux/pstore_ram.h
+++ b/include/linux/pstore_ram.h
@@ -69,6 +69,7 @@ struct persistent_ram_zone * __init persistent_ram_new(phys_addr_t start,
 						       size_t size,
 						       bool ecc);
 void persistent_ram_free(struct persistent_ram_zone *prz);
+void persistent_ram_zap(struct persistent_ram_zone *prz);
 struct persistent_ram_zone *persistent_ram_init_ringbuffer(struct device *dev,
 		bool ecc);
 
-- 
cgit 


From e2ae715d66bf4becfb85eb84b7150e23cf27df30 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay@vrfy.org>
Date: Fri, 15 Jun 2012 14:07:51 +0200
Subject: kmsg - kmsg_dump() use iterator to receive log buffer content

Provide an iterator to receive the log buffer content, and convert all
kmsg_dump() users to it.

The structured data in the kmsg buffer now contains binary data, which
should no longer be copied verbatim to the kmsg_dump() users.

The iterator should provide reliable access to the buffer data, and also
supports proper log line-aware chunking of data while iterating.

Signed-off-by: Kay Sievers <kay@vrfy.org>
Tested-by: Tony Luck <tony.luck@intel.com>
Reported-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Tested-by: Anton Vorontsov <anton.vorontsov@linaro.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/powerpc/platforms/pseries/nvram.c     |  61 +-------
 arch/x86/platform/mrst/early_printk_mrst.c |  13 +-
 drivers/mtd/mtdoops.c                      |  22 +--
 fs/pstore/platform.c                       |  34 ++---
 include/linux/kmsg_dump.h                  |  45 +++++-
 kernel/printk.c                            | 220 +++++++++++++++++++++++++----
 6 files changed, 258 insertions(+), 137 deletions(-)

(limited to 'include/linux')

diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c
index 36f957f31842..8733a86ad52e 100644
--- a/arch/powerpc/platforms/pseries/nvram.c
+++ b/arch/powerpc/platforms/pseries/nvram.c
@@ -68,9 +68,7 @@ static const char *pseries_nvram_os_partitions[] = {
 };
 
 static void oops_to_nvram(struct kmsg_dumper *dumper,
-		enum kmsg_dump_reason reason,
-		const char *old_msgs, unsigned long old_len,
-		const char *new_msgs, unsigned long new_len);
+			  enum kmsg_dump_reason reason);
 
 static struct kmsg_dumper nvram_kmsg_dumper = {
 	.dump = oops_to_nvram
@@ -503,28 +501,6 @@ int __init pSeries_nvram_init(void)
 	return 0;
 }
 
-/*
- * Try to capture the last capture_len bytes of the printk buffer.  Return
- * the amount actually captured.
- */
-static size_t capture_last_msgs(const char *old_msgs, size_t old_len,
-				const char *new_msgs, size_t new_len,
-				char *captured, size_t capture_len)
-{
-	if (new_len >= capture_len) {
-		memcpy(captured, new_msgs + (new_len - capture_len),
-								capture_len);
-		return capture_len;
-	} else {
-		/* Grab the end of old_msgs. */
-		size_t old_tail_len = min(old_len, capture_len - new_len);
-		memcpy(captured, old_msgs + (old_len - old_tail_len),
-								old_tail_len);
-		memcpy(captured + old_tail_len, new_msgs, new_len);
-		return old_tail_len + new_len;
-	}
-}
-
 /*
  * Are we using the ibm,rtas-log for oops/panic reports?  And if so,
  * would logging this oops/panic overwrite an RTAS event that rtas_errd
@@ -541,27 +517,6 @@ static int clobbering_unread_rtas_event(void)
 						NVRAM_RTAS_READ_TIMEOUT);
 }
 
-/* Squeeze out each line's <n> severity prefix. */
-static size_t elide_severities(char *buf, size_t len)
-{
-	char *in, *out, *buf_end = buf + len;
-	/* Assume a <n> at the very beginning marks the start of a line. */
-	int newline = 1;
-
-	in = out = buf;
-	while (in < buf_end) {
-		if (newline && in+3 <= buf_end &&
-				*in == '<' && isdigit(in[1]) && in[2] == '>') {
-			in += 3;
-			newline = 0;
-		} else {
-			newline = (*in == '\n');
-			*out++ = *in++;
-		}
-	}
-	return out - buf;
-}
-
 /* Derived from logfs_compress() */
 static int nvram_compress(const void *in, void *out, size_t inlen,
 							size_t outlen)
@@ -619,9 +574,7 @@ static int zip_oops(size_t text_len)
  * partition.  If that's too much, go back and capture uncompressed text.
  */
 static void oops_to_nvram(struct kmsg_dumper *dumper,
-		enum kmsg_dump_reason reason,
-		const char *old_msgs, unsigned long old_len,
-		const char *new_msgs, unsigned long new_len)
+			  enum kmsg_dump_reason reason)
 {
 	static unsigned int oops_count = 0;
 	static bool panicking = false;
@@ -660,14 +613,14 @@ static void oops_to_nvram(struct kmsg_dumper *dumper,
 		return;
 
 	if (big_oops_buf) {
-		text_len = capture_last_msgs(old_msgs, old_len,
-			new_msgs, new_len, big_oops_buf, big_oops_buf_sz);
-		text_len = elide_severities(big_oops_buf, text_len);
+		kmsg_dump_get_buffer(dumper, false,
+				     big_oops_buf, big_oops_buf_sz, &text_len);
 		rc = zip_oops(text_len);
 	}
 	if (rc != 0) {
-		text_len = capture_last_msgs(old_msgs, old_len,
-				new_msgs, new_len, oops_data, oops_data_sz);
+		kmsg_dump_rewind(dumper);
+		kmsg_dump_get_buffer(dumper, true,
+				     oops_data, oops_data_sz, &text_len);
 		err_type = ERR_TYPE_KERNEL_PANIC;
 		*oops_len = (u16) text_len;
 	}
diff --git a/arch/x86/platform/mrst/early_printk_mrst.c b/arch/x86/platform/mrst/early_printk_mrst.c
index 3c6e328483c7..028454f0c3a5 100644
--- a/arch/x86/platform/mrst/early_printk_mrst.c
+++ b/arch/x86/platform/mrst/early_printk_mrst.c
@@ -110,19 +110,16 @@ static struct kmsg_dumper dw_dumper;
 static int dumper_registered;
 
 static void dw_kmsg_dump(struct kmsg_dumper *dumper,
-			enum kmsg_dump_reason reason,
-			const char *s1, unsigned long l1,
-			const char *s2, unsigned long l2)
+			 enum kmsg_dump_reason reason)
 {
-	int i;
+	static char line[1024];
+	size_t len;
 
 	/* When run to this, we'd better re-init the HW */
 	mrst_early_console_init();
 
-	for (i = 0; i < l1; i++)
-		early_mrst_console.write(&early_mrst_console, s1 + i, 1);
-	for (i = 0; i < l2; i++)
-		early_mrst_console.write(&early_mrst_console, s2 + i, 1);
+	while (kmsg_dump_get_line(dumper, true, line, sizeof(line), &len))
+		early_mrst_console.write(&early_mrst_console, line, len);
 }
 
 /* Set the ratio rate to 115200, 8n1, IRQ disabled */
diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c
index ae36d7e1e913..551e316e4454 100644
--- a/drivers/mtd/mtdoops.c
+++ b/drivers/mtd/mtdoops.c
@@ -304,32 +304,17 @@ static void find_next_position(struct mtdoops_context *cxt)
 }
 
 static void mtdoops_do_dump(struct kmsg_dumper *dumper,
-		enum kmsg_dump_reason reason, const char *s1, unsigned long l1,
-		const char *s2, unsigned long l2)
+			    enum kmsg_dump_reason reason)
 {
 	struct mtdoops_context *cxt = container_of(dumper,
 			struct mtdoops_context, dump);
-	unsigned long s1_start, s2_start;
-	unsigned long l1_cpy, l2_cpy;
-	char *dst;
-
-	if (reason != KMSG_DUMP_OOPS &&
-	    reason != KMSG_DUMP_PANIC)
-		return;
 
 	/* Only dump oopses if dump_oops is set */
 	if (reason == KMSG_DUMP_OOPS && !dump_oops)
 		return;
 
-	dst = cxt->oops_buf + MTDOOPS_HEADER_SIZE; /* Skip the header */
-	l2_cpy = min(l2, record_size - MTDOOPS_HEADER_SIZE);
-	l1_cpy = min(l1, record_size - MTDOOPS_HEADER_SIZE - l2_cpy);
-
-	s2_start = l2 - l2_cpy;
-	s1_start = l1 - l1_cpy;
-
-	memcpy(dst, s1 + s1_start, l1_cpy);
-	memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
+	kmsg_dump_get_buffer(dumper, true, cxt->oops_buf + MTDOOPS_HEADER_SIZE,
+			     record_size - MTDOOPS_HEADER_SIZE, NULL);
 
 	/* Panics must be written immediately */
 	if (reason != KMSG_DUMP_OOPS)
@@ -375,6 +360,7 @@ static void mtdoops_notify_add(struct mtd_info *mtd)
 		return;
 	}
 
+	cxt->dump.max_reason = KMSG_DUMP_OOPS;
 	cxt->dump.dump = mtdoops_do_dump;
 	err = kmsg_dump_register(&cxt->dump);
 	if (err) {
diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c
index 82c585f715e3..03ce7a9b81cc 100644
--- a/fs/pstore/platform.c
+++ b/fs/pstore/platform.c
@@ -94,20 +94,15 @@ static const char *get_reason_str(enum kmsg_dump_reason reason)
  * as we can from the end of the buffer.
  */
 static void pstore_dump(struct kmsg_dumper *dumper,
-	    enum kmsg_dump_reason reason,
-	    const char *s1, unsigned long l1,
-	    const char *s2, unsigned long l2)
+			enum kmsg_dump_reason reason)
 {
-	unsigned long	s1_start, s2_start;
-	unsigned long	l1_cpy, l2_cpy;
-	unsigned long	size, total = 0;
-	char		*dst;
+	unsigned long	total = 0;
 	const char	*why;
 	u64		id;
-	int		hsize, ret;
 	unsigned int	part = 1;
 	unsigned long	flags = 0;
 	int		is_locked = 0;
+	int		ret;
 
 	why = get_reason_str(reason);
 
@@ -119,30 +114,25 @@ static void pstore_dump(struct kmsg_dumper *dumper,
 		spin_lock_irqsave(&psinfo->buf_lock, flags);
 	oopscount++;
 	while (total < kmsg_bytes) {
+		char *dst;
+		unsigned long size;
+		int hsize;
+		size_t len;
+
 		dst = psinfo->buf;
 		hsize = sprintf(dst, "%s#%d Part%d\n", why, oopscount, part);
 		size = psinfo->bufsize - hsize;
 		dst += hsize;
 
-		l2_cpy = min(l2, size);
-		l1_cpy = min(l1, size - l2_cpy);
-
-		if (l1_cpy + l2_cpy == 0)
+		if (!kmsg_dump_get_buffer(dumper, true, dst, size, &len))
 			break;
 
-		s2_start = l2 - l2_cpy;
-		s1_start = l1 - l1_cpy;
-
-		memcpy(dst, s1 + s1_start, l1_cpy);
-		memcpy(dst + l1_cpy, s2 + s2_start, l2_cpy);
-
 		ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part,
-				   hsize + l1_cpy + l2_cpy, psinfo);
+				    hsize + len, psinfo);
 		if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted())
 			pstore_new_entry = 1;
-		l1 -= l1_cpy;
-		l2 -= l2_cpy;
-		total += l1_cpy + l2_cpy;
+
+		total += hsize + len;
 		part++;
 	}
 	if (in_nmi()) {
diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index 35f7237ec972..af4eb5a39d9a 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -21,6 +21,7 @@
  * is passed to the kernel.
  */
 enum kmsg_dump_reason {
+	KMSG_DUMP_UNDEF,
 	KMSG_DUMP_PANIC,
 	KMSG_DUMP_OOPS,
 	KMSG_DUMP_EMERG,
@@ -31,23 +32,37 @@ enum kmsg_dump_reason {
 
 /**
  * struct kmsg_dumper - kernel crash message dumper structure
- * @dump:	The callback which gets called on crashes. The buffer is passed
- * 		as two sections, where s1 (length l1) contains the older
- * 		messages and s2 (length l2) contains the newer.
  * @list:	Entry in the dumper list (private)
+ * @dump:	Call into dumping code which will retrieve the data with
+ * 		through the record iterator
+ * @max_reason:	filter for highest reason number that should be dumped
  * @registered:	Flag that specifies if this is already registered
  */
 struct kmsg_dumper {
-	void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason,
-			const char *s1, unsigned long l1,
-			const char *s2, unsigned long l2);
 	struct list_head list;
-	int registered;
+	void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason);
+	enum kmsg_dump_reason max_reason;
+	bool active;
+	bool registered;
+
+	/* private state of the kmsg iterator */
+	u32 cur_idx;
+	u32 next_idx;
+	u64 cur_seq;
+	u64 next_seq;
 };
 
 #ifdef CONFIG_PRINTK
 void kmsg_dump(enum kmsg_dump_reason reason);
 
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+			char *line, size_t size, size_t *len);
+
+bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+			  char *buf, size_t size, size_t *len);
+
+void kmsg_dump_rewind(struct kmsg_dumper *dumper);
+
 int kmsg_dump_register(struct kmsg_dumper *dumper);
 
 int kmsg_dump_unregister(struct kmsg_dumper *dumper);
@@ -56,6 +71,22 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
 {
 }
 
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+			  const char *line, size_t size, size_t *len)
+{
+	return false;
+}
+
+bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+			    char *buf, size_t size, size_t *len)
+{
+	return false;
+}
+
+void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+{
+}
+
 static inline int kmsg_dump_register(struct kmsg_dumper *dumper)
 {
 	return -EINVAL;
diff --git a/kernel/printk.c b/kernel/printk.c
index f205c25c37e2..ceb4a2f775a1 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -909,7 +909,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 		/*
 		 * Find first record that fits, including all following records,
 		 * into the user-provided buffer for this dump.
-		*/
+		 */
 		seq = clear_seq;
 		idx = clear_idx;
 		while (seq < log_next_seq) {
@@ -919,6 +919,8 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 			idx = log_next(idx);
 			seq++;
 		}
+
+		/* move first record forward until length fits into the buffer */
 		seq = clear_seq;
 		idx = clear_idx;
 		while (len > size && seq < log_next_seq) {
@@ -929,7 +931,7 @@ static int syslog_print_all(char __user *buf, int size, bool clear)
 			seq++;
 		}
 
-		/* last message in this dump */
+		/* last message fitting into this dump */
 		next_seq = log_next_seq;
 
 		len = 0;
@@ -2300,48 +2302,210 @@ module_param_named(always_kmsg_dump, always_kmsg_dump, bool, S_IRUGO | S_IWUSR);
  * kmsg_dump - dump kernel log to kernel message dumpers.
  * @reason: the reason (oops, panic etc) for dumping
  *
- * Iterate through each of the dump devices and call the oops/panic
- * callbacks with the log buffer.
+ * Call each of the registered dumper's dump() callback, which can
+ * retrieve the kmsg records with kmsg_dump_get_line() or
+ * kmsg_dump_get_buffer().
  */
 void kmsg_dump(enum kmsg_dump_reason reason)
 {
-	u64 idx;
 	struct kmsg_dumper *dumper;
-	const char *s1, *s2;
-	unsigned long l1, l2;
 	unsigned long flags;
 
 	if ((reason > KMSG_DUMP_OOPS) && !always_kmsg_dump)
 		return;
 
-	/* Theoretically, the log could move on after we do this, but
-	   there's not a lot we can do about that. The new messages
-	   will overwrite the start of what we dump. */
+	rcu_read_lock();
+	list_for_each_entry_rcu(dumper, &dump_list, list) {
+		if (dumper->max_reason && reason > dumper->max_reason)
+			continue;
+
+		/* initialize iterator with data about the stored records */
+		dumper->active = true;
+
+		raw_spin_lock_irqsave(&logbuf_lock, flags);
+		dumper->cur_seq = clear_seq;
+		dumper->cur_idx = clear_idx;
+		dumper->next_seq = log_next_seq;
+		dumper->next_idx = log_next_idx;
+		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+
+		/* invoke dumper which will iterate over records */
+		dumper->dump(dumper, reason);
+
+		/* reset iterator */
+		dumper->active = false;
+	}
+	rcu_read_unlock();
+}
+
+/**
+ * kmsg_dump_get_line - retrieve one kmsg log line
+ * @dumper: registered kmsg dumper
+ * @syslog: include the "<4>" prefixes
+ * @line: buffer to copy the line to
+ * @size: maximum size of the buffer
+ * @len: length of line placed into buffer
+ *
+ * Start at the beginning of the kmsg buffer, with the oldest kmsg
+ * record, and copy one record into the provided buffer.
+ *
+ * Consecutive calls will return the next available record moving
+ * towards the end of the buffer with the youngest messages.
+ *
+ * A return value of FALSE indicates that there are no more records to
+ * read.
+ */
+bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+			char *line, size_t size, size_t *len)
+{
+	unsigned long flags;
+	struct log *msg;
+	size_t l = 0;
+	bool ret = false;
+
+	if (!dumper->active)
+		goto out;
 
 	raw_spin_lock_irqsave(&logbuf_lock, flags);
-	if (syslog_seq < log_first_seq)
-		idx = syslog_idx;
-	else
-		idx = log_first_idx;
+	if (dumper->cur_seq < log_first_seq) {
+		/* messages are gone, move to first available one */
+		dumper->cur_seq = log_first_seq;
+		dumper->cur_idx = log_first_idx;
+	}
 
-	if (idx > log_next_idx) {
-		s1 = log_buf;
-		l1 = log_next_idx;
+	/* last entry */
+	if (dumper->cur_seq >= log_next_seq) {
+		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+		goto out;
+	}
 
-		s2 = log_buf + idx;
-		l2 = log_buf_len - idx;
-	} else {
-		s1 = "";
-		l1 = 0;
+	msg = log_from_idx(dumper->cur_idx);
+	l = msg_print_text(msg, syslog,
+			      line, size);
+
+	dumper->cur_idx = log_next(dumper->cur_idx);
+	dumper->cur_seq++;
+	ret = true;
+	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+out:
+	if (len)
+		*len = l;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_get_line);
+
+/**
+ * kmsg_dump_get_buffer - copy kmsg log lines
+ * @dumper: registered kmsg dumper
+ * @syslog: include the "<4>" prefixes
+ * @line: buffer to copy the line to
+ * @size: maximum size of the buffer
+ * @len: length of line placed into buffer
+ *
+ * Start at the end of the kmsg buffer and fill the provided buffer
+ * with as many of the the *youngest* kmsg records that fit into it.
+ * If the buffer is large enough, all available kmsg records will be
+ * copied with a single call.
+ *
+ * Consecutive calls will fill the buffer with the next block of
+ * available older records, not including the earlier retrieved ones.
+ *
+ * A return value of FALSE indicates that there are no more records to
+ * read.
+ */
+bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+			  char *buf, size_t size, size_t *len)
+{
+	unsigned long flags;
+	u64 seq;
+	u32 idx;
+	u64 next_seq;
+	u32 next_idx;
+	size_t l = 0;
+	bool ret = false;
+
+	if (!dumper->active)
+		goto out;
+
+	raw_spin_lock_irqsave(&logbuf_lock, flags);
+	if (dumper->cur_seq < log_first_seq) {
+		/* messages are gone, move to first available one */
+		dumper->cur_seq = log_first_seq;
+		dumper->cur_idx = log_first_idx;
+	}
+
+	/* last entry */
+	if (dumper->cur_seq >= dumper->next_seq) {
+		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+		goto out;
+	}
+
+	/* calculate length of entire buffer */
+	seq = dumper->cur_seq;
+	idx = dumper->cur_idx;
+	while (seq < dumper->next_seq) {
+		struct log *msg = log_from_idx(idx);
+
+		l += msg_print_text(msg, true, NULL, 0);
+		idx = log_next(idx);
+		seq++;
+	}
+
+	/* move first record forward until length fits into the buffer */
+	seq = dumper->cur_seq;
+	idx = dumper->cur_idx;
+	while (l > size && seq < dumper->next_seq) {
+		struct log *msg = log_from_idx(idx);
 
-		s2 = log_buf + idx;
-		l2 = log_next_idx - idx;
+		l -= msg_print_text(msg, true, NULL, 0);
+		idx = log_next(idx);
+		seq++;
 	}
+
+	/* last message in next interation */
+	next_seq = seq;
+	next_idx = idx;
+
+	l = 0;
+	while (seq < dumper->next_seq) {
+		struct log *msg = log_from_idx(idx);
+
+		l += msg_print_text(msg, syslog,
+				    buf + l, size - l);
+
+		idx = log_next(idx);
+		seq++;
+	}
+
+	dumper->next_seq = next_seq;
+	dumper->next_idx = next_idx;
+	ret = true;
 	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+out:
+	if (len)
+		*len = l;
+	return ret;
+}
+EXPORT_SYMBOL_GPL(kmsg_dump_get_buffer);
 
-	rcu_read_lock();
-	list_for_each_entry_rcu(dumper, &dump_list, list)
-		dumper->dump(dumper, reason, s1, l1, s2, l2);
-	rcu_read_unlock();
+/**
+ * kmsg_dump_rewind - reset the interator
+ * @dumper: registered kmsg dumper
+ *
+ * Reset the dumper's iterator so that kmsg_dump_get_line() and
+ * kmsg_dump_get_buffer() can be called again and used multiple
+ * times within the same dumper.dump() callback.
+ */
+void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+{
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&logbuf_lock, flags);
+	dumper->cur_seq = clear_seq;
+	dumper->cur_idx = clear_idx;
+	dumper->next_seq = log_next_seq;
+	dumper->next_idx = log_next_idx;
+	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 }
+EXPORT_SYMBOL_GPL(kmsg_dump_rewind);
 #endif
-- 
cgit 


From 9b15b817f3d62409290fd56fe3cbb076a931bb0a Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hughd@google.com>
Date: Fri, 15 Jun 2012 17:55:50 -0700
Subject: swap: fix shmem swapping when more than 8 areas

Minchan Kim reports that when a system has many swap areas, and tmpfs
swaps out to the ninth or more, shmem_getpage_gfp()'s attempts to read
back the page cannot locate it, and the read fails with -ENOMEM.

Whoops.  Yes, I blindly followed read_swap_header()'s pte_to_swp_entry(
swp_entry_to_pte()) technique for determining maximum usable swap
offset, without stopping to realize that that actually depends upon the
pte swap encoding shifting swap offset to the higher bits and truncating
it there.  Whereas our radix_tree swap encoding leaves offset in the
lower bits: it's swap "type" (that is, index of swap area) that was
truncated.

Fix it by reducing the SWP_TYPE_SHIFT() in swapops.h, and removing the
broken radix_to_swp_entry(swp_to_radix_entry()) from read_swap_header().

This does not reduce the usable size of a swap area any further, it
leaves it as claimed when making the original commit: no change from 3.0
on x86_64, nor on i386 without PAE; but 3.0's 512GB is reduced to 128GB
per swapfile on i386 with PAE.  It's not a change I would have risked
five years ago, but with x86_64 supported for ten years, I believe it's
appropriate now.

Hmm, and what if some architecture implements its swap pte with offset
encoded below type? That would equally break the maximum usable swap
offset check.  Happily, they all follow the same tradition of encoding
offset above type, but I'll prepare a check on that for next.

Reported-and-Reviewed-and-Tested-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: stable@vger.kernel.org [3.1, 3.2, 3.3, 3.4]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/swapops.h |  8 +++++---
 mm/swapfile.c           | 12 ++++--------
 2 files changed, 9 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 792d16d9cbc7..47ead515c811 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -9,13 +9,15 @@
  * get good packing density in that tree, so the index should be dense in
  * the low-order bits.
  *
- * We arrange the `type' and `offset' fields so that `type' is at the five
+ * We arrange the `type' and `offset' fields so that `type' is at the seven
  * high-order bits of the swp_entry_t and `offset' is right-aligned in the
- * remaining bits.
+ * remaining bits.  Although `type' itself needs only five bits, we allow for
+ * shmem/tmpfs to shift it all up a further two bits: see swp_to_radix_entry().
  *
  * swp_entry_t's are *never* stored anywhere in their arch-dependent format.
  */
-#define SWP_TYPE_SHIFT(e)	(sizeof(e.val) * 8 - MAX_SWAPFILES_SHIFT)
+#define SWP_TYPE_SHIFT(e)	((sizeof(e.val) * 8) - \
+			(MAX_SWAPFILES_SHIFT + RADIX_TREE_EXCEPTIONAL_SHIFT))
 #define SWP_OFFSET_MASK(e)	((1UL << SWP_TYPE_SHIFT(e)) - 1)
 
 /*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index de5bc51c4a66..71373d03fcee 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1916,24 +1916,20 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 
 	/*
 	 * Find out how many pages are allowed for a single swap
-	 * device. There are three limiting factors: 1) the number
+	 * device. There are two limiting factors: 1) the number
 	 * of bits for the swap offset in the swp_entry_t type, and
 	 * 2) the number of bits in the swap pte as defined by the
-	 * the different architectures, and 3) the number of free bits
-	 * in an exceptional radix_tree entry. In order to find the
+	 * different architectures. In order to find the
 	 * largest possible bit mask, a swap entry with swap type 0
 	 * and swap offset ~0UL is created, encoded to a swap pte,
 	 * decoded to a swp_entry_t again, and finally the swap
 	 * offset is extracted. This will mask all the bits from
 	 * the initial ~0UL mask that can't be encoded in either
 	 * the swp_entry_t or the architecture definition of a
-	 * swap pte.  Then the same is done for a radix_tree entry.
+	 * swap pte.
 	 */
 	maxpages = swp_offset(pte_to_swp_entry(
-			swp_entry_to_pte(swp_entry(0, ~0UL))));
-	maxpages = swp_offset(radix_to_swp_entry(
-			swp_to_radix_entry(swp_entry(0, maxpages)))) + 1;
-
+			swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
 	if (maxpages > swap_header->info.last_page) {
 		maxpages = swap_header->info.last_page + 1;
 		/* p->max is an unsigned int: don't overflow it */
-- 
cgit 


From f8fee8f5acb5c3f82e02f2ae139a6f1e7b4eb583 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@xenotime.net>
Date: Fri, 15 Jun 2012 12:46:17 -0700
Subject: vga_switcheroo.h: fix pci_dev warning

Fix warnings on some architectures/configs (not on x86):

include/linux/vga_switcheroo.h:28:30: warning: 'struct pci_dev' declared inside parameter list [enabled by default]
include/linux/vga_switcheroo.h:28:30: warning: its scope is only this definition or declaration, which is probably not what you want [enabled by default]

Signed-off-by: Randy Dunlap <rdunlap@xenotime.net>
Cc:	Takashi Iwai <tiwai@suse.de>
Reported-by:	Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vga_switcheroo.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index 60da41fe9dc2..d844b7790ea6 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -9,6 +9,8 @@
 
 #include <linux/fb.h>
 
+struct pci_dev;
+
 enum vga_switcheroo_state {
 	VGA_SWITCHEROO_OFF,
 	VGA_SWITCHEROO_ON,
-- 
cgit 


From 2a4c8994eeef50796015f8a2005e4a75c1929166 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 14 Jun 2012 13:08:38 -0400
Subject: NFSv4.1: Fix umount when filelayout DS is also the MDS

Currently there is a 'chicken and egg' issue when the DS is also the mounted
MDS. The nfs_match_client() reference from nfs4_set_ds_client bumps the
cl_count, the nfs_client is not freed at umount, and nfs4_deviceid_purge_client
is not called to dereference the MDS usage of a deviceid which holds a
reference to the DS nfs_client.  The result is the umount program returns,
but the nfs_client is not freed, and the cl_session hearbeat continues.

The MDS (and all other nfs mounts) lose their last nfs_client reference in
nfs_free_server when the last nfs_server (fsid) is umounted.
The file layout DS lose their last nfs_client reference in destroy_ds
when the last deviceid referencing the data server is put and destroy_ds is
called. This is triggered by a call to nfs4_deviceid_purge_client which
removes references to a pNFS deviceid used by an MDS mount.

The fix is to track how many pnfs enabled filesystems are mounted from
this server, and then to purge the device id cache once that count reaches
zero.

Reported-by: Jorge Mora <Jorge.Mora@netapp.com>
Reported-by: Andy Adamson <andros@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/client.c           | 1 -
 fs/nfs/pnfs.c             | 5 +++++
 include/linux/nfs_fs_sb.h | 1 +
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 17ba6b995659..f005b5bebdc7 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -207,7 +207,6 @@ error_0:
 static void nfs4_shutdown_session(struct nfs_client *clp)
 {
 	if (nfs4_has_session(clp)) {
-		nfs4_deviceid_purge_client(clp);
 		nfs4_destroy_session(clp->cl_session);
 		nfs4_destroy_clientid(clp);
 	}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index b8323aa7b543..bdf7e52943c8 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -80,6 +80,9 @@ unset_pnfs_layoutdriver(struct nfs_server *nfss)
 	if (nfss->pnfs_curr_ld) {
 		if (nfss->pnfs_curr_ld->clear_layoutdriver)
 			nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
+		/* Decrement the MDS count. Purge the deviceid cache if zero */
+		if (atomic_dec_and_test(&nfss->nfs_client->cl_mds_count))
+			nfs4_deviceid_purge_client(nfss->nfs_client);
 		module_put(nfss->pnfs_curr_ld->owner);
 	}
 	nfss->pnfs_curr_ld = NULL;
@@ -127,6 +130,8 @@ set_pnfs_layoutdriver(struct nfs_server *server, const struct nfs_fh *mntfh,
 		module_put(ld_type->owner);
 		goto out_no_driver;
 	}
+	/* Bump the MDS count */
+	atomic_inc(&server->nfs_client->cl_mds_count);
 
 	dprintk("%s: pNFS module for %u set\n", __func__, id);
 	return;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index fbb78fb09bd2..f58325a1d8fb 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -25,6 +25,7 @@ struct nfs41_impl_id;
  */
 struct nfs_client {
 	atomic_t		cl_count;
+	atomic_t		cl_mds_count;
 	int			cl_cons_state;	/* current construction state (-ve: init error) */
 #define NFS_CS_READY		0		/* ready to be used */
 #define NFS_CS_INITING		1		/* busy initialising */
-- 
cgit 


From 93b3cca1ccd30b1ad290951a3fc7c10c73db7313 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Thu, 14 Jun 2012 10:54:28 -0400
Subject: ftrace: Make all inline tags also include notrace

Commit 5963e317b1e9d2a ("ftrace/x86: Do not change stacks in DEBUG when
calling lockdep") prevented lockdep calls from the int3 breakpoint handler
from reseting the stack if a function that was called was in the process
of being converted for tracing and had a breakpoint on it. The idea is,
before calling the lockdep code, do a load_idt() to the special IDT that
kept the breakpoint stack from reseting. This worked well as a quick fix
for this kernel release, until a certain config caused a lockup in the
function tracer start up tests.

Investigating it, I found that the load_idt that was used to prevent
the int3 from changing stacks was itself being traced!

Even though the config had CONFIG_OPTIMIZE_INLINING disabled, and
all 'inline' tags were set to always inline, there were still cases that
it did not inline! This was caused by CONFIG_PARAVIRT_GUEST, where it
would add a pointer to the native_load_idt() which made that function
to be traced.

Commit 45959ee7aa645815a ("ftrace: Do not function trace inlined functions")
only touched the 'inline' tags when CONFIG_OPMITIZE_INLINING was enabled.
PARAVIRT_GUEST shows that this was not enough and we need to also
mark always_inline with notrace as well.

Reported-by: Fengguang Wu <wfg@linux.intel.com>
Tested-by: Fengguang Wu <wfg@linux.intel.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/compiler-gcc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h
index e5834aa24b9e..6a6d7aefe12d 100644
--- a/include/linux/compiler-gcc.h
+++ b/include/linux/compiler-gcc.h
@@ -47,9 +47,9 @@
  */
 #if !defined(CONFIG_ARCH_SUPPORTS_OPTIMIZED_INLINING) || \
     !defined(CONFIG_OPTIMIZE_INLINING) || (__GNUC__ < 4)
-# define inline		inline		__attribute__((always_inline))
-# define __inline__	__inline__	__attribute__((always_inline))
-# define __inline	__inline	__attribute__((always_inline))
+# define inline		inline		__attribute__((always_inline)) notrace
+# define __inline__	__inline__	__attribute__((always_inline)) notrace
+# define __inline	__inline	__attribute__((always_inline)) notrace
 #else
 /* A lot of inline functions can cause havoc with function tracing */
 # define inline		inline		notrace
-- 
cgit 


From 246f6f2ff2c5cf46ded6d06f11f63e38bad880d1 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay@vrfy.org>
Date: Tue, 19 Jun 2012 00:32:57 +0200
Subject: kmsg - kmsg_dump() fix CONFIG_PRINTK=n compilation

Signed-off-by: Kay Sievers <kay@vrfy.org>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Reported-by: Randy Dunlap <rdunlap@xenotime.net>
Reported-by: Fengguang Wu <wfg@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/kmsg_dump.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h
index af4eb5a39d9a..d6bd50110ec2 100644
--- a/include/linux/kmsg_dump.h
+++ b/include/linux/kmsg_dump.h
@@ -71,19 +71,19 @@ static inline void kmsg_dump(enum kmsg_dump_reason reason)
 {
 }
 
-bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
-			  const char *line, size_t size, size_t *len)
+static inline bool kmsg_dump_get_line(struct kmsg_dumper *dumper, bool syslog,
+				const char *line, size_t size, size_t *len)
 {
 	return false;
 }
 
-bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
-			    char *buf, size_t size, size_t *len)
+static inline bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,
+					char *buf, size_t size, size_t *len)
 {
 	return false;
 }
 
-void kmsg_dump_rewind(struct kmsg_dumper *dumper)
+static inline void kmsg_dump_rewind(struct kmsg_dumper *dumper)
 {
 }
 
-- 
cgit 


From abca7c4965845924f65d40e0aa1092bdd895e314 Mon Sep 17 00:00:00 2001
From: Pravin B Shelar <pshelar@nicira.com>
Date: Wed, 20 Jun 2012 12:52:56 -0700
Subject: mm: fix slab->page _count corruption when using slub

On arches that do not support this_cpu_cmpxchg_double() slab_lock is used
to do atomic cmpxchg() on double word which contains page->_count.  The
page count can be changed from get_page() or put_page() without taking
slab_lock.  That corrupts page counter.

Fix it by moving page->_count out of cmpxchg_double data.  So that slub
does no change it while updating slub meta-data in struct page.

[akpm@linux-foundation.org: use standard comment layout, tweak comment text]
Reported-by: Amey Bhide <abhide@nicira.com>
Signed-off-by: Pravin B Shelar <pshelar@nicira.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index dad95bdd06d7..704a626d94a0 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -57,8 +57,18 @@ struct page {
 		};
 
 		union {
+#if defined(CONFIG_HAVE_CMPXCHG_DOUBLE) && \
+	defined(CONFIG_HAVE_ALIGNED_STRUCT_PAGE)
 			/* Used for cmpxchg_double in slub */
 			unsigned long counters;
+#else
+			/*
+			 * Keep _count separate from slub cmpxchg_double data.
+			 * As the rest of the double word is protected by
+			 * slab_lock but _count is not.
+			 */
+			unsigned counters;
+#endif
 
 			struct {
 
-- 
cgit 


From 10d8935f46e5028847b179757ecbf9238b13d129 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.linux@gmail.com>
Date: Wed, 20 Jun 2012 12:53:02 -0700
Subject: Viresh has moved

viresh.kumar@st.com email-id doesn't exist anymore as I have left the
company.  Replace ST's id with viresh.linux@gmail.com.

It also updates .mailmap file to fix address for 'git shortlog'

Signed-off-by: Viresh Kumar <viresh.linux@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 .mailmap                                           |  1 +
 Documentation/arm/SPEAr/overview.txt               |  2 +-
 MAINTAINERS                                        | 18 +++++++++---------
 arch/arm/boot/dts/spear1310-evb.dts                |  2 +-
 arch/arm/boot/dts/spear1310.dtsi                   |  2 +-
 arch/arm/boot/dts/spear1340-evb.dts                |  2 +-
 arch/arm/boot/dts/spear1340.dtsi                   |  2 +-
 arch/arm/boot/dts/spear13xx.dtsi                   |  2 +-
 arch/arm/boot/dts/spear300-evb.dts                 |  2 +-
 arch/arm/boot/dts/spear300.dtsi                    |  2 +-
 arch/arm/boot/dts/spear310-evb.dts                 |  2 +-
 arch/arm/boot/dts/spear310.dtsi                    |  2 +-
 arch/arm/boot/dts/spear320-evb.dts                 |  2 +-
 arch/arm/boot/dts/spear320.dtsi                    |  2 +-
 arch/arm/boot/dts/spear3xx.dtsi                    |  2 +-
 arch/arm/include/asm/hardware/sp810.h              |  2 +-
 arch/arm/mach-spear13xx/include/mach/debug-macro.S |  2 +-
 arch/arm/mach-spear13xx/include/mach/dma.h         |  2 +-
 arch/arm/mach-spear13xx/include/mach/generic.h     |  2 +-
 arch/arm/mach-spear13xx/include/mach/gpio.h        |  2 +-
 arch/arm/mach-spear13xx/include/mach/irqs.h        |  2 +-
 arch/arm/mach-spear13xx/include/mach/spear.h       |  2 +-
 arch/arm/mach-spear13xx/include/mach/timex.h       |  2 +-
 arch/arm/mach-spear13xx/include/mach/uncompress.h  |  2 +-
 arch/arm/mach-spear13xx/spear1310.c                |  2 +-
 arch/arm/mach-spear13xx/spear1340.c                |  2 +-
 arch/arm/mach-spear13xx/spear13xx.c                |  2 +-
 arch/arm/mach-spear3xx/include/mach/debug-macro.S  |  2 +-
 arch/arm/mach-spear3xx/include/mach/generic.h      |  2 +-
 arch/arm/mach-spear3xx/include/mach/gpio.h         |  2 +-
 arch/arm/mach-spear3xx/include/mach/irqs.h         |  2 +-
 arch/arm/mach-spear3xx/include/mach/misc_regs.h    |  2 +-
 arch/arm/mach-spear3xx/include/mach/spear.h        |  2 +-
 arch/arm/mach-spear3xx/include/mach/timex.h        |  2 +-
 arch/arm/mach-spear3xx/include/mach/uncompress.h   |  2 +-
 arch/arm/mach-spear3xx/spear300.c                  |  2 +-
 arch/arm/mach-spear3xx/spear310.c                  |  2 +-
 arch/arm/mach-spear3xx/spear320.c                  |  2 +-
 arch/arm/mach-spear3xx/spear3xx.c                  |  2 +-
 arch/arm/mach-spear6xx/include/mach/gpio.h         |  2 +-
 arch/arm/mach-spear6xx/include/mach/misc_regs.h    |  2 +-
 arch/arm/plat-spear/include/plat/debug-macro.S     |  2 +-
 arch/arm/plat-spear/include/plat/pl080.h           |  2 +-
 arch/arm/plat-spear/include/plat/shirq.h           |  2 +-
 arch/arm/plat-spear/include/plat/timex.h           |  2 +-
 arch/arm/plat-spear/include/plat/uncompress.h      |  2 +-
 arch/arm/plat-spear/pl080.c                        |  2 +-
 arch/arm/plat-spear/restart.c                      |  2 +-
 arch/arm/plat-spear/shirq.c                        |  2 +-
 drivers/ata/pata_arasan_cf.c                       |  4 ++--
 drivers/clk/spear/clk-aux-synth.c                  |  2 +-
 drivers/clk/spear/clk-frac-synth.c                 |  2 +-
 drivers/clk/spear/clk-gpt-synth.c                  |  2 +-
 drivers/clk/spear/clk-vco-pll.c                    |  2 +-
 drivers/clk/spear/clk.c                            |  2 +-
 drivers/clk/spear/clk.h                            |  2 +-
 drivers/clk/spear/spear1310_clock.c                |  2 +-
 drivers/clk/spear/spear1340_clock.c                |  2 +-
 drivers/clk/spear/spear3xx_clock.c                 |  2 +-
 drivers/clk/spear/spear6xx_clock.c                 |  2 +-
 drivers/dma/dw_dmac.c                              |  2 +-
 drivers/mfd/stmpe-i2c.c                            |  2 +-
 drivers/mfd/stmpe-spi.c                            |  4 ++--
 drivers/mmc/host/sdhci-spear.c                     |  4 ++--
 drivers/pinctrl/spear/pinctrl-spear.c              |  2 +-
 drivers/pinctrl/spear/pinctrl-spear.h              |  2 +-
 drivers/pinctrl/spear/pinctrl-spear1310.c          |  4 ++--
 drivers/pinctrl/spear/pinctrl-spear1340.c          |  4 ++--
 drivers/pinctrl/spear/pinctrl-spear300.c           |  4 ++--
 drivers/pinctrl/spear/pinctrl-spear310.c           |  4 ++--
 drivers/pinctrl/spear/pinctrl-spear320.c           |  4 ++--
 drivers/pinctrl/spear/pinctrl-spear3xx.c           |  2 +-
 drivers/pinctrl/spear/pinctrl-spear3xx.h           |  2 +-
 drivers/watchdog/sp805_wdt.c                       |  4 ++--
 include/linux/mmc/sdhci-spear.h                    |  2 +-
 include/linux/pata_arasan_cf_data.h                |  2 +-
 76 files changed, 93 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/.mailmap b/.mailmap
index 2909c33bc54e..658003aa9446 100644
--- a/.mailmap
+++ b/.mailmap
@@ -111,6 +111,7 @@ Uwe Kleine-König <ukleinek@informatik.uni-freiburg.de>
 Uwe Kleine-König <ukl@pengutronix.de>
 Uwe Kleine-König <Uwe.Kleine-Koenig@digi.com>
 Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
+Viresh Kumar <viresh.linux@gmail.com> <viresh.kumar@st.com>
 Takashi YOSHII <takashi.yoshii.zj@renesas.com>
 Yusuke Goda <goda.yusuke@renesas.com>
 Gustavo Padovan <gustavo@las.ic.unicamp.br>
diff --git a/Documentation/arm/SPEAr/overview.txt b/Documentation/arm/SPEAr/overview.txt
index 57aae7765c74..65610bf52ebf 100644
--- a/Documentation/arm/SPEAr/overview.txt
+++ b/Documentation/arm/SPEAr/overview.txt
@@ -60,4 +60,4 @@ Introduction
   Document Author
   ---------------
 
-  Viresh Kumar <viresh.kumar@st.com>, (c) 2010-2012 ST Microelectronics
+  Viresh Kumar <viresh.linux@gmail.com>, (c) 2010-2012 ST Microelectronics
diff --git a/MAINTAINERS b/MAINTAINERS
index 3e30a3afe2a4..a81b298d9c75 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -579,7 +579,7 @@ F:	drivers/net/appletalk/
 F:	net/appletalk/
 
 ARASAN COMPACT FLASH PATA CONTROLLER
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 L:	linux-ide@vger.kernel.org
 S:	Maintained
 F:	include/linux/pata_arasan_cf_data.h
@@ -5296,7 +5296,7 @@ S:	Maintained
 F:	drivers/pinctrl/
 
 PIN CONTROLLER - ST SPEAR
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 L:	spear-devel@list.st.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.st.com/spear
@@ -5873,7 +5873,7 @@ S:	Maintained
 F:	drivers/tty/serial
 
 SYNOPSYS DESIGNWARE DMAC DRIVER
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 S:	Maintained
 F:	include/linux/dw_dmac.h
 F:	drivers/dma/dw_dmac_regs.h
@@ -6021,7 +6021,7 @@ S:	Maintained
 F:	drivers/mmc/host/sdhci-s3c.c
 
 SECURE DIGITAL HOST CONTROLLER INTERFACE (SDHCI) ST SPEAR DRIVER
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 L:	spear-devel@list.st.com
 L:	linux-mmc@vger.kernel.org
 S:	Maintained
@@ -6377,7 +6377,7 @@ S:	Maintained
 F:	include/linux/compiler.h
 
 SPEAR PLATFORM SUPPORT
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 M:	Shiraz Hashim <shiraz.hashim@st.com>
 L:	spear-devel@list.st.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -6386,7 +6386,7 @@ S:	Maintained
 F:	arch/arm/plat-spear/
 
 SPEAR13XX MACHINE SUPPORT
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 M:	Shiraz Hashim <shiraz.hashim@st.com>
 L:	spear-devel@list.st.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -6395,7 +6395,7 @@ S:	Maintained
 F:	arch/arm/mach-spear13xx/
 
 SPEAR3XX MACHINE SUPPORT
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 M:	Shiraz Hashim <shiraz.hashim@st.com>
 L:	spear-devel@list.st.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -6406,7 +6406,7 @@ F:	arch/arm/mach-spear3xx/
 SPEAR6XX MACHINE SUPPORT
 M:	Rajeev Kumar <rajeev-dlh.kumar@st.com>
 M:	Shiraz Hashim <shiraz.hashim@st.com>
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 L:	spear-devel@list.st.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.st.com/spear
@@ -6414,7 +6414,7 @@ S:	Maintained
 F:	arch/arm/mach-spear6xx/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
-M:	Viresh Kumar <viresh.kumar@st.com>
+M:     Viresh Kumar <viresh.linux@gmail.com>
 L:	spear-devel@list.st.com
 L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:	http://www.st.com/spear
diff --git a/arch/arm/boot/dts/spear1310-evb.dts b/arch/arm/boot/dts/spear1310-evb.dts
index 8314e4171884..dd4358bc26e2 100644
--- a/arch/arm/boot/dts/spear1310-evb.dts
+++ b/arch/arm/boot/dts/spear1310-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr1310 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1310.dtsi b/arch/arm/boot/dts/spear1310.dtsi
index 9e61da404d57..419ea7413d23 100644
--- a/arch/arm/boot/dts/spear1310.dtsi
+++ b/arch/arm/boot/dts/spear1310.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr1310 SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1340-evb.dts b/arch/arm/boot/dts/spear1340-evb.dts
index 0d8472e5ab9f..c9a54e06fb68 100644
--- a/arch/arm/boot/dts/spear1340-evb.dts
+++ b/arch/arm/boot/dts/spear1340-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr1340 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear1340.dtsi b/arch/arm/boot/dts/spear1340.dtsi
index a26fc47a55e8..d71fe2a68f09 100644
--- a/arch/arm/boot/dts/spear1340.dtsi
+++ b/arch/arm/boot/dts/spear1340.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr1340 SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear13xx.dtsi b/arch/arm/boot/dts/spear13xx.dtsi
index 1f8e1e1481df..10dcec7e7321 100644
--- a/arch/arm/boot/dts/spear13xx.dtsi
+++ b/arch/arm/boot/dts/spear13xx.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr13xx SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear300-evb.dts b/arch/arm/boot/dts/spear300-evb.dts
index fc82b1a26458..d71b8d581e3d 100644
--- a/arch/arm/boot/dts/spear300-evb.dts
+++ b/arch/arm/boot/dts/spear300-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr300 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear300.dtsi b/arch/arm/boot/dts/spear300.dtsi
index 01c5e358fdb2..ed3627c116cc 100644
--- a/arch/arm/boot/dts/spear300.dtsi
+++ b/arch/arm/boot/dts/spear300.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr300 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear310-evb.dts b/arch/arm/boot/dts/spear310-evb.dts
index dc5e2d445a93..b00544e0cd5d 100644
--- a/arch/arm/boot/dts/spear310-evb.dts
+++ b/arch/arm/boot/dts/spear310-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr310 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear310.dtsi b/arch/arm/boot/dts/spear310.dtsi
index e47081c494d9..62fc4fb3e5f9 100644
--- a/arch/arm/boot/dts/spear310.dtsi
+++ b/arch/arm/boot/dts/spear310.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr310 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear320-evb.dts b/arch/arm/boot/dts/spear320-evb.dts
index 6308fa3bec1e..c13fd1f3b09f 100644
--- a/arch/arm/boot/dts/spear320-evb.dts
+++ b/arch/arm/boot/dts/spear320-evb.dts
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr320 Evaluation Baord
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear320.dtsi b/arch/arm/boot/dts/spear320.dtsi
index 5372ca399b1f..1f49d69595a0 100644
--- a/arch/arm/boot/dts/spear320.dtsi
+++ b/arch/arm/boot/dts/spear320.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for SPEAr320 SoC
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/boot/dts/spear3xx.dtsi b/arch/arm/boot/dts/spear3xx.dtsi
index 91072553963f..3a8bb5736928 100644
--- a/arch/arm/boot/dts/spear3xx.dtsi
+++ b/arch/arm/boot/dts/spear3xx.dtsi
@@ -1,7 +1,7 @@
 /*
  * DTS file for all SPEAr3xx SoCs
  *
- * Copyright 2012 Viresh Kumar <viresh.kumar@st.com>
+ * Copyright 2012 Viresh Kumar <viresh.linux@gmail.com>
  *
  * The code contained herein is licensed under the GNU General Public
  * License. You may obtain a copy of the GNU General Public License
diff --git a/arch/arm/include/asm/hardware/sp810.h b/arch/arm/include/asm/hardware/sp810.h
index e0d1c0cfa548..6b9b077d86b3 100644
--- a/arch/arm/include/asm/hardware/sp810.h
+++ b/arch/arm/include/asm/hardware/sp810.h
@@ -4,7 +4,7 @@
  * ARM PrimeXsys System Controller SP810 header file
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/debug-macro.S b/arch/arm/mach-spear13xx/include/mach/debug-macro.S
index ea1564609bd4..9e3ae6bfe50d 100644
--- a/arch/arm/mach-spear13xx/include/mach/debug-macro.S
+++ b/arch/arm/mach-spear13xx/include/mach/debug-macro.S
@@ -4,7 +4,7 @@
  * Debugging macro include header spear13xx machine family
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/dma.h b/arch/arm/mach-spear13xx/include/mach/dma.h
index 383ab04dc6c9..d50bdb605925 100644
--- a/arch/arm/mach-spear13xx/include/mach/dma.h
+++ b/arch/arm/mach-spear13xx/include/mach/dma.h
@@ -4,7 +4,7 @@
  * DMA information for SPEAr13xx machine family
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/generic.h b/arch/arm/mach-spear13xx/include/mach/generic.h
index 6d8c45b9f298..dac57fd0cdfd 100644
--- a/arch/arm/mach-spear13xx/include/mach/generic.h
+++ b/arch/arm/mach-spear13xx/include/mach/generic.h
@@ -4,7 +4,7 @@
  * spear13xx machine family generic header file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/gpio.h b/arch/arm/mach-spear13xx/include/mach/gpio.h
index cd6f4f86a56b..85f176311f63 100644
--- a/arch/arm/mach-spear13xx/include/mach/gpio.h
+++ b/arch/arm/mach-spear13xx/include/mach/gpio.h
@@ -4,7 +4,7 @@
  * GPIO macros for SPEAr13xx machine family
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/irqs.h b/arch/arm/mach-spear13xx/include/mach/irqs.h
index f542a24aa5f2..271a62b4cd31 100644
--- a/arch/arm/mach-spear13xx/include/mach/irqs.h
+++ b/arch/arm/mach-spear13xx/include/mach/irqs.h
@@ -4,7 +4,7 @@
  * IRQ helper macros for spear13xx machine family
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/spear.h b/arch/arm/mach-spear13xx/include/mach/spear.h
index 30c57ef72686..65f27def239b 100644
--- a/arch/arm/mach-spear13xx/include/mach/spear.h
+++ b/arch/arm/mach-spear13xx/include/mach/spear.h
@@ -4,7 +4,7 @@
  * spear13xx Machine family specific definition
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/timex.h b/arch/arm/mach-spear13xx/include/mach/timex.h
index 31af3e8d976e..3a58b8284a6a 100644
--- a/arch/arm/mach-spear13xx/include/mach/timex.h
+++ b/arch/arm/mach-spear13xx/include/mach/timex.h
@@ -4,7 +4,7 @@
  * SPEAr3XX machine family specific timex definitions
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/include/mach/uncompress.h b/arch/arm/mach-spear13xx/include/mach/uncompress.h
index c7840896ae6e..70fe72f05dea 100644
--- a/arch/arm/mach-spear13xx/include/mach/uncompress.h
+++ b/arch/arm/mach-spear13xx/include/mach/uncompress.h
@@ -4,7 +4,7 @@
  * Serial port stubs for kernel decompress status messages
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/spear1310.c b/arch/arm/mach-spear13xx/spear1310.c
index fefd15b2f380..732d29bc7330 100644
--- a/arch/arm/mach-spear13xx/spear1310.c
+++ b/arch/arm/mach-spear13xx/spear1310.c
@@ -4,7 +4,7 @@
  * SPEAr1310 machine source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/spear1340.c b/arch/arm/mach-spear13xx/spear1340.c
index ee38cbc56869..81e4ed76ad06 100644
--- a/arch/arm/mach-spear13xx/spear1340.c
+++ b/arch/arm/mach-spear13xx/spear1340.c
@@ -4,7 +4,7 @@
  * SPEAr1340 machine source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear13xx/spear13xx.c b/arch/arm/mach-spear13xx/spear13xx.c
index 50b349ae863d..cf936b106e27 100644
--- a/arch/arm/mach-spear13xx/spear13xx.c
+++ b/arch/arm/mach-spear13xx/spear13xx.c
@@ -4,7 +4,7 @@
  * SPEAr13XX machines common source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/debug-macro.S b/arch/arm/mach-spear3xx/include/mach/debug-macro.S
index 590519f10d6e..0a6381fad5d9 100644
--- a/arch/arm/mach-spear3xx/include/mach/debug-macro.S
+++ b/arch/arm/mach-spear3xx/include/mach/debug-macro.S
@@ -4,7 +4,7 @@
  * Debugging macro include header spear3xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar<viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/generic.h b/arch/arm/mach-spear3xx/include/mach/generic.h
index 4a95b9453c2a..ce19113ca791 100644
--- a/arch/arm/mach-spear3xx/include/mach/generic.h
+++ b/arch/arm/mach-spear3xx/include/mach/generic.h
@@ -4,7 +4,7 @@
  * SPEAr3XX machine family generic header file
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar<viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/gpio.h b/arch/arm/mach-spear3xx/include/mach/gpio.h
index 451b2081bfc9..2ac74c6db7f1 100644
--- a/arch/arm/mach-spear3xx/include/mach/gpio.h
+++ b/arch/arm/mach-spear3xx/include/mach/gpio.h
@@ -4,7 +4,7 @@
  * GPIO macros for SPEAr3xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar<viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/irqs.h b/arch/arm/mach-spear3xx/include/mach/irqs.h
index 51bd62a0254c..803de76f5f36 100644
--- a/arch/arm/mach-spear3xx/include/mach/irqs.h
+++ b/arch/arm/mach-spear3xx/include/mach/irqs.h
@@ -4,7 +4,7 @@
  * IRQ helper macros for SPEAr3xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/misc_regs.h b/arch/arm/mach-spear3xx/include/mach/misc_regs.h
index 18e2ac576f25..6309bf68d6f8 100644
--- a/arch/arm/mach-spear3xx/include/mach/misc_regs.h
+++ b/arch/arm/mach-spear3xx/include/mach/misc_regs.h
@@ -4,7 +4,7 @@
  * Miscellaneous registers definitions for SPEAr3xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/spear.h b/arch/arm/mach-spear3xx/include/mach/spear.h
index 51eb953148a9..8cca95193d4d 100644
--- a/arch/arm/mach-spear3xx/include/mach/spear.h
+++ b/arch/arm/mach-spear3xx/include/mach/spear.h
@@ -4,7 +4,7 @@
  * SPEAr3xx Machine family specific definition
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/timex.h b/arch/arm/mach-spear3xx/include/mach/timex.h
index a38cc9de876f..9f5d08bd0c44 100644
--- a/arch/arm/mach-spear3xx/include/mach/timex.h
+++ b/arch/arm/mach-spear3xx/include/mach/timex.h
@@ -4,7 +4,7 @@
  * SPEAr3XX machine family specific timex definitions
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/include/mach/uncompress.h b/arch/arm/mach-spear3xx/include/mach/uncompress.h
index 53ba8bbc0dfa..b909b011f7c8 100644
--- a/arch/arm/mach-spear3xx/include/mach/uncompress.h
+++ b/arch/arm/mach-spear3xx/include/mach/uncompress.h
@@ -4,7 +4,7 @@
  * Serial port stubs for kernel decompress status messages
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/spear300.c b/arch/arm/mach-spear3xx/spear300.c
index f74a05bdb829..0f882ecb7d81 100644
--- a/arch/arm/mach-spear3xx/spear300.c
+++ b/arch/arm/mach-spear3xx/spear300.c
@@ -4,7 +4,7 @@
  * SPEAr300 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/spear310.c b/arch/arm/mach-spear3xx/spear310.c
index 84dfb0900747..bbcf4571d361 100644
--- a/arch/arm/mach-spear3xx/spear310.c
+++ b/arch/arm/mach-spear3xx/spear310.c
@@ -4,7 +4,7 @@
  * SPEAr310 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/spear320.c b/arch/arm/mach-spear3xx/spear320.c
index a88fa841d29d..88d483bcd66a 100644
--- a/arch/arm/mach-spear3xx/spear320.c
+++ b/arch/arm/mach-spear3xx/spear320.c
@@ -4,7 +4,7 @@
  * SPEAr320 machine source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear3xx/spear3xx.c b/arch/arm/mach-spear3xx/spear3xx.c
index f22419ed74a8..0f41bd1c47c3 100644
--- a/arch/arm/mach-spear3xx/spear3xx.c
+++ b/arch/arm/mach-spear3xx/spear3xx.c
@@ -4,7 +4,7 @@
  * SPEAr3XX machines common source file
  *
  * Copyright (C) 2009-2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear6xx/include/mach/gpio.h b/arch/arm/mach-spear6xx/include/mach/gpio.h
index 3a789dbb69f7..d42cefc0356d 100644
--- a/arch/arm/mach-spear6xx/include/mach/gpio.h
+++ b/arch/arm/mach-spear6xx/include/mach/gpio.h
@@ -4,7 +4,7 @@
  * GPIO macros for SPEAr6xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/mach-spear6xx/include/mach/misc_regs.h b/arch/arm/mach-spear6xx/include/mach/misc_regs.h
index 179e45774b3a..c34acc201d34 100644
--- a/arch/arm/mach-spear6xx/include/mach/misc_regs.h
+++ b/arch/arm/mach-spear6xx/include/mach/misc_regs.h
@@ -4,7 +4,7 @@
  * Miscellaneous registers definitions for SPEAr6xx machine family
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/include/plat/debug-macro.S b/arch/arm/plat-spear/include/plat/debug-macro.S
index ab3de721c5db..75b05ad0fbad 100644
--- a/arch/arm/plat-spear/include/plat/debug-macro.S
+++ b/arch/arm/plat-spear/include/plat/debug-macro.S
@@ -4,7 +4,7 @@
  * Debugging macro include header for spear platform
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/include/plat/pl080.h b/arch/arm/plat-spear/include/plat/pl080.h
index e14a3e4932f9..2bc6b54460a8 100644
--- a/arch/arm/plat-spear/include/plat/pl080.h
+++ b/arch/arm/plat-spear/include/plat/pl080.h
@@ -4,7 +4,7 @@
  * DMAC pl080 definitions for SPEAr platform
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/include/plat/shirq.h b/arch/arm/plat-spear/include/plat/shirq.h
index 03ed8b585dcf..88a7fbd24793 100644
--- a/arch/arm/plat-spear/include/plat/shirq.h
+++ b/arch/arm/plat-spear/include/plat/shirq.h
@@ -4,7 +4,7 @@
  * SPEAr platform shared irq layer header file
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/include/plat/timex.h b/arch/arm/plat-spear/include/plat/timex.h
index 914d09dd50fd..ef95e5b780bd 100644
--- a/arch/arm/plat-spear/include/plat/timex.h
+++ b/arch/arm/plat-spear/include/plat/timex.h
@@ -4,7 +4,7 @@
  * SPEAr platform specific timex definitions
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/include/plat/uncompress.h b/arch/arm/plat-spear/include/plat/uncompress.h
index 6dd455bafdfd..2ce6cb17a98b 100644
--- a/arch/arm/plat-spear/include/plat/uncompress.h
+++ b/arch/arm/plat-spear/include/plat/uncompress.h
@@ -4,7 +4,7 @@
  * Serial port stubs for kernel decompress status messages
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/pl080.c b/arch/arm/plat-spear/pl080.c
index a56a067717c1..12cf27f935f9 100644
--- a/arch/arm/plat-spear/pl080.c
+++ b/arch/arm/plat-spear/pl080.c
@@ -4,7 +4,7 @@
  * DMAC pl080 definitions for SPEAr platform
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/restart.c b/arch/arm/plat-spear/restart.c
index ea0a61302b7e..4f990115b1bd 100644
--- a/arch/arm/plat-spear/restart.c
+++ b/arch/arm/plat-spear/restart.c
@@ -4,7 +4,7 @@
  * SPEAr platform specific restart functions
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/arch/arm/plat-spear/shirq.c b/arch/arm/plat-spear/shirq.c
index 961fb7261243..853e891e1184 100644
--- a/arch/arm/plat-spear/shirq.c
+++ b/arch/arm/plat-spear/shirq.c
@@ -4,7 +4,7 @@
  * SPEAr platform shared irq layer source file
  *
  * Copyright (C) 2009 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c
index 3239517f4d90..ac6a5beb28f3 100644
--- a/drivers/ata/pata_arasan_cf.c
+++ b/drivers/ata/pata_arasan_cf.c
@@ -4,7 +4,7 @@
  * Arasan Compact Flash host controller source file
  *
  * Copyright (C) 2011 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -959,7 +959,7 @@ static struct platform_driver arasan_cf_driver = {
 
 module_platform_driver(arasan_cf_driver);
 
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_DESCRIPTION("Arasan ATA Compact Flash driver");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:" DRIVER_NAME);
diff --git a/drivers/clk/spear/clk-aux-synth.c b/drivers/clk/spear/clk-aux-synth.c
index af34074e702b..6756e7c3bc07 100644
--- a/drivers/clk/spear/clk-aux-synth.c
+++ b/drivers/clk/spear/clk-aux-synth.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/clk-frac-synth.c b/drivers/clk/spear/clk-frac-synth.c
index 4dbdb3fe18e0..958aa3ad1d60 100644
--- a/drivers/clk/spear/clk-frac-synth.c
+++ b/drivers/clk/spear/clk-frac-synth.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/clk-gpt-synth.c b/drivers/clk/spear/clk-gpt-synth.c
index b471c9762a97..1afc18c4effc 100644
--- a/drivers/clk/spear/clk-gpt-synth.c
+++ b/drivers/clk/spear/clk-gpt-synth.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/clk-vco-pll.c b/drivers/clk/spear/clk-vco-pll.c
index dcd4bdf4b0d9..5f1b6badeb15 100644
--- a/drivers/clk/spear/clk-vco-pll.c
+++ b/drivers/clk/spear/clk-vco-pll.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/clk.c b/drivers/clk/spear/clk.c
index 376d4e5ff326..7cd63788d546 100644
--- a/drivers/clk/spear/clk.c
+++ b/drivers/clk/spear/clk.c
@@ -1,6 +1,6 @@
 /*
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/clk.h b/drivers/clk/spear/clk.h
index 3321c46a071c..931737677dfa 100644
--- a/drivers/clk/spear/clk.h
+++ b/drivers/clk/spear/clk.h
@@ -2,7 +2,7 @@
  * Clock framework definitions for SPEAr platform
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/spear1310_clock.c b/drivers/clk/spear/spear1310_clock.c
index 42b68df9aeef..8f05652d53e6 100644
--- a/drivers/clk/spear/spear1310_clock.c
+++ b/drivers/clk/spear/spear1310_clock.c
@@ -4,7 +4,7 @@
  * SPEAr1310 machine clock framework source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/spear1340_clock.c b/drivers/clk/spear/spear1340_clock.c
index f130919d5bf8..e3ea72162236 100644
--- a/drivers/clk/spear/spear1340_clock.c
+++ b/drivers/clk/spear/spear1340_clock.c
@@ -4,7 +4,7 @@
  * SPEAr1340 machine clock framework source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/spear3xx_clock.c b/drivers/clk/spear/spear3xx_clock.c
index 440bb3e4c971..01dd6daff2a1 100644
--- a/drivers/clk/spear/spear3xx_clock.c
+++ b/drivers/clk/spear/spear3xx_clock.c
@@ -2,7 +2,7 @@
  * SPEAr3xx machines clock framework source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/clk/spear/spear6xx_clock.c b/drivers/clk/spear/spear6xx_clock.c
index f9a20b382304..554d64b062a1 100644
--- a/drivers/clk/spear/spear6xx_clock.c
+++ b/drivers/clk/spear/spear6xx_clock.c
@@ -2,7 +2,7 @@
  * SPEAr6xx machines clock framework source file
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
index e23dc82d43ac..721296157577 100644
--- a/drivers/dma/dw_dmac.c
+++ b/drivers/dma/dw_dmac.c
@@ -1626,4 +1626,4 @@ module_exit(dw_exit);
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
 MODULE_AUTHOR("Haavard Skinnemoen (Atmel)");
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
diff --git a/drivers/mfd/stmpe-i2c.c b/drivers/mfd/stmpe-i2c.c
index 373f423b1181..947a06a1845f 100644
--- a/drivers/mfd/stmpe-i2c.c
+++ b/drivers/mfd/stmpe-i2c.c
@@ -6,7 +6,7 @@
  *
  * License Terms: GNU General Public License, version 2
  * Author: Rabin Vincent <rabin.vincent@stericsson.com> for ST-Ericsson
- * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics
+ * Author: Viresh Kumar <viresh.linux@gmail.com> for ST Microelectronics
  */
 
 #include <linux/i2c.h>
diff --git a/drivers/mfd/stmpe-spi.c b/drivers/mfd/stmpe-spi.c
index afd459013ecb..9edfe864cc05 100644
--- a/drivers/mfd/stmpe-spi.c
+++ b/drivers/mfd/stmpe-spi.c
@@ -4,7 +4,7 @@
  * Copyright (C) ST Microelectronics SA 2011
  *
  * License Terms: GNU General Public License, version 2
- * Author: Viresh Kumar <viresh.kumar@st.com> for ST Microelectronics
+ * Author: Viresh Kumar <viresh.linux@gmail.com> for ST Microelectronics
  */
 
 #include <linux/spi/spi.h>
@@ -146,4 +146,4 @@ module_exit(stmpe_exit);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("STMPE MFD SPI Interface Driver");
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 1fe32dfa7cd4..423da8194cd8 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -4,7 +4,7 @@
  * Support of SDHCI platform devices for spear soc family
  *
  * Copyright (C) 2010 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * Inspired by sdhci-pltfm.c
  *
@@ -289,5 +289,5 @@ static struct platform_driver sdhci_driver = {
 module_platform_driver(sdhci_driver);
 
 MODULE_DESCRIPTION("SPEAr Secure Digital Host Controller Interface driver");
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c
index 5ae50aadf885..b3f6b2873fdd 100644
--- a/drivers/pinctrl/spear/pinctrl-spear.c
+++ b/drivers/pinctrl/spear/pinctrl-spear.c
@@ -2,7 +2,7 @@
  * Driver for the ST Microelectronics SPEAr pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * Inspired from:
  * - U300 Pinctl drivers
diff --git a/drivers/pinctrl/spear/pinctrl-spear.h b/drivers/pinctrl/spear/pinctrl-spear.h
index 9155783bb47f..d950eb78d939 100644
--- a/drivers/pinctrl/spear/pinctrl-spear.h
+++ b/drivers/pinctrl/spear/pinctrl-spear.h
@@ -2,7 +2,7 @@
  * Driver header file for the ST Microelectronics SPEAr pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/pinctrl/spear/pinctrl-spear1310.c b/drivers/pinctrl/spear/pinctrl-spear1310.c
index fff168be7f00..d6cca8c81b92 100644
--- a/drivers/pinctrl/spear/pinctrl-spear1310.c
+++ b/drivers/pinctrl/spear/pinctrl-spear1310.c
@@ -2,7 +2,7 @@
  * Driver for the ST Microelectronics SPEAr1310 pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -2192,7 +2192,7 @@ static void __exit spear1310_pinctrl_exit(void)
 }
 module_exit(spear1310_pinctrl_exit);
 
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_DESCRIPTION("ST Microelectronics SPEAr1310 pinctrl driver");
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(of, spear1310_pinctrl_of_match);
diff --git a/drivers/pinctrl/spear/pinctrl-spear1340.c b/drivers/pinctrl/spear/pinctrl-spear1340.c
index a8ab2a6f51bf..a0eb057e55bd 100644
--- a/drivers/pinctrl/spear/pinctrl-spear1340.c
+++ b/drivers/pinctrl/spear/pinctrl-spear1340.c
@@ -2,7 +2,7 @@
  * Driver for the ST Microelectronics SPEAr1340 pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -1983,7 +1983,7 @@ static void __exit spear1340_pinctrl_exit(void)
 }
 module_exit(spear1340_pinctrl_exit);
 
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_DESCRIPTION("ST Microelectronics SPEAr1340 pinctrl driver");
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(of, spear1340_pinctrl_of_match);
diff --git a/drivers/pinctrl/spear/pinctrl-spear300.c b/drivers/pinctrl/spear/pinctrl-spear300.c
index 9c82a35e4e78..4dfc2849b172 100644
--- a/drivers/pinctrl/spear/pinctrl-spear300.c
+++ b/drivers/pinctrl/spear/pinctrl-spear300.c
@@ -2,7 +2,7 @@
  * Driver for the ST Microelectronics SPEAr300 pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -702,7 +702,7 @@ static void __exit spear300_pinctrl_exit(void)
 }
 module_exit(spear300_pinctrl_exit);
 
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_DESCRIPTION("ST Microelectronics SPEAr300 pinctrl driver");
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(of, spear300_pinctrl_of_match);
diff --git a/drivers/pinctrl/spear/pinctrl-spear310.c b/drivers/pinctrl/spear/pinctrl-spear310.c
index 1a9707605125..96883693fb7e 100644
--- a/drivers/pinctrl/spear/pinctrl-spear310.c
+++ b/drivers/pinctrl/spear/pinctrl-spear310.c
@@ -2,7 +2,7 @@
  * Driver for the ST Microelectronics SPEAr310 pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -425,7 +425,7 @@ static void __exit spear310_pinctrl_exit(void)
 }
 module_exit(spear310_pinctrl_exit);
 
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_DESCRIPTION("ST Microelectronics SPEAr310 pinctrl driver");
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(of, SPEAr310_pinctrl_of_match);
diff --git a/drivers/pinctrl/spear/pinctrl-spear320.c b/drivers/pinctrl/spear/pinctrl-spear320.c
index de726e6c283a..020b1e0bdb3e 100644
--- a/drivers/pinctrl/spear/pinctrl-spear320.c
+++ b/drivers/pinctrl/spear/pinctrl-spear320.c
@@ -2,7 +2,7 @@
  * Driver for the ST Microelectronics SPEAr320 pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
@@ -3462,7 +3462,7 @@ static void __exit spear320_pinctrl_exit(void)
 }
 module_exit(spear320_pinctrl_exit);
 
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_DESCRIPTION("ST Microelectronics SPEAr320 pinctrl driver");
 MODULE_LICENSE("GPL v2");
 MODULE_DEVICE_TABLE(of, spear320_pinctrl_of_match);
diff --git a/drivers/pinctrl/spear/pinctrl-spear3xx.c b/drivers/pinctrl/spear/pinctrl-spear3xx.c
index 91c883bc46a6..0242378f7cb8 100644
--- a/drivers/pinctrl/spear/pinctrl-spear3xx.c
+++ b/drivers/pinctrl/spear/pinctrl-spear3xx.c
@@ -2,7 +2,7 @@
  * Driver for the ST Microelectronics SPEAr3xx pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/pinctrl/spear/pinctrl-spear3xx.h b/drivers/pinctrl/spear/pinctrl-spear3xx.h
index 5d5fdd8df7b8..31f44347f17c 100644
--- a/drivers/pinctrl/spear/pinctrl-spear3xx.h
+++ b/drivers/pinctrl/spear/pinctrl-spear3xx.h
@@ -2,7 +2,7 @@
  * Header file for the ST Microelectronics SPEAr3xx pinmux
  *
  * Copyright (C) 2012 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c
index afcd13676542..e4841c36798b 100644
--- a/drivers/watchdog/sp805_wdt.c
+++ b/drivers/watchdog/sp805_wdt.c
@@ -4,7 +4,7 @@
  * Watchdog driver for ARM SP805 watchdog module
  *
  * Copyright (C) 2010 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2 or later. This program is licensed "as is" without any
@@ -331,6 +331,6 @@ static struct amba_driver sp805_wdt_driver = {
 
 module_amba_driver(sp805_wdt_driver);
 
-MODULE_AUTHOR("Viresh Kumar <viresh.kumar@st.com>");
+MODULE_AUTHOR("Viresh Kumar <viresh.linux@gmail.com>");
 MODULE_DESCRIPTION("ARM SP805 Watchdog Driver");
 MODULE_LICENSE("GPL");
diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h
index 5cdc96da9dd5..e78c0e236e9d 100644
--- a/include/linux/mmc/sdhci-spear.h
+++ b/include/linux/mmc/sdhci-spear.h
@@ -4,7 +4,7 @@
  * SDHCI declarations specific to ST SPEAr platform
  *
  * Copyright (C) 2010 ST Microelectronics
- * Viresh Kumar<viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
diff --git a/include/linux/pata_arasan_cf_data.h b/include/linux/pata_arasan_cf_data.h
index a6ee9aa898bb..a7b4fc386e63 100644
--- a/include/linux/pata_arasan_cf_data.h
+++ b/include/linux/pata_arasan_cf_data.h
@@ -4,7 +4,7 @@
  * Arasan Compact Flash host controller platform data header file
  *
  * Copyright (C) 2011 ST Microelectronics
- * Viresh Kumar <viresh.kumar@st.com>
+ * Viresh Kumar <viresh.linux@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public
  * License version 2. This program is licensed "as is" without any
-- 
cgit 


From d3decf3a0c1d28c80c76be170373f4c7a7217f09 Mon Sep 17 00:00:00 2001
From: Ozan Çağlayan <ozancag@gmail.com>
Date: Thu, 14 Jun 2012 15:02:35 +0300
Subject: vga_switcheroo: Add include guard
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Guard vga_switcheroo.h against multiple inclusion.

Signed-off-by: Ozan Çağlayan <ozancag@gmail.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 include/linux/vga_switcheroo.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h
index d844b7790ea6..ddb419cf4530 100644
--- a/include/linux/vga_switcheroo.h
+++ b/include/linux/vga_switcheroo.h
@@ -7,6 +7,9 @@
  * vga_switcheroo.h - Support for laptop with dual GPU using one set of outputs
  */
 
+#ifndef _LINUX_VGA_SWITCHEROO_H_
+#define _LINUX_VGA_SWITCHEROO_H_
+
 #include <linux/fb.h>
 
 struct pci_dev;
@@ -73,3 +76,4 @@ static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return
 
 
 #endif
+#endif /* _LINUX_VGA_SWITCHEROO_H_ */
-- 
cgit