From e73f843a3235a19de38359c91586e9eadef12238 Mon Sep 17 00:00:00 2001 From: Suresh Jayaraman Date: Tue, 12 Jun 2012 07:15:50 +0530 Subject: cifs: fix parsing of password mount option The double delimiter check that allows a comma in the password parsing code is unconditional. We set "tmp_end" to the end of the string and we continue to check for double delimiter. In the case where the password doesn't contain a comma we end up setting tmp_end to NULL and eventually setting "options" to "end". This results in the premature termination of the options string and hence the values of UNCip and UNC are being set to NULL. This results in mount failure with "Connecting to DFS root not implemented yet" error. This error is usually not noticeable as we have password as the last option in the superblock mountdata. But when we call expand_dfs_referral() from cifs_mount() and try to compose mount options for the submount, the resulting mountdata will be of the form ",ver=1,user=foo,pass=bar,ip=x.x.x.x,unc=\\server\share" and hence results in the above error. This bug has been seen with older NAS servers running Samba 3.0.24. Fix this by moving the double delimiter check inside the conditional loop. Changes since -v1 - removed the wrong strlen() micro optimization. Signed-off-by: Suresh Jayaraman Acked-by: Sachin Prabhu Cc: stable@vger.kernel.org [3.1+] Signed-off-by: Steve French --- fs/cifs/connect.c | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 78db68a5cf44..5b3840725d01 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1653,24 +1653,26 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, * If yes, we have encountered a double deliminator * reset the NULL character to the deliminator */ - if (tmp_end < end && tmp_end[1] == delim) + if (tmp_end < end && tmp_end[1] == delim) { tmp_end[0] = delim; - /* Keep iterating until we get to a single deliminator * OR the end */ - while ((tmp_end = strchr(tmp_end, delim)) != NULL && - (tmp_end[1] == delim)) { - tmp_end = (char *) &tmp_end[2]; - } + /* Keep iterating until we get to a single + * deliminator OR the end + */ + while ((tmp_end = strchr(tmp_end, delim)) + != NULL && (tmp_end[1] == delim)) { + tmp_end = (char *) &tmp_end[2]; + } - /* Reset var options to point to next element */ - if (tmp_end) { - tmp_end[0] = '\0'; - options = (char *) &tmp_end[1]; - } else - /* Reached the end of the mount option string */ - options = end; + /* Reset var options to point to next element */ + if (tmp_end) { + tmp_end[0] = '\0'; + options = (char *) &tmp_end[1]; + } else + /* Reached the end of the mount option + * string */ + options = end; + } /* Now build new password string */ temp_len = strlen(value); -- cgit From 047fe3605235888f3ebcda0c728cb31937eadfe6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Jun 2012 15:24:40 +0200 Subject: splice: fix racy pipe->buffers uses Dave Jones reported a kernel BUG at mm/slub.c:3474! triggered by splice_shrink_spd() called from vmsplice_to_pipe(). Commit 35f3d14dbbc5 (pipe: add support for shrinking and growing pipes) added the capability to adjust pipe->buffers. The problem is that some paths don't hold the pipe mutex and assume pipe->buffers doesn't change for their duration. Fix this by adding a nr_pages_max field to struct splice_pipe_desc and using it in place of pipe->buffers where appropriate. splice_shrink_spd() loses its struct pipe_inode_info argument. 
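A minimal userspace sketch of the pattern the patch applies (illustration only, not the kernel code; the pipe_like/spd_like names are invented): read the concurrently-modified field exactly once, store the snapshot in the descriptor, and size every later allocation, loop bound and release from that snapshot.

#include <stdatomic.h>
#include <stdlib.h>

struct pipe_like { _Atomic unsigned int buffers; };  /* shared; may change at any time */
struct spd_like { void **pages; unsigned int nr_pages_max; };

static int grow_spd(struct pipe_like *pipe, struct spd_like *spd)
{
	/* one snapshot, as ACCESS_ONCE(pipe->buffers) does in the patch */
	unsigned int buffers = atomic_load(&pipe->buffers);

	spd->nr_pages_max = buffers;          /* all later users read this copy */
	spd->pages = calloc(buffers, sizeof(void *));
	return spd->pages ? 0 : -1;
}

static void shrink_spd(struct spd_like *spd)
{
	/* sized by the snapshot, so the release always matches the
	 * allocation even if pipe->buffers was resized in between */
	free(spd->pages);
}

The slub BUG came from exactly this mismatch: the arrays were allocated under one value of pipe->buffers and released under another.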
Reported-by: Dave Jones Signed-off-by: Eric Dumazet Cc: Jens Axboe Cc: Alexander Viro Cc: Tom Herbert Cc: stable # 2.6.35 Tested-by: Dave Jones Signed-off-by: Jens Axboe --- fs/splice.c | 35 ++++++++++++++++++++--------------- include/linux/splice.h | 8 ++++---- kernel/relay.c | 5 +++-- kernel/trace/trace.c | 6 ++++-- mm/shmem.c | 3 ++- net/core/skbuff.c | 1 + 6 files changed, 34 insertions(+), 24 deletions(-) (limited to 'fs') diff --git a/fs/splice.c b/fs/splice.c index c9f1318a3b82..7bf08fa22ec9 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -273,13 +273,16 @@ void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) * Check if we need to grow the arrays holding pages and partial page * descriptions. */ -int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) +int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + unsigned int buffers = ACCESS_ONCE(pipe->buffers); + + spd->nr_pages_max = buffers; + if (buffers <= PIPE_DEF_BUFFERS) return 0; - spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); - spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); + spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL); + spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL); if (spd->pages && spd->partial) return 0; @@ -289,10 +292,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) return -ENOMEM; } -void splice_shrink_spd(struct pipe_inode_info *pipe, - struct splice_pipe_desc *spd) +void splice_shrink_spd(struct splice_pipe_desc *spd) { - if (pipe->buffers <= PIPE_DEF_BUFFERS) + if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) return; kfree(spd->pages); @@ -315,6 +317,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &page_cache_pipe_buf_ops, .spd_release = spd_release_page, @@ -326,7 +329,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, index = *ppos >> PAGE_CACHE_SHIFT; loff = *ppos & ~PAGE_CACHE_MASK; req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - nr_pages = min(req_pages, pipe->buffers); + nr_pages = min(req_pages, spd.nr_pages_max); /* * Lookup the (hopefully) full range of pages we need. 
@@ -497,7 +500,7 @@ fill_it: if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return error; } @@ -598,6 +601,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &default_pipe_buf_ops, .spd_release = spd_release_page, @@ -608,8 +612,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, res = -ENOMEM; vec = __vec; - if (pipe->buffers > PIPE_DEF_BUFFERS) { - vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); + if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { + vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL); if (!vec) goto shrink_ret; } @@ -617,7 +621,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, offset = *ppos & ~PAGE_CACHE_MASK; nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { + for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) { struct page *page; page = alloc_page(GFP_USER); @@ -665,7 +669,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos, shrink_ret: if (vec != __vec) kfree(vec); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return res; err: @@ -1614,6 +1618,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &user_page_pipe_buf_ops, .spd_release = spd_release_page, @@ -1629,13 +1634,13 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov, spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, spd.partial, false, - pipe->buffers); + spd.nr_pages_max); if (spd.nr_pages <= 0) ret = spd.nr_pages; else ret = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return ret; } diff --git a/include/linux/splice.h b/include/linux/splice.h index 26e5b613deda..09a545a7dfa3 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -51,7 +51,8 @@ struct partial_page { struct splice_pipe_desc { struct page **pages; /* page map */ struct partial_page *partial; /* pages[] may not be contig */ - int nr_pages; /* number of pages in map */ + int nr_pages; /* number of populated pages in map */ + unsigned int nr_pages_max; /* pages[] & partial[] arrays size */ unsigned int flags; /* splice flags */ const struct pipe_buf_operations *ops;/* ops associated with output pipe */ void (*spd_release)(struct splice_pipe_desc *, unsigned int); @@ -85,9 +86,8 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, /* * for dynamic pipe sizing */ -extern int splice_grow_spd(struct pipe_inode_info *, struct splice_pipe_desc *); -extern void splice_shrink_spd(struct pipe_inode_info *, - struct splice_pipe_desc *); +extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *); +extern void splice_shrink_spd(struct splice_pipe_desc *); extern void spd_release_page(struct splice_pipe_desc *, unsigned int); extern const struct pipe_buf_operations page_cache_pipe_buf_ops; diff --git a/kernel/relay.c b/kernel/relay.c index ab56a1764d4d..e8cd2027abbd 100644 --- a/kernel/relay.c +++ b/kernel/relay.c @@ -1235,6 +1235,7 @@ static ssize_t subbuf_splice_actor(struct file *in, struct splice_pipe_desc spd = { .pages = pages, .nr_pages = 0, + .nr_pages_max = 
PIPE_DEF_BUFFERS, .partial = partial, .flags = flags, .ops = &relay_pipe_buf_ops, @@ -1302,8 +1303,8 @@ static ssize_t subbuf_splice_actor(struct file *in, ret += padding; out: - splice_shrink_spd(pipe, &spd); - return ret; + splice_shrink_spd(&spd); + return ret; } static ssize_t relay_file_splice_read(struct file *in, diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 68032c6177db..288488082224 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3609,6 +3609,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, .pages = pages_def, .partial = partial_def, .nr_pages = 0, /* This gets updated below. */ + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &tracing_pipe_buf_ops, .spd_release = tracing_spd_release_pipe, @@ -3680,7 +3681,7 @@ static ssize_t tracing_splice_read_pipe(struct file *filp, ret = splice_to_pipe(pipe, &spd); out: - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); return ret; out_err: @@ -4231,6 +4232,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages_def, .partial = partial_def, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &buffer_pipe_buf_ops, .spd_release = buffer_spd_release, @@ -4318,7 +4320,7 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, } ret = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); out: return ret; } diff --git a/mm/shmem.c b/mm/shmem.c index 585bd220a21e..c244e93a70fa 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1577,6 +1577,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = PIPE_DEF_BUFFERS, .flags = flags, .ops = &page_cache_pipe_buf_ops, .spd_release = spd_release_page, @@ -1665,7 +1666,7 @@ static ssize_t shmem_file_splice_read(struct file *in, loff_t *ppos, if (spd.nr_pages) error = splice_to_pipe(pipe, &spd); - splice_shrink_spd(pipe, &spd); + splice_shrink_spd(&spd); if (error > 0) { *ppos += error; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 016694d62484..bac3c5756d63 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1755,6 +1755,7 @@ int skb_splice_bits(struct sk_buff *skb, unsigned int offset, struct splice_pipe_desc spd = { .pages = pages, .partial = partial, + .nr_pages_max = MAX_SKB_FRAGS, .flags = flags, .ops = &sock_pipe_buf_ops, .spd_release = sock_spd_release, -- cgit From 1cfb7271076a265b7c2cbbf9cf5c2ae060a24385 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Sat, 9 Jun 2012 12:08:23 +0300 Subject: UBIFS: fix assertion The asserts here never check anything because they use '|' instead of '&'. Now, if the flags are not set, it prints a warning and a stack trace. 
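Why the old asserts could never fire can be shown in a few lines of standalone C (a sketch, not UBIFS code): OR-ing a non-zero flag constant into any value yields a non-zero result, so the '|' form is unconditionally true, while the '&' form actually tests the bit.

#include <assert.h>

#define LPROPS_TAKEN 0x04

int main(void)
{
	int flags = 0;                    /* bit deliberately not set */

	assert(flags | LPROPS_TAKEN);     /* buggy form: 0 | 0x04 is nonzero, always passes */
	/* assert(flags & LPROPS_TAKEN); */  /* fixed form: would fail here, as intended */
	return 0;
}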
Signed-off-by: Dan Carpenter Signed-off-by: Artem Bityutskiy --- fs/ubifs/find.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/find.c b/fs/ubifs/find.c index 2559d174e004..28ec13af28d9 100644 --- a/fs/ubifs/find.c +++ b/fs/ubifs/find.c @@ -939,8 +939,8 @@ static int find_dirtiest_idx_leb(struct ubifs_info *c) } dbg_find("LEB %d, dirty %d and free %d flags %#x", lp->lnum, lp->dirty, lp->free, lp->flags); - ubifs_assert(lp->flags | LPROPS_TAKEN); - ubifs_assert(lp->flags | LPROPS_INDEX); + ubifs_assert(lp->flags & LPROPS_TAKEN); + ubifs_assert(lp->flags & LPROPS_INDEX); return lnum; } -- cgit From 2d4cf5ae123e4a7bd26a88e600581aa809bcad7f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 18 Jun 2012 16:31:22 -0700 Subject: UBIFS: correct usage of IS_ENABLED() Commit "818039c UBIFS: fix debugfs-less systems support" fixed one regression but introduced a different regression - the debugfs is now always compiled out. Root cause: IS_ENABLED() arguments should be used with the CONFIG_* prefix. Signed-off-by: Brian Norris Signed-off-by: Artem Bityutskiy --- fs/ubifs/debug.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 84a7e6f3c046..92df3b081539 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -2918,7 +2918,7 @@ int dbg_debugfs_init_fs(struct ubifs_info *c) struct dentry *dent; struct ubifs_debug_info *d = c->dbg; - if (!IS_ENABLED(DEBUG_FS)) + if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; n = snprintf(d->dfs_dir_name, UBIFS_DFS_DIR_LEN + 1, UBIFS_DFS_DIR_NAME, @@ -3013,7 +3013,7 @@ out: */ void dbg_debugfs_exit_fs(struct ubifs_info *c) { - if (IS_ENABLED(DEBUG_FS)) + if (IS_ENABLED(CONFIG_DEBUG_FS)) debugfs_remove_recursive(c->dbg->dfs_dir); } @@ -3099,7 +3099,7 @@ int dbg_debugfs_init(void) const char *fname; struct dentry *dent; - if (!IS_ENABLED(DEBUG_FS)) + if (!IS_ENABLED(CONFIG_DEBUG_FS)) return 0; fname = "ubifs"; @@ -3166,7 +3166,7 @@ out: */ void dbg_debugfs_exit(void) { - if (IS_ENABLED(DEBUG_FS)) + if (IS_ENABLED(CONFIG_DEBUG_FS)) debugfs_remove_recursive(dfs_rootdir); } -- cgit From 8ca78f3eda4bf1799e8c4ba02035623fd7a347df Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 27 Jun 2012 15:05:48 +0200 Subject: Btrfs: avoid waiting for delayed refs when we must not We track two conditions to decide if we should sleep while waiting for more delayed refs, the number of delayed refs (num_refs) and the first entry in the list of blockers (first_seq). When we suspect staleness, we save num_refs and do one more cycle. If nothing changes, we then save first_seq for later comparison and do wait_event. We ought to save first_seq the very same moment we're saving num_refs. Otherwise we cannot be sure that nothing has changed and we might start waiting when we shouldn't, which could lead to starvation. 
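The invariant the patch restores can be sketched with a pthread analog (invented names, not the btrfs code): both inputs that the later "did anything change?" comparison uses must be sampled inside the same critical section, so that they describe a single moment in time.

#include <pthread.h>

struct refs_like {
	pthread_mutex_t lock;
	unsigned long num_entries;   /* condition input #1 (num_refs)  */
	void *seq_head;              /* condition input #2 (first_seq) */
};

static void snapshot(struct refs_like *r, unsigned long *num_refs,
		     void **first_seq)
{
	pthread_mutex_lock(&r->lock);
	/* sampled together, so a later comparison sees one consistent state */
	*num_refs = r->num_entries;
	*first_seq = r->seq_head;
	pthread_mutex_unlock(&r->lock);
}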
Signed-off-by: Jan Schmidt --- fs/btrfs/extent-tree.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 4b5a1e1bdefb..6e1d36702ff7 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2347,12 +2347,10 @@ next: return count; } - static void wait_for_more_refs(struct btrfs_delayed_ref_root *delayed_refs, - unsigned long num_refs) + unsigned long num_refs, + struct list_head *first_seq) { - struct list_head *first_seq = delayed_refs->seq_head.next; - spin_unlock(&delayed_refs->lock); pr_debug("waiting for more refs (num %ld, first %p)\n", num_refs, first_seq); @@ -2381,6 +2379,7 @@ int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_root *delayed_refs; struct btrfs_delayed_ref_node *ref; struct list_head cluster; + struct list_head *first_seq = NULL; int ret; u64 delayed_start; int run_all = count == (unsigned long)-1; @@ -2436,8 +2435,10 @@ again: */ consider_waiting = 1; num_refs = delayed_refs->num_entries; + first_seq = root->fs_info->tree_mod_seq_list.next; } else { - wait_for_more_refs(delayed_refs, num_refs); + wait_for_more_refs(delayed_refs, + num_refs, first_seq); /* * after waiting, things have changed. we * dropped the lock and someone else might have -- cgit From 9345457f4a539a40056431aeb6f068750857472f Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Wed, 27 Jun 2012 15:23:09 +0200 Subject: Btrfs: support root level changes in __resolve_indirect_ref With the tree mod log, we can have a tree that's two levels high, but btrfs_search_old_slot may still return a path with the tree root at level one instead. __resolve_indirect_ref must care for this and accept parents in a lower level than expected. Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7301cdb4b2cb..cf0df904347f 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -301,10 +301,14 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, goto out; eb = path->nodes[level]; - if (!eb) { - WARN_ON(1); - ret = 1; - goto out; + while (!eb) { + if (!level) { + WARN_ON(1); + ret = 1; + goto out; + } + level--; + eb = path->nodes[level]; } ret = add_all_parents(root, path, parents, level, &ref->key_for_search, -- cgit From 28da9fb4467f7a650cd31af6dfad3a4e4a3abf6e Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Jun 2012 10:59:13 +0200 Subject: Btrfs: fix tree mod log for root replacements at leaf level For the tree mod log, we don't log any operations at leaf level. If the root is at the leaf level (i.e. the tree consists only of the root), then __tree_mod_log_oldest_root will find a ROOT_REPLACE operation in the log (because we always log that one no matter which level), but no other operations. With this patch __tree_mod_log_oldest_root exits cleanly instead of BUGging in this situation. get_old_root checks if its really a root at leaf level in case we don't have any operations and WARNs if this assumption breaks. 
Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 15cbc2bf4ff0..7d1e4fc5fb6a 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1024,11 +1024,18 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info, if (!looped && !tm) return 0; /* - * we must have key remove operations in the log before the - * replace operation. + * if there are no tree operation for the oldest root, we simply + * return it. this should only happen if that (old) root is at + * level 0. */ - BUG_ON(!tm); + if (!tm) + break; + /* + * if there's an operation that's not a root replacement, we + * found the oldest version of our root. normally, we'll find a + * MOD_LOG_KEY_REMOVE_WHILE_FREEING operation here. + */ if (tm->op != MOD_LOG_ROOT_REPLACE) break; @@ -1192,16 +1199,8 @@ get_old_root(struct btrfs_root *root, u64 time_seq) } tm = tree_mod_log_search(root->fs_info, logical, time_seq); - /* - * there was an item in the log when __tree_mod_log_oldest_root - * returned. this one must not go away, because the time_seq passed to - * us must be blocking its removal. - */ - BUG_ON(!tm); - if (old_root) - eb = alloc_dummy_extent_buffer(tm->index << PAGE_CACHE_SHIFT, - root->nodesize); + eb = alloc_dummy_extent_buffer(logical, root->nodesize); else eb = btrfs_clone_extent_buffer(root->node); btrfs_tree_read_unlock(root->node); @@ -1216,7 +1215,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq) btrfs_set_header_level(eb, old_root->level); btrfs_set_header_generation(eb, old_generation); } - __tree_mod_log_rewind(eb, time_seq, tm); + if (tm) + __tree_mod_log_rewind(eb, time_seq, tm); + else + WARN_ON(btrfs_header_level(eb) != 0); extent_buffer_get(eb); return eb; -- cgit From c3e0696523862c48b4d8c73ffb2867e9db478338 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 21 Jun 2012 11:01:06 +0200 Subject: Btrfs: always put insert_ptr modifications into the tree mod log Several callers of insert_ptr set the tree_mod_log parameter to 0 to avoid addition to the tree mod log. In fact, we need all of those operations. This commit simply removes the additional parameter and makes addition to the tree mod log unconditional. 
Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 7d1e4fc5fb6a..e005d9b04616 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2997,7 +2997,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, static void insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_disk_key *key, u64 bytenr, - int slot, int level, int tree_mod_log) + int slot, int level) { struct extent_buffer *lower; int nritems; @@ -3010,7 +3010,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, BUG_ON(slot > nritems); BUG_ON(nritems == BTRFS_NODEPTRS_PER_BLOCK(root)); if (slot != nritems) { - if (tree_mod_log && level) + if (level) tree_mod_log_eb_move(root->fs_info, lower, slot + 1, slot, nritems - slot); memmove_extent_buffer(lower, @@ -3018,7 +3018,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans, btrfs_node_key_ptr_offset(slot), (nritems - slot) * sizeof(struct btrfs_key_ptr)); } - if (tree_mod_log && level) { + if (level) { ret = tree_mod_log_insert_key(root->fs_info, lower, slot, MOD_LOG_KEY_ADD); BUG_ON(ret < 0); @@ -3106,7 +3106,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(split); insert_ptr(trans, root, path, &disk_key, split->start, - path->slots[level + 1] + 1, level + 1, 1); + path->slots[level + 1] + 1, level + 1); if (path->slots[level] >= mid) { path->slots[level] -= mid; @@ -3643,7 +3643,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans, btrfs_set_header_nritems(l, mid); btrfs_item_key(right, &disk_key, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_mark_buffer_dirty(right); btrfs_mark_buffer_dirty(l); @@ -3850,7 +3850,7 @@ again: if (mid <= slot) { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1] + 1, 1, 0); + path->slots[1] + 1, 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; @@ -3859,7 +3859,7 @@ again: } else { btrfs_set_header_nritems(right, 0); insert_ptr(trans, root, path, &disk_key, right->start, - path->slots[1], 1, 0); + path->slots[1], 1); btrfs_tree_unlock(path->nodes[0]); free_extent_buffer(path->nodes[0]); path->nodes[0] = right; -- cgit From 155725c9c051a343be5e555bf943da827e6cf721 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 22 Jun 2012 14:01:00 +0200 Subject: Btrfs: leave critical region in btrfs_find_all_roots as soon as possible When delayed refs exist, btrfs_find_all_roots used to hold the delayed ref mutex way longer than actually required. We ought to drop it immediately after we're done collecting all the delayed refs. 
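The shape of the change, as a self-contained sketch (generic pthread code with invented names, not the btrfs functions): only the step that copies shared state needs the mutex; once a private copy exists, the slow processing runs unlocked.

#include <pthread.h>

static pthread_mutex_t head_mutex = PTHREAD_MUTEX_INITIALIZER;
static int shared_refs[64];

static long resolve_refs(void)
{
	int copy[64];
	long total = 0;
	int i;

	pthread_mutex_lock(&head_mutex);
	for (i = 0; i < 64; i++)
		copy[i] = shared_refs[i];   /* collecting needs the lock */
	pthread_mutex_unlock(&head_mutex);  /* released before the slow work */

	for (i = 0; i < 64; i++)            /* processing runs unlocked */
		total += copy[i];
	return total;
}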
Signed-off-by: Jan Schmidt --- fs/btrfs/backref.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index cf0df904347f..a383c18e74e8 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -839,6 +839,7 @@ again: } ret = __add_delayed_refs(head, delayed_ref_seq, &prefs_delayed); + mutex_unlock(&head->mutex); if (ret) { spin_unlock(&delayed_refs->lock); goto out; @@ -932,8 +933,6 @@ again: } out: - if (head) - mutex_unlock(&head->mutex); btrfs_free_path(path); while (!list_empty(&prefs)) { ref = list_first_entry(&prefs, struct __prelim_ref, list); -- cgit From 19956c7e94a7a58d6df8c4db5ae62f9109a7c663 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 22 Jun 2012 14:52:13 +0200 Subject: Btrfs: fix tree mod log rewind of ADD operations When a MOD_LOG_KEY_ADD operation is rewound, we remove the key from the tree block. If it's not the last key, removal involves a move operation. This move operation was explicitly done before this commit. However, at insertion time, there's a move operation before the actual addition to make room for the new key, which is recorded in the tree mod log as well. This means we must drop the move operation when rewinding the add operation, because the next operation we'll be rewinding will be the corresponding MOD_LOG_MOVE_KEYS operation. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e005d9b04616..b98f8604f4f6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1094,11 +1094,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq, tm->generation); break; case MOD_LOG_KEY_ADD: - if (tm->slot != n - 1) { - o_dst = btrfs_node_key_ptr_offset(tm->slot); - o_src = btrfs_node_key_ptr_offset(tm->slot + 1); - memmove_extent_buffer(eb, o_dst, o_src, p_size); - } + /* if a move operation is needed it's in the log */ n--; break; case MOD_LOG_MOVE_KEYS: -- cgit From d42244a0d36ad0939c5f173ebf15841a0678899c Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Fri, 22 Jun 2012 14:51:15 +0200 Subject: Btrfs: resolve tree mod log locking issue in btrfs_next_leaf With the tree mod log, we may end up with two roots (the current root and a rewound version of it) both pointing to two leaves, l1 and l2, of which l2 had already been cow-ed in the current transaction. If we don't rewind any tree blocks, we cannot have two roots both pointing to an already cowed tree block. Now there is btrfs_next_leaf, which has a leaf locked and wants a lock on the next (right) leaf. And there is push_leaf_left, which has a (cowed!) leaf locked and wants a lock on the previous (left) leaf. In order to solve this deadlock situation, we use try_lock in btrfs_next_leaf (only in case it's called with a tree mod log time_seq parameter) and if we fail to get a lock on the next leaf, we give up our lock on the current leaf and retry from the very beginning. Signed-off-by: Jan Schmidt --- fs/btrfs/ctree.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index b98f8604f4f6..8206b3900587 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -5119,6 +5119,18 @@ again: if (!path->skip_locking) { ret = btrfs_try_tree_read_lock(next); + if (!ret && time_seq) { + /* + * If we don't get the lock, we may be racing + * with push_leaf_left, holding that lock while + * itself waiting for the leaf we've currently + * locked. 
To solve this situation, we give up + * on our lock and cycle. + */ + btrfs_release_path(path); + cond_resched(); + goto again; + } if (!ret) { btrfs_set_path_blocking(path); btrfs_tree_read_lock(next); -- cgit From cb14d340ef1737c24125dd663eff77734a482d47 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Jun 2012 20:08:44 +0200 Subject: udf: Use 'ret' instead of abusing 'i' in udf_load_logicalvol() Signed-off-by: Jan Kara --- fs/udf/super.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index ac8a348dcb69..9da6f4ee112c 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1233,11 +1233,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; - i = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); - if (i != 0) { - ret = i; + ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); + if (ret) goto out_bh; - } for (i = 0, offset = 0; i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); -- cgit From adee11b2085bee90bd8f4f52123ffb07882d6256 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Jun 2012 20:20:22 +0200 Subject: udf: Avoid run away loop when partition table length is corrupted Check provided length of partition table so that (possibly maliciously) corrupted partition table cannot cause accessing data beyond current buffer. Signed-off-by: Jan Kara --- fs/udf/super.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index 9da6f4ee112c..ce911f50b39d 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -1225,6 +1225,7 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, struct genericPartitionMap *gpm; uint16_t ident; struct buffer_head *bh; + unsigned int table_len; int ret = 0; bh = udf_read_tagged(sb, block, block, &ident); @@ -1232,13 +1233,20 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, return 1; BUG_ON(ident != TAG_IDENT_LVD); lvd = (struct logicalVolDesc *)bh->b_data; + table_len = le32_to_cpu(lvd->mapTableLength); + if (sizeof(*lvd) + table_len > sb->s_blocksize) { + udf_err(sb, "error loading logical volume descriptor: " + "Partition table too long (%u > %lu)\n", table_len, + sb->s_blocksize - sizeof(*lvd)); + goto out_bh; + } ret = udf_sb_alloc_partition_maps(sb, le32_to_cpu(lvd->numPartitionMaps)); if (ret) goto out_bh; for (i = 0, offset = 0; - i < sbi->s_partitions && offset < le32_to_cpu(lvd->mapTableLength); + i < sbi->s_partitions && offset < table_len; i++, offset += gpm->partitionMapLength) { struct udf_part_map *map = &sbi->s_partmaps[i]; gpm = (struct genericPartitionMap *) -- cgit From 1df2ae31c724e57be9d7ac00d78db8a5dabdd050 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 27 Jun 2012 21:23:07 +0200 Subject: udf: Fortify loading of sparing table Add sanity checks when loading sparing table from disk to avoid accessing unallocated memory or writing to it. 
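The validation pattern the patch adds can be sketched in isolation (plain C with invented names; BLOCKSIZE and the limit of 4 tables stand in for sb->s_blocksize and the size of the s_spar_map[] array): every length read from disk is range-checked against the buffer it will be used to index, before any use.

#include <stdint.h>
#include <stddef.h>

#define BLOCKSIZE 2048u

static int is_power_of_2(unsigned int n)
{
	return n && !(n & (n - 1));
}

static int sparing_params_ok(unsigned int packet_len, unsigned int num_tables,
			     size_t table_header_len, uint16_t realloc_len)
{
	if (!is_power_of_2(packet_len))
		return 0;   /* corrupt packet length */
	if (num_tables > 4)
		return 0;   /* more tables than in-memory map slots */
	if (table_header_len + realloc_len > BLOCKSIZE)
		return 0;   /* table would overrun the one-block buffer */
	return 1;
}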
Signed-off-by: Jan Kara --- fs/udf/super.c | 86 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 33 deletions(-) (limited to 'fs') diff --git a/fs/udf/super.c b/fs/udf/super.c index ce911f50b39d..8d86a8706c0e 100644 --- a/fs/udf/super.c +++ b/fs/udf/super.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include "udf_sb.h" @@ -1215,11 +1216,59 @@ out_bh: return ret; } +static int udf_load_sparable_map(struct super_block *sb, + struct udf_part_map *map, + struct sparablePartitionMap *spm) +{ + uint32_t loc; + uint16_t ident; + struct sparingTable *st; + struct udf_sparing_data *sdata = &map->s_type_specific.s_sparing; + int i; + struct buffer_head *bh; + + map->s_partition_type = UDF_SPARABLE_MAP15; + sdata->s_packet_len = le16_to_cpu(spm->packetLength); + if (!is_power_of_2(sdata->s_packet_len)) { + udf_err(sb, "error loading logical volume descriptor: " + "Invalid packet length %u\n", + (unsigned)sdata->s_packet_len); + return -EIO; + } + if (spm->numSparingTables > 4) { + udf_err(sb, "error loading logical volume descriptor: " + "Too many sparing tables (%d)\n", + (int)spm->numSparingTables); + return -EIO; + } + + for (i = 0; i < spm->numSparingTables; i++) { + loc = le32_to_cpu(spm->locSparingTable[i]); + bh = udf_read_tagged(sb, loc, loc, &ident); + if (!bh) + continue; + + st = (struct sparingTable *)bh->b_data; + if (ident != 0 || + strncmp(st->sparingIdent.ident, UDF_ID_SPARING, + strlen(UDF_ID_SPARING)) || + sizeof(*st) + le16_to_cpu(st->reallocationTableLen) > + sb->s_blocksize) { + brelse(bh); + continue; + } + + sdata->s_spar_map[i] = bh; + } + map->s_partition_func = udf_get_pblock_spar15; + return 0; +} + static int udf_load_logicalvol(struct super_block *sb, sector_t block, struct kernel_lb_addr *fileset) { struct logicalVolDesc *lvd; - int i, j, offset; + int i, offset; uint8_t type; struct udf_sb_info *sbi = UDF_SB(sb); struct genericPartitionMap *gpm; @@ -1281,38 +1330,9 @@ static int udf_load_logicalvol(struct super_block *sb, sector_t block, } else if (!strncmp(upm2->partIdent.ident, UDF_ID_SPARABLE, strlen(UDF_ID_SPARABLE))) { - uint32_t loc; - struct sparingTable *st; - struct sparablePartitionMap *spm = - (struct sparablePartitionMap *)gpm; - - map->s_partition_type = UDF_SPARABLE_MAP15; - map->s_type_specific.s_sparing.s_packet_len = - le16_to_cpu(spm->packetLength); - for (j = 0; j < spm->numSparingTables; j++) { - struct buffer_head *bh2; - - loc = le32_to_cpu( - spm->locSparingTable[j]); - bh2 = udf_read_tagged(sb, loc, loc, - &ident); - map->s_type_specific.s_sparing. - s_spar_map[j] = bh2; - - if (bh2 == NULL) - continue; - - st = (struct sparingTable *)bh2->b_data; - if (ident != 0 || strncmp( - st->sparingIdent.ident, - UDF_ID_SPARING, - strlen(UDF_ID_SPARING))) { - brelse(bh2); - map->s_type_specific.s_sparing. - s_spar_map[j] = NULL; - } - } - map->s_partition_func = udf_get_pblock_spar15; + if (udf_load_sparable_map(sb, map, + (struct sparablePartitionMap *)gpm) < 0) + goto out_bh; } else if (!strncmp(upm2->partIdent.ident, UDF_ID_METADATA, strlen(UDF_ID_METADATA))) { -- cgit From 597a60fadedf9a40fdff8735054bf772b3dafd57 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Thu, 14 Jun 2012 16:42:31 +0200 Subject: Btrfs: don't count I/O statistic read errors for missing devices It is normal behaviour of the low level btrfs function btrfs_map_bio() to complete a bio with -EIO if the device is missing, instead of just preventing the bio creation in an earlier step. 
This used to cause I/O statistic read error increments and annoying printk_ratelimited messages. This commit fixes the issue. Signed-off-by: Stefan Behrens Reported-by: Carey Underwood --- fs/btrfs/volumes.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8a3d2594b807..3f292cf693a7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4061,16 +4061,18 @@ static void btrfs_end_bio(struct bio *bio, int err) BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; - if (bio->bi_rw & WRITE) - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_WRITE_ERRS); - else - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_READ_ERRS); - if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) - btrfs_dev_stat_inc(dev, - BTRFS_DEV_STAT_FLUSH_ERRS); - btrfs_dev_stat_print_on_error(dev); + if (dev->bdev) { + if (bio->bi_rw & WRITE) + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_WRITE_ERRS); + else + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_READ_ERRS); + if ((bio->bi_rw & WRITE_FLUSH) == WRITE_FLUSH) + btrfs_dev_stat_inc(dev, + BTRFS_DEV_STAT_FLUSH_ERRS); + btrfs_dev_stat_print_on_error(dev); + } } } -- cgit From c3473e830074ef04f974f2829690942dd8580619 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 19 Jun 2012 10:59:00 -0400 Subject: Btrfs: fix dio write vs buffered read race Miao pointed out there's a problem with mixing dio writes and buffered reads. If the read happens between us invalidating the page range and actually locking the extent we can bring pages into the page cache. Then once the write finishes if somebody tries to read again it will just find uptodate pages and we'll read stale data. So we need to lock the extent and check for uptodate bits in the range. If there are uptodate bits we need to unlock and invalidate again. This will keep this race from happening since we will hold the extent locked until we create the ordered extent, and then the read side always waits for ordered extents. There was also a race in how we updated i_size; previously we were relying on the generic DIO stuff to adjust the i_size after the DIO had completed, but this happens outside of the extent lock which means reads could come in and not see the updated i_size. So instead move this work into where we create the extents, and then this way the update ordered i_size stuff works properly in the endio handlers. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/file.c | 13 ------------- fs/btrfs/inode.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 876cddd6b2f0..248d20265249 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1334,7 +1334,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, loff_t *ppos, size_t count, size_t ocount) { struct file *file = iocb->ki_filp; - struct inode *inode = fdentry(file)->d_inode; struct iov_iter i; ssize_t written; ssize_t written_buffered; @@ -1344,18 +1343,6 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb, written = generic_file_direct_write(iocb, iov, &nr_segs, pos, ppos, count, ocount); - /* - * the generic O_DIRECT will update in-memory i_size after the - * DIOs are done. But our endio handlers that update the on - * disk i_size never update past the in memory i_size. 
So we - * need one more update here to catch any additions to the - * file - */ - if (inode->i_size != BTRFS_I(inode)->disk_i_size) { - btrfs_ordered_update_i_size(inode, inode->i_size, NULL); - mark_inode_dirty(inode); - } - if (written < 0 || written == count) return written; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4a4f2d59a64b..6971fb5fc859 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5904,8 +5904,17 @@ map: bh_result->b_size = len; bh_result->b_bdev = em->bdev; set_buffer_mapped(bh_result); - if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) - set_buffer_new(bh_result); + if (create) { + if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) + set_buffer_new(bh_result); + + /* + * Need to update the i_size under the extent lock so buffered + * readers will get the updated i_size when we unlock. + */ + if (start + len > i_size_read(inode)) + i_size_write(inode, start + len); + } free_extent_map(em); @@ -6388,12 +6397,48 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, */ ordered = btrfs_lookup_ordered_range(inode, lockstart, lockend - lockstart + 1); - if (!ordered) + + /* + * We need to make sure there are no buffered pages in this + * range either, we could have raced between the invalidate in + * generic_file_direct_write and locking the extent. The + * invalidate needs to happen so that reads after a write do not + * get stale data. + */ + if (!ordered && (!writing || + !test_range_bit(&BTRFS_I(inode)->io_tree, + lockstart, lockend, EXTENT_UPTODATE, 0, + cached_state))) break; + unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, &cached_state, GFP_NOFS); - btrfs_start_ordered_extent(inode, ordered, 1); - btrfs_put_ordered_extent(ordered); + + if (ordered) { + btrfs_start_ordered_extent(inode, ordered, 1); + btrfs_put_ordered_extent(ordered); + } else { + /* Screw you mmap */ + ret = filemap_write_and_wait_range(file->f_mapping, + lockstart, + lockend); + if (ret) + goto out; + + /* + * If we found a page that couldn't be invalidated just + * fall back to buffered. + */ + ret = invalidate_inode_pages2_range(file->f_mapping, + lockstart >> PAGE_CACHE_SHIFT, + lockend >> PAGE_CACHE_SHIFT); + if (ret) { + if (ret == -EBUSY) + ret = 0; + goto out; + } + } + cond_resched(); } -- cgit From 68310a5e42f93c2242ec1836c3b18d531e0065e2 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 22 Jun 2012 12:24:12 -0600 Subject: Btrfs: restore restriper state on all mounts Fix a bug that triggered asserts in btrfs_balance() in both normal and resume modes -- restriper state was not properly restored on read-only mounts. This factors out resuming code from btrfs_recover_balance(), which is now also called earlier in the mount sequence to avoid the problem of some early writes getting the old profile. 
Signed-off-by: Ilya Dryomov --- fs/btrfs/disk-io.c | 10 ++++++---- fs/btrfs/volumes.c | 39 +++++++++++++++++++-------------------- fs/btrfs/volumes.h | 2 +- 3 files changed, 26 insertions(+), 25 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7d7bc8eace86..3a7961ba161e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2354,12 +2354,17 @@ retry_root_backup: BTRFS_CSUM_TREE_OBJECTID, csum_root); if (ret) goto recovery_tree_root; - csum_root->track_dirty = 1; fs_info->generation = generation; fs_info->last_trans_committed = generation; + ret = btrfs_recover_balance(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to recover balance\n"); + goto fail_block_groups; + } + ret = btrfs_init_dev_stats(fs_info); if (ret) { printk(KERN_ERR "btrfs: failed to init dev_stats: %d\n", @@ -2492,9 +2497,6 @@ retry_root_backup: err = btrfs_orphan_cleanup(fs_info->tree_root); up_read(&fs_info->cleanup_work_sem); - if (!err) - err = btrfs_recover_balance(fs_info->tree_root); - if (err) { close_ctree(tree_root); return err; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 3f292cf693a7..48943d0f861a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2867,9 +2867,8 @@ static int balance_kthread(void *data) return ret; } -int btrfs_recover_balance(struct btrfs_root *tree_root) +int btrfs_recover_balance(struct btrfs_fs_info *fs_info) { - struct task_struct *tsk; struct btrfs_balance_control *bctl; struct btrfs_balance_item *item; struct btrfs_disk_balance_args disk_bargs; @@ -2882,29 +2881,30 @@ int btrfs_recover_balance(struct btrfs_root *tree_root) if (!path) return -ENOMEM; - bctl = kzalloc(sizeof(*bctl), GFP_NOFS); - if (!bctl) { - ret = -ENOMEM; - goto out; - } - key.objectid = BTRFS_BALANCE_OBJECTID; key.type = BTRFS_BALANCE_ITEM_KEY; key.offset = 0; - ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); if (ret < 0) - goto out_bctl; + goto out; if (ret > 0) { /* ret = -ENOENT; */ ret = 0; - goto out_bctl; + goto out; + } + + bctl = kzalloc(sizeof(*bctl), GFP_NOFS); + if (!bctl) { + ret = -ENOMEM; + goto out; } leaf = path->nodes[0]; item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item); - bctl->fs_info = tree_root->fs_info; - bctl->flags = btrfs_balance_flags(leaf, item) | BTRFS_BALANCE_RESUME; + bctl->fs_info = fs_info; + bctl->flags = btrfs_balance_flags(leaf, item); + bctl->flags |= BTRFS_BALANCE_RESUME; btrfs_balance_data(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs); @@ -2913,14 +2913,13 @@ int btrfs_recover_balance(struct btrfs_root *tree_root) btrfs_balance_sys(leaf, item, &disk_bargs); btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); - tsk = kthread_run(balance_kthread, bctl, "btrfs-balance"); - if (IS_ERR(tsk)) - ret = PTR_ERR(tsk); - else - goto out; + mutex_lock(&fs_info->volume_mutex); + mutex_lock(&fs_info->balance_mutex); -out_bctl: - kfree(bctl); + set_balance_control(bctl); + + mutex_unlock(&fs_info->balance_mutex); + mutex_unlock(&fs_info->volume_mutex); out: btrfs_free_path(path); return ret; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index 74366f27a76b..e1b1a649fc5a 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -281,7 +281,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args 
*bargs); -int btrfs_recover_balance(struct btrfs_root *tree_root); +int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset); -- cgit From 2b6ba629b5aac51e7099efbb43e2b403213aa7fb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 22 Jun 2012 12:24:13 -0600 Subject: Btrfs: resume balance on rw (re)mounts properly This introduces btrfs_resume_balance_async(), which, given that restriper state was recovered earlier by btrfs_recover_balance(), resumes balance in btrfs-balance kthread. Signed-off-by: Ilya Dryomov --- fs/btrfs/disk-io.c | 24 +++++++++++++++--------- fs/btrfs/super.c | 4 ++++ fs/btrfs/volumes.c | 36 +++++++++++++++++++++++++++--------- fs/btrfs/volumes.h | 1 + 4 files changed, 47 insertions(+), 18 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3a7961ba161e..8cc47103a32e 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2490,17 +2490,23 @@ retry_root_backup: goto fail_trans_kthread; } - if (!(sb->s_flags & MS_RDONLY)) { - down_read(&fs_info->cleanup_work_sem); - err = btrfs_orphan_cleanup(fs_info->fs_root); - if (!err) - err = btrfs_orphan_cleanup(fs_info->tree_root); + if (sb->s_flags & MS_RDONLY) + return 0; + + down_read(&fs_info->cleanup_work_sem); + if ((ret = btrfs_orphan_cleanup(fs_info->fs_root)) || + (ret = btrfs_orphan_cleanup(fs_info->tree_root))) { up_read(&fs_info->cleanup_work_sem); + close_ctree(tree_root); + return ret; + } + up_read(&fs_info->cleanup_work_sem); - if (err) { - close_ctree(tree_root); - return err; - } + ret = btrfs_resume_balance_async(fs_info); + if (ret) { + printk(KERN_WARNING "btrfs: failed to resume balance\n"); + close_ctree(tree_root); + return ret; } return 0; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 0eb9a4da069e..e23991574fdf 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -1187,6 +1187,10 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; + ret = btrfs_resume_balance_async(fs_info); + if (ret) + goto restore; + sb->s_flags &= ~MS_RDONLY; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 48943d0f861a..ecaad40e7ef4 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -2845,28 +2845,46 @@ out: static int balance_kthread(void *data) { - struct btrfs_balance_control *bctl = - (struct btrfs_balance_control *)data; - struct btrfs_fs_info *fs_info = bctl->fs_info; + struct btrfs_fs_info *fs_info = data; int ret = 0; mutex_lock(&fs_info->volume_mutex); mutex_lock(&fs_info->balance_mutex); - set_balance_control(bctl); - - if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { - printk(KERN_INFO "btrfs: force skipping balance\n"); - } else { + if (fs_info->balance_ctl) { printk(KERN_INFO "btrfs: continuing balance\n"); - ret = btrfs_balance(bctl, NULL); + ret = btrfs_balance(fs_info->balance_ctl, NULL); } mutex_unlock(&fs_info->balance_mutex); mutex_unlock(&fs_info->volume_mutex); + return ret; } +int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) +{ + struct task_struct *tsk; + + spin_lock(&fs_info->balance_lock); + if (!fs_info->balance_ctl) { + spin_unlock(&fs_info->balance_lock); + return 0; + } + spin_unlock(&fs_info->balance_lock); + + if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { + printk(KERN_INFO "btrfs: force skipping balance\n"); + return 0; + } + + tsk = kthread_run(balance_kthread, fs_info, 
"btrfs-balance"); + if (IS_ERR(tsk)) + return PTR_ERR(tsk); + + return 0; +} + int btrfs_recover_balance(struct btrfs_fs_info *fs_info) { struct btrfs_balance_control *bctl; diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index e1b1a649fc5a..95f6637614db 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -281,6 +281,7 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size); int btrfs_init_new_device(struct btrfs_root *root, char *path); int btrfs_balance(struct btrfs_balance_control *bctl, struct btrfs_ioctl_balance_args *bargs); +int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info); int btrfs_recover_balance(struct btrfs_fs_info *fs_info); int btrfs_pause_balance(struct btrfs_fs_info *fs_info); int btrfs_cancel_balance(struct btrfs_fs_info *fs_info); -- cgit From d3a94048c912e18e99a091d5ea2d0a1178152d6f Mon Sep 17 00:00:00 2001 From: Alexander Block Date: Mon, 25 Jun 2012 15:36:12 -0600 Subject: Btrfs: use _IOR for BTRFS_IOC_SUBVOL_GETFLAGS We used the wrong ioctl macro for the getflags ioctl before. As we don't have the set/getflags ioctls in the user space ioctl.h at the moment, it's safe to fix it now. Reviewed-by: David Sterba Signed-off-by: Alexander Block --- fs/btrfs/ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 497c530724cf..e440aa653c30 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -339,7 +339,7 @@ struct btrfs_ioctl_get_dev_stats { #define BTRFS_IOC_WAIT_SYNC _IOW(BTRFS_IOCTL_MAGIC, 22, __u64) #define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \ struct btrfs_ioctl_vol_args_v2) -#define BTRFS_IOC_SUBVOL_GETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 25, __u64) +#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64) #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64) #define BTRFS_IOC_SCRUB _IOWR(BTRFS_IOCTL_MAGIC, 27, \ struct btrfs_ioctl_scrub_args) -- cgit From 6bf02314d9a5c29f6ec30285b9ad5361c2d4c85a Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 25 Jun 2012 21:59:09 -0600 Subject: Btrfs: fix wrong check during log recovery When we're evicting an inode during log recovery, we need to ensure that the inode is not in orphan state any more, which means inode's run_time flags has _no_ BTRFS_INODE_HAS_ORPHAN_ITEM. Thus, the BUG_ON was triggered because of a wrong check for the flags. Reviewed-by: David Sterba Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6971fb5fc859..9f07bd121f66 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3754,7 +3754,7 @@ void btrfs_evict_inode(struct inode *inode) btrfs_wait_ordered_range(inode, 0, (u64)-1); if (root->fs_info->log_root_recovering) { - BUG_ON(!test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, + BUG_ON(test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM, &BTRFS_I(inode)->runtime_flags)); goto no_delete; } -- cgit From bdb7d303b33c1648514c9f9461d7513a4c05ce48 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 27 Jun 2012 15:10:56 -0400 Subject: Btrfs: fix tree log remove space corner case The tree log stuff can have allocated space that we end up having split across a bitmap and a real extent. The free space code does not deal with this, it assumes that if it finds an extent or bitmap entry that the entire range must fall within the entry it finds. This isn't necessarily the case, so rework the remove function so it can handle this case properly. 
This fixed two panics the user hit, first in the case where the space was initially in a bitmap and then in an extent entry, and then the reverse case. Thanks, Reported-and-tested-by: Shaun Reich Signed-off-by: Josef Bacik --- fs/btrfs/free-space-cache.c | 145 ++++++++++++++++---------------------------- 1 file changed, 52 insertions(+), 93 deletions(-) (limited to 'fs') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 19a0d85b451c..a70c54e2e1be 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -1542,29 +1542,26 @@ again: end = bitmap_info->offset + (u64)(BITS_PER_BITMAP * ctl->unit) - 1; /* - * XXX - this can go away after a few releases. - * - * since the only user of btrfs_remove_free_space is the tree logging - * stuff, and the only way to test that is under crash conditions, we - * want to have this debug stuff here just in case somethings not - * working. Search the bitmap for the space we are trying to use to - * make sure its actually there. If its not there then we need to stop - * because something has gone wrong. + * We need to search for bits in this bitmap. We could only cover some + * of the extent in this bitmap thanks to how we add space, so we need + * to search for as much as it as we can and clear that amount, and then + * go searching for the next bit. */ search_start = *offset; - search_bytes = *bytes; + search_bytes = ctl->unit; search_bytes = min(search_bytes, end - search_start + 1); ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); BUG_ON(ret < 0 || search_start != *offset); - if (*offset > bitmap_info->offset && *offset + *bytes > end) { - bitmap_clear_bits(ctl, bitmap_info, *offset, end - *offset + 1); - *bytes -= end - *offset + 1; - *offset = end + 1; - } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { - bitmap_clear_bits(ctl, bitmap_info, *offset, *bytes); - *bytes = 0; - } + /* We may have found more bits than what we need */ + search_bytes = min(search_bytes, *bytes); + + /* Cannot clear past the end of the bitmap */ + search_bytes = min(search_bytes, end - search_start + 1); + + bitmap_clear_bits(ctl, bitmap_info, search_start, search_bytes); + *offset += search_bytes; + *bytes -= search_bytes; if (*bytes) { struct rb_node *next = rb_next(&bitmap_info->offset_index); @@ -1595,7 +1592,7 @@ again: * everything over again. 
*/ search_start = *offset; - search_bytes = *bytes; + search_bytes = ctl->unit; ret = search_bitmap(ctl, bitmap_info, &search_start, &search_bytes); if (ret < 0 || search_start != *offset) @@ -1878,12 +1875,14 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *info; - struct btrfs_free_space *next_info = NULL; int ret = 0; spin_lock(&ctl->tree_lock); again: + if (!bytes) + goto out_lock; + info = tree_search_offset(ctl, offset, 0, 0); if (!info) { /* @@ -1904,88 +1903,48 @@ again: } } - if (info->bytes < bytes && rb_next(&info->offset_index)) { - u64 end; - next_info = rb_entry(rb_next(&info->offset_index), - struct btrfs_free_space, - offset_index); - - if (next_info->bitmap) - end = next_info->offset + - BITS_PER_BITMAP * ctl->unit - 1; - else - end = next_info->offset + next_info->bytes; - - if (next_info->bytes < bytes || - next_info->offset > offset || offset > end) { - printk(KERN_CRIT "Found free space at %llu, size %llu," - " trying to use %llu\n", - (unsigned long long)info->offset, - (unsigned long long)info->bytes, - (unsigned long long)bytes); - WARN_ON(1); - ret = -EINVAL; - goto out_lock; - } - - info = next_info; - } - - if (info->bytes == bytes) { + if (!info->bitmap) { unlink_free_space(ctl, info); - if (info->bitmap) { - kfree(info->bitmap); - ctl->total_bitmaps--; - } - kmem_cache_free(btrfs_free_space_cachep, info); - ret = 0; - goto out_lock; - } - - if (!info->bitmap && info->offset == offset) { - unlink_free_space(ctl, info); - info->offset += bytes; - info->bytes -= bytes; - ret = link_free_space(ctl, info); - WARN_ON(ret); - goto out_lock; - } + if (offset == info->offset) { + u64 to_free = min(bytes, info->bytes); + + info->bytes -= to_free; + info->offset += to_free; + if (info->bytes) { + ret = link_free_space(ctl, info); + WARN_ON(ret); + } else { + kmem_cache_free(btrfs_free_space_cachep, info); + } - if (!info->bitmap && info->offset <= offset && - info->offset + info->bytes >= offset + bytes) { - u64 old_start = info->offset; - /* - * we're freeing space in the middle of the info, - * this can happen during tree log replay - * - * first unlink the old info and then - * insert it again after the hole we're creating - */ - unlink_free_space(ctl, info); - if (offset + bytes < info->offset + info->bytes) { - u64 old_end = info->offset + info->bytes; + offset += to_free; + bytes -= to_free; + goto again; + } else { + u64 old_end = info->bytes + info->offset; - info->offset = offset + bytes; - info->bytes = old_end - info->offset; + info->bytes = offset - info->offset; ret = link_free_space(ctl, info); WARN_ON(ret); if (ret) goto out_lock; - } else { - /* the hole we're creating ends at the end - * of the info struct, just free the info - */ - kmem_cache_free(btrfs_free_space_cachep, info); - } - spin_unlock(&ctl->tree_lock); - /* step two, insert a new info struct to cover - * anything before the hole - */ - ret = btrfs_add_free_space(block_group, old_start, - offset - old_start); - WARN_ON(ret); /* -ENOMEM */ - goto out; + /* Not enough bytes in this entry to satisfy us */ + if (old_end < offset + bytes) { + bytes -= old_end - offset; + offset = old_end; + goto again; + } else if (old_end == offset + bytes) { + /* all done */ + goto out_lock; + } + spin_unlock(&ctl->tree_lock); + + ret = btrfs_add_free_space(block_group, offset + bytes, + old_end - (offset + bytes)); + WARN_ON(ret); + goto out; + } } ret = remove_from_bitmap(ctl, info, &offset, 
&bytes); -- cgit From 7fd1a3f73f3743b4ffd520effe288a70b0ec47c9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Wed, 27 Jun 2012 17:18:41 -0400 Subject: Btrfs: hold a ref on the inode during writepages We can race with unlink and not actually be able to do our igrab in btrfs_add_ordered_extent. This will result in all sorts of problems. Instead of doing the complicated work to try and handle returning an error properly from btrfs_add_ordered_extent, just hold a ref to the inode during writepages. If we cannot grab a ref we know we're freeing this inode anyway and can just drop the dirty pages on the floor, because screw them, we're going to invalidate them anyway. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/extent_io.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index aaa12c1eb348..01c21b6c6d43 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3324,6 +3324,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, writepage_t writepage, void *data, void (*flush_fn)(void *)) { + struct inode *inode = mapping->host; int ret = 0; int done = 0; int nr_to_write_done = 0; @@ -3334,6 +3335,18 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, int scanned = 0; int tag; + /* + * We have to hold onto the inode so that ordered extents can do their + * work when the IO finishes. The alternative to this is failing to add + * an ordered extent if the igrab() fails there and that is a huge pain + * to deal with, so instead just hold onto the inode throughout the + * writepages operation. If it fails here we are freeing up the inode + * anyway and we'd rather not waste our time writing out stuff that is + * going to be truncated anyway. + */ + if (!igrab(inode)) + return 0; + pagevec_init(&pvec, 0); if (wbc->range_cyclic) { index = mapping->writeback_index; /* Start from prev offset */ @@ -3428,6 +3441,7 @@ retry: index = 0; goto retry; } + btrfs_add_delayed_iput(inode); return ret; } -- cgit From b6305567e7d31b0bec1b8cb9ec0cadd7f7086f5f Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 2 Jul 2012 15:29:53 -0400 Subject: Btrfs: run delayed directory updates during log replay While we are resolving directory modifications in the tree log, we are triggering delayed metadata updates to the filesystem btrees. This commit forces the delayed updates to run so the replay code can find any modifications done. It stops us from crashing because the directory deletion replay expects items to be removed immediately from the tree. 
Signed-off-by: Chris Mason cc: stable@kernel.org --- fs/btrfs/tree-log.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'fs') diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 2017d0ff511c..8abeae4224f9 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -690,6 +690,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, kfree(name); iput(inode); + + btrfs_run_delayed_items(trans, root); return ret; } @@ -895,6 +897,7 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, victim_name, victim_name_len); + btrfs_run_delayed_items(trans, root); } kfree(victim_name); ptr = (unsigned long)(victim_ref + 1) + victim_name_len; @@ -1475,6 +1478,9 @@ again: ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); BUG_ON(ret); + + btrfs_run_delayed_items(trans, root); + kfree(name); iput(inode); -- cgit From ec01d738a1691dfc85b96b9f796020267a7be577 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 2 Jul 2012 07:24:25 -0400 Subject: cifs: when server doesn't set CAP_LARGE_READ_X, cap default rsize at MaxBufferSize When the server doesn't advertise CAP_LARGE_READ_X, then MS-CIFS states that you must cap the size of the read at the client's MaxBufferSize. Unfortunately, testing with many older servers shows that they often can't service a read larger than their own MaxBufferSize. Since we can't assume what the server will do in this situation, we must be conservative here for the default. When the server can't do large reads, then assume that it can't satisfy any read larger than its MaxBufferSize either. Luckily almost all modern servers can do large reads, so this won't affect them. This is really just for older win9x and OS/2 era servers. Also, note that this patch just governs the default rsize. The admin can always override this if he so chooses. Cc: # 3.2 Reported-by: David H. Durgee Signed-off-by: Jeff Layton Signed-off-by: Steve French --- fs/cifs/connect.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'fs') diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 5b3840725d01..0ae86ddf2213 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -3495,18 +3495,15 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *pvolume_info) * MS-CIFS indicates that servers are only limited by the client's * bufsize for reads, testing against win98se shows that it throws * INVALID_PARAMETER errors if you try to request too large a read. + * OS/2 just sends back short reads. * - * If the server advertises a MaxBufferSize of less than one page, - * assume that it also can't satisfy reads larger than that either. - * - * FIXME: Is there a better heuristic for this? + * If the server doesn't advertise CAP_LARGE_READ_X, then assume that + * it can't handle a read request larger than its MaxBufferSize either. */ if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_READ_CAP)) defsize = CIFS_DEFAULT_IOSIZE; else if (server->capabilities & CAP_LARGE_READ_X) defsize = CIFS_DEFAULT_NON_POSIX_RSIZE; - else if (server->maxBuf >= PAGE_CACHE_SIZE) - defsize = CIFSMaxBufSize; else defsize = server->maxBuf - sizeof(READ_RSP); -- cgit From 9fe79d7600497ed8a95c3981cbe5b73ab98222f0 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Tue, 12 Jun 2012 11:17:01 -0700 Subject: eCryptfs: Properly check for O_RDONLY flag before doing privileged open If the first attempt at opening the lower file read/write fails, eCryptfs will retry using a privileged kthread.
However, the privileged retry should not happen if the lower file's inode is read-only because a read/write open will still be unsuccessful. The check for determining if the open should be retried was intended to be based on the access mode of the lower file's open flags being O_RDONLY, but the check was incorrectly performed. This would cause the open to be retried by the privileged kthread, resulting in a second failed open of the lower file. This patch corrects the check to determine if the open request should be handled by the privileged kthread. Signed-off-by: Tyler Hicks Reported-by: Dan Carpenter Acked-by: Dan Carpenter --- fs/ecryptfs/kthread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ecryptfs/kthread.c b/fs/ecryptfs/kthread.c index 69f994a7d524..0dbe58a8b172 100644 --- a/fs/ecryptfs/kthread.c +++ b/fs/ecryptfs/kthread.c @@ -149,7 +149,7 @@ int ecryptfs_privileged_open(struct file **lower_file, (*lower_file) = dentry_open(lower_dentry, lower_mnt, flags, cred); if (!IS_ERR(*lower_file)) goto out; - if (flags & O_RDONLY) { + if ((flags & O_ACCMODE) == O_RDONLY) { rc = PTR_ERR((*lower_file)); goto out; } -- cgit From 60d65f1f07a7d81d3eb3b91fc13fca80f2fdbb12 Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 11 Jun 2012 10:21:34 -0700 Subject: eCryptfs: Fix lockdep warning in miscdev operations Don't grab the daemon mutex while holding the message context mutex. Addresses this lockdep warning: ecryptfsd/2141 is trying to acquire lock: (&ecryptfs_msg_ctx_arr[i].mux){+.+.+.}, at: [] ecryptfs_miscdev_read+0x143/0x470 [ecryptfs] but task is already holding lock: (&(*daemon)->mux){+.+...}, at: [] ecryptfs_miscdev_read+0x21c/0x470 [ecryptfs] which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&(*daemon)->mux){+.+...}: [] lock_acquire+0x9d/0x220 [] __mutex_lock_common+0x5a/0x4b0 [] mutex_lock_nested+0x44/0x50 [] ecryptfs_send_miscdev+0x97/0x120 [ecryptfs] [] ecryptfs_send_message+0x134/0x1e0 [ecryptfs] [] ecryptfs_generate_key_packet_set+0x2fe/0xa80 [ecryptfs] [] ecryptfs_write_metadata+0x108/0x250 [ecryptfs] [] ecryptfs_create+0x130/0x250 [ecryptfs] [] vfs_create+0xb4/0x120 [] do_last+0x8c5/0xa10 [] path_openat+0xd9/0x460 [] do_filp_open+0x42/0xa0 [] do_sys_open+0xf8/0x1d0 [] sys_open+0x21/0x30 [] system_call_fastpath+0x16/0x1b -> #0 (&ecryptfs_msg_ctx_arr[i].mux){+.+.+.}: [] __lock_acquire+0x1bf8/0x1c50 [] lock_acquire+0x9d/0x220 [] __mutex_lock_common+0x5a/0x4b0 [] mutex_lock_nested+0x44/0x50 [] ecryptfs_miscdev_read+0x143/0x470 [ecryptfs] [] vfs_read+0xb3/0x180 [] sys_read+0x4d/0x90 [] system_call_fastpath+0x16/0x1b Signed-off-by: Tyler Hicks --- fs/ecryptfs/miscdev.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 3a06f4043df4..3c632ec412ec 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -191,31 +191,32 @@ int ecryptfs_send_miscdev(char *data, size_t data_size, struct ecryptfs_msg_ctx *msg_ctx, u8 msg_type, u16 msg_flags, struct ecryptfs_daemon *daemon) { - int rc = 0; + struct ecryptfs_message *msg; - mutex_lock(&msg_ctx->mux); - msg_ctx->msg = kmalloc((sizeof(*msg_ctx->msg) + data_size), - GFP_KERNEL); - if (!msg_ctx->msg) { - rc = -ENOMEM; + msg = kmalloc((sizeof(*msg) + data_size), GFP_KERNEL); + if (!msg) { printk(KERN_ERR "%s: Out of memory whilst attempting " "to kmalloc(%zd, GFP_KERNEL)\n", __func__, - (sizeof(*msg_ctx->msg) + data_size)); - goto 
out_unlock; + (sizeof(*msg) + data_size)); + return -ENOMEM; } + + mutex_lock(&msg_ctx->mux); + msg_ctx->msg = msg; msg_ctx->msg->index = msg_ctx->index; msg_ctx->msg->data_len = data_size; msg_ctx->type = msg_type; memcpy(msg_ctx->msg->data, data, data_size); msg_ctx->msg_size = (sizeof(*msg_ctx->msg) + data_size); - mutex_lock(&daemon->mux); list_add_tail(&msg_ctx->daemon_out_list, &daemon->msg_ctx_out_queue); + mutex_unlock(&msg_ctx->mux); + + mutex_lock(&daemon->mux); daemon->num_queued_msg_ctx++; wake_up_interruptible(&daemon->wait); mutex_unlock(&daemon->mux); -out_unlock: - mutex_unlock(&msg_ctx->mux); - return rc; + + return 0; } /* -- cgit From 3e5d3c35a68c9a933bdbdd8685bd1a205b57e806 Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Wed, 27 Jun 2012 17:09:55 +0800 Subject: ocfs2: clear unaligned io flag when dio fails The unaligned io flag is set in the kiocb when an unaligned dio is issued; it should be cleared even when the dio fails, or it may affect the following io using the same kiocb. Signed-off-by: Junxiao Bi Cc: stable@vger.kernel.org Signed-off-by: Joel Becker --- fs/ocfs2/file.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 061591a3ab08..98513c8ed589 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2422,8 +2422,10 @@ out_dio: unaligned_dio = 0; } - if (unaligned_dio) + if (unaligned_dio) { + ocfs2_iocb_clear_unaligned_aio(iocb); atomic_dec(&OCFS2_I(inode)->ip_unaligned_aio); + } out: if (rw_level != -1) -- cgit From 16865b7c42fbce8a4d2b278460e387e719e289cb Mon Sep 17 00:00:00 2001 From: roel Date: Mon, 12 Dec 2011 23:40:51 +0100 Subject: ocfs2: Misplaced parens in unlikely Fix misplaced parentheses Signed-off-by: Roel Kluin Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 81a4cd22f80b..274529cce9c6 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -456,7 +456,7 @@ static void ocfs2_update_lock_stats(struct ocfs2_lock_res *res, int level, stats->ls_gets++; stats->ls_total += ktime_to_ns(kt); /* overflow */ - if (unlikely(stats->ls_gets) == 0) { + if (unlikely(stats->ls_gets == 0)) { stats->ls_gets++; stats->ls_total = ktime_to_ns(kt); } -- cgit From a75e9ccabd925d16954739bd977c54695c9310d0 Mon Sep 17 00:00:00 2001 From: Srinivas Eeda Date: Mon, 30 Jan 2012 21:51:22 -0800 Subject: ocfs2: use spinlock irqsave for downconvert lock When the ocfs2dc thread holds the dc_task_lock spinlock and receives a soft IRQ, it deadlocks itself trying to take the same spinlock in ocfs2_wake_downconvert_thread. Below is the stack snippet. The patch disables interrupts when acquiring the dc_task_lock spinlock. ocfs2_wake_downconvert_thread ocfs2_rw_unlock ocfs2_dio_end_io dio_complete ..... bio_endio req_bio_endio ....
scsi_io_completion blk_done_softirq __do_softirq do_softirq irq_exit do_IRQ ocfs2_downconvert_thread [kthread] Signed-off-by: Srinivas Eeda Signed-off-by: Joel Becker --- fs/ocfs2/dlmglue.c | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 274529cce9c6..4f7795fb5fc0 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -3932,6 +3932,8 @@ unqueue: static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, struct ocfs2_lock_res *lockres) { + unsigned long flags; + assert_spin_locked(&lockres->l_lock); if (lockres->l_flags & OCFS2_LOCK_FREEING) { @@ -3945,21 +3947,22 @@ static void ocfs2_schedule_blocked_lock(struct ocfs2_super *osb, lockres_or_flags(lockres, OCFS2_LOCK_QUEUED); - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); if (list_empty(&lockres->l_blocked_list)) { list_add_tail(&lockres->l_blocked_list, &osb->blocked_lock_list); osb->blocked_lock_count++; } - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); } static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) { unsigned long processed; + unsigned long flags; struct ocfs2_lock_res *lockres; - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); /* grab this early so we know to try again if a state change and * wake happens part-way through our work */ osb->dc_work_sequence = osb->dc_wake_sequence; @@ -3972,38 +3975,40 @@ static void ocfs2_downconvert_thread_do_work(struct ocfs2_super *osb) struct ocfs2_lock_res, l_blocked_list); list_del_init(&lockres->l_blocked_list); osb->blocked_lock_count--; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); BUG_ON(!processed); processed--; ocfs2_process_blocked_lock(osb, lockres); - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); } - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); } static int ocfs2_downconvert_thread_lists_empty(struct ocfs2_super *osb) { int empty = 0; + unsigned long flags; - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); if (list_empty(&osb->blocked_lock_list)) empty = 1; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); return empty; } static int ocfs2_downconvert_thread_should_wake(struct ocfs2_super *osb) { int should_wake = 0; + unsigned long flags; - spin_lock(&osb->dc_task_lock); + spin_lock_irqsave(&osb->dc_task_lock, flags); if (osb->dc_work_sequence != osb->dc_wake_sequence) should_wake = 1; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); return should_wake; } @@ -4033,10 +4038,12 @@ static int ocfs2_downconvert_thread(void *arg) void ocfs2_wake_downconvert_thread(struct ocfs2_super *osb) { - spin_lock(&osb->dc_task_lock); + unsigned long flags; + + spin_lock_irqsave(&osb->dc_task_lock, flags); /* make sure the voting thread gets a swipe at whatever changes * the caller may have made to the voting state */ osb->dc_wake_sequence++; - spin_unlock(&osb->dc_task_lock); + spin_unlock_irqrestore(&osb->dc_task_lock, flags); wake_up(&osb->dc_event); } -- cgit From 65622e647bfff3ed89f95576ec120693ed4085a6 Mon Sep 17 00:00:00 2001 From: Jeff Liu Date: Thu, 9 Feb 2012 14:42:22 +0800 Subject: ocfs2: for SEEK_DATA/SEEK_HOLE, return internal error unchanged if ocfs2_get_clusters_nocache() or ocfs2_inode_lock() call failed. 
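The error contract this change enforces can be sketched up front: for SEEK_DATA/SEEK_HOLE, ENXIO is reserved for an offset at or beyond EOF, and any internal failure must be returned unchanged. The sketch below is illustrative only; the example_* names are made up and this is not the ocfs2 code.

/* Illustrative sketch of the llseek error convention. */
static loff_t example_seek_data_hole(struct inode *inode, loff_t offset)
{
	int ret;

	if (offset >= i_size_read(inode))
		return -ENXIO;		/* the only case ENXIO is meant for */

	ret = example_lookup_extent(inode, offset);	/* hypothetical */
	if (ret < 0)
		return ret;	/* pass through; do not rewrite to -ENXIO */
	return offset;
}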
Since ENXIO only means "offset beyond EOF" for SEEK_DATA/SEEK_HOLE, we should return the internal error unchanged if the ocfs2_inode_lock() or ocfs2_get_clusters_nocache() call fails, rather than ENXIO. Otherwise, it will confuse user applications when they are trying to understand the root cause. Thanks Dave for pointing this out. Cc: Dave Chinner Signed-off-by: Jie Liu Signed-off-by: Joel Becker --- fs/ocfs2/extent_map.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c index 2f5b92ef0e53..70b5863a2d64 100644 --- a/fs/ocfs2/extent_map.c +++ b/fs/ocfs2/extent_map.c @@ -923,8 +923,6 @@ out_unlock: ocfs2_inode_unlock(inode, 0); out: - if (ret && ret != -ENXIO) - ret = -ENXIO; return ret; } -- cgit From a4564ead763a9264edbec6d4e72aa273f05eb39c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 10 Feb 2012 10:50:07 +0100 Subject: ocfs2: Fix bogus error message from ocfs2_global_read_info The 'status' variable in ocfs2_global_read_info() is always != 0 when leaving the function because it happens to contain the number of bytes read. Thus we always log an error message although everything is OK. Since all error cases properly call mlog_errno() before jumping to out_err, there's no reason to call mlog_errno() on exit at all. This is a fallout of c1e8d35e (conversion of mlog_exit() calls). Signed-off-by: Jan Kara Signed-off-by: Joel Becker --- fs/ocfs2/quota_global.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs') diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 92fcd575775a..0a86e302655f 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -399,8 +399,6 @@ int ocfs2_global_read_info(struct super_block *sb, int type) msecs_to_jiffies(oinfo->dqi_syncms)); out_err: - if (status) - mlog_errno(status); return status; out_unlock: ocfs2_unlock_global_qf(oinfo, 0); -- cgit From 8dc6780587c99286c0d3de747a2946a76989414a Mon Sep 17 00:00:00 2001 From: Tyler Hicks Date: Mon, 11 Jun 2012 09:24:11 -0700 Subject: eCryptfs: Gracefully refuse miscdev file ops on inherited/passed files File operations on /dev/ecryptfs would BUG() when the operations were performed by processes other than the process that originally opened the file. This could happen with open files inherited after fork() or file descriptors passed through IPC mechanisms. Rather than calling BUG(), an error code can be safely returned in most situations. In ecryptfs_miscdev_release(), eCryptfs still needs to handle the release even if the last file reference is being held by a process that didn't originally open the file. ecryptfs_find_daemon_by_euid() will not be successful, so a pointer to the daemon is stored in the file's private_data. The private_data pointer is initialized when the miscdev file is opened and only used when the file is released. https://launchpad.net/bugs/994247 Signed-off-by: Tyler Hicks Reported-by: Sasha Levin Tested-by: Sasha Levin --- fs/ecryptfs/miscdev.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/ecryptfs/miscdev.c b/fs/ecryptfs/miscdev.c index 3c632ec412ec..c0038f6566d4 100644 --- a/fs/ecryptfs/miscdev.c +++ b/fs/ecryptfs/miscdev.c @@ -49,7 +49,10 @@ ecryptfs_miscdev_poll(struct file *file, poll_table *pt) mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data?
*/ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); mutex_unlock(&ecryptfs_daemon_hash_mux); if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { @@ -122,6 +125,7 @@ ecryptfs_miscdev_open(struct inode *inode, struct file *file) goto out_unlock_daemon; } daemon->flags |= ECRYPTFS_DAEMON_MISCDEV_OPEN; + file->private_data = daemon; atomic_inc(&ecryptfs_num_miscdev_opens); out_unlock_daemon: mutex_unlock(&daemon->mux); @@ -152,9 +156,9 @@ ecryptfs_miscdev_release(struct inode *inode, struct file *file) mutex_lock(&ecryptfs_daemon_hash_mux); rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) + daemon = file->private_data; mutex_lock(&daemon->mux); - BUG_ON(daemon->pid != task_pid(current)); BUG_ON(!(daemon->flags & ECRYPTFS_DAEMON_MISCDEV_OPEN)); daemon->flags &= ~ECRYPTFS_DAEMON_MISCDEV_OPEN; atomic_dec(&ecryptfs_num_miscdev_opens); @@ -270,8 +274,16 @@ ecryptfs_miscdev_read(struct file *file, char __user *buf, size_t count, mutex_lock(&ecryptfs_daemon_hash_mux); /* TODO: Just use file->private_data? */ rc = ecryptfs_find_daemon_by_euid(&daemon, euid, current_user_ns()); - BUG_ON(rc || !daemon); + if (rc || !daemon) { + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EINVAL; + } mutex_lock(&daemon->mux); + if (task_pid(current) != daemon->pid) { + mutex_unlock(&daemon->mux); + mutex_unlock(&ecryptfs_daemon_hash_mux); + return -EPERM; + } if (daemon->flags & ECRYPTFS_DAEMON_ZOMBIE) { rc = 0; mutex_unlock(&ecryptfs_daemon_hash_mux); @@ -308,9 +320,6 @@ check_list: * message from the queue; try again */ goto check_list; } - BUG_ON(euid != daemon->euid); - BUG_ON(current_user_ns() != daemon->user_ns); - BUG_ON(task_pid(current) != daemon->pid); msg_ctx = list_first_entry(&daemon->msg_ctx_out_queue, struct ecryptfs_msg_ctx, daemon_out_list); BUG_ON(!msg_ctx); -- cgit From 332a2e1244bd08b9e3ecd378028513396a004a24 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 7 Jul 2012 10:17:00 -0700 Subject: vfs: make O_PATH file descriptors usable for 'fchdir()' We already use them for openat() and friends, but fchdir() also wants to be able to use O_PATH file descriptors. This should make it comparable to the O_SEARCH of Solaris. In particular, O_PATH allows you to access (not-quite-open) a directory you don't have read permission to, only execute permission. Noticed during development of multithread support for ksh93.
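From userspace, what this change enables can be shown with a short program. This is a hedged sketch: the directory path is illustrative, and it assumes a directory that grants execute (search) permission but not read permission.

#define _GNU_SOURCE
#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>

int main(void)
{
	/* a directory with mode --x; the path is illustrative */
	int fd = open("/search/only/dir", O_PATH | O_DIRECTORY);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* worked for openat() and friends before; now fchdir() too */
	if (fchdir(fd) < 0) {
		perror("fchdir");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}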
Reported-by: ольга крыжановская Cc: Al Viro Cc: stable@kernel.org # O_PATH introduced in 3.0+ Signed-off-by: Linus Torvalds --- fs/open.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/open.c b/fs/open.c index d6c79a0dffc7..1540632d8387 100644 --- a/fs/open.c +++ b/fs/open.c @@ -397,10 +397,10 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) { struct file *file; struct inode *inode; - int error; + int error, fput_needed; error = -EBADF; - file = fget(fd); + file = fget_raw_light(fd, &fput_needed); if (!file) goto out; @@ -414,7 +414,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd) if (!error) set_fs_pwd(current->fs, &file->f_path); out_putf: - fput(file); + fput_light(file, fput_needed); out: return error; } -- cgit From 4035c2487f179327fae87af3477659402b797584 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 8 Jul 2012 10:24:10 -0400 Subject: NFS: Fix list manipulation snafus in fs/nfs/direct.c Fix 2 bugs in nfs_direct_write_reschedule: - The request needs to be removed from the 'reqs' list before it can be added to 'failed'. - Fix an infinite loop if the 'failed' list is non-empty. Reported-by: Julia Lawall Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 9a4cbfc85d81..48253372ab1d 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -484,6 +484,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { + nfs_list_remove_request(req); nfs_list_add_request(req, &failed); spin_lock(cinfo.lock); dreq->flags = 0; @@ -494,8 +495,11 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) } nfs_pageio_complete(&desc); - while (!list_empty(&failed)) + while (!list_empty(&failed)) { + req = nfs_list_entry(failed.next); + nfs_list_remove_request(req); nfs_unlock_and_release_request(req); + } if (put_dreq(dreq)) nfs_direct_write_complete(dreq, dreq->inode); -- cgit From f1daf666dd7d097653bd38320248c68084b1febf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Jul 2012 12:04:15 -0400 Subject: NFSv4: Fix an NFSv4 mount regression The helper nfs_fs_mount() will always call nfs4_try_mount with the mount_info->fill_super argument pointing to nfs_fill_super, which is NFSv2/v3 only. The fix is to have nfs4_try_mount replace it with nfs4_fill_super. The regression was introduced by commit c40f8d1d (NFS: Create a common fs_mount() function) Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 906f09c7d842..06228192f64e 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2860,6 +2860,8 @@ static struct dentry *nfs4_try_mount(int flags, const char *dev_name, dfprintk(MOUNT, "--> nfs4_try_mount()\n"); + mount_info->fill_super = nfs4_fill_super; + export_path = data->nfs_server.export_path; data->nfs_server.export_path = "/"; root_mnt = nfs_do_root_mount(&nfs4_remote_fs_type, flags, mount_info, -- cgit From a4e08d001f2e50bb8b3c4eebadcf08e5535f02ee Mon Sep 17 00:00:00 2001 From: Luis Henriques Date: Wed, 11 Jul 2012 14:02:10 -0700 Subject: ocfs2: fix NULL pointer dereference in __ocfs2_change_file_space() As ocfs2_fallocate() will invoke __ocfs2_change_file_space() with a NULL as the first parameter (file), it may trigger a NULL pointer dereference due to a missing check.
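The defensive pattern, distilled: any helper reachable both from a path that has a struct file (the ioctl path) and from ocfs2_fallocate(), which passes NULL, must test the pointer before dereferencing it. The sketch below is illustrative; example_mark_sync() is a made-up name, and the real change is the one-line test in the diff that follows.

/* Illustrative only; see the actual one-line fix below. */
static void example_mark_sync(struct file *file, handle_t *handle)
{
	/* file may legitimately be NULL when called via fallocate */
	if (file && (file->f_flags & O_SYNC))
		handle->h_sync = 1;
}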
Addresses http://bugs.launchpad.net/bugs/1006012 Signed-off-by: Luis Henriques Reported-by: Bret Towe Tested-by: Bret Towe Cc: Sunil Mushran Acked-by: Joel Becker Acked-by: Mark Fasheh Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 98513c8ed589..7602783d7f41 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1950,7 +1950,7 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, if (ret < 0) mlog_errno(ret); - if (file->f_flags & O_SYNC) + if (file && (file->f_flags & O_SYNC)) handle->h_sync = 1; ocfs2_commit_trans(osb, handle); -- cgit From fea9f718b3d68147f162ed2d870183ce5e0ad8d8 Mon Sep 17 00:00:00 2001 From: Bob Liu Date: Wed, 11 Jul 2012 14:02:35 -0700 Subject: fs: ramfs: file-nommu: add SetPageUptodate() There is a bug in the below scenario for !CONFIG_MMU: 1. create a new file 2. mmap the file and write to it 3. read the file; the correct value is not returned This is because sys_read() -> generic_file_aio_read() -> simple_readpage() -> clear_page() causes the page to be zeroed. Add SetPageUptodate() to ramfs_nommu_expand_for_mapping() so that generic_file_aio_read() does not call simple_readpage(). Signed-off-by: Bob Liu Cc: Hugh Dickins Cc: David Howells Cc: Geert Uytterhoeven Cc: Greg Ungerer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-nommu.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs') diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index fbb0b478a346..d5378d028589 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -110,6 +110,7 @@ int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize) /* prevent the page from being discarded on memory pressure */ SetPageDirty(page); + SetPageUptodate(page); unlock_page(page); put_page(page); -- cgit From 5d8ecbbc284f7e7568969574a6601b05f1ed1d90 Mon Sep 17 00:00:00 2001 From: "Steven J. Magnani" Date: Wed, 11 Jul 2012 14:02:42 -0700 Subject: fat: fix non-atomic NFS i_pos read fat_encode_fh() can fetch an invalid i_pos value on systems where 64-bit accesses are not atomic. Make it use the same accessor as the rest of the FAT code. Signed-off-by: Steven J.
Magnani Acked-by: OGAWA Hirofumi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/fat/inode.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/fat/inode.c b/fs/fat/inode.c index a3d81ebf6d86..0038b32cb362 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -738,22 +738,21 @@ static int fat_encode_fh(struct inode *inode, __u32 *fh, int *lenp, struct inode *parent) { int len = *lenp; - u32 ipos_h, ipos_m, ipos_l; + struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); + loff_t i_pos; if (len < 5) { *lenp = 5; return 255; /* no room */ } - ipos_h = MSDOS_I(inode)->i_pos >> 8; - ipos_m = (MSDOS_I(inode)->i_pos & 0xf0) << 24; - ipos_l = (MSDOS_I(inode)->i_pos & 0x0f) << 28; + i_pos = fat_i_pos_read(sbi, inode); *lenp = 5; fh[0] = inode->i_ino; fh[1] = inode->i_generation; - fh[2] = ipos_h; - fh[3] = ipos_m | MSDOS_I(inode)->i_logstart; - fh[4] = ipos_l; + fh[2] = i_pos >> 8; + fh[3] = ((i_pos & 0xf0) << 24) | MSDOS_I(inode)->i_logstart; + fh[4] = (i_pos & 0x0f) << 28; if (parent) fh[4] |= MSDOS_I(parent)->i_logstart; return 3; -- cgit From 91f68c89d8f35fe98ea04159b9a3b42d0149478f Mon Sep 17 00:00:00 2001 From: Jeff Moyer Date: Thu, 12 Jul 2012 09:43:14 -0400 Subject: block: fix infinite loop in __getblk_slow Commit 080399aaaf35 ("block: don't mark buffers beyond end of disk as mapped") exposed a bug in __getblk_slow that causes mount to hang as it loops infinitely waiting for a buffer that lies beyond the end of the disk to become uptodate. The problem was initially reported by Torsten Hilbrich here: https://lkml.org/lkml/2012/6/18/54 and also reported independently here: http://www.sysresccd.org/forums/viewtopic.php?f=13&t=4511 and then Richard W.M. Jones and Marcos Mello noted a few separate bugzillas also associated with the same issue. This patch has been confirmed to fix: https://bugzilla.redhat.com/show_bug.cgi?id=835019 The main problem is here, in __getblk_slow: for (;;) { struct buffer_head * bh; int ret; bh = __find_get_block(bdev, block, size); if (bh) return bh; ret = grow_buffers(bdev, block, size); if (ret < 0) return NULL; if (ret == 0) free_more_memory(); } __find_get_block does not find the block, since it will not be marked as mapped, and so grow_buffers is called to fill in the buffers for the associated page. I believe the for (;;) loop is there primarily to retry in the case of memory pressure keeping grow_buffers from succeeding. However, we also continue to loop for other cases, like the block lying beyond the end of the disk. So, the fix I came up with is to only loop when grow_buffers fails due to memory allocation issues (return value of 0). The attached patch was tested by myself, Torsten, and Rich, and was found to resolve the problem in all cases. Signed-off-by: Jeff Moyer Reported-and-Tested-by: Torsten Hilbrich Tested-by: Richard W.M. Jones Reviewed-by: Josh Boyer Cc: Stable # 3.0+ [ Jens is on vacation, taking this directly - Linus ] -- Stable Notes: this patch requires backport to 3.0, 3.2 and 3.3.
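Distilled from the patch that follows (this paraphrases the diff rather than adding to it), the retry policy becomes: retry only when grow_buffers() returns 0 (allocation failure under memory pressure), fail immediately on a negative return (such as a block beyond the end of the device), and repeat the lookup once it reports success.

/* Paraphrase of the fixed control flow; the real diff is below. */
retry:
	bh = __find_get_block(bdev, block, size);
	if (bh)
		return bh;

	ret = grow_buffers(bdev, block, size);
	if (ret == 0) {
		free_more_memory();	/* memory pressure: try again */
		goto retry;
	}
	if (ret > 0) {
		bh = __find_get_block(bdev, block, size);
		if (bh)
			return bh;
	}
	return NULL;			/* ret < 0: hard error, give up */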
Signed-off-by: Linus Torvalds --- fs/buffer.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/buffer.c b/fs/buffer.c index 838a9cf246bd..c7062c896d7c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1036,6 +1036,9 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) static struct buffer_head * __getblk_slow(struct block_device *bdev, sector_t block, int size) { + int ret; + struct buffer_head *bh; + /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || (size < 512 || size > PAGE_SIZE))) { @@ -1048,20 +1051,21 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) return NULL; } - for (;;) { - struct buffer_head * bh; - int ret; +retry: + bh = __find_get_block(bdev, block, size); + if (bh) + return bh; + ret = grow_buffers(bdev, block, size); + if (ret == 0) { + free_more_memory(); + goto retry; + } else if (ret > 0) { bh = __find_get_block(bdev, block, size); if (bh) return bh; - - ret = grow_buffers(bdev, block, size); - if (ret < 0) - return NULL; - if (ret == 0) - free_more_memory(); } + return NULL; } /* -- cgit From 8d657eb3b43861064d36241e88d9d61c709f33f0 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 13 Jul 2012 13:35:36 -0400 Subject: Remove easily user-triggerable BUG from generic_setlease This can be trivially triggered from userspace by passing in something unexpected. kernel BUG at fs/locks.c:1468! invalid opcode: 0000 [#1] SMP RIP: 0010:generic_setlease+0xc2/0x100 Call Trace: __vfs_setlease+0x35/0x40 fcntl_setlease+0x76/0x150 sys_fcntl+0x1c6/0x810 system_call_fastpath+0x1a/0x1f Signed-off-by: Dave Jones Cc: stable@kernel.org # 3.2+ Signed-off-by: Linus Torvalds --- fs/locks.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs') diff --git a/fs/locks.c b/fs/locks.c index 814c51d0de47..fce6238d52c1 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1465,7 +1465,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) case F_WRLCK: return generic_add_lease(filp, arg, flp); default: - BUG(); + return -EINVAL; } } EXPORT_SYMBOL(generic_setlease); -- cgit
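A hedged userspace sketch of the trigger class this commit describes: a lease type outside F_RDLCK, F_WRLCK and F_UNLCK used to reach the BUG() in the default case; after the change it yields EINVAL. The file name and the bogus constant are illustrative.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <errno.h>
#include <unistd.h>

int main(void)
{
	int fd = open("testfile", O_RDWR | O_CREAT, 0600);

	if (fd < 0)
		return 1;
	/* 42 is none of F_RDLCK/F_WRLCK/F_UNLCK */
	if (fcntl(fd, F_SETLEASE, 42) < 0)
		printf("fcntl: errno=%d (EINVAL expected)\n", errno);
	close(fd);
	return 0;
}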