aboutsummaryrefslogtreecommitdiff
path: root/fs/ext3/inode.c
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <[email protected]>2014-04-14 12:00:36 -0300
committerMauro Carvalho Chehab <[email protected]>2014-04-14 12:00:36 -0300
commit277a163c83d7ba93fba1e8980d29a9f8bfcfba6c (patch)
treeccfd357d152292958957b6b8a993892e7a8cc95f /fs/ext3/inode.c
parenta83b93a7480441a47856dc9104bea970e84cda87 (diff)
parentc9eaa447e77efe77b7fa4c953bd62de8297fd6c5 (diff)
Merge tag 'v3.15-rc1' into patchwork
Linux 3.15-rc1 * tag 'v3.15-rc1': (12180 commits) Linux 3.15-rc1 mm: Initialize error in shmem_file_aio_read() cifs: Use min_t() when comparing "size_t" and "unsigned long" sym53c8xx_2: Set DID_REQUEUE return code when aborting squeue powerpc: Don't try to set LPCR unless we're in hypervisor mode futex: update documentation for ordering guarantees ceph: fix pr_fmt() redefinition vti: don't allow to add the same tunnel twice gre: don't allow to add the same tunnel twice drivers: net: xen-netfront: fix array initialization bug missing bits of "splice: fix racy pipe->buffers uses" cifs: fix the race in cifs_writev() ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure pktgen: be friendly to LLTX devices r8152: check RTL8152_UNPLUG net: sun4i-emac: add promiscuous support net/apne: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO blackfin: cleanup board files bf609: clock: drop unused clock bit set/clear functions Blackfin: bf537: rename "CONFIG_ADT75" ...
Diffstat (limited to 'fs/ext3/inode.c')
-rw-r--r--fs/ext3/inode.c88
1 files changed, 26 insertions, 62 deletions
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 384b6ebb655f..f5157d0d1b43 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -228,7 +228,7 @@ void ext3_evict_inode (struct inode *inode)
log_wait_commit(journal, commit_tid);
filemap_write_and_wait(&inode->i_data);
}
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
ext3_discard_reservation(inode);
rsv = ei->i_block_alloc_info;
@@ -1559,56 +1559,17 @@ static int buffer_unmapped(handle_t *handle, struct buffer_head *bh)
}
/*
- * Note that we always start a transaction even if we're not journalling
- * data. This is to preserve ordering: any hole instantiation within
- * __block_write_full_page -> ext3_get_block() should be journalled
- * along with the data so we don't crash and then get metadata which
+ * Note that whenever we need to map blocks we start a transaction even if
+ * we're not journalling data. This is to preserve ordering: any hole
+ * instantiation within __block_write_full_page -> ext3_get_block() should be
+ * journalled along with the data so we don't crash and then get metadata which
* refers to old data.
*
* In all journalling modes block_write_full_page() will start the I/O.
*
- * Problem:
- *
- * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() ->
- * ext3_writepage()
- *
- * Similar for:
- *
- * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ...
- *
- * Same applies to ext3_get_block(). We will deadlock on various things like
- * lock_journal and i_truncate_mutex.
- *
- * Setting PF_MEMALLOC here doesn't work - too many internal memory
- * allocations fail.
- *
- * 16May01: If we're reentered then journal_current_handle() will be
- * non-zero. We simply *return*.
- *
- * 1 July 2001: @@@ FIXME:
- * In journalled data mode, a data buffer may be metadata against the
- * current transaction. But the same file is part of a shared mapping
- * and someone does a writepage() on it.
- *
- * We will move the buffer onto the async_data list, but *after* it has
- * been dirtied. So there's a small window where we have dirty data on
- * BJ_Metadata.
- *
- * Note that this only applies to the last partial page in the file. The
- * bit which block_write_full_page() uses prepare/commit for. (That's
- * broken code anyway: it's wrong for msync()).
- *
- * It's a rare case: affects the final partial page, for journalled data
- * where the file is subject to bith write() and writepage() in the same
- * transction. To fix it we'll need a custom block_write_full_page().
- * We'll probably need that anyway for journalling writepage() output.
- *
* We don't honour synchronous mounts for writepage(). That would be
* disastrous. Any write() or metadata operation will sync the fs for
* us.
- *
- * AKPM2: if all the page's buffers are mapped to disk and !data=journal,
- * we don't need to open a transaction here.
*/
static int ext3_ordered_writepage(struct page *page,
struct writeback_control *wbc)
@@ -1673,12 +1634,9 @@ static int ext3_ordered_writepage(struct page *page,
* block_write_full_page() succeeded. Otherwise they are unmapped,
* and generally junk.
*/
- if (ret == 0) {
- err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
+ if (ret == 0)
+ ret = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE,
NULL, journal_dirty_data_fn);
- if (!ret)
- ret = err;
- }
walk_page_buffers(handle, page_bufs, 0,
PAGE_CACHE_SIZE, NULL, bput_one);
err = ext3_journal_stop(handle);
@@ -1925,6 +1883,8 @@ retry:
* and pretend the write failed... */
ext3_truncate_failed_direct_write(inode);
ret = PTR_ERR(handle);
+ if (inode->i_nlink)
+ ext3_orphan_del(NULL, inode);
goto out;
}
if (inode->i_nlink)
@@ -3212,21 +3172,20 @@ out_brelse:
*
* We are called from a few places:
*
- * - Within generic_file_write() for O_SYNC files.
+ * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
* Here, there will be no transaction running. We wait for any running
* transaction to commit.
*
- * - Within sys_sync(), kupdate and such.
- * We wait on commit, if tol to.
+ * - Within flush work (for sys_sync(), kupdate and such).
+ * We wait on commit, if told to.
*
- * - Within prune_icache() (PF_MEMALLOC == true)
- * Here we simply return. We can't afford to block kswapd on the
- * journal commit.
+ * - Within iput_final() -> write_inode_now()
+ * We wait on commit, if told to.
*
* In all cases it is actually safe for us to return without doing anything,
* because the inode has been copied into a raw inode buffer in
- * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for
- * knfsd.
+ * ext3_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
+ * writeback.
*
* Note that we are absolutely dependent upon all inode dirtiers doing the
* right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -3238,13 +3197,13 @@ out_brelse:
* stuff();
* inode->i_size = expr;
*
- * is in error because a kswapd-driven write_inode() could occur while
- * `stuff()' is running, and the new i_size will be lost. Plus the inode
- * will no longer be on the superblock's dirty inode list.
+ * is in error because write_inode() could occur while `stuff()' is running,
+ * and the new i_size will be lost. Plus the inode will no longer be on the
+ * superblock's dirty inode list.
*/
int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
{
- if (current->flags & PF_MEMALLOC)
+ if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
return 0;
if (ext3_journal_current_handle()) {
@@ -3253,7 +3212,12 @@ int ext3_write_inode(struct inode *inode, struct writeback_control *wbc)
return -EIO;
}
- if (wbc->sync_mode != WB_SYNC_ALL)
+ /*
+ * No need to force transaction in WB_SYNC_NONE mode. Also
+ * ext3_sync_fs() will force the commit after everything is
+ * written.
+ */
+ if (wbc->sync_mode != WB_SYNC_ALL || wbc->for_sync)
return 0;
return ext3_force_commit(inode->i_sb);