From ea0f04e59543bafb3d2cbe37a0d375acb0bb2c34 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:54 +0200 Subject: get rid of nobh_write_begin_newtrunc Move the call to vmtruncate to get rid of excessive blocks to the only remaining caller and rename the non-truncating version to nobh_write_begin. Get rid of the superfluous file argument to it while we're at it. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/buffer_head.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1b9ba193b789..cfda5f0b2a4b 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -231,11 +231,7 @@ void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int file_fsync(struct file *, int); -int nobh_write_begin_newtrunc(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page **, void **, get_block_t*); -int nobh_write_begin(struct file *, struct address_space *, - loff_t, unsigned, unsigned, +int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); int nobh_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, -- cgit From 282dc178849882289d30e58b54be6b2799b351aa Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:55 +0200 Subject: get rid of cont_write_begin_newtrunc Move the call to vmtruncate to get rid of excessive blocks to the callers in preparation for the new truncate sequence and rename the non-truncating version to cont_write_begin. 
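The hunks below apply the same conversion to adfs, affs, hfs, hfsplus, hpfs and qnx4: each ->write_begin keeps calling cont_write_begin but now truncates away blocks instantiated past i_size itself when the call fails. A minimal sketch of that pattern for a hypothetical filesystem "foo" (foo_write_begin, foo_get_block and the FOO_I()->mmu_private accessor are illustrative stand-ins, not taken from the patch):

#include <linux/fs.h>
#include <linux/mm.h>           /* vmtruncate() */
#include <linux/buffer_head.h>

int foo_get_block(struct inode *, sector_t, struct buffer_head *, int);

static int foo_write_begin(struct file *file, struct address_space *mapping,
                           loff_t pos, unsigned len, unsigned flags,
                           struct page **pagep, void **fsdata)
{
        int ret;

        *pagep = NULL;
        /* FOO_I() stands in for the filesystem's per-inode info accessor */
        ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata,
                               foo_get_block,
                               &FOO_I(mapping->host)->mmu_private);

        /*
         * cont_write_begin no longer truncates on failure; drop any blocks
         * instantiated beyond the old i_size here in the caller.
         */
        if (unlikely(ret)) {
                loff_t isize = mapping->host->i_size;
                if (pos + len > isize)
                        vmtruncate(mapping->host, isize);
        }
        return ret;
}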
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/adfs/inode.c | 11 ++++++++++- fs/affs/file.c | 11 ++++++++++- fs/buffer.c | 21 +-------------------- fs/fat/inode.c | 2 +- fs/hfs/inode.c | 11 ++++++++++- fs/hfsplus/inode.c | 11 ++++++++++- fs/hpfs/file.c | 11 ++++++++++- fs/qnx4/inode.c | 11 ++++++++++- include/linux/buffer_head.h | 3 --- 9 files changed, 62 insertions(+), 30 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/adfs/inode.c b/fs/adfs/inode.c index 6f850b06ab62..b3dec193036b 100644 --- a/fs/adfs/inode.c +++ b/fs/adfs/inode.c @@ -50,10 +50,19 @@ static int adfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, adfs_get_block, &ADFS_I(mapping->host)->mmu_private); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t _adfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/affs/file.c b/fs/affs/file.c index 322710c3eedf..c4a9875bd1a6 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -406,10 +406,19 @@ static int affs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, affs_get_block, &AFFS_I(mapping->host)->mmu_private); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t _affs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/buffer.c b/fs/buffer.c index 559daf76bca4..14529ec759b9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2351,7 +2351,7 @@ out: * For moronic filesystems that do not allow holes in file. * We may have to extend the file. 
*/ -int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, +int cont_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata, get_block_t *get_block, loff_t *bytes) @@ -2377,25 +2377,6 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping, out: return err; } -EXPORT_SYMBOL(cont_write_begin_newtrunc); - -int cont_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block, loff_t *bytes) -{ - int ret; - - ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, get_block, bytes); - if (unlikely(ret)) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } - - return ret; -} EXPORT_SYMBOL(cont_write_begin); int block_prepare_write(struct page *page, unsigned from, unsigned to, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index ffe7c6fdc1ec..ec6a699a4023 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -159,7 +159,7 @@ static int fat_write_begin(struct file *file, struct address_space *mapping, int err; *pagep = NULL; - err = cont_write_begin_newtrunc(file, mapping, pos, len, flags, + err = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, fat_get_block, &MSDOS_I(mapping->host)->mmu_private); if (err < 0) diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 07b2464b5716..8df18e63eb6b 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -39,10 +39,19 @@ static int hfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hfs_get_block, &HFS_I(mapping->host)->phys_size); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t hfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 486021773911..88bf1b562641 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -31,10 +31,19 @@ static int hfsplus_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hfsplus_get_block, &HFSPLUS_I(mapping->host).phys_size); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t hfsplus_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index a9ae9bfa752f..c0340887c7ea 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -97,10 +97,19 @@ static int hpfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, hpfs_get_block, &hpfs_i(mapping->host)->mmu_private); + if (unlikely(ret)) { + loff_t isize = 
mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t _hpfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c index 277575ddc05c..16829722be93 100644 --- a/fs/qnx4/inode.c +++ b/fs/qnx4/inode.c @@ -320,10 +320,19 @@ static int qnx4_write_begin(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata) { struct qnx4_inode_info *qnx4_inode = qnx4_i(mapping->host); + int ret; + *pagep = NULL; - return cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + ret = cont_write_begin(file, mapping, pos, len, flags, pagep, fsdata, qnx4_get_block, &qnx4_inode->mmu_private); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t qnx4_bmap(struct address_space *mapping, sector_t block) { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index cfda5f0b2a4b..7638647f0424 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -217,9 +217,6 @@ int generic_write_end(struct file *, struct address_space *, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); -int cont_write_begin_newtrunc(struct file *, struct address_space *, loff_t, - unsigned, unsigned, struct page **, void **, - get_block_t *, loff_t *); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); -- cgit From 6e1db88d536adcbbfe562b2d4b7d6425784fff12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:57 +0200 Subject: introduce __block_write_begin Split up the block_write_begin implementation - __block_write_begin is a new trivial wrapper for block_prepare_write that always takes an already allocated page and can be either called from block_write_begin or filesystem code that already has a page allocated. Remove the handling of already allocated pages from block_write_begin after switching all callers that do it to __block_write_begin. 
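For code that already holds a locked page (the prepare_chunk helpers in ext2, minix, nilfs2, sysv and ufs below, and the ext3/ext4/reiserfs write_begin paths that lock their own page), the conversion reduces to a direct __block_write_begin call. A minimal sketch, with foo_prepare_chunk and foo_get_block as hypothetical stand-ins:

#include <linux/buffer_head.h>

int foo_get_block(struct inode *, sector_t, struct buffer_head *, int);

/*
 * The caller passes in a page it has already locked, as __block_write_begin
 * (and block_prepare_write underneath it) expects.
 */
static int foo_prepare_chunk(struct page *page, loff_t pos, unsigned len)
{
        return __block_write_begin(page, pos, len, foo_get_block);
}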
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 69 +++++++++++++++++---------------------------- fs/ext2/dir.c | 3 +- fs/ext3/inode.c | 3 +- fs/ext4/inode.c | 11 +++----- fs/minix/inode.c | 3 +- fs/nilfs2/dir.c | 3 +- fs/reiserfs/inode.c | 3 +- fs/sysv/itree.c | 3 +- fs/ufs/inode.c | 3 +- include/linux/buffer_head.h | 2 ++ 10 files changed, 39 insertions(+), 64 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index 14529ec759b9..c319c49da511 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1833,9 +1833,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) } EXPORT_SYMBOL(page_zero_new_buffers); -static int __block_prepare_write(struct inode *inode, struct page *page, - unsigned from, unsigned to, get_block_t *get_block) +int block_prepare_write(struct page *page, unsigned from, unsigned to, + get_block_t *get_block) { + struct inode *inode = page->mapping->host; unsigned block_start, block_end; sector_t block; int err = 0; @@ -1908,10 +1909,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page, if (!buffer_uptodate(*wait_bh)) err = -EIO; } - if (unlikely(err)) + if (unlikely(err)) { page_zero_new_buffers(page, from, to); + ClearPageUptodate(page); + } return err; } +EXPORT_SYMBOL(block_prepare_write); static int __block_commit_write(struct inode *inode, struct page *page, unsigned from, unsigned to) @@ -1948,6 +1952,15 @@ static int __block_commit_write(struct inode *inode, struct page *page, return 0; } +int __block_write_begin(struct page *page, loff_t pos, unsigned len, + get_block_t *get_block) +{ + unsigned start = pos & (PAGE_CACHE_SIZE - 1); + + return block_prepare_write(page, start, start + len, get_block); +} +EXPORT_SYMBOL(__block_write_begin); + /* * Filesystems implementing the new truncate sequence should use the * _newtrunc postfix variant which won't incorrectly call vmtruncate. 
@@ -1958,41 +1971,22 @@ int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, struct page **pagep, void **fsdata, get_block_t *get_block) { - struct inode *inode = mapping->host; - int status = 0; + pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; - pgoff_t index; - unsigned start, end; - int ownpage = 0; + int status; - index = pos >> PAGE_CACHE_SHIFT; - start = pos & (PAGE_CACHE_SIZE - 1); - end = start + len; - - page = *pagep; - if (page == NULL) { - ownpage = 1; - page = grab_cache_page_write_begin(mapping, index, flags); - if (!page) { - status = -ENOMEM; - goto out; - } - *pagep = page; - } else - BUG_ON(!PageLocked(page)); + page = grab_cache_page_write_begin(mapping, index, flags); + if (!page) + return -ENOMEM; - status = __block_prepare_write(inode, page, start, end, get_block); + status = __block_write_begin(page, pos, len, get_block); if (unlikely(status)) { - ClearPageUptodate(page); - - if (ownpage) { - unlock_page(page); - page_cache_release(page); - *pagep = NULL; - } + unlock_page(page); + page_cache_release(page); + page = NULL; } -out: + *pagep = page; return status; } EXPORT_SYMBOL(block_write_begin_newtrunc); @@ -2379,17 +2373,6 @@ out: } EXPORT_SYMBOL(cont_write_begin); -int block_prepare_write(struct page *page, unsigned from, unsigned to, - get_block_t *get_block) -{ - struct inode *inode = page->mapping->host; - int err = __block_prepare_write(inode, page, from, to, get_block); - if (err) - ClearPageUptodate(page); - return err; -} -EXPORT_SYMBOL(block_prepare_write); - int block_commit_write(struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 6b946bae11cf..764109886ec0 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -450,8 +450,7 @@ ino_t ext2_inode_by_name(struct inode *dir, struct qstr *child) static int ext2_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, ext2_get_block); + return __block_write_begin(page, pos, len, ext2_get_block); } /* Releases the page */ diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index a66f3fe33672..5c6f07eefa4a 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1196,8 +1196,7 @@ retry: ret = PTR_ERR(handle); goto out; } - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext3_get_block); + ret = __block_write_begin(page, pos, len, ext3_get_block); if (ret) goto write_begin_failed; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d6a7701018a6..3da3c9646e5e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1578,11 +1578,9 @@ retry: *pagep = page; if (ext4_should_dioread_nolock(inode)) - ret = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, ext4_get_block_write); + ret = __block_write_begin(page, pos, len, ext4_get_block_write); else - ret = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, ext4_get_block); + ret = __block_write_begin(page, pos, len, ext4_get_block); if (!ret && ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), @@ -1593,7 +1591,7 @@ retry: unlock_page(page); page_cache_release(page); /* - * block_write_begin may have instantiated a few blocks + * __block_write_begin may have instantiated a few blocks * outside i_size. Trim these off again. Don't need * i_size_read because we hold i_mutex. 
* @@ -3185,8 +3183,7 @@ retry: } *pagep = page; - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - ext4_da_get_block_prep); + ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep); if (ret < 0) { unlock_page(page); ext4_journal_stop(handle); diff --git a/fs/minix/inode.c b/fs/minix/inode.c index f4abe45229bb..6b29e73f0ca6 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -359,8 +359,7 @@ static int minix_readpage(struct file *file, struct page *page) int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, minix_get_block); + return __block_write_begin(page, pos, len, minix_get_block); } static int minix_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index fc2bcfa599a3..d14e3b94d81f 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -83,8 +83,7 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr) static int nilfs_prepare_chunk(struct page *page, unsigned from, unsigned to) { loff_t pos = page_offset(page) + from; - return block_write_begin_newtrunc(NULL, page->mapping, pos, to - from, - 0, &page, NULL, nilfs_get_block); + return __block_write_begin(page, pos, to - from, nilfs_get_block); } static void nilfs_commit_chunk(struct page *page, diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 4c1fb548ab64..045729f5674a 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2585,8 +2585,7 @@ static int reiserfs_write_begin(struct file *file, old_ref = th->t_refcount; th->t_refcount++; } - ret = block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - reiserfs_get_block); + ret = __block_write_begin(page, pos, len, reiserfs_get_block); if (ret && reiserfs_transaction_running(inode->i_sb)) { struct reiserfs_transaction_handle *th = current->journal_info; /* this gets a little ugly. 
If reiserfs_get_block returned an diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 4068f485cfd6..82a005c3d7eb 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -461,8 +461,7 @@ static int sysv_readpage(struct file *file, struct page *page) int sysv_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, get_block); + return __block_write_begin(page, pos, len, get_block); } static int sysv_write_begin(struct file *file, struct address_space *mapping, diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index a9555b1ffd28..45ce32391f8f 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -560,8 +560,7 @@ static int ufs_readpage(struct file *file, struct page *page) int ufs_prepare_chunk(struct page *page, loff_t pos, unsigned len) { - return block_write_begin_newtrunc(NULL, page->mapping, pos, len, 0, - &page, NULL, ufs_getfrag_block); + return __block_write_begin(page, pos, len, ufs_getfrag_block); } static int ufs_write_begin(struct file *file, struct address_space *mapping, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7638647f0424..accc9f81bb63 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -209,6 +209,8 @@ int block_write_begin_newtrunc(struct file *, struct address_space *, int block_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); +int __block_write_begin(struct page *page, loff_t pos, unsigned len, + get_block_t *get_block); int block_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); -- cgit From 155130a4f7848b1aac439cab6bda1a175507c71c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 4 Jun 2010 11:29:58 +0200 Subject: get rid of block_write_begin_newtrunc Move the call to vmtruncate to get rid of excessive blocks to the callers in preparation for the new truncate sequence and rename the non-truncating version to block_write_begin. While we're at it also remove several unused arguments to block_write_begin. 
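For reference, the before/after prototypes from include/linux/buffer_head.h (quoted from the hunks in this series) show exactly which arguments go away; handling of blocks instantiated past i_size on failure moves into the callers, as in the per-filesystem hunks below:

/* before */
int block_write_begin(struct file *, struct address_space *,
                loff_t, unsigned, unsigned,
                struct page **, void **, get_block_t*);

/* after: no file or fsdata argument, and no caller-supplied page */
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
                unsigned flags, struct page **pagep, get_block_t *get_block);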
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/bfs/file.c | 14 ++++++++--- fs/block_dev.c | 5 ++-- fs/buffer.c | 61 +++++++-------------------------------------- fs/ext2/inode.c | 5 ++-- fs/minix/inode.c | 12 +++++++-- fs/nilfs2/inode.c | 12 ++++++--- fs/nilfs2/recovery.c | 11 +++++--- fs/omfs/file.c | 14 ++++++++--- fs/sysv/itree.c | 13 +++++++--- fs/udf/inode.c | 13 +++++++--- fs/ufs/inode.c | 12 +++++++-- fs/xfs/linux-2.6/xfs_aops.c | 14 ++++++++--- include/linux/buffer_head.h | 8 ++---- 13 files changed, 103 insertions(+), 91 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 88b9a3ff44e4..8fc2e9c9739d 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -168,9 +168,17 @@ static int bfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, - pagep, fsdata, bfs_get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, + bfs_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t bfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/block_dev.c b/fs/block_dev.c index 65a0c26508e5..63c9d6076205 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -308,9 +308,8 @@ static int blkdev_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, blkdev_get_block); + return block_write_begin(mapping, pos, len, flags, pagep, + blkdev_get_block); } static int blkdev_write_end(struct file *file, struct address_space *mapping, diff --git a/fs/buffer.c b/fs/buffer.c index c319c49da511..50efa339e051 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1962,14 +1962,13 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len, EXPORT_SYMBOL(__block_write_begin); /* - * Filesystems implementing the new truncate sequence should use the - * _newtrunc postfix variant which won't incorrectly call vmtruncate. + * block_write_begin takes care of the basic task of block allocation and + * bringing partial write blocks uptodate first. + * * The filesystem needs to handle block truncation upon failure. */ -int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block) +int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, + unsigned flags, struct page **pagep, get_block_t *get_block) { pgoff_t index = pos >> PAGE_CACHE_SHIFT; struct page *page; @@ -1989,44 +1988,6 @@ int block_write_begin_newtrunc(struct file *file, struct address_space *mapping, *pagep = page; return status; } -EXPORT_SYMBOL(block_write_begin_newtrunc); - -/* - * block_write_begin takes care of the basic task of block allocation and - * bringing partial write blocks uptodate first. - * - * If *pagep is not NULL, then block_write_begin uses the locked page - * at *pagep rather than allocating its own. In this case, the page will - * not be unlocked or deallocated on failure. 
- */ -int block_write_begin(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned flags, - struct page **pagep, void **fsdata, - get_block_t *get_block) -{ - int ret; - - ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, get_block); - - /* - * prepare_write() may have instantiated a few blocks - * outside i_size. Trim these off again. Don't need - * i_size_read because we hold i_mutex. - * - * Filesystems which pass down their own page also cannot - * call into vmtruncate here because it would lead to lock - * inversion problems (*pagep is locked). This is a further - * example of where the old truncate sequence is inadequate. - */ - if (unlikely(ret) && *pagep == NULL) { - loff_t isize = mapping->host->i_size; - if (pos + len > isize) - vmtruncate(mapping->host, isize); - } - - return ret; -} EXPORT_SYMBOL(block_write_begin); int block_write_end(struct file *file, struct address_space *mapping, @@ -2357,7 +2318,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping, err = cont_expand_zero(file, mapping, pos, bytes); if (err) - goto out; + return err; zerofrom = *bytes & ~PAGE_CACHE_MASK; if (pos+len > *bytes && zerofrom & (blocksize-1)) { @@ -2365,11 +2326,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping, (*bytes)++; } - *pagep = NULL; - err = block_write_begin_newtrunc(file, mapping, pos, len, - flags, pagep, fsdata, get_block); -out: - return err; + return block_write_begin(mapping, pos, len, flags, pagep, get_block); } EXPORT_SYMBOL(cont_write_begin); @@ -2511,8 +2468,8 @@ int nobh_write_begin(struct address_space *mapping, unlock_page(page); page_cache_release(page); *pagep = NULL; - return block_write_begin_newtrunc(NULL, mapping, pos, len, - flags, pagep, fsdata, get_block); + return block_write_begin(mapping, pos, len, flags, pagep, + get_block); } if (PageMappedToDisk(page)) diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 2f4dfbcd7696..74dfe5f73330 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -772,9 +772,8 @@ ext2_write_begin(struct file *file, struct address_space *mapping, { int ret; - *pagep = NULL; - ret = block_write_begin_newtrunc(file, mapping, pos, len, flags, - pagep, fsdata, ext2_get_block); + ret = block_write_begin(mapping, pos, len, flags, pagep, + ext2_get_block); if (ret < 0) ext2_write_failed(mapping, pos + len); return ret; diff --git a/fs/minix/inode.c b/fs/minix/inode.c index 6b29e73f0ca6..125062f55ef2 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -366,9 +366,17 @@ static int minix_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, minix_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t minix_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 1dd9e6a7d787..5c694ece172e 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -197,11 +197,15 @@ static int nilfs_write_begin(struct file *file, struct address_space *mapping, if (unlikely(err)) return err; - *pagep = NULL; - err = block_write_begin(file, mapping, pos, len, flags, pagep, - fsdata, nilfs_get_block); - if (unlikely(err)) + err = 
block_write_begin(mapping, pos, len, flags, pagep, + nilfs_get_block); + if (unlikely(err)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + nilfs_transaction_abort(inode->i_sb); + } return err; } diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c index bae2a516b4ee..2f11f0868d87 100644 --- a/fs/nilfs2/recovery.c +++ b/fs/nilfs2/recovery.c @@ -505,11 +505,14 @@ static int recover_dsync_blocks(struct nilfs_sb_info *sbi, } pos = rb->blkoff << inode->i_blkbits; - page = NULL; - err = block_write_begin(NULL, inode->i_mapping, pos, blocksize, - 0, &page, NULL, nilfs_get_block); - if (unlikely(err)) + err = block_write_begin(inode->i_mapping, pos, blocksize, + 0, &page, nilfs_get_block); + if (unlikely(err)) { + loff_t isize = inode->i_size; + if (pos + blocksize > isize) + vmtruncate(inode, isize); goto failed_inode; + } err = nilfs_recovery_copy_block(sbi, rb, page); if (unlikely(err)) diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 6e7a3291bbe8..810cff346468 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -312,9 +312,17 @@ static int omfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, - pagep, fsdata, omfs_get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, + omfs_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t omfs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 82a005c3d7eb..9ca66276315e 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -468,9 +468,16 @@ static int sysv_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t sysv_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 124852bcf6fe..ecddcc2ed746 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -127,9 +127,16 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, - udf_get_block); + int ret; + + ret = block_write_begin(mapping, pos, len, flags, pagep, udf_get_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t udf_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c index 45ce32391f8f..45cafa937a4b 100644 --- a/fs/ufs/inode.c +++ b/fs/ufs/inode.c @@ -567,9 +567,17 @@ static int ufs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags, pagep, fsdata, + int ret; + + ret = 
block_write_begin(mapping, pos, len, flags, pagep, ufs_getfrag_block); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } static sector_t ufs_bmap(struct address_space *mapping, sector_t block) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 7968d41e27ad..bf7aad0d78b8 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1504,9 +1504,17 @@ xfs_vm_write_begin( struct page **pagep, void **fsdata) { - *pagep = NULL; - return block_write_begin(file, mapping, pos, len, flags | AOP_FLAG_NOFS, - pagep, fsdata, xfs_get_blocks); + int ret; + + ret = block_write_begin(mapping, pos, len, flags | AOP_FLAG_NOFS, + pagep, xfs_get_blocks); + if (unlikely(ret)) { + loff_t isize = mapping->host->i_size; + if (pos + len > isize) + vmtruncate(mapping->host, isize); + } + + return ret; } STATIC sector_t diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index accc9f81bb63..3f69054f86d9 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -203,12 +203,8 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block, int block_read_full_page(struct page*, get_block_t*); int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, unsigned long from); -int block_write_begin_newtrunc(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page **, void **, get_block_t*); -int block_write_begin(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page **, void **, get_block_t*); +int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, + unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block); int block_write_end(struct file *, struct address_space *, -- cgit From b5fc510c48f631882ccec3c0f02a25d5b67de09f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 4 Jul 2010 12:24:09 +0400 Subject: get rid of file_fsync() Copy and simplify in the only two users remaining. Signed-off-by: Al Viro --- fs/hfs/inode.c | 26 +++++++++++++++++++++++++- fs/hfsplus/hfsplus_fs.h | 1 + fs/hfsplus/inode.c | 27 ++++++++++++++++++++++++++- fs/hfsplus/super.c | 2 +- fs/sync.c | 25 ------------------------- include/linux/buffer_head.h | 1 - 6 files changed, 53 insertions(+), 29 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 87de671baa83..93ceec8fbb8f 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -625,6 +625,30 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr) return 0; } +static int hfs_file_fsync(struct file *filp, int datasync) +{ + struct inode *inode = filp->f_mapping->host; + struct super_block * sb; + int ret, err; + + /* sync the inode to buffers */ + ret = write_inode_now(inode, 0); + + /* sync the superblock to buffers */ + sb = inode->i_sb; + if (sb->s_dirt) { + lock_super(sb); + sb->s_dirt = 0; + if (!(sb->s_flags & MS_RDONLY)) + hfs_mdb_commit(sb); + unlock_super(sb); + } + /* .. 
finally sync the buffers to disk */ + err = sync_blockdev(sb->s_bdev); + if (!ret) + ret = err; + return ret; +} static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, @@ -634,7 +658,7 @@ static const struct file_operations hfs_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, - .fsync = file_fsync, + .fsync = hfs_file_fsync, .open = hfs_file_open, .release = hfs_file_release, }; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index 6505c30ad965..dc856be3c2b0 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -351,6 +351,7 @@ int hfsplus_show_options(struct seq_file *, struct vfsmount *); /* super.c */ struct inode *hfsplus_iget(struct super_block *, unsigned long); +int hfsplus_sync_fs(struct super_block *sb, int wait); /* tables.c */ extern u16 hfsplus_case_fold_table[]; diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 654c5a8ddf1c..c5a979d62c65 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -311,6 +311,31 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr) return 0; } +static int hfsplus_file_fsync(struct file *filp, int datasync) +{ + struct inode *inode = filp->f_mapping->host; + struct super_block * sb; + int ret, err; + + /* sync the inode to buffers */ + ret = write_inode_now(inode, 0); + + /* sync the superblock to buffers */ + sb = inode->i_sb; + if (sb->s_dirt) { + if (!(sb->s_flags & MS_RDONLY)) + hfsplus_sync_fs(sb, 1); + else + sb->s_dirt = 0; + } + + /* .. finally sync the buffers to disk */ + err = sync_blockdev(sb->s_bdev); + if (!ret) + ret = err; + return ret; +} + static const struct inode_operations hfsplus_file_inode_operations = { .lookup = hfsplus_file_lookup, .truncate = hfsplus_file_truncate, @@ -328,7 +353,7 @@ static const struct file_operations hfsplus_file_operations = { .aio_write = generic_file_aio_write, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, - .fsync = file_fsync, + .fsync = hfsplus_file_fsync, .open = hfsplus_file_open, .release = hfsplus_file_release, .unlocked_ioctl = hfsplus_ioctl, diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 74b473a8ef92..a32c241e4e45 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -154,7 +154,7 @@ static void hfsplus_clear_inode(struct inode *inode) } } -static int hfsplus_sync_fs(struct super_block *sb, int wait) +int hfsplus_sync_fs(struct super_block *sb, int wait) { struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr; diff --git a/fs/sync.c b/fs/sync.c index 15aa6f03b2da..ba76b9623e7e 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -128,31 +128,6 @@ void emergency_sync(void) } } -/* - * Generic function to fsync a file. - */ -int file_fsync(struct file *filp, int datasync) -{ - struct inode *inode = filp->f_mapping->host; - struct super_block * sb; - int ret, err; - - /* sync the inode to buffers */ - ret = write_inode_now(inode, 0); - - /* sync the superblock to buffers */ - sb = inode->i_sb; - if (sb->s_dirt && sb->s_op->write_super) - sb->s_op->write_super(sb); - - /* .. 
finally sync the buffers to disk */ - err = sync_blockdev(sb->s_bdev); - if (!ret) - ret = err; - return ret; -} -EXPORT_SYMBOL(file_fsync); - /** * vfs_fsync_range - helper to sync a range of data & metadata to disk * @file: file to sync diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 3f69054f86d9..620f1d1088cb 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -225,7 +225,6 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); -int file_fsync(struct file *, int); int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t*); int nobh_write_end(struct file *, struct address_space *, -- cgit From 87e99511ea54510ffb60b98001d108794d5037f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:05:45 +0200 Subject: kill BH_Ordered flag Instead of abusing a buffer_head flag just add a variant of sync_dirty_buffer which allows passing the exact type of write flag required. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 17 ++++++++-------- fs/jbd/commit.c | 49 +++++++++++++++++++++++---------------------- fs/jbd2/commit.c | 39 ++++++++++++++---------------------- fs/nilfs2/super.c | 28 +++++++++++++------------- include/linux/buffer_head.h | 3 +-- 5 files changed, 63 insertions(+), 73 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index 50efa339e051..6c8ad977f3d4 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2911,13 +2911,6 @@ int submit_bh(int rw, struct buffer_head * bh) BUG_ON(buffer_delay(bh)); BUG_ON(buffer_unwritten(bh)); - /* - * Mask in barrier bit for a write (could be either a WRITE or a - * WRITE_SYNC - */ - if (buffer_ordered(bh) && (rw & WRITE)) - rw |= WRITE_BARRIER; - /* * Only clear out a write error when rewriting */ @@ -3021,7 +3014,7 @@ EXPORT_SYMBOL(ll_rw_block); * and then start new I/O and then wait upon it. The caller must have a ref on * the buffer_head. 
*/ -int sync_dirty_buffer(struct buffer_head *bh) +int __sync_dirty_buffer(struct buffer_head *bh, int rw) { int ret = 0; @@ -3030,7 +3023,7 @@ int sync_dirty_buffer(struct buffer_head *bh) if (test_clear_buffer_dirty(bh)) { get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(WRITE_SYNC, bh); + ret = submit_bh(rw, bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -3043,6 +3036,12 @@ int sync_dirty_buffer(struct buffer_head *bh) } return ret; } +EXPORT_SYMBOL(__sync_dirty_buffer); + +int sync_dirty_buffer(struct buffer_head *bh) +{ + return __sync_dirty_buffer(bh, WRITE_SYNC); +} EXPORT_SYMBOL(sync_dirty_buffer); /* diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 28a9ddaa0c49..95d8c11c929e 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -119,7 +119,6 @@ static int journal_write_commit_record(journal_t *journal, struct buffer_head *bh; journal_header_t *header; int ret; - int barrier_done = 0; if (is_journal_aborted(journal)) return 0; @@ -137,34 +136,36 @@ static int journal_write_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); + if (journal->j_flags & JFS_BARRIER) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = sync_dirty_buffer(bh); - if (barrier_done) - clear_buffer_ordered(bh); - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; + ret = __sync_dirty_buffer(bh, WRITE_SYNC | WRITE_BARRIER); - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); - spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; - spin_unlock(&journal->j_state_lock); + /* + * Is it possible for another commit to fail at roughly + * the same time as this one? 
If so, we don't want to + * trust the barrier flag in the super, but instead want + * to remember if we sent a barrier request + */ + if (ret == -EOPNOTSUPP) { + char b[BDEVNAME_SIZE]; - /* And try again, without the barrier */ - set_buffer_uptodate(bh); - set_buffer_dirty(bh); + printk(KERN_WARNING + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", + bdevname(journal->j_dev, b)); + spin_lock(&journal->j_state_lock); + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); + + /* And try again, without the barrier */ + set_buffer_uptodate(bh); + set_buffer_dirty(bh); + ret = sync_dirty_buffer(bh); + } + } else { ret = sync_dirty_buffer(bh); } + put_bh(bh); /* One for getblk() */ journal_put_journal_head(descriptor); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f52e5e8049f1..7c068c189d80 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -101,7 +101,6 @@ static int journal_submit_commit_record(journal_t *journal, struct commit_header *tmp; struct buffer_head *bh; int ret; - int barrier_done = 0; struct timespec now = current_kernel_time(); if (is_journal_aborted(journal)) @@ -136,30 +135,22 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { - set_buffer_ordered(bh); - barrier_done = 1; - } - ret = submit_bh(WRITE_SYNC_PLUG, bh); - if (barrier_done) - clear_buffer_ordered(bh); - - /* is it possible for another commit to fail at roughly - * the same time as this one? If so, we don't want to - * trust the barrier flag in the super, but instead want - * to remember if we sent a barrier request - */ - if (ret == -EOPNOTSUPP && barrier_done) { - printk(KERN_WARNING - "JBD2: Disabling barriers on %s, " - "not supported by device\n", journal->j_devname); - write_lock(&journal->j_state_lock); - journal->j_flags &= ~JBD2_BARRIER; - write_unlock(&journal->j_state_lock); + ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh); + if (ret == -EOPNOTSUPP) { + printk(KERN_WARNING + "JBD2: Disabling barriers on %s, " + "not supported by device\n", journal->j_devname); + write_lock(&journal->j_state_lock); + journal->j_flags &= ~JBD2_BARRIER; + write_unlock(&journal->j_state_lock); - /* And try again, without the barrier */ - lock_buffer(bh); - set_buffer_uptodate(bh); - clear_buffer_dirty(bh); + /* And try again, without the barrier */ + lock_buffer(bh); + set_buffer_uptodate(bh); + clear_buffer_dirty(bh); + ret = submit_bh(WRITE_SYNC_PLUG, bh); + } + } else { ret = submit_bh(WRITE_SYNC_PLUG, bh); } *cbh = bh; diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c index 1fa86b9df73b..68345430fb48 100644 --- a/fs/nilfs2/super.c +++ b/fs/nilfs2/super.c @@ -175,24 +175,24 @@ static int nilfs_sync_super(struct nilfs_sb_info *sbi, int flag) { struct the_nilfs *nilfs = sbi->s_nilfs; int err; - int barrier_done = 0; - if (nilfs_test_opt(sbi, BARRIER)) { - set_buffer_ordered(nilfs->ns_sbh[0]); - barrier_done = 1; - } retry: set_buffer_dirty(nilfs->ns_sbh[0]); - err = sync_dirty_buffer(nilfs->ns_sbh[0]); - if (err == -EOPNOTSUPP && barrier_done) { - nilfs_warning(sbi->s_super, __func__, - "barrier-based sync failed. 
" - "disabling barriers\n"); - nilfs_clear_opt(sbi, BARRIER); - barrier_done = 0; - clear_buffer_ordered(nilfs->ns_sbh[0]); - goto retry; + + if (nilfs_test_opt(sbi, BARRIER)) { + err = __sync_dirty_buffer(nilfs->ns_sbh[0], + WRITE_SYNC | WRITE_BARRIER); + if (err == -EOPNOTSUPP) { + nilfs_warning(sbi->s_super, __func__, + "barrier-based sync failed. " + "disabling barriers\n"); + nilfs_clear_opt(sbi, BARRIER); + goto retry; + } + } else { + err = sync_dirty_buffer(nilfs->ns_sbh[0]); } + if (unlikely(err)) { printk(KERN_ERR "NILFS: unable to write superblock (err=%d)\n", err); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 43e649a72529..72c1cf83eb85 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Ordered, /* ordered write */ BH_Eopnotsupp, /* operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -125,7 +124,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Ordered, ordered) BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) @@ -183,6 +181,7 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); +int __sync_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); -- cgit From 9cb569d601e0b93e01c20a22872270ec663b75f6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 11 Aug 2010 17:06:24 +0200 Subject: remove SWRITE* I/O types These flags aren't real I/O types, but tell ll_rw_block to always lock the buffer instead of giving up on a failed trylock. Instead add a new write_dirty_buffer helper that implements this semantic and use it from the existing SWRITE* callers. Note that the ll_rw_block code had a bug where it didn't promote WRITE_SYNC_PLUG properly, which this patch fixes. In the ufs code clean up the helper that used to call ll_rw_block to mirror sync_dirty_buffer, which is the function it implements for compound buffers. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 52 +++++++++++++++++++++++++-------------------- fs/fat/misc.c | 4 +++- fs/jbd/checkpoint.c | 4 +++- fs/jbd/journal.c | 2 +- fs/jbd/revoke.c | 2 +- fs/jbd2/checkpoint.c | 4 +++- fs/jbd2/journal.c | 2 +- fs/jbd2/revoke.c | 2 +- fs/reiserfs/journal.c | 2 +- fs/ufs/balloc.c | 24 +++++++-------------- fs/ufs/ialloc.c | 18 ++++++---------- fs/ufs/truncate.c | 18 ++++++---------- fs/ufs/util.c | 20 +++++++---------- fs/ufs/util.h | 3 +-- include/linux/buffer_head.h | 1 + include/linux/fs.h | 9 -------- 16 files changed, 73 insertions(+), 94 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index 6c8ad977f3d4..3e7dca279d1c 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_unlock(lock); /* * Ensure any pending I/O completes so that - * ll_rw_block() actually writes the current - * contents - it is a noop if I/O is still in - * flight on potentially older contents. + * write_dirty_buffer() actually writes the + * current contents - it is a noop if I/O is + * still in flight on potentially older + * contents. */ - ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, WRITE_SYNC_PLUG); /* * Kick off IO for the previous mapping. Note @@ -2949,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh); /** * ll_rw_block: low-level access to block devices (DEPRECATED) - * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead) + * @rw: whether to %READ or %WRITE or maybe %READA (readahead) * @nr: number of &struct buffer_heads in the array * @bhs: array of pointers to &struct buffer_head * * ll_rw_block() takes an array of pointers to &struct buffer_heads, and * requests an I/O operation on them, either a %READ or a %WRITE. The third - * %SWRITE is like %WRITE only we make sure that the *current* data in buffers - * are sent to disk. The fourth %READA option is described in the documentation - * for generic_make_request() which ll_rw_block() calls. + * %READA option is described in the documentation for generic_make_request() + * which ll_rw_block() calls. * * This function drops any buffer that it cannot get a lock on (with the - * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be - * clean when doing a write request, and any buffer that appears to be - * up-to-date when doing read request. Further it marks as clean buffers that - * are processed for writing (the buffer cache won't assume that they are - * actually clean until the buffer gets unlocked). + * BH_Lock state bit), any buffer that appears to be clean when doing a write + * request, and any buffer that appears to be up-to-date when doing read + * request. Further it marks as clean buffers that are processed for + * writing (the buffer cache won't assume that they are actually clean + * until the buffer gets unlocked). 
* * ll_rw_block sets b_end_io to simple completion handler that marks * the buffer up-to-date (if approriate), unlocks the buffer and wakes @@ -2980,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) for (i = 0; i < nr; i++) { struct buffer_head *bh = bhs[i]; - if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG) - lock_buffer(bh); - else if (!trylock_buffer(bh)) + if (!trylock_buffer(bh)) continue; - - if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC || - rw == SWRITE_SYNC_PLUG) { + if (rw == WRITE) { if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - if (rw == SWRITE_SYNC) - submit_bh(WRITE_SYNC, bh); - else - submit_bh(WRITE, bh); + submit_bh(WRITE, bh); continue; } } else { @@ -3009,6 +3002,19 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[]) } EXPORT_SYMBOL(ll_rw_block); +void write_dirty_buffer(struct buffer_head *bh, int rw) +{ + lock_buffer(bh); + if (!test_clear_buffer_dirty(bh)) { + unlock_buffer(bh); + return; + } + bh->b_end_io = end_buffer_write_sync; + get_bh(bh); + submit_bh(rw, bh); +} +EXPORT_SYMBOL(write_dirty_buffer); + /* * For a data-integrity writeout, we need to wait upon any in-progress I/O * and then start new I/O and then wait upon it. The caller must have a ref on diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1fa23f6ffba5..1736f2356388 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -250,7 +250,9 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) { int i, err = 0; - ll_rw_block(SWRITE, nr_bhs, bhs); + for (i = 0; i < nr_bhs; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); if (buffer_eopnotsupp(bhs[i])) { diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index b0435dd0654d..05a38b9c4c0e 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -254,7 +254,9 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index f19ce94693d8..2c4b1f109da9 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -1024,7 +1024,7 @@ void journal_update_superblock(journal_t *journal, int wait) if (wait) sync_dirty_buffer(bh); else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd/revoke.c b/fs/jbd/revoke.c index ad717328343a..d29018307e2e 100644 --- a/fs/jbd/revoke.c +++ b/fs/jbd/revoke.c @@ -617,7 +617,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? 
SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1c23a0f4e8a3..5247e7ffdcb4 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -255,7 +255,9 @@ __flush_batch(journal_t *journal, int *batch_count) { int i; - ll_rw_block(SWRITE, *batch_count, journal->j_chkpt_bhs); + for (i = 0; i < *batch_count; i++) + write_dirty_buffer(journal->j_chkpt_bhs[i], WRITE); + for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = journal->j_chkpt_bhs[i]; clear_buffer_jwrite(bh); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index ad5866aaf0f9..0e8014ea6b94 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1124,7 +1124,7 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) set_buffer_uptodate(bh); } } else - ll_rw_block(SWRITE, 1, &bh); + write_dirty_buffer(bh, WRITE); out: /* If we have just flushed the log (by marking s_start==0), then diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index a360b06af2e3..9ad321fd63fd 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -625,7 +625,7 @@ static void flush_descriptor(journal_t *journal, set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); set_buffer_dirty(bh); - ll_rw_block((write_op == WRITE) ? SWRITE : SWRITE_SYNC_PLUG, 1, &bh); + write_dirty_buffer(bh, write_op); } #endif diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 1ec952b1f036..812e2c05aa29 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -2311,7 +2311,7 @@ static int journal_read_transaction(struct super_block *sb, /* flush out the real blocks */ for (i = 0; i < get_desc_trans_len(desc); i++) { set_buffer_dirty(real_blocks[i]); - ll_rw_block(SWRITE, 1, real_blocks + i); + write_dirty_buffer(real_blocks[i], WRITE); } for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(real_blocks[i]); diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 048484fb10d2..46f7a807bbc1 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -114,10 +114,8 @@ void ufs_free_fragments(struct inode *inode, u64 fragment, unsigned count) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -207,10 +205,8 @@ do_more: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); if (overflow) { fragment += count; @@ -558,10 +554,8 @@ static u64 ufs_add_fragments(struct inode *inode, u64 fragment, ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; UFSD("EXIT, fragment %llu\n", (unsigned long long)fragment); @@ -680,10 +674,8 @@ cg_found: succed: ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; result += cgno * 
uspi->s_fpg; diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c index 428017e018fe..2eabf04af3de 100644 --- a/fs/ufs/ialloc.c +++ b/fs/ufs/ialloc.c @@ -113,10 +113,8 @@ void ufs_free_inode (struct inode * inode) ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; unlock_super (sb); @@ -156,10 +154,8 @@ static void ufs2_init_inodes_chunk(struct super_block *sb, fs32_add(sb, &ucg->cg_u.cg_u2.cg_initediblk, uspi->s_inopb); ubh_mark_buffer_dirty(UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer(UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); UFSD("EXIT\n"); } @@ -290,10 +286,8 @@ cg_found: } ubh_mark_buffer_dirty (USPI_UBH(uspi)); ubh_mark_buffer_dirty (UCPI_UBH(ucpi)); - if (sb->s_flags & MS_SYNCHRONOUS) { - ubh_ll_rw_block(SWRITE, UCPI_UBH(ucpi)); - ubh_wait_on_buffer (UCPI_UBH(ucpi)); - } + if (sb->s_flags & MS_SYNCHRONOUS) + ubh_sync_block(UCPI_UBH(ucpi)); sb->s_dirt = 1; inode->i_ino = cg * uspi->s_ipg + bit; diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c index 34d5cb135320..a58f9155fc9a 100644 --- a/fs/ufs/truncate.c +++ b/fs/ufs/truncate.c @@ -243,10 +243,8 @@ static int ufs_trunc_indirect(struct inode *inode, u64 offset, void *p) ubh_bforget(ind_ubh); ind_ubh = NULL; } - if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) { - ubh_ll_rw_block(SWRITE, ind_ubh); - ubh_wait_on_buffer (ind_ubh); - } + if (IS_SYNC(inode) && ind_ubh && ubh_buffer_dirty(ind_ubh)) + ubh_sync_block(ind_ubh); ubh_brelse (ind_ubh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -307,10 +305,8 @@ static int ufs_trunc_dindirect(struct inode *inode, u64 offset, void *p) ubh_bforget(dind_bh); dind_bh = NULL; } - if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) { - ubh_ll_rw_block(SWRITE, dind_bh); - ubh_wait_on_buffer (dind_bh); - } + if (IS_SYNC(inode) && dind_bh && ubh_buffer_dirty(dind_bh)) + ubh_sync_block(dind_bh); ubh_brelse (dind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); @@ -367,10 +363,8 @@ static int ufs_trunc_tindirect(struct inode *inode) ubh_bforget(tind_bh); tind_bh = NULL; } - if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) { - ubh_ll_rw_block(SWRITE, tind_bh); - ubh_wait_on_buffer (tind_bh); - } + if (IS_SYNC(inode) && tind_bh && ubh_buffer_dirty(tind_bh)) + ubh_sync_block(tind_bh); ubh_brelse (tind_bh); UFSD("EXIT: ino %lu\n", inode->i_ino); diff --git a/fs/ufs/util.c b/fs/ufs/util.c index 85a7fc9e4a4e..d2c36d53fe66 100644 --- a/fs/ufs/util.c +++ b/fs/ufs/util.c @@ -113,21 +113,17 @@ void ubh_mark_buffer_uptodate (struct ufs_buffer_head * ubh, int flag) } } -void ubh_ll_rw_block(int rw, struct ufs_buffer_head *ubh) +void ubh_sync_block(struct ufs_buffer_head *ubh) { - if (!ubh) - return; + if (ubh) { + unsigned i; - ll_rw_block(rw, ubh->count, ubh->bh); -} + for (i = 0; i < ubh->count; i++) + write_dirty_buffer(ubh->bh[i], WRITE); -void ubh_wait_on_buffer (struct ufs_buffer_head * ubh) -{ - unsigned i; - if (!ubh) - return; - for ( i = 0; i < ubh->count; i++ ) - wait_on_buffer (ubh->bh[i]); + for (i = 0; i < ubh->count; i++) + wait_on_buffer(ubh->bh[i]); + } } void ubh_bforget (struct ufs_buffer_head * ubh) diff --git a/fs/ufs/util.h b/fs/ufs/util.h index 0466036912f1..9f8775ce381c 100644 --- a/fs/ufs/util.h +++ b/fs/ufs/util.h @@ 
-269,8 +269,7 @@ extern void ubh_brelse (struct ufs_buffer_head *); extern void ubh_brelse_uspi (struct ufs_sb_private_info *); extern void ubh_mark_buffer_dirty (struct ufs_buffer_head *); extern void ubh_mark_buffer_uptodate (struct ufs_buffer_head *, int); -extern void ubh_ll_rw_block(int, struct ufs_buffer_head *); -extern void ubh_wait_on_buffer (struct ufs_buffer_head *); +extern void ubh_sync_block(struct ufs_buffer_head *); extern void ubh_bforget (struct ufs_buffer_head *); extern int ubh_buffer_dirty (struct ufs_buffer_head *); #define ubh_ubhcpymem(mem,ubh,size) _ubh_ubhcpymem_(uspi,mem,ubh,size) diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 72c1cf83eb85..ec94c12f21da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -182,6 +182,7 @@ void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int rw); +void write_dirty_buffer(struct buffer_head *bh, int rw); int submit_bh(int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a96b4d83fc1..29f7c975304c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,9 +125,6 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * SWRITE Like WRITE, but a special case for ll_rw_block() that - * tells it to lock the buffer first. Normally a buffer - * must be locked before doing IO. * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO * shortly. The device must still be unplugged explicitly, @@ -138,9 +135,6 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * SWRITE_SYNC - * SWRITE_SYNC_PLUG Like WRITE_SYNC/WRITE_SYNC_PLUG, but locks the buffer. - * See SWRITE. * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all * previously submitted writes must be safely on storage * before this one is started. Also guarantees that when @@ -155,7 +149,6 @@ struct inodes_stat_t { #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define SWRITE (WRITE | READA) #define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) #define READ_META (READ | REQ_META) @@ -165,8 +158,6 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) -#define SWRITE_SYNC_PLUG (SWRITE | REQ_SYNC | REQ_NOIDLE) -#define SWRITE_SYNC (SWRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) /* * These aren't really reads or writes, they pass down information about -- cgit From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. Block layer will choose the correct way of implementing the semantics and execute it. 
The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or FUA flags that they don't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply makes them no-ops. IOW, it no longer distinguishes between a writeback cache which doesn't support cache flush and writethrough/no cache. Devices which have WB cache w/o flush are very difficult to come by these days and there's nothing much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing, but some drivers assume that zero length requests don't have rq->bio, which isn't true for these requests, requiring the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-flush.c | 85 ++++++++++++++++++++++++--------------------- block/blk.h | 3 ++ include/linux/blk_types.h | 2 +- include/linux/blkdev.h | 38 ++------------------ include/linux/buffer_head.h | 2 +- include/linux/fs.h | 19 ++++++---- 7 files changed, 67 insertions(+), 84 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/block/blk-core.c b/block/blk-core.c index 8870ae40179d..18455c4f618a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (bio->bi_rw & REQ_HARDBARRIER) { + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { where = ELEVATOR_INSERT_FRONT; goto get_rq; } diff --git a/block/blk-flush.c b/block/blk-flush.c index dd873225da97..452c552e9ead 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -1,5 +1,5 @@ /* - * Functions related to barrier IO handling + * Functions to sequence FLUSH and FUA writes.
*/ #include #include @@ -9,6 +9,15 @@ #include "blk.h" +/* FLUSH/FUA sequences */ +enum { + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), +}; + static struct request *queue_next_fseq(struct request_queue *q); unsigned blk_flush_cur_seq(struct request_queue *q) @@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq, static struct request *queue_next_fseq(struct request_queue *q) { + struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; switch (blk_flush_cur_seq(q)) { @@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: - /* initialize proxy request and queue it */ + /* initialize proxy request, inherit FLUSH/FUA and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_flush_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; + init_request_from_bio(rq, orig_rq->bio); + rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); + rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *blk_do_flush(struct request_queue *q, struct request *rq) { + unsigned int fflags = q->flush_flags; /* may change, cache it */ + bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; + bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); + bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; - if (!(rq->cmd_flags & REQ_HARDBARRIER)) + /* + * Special case. If there's data but flush is not necessary, + * the request can be issued directly. + * + * Flush w/o data should be able to be issued directly too but + * currently some drivers assume that rq->bio contains + * non-zero data if it isn't NULL and empty FLUSH requests + * getting here usually have bio's without data. + */ + if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; return rq; + } + /* + * Sequenced flushes can't be processed in parallel. If + * another one is already in progress, queue for later + * processing. + */ if (q->flush_seq) { - /* - * Sequenced flush is already in progress and they - * can't be processed in parallel. Queue for later - * processing. - */ list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - /* * Start a new flush sequence */ q->flush_err = 0; - q->ordered = q->next_ordered; q->flush_seq |= QUEUE_FSEQ_STARTED; - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. 
- */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ + /* adjust FLUSH/FUA of the original request and stash it away */ + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; blk_dequeue_request(rq); q->orig_flush_rq = rq; - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + /* skip unneded sequences and return the first one */ + if (!do_preflush) skip |= QUEUE_FSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + if (!blk_rq_sectors(rq)) skip |= QUEUE_FSEQ_DATA; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + if (!do_postflush) skip |= QUEUE_FSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ return blk_flush_complete_seq(q, skip, 0); } diff --git a/block/blk.h b/block/blk.h index 24b92bd78f37..a09c18b19116 100644 --- a/block/blk.h +++ b/block/blk.h @@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); + if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || + rq == &q->flush_rq) + return rq; rq = blk_do_flush(q, rq); if (rq) return rq; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9192282b4259..179799479e6f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -167,7 +167,7 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_HARDBARRIER | REQ_SYNC | \ - REQ_META| REQ_DISCARD | REQ_NOIDLE) + REQ_META | REQ_DISCARD | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077db..8ef705f800ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,7 +357,6 @@ struct request_queue /* * for flush operations */ - unsigned int ordered, next_ordered; unsigned int flush_flags; unsigned int flush_seq; int flush_err; @@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. - * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - */ - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * FLUSH/FUA sequences. 
- */ - QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ - QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_FSEQ_DONE = (1 << 4), -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index ec94c12f21da..fc999f583fda 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,7 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* operation not supported (barrier) */ + BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 76041b614758..352c48627381 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,12 +135,13 @@ struct inodes_stat_t { * immediately after submission. The write equivalent * of READ_SYNC. * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. - * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all - * previously submitted writes must be safely on storage - * before this one is started. Also guarantees that when - * this write is complete, it itself is also safely on - * storage. Prevents reordering of writes on both sides - * of this IO. + * WRITE_BARRIER DEPRECATED. Always fails. Use FLUSH/FUA instead. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. * */ #define RW_MASK REQ_WRITE @@ -158,6 +159,12 @@ struct inodes_stat_t { #define WRITE_META (WRITE | REQ_META) #define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ REQ_HARDBARRIER) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ + REQ_FLUSH | REQ_FUA) /* * These aren't really reads or writes, they pass down information about -- cgit From 0edd55faea7c8081bc826234b917501738a6218f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:23 -0400 Subject: block: remove the BH_Eopnotsupp flag This flag was only set for barrier buffers, which we don't submit anymore. 
Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- fs/buffer.c | 7 +------ fs/fat/misc.c | 5 +---- include/linux/buffer_head.h | 2 -- 3 files changed, 2 insertions(+), 12 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index 3e7dca279d1c..7f0b9b083f77 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -156,7 +156,7 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) if (uptodate) { set_buffer_uptodate(bh); } else { - if (!buffer_eopnotsupp(bh) && !quiet_error(bh)) { + if (!quiet_error(bh)) { buffer_io_error(bh); printk(KERN_WARNING "lost page write due to " "I/O error on %s\n", @@ -2891,7 +2891,6 @@ static void end_bio_bh_io_sync(struct bio *bio, int err) if (err == -EOPNOTSUPP) { set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - set_bit(BH_Eopnotsupp, &bh->b_state); } if (unlikely (test_bit(BIO_QUIET,&bio->bi_flags))) @@ -3031,10 +3030,6 @@ int __sync_dirty_buffer(struct buffer_head *bh, int rw) bh->b_end_io = end_buffer_write_sync; ret = submit_bh(rw, bh); wait_on_buffer(bh); - if (buffer_eopnotsupp(bh)) { - clear_buffer_eopnotsupp(bh); - ret = -EOPNOTSUPP; - } if (!ret && !buffer_uptodate(bh)) ret = -EIO; } else { diff --git a/fs/fat/misc.c b/fs/fat/misc.c index 1736f2356388..970e682ea754 100644 --- a/fs/fat/misc.c +++ b/fs/fat/misc.c @@ -255,10 +255,7 @@ int fat_sync_bhs(struct buffer_head **bhs, int nr_bhs) for (i = 0; i < nr_bhs; i++) { wait_on_buffer(bhs[i]); - if (buffer_eopnotsupp(bhs[i])) { - clear_buffer_eopnotsupp(bhs[i]); - err = -EOPNOTSUPP; - } else if (!err && !buffer_uptodate(bhs[i])) + if (!err && !buffer_uptodate(bhs[i])) err = -EIO; } return err; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index fc999f583fda..dd1b25b2641c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -32,7 +32,6 @@ enum bh_state_bits { BH_Delay, /* Buffer is not yet allocated on disk */ BH_Boundary, /* Block is followed by a discontiguity */ BH_Write_EIO, /* I/O error on write */ - BH_Eopnotsupp, /* DEPRECATED: operation not supported (barrier) */ BH_Unwritten, /* Buffer is allocated on disk but not written */ BH_Quiet, /* Buffer Error Prinks to be quiet */ @@ -124,7 +123,6 @@ BUFFER_FNS(Async_Write, async_write) BUFFER_FNS(Delay, delay) BUFFER_FNS(Boundary, boundary) BUFFER_FNS(Write_EIO, write_io_error) -BUFFER_FNS(Eopnotsupp, eopnotsupp) BUFFER_FNS(Unwritten, unwritten) #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) -- cgit From ebdec241d509cf69f6ebf1ecdc036359d3dbe154 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Oct 2010 10:47:23 +0200 Subject: fs: kill block_prepare_write __block_write_begin and block_prepare_write are identical except for slightly different calling conventions. Convert all callers to the __block_write_begin calling conventions and drop block_prepare_write. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/buffer.c | 17 +++++------------ fs/ext3/inode.c | 4 ++-- fs/ext4/inode.c | 11 +++++------ fs/gfs2/aops.c | 3 +-- fs/gfs2/ops_inode.c | 6 +++--- fs/ocfs2/aops.c | 19 ++----------------- fs/ocfs2/aops.h | 3 --- fs/ocfs2/file.c | 9 ++++----- fs/reiserfs/inode.c | 24 +++++++++++------------- fs/reiserfs/ioctl.c | 6 ++---- fs/reiserfs/xattr.c | 5 +---- fs/xfs/linux-2.6/xfs_super.c | 2 +- include/linux/buffer_head.h | 1 - include/linux/reiserfs_fs.h | 2 ++ 14 files changed, 39 insertions(+), 73 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index 7f0b9b083f77..a7b8f3c59a4e 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1834,9 +1834,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to) } EXPORT_SYMBOL(page_zero_new_buffers); -int block_prepare_write(struct page *page, unsigned from, unsigned to, +int __block_write_begin(struct page *page, loff_t pos, unsigned len, get_block_t *get_block) { + unsigned from = pos & (PAGE_CACHE_SIZE - 1); + unsigned to = from + len; struct inode *inode = page->mapping->host; unsigned block_start, block_end; sector_t block; @@ -1916,7 +1918,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to, } return err; } -EXPORT_SYMBOL(block_prepare_write); +EXPORT_SYMBOL(__block_write_begin); static int __block_commit_write(struct inode *inode, struct page *page, unsigned from, unsigned to) @@ -1953,15 +1955,6 @@ static int __block_commit_write(struct inode *inode, struct page *page, return 0; } -int __block_write_begin(struct page *page, loff_t pos, unsigned len, - get_block_t *get_block) -{ - unsigned start = pos & (PAGE_CACHE_SIZE - 1); - - return block_prepare_write(page, start, start + len, get_block); -} -EXPORT_SYMBOL(__block_write_begin); - /* * block_write_begin takes care of the basic task of block allocation and * bringing partial write blocks uptodate first. @@ -2379,7 +2372,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, else end = PAGE_CACHE_SIZE; - ret = block_prepare_write(page, 0, end, get_block); + ret = __block_write_begin(page, 0, end, get_block); if (!ret) ret = block_commit_write(page, 0, end); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 5e0faf4cda79..ad05353040a1 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1696,8 +1696,8 @@ static int ext3_journalled_writepage(struct page *page, * doesn't seem much point in redirtying the page here. */ ClearPageChecked(page); - ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - ext3_get_block); + ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE, + ext3_get_block); if (ret != 0) { ext3_journal_stop(handle); goto out_unlock; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 4b8debeb3965..49635ef236f8 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1538,10 +1538,10 @@ static int do_journal_get_write_access(handle_t *handle, if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; /* - * __block_prepare_write() could have dirtied some buffers. Clean + * __block_write_begin() could have dirtied some buffers. Clean * the dirty bit as jbd2_journal_get_write_access() could complain * otherwise about fs integrity issues. Setting of the dirty bit - * by __block_prepare_write() isn't a real problem here as we clear + * by __block_write_begin() isn't a real problem here as we clear * the bit before releasing a page lock and thus writeback cannot * ever write the buffer. 
*/ @@ -2550,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, if (buffer_delay(bh)) return 0; /* Not sure this could or should happen */ /* - * XXX: __block_prepare_write() unmaps passed block, - * is it OK? + * XXX: __block_write_begin() unmaps passed block, is it OK? */ ret = ext4_da_reserve_space(inode, iblock); if (ret) @@ -2583,7 +2582,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock, /* * This function is used as a standard get_block_t calback function * when there is no desire to allocate any blocks. It is used as a - * callback function for block_prepare_write() and block_write_full_page(). + * callback function for block_write_begin() and block_write_full_page(). * These functions should only try to map a single block at a time. * * Since this function doesn't do block allocations even if the caller @@ -2743,7 +2742,7 @@ static int ext4_writepage(struct page *page, * all are mapped and non delay. We don't want to * do block allocation here. */ - ret = block_prepare_write(page, 0, len, + ret = __block_write_begin(page, 0, len, noalloc_get_block_write); if (!ret) { page_bufs = page_buffers(page); diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 6b24afb96aae..4f36f8832b9b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -618,7 +618,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, struct gfs2_alloc *al = NULL; pgoff_t index = pos >> PAGE_CACHE_SHIFT; unsigned from = pos & (PAGE_CACHE_SIZE - 1); - unsigned to = from + len; struct page *page; gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh); @@ -691,7 +690,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, } prepare_write: - error = block_prepare_write(page, from, to, gfs2_block_map); + error = __block_write_begin(page, from, len, gfs2_block_map); out: if (error == 0) return 0; diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 0534510200d5..48a274f1674c 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -1294,7 +1294,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to) int error; if (!page_has_buffers(page)) { - error = block_prepare_write(page, from, to, gfs2_block_map); + error = __block_write_begin(page, from, to - from, gfs2_block_map); if (unlikely(error)) return error; @@ -1313,7 +1313,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to) next += bh->b_size; if (buffer_mapped(bh)) { if (end) { - error = block_prepare_write(page, start, end, + error = __block_write_begin(page, start, end - start, gfs2_block_map); if (unlikely(error)) return error; @@ -1328,7 +1328,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to) } while (next < to); if (end) { - error = block_prepare_write(page, start, end, gfs2_block_map); + error = __block_write_begin(page, start, end - start, gfs2_block_map); if (unlikely(error)) return error; empty_write_end(page, start, end); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 5cfeee118158..f1e962cb3b73 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -165,7 +165,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock, * ocfs2 never allocates in this function - the only time we * need to use BH_New is when we're extending i_size on a file * system which doesn't support holes, in which case BH_New - * allows block_prepare_write() to zero. + * allows __block_write_begin() to zero. 
* * If we see this on a sparse file system, then a truncate has * raced us and removed the cluster. In this case, we clear @@ -407,21 +407,6 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc) return ret; } -/* - * This is called from ocfs2_write_zero_page() which has handled it's - * own cluster locking and has ensured allocation exists for those - * blocks to be written. - */ -int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, - unsigned from, unsigned to) -{ - int ret; - - ret = block_prepare_write(page, from, to, ocfs2_get_block); - - return ret; -} - /* Taken from ext3. We don't necessarily need the full blown * functionality yet, but IMHO it's better to cut and paste the whole * thing so we can avoid introducing our own bugs (and easily pick up @@ -732,7 +717,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page, } /* - * Some of this taken from block_prepare_write(). We already have our + * Some of this taken from __block_write_begin(). We already have our * mapping by now though, and the entire write will be allocating or * it won't, so not much need to use BH_New. * diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h index 7606f663da6d..76bfdfda691a 100644 --- a/fs/ocfs2/aops.h +++ b/fs/ocfs2/aops.h @@ -22,9 +22,6 @@ #ifndef OCFS2_AOPS_H #define OCFS2_AOPS_H -int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page, - unsigned from, unsigned to); - handle_t *ocfs2_start_walk_page_trans(struct inode *inode, struct page *page, unsigned from, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 1ca6867935bb..77b4c04a2809 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -796,13 +796,12 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from, block_end = block_start + (1 << inode->i_blkbits); /* - * block_start is block-aligned. Bump it by one to - * force ocfs2_{prepare,commit}_write() to zero the + * block_start is block-aligned. Bump it by one to force + * __block_write_begin and block_commit_write to zero the * whole block. */ - ret = ocfs2_prepare_write_nolock(inode, page, - block_start + 1, - block_start + 1); + ret = __block_write_begin(page, block_start + 1, 0, + ocfs2_get_block); if (ret < 0) { mlog_errno(ret); goto out_unlock; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index caa758377d66..4dcb88046030 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -22,8 +22,6 @@ int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); -int reiserfs_prepare_write(struct file *f, struct page *page, - unsigned from, unsigned to); void reiserfs_evict_inode(struct inode *inode) { @@ -165,7 +163,7 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key, ** but tail is still sitting in a direct item, and we can't write to ** it. So, look through this page, and check all the mapped buffers ** to make sure they have valid block numbers. Any that don't need -** to be unmapped, so that block_prepare_write will correctly call +** to be unmapped, so that __block_write_begin will correctly call ** reiserfs_get_block to convert the tail into an unformatted node */ static inline void fix_tail_page_for_writing(struct page *page) @@ -439,13 +437,13 @@ static int reiserfs_bmap(struct inode *inode, sector_t block, } /* special version of get_block that is only used by grab_tail_page right -** now. It is sent to block_prepare_write, and when you try to get a +** now. 
It is sent to __block_write_begin, and when you try to get a ** block past the end of the file (or a block from a hole) it returns -** -ENOENT instead of a valid buffer. block_prepare_write expects to +** -ENOENT instead of a valid buffer. __block_write_begin expects to ** be able to do i/o on the buffers returned, unless an error value ** is also returned. ** -** So, this allows block_prepare_write to be used for reading a single block +** So, this allows __block_write_begin to be used for reading a single block ** in a page. Where it does not produce a valid page for holes, or past the ** end of the file. This turns out to be exactly what we need for reading ** tails for conversion. @@ -558,11 +556,12 @@ static int convert_tail_for_hole(struct inode *inode, ** ** We must fix the tail page for writing because it might have buffers ** that are mapped, but have a block number of 0. This indicates tail - ** data that has been read directly into the page, and block_prepare_write - ** won't trigger a get_block in this case. + ** data that has been read directly into the page, and + ** __block_write_begin won't trigger a get_block in this case. */ fix_tail_page_for_writing(tail_page); - retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end); + retval = __reiserfs_write_begin(tail_page, tail_start, + tail_end - tail_start); if (retval) goto unlock; @@ -2033,7 +2032,7 @@ static int grab_tail_page(struct inode *inode, /* start within the page of the last block in the file */ start = (offset / blocksize) * blocksize; - error = block_prepare_write(page, start, offset, + error = __block_write_begin(page, start, offset - start, reiserfs_get_block_create_0); if (error) goto unlock; @@ -2628,8 +2627,7 @@ static int reiserfs_write_begin(struct file *file, return ret; } -int reiserfs_prepare_write(struct file *f, struct page *page, - unsigned from, unsigned to) +int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len) { struct inode *inode = page->mapping->host; int ret; @@ -2650,7 +2648,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page, th->t_refcount++; } - ret = block_prepare_write(page, from, to, reiserfs_get_block); + ret = __block_write_begin(page, from, len, reiserfs_get_block); if (ret && reiserfs_transaction_running(inode->i_sb)) { struct reiserfs_transaction_handle *th = current->journal_info; /* this gets a little ugly. If reiserfs_get_block returned an diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c index 5cbb81e134ac..adf22b485cea 100644 --- a/fs/reiserfs/ioctl.c +++ b/fs/reiserfs/ioctl.c @@ -160,8 +160,6 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd, int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); -int reiserfs_prepare_write(struct file *f, struct page *page, - unsigned from, unsigned to); /* ** reiserfs_unpack ** Function try to convert tail from direct item into indirect. @@ -200,7 +198,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) } /* we unpack by finding the page with the tail, and calling - ** reiserfs_prepare_write on that page. This will force a + ** __reiserfs_write_begin on that page. This will force a ** reiserfs_get_block to unpack the tail for us. 
*/ index = inode->i_size >> PAGE_CACHE_SHIFT; @@ -210,7 +208,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp) if (!page) { goto out; } - retval = reiserfs_prepare_write(NULL, page, write_from, write_from); + retval = __reiserfs_write_begin(page, write_from, 0); if (retval) goto out_unlock; diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8c4cf273c672..f7415de13878 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -418,8 +418,6 @@ static inline __u32 xattr_hash(const char *msg, int len) int reiserfs_commit_write(struct file *f, struct page *page, unsigned from, unsigned to); -int reiserfs_prepare_write(struct file *f, struct page *page, - unsigned from, unsigned to); static void update_ctime(struct inode *inode) { @@ -532,8 +530,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, rxh->h_hash = cpu_to_le32(xahash); } - err = reiserfs_prepare_write(NULL, page, page_offset, - page_offset + chunk + skip); + err = __reiserfs_write_begin(page, page_offset, chunk + skip); if (!err) { if (buffer) memcpy(data + skip, buffer + buffer_pos, chunk); diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c index ab31ce5aeaf9..cf808782c065 100644 --- a/fs/xfs/linux-2.6/xfs_super.c +++ b/fs/xfs/linux-2.6/xfs_super.c @@ -576,7 +576,7 @@ xfs_max_file_offset( /* Figure out maximum filesize, on Linux this can depend on * the filesystem blocksize (on 32 bit platforms). - * __block_prepare_write does this in an [unsigned] long... + * __block_write_begin does this in an [unsigned] long... * page->index << (PAGE_CACHE_SHIFT - bbits) * So, for page sized blocks (4K on 32 bit platforms), * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index dd1b25b2641c..68d1fe7b877c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -212,7 +212,6 @@ int generic_write_end(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page *, void *); void page_zero_new_buffers(struct page *page, unsigned from, unsigned to); -int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*); int cont_write_begin(struct file *, struct address_space *, loff_t, unsigned, unsigned, struct page **, void **, get_block_t *, loff_t *); diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 91a4177e60ce..5ca47e59b727 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -2072,6 +2072,8 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode); void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs); int reiserfs_setattr(struct dentry *dentry, struct iattr *attr); +int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len); + /* namei.c */ void set_de_name_and_namelen(struct reiserfs_dir_entry *de); int search_by_entry_key(struct super_block *sb, const struct cpu_key *key, -- cgit
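
As a rough illustration of how a filesystem on a post-conversion tree might use the interfaces touched by this series, the sketch below shows the write-then-wait pattern adopted by the ubh_sync_block() conversion, a flush+FUA commit write via the new WRITE_FLUSH_FUA flags, and the __block_write_begin() calling convention that replaces block_prepare_write(). This is not part of any patch above; only write_dirty_buffer(), wait_on_buffer(), __sync_dirty_buffer(), __block_write_begin() and the WRITE/WRITE_FLUSH_FUA flags are real interfaces from this series, and the example_* helper names are hypothetical.

/* Hypothetical sketch, assuming a 2.6.36-era kernel with this series applied. */
#include <linux/fs.h>
#include <linux/buffer_head.h>

/*
 * Issue all dirty buffers, then wait for them - the same pattern the
 * ubh_sync_block() conversion uses instead of ll_rw_block(SWRITE) plus
 * ubh_wait_on_buffer().  Note there is no BH_Eopnotsupp to check anymore.
 */
static int example_sync_buffers(struct buffer_head **bhs, unsigned nr)
{
	unsigned i;
	int err = 0;

	for (i = 0; i < nr; i++)
		write_dirty_buffer(bhs[i], WRITE);

	for (i = 0; i < nr; i++) {
		wait_on_buffer(bhs[i]);
		if (!err && !buffer_uptodate(bhs[i]))
			err = -EIO;
	}
	return err;
}

/*
 * Write a commit block with a preceding cache flush and FUA semantics,
 * using WRITE_FLUSH_FUA instead of the deprecated WRITE_BARRIER.  No
 * -EOPNOTSUPP retry is needed; flags the device doesn't support are
 * handled or ignored by the block layer.
 */
static int example_write_commit_block(struct buffer_head *bh)
{
	return __sync_dirty_buffer(bh, WRITE_FLUSH_FUA);
}

/*
 * The __block_write_begin() calling convention that replaces
 * block_prepare_write(): pass (pos, len) instead of (from, to).
 */
static int example_prepare_write(struct page *page, loff_t pos,
				 unsigned len, get_block_t *get_block)
{
	return __block_write_begin(page, pos, len, get_block);
}

The point of the series is visible in the second helper: callers state cache-flush and FUA requirements declaratively via request flags, and the block layer either passes them straight to the device or sequences proxy requests on its behalf, so per-caller barrier error handling disappears.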