aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4/inode.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r--fs/ext4/inode.c74
1 files changed, 64 insertions, 10 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f0729b0705c7..5cf82d03968c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1643,6 +1643,7 @@ struct mpage_da_data {
*/
struct ext4_map_blocks map;
struct ext4_io_submit io_submit; /* IO submission data */
+ unsigned int do_map:1;
};
static void mpage_release_unused_pages(struct mpage_da_data *mpd,
@@ -2123,15 +2124,29 @@ static int ext4_writepage(struct page *page,
static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
{
int len;
- loff_t size = i_size_read(mpd->inode);
+ loff_t size;
int err;
BUG_ON(page->index != mpd->first_page);
+ clear_page_dirty_for_io(page);
+ /*
+ * We have to be very careful here! Nothing protects writeback path
+ * against i_size changes and the page can be writeably mapped into
+ * page tables. So an application can be growing i_size and writing
+ * data through mmap while writeback runs. clear_page_dirty_for_io()
+ * write-protects our page in page tables and the page cannot get
+ * written to again until we release page lock. So only after
+ * clear_page_dirty_for_io() we are safe to sample i_size for
+ * ext4_bio_write_page() to zero-out tail of the written page. We rely
+ * on the barrier provided by TestClearPageDirty in
+ * clear_page_dirty_for_io() to make sure i_size is really sampled only
+ * after page tables are updated.
+ */
+ size = i_size_read(mpd->inode);
if (page->index == size >> PAGE_SHIFT)
len = size & ~PAGE_MASK;
else
len = PAGE_SIZE;
- clear_page_dirty_for_io(page);
err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
if (!err)
mpd->wbc->nr_to_write--;
@@ -2179,6 +2194,9 @@ static bool mpage_add_bh_to_extent(struct mpage_da_data *mpd, ext4_lblk_t lblk,
/* First block in the extent? */
if (map->m_len == 0) {
+ /* We cannot map unless handle is started... */
+ if (!mpd->do_map)
+ return false;
map->m_lblk = lblk;
map->m_len = 1;
map->m_flags = bh->b_state & BH_FLAGS;
@@ -2231,6 +2249,9 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
/* Found extent to map? */
if (mpd->map.m_len)
return 0;
+ /* Buffer needs mapping and handle is not started? */
+ if (!mpd->do_map)
+ return 0;
/* Everything mapped so far and we hit EOF */
break;
}
@@ -2747,6 +2768,29 @@ retry:
tag_pages_for_writeback(mapping, mpd.first_page, mpd.last_page);
done = false;
blk_start_plug(&plug);
+
+ /*
+ * First writeback pages that don't need mapping - we can avoid
+ * starting a transaction unnecessarily and also avoid being blocked
+ * in the block layer on device congestion while having transaction
+ * started.
+ */
+ mpd.do_map = 0;
+ mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
+ if (!mpd.io_submit.io_end) {
+ ret = -ENOMEM;
+ goto unplug;
+ }
+ ret = mpage_prepare_extent_to_map(&mpd);
+ /* Submit prepared bio */
+ ext4_io_submit(&mpd.io_submit);
+ ext4_put_io_end_defer(mpd.io_submit.io_end);
+ mpd.io_submit.io_end = NULL;
+ /* Unlock pages we didn't use */
+ mpage_release_unused_pages(&mpd, false);
+ if (ret < 0)
+ goto unplug;
+
while (!done && mpd.first_page <= mpd.last_page) {
/* For each extent of pages we use new io_end */
mpd.io_submit.io_end = ext4_init_io_end(inode, GFP_KERNEL);
@@ -2775,8 +2819,10 @@ retry:
wbc->nr_to_write, inode->i_ino, ret);
/* Release allocated io_end */
ext4_put_io_end(mpd.io_submit.io_end);
+ mpd.io_submit.io_end = NULL;
break;
}
+ mpd.do_map = 1;
trace_ext4_da_write_pages(inode, mpd.first_page, mpd.wbc);
ret = mpage_prepare_extent_to_map(&mpd);
@@ -2807,6 +2853,7 @@ retry:
if (!ext4_handle_valid(handle) || handle->h_sync == 0) {
ext4_journal_stop(handle);
handle = NULL;
+ mpd.do_map = 0;
}
/* Submit prepared bio */
ext4_io_submit(&mpd.io_submit);
@@ -2824,6 +2871,7 @@ retry:
ext4_journal_stop(handle);
} else
ext4_put_io_end(mpd.io_submit.io_end);
+ mpd.io_submit.io_end = NULL;
if (ret == -ENOSPC && sbi->s_journal) {
/*
@@ -2839,6 +2887,7 @@ retry:
if (ret)
break;
}
+unplug:
blk_finish_plug(&plug);
if (!ret && !cycled && wbc->nr_to_write > 0) {
cycled = 1;
@@ -3377,7 +3426,7 @@ retry:
bdev = inode->i_sb->s_bdev;
iomap->bdev = bdev;
if (blk_queue_dax(bdev->bd_queue))
- iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
+ iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name);
else
iomap->dax_dev = NULL;
iomap->offset = first_block << blkbits;
@@ -3412,7 +3461,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length,
int blkbits = inode->i_blkbits;
bool truncate = false;
- put_dax(iomap->dax_dev);
+ fs_put_dax(iomap->dax_dev);
if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT))
return 0;
@@ -3594,9 +3643,6 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)
get_block_func = ext4_dio_get_block_unwritten_async;
dio_flags = DIO_LOCKING;
}
-#ifdef CONFIG_EXT4_FS_ENCRYPTION
- BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
-#endif
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
get_block_func, ext4_end_io_dio, NULL,
dio_flags);
@@ -3678,7 +3724,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)
*/
inode_lock_shared(inode);
ret = filemap_write_and_wait_range(mapping, iocb->ki_pos,
- iocb->ki_pos + count);
+ iocb->ki_pos + count - 1);
if (ret)
goto out_unlock;
ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
@@ -4172,6 +4218,8 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
inode->i_mtime = inode->i_ctime = current_time(inode);
ext4_mark_inode_dirty(handle, inode);
+ if (ret >= 0)
+ ext4_update_inode_fsync_trans(handle, inode, 1);
out_stop:
ext4_journal_stop(handle);
out_dio:
@@ -5602,8 +5650,9 @@ static int ext4_expand_extra_isize(struct inode *inode,
/* No extended attributes present */
if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) ||
header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) {
- memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0,
- new_extra_isize);
+ memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE +
+ EXT4_I(inode)->i_extra_isize, 0,
+ new_extra_isize - EXT4_I(inode)->i_extra_isize);
EXT4_I(inode)->i_extra_isize = new_extra_isize;
return 0;
}
@@ -5855,6 +5904,11 @@ int ext4_page_mkwrite(struct vm_fault *vmf)
file_update_time(vma->vm_file);
down_read(&EXT4_I(inode)->i_mmap_sem);
+
+ ret = ext4_convert_inline_data(inode);
+ if (ret)
+ goto out_ret;
+
/* Delalloc case is easy... */
if (test_opt(inode->i_sb, DELALLOC) &&
!ext4_should_journal_data(inode) &&