diff options
Diffstat (limited to 'fs/ext4/file.c')
| -rw-r--r-- | fs/ext4/file.c | 184 | 
1 files changed, 132 insertions, 52 deletions
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 2a822d30e73f..b5f184493c57 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -31,6 +31,42 @@
 #include "xattr.h"
 #include "acl.h"
 
+#ifdef CONFIG_FS_DAX
+static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+
+	inode_lock_shared(inode);
+	/*
+	 * Recheck under inode lock - at this point we are sure it cannot
+	 * change anymore
+	 */
+	if (!IS_DAX(inode)) {
+		inode_unlock_shared(inode);
+		/* Fallback to buffered IO in case we cannot support DAX */
+		return generic_file_read_iter(iocb, to);
+	}
+	ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops);
+	inode_unlock_shared(inode);
+
+	file_accessed(iocb->ki_filp);
+	return ret;
+}
+#endif
+
+static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+	if (!iov_iter_count(to))
+		return 0; /* skip atime */
+
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(file_inode(iocb->ki_filp)))
+		return ext4_dax_read_iter(iocb, to);
+#endif
+	return generic_file_read_iter(iocb, to);
+}
+
 /*
  * Called when an inode is released. Note that this is different
  * from ext4_file_open: open gets called at every open, but release
@@ -88,6 +124,86 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
 	return 0;
 }
 
+/* Is IO overwriting allocated and initialized blocks? */
+static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
+{
+	struct ext4_map_blocks map;
+	unsigned int blkbits = inode->i_blkbits;
+	int err, blklen;
+
+	if (pos + len > i_size_read(inode))
+		return false;
+
+	map.m_lblk = pos >> blkbits;
+	map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
+	blklen = map.m_len;
+
+	err = ext4_map_blocks(NULL, inode, &map, 0);
+	/*
+	 * 'err==len' means that all of the blocks have been preallocated,
+	 * regardless of whether they have been initialized or not. To exclude
+	 * unwritten extents, we need to check m_flags.
+	 */
+	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
+}
+
+static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+
+	ret = generic_write_checks(iocb, from);
+	if (ret <= 0)
+		return ret;
+	/*
+	 * If we have encountered a bitmap-format file, the size limit
+	 * is smaller than s_maxbytes, which is for extent-mapped files.
+	 */
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
+		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
+
+		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
+			return -EFBIG;
+		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
+	}
+	return iov_iter_count(from);
+}
+
+#ifdef CONFIG_FS_DAX
+static ssize_t
+ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	ssize_t ret;
+	bool overwrite = false;
+
+	inode_lock(inode);
+	ret = ext4_write_checks(iocb, from);
+	if (ret <= 0)
+		goto out;
+	ret = file_remove_privs(iocb->ki_filp);
+	if (ret)
+		goto out;
+	ret = file_update_time(iocb->ki_filp);
+	if (ret)
+		goto out;
+
+	if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) {
+		overwrite = true;
+		downgrade_write(&inode->i_rwsem);
+	}
+	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops);
+out:
+	if (!overwrite)
+		inode_unlock(inode);
+	else
+		inode_unlock_shared(inode);
+	if (ret > 0)
+		ret = generic_write_sync(iocb, ret);
+	return ret;
+}
+#endif
+
 static ssize_t
 ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 {
@@ -97,8 +213,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	int overwrite = 0;
 	ssize_t ret;
 
+#ifdef CONFIG_FS_DAX
+	if (IS_DAX(inode))
+		return ext4_dax_write_iter(iocb, from);
+#endif
+
 	inode_lock(inode);
-	ret = generic_write_checks(iocb, from);
+	ret = ext4_write_checks(iocb, from);
 	if (ret <= 0)
 		goto out;
 
@@ -114,53 +235,11 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 		ext4_unwritten_wait(inode);
 	}
 
-	/*
-	 * If we have encountered a bitmap-format file, the size limit
-	 * is smaller than s_maxbytes, which is for extent-mapped files.
-	 */
-	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
-		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
-
-		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
-			ret = -EFBIG;
-			goto out;
-		}
-		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
-	}
-
 	iocb->private = &overwrite;
-	if (o_direct) {
-		size_t length = iov_iter_count(from);
-		loff_t pos = iocb->ki_pos;
-
-		/* check whether we do a DIO overwrite or not */
-		if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
-		    pos + length <= i_size_read(inode)) {
-			struct ext4_map_blocks map;
-			unsigned int blkbits = inode->i_blkbits;
-			int err, len;
-
-			map.m_lblk = pos >> blkbits;
-			map.m_len = EXT4_MAX_BLOCKS(length, pos, blkbits);
-			len = map.m_len;
-
-			err = ext4_map_blocks(NULL, inode, &map, 0);
-			/*
-			 * 'err==len' means that all of blocks has
-			 * been preallocated no matter they are
-			 * initialized or not.  For excluding
-			 * unwritten extents, we need to check
-			 * m_flags.  There are two conditions that
-			 * indicate for initialized extents.  1) If we
-			 * hit extent cache, EXT4_MAP_MAPPED flag is
-			 * returned; 2) If we do a real lookup,
-			 * non-flags are returned.  So we should check
-			 * these two conditions.
-			 */
-			if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
-				overwrite = 1;
-		}
-	}
+	/* Check whether we do a DIO overwrite or not */
+	if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
+	    ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
+		overwrite = 1;
 
 	ret = __generic_file_write_iter(iocb, from);
 	inode_unlock(inode);
@@ -196,7 +275,7 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
 	else
-		result = dax_fault(vma, vmf, ext4_dax_get_block);
+		result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
 
 	if (write) {
 		if (!IS_ERR(handle))
@@ -230,9 +309,10 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
 
 	if (IS_ERR(handle))
 		result = VM_FAULT_SIGBUS;
-	else
-		result = dax_pmd_fault(vma, addr, pmd, flags,
-					 ext4_dax_get_block);
+	else {
+		result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
+					     &ext4_iomap_ops);
+	}
 
 	if (write) {
 		if (!IS_ERR(handle))
@@ -687,7 +767,7 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
 
 const struct file_operations ext4_file_operations = {
 	.llseek		= ext4_llseek,
-	.read_iter	= generic_file_read_iter,
+	.read_iter	= ext4_file_read_iter,
 	.write_iter	= ext4_file_write_iter,
 	.unlocked_ioctl = ext4_ioctl,
 #ifdef CONFIG_COMPAT
|