diff options
Diffstat (limited to 'fs/ext4')
| -rw-r--r-- | fs/ext4/acl.c | 2 | ||||
| -rw-r--r-- | fs/ext4/ext4.h | 31 | ||||
| -rw-r--r-- | fs/ext4/ext4_jbd2.h | 14 | ||||
| -rw-r--r-- | fs/ext4/extents.c | 42 | ||||
| -rw-r--r-- | fs/ext4/file.c | 224 | ||||
| -rw-r--r-- | fs/ext4/ialloc.c | 5 | ||||
| -rw-r--r-- | fs/ext4/inline.c | 18 | ||||
| -rw-r--r-- | fs/ext4/inode.c | 367 | ||||
| -rw-r--r-- | fs/ext4/ioctl.c | 84 | ||||
| -rw-r--r-- | fs/ext4/mballoc.c | 4 | ||||
| -rw-r--r-- | fs/ext4/mmp.c | 6 | ||||
| -rw-r--r-- | fs/ext4/namei.c | 24 | ||||
| -rw-r--r-- | fs/ext4/page-io.c | 7 | ||||
| -rw-r--r-- | fs/ext4/super.c | 160 | ||||
| -rw-r--r-- | fs/ext4/symlink.c | 3 | ||||
| -rw-r--r-- | fs/ext4/xattr.c | 45 | 
16 files changed, 638 insertions, 398 deletions
| diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index dfa519979038..fd389935ecd1 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -196,7 +196,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,  			error = posix_acl_update_mode(inode, &inode->i_mode, &acl);  			if (error)  				return error; -			inode->i_ctime = ext4_current_time(inode); +			inode->i_ctime = current_time(inode);  			ext4_mark_inode_dirty(handle, inode);  		}  		break; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index a8a750f59621..2163c1e69f2a 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -397,8 +397,9 @@ struct flex_groups {  #define EXT4_RESERVED_FL		0x80000000 /* reserved for ext4 lib */  #define EXT4_FL_USER_VISIBLE		0x304BDFFF /* User visible flags */ -#define EXT4_FL_USER_MODIFIABLE		0x204380FF /* User modifiable flags */ +#define EXT4_FL_USER_MODIFIABLE		0x204BC0FF /* User modifiable flags */ +/* Flags we can manipulate with through EXT4_IOC_FSSETXATTR */  #define EXT4_FL_XFLAG_VISIBLE		(EXT4_SYNC_FL | \  					 EXT4_IMMUTABLE_FL | \  					 EXT4_APPEND_FL | \ @@ -1533,12 +1534,6 @@ static inline struct ext4_inode_info *EXT4_I(struct inode *inode)  	return container_of(inode, struct ext4_inode_info, vfs_inode);  } -static inline struct timespec ext4_current_time(struct inode *inode) -{ -	return (inode->i_sb->s_time_gran < NSEC_PER_SEC) ? -		current_fs_time(inode->i_sb) : CURRENT_TIME_SEC; -} -  static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)  {  	return ino == EXT4_ROOT_INO || @@ -2277,11 +2272,6 @@ extern unsigned ext4_free_clusters_after_init(struct super_block *sb,  					      struct ext4_group_desc *gdp);  ext4_fsblk_t ext4_inode_to_goal_block(struct inode *); -static inline int ext4_sb_has_crypto(struct super_block *sb) -{ -	return ext4_has_feature_encrypt(sb); -} -  static inline bool ext4_encrypted_inode(struct inode *inode)  {  	return ext4_test_inode_flag(inode, EXT4_INODE_ENCRYPT); @@ -2339,8 +2329,8 @@ static inline void ext4_fname_free_filename(struct ext4_filename *fname) { }  #define fscrypt_pullback_bio_page	fscrypt_notsupp_pullback_bio_page  #define fscrypt_restore_control_page	fscrypt_notsupp_restore_control_page  #define fscrypt_zeroout_range		fscrypt_notsupp_zeroout_range -#define fscrypt_process_policy		fscrypt_notsupp_process_policy -#define fscrypt_get_policy		fscrypt_notsupp_get_policy +#define fscrypt_ioctl_set_policy	fscrypt_notsupp_ioctl_set_policy +#define fscrypt_ioctl_get_policy	fscrypt_notsupp_ioctl_get_policy  #define fscrypt_has_permitted_context	fscrypt_notsupp_has_permitted_context  #define fscrypt_inherit_context		fscrypt_notsupp_inherit_context  #define fscrypt_get_encryption_info	fscrypt_notsupp_get_encryption_info @@ -2458,8 +2448,6 @@ struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);  struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);  int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,  			     struct buffer_head *bh_result, int create); -int ext4_dax_get_block(struct inode *inode, sector_t iblock, -		       struct buffer_head *bh_result, int create);  int ext4_get_block(struct inode *inode, sector_t iblock,  		   struct buffer_head *bh_result, int create);  int ext4_dio_get_block(struct inode *inode, sector_t iblock, @@ -2492,7 +2480,7 @@ extern int ext4_change_inode_journal_flag(struct inode *, int);  extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *);  extern int ext4_inode_attach_jinode(struct inode *inode);  extern int ext4_can_truncate(struct inode *inode); -extern void ext4_truncate(struct inode *); +extern int ext4_truncate(struct inode *);  extern int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length);  extern int ext4_truncate_restart_trans(handle_t *, struct inode *, int nblocks);  extern void ext4_set_inode_flags(struct inode *); @@ -3129,7 +3117,7 @@ extern int ext4_ext_writepage_trans_blocks(struct inode *, int);  extern int ext4_ext_index_trans_blocks(struct inode *inode, int extents);  extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  			       struct ext4_map_blocks *map, int flags); -extern void ext4_ext_truncate(handle_t *, struct inode *); +extern int ext4_ext_truncate(handle_t *, struct inode *);  extern int ext4_ext_remove_space(struct inode *inode, ext4_lblk_t start,  				 ext4_lblk_t end);  extern void ext4_ext_init(struct super_block *); @@ -3265,12 +3253,7 @@ static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)  	}  } -static inline bool ext4_aligned_io(struct inode *inode, loff_t off, loff_t len) -{ -	int blksize = 1 << inode->i_blkbits; - -	return IS_ALIGNED(off, blksize) && IS_ALIGNED(len, blksize); -} +extern struct iomap_ops ext4_iomap_ops;  #endif	/* __KERNEL__ */ diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h index b1d52c14098e..f97611171023 100644 --- a/fs/ext4/ext4_jbd2.h +++ b/fs/ext4/ext4_jbd2.h @@ -414,17 +414,19 @@ static inline int ext4_inode_journal_mode(struct inode *inode)  		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */  	/* We do not support data journalling with delayed allocation */  	if (!S_ISREG(inode->i_mode) || -	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) -		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */ -	if (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && -	    !test_opt(inode->i_sb, DELALLOC)) +	    test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || +	    (ext4_test_inode_flag(inode, EXT4_INODE_JOURNAL_DATA) && +	    !test_opt(inode->i_sb, DELALLOC))) { +		/* We do not support data journalling for encrypted data */ +		if (S_ISREG(inode->i_mode) && ext4_encrypted_inode(inode)) +			return EXT4_INODE_ORDERED_DATA_MODE;  /* ordered */  		return EXT4_INODE_JOURNAL_DATA_MODE;	/* journal data */ +	}  	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)  		return EXT4_INODE_ORDERED_DATA_MODE;	/* ordered */  	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)  		return EXT4_INODE_WRITEBACK_DATA_MODE;	/* writeback */ -	else -		BUG(); +	BUG();  }  static inline int ext4_should_journal_data(struct inode *inode) diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index c930a0110fb4..3e295d3350a9 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -37,7 +37,7 @@  #include <linux/quotaops.h>  #include <linux/string.h>  #include <linux/slab.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h>  #include <linux/fiemap.h>  #include <linux/backing-dev.h>  #include "ext4_jbd2.h" @@ -3777,14 +3777,6 @@ out:  	return err;  } -static void unmap_underlying_metadata_blocks(struct block_device *bdev, -			sector_t block, int count) -{ -	int i; -	for (i = 0; i < count; i++) -                unmap_underlying_metadata(bdev, block + i); -} -  /*   * Handle EOFBLOCKS_FL flag, clearing it if necessary   */ @@ -4121,9 +4113,8 @@ out:  	 * new.  	 */  	if (allocated > map->m_len) { -		unmap_underlying_metadata_blocks(inode->i_sb->s_bdev, -					newblock + map->m_len, -					allocated - map->m_len); +		clean_bdev_aliases(inode->i_sb->s_bdev, newblock + map->m_len, +				   allocated - map->m_len);  		allocated = map->m_len;  	}  	map->m_len = allocated; @@ -4631,7 +4622,7 @@ out2:  	return err ? err : allocated;  } -void ext4_ext_truncate(handle_t *handle, struct inode *inode) +int ext4_ext_truncate(handle_t *handle, struct inode *inode)  {  	struct super_block *sb = inode->i_sb;  	ext4_lblk_t last_block; @@ -4645,7 +4636,9 @@ void ext4_ext_truncate(handle_t *handle, struct inode *inode)  	/* we have to know where to truncate from in crash case */  	EXT4_I(inode)->i_disksize = inode->i_size; -	ext4_mark_inode_dirty(handle, inode); +	err = ext4_mark_inode_dirty(handle, inode); +	if (err) +		return err;  	last_block = (inode->i_size + sb->s_blocksize - 1)  			>> EXT4_BLOCK_SIZE_BITS(sb); @@ -4657,12 +4650,9 @@ retry:  		congestion_wait(BLK_RW_ASYNC, HZ/50);  		goto retry;  	} -	if (err) { -		ext4_std_error(inode->i_sb, err); -		return; -	} -	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1); -	ext4_std_error(inode->i_sb, err); +	if (err) +		return err; +	return ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);  }  static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset, @@ -4701,7 +4691,7 @@ retry:  		/*  		 * Recalculate credits when extent tree depth changes.  		 */ -		if (depth >= 0 && depth != ext_depth(inode)) { +		if (depth != ext_depth(inode)) {  			credits = ext4_chunk_trans_blocks(inode, len);  			depth = ext_depth(inode);  		} @@ -4725,7 +4715,7 @@ retry:  		map.m_lblk += ret;  		map.m_len = len = len - ret;  		epos = (loff_t)map.m_lblk << inode->i_blkbits; -		inode->i_ctime = ext4_current_time(inode); +		inode->i_ctime = current_time(inode);  		if (new_size) {  			if (epos > new_size)  				epos = new_size; @@ -4853,7 +4843,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,  		}  		/* Now release the pages and zero block aligned part of pages */  		truncate_pagecache_range(inode, start, end - 1); -		inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +		inode->i_mtime = inode->i_ctime = current_time(inode);  		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,  					     flags, mode); @@ -4878,7 +4868,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,  		goto out_dio;  	} -	inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +	inode->i_mtime = inode->i_ctime = current_time(inode);  	if (new_size) {  		ext4_update_inode_size(inode, new_size);  	} else { @@ -5568,7 +5558,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)  	up_write(&EXT4_I(inode)->i_data_sem);  	if (IS_SYNC(inode))  		ext4_handle_sync(handle); -	inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +	inode->i_mtime = inode->i_ctime = current_time(inode);  	ext4_mark_inode_dirty(handle, inode);  out_stop: @@ -5678,7 +5668,7 @@ int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)  	/* Expand file to avoid data loss if there is error while shifting */  	inode->i_size += len;  	EXT4_I(inode)->i_disksize += len; -	inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +	inode->i_mtime = inode->i_ctime = current_time(inode);  	ret = ext4_mark_inode_dirty(handle, inode);  	if (ret)  		goto out_stop; diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 2a822d30e73f..d663d3d7c81c 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -31,6 +31,42 @@  #include "xattr.h"  #include "acl.h" +#ifdef CONFIG_FS_DAX +static ssize_t ext4_dax_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ +	struct inode *inode = file_inode(iocb->ki_filp); +	ssize_t ret; + +	inode_lock_shared(inode); +	/* +	 * Recheck under inode lock - at this point we are sure it cannot +	 * change anymore +	 */ +	if (!IS_DAX(inode)) { +		inode_unlock_shared(inode); +		/* Fallback to buffered IO in case we cannot support DAX */ +		return generic_file_read_iter(iocb, to); +	} +	ret = dax_iomap_rw(iocb, to, &ext4_iomap_ops); +	inode_unlock_shared(inode); + +	file_accessed(iocb->ki_filp); +	return ret; +} +#endif + +static ssize_t ext4_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ +	if (!iov_iter_count(to)) +		return 0; /* skip atime */ + +#ifdef CONFIG_FS_DAX +	if (IS_DAX(file_inode(iocb->ki_filp))) +		return ext4_dax_read_iter(iocb, to); +#endif +	return generic_file_read_iter(iocb, to); +} +  /*   * Called when an inode is released. Note that this is different   * from ext4_file_open: open gets called at every open, but release @@ -88,6 +124,86 @@ ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)  	return 0;  } +/* Is IO overwriting allocated and initialized blocks? */ +static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len) +{ +	struct ext4_map_blocks map; +	unsigned int blkbits = inode->i_blkbits; +	int err, blklen; + +	if (pos + len > i_size_read(inode)) +		return false; + +	map.m_lblk = pos >> blkbits; +	map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits); +	blklen = map.m_len; + +	err = ext4_map_blocks(NULL, inode, &map, 0); +	/* +	 * 'err==len' means that all of the blocks have been preallocated, +	 * regardless of whether they have been initialized or not. To exclude +	 * unwritten extents, we need to check m_flags. +	 */ +	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED); +} + +static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from) +{ +	struct inode *inode = file_inode(iocb->ki_filp); +	ssize_t ret; + +	ret = generic_write_checks(iocb, from); +	if (ret <= 0) +		return ret; +	/* +	 * If we have encountered a bitmap-format file, the size limit +	 * is smaller than s_maxbytes, which is for extent-mapped files. +	 */ +	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { +		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + +		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) +			return -EFBIG; +		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); +	} +	return iov_iter_count(from); +} + +#ifdef CONFIG_FS_DAX +static ssize_t +ext4_dax_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ +	struct inode *inode = file_inode(iocb->ki_filp); +	ssize_t ret; +	bool overwrite = false; + +	inode_lock(inode); +	ret = ext4_write_checks(iocb, from); +	if (ret <= 0) +		goto out; +	ret = file_remove_privs(iocb->ki_filp); +	if (ret) +		goto out; +	ret = file_update_time(iocb->ki_filp); +	if (ret) +		goto out; + +	if (ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) { +		overwrite = true; +		downgrade_write(&inode->i_rwsem); +	} +	ret = dax_iomap_rw(iocb, from, &ext4_iomap_ops); +out: +	if (!overwrite) +		inode_unlock(inode); +	else +		inode_unlock_shared(inode); +	if (ret > 0) +		ret = generic_write_sync(iocb, ret); +	return ret; +} +#endif +  static ssize_t  ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)  { @@ -97,8 +213,13 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)  	int overwrite = 0;  	ssize_t ret; +#ifdef CONFIG_FS_DAX +	if (IS_DAX(inode)) +		return ext4_dax_write_iter(iocb, from); +#endif +  	inode_lock(inode); -	ret = generic_write_checks(iocb, from); +	ret = ext4_write_checks(iocb, from);  	if (ret <= 0)  		goto out; @@ -114,53 +235,11 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)  		ext4_unwritten_wait(inode);  	} -	/* -	 * If we have encountered a bitmap-format file, the size limit -	 * is smaller than s_maxbytes, which is for extent-mapped files. -	 */ -	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) { -		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); - -		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) { -			ret = -EFBIG; -			goto out; -		} -		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos); -	} -  	iocb->private = &overwrite; -	if (o_direct) { -		size_t length = iov_iter_count(from); -		loff_t pos = iocb->ki_pos; - -		/* check whether we do a DIO overwrite or not */ -		if (ext4_should_dioread_nolock(inode) && !unaligned_aio && -		    pos + length <= i_size_read(inode)) { -			struct ext4_map_blocks map; -			unsigned int blkbits = inode->i_blkbits; -			int err, len; - -			map.m_lblk = pos >> blkbits; -			map.m_len = EXT4_MAX_BLOCKS(length, pos, blkbits); -			len = map.m_len; - -			err = ext4_map_blocks(NULL, inode, &map, 0); -			/* -			 * 'err==len' means that all of blocks has -			 * been preallocated no matter they are -			 * initialized or not.  For excluding -			 * unwritten extents, we need to check -			 * m_flags.  There are two conditions that -			 * indicate for initialized extents.  1) If we -			 * hit extent cache, EXT4_MAP_MAPPED flag is -			 * returned; 2) If we do a real lookup, -			 * non-flags are returned.  So we should check -			 * these two conditions. -			 */ -			if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) -				overwrite = 1; -		} -	} +	/* Check whether we do a DIO overwrite or not */ +	if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio && +	    ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from))) +		overwrite = 1;  	ret = __generic_file_write_iter(iocb, from);  	inode_unlock(inode); @@ -179,7 +258,6 @@ out:  static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)  {  	int result; -	handle_t *handle = NULL;  	struct inode *inode = file_inode(vma->vm_file);  	struct super_block *sb = inode->i_sb;  	bool write = vmf->flags & FAULT_FLAG_WRITE; @@ -187,24 +265,12 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)  	if (write) {  		sb_start_pagefault(sb);  		file_update_time(vma->vm_file); -		down_read(&EXT4_I(inode)->i_mmap_sem); -		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, -						EXT4_DATA_TRANS_BLOCKS(sb)); -	} else -		down_read(&EXT4_I(inode)->i_mmap_sem); - -	if (IS_ERR(handle)) -		result = VM_FAULT_SIGBUS; -	else -		result = dax_fault(vma, vmf, ext4_dax_get_block); - -	if (write) { -		if (!IS_ERR(handle)) -			ext4_journal_stop(handle); -		up_read(&EXT4_I(inode)->i_mmap_sem); +	} +	down_read(&EXT4_I(inode)->i_mmap_sem); +	result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops); +	up_read(&EXT4_I(inode)->i_mmap_sem); +	if (write)  		sb_end_pagefault(sb); -	} else -		up_read(&EXT4_I(inode)->i_mmap_sem);  	return result;  } @@ -213,7 +279,6 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,  						pmd_t *pmd, unsigned int flags)  {  	int result; -	handle_t *handle = NULL;  	struct inode *inode = file_inode(vma->vm_file);  	struct super_block *sb = inode->i_sb;  	bool write = flags & FAULT_FLAG_WRITE; @@ -221,26 +286,13 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,  	if (write) {  		sb_start_pagefault(sb);  		file_update_time(vma->vm_file); -		down_read(&EXT4_I(inode)->i_mmap_sem); -		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE, -				ext4_chunk_trans_blocks(inode, -							PMD_SIZE / PAGE_SIZE)); -	} else -		down_read(&EXT4_I(inode)->i_mmap_sem); - -	if (IS_ERR(handle)) -		result = VM_FAULT_SIGBUS; -	else -		result = dax_pmd_fault(vma, addr, pmd, flags, -					 ext4_dax_get_block); - -	if (write) { -		if (!IS_ERR(handle)) -			ext4_journal_stop(handle); -		up_read(&EXT4_I(inode)->i_mmap_sem); +	} +	down_read(&EXT4_I(inode)->i_mmap_sem); +	result = dax_iomap_pmd_fault(vma, addr, pmd, flags, +				     &ext4_iomap_ops); +	up_read(&EXT4_I(inode)->i_mmap_sem); +	if (write)  		sb_end_pagefault(sb); -	} else -		up_read(&EXT4_I(inode)->i_mmap_sem);  	return result;  } @@ -687,7 +739,7 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence)  const struct file_operations ext4_file_operations = {  	.llseek		= ext4_llseek, -	.read_iter	= generic_file_read_iter, +	.read_iter	= ext4_file_read_iter,  	.write_iter	= ext4_file_write_iter,  	.unlocked_ioctl = ext4_ioctl,  #ifdef CONFIG_COMPAT diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 170421edfdfe..e57e8d90ea54 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1039,7 +1039,7 @@ got:  	/* This is the optimal IO size (for stat), not the fs block size */  	inode->i_blocks = 0;  	inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime = -						       ext4_current_time(inode); +						       current_time(inode);  	memset(ei->i_data, 0, sizeof(ei->i_data));  	ei->i_dir_start_lookup = 0; @@ -1115,8 +1115,7 @@ got:  	}  	if (encrypt) { -		/* give pointer to avoid set_context with journal ops. */ -		err = fscrypt_inherit_context(dir, inode, &encrypt, true); +		err = fscrypt_inherit_context(dir, inode, handle, true);  		if (err)  			goto fail_free_drop;  	} diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c index f74d5ee2cdec..437df6a1a841 100644 --- a/fs/ext4/inline.c +++ b/fs/ext4/inline.c @@ -299,6 +299,11 @@ static int ext4_create_inline_data(handle_t *handle,  	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;  	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);  	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); +	/* +	 * Propagate changes to inode->i_flags as well - e.g. S_DAX may +	 * get cleared +	 */ +	ext4_set_inode_flags(inode);  	get_bh(is.iloc.bh);  	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); @@ -336,8 +341,10 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,  	len -= EXT4_MIN_INLINE_DATA_SIZE;  	value = kzalloc(len, GFP_NOFS); -	if (!value) +	if (!value) { +		error = -ENOMEM;  		goto out; +	}  	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,  				     value, len); @@ -442,6 +449,11 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,  		}  	}  	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA); +	/* +	 * Propagate changes to inode->i_flags as well - e.g. S_DAX may +	 * get set. +	 */ +	ext4_set_inode_flags(inode);  	get_bh(is.iloc.bh);  	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); @@ -1028,7 +1040,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,  	 * happen is that the times are slightly out of date  	 * and/or different from the directory change time.  	 */ -	dir->i_mtime = dir->i_ctime = ext4_current_time(dir); +	dir->i_mtime = dir->i_ctime = current_time(dir);  	ext4_update_dx_flag(dir);  	dir->i_version++;  	ext4_mark_inode_dirty(handle, dir); @@ -1971,7 +1983,7 @@ out:  	if (inode->i_nlink)  		ext4_orphan_del(handle, inode); -	inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +	inode->i_mtime = inode->i_ctime = current_time(inode);  	ext4_mark_inode_dirty(handle, inode);  	if (IS_SYNC(inode))  		ext4_handle_sync(handle); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9c064727ed62..88d57af1b516 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -37,6 +37,7 @@  #include <linux/printk.h>  #include <linux/slab.h>  #include <linux/bitops.h> +#include <linux/iomap.h>  #include "ext4_jbd2.h"  #include "xattr.h" @@ -71,10 +72,9 @@ static __u32 ext4_inode_csum(struct inode *inode, struct ext4_inode *raw,  			csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum,  					   csum_size);  			offset += csum_size; -			csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, -					   EXT4_INODE_SIZE(inode->i_sb) - -					   offset);  		} +		csum = ext4_chksum(sbi, csum, (__u8 *)raw + offset, +				   EXT4_INODE_SIZE(inode->i_sb) - offset);  	}  	return csum; @@ -261,8 +261,15 @@ void ext4_evict_inode(struct inode *inode)  			     "couldn't mark inode dirty (err %d)", err);  		goto stop_handle;  	} -	if (inode->i_blocks) -		ext4_truncate(inode); +	if (inode->i_blocks) { +		err = ext4_truncate(inode); +		if (err) { +			ext4_error(inode->i_sb, +				   "couldn't truncate inode %lu (err %d)", +				   inode->i_ino, err); +			goto stop_handle; +		} +	}  	/*  	 * ext4_ext_truncate() doesn't reserve any slop when it @@ -654,12 +661,8 @@ found:  		if (flags & EXT4_GET_BLOCKS_ZERO &&  		    map->m_flags & EXT4_MAP_MAPPED &&  		    map->m_flags & EXT4_MAP_NEW) { -			ext4_lblk_t i; - -			for (i = 0; i < map->m_len; i++) { -				unmap_underlying_metadata(inode->i_sb->s_bdev, -							  map->m_pblk + i); -			} +			clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk, +					   map->m_len);  			ret = ext4_issue_zeroout(inode, map->m_lblk,  						 map->m_pblk, map->m_len);  			if (ret) { @@ -767,6 +770,9 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,  		ext4_update_bh_state(bh, map.m_flags);  		bh->b_size = inode->i_sb->s_blocksize * map.m_len;  		ret = 0; +	} else if (ret == 0) { +		/* hole case, need to fill in bh->b_size */ +		bh->b_size = inode->i_sb->s_blocksize * map.m_len;  	}  	return ret;  } @@ -1127,8 +1133,7 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,  			if (err)  				break;  			if (buffer_new(bh)) { -				unmap_underlying_metadata(bh->b_bdev, -							  bh->b_blocknr); +				clean_bdev_bh_alias(bh);  				if (PageUptodate(page)) {  					clear_buffer_new(bh);  					set_buffer_uptodate(bh); @@ -1166,7 +1171,8 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,  	if (unlikely(err))  		page_zero_new_buffers(page, from, to);  	else if (decrypt) -		err = fscrypt_decrypt_page(page); +		err = fscrypt_decrypt_page(page->mapping->host, page, +				PAGE_SIZE, 0, page->index);  	return err;  }  #endif @@ -2360,11 +2366,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)  	BUG_ON(map->m_len == 0);  	if (map->m_flags & EXT4_MAP_NEW) { -		struct block_device *bdev = inode->i_sb->s_bdev; -		int i; - -		for (i = 0; i < map->m_len; i++) -			unmap_underlying_metadata(bdev, map->m_pblk + i); +		clean_bdev_aliases(inode->i_sb->s_bdev, map->m_pblk, +				   map->m_len);  	}  	return 0;  } @@ -2891,7 +2894,8 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,  	index = pos >> PAGE_SHIFT; -	if (ext4_nonda_switch(inode->i_sb)) { +	if (ext4_nonda_switch(inode->i_sb) || +	    S_ISLNK(inode->i_mode)) {  		*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;  		return ext4_write_begin(file, mapping, pos,  					len, flags, pagep, fsdata); @@ -3268,53 +3272,159 @@ static int ext4_releasepage(struct page *page, gfp_t wait)  }  #ifdef CONFIG_FS_DAX -/* - * Get block function for DAX IO and mmap faults. It takes care of converting - * unwritten extents to written ones and initializes new / converted blocks - * to zeros. - */ -int ext4_dax_get_block(struct inode *inode, sector_t iblock, -		       struct buffer_head *bh_result, int create) +static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length, +			    unsigned flags, struct iomap *iomap)  { +	unsigned int blkbits = inode->i_blkbits; +	unsigned long first_block = offset >> blkbits; +	unsigned long last_block = (offset + length - 1) >> blkbits; +	struct ext4_map_blocks map;  	int ret; -	ext4_debug("inode %lu, create flag %d\n", inode->i_ino, create); -	if (!create) -		return _ext4_get_block(inode, iblock, bh_result, 0); +	if (WARN_ON_ONCE(ext4_has_inline_data(inode))) +		return -ERANGE; -	ret = ext4_get_block_trans(inode, iblock, bh_result, -				   EXT4_GET_BLOCKS_PRE_IO | -				   EXT4_GET_BLOCKS_CREATE_ZERO); -	if (ret < 0) -		return ret; +	map.m_lblk = first_block; +	map.m_len = last_block - first_block + 1; -	if (buffer_unwritten(bh_result)) { +	if (!(flags & IOMAP_WRITE)) { +		ret = ext4_map_blocks(NULL, inode, &map, 0); +	} else { +		int dio_credits; +		handle_t *handle; +		int retries = 0; + +		/* Trim mapping request to maximum we can map at once for DIO */ +		if (map.m_len > DIO_MAX_BLOCKS) +			map.m_len = DIO_MAX_BLOCKS; +		dio_credits = ext4_chunk_trans_blocks(inode, map.m_len); +retry:  		/* -		 * We are protected by i_mmap_sem or i_mutex so we know block -		 * cannot go away from under us even though we dropped -		 * i_data_sem. Convert extent to written and write zeros there. +		 * Either we allocate blocks and then we don't get unwritten +		 * extent so we have reserved enough credits, or the blocks +		 * are already allocated and unwritten and in that case +		 * extent conversion fits in the credits as well.  		 */ -		ret = ext4_get_block_trans(inode, iblock, bh_result, -					   EXT4_GET_BLOCKS_CONVERT | -					   EXT4_GET_BLOCKS_CREATE_ZERO); -		if (ret < 0) +		handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, +					    dio_credits); +		if (IS_ERR(handle)) +			return PTR_ERR(handle); + +		ret = ext4_map_blocks(handle, inode, &map, +				      EXT4_GET_BLOCKS_CREATE_ZERO); +		if (ret < 0) { +			ext4_journal_stop(handle); +			if (ret == -ENOSPC && +			    ext4_should_retry_alloc(inode->i_sb, &retries)) +				goto retry;  			return ret; +		} + +		/* +		 * If we added blocks beyond i_size, we need to make sure they +		 * will get truncated if we crash before updating i_size in +		 * ext4_iomap_end(). For faults we don't need to do that (and +		 * even cannot because for orphan list operations inode_lock is +		 * required) - if we happen to instantiate block beyond i_size, +		 * it is because we race with truncate which has already added +		 * the inode to the orphan list. +		 */ +		if (!(flags & IOMAP_FAULT) && first_block + map.m_len > +		    (i_size_read(inode) + (1 << blkbits) - 1) >> blkbits) { +			int err; + +			err = ext4_orphan_add(handle, inode); +			if (err < 0) { +				ext4_journal_stop(handle); +				return err; +			} +		} +		ext4_journal_stop(handle);  	} -	/* -	 * At least for now we have to clear BH_New so that DAX code -	 * doesn't attempt to zero blocks again in a racy way. -	 */ -	clear_buffer_new(bh_result); + +	iomap->flags = 0; +	iomap->bdev = inode->i_sb->s_bdev; +	iomap->offset = first_block << blkbits; + +	if (ret == 0) { +		iomap->type = IOMAP_HOLE; +		iomap->blkno = IOMAP_NULL_BLOCK; +		iomap->length = (u64)map.m_len << blkbits; +	} else { +		if (map.m_flags & EXT4_MAP_MAPPED) { +			iomap->type = IOMAP_MAPPED; +		} else if (map.m_flags & EXT4_MAP_UNWRITTEN) { +			iomap->type = IOMAP_UNWRITTEN; +		} else { +			WARN_ON_ONCE(1); +			return -EIO; +		} +		iomap->blkno = (sector_t)map.m_pblk << (blkbits - 9); +		iomap->length = (u64)map.m_len << blkbits; +	} + +	if (map.m_flags & EXT4_MAP_NEW) +		iomap->flags |= IOMAP_F_NEW;  	return 0;  } -#else -/* Just define empty function, it will never get called. */ -int ext4_dax_get_block(struct inode *inode, sector_t iblock, -		       struct buffer_head *bh_result, int create) + +static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, +			  ssize_t written, unsigned flags, struct iomap *iomap)  { -	BUG(); -	return 0; +	int ret = 0; +	handle_t *handle; +	int blkbits = inode->i_blkbits; +	bool truncate = false; + +	if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) +		return 0; + +	handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); +	if (IS_ERR(handle)) { +		ret = PTR_ERR(handle); +		goto orphan_del; +	} +	if (ext4_update_inode_size(inode, offset + written)) +		ext4_mark_inode_dirty(handle, inode); +	/* +	 * We may need to truncate allocated but not written blocks beyond EOF. +	 */ +	if (iomap->offset + iomap->length >  +	    ALIGN(inode->i_size, 1 << blkbits)) { +		ext4_lblk_t written_blk, end_blk; + +		written_blk = (offset + written) >> blkbits; +		end_blk = (offset + length) >> blkbits; +		if (written_blk < end_blk && ext4_can_truncate(inode)) +			truncate = true; +	} +	/* +	 * Remove inode from orphan list if we were extending a inode and +	 * everything went fine. +	 */ +	if (!truncate && inode->i_nlink && +	    !list_empty(&EXT4_I(inode)->i_orphan)) +		ext4_orphan_del(handle, inode); +	ext4_journal_stop(handle); +	if (truncate) { +		ext4_truncate_failed_write(inode); +orphan_del: +		/* +		 * If truncate failed early the inode might still be on the +		 * orphan list; we need to make sure the inode is removed from +		 * the orphan list in that case. +		 */ +		if (inode->i_nlink) +			ext4_orphan_del(NULL, inode); +	} +	return ret;  } + +struct iomap_ops ext4_iomap_ops = { +	.iomap_begin		= ext4_iomap_begin, +	.iomap_end		= ext4_iomap_end, +}; +  #endif  static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset, @@ -3436,19 +3546,7 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)  	iocb->private = NULL;  	if (overwrite)  		get_block_func = ext4_dio_get_block_overwrite; -	else if (IS_DAX(inode)) { -		/* -		 * We can avoid zeroing for aligned DAX writes beyond EOF. Other -		 * writes need zeroing either because they can race with page -		 * faults or because they use partial blocks. -		 */ -		if (round_down(offset, 1<<inode->i_blkbits) >= inode->i_size && -		    ext4_aligned_io(inode, offset, count)) -			get_block_func = ext4_dio_get_block; -		else -			get_block_func = ext4_dax_get_block; -		dio_flags = DIO_LOCKING; -	} else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) || +	else if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) ||  		   round_down(offset, 1 << inode->i_blkbits) >= inode->i_size) {  		get_block_func = ext4_dio_get_block;  		dio_flags = DIO_LOCKING | DIO_SKIP_HOLES; @@ -3462,14 +3560,9 @@ static ssize_t ext4_direct_IO_write(struct kiocb *iocb, struct iov_iter *iter)  #ifdef CONFIG_EXT4_FS_ENCRYPTION  	BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));  #endif -	if (IS_DAX(inode)) { -		ret = dax_do_io(iocb, inode, iter, get_block_func, -				ext4_end_io_dio, dio_flags); -	} else -		ret = __blockdev_direct_IO(iocb, inode, -					   inode->i_sb->s_bdev, iter, -					   get_block_func, -					   ext4_end_io_dio, NULL, dio_flags); +	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter, +				   get_block_func, ext4_end_io_dio, NULL, +				   dio_flags);  	if (ret > 0 && !overwrite && ext4_test_inode_state(inode,  						EXT4_STATE_DIO_UNWRITTEN)) { @@ -3538,6 +3631,7 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)  {  	struct address_space *mapping = iocb->ki_filp->f_mapping;  	struct inode *inode = mapping->host; +	size_t count = iov_iter_count(iter);  	ssize_t ret;  	/* @@ -3546,19 +3640,12 @@ static ssize_t ext4_direct_IO_read(struct kiocb *iocb, struct iov_iter *iter)  	 * we are protected against page writeback as well.  	 */  	inode_lock_shared(inode); -	if (IS_DAX(inode)) { -		ret = dax_do_io(iocb, inode, iter, ext4_dio_get_block, NULL, 0); -	} else { -		size_t count = iov_iter_count(iter); - -		ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, -						   iocb->ki_pos + count); -		if (ret) -			goto out_unlock; -		ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, -					   iter, ext4_dio_get_block, -					   NULL, NULL, 0); -	} +	ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, +					   iocb->ki_pos + count); +	if (ret) +		goto out_unlock; +	ret = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, +				   iter, ext4_dio_get_block, NULL, NULL, 0);  out_unlock:  	inode_unlock_shared(inode);  	return ret; @@ -3587,6 +3674,10 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)  	if (ext4_has_inline_data(inode))  		return 0; +	/* DAX uses iomap path now */ +	if (WARN_ON_ONCE(IS_DAX(inode))) +		return 0; +  	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));  	if (iov_iter_rw(iter) == READ)  		ret = ext4_direct_IO_read(iocb, iter); @@ -3615,6 +3706,13 @@ static int ext4_journalled_set_page_dirty(struct page *page)  	return __set_page_dirty_nobuffers(page);  } +static int ext4_set_page_dirty(struct page *page) +{ +	WARN_ON_ONCE(!PageLocked(page) && !PageDirty(page)); +	WARN_ON_ONCE(!page_has_buffers(page)); +	return __set_page_dirty_buffers(page); +} +  static const struct address_space_operations ext4_aops = {  	.readpage		= ext4_readpage,  	.readpages		= ext4_readpages, @@ -3622,6 +3720,7 @@ static const struct address_space_operations ext4_aops = {  	.writepages		= ext4_writepages,  	.write_begin		= ext4_write_begin,  	.write_end		= ext4_write_end, +	.set_page_dirty		= ext4_set_page_dirty,  	.bmap			= ext4_bmap,  	.invalidatepage		= ext4_invalidatepage,  	.releasepage		= ext4_releasepage, @@ -3654,6 +3753,7 @@ static const struct address_space_operations ext4_da_aops = {  	.writepages		= ext4_writepages,  	.write_begin		= ext4_da_write_begin,  	.write_end		= ext4_da_write_end, +	.set_page_dirty		= ext4_set_page_dirty,  	.bmap			= ext4_bmap,  	.invalidatepage		= ext4_da_invalidatepage,  	.releasepage		= ext4_releasepage, @@ -3743,7 +3843,8 @@ static int __ext4_block_zero_page_range(handle_t *handle,  			/* We expect the key to be set. */  			BUG_ON(!fscrypt_has_encryption_key(inode));  			BUG_ON(blocksize != PAGE_SIZE); -			WARN_ON_ONCE(fscrypt_decrypt_page(page)); +			WARN_ON_ONCE(fscrypt_decrypt_page(page->mapping->host, +						page, PAGE_SIZE, 0, page->index));  		}  	}  	if (ext4_should_journal_data(inode)) { @@ -3792,8 +3893,10 @@ static int ext4_block_zero_page_range(handle_t *handle,  	if (length > max || length < 0)  		length = max; -	if (IS_DAX(inode)) -		return dax_zero_page_range(inode, from, length, ext4_get_block); +	if (IS_DAX(inode)) { +		return iomap_zero_range(inode, from, length, NULL, +					&ext4_iomap_ops); +	}  	return __ext4_block_zero_page_range(handle, mapping, from, length);  } @@ -4026,7 +4129,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)  	if (IS_SYNC(inode))  		ext4_handle_sync(handle); -	inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +	inode->i_mtime = inode->i_ctime = current_time(inode);  	ext4_mark_inode_dirty(handle, inode);  out_stop:  	ext4_journal_stop(handle); @@ -4091,10 +4194,11 @@ int ext4_inode_attach_jinode(struct inode *inode)   * that's fine - as long as they are linked from the inode, the post-crash   * ext4_truncate() run will find them and release them.   */ -void ext4_truncate(struct inode *inode) +int ext4_truncate(struct inode *inode)  {  	struct ext4_inode_info *ei = EXT4_I(inode);  	unsigned int credits; +	int err = 0;  	handle_t *handle;  	struct address_space *mapping = inode->i_mapping; @@ -4108,7 +4212,7 @@ void ext4_truncate(struct inode *inode)  	trace_ext4_truncate_enter(inode);  	if (!ext4_can_truncate(inode)) -		return; +		return 0;  	ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS); @@ -4120,13 +4224,13 @@ void ext4_truncate(struct inode *inode)  		ext4_inline_data_truncate(inode, &has_inline);  		if (has_inline) -			return; +			return 0;  	}  	/* If we zero-out tail of the page, we have to create jinode for jbd2 */  	if (inode->i_size & (inode->i_sb->s_blocksize - 1)) {  		if (ext4_inode_attach_jinode(inode) < 0) -			return; +			return 0;  	}  	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) @@ -4135,10 +4239,8 @@ void ext4_truncate(struct inode *inode)  		credits = ext4_blocks_for_truncate(inode);  	handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits); -	if (IS_ERR(handle)) { -		ext4_std_error(inode->i_sb, PTR_ERR(handle)); -		return; -	} +	if (IS_ERR(handle)) +		return PTR_ERR(handle);  	if (inode->i_size & (inode->i_sb->s_blocksize - 1))  		ext4_block_truncate_page(handle, mapping, inode->i_size); @@ -4152,7 +4254,8 @@ void ext4_truncate(struct inode *inode)  	 * Implication: the file must always be in a sane, consistent  	 * truncatable state while each transaction commits.  	 */ -	if (ext4_orphan_add(handle, inode)) +	err = ext4_orphan_add(handle, inode); +	if (err)  		goto out_stop;  	down_write(&EXT4_I(inode)->i_data_sem); @@ -4160,11 +4263,13 @@ void ext4_truncate(struct inode *inode)  	ext4_discard_preallocations(inode);  	if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) -		ext4_ext_truncate(handle, inode); +		err = ext4_ext_truncate(handle, inode);  	else  		ext4_ind_truncate(handle, inode);  	up_write(&ei->i_data_sem); +	if (err) +		goto out_stop;  	if (IS_SYNC(inode))  		ext4_handle_sync(handle); @@ -4180,11 +4285,12 @@ out_stop:  	if (inode->i_nlink)  		ext4_orphan_del(handle, inode); -	inode->i_mtime = inode->i_ctime = ext4_current_time(inode); +	inode->i_mtime = inode->i_ctime = current_time(inode);  	ext4_mark_inode_dirty(handle, inode);  	ext4_journal_stop(handle);  	trace_ext4_truncate_exit(inode); +	return err;  }  /* @@ -4352,7 +4458,9 @@ void ext4_set_inode_flags(struct inode *inode)  		new_fl |= S_NOATIME;  	if (flags & EXT4_DIRSYNC_FL)  		new_fl |= S_DIRSYNC; -	if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode)) +	if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode) && +	    !ext4_should_journal_data(inode) && !ext4_has_inline_data(inode) && +	    !ext4_encrypted_inode(inode))  		new_fl |= S_DAX;  	inode_set_flags(inode, new_fl,  			S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX); @@ -4411,7 +4519,9 @@ static inline void ext4_iget_extra_inode(struct inode *inode,  {  	__le32 *magic = (void *)raw_inode +  			EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; -	if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) { +	if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <= +	    EXT4_INODE_SIZE(inode->i_sb) && +	    *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {  		ext4_set_inode_state(inode, EXT4_STATE_XATTR);  		ext4_find_inline_data_nolock(inode);  	} else @@ -4434,6 +4544,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)  	struct inode *inode;  	journal_t *journal = EXT4_SB(sb)->s_journal;  	long ret; +	loff_t size;  	int block;  	uid_t i_uid;  	gid_t i_gid; @@ -4456,10 +4567,12 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)  	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {  		ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);  		if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > -		    EXT4_INODE_SIZE(inode->i_sb)) { -			EXT4_ERROR_INODE(inode, "bad extra_isize (%u != %u)", -				EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize, -				EXT4_INODE_SIZE(inode->i_sb)); +			EXT4_INODE_SIZE(inode->i_sb) || +		    (ei->i_extra_isize & 3)) { +			EXT4_ERROR_INODE(inode, +					 "bad extra_isize %u (inode size %u)", +					 ei->i_extra_isize, +					 EXT4_INODE_SIZE(inode->i_sb));  			ret = -EFSCORRUPTED;  			goto bad_inode;  		} @@ -4534,6 +4647,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)  		ei->i_file_acl |=  			((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;  	inode->i_size = ext4_isize(raw_inode); +	if ((size = i_size_read(inode)) < 0) { +		EXT4_ERROR_INODE(inode, "bad i_size value: %lld", size); +		ret = -EFSCORRUPTED; +		goto bad_inode; +	}  	ei->i_disksize = inode->i_size;  #ifdef CONFIG_QUOTA  	ei->i_reserved_quota = 0; @@ -4577,6 +4695,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)  	if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {  		if (ei->i_extra_isize == 0) {  			/* The extra space is currently unused. Use it. */ +			BUILD_BUG_ON(sizeof(struct ext4_inode) & 3);  			ei->i_extra_isize = sizeof(struct ext4_inode) -  					    EXT4_GOOD_OLD_INODE_SIZE;  		} else { @@ -5154,7 +5273,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)  			 * update c/mtime in shrink case below  			 */  			if (!shrink) { -				inode->i_mtime = ext4_current_time(inode); +				inode->i_mtime = current_time(inode);  				inode->i_ctime = inode->i_mtime;  			}  			down_write(&EXT4_I(inode)->i_data_sem); @@ -5199,12 +5318,15 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)  		 * in data=journal mode to make pages freeable.  		 */  		truncate_pagecache(inode, inode->i_size); -		if (shrink) -			ext4_truncate(inode); +		if (shrink) { +			rc = ext4_truncate(inode); +			if (rc) +				error = rc; +		}  		up_write(&EXT4_I(inode)->i_mmap_sem);  	} -	if (!rc) { +	if (!error) {  		setattr_copy(inode, attr);  		mark_inode_dirty(inode);  	} @@ -5216,7 +5338,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)  	if (orphan && inode->i_nlink)  		ext4_orphan_del(NULL, inode); -	if (!rc && (ia_valid & ATTR_MODE)) +	if (!error && (ia_valid & ATTR_MODE))  		rc = posix_acl_chmod(inode, inode->i_mode);  err_out: @@ -5455,18 +5577,20 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)  	err = ext4_reserve_inode_write(handle, inode, &iloc);  	if (err)  		return err; -	if (ext4_handle_valid(handle) && -	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && +	if (EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&  	    !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) {  		/* -		 * We need extra buffer credits since we may write into EA block +		 * In nojournal mode, we can immediately attempt to expand +		 * the inode.  When journaled, we first need to obtain extra +		 * buffer credits since we may write into the EA block  		 * with this same handle. If journal_extend fails, then it will  		 * only result in a minor loss of functionality for that inode.  		 * If this is felt to be critical, then e2fsck should be run to  		 * force a large enough s_min_extra_isize.  		 */ -		if ((jbd2_journal_extend(handle, -			     EXT4_DATA_TRANS_BLOCKS(inode->i_sb))) == 0) { +		if (!ext4_handle_valid(handle) || +		    jbd2_journal_extend(handle, +			     EXT4_DATA_TRANS_BLOCKS(inode->i_sb)) == 0) {  			ret = ext4_expand_extra_isize(inode,  						      sbi->s_want_extra_isize,  						      iloc, handle); @@ -5620,6 +5744,11 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)  		ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);  	}  	ext4_set_aops(inode); +	/* +	 * Update inode->i_flags after EXT4_INODE_JOURNAL_DATA was updated. +	 * E.g. S_DAX may get cleared / set. +	 */ +	ext4_set_inode_flags(inode);  	jbd2_journal_unlock_updates(journal);  	percpu_up_write(&sbi->s_journal_flag_rwsem); diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index bf5ae8ebbc97..d534399cf607 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -15,7 +15,7 @@  #include <linux/file.h>  #include <linux/quotaops.h>  #include <linux/uuid.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h>  #include "ext4_jbd2.h"  #include "ext4.h" @@ -153,7 +153,7 @@ static long swap_inode_boot_loader(struct super_block *sb,  	swap_inode_data(inode, inode_bl); -	inode->i_ctime = inode_bl->i_ctime = ext4_current_time(inode); +	inode->i_ctime = inode_bl->i_ctime = current_time(inode);  	spin_lock(&sbi->s_next_gen_lock);  	inode->i_generation = sbi->s_next_generation++; @@ -191,6 +191,7 @@ journal_err_out:  	return err;  } +#ifdef CONFIG_EXT4_FS_ENCRYPTION  static int uuid_is_zero(__u8 u[16])  {  	int	i; @@ -200,6 +201,7 @@ static int uuid_is_zero(__u8 u[16])  			return 0;  	return 1;  } +#endif  static int ext4_ioctl_setflags(struct inode *inode,  			       unsigned int flags) @@ -248,8 +250,11 @@ static int ext4_ioctl_setflags(struct inode *inode,  			err = -EOPNOTSUPP;  			goto flags_out;  		} -	} else if (oldflags & EXT4_EOFBLOCKS_FL) -		ext4_truncate(inode); +	} else if (oldflags & EXT4_EOFBLOCKS_FL) { +		err = ext4_truncate(inode); +		if (err) +			goto flags_out; +	}  	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);  	if (IS_ERR(handle)) { @@ -265,6 +270,9 @@ static int ext4_ioctl_setflags(struct inode *inode,  	for (i = 0, mask = 1; i < 32; i++, mask <<= 1) {  		if (!(mask & EXT4_FL_USER_MODIFIABLE))  			continue; +		/* These flags get special treatment later */ +		if (mask == EXT4_JOURNAL_DATA_FL || mask == EXT4_EXTENTS_FL) +			continue;  		if (mask & flags)  			ext4_set_inode_flag(inode, i);  		else @@ -272,7 +280,7 @@ static int ext4_ioctl_setflags(struct inode *inode,  	}  	ext4_set_inode_flags(inode); -	inode->i_ctime = ext4_current_time(inode); +	inode->i_ctime = current_time(inode);  	err = ext4_mark_iloc_dirty(handle, inode, &iloc);  flags_err: @@ -368,7 +376,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)  	}  	EXT4_I(inode)->i_projid = kprojid; -	inode->i_ctime = ext4_current_time(inode); +	inode->i_ctime = current_time(inode);  out_dirty:  	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);  	if (!err) @@ -409,6 +417,10 @@ static inline __u32 ext4_iflags_to_xflags(unsigned long iflags)  	return xflags;  } +#define EXT4_SUPPORTED_FS_XFLAGS (FS_XFLAG_SYNC | FS_XFLAG_IMMUTABLE | \ +				  FS_XFLAG_APPEND | FS_XFLAG_NODUMP | \ +				  FS_XFLAG_NOATIME | FS_XFLAG_PROJINHERIT) +  /* Transfer xflags flags to internal */  static inline unsigned long ext4_xflags_to_iflags(__u32 xflags)  { @@ -453,12 +465,22 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)  		if (get_user(flags, (int __user *) arg))  			return -EFAULT; +		if (flags & ~EXT4_FL_USER_VISIBLE) +			return -EOPNOTSUPP; +		/* +		 * chattr(1) grabs flags via GETFLAGS, modifies the result and +		 * passes that to SETFLAGS. So we cannot easily make SETFLAGS +		 * more restrictive than just silently masking off visible but +		 * not settable flags as we always did. +		 */ +		flags &= EXT4_FL_USER_MODIFIABLE; +		if (ext4_mask_flags(inode->i_mode, flags) != flags) +			return -EOPNOTSUPP; +  		err = mnt_want_write_file(filp);  		if (err)  			return err; -		flags = ext4_mask_flags(inode->i_mode, flags); -  		inode_lock(inode);  		err = ext4_ioctl_setflags(inode, flags);  		inode_unlock(inode); @@ -500,7 +522,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)  		}  		err = ext4_reserve_inode_write(handle, inode, &iloc);  		if (err == 0) { -			inode->i_ctime = ext4_current_time(inode); +			inode->i_ctime = current_time(inode);  			inode->i_generation = generation;  			err = ext4_mark_iloc_dirty(handle, inode, &iloc);  		} @@ -765,28 +787,19 @@ resizefs_out:  	}  	case EXT4_IOC_PRECACHE_EXTENTS:  		return ext4_ext_precache(inode); -	case EXT4_IOC_SET_ENCRYPTION_POLICY: { -#ifdef CONFIG_EXT4_FS_ENCRYPTION -		struct fscrypt_policy policy; +	case EXT4_IOC_SET_ENCRYPTION_POLICY:  		if (!ext4_has_feature_encrypt(sb))  			return -EOPNOTSUPP; +		return fscrypt_ioctl_set_policy(filp, (const void __user *)arg); -		if (copy_from_user(&policy, -				   (struct fscrypt_policy __user *)arg, -				   sizeof(policy))) -			return -EFAULT; -		return fscrypt_process_policy(filp, &policy); -#else -		return -EOPNOTSUPP; -#endif -	}  	case EXT4_IOC_GET_ENCRYPTION_PWSALT: { +#ifdef CONFIG_EXT4_FS_ENCRYPTION  		int err, err2;  		struct ext4_sb_info *sbi = EXT4_SB(sb);  		handle_t *handle; -		if (!ext4_sb_has_crypto(sb)) +		if (!ext4_has_feature_encrypt(sb))  			return -EOPNOTSUPP;  		if (uuid_is_zero(sbi->s_es->s_encrypt_pw_salt)) {  			err = mnt_want_write_file(filp); @@ -816,24 +829,13 @@ resizefs_out:  				 sbi->s_es->s_encrypt_pw_salt, 16))  			return -EFAULT;  		return 0; -	} -	case EXT4_IOC_GET_ENCRYPTION_POLICY: { -#ifdef CONFIG_EXT4_FS_ENCRYPTION -		struct fscrypt_policy policy; -		int err = 0; - -		if (!ext4_encrypted_inode(inode)) -			return -ENOENT; -		err = fscrypt_get_policy(inode, &policy); -		if (err) -			return err; -		if (copy_to_user((void __user *)arg, &policy, sizeof(policy))) -			return -EFAULT; -		return 0;  #else  		return -EOPNOTSUPP;  #endif  	} +	case EXT4_IOC_GET_ENCRYPTION_POLICY: +		return fscrypt_ioctl_get_policy(filp, (void __user *)arg); +  	case EXT4_IOC_FSGETXATTR:  	{  		struct fsxattr fa; @@ -865,13 +867,17 @@ resizefs_out:  		if (!inode_owner_or_capable(inode))  			return -EACCES; +		if (fa.fsx_xflags & ~EXT4_SUPPORTED_FS_XFLAGS) +			return -EOPNOTSUPP; + +		flags = ext4_xflags_to_iflags(fa.fsx_xflags); +		if (ext4_mask_flags(inode->i_mode, flags) != flags) +			return -EOPNOTSUPP; +  		err = mnt_want_write_file(filp);  		if (err)  			return err; -		flags = ext4_xflags_to_iflags(fa.fsx_xflags); -		flags = ext4_mask_flags(inode->i_mode, flags); -  		inode_lock(inode);  		flags = (ei->i_flags & ~EXT4_FL_XFLAG_VISIBLE) |  			 (flags & EXT4_FL_XFLAG_VISIBLE); diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index f418f55c2bbe..7ae43c59bc79 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -669,7 +669,7 @@ static void ext4_mb_mark_free_simple(struct super_block *sb,  	ext4_grpblk_t min;  	ext4_grpblk_t max;  	ext4_grpblk_t chunk; -	unsigned short border; +	unsigned int border;  	BUG_ON(len > EXT4_CLUSTERS_PER_GROUP(sb)); @@ -2287,7 +2287,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)  	struct ext4_group_info *grinfo;  	struct sg {  		struct ext4_group_info info; -		ext4_grpblk_t counters[16]; +		ext4_grpblk_t counters[EXT4_MAX_BLOCK_LOG_SIZE + 2];  	} sg;  	group--; diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index d89754ef1aab..eb9835638680 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -35,7 +35,7 @@ static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)  }  /* - * Write the MMP block using WRITE_SYNC to try to get the block on-disk + * Write the MMP block using REQ_SYNC to try to get the block on-disk   * faster.   */  static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) @@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)  	lock_buffer(bh);  	bh->b_end_io = end_buffer_write_sync;  	get_bh(bh); -	submit_bh(REQ_OP_WRITE, WRITE_SYNC | REQ_META | REQ_PRIO, bh); +	submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);  	wait_on_buffer(bh);  	sb_end_write(sb);  	if (unlikely(!buffer_uptodate(bh))) @@ -88,7 +88,7 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,  	get_bh(*bh);  	lock_buffer(*bh);  	(*bh)->b_end_io = end_buffer_read_sync; -	submit_bh(REQ_OP_READ, READ_SYNC | REQ_META | REQ_PRIO, *bh); +	submit_bh(REQ_OP_READ, REQ_META | REQ_PRIO, *bh);  	wait_on_buffer(*bh);  	if (!buffer_uptodate(*bh)) {  		ret = -EIO; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 104f8bfba718..eadba919f26b 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1941,7 +1941,7 @@ static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,  	 * happen is that the times are slightly out of date  	 * and/or different from the directory change time.  	 */ -	dir->i_mtime = dir->i_ctime = ext4_current_time(dir); +	dir->i_mtime = dir->i_ctime = current_time(dir);  	ext4_update_dx_flag(dir);  	dir->i_version++;  	ext4_mark_inode_dirty(handle, dir); @@ -2987,7 +2987,7 @@ static int ext4_rmdir(struct inode *dir, struct dentry *dentry)  	 * recovery. */  	inode->i_size = 0;  	ext4_orphan_add(handle, inode); -	inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode); +	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);  	ext4_mark_inode_dirty(handle, inode);  	ext4_dec_count(handle, dir);  	ext4_update_dx_flag(dir); @@ -3050,13 +3050,13 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry)  	retval = ext4_delete_entry(handle, dir, de, bh);  	if (retval)  		goto end_unlink; -	dir->i_ctime = dir->i_mtime = ext4_current_time(dir); +	dir->i_ctime = dir->i_mtime = current_time(dir);  	ext4_update_dx_flag(dir);  	ext4_mark_inode_dirty(handle, dir);  	drop_nlink(inode);  	if (!inode->i_nlink)  		ext4_orphan_add(handle, inode); -	inode->i_ctime = ext4_current_time(inode); +	inode->i_ctime = current_time(inode);  	ext4_mark_inode_dirty(handle, inode);  end_unlink: @@ -3254,7 +3254,7 @@ retry:  	if (IS_DIRSYNC(dir))  		ext4_handle_sync(handle); -	inode->i_ctime = ext4_current_time(inode); +	inode->i_ctime = current_time(inode);  	ext4_inc_count(handle, inode);  	ihold(inode); @@ -3381,7 +3381,7 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,  		ent->de->file_type = file_type;  	ent->dir->i_version++;  	ent->dir->i_ctime = ent->dir->i_mtime = -		ext4_current_time(ent->dir); +		current_time(ent->dir);  	ext4_mark_inode_dirty(handle, ent->dir);  	BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");  	if (!ent->inlined) { @@ -3651,7 +3651,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,  	 * Like most other Unix systems, set the ctime for inodes on a  	 * rename.  	 */ -	old.inode->i_ctime = ext4_current_time(old.inode); +	old.inode->i_ctime = current_time(old.inode);  	ext4_mark_inode_dirty(handle, old.inode);  	if (!whiteout) { @@ -3663,9 +3663,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,  	if (new.inode) {  		ext4_dec_count(handle, new.inode); -		new.inode->i_ctime = ext4_current_time(new.inode); +		new.inode->i_ctime = current_time(new.inode);  	} -	old.dir->i_ctime = old.dir->i_mtime = ext4_current_time(old.dir); +	old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);  	ext4_update_dx_flag(old.dir);  	if (old.dir_bh) {  		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino); @@ -3723,6 +3723,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,  	};  	u8 new_file_type;  	int retval; +	struct timespec ctime;  	if ((ext4_encrypted_inode(old_dir) ||  	     ext4_encrypted_inode(new_dir)) && @@ -3823,8 +3824,9 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,  	 * Like most other Unix systems, set the ctime for inodes on a  	 * rename.  	 */ -	old.inode->i_ctime = ext4_current_time(old.inode); -	new.inode->i_ctime = ext4_current_time(new.inode); +	ctime = current_time(old.inode); +	old.inode->i_ctime = ctime; +	new.inode->i_ctime = ctime;  	ext4_mark_inode_dirty(handle, old.inode);  	ext4_mark_inode_dirty(handle, new.inode); diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 0094923e5ebf..d83b0f3c5fe9 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c @@ -340,7 +340,7 @@ void ext4_io_submit(struct ext4_io_submit *io)  	if (bio) {  		int io_op_flags = io->io_wbc->sync_mode == WB_SYNC_ALL ? -				  WRITE_SYNC : 0; +				  REQ_SYNC : 0;  		bio_set_op_attrs(io->io_bio, REQ_OP_WRITE, io_op_flags);  		submit_bio(io->io_bio);  	} @@ -457,7 +457,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,  		}  		if (buffer_new(bh)) {  			clear_buffer_new(bh); -			unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr); +			clean_bdev_bh_alias(bh);  		}  		set_buffer_async_write(bh);  		nr_to_submit++; @@ -470,7 +470,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,  		gfp_t gfp_flags = GFP_NOFS;  	retry_encrypt: -		data_page = fscrypt_encrypt_page(inode, page, gfp_flags); +		data_page = fscrypt_encrypt_page(inode, page, PAGE_SIZE, 0, +						page->index, gfp_flags);  		if (IS_ERR(data_page)) {  			ret = PTR_ERR(data_page);  			if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) { diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 52b0530c5d65..66845a08a87a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -38,7 +38,7 @@  #include <linux/log2.h>  #include <linux/crc16.h>  #include <linux/cleancache.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h>  #include <linux/kthread.h>  #include <linux/freezer.h> @@ -863,7 +863,6 @@ static void ext4_put_super(struct super_block *sb)  	percpu_counter_destroy(&sbi->s_dirs_counter);  	percpu_counter_destroy(&sbi->s_dirtyclusters_counter);  	percpu_free_rwsem(&sbi->s_journal_flag_rwsem); -	brelse(sbi->s_sbh);  #ifdef CONFIG_QUOTA  	for (i = 0; i < EXT4_MAXQUOTAS; i++)  		kfree(sbi->s_qf_names[i]); @@ -895,6 +894,7 @@ static void ext4_put_super(struct super_block *sb)  	}  	if (sbi->s_mmp_tsk)  		kthread_stop(sbi->s_mmp_tsk); +	brelse(sbi->s_sbh);  	sb->s_fs_info = NULL;  	/*  	 * Now that we are completely done shutting down the @@ -1114,37 +1114,55 @@ static int ext4_prepare_context(struct inode *inode)  static int ext4_set_context(struct inode *inode, const void *ctx, size_t len,  							void *fs_data)  { -	handle_t *handle; -	int res, res2; +	handle_t *handle = fs_data; +	int res, res2, retries = 0; + +	/* +	 * If a journal handle was specified, then the encryption context is +	 * being set on a new inode via inheritance and is part of a larger +	 * transaction to create the inode.  Otherwise the encryption context is +	 * being set on an existing inode in its own transaction.  Only in the +	 * latter case should the "retry on ENOSPC" logic be used. +	 */ -	/* fs_data is null when internally used. */ -	if (fs_data) { -		res  = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, -				EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, -				len, 0); +	if (handle) { +		res = ext4_xattr_set_handle(handle, inode, +					    EXT4_XATTR_INDEX_ENCRYPTION, +					    EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, +					    ctx, len, 0);  		if (!res) {  			ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);  			ext4_clear_inode_state(inode,  					EXT4_STATE_MAY_INLINE_DATA); +			/* +			 * Update inode->i_flags - e.g. S_DAX may get disabled +			 */ +			ext4_set_inode_flags(inode);  		}  		return res;  	} +retry:  	handle = ext4_journal_start(inode, EXT4_HT_MISC,  			ext4_jbd2_credits_xattr(inode));  	if (IS_ERR(handle))  		return PTR_ERR(handle); -	res = ext4_xattr_set(inode, EXT4_XATTR_INDEX_ENCRYPTION, -			EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, ctx, -			len, 0); +	res = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_ENCRYPTION, +				    EXT4_XATTR_NAME_ENCRYPTION_CONTEXT, +				    ctx, len, 0);  	if (!res) {  		ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT); +		/* Update inode->i_flags - e.g. S_DAX may get disabled */ +		ext4_set_inode_flags(inode);  		res = ext4_mark_inode_dirty(handle, inode);  		if (res)  			EXT4_ERROR_INODE(inode, "Failed to mark inode dirty");  	}  	res2 = ext4_journal_stop(handle); + +	if (res == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) +		goto retry;  	if (!res)  		res = res2;  	return res; @@ -1187,7 +1205,7 @@ static int ext4_release_dquot(struct dquot *dquot);  static int ext4_mark_dquot_dirty(struct dquot *dquot);  static int ext4_write_info(struct super_block *sb, int type);  static int ext4_quota_on(struct super_block *sb, int type, int format_id, -			 struct path *path); +			 const struct path *path);  static int ext4_quota_off(struct super_block *sb, int type);  static int ext4_quota_on_mount(struct super_block *sb, int type);  static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, @@ -1883,12 +1901,6 @@ static int parse_options(char *options, struct super_block *sb,  			return 0;  		}  	} -	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && -	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) { -		ext4_msg(sb, KERN_ERR, "can't mount with journal_async_commit " -			 "in data=ordered mode"); -		return 0; -	}  	return 1;  } @@ -2330,7 +2342,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,  				struct ext4_super_block *es)  {  	unsigned int s_flags = sb->s_flags; -	int nr_orphans = 0, nr_truncates = 0; +	int ret, nr_orphans = 0, nr_truncates = 0;  #ifdef CONFIG_QUOTA  	int i;  #endif @@ -2412,7 +2424,9 @@ static void ext4_orphan_cleanup(struct super_block *sb,  				  inode->i_ino, inode->i_size);  			inode_lock(inode);  			truncate_inode_pages(inode->i_mapping, inode->i_size); -			ext4_truncate(inode); +			ret = ext4_truncate(inode); +			if (ret) +				ext4_std_error(inode->i_sb, ret);  			inode_unlock(inode);  			nr_truncates++;  		} else { @@ -3193,10 +3207,15 @@ static int count_overhead(struct super_block *sb, ext4_group_t grp,  			ext4_set_bit(s++, buf);  			count++;  		} -		for (j = ext4_bg_num_gdb(sb, grp); j > 0; j--) { -			ext4_set_bit(EXT4_B2C(sbi, s++), buf); -			count++; +		j = ext4_bg_num_gdb(sb, grp); +		if (s + j > EXT4_BLOCKS_PER_GROUP(sb)) { +			ext4_error(sb, "Invalid number of block group " +				   "descriptor blocks: %d", j); +			j = EXT4_BLOCKS_PER_GROUP(sb) - s;  		} +		count += j; +		for (; j > 0; j--) +			ext4_set_bit(EXT4_B2C(sbi, s++), buf);  	}  	if (!count)  		return 0; @@ -3301,7 +3320,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	char *orig_data = kstrdup(data, GFP_KERNEL);  	struct buffer_head *bh;  	struct ext4_super_block *es = NULL; -	struct ext4_sb_info *sbi; +	struct ext4_sb_info *sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);  	ext4_fsblk_t block;  	ext4_fsblk_t sb_block = get_sb_block(&data);  	ext4_fsblk_t logical_sb_block; @@ -3320,16 +3339,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;  	ext4_group_t first_not_zeroed; -	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); -	if (!sbi) -		goto out_free_orig; +	if ((data && !orig_data) || !sbi) +		goto out_free_base;  	sbi->s_blockgroup_lock =  		kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL); -	if (!sbi->s_blockgroup_lock) { -		kfree(sbi); -		goto out_free_orig; -	} +	if (!sbi->s_blockgroup_lock) +		goto out_free_base; +  	sb->s_fs_info = sbi;  	sbi->s_sb = sb;  	sbi->s_inode_readahead_blks = EXT4_DEF_INODE_READAHEAD_BLKS; @@ -3475,11 +3492,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	 */  	sbi->s_li_wait_mult = EXT4_DEF_LI_WAIT_MULT; -	if (!parse_options((char *) sbi->s_es->s_mount_opts, sb, -			   &journal_devnum, &journal_ioprio, 0)) { -		ext4_msg(sb, KERN_WARNING, -			 "failed to parse options in superblock: %s", -			 sbi->s_es->s_mount_opts); +	if (sbi->s_es->s_mount_opts[0]) { +		char *s_mount_opts = kstrndup(sbi->s_es->s_mount_opts, +					      sizeof(sbi->s_es->s_mount_opts), +					      GFP_KERNEL); +		if (!s_mount_opts) +			goto failed_mount; +		if (!parse_options(s_mount_opts, sb, &journal_devnum, +				   &journal_ioprio, 0)) { +			ext4_msg(sb, KERN_WARNING, +				 "failed to parse options in superblock: %s", +				 s_mount_opts); +		} +		kfree(s_mount_opts);  	}  	sbi->s_def_mount_opt = sbi->s_mount_opt;  	if (!parse_options((char *) data, sb, &journal_devnum, @@ -3505,6 +3530,11 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  				 "both data=journal and dax");  			goto failed_mount;  		} +		if (ext4_has_feature_encrypt(sb)) { +			ext4_msg(sb, KERN_WARNING, +				 "encrypted files will use data=ordered " +				 "instead of data journaling mode"); +		}  		if (test_opt(sb, DELALLOC))  			clear_opt(sb, DELALLOC);  	} else { @@ -3660,12 +3690,16 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);  	sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); -	if (EXT4_INODE_SIZE(sb) == 0 || EXT4_INODES_PER_GROUP(sb) == 0) -		goto cantfind_ext4;  	sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb);  	if (sbi->s_inodes_per_block == 0)  		goto cantfind_ext4; +	if (sbi->s_inodes_per_group < sbi->s_inodes_per_block || +	    sbi->s_inodes_per_group > blocksize * 8) { +		ext4_msg(sb, KERN_ERR, "invalid inodes per group: %lu\n", +			 sbi->s_blocks_per_group); +		goto failed_mount; +	}  	sbi->s_itb_per_group = sbi->s_inodes_per_group /  					sbi->s_inodes_per_block;  	sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); @@ -3748,13 +3782,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	}  	sbi->s_cluster_ratio = clustersize / blocksize; -	if (sbi->s_inodes_per_group > blocksize * 8) { -		ext4_msg(sb, KERN_ERR, -		       "#inodes per group too big: %lu", -		       sbi->s_inodes_per_group); -		goto failed_mount; -	} -  	/* Do we have standard group size of clustersize * 8 blocks ? */  	if (sbi->s_blocks_per_group == clustersize << 3)  		set_opt2(sb, STD_GROUP_SIZE); @@ -3814,6 +3841,15 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));  	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /  		   EXT4_DESC_PER_BLOCK(sb); +	if (ext4_has_feature_meta_bg(sb)) { +		if (le32_to_cpu(es->s_first_meta_bg) >= db_count) { +			ext4_msg(sb, KERN_WARNING, +				 "first meta block group too large: %u " +				 "(group descriptor block count %u)", +				 le32_to_cpu(es->s_first_meta_bg), db_count); +			goto failed_mount; +		} +	}  	sbi->s_group_desc = ext4_kvmalloc(db_count *  					  sizeof(struct buffer_head *),  					  GFP_KERNEL); @@ -3967,6 +4003,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)  	default:  		break;  	} + +	if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA && +	    test_opt(sb, JOURNAL_ASYNC_COMMIT)) { +		ext4_msg(sb, KERN_ERR, "can't mount with " +			"journal_async_commit in data=ordered mode"); +		goto failed_mount_wq; +	} +  	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);  	sbi->s_journal->j_commit_callback = ext4_journal_commit_callback; @@ -4160,7 +4204,9 @@ no_journal:  	if (___ratelimit(&ext4_mount_msg_ratelimit, "EXT4-fs mount"))  		ext4_msg(sb, KERN_INFO, "mounted filesystem with%s. " -			 "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, +			 "Opts: %.*s%s%s", descr, +			 (int) sizeof(sbi->s_es->s_mount_opts), +			 sbi->s_es->s_mount_opts,  			 *sbi->s_es->s_mount_opts ? "; " : "", orig_data);  	if (es->s_error_count) @@ -4239,8 +4285,8 @@ failed_mount:  out_fail:  	sb->s_fs_info = NULL;  	kfree(sbi->s_blockgroup_lock); +out_free_base:  	kfree(sbi); -out_free_orig:  	kfree(orig_data);  	return err ? err : ret;  } @@ -4550,7 +4596,8 @@ static int ext4_commit_super(struct super_block *sb, int sync)  				&EXT4_SB(sb)->s_freeinodes_counter));  	BUFFER_TRACE(sbh, "marking dirty");  	ext4_superblock_csum_set(sb); -	lock_buffer(sbh); +	if (sync) +		lock_buffer(sbh);  	if (buffer_write_io_error(sbh)) {  		/*  		 * Oh, dear.  A previous attempt to write the @@ -4566,10 +4613,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)  		set_buffer_uptodate(sbh);  	}  	mark_buffer_dirty(sbh); -	unlock_buffer(sbh);  	if (sync) { +		unlock_buffer(sbh);  		error = __sync_dirty_buffer(sbh, -			test_opt(sb, BARRIER) ? WRITE_FUA : WRITE_SYNC); +			test_opt(sb, BARRIER) ? REQ_FUA : REQ_SYNC);  		if (error)  			return error; @@ -4857,6 +4904,13 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)  			err = -EINVAL;  			goto restore_opts;  		} +	} else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) { +		if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { +			ext4_msg(sb, KERN_ERR, "can't mount with " +				"journal_async_commit in data=ordered mode"); +			err = -EINVAL; +			goto restore_opts; +		}  	}  	if ((sbi->s_mount_opt ^ old_opts.s_mount_opt) & EXT4_MOUNT_DAX) { @@ -5239,7 +5293,7 @@ static void lockdep_set_quota_inode(struct inode *inode, int subclass)   * Standard function to be called on quota_on   */  static int ext4_quota_on(struct super_block *sb, int type, int format_id, -			 struct path *path) +			 const struct path *path)  {  	int err; @@ -5366,7 +5420,7 @@ static int ext4_quota_off(struct super_block *sb, int type)  	handle = ext4_journal_start(inode, EXT4_HT_QUOTA, 1);  	if (IS_ERR(handle))  		goto out; -	inode->i_mtime = inode->i_ctime = CURRENT_TIME; +	inode->i_mtime = inode->i_ctime = current_time(inode);  	ext4_mark_inode_dirty(handle, inode);  	ext4_journal_stop(handle); diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 557b3b0d668c..73b184d161fc 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -83,21 +83,18 @@ errout:  }  const struct inode_operations ext4_encrypted_symlink_inode_operations = { -	.readlink	= generic_readlink,  	.get_link	= ext4_encrypted_get_link,  	.setattr	= ext4_setattr,  	.listxattr	= ext4_listxattr,  };  const struct inode_operations ext4_symlink_inode_operations = { -	.readlink	= generic_readlink,  	.get_link	= page_get_link,  	.setattr	= ext4_setattr,  	.listxattr	= ext4_listxattr,  };  const struct inode_operations ext4_fast_symlink_inode_operations = { -	.readlink	= generic_readlink,  	.get_link	= simple_get_link,  	.setattr	= ext4_setattr,  	.listxattr	= ext4_listxattr, diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index d77be9e9f535..5a94fa52b74f 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -185,6 +185,7 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,  {  	struct ext4_xattr_entry *e = entry; +	/* Find the end of the names list */  	while (!IS_LAST_ENTRY(e)) {  		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);  		if ((void *)next >= end) @@ -192,15 +193,29 @@ ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,  		e = next;  	} +	/* Check the values */  	while (!IS_LAST_ENTRY(entry)) {  		if (entry->e_value_block != 0)  			return -EFSCORRUPTED; -		if (entry->e_value_size != 0 && -		    (value_start + le16_to_cpu(entry->e_value_offs) < -		     (void *)e + sizeof(__u32) || -		     value_start + le16_to_cpu(entry->e_value_offs) + -		    le32_to_cpu(entry->e_value_size) > end)) -			return -EFSCORRUPTED; +		if (entry->e_value_size != 0) { +			u16 offs = le16_to_cpu(entry->e_value_offs); +			u32 size = le32_to_cpu(entry->e_value_size); +			void *value; + +			/* +			 * The value cannot overlap the names, and the value +			 * with padding cannot extend beyond 'end'.  Check both +			 * the padded and unpadded sizes, since the size may +			 * overflow to 0 when adding padding. +			 */ +			if (offs > end - value_start) +				return -EFSCORRUPTED; +			value = value_start + offs; +			if (value < (void *)e + sizeof(u32) || +			    size > end - value || +			    EXT4_XATTR_SIZE(size) > end - value) +				return -EFSCORRUPTED; +		}  		entry = EXT4_XATTR_NEXT(entry);  	} @@ -231,13 +246,12 @@ static int  __xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,  			 void *end, const char *function, unsigned int line)  { -	struct ext4_xattr_entry *entry = IFIRST(header);  	int error = -EFSCORRUPTED; -	if (((void *) header >= end) || +	if (end - (void *)header < sizeof(*header) + sizeof(u32) ||  	    (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))  		goto errout; -	error = ext4_xattr_check_names(entry, end, entry); +	error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));  errout:  	if (error)  		__ext4_error_inode(inode, function, line, 0, @@ -1109,7 +1123,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,  	return 0;  } -static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, +static int ext4_xattr_ibody_set(struct inode *inode,  				struct ext4_xattr_info *i,  				struct ext4_xattr_ibody_find *is)  { @@ -1216,7 +1230,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,  	}  	if (!value) {  		if (!is.s.not_found) -			error = ext4_xattr_ibody_set(handle, inode, &i, &is); +			error = ext4_xattr_ibody_set(inode, &i, &is);  		else if (!bs.s.not_found)  			error = ext4_xattr_block_set(handle, inode, &i, &bs);  	} else { @@ -1227,7 +1241,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,  		if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))  			goto cleanup; -		error = ext4_xattr_ibody_set(handle, inode, &i, &is); +		error = ext4_xattr_ibody_set(inode, &i, &is);  		if (!error && !bs.s.not_found) {  			i.value = NULL;  			error = ext4_xattr_block_set(handle, inode, &i, &bs); @@ -1242,14 +1256,13 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,  				goto cleanup;  			if (!is.s.not_found) {  				i.value = NULL; -				error = ext4_xattr_ibody_set(handle, inode, &i, -							     &is); +				error = ext4_xattr_ibody_set(inode, &i, &is);  			}  		}  	}  	if (!error) {  		ext4_xattr_update_super_block(handle, inode->i_sb); -		inode->i_ctime = ext4_current_time(inode); +		inode->i_ctime = current_time(inode);  		if (!value)  			ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);  		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); @@ -1384,7 +1397,7 @@ static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,  		goto out;  	/* Remove the chosen entry from the inode */ -	error = ext4_xattr_ibody_set(handle, inode, &i, is); +	error = ext4_xattr_ibody_set(inode, &i, is);  	if (error)  		goto out; |