diff options
Diffstat (limited to 'fs/ext4/extents.c')
| -rw-r--r-- | fs/ext4/extents.c | 626 | 
1 files changed, 406 insertions, 220 deletions
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 74292a71b384..0b16fb4c06d3 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,  {  	struct ext4_extent_tail *et; -	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, -		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) +	if (!ext4_has_metadata_csum(inode->i_sb))  		return 1;  	et = find_ext4_extent_tail(eh); @@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,  {  	struct ext4_extent_tail *et; -	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, -		EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) +	if (!ext4_has_metadata_csum(inode->i_sb))  		return;  	et = find_ext4_extent_tail(eh); @@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode,  static int ext4_split_extent(handle_t *handle,  				struct inode *inode, -				struct ext4_ext_path *path, +				struct ext4_ext_path **ppath,  				struct ext4_map_blocks *map,  				int split_flag,  				int flags);  static int ext4_split_extent_at(handle_t *handle,  			     struct inode *inode, -			     struct ext4_ext_path *path, +			     struct ext4_ext_path **ppath,  			     ext4_lblk_t split,  			     int split_flag,  			     int flags); @@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)  	return size;  } +static inline int +ext4_force_split_extent_at(handle_t *handle, struct inode *inode, +			   struct ext4_ext_path **ppath, ext4_lblk_t lblk, +			   int nofail) +{ +	struct ext4_ext_path *path = *ppath; +	int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext); + +	return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ? +			EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0, +			EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO | +			(nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0)); +} +  /*   * Calculate the number of metadata blocks needed   * to allocate @blocks @@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,  void ext4_ext_drop_refs(struct ext4_ext_path *path)  { -	int depth = path->p_depth; -	int i; +	int depth, i; +	if (!path) +		return; +	depth = path->p_depth;  	for (i = 0; i <= depth; i++, path++)  		if (path->p_bh) {  			brelse(path->p_bh); @@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)  }  struct ext4_ext_path * -ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, -		     struct ext4_ext_path *path, int flags) +ext4_find_extent(struct inode *inode, ext4_lblk_t block, +		 struct ext4_ext_path **orig_path, int flags)  {  	struct ext4_extent_header *eh;  	struct buffer_head *bh; -	short int depth, i, ppos = 0, alloc = 0; +	struct ext4_ext_path *path = orig_path ? *orig_path : NULL; +	short int depth, i, ppos = 0;  	int ret;  	eh = ext_inode_hdr(inode);  	depth = ext_depth(inode); -	/* account possible depth increase */ +	if (path) { +		ext4_ext_drop_refs(path); +		if (depth > path[0].p_maxdepth) { +			kfree(path); +			*orig_path = path = NULL; +		} +	}  	if (!path) { +		/* account possible depth increase */  		path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),  				GFP_NOFS); -		if (!path) +		if (unlikely(!path))  			return ERR_PTR(-ENOMEM); -		alloc = 1; +		path[0].p_maxdepth = depth + 1;  	}  	path[0].p_hdr = eh;  	path[0].p_bh = NULL; @@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,  		bh = read_extent_tree_block(inode, path[ppos].p_block, --i,  					    flags); -		if (IS_ERR(bh)) { +		if (unlikely(IS_ERR(bh))) {  			ret = PTR_ERR(bh);  			goto err;  		} @@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,  err:  	ext4_ext_drop_refs(path); -	if (alloc) -		kfree(path); +	kfree(path); +	if (orig_path) +		*orig_path = NULL;  	return ERR_PTR(ret);  } @@ -1238,16 +1261,24 @@ cleanup:   *   just created block   */  static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, -				 unsigned int flags, -				 struct ext4_extent *newext) +				 unsigned int flags)  {  	struct ext4_extent_header *neh;  	struct buffer_head *bh; -	ext4_fsblk_t newblock; +	ext4_fsblk_t newblock, goal = 0; +	struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;  	int err = 0; -	newblock = ext4_ext_new_meta_block(handle, inode, NULL, -		newext, &err, flags); +	/* Try to prepend new index to old one */ +	if (ext_depth(inode)) +		goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode))); +	if (goal > le32_to_cpu(es->s_first_data_block)) { +		flags |= EXT4_MB_HINT_TRY_GOAL; +		goal--; +	} else +		goal = ext4_inode_to_goal_block(inode); +	newblock = ext4_new_meta_blocks(handle, inode, goal, flags, +					NULL, &err);  	if (newblock == 0)  		return err; @@ -1314,9 +1345,10 @@ out:  static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,  				    unsigned int mb_flags,  				    unsigned int gb_flags, -				    struct ext4_ext_path *path, +				    struct ext4_ext_path **ppath,  				    struct ext4_extent *newext)  { +	struct ext4_ext_path *path = *ppath;  	struct ext4_ext_path *curp;  	int depth, i, err = 0; @@ -1340,23 +1372,21 @@ repeat:  			goto out;  		/* refill path */ -		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, +		path = ext4_find_extent(inode,  				    (ext4_lblk_t)le32_to_cpu(newext->ee_block), -				    path, gb_flags); +				    ppath, gb_flags);  		if (IS_ERR(path))  			err = PTR_ERR(path);  	} else {  		/* tree is full, time to grow in depth */ -		err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext); +		err = ext4_ext_grow_indepth(handle, inode, mb_flags);  		if (err)  			goto out;  		/* refill path */ -		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, +		path = ext4_find_extent(inode,  				   (ext4_lblk_t)le32_to_cpu(newext->ee_block), -				    path, gb_flags); +				    ppath, gb_flags);  		if (IS_ERR(path)) {  			err = PTR_ERR(path);  			goto out; @@ -1559,7 +1589,7 @@ found_extent:   * allocated block. Thus, index entries have to be consistent   * with leaves.   */ -static ext4_lblk_t +ext4_lblk_t  ext4_ext_next_allocated_block(struct ext4_ext_path *path)  {  	int depth; @@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,  		sizeof(struct ext4_extent_idx);  	s += sizeof(struct ext4_extent_header); +	path[1].p_maxdepth = path[0].p_maxdepth;  	memcpy(path[0].p_hdr, path[1].p_hdr, s);  	path[0].p_depth = 0;  	path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + @@ -1896,9 +1927,10 @@ out:   * creating new leaf in the no-space case.   */  int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, -				struct ext4_ext_path *path, +				struct ext4_ext_path **ppath,  				struct ext4_extent *newext, int gb_flags)  { +	struct ext4_ext_path *path = *ppath;  	struct ext4_extent_header *eh;  	struct ext4_extent *ex, *fex;  	struct ext4_extent *nearex; /* nearest extent */ @@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,  	ext4_lblk_t next;  	int mb_flags = 0, unwritten; +	if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) +		mb_flags |= EXT4_MB_DELALLOC_RESERVED;  	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {  		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");  		return -EIO; @@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,  		/*  		 * Try to see whether we should rather test the extent on  		 * right from ex, or from the left of ex. This is because -		 * ext4_ext_find_extent() can return either extent on the +		 * ext4_find_extent() can return either extent on the  		 * left, or on the right from the searched position. This  		 * will make merging more effective.  		 */ @@ -2008,7 +2042,7 @@ prepend:  	if (next != EXT_MAX_BLOCKS) {  		ext_debug("next leaf block - %u\n", next);  		BUG_ON(npath != NULL); -		npath = ext4_ext_find_extent(inode, next, NULL, 0); +		npath = ext4_find_extent(inode, next, NULL, 0);  		if (IS_ERR(npath))  			return PTR_ERR(npath);  		BUG_ON(npath->p_depth != path->p_depth); @@ -2028,9 +2062,9 @@ prepend:  	 * We're gonna add a new leaf in the tree.  	 */  	if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) -		mb_flags = EXT4_MB_USE_RESERVED; +		mb_flags |= EXT4_MB_USE_RESERVED;  	err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, -				       path, newext); +				       ppath, newext);  	if (err)  		goto cleanup;  	depth = ext_depth(inode); @@ -2108,10 +2142,8 @@ merge:  	err = ext4_ext_dirty(handle, inode, path + path->p_depth);  cleanup: -	if (npath) { -		ext4_ext_drop_refs(npath); -		kfree(npath); -	} +	ext4_ext_drop_refs(npath); +	kfree(npath);  	return err;  } @@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,  		/* find extent for this block */  		down_read(&EXT4_I(inode)->i_data_sem); -		if (path && ext_depth(inode) != depth) { -			/* depth was changed. we have to realloc path */ -			kfree(path); -			path = NULL; -		} - -		path = ext4_ext_find_extent(inode, block, path, 0); +		path = ext4_find_extent(inode, block, &path, 0);  		if (IS_ERR(path)) {  			up_read(&EXT4_I(inode)->i_data_sem);  			err = PTR_ERR(path); @@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,  		}  		ex = path[depth].p_ext;  		next = ext4_ext_next_allocated_block(path); -		ext4_ext_drop_refs(path);  		flags = 0;  		exists = 0; @@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,  		block = es.es_lblk + es.es_len;  	} -	if (path) { -		ext4_ext_drop_refs(path); -		kfree(path); -	} - +	ext4_ext_drop_refs(path); +	kfree(path);  	return err;  } @@ -2826,7 +2848,7 @@ again:  		ext4_lblk_t ee_block;  		/* find extent for this block */ -		path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); +		path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);  		if (IS_ERR(path)) {  			ext4_journal_stop(handle);  			return PTR_ERR(path); @@ -2854,24 +2876,14 @@ again:  		 */  		if (end >= ee_block &&  		    end < ee_block + ext4_ext_get_actual_len(ex) - 1) { -			int split_flag = 0; - -			if (ext4_ext_is_unwritten(ex)) -				split_flag = EXT4_EXT_MARK_UNWRIT1 | -					     EXT4_EXT_MARK_UNWRIT2; -  			/*  			 * Split the extent in two so that 'end' is the last  			 * block in the first new extent. Also we should not  			 * fail removing space due to ENOSPC so try to use  			 * reserved block if that happens.  			 */ -			err = ext4_split_extent_at(handle, inode, path, -					end + 1, split_flag, -					EXT4_EX_NOCACHE | -					EXT4_GET_BLOCKS_PRE_IO | -					EXT4_GET_BLOCKS_METADATA_NOFAIL); - +			err = ext4_force_split_extent_at(handle, inode, &path, +							 end + 1, 1);  			if (err < 0)  				goto out;  		} @@ -2893,7 +2905,7 @@ again:  			ext4_journal_stop(handle);  			return -ENOMEM;  		} -		path[0].p_depth = depth; +		path[0].p_maxdepth = path[0].p_depth = depth;  		path[0].p_hdr = ext_inode_hdr(inode);  		i = 0; @@ -3013,10 +3025,9 @@ again:  out:  	ext4_ext_drop_refs(path);  	kfree(path); -	if (err == -EAGAIN) { -		path = NULL; +	path = NULL; +	if (err == -EAGAIN)  		goto again; -	}  	ext4_journal_stop(handle);  	return err; @@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)   */  static int ext4_split_extent_at(handle_t *handle,  			     struct inode *inode, -			     struct ext4_ext_path *path, +			     struct ext4_ext_path **ppath,  			     ext4_lblk_t split,  			     int split_flag,  			     int flags)  { +	struct ext4_ext_path *path = *ppath;  	ext4_fsblk_t newblock;  	ext4_lblk_t ee_block;  	struct ext4_extent *ex, newex, orig_ex, zero_ex; @@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,  	if (split_flag & EXT4_EXT_MARK_UNWRIT2)  		ext4_ext_mark_unwritten(ex2); -	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); +	err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);  	if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {  		if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {  			if (split_flag & EXT4_EXT_DATA_VALID1) { @@ -3271,11 +3283,12 @@ fix_extent_len:   */  static int ext4_split_extent(handle_t *handle,  			      struct inode *inode, -			      struct ext4_ext_path *path, +			      struct ext4_ext_path **ppath,  			      struct ext4_map_blocks *map,  			      int split_flag,  			      int flags)  { +	struct ext4_ext_path *path = *ppath;  	ext4_lblk_t ee_block;  	struct ext4_extent *ex;  	unsigned int ee_len, depth; @@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle,  				       EXT4_EXT_MARK_UNWRIT2;  		if (split_flag & EXT4_EXT_DATA_VALID2)  			split_flag1 |= EXT4_EXT_DATA_VALID1; -		err = ext4_split_extent_at(handle, inode, path, +		err = ext4_split_extent_at(handle, inode, ppath,  				map->m_lblk + map->m_len, split_flag1, flags1);  		if (err)  			goto out; @@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle,  	 * Update path is required because previous ext4_split_extent_at() may  	 * result in split of original leaf or extent zeroout.  	 */ -	ext4_ext_drop_refs(path); -	path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); +	path = ext4_find_extent(inode, map->m_lblk, ppath, 0);  	if (IS_ERR(path))  		return PTR_ERR(path);  	depth = ext_depth(inode); @@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle,  			split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |  						     EXT4_EXT_MARK_UNWRIT2);  		} -		err = ext4_split_extent_at(handle, inode, path, +		err = ext4_split_extent_at(handle, inode, ppath,  				map->m_lblk, split_flag1, flags);  		if (err)  			goto out; @@ -3364,9 +3376,10 @@ out:  static int ext4_ext_convert_to_initialized(handle_t *handle,  					   struct inode *inode,  					   struct ext4_map_blocks *map, -					   struct ext4_ext_path *path, +					   struct ext4_ext_path **ppath,  					   int flags)  { +	struct ext4_ext_path *path = *ppath;  	struct ext4_sb_info *sbi;  	struct ext4_extent_header *eh;  	struct ext4_map_blocks split_map; @@ -3590,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,  		}  	} -	allocated = ext4_split_extent(handle, inode, path, -				      &split_map, split_flag, flags); -	if (allocated < 0) -		err = allocated; - +	err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag, +				flags); +	if (err > 0) +		err = 0;  out:  	/* If we have gotten a failure, don't zero out status tree */  	if (!err) @@ -3629,9 +3641,10 @@ out:  static int ext4_split_convert_extents(handle_t *handle,  					struct inode *inode,  					struct ext4_map_blocks *map, -					struct ext4_ext_path *path, +					struct ext4_ext_path **ppath,  					int flags)  { +	struct ext4_ext_path *path = *ppath;  	ext4_lblk_t eof_block;  	ext4_lblk_t ee_block;  	struct ext4_extent *ex; @@ -3665,74 +3678,15 @@ static int ext4_split_convert_extents(handle_t *handle,  		split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);  	}  	flags |= EXT4_GET_BLOCKS_PRE_IO; -	return ext4_split_extent(handle, inode, path, map, split_flag, flags); +	return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);  } -static int ext4_convert_initialized_extents(handle_t *handle, -					    struct inode *inode, -					    struct ext4_map_blocks *map, -					    struct ext4_ext_path *path) -{ -	struct ext4_extent *ex; -	ext4_lblk_t ee_block; -	unsigned int ee_len; -	int depth; -	int err = 0; - -	depth = ext_depth(inode); -	ex = path[depth].p_ext; -	ee_block = le32_to_cpu(ex->ee_block); -	ee_len = ext4_ext_get_actual_len(ex); - -	ext_debug("%s: inode %lu, logical" -		"block %llu, max_blocks %u\n", __func__, inode->i_ino, -		  (unsigned long long)ee_block, ee_len); - -	if (ee_block != map->m_lblk || ee_len > map->m_len) { -		err = ext4_split_convert_extents(handle, inode, map, path, -				EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); -		if (err < 0) -			goto out; -		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); -		if (IS_ERR(path)) { -			err = PTR_ERR(path); -			goto out; -		} -		depth = ext_depth(inode); -		ex = path[depth].p_ext; -		if (!ex) { -			EXT4_ERROR_INODE(inode, "unexpected hole at %lu", -					 (unsigned long) map->m_lblk); -			err = -EIO; -			goto out; -		} -	} - -	err = ext4_ext_get_access(handle, inode, path + depth); -	if (err) -		goto out; -	/* first mark the extent as unwritten */ -	ext4_ext_mark_unwritten(ex); - -	/* note: ext4_ext_correct_indexes() isn't needed here because -	 * borders are not changed -	 */ -	ext4_ext_try_to_merge(handle, inode, path, ex); - -	/* Mark modified extent as dirty */ -	err = ext4_ext_dirty(handle, inode, path + path->p_depth); -out: -	ext4_ext_show_leaf(inode, path); -	return err; -} - -  static int ext4_convert_unwritten_extents_endio(handle_t *handle,  						struct inode *inode,  						struct ext4_map_blocks *map, -						struct ext4_ext_path *path) +						struct ext4_ext_path **ppath)  { +	struct ext4_ext_path *path = *ppath;  	struct ext4_extent *ex;  	ext4_lblk_t ee_block;  	unsigned int ee_len; @@ -3761,16 +3715,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,  			     inode->i_ino, (unsigned long long)ee_block, ee_len,  			     (unsigned long long)map->m_lblk, map->m_len);  #endif -		err = ext4_split_convert_extents(handle, inode, map, path, +		err = ext4_split_convert_extents(handle, inode, map, ppath,  						 EXT4_GET_BLOCKS_CONVERT);  		if (err < 0) -			goto out; -		ext4_ext_drop_refs(path); -		path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); -		if (IS_ERR(path)) { -			err = PTR_ERR(path); -			goto out; -		} +			return err; +		path = ext4_find_extent(inode, map->m_lblk, ppath, 0); +		if (IS_ERR(path)) +			return PTR_ERR(path);  		depth = ext_depth(inode);  		ex = path[depth].p_ext;  	} @@ -3963,12 +3914,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,  }  static int -ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, -			struct ext4_map_blocks *map, -			struct ext4_ext_path *path, int flags, -			unsigned int allocated, ext4_fsblk_t newblock) +convert_initialized_extent(handle_t *handle, struct inode *inode, +			   struct ext4_map_blocks *map, +			   struct ext4_ext_path **ppath, int flags, +			   unsigned int allocated, ext4_fsblk_t newblock)  { -	int ret = 0; +	struct ext4_ext_path *path = *ppath; +	struct ext4_extent *ex; +	ext4_lblk_t ee_block; +	unsigned int ee_len; +	int depth;  	int err = 0;  	/* @@ -3978,28 +3933,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,  	if (map->m_len > EXT_UNWRITTEN_MAX_LEN)  		map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; -	ret = ext4_convert_initialized_extents(handle, inode, map, -						path); -	if (ret >= 0) { -		ext4_update_inode_fsync_trans(handle, inode, 1); -		err = check_eofblocks_fl(handle, inode, map->m_lblk, -					 path, map->m_len); -	} else -		err = ret; +	depth = ext_depth(inode); +	ex = path[depth].p_ext; +	ee_block = le32_to_cpu(ex->ee_block); +	ee_len = ext4_ext_get_actual_len(ex); + +	ext_debug("%s: inode %lu, logical" +		"block %llu, max_blocks %u\n", __func__, inode->i_ino, +		  (unsigned long long)ee_block, ee_len); + +	if (ee_block != map->m_lblk || ee_len > map->m_len) { +		err = ext4_split_convert_extents(handle, inode, map, ppath, +				EXT4_GET_BLOCKS_CONVERT_UNWRITTEN); +		if (err < 0) +			return err; +		path = ext4_find_extent(inode, map->m_lblk, ppath, 0); +		if (IS_ERR(path)) +			return PTR_ERR(path); +		depth = ext_depth(inode); +		ex = path[depth].p_ext; +		if (!ex) { +			EXT4_ERROR_INODE(inode, "unexpected hole at %lu", +					 (unsigned long) map->m_lblk); +			return -EIO; +		} +	} + +	err = ext4_ext_get_access(handle, inode, path + depth); +	if (err) +		return err; +	/* first mark the extent as unwritten */ +	ext4_ext_mark_unwritten(ex); + +	/* note: ext4_ext_correct_indexes() isn't needed here because +	 * borders are not changed +	 */ +	ext4_ext_try_to_merge(handle, inode, path, ex); + +	/* Mark modified extent as dirty */ +	err = ext4_ext_dirty(handle, inode, path + path->p_depth); +	if (err) +		return err; +	ext4_ext_show_leaf(inode, path); + +	ext4_update_inode_fsync_trans(handle, inode, 1); +	err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len); +	if (err) +		return err;  	map->m_flags |= EXT4_MAP_UNWRITTEN;  	if (allocated > map->m_len)  		allocated = map->m_len;  	map->m_len = allocated; - -	return err ? err : allocated; +	return allocated;  }  static int  ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,  			struct ext4_map_blocks *map, -			struct ext4_ext_path *path, int flags, +			struct ext4_ext_path **ppath, int flags,  			unsigned int allocated, ext4_fsblk_t newblock)  { +	struct ext4_ext_path *path = *ppath;  	int ret = 0;  	int err = 0;  	ext4_io_end_t *io = ext4_inode_aio(inode); @@ -4021,8 +4015,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,  	/* get_block() before submit the IO, split the extent */  	if (flags & EXT4_GET_BLOCKS_PRE_IO) { -		ret = ext4_split_convert_extents(handle, inode, map, -					 path, flags | EXT4_GET_BLOCKS_CONVERT); +		ret = ext4_split_convert_extents(handle, inode, map, ppath, +					 flags | EXT4_GET_BLOCKS_CONVERT);  		if (ret <= 0)  			goto out;  		/* @@ -4040,7 +4034,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,  	/* IO end_io complete, convert the filled extent to written */  	if (flags & EXT4_GET_BLOCKS_CONVERT) {  		ret = ext4_convert_unwritten_extents_endio(handle, inode, map, -							path); +							   ppath);  		if (ret >= 0) {  			ext4_update_inode_fsync_trans(handle, inode, 1);  			err = check_eofblocks_fl(handle, inode, map->m_lblk, @@ -4078,7 +4072,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,  	}  	/* buffered write, writepage time, convert*/ -	ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags); +	ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);  	if (ret >= 0)  		ext4_update_inode_fsync_trans(handle, inode, 1);  out: @@ -4279,7 +4273,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);  	/* find extent for this block */ -	path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0); +	path = ext4_find_extent(inode, map->m_lblk, NULL, 0);  	if (IS_ERR(path)) {  		err = PTR_ERR(path);  		path = NULL; @@ -4291,7 +4285,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  	/*  	 * consistent leaf must not be empty;  	 * this situation is possible, though, _during_ tree modification; -	 * this is why assert can't be put in ext4_ext_find_extent() +	 * this is why assert can't be put in ext4_find_extent()  	 */  	if (unlikely(path[depth].p_ext == NULL && depth != 0)) {  		EXT4_ERROR_INODE(inode, "bad extent address " @@ -4331,15 +4325,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  			 */  			if ((!ext4_ext_is_unwritten(ex)) &&  			    (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { -				allocated = ext4_ext_convert_initialized_extent( -						handle, inode, map, path, flags, -						allocated, newblock); +				allocated = convert_initialized_extent( +						handle, inode, map, &path, +						flags, allocated, newblock);  				goto out2;  			} else if (!ext4_ext_is_unwritten(ex))  				goto out;  			ret = ext4_ext_handle_unwritten_extents( -				handle, inode, map, path, flags, +				handle, inode, map, &path, flags,  				allocated, newblock);  			if (ret < 0)  				err = ret; @@ -4376,7 +4370,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  	/*  	 * If we are doing bigalloc, check to see if the extent returned -	 * by ext4_ext_find_extent() implies a cluster we can use. +	 * by ext4_find_extent() implies a cluster we can use.  	 */  	if (cluster_offset && ex &&  	    get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { @@ -4451,6 +4445,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,  		ar.flags = 0;  	if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)  		ar.flags |= EXT4_MB_HINT_NOPREALLOC; +	if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) +		ar.flags |= EXT4_MB_DELALLOC_RESERVED;  	newblock = ext4_mb_new_blocks(handle, &ar, &err);  	if (!newblock)  		goto out2; @@ -4486,7 +4482,7 @@ got_allocated_blocks:  		err = check_eofblocks_fl(handle, inode, map->m_lblk,  					 path, ar.len);  	if (!err) -		err = ext4_ext_insert_extent(handle, inode, path, +		err = ext4_ext_insert_extent(handle, inode, &path,  					     &newex, flags);  	if (!err && set_unwritten) { @@ -4619,10 +4615,8 @@ out:  	map->m_pblk = newblock;  	map->m_len = allocated;  out2: -	if (path) { -		ext4_ext_drop_refs(path); -		kfree(path); -	} +	ext4_ext_drop_refs(path); +	kfree(path);  	trace_ext4_ext_map_blocks_exit(inode, flags, map,  				       err ? err : allocated); @@ -4799,7 +4793,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,  		max_blocks -= lblk;  	flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | -		EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; +		EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | +		EXT4_EX_NOCACHE;  	if (mode & FALLOC_FL_KEEP_SIZE)  		flags |= EXT4_GET_BLOCKS_KEEP_SIZE; @@ -4837,15 +4832,21 @@ static long ext4_zero_range(struct file *file, loff_t offset,  		ext4_inode_block_unlocked_dio(inode);  		inode_dio_wait(inode); +		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, +					     flags, mode); +		if (ret) +			goto out_dio;  		/*  		 * Remove entire range from the extent status tree. +		 * +		 * ext4_es_remove_extent(inode, lblk, max_blocks) is +		 * NOT sufficient.  I'm not sure why this is the case, +		 * but let's be conservative and remove the extent +		 * status tree for the entire inode.  There should be +		 * no outstanding delalloc extents thanks to the +		 * filemap_write_and_wait_range() call above.  		 */ -		ret = ext4_es_remove_extent(inode, lblk, max_blocks); -		if (ret) -			goto out_dio; - -		ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size, -					     flags, mode); +		ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);  		if (ret)  			goto out_dio;  	} @@ -5304,36 +5305,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,  	struct ext4_ext_path *path;  	int ret = 0, depth;  	struct ext4_extent *extent; -	ext4_lblk_t stop_block, current_block; +	ext4_lblk_t stop_block;  	ext4_lblk_t ex_start, ex_end;  	/* Let path point to the last extent */ -	path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); +	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);  	if (IS_ERR(path))  		return PTR_ERR(path);  	depth = path->p_depth;  	extent = path[depth].p_ext; -	if (!extent) { -		ext4_ext_drop_refs(path); -		kfree(path); -		return ret; -	} +	if (!extent) +		goto out;  	stop_block = le32_to_cpu(extent->ee_block) +  			ext4_ext_get_actual_len(extent); -	ext4_ext_drop_refs(path); -	kfree(path);  	/* Nothing to shift, if hole is at the end of file */  	if (start >= stop_block) -		return ret; +		goto out;  	/*  	 * Don't start shifting extents until we make sure the hole is big  	 * enough to accomodate the shift.  	 */ -	path = ext4_ext_find_extent(inode, start - 1, NULL, 0); +	path = ext4_find_extent(inode, start - 1, &path, 0);  	if (IS_ERR(path))  		return PTR_ERR(path);  	depth = path->p_depth; @@ -5346,8 +5342,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,  		ex_start = 0;  		ex_end = 0;  	} -	ext4_ext_drop_refs(path); -	kfree(path);  	if ((start == ex_start && shift > ex_start) ||  	    (shift > start - ex_end)) @@ -5355,7 +5349,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,  	/* Its safe to start updating extents */  	while (start < stop_block) { -		path = ext4_ext_find_extent(inode, start, NULL, 0); +		path = ext4_find_extent(inode, start, &path, 0);  		if (IS_ERR(path))  			return PTR_ERR(path);  		depth = path->p_depth; @@ -5365,27 +5359,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,  					 (unsigned long) start);  			return -EIO;  		} - -		current_block = le32_to_cpu(extent->ee_block); -		if (start > current_block) { +		if (start > le32_to_cpu(extent->ee_block)) {  			/* Hole, move to the next extent */ -			ret = mext_next_extent(inode, path, &extent); -			if (ret != 0) { -				ext4_ext_drop_refs(path); -				kfree(path); -				if (ret == 1) -					ret = 0; -				break; +			if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) { +				path[depth].p_ext++; +			} else { +				start = ext4_ext_next_allocated_block(path); +				continue;  			}  		}  		ret = ext4_ext_shift_path_extents(path, shift, inode,  				handle, &start); -		ext4_ext_drop_refs(path); -		kfree(path);  		if (ret)  			break;  	} - +out: +	ext4_ext_drop_refs(path); +	kfree(path);  	return ret;  } @@ -5508,3 +5498,199 @@ out_mutex:  	mutex_unlock(&inode->i_mutex);  	return ret;  } + +/** + * ext4_swap_extents - Swap extents between two inodes + * + * @inode1:	First inode + * @inode2:	Second inode + * @lblk1:	Start block for first inode + * @lblk2:	Start block for second inode + * @count:	Number of blocks to swap + * @mark_unwritten: Mark second inode's extents as unwritten after swap + * @erp:	Pointer to save error value + * + * This helper routine does exactly what is promise "swap extents". All other + * stuff such as page-cache locking consistency, bh mapping consistency or + * extent's data copying must be performed by caller. + * Locking: + * 		i_mutex is held for both inodes + * 		i_data_sem is locked for write for both inodes + * Assumptions: + *		All pages from requested range are locked for both inodes + */ +int +ext4_swap_extents(handle_t *handle, struct inode *inode1, +		     struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2, +		  ext4_lblk_t count, int unwritten, int *erp) +{ +	struct ext4_ext_path *path1 = NULL; +	struct ext4_ext_path *path2 = NULL; +	int replaced_count = 0; + +	BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem)); +	BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem)); +	BUG_ON(!mutex_is_locked(&inode1->i_mutex)); +	BUG_ON(!mutex_is_locked(&inode1->i_mutex)); + +	*erp = ext4_es_remove_extent(inode1, lblk1, count); +	if (unlikely(*erp)) +		return 0; +	*erp = ext4_es_remove_extent(inode2, lblk2, count); +	if (unlikely(*erp)) +		return 0; + +	while (count) { +		struct ext4_extent *ex1, *ex2, tmp_ex; +		ext4_lblk_t e1_blk, e2_blk; +		int e1_len, e2_len, len; +		int split = 0; + +		path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE); +		if (unlikely(IS_ERR(path1))) { +			*erp = PTR_ERR(path1); +			path1 = NULL; +		finish: +			count = 0; +			goto repeat; +		} +		path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE); +		if (unlikely(IS_ERR(path2))) { +			*erp = PTR_ERR(path2); +			path2 = NULL; +			goto finish; +		} +		ex1 = path1[path1->p_depth].p_ext; +		ex2 = path2[path2->p_depth].p_ext; +		/* Do we have somthing to swap ? */ +		if (unlikely(!ex2 || !ex1)) +			goto finish; + +		e1_blk = le32_to_cpu(ex1->ee_block); +		e2_blk = le32_to_cpu(ex2->ee_block); +		e1_len = ext4_ext_get_actual_len(ex1); +		e2_len = ext4_ext_get_actual_len(ex2); + +		/* Hole handling */ +		if (!in_range(lblk1, e1_blk, e1_len) || +		    !in_range(lblk2, e2_blk, e2_len)) { +			ext4_lblk_t next1, next2; + +			/* if hole after extent, then go to next extent */ +			next1 = ext4_ext_next_allocated_block(path1); +			next2 = ext4_ext_next_allocated_block(path2); +			/* If hole before extent, then shift to that extent */ +			if (e1_blk > lblk1) +				next1 = e1_blk; +			if (e2_blk > lblk2) +				next2 = e1_blk; +			/* Do we have something to swap */ +			if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS) +				goto finish; +			/* Move to the rightest boundary */ +			len = next1 - lblk1; +			if (len < next2 - lblk2) +				len = next2 - lblk2; +			if (len > count) +				len = count; +			lblk1 += len; +			lblk2 += len; +			count -= len; +			goto repeat; +		} + +		/* Prepare left boundary */ +		if (e1_blk < lblk1) { +			split = 1; +			*erp = ext4_force_split_extent_at(handle, inode1, +						&path1, lblk1, 0); +			if (unlikely(*erp)) +				goto finish; +		} +		if (e2_blk < lblk2) { +			split = 1; +			*erp = ext4_force_split_extent_at(handle, inode2, +						&path2,  lblk2, 0); +			if (unlikely(*erp)) +				goto finish; +		} +		/* ext4_split_extent_at() may result in leaf extent split, +		 * path must to be revalidated. */ +		if (split) +			goto repeat; + +		/* Prepare right boundary */ +		len = count; +		if (len > e1_blk + e1_len - lblk1) +			len = e1_blk + e1_len - lblk1; +		if (len > e2_blk + e2_len - lblk2) +			len = e2_blk + e2_len - lblk2; + +		if (len != e1_len) { +			split = 1; +			*erp = ext4_force_split_extent_at(handle, inode1, +						&path1, lblk1 + len, 0); +			if (unlikely(*erp)) +				goto finish; +		} +		if (len != e2_len) { +			split = 1; +			*erp = ext4_force_split_extent_at(handle, inode2, +						&path2, lblk2 + len, 0); +			if (*erp) +				goto finish; +		} +		/* ext4_split_extent_at() may result in leaf extent split, +		 * path must to be revalidated. */ +		if (split) +			goto repeat; + +		BUG_ON(e2_len != e1_len); +		*erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth); +		if (unlikely(*erp)) +			goto finish; +		*erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth); +		if (unlikely(*erp)) +			goto finish; + +		/* Both extents are fully inside boundaries. Swap it now */ +		tmp_ex = *ex1; +		ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2)); +		ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex)); +		ex1->ee_len = cpu_to_le16(e2_len); +		ex2->ee_len = cpu_to_le16(e1_len); +		if (unwritten) +			ext4_ext_mark_unwritten(ex2); +		if (ext4_ext_is_unwritten(&tmp_ex)) +			ext4_ext_mark_unwritten(ex1); + +		ext4_ext_try_to_merge(handle, inode2, path2, ex2); +		ext4_ext_try_to_merge(handle, inode1, path1, ex1); +		*erp = ext4_ext_dirty(handle, inode2, path2 + +				      path2->p_depth); +		if (unlikely(*erp)) +			goto finish; +		*erp = ext4_ext_dirty(handle, inode1, path1 + +				      path1->p_depth); +		/* +		 * Looks scarry ah..? second inode already points to new blocks, +		 * and it was successfully dirtied. But luckily error may happen +		 * only due to journal error, so full transaction will be +		 * aborted anyway. +		 */ +		if (unlikely(*erp)) +			goto finish; +		lblk1 += len; +		lblk2 += len; +		replaced_count += len; +		count -= len; + +	repeat: +		ext4_ext_drop_refs(path1); +		kfree(path1); +		ext4_ext_drop_refs(path2); +		kfree(path2); +		path1 = path2 = NULL; +	} +	return replaced_count; +}  |