diff options
Diffstat (limited to 'fs/ext4/extents_status.c')
| -rw-r--r-- | fs/ext4/extents_status.c | 207 | 
1 file changed, 129 insertions, 78 deletions
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c index 595abb9e7d74..9b5b8951afb4 100644 --- a/fs/ext4/extents_status.c +++ b/fs/ext4/extents_status.c @@ -144,9 +144,11 @@  static struct kmem_cache *ext4_es_cachep;  static struct kmem_cache *ext4_pending_cachep; -static int __es_insert_extent(struct inode *inode, struct extent_status *newes); +static int __es_insert_extent(struct inode *inode, struct extent_status *newes, +			      struct extent_status *prealloc);  static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, -			      ext4_lblk_t end, int *reserved); +			      ext4_lblk_t end, int *reserved, +			      struct extent_status *prealloc);  static int es_reclaim_extents(struct ext4_inode_info *ei, int *nr_to_scan);  static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,  		       struct ext4_inode_info *locked_ei); @@ -446,22 +448,36 @@ static void ext4_es_list_del(struct inode *inode)  	spin_unlock(&sbi->s_es_lock);  } -static struct extent_status * -ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len, -		     ext4_fsblk_t pblk) +/* + * Returns true if we cannot fail to allocate memory for this extent_status + * entry and cannot reclaim it until its status changes. + */ +static inline bool ext4_es_must_keep(struct extent_status *es) +{ +	/* fiemap, bigalloc, and seek_data/hole need to use it. 
*/ +	if (ext4_es_is_delayed(es)) +		return true; + +	return false; +} + +static inline struct extent_status *__es_alloc_extent(bool nofail) +{ +	if (!nofail) +		return kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); + +	return kmem_cache_zalloc(ext4_es_cachep, GFP_KERNEL | __GFP_NOFAIL); +} + +static void ext4_es_init_extent(struct inode *inode, struct extent_status *es, +		ext4_lblk_t lblk, ext4_lblk_t len, ext4_fsblk_t pblk)  { -	struct extent_status *es; -	es = kmem_cache_alloc(ext4_es_cachep, GFP_ATOMIC); -	if (es == NULL) -		return NULL;  	es->es_lblk = lblk;  	es->es_len = len;  	es->es_pblk = pblk; -	/* -	 * We don't count delayed extent because we never try to reclaim them -	 */ -	if (!ext4_es_is_delayed(es)) { +	/* We never try to reclaim a must kept extent, so we don't count it. */ +	if (!ext4_es_must_keep(es)) {  		if (!EXT4_I(inode)->i_es_shk_nr++)  			ext4_es_list_add(inode);  		percpu_counter_inc(&EXT4_SB(inode->i_sb)-> @@ -470,8 +486,11 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,  	EXT4_I(inode)->i_es_all_nr++;  	percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); +} -	return es; +static inline void __es_free_extent(struct extent_status *es) +{ +	kmem_cache_free(ext4_es_cachep, es);  }  static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) @@ -479,8 +498,8 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)  	EXT4_I(inode)->i_es_all_nr--;  	percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt); -	/* Decrease the shrink counter when this es is not delayed */ -	if (!ext4_es_is_delayed(es)) { +	/* Decrease the shrink counter when we can reclaim the extent. 
*/ +	if (!ext4_es_must_keep(es)) {  		BUG_ON(EXT4_I(inode)->i_es_shk_nr == 0);  		if (!--EXT4_I(inode)->i_es_shk_nr)  			ext4_es_list_del(inode); @@ -488,7 +507,7 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)  					s_es_stats.es_stats_shk_cnt);  	} -	kmem_cache_free(ext4_es_cachep, es); +	__es_free_extent(es);  }  /* @@ -749,7 +768,8 @@ static inline void ext4_es_insert_extent_check(struct inode *inode,  }  #endif -static int __es_insert_extent(struct inode *inode, struct extent_status *newes) +static int __es_insert_extent(struct inode *inode, struct extent_status *newes, +			      struct extent_status *prealloc)  {  	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;  	struct rb_node **p = &tree->root.rb_node; @@ -789,10 +809,15 @@ static int __es_insert_extent(struct inode *inode, struct extent_status *newes)  		}  	} -	es = ext4_es_alloc_extent(inode, newes->es_lblk, newes->es_len, -				  newes->es_pblk); +	if (prealloc) +		es = prealloc; +	else +		es = __es_alloc_extent(false);  	if (!es)  		return -ENOMEM; +	ext4_es_init_extent(inode, es, newes->es_lblk, newes->es_len, +			    newes->es_pblk); +  	rb_link_node(&es->rb_node, parent, p);  	rb_insert_color(&es->rb_node, &tree->root); @@ -804,26 +829,27 @@ out:  /*   * ext4_es_insert_extent() adds information to an inode's extent   * status tree. - * - * Return 0 on success, error code on failure.   
*/ -int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, -			  ext4_lblk_t len, ext4_fsblk_t pblk, -			  unsigned int status) +void ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk, +			   ext4_lblk_t len, ext4_fsblk_t pblk, +			   unsigned int status)  {  	struct extent_status newes;  	ext4_lblk_t end = lblk + len - 1; -	int err = 0; +	int err1 = 0; +	int err2 = 0;  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); +	struct extent_status *es1 = NULL; +	struct extent_status *es2 = NULL;  	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) -		return 0; +		return;  	es_debug("add [%u/%u) %llu %x to extent status tree of inode %lu\n",  		 lblk, len, pblk, status, inode->i_ino);  	if (!len) -		return 0; +		return;  	BUG_ON(end < lblk); @@ -842,29 +868,40 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,  	ext4_es_insert_extent_check(inode, &newes); +retry: +	if (err1 && !es1) +		es1 = __es_alloc_extent(true); +	if ((err1 || err2) && !es2) +		es2 = __es_alloc_extent(true);  	write_lock(&EXT4_I(inode)->i_es_lock); -	err = __es_remove_extent(inode, lblk, end, NULL); -	if (err != 0) + +	err1 = __es_remove_extent(inode, lblk, end, NULL, es1); +	if (err1 != 0) +		goto error; + +	err2 = __es_insert_extent(inode, &newes, es2); +	if (err2 == -ENOMEM && !ext4_es_must_keep(&newes)) +		err2 = 0; +	if (err2 != 0)  		goto error; -retry: -	err = __es_insert_extent(inode, &newes); -	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), -					  128, EXT4_I(inode))) -		goto retry; -	if (err == -ENOMEM && !ext4_es_is_delayed(&newes)) -		err = 0;  	if (sbi->s_cluster_ratio > 1 && test_opt(inode->i_sb, DELALLOC) &&  	    (status & EXTENT_STATUS_WRITTEN ||  	     status & EXTENT_STATUS_UNWRITTEN))  		__revise_pending(inode, lblk, len); +	/* es is pre-allocated but not used, free it. 
*/ +	if (es1 && !es1->es_len) +		__es_free_extent(es1); +	if (es2 && !es2->es_len) +		__es_free_extent(es2);  error:  	write_unlock(&EXT4_I(inode)->i_es_lock); +	if (err1 || err2) +		goto retry;  	ext4_es_print_tree(inode); - -	return err; +	return;  }  /* @@ -897,7 +934,7 @@ void ext4_es_cache_extent(struct inode *inode, ext4_lblk_t lblk,  	es = __es_tree_search(&EXT4_I(inode)->i_es_tree.root, lblk);  	if (!es || es->es_lblk > end) -		__es_insert_extent(inode, &newes); +		__es_insert_extent(inode, &newes, NULL);  	write_unlock(&EXT4_I(inode)->i_es_lock);  } @@ -1287,6 +1324,7 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,   * @lblk - first block in range   * @end - last block in range   * @reserved - number of cluster reservations released + * @prealloc - pre-allocated es to avoid memory allocation failures   *   * If @reserved is not NULL and delayed allocation is enabled, counts   * block/cluster reservations freed by removing range and if bigalloc @@ -1294,7 +1332,8 @@ static unsigned int get_rsvd(struct inode *inode, ext4_lblk_t end,   * error code on failure.   
*/  static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk, -			      ext4_lblk_t end, int *reserved) +			      ext4_lblk_t end, int *reserved, +			      struct extent_status *prealloc)  {  	struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;  	struct rb_node *node; @@ -1302,14 +1341,12 @@ static int __es_remove_extent(struct inode *inode, ext4_lblk_t lblk,  	struct extent_status orig_es;  	ext4_lblk_t len1, len2;  	ext4_fsblk_t block; -	int err; +	int err = 0;  	bool count_reserved = true;  	struct rsvd_count rc;  	if (reserved == NULL || !test_opt(inode->i_sb, DELALLOC))  		count_reserved = false; -retry: -	err = 0;  	es = __es_tree_search(&tree->root, lblk);  	if (!es) @@ -1343,14 +1380,13 @@ retry:  					orig_es.es_len - len2;  			ext4_es_store_pblock_status(&newes, block,  						    ext4_es_status(&orig_es)); -			err = __es_insert_extent(inode, &newes); +			err = __es_insert_extent(inode, &newes, prealloc);  			if (err) { +				if (!ext4_es_must_keep(&newes)) +					return 0; +  				es->es_lblk = orig_es.es_lblk;  				es->es_len = orig_es.es_len; -				if ((err == -ENOMEM) && -				    __es_shrink(EXT4_SB(inode->i_sb), -							128, EXT4_I(inode))) -					goto retry;  				goto out;  			}  		} else { @@ -1422,39 +1458,48 @@ out:   * @len - number of blocks to remove   *   * Reduces block/cluster reservation count and for bigalloc cancels pending - * reservations as needed. Returns 0 on success, error code on failure. + * reservations as needed.   
*/ -int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, -			  ext4_lblk_t len) +void ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk, +			   ext4_lblk_t len)  {  	ext4_lblk_t end;  	int err = 0;  	int reserved = 0; +	struct extent_status *es = NULL;  	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) -		return 0; +		return;  	trace_ext4_es_remove_extent(inode, lblk, len);  	es_debug("remove [%u/%u) from extent status tree of inode %lu\n",  		 lblk, len, inode->i_ino);  	if (!len) -		return err; +		return;  	end = lblk + len - 1;  	BUG_ON(end < lblk); +retry: +	if (err && !es) +		es = __es_alloc_extent(true);  	/*  	 * ext4_clear_inode() depends on us taking i_es_lock unconditionally  	 * so that we are sure __es_shrink() is done with the inode before it  	 * is reclaimed.  	 */  	write_lock(&EXT4_I(inode)->i_es_lock); -	err = __es_remove_extent(inode, lblk, end, &reserved); +	err = __es_remove_extent(inode, lblk, end, &reserved, es); +	if (es && !es->es_len) +		__es_free_extent(es);  	write_unlock(&EXT4_I(inode)->i_es_lock); +	if (err) +		goto retry; +  	ext4_es_print_tree(inode);  	ext4_da_release_space(inode, reserved); -	return err; +	return;  }  static int __es_shrink(struct ext4_sb_info *sbi, int nr_to_scan, @@ -1702,11 +1747,8 @@ static int es_do_reclaim_extents(struct ext4_inode_info *ei, ext4_lblk_t end,  		(*nr_to_scan)--;  		node = rb_next(&es->rb_node); -		/* -		 * We can't reclaim delayed extent from status tree because -		 * fiemap, bigallic, and seek_data/hole need to use it. 
-		 */ -		if (ext4_es_is_delayed(es)) + +		if (ext4_es_must_keep(es))  			goto next;  		if (ext4_es_is_referenced(es)) {  			ext4_es_clear_referenced(es); @@ -1770,7 +1812,7 @@ void ext4_clear_inode_es(struct inode *inode)  	while (node) {  		es = rb_entry(node, struct extent_status, rb_node);  		node = rb_next(node); -		if (!ext4_es_is_delayed(es)) { +		if (!ext4_es_must_keep(es)) {  			rb_erase(&es->rb_node, &tree->root);  			ext4_es_free_extent(inode, es);  		} @@ -1972,17 +2014,18 @@ bool ext4_is_pending(struct inode *inode, ext4_lblk_t lblk)   * @lblk - logical block to be added   * @allocated - indicates whether a physical cluster has been allocated for   *              the logical cluster that contains the block - * - * Returns 0 on success, negative error code on failure.   */ -int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, -				 bool allocated) +void ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk, +				  bool allocated)  {  	struct extent_status newes; -	int err = 0; +	int err1 = 0; +	int err2 = 0; +	struct extent_status *es1 = NULL; +	struct extent_status *es2 = NULL;  	if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY) -		return 0; +		return;  	es_debug("add [%u/1) delayed to extent status tree of inode %lu\n",  		 lblk, inode->i_ino); @@ -1994,29 +2037,37 @@ int ext4_es_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk,  	ext4_es_insert_extent_check(inode, &newes); +retry: +	if (err1 && !es1) +		es1 = __es_alloc_extent(true); +	if ((err1 || err2) && !es2) +		es2 = __es_alloc_extent(true);  	write_lock(&EXT4_I(inode)->i_es_lock); -	err = __es_remove_extent(inode, lblk, lblk, NULL); -	if (err != 0) +	err1 = __es_remove_extent(inode, lblk, lblk, NULL, es1); +	if (err1 != 0)  		goto error; -retry: -	err = __es_insert_extent(inode, &newes); -	if (err == -ENOMEM && __es_shrink(EXT4_SB(inode->i_sb), -					  128, EXT4_I(inode))) -		goto retry; -	if (err != 0) + +	err2 = __es_insert_extent(inode, 
&newes, es2); +	if (err2 != 0)  		goto error;  	if (allocated)  		__insert_pending(inode, lblk); +	/* es is pre-allocated but not used, free it. */ +	if (es1 && !es1->es_len) +		__es_free_extent(es1); +	if (es2 && !es2->es_len) +		__es_free_extent(es2);  error:  	write_unlock(&EXT4_I(inode)->i_es_lock); +	if (err1 || err2) +		goto retry;  	ext4_es_print_tree(inode);  	ext4_print_pending_tree(inode); - -	return err; +	return;  }  /*  |