diff options
Diffstat (limited to 'fs/f2fs/data.c')
| -rw-r--r-- | fs/f2fs/data.c | 967 | 
1 files changed, 243 insertions, 724 deletions
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 9bedfa8dd3a5..a82abe921b89 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -14,6 +14,7 @@  #include <linux/mpage.h>  #include <linux/writeback.h>  #include <linux/backing-dev.h> +#include <linux/pagevec.h>  #include <linux/blkdev.h>  #include <linux/bio.h>  #include <linux/prefetch.h> @@ -26,16 +27,13 @@  #include "trace.h"  #include <trace/events/f2fs.h> -static struct kmem_cache *extent_tree_slab; -static struct kmem_cache *extent_node_slab; - -static void f2fs_read_end_io(struct bio *bio, int err) +static void f2fs_read_end_io(struct bio *bio)  {  	struct bio_vec *bvec;  	int i;  	if (f2fs_bio_encrypted(bio)) { -		if (err) { +		if (bio->bi_error) {  			f2fs_release_crypto_ctx(bio->bi_private);  		} else {  			f2fs_end_io_crypto_work(bio->bi_private, bio); @@ -46,7 +44,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)  	bio_for_each_segment_all(bvec, bio, i) {  		struct page *page = bvec->bv_page; -		if (!err) { +		if (!bio->bi_error) {  			SetPageUptodate(page);  		} else {  			ClearPageUptodate(page); @@ -57,7 +55,7 @@ static void f2fs_read_end_io(struct bio *bio, int err)  	bio_put(bio);  } -static void f2fs_write_end_io(struct bio *bio, int err) +static void f2fs_write_end_io(struct bio *bio)  {  	struct f2fs_sb_info *sbi = bio->bi_private;  	struct bio_vec *bvec; @@ -68,7 +66,7 @@ static void f2fs_write_end_io(struct bio *bio, int err)  		f2fs_restore_and_release_control_page(&page); -		if (unlikely(err)) { +		if (unlikely(bio->bi_error)) {  			set_page_dirty(page);  			set_bit(AS_EIO, &page->mapping->flags);  			f2fs_stop_checkpoint(sbi); @@ -92,8 +90,7 @@ static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,  {  	struct bio *bio; -	/* No failure on bio allocation */ -	bio = bio_alloc(GFP_NOIO, npages); +	bio = f2fs_bio_alloc(npages);  	bio->bi_bdev = sbi->sb->s_bdev;  	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr); @@ -158,7 +155,6 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)  	if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {  		bio_put(bio); -		f2fs_put_page(page, 1);  		return -EFAULT;  	} @@ -266,645 +262,17 @@ int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)  	return err;  } -static bool lookup_extent_info(struct inode *inode, pgoff_t pgofs, -							struct extent_info *ei) -{ -	struct f2fs_inode_info *fi = F2FS_I(inode); -	pgoff_t start_fofs, end_fofs; -	block_t start_blkaddr; - -	read_lock(&fi->ext_lock); -	if (fi->ext.len == 0) { -		read_unlock(&fi->ext_lock); -		return false; -	} - -	stat_inc_total_hit(inode->i_sb); - -	start_fofs = fi->ext.fofs; -	end_fofs = fi->ext.fofs + fi->ext.len - 1; -	start_blkaddr = fi->ext.blk; - -	if (pgofs >= start_fofs && pgofs <= end_fofs) { -		*ei = fi->ext; -		stat_inc_read_hit(inode->i_sb); -		read_unlock(&fi->ext_lock); -		return true; -	} -	read_unlock(&fi->ext_lock); -	return false; -} - -static bool update_extent_info(struct inode *inode, pgoff_t fofs, -								block_t blkaddr) -{ -	struct f2fs_inode_info *fi = F2FS_I(inode); -	pgoff_t start_fofs, end_fofs; -	block_t start_blkaddr, end_blkaddr; -	int need_update = true; - -	write_lock(&fi->ext_lock); - -	start_fofs = fi->ext.fofs; -	end_fofs = fi->ext.fofs + fi->ext.len - 1; -	start_blkaddr = fi->ext.blk; -	end_blkaddr = fi->ext.blk + fi->ext.len - 1; - -	/* Drop and initialize the matched extent */ -	if (fi->ext.len == 1 && fofs == start_fofs) -		fi->ext.len = 0; - -	/* Initial extent */ -	if (fi->ext.len == 0) { -		if (blkaddr != NULL_ADDR) { -			fi->ext.fofs = fofs; -			fi->ext.blk = blkaddr; -			fi->ext.len = 1; -		} -		goto end_update; -	} - -	/* Front merge */ -	if (fofs == start_fofs - 1 && blkaddr == start_blkaddr - 1) { -		fi->ext.fofs--; -		fi->ext.blk--; -		fi->ext.len++; -		goto end_update; -	} - -	/* Back merge */ -	if (fofs == end_fofs + 1 && blkaddr == end_blkaddr + 1) { -		fi->ext.len++; -		goto end_update; -	} - -	/* Split the existing extent */ -	if (fi->ext.len > 1 && -		fofs >= start_fofs && fofs <= end_fofs) { -		if ((end_fofs - fofs) < (fi->ext.len >> 1)) { -			fi->ext.len = fofs - start_fofs; -		} else { -			fi->ext.fofs = fofs + 1; -			fi->ext.blk = start_blkaddr + fofs - start_fofs + 1; -			fi->ext.len -= fofs - start_fofs + 1; -		} -	} else { -		need_update = false; -	} - -	/* Finally, if the extent is very fragmented, let's drop the cache. */ -	if (fi->ext.len < F2FS_MIN_EXTENT_LEN) { -		fi->ext.len = 0; -		set_inode_flag(fi, FI_NO_EXTENT); -		need_update = true; -	} -end_update: -	write_unlock(&fi->ext_lock); -	return need_update; -} - -static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi, -				struct extent_tree *et, struct extent_info *ei, -				struct rb_node *parent, struct rb_node **p) -{ -	struct extent_node *en; - -	en = kmem_cache_alloc(extent_node_slab, GFP_ATOMIC); -	if (!en) -		return NULL; - -	en->ei = *ei; -	INIT_LIST_HEAD(&en->list); - -	rb_link_node(&en->rb_node, parent, p); -	rb_insert_color(&en->rb_node, &et->root); -	et->count++; -	atomic_inc(&sbi->total_ext_node); -	return en; -} - -static void __detach_extent_node(struct f2fs_sb_info *sbi, -				struct extent_tree *et, struct extent_node *en) -{ -	rb_erase(&en->rb_node, &et->root); -	et->count--; -	atomic_dec(&sbi->total_ext_node); - -	if (et->cached_en == en) -		et->cached_en = NULL; -} - -static struct extent_tree *__find_extent_tree(struct f2fs_sb_info *sbi, -							nid_t ino) -{ -	struct extent_tree *et; - -	down_read(&sbi->extent_tree_lock); -	et = radix_tree_lookup(&sbi->extent_tree_root, ino); -	if (!et) { -		up_read(&sbi->extent_tree_lock); -		return NULL; -	} -	atomic_inc(&et->refcount); -	up_read(&sbi->extent_tree_lock); - -	return et; -} - -static struct extent_tree *__grab_extent_tree(struct inode *inode) -{ -	struct f2fs_sb_info *sbi = F2FS_I_SB(inode); -	struct extent_tree *et; -	nid_t ino = inode->i_ino; - -	down_write(&sbi->extent_tree_lock); -	et = radix_tree_lookup(&sbi->extent_tree_root, ino); -	if (!et) { -		et = f2fs_kmem_cache_alloc(extent_tree_slab, GFP_NOFS); -		f2fs_radix_tree_insert(&sbi->extent_tree_root, ino, et); -		memset(et, 0, sizeof(struct extent_tree)); -		et->ino = ino; -		et->root = RB_ROOT; -		et->cached_en = NULL; -		rwlock_init(&et->lock); -		atomic_set(&et->refcount, 0); -		et->count = 0; -		sbi->total_ext_tree++; -	} -	atomic_inc(&et->refcount); -	up_write(&sbi->extent_tree_lock); - -	return et; -} - -static struct extent_node *__lookup_extent_tree(struct extent_tree *et, -							unsigned int fofs) -{ -	struct rb_node *node = et->root.rb_node; -	struct extent_node *en; - -	if (et->cached_en) { -		struct extent_info *cei = &et->cached_en->ei; - -		if (cei->fofs <= fofs && cei->fofs + cei->len > fofs) -			return et->cached_en; -	} - -	while (node) { -		en = rb_entry(node, struct extent_node, rb_node); - -		if (fofs < en->ei.fofs) { -			node = node->rb_left; -		} else if (fofs >= en->ei.fofs + en->ei.len) { -			node = node->rb_right; -		} else { -			et->cached_en = en; -			return en; -		} -	} -	return NULL; -} - -static struct extent_node *__try_back_merge(struct f2fs_sb_info *sbi, -				struct extent_tree *et, struct extent_node *en) -{ -	struct extent_node *prev; -	struct rb_node *node; - -	node = rb_prev(&en->rb_node); -	if (!node) -		return NULL; - -	prev = rb_entry(node, struct extent_node, rb_node); -	if (__is_back_mergeable(&en->ei, &prev->ei)) { -		en->ei.fofs = prev->ei.fofs; -		en->ei.blk = prev->ei.blk; -		en->ei.len += prev->ei.len; -		__detach_extent_node(sbi, et, prev); -		return prev; -	} -	return NULL; -} - -static struct extent_node *__try_front_merge(struct f2fs_sb_info *sbi, -				struct extent_tree *et, struct extent_node *en) -{ -	struct extent_node *next; -	struct rb_node *node; - -	node = rb_next(&en->rb_node); -	if (!node) -		return NULL; - -	next = rb_entry(node, struct extent_node, rb_node); -	if (__is_front_mergeable(&en->ei, &next->ei)) { -		en->ei.len += next->ei.len; -		__detach_extent_node(sbi, et, next); -		return next; -	} -	return NULL; -} - -static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi, -				struct extent_tree *et, struct extent_info *ei, -				struct extent_node **den) -{ -	struct rb_node **p = &et->root.rb_node; -	struct rb_node *parent = NULL; -	struct extent_node *en; - -	while (*p) { -		parent = *p; -		en = rb_entry(parent, struct extent_node, rb_node); - -		if (ei->fofs < en->ei.fofs) { -			if (__is_front_mergeable(ei, &en->ei)) { -				f2fs_bug_on(sbi, !den); -				en->ei.fofs = ei->fofs; -				en->ei.blk = ei->blk; -				en->ei.len += ei->len; -				*den = __try_back_merge(sbi, et, en); -				return en; -			} -			p = &(*p)->rb_left; -		} else if (ei->fofs >= en->ei.fofs + en->ei.len) { -			if (__is_back_mergeable(ei, &en->ei)) { -				f2fs_bug_on(sbi, !den); -				en->ei.len += ei->len; -				*den = __try_front_merge(sbi, et, en); -				return en; -			} -			p = &(*p)->rb_right; -		} else { -			f2fs_bug_on(sbi, 1); -		} -	} - -	return __attach_extent_node(sbi, et, ei, parent, p); -} - -static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi, -					struct extent_tree *et, bool free_all) +int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)  { -	struct rb_node *node, *next; -	struct extent_node *en; -	unsigned int count = et->count; - -	node = rb_first(&et->root); -	while (node) { -		next = rb_next(node); -		en = rb_entry(node, struct extent_node, rb_node); - -		if (free_all) { -			spin_lock(&sbi->extent_lock); -			if (!list_empty(&en->list)) -				list_del_init(&en->list); -			spin_unlock(&sbi->extent_lock); -		} - -		if (free_all || list_empty(&en->list)) { -			__detach_extent_node(sbi, et, en); -			kmem_cache_free(extent_node_slab, en); -		} -		node = next; -	} - -	return count - et->count; -} - -static void f2fs_init_extent_tree(struct inode *inode, -						struct f2fs_extent *i_ext) -{ -	struct f2fs_sb_info *sbi = F2FS_I_SB(inode); -	struct extent_tree *et; -	struct extent_node *en;  	struct extent_info ei; +	struct inode *inode = dn->inode; -	if (le32_to_cpu(i_ext->len) < F2FS_MIN_EXTENT_LEN) -		return; - -	et = __grab_extent_tree(inode); - -	write_lock(&et->lock); -	if (et->count) -		goto out; - -	set_extent_info(&ei, le32_to_cpu(i_ext->fofs), -		le32_to_cpu(i_ext->blk), le32_to_cpu(i_ext->len)); - -	en = __insert_extent_tree(sbi, et, &ei, NULL); -	if (en) { -		et->cached_en = en; - -		spin_lock(&sbi->extent_lock); -		list_add_tail(&en->list, &sbi->extent_list); -		spin_unlock(&sbi->extent_lock); -	} -out: -	write_unlock(&et->lock); -	atomic_dec(&et->refcount); -} - -static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs, -							struct extent_info *ei) -{ -	struct f2fs_sb_info *sbi = F2FS_I_SB(inode); -	struct extent_tree *et; -	struct extent_node *en; - -	trace_f2fs_lookup_extent_tree_start(inode, pgofs); - -	et = __find_extent_tree(sbi, inode->i_ino); -	if (!et) -		return false; - -	read_lock(&et->lock); -	en = __lookup_extent_tree(et, pgofs); -	if (en) { -		*ei = en->ei; -		spin_lock(&sbi->extent_lock); -		if (!list_empty(&en->list)) -			list_move_tail(&en->list, &sbi->extent_list); -		spin_unlock(&sbi->extent_lock); -		stat_inc_read_hit(sbi->sb); -	} -	stat_inc_total_hit(sbi->sb); -	read_unlock(&et->lock); - -	trace_f2fs_lookup_extent_tree_end(inode, pgofs, en); - -	atomic_dec(&et->refcount); -	return en ? true : false; -} - -static void f2fs_update_extent_tree(struct inode *inode, pgoff_t fofs, -							block_t blkaddr) -{ -	struct f2fs_sb_info *sbi = F2FS_I_SB(inode); -	struct extent_tree *et; -	struct extent_node *en = NULL, *en1 = NULL, *en2 = NULL, *en3 = NULL; -	struct extent_node *den = NULL; -	struct extent_info ei, dei; -	unsigned int endofs; - -	trace_f2fs_update_extent_tree(inode, fofs, blkaddr); - -	et = __grab_extent_tree(inode); - -	write_lock(&et->lock); - -	/* 1. lookup and remove existing extent info in cache */ -	en = __lookup_extent_tree(et, fofs); -	if (!en) -		goto update_extent; - -	dei = en->ei; -	__detach_extent_node(sbi, et, en); - -	/* 2. if extent can be split more, split and insert the left part */ -	if (dei.len > 1) { -		/*  insert left part of split extent into cache */ -		if (fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN) { -			set_extent_info(&ei, dei.fofs, dei.blk, -							fofs - dei.fofs); -			en1 = __insert_extent_tree(sbi, et, &ei, NULL); -		} - -		/* insert right part of split extent into cache */ -		endofs = dei.fofs + dei.len - 1; -		if (endofs - fofs >= F2FS_MIN_EXTENT_LEN) { -			set_extent_info(&ei, fofs + 1, -				fofs - dei.fofs + dei.blk, endofs - fofs); -			en2 = __insert_extent_tree(sbi, et, &ei, NULL); -		} -	} - -update_extent: -	/* 3. update extent in extent cache */ -	if (blkaddr) { -		set_extent_info(&ei, fofs, blkaddr, 1); -		en3 = __insert_extent_tree(sbi, et, &ei, &den); -	} - -	/* 4. update in global extent list */ -	spin_lock(&sbi->extent_lock); -	if (en && !list_empty(&en->list)) -		list_del(&en->list); -	/* -	 * en1 and en2 split from en, they will become more and more smaller -	 * fragments after splitting several times. So if the length is smaller -	 * than F2FS_MIN_EXTENT_LEN, we will not add them into extent tree. -	 */ -	if (en1) -		list_add_tail(&en1->list, &sbi->extent_list); -	if (en2) -		list_add_tail(&en2->list, &sbi->extent_list); -	if (en3) { -		if (list_empty(&en3->list)) -			list_add_tail(&en3->list, &sbi->extent_list); -		else -			list_move_tail(&en3->list, &sbi->extent_list); -	} -	if (den && !list_empty(&den->list)) -		list_del(&den->list); -	spin_unlock(&sbi->extent_lock); - -	/* 5. release extent node */ -	if (en) -		kmem_cache_free(extent_node_slab, en); -	if (den) -		kmem_cache_free(extent_node_slab, den); - -	write_unlock(&et->lock); -	atomic_dec(&et->refcount); -} - -void f2fs_preserve_extent_tree(struct inode *inode) -{ -	struct extent_tree *et; -	struct extent_info *ext = &F2FS_I(inode)->ext; -	bool sync = false; - -	if (!test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) -		return; - -	et = __find_extent_tree(F2FS_I_SB(inode), inode->i_ino); -	if (!et) { -		if (ext->len) { -			ext->len = 0; -			update_inode_page(inode); -		} -		return; -	} - -	read_lock(&et->lock); -	if (et->count) { -		struct extent_node *en; - -		if (et->cached_en) { -			en = et->cached_en; -		} else { -			struct rb_node *node = rb_first(&et->root); - -			if (!node) -				node = rb_last(&et->root); -			en = rb_entry(node, struct extent_node, rb_node); -		} - -		if (__is_extent_same(ext, &en->ei)) -			goto out; - -		*ext = en->ei; -		sync = true; -	} else if (ext->len) { -		ext->len = 0; -		sync = true; -	} -out: -	read_unlock(&et->lock); -	atomic_dec(&et->refcount); - -	if (sync) -		update_inode_page(inode); -} - -void f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink) -{ -	struct extent_tree *treevec[EXT_TREE_VEC_SIZE]; -	struct extent_node *en, *tmp; -	unsigned long ino = F2FS_ROOT_INO(sbi); -	struct radix_tree_iter iter; -	void **slot; -	unsigned int found; -	unsigned int node_cnt = 0, tree_cnt = 0; - -	if (!test_opt(sbi, EXTENT_CACHE)) -		return; - -	if (available_free_memory(sbi, EXTENT_CACHE)) -		return; - -	spin_lock(&sbi->extent_lock); -	list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) { -		if (!nr_shrink--) -			break; -		list_del_init(&en->list); -	} -	spin_unlock(&sbi->extent_lock); - -	down_read(&sbi->extent_tree_lock); -	while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root, -				(void **)treevec, ino, EXT_TREE_VEC_SIZE))) { -		unsigned i; - -		ino = treevec[found - 1]->ino + 1; -		for (i = 0; i < found; i++) { -			struct extent_tree *et = treevec[i]; - -			atomic_inc(&et->refcount); -			write_lock(&et->lock); -			node_cnt += __free_extent_tree(sbi, et, false); -			write_unlock(&et->lock); -			atomic_dec(&et->refcount); -		} -	} -	up_read(&sbi->extent_tree_lock); - -	down_write(&sbi->extent_tree_lock); -	radix_tree_for_each_slot(slot, &sbi->extent_tree_root, &iter, -							F2FS_ROOT_INO(sbi)) { -		struct extent_tree *et = (struct extent_tree *)*slot; - -		if (!atomic_read(&et->refcount) && !et->count) { -			radix_tree_delete(&sbi->extent_tree_root, et->ino); -			kmem_cache_free(extent_tree_slab, et); -			sbi->total_ext_tree--; -			tree_cnt++; -		} -	} -	up_write(&sbi->extent_tree_lock); - -	trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt); -} - -void f2fs_destroy_extent_tree(struct inode *inode) -{ -	struct f2fs_sb_info *sbi = F2FS_I_SB(inode); -	struct extent_tree *et; -	unsigned int node_cnt = 0; - -	if (!test_opt(sbi, EXTENT_CACHE)) -		return; - -	et = __find_extent_tree(sbi, inode->i_ino); -	if (!et) -		goto out; - -	/* free all extent info belong to this extent tree */ -	write_lock(&et->lock); -	node_cnt = __free_extent_tree(sbi, et, true); -	write_unlock(&et->lock); - -	atomic_dec(&et->refcount); - -	/* try to find and delete extent tree entry in radix tree */ -	down_write(&sbi->extent_tree_lock); -	et = radix_tree_lookup(&sbi->extent_tree_root, inode->i_ino); -	if (!et) { -		up_write(&sbi->extent_tree_lock); -		goto out; +	if (f2fs_lookup_extent_cache(inode, index, &ei)) { +		dn->data_blkaddr = ei.blk + index - ei.fofs; +		return 0;  	} -	f2fs_bug_on(sbi, atomic_read(&et->refcount) || et->count); -	radix_tree_delete(&sbi->extent_tree_root, inode->i_ino); -	kmem_cache_free(extent_tree_slab, et); -	sbi->total_ext_tree--; -	up_write(&sbi->extent_tree_lock); -out: -	trace_f2fs_destroy_extent_tree(inode, node_cnt); -	return; -} - -void f2fs_init_extent_cache(struct inode *inode, struct f2fs_extent *i_ext) -{ -	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) -		f2fs_init_extent_tree(inode, i_ext); - -	write_lock(&F2FS_I(inode)->ext_lock); -	get_extent_info(&F2FS_I(inode)->ext, *i_ext); -	write_unlock(&F2FS_I(inode)->ext_lock); -} -static bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs, -							struct extent_info *ei) -{ -	if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT)) -		return false; - -	if (test_opt(F2FS_I_SB(inode), EXTENT_CACHE)) -		return f2fs_lookup_extent_tree(inode, pgofs, ei); - -	return lookup_extent_info(inode, pgofs, ei); -} - -void f2fs_update_extent_cache(struct dnode_of_data *dn) -{ -	struct f2fs_inode_info *fi = F2FS_I(dn->inode); -	pgoff_t fofs; - -	f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR); - -	if (is_inode_flag_set(fi, FI_NO_EXTENT)) -		return; - -	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) + -							dn->ofs_in_node; - -	if (test_opt(F2FS_I_SB(dn->inode), EXTENT_CACHE)) -		return f2fs_update_extent_tree(dn->inode, fofs, -							dn->data_blkaddr); - -	if (update_extent_info(dn->inode, fofs, dn->data_blkaddr)) -		sync_inode_page(dn); +	return f2fs_reserve_block(dn, index);  }  struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw) @@ -935,15 +303,13 @@ struct page *get_read_data_page(struct inode *inode, pgoff_t index, int rw)  	set_new_dnode(&dn, inode, NULL, NULL, 0);  	err = get_dnode_of_data(&dn, index, LOOKUP_NODE); -	if (err) { -		f2fs_put_page(page, 1); -		return ERR_PTR(err); -	} +	if (err) +		goto put_err;  	f2fs_put_dnode(&dn);  	if (unlikely(dn.data_blkaddr == NULL_ADDR)) { -		f2fs_put_page(page, 1); -		return ERR_PTR(-ENOENT); +		err = -ENOENT; +		goto put_err;  	}  got_it:  	if (PageUptodate(page)) { @@ -968,8 +334,12 @@ got_it:  	fio.page = page;  	err = f2fs_submit_page_bio(&fio);  	if (err) -		return ERR_PTR(err); +		goto put_err;  	return page; + +put_err: +	f2fs_put_page(page, 1); +	return ERR_PTR(err);  }  struct page *find_data_page(struct inode *inode, pgoff_t index) @@ -1030,7 +400,8 @@ repeat:   *   * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and   * f2fs_unlock_op(). - * Note that, ipage is set only by make_empty_dir. + * Note that, ipage is set only by make_empty_dir, and if any error occur, + * ipage should be released by this function.   */  struct page *get_new_data_page(struct inode *inode,  		struct page *ipage, pgoff_t index, bool new_i_size) @@ -1041,8 +412,14 @@ struct page *get_new_data_page(struct inode *inode,  	int err;  repeat:  	page = grab_cache_page(mapping, index); -	if (!page) +	if (!page) { +		/* +		 * before exiting, we should make sure ipage will be released +		 * if any error occur. +		 */ +		f2fs_put_page(ipage, 1);  		return ERR_PTR(-ENOMEM); +	}  	set_new_dnode(&dn, inode, ipage, NULL, 0);  	err = f2fs_reserve_block(&dn, index); @@ -1107,8 +484,6 @@ alloc:  	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,  								&sum, seg); - -	/* direct IO doesn't use extent cache to maximize the performance */  	set_data_blkaddr(dn);  	/* update i_size */ @@ -1117,6 +492,9 @@ alloc:  	if (i_size_read(dn->inode) < ((fofs + 1) << PAGE_CACHE_SHIFT))  		i_size_write(dn->inode, ((fofs + 1) << PAGE_CACHE_SHIFT)); +	/* direct IO doesn't use extent cache to maximize the performance */ +	f2fs_drop_largest_extent(dn->inode, fofs); +  	return 0;  } @@ -1183,7 +561,7 @@ out:   *     c. give the block addresses to blockdev   */  static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, -			int create, bool fiemap) +						int create, int flag)  {  	unsigned int maxblocks = map->m_len;  	struct dnode_of_data dn; @@ -1217,8 +595,19 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,  			err = 0;  		goto unlock_out;  	} -	if (dn.data_blkaddr == NEW_ADDR && !fiemap) -		goto put_out; +	if (dn.data_blkaddr == NEW_ADDR) { +		if (flag == F2FS_GET_BLOCK_BMAP) { +			err = -ENOENT; +			goto put_out; +		} else if (flag == F2FS_GET_BLOCK_READ || +				flag == F2FS_GET_BLOCK_DIO) { +			goto put_out; +		} +		/* +		 * if it is in fiemap call path (flag = F2FS_GET_BLOCK_FIEMAP), +		 * mark it as mapped and unwritten block. +		 */ +	}  	if (dn.data_blkaddr != NULL_ADDR) {  		map->m_flags = F2FS_MAP_MAPPED; @@ -1233,6 +622,8 @@ static int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,  		map->m_flags = F2FS_MAP_NEW | F2FS_MAP_MAPPED;  		map->m_pblk = dn.data_blkaddr;  	} else { +		if (flag == F2FS_GET_BLOCK_BMAP) +			err = -ENOENT;  		goto put_out;  	} @@ -1255,7 +646,9 @@ get_next:  				err = 0;  			goto unlock_out;  		} -		if (dn.data_blkaddr == NEW_ADDR && !fiemap) + +		if (dn.data_blkaddr == NEW_ADDR && +				flag != F2FS_GET_BLOCK_FIEMAP)  			goto put_out;  		end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode)); @@ -1297,7 +690,7 @@ out:  }  static int __get_data_block(struct inode *inode, sector_t iblock, -			struct buffer_head *bh, int create, bool fiemap) +			struct buffer_head *bh, int create, int flag)  {  	struct f2fs_map_blocks map;  	int ret; @@ -1305,7 +698,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,  	map.m_lblk = iblock;  	map.m_len = bh->b_size >> inode->i_blkbits; -	ret = f2fs_map_blocks(inode, &map, create, fiemap); +	ret = f2fs_map_blocks(inode, &map, create, flag);  	if (!ret) {  		map_bh(bh, inode->i_sb, map.m_pblk);  		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags; @@ -1315,15 +708,23 @@ static int __get_data_block(struct inode *inode, sector_t iblock,  }  static int get_data_block(struct inode *inode, sector_t iblock, +			struct buffer_head *bh_result, int create, int flag) +{ +	return __get_data_block(inode, iblock, bh_result, create, flag); +} + +static int get_data_block_dio(struct inode *inode, sector_t iblock,  			struct buffer_head *bh_result, int create)  { -	return __get_data_block(inode, iblock, bh_result, create, false); +	return __get_data_block(inode, iblock, bh_result, create, +						F2FS_GET_BLOCK_DIO);  } -static int get_data_block_fiemap(struct inode *inode, sector_t iblock, +static int get_data_block_bmap(struct inode *inode, sector_t iblock,  			struct buffer_head *bh_result, int create)  { -	return __get_data_block(inode, iblock, bh_result, create, true); +	return __get_data_block(inode, iblock, bh_result, create, +						F2FS_GET_BLOCK_BMAP);  }  static inline sector_t logical_to_blk(struct inode *inode, loff_t offset) @@ -1367,7 +768,8 @@ next:  	memset(&map_bh, 0, sizeof(struct buffer_head));  	map_bh.b_size = len; -	ret = get_data_block_fiemap(inode, start_blk, &map_bh, 0); +	ret = get_data_block(inode, start_blk, &map_bh, 0, +					F2FS_GET_BLOCK_FIEMAP);  	if (ret)  		goto out; @@ -1552,7 +954,7 @@ submit_and_realloc:  			}  			bio = bio_alloc(GFP_KERNEL, -				min_t(int, nr_pages, bio_get_nr_vecs(bdev))); +				min_t(int, nr_pages, BIO_MAX_PAGES));  			if (!bio) {  				if (ctx)  					f2fs_release_crypto_ctx(ctx); @@ -1770,6 +1172,137 @@ static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,  	return ret;  } +/* + * This function was copied from write_cche_pages from mm/page-writeback.c. + * The major change is making write step of cold data page separately from + * warm/hot data page. + */ +static int f2fs_write_cache_pages(struct address_space *mapping, +			struct writeback_control *wbc, writepage_t writepage, +			void *data) +{ +	int ret = 0; +	int done = 0; +	struct pagevec pvec; +	int nr_pages; +	pgoff_t uninitialized_var(writeback_index); +	pgoff_t index; +	pgoff_t end;		/* Inclusive */ +	pgoff_t done_index; +	int cycled; +	int range_whole = 0; +	int tag; +	int step = 0; + +	pagevec_init(&pvec, 0); +next: +	if (wbc->range_cyclic) { +		writeback_index = mapping->writeback_index; /* prev offset */ +		index = writeback_index; +		if (index == 0) +			cycled = 1; +		else +			cycled = 0; +		end = -1; +	} else { +		index = wbc->range_start >> PAGE_CACHE_SHIFT; +		end = wbc->range_end >> PAGE_CACHE_SHIFT; +		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) +			range_whole = 1; +		cycled = 1; /* ignore range_cyclic tests */ +	} +	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) +		tag = PAGECACHE_TAG_TOWRITE; +	else +		tag = PAGECACHE_TAG_DIRTY; +retry: +	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) +		tag_pages_for_writeback(mapping, index, end); +	done_index = index; +	while (!done && (index <= end)) { +		int i; + +		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, +			      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1); +		if (nr_pages == 0) +			break; + +		for (i = 0; i < nr_pages; i++) { +			struct page *page = pvec.pages[i]; + +			if (page->index > end) { +				done = 1; +				break; +			} + +			done_index = page->index; + +			lock_page(page); + +			if (unlikely(page->mapping != mapping)) { +continue_unlock: +				unlock_page(page); +				continue; +			} + +			if (!PageDirty(page)) { +				/* someone wrote it for us */ +				goto continue_unlock; +			} + +			if (step == is_cold_data(page)) +				goto continue_unlock; + +			if (PageWriteback(page)) { +				if (wbc->sync_mode != WB_SYNC_NONE) +					f2fs_wait_on_page_writeback(page, DATA); +				else +					goto continue_unlock; +			} + +			BUG_ON(PageWriteback(page)); +			if (!clear_page_dirty_for_io(page)) +				goto continue_unlock; + +			ret = (*writepage)(page, wbc, data); +			if (unlikely(ret)) { +				if (ret == AOP_WRITEPAGE_ACTIVATE) { +					unlock_page(page); +					ret = 0; +				} else { +					done_index = page->index + 1; +					done = 1; +					break; +				} +			} + +			if (--wbc->nr_to_write <= 0 && +			    wbc->sync_mode == WB_SYNC_NONE) { +				done = 1; +				break; +			} +		} +		pagevec_release(&pvec); +		cond_resched(); +	} + +	if (step < 1) { +		step++; +		goto next; +	} + +	if (!cycled && !done) { +		cycled = 1; +		index = 0; +		end = writeback_index - 1; +		goto retry; +	} +	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0)) +		mapping->writeback_index = done_index; + +	return ret; +} +  static int f2fs_write_data_pages(struct address_space *mapping,  			    struct writeback_control *wbc)  { @@ -1785,6 +1318,10 @@ static int f2fs_write_data_pages(struct address_space *mapping,  	if (!mapping->a_ops->writepage)  		return 0; +	/* skip writing if there is no dirty page in this inode */ +	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE) +		return 0; +  	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&  			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&  			available_free_memory(sbi, DIRTY_DENTS)) @@ -1800,12 +1337,11 @@ static int f2fs_write_data_pages(struct address_space *mapping,  		mutex_lock(&sbi->writepages);  		locked = true;  	} -	ret = write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); +	ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping); +	f2fs_submit_merged_bio(sbi, DATA, WRITE);  	if (locked)  		mutex_unlock(&sbi->writepages); -	f2fs_submit_merged_bio(sbi, DATA, WRITE); -  	remove_dirty_dir_inode(inode);  	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff); @@ -1832,7 +1368,8 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,  {  	struct inode *inode = mapping->host;  	struct f2fs_sb_info *sbi = F2FS_I_SB(inode); -	struct page *page, *ipage; +	struct page *page = NULL; +	struct page *ipage;  	pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;  	struct dnode_of_data dn;  	int err = 0; @@ -1882,25 +1419,28 @@ repeat:  		if (err)  			goto put_fail;  	} -	err = f2fs_reserve_block(&dn, index); + +	err = f2fs_get_block(&dn, index);  	if (err)  		goto put_fail;  put_next:  	f2fs_put_dnode(&dn);  	f2fs_unlock_op(sbi); -	if ((len == PAGE_CACHE_SIZE) || PageUptodate(page)) -		return 0; -  	f2fs_wait_on_page_writeback(page, DATA); +	if (len == PAGE_CACHE_SIZE) +		goto out_update; +	if (PageUptodate(page)) +		goto out_clear; +  	if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {  		unsigned start = pos & (PAGE_CACHE_SIZE - 1);  		unsigned end = start + len;  		/* Reading beyond i_size is simple: memset to zero */  		zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE); -		goto out; +		goto out_update;  	}  	if (dn.data_blkaddr == NEW_ADDR) { @@ -1920,7 +1460,6 @@ put_next:  		lock_page(page);  		if (unlikely(!PageUptodate(page))) { -			f2fs_put_page(page, 1);  			err = -EIO;  			goto fail;  		} @@ -1932,14 +1471,13 @@ put_next:  		/* avoid symlink page */  		if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {  			err = f2fs_decrypt_one(inode, page); -			if (err) { -				f2fs_put_page(page, 1); +			if (err)  				goto fail; -			}  		}  	} -out: +out_update:  	SetPageUptodate(page); +out_clear:  	clear_cold_data(page);  	return 0; @@ -1947,8 +1485,8 @@ put_fail:  	f2fs_put_dnode(&dn);  unlock_fail:  	f2fs_unlock_op(sbi); -	f2fs_put_page(page, 1);  fail: +	f2fs_put_page(page, 1);  	f2fs_write_failed(mapping, pos + len);  	return err;  } @@ -1979,9 +1517,6 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,  {  	unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; -	if (iov_iter_rw(iter) == READ) -		return 0; -  	if (offset & blocksize_mask)  		return -EINVAL; @@ -2010,15 +1545,16 @@ static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,  	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))  		return 0; -	if (check_direct_IO(inode, iter, offset)) -		return 0; +	err = check_direct_IO(inode, iter, offset); +	if (err) +		return err;  	trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));  	if (iov_iter_rw(iter) == WRITE)  		__allocate_data_blocks(inode, offset, count); -	err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block); +	err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);  	if (err < 0 && iov_iter_rw(iter) == WRITE)  		f2fs_write_failed(mapping, offset + count); @@ -2045,6 +1581,11 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,  		else  			inode_dec_dirty_pages(inode);  	} + +	/* This is atomic written page, keep Private */ +	if (IS_ATOMIC_WRITTEN_PAGE(page)) +		return; +  	ClearPagePrivate(page);  } @@ -2054,6 +1595,10 @@ int f2fs_release_page(struct page *page, gfp_t wait)  	if (PageDirty(page))  		return 0; +	/* This is atomic written page, keep Private */ +	if (IS_ATOMIC_WRITTEN_PAGE(page)) +		return 0; +  	ClearPagePrivate(page);  	return 1;  } @@ -2068,12 +1613,17 @@ static int f2fs_set_data_page_dirty(struct page *page)  	SetPageUptodate(page);  	if (f2fs_is_atomic_file(inode)) { -		register_inmem_page(inode, page); -		return 1; +		if (!IS_ATOMIC_WRITTEN_PAGE(page)) { +			register_inmem_page(inode, page); +			return 1; +		} +		/* +		 * Previously, this page has been registered, we just +		 * return here. +		 */ +		return 0;  	} -	mark_inode_dirty(inode); -  	if (!PageDirty(page)) {  		__set_page_dirty_nobuffers(page);  		update_dirty_page(inode, page); @@ -2092,38 +1642,7 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)  		if (err)  			return err;  	} -	return generic_block_bmap(mapping, block, get_data_block); -} - -void init_extent_cache_info(struct f2fs_sb_info *sbi) -{ -	INIT_RADIX_TREE(&sbi->extent_tree_root, GFP_NOIO); -	init_rwsem(&sbi->extent_tree_lock); -	INIT_LIST_HEAD(&sbi->extent_list); -	spin_lock_init(&sbi->extent_lock); -	sbi->total_ext_tree = 0; -	atomic_set(&sbi->total_ext_node, 0); -} - -int __init create_extent_cache(void) -{ -	extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree", -			sizeof(struct extent_tree)); -	if (!extent_tree_slab) -		return -ENOMEM; -	extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node", -			sizeof(struct extent_node)); -	if (!extent_node_slab) { -		kmem_cache_destroy(extent_tree_slab); -		return -ENOMEM; -	} -	return 0; -} - -void destroy_extent_cache(void) -{ -	kmem_cache_destroy(extent_node_slab); -	kmem_cache_destroy(extent_tree_slab); +	return generic_block_bmap(mapping, block, get_data_block_bmap);  }  const struct address_space_operations f2fs_dblock_aops = {  |