diff options
Diffstat (limited to 'fs/ext4/inode.c')
| -rw-r--r-- | fs/ext4/inode.c | 176 | 
1 file changed, 59 insertions, 117 deletions
| diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d18852d6029c..0f06305167d5 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1284,22 +1284,14 @@ static int ext4_write_end(struct file *file,  	loff_t old_size = inode->i_size;  	int ret = 0, ret2;  	int i_size_changed = 0; -	int inline_data = ext4_has_inline_data(inode);  	bool verity = ext4_verity_in_progress(inode);  	trace_ext4_write_end(inode, pos, len, copied); -	if (inline_data) { -		ret = ext4_write_inline_data_end(inode, pos, len, -						 copied, page); -		if (ret < 0) { -			unlock_page(page); -			put_page(page); -			goto errout; -		} -		copied = ret; -	} else -		copied = block_write_end(file, mapping, pos, -					 len, copied, page, fsdata); + +	if (ext4_has_inline_data(inode)) +		return ext4_write_inline_data_end(inode, pos, len, copied, page); + +	copied = block_write_end(file, mapping, pos, len, copied, page, fsdata);  	/*  	 * it's important to update i_size while still holding page lock:  	 * page writeout could otherwise come in and zero beyond i_size. @@ -1320,7 +1312,7 @@ static int ext4_write_end(struct file *file,  	 * ordering of page lock and transaction start for journaling  	 * filesystems.  	 */ -	if (i_size_changed || inline_data) +	if (i_size_changed)  		ret = ext4_mark_inode_dirty(handle, inode);  	if (pos + len > inode->i_size && !verity && ext4_can_truncate(inode)) @@ -1329,7 +1321,7 @@ static int ext4_write_end(struct file *file,  		 * inode->i_size. 
So truncate them  		 */  		ext4_orphan_add(handle, inode); -errout: +  	ret2 = ext4_journal_stop(handle);  	if (!ret)  		ret = ret2; @@ -1395,7 +1387,6 @@ static int ext4_journalled_write_end(struct file *file,  	int partial = 0;  	unsigned from, to;  	int size_changed = 0; -	int inline_data = ext4_has_inline_data(inode);  	bool verity = ext4_verity_in_progress(inode);  	trace_ext4_journalled_write_end(inode, pos, len, copied); @@ -1404,16 +1395,10 @@ static int ext4_journalled_write_end(struct file *file,  	BUG_ON(!ext4_handle_valid(handle)); -	if (inline_data) { -		ret = ext4_write_inline_data_end(inode, pos, len, -						 copied, page); -		if (ret < 0) { -			unlock_page(page); -			put_page(page); -			goto errout; -		} -		copied = ret; -	} else if (unlikely(copied < len) && !PageUptodate(page)) { +	if (ext4_has_inline_data(inode)) +		return ext4_write_inline_data_end(inode, pos, len, copied, page); + +	if (unlikely(copied < len) && !PageUptodate(page)) {  		copied = 0;  		ext4_journalled_zero_new_buffers(handle, inode, page, from, to);  	} else { @@ -1436,7 +1421,7 @@ static int ext4_journalled_write_end(struct file *file,  	if (old_size < pos && !verity)  		pagecache_isize_extended(inode, old_size, pos); -	if (size_changed || inline_data) { +	if (size_changed) {  		ret2 = ext4_mark_inode_dirty(handle, inode);  		if (!ret)  			ret = ret2; @@ -1449,7 +1434,6 @@ static int ext4_journalled_write_end(struct file *file,  		 */  		ext4_orphan_add(handle, inode); -errout:  	ret2 = ext4_journal_stop(handle);  	if (!ret)  		ret = ret2; @@ -1644,6 +1628,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);  	int ret;  	bool allocated = false; +	bool reserved = false;  	/*  	 * If the cluster containing lblk is shared with a delayed, @@ -1660,6 +1645,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)  		ret = ext4_da_reserve_space(inode);  		if (ret != 0)   /* ENOSPC 
*/  			goto errout; +		reserved = true;  	} else {   /* bigalloc */  		if (!ext4_es_scan_clu(inode, &ext4_es_is_delonly, lblk)) {  			if (!ext4_es_scan_clu(inode, @@ -1672,6 +1658,7 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)  					ret = ext4_da_reserve_space(inode);  					if (ret != 0)   /* ENOSPC */  						goto errout; +					reserved = true;  				} else {  					allocated = true;  				} @@ -1682,6 +1669,8 @@ static int ext4_insert_delayed_block(struct inode *inode, ext4_lblk_t lblk)  	}  	ret = ext4_es_insert_delayed_block(inode, lblk, allocated); +	if (ret && reserved) +		ext4_da_release_space(inode, 1);  errout:  	return ret; @@ -1722,13 +1711,16 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,  		}  		/* -		 * Delayed extent could be allocated by fallocate. -		 * So we need to check it. +		 * the buffer head associated with a delayed and not unwritten +		 * block found in the extent status cache must contain an +		 * invalid block number and have its BH_New and BH_Delay bits +		 * set, reflecting the state assigned when the block was +		 * initially delayed allocated  		 */ -		if (ext4_es_is_delayed(&es) && !ext4_es_is_unwritten(&es)) { -			map_bh(bh, inode->i_sb, invalid_block); -			set_buffer_new(bh); -			set_buffer_delay(bh); +		if (ext4_es_is_delonly(&es)) { +			BUG_ON(bh->b_blocknr != invalid_block); +			BUG_ON(!buffer_new(bh)); +			BUG_ON(!buffer_delay(bh));  			return 0;  		} @@ -2932,19 +2924,6 @@ static int ext4_nonda_switch(struct super_block *sb)  	return 0;  } -/* We always reserve for an inode update; the superblock could be there too */ -static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len) -{ -	if (likely(ext4_has_feature_large_file(inode->i_sb))) -		return 1; - -	if (pos + len <= 0x7fffffffULL) -		return 1; - -	/* We might need to update the superblock to set LARGE_FILE */ -	return 2; -} -  static int ext4_da_write_begin(struct file *file, struct address_space 
*mapping,  			       loff_t pos, unsigned len, unsigned flags,  			       struct page **pagep, void **fsdata) @@ -2953,7 +2932,6 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,  	struct page *page;  	pgoff_t index;  	struct inode *inode = mapping->host; -	handle_t *handle;  	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))  		return -EIO; @@ -2979,41 +2957,11 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,  			return 0;  	} -	/* -	 * grab_cache_page_write_begin() can take a long time if the -	 * system is thrashing due to memory pressure, or if the page -	 * is being written back.  So grab it first before we start -	 * the transaction handle.  This also allows us to allocate -	 * the page (if needed) without using GFP_NOFS. -	 */ -retry_grab: +retry:  	page = grab_cache_page_write_begin(mapping, index, flags);  	if (!page)  		return -ENOMEM; -	unlock_page(page); -	/* -	 * With delayed allocation, we don't log the i_disksize update -	 * if there is delayed block allocation. But we still need -	 * to journalling the i_disksize update if writes to the end -	 * of file which has an already mapped buffer. -	 */ -retry_journal: -	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, -				ext4_da_write_credits(inode, pos, len)); -	if (IS_ERR(handle)) { -		put_page(page); -		return PTR_ERR(handle); -	} - -	lock_page(page); -	if (page->mapping != mapping) { -		/* The page got truncated from under us */ -		unlock_page(page); -		put_page(page); -		ext4_journal_stop(handle); -		goto retry_grab; -	}  	/* In case writeback began while the page was unlocked */  	wait_for_stable_page(page); @@ -3025,20 +2973,18 @@ retry_journal:  #endif  	if (ret < 0) {  		unlock_page(page); -		ext4_journal_stop(handle); +		put_page(page);  		/*  		 * block_write_begin may have instantiated a few blocks  		 * outside i_size.  Trim these off again. Don't need -		 * i_size_read because we hold i_mutex. 
+		 * i_size_read because we hold inode lock.  		 */  		if (pos + len > inode->i_size)  			ext4_truncate_failed_write(inode);  		if (ret == -ENOSPC &&  		    ext4_should_retry_alloc(inode->i_sb, &retries)) -			goto retry_journal; - -		put_page(page); +			goto retry;  		return ret;  	} @@ -3075,8 +3021,6 @@ static int ext4_da_write_end(struct file *file,  			     struct page *page, void *fsdata)  {  	struct inode *inode = mapping->host; -	int ret = 0, ret2; -	handle_t *handle = ext4_journal_current_handle();  	loff_t new_i_size;  	unsigned long start, end;  	int write_mode = (int)(unsigned long)fsdata; @@ -3086,44 +3030,36 @@ static int ext4_da_write_end(struct file *file,  				      len, copied, page, fsdata);  	trace_ext4_da_write_end(inode, pos, len, copied); + +	if (write_mode != CONVERT_INLINE_DATA && +	    ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && +	    ext4_has_inline_data(inode)) +		return ext4_write_inline_data_end(inode, pos, len, copied, page); +  	start = pos & (PAGE_SIZE - 1);  	end = start + copied - 1;  	/* -	 * generic_write_end() will run mark_inode_dirty() if i_size -	 * changes.  So let's piggyback the i_disksize mark_inode_dirty -	 * into that. +	 * Since we are holding inode lock, we are sure i_disksize <= +	 * i_size. We also know that if i_disksize < i_size, there are +	 * delalloc writes pending in the range upto i_size. If the end of +	 * the current write is <= i_size, there's no need to touch +	 * i_disksize since writeback will push i_disksize upto i_size +	 * eventually. If the end of the current write is > i_size and +	 * inside an allocated block (ext4_da_should_update_i_disksize() +	 * check), we need to update i_disksize here as neither +	 * ext4_writepage() nor certain ext4_writepages() paths not +	 * allocating blocks update i_disksize. +	 * +	 * Note that we defer inode dirtying to generic_write_end() / +	 * ext4_da_write_inline_data_end().  	 
*/  	new_i_size = pos + copied; -	if (copied && new_i_size > EXT4_I(inode)->i_disksize) { -		if (ext4_has_inline_data(inode) || -		    ext4_da_should_update_i_disksize(page, end)) { -			ext4_update_i_disksize(inode, new_i_size); -			/* We need to mark inode dirty even if -			 * new_i_size is less that inode->i_size -			 * bu greater than i_disksize.(hint delalloc) -			 */ -			ret = ext4_mark_inode_dirty(handle, inode); -		} -	} +	if (copied && new_i_size > inode->i_size && +	    ext4_da_should_update_i_disksize(page, end)) +		ext4_update_i_disksize(inode, new_i_size); -	if (write_mode != CONVERT_INLINE_DATA && -	    ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA) && -	    ext4_has_inline_data(inode)) -		ret2 = ext4_da_write_inline_data_end(inode, pos, len, copied, -						     page); -	else -		ret2 = generic_write_end(file, mapping, pos, len, copied, -							page, fsdata); - -	copied = ret2; -	if (ret2 < 0) -		ret = ret2; -	ret2 = ext4_journal_stop(handle); -	if (unlikely(ret2 && !ret)) -		ret = ret2; - -	return ret ? ret : copied; +	return generic_write_end(file, mapping, pos, len, copied, page, fsdata);  }  /* @@ -4340,6 +4276,12 @@ static int __ext4_get_inode_loc(struct super_block *sb, unsigned long ino,  		goto has_buffer;  	lock_buffer(bh); +	if (ext4_buffer_uptodate(bh)) { +		/* Someone brought it uptodate while we waited */ +		unlock_buffer(bh); +		goto has_buffer; +	} +  	/*  	 * If we have all information of the inode in memory and this  	 * is the only valid inode in the block, we need not read the |