Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	181
1 file changed, 149 insertions, 32 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index d1458ecf2f51..920e8dc03251 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -76,8 +76,9 @@
  *      ->swap_lock		(exclusive_swap_page, others)
  *        ->i_pages lock
  *
- *  ->i_mutex
- *    ->i_mmap_rwsem		(truncate->unmap_mapping_range)
+ *  ->i_rwsem
+ *    ->invalidate_lock		(acquired by fs in truncate path)
+ *      ->i_mmap_rwsem		(truncate->unmap_mapping_range)
  *
  *  ->mmap_lock
  *    ->i_mmap_rwsem
@@ -85,9 +86,10 @@
  *        ->i_pages lock	(arch-dependent flush_dcache_mmap_lock)
  *
  *  ->mmap_lock
- *    ->lock_page		(access_process_vm)
+ *    ->invalidate_lock		(filemap_fault)
+ *      ->lock_page		(filemap_fault, access_process_vm)
  *
- *  ->i_mutex			(generic_perform_write)
+ *  ->i_rwsem			(generic_perform_write)
  *    ->mmap_lock		(fault_in_pages_readable->do_page_fault)
  *
  *  bdi->wb.list_lock
@@ -378,6 +380,32 @@ static int filemap_check_and_keep_errors(struct address_space *mapping)
 }
 
 /**
+ * filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range
+ * @mapping:	address space structure to write
+ * @wbc:	the writeback_control controlling the writeout
+ *
+ * Call writepages on the mapping using the provided wbc to control the
+ * writeout.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int filemap_fdatawrite_wbc(struct address_space *mapping,
+			   struct writeback_control *wbc)
+{
+	int ret;
+
+	if (!mapping_can_writeback(mapping) ||
+	    !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+		return 0;
+
+	wbc_attach_fdatawrite_inode(wbc, mapping->host);
+	ret = do_writepages(mapping, wbc);
+	wbc_detach_inode(wbc);
+	return ret;
+}
+EXPORT_SYMBOL(filemap_fdatawrite_wbc);
+
+/**
  * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
  * @mapping:	address space structure to write
  * @start:	offset in bytes where the range starts
@@ -397,7 +425,6 @@ static int filemap_check_and_keep_errors(struct address_space *mapping)
 int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 				loff_t end, int sync_mode)
 {
-	int ret;
 	struct writeback_control wbc = {
 		.sync_mode = sync_mode,
 		.nr_to_write = LONG_MAX,
@@ -405,14 +432,7 @@ int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
 		.range_end = end,
 	};
 
-	if (!mapping_can_writeback(mapping) ||
-	    !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
-		return 0;
-
-	wbc_attach_fdatawrite_inode(&wbc, mapping->host);
-	ret = do_writepages(mapping, &wbc);
-	wbc_detach_inode(&wbc);
-	return ret;
+	return filemap_fdatawrite_wbc(mapping, &wbc);
 }
 
 static inline int __filemap_fdatawrite(struct address_space *mapping,
@@ -1008,6 +1028,44 @@ EXPORT_SYMBOL(__page_cache_alloc);
 #endif
 
 /*
+ * filemap_invalidate_lock_two - lock invalidate_lock for two mappings
+ *
+ * Lock exclusively invalidate_lock of any passed mapping that is not NULL.
+ *
+ * @mapping1: the first mapping to lock
+ * @mapping2: the second mapping to lock
+ */
+void filemap_invalidate_lock_two(struct address_space *mapping1,
+				 struct address_space *mapping2)
+{
+	if (mapping1 > mapping2)
+		swap(mapping1, mapping2);
+	if (mapping1)
+		down_write(&mapping1->invalidate_lock);
+	if (mapping2 && mapping1 != mapping2)
+		down_write_nested(&mapping2->invalidate_lock, 1);
+}
+EXPORT_SYMBOL(filemap_invalidate_lock_two);
+
+/*
+ * filemap_invalidate_unlock_two - unlock invalidate_lock for two mappings
+ *
+ * Unlock exclusive invalidate_lock of any passed mapping that is not NULL.
+ *
+ * @mapping1: the first mapping to unlock
+ * @mapping2: the second mapping to unlock
+ */
+void filemap_invalidate_unlock_two(struct address_space *mapping1,
+				   struct address_space *mapping2)
+{
+	if (mapping1)
+		up_write(&mapping1->invalidate_lock);
+	if (mapping2 && mapping1 != mapping2)
+		up_write(&mapping2->invalidate_lock);
+}
+EXPORT_SYMBOL(filemap_invalidate_unlock_two);
+
+/*
  * In order to wait for pages to become available there must be
  * waitqueues associated with pages. By using a hash table of
  * waitqueues where the bucket discipline is to maintain all
@@ -2368,20 +2426,30 @@ static int filemap_update_page(struct kiocb *iocb,
 {
 	int error;
 
+	if (iocb->ki_flags & IOCB_NOWAIT) {
+		if (!filemap_invalidate_trylock_shared(mapping))
+			return -EAGAIN;
+	} else {
+		filemap_invalidate_lock_shared(mapping);
+	}
+
 	if (!trylock_page(page)) {
+		error = -EAGAIN;
 		if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
-			return -EAGAIN;
+			goto unlock_mapping;
 		if (!(iocb->ki_flags & IOCB_WAITQ)) {
+			filemap_invalidate_unlock_shared(mapping);
 			put_and_wait_on_page_locked(page, TASK_KILLABLE);
 			return AOP_TRUNCATED_PAGE;
 		}
 		error = __lock_page_async(page, iocb->ki_waitq);
 		if (error)
-			return error;
+			goto unlock_mapping;
 	}
 
+	error = AOP_TRUNCATED_PAGE;
 	if (!page->mapping)
-		goto truncated;
+		goto unlock;
 
 	error = 0;
 	if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
@@ -2392,15 +2460,13 @@
 		goto unlock;
 
 	error = filemap_read_page(iocb->ki_filp, mapping, page);
-	if (error == AOP_TRUNCATED_PAGE)
-		put_page(page);
-	return error;
-truncated:
-	unlock_page(page);
-	put_page(page);
-	return AOP_TRUNCATED_PAGE;
+	goto unlock_mapping;
 unlock:
 	unlock_page(page);
+unlock_mapping:
+	filemap_invalidate_unlock_shared(mapping);
+	if (error == AOP_TRUNCATED_PAGE)
+		put_page(page);
 	return error;
 }
 
@@ -2415,6 +2481,19 @@ static int filemap_create_page(struct file *file,
 	if (!page)
 		return -ENOMEM;
 
+	/*
+	 * Protect against truncate / hole punch. Grabbing invalidate_lock here
+	 * assures we cannot instantiate and bring uptodate new pagecache pages
+	 * after evicting page cache during truncate and before actually
+	 * freeing blocks.  Note that we could release invalidate_lock after
+	 * inserting the page into page cache as the locked page would then be
+	 * enough to synchronize with hole punching. But there are code paths
+	 * such as filemap_update_page() filling in partially uptodate pages or
+	 * ->readpages() that need to hold invalidate_lock while mapping blocks
+	 * for IO so let's hold the lock here as well to keep locking rules
+	 * simple.
+	 */
+	filemap_invalidate_lock_shared(mapping);
 	error = add_to_page_cache_lru(page, mapping, index,
 			mapping_gfp_constraint(mapping, GFP_KERNEL));
 	if (error == -EEXIST)
@@ -2426,9 +2505,11 @@
 	if (error)
 		goto error;
 
+	filemap_invalidate_unlock_shared(mapping);
 	pagevec_add(pvec, page);
 	return 0;
 error:
+	filemap_invalidate_unlock_shared(mapping);
 	put_page(page);
 	return error;
 }
@@ -2967,6 +3048,7 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	pgoff_t max_off;
 	struct page *page;
 	vm_fault_t ret = 0;
+	bool mapping_locked = false;
 
 	max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
 	if (unlikely(offset >= max_off))
@@ -2976,25 +3058,39 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 	 * Do we have something in the page cache already?
 	 */
 	page = find_get_page(mapping, offset);
-	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
+	if (likely(page)) {
 		/*
-		 * We found the page, so try async readahead before
-		 * waiting for the lock.
+		 * We found the page, so try async readahead before waiting for
+		 * the lock.
 		 */
-		fpin = do_async_mmap_readahead(vmf, page);
-	} else if (!page) {
+		if (!(vmf->flags & FAULT_FLAG_TRIED))
+			fpin = do_async_mmap_readahead(vmf, page);
+		if (unlikely(!PageUptodate(page))) {
+			filemap_invalidate_lock_shared(mapping);
+			mapping_locked = true;
+		}
+	} else {
 		/* No page in the page cache at all */
 		count_vm_event(PGMAJFAULT);
 		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
 		fpin = do_sync_mmap_readahead(vmf);
 retry_find:
+		/*
+		 * See comment in filemap_create_page() why we need
+		 * invalidate_lock
+		 */
+		if (!mapping_locked) {
+			filemap_invalidate_lock_shared(mapping);
+			mapping_locked = true;
+		}
 		page = pagecache_get_page(mapping, offset,
 					  FGP_CREAT|FGP_FOR_MMAP,
 					  vmf->gfp_mask);
 		if (!page) {
 			if (fpin)
 				goto out_retry;
+			filemap_invalidate_unlock_shared(mapping);
 			return VM_FAULT_OOM;
 		}
 	}
@@ -3014,8 +3110,20 @@ retry_find:
 	 * We have a locked page in the page cache, now we need to check
 	 * that it's up-to-date. If not, it is going to be due to an error.
 	 */
-	if (unlikely(!PageUptodate(page)))
+	if (unlikely(!PageUptodate(page))) {
+		/*
+		 * The page was in cache and uptodate and now it is not.
+		 * Strange but possible since we didn't hold the page lock all
+		 * the time. Let's drop everything get the invalidate lock and
+		 * try again.
+		 */
+		if (!mapping_locked) {
+			unlock_page(page);
+			put_page(page);
+			goto retry_find;
+		}
 		goto page_not_uptodate;
+	}
 
 	/*
 	 * We've made it this far and we had to drop our mmap_lock, now is the
@@ -3026,6 +3134,8 @@ retry_find:
 		unlock_page(page);
 		goto out_retry;
 	}
+	if (mapping_locked)
+		filemap_invalidate_unlock_shared(mapping);
 
 	/*
 	 * Found the page and have a reference on it.
@@ -3056,6 +3166,7 @@ page_not_uptodate:
 	if (!error || error == AOP_TRUNCATED_PAGE)
 		goto retry_find;
 
+	filemap_invalidate_unlock_shared(mapping);
 	return VM_FAULT_SIGBUS;
 
 out_retry:
@@ -3067,6 +3178,8 @@ out_retry:
 	 */
 	if (page)
 		put_page(page);
+	if (mapping_locked)
+		filemap_invalidate_unlock_shared(mapping);
 	if (fpin)
 		fput(fpin);
 	return ret | VM_FAULT_RETRY;
@@ -3437,6 +3550,8 @@ out:
  *
  * If the page does not get brought uptodate, return -EIO.
  *
+ * The function expects mapping->invalidate_lock to be already held.
+ *
  * Return: up to date page on success, ERR_PTR() on failure.
  */
 struct page *read_cache_page(struct address_space *mapping,
@@ -3460,6 +3575,8 @@ EXPORT_SYMBOL(read_cache_page);
  *
  * If the page does not get brought uptodate, return -EIO.
  *
+ * The function expects mapping->invalidate_lock to be already held.
+ *
  * Return: up to date page on success, ERR_PTR() on failure.
  */
 struct page *read_cache_page_gfp(struct address_space *mapping,
@@ -3704,12 +3821,12 @@ EXPORT_SYMBOL(generic_perform_write);
  * modification times and calls proper subroutines depending on whether we
  * do direct IO or a standard buffered write.
  *
- * It expects i_mutex to be grabbed unless we work on a block device or similar
+ * It expects i_rwsem to be grabbed unless we work on a block device or similar
  * object which does not need locking at all.
  *
  * This function does *not* take care of syncing data in case of O_SYNC write.
 * A caller has to handle it. This is mainly due to the fact that we want to
- * avoid syncing under i_mutex.
+ * avoid syncing under i_rwsem.
 *
 * Return:
 * * number of bytes written, even for truncated writes
@@ -3797,7 +3914,7 @@ EXPORT_SYMBOL(__generic_file_write_iter);
 *
 * This is a wrapper around __generic_file_write_iter() to be used by most
 * filesystems. It takes care of syncing the file in case of O_SYNC file
- * and acquires i_mutex as needed.
+ * and acquires i_rwsem as needed.
 * Return:
 * * negative error code if no data has been written at all of
 *   vfs_fsync_range() failed for a synchronous write
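For context, the shared acquisitions added above (filemap_fault, filemap_update_page, filemap_create_page) are only half of the scheme: the filesystem side is expected to take invalidate_lock exclusively around truncate and hole punching. The sketch below is not part of this diff; it is a minimal, hypothetical example (example_punch_hole is made up, error handling and block freeing are elided) assuming the exclusive helpers filemap_invalidate_lock()/filemap_invalidate_unlock() introduced by the header part of this series.

	/* Hypothetical illustration only -- not taken from this commit. */
	#include <linux/fs.h>
	#include <linux/mm.h>
	#include <linux/pagemap.h>

	static int example_punch_hole(struct inode *inode, loff_t offset, loff_t len)
	{
		struct address_space *mapping = inode->i_mapping;
		int err;

		/* i_rwsem ranks above invalidate_lock per the updated lock order. */
		inode_lock(inode);

		/*
		 * Block instantiation of new pagecache pages (filemap_fault,
		 * readahead, filemap_create_page) while we evict the range and
		 * free the underlying blocks.
		 */
		filemap_invalidate_lock(mapping);

		/* Flush dirty data so writeback cannot touch the punched range. */
		err = filemap_write_and_wait_range(mapping, offset, offset + len - 1);
		if (err)
			goto out;

		truncate_pagecache_range(inode, offset, offset + len - 1);
		/* ... filesystem-specific block freeing would go here ... */

	out:
		filemap_invalidate_unlock(mapping);
		inode_unlock(inode);
		return err;
	}

This mirrors the ordering documented in the lock-ordering comment at the top of the diff: i_rwsem, then invalidate_lock, then i_mmap_rwsem / page locks underneath.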