diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 183 |
1 files changed, 134 insertions, 49 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 43700480d897..66f7e9fdfbc4 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -142,17 +142,6 @@ static void page_cache_delete(struct address_space *mapping, page->mapping = NULL; /* Leave page->index set: truncation lookup relies upon it */ - - if (shadow) { - mapping->nrexceptional += nr; - /* - * Make sure the nrexceptional update is committed before - * the nrpages update so that final truncate racing - * with reclaim does not see both counters 0 at the - * same time and miss a shadow entry. - */ - smp_wmb(); - } mapping->nrpages -= nr; } @@ -629,13 +618,53 @@ EXPORT_SYMBOL(filemap_fdatawait_keep_errors); /* Returns true if writeback might be needed or already in progress. */ static bool mapping_needs_writeback(struct address_space *mapping) { - if (dax_mapping(mapping)) - return mapping->nrexceptional; - return mapping->nrpages; } /** + * filemap_range_needs_writeback - check if range potentially needs writeback + * @mapping: address space within which to check + * @start_byte: offset in bytes where the range starts + * @end_byte: offset in bytes where the range ends (inclusive) + * + * Find at least one page in the range supplied, usually used to check if + * direct writing in this range will trigger a writeback. Used by O_DIRECT + * read/write with IOCB_NOWAIT, to see if the caller needs to do + * filemap_write_and_wait_range() before proceeding. + * + * Return: %true if the caller should do filemap_write_and_wait_range() before + * doing O_DIRECT to a page in this range, %false otherwise. + */ +bool filemap_range_needs_writeback(struct address_space *mapping, + loff_t start_byte, loff_t end_byte) +{ + XA_STATE(xas, &mapping->i_pages, start_byte >> PAGE_SHIFT); + pgoff_t max = end_byte >> PAGE_SHIFT; + struct page *page; + + if (!mapping_needs_writeback(mapping)) + return false; + if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) && + !mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + return false; + if (end_byte < start_byte) + return false; + + rcu_read_lock(); + xas_for_each(&xas, page, max) { + if (xas_retry(&xas, page)) + continue; + if (xa_is_value(page)) + continue; + if (PageDirty(page) || PageLocked(page) || PageWriteback(page)) + break; + } + rcu_read_unlock(); + return page != NULL; +} +EXPORT_SYMBOL_GPL(filemap_range_needs_writeback); + +/** * filemap_write_and_wait_range - write out & wait on a file range * @mapping: the address_space for the pages * @lstart: offset in bytes where the range starts @@ -882,8 +911,6 @@ noinline int __add_to_page_cache_locked(struct page *page, if (xas_error(&xas)) goto unlock; - if (old) - mapping->nrexceptional--; mapping->nrpages++; /* hugetlb pages do not participate in page cache accounting */ @@ -1433,6 +1460,67 @@ void unlock_page(struct page *page) EXPORT_SYMBOL(unlock_page); /** + * end_page_private_2 - Clear PG_private_2 and release any waiters + * @page: The page + * + * Clear the PG_private_2 bit on a page and wake up any sleepers waiting for + * this. The page ref held for PG_private_2 being set is released. + * + * This is, for example, used when a netfs page is being written to a local + * disk cache, thereby allowing writes to the cache for the same page to be + * serialised. + */ +void end_page_private_2(struct page *page) +{ + page = compound_head(page); + VM_BUG_ON_PAGE(!PagePrivate2(page), page); + clear_bit_unlock(PG_private_2, &page->flags); + wake_up_page_bit(page, PG_private_2); + put_page(page); +} +EXPORT_SYMBOL(end_page_private_2); + +/** + * wait_on_page_private_2 - Wait for PG_private_2 to be cleared on a page + * @page: The page to wait on + * + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page. + */ +void wait_on_page_private_2(struct page *page) +{ + page = compound_head(page); + while (PagePrivate2(page)) + wait_on_page_bit(page, PG_private_2); +} +EXPORT_SYMBOL(wait_on_page_private_2); + +/** + * wait_on_page_private_2_killable - Wait for PG_private_2 to be cleared on a page + * @page: The page to wait on + * + * Wait for PG_private_2 (aka PG_fscache) to be cleared on a page or until a + * fatal signal is received by the calling task. + * + * Return: + * - 0 if successful. + * - -EINTR if a fatal signal was encountered. + */ +int wait_on_page_private_2_killable(struct page *page) +{ + int ret = 0; + + page = compound_head(page); + while (PagePrivate2(page)) { + ret = wait_on_page_bit_killable(page, PG_private_2); + if (ret < 0) + break; + } + + return ret; +} +EXPORT_SYMBOL(wait_on_page_private_2_killable); + +/** * end_page_writeback - end writeback against a page * @page: the page */ @@ -1663,7 +1751,7 @@ EXPORT_SYMBOL(page_cache_prev_miss); * @mapping: the address_space to search * @index: The page cache index. * - * Looks up the page cache slot at @mapping & @offset. If there is a + * Looks up the page cache slot at @mapping & @index. If there is a * page cache page, the head page is returned with an increased refcount. * * If the slot holds a shadow entry of a previously evicted page, or a @@ -1969,8 +2057,14 @@ unlock: put: put_page(page); next: - if (!xa_is_value(page) && PageTransHuge(page)) - xas_set(&xas, page->index + thp_nr_pages(page)); + if (!xa_is_value(page) && PageTransHuge(page)) { + unsigned int nr_pages = thp_nr_pages(page); + + /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ + xas_set(&xas, page->index + nr_pages); + if (xas.xa_index < nr_pages) + break; + } } rcu_read_unlock(); @@ -2238,8 +2332,6 @@ static int filemap_read_page(struct file *file, struct address_space *mapping, return error; if (PageUptodate(page)) return 0; - if (!page->mapping) /* page truncated */ - return AOP_TRUNCATED_PAGE; shrink_readahead_size_eio(&file->f_ra); return -EIO; } @@ -2571,8 +2663,8 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) size = i_size_read(inode); if (iocb->ki_flags & IOCB_NOWAIT) { - if (filemap_range_has_page(mapping, iocb->ki_pos, - iocb->ki_pos + count - 1)) + if (filemap_range_needs_writeback(mapping, iocb->ki_pos, + iocb->ki_pos + count - 1)) return -EAGAIN; } else { retval = filemap_write_and_wait_range(mapping, @@ -2663,7 +2755,7 @@ unsigned int seek_page_size(struct xa_state *xas, struct page *page) * entirely memory-based such as tmpfs, and filesystems which support * unwritten extents. * - * Return: The requested offset on successs, or -ENXIO if @whence specifies + * Return: The requested offset on success, or -ENXIO if @whence specifies * SEEK_DATA and there is no data after @start. There is an implicit hole * after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start * and @end contain data. @@ -2672,7 +2764,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, loff_t end, int whence) { XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT); - pgoff_t max = (end - 1) / PAGE_SIZE; + pgoff_t max = (end - 1) >> PAGE_SHIFT; bool seek_data = (whence == SEEK_DATA); struct page *page; @@ -2681,7 +2773,8 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, rcu_read_lock(); while ((page = find_get_entry(&xas, max, XA_PRESENT))) { - loff_t pos = xas.xa_index * PAGE_SIZE; + loff_t pos = (u64)xas.xa_index << PAGE_SHIFT; + unsigned int seek_size; if (start < pos) { if (!seek_data) @@ -2689,25 +2782,25 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, start = pos; } - pos += seek_page_size(&xas, page); + seek_size = seek_page_size(&xas, page); + pos = round_up(pos + 1, seek_size); start = page_seek_hole_data(&xas, mapping, page, start, pos, seek_data); if (start < pos) goto unlock; + if (start >= end) + break; + if (seek_size > PAGE_SIZE) + xas_set(&xas, pos >> PAGE_SHIFT); if (!xa_is_value(page)) put_page(page); } - rcu_read_unlock(); - if (seek_data) - return -ENXIO; - goto out; - + start = -ENXIO; unlock: rcu_read_unlock(); - if (!xa_is_value(page)) + if (page && !xa_is_value(page)) put_page(page); -out: if (start > end) return end; return start; @@ -2771,7 +2864,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) struct file *file = vmf->vma->vm_file; struct file_ra_state *ra = &file->f_ra; struct address_space *mapping = file->f_mapping; - DEFINE_READAHEAD(ractl, file, mapping, vmf->pgoff); + DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff); struct file *fpin = NULL; unsigned int mmap_miss; @@ -2783,7 +2876,7 @@ static struct file *do_sync_mmap_readahead(struct vm_fault *vmf) if (vmf->vma->vm_flags & VM_SEQ_READ) { fpin = maybe_unlock_mmap_for_io(vmf, fpin); - page_cache_sync_ra(&ractl, ra, ra->ra_pages); + page_cache_sync_ra(&ractl, ra->ra_pages); return fpin; } @@ -2869,7 +2962,6 @@ vm_fault_t filemap_fault(struct vm_fault *vmf) struct file *file = vmf->vma->vm_file; struct file *fpin = NULL; struct address_space *mapping = file->f_mapping; - struct file_ra_state *ra = &file->f_ra; struct inode *inode = mapping->host; pgoff_t offset = vmf->pgoff; pgoff_t max_off; @@ -2956,14 +3048,8 @@ page_not_uptodate: * because there really aren't any performance issues here * and we need to check for errors. */ - ClearPageError(page); fpin = maybe_unlock_mmap_for_io(vmf, fpin); - error = mapping->a_ops->readpage(file, page); - if (!error) { - wait_on_page_locked(page); - if (!PageUptodate(page)) - error = -EIO; - } + error = filemap_read_page(file, mapping, page); if (fpin) goto out_retry; put_page(page); @@ -2971,7 +3057,6 @@ page_not_uptodate: if (!error || error == AOP_TRUNCATED_PAGE) goto retry_find; - shrink_readahead_size_eio(ra); return VM_FAULT_SIGBUS; out_retry: @@ -3182,7 +3267,7 @@ const struct vm_operations_struct generic_file_vm_ops = { /* This is used for a general mmap of a disk file */ -int generic_file_mmap(struct file * file, struct vm_area_struct * vma) +int generic_file_mmap(struct file *file, struct vm_area_struct *vma) { struct address_space *mapping = file->f_mapping; @@ -3207,11 +3292,11 @@ vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf) { return VM_FAULT_SIGBUS; } -int generic_file_mmap(struct file * file, struct vm_area_struct * vma) +int generic_file_mmap(struct file *file, struct vm_area_struct *vma) { return -ENOSYS; } -int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma) +int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma) { return -ENOSYS; } @@ -3639,7 +3724,7 @@ EXPORT_SYMBOL(generic_perform_write); ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - struct address_space * mapping = file->f_mapping; + struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t written = 0; ssize_t err; |