Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r--  fs/xfs/xfs_file.c | 532
1 file changed, 87 insertions(+), 445 deletions(-)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index bbb9eb6811b2..c4893e226fd8 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -140,7 +140,7 @@ xfs_file_fsync( trace_xfs_file_fsync(ip); - error = filemap_write_and_wait_range(inode->i_mapping, start, end); + error = file_write_and_wait_range(file, start, end); if (error) return error; @@ -237,7 +237,11 @@ xfs_file_dax_read( if (!count) return 0; /* skip atime */ - xfs_ilock(ip, XFS_IOLOCK_SHARED); + if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, XFS_IOLOCK_SHARED); + } ret = dax_iomap_rw(iocb, to, &xfs_iomap_ops); xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -527,12 +531,25 @@ xfs_file_dio_aio_write( if ((iocb->ki_pos & mp->m_blockmask) || ((iocb->ki_pos + count) & mp->m_blockmask)) { unaligned_io = 1; + + /* + * We can't properly handle unaligned direct I/O to reflink + * files yet, as we can't unshare a partial block. + */ + if (xfs_is_reflink_inode(ip)) { + trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count); + return -EREMCHG; + } iolock = XFS_IOLOCK_EXCL; } else { iolock = XFS_IOLOCK_SHARED; } - xfs_ilock(ip, iolock); + if (!xfs_ilock_nowait(ip, iolock)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, iolock); + } ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) @@ -544,22 +561,20 @@ xfs_file_dio_aio_write( * otherwise demote the lock if we had to take the exclusive lock * for other reasons in xfs_file_aio_write_checks. */ - if (unaligned_io) - inode_dio_wait(inode); - else if (iolock == XFS_IOLOCK_EXCL) { + if (unaligned_io) { + /* If we are going to wait for other DIO to finish, bail */ + if (iocb->ki_flags & IOCB_NOWAIT) { + if (atomic_read(&inode->i_dio_count)) + return -EAGAIN; + } else { + inode_dio_wait(inode); + } + } else if (iolock == XFS_IOLOCK_EXCL) { xfs_ilock_demote(ip, XFS_IOLOCK_EXCL); iolock = XFS_IOLOCK_SHARED; } trace_xfs_file_direct_write(ip, count, iocb->ki_pos); - - /* If this is a block-aligned directio CoW, remap immediately. */ - if (xfs_is_reflink_inode(ip) && !unaligned_io) { - ret = xfs_reflink_allocate_cow_range(ip, iocb->ki_pos, count); - if (ret) - goto out; - } - ret = iomap_dio_rw(iocb, from, &xfs_iomap_ops, xfs_dio_write_end_io); out: xfs_iunlock(ip, iolock); @@ -584,7 +599,12 @@ xfs_file_dax_write( size_t count; loff_t pos; - xfs_ilock(ip, iolock); + if (!xfs_ilock_nowait(ip, iolock)) { + if (iocb->ki_flags & IOCB_NOWAIT) + return -EAGAIN; + xfs_ilock(ip, iolock); + } + ret = xfs_file_aio_write_checks(iocb, from, &iolock); if (ret) goto out; @@ -614,8 +634,10 @@ xfs_file_buffered_aio_write( struct xfs_inode *ip = XFS_I(inode); ssize_t ret; int enospc = 0; - int iolock = XFS_IOLOCK_EXCL; + int iolock; +write_retry: + iolock = XFS_IOLOCK_EXCL; xfs_ilock(ip, iolock); ret = xfs_file_aio_write_checks(iocb, from, &iolock); @@ -625,7 +647,6 @@ xfs_file_buffered_aio_write( /* We can write back this queue in page reclaim */ current->backing_dev_info = inode_to_bdi(inode); -write_retry: trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos); ret = iomap_file_buffered_write(iocb, from, &xfs_iomap_ops); if (likely(ret >= 0)) @@ -641,26 +662,31 @@ write_retry: * running at the same time. 
*/ if (ret == -EDQUOT && !enospc) { + xfs_iunlock(ip, iolock); enospc = xfs_inode_free_quota_eofblocks(ip); if (enospc) goto write_retry; enospc = xfs_inode_free_quota_cowblocks(ip); if (enospc) goto write_retry; + iolock = 0; } else if (ret == -ENOSPC && !enospc) { struct xfs_eofblocks eofb = {0}; enospc = 1; xfs_flush_inodes(ip->i_mount); - eofb.eof_scan_owner = ip->i_ino; /* for locking */ + + xfs_iunlock(ip, iolock); eofb.eof_flags = XFS_EOF_FLAGS_SYNC; xfs_icache_free_eofblocks(ip->i_mount, &eofb); + xfs_icache_free_cowblocks(ip->i_mount, &eofb); goto write_retry; } current->backing_dev_info = NULL; out: - xfs_iunlock(ip, iolock); + if (iolock) + xfs_iunlock(ip, iolock); return ret; } @@ -748,7 +774,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; if (offset & blksize_mask || len & blksize_mask) { error = -EINVAL; @@ -770,7 +796,7 @@ xfs_file_fallocate( if (error) goto out_unlock; } else if (mode & FALLOC_FL_INSERT_RANGE) { - unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + unsigned int blksize_mask = i_blocksize(inode) - 1; new_size = i_size_read(inode) + len; if (offset & blksize_mask || len & blksize_mask) { @@ -886,6 +912,7 @@ xfs_file_open( return -EFBIG; if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb))) return -EIO; + file->f_mode |= FMODE_AIO_NOWAIT; return 0; } @@ -908,9 +935,9 @@ xfs_dir_open( */ mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - xfs_dir3_data_readahead(ip, 0, -1); + error = xfs_dir3_data_readahead(ip, 0, -1); xfs_iunlock(ip, mode); - return 0; + return error; } STATIC int @@ -944,395 +971,7 @@ xfs_file_readdir( */ bufsize = (size_t)min_t(loff_t, 32768, ip->i_d.di_size); - return xfs_readdir(ip, ctx, bufsize); -} - -/* - * This type is designed to indicate the type of offset we would like - * to search from page cache for xfs_seek_hole_data(). - */ -enum { - HOLE_OFF = 0, - DATA_OFF, -}; - -/* - * Lookup the desired type of offset from the given page. - * - * On success, return true and the offset argument will point to the - * start of the region that was found. Otherwise this function will - * return false and keep the offset argument unchanged. - */ -STATIC bool -xfs_lookup_buffer_offset( - struct page *page, - loff_t *offset, - unsigned int type) -{ - loff_t lastoff = page_offset(page); - bool found = false; - struct buffer_head *bh, *head; - - bh = head = page_buffers(page); - do { - /* - * Unwritten extents that have data in the page - * cache covering them can be identified by the - * BH_Unwritten state flag. Pages with multiple - * buffers might have a mix of holes, data and - * unwritten extents - any buffer with valid - * data in it should have BH_Uptodate flag set - * on it. - */ - if (buffer_unwritten(bh) || - buffer_uptodate(bh)) { - if (type == DATA_OFF) - found = true; - } else { - if (type == HOLE_OFF) - found = true; - } - - if (found) { - *offset = lastoff; - break; - } - lastoff += bh->b_size; - } while ((bh = bh->b_this_page) != head); - - return found; -} - -/* - * This routine is called to find out and return a data or hole offset - * from the page cache for unwritten extents according to the desired - * type for xfs_seek_hole_data(). - * - * The argument offset is used to tell where we start to search from the - * page cache. Map is used to figure out the end points of the range to - * lookup pages. 
- * - * Return true if the desired type of offset was found, and the argument - * offset is filled with that address. Otherwise, return false and keep - * offset unchanged. - */ -STATIC bool -xfs_find_get_desired_pgoff( - struct inode *inode, - struct xfs_bmbt_irec *map, - unsigned int type, - loff_t *offset) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - struct pagevec pvec; - pgoff_t index; - pgoff_t end; - loff_t endoff; - loff_t startoff = *offset; - loff_t lastoff = startoff; - bool found = false; - - pagevec_init(&pvec, 0); - - index = startoff >> PAGE_SHIFT; - endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount); - end = endoff >> PAGE_SHIFT; - do { - int want; - unsigned nr_pages; - unsigned int i; - - want = min_t(pgoff_t, end - index, PAGEVEC_SIZE); - nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index, - want); - /* - * No page mapped into given range. If we are searching holes - * and if this is the first time we got into the loop, it means - * that the given offset is landed in a hole, return it. - * - * If we have already stepped through some block buffers to find - * holes but they all contains data. In this case, the last - * offset is already updated and pointed to the end of the last - * mapped page, if it does not reach the endpoint to search, - * that means there should be a hole between them. - */ - if (nr_pages == 0) { - /* Data search found nothing */ - if (type == DATA_OFF) - break; - - ASSERT(type == HOLE_OFF); - if (lastoff == startoff || lastoff < endoff) { - found = true; - *offset = lastoff; - } - break; - } - - /* - * At lease we found one page. If this is the first time we - * step into the loop, and if the first page index offset is - * greater than the given search offset, a hole was found. - */ - if (type == HOLE_OFF && lastoff == startoff && - lastoff < page_offset(pvec.pages[0])) { - found = true; - break; - } - - for (i = 0; i < nr_pages; i++) { - struct page *page = pvec.pages[i]; - loff_t b_offset; - - /* - * At this point, the page may be truncated or - * invalidated (changing page->mapping to NULL), - * or even swizzled back from swapper_space to tmpfs - * file mapping. However, page->index will not change - * because we have a reference on the page. - * - * Searching done if the page index is out of range. - * If the current offset is not reaches the end of - * the specified search range, there should be a hole - * between them. - */ - if (page->index > end) { - if (type == HOLE_OFF && lastoff < endoff) { - *offset = lastoff; - found = true; - } - goto out; - } - - lock_page(page); - /* - * Page truncated or invalidated(page->mapping == NULL). - * We can freely skip it and proceed to check the next - * page. - */ - if (unlikely(page->mapping != inode->i_mapping)) { - unlock_page(page); - continue; - } - - if (!page_has_buffers(page)) { - unlock_page(page); - continue; - } - - found = xfs_lookup_buffer_offset(page, &b_offset, type); - if (found) { - /* - * The found offset may be less than the start - * point to search if this is the first time to - * come here. - */ - *offset = max_t(loff_t, startoff, b_offset); - unlock_page(page); - goto out; - } - - /* - * We either searching data but nothing was found, or - * searching hole but found a data buffer. In either - * case, probably the next page contains the desired - * things, update the last offset to it so. 
- */ - lastoff = page_offset(page) + PAGE_SIZE; - unlock_page(page); - } - - /* - * The number of returned pages less than our desired, search - * done. In this case, nothing was found for searching data, - * but we found a hole behind the last offset. - */ - if (nr_pages < want) { - if (type == HOLE_OFF) { - *offset = lastoff; - found = true; - } - break; - } - - index = pvec.pages[i - 1]->index + 1; - pagevec_release(&pvec); - } while (index <= end); - -out: - pagevec_release(&pvec); - return found; -} - -/* - * caller must lock inode with xfs_ilock_data_map_shared, - * can we craft an appropriate ASSERT? - * - * end is because the VFS-level lseek interface is defined such that any - * offset past i_size shall return -ENXIO, but we use this for quota code - * which does not maintain i_size, and we want to SEEK_DATA past i_size. - */ -loff_t -__xfs_seek_hole_data( - struct inode *inode, - loff_t start, - loff_t end, - int whence) -{ - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - loff_t uninitialized_var(offset); - xfs_fileoff_t fsbno; - xfs_filblks_t lastbno; - int error; - - if (start >= end) { - error = -ENXIO; - goto out_error; - } - - /* - * Try to read extents from the first block indicated - * by fsbno to the end block of the file. - */ - fsbno = XFS_B_TO_FSBT(mp, start); - lastbno = XFS_B_TO_FSB(mp, end); - - for (;;) { - struct xfs_bmbt_irec map[2]; - int nmap = 2; - unsigned int i; - - error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap, - XFS_BMAPI_ENTIRE); - if (error) - goto out_error; - - /* No extents at given offset, must be beyond EOF */ - if (nmap == 0) { - error = -ENXIO; - goto out_error; - } - - for (i = 0; i < nmap; i++) { - offset = max_t(loff_t, start, - XFS_FSB_TO_B(mp, map[i].br_startoff)); - - /* Landed in the hole we wanted? */ - if (whence == SEEK_HOLE && - map[i].br_startblock == HOLESTARTBLOCK) - goto out; - - /* Landed in the data extent we wanted? */ - if (whence == SEEK_DATA && - (map[i].br_startblock == DELAYSTARTBLOCK || - (map[i].br_state == XFS_EXT_NORM && - !isnullstartblock(map[i].br_startblock)))) - goto out; - - /* - * Landed in an unwritten extent, try to search - * for hole or data from page cache. - */ - if (map[i].br_state == XFS_EXT_UNWRITTEN) { - if (xfs_find_get_desired_pgoff(inode, &map[i], - whence == SEEK_HOLE ? HOLE_OFF : DATA_OFF, - &offset)) - goto out; - } - } - - /* - * We only received one extent out of the two requested. This - * means we've hit EOF and didn't find what we are looking for. - */ - if (nmap == 1) { - /* - * If we were looking for a hole, set offset to - * the end of the file (i.e., there is an implicit - * hole at the end of any file). - */ - if (whence == SEEK_HOLE) { - offset = end; - break; - } - /* - * If we were looking for data, it's nowhere to be found - */ - ASSERT(whence == SEEK_DATA); - error = -ENXIO; - goto out_error; - } - - ASSERT(i > 1); - - /* - * Nothing was found, proceed to the next round of search - * if the next reading offset is not at or beyond EOF. - */ - fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; - start = XFS_FSB_TO_B(mp, fsbno); - if (start >= end) { - if (whence == SEEK_HOLE) { - offset = end; - break; - } - ASSERT(whence == SEEK_DATA); - error = -ENXIO; - goto out_error; - } - } - -out: - /* - * If at this point we have found the hole we wanted, the returned - * offset may be bigger than the file size as it may be aligned to - * page boundary for unwritten extents. We need to deal with this - * situation in particular. 
- */ - if (whence == SEEK_HOLE) - offset = min_t(loff_t, offset, end); - - return offset; - -out_error: - return error; -} - -STATIC loff_t -xfs_seek_hole_data( - struct file *file, - loff_t start, - int whence) -{ - struct inode *inode = file->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - struct xfs_mount *mp = ip->i_mount; - uint lock; - loff_t offset, end; - int error = 0; - - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - lock = xfs_ilock_data_map_shared(ip); - - end = i_size_read(inode); - offset = __xfs_seek_hole_data(inode, start, end, whence); - if (offset < 0) { - error = offset; - goto out_unlock; - } - - offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); - -out_unlock: - xfs_iunlock(ip, lock); - - if (error) - return error; - return offset; + return xfs_readdir(NULL, ip, ctx, bufsize); } STATIC loff_t @@ -1341,17 +980,25 @@ xfs_file_llseek( loff_t offset, int whence) { + struct inode *inode = file->f_mapping->host; + + if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount)) + return -EIO; + switch (whence) { - case SEEK_END: - case SEEK_CUR: - case SEEK_SET: + default: return generic_file_llseek(file, offset, whence); case SEEK_HOLE: + offset = iomap_seek_hole(inode, offset, &xfs_iomap_ops); + break; case SEEK_DATA: - return xfs_seek_hole_data(file, offset, whence); - default: - return -EINVAL; + offset = iomap_seek_data(inode, offset, &xfs_iomap_ops); + break; } + + if (offset < 0) + return offset; + return vfs_setpos(file, offset, inode->i_sb->s_maxbytes); } /* @@ -1373,22 +1020,21 @@ xfs_file_llseek( */ STATIC int xfs_filemap_page_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_page_mkwrite(XFS_I(inode)); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) { - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); } else { - ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops); + ret = iomap_page_mkwrite(vmf, &xfs_iomap_ops); ret = block_page_mkwrite_return(ret); } @@ -1400,23 +1046,22 @@ xfs_filemap_page_mkwrite( STATIC int xfs_filemap_fault( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); int ret; trace_xfs_filemap_fault(XFS_I(inode)); /* DAX can shortcut the normal fault path on write faults! */ if ((vmf->flags & FAULT_FLAG_WRITE) && IS_DAX(inode)) - return xfs_filemap_page_mkwrite(vma, vmf); + return xfs_filemap_page_mkwrite(vmf); xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); if (IS_DAX(inode)) - ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, PE_SIZE_PTE, &xfs_iomap_ops); else - ret = filemap_fault(vma, vmf); + ret = filemap_fault(vmf); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); return ret; @@ -1425,36 +1070,34 @@ xfs_filemap_fault( /* * Similar to xfs_filemap_fault(), the DAX fault path can call into here on * both read and write faults. Hence we need to handle both cases. There is no - * ->pmd_mkwrite callout for huge pages, so we have a single function here to + * ->huge_mkwrite callout for huge pages, so we have a single function here to * handle both cases here. @flags carries the information on the type of fault * occuring. 
*/ STATIC int -xfs_filemap_pmd_fault( - struct vm_area_struct *vma, - unsigned long addr, - pmd_t *pmd, - unsigned int flags) +xfs_filemap_huge_fault( + struct vm_fault *vmf, + enum page_entry_size pe_size) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); int ret; if (!IS_DAX(inode)) return VM_FAULT_FALLBACK; - trace_xfs_filemap_pmd_fault(ip); + trace_xfs_filemap_huge_fault(ip); - if (flags & FAULT_FLAG_WRITE) { + if (vmf->flags & FAULT_FLAG_WRITE) { sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); } xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops); + ret = dax_iomap_fault(vmf, pe_size, &xfs_iomap_ops); xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); - if (flags & FAULT_FLAG_WRITE) + if (vmf->flags & FAULT_FLAG_WRITE) sb_end_pagefault(inode->i_sb); return ret; @@ -1468,11 +1111,10 @@ xfs_filemap_pmd_fault( */ static int xfs_filemap_pfn_mkwrite( - struct vm_area_struct *vma, struct vm_fault *vmf) { - struct inode *inode = file_inode(vma->vm_file); + struct inode *inode = file_inode(vmf->vma->vm_file); struct xfs_inode *ip = XFS_I(inode); int ret = VM_FAULT_NOPAGE; loff_t size; @@ -1480,7 +1122,7 @@ xfs_filemap_pfn_mkwrite( trace_xfs_filemap_pfn_mkwrite(ip); sb_start_pagefault(inode->i_sb); - file_update_time(vma->vm_file); + file_update_time(vmf->vma->vm_file); /* check if the faulting page hasn't raced with truncate */ xfs_ilock(ip, XFS_MMAPLOCK_SHARED); @@ -1488,7 +1130,7 @@ xfs_filemap_pfn_mkwrite( if (vmf->pgoff >= size) ret = VM_FAULT_SIGBUS; else if (IS_DAX(inode)) - ret = dax_pfn_mkwrite(vma, vmf); + ret = dax_pfn_mkwrite(vmf); xfs_iunlock(ip, XFS_MMAPLOCK_SHARED); sb_end_pagefault(inode->i_sb); return ret; @@ -1497,7 +1139,7 @@ xfs_filemap_pfn_mkwrite( static const struct vm_operations_struct xfs_file_vm_ops = { .fault = xfs_filemap_fault, - .pmd_fault = xfs_filemap_pmd_fault, + .huge_fault = xfs_filemap_huge_fault, .map_pages = filemap_map_pages, .page_mkwrite = xfs_filemap_page_mkwrite, .pfn_mkwrite = xfs_filemap_pfn_mkwrite, |
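
The patch above is easiest to appreciate from the userspace side. Below is a small, hypothetical demonstration program (not part of the patch) that exercises the two user-visible behaviours the change enables on XFS: RWF_NOWAIT direct writes, which now fail fast with EAGAIN through the xfs_ilock_nowait()/IOCB_NOWAIT checks instead of sleeping, and SEEK_HOLE/SEEK_DATA, which are now answered by iomap_seek_hole()/iomap_seek_data() rather than the removed page-cache walking code. It assumes a glibc that provides pwritev2(), a kernel new enough to honour RWF_NOWAIT through pwritev2(), and a file on an XFS mount; the path "testfile" is only a placeholder.

/*
 * Hypothetical userspace sketch, not part of the patch: try a non-blocking
 * O_DIRECT write, then probe the file with SEEK_DATA/SEEK_HOLE.
 */
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

#ifndef RWF_NOWAIT
#define RWF_NOWAIT 0x00000008	/* value from the uapi <linux/fs.h> */
#endif

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "testfile"; /* placeholder path */
	void *buf;
	struct iovec iov;
	off_t off;
	ssize_t ret;

	/* O_DIRECT: buffer, length and offset must be block aligned. */
	int fd = open(path, O_RDWR | O_CREAT | O_DIRECT, 0644);
	if (fd < 0) {
		perror("open");
		return 1;
	}
	if (posix_memalign(&buf, 4096, 4096) != 0) {
		fprintf(stderr, "posix_memalign failed\n");
		return 1;
	}
	memset(buf, 'x', 4096);
	iov.iov_base = buf;
	iov.iov_len = 4096;

	/*
	 * With FMODE_AIO_NOWAIT set by xfs_file_open(), this either completes
	 * without sleeping or fails fast with EAGAIN, e.g. when taking the
	 * iolock or waiting for other direct I/O would have blocked.
	 */
	ret = pwritev2(fd, &iov, 1, 0, RWF_NOWAIT);
	if (ret < 0 && errno == EAGAIN)
		printf("write would block; caller may retry or queue it\n");
	else if (ret < 0)
		perror("pwritev2");
	else
		printf("wrote %zd bytes without blocking\n", ret);

	/* SEEK_DATA/SEEK_HOLE are now served by the iomap helpers. */
	off = lseek(fd, 0, SEEK_DATA);
	if (off < 0)
		printf("no data found (%s)\n", strerror(errno));
	else
		printf("first data at %lld\n", (long long)off);

	off = lseek(fd, 0, SEEK_HOLE);
	if (off >= 0)
		printf("first hole at %lld\n", (long long)off);

	free(buf);
	close(fd);
	return 0;
}

Note that on a freshly created file the EAGAIN path is the one typically taken, since the nowait write would have to allocate blocks and therefore block; retrying without RWF_NOWAIT, or from an io_submit() completion context, is the expected caller response, mirroring the kernel-side trylock-then-bail pattern the diff adds around xfs_ilock_nowait().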