diff options
Diffstat (limited to 'fs/hugetlbfs/inode.c')
-rw-r--r-- | fs/hugetlbfs/inode.c | 137 |
1 files changed, 82 insertions, 55 deletions
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 62408047e8d7..20336cb3c040 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -108,16 +108,6 @@ static inline void hugetlb_drop_vma_policy(struct vm_area_struct *vma) } #endif -static void huge_pagevec_release(struct pagevec *pvec) -{ - int i; - - for (i = 0; i < pagevec_count(pvec); ++i) - put_page(pvec->pages[i]); - - pagevec_reinit(pvec); -} - /* * Mask used when checking the page offset value passed in via system * calls. This value will be converted to a loff_t which is signed. @@ -480,25 +470,19 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, struct address_space *mapping = &inode->i_data; const pgoff_t start = lstart >> huge_page_shift(h); const pgoff_t end = lend >> huge_page_shift(h); - struct pagevec pvec; + struct folio_batch fbatch; pgoff_t next, index; int i, freed = 0; bool truncate_op = (lend == LLONG_MAX); - pagevec_init(&pvec); + folio_batch_init(&fbatch); next = start; - while (next < end) { - /* - * When no more pages are found, we are done. - */ - if (!pagevec_lookup_range(&pvec, mapping, &next, end - 1)) - break; - - for (i = 0; i < pagevec_count(&pvec); ++i) { - struct page *page = pvec.pages[i]; + while (filemap_get_folios(mapping, &next, end - 1, &fbatch)) { + for (i = 0; i < folio_batch_count(&fbatch); ++i) { + struct folio *folio = fbatch.folios[i]; u32 hash = 0; - index = page->index; + index = folio->index; if (!truncate_op) { /* * Only need to hold the fault mutex in the @@ -511,15 +495,15 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, } /* - * If page is mapped, it was faulted in after being + * If folio is mapped, it was faulted in after being * unmapped in caller. Unmap (again) now after taking * the fault mutex. The mutex will prevent faults - * until we finish removing the page. + * until we finish removing the folio. * * This race can only happen in the hole punch case. * Getting here in a truncate operation is a bug. */ - if (unlikely(page_mapped(page))) { + if (unlikely(folio_mapped(folio))) { BUG_ON(truncate_op); mutex_unlock(&hugetlb_fault_mutex_table[hash]); @@ -532,7 +516,7 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, i_mmap_unlock_write(mapping); } - lock_page(page); + folio_lock(folio); /* * We must free the huge page and remove from page * cache (remove_huge_page) BEFORE removing the @@ -542,8 +526,8 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, * the subpool and global reserve usage count can need * to be adjusted. */ - VM_BUG_ON(HPageRestoreReserve(page)); - remove_huge_page(page); + VM_BUG_ON(HPageRestoreReserve(&folio->page)); + remove_huge_page(&folio->page); freed++; if (!truncate_op) { if (unlikely(hugetlb_unreserve_pages(inode, @@ -551,11 +535,11 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, hugetlb_fix_reserve_counts(inode); } - unlock_page(page); + folio_unlock(folio); if (!truncate_op) mutex_unlock(&hugetlb_fault_mutex_table[hash]); } - huge_pagevec_release(&pvec); + folio_batch_release(&fbatch); cond_resched(); } @@ -600,41 +584,79 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset) remove_inode_hugepages(inode, offset, LLONG_MAX); } +static void hugetlbfs_zero_partial_page(struct hstate *h, + struct address_space *mapping, + loff_t start, + loff_t end) +{ + pgoff_t idx = start >> huge_page_shift(h); + struct folio *folio; + + folio = filemap_lock_folio(mapping, idx); + if (!folio) + return; + + start = start & ~huge_page_mask(h); + end = end & ~huge_page_mask(h); + if (!end) + end = huge_page_size(h); + + folio_zero_segment(folio, (size_t)start, (size_t)end); + + folio_unlock(folio); + folio_put(folio); +} + static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len) { + struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); + struct address_space *mapping = inode->i_mapping; struct hstate *h = hstate_inode(inode); loff_t hpage_size = huge_page_size(h); loff_t hole_start, hole_end; /* - * For hole punch round up the beginning offset of the hole and - * round down the end. + * hole_start and hole_end indicate the full pages within the hole. */ hole_start = round_up(offset, hpage_size); hole_end = round_down(offset + len, hpage_size); - if (hole_end > hole_start) { - struct address_space *mapping = inode->i_mapping; - struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode); + inode_lock(inode); - inode_lock(inode); + /* protected by i_rwsem */ + if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { + inode_unlock(inode); + return -EPERM; + } - /* protected by i_rwsem */ - if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) { - inode_unlock(inode); - return -EPERM; - } + i_mmap_lock_write(mapping); + + /* If range starts before first full page, zero partial page. */ + if (offset < hole_start) + hugetlbfs_zero_partial_page(h, mapping, + offset, min(offset + len, hole_start)); - i_mmap_lock_write(mapping); + /* Unmap users of full pages in the hole. */ + if (hole_end > hole_start) { if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root)) hugetlb_vmdelete_list(&mapping->i_mmap, hole_start >> PAGE_SHIFT, hole_end >> PAGE_SHIFT, 0); - i_mmap_unlock_write(mapping); - remove_inode_hugepages(inode, hole_start, hole_end); - inode_unlock(inode); } + /* If range extends beyond last full page, zero partial page. */ + if ((offset + len) > hole_end && (offset + len) > hole_start) + hugetlbfs_zero_partial_page(h, mapping, + hole_end, offset + len); + + i_mmap_unlock_write(mapping); + + /* Remove full pages from the file. */ + if (hole_end > hole_start) + remove_inode_hugepages(inode, hole_start, hole_end); + + inode_unlock(inode); + return 0; } @@ -759,7 +781,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, SetHPageMigratable(page); /* - * unlock_page because locked by add_to_page_cache() + * unlock_page because locked by huge_add_to_page_cache() * put_page() due to reference from alloc_huge_page() */ unlock_page(page); @@ -970,28 +992,33 @@ static int hugetlbfs_symlink(struct user_namespace *mnt_userns, return error; } -static int hugetlbfs_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, +#ifdef CONFIG_MIGRATION +static int hugetlbfs_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { int rc; - rc = migrate_huge_page_move_mapping(mapping, newpage, page); + rc = migrate_huge_page_move_mapping(mapping, dst, src); if (rc != MIGRATEPAGE_SUCCESS) return rc; - if (hugetlb_page_subpool(page)) { - hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page)); - hugetlb_set_page_subpool(page, NULL); + if (hugetlb_page_subpool(&src->page)) { + hugetlb_set_page_subpool(&dst->page, + hugetlb_page_subpool(&src->page)); + hugetlb_set_page_subpool(&src->page, NULL); } if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(dst, src); else - migrate_page_states(newpage, page); + folio_migrate_flags(dst, src); return MIGRATEPAGE_SUCCESS; } +#else +#define hugetlbfs_migrate_folio NULL +#endif static int hugetlbfs_error_remove_page(struct address_space *mapping, struct page *page) @@ -1158,7 +1185,7 @@ static const struct address_space_operations hugetlbfs_aops = { .write_begin = hugetlbfs_write_begin, .write_end = hugetlbfs_write_end, .dirty_folio = noop_dirty_folio, - .migratepage = hugetlbfs_migrate_page, + .migrate_folio = hugetlbfs_migrate_folio, .error_remove_page = hugetlbfs_error_remove_page, }; |