diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 129 |
1 files changed, 61 insertions, 68 deletions
@@ -29,85 +29,70 @@ struct follow_page_context { unsigned int page_mask; }; -typedef int (*set_dirty_func_t)(struct page *page); - -static void __put_user_pages_dirty(struct page **pages, - unsigned long npages, - set_dirty_func_t sdf) -{ - unsigned long index; - - for (index = 0; index < npages; index++) { - struct page *page = compound_head(pages[index]); - - /* - * Checking PageDirty at this point may race with - * clear_page_dirty_for_io(), but that's OK. Two key cases: - * - * 1) This code sees the page as already dirty, so it skips - * the call to sdf(). That could happen because - * clear_page_dirty_for_io() called page_mkclean(), - * followed by set_page_dirty(). However, now the page is - * going to get written back, which meets the original - * intention of setting it dirty, so all is well: - * clear_page_dirty_for_io() goes on to call - * TestClearPageDirty(), and write the page back. - * - * 2) This code sees the page as clean, so it calls sdf(). - * The page stays dirty, despite being written back, so it - * gets written back again in the next writeback cycle. - * This is harmless. - */ - if (!PageDirty(page)) - sdf(page); - - put_user_page(page); - } -} - /** - * put_user_pages_dirty() - release and dirty an array of gup-pinned pages - * @pages: array of pages to be marked dirty and released. + * put_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages + * @pages: array of pages to be maybe marked dirty, and definitely released. * @npages: number of pages in the @pages array. + * @make_dirty: whether to mark the pages dirty * * "gup-pinned page" refers to a page that has had one of the get_user_pages() * variants called on that page. * * For each page in the @pages array, make that page (or its head page, if a - * compound page) dirty, if it was previously listed as clean. Then, release - * the page using put_user_page(). + * compound page) dirty, if @make_dirty is true, and if the page was previously + * listed as clean. In any case, releases all pages using put_user_page(), + * possibly via put_user_pages(), for the non-dirty case. * * Please see the put_user_page() documentation for details. * - * set_page_dirty(), which does not lock the page, is used here. - * Therefore, it is the caller's responsibility to ensure that this is - * safe. If not, then put_user_pages_dirty_lock() should be called instead. + * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is + * required, then the caller should a) verify that this is really correct, + * because _lock() is usually required, and b) hand code it: + * set_page_dirty_lock(), put_user_page(). * */ -void put_user_pages_dirty(struct page **pages, unsigned long npages) +void put_user_pages_dirty_lock(struct page **pages, unsigned long npages, + bool make_dirty) { - __put_user_pages_dirty(pages, npages, set_page_dirty); -} -EXPORT_SYMBOL(put_user_pages_dirty); + unsigned long index; -/** - * put_user_pages_dirty_lock() - release and dirty an array of gup-pinned pages - * @pages: array of pages to be marked dirty and released. - * @npages: number of pages in the @pages array. - * - * For each page in the @pages array, make that page (or its head page, if a - * compound page) dirty, if it was previously listed as clean. Then, release - * the page using put_user_page(). - * - * Please see the put_user_page() documentation for details. - * - * This is just like put_user_pages_dirty(), except that it invokes - * set_page_dirty_lock(), instead of set_page_dirty(). - * - */ -void put_user_pages_dirty_lock(struct page **pages, unsigned long npages) -{ - __put_user_pages_dirty(pages, npages, set_page_dirty_lock); + /* + * TODO: this can be optimized for huge pages: if a series of pages is + * physically contiguous and part of the same compound page, then a + * single operation to the head page should suffice. + */ + + if (!make_dirty) { + put_user_pages(pages, npages); + return; + } + + for (index = 0; index < npages; index++) { + struct page *page = compound_head(pages[index]); + /* + * Checking PageDirty at this point may race with + * clear_page_dirty_for_io(), but that's OK. Two key + * cases: + * + * 1) This code sees the page as already dirty, so it + * skips the call to set_page_dirty(). That could happen + * because clear_page_dirty_for_io() called + * page_mkclean(), followed by set_page_dirty(). + * However, now the page is going to get written back, + * which meets the original intention of setting it + * dirty, so all is well: clear_page_dirty_for_io() goes + * on to call TestClearPageDirty(), and write the page + * back. + * + * 2) This code sees the page as clean, so it calls + * set_page_dirty(). The page stays dirty, despite being + * written back, so it gets written back again in the + * next writeback cycle. This is harmless. + */ + if (!PageDirty(page)) + set_page_dirty_lock(page); + put_user_page(page); + } } EXPORT_SYMBOL(put_user_pages_dirty_lock); @@ -399,7 +384,7 @@ retry_locked: spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); } - if (flags & FOLL_SPLIT) { + if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) { int ret; page = pmd_page(*pmd); if (is_huge_zero_page(page)) { @@ -408,7 +393,7 @@ retry_locked: split_huge_pmd(vma, pmd, address); if (pmd_trans_unstable(pmd)) ret = -EBUSY; - } else { + } else if (flags & FOLL_SPLIT) { if (unlikely(!try_get_page(page))) { spin_unlock(ptl); return ERR_PTR(-ENOMEM); @@ -420,6 +405,10 @@ retry_locked: put_page(page); if (pmd_none(*pmd)) return no_page_table(vma, flags); + } else { /* flags & FOLL_SPLIT_PMD */ + spin_unlock(ptl); + split_huge_pmd(vma, pmd, address); + ret = pte_alloc(mm, pmd) ? -ENOMEM : 0; } return ret ? ERR_PTR(ret) : @@ -799,6 +788,8 @@ static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm, if (!nr_pages) return 0; + start = untagged_addr(start); + VM_BUG_ON(!!pages != !!(gup_flags & FOLL_GET)); /* @@ -961,6 +952,8 @@ int fixup_user_fault(struct task_struct *tsk, struct mm_struct *mm, struct vm_area_struct *vma; vm_fault_t ret, major = 0; + address = untagged_addr(address); + if (unlocked) fault_flags |= FAULT_FLAG_ALLOW_RETRY; @@ -1460,7 +1453,7 @@ check_again: * gup may start from a tail page. Advance step by the left * part. */ - step = (1 << compound_order(head)) - (pages[i] - head); + step = compound_nr(head) - (pages[i] - head); /* * If we get a page from the CMA zone, since we are going to * be pinning these entries, we might as well move them out |