diff options
Diffstat (limited to 'mm/gup.c')
-rw-r--r-- | mm/gup.c | 149 |
1 files changed, 105 insertions, 44 deletions
@@ -213,6 +213,58 @@ void unpin_user_page(struct page *page) } EXPORT_SYMBOL(unpin_user_page); +static inline void compound_range_next(unsigned long i, unsigned long npages, + struct page **list, struct page **head, + unsigned int *ntails) +{ + struct page *next, *page; + unsigned int nr = 1; + + if (i >= npages) + return; + + next = *list + i; + page = compound_head(next); + if (PageCompound(page) && compound_order(page) >= 1) + nr = min_t(unsigned int, + page + compound_nr(page) - next, npages - i); + + *head = page; + *ntails = nr; +} + +#define for_each_compound_range(__i, __list, __npages, __head, __ntails) \ + for (__i = 0, \ + compound_range_next(__i, __npages, __list, &(__head), &(__ntails)); \ + __i < __npages; __i += __ntails, \ + compound_range_next(__i, __npages, __list, &(__head), &(__ntails))) + +static inline void compound_next(unsigned long i, unsigned long npages, + struct page **list, struct page **head, + unsigned int *ntails) +{ + struct page *page; + unsigned int nr; + + if (i >= npages) + return; + + page = compound_head(list[i]); + for (nr = i + 1; nr < npages; nr++) { + if (compound_head(list[nr]) != page) + break; + } + + *head = page; + *ntails = nr - i; +} + +#define for_each_compound_head(__i, __list, __npages, __head, __ntails) \ + for (__i = 0, \ + compound_next(__i, __npages, __list, &(__head), &(__ntails)); \ + __i < __npages; __i += __ntails, \ + compound_next(__i, __npages, __list, &(__head), &(__ntails))) + /** * unpin_user_pages_dirty_lock() - release and optionally dirty gup-pinned pages * @pages: array of pages to be maybe marked dirty, and definitely released. @@ -239,20 +291,15 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty) { unsigned long index; - - /* - * TODO: this can be optimized for huge pages: if a series of pages is - * physically contiguous and part of the same compound page, then a - * single operation to the head page should suffice. - */ + struct page *head; + unsigned int ntails; if (!make_dirty) { unpin_user_pages(pages, npages); return; } - for (index = 0; index < npages; index++) { - struct page *page = compound_head(pages[index]); + for_each_compound_head(index, pages, npages, head, ntails) { /* * Checking PageDirty at this point may race with * clear_page_dirty_for_io(), but that's OK. Two key @@ -273,14 +320,50 @@ void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, * written back, so it gets written back again in the * next writeback cycle. This is harmless. */ - if (!PageDirty(page)) - set_page_dirty_lock(page); - unpin_user_page(page); + if (!PageDirty(head)) + set_page_dirty_lock(head); + put_compound_head(head, ntails, FOLL_PIN); } } EXPORT_SYMBOL(unpin_user_pages_dirty_lock); /** + * unpin_user_page_range_dirty_lock() - release and optionally dirty + * gup-pinned page range + * + * @page: the starting page of a range maybe marked dirty, and definitely released. + * @npages: number of consecutive pages to release. + * @make_dirty: whether to mark the pages dirty + * + * "gup-pinned page range" refers to a range of pages that has had one of the + * pin_user_pages() variants called on that page. + * + * For the page ranges defined by [page .. page+npages], make that range (or + * its head pages, if a compound page) dirty, if @make_dirty is true, and if the + * page range was previously listed as clean. + * + * set_page_dirty_lock() is used internally. If instead, set_page_dirty() is + * required, then the caller should a) verify that this is really correct, + * because _lock() is usually required, and b) hand code it: + * set_page_dirty_lock(), unpin_user_page(). + * + */ +void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages, + bool make_dirty) +{ + unsigned long index; + struct page *head; + unsigned int ntails; + + for_each_compound_range(index, &page, npages, head, ntails) { + if (make_dirty && !PageDirty(head)) + set_page_dirty_lock(head); + put_compound_head(head, ntails, FOLL_PIN); + } +} +EXPORT_SYMBOL(unpin_user_page_range_dirty_lock); + +/** * unpin_user_pages() - release an array of gup-pinned pages. * @pages: array of pages to be marked dirty and released. * @npages: number of pages in the @pages array. @@ -292,6 +375,8 @@ EXPORT_SYMBOL(unpin_user_pages_dirty_lock); void unpin_user_pages(struct page **pages, unsigned long npages) { unsigned long index; + struct page *head; + unsigned int ntails; /* * If this WARN_ON() fires, then the system *might* be leaking pages (by @@ -300,13 +385,9 @@ void unpin_user_pages(struct page **pages, unsigned long npages) */ if (WARN_ON(IS_ERR_VALUE(npages))) return; - /* - * TODO: this can be optimized for huge pages: if a series of pages is - * physically contiguous and part of the same compound page, then a - * single operation to the head page should suffice. - */ - for (index = 0; index < npages; index++) - unpin_user_page(pages[index]); + + for_each_compound_head(index, pages, npages, head, ntails) + put_compound_head(head, ntails, FOLL_PIN); } EXPORT_SYMBOL(unpin_user_pages); @@ -435,18 +516,6 @@ retry: } } - if (flags & FOLL_SPLIT && PageTransCompound(page)) { - get_page(page); - pte_unmap_unlock(ptep, ptl); - lock_page(page); - ret = split_huge_page(page); - unlock_page(page); - put_page(page); - if (ret) - return ERR_PTR(ret); - goto retry; - } - /* try_grab_page() does nothing unless FOLL_GET or FOLL_PIN is set. */ if (unlikely(!try_grab_page(page, flags))) { page = ERR_PTR(-ENOMEM); @@ -591,7 +660,7 @@ retry_locked: spin_unlock(ptl); return follow_page_pte(vma, address, pmd, flags, &ctx->pgmap); } - if (flags & (FOLL_SPLIT | FOLL_SPLIT_PMD)) { + if (flags & FOLL_SPLIT_PMD) { int ret; page = pmd_page(*pmd); if (is_huge_zero_page(page)) { @@ -600,19 +669,7 @@ retry_locked: split_huge_pmd(vma, pmd, address); if (pmd_trans_unstable(pmd)) ret = -EBUSY; - } else if (flags & FOLL_SPLIT) { - if (unlikely(!try_get_page(page))) { - spin_unlock(ptl); - return ERR_PTR(-ENOMEM); - } - spin_unlock(ptl); - lock_page(page); - ret = split_huge_page(page); - unlock_page(page); - put_page(page); - if (pmd_none(*pmd)) - return no_page_table(vma, flags); - } else { /* flags & FOLL_SPLIT_PMD */ + } else { spin_unlock(ptl); split_huge_pmd(vma, pmd, address); ret = pte_alloc(mm, pmd) ? -ENOMEM : 0; @@ -1535,6 +1592,10 @@ struct page *get_dump_page(unsigned long addr) FOLL_FORCE | FOLL_DUMP | FOLL_GET); if (locked) mmap_read_unlock(mm); + + if (ret == 1 && is_page_poisoned(page)) + return NULL; + return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ |