diff options
Diffstat (limited to 'include/linux/rmap.h')
| -rw-r--r-- | include/linux/rmap.h | 73 | 
1 file changed, 62 insertions(+), 11 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b89b4b86951f..bd3504d11b15 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -166,7 +166,7 @@ static inline void anon_vma_merge(struct vm_area_struct *vma,  	unlink_anon_vmas(next);  } -struct anon_vma *page_get_anon_vma(struct page *page); +struct anon_vma *folio_get_anon_vma(struct folio *folio);  /* RMAP flags, currently only relevant for some anon rmap operations. */  typedef int __bitwise rmap_t; @@ -270,7 +270,7 @@ dup:   * @page: the exclusive anonymous page to try marking possibly shared   *   * The caller needs to hold the PT lock and has to have the page table entry - * cleared/invalidated+flushed, to properly sync against GUP-fast. + * cleared/invalidated.   *   * This is similar to page_try_dup_anon_rmap(), however, not used during fork()   * to duplicate a mapping, but instead to prepare for KSM or temporarily @@ -286,12 +286,68 @@ static inline int page_try_share_anon_rmap(struct page *page)  {  	VM_BUG_ON_PAGE(!PageAnon(page) || !PageAnonExclusive(page), page); -	/* See page_try_dup_anon_rmap(). */ -	if (likely(!is_device_private_page(page) && -	    unlikely(page_maybe_dma_pinned(page)))) -		return -EBUSY; +	/* device private pages cannot get pinned via GUP. */ +	if (unlikely(is_device_private_page(page))) { +		ClearPageAnonExclusive(page); +		return 0; +	} + +	/* +	 * We have to make sure that when we clear PageAnonExclusive, that +	 * the page is not pinned and that concurrent GUP-fast won't succeed in +	 * concurrently pinning the page. +	 * +	 * Conceptually, PageAnonExclusive clearing consists of: +	 * (A1) Clear PTE +	 * (A2) Check if the page is pinned; back off if so. +	 * (A3) Clear PageAnonExclusive +	 * (A4) Restore PTE (optional, but certainly not writable) +	 * +	 * When clearing PageAnonExclusive, we cannot possibly map the page +	 * writable again, because anon pages that may be shared must never +	 * be writable. 
So in any case, if the PTE was writable it cannot +	 * be writable anymore afterwards and there would be a PTE change. Only +	 * if the PTE wasn't writable, there might not be a PTE change. +	 * +	 * Conceptually, GUP-fast pinning of an anon page consists of: +	 * (B1) Read the PTE +	 * (B2) FOLL_WRITE: check if the PTE is not writable; back off if so. +	 * (B3) Pin the mapped page +	 * (B4) Check if the PTE changed by re-reading it; back off if so. +	 * (B5) If the original PTE is not writable, check if +	 *	PageAnonExclusive is not set; back off if so. +	 * +	 * If the PTE was writable, we only have to make sure that GUP-fast +	 * observes a PTE change and properly backs off. +	 * +	 * If the PTE was not writable, we have to make sure that GUP-fast either +	 * detects a (temporary) PTE change or that PageAnonExclusive is cleared +	 * and properly backs off. +	 * +	 * Consequently, when clearing PageAnonExclusive(), we have to make +	 * sure that (A1), (A2)/(A3) and (A4) happen in the right memory +	 * order. In GUP-fast pinning code, we have to make sure that (B3),(B4) +	 * and (B5) happen in the right memory order. +	 * +	 * We assume that there might not be a memory barrier after +	 * clearing/invalidating the PTE (A1) and before restoring the PTE (A4), +	 * so we use explicit ones here. +	 */ +	/* Paired with the memory barrier in try_grab_folio(). */ +	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) +		smp_mb(); + +	if (unlikely(page_maybe_dma_pinned(page))) +		return -EBUSY;  	ClearPageAnonExclusive(page); + +	/* +	 * This is conceptually a smp_wmb() paired with the smp_rmb() in +	 * gup_must_unshare(). +	 */ +	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP)) +		smp_mb__after_atomic();  	return 0;  } @@ -405,13 +461,8 @@ struct rmap_walk_control {  void rmap_walk(struct folio *folio, struct rmap_walk_control *rwc);  void rmap_walk_locked(struct folio *folio, struct rmap_walk_control *rwc); - -/* - * Called by memory-failure.c to kill processes. 
- */  struct anon_vma *folio_lock_anon_vma_read(struct folio *folio,  					  struct rmap_walk_control *rwc); -void page_unlock_anon_vma_read(struct anon_vma *anon_vma);  #else	/* !CONFIG_MMU */  |