Diffstat (limited to 'mm/rmap.c')
-rw-r--r--  mm/rmap.c | 640
1 file changed, 290 insertions, 350 deletions
diff --git a/mm/rmap.c b/mm/rmap.c index 163ac4e6bcee..615b5d323ee2 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -31,8 +31,8 @@   *               mm->page_table_lock or pte_lock   *                 swap_lock (in swap_duplicate, swap_info_get)   *                   mmlist_lock (in mmput, drain_mmlist and others) - *                   mapping->private_lock (in __set_page_dirty_buffers) - *                     lock_page_memcg move_lock (in __set_page_dirty_buffers) + *                   mapping->private_lock (in block_dirty_folio) + *                     folio_lock_memcg move_lock (in block_dirty_folio)   *                       i_pages lock (widely used)   *                         lruvec->lru_lock (in folio_lruvec_lock_irq)   *                   inode->i_lock (in set_page_dirty's __mark_inode_dirty) @@ -107,15 +107,15 @@ static inline void anon_vma_free(struct anon_vma *anon_vma)  	VM_BUG_ON(atomic_read(&anon_vma->refcount));  	/* -	 * Synchronize against page_lock_anon_vma_read() such that +	 * Synchronize against folio_lock_anon_vma_read() such that  	 * we can safely hold the lock without the anon_vma getting  	 * freed.  	 *  	 * Relies on the full mb implied by the atomic_dec_and_test() from  	 * put_anon_vma() against the acquire barrier implied by -	 * down_read_trylock() from page_lock_anon_vma_read(). This orders: +	 * down_read_trylock() from folio_lock_anon_vma_read(). This orders:  	 * -	 * page_lock_anon_vma_read()	VS	put_anon_vma() +	 * folio_lock_anon_vma_read()	VS	put_anon_vma()  	 *   down_read_trylock()		  atomic_dec_and_test()  	 *   LOCK				  MB  	 *   atomic_read()			  rwsem_is_locked() @@ -168,7 +168,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,   * allocate a new one.   *   * Anon-vma allocations are very subtle, because we may have - * optimistically looked up an anon_vma in page_lock_anon_vma_read() + * optimistically looked up an anon_vma in folio_lock_anon_vma_read()   * and that may actually touch the rwsem even in the newly   * allocated vma (it depends on RCU to make sure that the   * anon_vma isn't actually destroyed). @@ -526,28 +526,28 @@ out:   * atomic op -- the trylock. If we fail the trylock, we fall back to getting a   * reference like with page_get_anon_vma() and then block on the mutex.   */ -struct anon_vma *page_lock_anon_vma_read(struct page *page) +struct anon_vma *folio_lock_anon_vma_read(struct folio *folio)  {  	struct anon_vma *anon_vma = NULL;  	struct anon_vma *root_anon_vma;  	unsigned long anon_mapping;  	rcu_read_lock(); -	anon_mapping = (unsigned long)READ_ONCE(page->mapping); +	anon_mapping = (unsigned long)READ_ONCE(folio->mapping);  	if ((anon_mapping & PAGE_MAPPING_FLAGS) != PAGE_MAPPING_ANON)  		goto out; -	if (!page_mapped(page)) +	if (!folio_mapped(folio))  		goto out;  	anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);  	root_anon_vma = READ_ONCE(anon_vma->root);  	if (down_read_trylock(&root_anon_vma->rwsem)) {  		/* -		 * If the page is still mapped, then this anon_vma is still +		 * If the folio is still mapped, then this anon_vma is still  		 * its anon_vma, and holding the mutex ensures that it will  		 * not go away, see anon_vma_free().  		 
*/ -		if (!page_mapped(page)) { +		if (!folio_mapped(folio)) {  			up_read(&root_anon_vma->rwsem);  			anon_vma = NULL;  		} @@ -560,7 +560,7 @@ struct anon_vma *page_lock_anon_vma_read(struct page *page)  		goto out;  	} -	if (!page_mapped(page)) { +	if (!folio_mapped(folio)) {  		rcu_read_unlock();  		put_anon_vma(anon_vma);  		return NULL; @@ -621,9 +621,20 @@ void try_to_unmap_flush_dirty(void)  		try_to_unmap_flush();  } +/* + * Bits 0-14 of mm->tlb_flush_batched record pending generations. + * Bits 16-30 of mm->tlb_flush_batched bit record flushed generations. + */ +#define TLB_FLUSH_BATCH_FLUSHED_SHIFT	16 +#define TLB_FLUSH_BATCH_PENDING_MASK			\ +	((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1) +#define TLB_FLUSH_BATCH_PENDING_LARGE			\ +	(TLB_FLUSH_BATCH_PENDING_MASK / 2) +  static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)  {  	struct tlbflush_unmap_batch *tlb_ubc = ¤t->tlb_ubc; +	int batch, nbatch;  	arch_tlbbatch_add_mm(&tlb_ubc->arch, mm);  	tlb_ubc->flush_required = true; @@ -633,7 +644,22 @@ static void set_tlb_ubc_flush_pending(struct mm_struct *mm, bool writable)  	 * before the PTE is cleared.  	 */  	barrier(); -	mm->tlb_flush_batched = true; +	batch = atomic_read(&mm->tlb_flush_batched); +retry: +	if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) > TLB_FLUSH_BATCH_PENDING_LARGE) { +		/* +		 * Prevent `pending' from catching up with `flushed' because of +		 * overflow.  Reset `pending' and `flushed' to be 1 and 0 if +		 * `pending' becomes large. +		 */ +		nbatch = atomic_cmpxchg(&mm->tlb_flush_batched, batch, 1); +		if (nbatch != batch) { +			batch = nbatch; +			goto retry; +		} +	} else { +		atomic_inc(&mm->tlb_flush_batched); +	}  	/*  	 * If the PTE was dirty then it's best to assume it's writable. The @@ -680,15 +706,18 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)   */  void flush_tlb_batched_pending(struct mm_struct *mm)  { -	if (data_race(mm->tlb_flush_batched)) { -		flush_tlb_mm(mm); +	int batch = atomic_read(&mm->tlb_flush_batched); +	int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK; +	int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT; +	if (pending != flushed) { +		flush_tlb_mm(mm);  		/* -		 * Do not allow the compiler to re-order the clearing of -		 * tlb_flush_batched before the tlb is flushed. +		 * If the new TLB flushing is pending during flushing, leave +		 * mm->tlb_flush_batched as is, to avoid losing flushing.  		 */ -		barrier(); -		mm->tlb_flush_batched = false; +		atomic_cmpxchg(&mm->tlb_flush_batched, batch, +			       pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT));  	}  }  #else @@ -708,8 +737,9 @@ static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)   */  unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)  { -	if (PageAnon(page)) { -		struct anon_vma *page__anon_vma = page_anon_vma(page); +	struct folio *folio = page_folio(page); +	if (folio_test_anon(folio)) { +		struct anon_vma *page__anon_vma = folio_anon_vma(folio);  		/*  		 * Note: swapoff's unuse_vma() is more efficient with this  		 * check, and needs it to match anon_vma when KSM is active. 
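The TLB-batching hunks above replace the old boolean mm->tlb_flush_batched with two generation counters packed into one atomic word: bits 0-14 count batches whose flush is still pending, bits 16-30 count batches already flushed, and both are reset to 1/0 before the pending half can grow into the flushed half. Below is a minimal userspace sketch of just that packing, assuming C11 <stdatomic.h>; note_pending_flush(), flush_if_pending() and the printf() stand-in for flush_tlb_mm() are illustrative names, not kernel APIs, and the kernel's memory-ordering barriers are ignored.

/*
 * Minimal userspace model of the pending/flushed TLB-flush generations
 * packed into mm->tlb_flush_batched by the hunks above.  Illustrative
 * sketch only: the real code lives in mm/rmap.c and uses the kernel's
 * atomic_t plus arch_tlbbatch_add_mm()/flush_tlb_mm(), stubbed out here.
 */
#include <stdatomic.h>
#include <stdio.h>

#define TLB_FLUSH_BATCH_FLUSHED_SHIFT	16
#define TLB_FLUSH_BATCH_PENDING_MASK \
	((1 << (TLB_FLUSH_BATCH_FLUSHED_SHIFT - 1)) - 1)	/* bits 0-14 */
#define TLB_FLUSH_BATCH_PENDING_LARGE \
	(TLB_FLUSH_BATCH_PENDING_MASK / 2)

static atomic_int tlb_flush_batched;	/* stands in for mm->tlb_flush_batched */

/* Model of set_tlb_ubc_flush_pending(): bump the pending generation. */
static void note_pending_flush(void)
{
	int batch = atomic_load(&tlb_flush_batched);

	for (;;) {
		if ((batch & TLB_FLUSH_BATCH_PENDING_MASK) <= TLB_FLUSH_BATCH_PENDING_LARGE) {
			atomic_fetch_add(&tlb_flush_batched, 1);
			return;
		}
		/* Pending got large: reset pending/flushed to 1/0 to avoid overflow. */
		if (atomic_compare_exchange_weak(&tlb_flush_batched, &batch, 1))
			return;
		/* cmpxchg failed and reloaded 'batch'; retry with the new value. */
	}
}

/* Model of flush_tlb_batched_pending(): flush if any generation is unflushed. */
static void flush_if_pending(void)
{
	int batch = atomic_load(&tlb_flush_batched);
	int pending = batch & TLB_FLUSH_BATCH_PENDING_MASK;
	int flushed = batch >> TLB_FLUSH_BATCH_FLUSHED_SHIFT;

	if (pending == flushed)
		return;

	printf("flush_tlb_mm()  (pending=%d flushed=%d)\n", pending, flushed);
	/*
	 * Record everything up to 'pending' as flushed, but only if no new
	 * pending generation arrived meanwhile; otherwise leave the word
	 * untouched so the newer pending flush is not lost.
	 */
	atomic_compare_exchange_strong(&tlb_flush_batched, &batch,
			pending | (pending << TLB_FLUSH_BATCH_FLUSHED_SHIFT));
}

int main(void)
{
	note_pending_flush();
	note_pending_flush();
	flush_if_pending();	/* flushes, records flushed == pending == 2 */
	flush_if_pending();	/* no-op: pending == flushed */
	return 0;
}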
@@ -719,7 +749,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma)  			return -EFAULT;  	} else if (!vma->vm_file) {  		return -EFAULT; -	} else if (vma->vm_file->f_mapping != compound_head(page)->mapping) { +	} else if (vma->vm_file->f_mapping != folio->mapping) {  		return -EFAULT;  	} @@ -760,30 +790,29 @@ out:  	return pmd;  } -struct page_referenced_arg { +struct folio_referenced_arg {  	int mapcount;  	int referenced;  	unsigned long vm_flags;  	struct mem_cgroup *memcg;  };  /* - * arg: page_referenced_arg will be passed + * arg: folio_referenced_arg will be passed   */ -static bool page_referenced_one(struct page *page, struct vm_area_struct *vma, -			unsigned long address, void *arg) +static bool folio_referenced_one(struct folio *folio, +		struct vm_area_struct *vma, unsigned long address, void *arg)  { -	struct page_referenced_arg *pra = arg; -	struct page_vma_mapped_walk pvmw = { -		.page = page, -		.vma = vma, -		.address = address, -	}; +	struct folio_referenced_arg *pra = arg; +	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);  	int referenced = 0;  	while (page_vma_mapped_walk(&pvmw)) {  		address = pvmw.address; -		if (vma->vm_flags & VM_LOCKED) { +		if ((vma->vm_flags & VM_LOCKED) && +		    (!folio_test_large(folio) || !pvmw.pte)) { +			/* Restore the mlock which got missed */ +			mlock_vma_folio(folio, vma, !pvmw.pte);  			page_vma_mapped_walk_done(&pvmw);  			pra->vm_flags |= VM_LOCKED;  			return false; /* To break the loop */ @@ -795,10 +824,10 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,  				/*  				 * Don't treat a reference through  				 * a sequentially read mapping as such. -				 * If the page has been used in another mapping, +				 * If the folio has been used in another mapping,  				 * we will catch it; if this other mapping is  				 * already gone, the unmap path will have set -				 * PG_referenced or activated the page. +				 * the referenced flag or activated the folio.  				 */  				if (likely(!(vma->vm_flags & VM_SEQ_READ)))  					referenced++; @@ -808,7 +837,7 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,  						pvmw.pmd))  				referenced++;  		} else { -			/* unexpected pmd-mapped page? */ +			/* unexpected pmd-mapped folio? */  			WARN_ON_ONCE(1);  		} @@ -816,13 +845,13 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,  	}  	if (referenced) -		clear_page_idle(page); -	if (test_and_clear_page_young(page)) +		folio_clear_idle(folio); +	if (folio_test_clear_young(folio))  		referenced++;  	if (referenced) {  		pra->referenced++; -		pra->vm_flags |= vma->vm_flags; +		pra->vm_flags |= vma->vm_flags & ~VM_LOCKED;  	}  	if (!pra->mapcount) @@ -831,9 +860,9 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,  	return true;  } -static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg) +static bool invalid_folio_referenced_vma(struct vm_area_struct *vma, void *arg)  { -	struct page_referenced_arg *pra = arg; +	struct folio_referenced_arg *pra = arg;  	struct mem_cgroup *memcg = pra->memcg;  	if (!mm_match_cgroup(vma->vm_mm, memcg)) @@ -843,40 +872,39 @@ static bool invalid_page_referenced_vma(struct vm_area_struct *vma, void *arg)  }  /** - * page_referenced - test if the page was referenced - * @page: the page to test - * @is_locked: caller holds lock on the page + * folio_referenced() - Test if the folio was referenced. + * @folio: The folio to test. 
+ * @is_locked: Caller holds lock on the folio.   * @memcg: target memory cgroup - * @vm_flags: collect encountered vma->vm_flags who actually referenced the page + * @vm_flags: A combination of all the vma->vm_flags which referenced the folio. + * + * Quick test_and_clear_referenced for all mappings of a folio,   * - * Quick test_and_clear_referenced for all mappings to a page, - * returns the number of ptes which referenced the page. + * Return: The number of mappings which referenced the folio.   */ -int page_referenced(struct page *page, -		    int is_locked, -		    struct mem_cgroup *memcg, -		    unsigned long *vm_flags) +int folio_referenced(struct folio *folio, int is_locked, +		     struct mem_cgroup *memcg, unsigned long *vm_flags)  {  	int we_locked = 0; -	struct page_referenced_arg pra = { -		.mapcount = total_mapcount(page), +	struct folio_referenced_arg pra = { +		.mapcount = folio_mapcount(folio),  		.memcg = memcg,  	};  	struct rmap_walk_control rwc = { -		.rmap_one = page_referenced_one, +		.rmap_one = folio_referenced_one,  		.arg = (void *)&pra, -		.anon_lock = page_lock_anon_vma_read, +		.anon_lock = folio_lock_anon_vma_read,  	};  	*vm_flags = 0;  	if (!pra.mapcount)  		return 0; -	if (!page_rmapping(page)) +	if (!folio_raw_mapping(folio))  		return 0; -	if (!is_locked && (!PageAnon(page) || PageKsm(page))) { -		we_locked = trylock_page(page); +	if (!is_locked && (!folio_test_anon(folio) || folio_test_ksm(folio))) { +		we_locked = folio_trylock(folio);  		if (!we_locked)  			return 1;  	} @@ -887,37 +915,32 @@ int page_referenced(struct page *page,  	 * cgroups  	 */  	if (memcg) { -		rwc.invalid_vma = invalid_page_referenced_vma; +		rwc.invalid_vma = invalid_folio_referenced_vma;  	} -	rmap_walk(page, &rwc); +	rmap_walk(folio, &rwc);  	*vm_flags = pra.vm_flags;  	if (we_locked) -		unlock_page(page); +		folio_unlock(folio);  	return pra.referenced;  } -static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, +static bool page_mkclean_one(struct folio *folio, struct vm_area_struct *vma,  			    unsigned long address, void *arg)  { -	struct page_vma_mapped_walk pvmw = { -		.page = page, -		.vma = vma, -		.address = address, -		.flags = PVMW_SYNC, -	}; +	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, PVMW_SYNC);  	struct mmu_notifier_range range;  	int *cleaned = arg;  	/*  	 * We have to assume the worse case ie pmd for invalidation. Note that -	 * the page can not be free from this function. +	 * the folio can not be freed from this function.  	 */  	mmu_notifier_range_init(&range, MMU_NOTIFY_PROTECTION_PAGE,  				0, vma, vma->vm_mm, address, -				vma_address_end(page, vma)); +				vma_address_end(&pvmw));  	mmu_notifier_invalidate_range_start(&range);  	while (page_vma_mapped_walk(&pvmw)) { @@ -945,14 +968,14 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,  			if (!pmd_dirty(*pmd) && !pmd_write(*pmd))  				continue; -			flush_cache_page(vma, address, page_to_pfn(page)); +			flush_cache_page(vma, address, folio_pfn(folio));  			entry = pmdp_invalidate(vma, address, pmd);  			entry = pmd_wrprotect(entry);  			entry = pmd_mkclean(entry);  			set_pmd_at(vma->vm_mm, address, pmd, entry);  			ret = 1;  #else -			/* unexpected pmd-mapped page? */ +			/* unexpected pmd-mapped folio? 
*/  			WARN_ON_ONCE(1);  #endif  		} @@ -1000,7 +1023,7 @@ int folio_mkclean(struct folio *folio)  	if (!mapping)  		return 0; -	rmap_walk(&folio->page, &rwc); +	rmap_walk(folio, &rwc);  	return cleaned;  } @@ -1028,8 +1051,8 @@ void page_move_anon_rmap(struct page *page, struct vm_area_struct *vma)  	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;  	/*  	 * Ensure that anon_vma and the PAGE_MAPPING_ANON bit are written -	 * simultaneously, so a concurrent reader (eg page_referenced()'s -	 * PageAnon()) will not see one without the other. +	 * simultaneously, so a concurrent reader (eg folio_referenced()'s +	 * folio_test_anon()) will not see one without the other.  	 */  	WRITE_ONCE(page->mapping, (struct address_space *) anon_vma);  } @@ -1079,6 +1102,7 @@ static void __page_set_anon_rmap(struct page *page,  static void __page_check_anon_rmap(struct page *page,  	struct vm_area_struct *vma, unsigned long address)  { +	struct folio *folio = page_folio(page);  	/*  	 * The page's anon-rmap details (mapping and index) are guaranteed to  	 * be set up correctly at this point. @@ -1090,7 +1114,8 @@ static void __page_check_anon_rmap(struct page *page,  	 * are initially only visible via the pagetables, and the pte is locked  	 * over the call to page_add_new_anon_rmap.  	 */ -	VM_BUG_ON_PAGE(page_anon_vma(page)->root != vma->anon_vma->root, page); +	VM_BUG_ON_FOLIO(folio_anon_vma(folio)->root != vma->anon_vma->root, +			folio);  	VM_BUG_ON_PAGE(page_to_pgoff(page) != linear_page_index(vma, address),  		       page);  } @@ -1152,17 +1177,17 @@ void do_page_add_anon_rmap(struct page *page,  		__mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);  	} -	if (unlikely(PageKsm(page))) { +	if (unlikely(PageKsm(page)))  		unlock_page_memcg(page); -		return; -	}  	/* address might be in next vma when migration races vma_adjust */ -	if (first) +	else if (first)  		__page_set_anon_rmap(page, vma, address,  				flags & RMAP_EXCLUSIVE);  	else  		__page_check_anon_rmap(page, vma, address); + +	mlock_vma_page(page, vma, compound);  }  /** @@ -1187,8 +1212,7 @@ void page_add_new_anon_rmap(struct page *page,  		VM_BUG_ON_PAGE(!PageTransHuge(page), page);  		/* increment count (starts at -1) */  		atomic_set(compound_mapcount_ptr(page), 0); -		if (hpage_pincount_available(page)) -			atomic_set(compound_pincount_ptr(page), 0); +		atomic_set(compound_pincount_ptr(page), 0);  		__mod_lruvec_page_state(page, NR_ANON_THPS, nr);  	} else { @@ -1203,12 +1227,14 @@ void page_add_new_anon_rmap(struct page *page,  /**   * page_add_file_rmap - add pte mapping to a file page - * @page: the page to add the mapping to - * @compound: charge the page as compound or small page + * @page:	the page to add the mapping to + * @vma:	the vm area in which the mapping is added + * @compound:	charge the page as compound or small page   *   * The caller needs to hold the pte lock.   */ -void page_add_file_rmap(struct page *page, bool compound) +void page_add_file_rmap(struct page *page, +	struct vm_area_struct *vma, bool compound)  {  	int i, nr = 1; @@ -1223,6 +1249,17 @@ void page_add_file_rmap(struct page *page, bool compound)  		}  		if (!atomic_inc_and_test(compound_mapcount_ptr(page)))  			goto out; + +		/* +		 * It is racy to ClearPageDoubleMap in page_remove_file_rmap(); +		 * but page lock is held by all page_add_file_rmap() compound +		 * callers, and SetPageDoubleMap below warns if !PageLocked: +		 * so here is a place that DoubleMap can be safely cleared. 
+		 */ +		VM_WARN_ON_ONCE(!PageLocked(page)); +		if (nr == nr_pages && PageDoubleMap(page)) +			ClearPageDoubleMap(page); +  		if (PageSwapBacked(page))  			__mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,  						nr_pages); @@ -1231,13 +1268,8 @@ void page_add_file_rmap(struct page *page, bool compound)  						nr_pages);  	} else {  		if (PageTransCompound(page) && page_mapping(page)) { -			struct page *head = compound_head(page); -  			VM_WARN_ON_ONCE(!PageLocked(page)); - -			SetPageDoubleMap(head); -			if (PageMlocked(page)) -				clear_page_mlock(head); +			SetPageDoubleMap(compound_head(page));  		}  		if (!atomic_inc_and_test(&page->_mapcount))  			goto out; @@ -1245,6 +1277,8 @@ void page_add_file_rmap(struct page *page, bool compound)  	__mod_lruvec_page_state(page, NR_FILE_MAPPED, nr);  out:  	unlock_page_memcg(page); + +	mlock_vma_page(page, vma, compound);  }  static void page_remove_file_rmap(struct page *page, bool compound) @@ -1287,9 +1321,6 @@ static void page_remove_file_rmap(struct page *page, bool compound)  	 * pte lock(a spinlock) is held, which implies preemption disabled.  	 */  	__mod_lruvec_page_state(page, NR_FILE_MAPPED, -nr); - -	if (unlikely(PageMlocked(page))) -		clear_page_mlock(page);  }  static void page_remove_anon_compound_rmap(struct page *page) @@ -1329,9 +1360,6 @@ static void page_remove_anon_compound_rmap(struct page *page)  		nr = thp_nr_pages(page);  	} -	if (unlikely(PageMlocked(page))) -		clear_page_mlock(page); -  	if (nr)  		__mod_lruvec_page_state(page, NR_ANON_MAPPED, -nr);  } @@ -1339,11 +1367,13 @@ static void page_remove_anon_compound_rmap(struct page *page)  /**   * page_remove_rmap - take down pte mapping from a page   * @page:	page to remove mapping from + * @vma:	the vm area from which the mapping is removed   * @compound:	uncharge the page as compound or small page   *   * The caller needs to hold the pte lock.   */ -void page_remove_rmap(struct page *page, bool compound) +void page_remove_rmap(struct page *page, +	struct vm_area_struct *vma, bool compound)  {  	lock_page_memcg(page); @@ -1368,9 +1398,6 @@ void page_remove_rmap(struct page *page, bool compound)  	 */  	__dec_lruvec_page_state(page, NR_ANON_MAPPED); -	if (unlikely(PageMlocked(page))) -		clear_page_mlock(page); -  	if (PageTransCompound(page))  		deferred_split_huge_page(compound_head(page)); @@ -1385,20 +1412,18 @@ void page_remove_rmap(struct page *page, bool compound)  	 */  out:  	unlock_page_memcg(page); + +	munlock_vma_page(page, vma, compound);  }  /*   * @arg: enum ttu_flags will be passed to this argument   */ -static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, +static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma,  		     unsigned long address, void *arg)  {  	struct mm_struct *mm = vma->vm_mm; -	struct page_vma_mapped_walk pvmw = { -		.page = page, -		.vma = vma, -		.address = address, -	}; +	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);  	pte_t pteval;  	struct page *subpage;  	bool ret = true; @@ -1415,21 +1440,20 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  		pvmw.flags = PVMW_SYNC;  	if (flags & TTU_SPLIT_HUGE_PMD) -		split_huge_pmd_address(vma, address, false, page); +		split_huge_pmd_address(vma, address, false, folio);  	/*  	 * For THP, we have to assume the worse case ie pmd for invalidation.  	 * For hugetlb, it could be much worse if we need to do pud  	 * invalidation in the case of pmd sharing.  	 
* -	 * Note that the page can not be free in this function as call of -	 * try_to_unmap() must hold a reference on the page. +	 * Note that the folio can not be freed in this function as call of +	 * try_to_unmap() must hold a reference on the folio.  	 */ -	range.end = PageKsm(page) ? -			address + PAGE_SIZE : vma_address_end(page, vma); +	range.end = vma_address_end(&pvmw);  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,  				address, range.end); -	if (PageHuge(page)) { +	if (folio_test_hugetlb(folio)) {  		/*  		 * If sharing is possible, start and end will be adjusted  		 * accordingly. @@ -1440,32 +1464,26 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  	mmu_notifier_invalidate_range_start(&range);  	while (page_vma_mapped_walk(&pvmw)) { +		/* Unexpected PMD-mapped THP? */ +		VM_BUG_ON_FOLIO(!pvmw.pte, folio); +  		/* -		 * If the page is mlock()d, we cannot swap it out. +		 * If the folio is in an mlock()d vma, we must not swap it out.  		 */  		if (!(flags & TTU_IGNORE_MLOCK) &&  		    (vma->vm_flags & VM_LOCKED)) { -			/* -			 * PTE-mapped THP are never marked as mlocked: so do -			 * not set it on a DoubleMap THP, nor on an Anon THP -			 * (which may still be PTE-mapped after DoubleMap was -			 * cleared).  But stop unmapping even in those cases. -			 */ -			if (!PageTransCompound(page) || (PageHead(page) && -			     !PageDoubleMap(page) && !PageAnon(page))) -				mlock_vma_page(page); +			/* Restore the mlock which got missed */ +			mlock_vma_folio(folio, vma, false);  			page_vma_mapped_walk_done(&pvmw);  			ret = false;  			break;  		} -		/* Unexpected PMD-mapped THP? */ -		VM_BUG_ON_PAGE(!pvmw.pte, page); - -		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); +		subpage = folio_page(folio, +					pte_pfn(*pvmw.pte) - folio_pfn(folio));  		address = pvmw.address; -		if (PageHuge(page) && !PageAnon(page)) { +		if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {  			/*  			 * To call huge_pmd_unshare, i_mmap_rwsem must be  			 * held in write mode.  Caller needs to explicitly @@ -1504,7 +1522,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  		if (should_defer_flush(mm, flags)) {  			/*  			 * We clear the PTE but do not flush so potentially -			 * a remote CPU could still be writing to the page. +			 * a remote CPU could still be writing to the folio.  			 * If the entry was previously clean then the  			 * architecture must guarantee that a clear->dirty  			 * transition on a cached TLB entry is written through @@ -1517,22 +1535,22 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  			pteval = ptep_clear_flush(vma, address, pvmw.pte);  		} -		/* Move the dirty bit to the page. Now the pte is gone. */ +		/* Set the dirty flag on the folio now the pte is gone. 
*/  		if (pte_dirty(pteval)) -			set_page_dirty(page); +			folio_mark_dirty(folio);  		/* Update high watermark before we lower rss */  		update_hiwater_rss(mm); -		if (PageHWPoison(page) && !(flags & TTU_IGNORE_HWPOISON)) { +		if (PageHWPoison(subpage) && !(flags & TTU_IGNORE_HWPOISON)) {  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); -			if (PageHuge(page)) { -				hugetlb_count_sub(compound_nr(page), mm); +			if (folio_test_hugetlb(folio)) { +				hugetlb_count_sub(folio_nr_pages(folio), mm);  				set_huge_swap_pte_at(mm, address,  						     pvmw.pte, pteval,  						     vma_mmu_pagesize(vma));  			} else { -				dec_mm_counter(mm, mm_counter(page)); +				dec_mm_counter(mm, mm_counter(&folio->page));  				set_pte_at(mm, address, pvmw.pte, pteval);  			} @@ -1547,18 +1565,19 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  			 * migration) will not expect userfaults on already  			 * copied pages.  			 */ -			dec_mm_counter(mm, mm_counter(page)); +			dec_mm_counter(mm, mm_counter(&folio->page));  			/* We have to invalidate as we cleared the pte */  			mmu_notifier_invalidate_range(mm, address,  						      address + PAGE_SIZE); -		} else if (PageAnon(page)) { +		} else if (folio_test_anon(folio)) {  			swp_entry_t entry = { .val = page_private(subpage) };  			pte_t swp_pte;  			/*  			 * Store the swap location in the pte.  			 * See handle_pte_fault() ...  			 */ -			if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) { +			if (unlikely(folio_test_swapbacked(folio) != +					folio_test_swapcache(folio))) {  				WARN_ON_ONCE(1);  				ret = false;  				/* We have to invalidate as we cleared the pte */ @@ -1569,8 +1588,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  			}  			/* MADV_FREE page check */ -			if (!PageSwapBacked(page)) { -				if (!PageDirty(page)) { +			if (!folio_test_swapbacked(folio)) { +				if (!folio_test_dirty(folio)) {  					/* Invalidate as we cleared the pte */  					mmu_notifier_invalidate_range(mm,  						address, address + PAGE_SIZE); @@ -1579,11 +1598,11 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  				}  				/* -				 * If the page was redirtied, it cannot be +				 * If the folio was redirtied, it cannot be  				 * discarded. Remap the page to page table.  				 */  				set_pte_at(mm, address, pvmw.pte, pteval); -				SetPageSwapBacked(page); +				folio_set_swapbacked(folio);  				ret = false;  				page_vma_mapped_walk_done(&pvmw);  				break; @@ -1620,16 +1639,17 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,  						      address + PAGE_SIZE);  		} else {  			/* -			 * This is a locked file-backed page, thus it cannot -			 * be removed from the page cache and replaced by a new -			 * page before mmu_notifier_invalidate_range_end, so no -			 * concurrent thread might update its page table to -			 * point at new page while a device still is using this -			 * page. +			 * This is a locked file-backed folio, +			 * so it cannot be removed from the page +			 * cache and replaced by a new folio before +			 * mmu_notifier_invalidate_range_end, so no +			 * concurrent thread might update its page table +			 * to point at a new folio while a device is +			 * still using this folio.  			 
*  			 * See Documentation/vm/mmu_notifier.rst  			 */ -			dec_mm_counter(mm, mm_counter_file(page)); +			dec_mm_counter(mm, mm_counter_file(&folio->page));  		}  discard:  		/* @@ -1639,8 +1659,10 @@ discard:  		 *  		 * See Documentation/vm/mmu_notifier.rst  		 */ -		page_remove_rmap(subpage, PageHuge(page)); -		put_page(page); +		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio)); +		if (vma->vm_flags & VM_LOCKED) +			mlock_page_drain(smp_processor_id()); +		folio_put(folio);  	}  	mmu_notifier_invalidate_range_end(&range); @@ -1653,35 +1675,35 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)  	return vma_is_temporary_stack(vma);  } -static int page_not_mapped(struct page *page) +static int page_not_mapped(struct folio *folio)  { -	return !page_mapped(page); +	return !folio_mapped(folio);  }  /** - * try_to_unmap - try to remove all page table mappings to a page - * @page: the page to get unmapped + * try_to_unmap - Try to remove all page table mappings to a folio. + * @folio: The folio to unmap.   * @flags: action and flags   *   * Tries to remove all the page table entries which are mapping this - * page, used in the pageout path.  Caller must hold the page lock. + * folio.  It is the caller's responsibility to check if the folio is + * still mapped if needed (use TTU_SYNC to prevent accounting races).   * - * It is the caller's responsibility to check if the page is still - * mapped when needed (use TTU_SYNC to prevent accounting races). + * Context: Caller must hold the folio lock.   */ -void try_to_unmap(struct page *page, enum ttu_flags flags) +void try_to_unmap(struct folio *folio, enum ttu_flags flags)  {  	struct rmap_walk_control rwc = {  		.rmap_one = try_to_unmap_one,  		.arg = (void *)flags,  		.done = page_not_mapped, -		.anon_lock = page_lock_anon_vma_read, +		.anon_lock = folio_lock_anon_vma_read,  	};  	if (flags & TTU_RMAP_LOCKED) -		rmap_walk_locked(page, &rwc); +		rmap_walk_locked(folio, &rwc);  	else -		rmap_walk(page, &rwc); +		rmap_walk(folio, &rwc);  }  /* @@ -1690,15 +1712,11 @@ void try_to_unmap(struct page *page, enum ttu_flags flags)   * If TTU_SPLIT_HUGE_PMD is specified any PMD mappings will be split into PTEs   * containing migration entries.   */ -static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma, +static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma,  		     unsigned long address, void *arg)  {  	struct mm_struct *mm = vma->vm_mm; -	struct page_vma_mapped_walk pvmw = { -		.page = page, -		.vma = vma, -		.address = address, -	}; +	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);  	pte_t pteval;  	struct page *subpage;  	bool ret = true; @@ -1719,7 +1737,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  	 * TTU_SPLIT_HUGE_PMD and it wants to freeze.  	 */  	if (flags & TTU_SPLIT_HUGE_PMD) -		split_huge_pmd_address(vma, address, true, page); +		split_huge_pmd_address(vma, address, true, folio);  	/*  	 * For THP, we have to assume the worse case ie pmd for invalidation. @@ -1729,11 +1747,10 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  	 * Note that the page can not be free in this function as call of  	 * try_to_unmap() must hold a reference on the page.  	 */ -	range.end = PageKsm(page) ? 
-			address + PAGE_SIZE : vma_address_end(page, vma); +	range.end = vma_address_end(&pvmw);  	mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, vma->vm_mm,  				address, range.end); -	if (PageHuge(page)) { +	if (folio_test_hugetlb(folio)) {  		/*  		 * If sharing is possible, start and end will be adjusted  		 * accordingly. @@ -1747,21 +1764,24 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION  		/* PMD-mapped THP migration entry */  		if (!pvmw.pte) { -			VM_BUG_ON_PAGE(PageHuge(page) || -				       !PageTransCompound(page), page); +			subpage = folio_page(folio, +				pmd_pfn(*pvmw.pmd) - folio_pfn(folio)); +			VM_BUG_ON_FOLIO(folio_test_hugetlb(folio) || +					!folio_test_pmd_mappable(folio), folio); -			set_pmd_migration_entry(&pvmw, page); +			set_pmd_migration_entry(&pvmw, subpage);  			continue;  		}  #endif  		/* Unexpected PMD-mapped THP? */ -		VM_BUG_ON_PAGE(!pvmw.pte, page); +		VM_BUG_ON_FOLIO(!pvmw.pte, folio); -		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); +		subpage = folio_page(folio, +				pte_pfn(*pvmw.pte) - folio_pfn(folio));  		address = pvmw.address; -		if (PageHuge(page) && !PageAnon(page)) { +		if (folio_test_hugetlb(folio) && !folio_test_anon(folio)) {  			/*  			 * To call huge_pmd_unshare, i_mmap_rwsem must be  			 * held in write mode.  Caller needs to explicitly @@ -1799,15 +1819,15 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));  		pteval = ptep_clear_flush(vma, address, pvmw.pte); -		/* Move the dirty bit to the page. Now the pte is gone. */ +		/* Set the dirty flag on the folio now the pte is gone. */  		if (pte_dirty(pteval)) -			set_page_dirty(page); +			folio_mark_dirty(folio);  		/* Update high watermark before we lower rss */  		update_hiwater_rss(mm); -		if (is_zone_device_page(page)) { -			unsigned long pfn = page_to_pfn(page); +		if (folio_is_zone_device(folio)) { +			unsigned long pfn = folio_pfn(folio);  			swp_entry_t entry;  			pte_t swp_pte; @@ -1843,16 +1863,16 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  			 * changed when hugepage migrations to device private  			 * memory are supported.  			 */ -			subpage = page; -		} else if (PageHWPoison(page)) { +			subpage = &folio->page; +		} else if (PageHWPoison(subpage)) {  			pteval = swp_entry_to_pte(make_hwpoison_entry(subpage)); -			if (PageHuge(page)) { -				hugetlb_count_sub(compound_nr(page), mm); +			if (folio_test_hugetlb(folio)) { +				hugetlb_count_sub(folio_nr_pages(folio), mm);  				set_huge_swap_pte_at(mm, address,  						     pvmw.pte, pteval,  						     vma_mmu_pagesize(vma));  			} else { -				dec_mm_counter(mm, mm_counter(page)); +				dec_mm_counter(mm, mm_counter(&folio->page));  				set_pte_at(mm, address, pvmw.pte, pteval);  			} @@ -1867,7 +1887,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  			 * migration) will not expect userfaults on already  			 * copied pages.  			 
*/ -			dec_mm_counter(mm, mm_counter(page)); +			dec_mm_counter(mm, mm_counter(&folio->page));  			/* We have to invalidate as we cleared the pte */  			mmu_notifier_invalidate_range(mm, address,  						      address + PAGE_SIZE); @@ -1913,8 +1933,10 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  		 *  		 * See Documentation/vm/mmu_notifier.rst  		 */ -		page_remove_rmap(subpage, PageHuge(page)); -		put_page(page); +		page_remove_rmap(subpage, vma, folio_test_hugetlb(folio)); +		if (vma->vm_flags & VM_LOCKED) +			mlock_page_drain(smp_processor_id()); +		folio_put(folio);  	}  	mmu_notifier_invalidate_range_end(&range); @@ -1924,19 +1946,19 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,  /**   * try_to_migrate - try to replace all page table mappings with swap entries - * @page: the page to replace page table entries for + * @folio: the folio to replace page table entries for   * @flags: action and flags   * - * Tries to remove all the page table entries which are mapping this page and - * replace them with special swap entries. Caller must hold the page lock. + * Tries to remove all the page table entries which are mapping this folio and + * replace them with special swap entries. Caller must hold the folio lock.   */ -void try_to_migrate(struct page *page, enum ttu_flags flags) +void try_to_migrate(struct folio *folio, enum ttu_flags flags)  {  	struct rmap_walk_control rwc = {  		.rmap_one = try_to_migrate_one,  		.arg = (void *)flags,  		.done = page_not_mapped, -		.anon_lock = page_lock_anon_vma_read, +		.anon_lock = folio_lock_anon_vma_read,  	};  	/* @@ -1947,7 +1969,7 @@ void try_to_migrate(struct page *page, enum ttu_flags flags)  					TTU_SYNC)))  		return; -	if (is_zone_device_page(page) && !is_device_private_page(page)) +	if (folio_is_zone_device(folio) && !folio_is_device_private(folio))  		return;  	/* @@ -1958,83 +1980,13 @@ void try_to_migrate(struct page *page, enum ttu_flags flags)  	 * locking requirements of exec(), migration skips  	 * temporary VMAs until after exec() completes.  	 */ -	if (!PageKsm(page) && PageAnon(page)) +	if (!folio_test_ksm(folio) && folio_test_anon(folio))  		rwc.invalid_vma = invalid_migration_vma;  	if (flags & TTU_RMAP_LOCKED) -		rmap_walk_locked(page, &rwc); +		rmap_walk_locked(folio, &rwc);  	else -		rmap_walk(page, &rwc); -} - -/* - * Walks the vma's mapping a page and mlocks the page if any locked vma's are - * found. Once one is found the page is locked and the scan can be terminated. - */ -static bool page_mlock_one(struct page *page, struct vm_area_struct *vma, -				 unsigned long address, void *unused) -{ -	struct page_vma_mapped_walk pvmw = { -		.page = page, -		.vma = vma, -		.address = address, -	}; - -	/* An un-locked vma doesn't have any pages to lock, continue the scan */ -	if (!(vma->vm_flags & VM_LOCKED)) -		return true; - -	while (page_vma_mapped_walk(&pvmw)) { -		/* -		 * Need to recheck under the ptl to serialise with -		 * __munlock_pagevec_fill() after VM_LOCKED is cleared in -		 * munlock_vma_pages_range(). -		 */ -		if (vma->vm_flags & VM_LOCKED) { -			/* -			 * PTE-mapped THP are never marked as mlocked; but -			 * this function is never called on a DoubleMap THP, -			 * nor on an Anon THP (which may still be PTE-mapped -			 * after DoubleMap was cleared). -			 */ -			mlock_vma_page(page); -			/* -			 * No need to scan further once the page is marked -			 * as mlocked. 
-			 */ -			page_vma_mapped_walk_done(&pvmw); -			return false; -		} -	} - -	return true; -} - -/** - * page_mlock - try to mlock a page - * @page: the page to be mlocked - * - * Called from munlock code. Checks all of the VMAs mapping the page and mlocks - * the page if any are found. The page will be returned with PG_mlocked cleared - * if it is not mapped by any locked vmas. - */ -void page_mlock(struct page *page) -{ -	struct rmap_walk_control rwc = { -		.rmap_one = page_mlock_one, -		.done = page_not_mapped, -		.anon_lock = page_lock_anon_vma_read, - -	}; - -	VM_BUG_ON_PAGE(!PageLocked(page) || PageLRU(page), page); -	VM_BUG_ON_PAGE(PageCompound(page) && PageDoubleMap(page), page); - -	/* Anon THP are only marked as mlocked when singly mapped */ -	if (PageTransCompound(page) && PageAnon(page)) -		return; - -	rmap_walk(page, &rwc); +		rmap_walk(folio, &rwc);  }  #ifdef CONFIG_DEVICE_PRIVATE @@ -2045,15 +1997,11 @@ struct make_exclusive_args {  	bool valid;  }; -static bool page_make_device_exclusive_one(struct page *page, +static bool page_make_device_exclusive_one(struct folio *folio,  		struct vm_area_struct *vma, unsigned long address, void *priv)  {  	struct mm_struct *mm = vma->vm_mm; -	struct page_vma_mapped_walk pvmw = { -		.page = page, -		.vma = vma, -		.address = address, -	}; +	DEFINE_FOLIO_VMA_WALK(pvmw, folio, vma, address, 0);  	struct make_exclusive_args *args = priv;  	pte_t pteval;  	struct page *subpage; @@ -2064,12 +2012,13 @@ static bool page_make_device_exclusive_one(struct page *page,  	mmu_notifier_range_init_owner(&range, MMU_NOTIFY_EXCLUSIVE, 0, vma,  				      vma->vm_mm, address, min(vma->vm_end, -				      address + page_size(page)), args->owner); +				      address + folio_size(folio)), +				      args->owner);  	mmu_notifier_invalidate_range_start(&range);  	while (page_vma_mapped_walk(&pvmw)) {  		/* Unexpected PMD-mapped THP? */ -		VM_BUG_ON_PAGE(!pvmw.pte, page); +		VM_BUG_ON_FOLIO(!pvmw.pte, folio);  		if (!pte_present(*pvmw.pte)) {  			ret = false; @@ -2077,16 +2026,17 @@ static bool page_make_device_exclusive_one(struct page *page,  			break;  		} -		subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); +		subpage = folio_page(folio, +				pte_pfn(*pvmw.pte) - folio_pfn(folio));  		address = pvmw.address;  		/* Nuke the page table entry. */  		flush_cache_page(vma, address, pte_pfn(*pvmw.pte));  		pteval = ptep_clear_flush(vma, address, pvmw.pte); -		/* Move the dirty bit to the page. Now the pte is gone. */ +		/* Set the dirty flag on the folio now the pte is gone. */  		if (pte_dirty(pteval)) -			set_page_dirty(page); +			folio_mark_dirty(folio);  		/*  		 * Check that our target page is still mapped at the expected @@ -2119,7 +2069,7 @@ static bool page_make_device_exclusive_one(struct page *page,  		 * There is a reference on the page for the swap entry which has  		 * been removed, so shouldn't take another.  		 */ -		page_remove_rmap(subpage, false); +		page_remove_rmap(subpage, vma, false);  	}  	mmu_notifier_invalidate_range_end(&range); @@ -2128,21 +2078,22 @@ static bool page_make_device_exclusive_one(struct page *page,  }  /** - * page_make_device_exclusive - mark the page exclusively owned by a device - * @page: the page to replace page table entries for - * @mm: the mm_struct where the page is expected to be mapped - * @address: address where the page is expected to be mapped + * folio_make_device_exclusive - Mark the folio exclusively owned by a device. + * @folio: The folio to replace page table entries for. 
+ * @mm: The mm_struct where the folio is expected to be mapped. + * @address: Address where the folio is expected to be mapped.   * @owner: passed to MMU_NOTIFY_EXCLUSIVE range notifier callbacks   * - * Tries to remove all the page table entries which are mapping this page and - * replace them with special device exclusive swap entries to grant a device - * exclusive access to the page. Caller must hold the page lock. + * Tries to remove all the page table entries which are mapping this + * folio and replace them with special device exclusive swap entries to + * grant a device exclusive access to the folio.   * - * Returns false if the page is still mapped, or if it could not be unmapped + * Context: Caller must hold the folio lock. + * Return: false if the page is still mapped, or if it could not be unmapped   * from the expected address. Otherwise returns true (success).   */ -static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm, -				unsigned long address, void *owner) +static bool folio_make_device_exclusive(struct folio *folio, +		struct mm_struct *mm, unsigned long address, void *owner)  {  	struct make_exclusive_args args = {  		.mm = mm, @@ -2153,21 +2104,20 @@ static bool page_make_device_exclusive(struct page *page, struct mm_struct *mm,  	struct rmap_walk_control rwc = {  		.rmap_one = page_make_device_exclusive_one,  		.done = page_not_mapped, -		.anon_lock = page_lock_anon_vma_read, +		.anon_lock = folio_lock_anon_vma_read,  		.arg = &args,  	};  	/* -	 * Restrict to anonymous pages for now to avoid potential writeback -	 * issues. Also tail pages shouldn't be passed to rmap_walk so skip -	 * those. +	 * Restrict to anonymous folios for now to avoid potential writeback +	 * issues.  	 */ -	if (!PageAnon(page) || PageTail(page)) +	if (!folio_test_anon(folio))  		return false; -	rmap_walk(page, &rwc); +	rmap_walk(folio, &rwc); -	return args.valid && !page_mapcount(page); +	return args.valid && !folio_mapcount(folio);  }  /** @@ -2205,15 +2155,16 @@ int make_device_exclusive_range(struct mm_struct *mm, unsigned long start,  		return npages;  	for (i = 0; i < npages; i++, start += PAGE_SIZE) { -		if (!trylock_page(pages[i])) { -			put_page(pages[i]); +		struct folio *folio = page_folio(pages[i]); +		if (PageTail(pages[i]) || !folio_trylock(folio)) { +			folio_put(folio);  			pages[i] = NULL;  			continue;  		} -		if (!page_make_device_exclusive(pages[i], mm, start, owner)) { -			unlock_page(pages[i]); -			put_page(pages[i]); +		if (!folio_make_device_exclusive(folio, mm, start, owner)) { +			folio_unlock(folio); +			folio_put(folio);  			pages[i] = NULL;  		}  	} @@ -2232,21 +2183,21 @@ void __put_anon_vma(struct anon_vma *anon_vma)  		anon_vma_free(root);  } -static struct anon_vma *rmap_walk_anon_lock(struct page *page, -					struct rmap_walk_control *rwc) +static struct anon_vma *rmap_walk_anon_lock(struct folio *folio, +					const struct rmap_walk_control *rwc)  {  	struct anon_vma *anon_vma;  	if (rwc->anon_lock) -		return rwc->anon_lock(page); +		return rwc->anon_lock(folio);  	/* -	 * Note: remove_migration_ptes() cannot use page_lock_anon_vma_read() +	 * Note: remove_migration_ptes() cannot use folio_lock_anon_vma_read()  	 * because that depends on page_mapped(); but not all its usages  	 * are holding mmap_lock. 
Users without mmap_lock are required to  	 * take a reference count to prevent the anon_vma disappearing  	 */ -	anon_vma = page_anon_vma(page); +	anon_vma = folio_anon_vma(folio);  	if (!anon_vma)  		return NULL; @@ -2262,35 +2213,30 @@ static struct anon_vma *rmap_walk_anon_lock(struct page *page,   *   * Find all the mappings of a page using the mapping pointer and the vma chains   * contained in the anon_vma struct it points to. - * - * When called from page_mlock(), the mmap_lock of the mm containing the vma - * where the page was found will be held for write.  So, we won't recheck - * vm_flags for that VMA.  That should be OK, because that vma shouldn't be - * LOCKED.   */ -static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc, -		bool locked) +static void rmap_walk_anon(struct folio *folio, +		const struct rmap_walk_control *rwc, bool locked)  {  	struct anon_vma *anon_vma;  	pgoff_t pgoff_start, pgoff_end;  	struct anon_vma_chain *avc;  	if (locked) { -		anon_vma = page_anon_vma(page); +		anon_vma = folio_anon_vma(folio);  		/* anon_vma disappear under us? */ -		VM_BUG_ON_PAGE(!anon_vma, page); +		VM_BUG_ON_FOLIO(!anon_vma, folio);  	} else { -		anon_vma = rmap_walk_anon_lock(page, rwc); +		anon_vma = rmap_walk_anon_lock(folio, rwc);  	}  	if (!anon_vma)  		return; -	pgoff_start = page_to_pgoff(page); -	pgoff_end = pgoff_start + thp_nr_pages(page) - 1; +	pgoff_start = folio_pgoff(folio); +	pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;  	anon_vma_interval_tree_foreach(avc, &anon_vma->rb_root,  			pgoff_start, pgoff_end) {  		struct vm_area_struct *vma = avc->vma; -		unsigned long address = vma_address(page, vma); +		unsigned long address = vma_address(&folio->page, vma);  		VM_BUG_ON_VMA(address == -EFAULT, vma);  		cond_resched(); @@ -2298,9 +2244,9 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,  		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))  			continue; -		if (!rwc->rmap_one(page, vma, address, rwc->arg)) +		if (!rwc->rmap_one(folio, vma, address, rwc->arg))  			break; -		if (rwc->done && rwc->done(page)) +		if (rwc->done && rwc->done(folio))  			break;  	} @@ -2315,16 +2261,11 @@ static void rmap_walk_anon(struct page *page, struct rmap_walk_control *rwc,   *   * Find all the mappings of a page using the mapping pointer and the vma chains   * contained in the address_space struct it points to. - * - * When called from page_mlock(), the mmap_lock of the mm containing the vma - * where the page was found will be held for write.  So, we won't recheck - * vm_flags for that VMA.  That should be OK, because that vma shouldn't be - * LOCKED.   */ -static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc, -		bool locked) +static void rmap_walk_file(struct folio *folio, +		const struct rmap_walk_control *rwc, bool locked)  { -	struct address_space *mapping = page_mapping(page); +	struct address_space *mapping = folio_mapping(folio);  	pgoff_t pgoff_start, pgoff_end;  	struct vm_area_struct *vma; @@ -2334,18 +2275,18 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,  	 * structure at mapping cannot be freed and reused yet,  	 * so we can safely take mapping->i_mmap_rwsem.  	 
*/ -	VM_BUG_ON_PAGE(!PageLocked(page), page); +	VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);  	if (!mapping)  		return; -	pgoff_start = page_to_pgoff(page); -	pgoff_end = pgoff_start + thp_nr_pages(page) - 1; +	pgoff_start = folio_pgoff(folio); +	pgoff_end = pgoff_start + folio_nr_pages(folio) - 1;  	if (!locked)  		i_mmap_lock_read(mapping);  	vma_interval_tree_foreach(vma, &mapping->i_mmap,  			pgoff_start, pgoff_end) { -		unsigned long address = vma_address(page, vma); +		unsigned long address = vma_address(&folio->page, vma);  		VM_BUG_ON_VMA(address == -EFAULT, vma);  		cond_resched(); @@ -2353,9 +2294,9 @@ static void rmap_walk_file(struct page *page, struct rmap_walk_control *rwc,  		if (rwc->invalid_vma && rwc->invalid_vma(vma, rwc->arg))  			continue; -		if (!rwc->rmap_one(page, vma, address, rwc->arg)) +		if (!rwc->rmap_one(folio, vma, address, rwc->arg))  			goto done; -		if (rwc->done && rwc->done(page)) +		if (rwc->done && rwc->done(folio))  			goto done;  	} @@ -2364,25 +2305,25 @@ done:  		i_mmap_unlock_read(mapping);  } -void rmap_walk(struct page *page, struct rmap_walk_control *rwc) +void rmap_walk(struct folio *folio, const struct rmap_walk_control *rwc)  { -	if (unlikely(PageKsm(page))) -		rmap_walk_ksm(page, rwc); -	else if (PageAnon(page)) -		rmap_walk_anon(page, rwc, false); +	if (unlikely(folio_test_ksm(folio))) +		rmap_walk_ksm(folio, rwc); +	else if (folio_test_anon(folio)) +		rmap_walk_anon(folio, rwc, false);  	else -		rmap_walk_file(page, rwc, false); +		rmap_walk_file(folio, rwc, false);  }  /* Like rmap_walk, but caller holds relevant rmap lock */ -void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc) +void rmap_walk_locked(struct folio *folio, const struct rmap_walk_control *rwc)  {  	/* no ksm support for now */ -	VM_BUG_ON_PAGE(PageKsm(page), page); -	if (PageAnon(page)) -		rmap_walk_anon(page, rwc, true); +	VM_BUG_ON_FOLIO(folio_test_ksm(folio), folio); +	if (folio_test_anon(folio)) +		rmap_walk_anon(folio, rwc, true);  	else -		rmap_walk_file(page, rwc, true); +		rmap_walk_file(folio, rwc, true);  }  #ifdef CONFIG_HUGETLB_PAGE @@ -2410,8 +2351,7 @@ void hugepage_add_new_anon_rmap(struct page *page,  {  	BUG_ON(address < vma->vm_start || address >= vma->vm_end);  	atomic_set(compound_mapcount_ptr(page), 0); -	if (hpage_pincount_available(page)) -		atomic_set(compound_pincount_ptr(page), 0); +	atomic_set(compound_pincount_ptr(page), 0);  	__page_set_anon_rmap(page, vma, address, 1);  }  |
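Several hunks in this diff replace the old subpage computation, page - page_to_pfn(page) + pte_pfn(*pvmw.pte), with folio_page(folio, pte_pfn(*pvmw.pte) - folio_pfn(folio)). The toy program below only illustrates why the two forms select the same subpage of a physically contiguous folio; struct toy_folio, struct toy_page and the two helpers are simplified stand-ins for illustration, not the kernel's real definitions.

/*
 * Toy model of the subpage lookup used throughout the converted hunks:
 *   subpage = folio_page(folio, pte_pfn(*pvmw.pte) - folio_pfn(folio));
 * Simplified stand-in types, purely to show the pfn arithmetic.
 */
#include <assert.h>
#include <stdio.h>

struct toy_page {
	unsigned long pfn;
};

/* A folio: a physically contiguous run of pages starting at pages[0]. */
struct toy_folio {
	struct toy_page pages[8];	/* order-3 folio for the example */
	unsigned long nr_pages;
};

static unsigned long folio_pfn(const struct toy_folio *folio)
{
	return folio->pages[0].pfn;	/* pfn of the folio's head page */
}

static struct toy_page *folio_page(struct toy_folio *folio, unsigned long n)
{
	return &folio->pages[n];	/* n-th subpage of the folio */
}

int main(void)
{
	struct toy_folio folio = { .nr_pages = 8 };
	unsigned long base_pfn = 0x1000;

	for (unsigned long i = 0; i < folio.nr_pages; i++)
		folio.pages[i].pfn = base_pfn + i;

	/* Pretend a PTE maps pfn 0x1005: it must resolve to subpage 5. */
	unsigned long pte_pfn = 0x1005;
	struct toy_page *subpage = folio_page(&folio, pte_pfn - folio_pfn(&folio));

	assert(subpage == &folio.pages[5]);
	printf("pte pfn %#lx -> subpage index %ld\n",
	       pte_pfn, (long)(subpage - folio.pages));
	return 0;
}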