Diffstat (limited to 'mm/internal.h')
-rw-r--r--	mm/internal.h	205
1 file changed, 156 insertions, 49 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 07ad2675a88b..2adabe369403 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -11,6 +11,8 @@
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 #include <linux/rmap.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
 #include <linux/tracepoint-defs.h>
 
 struct folio_batch;
@@ -70,13 +72,30 @@ void page_writeback_init(void);
 /*
  * How many individual pages have an elevated _mapcount.  Excludes
  * the folio's entire_mapcount.
+ *
+ * Don't use this function outside of debugging code.
  */
-static inline int folio_nr_pages_mapped(struct folio *folio)
+static inline int folio_nr_pages_mapped(const struct folio *folio)
 {
 	return atomic_read(&folio->_nr_pages_mapped) & FOLIO_PAGES_MAPPED;
 }
 
-static inline void *folio_raw_mapping(struct folio *folio)
+/*
+ * Retrieve the first entry of a folio based on a provided entry within the
+ * folio. We cannot rely on folio->swap as there is no guarantee that it has
+ * been initialized. Used for calling arch_swap_restore()
+ */
+static inline swp_entry_t folio_swap(swp_entry_t entry,
+		const struct folio *folio)
+{
+	swp_entry_t swap = {
+		.val = ALIGN_DOWN(entry.val, folio_nr_pages(folio)),
+	};
+
+	return swap;
+}
+
+static inline void *folio_raw_mapping(const struct folio *folio)
 {
 	unsigned long mapping = (unsigned long)folio->mapping;
 
@@ -113,6 +132,10 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
  * @flags: Flags to modify the PTE batch semantics.
  * @any_writable: Optional pointer to indicate whether any entry except the
  *		  first one is writable.
+ * @any_young: Optional pointer to indicate whether any entry except the
+ *		  first one is young.
+ * @any_dirty: Optional pointer to indicate whether any entry except the
+ *		  first one is dirty.
  *
  * Detect a PTE batch: consecutive (present) PTEs that map consecutive
  * pages of the same large folio.
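The new folio_swap() helper relies on a large folio's swap slots being contiguous and naturally aligned, so aligning any entry's value down by the folio size recovers the folio's first entry. A minimal caller sketch, assuming arch_swap_restore() keeps its current (swp_entry_t, struct folio *) form; the wrapper name below is hypothetical and not part of this patch:

/*
 * Hypothetical wrapper, for illustration only: restore arch-specific swap
 * metadata for a whole large folio given any swap entry inside it, without
 * trusting folio->swap to be initialized.
 */
static inline void folio_arch_swap_restore(swp_entry_t entry,
					    struct folio *folio)
{
	/* Align the entry down to the folio's first swap slot. */
	arch_swap_restore(folio_swap(entry, folio), folio);
}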
@@ -128,16 +151,20 @@ static inline pte_t __pte_batch_clear_ignored(pte_t pte, fpb_t flags)
  */
 static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 		pte_t *start_ptep, pte_t pte, int max_nr, fpb_t flags,
-		bool *any_writable)
+		bool *any_writable, bool *any_young, bool *any_dirty)
 {
 	unsigned long folio_end_pfn = folio_pfn(folio) + folio_nr_pages(folio);
 	const pte_t *end_ptep = start_ptep + max_nr;
 	pte_t expected_pte, *ptep;
-	bool writable;
+	bool writable, young, dirty;
 	int nr;
 
 	if (any_writable)
 		*any_writable = false;
+	if (any_young)
+		*any_young = false;
+	if (any_dirty)
+		*any_dirty = false;
 
 	VM_WARN_ON_FOLIO(!pte_present(pte), folio);
 	VM_WARN_ON_FOLIO(!folio_test_large(folio) || max_nr < 1, folio);
@@ -151,6 +178,10 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 		pte = ptep_get(ptep);
 		if (any_writable)
 			writable = !!pte_write(pte);
+		if (any_young)
+			young = !!pte_young(pte);
+		if (any_dirty)
+			dirty = !!pte_dirty(pte);
 		pte = __pte_batch_clear_ignored(pte, flags);
 
 		if (!pte_same(pte, expected_pte))
@@ -166,6 +197,10 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 
 		if (any_writable)
 			*any_writable |= writable;
+		if (any_young)
+			*any_young |= young;
+		if (any_dirty)
+			*any_dirty |= dirty;
 
 		nr = pte_batch_hint(ptep, pte);
 		expected_pte = pte_advance_pfn(expected_pte, nr);
@@ -174,6 +209,68 @@ static inline int folio_pte_batch(struct folio *folio, unsigned long addr,
 	return min(ptep - start_ptep, max_nr);
 }
+
+/**
+ * pte_next_swp_offset - Increment the swap entry offset field of a swap pte.
+ * @pte: The initial pte state; is_swap_pte(pte) must be true and
+ *	 non_swap_entry() must be false.
+ *
+ * Increments the swap offset, while maintaining all other fields, including
+ * swap type, and any swp pte bits. The resulting pte is returned.
+ */
+static inline pte_t pte_next_swp_offset(pte_t pte)
+{
+	swp_entry_t entry = pte_to_swp_entry(pte);
+	pte_t new = __swp_entry_to_pte(__swp_entry(swp_type(entry),
+						   (swp_offset(entry) + 1)));
+
+	if (pte_swp_soft_dirty(pte))
+		new = pte_swp_mksoft_dirty(new);
+	if (pte_swp_exclusive(pte))
+		new = pte_swp_mkexclusive(new);
+	if (pte_swp_uffd_wp(pte))
+		new = pte_swp_mkuffd_wp(new);
+
+	return new;
+}
+
+/**
+ * swap_pte_batch - detect a PTE batch for a set of contiguous swap entries
+ * @start_ptep: Page table pointer for the first entry.
+ * @max_nr: The maximum number of table entries to consider.
+ * @pte: Page table entry for the first entry.
+ *
+ * Detect a batch of contiguous swap entries: consecutive (non-present) PTEs
+ * containing swap entries all with consecutive offsets and targeting the same
+ * swap type, all with matching swp pte bits.
+ *
+ * max_nr must be at least one and must be limited by the caller so scanning
+ * cannot exceed a single page table.
+ *
+ * Return: the number of table entries in the batch.
+ */
+static inline int swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte)
+{
+	pte_t expected_pte = pte_next_swp_offset(pte);
+	const pte_t *end_ptep = start_ptep + max_nr;
+	pte_t *ptep = start_ptep + 1;
+
+	VM_WARN_ON(max_nr < 1);
+	VM_WARN_ON(!is_swap_pte(pte));
+	VM_WARN_ON(non_swap_entry(pte_to_swp_entry(pte)));
+
+	while (ptep < end_ptep) {
+		pte = ptep_get(ptep);
+
+		if (!pte_same(pte, expected_pte))
+			break;
+
+		expected_pte = pte_next_swp_offset(expected_pte);
+		ptep++;
+	}
+
+	return ptep - start_ptep;
+}
 #endif /* CONFIG_MMU */
 
 void __acct_reclaim_writeback(pg_data_t *pgdat, struct folio *folio,
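Taken together, folio_pte_batch() and the new swap_pte_batch() let a page-table walker consume a large folio's worth of entries per iteration instead of one PTE at a time. A rough caller sketch, assuming the PTL is held, the PTE page is mapped, and folio is the large folio backing a present pte; the function name and surrounding loop are illustrative only, not lifted from this patch:

/*
 * Illustrative only: advance over one batch of entries, whether they are
 * present PTEs of a (large) folio or swap PTEs with consecutive offsets.
 * Locking, folio lookup and the enclosing loop are assumed to be handled
 * by the caller.
 */
static int next_batch(struct folio *folio, unsigned long addr,
		      unsigned long end, pte_t *ptep)
{
	int max_nr = (end - addr) >> PAGE_SHIFT;
	pte_t pte = ptep_get(ptep);
	bool any_young, any_dirty;

	if (pte_present(pte))
		/* Also learn whether any entry in the batch is young/dirty. */
		return folio_pte_batch(folio, addr, ptep, pte, max_nr,
				       FPB_IGNORE_DIRTY | FPB_IGNORE_SOFT_DIRTY,
				       NULL, &any_young, &any_dirty);

	if (is_swap_pte(pte) && !non_swap_entry(pte_to_swp_entry(pte)))
		/* Batch consecutive swap entries of the same swap type. */
		return swap_pte_batch(ptep, max_nr, pte);

	return 1;
}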
@@ -491,6 +588,7 @@ extern void __putback_isolated_page(struct page *page, unsigned int order,
 extern void memblock_free_pages(struct page *page, unsigned long pfn,
 					unsigned int order);
 extern void __free_pages_core(struct page *page, unsigned int order);
+extern void kernel_init_pages(struct page *page, int numpages);
 
 /*
  * This will have no effect, other than possibly generating a warning, if the
@@ -513,7 +611,8 @@ static inline struct folio *page_rmappable_folio(struct page *page)
 {
 	struct folio *folio = (struct folio *)page;
 
-	folio_prep_large_rmappable(folio);
+	if (folio && folio_test_large(folio))
+		folio_set_large_rmappable(folio);
 	return folio;
 }
 
@@ -522,9 +621,12 @@ static inline void prep_compound_head(struct page *page, unsigned int order)
 	struct folio *folio = (struct folio *)page;
 
 	folio_set_order(folio, order);
+	atomic_set(&folio->_large_mapcount, -1);
 	atomic_set(&folio->_entire_mapcount, -1);
 	atomic_set(&folio->_nr_pages_mapped, 0);
 	atomic_set(&folio->_pincount, 0);
+	if (order > 1)
+		INIT_LIST_HEAD(&folio->_deferred_list);
 }
 
 static inline void prep_compound_tail(struct page *head, int tail_idx)
@@ -559,10 +661,6 @@ extern void *memmap_alloc(phys_addr_t size, phys_addr_t align,
 void memmap_init_range(unsigned long, int, unsigned long, unsigned long,
 		unsigned long, enum meminit_context, struct vmem_altmap *, int);
 
-
-int split_free_page(struct page *free_page,
-			unsigned int order, unsigned long split_pfn_offset);
-
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
 
 /*
@@ -789,13 +887,17 @@ void mlock_drain_remote(int cpu);
 
 extern pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma);
 
-/*
- * Return the start of user virtual address at the specific offset within
- * a vma.
+/**
+ * vma_address - Find the virtual address a page range is mapped at
+ * @vma: The vma which maps this object.
+ * @pgoff: The page offset within its object.
+ * @nr_pages: The number of pages to consider.
+ *
+ * If any page in this range is mapped by this VMA, return the first address
+ * where any of these pages appear.  Otherwise, return -EFAULT.
  */
-static inline unsigned long
-vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
-		  struct vm_area_struct *vma)
+static inline unsigned long vma_address(struct vm_area_struct *vma,
+		pgoff_t pgoff, unsigned long nr_pages)
 {
 	unsigned long address;
 
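The renamed vma_address() now takes the page offset and number of pages directly rather than a struct page. A hedged sketch of the folio-based calling convention; the wrapper below is hypothetical and, like the old page-based helper, would not be valid for KSM pages whose index is unusable:

/*
 * Hypothetical wrapper (not part of the patch): find where a folio is
 * mapped inside a VMA using the reworked vma_address().
 */
static unsigned long folio_addr_in_vma(struct folio *folio,
				       struct vm_area_struct *vma)
{
	return vma_address(vma, folio_pgoff(folio), folio_nr_pages(folio));
}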
@@ -815,18 +917,6 @@ vma_pgoff_address(pgoff_t pgoff, unsigned long nr_pages,
 }
 
 /*
- * Return the start of user virtual address of a page within a vma.
- * Returns -EFAULT if all of the page is outside the range of vma.
- * If page is a compound head, the entire compound page is considered.
- */
-static inline unsigned long
-vma_address(struct page *page, struct vm_area_struct *vma)
-{
-	VM_BUG_ON_PAGE(PageKsm(page), page);	/* KSM page->index unusable */
-	return vma_pgoff_address(page_to_pgoff(page), compound_nr(page), vma);
-}
-
-/*
  * Then at what user virtual address will none of the range be found in vma?
  * Assumes that vma_address() already returned a good starting address.
  */
@@ -947,6 +1037,7 @@ static inline int find_next_best_node(int node, nodemask_t *used_node_mask)
 /*
  * mm/memory-failure.c
  */
+void shake_folio(struct folio *folio);
 extern int hwpoison_filter(struct page *p);
 
 extern u32 hwpoison_filter_dev_major;
@@ -961,7 +1052,7 @@ extern unsigned long
 __must_check vm_mmap_pgoff(struct file *, unsigned long,
         unsigned long, unsigned long);
 
 extern void set_pageblock_order(void);
-unsigned long reclaim_pages(struct list_head *folio_list, bool ignore_references);
+unsigned long reclaim_pages(struct list_head *folio_list);
 unsigned int reclaim_clean_pages_from_list(struct zone *zone,
 					    struct list_head *folio_list);
 
 /* The ALLOC_WMARK bits are used as an index to zone->watermark */
@@ -1040,17 +1131,13 @@ static inline bool is_migrate_highatomic(enum migratetype migratetype)
 	return migratetype == MIGRATE_HIGHATOMIC;
 }
 
-static inline bool is_migrate_highatomic_page(struct page *page)
-{
-	return get_pageblock_migratetype(page) == MIGRATE_HIGHATOMIC;
-}
-
 void setup_zone_pageset(struct zone *zone);
 
 struct migration_target_control {
 	int nid;		/* preferred node id */
 	nodemask_t *nmask;
 	gfp_t gfp_mask;
+	enum migrate_reason reason;
 };
 
 /*
@@ -1087,10 +1174,10 @@ void vunmap_range_noflush(unsigned long start, unsigned long end);
 
 void __vunmap_range_noflush(unsigned long start, unsigned long end);
 
-int numa_migrate_prep(struct folio *folio, struct vm_area_struct *vma,
+int numa_migrate_prep(struct folio *folio, struct vm_fault *vmf,
 		      unsigned long addr, int page_nid, int *flags);
 
-void free_zone_device_page(struct page *page);
+void free_zone_device_folio(struct folio *folio);
 int migrate_device_coherent_page(struct page *page);
 
 /*
@@ -1102,9 +1189,10 @@ int __must_check try_grab_page(struct page *page, unsigned int flags);
 /*
  * mm/huge_memory.c
  */
-struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
-				   unsigned long addr, pmd_t *pmd,
-				   unsigned int flags);
+void touch_pud(struct vm_area_struct *vma, unsigned long addr,
+	       pud_t *pud, bool write);
+void touch_pmd(struct vm_area_struct *vma, unsigned long addr,
+	       pmd_t *pmd, bool write);
 
 /*
  * mm/mmap.c
@@ -1189,20 +1277,10 @@ static inline bool gup_must_unshare(struct vm_area_struct *vma,
 	}
 
 	/* Paired with a memory barrier in folio_try_share_anon_rmap_*(). */
-	if (IS_ENABLED(CONFIG_HAVE_FAST_GUP))
+	if (IS_ENABLED(CONFIG_HAVE_GUP_FAST))
 		smp_rmb();
 
 	/*
-	 * During GUP-fast we might not get called on the head page for a
-	 * hugetlb page that is mapped using cont-PTE, because GUP-fast does
-	 * not work with the abstracted hugetlb PTEs that always point at the
-	 * head page. For hugetlb, PageAnonExclusive only applies on the head
-	 * page (as it cannot be partially COW-shared), so lookup the head page.
-	 */
-	if (unlikely(!PageHead(page) && PageHuge(page)))
-		page = compound_head(page);
-
-	/*
 	 * Note that PageKsm() pages cannot be exclusive, and consequently,
 	 * cannot get pinned.
 	 */
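struct migration_target_control now records why a migration is taking place alongside the target-allocation parameters. An illustrative initialisation, assuming the caller wants movable pages on a specific node; the gfp flags and MR_MEMORY_HOTPLUG reason are example values, not taken from this patch:

/*
 * Example only: a migration caller filling in the extended control
 * structure, including the new migrate_reason field.
 */
static void init_mtc_example(struct migration_target_control *mtc, int nid)
{
	*mtc = (struct migration_target_control) {
		.nid = nid,
		.nmask = NULL,
		.gfp_mask = GFP_USER | __GFP_MOVABLE,
		.reason = MR_MEMORY_HOTPLUG,	/* example reason */
	};
}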
@@ -1245,6 +1323,35 @@ static inline void vma_iter_config(struct vma_iterator *vmi,
 	__mas_set_range(&vmi->mas, index, last - 1);
 }
 
+static inline void vma_iter_reset(struct vma_iterator *vmi)
+{
+	mas_reset(&vmi->mas);
+}
+
+static inline
+struct vm_area_struct *vma_iter_prev_range_limit(struct vma_iterator *vmi, unsigned long min)
+{
+	return mas_prev_range(&vmi->mas, min);
+}
+
+static inline
+struct vm_area_struct *vma_iter_next_range_limit(struct vma_iterator *vmi, unsigned long max)
+{
+	return mas_next_range(&vmi->mas, max);
+}
+
+static inline int vma_iter_area_lowest(struct vma_iterator *vmi, unsigned long min,
+				       unsigned long max, unsigned long size)
+{
+	return mas_empty_area(&vmi->mas, min, max - 1, size);
+}
+
+static inline int vma_iter_area_highest(struct vma_iterator *vmi, unsigned long min,
+					unsigned long max, unsigned long size)
+{
+	return mas_empty_area_rev(&vmi->mas, min, max - 1, size);
+}
+
 /*
  * VMA Iterator functions shared between nommu and mmap
  */
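The new VMA iterator wrappers expose the maple-tree gap search without callers touching vmi->mas directly. A sketch of how a lowest-fitting gap might be located, assuming the caller holds the appropriate mmap lock; the helper name is made up, and error handling is reduced to the -ENOMEM convention used by the unmapped-area search paths:

/*
 * Sketch only: find the lowest gap of "size" bytes in [low, high).
 */
static unsigned long lowest_gap_sketch(struct mm_struct *mm, unsigned long low,
				       unsigned long high, unsigned long size)
{
	VMA_ITERATOR(vmi, mm, low);

	if (vma_iter_area_lowest(&vmi, low, high, size))
		return -ENOMEM;

	/* On success the iterator's index is the start of the gap. */
	return vma_iter_addr(&vmi);
}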