Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--	include/linux/mm.h	235
1 file changed, 148 insertions(+), 87 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0436b919f1c7..9849dfda44d4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -5,6 +5,7 @@
 #include <linux/errno.h>
 #include <linux/mmdebug.h>
 #include <linux/gfp.h>
+#include <linux/pgalloc_tag.h>
 #include <linux/bug.h>
 #include <linux/list.h>
 #include <linux/mmzone.h>
@@ -1199,7 +1200,7 @@ static inline int is_vmalloc_or_module_addr(const void *x)
  * debugging purposes - it does not include PTE-mapped sub-pages; look
  * at folio_mapcount() or page_mapcount() instead.
  */
-static inline int folio_entire_mapcount(struct folio *folio)
+static inline int folio_entire_mapcount(const struct folio *folio)
 {
 	VM_BUG_ON_FOLIO(!folio_test_large(folio), folio);
 	return atomic_read(&folio->_entire_mapcount) + 1;
@@ -1223,48 +1224,60 @@ static inline void page_mapcount_reset(struct page *page)
  * a large folio, it includes the number of times this page is mapped
  * as part of that folio.
  *
- * The result is undefined for pages which cannot be mapped into userspace.
- * For example SLAB or special types of pages. See function page_has_type().
- * They use this field in struct page differently.
+ * Will report 0 for pages which cannot be mapped into userspace, e.g.,
+ * slab, page tables and similar.
  */
 static inline int page_mapcount(struct page *page)
 {
 	int mapcount = atomic_read(&page->_mapcount) + 1;
 
+	/* Handle page_has_type() pages */
+	if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+		mapcount = 0;
 	if (unlikely(PageCompound(page)))
 		mapcount += folio_entire_mapcount(page_folio(page));
 
 	return mapcount;
 }
 
-int folio_total_mapcount(struct folio *folio);
+static inline int folio_large_mapcount(const struct folio *folio)
+{
+	VM_WARN_ON_FOLIO(!folio_test_large(folio), folio);
+	return atomic_read(&folio->_large_mapcount) + 1;
+}
 
 /**
- * folio_mapcount() - Calculate the number of mappings of this folio.
+ * folio_mapcount() - Number of mappings of this folio.
  * @folio: The folio.
  *
- * A large folio tracks both how many times the entire folio is mapped,
- * and how many times each individual page in the folio is mapped.
- * This function calculates the total number of times the folio is
- * mapped.
+ * The folio mapcount corresponds to the number of present user page table
+ * entries that reference any part of a folio. Each such present user page
+ * table entry must be paired with exactly one folio reference.
+ *
+ * For ordinary folios, each user page table entry (PTE/PMD/PUD/...) counts
+ * exactly once.
+ *
+ * For hugetlb folios, each abstracted "hugetlb" user page table entry that
+ * references the entire folio counts exactly once, even when such special
+ * page table entries are comprised of multiple ordinary page table entries.
+ *
+ * Will report 0 for pages which cannot be mapped into userspace, such as
+ * slab, page tables and similar.
  *
  * Return: The number of times this folio is mapped.
  */
-static inline int folio_mapcount(struct folio *folio)
+static inline int folio_mapcount(const struct folio *folio)
 {
-	if (likely(!folio_test_large(folio)))
-		return atomic_read(&folio->_mapcount) + 1;
-	return folio_total_mapcount(folio);
-}
+	int mapcount;
 
-static inline bool folio_large_is_mapped(struct folio *folio)
-{
-	/*
-	 * Reading _entire_mapcount below could be omitted if hugetlb
-	 * participated in incrementing nr_pages_mapped when compound mapped.
-	 */
-	return atomic_read(&folio->_nr_pages_mapped) > 0 ||
-	       atomic_read(&folio->_entire_mapcount) >= 0;
+	if (likely(!folio_test_large(folio))) {
+		mapcount = atomic_read(&folio->_mapcount) + 1;
+		/* Handle page_has_type() pages */
+		if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
+			mapcount = 0;
+		return mapcount;
+	}
+	return folio_large_mapcount(folio);
 }
 
 /**
@@ -1273,11 +1286,9 @@ static inline bool folio_large_is_mapped(struct folio *folio)
  *
  * Return: True if any page in this folio is referenced by user page tables.
  */
-static inline bool folio_mapped(struct folio *folio)
+static inline bool folio_mapped(const struct folio *folio)
 {
-	if (likely(!folio_test_large(folio)))
-		return atomic_read(&folio->_mapcount) >= 0;
-	return folio_large_is_mapped(folio);
+	return folio_mapcount(folio) >= 1;
 }
 
 /*
@@ -1285,11 +1296,9 @@ static inline bool folio_mapped(struct folio *folio)
  * For compound page it returns true if any sub-page of compound page is mapped,
  * even if this particular sub-page is not itself mapped by any PTE or PMD.
  */
-static inline bool page_mapped(struct page *page)
+static inline bool page_mapped(const struct page *page)
 {
-	if (likely(!PageCompound(page)))
-		return atomic_read(&page->_mapcount) >= 0;
-	return folio_large_is_mapped(page_folio(page));
+	return folio_mapped(page_folio(page));
 }
 
 static inline struct page *virt_to_head_page(const void *x)
@@ -1315,8 +1324,6 @@ void folio_copy(struct folio *dst, struct folio *src);
 
 unsigned long nr_free_buffer_pages(void);
 
-void destroy_large_folio(struct folio *folio);
-
 /* Returns the number of bytes in this potentially compound page. */
 static inline unsigned long page_size(struct page *page)
 {
@@ -1434,27 +1441,22 @@ vm_fault_t finish_fault(struct vm_fault *vmf);
 
 #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-bool __put_devmap_managed_page_refs(struct page *page, int refs);
-static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+bool __put_devmap_managed_folio_refs(struct folio *folio, int refs);
+static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
 {
 	if (!static_branch_unlikely(&devmap_managed_key))
 		return false;
-	if (!is_zone_device_page(page))
+	if (!folio_is_zone_device(folio))
 		return false;
-	return __put_devmap_managed_page_refs(page, refs);
+	return __put_devmap_managed_folio_refs(folio, refs);
 }
 #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
+static inline bool put_devmap_managed_folio_refs(struct folio *folio, int refs)
 {
 	return false;
 }
 #endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
 
-static inline bool put_devmap_managed_page(struct page *page)
-{
-	return put_devmap_managed_page_refs(page, 1);
-}
-
 /* 127: arbitrary random number, small enough to assemble well */
 #define folio_ref_zero_or_close_to_overflow(folio)	\
 	((unsigned int) folio_ref_count(folio) + 127u <= 127u)
@@ -1573,7 +1575,7 @@ static inline void put_page(struct page *page)
 	 * For some devmap managed pages we need to catch refcount transition
 	 * from 2 to 1:
 	 */
-	if (put_devmap_managed_page(&folio->page))
+	if (put_devmap_managed_folio_refs(folio, 1))
 		return;
 	folio_put(folio);
 }
@@ -2067,7 +2069,7 @@ static inline void set_page_links(struct page *page, enum zone_type zone,
  *
  * Return: A positive power of two.
  */
-static inline long folio_nr_pages(struct folio *folio)
+static inline long folio_nr_pages(const struct folio *folio)
 {
 	if (!folio_test_large(folio))
 		return 1;
@@ -2162,21 +2164,64 @@ static inline size_t folio_size(struct folio *folio)
 }
 
 /**
- * folio_estimated_sharers - Estimate the number of sharers of a folio.
+ * folio_likely_mapped_shared - Estimate if the folio is mapped into the page
+ *				tables of more than one MM
  * @folio: The folio.
  *
- * folio_estimated_sharers() aims to serve as a function to efficiently
- * estimate the number of processes sharing a folio. This is done by
- * looking at the precise mapcount of the first subpage in the folio, and
- * assuming the other subpages are the same. This may not be true for large
- * folios. If you want exact mapcounts for exact calculations, look at
- * page_mapcount() or folio_total_mapcount().
+ * This function checks if the folio is currently mapped into more than one
+ * MM ("mapped shared"), or if the folio is only mapped into a single MM
+ * ("mapped exclusively").
+ *
+ * As precise information is not easily available for all folios, this function
+ * estimates the number of MMs ("sharers") that are currently mapping a folio
+ * using the number of times the first page of the folio is currently mapped
+ * into page tables.
+ *
+ * For small anonymous folios (except KSM folios) and anonymous hugetlb folios,
+ * the return value will be exactly correct, because they can only be mapped
+ * at most once into an MM, and they cannot be partially mapped.
+ *
+ * For other folios, the result can be fuzzy:
+ *    #. For partially-mappable large folios (THP), the return value can
+ *       wrongly indicate "mapped exclusively" (false negative) when the folio
+ *       is only partially mapped into at least one MM.
+ *    #. For pagecache folios (including hugetlb), the return value can wrongly
+ *       indicate "mapped shared" (false positive) when two VMAs in the same MM
+ *       cover the same file range.
+ *    #. For (small) KSM folios, the return value can wrongly indicate "mapped
+ *       shared" (false positive), when the folio is mapped multiple times into
+ *       the same MM.
+ *
+ * Further, this function only considers current page table mappings that
+ * are tracked using the folio mapcount(s).
  *
- * Return: The estimated number of processes sharing a folio.
+ * This function does not consider:
+ *    #. If the folio might get mapped in the (near) future (e.g., swapcache,
+ *       pagecache, temporary unmapping for migration).
+ *    #. If the folio is mapped differently (VM_PFNMAP).
+ *    #. If hugetlb page table sharing applies. Callers might want to check
+ *       hugetlb_pmd_shared().
+ *
+ * Return: Whether the folio is estimated to be mapped into more than one MM.
  */
-static inline int folio_estimated_sharers(struct folio *folio)
+static inline bool folio_likely_mapped_shared(struct folio *folio)
 {
-	return page_mapcount(folio_page(folio, 0));
+	int mapcount = folio_mapcount(folio);
+
+	/* Only partially-mappable folios require more care. */
+	if (!folio_test_large(folio) || unlikely(folio_test_hugetlb(folio)))
+		return mapcount > 1;
+
+	/* A single mapping implies "mapped exclusively". */
+	if (mapcount <= 1)
+		return false;
+
+	/* If any page is mapped more than once we treat it "mapped shared". */
+	if (folio_entire_mapcount(folio) || mapcount > folio_nr_pages(folio))
+		return true;
+
+	/* Let's guess based on the first subpage. */
+	return atomic_read(&folio->_mapcount) > 0;
 }
 
 #ifndef HAVE_ARCH_MAKE_PAGE_ACCESSIBLE
@@ -2207,11 +2252,6 @@ static inline int arch_make_folio_accessible(struct folio *folio)
  */
 #include <linux/vmstat.h>
 
-static __always_inline void *lowmem_page_address(const struct page *page)
-{
-	return page_to_virt(page);
-}
-
 #if defined(CONFIG_HIGHMEM) && !defined(WANT_PAGE_VIRTUAL)
 #define HASHED_PAGE_VIRTUAL
 #endif
@@ -2234,6 +2274,11 @@ void set_page_address(struct page *page, void *virtual);
 void page_address_init(void);
 #endif
 
+static __always_inline void *lowmem_page_address(const struct page *page)
+{
+	return page_to_virt(page);
+}
+
 #if !defined(HASHED_PAGE_VIRTUAL) && !defined(WANT_PAGE_VIRTUAL)
 #define page_address(page) lowmem_page_address(page)
 #define set_page_address(page, address)  do { } while(0)
@@ -2391,12 +2436,8 @@ void free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
 		unsigned long end, unsigned long floor, unsigned long ceiling);
 int copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma);
-int follow_pte(struct mm_struct *mm, unsigned long address,
+int follow_pte(struct vm_area_struct *vma, unsigned long address,
 	       pte_t **ptepp, spinlock_t **ptlp);
-int follow_pfn(struct vm_area_struct *vma, unsigned long address,
-	unsigned long *pfn);
-int follow_phys(struct vm_area_struct *vma, unsigned long address,
-		unsigned int flags, unsigned long *prot, resource_size_t *phys);
 int generic_access_phys(struct vm_area_struct *vma, unsigned long addr,
 			void *buf, int len, int write);
@@ -2550,7 +2591,7 @@ extern unsigned long move_page_tables(struct vm_area_struct *vma,
 				 MM_CP_UFFD_WP_RESOLVE)
 
 bool vma_needs_dirty_tracking(struct vm_area_struct *vma);
-int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
+bool vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);
 static inline bool vma_wants_manual_pte_write_upgrade(struct vm_area_struct *vma)
 {
 	/*
@@ -2857,12 +2898,13 @@ static inline bool pagetable_is_reserved(struct ptdesc *pt)
  *
  * Return: The ptdesc describing the allocated page tables.
  */
-static inline struct ptdesc *pagetable_alloc(gfp_t gfp, unsigned int order)
+static inline struct ptdesc *pagetable_alloc_noprof(gfp_t gfp, unsigned int order)
 {
-	struct page *page = alloc_pages(gfp | __GFP_COMP, order);
+	struct page *page = alloc_pages_noprof(gfp | __GFP_COMP, order);
 
 	return page_ptdesc(page);
 }
+#define pagetable_alloc(...)	alloc_hooks(pagetable_alloc_noprof(__VA_ARGS__))
 
 /**
  * pagetable_free - Free pagetables
@@ -3132,6 +3174,14 @@ extern void reserve_bootmem_region(phys_addr_t start,
 
 /* Free the reserved page into the buddy system, so it gets managed. */
 static inline void free_reserved_page(struct page *page)
 {
+	if (mem_alloc_profiling_enabled()) {
+		union codetag_ref *ref = get_page_tag_ref(page);
+
+		if (ref) {
+			set_codetag_empty(ref);
+			put_page_tag_ref(ref);
+		}
+	}
 	ClearPageReserved(page);
 	init_page_count(page);
 	__free_page(page);
@@ -3193,8 +3243,6 @@ static inline unsigned long get_num_physpages(void)
  */
 void free_area_init(unsigned long *max_zone_pfn);
 unsigned long node_map_pfn_alignment(void);
-unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
-						unsigned long end_pfn);
 extern unsigned long absent_pages_in_range(unsigned long start_pfn,
 						unsigned long end_pfn);
 extern void get_pfn_range_for_nid(unsigned int nid,
@@ -3210,7 +3258,6 @@ static inline int early_pfn_to_nid(unsigned long pfn)
 extern int __meminit early_pfn_to_nid(unsigned long pfn);
 #endif
 
-extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void mem_init(void);
 extern void __init mmap_init(void);
 
@@ -3222,9 +3269,6 @@ static inline void show_mem(void)
 extern long si_mem_available(void);
 extern void si_meminfo(struct sysinfo * val);
 extern void si_meminfo_node(struct sysinfo *val, int nid);
-#ifdef __HAVE_ARCH_RESERVED_KERNEL_PAGES
-extern unsigned long arch_reserved_kernel_pages(void);
-#endif
 
 extern __printf(3, 4)
 void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...);
@@ -3383,7 +3427,16 @@ extern int install_special_mapping(struct mm_struct *mm,
 unsigned long randomize_stack_top(unsigned long stack_top);
 unsigned long randomize_page(unsigned long start, unsigned long range);
 
-extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+unsigned long
+__get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+		    unsigned long pgoff, unsigned long flags, vm_flags_t vm_flags);
+
+static inline unsigned long
+get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+		  unsigned long pgoff, unsigned long flags)
+{
+	return __get_unmapped_area(file, addr, len, pgoff, flags, 0);
+}
 
 extern unsigned long mmap_region(struct file *file, unsigned long addr,
 	unsigned long len, vm_flags_t vm_flags, unsigned long pgoff,
@@ -3429,6 +3482,7 @@ struct vm_unmapped_area_info {
 	unsigned long high_limit;
 	unsigned long align_mask;
 	unsigned long align_offset;
+	unsigned long start_gap;
 };
 
 extern unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info);
@@ -3722,7 +3776,14 @@ DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
 static inline bool want_init_on_free(void)
 {
 	return static_branch_maybe(CONFIG_INIT_ON_FREE_DEFAULT_ON,
-				   &init_on_free);
+				&init_on_free);
+}
+
+DECLARE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
+static inline bool want_init_mlocked_on_free(void)
+{
+	return static_branch_maybe(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON,
+				   &init_mlocked_on_free);
 }
 
 extern bool _debug_pagealloc_enabled_early;
@@ -3785,24 +3846,22 @@ static inline bool page_is_guard(struct page *page)
 	return PageGuard(page);
 }
 
-bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order,
-		      int migratetype);
+bool __set_page_guard(struct zone *zone, struct page *page, unsigned int order);
 static inline bool set_page_guard(struct zone *zone, struct page *page,
-				  unsigned int order, int migratetype)
+				  unsigned int order)
 {
 	if (!debug_guardpage_enabled())
 		return false;
-	return __set_page_guard(zone, page, order, migratetype);
+	return __set_page_guard(zone, page, order);
 }
-void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order,
-			int migratetype);
+void __clear_page_guard(struct zone *zone, struct page *page, unsigned int order);
 static inline void clear_page_guard(struct zone *zone, struct page *page,
-				    unsigned int order, int migratetype)
+				    unsigned int order)
 {
 	if (!debug_guardpage_enabled())
 		return;
-	__clear_page_guard(zone, page, order, migratetype);
+	__clear_page_guard(zone, page, order);
 }
 
 #else	/* CONFIG_DEBUG_PAGEALLOC */
@@ -3812,9 +3871,9 @@ static inline unsigned int debug_guardpage_minorder(void) { return 0; }
 static inline bool debug_guardpage_enabled(void) { return false; }
 static inline bool page_is_guard(struct page *page) { return false; }
 static inline bool set_page_guard(struct zone *zone, struct page *page,
-			unsigned int order, int migratetype) { return false; }
+			unsigned int order) { return false; }
 static inline void clear_page_guard(struct zone *zone, struct page *page,
-				unsigned int order, int migratetype) {}
+				unsigned int order) {}
 #endif	/* CONFIG_DEBUG_PAGEALLOC */
 
 #ifdef __HAVE_ARCH_GATE_AREA
@@ -3969,7 +4028,6 @@ int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
 extern int memory_failure(unsigned long pfn, int flags);
 extern void memory_failure_queue_kick(int cpu);
 extern int unpoison_memory(unsigned long pfn);
-extern void shake_page(struct page *p);
 extern atomic_long_t num_poisoned_pages __read_mostly;
 extern int soft_offline_page(unsigned long pfn, int flags);
 #ifdef CONFIG_MEMORY_FAILURE
@@ -4202,4 +4260,7 @@ static inline bool pfn_is_unaccepted_memory(unsigned long pfn)
 	return range_contains_unaccepted_memory(paddr, paddr + PAGE_SIZE);
 }
 
+void vma_pgtable_walk_begin(struct vm_area_struct *vma);
+void vma_pgtable_walk_end(struct vm_area_struct *vma);
+
 #endif /* _LINUX_MM_H */
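
The page_mapcount()/folio_mapcount() hunks above change the contract from "undefined for typed pages" to "reports 0": any raw _mapcount in the page-type range is clamped before use. A minimal userspace sketch of just that clamping; PAGE_MAPCOUNT_RESERVE's value is an assumption here (the kernel defines it in page-flags.h), and the typed-page bit pattern is made up for illustration:

#include <stdio.h>

#define PAGE_MAPCOUNT_RESERVE	(-128)	/* assumed sentinel; see page-flags.h */

/* Mirrors the clamping added to page_mapcount() in the diff above. */
static int page_mapcount_model(int raw_mapcount)
{
	int mapcount = raw_mapcount + 1;	/* _mapcount is biased by -1 */

	/* Handle page_has_type() pages: report 0 instead of garbage. */
	if (mapcount < PAGE_MAPCOUNT_RESERVE + 1)
		mapcount = 0;
	return mapcount;
}

int main(void)
{
	printf("%d\n", page_mapcount_model(-1));	/* never mapped: 0 */
	printf("%d\n", page_mapcount_model(2));		/* mapped 3 times: 3 */
	/* Typed pages (slab, page tables, ...) hold a very negative value. */
	printf("%d\n", page_mapcount_model((int)0xf0000200));	/* 0, not garbage */
	return 0;
}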
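The folio_estimated_sharers() to folio_likely_mapped_shared() rework above replaces a sharer count with a yes/no estimate. The following toy model mirrors the new function's decision chain; struct folio_model and its fields are invented stand-ins for the folio's atomic counters, not kernel API:

#include <stdbool.h>
#include <stdio.h>

struct folio_model {
	bool large;			/* folio_test_large() */
	bool hugetlb;			/* folio_test_hugetlb() */
	long nr_pages;			/* folio_nr_pages() */
	int  mapcount;			/* folio_mapcount() */
	int  entire_mapcount;		/* folio_entire_mapcount() */
	int  first_subpage_mapcount;	/* first subpage's _mapcount + 1 */
};

static bool likely_mapped_shared(const struct folio_model *f)
{
	/* Small and hugetlb folios are mapped as a whole: answer is exact. */
	if (!f->large || f->hugetlb)
		return f->mapcount > 1;

	/* A single mapping implies "mapped exclusively". */
	if (f->mapcount <= 1)
		return false;

	/* Some page is mapped more than once: treat as "mapped shared". */
	if (f->entire_mapcount || f->mapcount > f->nr_pages)
		return true;

	/* Otherwise, guess based on the first subpage. */
	return f->first_subpage_mapcount > 1;
}

int main(void)
{
	/* A 2 MiB THP, PTE-mapped exactly once into a single MM. */
	struct folio_model one_mm = { .large = true, .nr_pages = 512,
				      .mapcount = 512,
				      .first_subpage_mapcount = 1 };
	/* The same THP, additionally PMD-mapped by a second MM. */
	struct folio_model two_mms = { .large = true, .nr_pages = 512,
				       .mapcount = 513, .entire_mapcount = 1,
				       .first_subpage_mapcount = 1 };

	printf("%d %d\n", (int)likely_mapped_shared(&one_mm),
	       (int)likely_mapped_shared(&two_mms));	/* prints: 0 1 */
	return 0;
}

Note how the false negative documented in the kernel-doc arises: a THP partially mapped twice into one MM, away from the first subpage, would also report "exclusive".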
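The pagetable_alloc() hunk above follows the memory-allocation-profiling convention that also motivates the new <linux/pgalloc_tag.h> include: the raw allocator is renamed *_noprof() and a macro wrapper tags the call site. A self-contained sketch of that split, where alloc_hooks_model() and struct codetag_model are illustrative stand-ins that print instead of recording a codetag (the statement-expression syntax is the GNU C extension the kernel itself relies on):

#include <stdio.h>
#include <stdlib.h>

struct codetag_model { const char *file; int line; };

/* The raw, untracked allocation: the counterpart of a *_noprof() helper. */
static void *pagetable_alloc_noprof_model(size_t size)
{
	return malloc(size);
}

/* Wraps an allocation expression and captures the *caller's* location,
 * the way alloc_hooks() attaches a codetag to each call site. */
#define alloc_hooks_model(expr)					\
({								\
	struct codetag_model tag = { __FILE__, __LINE__ };	\
	printf("alloc tagged at %s:%d\n", tag.file, tag.line);	\
	expr;							\
})

#define pagetable_alloc_model(size) \
	alloc_hooks_model(pagetable_alloc_noprof_model(size))

int main(void)
{
	/* The recorded tag names this call site, not the allocator. */
	void *pt = pagetable_alloc_model(4096);

	free(pt);
	return 0;
}

This is also why free_reserved_page() above clears the tag with set_codetag_empty(): a page handed back to the buddy allocator no longer belongs to any allocating call site.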