Diffstat (limited to 'include/linux/mm.h')
-rw-r--r--   include/linux/mm.h | 128
1 file changed, 78 insertions, 50 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cfaa8feecfe8..52269e56c514 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -70,11 +70,6 @@ static inline void totalram_pages_add(long count)
 	atomic_long_add(count, &_totalram_pages);
 }
 
-static inline void totalram_pages_set(long val)
-{
-	atomic_long_set(&_totalram_pages, val);
-}
-
 extern void * high_memory;
 extern int page_cluster;
 
@@ -625,24 +620,19 @@ unsigned long vmalloc_to_pfn(const void *addr);
  * On nommu, vmalloc/vfree wrap through kmalloc/kfree directly, so there
  * is no special casing required.
  */
-static inline bool is_vmalloc_addr(const void *x)
-{
-#ifdef CONFIG_MMU
-	unsigned long addr = (unsigned long)x;
-
-	return addr >= VMALLOC_START && addr < VMALLOC_END;
-#else
-	return false;
-#endif
-}
 
 #ifndef is_ioremap_addr
 #define is_ioremap_addr(x) is_vmalloc_addr(x)
 #endif
 
 #ifdef CONFIG_MMU
+extern bool is_vmalloc_addr(const void *x);
 extern int is_vmalloc_or_module_addr(const void *x);
 #else
+static inline bool is_vmalloc_addr(const void *x)
+{
+	return false;
+}
 static inline int is_vmalloc_or_module_addr(const void *x)
 {
 	return 0;
@@ -921,10 +911,6 @@ vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
 
 #define ZONEID_PGSHIFT		(ZONEID_PGOFF * (ZONEID_SHIFT != 0))
 
-#if SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
-#error SECTIONS_WIDTH+NODES_WIDTH+ZONES_WIDTH > BITS_PER_LONG - NR_PAGEFLAGS
-#endif
-
 #define ZONES_MASK		((1UL << ZONES_WIDTH) - 1)
 #define NODES_MASK		((1UL << NODES_WIDTH) - 1)
 #define SECTIONS_MASK		((1UL << SECTIONS_WIDTH) - 1)
@@ -952,9 +938,10 @@ static inline bool is_zone_device_page(const struct page *page)
 #endif
 
 #ifdef CONFIG_DEV_PAGEMAP_OPS
-void __put_devmap_managed_page(struct page *page);
+void free_devmap_managed_page(struct page *page);
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-static inline bool put_devmap_managed_page(struct page *page)
+
+static inline bool page_is_devmap_managed(struct page *page)
 {
 	if (!static_branch_unlikely(&devmap_managed_key))
 		return false;
@@ -963,7 +950,6 @@ static inline bool put_devmap_managed_page(struct page *page)
 	switch (page->pgmap->type) {
 	case MEMORY_DEVICE_PRIVATE:
 	case MEMORY_DEVICE_FS_DAX:
-		__put_devmap_managed_page(page);
 		return true;
 	default:
 		break;
@@ -971,11 +957,17 @@ static inline bool put_devmap_managed_page(struct page *page)
 	return false;
 }
 
+void put_devmap_managed_page(struct page *page);
+
 #else /* CONFIG_DEV_PAGEMAP_OPS */
-static inline bool put_devmap_managed_page(struct page *page)
+static inline bool page_is_devmap_managed(struct page *page)
 {
 	return false;
 }
+
+static inline void put_devmap_managed_page(struct page *page)
+{
+}
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
 static inline bool is_device_private_page(const struct page *page)
@@ -1028,37 +1020,37 @@ static inline void put_page(struct page *page)
 	 * need to inform the device driver through callback. See
	 * include/linux/memremap.h and HMM for details.
 	 */
-	if (put_devmap_managed_page(page))
+	if (page_is_devmap_managed(page)) {
+		put_devmap_managed_page(page);
 		return;
+	}
 
 	if (put_page_testzero(page))
 		__put_page(page);
 }
 
 /**
- * put_user_page() - release a gup-pinned page
+ * unpin_user_page() - release a gup-pinned page
  * @page:            pointer to page to be released
  *
- * Pages that were pinned via get_user_pages*() must be released via
- * either put_user_page(), or one of the put_user_pages*() routines
- * below. This is so that eventually, pages that are pinned via
- * get_user_pages*() can be separately tracked and uniquely handled. In
- * particular, interactions with RDMA and filesystems need special
- * handling.
+ * Pages that were pinned via pin_user_pages*() must be released via either
+ * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so
+ * that eventually such pages can be separately tracked and uniquely handled. In
+ * particular, interactions with RDMA and filesystems need special handling.
  *
- * put_user_page() and put_page() are not interchangeable, despite this early
- * implementation that makes them look the same. put_user_page() calls must
- * be perfectly matched up with get_user_page() calls.
+ * unpin_user_page() and put_page() are not interchangeable, despite this early
+ * implementation that makes them look the same. unpin_user_page() calls must
+ * be perfectly matched up with pin*() calls.
  */
-static inline void put_user_page(struct page *page)
+static inline void unpin_user_page(struct page *page)
 {
 	put_page(page);
 }
 
-void put_user_pages_dirty_lock(struct page **pages, unsigned long npages,
-			       bool make_dirty);
+void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages,
+				 bool make_dirty);
 
-void put_user_pages(struct page **pages, unsigned long npages);
+void unpin_user_pages(struct page **pages, unsigned long npages);
 
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
@@ -1506,9 +1498,16 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
 			    unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
 			    struct vm_area_struct **vmas, int *locked);
+long pin_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
+			   unsigned long start, unsigned long nr_pages,
+			   unsigned int gup_flags, struct page **pages,
+			   struct vm_area_struct **vmas, int *locked);
 long get_user_pages(unsigned long start, unsigned long nr_pages,
 			    unsigned int gup_flags, struct page **pages,
 			    struct vm_area_struct **vmas);
+long pin_user_pages(unsigned long start, unsigned long nr_pages,
+		    unsigned int gup_flags, struct page **pages,
+		    struct vm_area_struct **vmas);
 long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
 		    unsigned int gup_flags, struct page **pages, int *locked);
 long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
@@ -1516,6 +1515,8 @@ long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
 
 int get_user_pages_fast(unsigned long start, int nr_pages,
 			unsigned int gup_flags, struct page **pages);
+int pin_user_pages_fast(unsigned long start, int nr_pages,
+			unsigned int gup_flags, struct page **pages);
 
 int account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc);
 int __account_locked_vm(struct mm_struct *mm, unsigned long pages, bool inc,
@@ -2181,12 +2182,6 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn,
 					struct mminit_pfnnid_cache *state);
 #endif
 
-#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
-void zero_resv_unavail(void);
-#else
-static inline void zero_resv_unavail(void) {}
-#endif
-
 extern void set_dma_reserve(unsigned long new_dma_reserve);
 extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
 		enum memmap_context, struct vmem_altmap *);
@@ -2328,6 +2323,7 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
 		       struct list_head *uf, bool downgrade);
 extern int do_munmap(struct mm_struct *, unsigned long, size_t,
 		     struct list_head *uf);
+extern int do_madvise(unsigned long start, size_t len_in, int behavior);
 
 static inline unsigned long
 do_mmap_pgoff(struct file *file, unsigned long addr,
@@ -2533,6 +2529,8 @@ vm_fault_t vmf_insert_pfn_prot(struct vm_area_struct *vma, unsigned long addr,
 			unsigned long pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed(struct vm_area_struct *vma, unsigned long addr,
 			pfn_t pfn);
+vm_fault_t vmf_insert_mixed_prot(struct vm_area_struct *vma, unsigned long addr,
+			pfn_t pfn, pgprot_t pgprot);
 vm_fault_t vmf_insert_mixed_mkwrite(struct vm_area_struct *vma,
 		unsigned long addr, pfn_t pfn);
 int vm_iomap_memory(struct vm_area_struct *vma, phys_addr_t start, unsigned long len);
@@ -2579,13 +2577,15 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 #define FOLL_ANON	0x8000	/* don't do file mappings */
 #define FOLL_LONGTERM	0x10000	/* mapping lifetime is indefinite: see below */
 #define FOLL_SPLIT_PMD	0x20000	/* split huge pmd before returning */
+#define FOLL_PIN	0x40000	/* pages must be released via unpin_user_page */
 
 /*
- * NOTE on FOLL_LONGTERM:
+ * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
+ * other. Here is what they mean, and how to use them:
  *
  * FOLL_LONGTERM indicates that the page will be held for an indefinite time
- * period _often_ under userspace control.  This is contrasted with
- * iov_iter_get_pages() where usages which are transient.
+ * period _often_ under userspace control.  This is in contrast to
+ * iov_iter_get_pages(), whose usages are transient.
  *
  * FIXME: For pages which are part of a filesystem, mappings are subject to the
  * lifetime enforced by the filesystem and we need guarantees that longterm
@@ -2600,11 +2600,39 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
  * Currently only get_user_pages() and get_user_pages_fast() support this flag
  * and calls to get_user_pages_[un]locked are specifically not allowed.  This
  * is due to an incompatibility with the FS DAX check and
- * FAULT_FLAG_ALLOW_RETRY
+ * FAULT_FLAG_ALLOW_RETRY.
  *
- * In the CMA case: longterm pins in a CMA region would unnecessarily fragment
- * that region.  And so CMA attempts to migrate the page before pinning when
+ * In the CMA case: long term pins in a CMA region would unnecessarily fragment
+ * that region.  And so, CMA attempts to migrate the page before pinning, when
  * FOLL_LONGTERM is specified.
+ *
+ * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount,
+ * but an additional pin counting system) will be invoked. This is intended for
+ * anything that gets a page reference and then touches page data (for example,
+ * Direct IO). This lets the filesystem know that some non-file-system entity is
+ * potentially changing the pages' data. In contrast to FOLL_GET (whose pages
+ * are released via put_page()), FOLL_PIN pages must be released, ultimately, by
+ * a call to unpin_user_page().
+ *
+ * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different
+ * and separate refcounting mechanisms, however, and that means that each has
+ * its own acquire and release mechanisms:
+ *
+ *     FOLL_GET: get_user_pages*() to acquire, and put_page() to release.
+ *
+ *     FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release.
+ *
+ * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call.
+ * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based
+ * calls applied to them, and that's perfectly OK. This is a constraint on the
+ * callers, not on the pages.)
+ *
+ * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never
+ * directly by the caller. That's in order to help avoid mismatches when
+ * releasing pages: get_user_pages*() pages must be released via put_page(),
+ * while pin_user_pages*() pages must be released via unpin_user_page().
+ *
+ * Please see Documentation/vm/pin_user_pages.rst for more information.
  */
 
 static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
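
Usage note (not part of the patch above): the sketch below illustrates the FOLL_PIN pairing the new comment describes, i.e. acquiring pages with pin_user_pages_fast() and releasing them only with the unpin_user_pages*() routines. The helpers pin_user_buffer() and release_user_buffer() are hypothetical caller-side code; only the pin/unpin declarations come from this diff, and FOLL_WRITE is the pre-existing gup flag.

#include <linux/mm.h>		/* pin_user_pages_fast(), unpin_user_pages*() */
#include <linux/errno.h>

/* Hypothetical caller: pin a user buffer for a Direct-IO-style write into it. */
static int pin_user_buffer(unsigned long uaddr, int nr_pages,
			   struct page **pages)
{
	int pinned;

	/*
	 * FOLL_PIN is set internally by pin_user_pages_fast(); the caller
	 * only passes ordinary gup_flags such as FOLL_WRITE.
	 */
	pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);
	if (pinned < 0)
		return pinned;

	if (pinned != nr_pages) {
		/* Partial pin: release what was pinned and bail out. */
		unpin_user_pages(pages, pinned);
		return -EFAULT;
	}
	return 0;
}

/* Hypothetical caller: release after the device has written into the pages. */
static void release_user_buffer(struct page **pages, unsigned long nr_pages)
{
	/*
	 * Pages acquired via pin_user_pages*() must be released with
	 * unpin_user_page()/unpin_user_pages*(), never plain put_page().
	 * make_dirty = true marks the pages dirty since their data changed.
	 */
	unpin_user_pages_dirty_lock(pages, nr_pages, true);
}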