Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	207
1 file changed, 105 insertions, 102 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 98f1c11197a8..01cac26a3127 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -213,20 +213,15 @@ static bool remove_migration_pte(struct folio *folio,
 		if (pte_swp_soft_dirty(*pvmw.pte))
 			pte = pte_mksoft_dirty(pte);
 
-		/*
-		 * Recheck VMA as permissions can change since migration started
-		 */
 		entry = pte_to_swp_entry(*pvmw.pte);
 		if (!is_migration_entry_young(entry))
 			pte = pte_mkold(pte);
 		if (folio_test_dirty(folio) && is_migration_entry_dirty(entry))
 			pte = pte_mkdirty(pte);
 		if (is_writable_migration_entry(entry))
-			pte = maybe_mkwrite(pte, vma);
+			pte = pte_mkwrite(pte);
 		else if (pte_swp_uffd_wp(*pvmw.pte))
 			pte = pte_mkuffd_wp(pte);
-		else
-			pte = pte_wrprotect(pte);
 
 		if (folio_test_anon(folio) && !is_readable_migration_entry(entry))
 			rmap_flags |= RMAP_EXCLUSIVE;
@@ -249,7 +244,6 @@ static bool remove_migration_pte(struct folio *folio,
 		if (folio_test_hugetlb(folio)) {
 			unsigned int shift = huge_page_shift(hstate_vma(vma));
 
-			pte = pte_mkhuge(pte);
 			pte = arch_make_huge_pte(pte, shift, vma->vm_flags);
 			if (folio_test_anon(folio))
 				hugepage_add_anon_rmap(new, vma, pvmw.address,
@@ -1112,9 +1106,8 @@ static void migrate_folio_done(struct folio *src,
 /* Obtain the lock on page, remove all ptes. */
 static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page,
 			       unsigned long private, struct folio *src,
-			       struct folio **dstp, int force, bool avoid_force_lock,
-			       enum migrate_mode mode, enum migrate_reason reason,
-			       struct list_head *ret)
+			       struct folio **dstp, enum migrate_mode mode,
+			       enum migrate_reason reason, struct list_head *ret)
 {
 	struct folio *dst;
 	int rc = -EAGAIN;
@@ -1144,7 +1137,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 	dst->private = NULL;
 
 	if (!folio_trylock(src)) {
-		if (!force || mode == MIGRATE_ASYNC)
+		if (mode == MIGRATE_ASYNC)
 			goto out;
 
 		/*
@@ -1163,17 +1156,6 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 		if (current->flags & PF_MEMALLOC)
 			goto out;
 
-		/*
-		 * We have locked some folios and are going to wait to lock
-		 * this folio.  To avoid a potential deadlock, let's bail
-		 * out and not do that. The locked folios will be moved and
-		 * unlocked, then we can wait to lock this folio.
-		 */
-		if (avoid_force_lock) {
-			rc = -EDEADLOCK;
-			goto out;
-		}
-
 		folio_lock(src);
 	}
 	locked = true;
@@ -1193,8 +1175,6 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 			rc = -EBUSY;
 			goto out;
 		}
-		if (!force)
-			goto out;
 		folio_wait_writeback(src);
 	}
 
@@ -1253,7 +1233,7 @@ static int migrate_folio_unmap(new_page_t get_new_page, free_page_t put_new_page
 		/* Establish migration ptes */
 		VM_BUG_ON_FOLIO(folio_test_anon(src) &&
			       !folio_test_ksm(src) && !anon_vma, src);
-		try_to_migrate(src, TTU_BATCH_FLUSH);
+		try_to_migrate(src, mode == MIGRATE_ASYNC ? TTU_BATCH_FLUSH : 0);
 		page_was_mapped = 1;
 	}
 
@@ -1267,7 +1247,7 @@ out:
 	 * A folio that has not been unmapped will be restored to
 	 * right list unless we want to retry.
 	 */
-	if (rc == -EAGAIN || rc == -EDEADLOCK)
+	if (rc == -EAGAIN)
 		ret = NULL;
 
 	migrate_folio_undo_src(src, page_was_mapped, anon_vma, locked, ret);
@@ -1508,6 +1488,9 @@ static inline int try_split_folio(struct folio *folio, struct list_head *split_f
 #define NR_MAX_BATCHED_MIGRATION	512
 #endif
 #define NR_MAX_MIGRATE_PAGES_RETRY	10
+#define NR_MAX_MIGRATE_ASYNC_RETRY	3
+#define NR_MAX_MIGRATE_SYNC_RETRY					\
+	(NR_MAX_MIGRATE_PAGES_RETRY - NR_MAX_MIGRATE_ASYNC_RETRY)
 
 struct migrate_pages_stats {
 	int nr_succeeded;	/* Normal and large folios migrated successfully, in
@@ -1618,13 +1601,19 @@ static int migrate_hugetlbs(struct list_head *from, new_page_t get_new_page,
 /*
  * migrate_pages_batch() first unmaps folios in the from list as many as
  * possible, then move the unmapped folios.
+ *
+ * We only batch migration if mode == MIGRATE_ASYNC to avoid to wait a
+ * lock or bit when we have locked more than one folio.  Which may cause
+ * deadlock (e.g., for loop device).  So, if mode != MIGRATE_ASYNC, the
+ * length of the from list must be <= 1.
  */
 static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 		free_page_t put_new_page, unsigned long private,
 		enum migrate_mode mode, int reason, struct list_head *ret_folios,
-		struct migrate_pages_stats *stats)
+		struct list_head *split_folios, struct migrate_pages_stats *stats,
+		int nr_pass)
 {
-	int retry;
+	int retry = 1;
 	int large_retry = 1;
 	int thp_retry = 1;
 	int nr_failed = 0;
@@ -1634,21 +1623,15 @@ static int migrate_pages_batch(struct list_head *from, new_page_t get_new_page,
 	bool is_large = false;
 	bool is_thp = false;
 	struct folio *folio, *folio2, *dst = NULL, *dst2;
-	int rc, rc_saved, nr_pages;
-	LIST_HEAD(split_folios);
+	int rc, rc_saved = 0, nr_pages;
 	LIST_HEAD(unmap_folios);
 	LIST_HEAD(dst_folios);
 	bool nosplit = (reason == MR_NUMA_MISPLACED);
-	bool no_split_folio_counting = false;
-	bool avoid_force_lock;
 
-retry:
-	rc_saved = 0;
-	avoid_force_lock = false;
-	retry = 1;
-	for (pass = 0;
-	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
-	     pass++) {
+	VM_WARN_ON_ONCE(mode != MIGRATE_ASYNC &&
+			!list_empty(from) && !list_is_singular(from));
+
+	for (pass = 0; pass < nr_pass && (retry || large_retry); pass++) {
 		retry = 0;
 		large_retry = 0;
 		thp_retry = 0;
@@ -1679,7 +1662,7 @@ retry:
 			if (!thp_migration_supported() && is_thp) {
 				nr_large_failed++;
 				stats->nr_thp_failed++;
-				if (!try_split_folio(folio, &split_folios)) {
+				if (!try_split_folio(folio, split_folios)) {
 					stats->nr_thp_split++;
 					continue;
 				}
@@ -1689,15 +1672,13 @@ retry:
 			}
 
 			rc = migrate_folio_unmap(get_new_page, put_new_page, private,
-						 folio, &dst, pass > 2, avoid_force_lock,
-						 mode, reason, ret_folios);
+						 folio, &dst, mode, reason, ret_folios);
 			/*
 			 * The rules are:
 			 *	Success: folio will be freed
 			 *	Unmap: folio will be put on unmap_folios list,
 			 *	       dst folio put on dst_folios list
 			 *	-EAGAIN: stay on the from list
-			 *	-EDEADLOCK: stay on the from list
 			 *	-ENOMEM: stay on the from list
 			 *	Other errno: put on ret_folios list
 			 */
@@ -1712,7 +1693,7 @@ retry:
 					stats->nr_thp_failed += is_thp;
 					/* Large folio NUMA faulting doesn't split to retry. */
 					if (!nosplit) {
-						int ret = try_split_folio(folio, &split_folios);
+						int ret = try_split_folio(folio, split_folios);
 
 						if (!ret) {
 							stats->nr_thp_split += is_thp;
@@ -1726,21 +1707,17 @@ retry:
 							large_retry++;
 							thp_retry += is_thp;
 							nr_retry_pages += nr_pages;
+							/* Undo duplicated failure counting. */
+							nr_large_failed--;
+							stats->nr_thp_failed -= is_thp;
 							break;
 						}
 					}
-				} else if (!no_split_folio_counting) {
+				} else {
 					nr_failed++;
 				}
 
 				stats->nr_failed_pages += nr_pages + nr_retry_pages;
-				/*
-				 * There might be some split folios of fail-to-migrate large
-				 * folios left in split_folios list. Move them to ret_folios
-				 * list so that they could be put back to the right list by
-				 * the caller otherwise the folio refcnt will be leaked.
-				 */
-				list_splice_init(&split_folios, ret_folios);
 				/* nr_failed isn't updated for not used */
 				nr_large_failed += large_retry;
 				stats->nr_thp_failed += thp_retry;
@@ -1749,19 +1726,11 @@ retry:
 					goto out;
 				else
 					goto move;
-			case -EDEADLOCK:
-				/*
-				 * The folio cannot be locked for potential deadlock.
-				 * Go move (and unlock) all locked folios.  Then we can
-				 * try again.
-				 */
-				rc_saved = rc;
-				goto move;
 			case -EAGAIN:
 				if (is_large) {
 					large_retry++;
 					thp_retry += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					retry++;
 				}
 				nr_retry_pages += nr_pages;
@@ -1771,11 +1740,6 @@ retry:
 				stats->nr_thp_succeeded += is_thp;
 				break;
 			case MIGRATEPAGE_UNMAP:
-				/*
-				 * We have locked some folios, don't force lock
-				 * to avoid deadlock.
-				 */
-				avoid_force_lock = true;
 				list_move_tail(&folio->lru, &unmap_folios);
 				list_add_tail(&dst->lru, &dst_folios);
 				break;
@@ -1789,7 +1753,7 @@ retry:
 				if (is_large) {
 					nr_large_failed++;
 					stats->nr_thp_failed += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					nr_failed++;
 				}
 
@@ -1807,9 +1771,7 @@ move:
 	try_to_unmap_flush();
 
 	retry = 1;
-	for (pass = 0;
-	     pass < NR_MAX_MIGRATE_PAGES_RETRY && (retry || large_retry);
-	     pass++) {
+	for (pass = 0; pass < nr_pass && (retry || large_retry); pass++) {
 		retry = 0;
 		large_retry = 0;
 		thp_retry = 0;
@@ -1838,7 +1800,7 @@ move:
 				if (is_large) {
 					large_retry++;
 					thp_retry += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					retry++;
 				}
 				nr_retry_pages += nr_pages;
@@ -1851,7 +1813,7 @@ move:
 				if (is_large) {
 					nr_large_failed++;
 					stats->nr_thp_failed += is_thp;
-				} else if (!no_split_folio_counting) {
+				} else {
 					nr_failed++;
 				}
 
@@ -1888,30 +1850,52 @@ out:
 		dst2 = list_next_entry(dst, lru);
 	}
 
-	/*
-	 * Try to migrate split folios of fail-to-migrate large folios, no
-	 * nr_failed counting in this round, since all split folios of a
-	 * large folio is counted as 1 failure in the first round.
-	 */
-	if (rc >= 0 && !list_empty(&split_folios)) {
-		/*
-		 * Move non-migrated folios (after NR_MAX_MIGRATE_PAGES_RETRY
-		 * retries) to ret_folios to avoid migrating them again.
-		 */
-		list_splice_init(from, ret_folios);
-		list_splice_init(&split_folios, from);
-		no_split_folio_counting = true;
-		goto retry;
-	}
+	return rc;
+}
 
+static int migrate_pages_sync(struct list_head *from, new_page_t get_new_page,
+		free_page_t put_new_page, unsigned long private,
+		enum migrate_mode mode, int reason, struct list_head *ret_folios,
+		struct list_head *split_folios, struct migrate_pages_stats *stats)
+{
+	int rc, nr_failed = 0;
+	LIST_HEAD(folios);
+	struct migrate_pages_stats astats;
+
+	memset(&astats, 0, sizeof(astats));
+	/* Try to migrate in batch with MIGRATE_ASYNC mode firstly */
+	rc = migrate_pages_batch(from, get_new_page, put_new_page, private, MIGRATE_ASYNC,
+				 reason, &folios, split_folios, &astats,
+				 NR_MAX_MIGRATE_ASYNC_RETRY);
+	stats->nr_succeeded += astats.nr_succeeded;
+	stats->nr_thp_succeeded += astats.nr_thp_succeeded;
+	stats->nr_thp_split += astats.nr_thp_split;
+	if (rc < 0) {
+		stats->nr_failed_pages += astats.nr_failed_pages;
+		stats->nr_thp_failed += astats.nr_thp_failed;
+		list_splice_tail(&folios, ret_folios);
+		return rc;
+	}
+	stats->nr_thp_failed += astats.nr_thp_split;
+	nr_failed += astats.nr_thp_split;
 	/*
-	 * We have unlocked all locked folios, so we can force lock now, let's
-	 * try again.
+	 * Fall back to migrate all failed folios one by one synchronously. All
+	 * failed folios except split THPs will be retried, so their failure
+	 * isn't counted
 	 */
-	if (rc == -EDEADLOCK)
-		goto retry;
+	list_splice_tail_init(&folios, from);
+	while (!list_empty(from)) {
+		list_move(from->next, &folios);
+		rc = migrate_pages_batch(&folios, get_new_page, put_new_page,
+					 private, mode, reason, ret_folios,
+					 split_folios, stats, NR_MAX_MIGRATE_SYNC_RETRY);
+		list_splice_tail_init(&folios, ret_folios);
+		if (rc < 0)
+			return rc;
+		nr_failed += rc;
+	}
 
-	return rc;
+	return nr_failed;
 }
 
 /*
@@ -1949,6 +1933,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 	struct folio *folio, *folio2;
 	LIST_HEAD(folios);
 	LIST_HEAD(ret_folios);
+	LIST_HEAD(split_folios);
 	struct migrate_pages_stats stats;
 
 	trace_mm_migrate_pages_start(mode, reason);
@@ -1959,6 +1944,7 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
 				     mode, reason, &stats, &ret_folios);
 	if (rc_gather < 0)
 		goto out;
+
 again:
 	nr_pages = 0;
 	list_for_each_entry_safe(folio, folio2, from, lru) {
@@ -1969,20 +1955,36 @@ again:
 		}
 
 		nr_pages += folio_nr_pages(folio);
-		if (nr_pages > NR_MAX_BATCHED_MIGRATION)
+		if (nr_pages >= NR_MAX_BATCHED_MIGRATION)
 			break;
 	}
-	if (nr_pages > NR_MAX_BATCHED_MIGRATION)
-		list_cut_before(&folios, from, &folio->lru);
+	if (nr_pages >= NR_MAX_BATCHED_MIGRATION)
+		list_cut_before(&folios, from, &folio2->lru);
 	else
 		list_splice_init(from, &folios);
-	rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
-				 mode, reason, &ret_folios, &stats);
+	if (mode == MIGRATE_ASYNC)
+		rc = migrate_pages_batch(&folios, get_new_page, put_new_page, private,
+					 mode, reason, &ret_folios, &split_folios, &stats,
+					 NR_MAX_MIGRATE_PAGES_RETRY);
+	else
+		rc = migrate_pages_sync(&folios, get_new_page, put_new_page, private,
+					mode, reason, &ret_folios, &split_folios, &stats);
 	list_splice_tail_init(&folios, &ret_folios);
 	if (rc < 0) {
 		rc_gather = rc;
+		list_splice_tail(&split_folios, &ret_folios);
 		goto out;
 	}
+	if (!list_empty(&split_folios)) {
+		/*
+		 * Failure isn't counted since all split folios of a large folio
+		 * is counted as 1 failure already.  And, we only try to migrate
+		 * with minimal effort, force MIGRATE_ASYNC mode and retry once.
+		 */
+		migrate_pages_batch(&split_folios, get_new_page, put_new_page, private,
+				    MIGRATE_ASYNC, reason, &ret_folios, NULL, &stats, 1);
+		list_splice_tail_init(&split_folios, &ret_folios);
+	}
 	rc_gather += rc;
 	if (!list_empty(from))
 		goto again;
@@ -2097,15 +2099,18 @@ static int do_move_pages_to_node(struct mm_struct *mm,
  *         target node
  *     1 - when it has been queued
  */
-static int add_page_for_migration(struct mm_struct *mm, unsigned long addr,
+static int add_page_for_migration(struct mm_struct *mm, const void __user *p,
 		int node, struct list_head *pagelist, bool migrate_all)
 {
 	struct vm_area_struct *vma;
+	unsigned long addr;
 	struct page *page;
 	int err;
 	bool isolated;
 
 	mmap_read_lock(mm);
+	addr = (unsigned long)untagged_addr_remote(mm, p);
+
 	err = -EFAULT;
 	vma = vma_lookup(mm, addr);
 	if (!vma || !vma_migratable(vma))
@@ -2211,7 +2216,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 
 	for (i = start = 0; i < nr_pages; i++) {
 		const void __user *p;
-		unsigned long addr;
 		int node;
 
 		err = -EFAULT;
@@ -2219,7 +2223,6 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 			goto out_flush;
 		if (get_user(node, nodes + i))
 			goto out_flush;
-		addr = (unsigned long)untagged_addr(p);
 
 		err = -ENODEV;
 		if (node < 0 || node >= MAX_NUMNODES)
@@ -2247,8 +2250,8 @@ static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
 		 * Errors in the page lookup or isolation are not fatal and we simply
 		 * report them via status
 		 */
-		err = add_page_for_migration(mm, addr, current_node,
-				&pagelist, flags & MPOL_MF_MOVE_ALL);
+		err = add_page_for_migration(mm, p, current_node, &pagelist,
+					     flags & MPOL_MF_MOVE_ALL);
 		if (err > 0) {
 			/* The page is successfully queued for migration */