Diffstat (limited to 'mm/memory-failure.c')

 -rw-r--r--  mm/memory-failure.c | 149
 1 file changed, 95 insertions(+), 54 deletions(-)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 3e6449f2102a..f64ebb6226cb 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -39,6 +39,7 @@
 #include <linux/kernel-page-flags.h>
 #include <linux/sched/signal.h>
 #include <linux/sched/task.h>
+#include <linux/dax.h>
 #include <linux/ksm.h>
 #include <linux/rmap.h>
 #include <linux/export.h>
@@ -57,6 +58,7 @@
 #include <linux/ratelimit.h>
 #include <linux/page-isolation.h>
 #include <linux/pagewalk.h>
+#include <linux/shmem_fs.h>
 #include "internal.h"
 #include "ras/ras_event.h"

@@ -673,7 +675,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
 #define hwpoison_hugetlb_range	NULL
 #endif

-static struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwp_walk_ops = {
 	.pmd_entry = hwpoison_pte_range,
 	.hugetlb_entry = hwpoison_hugetlb_range,
 };
@@ -762,7 +764,7 @@ static int delete_from_lru_cache(struct page *p)
 		 * Poisoned page might never drop its ref count to 0 so we have
 		 * to uncharge it manually from its memcg.
 		 */
-		mem_cgroup_uncharge(p);
+		mem_cgroup_uncharge(page_folio(p));

 		/*
 		 * drop the page count elevated by isolate_lru_page()
@@ -806,12 +808,44 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
 	return ret;
 }

+struct page_state {
+	unsigned long mask;
+	unsigned long res;
+	enum mf_action_page_type type;
+
+	/* Callback ->action() has to unlock the relevant page inside it. */
+	int (*action)(struct page_state *ps, struct page *p);
+};
+
+/*
+ * Return true if page is still referenced by others, otherwise return
+ * false.
+ *
+ * The extra_pins is true when one extra refcount is expected.
+ */
+static bool has_extra_refcount(struct page_state *ps, struct page *p,
+			       bool extra_pins)
+{
+	int count = page_count(p) - 1;
+
+	if (extra_pins)
+		count -= 1;
+
+	if (count > 0) {
+		pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
+		       page_to_pfn(p), action_page_types[ps->type], count);
+		return true;
+	}
+
+	return false;
+}
+
 /*
  * Error hit kernel page.
  * Do nothing, try to be lucky and not touch this instead. For a few cases we
  * could be more sophisticated.
  */
-static int me_kernel(struct page *p, unsigned long pfn)
+static int me_kernel(struct page_state *ps, struct page *p)
 {
 	unlock_page(p);
 	return MF_IGNORED;
@@ -820,9 +854,9 @@ static int me_kernel(struct page *p, unsigned long pfn)
 /*
  * Page in unknown state. Do nothing.
  */
-static int me_unknown(struct page *p, unsigned long pfn)
+static int me_unknown(struct page_state *ps, struct page *p)
 {
-	pr_err("Memory failure: %#lx: Unknown page state\n", pfn);
+	pr_err("Memory failure: %#lx: Unknown page state\n", page_to_pfn(p));
 	unlock_page(p);
 	return MF_FAILED;
 }
@@ -830,10 +864,11 @@ static int me_unknown(struct page *p, unsigned long pfn)
 /*
  * Clean (or cleaned) page cache page.
  */
-static int me_pagecache_clean(struct page *p, unsigned long pfn)
+static int me_pagecache_clean(struct page_state *ps, struct page *p)
 {
 	int ret;
 	struct address_space *mapping;
+	bool extra_pins;

 	delete_from_lru_cache(p);

@@ -863,13 +898,23 @@ static int me_pagecache_clean(struct page *p, unsigned long pfn)
 	}

 	/*
+	 * The shmem page is kept in page cache instead of truncating
+	 * so is expected to have an extra refcount after error-handling.
+	 */
+	extra_pins = shmem_mapping(mapping);
+
+	/*
 	 * Truncation is a bit tricky. Enable it per file system for now.
 	 *
 	 * Open: to take i_rwsem or not for this? Right now we don't.
 	 */
-	ret = truncate_error_page(p, pfn, mapping);
+	ret = truncate_error_page(p, page_to_pfn(p), mapping);
+	if (has_extra_refcount(ps, p, extra_pins))
+		ret = MF_FAILED;
+
 out:
 	unlock_page(p);
+
 	return ret;
 }

@@ -878,7 +923,7 @@ out:
  * Issues: when the error hit a hole page the error is not properly
  * propagated.
  */
-static int me_pagecache_dirty(struct page *p, unsigned long pfn)
+static int me_pagecache_dirty(struct page_state *ps, struct page *p)
 {
 	struct address_space *mapping = page_mapping(p);

@@ -922,7 +967,7 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
 		mapping_set_error(mapping, -EIO);
 	}

-	return me_pagecache_clean(p, pfn);
+	return me_pagecache_clean(ps, p);
 }

 /*
@@ -944,9 +989,10 @@ static int me_pagecache_dirty(struct page *p, unsigned long pfn)
  * Clean swap cache pages can be directly isolated. A later page fault will
  * bring in the known good data from disk.
  */
-static int me_swapcache_dirty(struct page *p, unsigned long pfn)
+static int me_swapcache_dirty(struct page_state *ps, struct page *p)
 {
 	int ret;
+	bool extra_pins = false;

 	ClearPageDirty(p);
 	/* Trigger EIO in shmem: */
@@ -954,10 +1000,17 @@ static int me_swapcache_dirty(struct page *p, unsigned long pfn)

 	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_DELAYED;
 	unlock_page(p);
+
+	if (ret == MF_DELAYED)
+		extra_pins = true;
+
+	if (has_extra_refcount(ps, p, extra_pins))
+		ret = MF_FAILED;
+
 	return ret;
 }

-static int me_swapcache_clean(struct page *p, unsigned long pfn)
+static int me_swapcache_clean(struct page_state *ps, struct page *p)
 {
 	int ret;

@@ -965,6 +1018,10 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)

 	ret = delete_from_lru_cache(p) ? MF_FAILED : MF_RECOVERED;
 	unlock_page(p);
+
+	if (has_extra_refcount(ps, p, false))
+		ret = MF_FAILED;
+
 	return ret;
 }

@@ -974,7 +1031,7 @@ static int me_swapcache_clean(struct page *p, unsigned long pfn)
  * - Error on hugepage is contained in hugepage unit (not in raw page unit.)
  *   To narrow down kill region to one page, we need to break up pmd.
  */
-static int me_huge_page(struct page *p, unsigned long pfn)
+static int me_huge_page(struct page_state *ps, struct page *p)
 {
 	int res;
 	struct page *hpage = compound_head(p);
@@ -985,7 +1042,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)

 	mapping = page_mapping(hpage);
 	if (mapping) {
-		res = truncate_error_page(hpage, pfn, mapping);
+		res = truncate_error_page(hpage, page_to_pfn(p), mapping);
 		unlock_page(hpage);
 	} else {
 		res = MF_FAILED;
@@ -1003,6 +1060,9 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 		}
 	}

+	if (has_extra_refcount(ps, p, false))
+		res = MF_FAILED;
+
 	return res;
 }

@@ -1028,14 +1088,7 @@ static int me_huge_page(struct page *p, unsigned long pfn)
 #define slab		(1UL << PG_slab)
 #define reserved	(1UL << PG_reserved)

-static struct page_state {
-	unsigned long mask;
-	unsigned long res;
-	enum mf_action_page_type type;
-
-	/* Callback ->action() has to unlock the relevant page inside it. */
-	int (*action)(struct page *p, unsigned long pfn);
-} error_states[] = {
+static struct page_state error_states[] = {
 	{ reserved,	reserved,	MF_MSG_KERNEL,	me_kernel },
 	/*
 	 * free pages are specially detected outside this table:
@@ -1095,19 +1148,10 @@ static int page_action(struct page_state *ps, struct page *p,
 			unsigned long pfn)
 {
 	int result;
-	int count;

 	/* page p should be unlocked after returning from ps->action().  */
-	result = ps->action(p, pfn);
+	result = ps->action(ps, p);

-	count = page_count(p) - 1;
-	if (ps->action == me_swapcache_dirty && result == MF_DELAYED)
-		count--;
-	if (count > 0) {
-		pr_err("Memory failure: %#lx: %s still referenced by %d users\n",
-		       pfn, action_page_types[ps->type], count);
-		result = MF_FAILED;
-	}
 	action_result(pfn, ps->type, result);

 	/* Could do more checks here if page looks ok */
@@ -1147,20 +1191,6 @@ static int __get_hwpoison_page(struct page *page)
 	if (!HWPoisonHandlable(head))
 		return -EBUSY;

-	if (PageTransHuge(head)) {
-		/*
-		 * Non anonymous thp exists only in allocation/free time. We
-		 * can't handle such a case correctly, so let's give it up.
-		 * This should be better than triggering BUG_ON when kernel
-		 * tries to touch the "partially handled" page.
-		 */
-		if (!PageAnon(head)) {
-			pr_err("Memory failure: %#lx: non anonymous thp\n",
-				page_to_pfn(page));
-			return 0;
-		}
-	}
-
 	if (get_page_unless_zero(head)) {
 		if (head == compound_head(page))
 			return 1;
@@ -1414,14 +1444,11 @@ static int identify_page_state(unsigned long pfn, struct page *p,
 static int try_to_split_thp_page(struct page *page, const char *msg)
 {
 	lock_page(page);
-	if (!PageAnon(page) || unlikely(split_huge_page(page))) {
+	if (unlikely(split_huge_page(page))) {
 		unsigned long pfn = page_to_pfn(page);

 		unlock_page(page);
-		if (!PageAnon(page))
-			pr_info("%s: %#lx: non anonymous thp\n", msg, pfn);
-		else
-			pr_info("%s: %#lx: thp split failed\n", msg, pfn);
+		pr_info("%s: %#lx: thp split failed\n", msg, pfn);
 		put_page(page);
 		return -EBUSY;
 	}
@@ -1708,6 +1735,20 @@ try_again:
 	}

 	if (PageTransHuge(hpage)) {
+		/*
+		 * The flag must be set after the refcount is bumped
+		 * otherwise it may race with THP split.
+		 * And the flag can't be set in get_hwpoison_page() since
+		 * it is called by soft offline too and it is just called
+		 * for !MF_COUNT_INCREASE.  So here seems to be the best
+		 * place.
+		 *
+		 * Don't need care about the above error handling paths for
+		 * get_hwpoison_page() since they handle either free page
+		 * or unhandlable page.  The refcount is bumped iff the
+		 * page is a valid handlable page.
+		 */
+		SetPageHasHWPoisoned(hpage);
 		if (try_to_split_thp_page(p, "Memory Failure") < 0) {
 			action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED);
 			res = -EBUSY;
@@ -2109,14 +2150,14 @@ static int __soft_offline_page(struct page *page)
 			if (!list_empty(&pagelist))
 				putback_movable_pages(&pagelist);

-			pr_info("soft offline: %#lx: %s migration failed %d, type %lx (%pGp)\n",
-				pfn, msg_page[huge], ret, page->flags, &page->flags);
+			pr_info("soft offline: %#lx: %s migration failed %d, type %pGp\n",
+				pfn, msg_page[huge], ret, &page->flags);
 			if (ret > 0)
 				ret = -EBUSY;
 		}
 	} else {
-		pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %lx (%pGp)\n",
-			pfn, msg_page[huge], page_count(page), page->flags, &page->flags);
+		pr_info("soft offline: %#lx: %s isolation failed, page count %d, type %pGp\n",
+			pfn, msg_page[huge], page_count(page), &page->flags);
 		ret = -EBUSY;
 	}
 	return ret;
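
As context for the refactor above: each handler in error_states[] now receives its struct page_state, so the leaked-refcount check that page_action() previously special-cased for me_swapcache_dirty moves into the shared has_extra_refcount() helper, and each handler declares how many extra pins it legitimately expects. What follows is a minimal user-space sketch of that table-driven dispatch pattern, not kernel code: the struct page stand-in, the me_demo_* handlers, and the main() driver are invented for illustration.

/*
 * Minimal sketch of the error_states[] dispatch pattern introduced by
 * this patch.  Simplified stand-in types; builds with: cc demo.c
 */
#include <stdbool.h>
#include <stdio.h>

/* Stand-in for the kernel's struct page, illustration only. */
struct page {
	unsigned long flags;
	int refcount;
};

enum mf_result { MF_IGNORED, MF_FAILED, MF_DELAYED, MF_RECOVERED };

struct page_state {
	unsigned long mask;
	unsigned long res;
	const char *type;
	/* As in the patch, ->action() now takes the page_state itself. */
	int (*action)(struct page_state *ps, struct page *p);
};

/*
 * Mirrors has_extra_refcount() from the patch: one base reference is
 * always expected; handlers that keep a legitimate extra pin pass
 * extra_pins == true.
 */
static bool has_extra_refcount(struct page_state *ps, struct page *p,
			       bool extra_pins)
{
	int count = p->refcount - 1;

	if (extra_pins)
		count -= 1;

	if (count > 0) {
		printf("%s still referenced by %d users\n", ps->type, count);
		return true;
	}
	return false;
}

static int me_demo_dirty(struct page_state *ps, struct page *p)
{
	/* Dirty-swapcache-style handler: one extra pin is tolerated. */
	return has_extra_refcount(ps, p, true) ? MF_FAILED : MF_DELAYED;
}

static int me_demo_clean(struct page_state *ps, struct page *p)
{
	/* Clean-style handler: no extra pins allowed. */
	return has_extra_refcount(ps, p, false) ? MF_FAILED : MF_RECOVERED;
}

#define dirty	(1UL << 0)

static struct page_state error_states[] = {
	{ dirty,	dirty,	"dirty demo page",	me_demo_dirty },
	{ 0,		0,	"clean demo page",	me_demo_clean },	/* catch-all */
};

int main(void)
{
	struct page p = { .flags = dirty, .refcount = 2 };
	struct page_state *ps;

	/* First entry whose masked flags match wins, as in identify_page_state(). */
	for (ps = error_states; (p.flags & ps->mask) != ps->res; ps++)
		;
	printf("action = %s, result = %d\n", ps->type, ps->action(ps, &p));
	return 0;
}

Passing ps into the callback is what lets the shared helper label its report with action_page_types[ps->type]; it is also why the patch hoists the struct page_state definition out of the error_states[] declaration and places it above the handlers.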