diff options
Diffstat (limited to 'mm/memory-failure.c')
| -rw-r--r-- | mm/memory-failure.c | 193 | 
1 files changed, 125 insertions, 68 deletions
| diff --git a/mm/memory-failure.c b/mm/memory-failure.c index f1c389f7e669..97a9ed8f87a9 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -58,6 +58,7 @@  #include <linux/ratelimit.h>  #include <linux/page-isolation.h>  #include <linux/pagewalk.h> +#include <linux/shmem_fs.h>  #include "internal.h"  #include "ras/ras_event.h" @@ -722,7 +723,6 @@ static const char * const action_page_types[] = {  	[MF_MSG_KERNEL_HIGH_ORDER]	= "high-order kernel page",  	[MF_MSG_SLAB]			= "kernel slab page",  	[MF_MSG_DIFFERENT_COMPOUND]	= "different compound page after locking", -	[MF_MSG_POISONED_HUGE]		= "huge page already hardware poisoned",  	[MF_MSG_HUGE]			= "huge page",  	[MF_MSG_FREE_HUGE]		= "free huge page",  	[MF_MSG_NON_PMD_HUGE]		= "non-pmd-sized huge page", @@ -737,7 +737,6 @@ static const char * const action_page_types[] = {  	[MF_MSG_CLEAN_LRU]		= "clean LRU page",  	[MF_MSG_TRUNCATED_LRU]		= "already truncated LRU page",  	[MF_MSG_BUDDY]			= "free buddy page", -	[MF_MSG_BUDDY_2ND]		= "free buddy page (2nd try)",  	[MF_MSG_DAX]			= "dax page",  	[MF_MSG_UNSPLIT_THP]		= "unsplit thp",  	[MF_MSG_UNKNOWN]		= "unknown page", @@ -867,6 +866,7 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)  {  	int ret;  	struct address_space *mapping; +	bool extra_pins;  	delete_from_lru_cache(p); @@ -896,17 +896,23 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)  	}  	/* +	 * The shmem page is kept in page cache instead of truncating +	 * so is expected to have an extra refcount after error-handling. +	 */ +	extra_pins = shmem_mapping(mapping); + +	/*  	 * Truncation is a bit tricky. Enable it per file system for now.  	 *  	 * Open: to take i_rwsem or not for this? Right now we don't.  	 */  	ret = truncate_error_page(p, page_to_pfn(p), mapping); +	if (has_extra_refcount(ps, p, extra_pins)) +		ret = MF_FAILED; +  out:  	unlock_page(p); -	if (has_extra_refcount(ps, p, false)) -		ret = MF_FAILED; -  	return ret;  } @@ -1154,6 +1160,22 @@ static int page_action(struct page_state *ps, struct page *p,  	return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;  } +static inline bool PageHWPoisonTakenOff(struct page *page) +{ +	return PageHWPoison(page) && page_private(page) == MAGIC_HWPOISON; +} + +void SetPageHWPoisonTakenOff(struct page *page) +{ +	set_page_private(page, MAGIC_HWPOISON); +} + +void ClearPageHWPoisonTakenOff(struct page *page) +{ +	if (PageHWPoison(page)) +		set_page_private(page, 0); +} +  /*   * Return true if a page type of a given page is supported by hwpoison   * mechanism (while handling could fail), otherwise false.  This function @@ -1256,6 +1278,27 @@ out:  	return ret;  } +static int __get_unpoison_page(struct page *page) +{ +	struct page *head = compound_head(page); +	int ret = 0; +	bool hugetlb = false; + +	ret = get_hwpoison_huge_page(head, &hugetlb); +	if (hugetlb) +		return ret; + +	/* +	 * PageHWPoisonTakenOff pages are not only marked as PG_hwpoison, +	 * but also isolated from buddy freelist, so need to identify the +	 * state and have to cancel both operations to unpoison. +	 */ +	if (PageHWPoisonTakenOff(page)) +		return -EHWPOISON; + +	return get_page_unless_zero(page) ? 1 : 0; +} +  /**   * get_hwpoison_page() - Get refcount for memory error handling   * @p:		Raw error page (hit by memory error) @@ -1263,7 +1306,7 @@ out:   *   * get_hwpoison_page() takes a page refcount of an error page to handle memory   * error on it, after checking that the error page is in a well-defined state - * (defined as a page-type we can successfully handle the memor error on it, + * (defined as a page-type we can successfully handle the memory error on it,   * such as LRU page and hugetlb page).   *   * Memory error handling could be triggered at any time on any type of page, @@ -1272,18 +1315,26 @@ out:   * extra care for the error page's state (as done in __get_hwpoison_page()),   * and has some retry logic in get_any_page().   * + * When called from unpoison_memory(), the caller should already ensure that + * the given page has PG_hwpoison. So it's never reused for other page + * allocations, and __get_unpoison_page() never races with them. + *   * Return: 0 on failure,   *         1 on success for in-use pages in a well-defined state,   *         -EIO for pages on which we can not handle memory errors,   *         -EBUSY when get_hwpoison_page() has raced with page lifecycle - *         operations like allocation and free. + *         operations like allocation and free, + *         -EHWPOISON when the page is hwpoisoned and taken off from buddy.   */  static int get_hwpoison_page(struct page *p, unsigned long flags)  {  	int ret;  	zone_pcp_disable(page_zone(p)); -	ret = get_any_page(p, flags); +	if (flags & MF_UNPOISON) +		ret = __get_unpoison_page(p); +	else +		ret = get_any_page(p, flags);  	zone_pcp_enable(page_zone(p));  	return ret; @@ -1494,14 +1545,6 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)  	lock_page(head);  	page_flags = head->flags; -	if (!PageHWPoison(head)) { -		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); -		num_poisoned_pages_dec(); -		unlock_page(head); -		put_page(head); -		return 0; -	} -  	/*  	 * TODO: hwpoison for pud-sized hugetlb doesn't work right now, so  	 * simply disable it. In order to make it work properly, we need @@ -1553,6 +1596,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,  	}  	/* +	 * Pages instantiated by device-dax (not filesystem-dax) +	 * may be compound pages. +	 */ +	page = compound_head(page); + +	/*  	 * Prevent the inode from being freed while we are interrogating  	 * the address_space, typically this would be handled by  	 * lock_page(), but dax pages do not use the page lock. This @@ -1615,6 +1664,8 @@ out:  	return rc;  } +static DEFINE_MUTEX(mf_mutex); +  /**   * memory_failure - Handle memory failure of a page.   * @pfn: Page Number of the corrupted page @@ -1641,7 +1692,6 @@ int memory_failure(unsigned long pfn, int flags)  	int res = 0;  	unsigned long page_flags;  	bool retry = true; -	static DEFINE_MUTEX(mf_mutex);  	if (!sysctl_memory_failure_recovery)  		panic("Memory failure on page %lx", pfn); @@ -1782,16 +1832,6 @@ try_again:  	 */  	page_flags = p->flags; -	/* -	 * unpoison always clear PG_hwpoison inside page lock -	 */ -	if (!PageHWPoison(p)) { -		pr_err("Memory failure: %#lx: just unpoisoned\n", pfn); -		num_poisoned_pages_dec(); -		unlock_page(p); -		put_page(p); -		goto unlock_mutex; -	}  	if (hwpoison_filter(p)) {  		if (TestClearPageHWPoison(p))  			num_poisoned_pages_dec(); @@ -1955,6 +1995,28 @@ core_initcall(memory_failure_init);  		pr_info(fmt, pfn);			\  }) +static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p) +{ +	if (TestClearPageHWPoison(p)) { +		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", +				 page_to_pfn(p), rs); +		num_poisoned_pages_dec(); +		return 1; +	} +	return 0; +} + +static inline int unpoison_taken_off_page(struct ratelimit_state *rs, +					  struct page *p) +{ +	if (put_page_back_buddy(p)) { +		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", +				 page_to_pfn(p), rs); +		return 0; +	} +	return -EBUSY; +} +  /**   * unpoison_memory - Unpoison a previously poisoned page   * @pfn: Page number of the to be unpoisoned page @@ -1971,8 +2033,7 @@ int unpoison_memory(unsigned long pfn)  {  	struct page *page;  	struct page *p; -	int freeit = 0; -	unsigned long flags = 0; +	int ret = -EBUSY;  	static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,  					DEFAULT_RATELIMIT_BURST); @@ -1982,69 +2043,60 @@ int unpoison_memory(unsigned long pfn)  	p = pfn_to_page(pfn);  	page = compound_head(p); +	mutex_lock(&mf_mutex); +  	if (!PageHWPoison(p)) {  		unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",  				 pfn, &unpoison_rs); -		return 0; +		goto unlock_mutex;  	}  	if (page_count(page) > 1) {  		unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",  				 pfn, &unpoison_rs); -		return 0; +		goto unlock_mutex;  	}  	if (page_mapped(page)) {  		unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",  				 pfn, &unpoison_rs); -		return 0; +		goto unlock_mutex;  	}  	if (page_mapping(page)) {  		unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",  				 pfn, &unpoison_rs); -		return 0; -	} - -	/* -	 * unpoison_memory() can encounter thp only when the thp is being -	 * worked by memory_failure() and the page lock is not held yet. -	 * In such case, we yield to memory_failure() and make unpoison fail. -	 */ -	if (!PageHuge(page) && PageTransHuge(page)) { -		unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n", -				 pfn, &unpoison_rs); -		return 0; +		goto unlock_mutex;  	} -	if (!get_hwpoison_page(p, flags)) { -		if (TestClearPageHWPoison(p)) -			num_poisoned_pages_dec(); -		unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n", -				 pfn, &unpoison_rs); -		return 0; -	} +	if (PageSlab(page) || PageTable(page)) +		goto unlock_mutex; -	lock_page(page); -	/* -	 * This test is racy because PG_hwpoison is set outside of page lock. -	 * That's acceptable because that won't trigger kernel panic. Instead, -	 * the PG_hwpoison page will be caught and isolated on the entrance to -	 * the free buddy page pool. -	 */ -	if (TestClearPageHWPoison(page)) { -		unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n", -				 pfn, &unpoison_rs); -		num_poisoned_pages_dec(); -		freeit = 1; -	} -	unlock_page(page); +	ret = get_hwpoison_page(p, MF_UNPOISON); +	if (!ret) { +		if (clear_page_hwpoison(&unpoison_rs, page)) +			ret = 0; +		else +			ret = -EBUSY; +	} else if (ret < 0) { +		if (ret == -EHWPOISON) { +			ret = unpoison_taken_off_page(&unpoison_rs, p); +		} else +			unpoison_pr_info("Unpoison: failed to grab page %#lx\n", +					 pfn, &unpoison_rs); +	} else { +		int freeit = clear_page_hwpoison(&unpoison_rs, p); -	put_page(page); -	if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))  		put_page(page); +		if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) { +			put_page(page); +			ret = 0; +		} +	} -	return 0; +unlock_mutex: +	mutex_unlock(&mf_mutex); +	return ret;  }  EXPORT_SYMBOL(unpoison_memory); @@ -2225,9 +2277,12 @@ int soft_offline_page(unsigned long pfn, int flags)  		return -EIO;  	} +	mutex_lock(&mf_mutex); +  	if (PageHWPoison(page)) {  		pr_info("%s: %#lx page already poisoned\n", __func__, pfn);  		put_ref_page(ref_page); +		mutex_unlock(&mf_mutex);  		return 0;  	} @@ -2246,5 +2301,7 @@ retry:  		}  	} +	mutex_unlock(&mf_mutex); +  	return ret;  } |