aboutsummaryrefslogtreecommitdiff
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c170
1 files changed, 102 insertions, 68 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index fe121fdb05f7..4d6e43c88489 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -39,7 +39,6 @@
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/page-flags.h>
-#include <linux/kernel-page-flags.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/dax.h>
@@ -50,7 +49,6 @@
#include <linux/swap.h>
#include <linux/backing-dev.h>
#include <linux/migrate.h>
-#include <linux/suspend.h>
#include <linux/slab.h>
#include <linux/swapops.h>
#include <linux/hugetlb.h>
@@ -59,7 +57,6 @@
#include <linux/memremap.h>
#include <linux/kfifo.h>
#include <linux/ratelimit.h>
-#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
#include <linux/shmem_fs.h>
#include <linux/sysctl.h>
@@ -75,13 +72,15 @@ atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
static bool hw_memory_failure __read_mostly = false;
-inline void num_poisoned_pages_inc(unsigned long pfn)
+static DEFINE_MUTEX(mf_mutex);
+
+void num_poisoned_pages_inc(unsigned long pfn)
{
atomic_long_inc(&num_poisoned_pages);
memblk_nr_poison_inc(pfn);
}
-inline void num_poisoned_pages_sub(unsigned long pfn, long i)
+void num_poisoned_pages_sub(unsigned long pfn, long i)
{
atomic_long_sub(i, &num_poisoned_pages);
if (pfn != -1UL)
@@ -363,17 +362,14 @@ void shake_page(struct page *p)
{
if (PageHuge(p))
return;
-
- if (!PageSlab(p)) {
- lru_add_drain_all();
- if (PageLRU(p) || is_free_buddy_page(p))
- return;
- }
-
/*
* TODO: Could shrink slab caches here if a lightweight range-based
* shrinker will be available.
*/
+ if (PageSlab(p))
+ return;
+
+ lru_add_drain_all();
}
EXPORT_SYMBOL_GPL(shake_page);
@@ -551,8 +547,8 @@ static void kill_procs(struct list_head *to_kill, int forcekill, bool fail,
* on behalf of the thread group. Return task_struct of the (first found)
* dedicated thread if found, and return NULL otherwise.
*
- * We already hold read_lock(&tasklist_lock) in the caller, so we don't
- * have to call rcu_read_lock/unlock() in this function.
+ * We already hold rcu lock in the caller, so we don't have to call
+ * rcu_read_lock/unlock() in this function.
*/
static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
{
@@ -613,8 +609,8 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
return;
pgoff = page_to_pgoff(page);
- read_lock(&tasklist_lock);
- for_each_process (tsk) {
+ rcu_read_lock();
+ for_each_process(tsk) {
struct anon_vma_chain *vmac;
struct task_struct *t = task_early_kill(tsk, force_early);
@@ -630,7 +626,7 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
anon_vma_unlock_read(av);
}
@@ -646,7 +642,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
pgoff_t pgoff;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
pgoff = page_to_pgoff(page);
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, force_early);
@@ -658,7 +654,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
/*
* Send early kill signal to tasks where a vma covers
* the page but the corrupted page is not necessarily
- * mapped it in its pte.
+ * mapped in its pte.
* Assume applications who requested early kill want
* to be informed of all such data corruptions.
*/
@@ -666,7 +662,7 @@ static void collect_procs_file(struct page *page, struct list_head *to_kill,
add_to_kill_anon_file(t, page, vma, to_kill);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
@@ -689,7 +685,7 @@ static void collect_procs_fsdax(struct page *page,
struct task_struct *tsk;
i_mmap_lock_read(mapping);
- read_lock(&tasklist_lock);
+ rcu_read_lock();
for_each_process(tsk) {
struct task_struct *t = task_early_kill(tsk, true);
@@ -700,7 +696,7 @@ static void collect_procs_fsdax(struct page *page,
add_to_kill_fsdax(t, page, vma, to_kill, pgoff);
}
}
- read_unlock(&tasklist_lock);
+ rcu_read_unlock();
i_mmap_unlock_read(mapping);
}
#endif /* CONFIG_FS_DAX */
@@ -721,7 +717,7 @@ static void collect_procs(struct page *page, struct list_head *tokill,
collect_procs_file(page, tokill, force_early);
}
-struct hwp_walk {
+struct hwpoison_walk {
struct to_kill tk;
unsigned long pfn;
int flags;
@@ -756,7 +752,7 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift,
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
pmd_t pmd = *pmdp;
unsigned long pfn;
@@ -774,7 +770,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
}
#else
static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
- struct hwp_walk *hwp)
+ struct hwpoison_walk *hwp)
{
return 0;
}
@@ -783,7 +779,7 @@ static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr,
static int hwpoison_pte_range(pmd_t *pmdp, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
int ret = 0;
pte_t *ptep, *mapped_pte;
spinlock_t *ptl;
@@ -817,7 +813,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- struct hwp_walk *hwp = walk->private;
+ struct hwpoison_walk *hwp = walk->private;
pte_t pte = huge_ptep_get(ptep);
struct hstate *h = hstate_vma(walk->vma);
@@ -828,7 +824,7 @@ static int hwpoison_hugetlb_range(pte_t *ptep, unsigned long hmask,
#define hwpoison_hugetlb_range NULL
#endif
-static const struct mm_walk_ops hwp_walk_ops = {
+static const struct mm_walk_ops hwpoison_walk_ops = {
.pmd_entry = hwpoison_pte_range,
.hugetlb_entry = hwpoison_hugetlb_range,
.walk_lock = PGWALK_RDLOCK,
@@ -851,7 +847,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
int flags)
{
int ret;
- struct hwp_walk priv = {
+ struct hwpoison_walk priv = {
.pfn = pfn,
};
priv.tk.tsk = p;
@@ -860,7 +856,7 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
return -EFAULT;
mmap_read_lock(p->mm);
- ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwp_walk_ops,
+ ret = walk_page_range(p->mm, 0, TASK_SIZE, &hwpoison_walk_ops,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
@@ -940,14 +936,12 @@ static int truncate_error_page(struct page *p, unsigned long pfn,
struct folio *folio = page_folio(p);
int err = mapping->a_ops->error_remove_page(mapping, p);
- if (err != 0) {
+ if (err != 0)
pr_info("%#lx: Failed to punch page: %d\n", pfn, err);
- } else if (folio_has_private(folio) &&
- !filemap_release_folio(folio, GFP_NOIO)) {
+ else if (!filemap_release_folio(folio, GFP_NOIO))
pr_info("%#lx: failed to release buffers\n", pfn);
- } else {
+ else
ret = MF_RECOVERED;
- }
} else {
/*
* If the file system doesn't support it just invalidate
@@ -1193,9 +1187,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
struct address_space *mapping;
bool extra_pins = false;
- if (!PageHuge(hpage))
- return MF_DELAYED;
-
mapping = page_mapping(hpage);
if (mapping) {
res = truncate_error_page(hpage, page_to_pfn(p), mapping);
@@ -1395,8 +1386,15 @@ static int __get_hwpoison_page(struct page *page, unsigned long flags)
bool hugetlb = false;
ret = get_hwpoison_hugetlb_folio(folio, &hugetlb, false);
- if (hugetlb)
- return ret;
+ if (hugetlb) {
+ /* Make sure hugetlb demotion did not happen from under us. */
+ if (folio == page_folio(page))
+ return ret;
+ if (ret > 0) {
+ folio_put(folio);
+ folio = page_folio(page);
+ }
+ }
/*
* This check prevents from calling folio_try_get() for any
@@ -1485,8 +1483,13 @@ static int __get_unpoison_page(struct page *page)
bool hugetlb = false;
ret = get_hwpoison_hugetlb_folio(folio, &hugetlb, true);
- if (hugetlb)
- return ret;
+ if (hugetlb) {
+ /* Make sure hugetlb demotion did not happen from under us. */
+ if (folio == page_folio(page))
+ return ret;
+ if (ret > 0)
+ folio_put(folio);
+ }
/*
* PageHWPoisonTakenOff pages are not only marked as PG_hwpoison,
@@ -1559,7 +1562,7 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
* Here we are interested only in user-mapped pages, so skip any
* other types of pages.
*/
- if (PageReserved(p) || PageSlab(p) || PageTable(p))
+ if (PageReserved(p) || PageSlab(p) || PageTable(p) || PageOffline(p))
return true;
if (!(PageLRU(hpage) || PageHuge(p)))
return true;
@@ -1814,6 +1817,7 @@ EXPORT_SYMBOL_GPL(mf_dax_kill_procs);
#endif /* CONFIG_FS_DAX */
#ifdef CONFIG_HUGETLB_PAGE
+
/*
* Struct raw_hwp_page represents information about "raw error page",
* constructing singly linked list from ->_hugetlb_hwpoison field of folio.
@@ -1828,16 +1832,49 @@ static inline struct llist_head *raw_hwp_list_head(struct folio *folio)
return (struct llist_head *)&folio->_hugetlb_hwpoison;
}
+bool is_raw_hwpoison_page_in_hugepage(struct page *page)
+{
+ struct llist_head *raw_hwp_head;
+ struct raw_hwp_page *p;
+ struct folio *folio = page_folio(page);
+ bool ret = false;
+
+ if (!folio_test_hwpoison(folio))
+ return false;
+
+ if (!folio_test_hugetlb(folio))
+ return PageHWPoison(page);
+
+ /*
+ * When RawHwpUnreliable is set, kernel lost track of which subpages
+ * are HWPOISON. So return as if ALL subpages are HWPOISONed.
+ */
+ if (folio_test_hugetlb_raw_hwp_unreliable(folio))
+ return true;
+
+ mutex_lock(&mf_mutex);
+
+ raw_hwp_head = raw_hwp_list_head(folio);
+ llist_for_each_entry(p, raw_hwp_head->first, node) {
+ if (page == p->page) {
+ ret = true;
+ break;
+ }
+ }
+
+ mutex_unlock(&mf_mutex);
+
+ return ret;
+}
+
static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
{
- struct llist_head *head;
- struct llist_node *t, *tnode;
+ struct llist_node *head;
+ struct raw_hwp_page *p, *next;
unsigned long count = 0;
- head = raw_hwp_list_head(folio);
- llist_for_each_safe(tnode, t, head->first) {
- struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
-
+ head = llist_del_all(raw_hwp_list_head(folio));
+ llist_for_each_entry_safe(p, next, head, node) {
if (move_flag)
SetPageHWPoison(p->page);
else
@@ -1845,7 +1882,6 @@ static unsigned long __folio_free_raw_hwp(struct folio *folio, bool move_flag)
kfree(p);
count++;
}
- llist_del_all(head);
return count;
}
@@ -1853,7 +1889,7 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
{
struct llist_head *head;
struct raw_hwp_page *raw_hwp;
- struct llist_node *t, *tnode;
+ struct raw_hwp_page *p, *next;
int ret = folio_test_set_hwpoison(folio) ? -EHWPOISON : 0;
/*
@@ -1864,9 +1900,7 @@ static int folio_set_hugetlb_hwpoison(struct folio *folio, struct page *page)
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
return -EHWPOISON;
head = raw_hwp_list_head(folio);
- llist_for_each_safe(tnode, t, head->first) {
- struct raw_hwp_page *p = container_of(tnode, struct raw_hwp_page, node);
-
+ llist_for_each_entry_safe(p, next, head->first, node) {
if (p->page == page)
return -EHWPOISON;
}
@@ -1917,6 +1951,8 @@ void folio_clear_hugetlb_hwpoison(struct folio *folio)
{
if (folio_test_hugetlb_raw_hwp_unreliable(folio))
return;
+ if (folio_test_hugetlb_vmemmap_optimized(folio))
+ return;
folio_clear_hwpoison(folio);
folio_free_raw_hwp(folio, true);
}
@@ -2081,8 +2117,6 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
{
int rc = -ENXIO;
- put_ref_page(pfn, flags);
-
/* device metadata space is not recoverable */
if (!pgmap_pfn_valid(pgmap, pfn))
goto out;
@@ -2105,12 +2139,11 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
out:
/* drop pgmap ref acquired in caller */
put_dev_pagemap(pgmap);
- action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
+ if (rc != -EOPNOTSUPP)
+ action_result(pfn, MF_MSG_DAX, rc ? MF_FAILED : MF_RECOVERED);
return rc;
}
-static DEFINE_MUTEX(mf_mutex);
-
/**
* memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page
@@ -2126,7 +2159,7 @@ static DEFINE_MUTEX(mf_mutex);
* detected by a background scrubber)
*
* Must run in process context (e.g. a work queue) with interrupts
- * enabled and no spinlocks hold.
+ * enabled and no spinlocks held.
*
* Return: 0 for successfully handled the memory error,
* -EOPNOTSUPP for hwpoison_filter() filtered the error event,
@@ -2158,6 +2191,7 @@ int memory_failure(unsigned long pfn, int flags)
if (pfn_valid(pfn)) {
pgmap = get_dev_pagemap(pfn, NULL);
+ put_ref_page(pfn, flags);
if (pgmap) {
res = memory_failure_dev_pagemap(pfn, flags,
pgmap);
@@ -2184,8 +2218,6 @@ try_again:
goto unlock_mutex;
}
- hpage = compound_head(p);
-
/*
* We need/can do nothing about count=0 pages.
* 1) it's a free page, and therefore in safe hand:
@@ -2224,13 +2256,14 @@ try_again:
}
}
+ hpage = compound_head(p);
if (PageTransHuge(hpage)) {
/*
* The flag must be set after the refcount is bumped
* otherwise it may race with THP split.
* And the flag can't be set in get_hwpoison_page() since
* it is called by soft offline too and it is just called
- * for !MF_COUNT_INCREASE. So here seems to be the best
+ * for !MF_COUNT_INCREASED. So here seems to be the best
* place.
*
* Don't need care about the above error handling paths for
@@ -2500,7 +2533,8 @@ int unpoison_memory(unsigned long pfn)
goto unlock_mutex;
}
- if (folio_test_slab(folio) || PageTable(&folio->page) || folio_test_reserved(folio))
+ if (folio_test_slab(folio) || PageTable(&folio->page) ||
+ folio_test_reserved(folio) || PageOffline(&folio->page))
goto unlock_mutex;
/*
@@ -2590,10 +2624,10 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
/*
* If we succeed to isolate the page, we grabbed another refcount on
- * the page, so we can safely drop the one we got from get_any_pages().
+ * the page, so we can safely drop the one we got from get_any_page().
* If we failed to isolate the page, it means that we cannot go further
* and we will return an error, so drop the reference we got from
- * get_any_pages() as well.
+ * get_any_page() as well.
*/
put_page(page);
return isolated;
@@ -2626,7 +2660,7 @@ static int soft_offline_in_use_page(struct page *page)
}
lock_page(page);
- if (!PageHuge(page))
+ if (!huge)
wait_on_page_writeback(page);
if (PageHWPoison(page)) {
unlock_page(page);
@@ -2635,7 +2669,7 @@ static int soft_offline_in_use_page(struct page *page)
return 0;
}
- if (!PageHuge(page) && PageLRU(page) && !PageSwapCache(page))
+ if (!huge && PageLRU(page) && !PageSwapCache(page))
/*
* Try to invalidate first. This should work for
* non dirty unmapped page cache pages.