aboutsummaryrefslogtreecommitdiff
path: root/mm/memory-failure.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memory-failure.c')
-rw-r--r--mm/memory-failure.c345
1 files changed, 212 insertions, 133 deletions
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index f1c389f7e669..dcb6bb9cf731 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -58,6 +58,7 @@
#include <linux/ratelimit.h>
#include <linux/page-isolation.h>
#include <linux/pagewalk.h>
+#include <linux/shmem_fs.h>
#include "internal.h"
#include "ras/ras_event.h"
@@ -129,12 +130,6 @@ static int hwpoison_filter_dev(struct page *p)
hwpoison_filter_dev_minor == ~0U)
return 0;
- /*
- * page_mapping() does not accept slab pages.
- */
- if (PageSlab(p))
- return -EINVAL;
-
mapping = page_mapping(p);
if (mapping == NULL || mapping->host == NULL)
return -EINVAL;
@@ -257,16 +252,13 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
pr_err("Memory failure: %#lx: Sending SIGBUS to %s:%d due to hardware memory corruption\n",
pfn, t->comm, t->pid);
- if (flags & MF_ACTION_REQUIRED) {
- if (t == current)
- ret = force_sig_mceerr(BUS_MCEERR_AR,
- (void __user *)tk->addr, addr_lsb);
- else
- /* Signal other processes sharing the page if they have PF_MCE_EARLY set. */
- ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
- addr_lsb, t);
- } else {
+ if ((flags & MF_ACTION_REQUIRED) && (t == current))
+ ret = force_sig_mceerr(BUS_MCEERR_AR,
+ (void __user *)tk->addr, addr_lsb);
+ else
/*
+ * Signal other processes sharing the page if they have
+ * PF_MCE_EARLY set.
* Don't use force here, it's convenient if the signal
* can be temporarily blocked.
* This could cause a loop when the user sets SIGBUS
@@ -274,7 +266,6 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
*/
ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
addr_lsb, t); /* synchronous? */
- }
if (ret < 0)
pr_info("Memory failure: Error sending signal to %s:%d: %d\n",
t->comm, t->pid, ret);
@@ -314,6 +305,7 @@ static unsigned long dev_pagemap_mapping_shift(struct page *page,
pmd_t *pmd;
pte_t *pte;
+ VM_BUG_ON_VMA(address == -EFAULT, vma);
pgd = pgd_offset(vma->vm_mm, address);
if (!pgd_present(*pgd))
return 0;
@@ -486,12 +478,13 @@ static struct task_struct *task_early_kill(struct task_struct *tsk,
static void collect_procs_anon(struct page *page, struct list_head *to_kill,
int force_early)
{
+ struct folio *folio = page_folio(page);
struct vm_area_struct *vma;
struct task_struct *tsk;
struct anon_vma *av;
pgoff_t pgoff;
- av = page_lock_anon_vma_read(page);
+ av = folio_lock_anon_vma_read(folio);
if (av == NULL) /* Not actually mapped anymore */
return;
@@ -706,8 +699,10 @@ static int kill_accessing_process(struct task_struct *p, unsigned long pfn,
(void *)&priv);
if (ret == 1 && priv.tk.addr)
kill_proc(&priv.tk, pfn, flags);
+ else
+ ret = 0;
mmap_read_unlock(p->mm);
- return ret ? -EFAULT : -EHWPOISON;
+ return ret > 0 ? -EHWPOISON : -EFAULT;
}
static const char *action_name[] = {
@@ -722,7 +717,6 @@ static const char * const action_page_types[] = {
[MF_MSG_KERNEL_HIGH_ORDER] = "high-order kernel page",
[MF_MSG_SLAB] = "kernel slab page",
[MF_MSG_DIFFERENT_COMPOUND] = "different compound page after locking",
- [MF_MSG_POISONED_HUGE] = "huge page already hardware poisoned",
[MF_MSG_HUGE] = "huge page",
[MF_MSG_FREE_HUGE] = "free huge page",
[MF_MSG_NON_PMD_HUGE] = "non-pmd-sized huge page",
@@ -737,9 +731,9 @@ static const char * const action_page_types[] = {
[MF_MSG_CLEAN_LRU] = "clean LRU page",
[MF_MSG_TRUNCATED_LRU] = "already truncated LRU page",
[MF_MSG_BUDDY] = "free buddy page",
- [MF_MSG_BUDDY_2ND] = "free buddy page (2nd try)",
[MF_MSG_DAX] = "dax page",
[MF_MSG_UNSPLIT_THP] = "unsplit thp",
+ [MF_MSG_DIFFERENT_PAGE_SIZE] = "different page size",
[MF_MSG_UNKNOWN] = "unknown page",
};
@@ -867,6 +861,7 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
{
int ret;
struct address_space *mapping;
+ bool extra_pins;
delete_from_lru_cache(p);
@@ -896,17 +891,23 @@ static int me_pagecache_clean(struct page_state *ps, struct page *p)
}
/*
+ * The shmem page is kept in page cache instead of truncating
+ * so is expected to have an extra refcount after error-handling.
+ */
+ extra_pins = shmem_mapping(mapping);
+
+ /*
* Truncation is a bit tricky. Enable it per file system for now.
*
* Open: to take i_rwsem or not for this? Right now we don't.
*/
ret = truncate_error_page(p, page_to_pfn(p), mapping);
+ if (has_extra_refcount(ps, p, extra_pins))
+ ret = MF_FAILED;
+
out:
unlock_page(p);
- if (has_extra_refcount(ps, p, false))
- ret = MF_FAILED;
-
return ret;
}
@@ -1154,18 +1155,40 @@ static int page_action(struct page_state *ps, struct page *p,
return (result == MF_RECOVERED || result == MF_DELAYED) ? 0 : -EBUSY;
}
+static inline bool PageHWPoisonTakenOff(struct page *page)
+{
+ return PageHWPoison(page) && page_private(page) == MAGIC_HWPOISON;
+}
+
+void SetPageHWPoisonTakenOff(struct page *page)
+{
+ set_page_private(page, MAGIC_HWPOISON);
+}
+
+void ClearPageHWPoisonTakenOff(struct page *page)
+{
+ if (PageHWPoison(page))
+ set_page_private(page, 0);
+}
+
/*
* Return true if a page type of a given page is supported by hwpoison
* mechanism (while handling could fail), otherwise false. This function
* does not return true for hugetlb or device memory pages, so it's assumed
* to be called only in the context where we never have such pages.
*/
-static inline bool HWPoisonHandlable(struct page *page)
+static inline bool HWPoisonHandlable(struct page *page, unsigned long flags)
{
- return PageLRU(page) || __PageMovable(page) || is_free_buddy_page(page);
+ bool movable = false;
+
+ /* Soft offline could mirgate non-LRU movable pages */
+ if ((flags & MF_SOFT_OFFLINE) && __PageMovable(page))
+ movable = true;
+
+ return movable || PageLRU(page) || is_free_buddy_page(page);
}
-static int __get_hwpoison_page(struct page *page)
+static int __get_hwpoison_page(struct page *page, unsigned long flags)
{
struct page *head = compound_head(page);
int ret = 0;
@@ -1180,7 +1203,7 @@ static int __get_hwpoison_page(struct page *page)
* for any unsupported type of page in order to reduce the risk of
* unexpected races caused by taking a page refcount.
*/
- if (!HWPoisonHandlable(head))
+ if (!HWPoisonHandlable(head, flags))
return -EBUSY;
if (get_page_unless_zero(head)) {
@@ -1205,7 +1228,7 @@ static int get_any_page(struct page *p, unsigned long flags)
try_again:
if (!count_increased) {
- ret = __get_hwpoison_page(p);
+ ret = __get_hwpoison_page(p, flags);
if (!ret) {
if (page_count(p)) {
/* We raced with an allocation, retry. */
@@ -1233,7 +1256,7 @@ try_again:
}
}
- if (PageHuge(p) || HWPoisonHandlable(p)) {
+ if (PageHuge(p) || HWPoisonHandlable(p, flags)) {
ret = 1;
} else {
/*
@@ -1256,6 +1279,27 @@ out:
return ret;
}
+static int __get_unpoison_page(struct page *page)
+{
+ struct page *head = compound_head(page);
+ int ret = 0;
+ bool hugetlb = false;
+
+ ret = get_hwpoison_huge_page(head, &hugetlb);
+ if (hugetlb)
+ return ret;
+
+ /*
+ * PageHWPoisonTakenOff pages are not only marked as PG_hwpoison,
+ * but also isolated from buddy freelist, so need to identify the
+ * state and have to cancel both operations to unpoison.
+ */
+ if (PageHWPoisonTakenOff(page))
+ return -EHWPOISON;
+
+ return get_page_unless_zero(page) ? 1 : 0;
+}
+
/**
* get_hwpoison_page() - Get refcount for memory error handling
* @p: Raw error page (hit by memory error)
@@ -1263,7 +1307,7 @@ out:
*
* get_hwpoison_page() takes a page refcount of an error page to handle memory
* error on it, after checking that the error page is in a well-defined state
- * (defined as a page-type we can successfully handle the memor error on it,
+ * (defined as a page-type we can successfully handle the memory error on it,
* such as LRU page and hugetlb page).
*
* Memory error handling could be triggered at any time on any type of page,
@@ -1272,18 +1316,26 @@ out:
* extra care for the error page's state (as done in __get_hwpoison_page()),
* and has some retry logic in get_any_page().
*
+ * When called from unpoison_memory(), the caller should already ensure that
+ * the given page has PG_hwpoison. So it's never reused for other page
+ * allocations, and __get_unpoison_page() never races with them.
+ *
* Return: 0 on failure,
* 1 on success for in-use pages in a well-defined state,
* -EIO for pages on which we can not handle memory errors,
* -EBUSY when get_hwpoison_page() has raced with page lifecycle
- * operations like allocation and free.
+ * operations like allocation and free,
+ * -EHWPOISON when the page is hwpoisoned and taken off from buddy.
*/
static int get_hwpoison_page(struct page *p, unsigned long flags)
{
int ret;
zone_pcp_disable(page_zone(p));
- ret = get_any_page(p, flags);
+ if (flags & MF_UNPOISON)
+ ret = __get_unpoison_page(p);
+ else
+ ret = get_any_page(p, flags);
zone_pcp_enable(page_zone(p));
return ret;
@@ -1296,6 +1348,7 @@ static int get_hwpoison_page(struct page *p, unsigned long flags)
static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
int flags, struct page *hpage)
{
+ struct folio *folio = page_folio(hpage);
enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
struct address_space *mapping;
LIST_HEAD(tokill);
@@ -1360,26 +1413,22 @@ static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
if (kill)
collect_procs(hpage, &tokill, flags & MF_ACTION_REQUIRED);
- if (!PageHuge(hpage)) {
- try_to_unmap(hpage, ttu);
+ if (PageHuge(hpage) && !PageAnon(hpage)) {
+ /*
+ * For hugetlb pages in shared mappings, try_to_unmap
+ * could potentially call huge_pmd_unshare. Because of
+ * this, take semaphore in write mode here and set
+ * TTU_RMAP_LOCKED to indicate we have taken the lock
+ * at this higher level.
+ */
+ mapping = hugetlb_page_mapping_lock_write(hpage);
+ if (mapping) {
+ try_to_unmap(folio, ttu|TTU_RMAP_LOCKED);
+ i_mmap_unlock_write(mapping);
+ } else
+ pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
} else {
- if (!PageAnon(hpage)) {
- /*
- * For hugetlb pages in shared mappings, try_to_unmap
- * could potentially call huge_pmd_unshare. Because of
- * this, take semaphore in write mode here and set
- * TTU_RMAP_LOCKED to indicate we have taken the lock
- * at this higher level.
- */
- mapping = hugetlb_page_mapping_lock_write(hpage);
- if (mapping) {
- try_to_unmap(hpage, ttu|TTU_RMAP_LOCKED);
- i_mmap_unlock_write(mapping);
- } else
- pr_info("Memory failure: %#lx: could not lock mapping for mapped huge page\n", pfn);
- } else {
- try_to_unmap(hpage, ttu);
- }
+ try_to_unmap(folio, ttu);
}
unmap_success = !page_mapped(hpage);
@@ -1475,7 +1524,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
if (TestClearPageHWPoison(head))
num_poisoned_pages_dec();
unlock_page(head);
- return 0;
+ return -EOPNOTSUPP;
}
unlock_page(head);
res = MF_FAILED;
@@ -1492,14 +1541,25 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags)
}
lock_page(head);
+
+ /*
+ * The page could have changed compound pages due to race window.
+ * If this happens just bail out.
+ */
+ if (!PageHuge(p) || compound_head(p) != head) {
+ action_result(pfn, MF_MSG_DIFFERENT_PAGE_SIZE, MF_IGNORED);
+ res = -EBUSY;
+ goto out;
+ }
+
page_flags = head->flags;
- if (!PageHWPoison(head)) {
- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
- num_poisoned_pages_dec();
- unlock_page(head);
- put_page(head);
- return 0;
+ if (hwpoison_filter(p)) {
+ if (TestClearPageHWPoison(head))
+ num_poisoned_pages_dec();
+ put_page(p);
+ res = -EOPNOTSUPP;
+ goto out;
}
/*
@@ -1553,6 +1613,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
}
/*
+ * Pages instantiated by device-dax (not filesystem-dax)
+ * may be compound pages.
+ */
+ page = compound_head(page);
+
+ /*
* Prevent the inode from being freed while we are interrogating
* the address_space, typically this would be handled by
* lock_page(), but dax pages do not use the page lock. This
@@ -1564,7 +1630,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
goto out;
if (hwpoison_filter(page)) {
- rc = 0;
+ rc = -EOPNOTSUPP;
goto unlock;
}
@@ -1589,7 +1655,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
* SIGBUS (i.e. MF_MUST_KILL)
*/
flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
- collect_procs(page, &tokill, flags & MF_ACTION_REQUIRED);
+ collect_procs(page, &tokill, true);
list_for_each_entry(tk, &tokill, nd)
if (tk->size_shift)
@@ -1604,7 +1670,7 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
start = (page->index << PAGE_SHIFT) & ~(size - 1);
unmap_mapping_range(page->mapping, start, size, 0);
}
- kill_procs(&tokill, flags & MF_MUST_KILL, false, pfn, flags);
+ kill_procs(&tokill, true, false, pfn, flags);
rc = 0;
unlock:
dax_unlock_page(page, cookie);
@@ -1615,6 +1681,8 @@ out:
return rc;
}
+static DEFINE_MUTEX(mf_mutex);
+
/**
* memory_failure - Handle memory failure of a page.
* @pfn: Page Number of the corrupted page
@@ -1631,17 +1699,19 @@ out:
*
* Must run in process context (e.g. a work queue) with interrupts
* enabled and no spinlocks hold.
+ *
+ * Return: 0 for successfully handled the memory error,
+ * -EOPNOTSUPP for memory_filter() filtered the error event,
+ * < 0(except -EOPNOTSUPP) on failure.
*/
int memory_failure(unsigned long pfn, int flags)
{
struct page *p;
struct page *hpage;
- struct page *orig_head;
struct dev_pagemap *pgmap;
int res = 0;
unsigned long page_flags;
bool retry = true;
- static DEFINE_MUTEX(mf_mutex);
if (!sysctl_memory_failure_recovery)
panic("Memory failure on page %lx", pfn);
@@ -1683,7 +1753,7 @@ try_again:
goto unlock_mutex;
}
- orig_head = hpage = compound_head(p);
+ hpage = compound_head(p);
num_poisoned_pages_inc();
/*
@@ -1764,10 +1834,21 @@ try_again:
lock_page(p);
/*
- * The page could have changed compound pages during the locking.
- * If this happens just bail out.
+ * We're only intended to deal with the non-Compound page here.
+ * However, the page could have changed compound pages due to
+ * race window. If this happens, we could try again to hopefully
+ * handle the page next round.
*/
- if (PageCompound(p) && compound_head(p) != orig_head) {
+ if (PageCompound(p)) {
+ if (retry) {
+ if (TestClearPageHWPoison(p))
+ num_poisoned_pages_dec();
+ unlock_page(p);
+ put_page(p);
+ flags &= ~MF_COUNT_INCREASED;
+ retry = false;
+ goto try_again;
+ }
action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED);
res = -EBUSY;
goto unlock_page;
@@ -1782,21 +1863,12 @@ try_again:
*/
page_flags = p->flags;
- /*
- * unpoison always clear PG_hwpoison inside page lock
- */
- if (!PageHWPoison(p)) {
- pr_err("Memory failure: %#lx: just unpoisoned\n", pfn);
- num_poisoned_pages_dec();
- unlock_page(p);
- put_page(p);
- goto unlock_mutex;
- }
if (hwpoison_filter(p)) {
if (TestClearPageHWPoison(p))
num_poisoned_pages_dec();
unlock_page(p);
put_page(p);
+ res = -EOPNOTSUPP;
goto unlock_mutex;
}
@@ -1805,7 +1877,7 @@ try_again:
* page_lock. We need wait writeback completion for this page or it
* may trigger vfs BUG while evict inode.
*/
- if (!PageTransTail(p) && !PageLRU(p) && !PageWriteback(p))
+ if (!PageLRU(p) && !PageWriteback(p))
goto identify_page_state;
/*
@@ -1955,6 +2027,28 @@ core_initcall(memory_failure_init);
pr_info(fmt, pfn); \
})
+static inline int clear_page_hwpoison(struct ratelimit_state *rs, struct page *p)
+{
+ if (TestClearPageHWPoison(p)) {
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ page_to_pfn(p), rs);
+ num_poisoned_pages_dec();
+ return 1;
+ }
+ return 0;
+}
+
+static inline int unpoison_taken_off_page(struct ratelimit_state *rs,
+ struct page *p)
+{
+ if (put_page_back_buddy(p)) {
+ unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
+ page_to_pfn(p), rs);
+ return 0;
+ }
+ return -EBUSY;
+}
+
/**
* unpoison_memory - Unpoison a previously poisoned page
* @pfn: Page number of the to be unpoisoned page
@@ -1971,8 +2065,7 @@ int unpoison_memory(unsigned long pfn)
{
struct page *page;
struct page *p;
- int freeit = 0;
- unsigned long flags = 0;
+ int ret = -EBUSY;
static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
@@ -1982,69 +2075,60 @@ int unpoison_memory(unsigned long pfn)
p = pfn_to_page(pfn);
page = compound_head(p);
+ mutex_lock(&mf_mutex);
+
if (!PageHWPoison(p)) {
unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_count(page) > 1) {
unpoison_pr_info("Unpoison: Someone grabs the hwpoison page %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_mapped(page)) {
unpoison_pr_info("Unpoison: Someone maps the hwpoison page %#lx\n",
pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
if (page_mapping(page)) {
unpoison_pr_info("Unpoison: the hwpoison page has non-NULL mapping %#lx\n",
pfn, &unpoison_rs);
- return 0;
- }
-
- /*
- * unpoison_memory() can encounter thp only when the thp is being
- * worked by memory_failure() and the page lock is not held yet.
- * In such case, we yield to memory_failure() and make unpoison fail.
- */
- if (!PageHuge(page) && PageTransHuge(page)) {
- unpoison_pr_info("Unpoison: Memory failure is now running on %#lx\n",
- pfn, &unpoison_rs);
- return 0;
+ goto unlock_mutex;
}
- if (!get_hwpoison_page(p, flags)) {
- if (TestClearPageHWPoison(p))
- num_poisoned_pages_dec();
- unpoison_pr_info("Unpoison: Software-unpoisoned free page %#lx\n",
- pfn, &unpoison_rs);
- return 0;
- }
+ if (PageSlab(page) || PageTable(page))
+ goto unlock_mutex;
- lock_page(page);
- /*
- * This test is racy because PG_hwpoison is set outside of page lock.
- * That's acceptable because that won't trigger kernel panic. Instead,
- * the PG_hwpoison page will be caught and isolated on the entrance to
- * the free buddy page pool.
- */
- if (TestClearPageHWPoison(page)) {
- unpoison_pr_info("Unpoison: Software-unpoisoned page %#lx\n",
- pfn, &unpoison_rs);
- num_poisoned_pages_dec();
- freeit = 1;
- }
- unlock_page(page);
+ ret = get_hwpoison_page(p, MF_UNPOISON);
+ if (!ret) {
+ if (clear_page_hwpoison(&unpoison_rs, page))
+ ret = 0;
+ else
+ ret = -EBUSY;
+ } else if (ret < 0) {
+ if (ret == -EHWPOISON) {
+ ret = unpoison_taken_off_page(&unpoison_rs, p);
+ } else
+ unpoison_pr_info("Unpoison: failed to grab page %#lx\n",
+ pfn, &unpoison_rs);
+ } else {
+ int freeit = clear_page_hwpoison(&unpoison_rs, p);
- put_page(page);
- if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
put_page(page);
+ if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1)) {
+ put_page(page);
+ ret = 0;
+ }
+ }
- return 0;
+unlock_mutex:
+ mutex_unlock(&mf_mutex);
+ return ret;
}
EXPORT_SYMBOL(unpoison_memory);
@@ -2087,7 +2171,7 @@ static bool isolate_page(struct page *page, struct list_head *pagelist)
*/
static int __soft_offline_page(struct page *page)
{
- int ret = 0;
+ long ret = 0;
unsigned long pfn = page_to_pfn(page);
struct page *hpage = compound_head(page);
char const *msg_page[] = {"page", "hugepage"};
@@ -2098,12 +2182,6 @@ static int __soft_offline_page(struct page *page)
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
- /*
- * Check PageHWPoison again inside page lock because PageHWPoison
- * is set by memory_failure() outside page lock. Note that
- * memory_failure() also double-checks PageHWPoison inside page lock,
- * so there's no race between soft_offline_page() and memory_failure().
- */
lock_page(page);
if (!PageHuge(page))
wait_on_page_writeback(page);
@@ -2114,7 +2192,7 @@ static int __soft_offline_page(struct page *page)
return 0;
}
- if (!PageHuge(page))
+ if (!PageHuge(page) && PageLRU(page) && !PageSwapCache(page))
/*
* Try to invalidate first. This should work for
* non dirty unmapped page cache pages.
@@ -2122,10 +2200,6 @@ static int __soft_offline_page(struct page *page)
ret = invalidate_inode_page(page);
unlock_page(page);
- /*
- * RED-PEN would be better to keep it isolated here, but we
- * would need to fix isolation locking first.
- */
if (ret) {
pr_info("soft_offline: %#lx: invalidated\n", pfn);
page_handle_poison(page, false, true);
@@ -2144,7 +2218,7 @@ static int __soft_offline_page(struct page *page)
if (!list_empty(&pagelist))
putback_movable_pages(&pagelist);
- pr_info("soft offline: %#lx: %s migration failed %d, type %pGp\n",
+ pr_info("soft offline: %#lx: %s migration failed %ld, type %pGp\n",
pfn, msg_page[huge], ret, &page->flags);
if (ret > 0)
ret = -EBUSY;
@@ -2225,15 +2299,18 @@ int soft_offline_page(unsigned long pfn, int flags)
return -EIO;
}
+ mutex_lock(&mf_mutex);
+
if (PageHWPoison(page)) {
pr_info("%s: %#lx page already poisoned\n", __func__, pfn);
put_ref_page(ref_page);
+ mutex_unlock(&mf_mutex);
return 0;
}
retry:
get_online_mems();
- ret = get_hwpoison_page(page, flags);
+ ret = get_hwpoison_page(page, flags | MF_SOFT_OFFLINE);
put_online_mems();
if (ret > 0) {
@@ -2246,5 +2323,7 @@ retry:
}
}
+ mutex_unlock(&mf_mutex);
+
return ret;
}