From 1f503443e7df8dc8366608b4d810ce2d6669827c Mon Sep 17 00:00:00 2001
From: Pingfan Liu
Date: Thu, 30 Jan 2020 22:11:10 -0800
Subject: mm/sparse.c: reset section's mem_map when fully deactivated

After commit ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug"),
when a mem section is fully deactivated, section_mem_map still records
the section's start pfn, which is not used any more and will be
reassigned during re-addition.  In analogy with the alloc/free pattern,
it is better to clear all fields of section_mem_map.

Besides this, the stale value breaks the user-space tool "makedumpfile"
[1], which assumes that a hot-removed section has a NULL mem_map rather
than checking the SECTION_MARKED_PRESENT bit directly.  (It would be
better for makedumpfile to drop that assumption, which needs a separate
patch.)

The bug can be reproduced on IBM POWERVM by running "drmgr -c mem -r
-q 5", triggering a crash, and saving the vmcore with makedumpfile.

[1]: makedumpfile, commit e73016540293 ("[v1.6.7] Update version")

Link: http://lkml.kernel.org/r/1579487594-28889-1-git-send-email-kernelfans@gmail.com
Signed-off-by: Pingfan Liu
Acked-by: Michal Hocko
Acked-by: David Hildenbrand
Cc: Dan Williams
Cc: Oscar Salvador
Cc: Baoquan He
Cc: Qian Cai
Cc: Kazuhito Hagio
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/sparse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/sparse.c')

diff --git a/mm/sparse.c b/mm/sparse.c
index 3822ecbd8a1f..3918fc3eaef1 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -789,7 +789,7 @@ static void section_deactivate(unsigned long pfn, unsigned long nr_pages,
 			ms->usage = NULL;
 		}
 		memmap = sparse_decode_mem_map(ms->section_mem_map, section_nr);
-		ms->section_mem_map = sparse_encode_mem_map(NULL, section_nr);
+		ms->section_mem_map = (unsigned long)NULL;
 	}
 
 	if (section_is_early && memmap)
-- cgit
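
Why the old right-hand side left stale state behind: sparse_encode_mem_map()
stores "mem_map - section_nr_to_pfn(pnum)" so that decoding is a single
addition, which means encoding a NULL mem_map still bakes the section's start
pfn into the field.  A minimal userspace sketch of that encoding (the struct
page stand-in and the PFN_SECTION_SHIFT value are assumptions for
illustration, not the kernel's definitions):

#include <stdio.h>
#include <stdint.h>

/* Stand-in for the kernel's struct page; only its size matters here. */
struct page { uint64_t flags; uint64_t pad[7]; };	/* 64 bytes, as on x86_64 */

#define PFN_SECTION_SHIFT	15	/* assumed: 2^15 pages per 128M section */

static unsigned long section_nr_to_pfn(unsigned long sec)
{
	return sec << PFN_SECTION_SHIFT;
}

/* Models sparse_encode_mem_map(): bias the pointer by the start pfn. */
static unsigned long encode_mem_map(struct page *mem_map, unsigned long sec)
{
	return (unsigned long)(mem_map - section_nr_to_pfn(sec));
}

int main(void)
{
	unsigned long sec = 100;	/* hypothetical section number */

	/* Encoding NULL still records the start pfn: the field is non-zero. */
	printf("encode(NULL, %lu) = %#lx\n", sec, encode_mem_map(NULL, sec));
	/* Plain clearing, as the patch now does, leaves a genuine 0. */
	printf("cleared            = %#lx\n", 0UL);
	return 0;
}

The first printf prints a non-zero bit pattern, which is exactly what
makedumpfile was misreading as a live mem_map; clearing the field makes the
"hot-removed means NULL" assumption hold again.
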
Shutemov" Cc: Mel Gorman Cc: Michal Hocko Cc: Michal Hocko Cc: Oscar Salvador Cc: Pavel Tatashin Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++++++++++ mm/page_alloc.c | 11 ++--------- mm/sparse.c | 10 ---------- 3 files changed, 12 insertions(+), 19 deletions(-) (limited to 'mm/sparse.c') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index c2bc309d1634..462f6873905a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -1379,6 +1379,16 @@ static inline int pfn_present(unsigned long pfn) return present_section(__nr_to_section(pfn_to_section_nr(pfn))); } +static inline unsigned long next_present_section_nr(unsigned long section_nr) +{ + while (++section_nr <= __highest_present_section_nr) { + if (present_section_nr(section_nr)) + return section_nr; + } + + return -1; +} + /* * These are _only_ used during initialisation, therefore they * can use __initdata ... They could have names to indicate diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 461ed73bc30f..494f74a1725d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5852,18 +5852,11 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) /* Skip PFNs that belong to non-present sections */ static inline __meminit unsigned long next_pfn(unsigned long pfn) { - unsigned long section_nr; + const unsigned long section_nr = pfn_to_section_nr(++pfn); - section_nr = pfn_to_section_nr(++pfn); if (present_section_nr(section_nr)) return pfn; - - while (++section_nr <= __highest_present_section_nr) { - if (present_section_nr(section_nr)) - return section_nr_to_pfn(section_nr); - } - - return -1; + return section_nr_to_pfn(next_present_section_nr(section_nr)); } #else static inline __meminit unsigned long next_pfn(unsigned long pfn) diff --git a/mm/sparse.c b/mm/sparse.c index 3918fc3eaef1..c184b69460b7 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -198,16 +198,6 @@ static void section_mark_present(struct mem_section *ms) ms->section_mem_map |= SECTION_MARKED_PRESENT; } -static inline unsigned long next_present_section_nr(unsigned long section_nr) -{ - do { - section_nr++; - if (present_section_nr(section_nr)) - return section_nr; - } while ((section_nr <= __highest_present_section_nr)); - - return -1; -} #define for_each_present_section_nr(start, section_nr) \ for (section_nr = next_present_section_nr(start-1); \ ((section_nr != -1) && \ -- cgit From 18e19f195cd888f65643a77a0c6aee8f5be6439a Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Thu, 20 Feb 2020 20:04:27 -0800 Subject: mm/sparsemem: pfn_to_page is not valid yet on SPARSEMEM When we use SPARSEMEM instead of SPARSEMEM_VMEMMAP, pfn_to_page() doesn't work before sparse_init_one_section() is called. 
From 18e19f195cd888f65643a77a0c6aee8f5be6439a Mon Sep 17 00:00:00 2001
From: Wei Yang
Date: Thu, 20 Feb 2020 20:04:27 -0800
Subject: mm/sparsemem: pfn_to_page is not valid yet on SPARSEMEM

When we use SPARSEMEM instead of SPARSEMEM_VMEMMAP, pfn_to_page()
doesn't work before sparse_init_one_section() is called.

This leads to a crash when hotplugging memory:

BUG: unable to handle page fault for address: 0000000006400000
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: 0002 [#1] SMP PTI
CPU: 3 PID: 221 Comm: kworker/u16:1 Tainted: G        W         5.5.0-next-20200205+ #343
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 0.0.0 02/06/2015
Workqueue: kacpi_hotplug acpi_hotplug_work_fn
RIP: 0010:__memset+0x24/0x30
Code: cc cc cc cc cc cc 0f 1f 44 00 00 49 89 f9 48 89 d1 83 e2 07 48 c1 e9 03 40 0f b6 f6 48 b8 01 01 01 01 01 01 01 01 48 0f af c6 <f3> 48 ab 89 d1 f3 aa 4c 89 c8 c3 90 49 89 f9 40 88 f0 48 89 d1 f3
RSP: 0018:ffffb43ac0373c80 EFLAGS: 00010a87
RAX: ffffffffffffffff RBX: ffff8a1518800000 RCX: 0000000000050000
RDX: 0000000000000000 RSI: 00000000000000ff RDI: 0000000006400000
RBP: 0000000000140000 R08: 0000000000100000 R09: 0000000006400000
R10: 0000000000000000 R11: 0000000000000002 R12: 0000000000000000
R13: 0000000000000028 R14: 0000000000000000 R15: ffff8a153ffd9280
FS:  0000000000000000(0000) GS:ffff8a153ab00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000000006400000 CR3: 0000000136fca000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 sparse_add_section+0x1c9/0x26a
 __add_pages+0xbf/0x150
 add_pages+0x12/0x60
 add_memory_resource+0xc8/0x210
 __add_memory+0x62/0xb0
 acpi_memory_device_add+0x13f/0x300
 acpi_bus_attach+0xf6/0x200
 acpi_bus_scan+0x43/0x90
 acpi_device_hotplug+0x275/0x3d0
 acpi_hotplug_work_fn+0x1a/0x30
 process_one_work+0x1a7/0x370
 worker_thread+0x30/0x380
 kthread+0x112/0x130
 ret_from_fork+0x35/0x40

We should use the memmap pointer directly, as the code did before.  On
x86 the impact is limited to x86_32 builds, or x86_64 configurations
that override the default setting for SPARSEMEM_VMEMMAP.  Other memory
hotplug archs (arm64, ia64, and ppc) also default to
SPARSEMEM_VMEMMAP=y.

[dan.j.williams@intel.com: changelog update]
[rppt@linux.ibm.com: changelog update]
Link: http://lkml.kernel.org/r/20200219030454.4844-1-bhe@redhat.com
Fixes: ba72b4c8cf60 ("mm/sparsemem: support sub-section hotplug")
Signed-off-by: Wei Yang
Signed-off-by: Baoquan He
Acked-by: David Hildenbrand
Reviewed-by: Baoquan He
Reviewed-by: Dan Williams
Acked-by: Michal Hocko
Cc: Mike Rapoport
Cc: Oscar Salvador
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 mm/sparse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'mm/sparse.c')

diff --git a/mm/sparse.c b/mm/sparse.c
index c184b69460b7..596b2a45b100 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -876,7 +876,7 @@ int __meminit sparse_add_section(int nid, unsigned long start_pfn,
 	 * Poison uninitialized struct pages in order to catch invalid flags
 	 * combinations.
 	 */
-	page_init_poison(pfn_to_page(start_pfn), sizeof(struct page) * nr_pages);
+	page_init_poison(memmap, sizeof(struct page) * nr_pages);
 
 	ms = __nr_to_section(section_nr);
 	set_section_nid(section_nr, nid);
-- cgit
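
The ordering problem is easier to see with the decode written out.  Under
SPARSEMEM without VMEMMAP, pfn_to_page() goes through ms->section_mem_map,
which sparse_init_one_section() only fills in after the poisoning step.  A
simplified userspace model (struct page, struct mem_section,
PAGES_PER_SECTION, and start_pfn are illustrative stand-ins, and the decode
skips the kernel's flag-bit masking):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGES_PER_SECTION	16	/* invented, tiny for the demo */

struct page { unsigned long flags; };
struct mem_section { unsigned long section_mem_map; };

static struct mem_section ms;	/* zero-initialized, like a brand-new section */

/* Decode roughly as __section_mem_map_addr() would: stored base + pfn. */
static struct page *pfn_to_page(unsigned long pfn)
{
	return (struct page *)ms.section_mem_map + pfn;
}

int main(void)
{
	const unsigned long start_pfn = 0x400;	/* hypothetical section base */
	struct page *memmap = calloc(PAGES_PER_SECTION, sizeof(*memmap));

	/*
	 * Buggy order: section_mem_map is still 0, so the decode yields
	 * start_pfn * sizeof(struct page), a wild address that the
	 * kernel's page_init_poison() then writes to, as in the oops.
	 */
	printf("before init: pfn_to_page(start_pfn) = %p (bogus)\n",
	       (void *)pfn_to_page(start_pfn));

	/* Fixed order: poison through the pointer we already hold. */
	memset(memmap, 0xff, sizeof(*memmap) * PAGES_PER_SECTION);

	/* Only now would sparse_init_one_section() publish the memmap. */
	ms.section_mem_map = (unsigned long)(memmap - start_pfn);
	printf("after init:  pfn_to_page(start_pfn) = %p\n",
	       (void *)pfn_to_page(start_pfn));

	free(memmap);
	return 0;
}

In the kernel, the equivalent wild write is what __memset() faulted on at
0000000006400000 in the trace above; poisoning through the freshly allocated
memmap pointer sidesteps the ordering dependency entirely.
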