From 364df0ebfbbb1330bfc6ca159f4d6020efc15a12 Mon Sep 17 00:00:00 2001 From: Dimitri Sivanich Date: Tue, 23 Jun 2009 12:37:04 -0700 Subject: mm: fix handling of pagesets for downed cpus After downing/upping a cpu, an attempt to set /proc/sys/vm/percpu_pagelist_fraction results in an oops in percpu_pagelist_fraction_sysctl_handler(). If a processor is downed then we need to set the pageset pointer back to the boot pageset. Updates of the high water marks should not access pagesets of unpopulated zones (those pointer go to the boot pagesets which would be no longer functional if their size would be increased beyond zero). Signed-off-by: Dimitri Sivanich Signed-off-by: Christoph Lameter Reviewed-by: KOSAKI Motohiro Cc: Nick Piggin Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 30d5093a099d..aecc9cdfdfce 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3026,7 +3026,7 @@ bad: if (dzone == zone) break; kfree(zone_pcp(dzone, cpu)); - zone_pcp(dzone, cpu) = NULL; + zone_pcp(dzone, cpu) = &boot_pageset[cpu]; } return -ENOMEM; } @@ -3041,7 +3041,7 @@ static inline void free_zone_pagesets(int cpu) /* Free per_cpu_pageset if it is slab allocated */ if (pset != &boot_pageset[cpu]) kfree(pset); - zone_pcp(zone, cpu) = NULL; + zone_pcp(zone, cpu) = &boot_pageset[cpu]; } } @@ -4659,7 +4659,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos); if (!write || (ret == -EINVAL)) return ret; - for_each_zone(zone) { + for_each_populated_zone(zone) { for_each_online_cpu(cpu) { unsigned long high; high = zone->present_pages / percpu_pagelist_fraction; -- cgit From 4923abf9f1a4c1864af438a57c1f3686548230e9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 24 Jun 2009 12:16:49 -0700 Subject: Don't warn about order-1 allocations with __GFP_NOFAIL Traditionally, we never failed small orders (even regardless of any __GFP_NOFAIL flags), and slab will allocate order-1 allocations even for small allocations that could fit in a single page (in order to avoid excessive fragmentation). Maybe we should remove this warning entirely, but before making that judgement, at least limit it to bigger allocations. Acked-by: Pekka Enberg Cc: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index aecc9cdfdfce..5d714f8fb303 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1153,10 +1153,10 @@ again: * properly detect and handle allocation failures. * * We most definitely don't want callers attempting to - * allocate greater than single-page units with + * allocate greater than order-1 page units with * __GFP_NOFAIL. */ - WARN_ON_ONCE(order > 0); + WARN_ON_ONCE(order > 1); } spin_lock_irqsave(&zone->lock, flags); page = __rmqueue(zone, order, migratetype); -- cgit From 66918dcdf91ad101194c749c18099e836ba3de2b Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Tue, 30 Jun 2009 11:41:37 -0700 Subject: x86: only clear node_states for 64bit Nathan reported that | commit 73d60b7f747176dbdff826c4127d22e1fd3f9f74 | Author: Yinghai Lu | Date: Tue Jun 16 15:33:00 2009 -0700 | | page-allocator: clear N_HIGH_MEMORY map before we set it again | | SRAT tables may contains nodes of very small size. The arch code may | decide to not activate such a node. However, currently the early boot | code sets N_HIGH_MEMORY for such nodes. These nodes therefore seem to be | active although these nodes have no present pages. | | For 64bit N_HIGH_MEMORY == N_NORMAL_MEMORY, so that works for 64 bit too unintentionally and incorrectly clears the cpuset.mems cgroup attribute on an i386 kvm guest, meaning that cpuset.mems can not be used. Fix this by only clearing node_states[N_NORMAL_MEMORY] for 64bit only. and need to do save/restore for that in find_zone_movable_pfn Reported-by: Nathan Lynch Tested-by: Nathan Lynch Signed-off-by: Yinghai Lu Cc: Christoph Lameter Cc: Ingo Molnar , Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 2 ++ mm/page_alloc.c | 13 +++++++------ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'mm/page_alloc.c') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index c4378f4fd4a5..b177652251a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -598,6 +598,8 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); + /* clear the default setting with node 0 */ + nodes_clear(node_states[N_NORMAL_MEMORY]); free_area_init_nodes(max_zone_pfns); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 5d714f8fb303..e0f2cdf9d8b1 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4032,6 +4032,8 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) int i, nid; unsigned long usable_startpfn; unsigned long kernelcore_node, kernelcore_remaining; + /* save the state before borrow the nodemask */ + nodemask_t saved_node_state = node_states[N_HIGH_MEMORY]; unsigned long totalpages = early_calculate_totalpages(); int usable_nodes = nodes_weight(node_states[N_HIGH_MEMORY]); @@ -4059,7 +4061,7 @@ static void __init find_zone_movable_pfns_for_nodes(unsigned long *movable_pfn) /* If kernelcore was not specified, there is no ZONE_MOVABLE */ if (!required_kernelcore) - return; + goto out; /* usable_startpfn is the lowest possible pfn ZONE_MOVABLE can be at */ find_usable_zone_for_movable(); @@ -4158,6 +4160,10 @@ restart: for (nid = 0; nid < MAX_NUMNODES; nid++) zone_movable_pfn[nid] = roundup(zone_movable_pfn[nid], MAX_ORDER_NR_PAGES); + +out: + /* restore the node_state */ + node_states[N_HIGH_MEMORY] = saved_node_state; } /* Any regular memory on that node ? */ @@ -4242,11 +4248,6 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn) early_node_map[i].start_pfn, early_node_map[i].end_pfn); - /* - * find_zone_movable_pfns_for_nodes/early_calculate_totalpages init - * that node_mask, clear it at first - */ - nodes_clear(node_states[N_HIGH_MEMORY]); /* Initialise every node */ mminit_verify_pageflags_layout(); setup_nr_node_ids(); -- cgit