From a0d40c80256e31b23849f2ba781b74bf0218a1fa Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Fri, 26 Mar 2010 15:28:51 -0700 Subject: vmap: add flag to allow lazy unmap to be disabled at runtime Add a flag to force lazy_max_pages() to zero to prevent any outstanding mapped pages. We'll need this for Xen. Signed-off-by: Jeremy Fitzhardinge Signed-off-by: Konrad Rzeszutek Wilk Acked-by: Nick Piggin --- include/linux/vmalloc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 227c2a585e4f..b840fdaf438c 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -7,6 +7,8 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ +extern bool vmap_lazy_unmap; + /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ #define VM_ALLOC 0x00000002 /* vmalloc() */ -- cgit From 4f8b02b4e5c6896e073bed736136d420bd44b627 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 18:22:47 +0200 Subject: vmalloc: pcpu_get/free_vm_areas() aren't needed on UP These functions are used only by percpu memory allocator on SMP. Don't build them on UP. Signed-off-by: Tejun Heo Cc: Nick Piggin Reviewed-by: Chrsitoph Lameter --- include/linux/vmalloc.h | 2 ++ mm/vmalloc.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 01c2145118dc..63a4fe6d51bd 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -117,10 +117,12 @@ extern rwlock_t vmlist_lock; extern struct vm_struct *vmlist; extern __init void vm_area_register_early(struct vm_struct *vm, size_t align); +#ifdef CONFIG_SMP struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets, const size_t *sizes, int nr_vms, size_t align, gfp_t gfp_mask); void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms); +#endif #endif /* _LINUX_VMALLOC_H */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 6b8889da69a6..c623e0ce3f00 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -2056,6 +2056,7 @@ void free_vm_area(struct vm_struct *area) } EXPORT_SYMBOL_GPL(free_vm_area); +#ifdef CONFIG_SMP static struct vmap_area *node_to_va(struct rb_node *n) { return n ? rb_entry(n, struct vmap_area, rb_node) : NULL; @@ -2336,6 +2337,7 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) free_vm_area(vms[i]); kfree(vms); } +#endif /* CONFIG_SMP */ #ifdef CONFIG_PROC_FS static void *s_start(struct seq_file *m, loff_t *pos) -- cgit From e1ca7788dec6773b1a2bce51b7141948f2b8bccf Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 26 Oct 2010 14:22:06 -0700 Subject: mm: add vzalloc() and vzalloc_node() helpers Add vzalloc() and vzalloc_node() to encapsulate the vmalloc-then-memset-zero operation. Use __GFP_ZERO to zero fill the allocated memory. Signed-off-by: Dave Young Cc: Christoph Lameter Acked-by: Greg Ungerer Cc: David Howells Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 2 ++ mm/nommu.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++- mm/vmalloc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 94 insertions(+), 3 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 63a4fe6d51bd..a03dcf62ca9d 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -53,8 +53,10 @@ static inline void vmalloc_init(void) #endif extern void *vmalloc(unsigned long size); +extern void *vzalloc(unsigned long size); extern void *vmalloc_user(unsigned long size); extern void *vmalloc_node(unsigned long size, int node); +extern void *vzalloc_node(unsigned long size, int node); extern void *vmalloc_exec(unsigned long size); extern void *vmalloc_32(unsigned long size); extern void *vmalloc_32_user(unsigned long size); diff --git a/mm/nommu.c b/mm/nommu.c index 88ff091eb07a..30b5c20eec15 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -293,11 +293,58 @@ void *vmalloc(unsigned long size) } EXPORT_SYMBOL(vmalloc); +/* + * vzalloc - allocate virtually continguos memory with zero fill + * + * @size: allocation size + * + * Allocate enough pages to cover @size from the page level + * allocator and map them into continguos kernel virtual space. + * The memory allocated is set to zero. + * + * For tight control over page level allocator and protection flags + * use __vmalloc() instead. + */ +void *vzalloc(unsigned long size) +{ + return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, + PAGE_KERNEL); +} +EXPORT_SYMBOL(vzalloc); + +/** + * vmalloc_node - allocate memory on a specific node + * @size: allocation size + * @node: numa node + * + * Allocate enough pages to cover @size from the page level + * allocator and map them into contiguous kernel virtual space. + * + * For tight control over page level allocator and protection flags + * use __vmalloc() instead. + */ void *vmalloc_node(unsigned long size, int node) { return vmalloc(size); } -EXPORT_SYMBOL(vmalloc_node); + +/** + * vzalloc_node - allocate memory on a specific node with zero fill + * @size: allocation size + * @node: numa node + * + * Allocate enough pages to cover @size from the page level + * allocator and map them into contiguous kernel virtual space. + * The memory allocated is set to zero. + * + * For tight control over page level allocator and protection flags + * use __vmalloc() instead. + */ +void *vzalloc_node(unsigned long size, int node) +{ + return vzalloc(size); +} +EXPORT_SYMBOL(vzalloc_node); #ifndef PAGE_KERNEL_EXEC # define PAGE_KERNEL_EXEC PAGE_KERNEL diff --git a/mm/vmalloc.c b/mm/vmalloc.c index f492c774fa7b..a3d66b3dc5cb 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -1596,6 +1596,13 @@ void *__vmalloc(unsigned long size, gfp_t gfp_mask, pgprot_t prot) } EXPORT_SYMBOL(__vmalloc); +static inline void *__vmalloc_node_flags(unsigned long size, + int node, gfp_t flags) +{ + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, + node, __builtin_return_address(0)); +} + /** * vmalloc - allocate virtually contiguous memory * @size: allocation size @@ -1607,11 +1614,27 @@ EXPORT_SYMBOL(__vmalloc); */ void *vmalloc(unsigned long size) { - return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL, - -1, __builtin_return_address(0)); + return __vmalloc_node_flags(size, -1, GFP_KERNEL | __GFP_HIGHMEM); } EXPORT_SYMBOL(vmalloc); +/** + * vzalloc - allocate virtually contiguous memory with zero fill + * @size: allocation size + * Allocate enough pages to cover @size from the page level + * allocator and map them into contiguous kernel virtual space. + * The memory allocated is set to zero. + * + * For tight control over page level allocator and protection flags + * use __vmalloc() instead. + */ +void *vzalloc(unsigned long size) +{ + return __vmalloc_node_flags(size, -1, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); +} +EXPORT_SYMBOL(vzalloc); + /** * vmalloc_user - allocate zeroed virtually contiguous memory for userspace * @size: allocation size @@ -1653,6 +1676,25 @@ void *vmalloc_node(unsigned long size, int node) } EXPORT_SYMBOL(vmalloc_node); +/** + * vzalloc_node - allocate memory on a specific node with zero fill + * @size: allocation size + * @node: numa node + * + * Allocate enough pages to cover @size from the page level + * allocator and map them into contiguous kernel virtual space. + * The memory allocated is set to zero. + * + * For tight control over page level allocator and protection flags + * use __vmalloc_node() instead. + */ +void *vzalloc_node(unsigned long size, int node) +{ + return __vmalloc_node_flags(size, node, + GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO); +} +EXPORT_SYMBOL(vzalloc_node); + #ifndef PAGE_KERNEL_EXEC # define PAGE_KERNEL_EXEC PAGE_KERNEL #endif -- cgit From 64141da587241301ce8638cc945f8b67853156ec Mon Sep 17 00:00:00 2001 From: Jeremy Fitzhardinge Date: Thu, 2 Dec 2010 14:31:18 -0800 Subject: vmalloc: eagerly clear ptes on vunmap On stock 2.6.37-rc4, running: # mount lilith:/export /mnt/lilith # find /mnt/lilith/ -type f -print0 | xargs -0 file crashes the machine fairly quickly under Xen. Often it results in oops messages, but the couple of times I tried just now, it just hung quietly and made Xen print some rude messages: (XEN) mm.c:2389:d80 Bad type (saw 7400000000000001 != exp 3000000000000000) for mfn 1d7058 (pfn 18fa7) (XEN) mm.c:964:d80 Attempt to create linear p.t. with write perms (XEN) mm.c:2389:d80 Bad type (saw 7400000000000010 != exp 1000000000000000) for mfn 1d2e04 (pfn 1d1fb) (XEN) mm.c:2965:d80 Error while pinning mfn 1d2e04 Which means the domain tried to map a pagetable page RW, which would allow it to map arbitrary memory, so Xen stopped it. This is because vm_unmap_ram() left some pages mapped in the vmalloc area after NFS had finished with them, and those pages got recycled as pagetable pages while still having these RW aliases. Removing those mappings immediately removes the Xen-visible aliases, and so it has no problem with those pages being reused as pagetable pages. Deferring the TLB flush doesn't upset Xen because it can flush the TLB itself as needed to maintain its invariants. When unmapping a region in the vmalloc space, clear the ptes immediately. There's no point in deferring this because there's no amortization benefit. The TLBs are left dirty, and they are flushed lazily to amortize the cost of the IPIs. This specific motivation for this patch is an oops-causing regression since 2.6.36 when using NFS under Xen, triggered by the NFS client's use of vm_map_ram() introduced in 56e4ebf877b60 ("NFS: readdir with vmapped pages") . XFS also uses vm_map_ram() and could cause similar problems. Signed-off-by: Jeremy Fitzhardinge Cc: Nick Piggin Cc: Bryan Schumaker Cc: Trond Myklebust Cc: Alex Elder Cc: Dave Chinner Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/xen/mmu.c | 2 -- include/linux/vmalloc.h | 2 -- mm/vmalloc.c | 28 +++++++++++++++++----------- 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux/vmalloc.h') diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index a1feff9e59b6..44924e551fde 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -2415,8 +2415,6 @@ void __init xen_init_mmu_ops(void) x86_init.paging.pagetable_setup_done = xen_pagetable_setup_done; pv_mmu_ops = xen_mmu_ops; - vmap_lazy_unmap = false; - memset(dummy_mapping, 0xff, PAGE_SIZE); } diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index a03dcf62ca9d..44b54f619ac6 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -7,8 +7,6 @@ struct vm_area_struct; /* vma defining user mapping in mm_types.h */ -extern bool vmap_lazy_unmap; - /* bits in flags of vmalloc's vm_struct below */ #define VM_IOREMAP 0x00000001 /* ioremap() and friends */ #define VM_ALLOC 0x00000002 /* vmalloc() */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a3d66b3dc5cb..eb5cc7d00c5a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -31,8 +31,6 @@ #include #include -bool vmap_lazy_unmap __read_mostly = true; - /*** Page table manipulation functions ***/ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) @@ -503,9 +501,6 @@ static unsigned long lazy_max_pages(void) { unsigned int log; - if (!vmap_lazy_unmap) - return 0; - log = fls(num_online_cpus()); return log * (32UL * 1024 * 1024 / PAGE_SIZE); @@ -566,7 +561,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, if (va->va_end > *end) *end = va->va_end; nr += (va->va_end - va->va_start) >> PAGE_SHIFT; - unmap_vmap_area(va); list_add_tail(&va->purge_list, &valist); va->flags |= VM_LAZY_FREEING; va->flags &= ~VM_LAZY_FREE; @@ -611,10 +605,11 @@ static void purge_vmap_area_lazy(void) } /* - * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been - * called for the correct range previously. + * Free a vmap area, caller ensuring that the area has been unmapped + * and flush_cache_vunmap had been called for the correct range + * previously. */ -static void free_unmap_vmap_area_noflush(struct vmap_area *va) +static void free_vmap_area_noflush(struct vmap_area *va) { va->flags |= VM_LAZY_FREE; atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); @@ -622,6 +617,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va) try_purge_vmap_area_lazy(); } +/* + * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been + * called for the correct range previously. + */ +static void free_unmap_vmap_area_noflush(struct vmap_area *va) +{ + unmap_vmap_area(va); + free_vmap_area_noflush(va); +} + /* * Free and unmap a vmap area */ @@ -798,7 +803,7 @@ static void free_vmap_block(struct vmap_block *vb) spin_unlock(&vmap_block_tree_lock); BUG_ON(tmp != vb); - free_unmap_vmap_area_noflush(vb->va); + free_vmap_area_noflush(vb->va); call_rcu(&vb->rcu_head, rcu_free_vb); } @@ -936,6 +941,8 @@ static void vb_free(const void *addr, unsigned long size) rcu_read_unlock(); BUG_ON(!vb); + vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); + spin_lock(&vb->lock); BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); @@ -988,7 +995,6 @@ void vm_unmap_aliases(void) s = vb->va->va_start + (i << PAGE_SHIFT); e = vb->va->va_start + (j << PAGE_SHIFT); - vunmap_page_range(s, e); flush = 1; if (s < start) -- cgit