From 90c5029e471636f21221bf66b9a46ada2ab79a22 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Wed, 27 Jul 2005 11:43:50 -0700 Subject: [PATCH] Undo mempolicy shared policy rbtree microoptimization All mempolicy changes must be inside the spinlock and readding the rb_erase prevents a crash while doing: > echo "1" > /tmp/numatest > numactl --length=0x4000 --shm /tmp/numatest --localalloc > numactl --length=0x2000 --offset=0 --shm /tmp/numatest --membind=0 > numactl --length=0x2000 --offset=0x2000 --shm /tmp/numatest --membind=1 > ipcs > ipcrm -M "the_key_value_of_this_shm_area" Based on a patch by John Blackwood Cc: Cc: Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/mempolicy.c b/mm/mempolicy.c index cb41c31e7c87..1694845526be 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1138,11 +1138,11 @@ void mpol_free_shared_policy(struct shared_policy *p) while (next) { n = rb_entry(next, struct sp_node, nd); next = rb_next(&n->nd); + rb_erase(&n->nd, &p->root); mpol_free(n->policy); kmem_cache_free(sn_cache, n); } spin_unlock(&p->lock); - p->root = RB_ROOT; } /* assumes fs == KERNEL_DS */ -- cgit From 1aaf18ff9de1f37bf674236fc0779c3aaa65b998 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Wed, 27 Jul 2005 11:43:54 -0700 Subject: [PATCH] check_user_page_readable() deadlock fix Fix bug identifued by Richard Purdie . oprofile calls check_user_page_readable() from interrupt context, so we deadlock over various VFS locks. But check_user_page_readable() doesn't imply either a read or a write of the page's contents. Change __follow_page() so that check_user_page_readable() can tell __follow_page() that we're not accessing the page's contents, and use that info to avoid the troublesome lock-takings. Also, make follow_page() inline for the single callsite in memory.c to save a bit of stack space. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) (limited to 'mm') diff --git a/mm/memory.c b/mm/memory.c index beabdefa6254..6fe77acbc1cd 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -776,8 +776,8 @@ unsigned long zap_page_range(struct vm_area_struct *vma, unsigned long address, * Do a quick page-table lookup for a single page. * mm->page_table_lock must be held. */ -static struct page * -__follow_page(struct mm_struct *mm, unsigned long address, int read, int write) +static struct page *__follow_page(struct mm_struct *mm, unsigned long address, + int read, int write, int accessed) { pgd_t *pgd; pud_t *pud; @@ -818,9 +818,11 @@ __follow_page(struct mm_struct *mm, unsigned long address, int read, int write) pfn = pte_pfn(pte); if (pfn_valid(pfn)) { page = pfn_to_page(pfn); - if (write && !pte_dirty(pte) && !PageDirty(page)) - set_page_dirty(page); - mark_page_accessed(page); + if (accessed) { + if (write && !pte_dirty(pte) &&!PageDirty(page)) + set_page_dirty(page); + mark_page_accessed(page); + } return page; } } @@ -829,16 +831,19 @@ out: return NULL; } -struct page * +inline struct page * follow_page(struct mm_struct *mm, unsigned long address, int write) { - return __follow_page(mm, address, /*read*/0, write); + return __follow_page(mm, address, 0, write, 1); } -int -check_user_page_readable(struct mm_struct *mm, unsigned long address) +/* + * check_user_page_readable() can be called frm niterrupt context by oprofile, + * so we need to avoid taking any non-irq-safe locks + */ +int check_user_page_readable(struct mm_struct *mm, unsigned long address) { - return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL; + return __follow_page(mm, address, 1, 0, 0) != NULL; } EXPORT_SYMBOL(check_user_page_readable); -- cgit From 165cd40235732644b1856a5ed5e158c9b93f6010 Mon Sep 17 00:00:00 2001 From: suzuki Date: Wed, 27 Jul 2005 11:43:59 -0700 Subject: [PATCH] madvise() does not always return -EBADF on non-file mapped area The madvise() system call returns -EBADF for areas which does not map to files, only for *behaviour* request MADV_WILLNEED. According to man pages, madvise returns : EBADF - the map exists, but the area maps something that isn't a file. Fixes bug 2995. Signed-off-by: Suzuki K P Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/madvise.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'mm') diff --git a/mm/madvise.c b/mm/madvise.c index 73180a22877e..c8c01a12fea4 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -83,9 +83,6 @@ static long madvise_willneed(struct vm_area_struct * vma, { struct file *file = vma->vm_file; - if (!file) - return -EBADF; - if (file->f_mapping->a_ops->get_xip_page) { /* no bad return value, but ignore advice */ return 0; @@ -140,11 +137,16 @@ static long madvise_dontneed(struct vm_area_struct * vma, return 0; } -static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, - unsigned long start, unsigned long end, int behavior) +static long +madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev, + unsigned long start, unsigned long end, int behavior) { + struct file *filp = vma->vm_file; long error = -EBADF; + if (!filp) + goto out; + switch (behavior) { case MADV_NORMAL: case MADV_SEQUENTIAL: @@ -165,6 +167,7 @@ static long madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev break; } +out: return error; } -- cgit From 12b1c5f382194d3f656e78fb5c9c8f2bfbe8ed8a Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Wed, 27 Jul 2005 11:44:02 -0700 Subject: [PATCH] Remove bogus warning in page_alloc.c Originally __free_pages_bulk used the relative page number within a zone to define its buddies. This meant that to maintain the "maximally aligned" requirements (that an allocation of size N will be aligned at least to N physically) zones had to also be aligned to 1< Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/page_alloc.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'mm') diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 1d6ba6a4b594..42bccfb8464d 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1861,7 +1861,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat, unsigned long *zones_size, unsigned long *zholes_size) { unsigned long i, j; - const unsigned long zone_required_alignment = 1UL << (MAX_ORDER-1); int cpu, nid = pgdat->node_id; unsigned long zone_start_pfn = pgdat->node_start_pfn; @@ -1934,9 +1933,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat, zone->zone_mem_map = pfn_to_page(zone_start_pfn); zone->zone_start_pfn = zone_start_pfn; - if ((zone_start_pfn) & (zone_required_alignment-1)) - printk(KERN_CRIT "BUG: wrong zone alignment, it will crash\n"); - memmap_init(size, nid, j, zone_start_pfn); zonetable_add(zone, nid, j, zone_start_pfn, size); -- cgit