author     Dmitry Torokhov <dtor@insightbb.com>    2007-05-01 00:24:54 -0400
committer  Dmitry Torokhov <dtor@insightbb.com>    2007-05-01 00:24:54 -0400
commit     bc95f3669f5e6f63cf0b84fe4922c3c6dd4aa775
tree       427fcf2a7287c16d4b5aa6cbf494d59579a6a8b1  /mm
parent     3d29cdff999c37b3876082278a8134a0642a02cd
parent     dc87c3985e9b442c60994308a96f887579addc39
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts:
drivers/usb/input/Makefile
drivers/usb/input/gtco.c
Diffstat (limited to 'mm')
-rw-r--r--  mm/backing-dev.c     |  16
-rw-r--r--  mm/bounce.c          |   2
-rw-r--r--  mm/filemap.c         |  46
-rw-r--r--  mm/filemap_xip.c     |  48
-rw-r--r--  mm/madvise.c         |  22
-rw-r--r--  mm/mempolicy.c       |  22
-rw-r--r--  mm/migrate.c         |  17
-rw-r--r--  mm/mmap.c            |   2
-rw-r--r--  mm/nommu.c           |  29
-rw-r--r--  mm/oom_kill.c        |   6
-rw-r--r--  mm/page-writeback.c  |  17
-rw-r--r--  mm/page_alloc.c      |  37
-rw-r--r--  mm/rmap.c            |  27
-rw-r--r--  mm/shmem.c           | 141
-rw-r--r--  mm/slab.c            |   8
-rw-r--r--  mm/tiny-shmem.c      |   2
-rw-r--r--  mm/truncate.c        |   4
-rw-r--r--  mm/vmscan.c          |   2
18 files changed, 334 insertions(+), 114 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f50a2811f9dc..e5de3781d3fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,6 +55,22 @@ long congestion_wait(int rw, long timeout)
 }
 EXPORT_SYMBOL(congestion_wait);
 
+long congestion_wait_interruptible(int rw, long timeout)
+{
+    long ret;
+    DEFINE_WAIT(wait);
+    wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+    prepare_to_wait(wqh, &wait, TASK_INTERRUPTIBLE);
+    if (signal_pending(current))
+        ret = -ERESTARTSYS;
+    else
+        ret = io_schedule_timeout(timeout);
+    finish_wait(wqh, &wait);
+    return ret;
+}
+EXPORT_SYMBOL(congestion_wait_interruptible);
+
 /**
  * congestion_end - wake up sleepers on a congested backing_dev_info
  * @rw: READ or WRITE
diff --git a/mm/bounce.c b/mm/bounce.c
index 643efbe82402..ad401fc57440 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -204,7 +204,7 @@ static void __blk_queue_bounce(request_queue_t *q, struct bio **bio_orig,
         /*
          * is destination page below bounce pfn?
          */
-        if (page_to_pfn(page) < q->bounce_pfn)
+        if (page_to_pfn(page) <= q->bounce_pfn)
             continue;
 
         /*
diff --git a/mm/filemap.c b/mm/filemap.c
index d1060b8d3cd6..5dfc093ceb3d 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -2379,7 +2379,8 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
     struct file *file = iocb->ki_filp;
     struct address_space *mapping = file->f_mapping;
     ssize_t retval;
-    size_t write_len = 0;
+    size_t write_len;
+    pgoff_t end = 0; /* silence gcc */
 
     /*
      * If it's a write, unmap all mmappings of the file up-front.  This
@@ -2388,23 +2389,46 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
      */
     if (rw == WRITE) {
         write_len = iov_length(iov, nr_segs);
+        end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT;
         if (mapping_mapped(mapping))
             unmap_mapping_range(mapping, offset, write_len, 0);
     }
 
     retval = filemap_write_and_wait(mapping);
-    if (retval == 0) {
-        retval = mapping->a_ops->direct_IO(rw, iocb, iov,
-                    offset, nr_segs);
-        if (rw == WRITE && mapping->nrpages) {
-            pgoff_t end = (offset + write_len - 1)
-                        >> PAGE_CACHE_SHIFT;
-            int err = invalidate_inode_pages2_range(mapping,
+    if (retval)
+        goto out;
+
+    /*
+     * After a write we want buffered reads to be sure to go to disk to get
+     * the new data.  We invalidate clean cached page from the region we're
+     * about to write.  We do this *before* the write so that we can return
+     * -EIO without clobbering -EIOCBQUEUED from ->direct_IO().
+     */
+    if (rw == WRITE && mapping->nrpages) {
+        retval = invalidate_inode_pages2_range(mapping,
                     offset >> PAGE_CACHE_SHIFT, end);
-            if (err)
-                retval = err;
-        }
+        if (retval)
+            goto out;
     }
+
+    retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs);
+    if (retval)
+        goto out;
+
+    /*
+     * Finally, try again to invalidate clean pages which might have been
+     * faulted in by get_user_pages() if the source of the write was an
+     * mmap()ed region of the file we're writing.  That's a pretty crazy
+     * thing to do, so we don't support it 100%.  If this invalidation
+     * fails and we have -EIOCBQUEUED we ignore the failure.
+     */
+    if (rw == WRITE && mapping->nrpages) {
+        int err = invalidate_inode_pages2_range(mapping,
+                    offset >> PAGE_CACHE_SHIFT, end);
+        if (err && retval >= 0)
+            retval = err;
+    }
+out:
     return retval;
 }
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 9dd9fbb75139..cbb335813ec0 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -17,6 +17,29 @@
 #include "filemap.h"
 
 /*
+ * We do use our own empty page to avoid interference with other users
+ * of ZERO_PAGE(), such as /dev/zero
+ */
+static struct page *__xip_sparse_page;
+
+static struct page *xip_sparse_page(void)
+{
+    if (!__xip_sparse_page) {
+        unsigned long zeroes = get_zeroed_page(GFP_HIGHUSER);
+        if (zeroes) {
+            static DEFINE_SPINLOCK(xip_alloc_lock);
+            spin_lock(&xip_alloc_lock);
+            if (!__xip_sparse_page)
+                __xip_sparse_page = virt_to_page(zeroes);
+            else
+                free_page(zeroes);
+            spin_unlock(&xip_alloc_lock);
+        }
+    }
+    return __xip_sparse_page;
+}
+
+/*
  * This is a file read routine for execute in place files, and uses
  * the mapping->a_ops->get_xip_page() function for the actual low-level
  * stuff.
@@ -162,7 +185,7 @@ EXPORT_SYMBOL_GPL(xip_file_sendfile);
  * xip_write
  *
  * This function walks all vmas of the address_space and unmaps the
- * ZERO_PAGE when found at pgoff. Should it go in rmap.c?
+ * __xip_sparse_page when found at pgoff.
  */
 static void
 __xip_unmap (struct address_space * mapping,
@@ -177,13 +200,16 @@ __xip_unmap (struct address_space * mapping,
     spinlock_t *ptl;
     struct page *page;
 
+    page = __xip_sparse_page;
+    if (!page)
+        return;
+
     spin_lock(&mapping->i_mmap_lock);
     vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
         mm = vma->vm_mm;
         address = vma->vm_start +
             ((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
         BUG_ON(address < vma->vm_start || address >= vma->vm_end);
-        page = ZERO_PAGE(0);
         pte = page_check_address(page, mm, address, &ptl);
         if (pte) {
             /* Nuke the page table entry. */
@@ -222,16 +248,14 @@ xip_file_nopage(struct vm_area_struct * area,
                 + area->vm_pgoff;
     size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-    if (pgoff >= size) {
-        return NULL;
-    }
+    if (pgoff >= size)
+        return NOPAGE_SIGBUS;
 
     page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
-    if (!IS_ERR(page)) {
+    if (!IS_ERR(page))
         goto out;
-    }
     if (PTR_ERR(page) != -ENODATA)
-        return NULL;
+        return NOPAGE_SIGBUS;
 
     /* sparse block */
     if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
@@ -241,12 +265,14 @@ xip_file_nopage(struct vm_area_struct * area,
         page = mapping->a_ops->get_xip_page (mapping,
             pgoff*(PAGE_SIZE/512), 1);
         if (IS_ERR(page))
-            return NULL;
+            return NOPAGE_SIGBUS;
         /* unmap page at pgoff from all other vmas */
         __xip_unmap(mapping, pgoff);
     } else {
-        /* not shared and writable, use ZERO_PAGE() */
-        page = ZERO_PAGE(0);
+        /* not shared and writable, use xip_sparse_page() */
+        page = xip_sparse_page();
+        if (!page)
+            return NOPAGE_OOM;
     }
 
 out:
diff --git a/mm/madvise.c b/mm/madvise.c
index 4e196155a0c3..603c5257ed6e 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -155,10 +155,14 @@ static long madvise_dontneed(struct vm_area_struct * vma,
  * Other filesystems return -ENOSYS.
  */
 static long madvise_remove(struct vm_area_struct *vma,
+                struct vm_area_struct **prev,
                 unsigned long start, unsigned long end)
 {
     struct address_space *mapping;
-    loff_t offset, endoff;
+    loff_t offset, endoff;
+    int error;
+
+    *prev = NULL;    /* tell sys_madvise we drop mmap_sem */
 
     if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
         return -EINVAL;
@@ -177,7 +181,12 @@ static long madvise_remove(struct vm_area_struct *vma,
             + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
     endoff = (loff_t)(end - vma->vm_start - 1)
             + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
-    return vmtruncate_range(mapping->host, offset, endoff);
+
+    /* vmtruncate_range needs to take i_mutex and i_alloc_sem */
+    up_write(&current->mm->mmap_sem);
+    error = vmtruncate_range(mapping->host, offset, endoff);
+    down_write(&current->mm->mmap_sem);
+    return error;
 }
 
 static long
@@ -199,7 +208,7 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
         error = madvise_behavior(vma, prev, start, end, behavior);
         break;
     case MADV_REMOVE:
-        error = madvise_remove(vma, start, end);
+        error = madvise_remove(vma, prev, start, end);
         break;
 
     case MADV_WILLNEED:
@@ -312,12 +321,15 @@ asmlinkage long sys_madvise(unsigned long start, size_t len_in, int behavior)
         if (error)
             goto out;
         start = tmp;
-        if (start < prev->vm_end)
+        if (prev && start < prev->vm_end)
             start = prev->vm_end;
         error = unmapped_error;
         if (start >= end)
             goto out;
-        vma = prev->vm_next;
+        if (prev)
+            vma = prev->vm_next;
+        else    /* madvise_remove dropped mmap_sem */
+            vma = find_vma(current->mm, start);
     }
 out:
     up_write(&current->mm->mmap_sem);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 259a706bd83e..d76e8eb342d0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -144,7 +144,7 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
     max++;        /* space for zlcache_ptr (see mmzone.h) */
     zl = kmalloc(sizeof(struct zone *) * max, GFP_KERNEL);
     if (!zl)
-        return NULL;
+        return ERR_PTR(-ENOMEM);
     zl->zlcache_ptr = NULL;
     num = 0;
     /* First put in the highest zones from all nodes, then all the next
@@ -162,6 +162,10 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
             break;
         k--;
     }
+    if (num == 0) {
+        kfree(zl);
+        return ERR_PTR(-EINVAL);
+    }
     zl->zones[num] = NULL;
     return zl;
 }
@@ -193,9 +197,10 @@ static struct mempolicy *mpol_new(int mode, nodemask_t *nodes)
         break;
     case MPOL_BIND:
         policy->v.zonelist = bind_zonelist(nodes);
-        if (policy->v.zonelist == NULL) {
+        if (IS_ERR(policy->v.zonelist)) {
+            void *error_code = policy->v.zonelist;
             kmem_cache_free(policy_cache, policy);
-            return ERR_PTR(-ENOMEM);
+            return error_code;
         }
         break;
     }
@@ -316,15 +321,6 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
     return 0;
 }
 
-/* Check if a vma is migratable */
-static inline int vma_migratable(struct vm_area_struct *vma)
-{
-    if (vma->vm_flags & (
-        VM_LOCKED|VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
-        return 0;
-    return 1;
-}
-
 /*
  * Check if all pages in a range are on a set of nodes.
  * If pagelist != NULL then isolate pages from the LRU and
@@ -1667,7 +1663,7 @@ void mpol_rebind_policy(struct mempolicy *pol, const nodemask_t *newmask)
          * then zonelist_policy() will "FALL THROUGH" to MPOL_DEFAULT.
          */
 
-        if (zonelist) {
+        if (!IS_ERR(zonelist)) {
             /* Good - got mem - substitute new zonelist */
             kfree(pol->v.zonelist);
             pol->v.zonelist = zonelist;
diff --git a/mm/migrate.c b/mm/migrate.c
index e9b161bde95b..a91ca00abebe 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -297,7 +297,7 @@ static int migrate_page_move_mapping(struct address_space *mapping,
     void **pslot;
 
     if (!mapping) {
-        /* Anonymous page */
+        /* Anonymous page without mapping */
         if (page_count(page) != 1)
             return -EAGAIN;
         return 0;
@@ -333,6 +333,19 @@ static int migrate_page_move_mapping(struct address_space *mapping,
      */
     __put_page(page);
 
+    /*
+     * If moved to a different zone then also account
+     * the page for that zone. Other VM counters will be
+     * taken care of when we establish references to the
+     * new page and drop references to the old page.
+     *
+     * Note that anonymous pages are accounted for
+     * via NR_FILE_PAGES and NR_ANON_PAGES if they
+     * are mapped to swap space.
+     */
+    __dec_zone_page_state(page, NR_FILE_PAGES);
+    __inc_zone_page_state(newpage, NR_FILE_PAGES);
+
     write_unlock_irq(&mapping->tree_lock);
 
     return 0;
@@ -781,7 +794,7 @@ static int do_move_pages(struct mm_struct *mm, struct page_to_node *pm,
 
         err = -EFAULT;
         vma = find_vma(mm, pp->addr);
-        if (!vma)
+        if (!vma || !vma_migratable(vma))
             goto set_status;
 
         page = follow_page(vma, pp->addr, FOLL_GET);
diff --git a/mm/mmap.c b/mm/mmap.c
index eb509ae76553..84f997da78d7 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -299,6 +299,8 @@ static int browse_rb(struct rb_root *root)
             printk("vm_end %lx < vm_start %lx\n", vma->vm_end, vma->vm_start);
         i++;
         pn = nd;
+        prev = vma->vm_start;
+        pend = vma->vm_end;
     }
     j = 0;
     for (nd = pn; nd; nd = rb_prev(nd)) {
diff --git a/mm/nommu.c b/mm/nommu.c
index 23fb033e596d..1f60194d9b9b 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -45,6 +45,7 @@ int heap_stack_gap = 0;
 
 EXPORT_SYMBOL(mem_map);
 EXPORT_SYMBOL(__vm_enough_memory);
+EXPORT_SYMBOL(num_physpages);
 
 /* list of shareable VMAs */
 struct rb_root nommu_vma_tree = RB_ROOT;
@@ -826,6 +827,11 @@ unsigned long do_mmap_pgoff(struct file *file,
         unsigned long pglen = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
         unsigned long vmpglen;
 
+        /* suppress VMA sharing for shared regions */
+        if (vm_flags & VM_SHARED &&
+            capabilities & BDI_CAP_MAP_DIRECT)
+            goto dont_share_VMAs;
+
         for (rb = rb_first(&nommu_vma_tree); rb; rb = rb_next(rb)) {
             vma = rb_entry(rb, struct vm_area_struct, vm_rb);
 
@@ -859,6 +865,7 @@ unsigned long do_mmap_pgoff(struct file *file,
             goto shared;
         }
 
+    dont_share_VMAs:
         vma = NULL;
 
         /* obtain the address at which to make a shared mapping
@@ -1193,6 +1200,28 @@ void unmap_mapping_range(struct address_space *mapping,
 EXPORT_SYMBOL(unmap_mapping_range);
 
 /*
+ * ask for an unmapped area at which to create a mapping on a file
+ */
+unsigned long get_unmapped_area(struct file *file, unsigned long addr,
+                unsigned long len, unsigned long pgoff,
+                unsigned long flags)
+{
+    unsigned long (*get_area)(struct file *, unsigned long, unsigned long,
+                unsigned long, unsigned long);
+
+    get_area = current->mm->get_unmapped_area;
+    if (file && file->f_op && file->f_op->get_unmapped_area)
+        get_area = file->f_op->get_unmapped_area;
+
+    if (!get_area)
+        return -ENOSYS;
+
+    return get_area(file, addr, len, pgoff, flags);
+}
+
+EXPORT_SYMBOL(get_unmapped_area);
+
+/*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 0 means there is enough memory for the allocation to
  * succeed and -ENOMEM implies there is not.
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index b278b8d60eee..3791edfffeeb 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -176,6 +176,8 @@ static inline int constrained_alloc(struct zonelist *zonelist, gfp_t gfp_mask)
     struct zone **z;
     nodemask_t nodes;
     int node;
+
+    nodes_clear(nodes);
     /* node has memory ? */
     for_each_online_node(node)
         if (NODE_DATA(node)->node_present_pages)
@@ -320,7 +322,7 @@ static int oom_kill_task(struct task_struct *p)
      * Don't kill the process if any threads are set to OOM_DISABLE
      */
     do_each_thread(g, q) {
-        if (q->mm == mm && p->oomkilladj == OOM_DISABLE)
+        if (q->mm == mm && q->oomkilladj == OOM_DISABLE)
             return 1;
     } while_each_thread(g, q);
 
@@ -333,7 +335,7 @@ static int oom_kill_task(struct task_struct *p)
      */
     do_each_thread(g, q) {
         if (q->mm == mm && q->tgid != p->tgid)
-            force_sig(SIGKILL, p);
+            force_sig(SIGKILL, q);
     } while_each_thread(g, q);
 
     return 0;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index f7e088f5a309..a794945fd194 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -67,12 +67,12 @@ static inline long sync_writeback_pages(void)
 /*
  * Start background writeback (via pdflush) at this percentage
  */
-int dirty_background_ratio = 10;
+int dirty_background_ratio = 5;
 
 /*
  * The generator of dirty data starts writeback at this percentage
  */
-int vm_dirty_ratio = 40;
+int vm_dirty_ratio = 10;
 
 /*
  * The interval between `kupdate'-style writebacks, in jiffies
@@ -296,11 +296,21 @@ void balance_dirty_pages_ratelimited_nr(struct address_space *mapping,
 }
 EXPORT_SYMBOL(balance_dirty_pages_ratelimited_nr);
 
-void throttle_vm_writeout(void)
+void throttle_vm_writeout(gfp_t gfp_mask)
 {
     long background_thresh;
     long dirty_thresh;
 
+    if ((gfp_mask & (__GFP_FS|__GFP_IO)) != (__GFP_FS|__GFP_IO)) {
+        /*
+         * The caller might hold locks which can prevent IO completion
+         * or progress in the filesystem.  So we cannot just sit here
+         * waiting for IO to complete.
+         */
+        congestion_wait(WRITE, HZ/10);
+        return;
+    }
+
     for ( ; ; ) {
         get_dirty_limits(&background_thresh, &dirty_thresh, NULL);
 
@@ -317,7 +327,6 @@ void throttle_vm_writeout(void)
     }
 }
 
-
 /*
  * writeback at least _min_pages, and keep writing until the amount of dirty
  * memory is less than the background threshold, or until we're all clean.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d461b23a27a1..353ce9039a86 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -600,7 +600,7 @@ static int prep_new_page(struct page *page, int order, gfp_t gfp_flags)
 
     page->flags &= ~(1 << PG_uptodate | 1 << PG_error |
             1 << PG_referenced | 1 << PG_arch_1 |
-            1 << PG_checked | 1 << PG_mappedtodisk);
+            1 << PG_owner_priv_1 | 1 << PG_mappedtodisk);
     set_page_private(page, 0);
     set_page_refcounted(page);
 
@@ -664,6 +664,26 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
     return i;
 }
 
+#if MAX_NUMNODES > 1
+int nr_node_ids __read_mostly;
+EXPORT_SYMBOL(nr_node_ids);
+
+/*
+ * Figure out the number of possible node ids.
+ */
+static void __init setup_nr_node_ids(void)
+{
+    unsigned int node;
+    unsigned int highest = 0;
+
+    for_each_node_mask(node, node_possible_map)
+        highest = node;
+    nr_node_ids = highest + 1;
+}
+#else
+static void __init setup_nr_node_ids(void) {}
+#endif
+
 #ifdef CONFIG_NUMA
 /*
  * Called from the slab reaper to drain pagesets on a particular node that
@@ -2944,6 +2964,7 @@ void __init free_area_init_nodes(unsigned long *max_zone_pfn)
                         early_node_map[i].end_pfn);
 
     /* Initialise every node */
+    setup_nr_node_ids();
     for_each_online_node(nid) {
         pg_data_t *pgdat = NODE_DATA(nid);
         free_area_init_node(nid, pgdat, NULL,
@@ -3370,18 +3391,4 @@ EXPORT_SYMBOL(pfn_to_page);
 EXPORT_SYMBOL(page_to_pfn);
 #endif /* CONFIG_OUT_OF_LINE_PFN_TO_PAGE */
 
-#if MAX_NUMNODES > 1
-/*
- * Find the highest possible node id.
- */
-int highest_possible_node_id(void)
-{
-    unsigned int node;
-    unsigned int highest = 0;
-
-    for_each_node_mask(node, node_possible_map)
-        highest = node;
-    return highest;
-}
-EXPORT_SYMBOL(highest_possible_node_id);
-#endif
diff --git a/mm/rmap.c b/mm/rmap.c
index 669acb22b572..59da5b734c80 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -183,7 +183,7 @@ void __init anon_vma_init(void)
  */
 static struct anon_vma *page_lock_anon_vma(struct page *page)
 {
-    struct anon_vma *anon_vma = NULL;
+    struct anon_vma *anon_vma;
     unsigned long anon_mapping;
 
     rcu_read_lock();
@@ -195,9 +195,16 @@ static struct anon_vma *page_lock_anon_vma(struct page *page)
 
     anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON);
     spin_lock(&anon_vma->lock);
+    return anon_vma;
 out:
     rcu_read_unlock();
-    return anon_vma;
+    return NULL;
+}
+
+static void page_unlock_anon_vma(struct anon_vma *anon_vma)
+{
+    spin_unlock(&anon_vma->lock);
+    rcu_read_unlock();
 }
 
 /*
@@ -333,7 +340,8 @@ static int page_referenced_anon(struct page *page)
         if (!mapcount)
             break;
     }
-    spin_unlock(&anon_vma->lock);
+
+    page_unlock_anon_vma(anon_vma);
     return referenced;
 }
 
@@ -490,9 +498,11 @@ int page_mkclean(struct page *page)
         struct address_space *mapping = page_mapping(page);
         if (mapping)
             ret = page_mkclean_file(mapping, page);
+        if (page_test_dirty(page)) {
+            page_clear_dirty(page);
+            ret = 1;
+        }
     }
-    if (page_test_and_clear_dirty(page))
-        ret = 1;
 
     return ret;
 }
@@ -597,8 +607,10 @@ void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
      * Leaving it set also helps swapoff to reinstate ptes
      * faster for those pages still in swapcache.
      */
-    if (page_test_and_clear_dirty(page))
+    if (page_test_dirty(page)) {
+        page_clear_dirty(page);
         set_page_dirty(page);
+    }
     __dec_zone_page_state(page,
             PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
 }
@@ -802,7 +814,8 @@ static int try_to_unmap_anon(struct page *page, int migration)
         if (ret == SWAP_FAIL || !page_mapped(page))
             break;
     }
-    spin_unlock(&anon_vma->lock);
+
+    page_unlock_anon_vma(anon_vma);
     return ret;
 }
 
diff --git a/mm/shmem.c b/mm/shmem.c
index 882053031aa0..b2a35ebf071a 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -175,7 +175,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
     vm_unacct_memory(pages * VM_ACCT(PAGE_CACHE_SIZE));
 }
 
-static struct super_operations shmem_ops;
+static const struct super_operations shmem_ops;
 static const struct address_space_operations shmem_aops;
 static const struct file_operations shmem_file_operations;
 static const struct inode_operations shmem_inode_operations;
@@ -402,26 +402,38 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
 /*
  * shmem_free_swp - free some swap entries in a directory
  *
- * @dir:        pointer to the directory
- * @edir:       pointer after last entry of the directory
+ * @dir:        pointer to the directory
+ * @edir:       pointer after last entry of the directory
+ * @punch_lock: pointer to spinlock when needed for the holepunch case
  */
-static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir)
+static int shmem_free_swp(swp_entry_t *dir, swp_entry_t *edir,
+                        spinlock_t *punch_lock)
 {
+    spinlock_t *punch_unlock = NULL;
     swp_entry_t *ptr;
     int freed = 0;
 
     for (ptr = dir; ptr < edir; ptr++) {
         if (ptr->val) {
+            if (unlikely(punch_lock)) {
+                punch_unlock = punch_lock;
+                punch_lock = NULL;
+                spin_lock(punch_unlock);
+                if (!ptr->val)
+                    continue;
+            }
             free_swap_and_cache(*ptr);
             *ptr = (swp_entry_t){0};
             freed++;
         }
     }
+    if (punch_unlock)
+        spin_unlock(punch_unlock);
     return freed;
 }
 
-static int shmem_map_and_free_swp(struct page *subdir,
-        int offset, int limit, struct page ***dir)
+static int shmem_map_and_free_swp(struct page *subdir, int offset,
+        int limit, struct page ***dir, spinlock_t *punch_lock)
 {
     swp_entry_t *ptr;
     int freed = 0;
@@ -431,7 +443,8 @@ static int shmem_map_and_free_swp(struct page *subdir,
         int size = limit - offset;
         if (size > LATENCY_LIMIT)
             size = LATENCY_LIMIT;
-        freed += shmem_free_swp(ptr+offset, ptr+offset+size);
+        freed += shmem_free_swp(ptr+offset, ptr+offset+size,
+                            punch_lock);
         if (need_resched()) {
             shmem_swp_unmap(ptr);
             if (*dir) {
@@ -481,7 +494,10 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
     long nr_swaps_freed = 0;
     int offset;
     int freed;
-    int punch_hole = 0;
+    int punch_hole;
+    spinlock_t *needs_lock;
+    spinlock_t *punch_lock;
+    unsigned long upper_limit;
 
     inode->i_ctime = inode->i_mtime = CURRENT_TIME;
     idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
@@ -492,11 +508,20 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
     info->flags |= SHMEM_TRUNCATE;
     if (likely(end == (loff_t) -1)) {
         limit = info->next_index;
+        upper_limit = SHMEM_MAX_INDEX;
         info->next_index = idx;
+        needs_lock = NULL;
+        punch_hole = 0;
     } else {
-        limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        if (limit > info->next_index)
-            limit = info->next_index;
+        if (end + 1 >= inode->i_size) {    /* we may free a little more */
+            limit = (inode->i_size + PAGE_CACHE_SIZE - 1) >>
+                            PAGE_CACHE_SHIFT;
+            upper_limit = SHMEM_MAX_INDEX;
+        } else {
+            limit = (end + 1) >> PAGE_CACHE_SHIFT;
+            upper_limit = limit;
+        }
+        needs_lock = &info->lock;
         punch_hole = 1;
     }
 
@@ -513,17 +538,30 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         size = limit;
         if (size > SHMEM_NR_DIRECT)
             size = SHMEM_NR_DIRECT;
-        nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size);
+        nr_swaps_freed = shmem_free_swp(ptr+idx, ptr+size, needs_lock);
     }
 
     /*
      * If there are no indirect blocks or we are punching a hole
      * below indirect blocks, nothing to be done.
      */
-    if (!topdir || (punch_hole && (limit <= SHMEM_NR_DIRECT)))
+    if (!topdir || limit <= SHMEM_NR_DIRECT)
         goto done2;
 
-    BUG_ON(limit <= SHMEM_NR_DIRECT);
+    /*
+     * The truncation case has already dropped info->lock, and we're safe
+     * because i_size and next_index have already been lowered, preventing
+     * access beyond.  But in the punch_hole case, we still need to take
+     * the lock when updating the swap directory, because there might be
+     * racing accesses by shmem_getpage(SGP_CACHE), shmem_unuse_inode or
+     * shmem_writepage.  However, whenever we find we can remove a whole
+     * directory page (not at the misaligned start or end of the range),
+     * we first NULLify its pointer in the level above, and then have no
+     * need to take the lock when updating its contents: needs_lock and
+     * punch_lock (either pointing to info->lock or NULL) manage this.
+     */
+
+    upper_limit -= SHMEM_NR_DIRECT;
     limit -= SHMEM_NR_DIRECT;
     idx = (idx > SHMEM_NR_DIRECT)? (idx - SHMEM_NR_DIRECT): 0;
     offset = idx % ENTRIES_PER_PAGE;
@@ -543,8 +581,14 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
         if (*dir) {
             diroff = ((idx - ENTRIES_PER_PAGEPAGE/2) %
                 ENTRIES_PER_PAGEPAGE) / ENTRIES_PER_PAGE;
-            if (!diroff && !offset) {
-                *dir = NULL;
+            if (!diroff && !offset && upper_limit >= stage) {
+                if (needs_lock) {
+                    spin_lock(needs_lock);
+                    *dir = NULL;
+                    spin_unlock(needs_lock);
+                    needs_lock = NULL;
+                } else
+                    *dir = NULL;
                 nr_pages_to_free++;
                 list_add(&middir->lru, &pages_to_free);
             }
@@ -570,39 +614,55 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
             }
             stage = idx + ENTRIES_PER_PAGEPAGE;
             middir = *dir;
-            *dir = NULL;
-            nr_pages_to_free++;
-            list_add(&middir->lru, &pages_to_free);
+            if (punch_hole)
+                needs_lock = &info->lock;
+            if (upper_limit >= stage) {
+                if (needs_lock) {
+                    spin_lock(needs_lock);
+                    *dir = NULL;
+                    spin_unlock(needs_lock);
+                    needs_lock = NULL;
+                } else
+                    *dir = NULL;
+                nr_pages_to_free++;
+                list_add(&middir->lru, &pages_to_free);
+            }
             shmem_dir_unmap(dir);
             cond_resched();
             dir = shmem_dir_map(middir);
             diroff = 0;
         }
+        punch_lock = needs_lock;
         subdir = dir[diroff];
-        if (subdir && page_private(subdir)) {
+        if (subdir && !offset && upper_limit-idx >= ENTRIES_PER_PAGE) {
+            if (needs_lock) {
+                spin_lock(needs_lock);
+                dir[diroff] = NULL;
+                spin_unlock(needs_lock);
+                punch_lock = NULL;
+            } else
+                dir[diroff] = NULL;
+            nr_pages_to_free++;
+            list_add(&subdir->lru, &pages_to_free);
+        }
+        if (subdir && page_private(subdir) /* has swap entries */) {
             size = limit - idx;
             if (size > ENTRIES_PER_PAGE)
                 size = ENTRIES_PER_PAGE;
             freed = shmem_map_and_free_swp(subdir,
-                        offset, size, &dir);
+                    offset, size, &dir, punch_lock);
             if (!dir)
                 dir = shmem_dir_map(middir);
             nr_swaps_freed += freed;
-            if (offset)
+            if (offset || punch_lock) {
                 spin_lock(&info->lock);
-            set_page_private(subdir, page_private(subdir) - freed);
-            if (offset)
+                set_page_private(subdir,
+                    page_private(subdir) - freed);
                 spin_unlock(&info->lock);
-            if (!punch_hole)
-                BUG_ON(page_private(subdir) > offset);
-        }
-        if (offset)
-            offset = 0;
-        else if (subdir && !page_private(subdir)) {
-            dir[diroff] = NULL;
-            nr_pages_to_free++;
-            list_add(&subdir->lru, &pages_to_free);
+            } else
+                BUG_ON(page_private(subdir) != freed);
         }
+        offset = 0;
     }
 done1:
     shmem_dir_unmap(dir);
@@ -614,8 +674,16 @@ done2:
          * generic_delete_inode did it, before we lowered next_index.
          * Also, though shmem_getpage checks i_size before adding to
          * cache, no recheck after: so fix the narrow window there too.
+         *
+         * Recalling truncate_inode_pages_range and unmap_mapping_range
+         * every time for punch_hole (which never got a chance to clear
+         * SHMEM_PAGEIN at the start of vmtruncate_range) is expensive,
+         * yet hardly ever necessary: try to optimize them out later.
          */
         truncate_inode_pages_range(inode->i_mapping, start, end);
+        if (punch_hole)
+            unmap_mapping_range(inode->i_mapping, start,
+                            end - start, 1);
     }
 
     spin_lock(&info->lock);
@@ -1228,7 +1296,8 @@ failed:
     return error;
 }
 
-struct page *shmem_nopage(struct vm_area_struct *vma, unsigned long address, int *type)
+static struct page *shmem_nopage(struct vm_area_struct *vma,
+                unsigned long address, int *type)
 {
     struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
     struct page *page = NULL;
@@ -1335,7 +1404,7 @@ out_nomem:
     return retval;
 }
 
-int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
     file_accessed(file);
     vma->vm_ops = &shmem_vm_ops;
@@ -2382,7 +2451,7 @@ static const struct inode_operations shmem_special_inode_operations = {
 #endif
 };
 
-static struct super_operations shmem_ops = {
+static const struct super_operations shmem_ops = {
     .alloc_inode    = shmem_alloc_inode,
     .destroy_inode  = shmem_destroy_inode,
 #ifdef CONFIG_TMPFS
diff --git a/mm/slab.c b/mm/slab.c
index 70784b848b69..4cbac24ae2f1 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1042,7 +1042,7 @@ static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 static struct array_cache **alloc_alien_cache(int node, int limit)
 {
     struct array_cache **ac_ptr;
-    int memsize = sizeof(void *) * MAX_NUMNODES;
+    int memsize = sizeof(void *) * nr_node_ids;
     int i;
 
     if (limit > 1)
@@ -1802,8 +1802,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
             /* Print header */
             if (lines == 0) {
                 printk(KERN_ERR
-                    "Slab corruption: start=%p, len=%d\n",
-                    realobj, size);
+                    "Slab corruption: %s start=%p, len=%d\n",
+                    cachep->name, realobj, size);
                 print_objinfo(cachep, objp, 0);
             }
             /* Hexdump the affected line */
@@ -4026,7 +4026,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
 
 /**
  * cache_reap - Reclaim memory from caches.
- * @unused: unused parameter
+ * @w: work descriptor
  *
  * Called from workqueue/eventd every few seconds.
  * Purpose:
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index c7f6e1914bc4..8803471593fd 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -126,6 +126,7 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
     return 0;
 }
 
+#if 0
 int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
     file_accessed(file);
@@ -135,6 +136,7 @@ int shmem_mmap(struct file *file, struct vm_area_struct *vma)
     return 0;
 #endif
 }
+#endif /* 0 */
 
 #ifndef CONFIG_MMU
 unsigned long shmem_get_unmapped_area(struct file *file,
diff --git a/mm/truncate.c b/mm/truncate.c
index ebf3fcb4115b..0f4b6d18ab0e 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -375,10 +375,10 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 
     pagevec_init(&pvec, 0);
     next = start;
-    while (next <= end && !ret && !wrapped &&
+    while (next <= end && !wrapped &&
         pagevec_lookup(&pvec, mapping, next,
             min(end - next, (pgoff_t)PAGEVEC_SIZE - 1) + 1)) {
-        for (i = 0; !ret && i < pagevec_count(&pvec); i++) {
+        for (i = 0; i < pagevec_count(&pvec); i++) {
             struct page *page = pvec.pages[i];
             pgoff_t page_index;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 0655d5fe73e8..db023e2ff385 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -952,7 +952,7 @@ static unsigned long shrink_zone(int priority, struct zone *zone,
         }
     }
 
-    throttle_vm_writeout();
+    throttle_vm_writeout(sc->gfp_mask);
 
     atomic_dec(&zone->reclaim_in_progress);
     return nr_reclaimed;
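The most visible new export in this set is congestion_wait_interruptible(), added in the mm/backing-dev.c hunk above. A minimal caller-side sketch follows; the wrapper function, the header choices, and the HZ/10 timeout are illustrative assumptions, not part of this commit — only the helper itself and its -ERESTARTSYS return when a signal is pending come from the diff.

#include <linux/backing-dev.h>  /* assumed to declare congestion_wait_interruptible() */
#include <linux/fs.h>           /* READ / WRITE */
#include <linux/sched.h>        /* HZ, signal handling */

/*
 * Hypothetical caller (not in this commit): back off while the WRITE side
 * of a backing device is congested, but let a pending signal end the wait
 * instead of sleeping uninterruptibly as congestion_wait() would.
 */
static long wait_for_write_congestion(void)
{
    long ret = congestion_wait_interruptible(WRITE, HZ / 10);

    if (ret == -ERESTARTSYS)
        return ret;     /* interrupted by a signal; caller can restart */
    return 0;           /* timed out or woken when congestion cleared */
}

Compared with congestion_wait(), the only behavioural difference visible in the diff is the TASK_INTERRUPTIBLE sleep plus the signal_pending() check, so callers must be prepared to see -ERESTARTSYS instead of the remaining timeout.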