From 859f7ef142a956676cb387b90f18e2e71e959c68 Mon Sep 17 00:00:00 2001 From: Zhihui Zhang Date: Thu, 18 Dec 2014 16:17:09 -0800 Subject: mm/mempolicy.c: remove unnecessary is_valid_nodemask() When nodes is true, nsc->mask2 has already been filtered by nsc->mask1, which has already factored in node_states[N_MEMORY]. Signed-off-by: Zhihui Zhang Cc: Mel Gorman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/mempolicy.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index e58725aff7e9..f22c55947181 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -162,12 +162,6 @@ static const struct mempolicy_operations { enum mpol_rebind_step step); } mpol_ops[MPOL_MAX]; -/* Check that the nodemask contains at least one populated zone */ -static int is_valid_nodemask(const nodemask_t *nodemask) -{ - return nodes_intersects(*nodemask, node_states[N_MEMORY]); -} - static inline int mpol_store_user_nodemask(const struct mempolicy *pol) { return pol->flags & MPOL_MODE_FLAGS; @@ -202,7 +196,7 @@ static int mpol_new_preferred(struct mempolicy *pol, const nodemask_t *nodes) static int mpol_new_bind(struct mempolicy *pol, const nodemask_t *nodes) { - if (!is_valid_nodemask(nodes)) + if (nodes_empty(*nodes)) return -EINVAL; pol->v.nodes = *nodes; return 0; @@ -234,7 +228,7 @@ static int mpol_set_nodemask(struct mempolicy *pol, nodes = NULL; /* explicit local allocation */ else { if (pol->flags & MPOL_F_RELATIVE_NODES) - mpol_relative_nodemask(&nsc->mask2, nodes,&nsc->mask1); + mpol_relative_nodemask(&nsc->mask2, nodes, &nsc->mask1); else nodes_and(nsc->mask2, *nodes, nsc->mask1); -- cgit From 89ac9b4d3d1a049ae1054f99b1aed81092cd0a82 Mon Sep 17 00:00:00 2001 From: Sougata Santra Date: Thu, 18 Dec 2014 16:17:12 -0800 Subject: hfsplus: fix longname handling Longname is not correctly handled by hfsplus driver. If an attempt to create a longname(>255) file/directory is made, it succeeds by creating a file/directory with HFSPLUS_MAX_STRLEN and incorrect catalog key. Thus leaving the volume in an inconsistent state. This patch fixes this issue. Although lookup is always called first to create a negative entry, so just doing a check in lookup would probably fix this issue. I choose to propagate error to other iops as well. Please NOTE: I have factored out hfsplus_cat_build_key_with_cnid from hfsplus_cat_build_key, to avoid unncessary branching. Thanks a lot. 
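For context: HFSPLUS_MAX_STRLEN is 255, so in the test below name255 is the longest name a catalog key can hold, and name256 is the case that should now fail up front with -ENAMETOOLONG instead of being silently truncated.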
TEST: ------ dir="TEST_DIR" cdir=`pwd` name255="_123456789_123456789_123456789_123456789_123456789_123456789\ _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ _123456789_123456789_123456789_123456789_123456789_1234" name256="${name255}5" mkdir $dir cd $dir touch $name255 rm -f $name255 touch $name256 ls -la cd $cdir rm -rf $dir RESULT: ------- [sougata@ultrabook tmp]$ cdir=`pwd` [sougata@ultrabook tmp]$ name255="_123456789_123456789_123456789_123456789_123456789_123456789\ > _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ > _123456789_123456789_123456789_123456789_123456789_123456789_123456789\ > _123456789_123456789_123456789_123456789_123456789_1234" [sougata@ultrabook tmp]$ name256="${name255}5" [sougata@ultrabook tmp]$ [sougata@ultrabook tmp]$ mkdir $dir [sougata@ultrabook tmp]$ cd $dir [sougata@ultrabook TEST_DIR]$ touch $name255 [sougata@ultrabook TEST_DIR]$ rm -f $name255 [sougata@ultrabook TEST_DIR]$ touch $name256 [sougata@ultrabook TEST_DIR]$ ls -la ls: cannot access _123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_1234: No such file or directory total 0 drwxrwxr-x 1 sougata sougata 3 Feb 20 19:56 . drwxrwxrwx 1 root root 6 Feb 20 19:56 .. -????????? ? ? ? ? ? _123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_123456789_1234 [sougata@ultrabook TEST_DIR]$ cd $cdir [sougata@ultrabook tmp]$ rm -rf $dir rm: cannot remove `TEST_DIR': Directory not empty -ENAMETOOLONG returned from hfsplus_asc2uni was not propaged to iops. This allowed hfsplus to create files/directories with HFSPLUS_MAX_STRLEN and incorrect keys, leaving the FS in an inconsistent state. This patch fixes this issue. Signed-off-by: Sougata Santra Reviewed-by: Christoph Hellwig Cc: Vyacheslav Dubeyko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/hfsplus/catalog.c | 89 ++++++++++++++++++++++++++++++++++++------------- fs/hfsplus/dir.c | 11 ++++-- fs/hfsplus/hfsplus_fs.h | 4 ++- fs/hfsplus/super.c | 4 ++- 4 files changed, 79 insertions(+), 29 deletions(-) diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 32602c667b4a..7892e6fddb66 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -38,21 +38,30 @@ int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, return hfsplus_strcmp(&k1->cat.name, &k2->cat.name); } -void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, - u32 parent, struct qstr *str) +/* Generates key for catalog file/folders record. 
*/ +int hfsplus_cat_build_key(struct super_block *sb, + hfsplus_btree_key *key, u32 parent, struct qstr *str) { - int len; + int len, err; key->cat.parent = cpu_to_be32(parent); - if (str) { - hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, - str->name, str->len); - len = be16_to_cpu(key->cat.name.length); - } else { - key->cat.name.length = 0; - len = 0; - } + err = hfsplus_asc2uni(sb, &key->cat.name, HFSPLUS_MAX_STRLEN, + str->name, str->len); + if (unlikely(err < 0)) + return err; + + len = be16_to_cpu(key->cat.name.length); key->key_len = cpu_to_be16(6 + 2 * len); + return 0; +} + +/* Generates key for catalog thread record. */ +void hfsplus_cat_build_key_with_cnid(struct super_block *sb, + hfsplus_btree_key *key, u32 parent) +{ + key->cat.parent = cpu_to_be32(parent); + key->cat.name.length = 0; + key->key_len = cpu_to_be16(6); } static void hfsplus_cat_build_key_uni(hfsplus_btree_key *key, u32 parent, @@ -167,11 +176,16 @@ static int hfsplus_fill_cat_thread(struct super_block *sb, hfsplus_cat_entry *entry, int type, u32 parentid, struct qstr *str) { + int err; + entry->type = cpu_to_be16(type); entry->thread.reserved = 0; entry->thread.parentID = cpu_to_be32(parentid); - hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, + err = hfsplus_asc2uni(sb, &entry->thread.nodeName, HFSPLUS_MAX_STRLEN, str->name, str->len); + if (unlikely(err < 0)) + return err; + return 10 + be16_to_cpu(entry->thread.nodeName.length) * 2; } @@ -183,7 +197,7 @@ int hfsplus_find_cat(struct super_block *sb, u32 cnid, int err; u16 type; - hfsplus_cat_build_key(sb, fd->search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd->search_key, cnid); err = hfs_brec_read(fd, &tmp, sizeof(hfsplus_cat_entry)); if (err) return err; @@ -250,11 +264,16 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) return err; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); entry_size = hfsplus_fill_cat_thread(sb, &entry, S_ISDIR(inode->i_mode) ? 
HFSPLUS_FOLDER_THREAD : HFSPLUS_FILE_THREAD, dir->i_ino, str); + if (unlikely(entry_size < 0)) { + err = entry_size; + goto err2; + } + err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) @@ -265,7 +284,10 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, if (err) goto err2; - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + if (unlikely(err)) + goto err1; + entry_size = hfsplus_cat_build_record(&entry, cnid, inode); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err != -ENOENT) { @@ -288,7 +310,7 @@ int hfsplus_create_cat(u32 cnid, struct inode *dir, return 0; err1: - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); if (!hfs_brec_find(&fd, hfs_find_rec_by_key)) hfs_brec_remove(&fd); err2: @@ -313,7 +335,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) if (!str) { int len; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -329,7 +351,9 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) off + 2, len); fd.search_key->key_len = cpu_to_be16(6 + len); } else - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, str); + if (unlikely(err)) + goto out; err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) @@ -360,7 +384,7 @@ int hfsplus_delete_cat(u32 cnid, struct inode *dir, struct qstr *str) if (err) goto out; - hfsplus_cat_build_key(sb, fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, cnid); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; @@ -405,7 +429,11 @@ int hfsplus_rename_cat(u32 cnid, dst_fd = src_fd; /* find the old dir entry and read the data */ - hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); + err = hfsplus_cat_build_key(sb, src_fd.search_key, + src_dir->i_ino, src_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -419,7 +447,11 @@ int hfsplus_rename_cat(u32 cnid, type = be16_to_cpu(entry.type); /* create new dir entry with the data from the old entry */ - hfsplus_cat_build_key(sb, dst_fd.search_key, dst_dir->i_ino, dst_name); + err = hfsplus_cat_build_key(sb, dst_fd.search_key, + dst_dir->i_ino, dst_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) @@ -436,7 +468,11 @@ int hfsplus_rename_cat(u32 cnid, dst_dir->i_mtime = dst_dir->i_ctime = CURRENT_TIME_SEC; /* finally remove the old entry */ - hfsplus_cat_build_key(sb, src_fd.search_key, src_dir->i_ino, src_name); + err = hfsplus_cat_build_key(sb, src_fd.search_key, + src_dir->i_ino, src_name); + if (unlikely(err)) + goto out; + err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -449,7 +485,7 @@ int hfsplus_rename_cat(u32 cnid, src_dir->i_mtime = src_dir->i_ctime = CURRENT_TIME_SEC; /* remove old thread entry */ - hfsplus_cat_build_key(sb, src_fd.search_key, cnid, NULL); + hfsplus_cat_build_key_with_cnid(sb, src_fd.search_key, cnid); err = hfs_brec_find(&src_fd, hfs_find_rec_by_key); if (err) goto out; @@ -459,9 +495,14 @@ int hfsplus_rename_cat(u32 cnid, goto out; /* create new thread entry */ - hfsplus_cat_build_key(sb, dst_fd.search_key, cnid, NULL); + 
hfsplus_cat_build_key_with_cnid(sb, dst_fd.search_key, cnid); entry_size = hfsplus_fill_cat_thread(sb, &entry, type, dst_dir->i_ino, dst_name); + if (unlikely(entry_size < 0)) { + err = entry_size; + goto out; + } + err = hfs_brec_find(&dst_fd, hfs_find_rec_by_key); if (err != -ENOENT) { if (!err) diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 610a3260bef1..435bea231cc6 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -44,7 +44,10 @@ static struct dentry *hfsplus_lookup(struct inode *dir, struct dentry *dentry, err = hfs_find_init(HFSPLUS_SB(sb)->cat_tree, &fd); if (err) return ERR_PTR(err); - hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, &dentry->d_name); + err = hfsplus_cat_build_key(sb, fd.search_key, dir->i_ino, + &dentry->d_name); + if (unlikely(err < 0)) + goto fail; again: err = hfs_brec_read(&fd, &entry, sizeof(entry)); if (err) { @@ -97,9 +100,11 @@ again: be32_to_cpu(entry.file.permissions.dev); str.len = sprintf(name, "iNode%d", linkid); str.name = name; - hfsplus_cat_build_key(sb, fd.search_key, + err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_SB(sb)->hidden_dir->i_ino, &str); + if (unlikely(err < 0)) + goto fail; goto again; } } else if (!dentry->d_fsdata) @@ -145,7 +150,7 @@ static int hfsplus_readdir(struct file *file, struct dir_context *ctx) err = -ENOMEM; goto out; } - hfsplus_cat_build_key(sb, fd.search_key, inode->i_ino, NULL); + hfsplus_cat_build_key_with_cnid(sb, fd.search_key, inode->i_ino); err = hfs_brec_find(&fd, hfs_find_rec_by_key); if (err) goto out; diff --git a/fs/hfsplus/hfsplus_fs.h b/fs/hfsplus/hfsplus_fs.h index eb5e059f481a..b0441d65fa54 100644 --- a/fs/hfsplus/hfsplus_fs.h +++ b/fs/hfsplus/hfsplus_fs.h @@ -443,8 +443,10 @@ int hfsplus_cat_case_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2); int hfsplus_cat_bin_cmp_key(const hfsplus_btree_key *k1, const hfsplus_btree_key *k2); -void hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, +int hfsplus_cat_build_key(struct super_block *sb, hfsplus_btree_key *key, u32 parent, struct qstr *str); +void hfsplus_cat_build_key_with_cnid(struct super_block *sb, + hfsplus_btree_key *key, u32 parent); void hfsplus_cat_set_perms(struct inode *inode, struct hfsplus_perm *perms); int hfsplus_find_cat(struct super_block *sb, u32 cnid, struct hfs_find_data *fd); diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 4cf2024b87da..593af2fdcc2d 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -515,7 +515,9 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) err = hfs_find_init(sbi->cat_tree, &fd); if (err) goto out_put_root; - hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); + err = hfsplus_cat_build_key(sb, fd.search_key, HFSPLUS_ROOT_CNID, &str); + if (unlikely(err < 0)) + goto out_put_root; if (!hfs_brec_read(&fd, &entry, sizeof(entry))) { hfs_find_exit(&fd); if (entry.type != cpu_to_be16(HFSPLUS_FOLDER)) -- cgit From e48322abb061d75096fe52d71886b237e7ae7bfb Mon Sep 17 00:00:00 2001 From: Pintu Kumar Date: Thu, 18 Dec 2014 16:17:15 -0800 Subject: mm: cma: split cma-reserved in dmesg log When the system boots up, in the dmesg logs we can see the memory statistics along with total reserved as below. Memory: 458840k/458840k available, 65448k reserved, 0K highmem When CMA is enabled, still the total reserved memory remains the same. However, the CMA memory is not considered as reserved. But, when we see /proc/meminfo, the CMA memory is part of free memory. This creates confusion. 
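As a sanity check on the numbers in the snapshot below: the 12MB CMA region is 12288k, and 65448k - 12288k = 53160k, so the "After" line is simply the old reserved total with the CMA part split out.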
This patch corrects the problem by properly subtracting the CMA reserved memory from the total reserved memory in dmesg logs. Below is the dmesg snapshot from an arm based device with 512MB RAM and 12MB single CMA region. Before this change: Memory: 458840k/458840k available, 65448k reserved, 0K highmem After this change: Memory: 458840k/458840k available, 53160k reserved, 12288k cma-reserved, 0K highmem Signed-off-by: Pintu Kumar Signed-off-by: Vishnu Pratap Singh Acked-by: Michal Nazarewicz Cc: Rafael Aquini Cc: Jerome Marchand Cc: Marek Szyprowski Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 1 + mm/cma.c | 1 + mm/page_alloc.c | 6 ++++-- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/linux/cma.h b/include/linux/cma.h index a93438beb33c..9384ba66e975 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -15,6 +15,7 @@ struct cma; +extern unsigned long totalcma_pages; extern phys_addr_t cma_get_base(struct cma *cma); extern unsigned long cma_get_size(struct cma *cma); diff --git a/mm/cma.c b/mm/cma.c index f8917629cbdd..a85ae28709a3 100644 --- a/mm/cma.c +++ b/mm/cma.c @@ -337,6 +337,7 @@ int __init cma_declare_contiguous(phys_addr_t base, if (ret) goto err; + totalcma_pages += (size / PAGE_SIZE); pr_info("Reserved %ld MiB at %pa\n", (unsigned long)size / SZ_1M, &base); return 0; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index fa974d87f60d..7633c503a116 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -111,6 +111,7 @@ static DEFINE_SPINLOCK(managed_page_count_lock); unsigned long totalram_pages __read_mostly; unsigned long totalreserve_pages __read_mostly; +unsigned long totalcma_pages __read_mostly; /* * When calculating the number of globally allowed dirty pages, there * is a certain number of per-zone reserves that should not be @@ -5586,7 +5587,7 @@ void __init mem_init_print_info(const char *str) pr_info("Memory: %luK/%luK available " "(%luK kernel code, %luK rwdata, %luK rodata, " - "%luK init, %luK bss, %luK reserved" + "%luK init, %luK bss, %luK reserved, %luK cma-reserved" #ifdef CONFIG_HIGHMEM ", %luK highmem" #endif @@ -5594,7 +5595,8 @@ void __init mem_init_print_info(const char *str) nr_free_pages() << (PAGE_SHIFT-10), physpages << (PAGE_SHIFT-10), codesize >> 10, datasize >> 10, rosize >> 10, (init_data_size + init_code_size) >> 10, bss_size >> 10, - (physpages - totalram_pages) << (PAGE_SHIFT-10), + (physpages - totalram_pages - totalcma_pages) << (PAGE_SHIFT-10), + totalcma_pages << (PAGE_SHIFT-10), #ifdef CONFIG_HIGHMEM totalhigh_pages << (PAGE_SHIFT-10), #endif -- cgit From 47f8f9297d2247d65ee46d8403a73b30f8d0249b Mon Sep 17 00:00:00 2001 From: Pintu Kumar Date: Thu, 18 Dec 2014 16:17:18 -0800 Subject: fs/proc/meminfo.c: include cma info in proc/meminfo This patch include CMA info (CMATotal, CMAFree) in /proc/meminfo. Currently, in a CMA enabled system, if somebody wants to know the total CMA size declared, there is no way to tell, other than the dmesg or /var/log/messages logs. With this patch we are showing the CMA info as part of meminfo, so that it can be determined at any point of time. This will be populated only when CMA is enabled. Below is the sample output from a ARM based device with RAM:512MB and CMA:16MB. MemTotal: 471172 kB MemFree: 111712 kB MemAvailable: 271172 kB . . . CmaTotal: 16384 kB CmaFree: 6144 kB This patch also fix below checkpatch errors that were found during these changes. 
ERROR: space required after that ',' (ctx:ExV) 199: FILE: fs/proc/meminfo.c:199: + ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) ^ ERROR: space required after that ',' (ctx:ExV) 202: FILE: fs/proc/meminfo.c:202: + ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * ^ ERROR: space required after that ',' (ctx:ExV) 206: FILE: fs/proc/meminfo.c:206: + ,K(totalcma_pages) ^ total: 3 errors, 0 warnings, 2 checks, 236 lines checked Signed-off-by: Pintu Kumar Signed-off-by: Vishnu Pratap Singh Acked-by: Michal Nazarewicz Cc: Rafael Aquini Cc: Jerome Marchand Cc: Marek Szyprowski Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index aa1eee06420f..d3ebf2e61853 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -12,6 +12,9 @@ #include #include #include +#ifdef CONFIG_CMA +#include +#endif #include #include #include "internal.h" @@ -137,6 +140,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE "AnonHugePages: %8lu kB\n" +#endif +#ifdef CONFIG_CMA + "CmaTotal: %8lu kB\n" + "CmaFree: %8lu kB\n" #endif , K(i.totalram), @@ -187,11 +194,15 @@ static int meminfo_proc_show(struct seq_file *m, void *v) vmi.used >> 10, vmi.largest_chunk >> 10 #ifdef CONFIG_MEMORY_FAILURE - ,atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) + , atomic_long_read(&num_poisoned_pages) << (PAGE_SHIFT - 10) #endif #ifdef CONFIG_TRANSPARENT_HUGEPAGE - ,K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * + , K(global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) * HPAGE_PMD_NR) +#endif +#ifdef CONFIG_CMA + , K(totalcma_pages) + , K(global_page_state(NR_FREE_CMA_PAGES)) #endif ); -- cgit From 49abd8c28046adf77c5ce1949549aa64d7221881 Mon Sep 17 00:00:00 2001 From: Vishnu Pratap Singh Date: Thu, 18 Dec 2014 16:17:21 -0800 Subject: lib/show_mem.c: add cma reserved information Add cma reserved information which is currently shown as a part of total reserved only. This patch is continuation of our previous cma patches related to this. 
https://lkml.org/lkml/2014/10/20/64 https://lkml.org/lkml/2014/10/22/383 [akpm@linux-foundation.org: remove hopefully-unneeded ifdefs] Signed-off-by: Vishnu Pratap Singh Cc: David Rientjes Cc: KOSAKI Motohiro Cc: Mel Gorman Cc: Xishi Qiu Cc: Pintu Kumar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/show_mem.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/show_mem.c b/lib/show_mem.c index 5e256271b47b..7de89f4a36cf 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -8,6 +8,7 @@ #include #include #include +#include void show_mem(unsigned int filter) { @@ -38,7 +39,12 @@ void show_mem(unsigned int filter) printk("%lu pages RAM\n", total); printk("%lu pages HighMem/MovableOnly\n", highmem); +#ifdef CONFIG_CMA + printk("%lu pages reserved\n", (reserved - totalcma_pages)); + printk("%lu pages cma reserved\n", totalcma_pages); +#else printk("%lu pages reserved\n", reserved); +#endif #ifdef CONFIG_QUICKLIST printk("%lu pages in pagetable cache\n", quicklist_total_size()); -- cgit From 7d19c8ffb50c8337e4f933031e685eea2ce440dc Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 18 Dec 2014 16:17:23 -0800 Subject: .mailmap: update akpm@osdl.org Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.mailmap b/.mailmap index 1ad68731fb47..fd6e27adb7d6 100644 --- a/.mailmap +++ b/.mailmap @@ -17,7 +17,7 @@ Aleksey Gorelov Al Viro Al Viro Andreas Herrmann -Andrew Morton +Andrew Morton Andrew Vasquez Andy Adamson Archit Taneja -- cgit From 48332f28461c72af4b98611e0b4f02d769b8cc70 Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Thu, 18 Dec 2014 16:17:26 -0800 Subject: .mailmap: Santosh Shilimkar has moved Add my new email address along with kernel.org email id Signed-off-by: Santosh Shilimkar Signed-off-by: Santosh Shilimkar Cc: Joe Perches Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- .mailmap | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.mailmap b/.mailmap index fd6e27adb7d6..ada8ad696b2e 100644 --- a/.mailmap +++ b/.mailmap @@ -102,6 +102,8 @@ Rudolf Marek Rui Saraiva Sachin P Sant Sam Ravnborg +Santosh Shilimkar +Santosh Shilimkar Sascha Hauer S.Çağlar Onur Shiraz Hashim -- cgit From d82fa87d2b60e8affea3b244ad23c5d9a59c584a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 18 Dec 2014 16:17:29 -0800 Subject: mm/memory.c:do_shared_fault(): add comment Belatedly document the changes in commit f0c6d4d295e4 ("mm: introduce do_shared_fault() and drop do_fault()"). Cc: Andi Kleen Cc: Bob Liu Cc: Dave Hansen Cc: "Kirill A. Shutemov" Cc: Matthew Wilcox Cc: Mel Gorman Cc: Naoya Horiguchi Cc: Rik van Riel Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/memory.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mm/memory.c b/mm/memory.c index 6efe36a998ba..d8aebc52265f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2996,6 +2996,12 @@ static int do_shared_fault(struct mm_struct *mm, struct vm_area_struct *vma, if (set_page_dirty(fault_page)) dirtied = 1; + /* + * Take a local copy of the address_space - page.mapping may be zeroed + * by truncate after unlock_page(). The address_space itself remains + * pinned by vma->vm_file's reference. We rely on unlock_page()'s + * release semantics to prevent the compiler from undoing this copying. 
+ */ mapping = fault_page->mapping; unlock_page(fault_page); if ((dirtied || vma->vm_ops->page_mkwrite) && mapping) { -- cgit From f62f12b3a426c8f65b10011b1ec40ba4277cbf5f Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 18 Dec 2014 16:17:32 -0800 Subject: ocfs2: reflink: fix slow unlink for refcounted file When running ocfs2 test suite multiple nodes reflink stress test, for a 4 nodes cluster, every unlink() for refcounted file needs about 700s. The slow unlink is caused by the contention of refcount tree lock since all nodes are unlink files using the same refcount tree. When the unlinking file have many extents(over 1600 in our test), most of the extents has refcounted flag set. In ocfs2_commit_truncate(), it will execute the following call trace for every extents. This means it needs get and released refcount tree lock about 1600 times. And when several nodes are do this at the same time, the performance will be very low. ocfs2_remove_btree_range() -- ocfs2_lock_refcount_tree() ---- ocfs2_refcount_lock() ------ __ocfs2_cluster_lock() ocfs2_refcount_lock() is costly, move it to ocfs2_commit_truncate() to do lock/unlock once can improve a lot performance. Signed-off-by: Junxiao Bi Cc: Wengang Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/alloc.c | 28 +++++++++++++++++++++------- fs/ocfs2/alloc.h | 2 +- fs/ocfs2/dir.c | 2 +- fs/ocfs2/file.c | 2 +- 4 files changed, 24 insertions(+), 10 deletions(-) diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index a93bf9892256..fcae9ef1a328 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -5662,7 +5662,7 @@ int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, u32 cpos, u32 phys_cpos, u32 len, int flags, struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc) + u64 refcount_loc, bool refcount_tree_locked) { int ret, credits = 0, extra_blocks = 0; u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos); @@ -5676,11 +5676,13 @@ int ocfs2_remove_btree_range(struct inode *inode, BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL)); - ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, - &ref_tree, NULL); - if (ret) { - mlog_errno(ret); - goto bail; + if (!refcount_tree_locked) { + ret = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, + &ref_tree, NULL); + if (ret) { + mlog_errno(ret); + goto bail; + } } ret = ocfs2_prepare_refcount_change_for_del(inode, @@ -7021,6 +7023,7 @@ int ocfs2_commit_truncate(struct ocfs2_super *osb, u64 refcount_loc = le64_to_cpu(di->i_refcount_loc); struct ocfs2_extent_tree et; struct ocfs2_cached_dealloc_ctxt dealloc; + struct ocfs2_refcount_tree *ref_tree = NULL; ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh); ocfs2_init_dealloc_ctxt(&dealloc); @@ -7130,9 +7133,18 @@ start: phys_cpos = ocfs2_blocks_to_clusters(inode->i_sb, blkno); + if ((flags & OCFS2_EXT_REFCOUNTED) && trunc_len && !ref_tree) { + status = ocfs2_lock_refcount_tree(osb, refcount_loc, 1, + &ref_tree, NULL); + if (status) { + mlog_errno(status); + goto bail; + } + } + status = ocfs2_remove_btree_range(inode, &et, trunc_cpos, phys_cpos, trunc_len, flags, &dealloc, - refcount_loc); + refcount_loc, true); if (status < 0) { mlog_errno(status); goto bail; @@ -7147,6 +7159,8 @@ start: goto start; bail: + if (ref_tree) + ocfs2_unlock_refcount_tree(osb, ref_tree, 1); ocfs2_schedule_truncate_log_flush(osb, 1); diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h index ca381c584127..fb09b97db162 100644 --- a/fs/ocfs2/alloc.h +++ 
b/fs/ocfs2/alloc.h @@ -142,7 +142,7 @@ int ocfs2_remove_btree_range(struct inode *inode, struct ocfs2_extent_tree *et, u32 cpos, u32 phys_cpos, u32 len, int flags, struct ocfs2_cached_dealloc_ctxt *dealloc, - u64 refcount_loc); + u64 refcount_loc, bool refcount_tree_locked); int ocfs2_num_free_extents(struct ocfs2_super *osb, struct ocfs2_extent_tree *et); diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c index 79d56dc981bc..319e786175af 100644 --- a/fs/ocfs2/dir.c +++ b/fs/ocfs2/dir.c @@ -4479,7 +4479,7 @@ int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh) p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno); ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0, - &dealloc, 0); + &dealloc, 0, false); if (ret) { mlog_errno(ret); goto out; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 69fb9f75b082..3950693dd0f6 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1803,7 +1803,7 @@ static int ocfs2_remove_inode_range(struct inode *inode, ret = ocfs2_remove_btree_range(inode, &et, trunc_cpos, phys_cpos, trunc_len, flags, - &dealloc, refcount_loc); + &dealloc, refcount_loc, false); if (ret < 0) { mlog_errno(ret); goto out; -- cgit From 1e5895816030eaadb952c89eb9f4054e5c0082c3 Mon Sep 17 00:00:00 2001 From: Joseph Qi Date: Thu, 18 Dec 2014 16:17:34 -0800 Subject: ocfs2/dlm: fix race between dispatched_work and dlm_lockres_grab_inflight_worker Commit ac4fef4d23ed ("ocfs2/dlm: do not purge lockres that is queued for assert master") may have the following possible race case: dlm_dispatch_assert_master dlm_wq ======================================================================== queue_work(dlm->quedlm_worker, &dlm->dispatched_work); dispatch work, dlm_lockres_drop_inflight_worker *BUG_ON(res->inflight_assert_workers == 0)* dlm_lockres_grab_inflight_worker inflight_assert_workers++ So ensure inflight_assert_workers to be increased first. 
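The fix below takes res->spinlock around both the dispatch and the counter increment, and the worker side takes res->spinlock as well before dropping the count, so the decrement can no longer run first and trip the BUG_ON. The same ordering rule can be shown with an ordinary userspace mutex; a minimal sketch (illustrative only, not ocfs2 code, all names made up):

#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static int inflight;		/* dispatched-but-unfinished work items */

static void *worker(void *arg)
{
	pthread_mutex_lock(&lock);
	assert(inflight > 0);	/* mirrors the BUG_ON in the changelog */
	inflight--;
	pthread_mutex_unlock(&lock);
	return NULL;
}

int main(void)
{
	pthread_t t;

	/* dispatch and increment under one lock: the worker's decrement
	 * cannot run until both have happened */
	pthread_mutex_lock(&lock);
	pthread_create(&t, NULL, worker, NULL);
	inflight++;
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("inflight = %d\n", inflight);
	return 0;
}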
Signed-off-by: Joseph Qi Signed-off-by: Xue jiufei Cc: Joel Becker Reviewed-by: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmmaster.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 3689b3592042..a6944b25fd5b 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -695,14 +695,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, res->inflight_assert_workers); } -static void dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm, - struct dlm_lock_resource *res) -{ - spin_lock(&res->spinlock); - __dlm_lockres_grab_inflight_worker(dlm, res); - spin_unlock(&res->spinlock); -} - static void __dlm_lockres_drop_inflight_worker(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { @@ -1646,6 +1638,7 @@ send_response: } mlog(0, "%u is the owner of %.*s, cleaning everyone else\n", dlm->node_num, res->lockname.len, res->lockname.name); + spin_lock(&res->spinlock); ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx, DLM_ASSERT_MASTER_MLE_CLEANUP); if (ret < 0) { @@ -1653,7 +1646,8 @@ send_response: response = DLM_MASTER_RESP_ERROR; dlm_lockres_put(res); } else - dlm_lockres_grab_inflight_worker(dlm, res); + __dlm_lockres_grab_inflight_worker(dlm, res); + spin_unlock(&res->spinlock); } else { if (res) dlm_lockres_put(res); -- cgit From 136f49b9171074872f2a14ad0ab10486d1ba13ca Mon Sep 17 00:00:00 2001 From: Junxiao Bi Date: Thu, 18 Dec 2014 16:17:37 -0800 Subject: ocfs2: fix journal commit deadlock For buffer write, page lock will be got in write_begin and released in write_end, in ocfs2_write_end_nolock(), before it unlock the page in ocfs2_free_write_ctxt(), it calls ocfs2_run_deallocs(), this will ask for the read lock of journal->j_trans_barrier. Holding page lock and ask for journal->j_trans_barrier breaks the locking order. This will cause a deadlock with journal commit threads, ocfs2cmt will get write lock of journal->j_trans_barrier first, then it wakes up kjournald2 to do the commit work, at last it waits until done. To commit journal, kjournald2 needs flushing data first, it needs get the cache page lock. Since some ocfs2 cluster locks are holding by write process, this deadlock may hung the whole cluster. unlock pages before ocfs2_run_deallocs() can fix the locking order, also put unlock before ocfs2_commit_trans() to make page lock is unlocked before j_trans_barrier to preserve unlocking order. 
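Spelled out, the wait cycle is:

  write path:  holds the page lock           -> wants j_trans_barrier (read) in ocfs2_run_deallocs()
  ocfs2cmt:    holds j_trans_barrier (write) -> waits for kjournald2 to finish the commit
  kjournald2:  flushing data for the commit  -> waits for the page lock held by the write path

Unlocking the pages before the j_trans_barrier path is entered breaks this cycle, which is what the change below does.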
Signed-off-by: Junxiao Bi Reviewed-by: Wengang Wang Cc: Reviewed-by: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/aops.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d9f222987f24..46d93e941f3d 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -894,7 +894,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages) } } -static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc) { int i; @@ -915,7 +915,11 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) page_cache_release(wc->w_target_page); } ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages); +} +static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc) +{ + ocfs2_unlock_pages(wc); brelse(wc->w_di_bh); kfree(wc); } @@ -2042,11 +2046,19 @@ out_write_size: ocfs2_update_inode_fsync_trans(handle, inode, 1); ocfs2_journal_dirty(handle, wc->w_di_bh); + /* unlock pages before dealloc since it needs acquiring j_trans_barrier + * lock, or it will cause a deadlock since journal commit threads holds + * this lock and will ask for the page lock when flushing the data. + * put it here to preserve the unlock order. + */ + ocfs2_unlock_pages(wc); + ocfs2_commit_trans(osb, handle); ocfs2_run_deallocs(osb, &wc->w_dealloc); - ocfs2_free_write_ctxt(wc); + brelse(wc->w_di_bh); + kfree(wc); return copied; } -- cgit From 66cdef663cd7a97aff6bbbf41a81a0205dc81ba2 Mon Sep 17 00:00:00 2001 From: Ganesh Mahendran Date: Thu, 18 Dec 2014 16:17:40 -0800 Subject: mm/zsmalloc: adjust order of functions Currently functions in zsmalloc.c does not arranged in a readable and reasonable sequence. With the more and more functions added, we may meet below inconvenience. For example: Current functions: void zs_init() { } static void get_maxobj_per_zspage() { } Then I want to add a func_1() which is called from zs_init(), and this new added function func_1() will used get_maxobj_per_zspage() which is defined below zs_init(). void func_1() { get_maxobj_per_zspage() } void zs_init() { func_1() } static void get_maxobj_per_zspage() { } This will cause compiling issue. So we must add a declaration: static void get_maxobj_per_zspage(); before func_1() if we do not put get_maxobj_per_zspage() before func_1(). In addition, puting module_[init|exit] functions at the bottom of the file conforms to our habit. So, this patch ajusts function sequence as: /* helper functions */ ... obj_location_to_handle() ... /* Some exported functions */ ... 
zs_map_object() zs_unmap_object() zs_malloc() zs_free() zs_init() zs_exit() Signed-off-by: Ganesh Mahendran Cc: Nitin Gupta Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/zsmalloc.c | 374 +++++++++++++++++++++++++++++----------------------------- 1 file changed, 187 insertions(+), 187 deletions(-) diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index 4d0a063145ec..b72403927aa4 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -884,19 +884,6 @@ static struct notifier_block zs_cpu_nb = { .notifier_call = zs_cpu_notifier }; -static void zs_unregister_cpu_notifier(void) -{ - int cpu; - - cpu_notifier_register_begin(); - - for_each_online_cpu(cpu) - zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu); - __unregister_cpu_notifier(&zs_cpu_nb); - - cpu_notifier_register_done(); -} - static int zs_register_cpu_notifier(void) { int cpu, uninitialized_var(ret); @@ -914,40 +901,28 @@ static int zs_register_cpu_notifier(void) return notifier_to_errno(ret); } -static void init_zs_size_classes(void) +static void zs_unregister_cpu_notifier(void) { - int nr; + int cpu; - nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1; - if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA) - nr += 1; + cpu_notifier_register_begin(); - zs_size_classes = nr; -} + for_each_online_cpu(cpu) + zs_cpu_notifier(NULL, CPU_DEAD, (void *)(long)cpu); + __unregister_cpu_notifier(&zs_cpu_nb); -static void __exit zs_exit(void) -{ -#ifdef CONFIG_ZPOOL - zpool_unregister_driver(&zs_zpool_driver); -#endif - zs_unregister_cpu_notifier(); + cpu_notifier_register_done(); } -static int __init zs_init(void) +static void init_zs_size_classes(void) { - int ret = zs_register_cpu_notifier(); - - if (ret) { - zs_unregister_cpu_notifier(); - return ret; - } + int nr; - init_zs_size_classes(); + nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1; + if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA) + nr += 1; -#ifdef CONFIG_ZPOOL - zpool_register_driver(&zs_zpool_driver); -#endif - return 0; + zs_size_classes = nr; } static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage) @@ -967,113 +942,101 @@ static bool can_merge(struct size_class *prev, int size, int pages_per_zspage) return true; } +unsigned long zs_get_total_pages(struct zs_pool *pool) +{ + return atomic_long_read(&pool->pages_allocated); +} +EXPORT_SYMBOL_GPL(zs_get_total_pages); + /** - * zs_create_pool - Creates an allocation pool to work from. - * @flags: allocation flags used to allocate pool metadata + * zs_map_object - get address of allocated object from handle. + * @pool: pool from which the object was allocated + * @handle: handle returned from zs_malloc * - * This function must be called before anything when using - * the zsmalloc allocator. + * Before using an object allocated from zs_malloc, it must be mapped using + * this function. When done with the object, it must be unmapped using + * zs_unmap_object. * - * On success, a pointer to the newly created pool is returned, - * otherwise NULL. + * Only one object can be mapped per cpu at a time. There is no protection + * against nested mappings. + * + * This function returns with preemption and page faults disabled. 
*/ -struct zs_pool *zs_create_pool(gfp_t flags) +void *zs_map_object(struct zs_pool *pool, unsigned long handle, + enum zs_mapmode mm) { - int i; - struct zs_pool *pool; - struct size_class *prev_class = NULL; + struct page *page; + unsigned long obj_idx, off; - pool = kzalloc(sizeof(*pool), GFP_KERNEL); - if (!pool) - return NULL; + unsigned int class_idx; + enum fullness_group fg; + struct size_class *class; + struct mapping_area *area; + struct page *pages[2]; - pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), - GFP_KERNEL); - if (!pool->size_class) { - kfree(pool); - return NULL; - } + BUG_ON(!handle); /* - * Iterate reversly, because, size of size_class that we want to use - * for merging should be larger or equal to current size. + * Because we use per-cpu mapping areas shared among the + * pools/users, we can't allow mapping in interrupt context + * because it can corrupt another users mappings. */ - for (i = zs_size_classes - 1; i >= 0; i--) { - int size; - int pages_per_zspage; - struct size_class *class; - - size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA; - if (size > ZS_MAX_ALLOC_SIZE) - size = ZS_MAX_ALLOC_SIZE; - pages_per_zspage = get_pages_per_zspage(size); - - /* - * size_class is used for normal zsmalloc operation such - * as alloc/free for that size. Although it is natural that we - * have one size_class for each size, there is a chance that we - * can get more memory utilization if we use one size_class for - * many different sizes whose size_class have same - * characteristics. So, we makes size_class point to - * previous size_class if possible. - */ - if (prev_class) { - if (can_merge(prev_class, size, pages_per_zspage)) { - pool->size_class[i] = prev_class; - continue; - } - } - - class = kzalloc(sizeof(struct size_class), GFP_KERNEL); - if (!class) - goto err; + BUG_ON(in_interrupt()); - class->size = size; - class->index = i; - class->pages_per_zspage = pages_per_zspage; - spin_lock_init(&class->lock); - pool->size_class[i] = class; + obj_handle_to_location(handle, &page, &obj_idx); + get_zspage_mapping(get_first_page(page), &class_idx, &fg); + class = pool->size_class[class_idx]; + off = obj_idx_to_offset(page, obj_idx, class->size); - prev_class = class; + area = &get_cpu_var(zs_map_area); + area->vm_mm = mm; + if (off + class->size <= PAGE_SIZE) { + /* this object is contained entirely within a page */ + area->vm_addr = kmap_atomic(page); + return area->vm_addr + off; } - pool->flags = flags; - - return pool; + /* this object spans two pages */ + pages[0] = page; + pages[1] = get_next_page(page); + BUG_ON(!pages[1]); -err: - zs_destroy_pool(pool); - return NULL; + return __zs_map_object(area, pages, off, class->size); } -EXPORT_SYMBOL_GPL(zs_create_pool); +EXPORT_SYMBOL_GPL(zs_map_object); -void zs_destroy_pool(struct zs_pool *pool) +void zs_unmap_object(struct zs_pool *pool, unsigned long handle) { - int i; + struct page *page; + unsigned long obj_idx, off; - for (i = 0; i < zs_size_classes; i++) { - int fg; - struct size_class *class = pool->size_class[i]; + unsigned int class_idx; + enum fullness_group fg; + struct size_class *class; + struct mapping_area *area; - if (!class) - continue; + BUG_ON(!handle); - if (class->index != i) - continue; + obj_handle_to_location(handle, &page, &obj_idx); + get_zspage_mapping(get_first_page(page), &class_idx, &fg); + class = pool->size_class[class_idx]; + off = obj_idx_to_offset(page, obj_idx, class->size); - for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) { - if (class->fullness_list[fg]) { - 
pr_info("Freeing non-empty class with size %db, fullness group %d\n", - class->size, fg); - } - } - kfree(class); - } + area = this_cpu_ptr(&zs_map_area); + if (off + class->size <= PAGE_SIZE) + kunmap_atomic(area->vm_addr); + else { + struct page *pages[2]; - kfree(pool->size_class); - kfree(pool); + pages[0] = page; + pages[1] = get_next_page(page); + BUG_ON(!pages[1]); + + __zs_unmap_object(area, pages, off, class->size); + } + put_cpu_var(zs_map_area); } -EXPORT_SYMBOL_GPL(zs_destroy_pool); +EXPORT_SYMBOL_GPL(zs_unmap_object); /** * zs_malloc - Allocate block of given size from pool. @@ -1176,100 +1139,137 @@ void zs_free(struct zs_pool *pool, unsigned long obj) EXPORT_SYMBOL_GPL(zs_free); /** - * zs_map_object - get address of allocated object from handle. - * @pool: pool from which the object was allocated - * @handle: handle returned from zs_malloc - * - * Before using an object allocated from zs_malloc, it must be mapped using - * this function. When done with the object, it must be unmapped using - * zs_unmap_object. + * zs_create_pool - Creates an allocation pool to work from. + * @flags: allocation flags used to allocate pool metadata * - * Only one object can be mapped per cpu at a time. There is no protection - * against nested mappings. + * This function must be called before anything when using + * the zsmalloc allocator. * - * This function returns with preemption and page faults disabled. + * On success, a pointer to the newly created pool is returned, + * otherwise NULL. */ -void *zs_map_object(struct zs_pool *pool, unsigned long handle, - enum zs_mapmode mm) +struct zs_pool *zs_create_pool(gfp_t flags) { - struct page *page; - unsigned long obj_idx, off; + int i; + struct zs_pool *pool; + struct size_class *prev_class = NULL; - unsigned int class_idx; - enum fullness_group fg; - struct size_class *class; - struct mapping_area *area; - struct page *pages[2]; + pool = kzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; - BUG_ON(!handle); + pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *), + GFP_KERNEL); + if (!pool->size_class) { + kfree(pool); + return NULL; + } /* - * Because we use per-cpu mapping areas shared among the - * pools/users, we can't allow mapping in interrupt context - * because it can corrupt another users mappings. + * Iterate reversly, because, size of size_class that we want to use + * for merging should be larger or equal to current size. */ - BUG_ON(in_interrupt()); + for (i = zs_size_classes - 1; i >= 0; i--) { + int size; + int pages_per_zspage; + struct size_class *class; - obj_handle_to_location(handle, &page, &obj_idx); - get_zspage_mapping(get_first_page(page), &class_idx, &fg); - class = pool->size_class[class_idx]; - off = obj_idx_to_offset(page, obj_idx, class->size); + size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA; + if (size > ZS_MAX_ALLOC_SIZE) + size = ZS_MAX_ALLOC_SIZE; + pages_per_zspage = get_pages_per_zspage(size); - area = &get_cpu_var(zs_map_area); - area->vm_mm = mm; - if (off + class->size <= PAGE_SIZE) { - /* this object is contained entirely within a page */ - area->vm_addr = kmap_atomic(page); - return area->vm_addr + off; + /* + * size_class is used for normal zsmalloc operation such + * as alloc/free for that size. Although it is natural that we + * have one size_class for each size, there is a chance that we + * can get more memory utilization if we use one size_class for + * many different sizes whose size_class have same + * characteristics. 
So, we makes size_class point to + * previous size_class if possible. + */ + if (prev_class) { + if (can_merge(prev_class, size, pages_per_zspage)) { + pool->size_class[i] = prev_class; + continue; + } + } + + class = kzalloc(sizeof(struct size_class), GFP_KERNEL); + if (!class) + goto err; + + class->size = size; + class->index = i; + class->pages_per_zspage = pages_per_zspage; + spin_lock_init(&class->lock); + pool->size_class[i] = class; + + prev_class = class; } - /* this object spans two pages */ - pages[0] = page; - pages[1] = get_next_page(page); - BUG_ON(!pages[1]); + pool->flags = flags; - return __zs_map_object(area, pages, off, class->size); + return pool; + +err: + zs_destroy_pool(pool); + return NULL; } -EXPORT_SYMBOL_GPL(zs_map_object); +EXPORT_SYMBOL_GPL(zs_create_pool); -void zs_unmap_object(struct zs_pool *pool, unsigned long handle) +void zs_destroy_pool(struct zs_pool *pool) { - struct page *page; - unsigned long obj_idx, off; + int i; - unsigned int class_idx; - enum fullness_group fg; - struct size_class *class; - struct mapping_area *area; + for (i = 0; i < zs_size_classes; i++) { + int fg; + struct size_class *class = pool->size_class[i]; - BUG_ON(!handle); + if (!class) + continue; - obj_handle_to_location(handle, &page, &obj_idx); - get_zspage_mapping(get_first_page(page), &class_idx, &fg); - class = pool->size_class[class_idx]; - off = obj_idx_to_offset(page, obj_idx, class->size); + if (class->index != i) + continue; - area = this_cpu_ptr(&zs_map_area); - if (off + class->size <= PAGE_SIZE) - kunmap_atomic(area->vm_addr); - else { - struct page *pages[2]; + for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) { + if (class->fullness_list[fg]) { + pr_info("Freeing non-empty class with size %db, fullness group %d\n", + class->size, fg); + } + } + kfree(class); + } - pages[0] = page; - pages[1] = get_next_page(page); - BUG_ON(!pages[1]); + kfree(pool->size_class); + kfree(pool); +} +EXPORT_SYMBOL_GPL(zs_destroy_pool); - __zs_unmap_object(area, pages, off, class->size); +static int __init zs_init(void) +{ + int ret = zs_register_cpu_notifier(); + + if (ret) { + zs_unregister_cpu_notifier(); + return ret; } - put_cpu_var(zs_map_area); + + init_zs_size_classes(); + +#ifdef CONFIG_ZPOOL + zpool_register_driver(&zs_zpool_driver); +#endif + return 0; } -EXPORT_SYMBOL_GPL(zs_unmap_object); -unsigned long zs_get_total_pages(struct zs_pool *pool) +static void __exit zs_exit(void) { - return atomic_long_read(&pool->pages_allocated); +#ifdef CONFIG_ZPOOL + zpool_unregister_driver(&zs_zpool_driver); +#endif + zs_unregister_cpu_notifier(); } -EXPORT_SYMBOL_GPL(zs_get_total_pages); module_init(zs_init); module_exit(zs_exit); -- cgit From 96e869d85745df36e3a1bf794ce472cc2b18939b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 18 Dec 2014 16:17:43 -0800 Subject: tools/testing/selftests/Makefile: alphasort the TARGETS list This list is supposed to be sorted, to reduce patch collisions. 
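One quick way to check the invariant locally (a sketch, not part of the patch; it assumes the path from the diff below, a C-locale sort, and only looks at the '+=' lines since the initial 'TARGETS = breakpoints' assignment is already the alphabetical head):

	grep '^TARGETS +=' tools/testing/selftests/Makefile | LC_ALL=C sort -c && echo sorted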
Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- tools/testing/selftests/Makefile | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index b3831f4ba845..4e511221a0c1 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,22 +1,23 @@ TARGETS = breakpoints TARGETS += cpu-hotplug TARGETS += efivarfs +TARGETS += exec +TARGETS += firmware +TARGETS += ftrace TARGETS += kcmp TARGETS += memfd TARGETS += memory-hotplug -TARGETS += mqueue TARGETS += mount +TARGETS += mqueue TARGETS += net +TARGETS += powerpc TARGETS += ptrace +TARGETS += size +TARGETS += sysctl TARGETS += timers -TARGETS += vm -TARGETS += powerpc TARGETS += user -TARGETS += sysctl -TARGETS += firmware -TARGETS += ftrace -TARGETS += exec -TARGETS += size +TARGETS += vm +#Please keep the TARGETS list alphabetically sorted TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug -- cgit