diff --git a/Documentation/filesystems/tmpfs.txt b/Documentation/filesystems/tmpfs.txt index 0d783c504ead..dbe4d87d2615 100644 --- a/Documentation/filesystems/tmpfs.txt +++ b/Documentation/filesystems/tmpfs.txt @@ -78,6 +78,18 @@ use up all the memory on the machine; but enhances the scalability of that instance in a system with many cpus making intensive use of it. +tmpfs has a mount option to set the NUMA memory allocation policy for +all files in that instance: +mpol=interleave prefers to allocate memory from each node in turn +mpol=default prefers to allocate memory from the local node +mpol=bind prefers to allocate from mpol_nodelist +mpol=preferred prefers to allocate from first node in mpol_nodelist + +The following mount option is used in conjunction with mpol=interleave, +mpol=bind or mpol=preferred: +mpol_nodelist: nodelist suitable for parsing with nodelist_parse. + + To specify the initial root directory you can use the following mount options: diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index ab4c3a9d51b8..f568102da1e8 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -402,7 +402,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode->i_mapping->backing_dev_info =&hugetlbfs_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; info = HUGETLBFS_I(inode); - mpol_shared_policy_init(&info->policy); + mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, NULL); switch (mode & S_IFMT) { default: init_special_inode(inode, mode, dev); diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index c7ac77e873b3..d6a53ed6ab6c 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -132,12 +132,8 @@ struct shared_policy { spinlock_t lock; }; -static inline void mpol_shared_policy_init(struct shared_policy *info) -{ - info->root = RB_ROOT; - spin_lock_init(&info->lock); -} - +void mpol_shared_policy_init(struct shared_policy *info, int policy, + nodemask_t *nodes); int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma, struct mempolicy *new); @@ -211,7 +207,8 @@ static inline int mpol_set_shared_policy(struct shared_policy *info, return -EINVAL; } -static inline void mpol_shared_policy_init(struct shared_policy *info) +static inline void mpol_shared_policy_init(struct shared_policy *info, + int policy, nodemask_t *nodes) { } diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index c3e598276e78..c057f0b32318 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -26,6 +26,8 @@ struct shmem_sb_info { unsigned long free_blocks; /* How many are left for allocation */ unsigned long max_inodes; /* How many inodes are allowed */ unsigned long free_inodes; /* How many are left for allocation */ + int policy; /* Default NUMA memory alloc policy */ + nodemask_t policy_nodes; /* nodemask for preferred and bind */ spinlock_t stat_lock; }; diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b62cab575a84..3171f884d245 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1359,6 +1359,30 @@ static int shared_policy_replace(struct shared_policy *sp, unsigned long start, return 0; } +void mpol_shared_policy_init(struct shared_policy *info, int policy, + nodemask_t *policy_nodes) +{ + info->root = RB_ROOT; + spin_lock_init(&info->lock); + + if (policy != MPOL_DEFAULT) { + struct mempolicy *newpol; + + /* Falls back to MPOL_DEFAULT on any error */ + newpol = mpol_new(policy, policy_nodes); + if (!IS_ERR(newpol)) { + /* Create pseudo-vma that contains just the policy */ + struct vm_area_struct pvma; + + memset(&pvma, 0, sizeof(struct vm_area_struct)); + /* Policy covers entire file */ + pvma.vm_end = TASK_SIZE; + mpol_set_shared_policy(info, &pvma, newpol); + mpol_free(newpol); + } + } +} + int mpol_set_shared_policy(struct shared_policy *info, struct vm_area_struct *vma, struct mempolicy *npol) { diff --git a/mm/shmem.c b/mm/shmem.c index 343b3c0937e5..ce501bce1c2e 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1316,7 +1316,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) case S_IFREG: inode->i_op = &shmem_inode_operations; inode->i_fop = &shmem_file_operations; - mpol_shared_policy_init(&info->policy); + mpol_shared_policy_init(&info->policy, sbinfo->policy, + &sbinfo->policy_nodes); break; case S_IFDIR: inode->i_nlink++; @@ -1330,7 +1331,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) * Must not load anything in the rbtree, * mpol_free_shared_policy will not be called. */ - mpol_shared_policy_init(&info->policy); + mpol_shared_policy_init(&info->policy, MPOL_DEFAULT, + NULL); break; } } else if (sbinfo->max_inodes) { @@ -1843,7 +1845,9 @@ static struct inode_operations shmem_symlink_inode_operations = { .put_link = shmem_put_link, }; -static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, unsigned long *blocks, unsigned long *inodes) +static int shmem_parse_options(char *options, int *mode, uid_t *uid, + gid_t *gid, unsigned long *blocks, unsigned long *inodes, + int *policy, nodemask_t *policy_nodes) { char *this_char, *value, *rest; @@ -1897,6 +1901,19 @@ static int shmem_parse_options(char *options, int *mode, uid_t *uid, gid_t *gid, *gid = simple_strtoul(value,&rest,0); if (*rest) goto bad_val; + } else if (!strcmp(this_char,"mpol")) { + if (!strcmp(value,"default")) + *policy = MPOL_DEFAULT; + else if (!strcmp(value,"preferred")) + *policy = MPOL_PREFERRED; + else if (!strcmp(value,"bind")) + *policy = MPOL_BIND; + else if (!strcmp(value,"interleave")) + *policy = MPOL_INTERLEAVE; + else + goto bad_val; + } else if (!strcmp(this_char,"mpol_nodelist")) { + nodelist_parse(value, *policy_nodes); } else { printk(KERN_ERR "tmpfs: Bad mount option %s\n", this_char); @@ -1917,12 +1934,14 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) struct shmem_sb_info *sbinfo = SHMEM_SB(sb); unsigned long max_blocks = sbinfo->max_blocks; unsigned long max_inodes = sbinfo->max_inodes; + int policy = sbinfo->policy; + nodemask_t policy_nodes = sbinfo->policy_nodes; unsigned long blocks; unsigned long inodes; int error = -EINVAL; - if (shmem_parse_options(data, NULL, NULL, NULL, - &max_blocks, &max_inodes)) + if (shmem_parse_options(data, NULL, NULL, NULL, &max_blocks, + &max_inodes, &policy, &policy_nodes)) return error; spin_lock(&sbinfo->stat_lock); @@ -1948,6 +1967,8 @@ static int shmem_remount_fs(struct super_block *sb, int *flags, char *data) sbinfo->free_blocks = max_blocks - blocks; sbinfo->max_inodes = max_inodes; sbinfo->free_inodes = max_inodes - inodes; + sbinfo->policy = policy; + sbinfo->policy_nodes = policy_nodes; out: spin_unlock(&sbinfo->stat_lock); return error; @@ -1972,6 +1993,8 @@ static int shmem_fill_super(struct super_block *sb, struct shmem_sb_info *sbinfo; unsigned long blocks = 0; unsigned long inodes = 0; + int policy = MPOL_DEFAULT; + nodemask_t policy_nodes = node_online_map; #ifdef CONFIG_TMPFS /* @@ -1984,8 +2007,8 @@ static int shmem_fill_super(struct super_block *sb, inodes = totalram_pages - totalhigh_pages; if (inodes > blocks) inodes = blocks; - if (shmem_parse_options(data, &mode, &uid, &gid, - &blocks, &inodes)) + if (shmem_parse_options(data, &mode, &uid, &gid, &blocks, + &inodes, &policy, &policy_nodes)) return -EINVAL; } #else @@ -2003,6 +2026,8 @@ static int shmem_fill_super(struct super_block *sb, sbinfo->free_blocks = blocks; sbinfo->max_inodes = inodes; sbinfo->free_inodes = inodes; + sbinfo->policy = policy; + sbinfo->policy_nodes = policy_nodes; sb->s_fs_info = sbinfo; sb->s_maxbytes = SHMEM_MAX_BYTES;