mm: shmem: add multi-size THP sysfs interface for anonymous shmem

To support the use of mTHP with anonymous shmem, add a new sysfs interface
'shmem_enabled' in the '/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/'
directory for each mTHP to control whether shmem is enabled for that mTHP,
with a value similar to the top level 'shmem_enabled', which can be set
to: "always", "inherit" (to inherit the top level setting), "within_size",
"advise", "never".  An 'inherit' option is added to ensure compatibility
with these global settings, and the options 'force' and 'deny' are
dropped, which are rather testing artifacts from the old ages.

By default, PMD-sized hugepages have enabled="inherit" and all other
hugepage sizes have enabled="never" for
'/sys/kernel/mm/transparent_hugepage/hugepages-xxkB/shmem_enabled'.

In addition, if the top level value is 'force', then only PMD-sized hugepages
may have enabled="inherit"; setting any other size to "inherit" fails.
Conversely, the top level value cannot be set to 'force' unless PMD-sized
hugepages are the only size set to "inherit".  That means we now avoid using
non-PMD sized THP to override the global huge allocation.

[baolin.wang@linux.alibaba.com: fix transhuge.rst indentation]
  Link: https://lkml.kernel.org/r/b189d815-998b-4dfd-ba89-218ff51313f8@linux.alibaba.com
[akpm@linux-foundation.org: reflow transhuge.rst addition to 80 cols]
[baolin.wang@linux.alibaba.com: move huge_shmem_orders_lock under CONFIG_SYSFS]
  Link: https://lkml.kernel.org/r/eb34da66-7f12-44f3-a39e-2bcc90c33354@linux.alibaba.com
[akpm@linux-foundation.org: huge_memory.c needs mm_types.h]
Link: https://lkml.kernel.org/r/ffddfa8b3cb4266ff963099ab78cfd7184c57ac7.1718090413.git.baolin.wang@linux.alibaba.com
Signed-off-by: Baolin Wang <baolin.wang@linux.alibaba.com>
Cc: Barry Song <v-songbaohua@oppo.com>
Cc: Daniel Gomez <da.gomez@samsung.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: "Huang, Ying" <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Kefeng Wang <wangkefeng.wang@huawei.com>
Cc: Lance Yang <ioworker0@gmail.com>
Cc: Pankaj Raghav <p.raghav@samsung.com>
Cc: Ryan Roberts <ryan.roberts@arm.com>
Cc: Yang Shi <shy828301@gmail.com>
Cc: Zi Yan <ziy@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
This commit is contained in:
Baolin Wang 2024-06-11 18:11:07 +08:00 committed by Andrew Morton
parent 3d95bc21ce
commit 4b98995530
4 changed files with 135 additions and 8 deletions

View file

@ -332,6 +332,31 @@ deny
force
Force the huge option on for all - very useful for testing;
Shmem can also use "multi-size THP" (mTHP) by adding a new sysfs knob to
control mTHP allocation:
'/sys/kernel/mm/transparent_hugepage/hugepages-<size>kB/shmem_enabled',
and its value for each mTHP is essentially consistent with the global
setting. An 'inherit' option is added to ensure compatibility with these
global settings. Conversely, the options 'force' and 'deny' are dropped,
which are rather testing artifacts from the old ages.
always
Attempt to allocate <size> huge pages every time we need a new page;
inherit
Inherit the top-level "shmem_enabled" value. By default, PMD-sized hugepages
have enabled="inherit" and all other hugepage sizes have enabled="never";
never
Do not allocate <size> huge pages;
within_size
Only allocate <size> huge page if it will be fully within i_size.
Also respect fadvise()/madvise() hints;
advise
Only allocate <size> huge pages if requested with fadvise()/madvise();
Need of application restart
===========================

View file

@ -6,6 +6,7 @@
#include <linux/mm_types.h>
#include <linux/fs.h> /* only for vma_is_dax() */
#include <linux/kobject.h>
vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@ -63,6 +64,7 @@ ssize_t single_hugepage_flag_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf,
enum transparent_hugepage_flag flag);
/*
 * sysfs attributes for the shmem huge page policy.  Both are declared here
 * and defined elsewhere: shmem_enabled_attr is the top-level "shmem_enabled"
 * knob, thpsize_shmem_enabled_attr is the per-size "shmem_enabled" file.
 */
extern struct kobj_attribute shmem_enabled_attr;
extern struct kobj_attribute thpsize_shmem_enabled_attr;
/*
* Mask of all large folio orders supported for anonymous THP; all orders up to
@ -265,6 +267,14 @@ unsigned long thp_vma_allowable_orders(struct vm_area_struct *vma,
return __thp_vma_allowable_orders(vma, vm_flags, tva_flags, orders);
}
/*
 * Per-size THP sysfs object.  The per-size attribute handlers receive the
 * embedded kobject and use to_thpsize() to get back to this structure.
 */
struct thpsize {
/* Embedded kobject; to_thpsize() relies on it being a member of this struct. */
struct kobject kobj;
/* List linkage. NOTE(review): presumably chains the object on thpsize_list - confirm. */
struct list_head node;
/* Page order of this size; the shmem handlers use it as the bit index into the per-order masks. */
int order;
};
/* Map a pointer to the embedded kobject back to its containing struct thpsize. */
#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)
enum mthp_stat_item {
MTHP_STAT_ANON_FAULT_ALLOC,
MTHP_STAT_ANON_FAULT_FALLBACK,

View file

@ -20,6 +20,7 @@
#include <linux/swapops.h>
#include <linux/backing-dev.h>
#include <linux/dax.h>
#include <linux/mm_types.h>
#include <linux/khugepaged.h>
#include <linux/freezer.h>
#include <linux/pfn_t.h>
@ -449,14 +450,6 @@ static void thpsize_release(struct kobject *kobj);
/*
 * NOTE(review): the users of this lock are outside the visible hunk; going by
 * the name it presumably serialises updates to the anonymous-THP per-order
 * masks - confirm.
 */
static DEFINE_SPINLOCK(huge_anon_orders_lock);
/* NOTE(review): presumably the list that struct thpsize::node is linked on - confirm. */
static LIST_HEAD(thpsize_list);
/* Per-size THP sysfs object, recovered from its embedded kobject via to_thpsize(). */
struct thpsize {
/* Embedded kobject handed to the sysfs attribute handlers. */
struct kobject kobj;
/* List linkage for this object. */
struct list_head node;
/* Page order of the hugepage size this object represents. */
int order;
};
/* Map a pointer to the embedded kobject back to its containing struct thpsize. */
#define to_thpsize(kobj) container_of(kobj, struct thpsize, kobj)
static ssize_t thpsize_enabled_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
@ -517,6 +510,9 @@ static struct kobj_attribute thpsize_enabled_attr =
/*
 * NULL-terminated list of the per-size sysfs attributes.  The per-size
 * "shmem_enabled" attribute is only listed when CONFIG_SHMEM is set.
 */
static struct attribute *thpsize_attrs[] = {
&thpsize_enabled_attr.attr,
#ifdef CONFIG_SHMEM
/* Per-size shmem policy knob. */
&thpsize_shmem_enabled_attr.attr,
#endif
NULL,
};

View file

@ -131,6 +131,13 @@ struct shmem_options {
#define SHMEM_SEEN_QUOTA 32
};
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static unsigned long huge_shmem_orders_always __read_mostly;
static unsigned long huge_shmem_orders_madvise __read_mostly;
static unsigned long huge_shmem_orders_inherit __read_mostly;
static unsigned long huge_shmem_orders_within_size __read_mostly;
#endif
#ifdef CONFIG_TMPFS
static unsigned long shmem_default_max_blocks(void)
{
@ -4672,6 +4679,12 @@ void __init shmem_init(void)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
else
shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */
/*
* Default to setting PMD-sized THP to inherit the global setting and
* disable all other multi-size THPs.
*/
huge_shmem_orders_inherit = BIT(HPAGE_PMD_ORDER);
#endif
return;
@ -4731,6 +4744,11 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
huge != SHMEM_HUGE_NEVER && huge != SHMEM_HUGE_DENY)
return -EINVAL;
/* Do not override huge allocation policy with non-PMD sized mTHP */
if (huge == SHMEM_HUGE_FORCE &&
huge_shmem_orders_inherit != BIT(HPAGE_PMD_ORDER))
return -EINVAL;
shmem_huge = huge;
if (shmem_huge > SHMEM_HUGE_DENY)
SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
@ -4738,6 +4756,84 @@ static ssize_t shmem_enabled_store(struct kobject *kobj,
}
/* Top-level "shmem_enabled" sysfs attribute, built with __ATTR_RW(). */
struct kobj_attribute shmem_enabled_attr = __ATTR_RW(shmem_enabled);
/*
 * Held by thpsize_shmem_enabled_store() around its multi-step updates of the
 * huge_shmem_orders_* masks.  thpsize_shmem_enabled_show() reads the masks
 * without taking it.
 */
static DEFINE_SPINLOCK(huge_shmem_orders_lock);
/*
 * sysfs read handler for the per-size "shmem_enabled" file.
 *
 * Emits the list of policies with the one currently in effect for this size
 * wrapped in brackets.  The per-order masks are probed in the fixed priority
 * always > inherit > within_size > advise; an order that is set in none of
 * them is reported as "never".
 *
 * Returns the result of sysfs_emit().
 */
static ssize_t thpsize_shmem_enabled_show(struct kobject *kobj,
					  struct kobj_attribute *attr, char *buf)
{
	const int nr = to_thpsize(kobj)->order;

	if (test_bit(nr, &huge_shmem_orders_always))
		return sysfs_emit(buf, "%s\n",
				  "[always] inherit within_size advise never");

	if (test_bit(nr, &huge_shmem_orders_inherit))
		return sysfs_emit(buf, "%s\n",
				  "always [inherit] within_size advise never");

	if (test_bit(nr, &huge_shmem_orders_within_size))
		return sysfs_emit(buf, "%s\n",
				  "always inherit [within_size] advise never");

	if (test_bit(nr, &huge_shmem_orders_madvise))
		return sysfs_emit(buf, "%s\n",
				  "always inherit within_size [advise] never");

	/* Not present in any mask: huge pages of this size are disabled. */
	return sysfs_emit(buf, "%s\n",
			  "always inherit within_size advise [never]");
}
static ssize_t thpsize_shmem_enabled_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
int order = to_thpsize(kobj)->order;
ssize_t ret = count;
if (sysfs_streq(buf, "always")) {
spin_lock(&huge_shmem_orders_lock);
clear_bit(order, &huge_shmem_orders_inherit);
clear_bit(order, &huge_shmem_orders_madvise);
clear_bit(order, &huge_shmem_orders_within_size);
set_bit(order, &huge_shmem_orders_always);
spin_unlock(&huge_shmem_orders_lock);
} else if (sysfs_streq(buf, "inherit")) {
/* Do not override huge allocation policy with non-PMD sized mTHP */
if (shmem_huge == SHMEM_HUGE_FORCE &&
order != HPAGE_PMD_ORDER)
return -EINVAL;
spin_lock(&huge_shmem_orders_lock);
clear_bit(order, &huge_shmem_orders_always);
clear_bit(order, &huge_shmem_orders_madvise);
clear_bit(order, &huge_shmem_orders_within_size);
set_bit(order, &huge_shmem_orders_inherit);
spin_unlock(&huge_shmem_orders_lock);
} else if (sysfs_streq(buf, "within_size")) {
spin_lock(&huge_shmem_orders_lock);
clear_bit(order, &huge_shmem_orders_always);
clear_bit(order, &huge_shmem_orders_inherit);
clear_bit(order, &huge_shmem_orders_madvise);
set_bit(order, &huge_shmem_orders_within_size);
spin_unlock(&huge_shmem_orders_lock);
} else if (sysfs_streq(buf, "madvise")) {
spin_lock(&huge_shmem_orders_lock);
clear_bit(order, &huge_shmem_orders_always);
clear_bit(order, &huge_shmem_orders_inherit);
clear_bit(order, &huge_shmem_orders_within_size);
set_bit(order, &huge_shmem_orders_madvise);
spin_unlock(&huge_shmem_orders_lock);
} else if (sysfs_streq(buf, "never")) {
spin_lock(&huge_shmem_orders_lock);
clear_bit(order, &huge_shmem_orders_always);
clear_bit(order, &huge_shmem_orders_inherit);
clear_bit(order, &huge_shmem_orders_within_size);
clear_bit(order, &huge_shmem_orders_madvise);
spin_unlock(&huge_shmem_orders_lock);
} else {
ret = -EINVAL;
}
return ret;
}
/*
 * Per-size "shmem_enabled" sysfs attribute (mode 0644), served by
 * thpsize_shmem_enabled_show() and thpsize_shmem_enabled_store().
 */
struct kobj_attribute thpsize_shmem_enabled_attr =
__ATTR(shmem_enabled, 0644, thpsize_shmem_enabled_show, thpsize_shmem_enabled_store);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
#else /* !CONFIG_SHMEM */