From aafd4562dfee81a40ba21b5ea3cf5e06664bc7f6 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:29 -0700 Subject: mm: arch: consolidate mmap hugetlb size encodings A non-default huge page size can be encoded in the flags argument of the mmap system call. The definitions for these encodings are in arch specific header files. However, all architectures use the same values. Consolidate all the definitions in the primary user header file (uapi/linux/mman.h). Include definitions for all known huge page sizes. Use the generic encoding definitions in hugetlb_encode.h as the basis for these definitions. Link: http://lkml.kernel.org/r/1501527386-10736-3-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Davidlohr Bueso Cc: Matthew Wilcox Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/mman.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/mman.h b/include/uapi/linux/mman.h index ade4acd3a90c..a937480d7cd3 100644 --- a/include/uapi/linux/mman.h +++ b/include/uapi/linux/mman.h @@ -2,6 +2,7 @@ #define _UAPI_LINUX_MMAN_H #include +#include #define MREMAP_MAYMOVE 1 #define MREMAP_FIXED 2 @@ -10,4 +11,25 @@ #define OVERCOMMIT_ALWAYS 1 #define OVERCOMMIT_NEVER 2 +/* + * Huge page size encoding when MAP_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MAP_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MAP_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MAP_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MAP_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MAP_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MAP_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MAP_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MAP_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MAP_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MAP_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MAP_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MAP_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB + #endif /* _UAPI_LINUX_MMAN_H */ -- cgit From 4da243ac1cf6aeb30b7c555d56208982d66d6d33 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:23:33 -0700 Subject: mm: shm: use new hugetlb size encoding definitions Use the common definitions from hugetlb_encode.h header file for encoding hugetlb size definitions in shmget system call flags. In addition, move these definitions from the internal (kernel) to user (uapi) header file. Link: http://lkml.kernel.org/r/1501527386-10736-4-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Suggested-by: Matthew Wilcox Acked-by: Michal Hocko Cc: Andi Kleen Cc: Andrea Arcangeli Cc: Aneesh Kumar K.V Cc: Anshuman Khandual Cc: Arnd Bergmann Cc: Davidlohr Bueso Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/shm.h | 17 ----------------- include/uapi/linux/shm.h | 31 +++++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 19 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/linux/shm.h b/include/linux/shm.h index 0fb7061ec54c..21a5e6c43385 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -27,23 +27,6 @@ struct shmid_kernel /* private to the kernel */ /* shm_mode upper byte flags */ #define SHM_DEST 01000 /* segment will be destroyed on last detach */ #define SHM_LOCKED 02000 /* segment will not be swapped */ -#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ -#define SHM_NORESERVE 010000 /* don't check for reservations */ - -/* Bits [26:31] are reserved */ - -/* - * When SHM_HUGETLB is set bits [26:31] encode the log2 of the huge page size. - * This gives us 6 bits, which is enough until someone invents 128 bit address - * spaces. - * - * Assume these are all power of twos. - * When 0 use the default page size. - */ -#define SHM_HUGE_SHIFT 26 -#define SHM_HUGE_MASK 0x3f -#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT) -#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT) #ifdef CONFIG_SYSVIPC struct sysv_shm { diff --git a/include/uapi/linux/shm.h b/include/uapi/linux/shm.h index 1fbf24ea37fd..cf23c873719d 100644 --- a/include/uapi/linux/shm.h +++ b/include/uapi/linux/shm.h @@ -3,6 +3,7 @@ #include #include +#include #ifndef __KERNEL__ #include #endif @@ -40,11 +41,37 @@ struct shmid_ds { /* Include the definition of shmid64_ds and shminfo64 */ #include -/* permission flag for shmget */ +/* + * shmget() shmflg values. + */ +/* The bottom nine bits are the same as open(2) mode flags */ #define SHM_R 0400 /* or S_IRUGO from */ #define SHM_W 0200 /* or S_IWUGO from */ +/* Bits 9 & 10 are IPC_CREAT and IPC_EXCL */ +#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */ +#define SHM_NORESERVE 010000 /* don't check for reservations */ + +/* + * Huge page size encoding when SHM_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h + */ +#define SHM_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define SHM_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define SHM_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define SHM_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define SHM_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define SHM_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define SHM_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define SHM_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define SHM_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define SHM_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define SHM_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define SHM_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB -/* mode for attach */ +/* + * shmat() shmflg values + */ #define SHM_RDONLY 010000 /* read-only access */ #define SHM_RND 020000 /* round attach address to SHMLBA boundary */ #define SHM_REMAP 040000 /* take-over region on attach */ -- cgit From 2d6d6f5a09a96cc1fec7ed992b825e05f64cb50e Mon Sep 17 00:00:00 2001 From: Prakash Sangappa Date: Wed, 6 Sep 2017 16:23:39 -0700 Subject: mm: userfaultfd: add feature to request for a signal delivery In some cases, userfaultfd mechanism should just deliver a SIGBUS signal to the faulting process, instead of the page-fault event. Dealing with page-fault event using a monitor thread can be an overhead in these cases. For example applications like the database could use the signaling mechanism for robustness purpose. Database uses hugetlbfs for performance reason. Files on hugetlbfs filesystem are created and huge pages allocated using fallocate() API. Pages are deallocated/freed using fallocate() hole punching support. These files are mmapped and accessed by many processes as shared memory. The database keeps track of which offsets in the hugetlbfs file have pages allocated. Any access to mapped address over holes in the file, which can occur due to bugs in the application, is considered invalid and expect the process to simply receive a SIGBUS. However, currently when a hole in the file is accessed via the mapped address, kernel/mm attempts to automatically allocate a page at page fault time, resulting in implicitly filling the hole in the file. This may not be the desired behavior for applications like the database that want to explicitly manage page allocations of hugetlbfs files. Using userfaultfd mechanism with this support to get a signal, database application can prevent pages from being allocated implicitly when processes access mapped address over holes in the file. This patch adds UFFD_FEATURE_SIGBUS feature to userfaultfd mechnism to request for a SIGBUS signal. See following for previous discussion about the database requirement leading to this proposal as suggested by Andrea. http://www.spinics.net/lists/linux-mm/msg129224.html Link: http://lkml.kernel.org/r/1501552446-748335-2-git-send-email-prakash.sangappa@oracle.com Signed-off-by: Prakash Sangappa Reviewed-by: Mike Rapoport Reviewed-by: Andrea Arcangeli Cc: Mike Kravetz Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 3 +++ include/uapi/linux/userfaultfd.h | 10 +++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/uapi/linux') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 01a85e2660b8..5fd4d846691f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -370,6 +370,9 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP)); VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP)); + if (ctx->features & UFFD_FEATURE_SIGBUS) + goto out; + /* * If it's already released don't get it. This avoids to loop * in __get_user_pages if userfaultfd_release waits on the diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 3b059530dac9..d39d5db56771 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -23,7 +23,8 @@ UFFD_FEATURE_EVENT_REMOVE | \ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ - UFFD_FEATURE_MISSING_SHMEM) + UFFD_FEATURE_MISSING_SHMEM | \ + UFFD_FEATURE_SIGBUS) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -153,6 +154,12 @@ struct uffdio_api { * UFFD_FEATURE_MISSING_SHMEM works the same as * UFFD_FEATURE_MISSING_HUGETLBFS, but it applies to shmem * (i.e. tmpfs and other shmem based APIs). + * + * UFFD_FEATURE_SIGBUS feature means no page-fault + * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead + * a SIGBUS signal will be sent to the faulting process. + * The application process can enable this behavior by adding + * it to uffdio_api.features. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -161,6 +168,7 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) +#define UFFD_FEATURE_SIGBUS (1<<7) __u64 features; __u64 ioctls; -- cgit From 9d4ac934829ac58c5109c49e6dfe677300e5e652 Mon Sep 17 00:00:00 2001 From: Alexey Perevalov Date: Wed, 6 Sep 2017 16:23:56 -0700 Subject: userfaultfd: provide pid in userfault msg It could be useful for calculating downtime during postcopy live migration per vCPU. Side observer or application itself will be informed about proper task's sleep during userfaultfd processing. Process's thread id is being provided when user requeste it by setting UFFD_FEATURE_THREAD_ID bit into uffdio_api.features. Link: http://lkml.kernel.org/r/20170802165145.22628-6-aarcange@redhat.com Signed-off-by: Alexey Perevalov Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Maxime Coquelin Cc: Mike Kravetz Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 8 ++++++-- include/uapi/linux/userfaultfd.h | 10 +++++++--- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 5fd4d846691f..665bf7a930b2 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -178,7 +178,8 @@ static inline void msg_init(struct uffd_msg *msg) static inline struct uffd_msg userfault_msg(unsigned long address, unsigned int flags, - unsigned long reason) + unsigned long reason, + unsigned int features) { struct uffd_msg msg; msg_init(&msg); @@ -202,6 +203,8 @@ static inline struct uffd_msg userfault_msg(unsigned long address, * write protect fault. */ msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP; + if (features & UFFD_FEATURE_THREAD_ID) + msg.arg.pagefault.ptid = task_pid_vnr(current); return msg; } @@ -422,7 +425,8 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function); uwq.wq.private = current; - uwq.msg = userfault_msg(vmf->address, vmf->flags, reason); + uwq.msg = userfault_msg(vmf->address, vmf->flags, reason, + ctx->features); uwq.ctx = ctx; uwq.waken = false; diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index d39d5db56771..2b24c28d99a7 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -24,7 +24,8 @@ UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM | \ - UFFD_FEATURE_SIGBUS) + UFFD_FEATURE_SIGBUS | \ + UFFD_FEATURE_THREAD_ID) #define UFFD_API_IOCTLS \ ((__u64)1 << _UFFDIO_REGISTER | \ (__u64)1 << _UFFDIO_UNREGISTER | \ @@ -79,6 +80,7 @@ struct uffd_msg { struct { __u64 flags; __u64 address; + __u32 ptid; } pagefault; struct { @@ -158,8 +160,9 @@ struct uffdio_api { * UFFD_FEATURE_SIGBUS feature means no page-fault * (UFFD_EVENT_PAGEFAULT) event will be delivered, instead * a SIGBUS signal will be sent to the faulting process. - * The application process can enable this behavior by adding - * it to uffdio_api.features. + * + * UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will + * be returned, if feature is not requested 0 will be returned. */ #define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0) #define UFFD_FEATURE_EVENT_FORK (1<<1) @@ -169,6 +172,7 @@ struct uffdio_api { #define UFFD_FEATURE_MISSING_SHMEM (1<<5) #define UFFD_FEATURE_EVENT_UNMAP (1<<6) #define UFFD_FEATURE_SIGBUS (1<<7) +#define UFFD_FEATURE_THREAD_ID (1<<8) __u64 features; __u64 ioctls; -- cgit From a36985d31a65d5c0559fb582719e32eaf0ccec3b Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Wed, 6 Sep 2017 16:23:59 -0700 Subject: userfaultfd: provide pid in userfault msg - add feat union No ABI change, but this will make it more explicit to software that ptid is only available if requested by passing UFFD_FEATURE_THREAD_ID to UFFDIO_API. The fact it's a union will also self document it shouldn't be taken for granted there's a tpid there. Link: http://lkml.kernel.org/r/20170802165145.22628-7-aarcange@redhat.com Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Alexey Perevalov Cc: Maxime Coquelin Cc: Mike Kravetz Cc: Mike Rapoport Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/userfaultfd.c | 2 +- include/uapi/linux/userfaultfd.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/uapi/linux') diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 665bf7a930b2..5419e7da82ba 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -204,7 +204,7 @@ static inline struct uffd_msg userfault_msg(unsigned long address, */ msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP; if (features & UFFD_FEATURE_THREAD_ID) - msg.arg.pagefault.ptid = task_pid_vnr(current); + msg.arg.pagefault.feat.ptid = task_pid_vnr(current); return msg; } diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index 2b24c28d99a7..d6d1f65cb3c3 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -80,7 +80,9 @@ struct uffd_msg { struct { __u64 flags; __u64 address; - __u32 ptid; + union { + __u32 ptid; + } feat; } pagefault; struct { -- cgit From 749df87bd7bee5a79cef073f5d032ddb2b211de8 Mon Sep 17 00:00:00 2001 From: Mike Kravetz Date: Wed, 6 Sep 2017 16:24:16 -0700 Subject: mm/shmem: add hugetlbfs support to memfd_create() This patch came out of discussions in this e-mail thread: http://lkml.kernel.org/r/1499357846-7481-1-git-send-email-mike.kravetz%40oracle.com The Oracle JVM team is developing a new garbage collection model. This new model requires multiple mappings of the same anonymous memory. One straight forward way to accomplish this is with memfd_create. They can use the returned fd to create multiple mappings of the same memory. The JVM today has an option to use (static hugetlb) huge pages. If this option is specified, they would like to use the same garbage collection model requiring multiple mappings to the same memory. Using hugetlbfs, it is possible to explicitly mount a filesystem and specify file paths in order to get an fd that can be used for multiple mappings. However, this introduces additional system admin work and coordination. Ideally they would like to get a hugetlbfs fd without requiring explicit mounting of a filesystem. Today, mmap and shmget can make use of hugetlbfs without explicitly mounting a filesystem. The patch adds this functionality to memfd_create. Add a new flag MFD_HUGETLB to memfd_create() that will specify the file to be created resides in the hugetlbfs filesystem. This is the generic hugetlbfs filesystem not associated with any specific mount point. As with other system calls that request hugetlbfs backed pages, there is the ability to encode huge page size in the flag arguments. hugetlbfs does not support sealing operations, therefore specifying MFD_ALLOW_SEALING with MFD_HUGETLB will result in EINVAL. Of course, the memfd_man page would need updating if this type of functionality moves forward. Link: http://lkml.kernel.org/r/1502149672-7759-2-git-send-email-mike.kravetz@oracle.com Signed-off-by: Mike Kravetz Acked-by: Michal Hocko Cc: Hugh Dickins Cc: Andrea Arcangeli Cc: "Kirill A . Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/memfd.h | 24 ++++++++++++++++++++++++ mm/shmem.c | 37 +++++++++++++++++++++++++++++++------ 2 files changed, 55 insertions(+), 6 deletions(-) (limited to 'include/uapi/linux') diff --git a/include/uapi/linux/memfd.h b/include/uapi/linux/memfd.h index 534e364bda92..7f3a722dbd72 100644 --- a/include/uapi/linux/memfd.h +++ b/include/uapi/linux/memfd.h @@ -1,8 +1,32 @@ #ifndef _UAPI_LINUX_MEMFD_H #define _UAPI_LINUX_MEMFD_H +#include + /* flags for memfd_create(2) (unsigned int) */ #define MFD_CLOEXEC 0x0001U #define MFD_ALLOW_SEALING 0x0002U +#define MFD_HUGETLB 0x0004U + +/* + * Huge page size encoding when MFD_HUGETLB is specified, and a huge page + * size other than the default is desired. See hugetlb_encode.h. + * All known huge page size encodings are provided here. It is the + * responsibility of the application to know which sizes are supported on + * the running system. See mmap(2) man page for details. + */ +#define MFD_HUGE_SHIFT HUGETLB_FLAG_ENCODE_SHIFT +#define MFD_HUGE_MASK HUGETLB_FLAG_ENCODE_MASK + +#define MFD_HUGE_64KB HUGETLB_FLAG_ENCODE_64KB +#define MFD_HUGE_512KB HUGETLB_FLAG_ENCODE_512KB +#define MFD_HUGE_1MB HUGETLB_FLAG_ENCODE_1MB +#define MFD_HUGE_2MB HUGETLB_FLAG_ENCODE_2MB +#define MFD_HUGE_8MB HUGETLB_FLAG_ENCODE_8MB +#define MFD_HUGE_16MB HUGETLB_FLAG_ENCODE_16MB +#define MFD_HUGE_256MB HUGETLB_FLAG_ENCODE_256MB +#define MFD_HUGE_1GB HUGETLB_FLAG_ENCODE_1GB +#define MFD_HUGE_2GB HUGETLB_FLAG_ENCODE_2GB +#define MFD_HUGE_16GB HUGETLB_FLAG_ENCODE_16GB #endif /* _UAPI_LINUX_MEMFD_H */ diff --git a/mm/shmem.c b/mm/shmem.c index 64bdc91187f7..47179bbe9ee7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -34,6 +34,7 @@ #include #include #include +#include #include /* for arch/microblaze update_mmu_cache() */ @@ -3652,7 +3653,7 @@ static int shmem_show_options(struct seq_file *seq, struct dentry *root) #define MFD_NAME_PREFIX_LEN (sizeof(MFD_NAME_PREFIX) - 1) #define MFD_NAME_MAX_LEN (NAME_MAX - MFD_NAME_PREFIX_LEN) -#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING) +#define MFD_ALL_FLAGS (MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_HUGETLB) SYSCALL_DEFINE2(memfd_create, const char __user *, uname, @@ -3664,8 +3665,18 @@ SYSCALL_DEFINE2(memfd_create, char *name; long len; - if (flags & ~(unsigned int)MFD_ALL_FLAGS) - return -EINVAL; + if (!(flags & MFD_HUGETLB)) { + if (flags & ~(unsigned int)MFD_ALL_FLAGS) + return -EINVAL; + } else { + /* Sealing not supported in hugetlbfs (MFD_HUGETLB) */ + if (flags & MFD_ALLOW_SEALING) + return -EINVAL; + /* Allow huge page size encoding in flags. */ + if (flags & ~(unsigned int)(MFD_ALL_FLAGS | + (MFD_HUGE_MASK << MFD_HUGE_SHIFT))) + return -EINVAL; + } /* length includes terminating zero */ len = strnlen_user(uname, MFD_NAME_MAX_LEN + 1); @@ -3696,16 +3707,30 @@ SYSCALL_DEFINE2(memfd_create, goto err_name; } - file = shmem_file_setup(name, 0, VM_NORESERVE); + if (flags & MFD_HUGETLB) { + struct user_struct *user = NULL; + + file = hugetlb_file_setup(name, 0, VM_NORESERVE, &user, + HUGETLB_ANONHUGE_INODE, + (flags >> MFD_HUGE_SHIFT) & + MFD_HUGE_MASK); + } else + file = shmem_file_setup(name, 0, VM_NORESERVE); if (IS_ERR(file)) { error = PTR_ERR(file); goto err_fd; } - info = SHMEM_I(file_inode(file)); file->f_mode |= FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE; file->f_flags |= O_RDWR | O_LARGEFILE; - if (flags & MFD_ALLOW_SEALING) + + if (flags & MFD_ALLOW_SEALING) { + /* + * flags check at beginning of function ensures + * this is not a hugetlbfs (MFD_HUGETLB) file. + */ + info = SHMEM_I(file_inode(file)); info->seals &= ~F_SEAL_SEAL; + } fd_install(fd, file); kfree(name); -- cgit