From 9746c9be0bb5860592e048468b37974be4c59d44 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sat, 11 Jul 2020 06:45:36 -0500 Subject: exec: Remove unnecessary spaces from binfmts.h The general convention in the Linux kernel is to define a pointer member as "type *name". The declaration of struct linux_binprm has several pointers defined as "type * name". Update them to the form of "type *name" for consistency. Suggested-by: Kees Cook Reviewed-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/87v9iq6x9x.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- include/linux/binfmts.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 7c27d7b57871..eb5cb8df5485 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -45,15 +45,15 @@ struct linux_binprm { #ifdef __alpha__ unsigned int taso:1; #endif - struct file * executable; /* Executable to pass to the interpreter */ - struct file * interpreter; - struct file * file; + struct file *executable; /* Executable to pass to the interpreter */ + struct file *interpreter; + struct file *file; struct cred *cred; /* new credentials */ int unsafe; /* how unsafe this exec is (mask of LSM_UNSAFE_*) */ unsigned int per_clear; /* bits to clear in current->personality */ int argc, envc; - const char * filename; /* Name of binary as seen by procps */ - const char * interp; /* Name of the binary really executed. Most + const char *filename; /* Name of binary as seen by procps */ + const char *interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ unsigned interp_flags; -- cgit From 60d9ad1d1d7f15964d23f6e71a7adcf1bde0e18e Mon Sep 17 00:00:00 2001 From: "Eric W. 
Biederman" Date: Sat, 11 Jul 2020 08:16:15 -0500 Subject: exec: Move initialization of bprm->filename into alloc_bprm Currently it is necessary for the usermode helper code and the code that launches init to use set_fs so that pages coming from the kernel look like they are coming from userspace. To allow that usage of set_fs to be removed cleanly the argument copying from userspace needs to happen earlier. Move the computation of bprm->filename and possible allocation of a name in the case of execveat into alloc_bprm to make that possible. The executable name, the arguments, and the environment are copied into the new usermode stack which is stored in bprm until exec passes the point of no return. As the executable name is copied first onto the usermode stack it needs to be known. As there are no dependencies to computing the executable name, compute it early in alloc_bprm. As an implementation detail, if the filename needs to be generated because it embeds a file descriptor, store that filename in a new field bprm->fdpath, and free it in free_bprm. Previously this was done in an independent variable pathbuf. I have renamed pathbuf to fdpath because fdpath is more suggestive of what kind of path is in the variable. I moved fdpath into struct linux_binprm because it is tightly tied to the other variables in struct linux_binprm, and as such is needed to allow the call to alloc_bprm to move. Reviewed-by: Kees Cook Reviewed-by: Christoph Hellwig Link: https://lkml.kernel.org/r/87k0z66x8f.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- fs/exec.c | 61 ++++++++++++++++++++++++++----------------------- include/linux/binfmts.h | 1 + 2 files changed, 34 insertions(+), 28 deletions(-) (limited to 'include/linux') diff --git a/fs/exec.c b/fs/exec.c index 526156d6461d..7e8af27dd199 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1557,15 +1557,37 @@ static void free_bprm(struct linux_binprm *bprm) /* If a binfmt changed the interp, free it. 
*/ if (bprm->interp != bprm->filename) kfree(bprm->interp); + kfree(bprm->fdpath); kfree(bprm); } -static struct linux_binprm *alloc_bprm(void) +static struct linux_binprm *alloc_bprm(int fd, struct filename *filename) { struct linux_binprm *bprm = kzalloc(sizeof(*bprm), GFP_KERNEL); + int retval = -ENOMEM; if (!bprm) - return ERR_PTR(-ENOMEM); + goto out; + + if (fd == AT_FDCWD || filename->name[0] == '/') { + bprm->filename = filename->name; + } else { + if (filename->name[0] == '\0') + bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); + else + bprm->fdpath = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", + fd, filename->name); + if (!bprm->fdpath) + goto out_free; + + bprm->filename = bprm->fdpath; + } + bprm->interp = bprm->filename; return bprm; + +out_free: + free_bprm(bprm); +out: + return ERR_PTR(retval); } int bprm_change_interp(const char *interp, struct linux_binprm *bprm) @@ -1831,7 +1853,6 @@ static int do_execveat_common(int fd, struct filename *filename, struct user_arg_ptr envp, int flags) { - char *pathbuf = NULL; struct linux_binprm *bprm; struct file *file; struct files_struct *displaced; @@ -1856,7 +1877,7 @@ static int do_execveat_common(int fd, struct filename *filename, * further execve() calls fail. */ current->flags &= ~PF_NPROC_EXCEEDED; - bprm = alloc_bprm(); + bprm = alloc_bprm(fd, filename); if (IS_ERR(bprm)) { retval = PTR_ERR(bprm); goto out_ret; @@ -1881,28 +1902,14 @@ static int do_execveat_common(int fd, struct filename *filename, sched_exec(); bprm->file = file; - if (fd == AT_FDCWD || filename->name[0] == '/') { - bprm->filename = filename->name; - } else { - if (filename->name[0] == '\0') - pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d", fd); - else - pathbuf = kasprintf(GFP_KERNEL, "/dev/fd/%d/%s", - fd, filename->name); - if (!pathbuf) { - retval = -ENOMEM; - goto out_unmark; - } - /* - * Record that a name derived from an O_CLOEXEC fd will be - * inaccessible after exec. 
Relies on having exclusive access to - * current->files (due to unshare_files above). - */ - if (close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) - bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; - bprm->filename = pathbuf; - } - bprm->interp = bprm->filename; + /* + * Record that a name derived from an O_CLOEXEC fd will be + * inaccessible after exec. Relies on having exclusive access to + * current->files (due to unshare_files above). + */ + if (bprm->fdpath && + close_on_exec(fd, rcu_dereference_raw(current->files->fdt))) + bprm->interp_flags |= BINPRM_FLAGS_PATH_INACCESSIBLE; retval = bprm_mm_init(bprm); if (retval) @@ -1941,7 +1948,6 @@ static int do_execveat_common(int fd, struct filename *filename, acct_update_integrals(current); task_numa_free(current, false); free_bprm(bprm); - kfree(pathbuf); putname(filename); if (displaced) put_files_struct(displaced); @@ -1970,7 +1976,6 @@ out_files: reset_files_struct(displaced); out_free: free_bprm(bprm); - kfree(pathbuf); out_ret: putname(filename); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index eb5cb8df5485..8e9e1b0c8eb8 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -56,6 +56,7 @@ struct linux_binprm { const char *interp; /* Name of the binary really executed. Most of the time same as filename, but could be different for binfmt_{misc,script} */ + const char *fdpath; /* generated filename for execveat */ unsigned interp_flags; int execfd; /* File descriptor of the executable */ unsigned long loader, exec; -- cgit From be619f7f063a49c656f620a46af4f8ea3e759e91 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 13 Jul 2020 12:06:48 -0500 Subject: exec: Implement kernel_execve To allow the kernel not to play games with set_fs to call exec implement kernel_execve. The function kernel_execve takes pointers into kernel memory and copies the values pointed to onto the new userspace stack. 
The calls with arguments from kernel space of do_execve are replaced with calls to kernel_execve. The calls do_execve and do_execveat are made static as there are now no callers outside of exec. The comments that mention do_execve are updated to refer to kernel_execve or execve depending on the circumstances. In addition to correcting the comments, this makes it easy to grep for do_execve and verify it is not used. Inspired-by: https://lkml.kernel.org/r/20200627072704.2447163-1-hch@lst.de Reviewed-by: Kees Cook Link: https://lkml.kernel.org/r/87wo365ikj.fsf@x220.int.ebiederm.org Signed-off-by: "Eric W. Biederman" --- arch/x86/entry/entry_32.S | 2 +- arch/x86/entry/entry_64.S | 2 +- arch/x86/kernel/unwind_frame.c | 2 +- fs/exec.c | 88 +++++++++++++++++++++++++++++++++++++++++- include/linux/binfmts.h | 9 +---- init/main.c | 4 +- kernel/umh.c | 6 +-- security/tomoyo/common.h | 2 +- security/tomoyo/domain.c | 4 +- security/tomoyo/tomoyo.c | 4 +- 10 files changed, 100 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 024d7d276cd4..8f4e085ee06d 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -854,7 +854,7 @@ SYM_CODE_START(ret_from_fork) CALL_NOSPEC ebx /* * A kernel thread is allowed to return here after successfully - * calling do_execve(). Exit to userspace to complete the execve() + * calling kernel_execve(). Exit to userspace to complete the execve() * syscall. */ movl $0, PT_EAX(%esp) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index d2a00c97e53f..73c7e255256b 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -293,7 +293,7 @@ SYM_CODE_START(ret_from_fork) CALL_NOSPEC rbx /* * A kernel thread is allowed to return here after successfully - * calling do_execve(). Exit to userspace to complete the execve() + * calling kernel_execve(). Exit to userspace to complete the execve() * syscall. 
*/ movq $0, RAX(%rsp) diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 722a85f3b2dd..e40b4942157f 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -275,7 +275,7 @@ bool unwind_next_frame(struct unwind_state *state) * This user_mode() check is slightly broader than a PF_KTHREAD * check because it also catches the awkward situation where a * newly forked kthread transitions into a user task by calling - * do_execve(), which eventually clears PF_KTHREAD. + * kernel_execve(), which eventually clears PF_KTHREAD. */ if (!user_mode(regs)) goto the_end; diff --git a/fs/exec.c b/fs/exec.c index f8135dc149b3..3698252719a3 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -448,6 +448,23 @@ static int count(struct user_arg_ptr argv, int max) return i; } +static int count_strings_kernel(const char *const *argv) +{ + int i; + + if (!argv) + return 0; + + for (i = 0; argv[i]; ++i) { + if (i >= MAX_ARG_STRINGS) + return -E2BIG; + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; + cond_resched(); + } + return i; +} + static int bprm_stack_limits(struct linux_binprm *bprm) { unsigned long limit, ptr_size; @@ -624,6 +641,20 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm) } EXPORT_SYMBOL(copy_string_kernel); +static int copy_strings_kernel(int argc, const char *const *argv, + struct linux_binprm *bprm) +{ + while (argc-- > 0) { + int ret = copy_string_kernel(argv[argc], bprm); + if (ret < 0) + return ret; + if (fatal_signal_pending(current)) + return -ERESTARTNOHAND; + cond_resched(); + } + return 0; +} + #ifdef CONFIG_MMU /* @@ -1991,7 +2022,60 @@ out_ret: return retval; } -int do_execve(struct filename *filename, +int kernel_execve(const char *kernel_filename, + const char *const *argv, const char *const *envp) +{ + struct filename *filename; + struct linux_binprm *bprm; + int fd = AT_FDCWD; + int retval; + + filename = getname_kernel(kernel_filename); + if (IS_ERR(filename)) + return 
PTR_ERR(filename); + + bprm = alloc_bprm(fd, filename); + if (IS_ERR(bprm)) { + retval = PTR_ERR(bprm); + goto out_ret; + } + + retval = count_strings_kernel(argv); + if (retval < 0) + goto out_free; + bprm->argc = retval; + + retval = count_strings_kernel(envp); + if (retval < 0) + goto out_free; + bprm->envc = retval; + + retval = bprm_stack_limits(bprm); + if (retval < 0) + goto out_free; + + retval = copy_string_kernel(bprm->filename, bprm); + if (retval < 0) + goto out_free; + bprm->exec = bprm->p; + + retval = copy_strings_kernel(bprm->envc, envp, bprm); + if (retval < 0) + goto out_free; + + retval = copy_strings_kernel(bprm->argc, argv, bprm); + if (retval < 0) + goto out_free; + + retval = bprm_execve(bprm, fd, filename, 0); +out_free: + free_bprm(bprm); +out_ret: + putname(filename); + return retval; +} + +static int do_execve(struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp) { @@ -2000,7 +2084,7 @@ int do_execve(struct filename *filename, return do_execveat_common(AT_FDCWD, filename, argv, envp, 0); } -int do_execveat(int fd, struct filename *filename, +static int do_execveat(int fd, struct filename *filename, const char __user *const __user *__argv, const char __user *const __user *__envp, int flags) diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 8e9e1b0c8eb8..0571701ab1c5 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -135,12 +135,7 @@ int copy_string_kernel(const char *arg, struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); -extern int do_execve(struct filename *, - const char __user * const __user *, - const char __user * const __user *); -extern int do_execveat(int, struct filename *, - const char __user * const __user *, - const char __user * const __user *, - int); +int kernel_execve(const char *filename, + const char *const *argv, const char 
*const *envp); #endif /* _LINUX_BINFMTS_H */ diff --git a/init/main.c b/init/main.c index 0ead83e86b5a..78ccec5c28f3 100644 --- a/init/main.c +++ b/init/main.c @@ -1329,9 +1329,7 @@ static int run_init_process(const char *init_filename) pr_debug(" with environment:\n"); for (p = envp_init; *p; p++) pr_debug(" %s\n", *p); - return do_execve(getname_kernel(init_filename), - (const char __user *const __user *)argv_init, - (const char __user *const __user *)envp_init); + return kernel_execve(init_filename, argv_init, envp_init); } static int try_to_run_init_process(const char *init_filename) diff --git a/kernel/umh.c b/kernel/umh.c index 6ca2096298b9..a25433f9cd9a 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -98,9 +98,9 @@ static int call_usermodehelper_exec_async(void *data) commit_creds(new); - retval = do_execve(getname_kernel(sub_info->path), - (const char __user *const __user *)sub_info->argv, - (const char __user *const __user *)sub_info->envp); + retval = kernel_execve(sub_info->path, + (const char *const *)sub_info->argv, + (const char *const *)sub_info->envp); out: sub_info->retval = retval; /* diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 050473df5809..85246b9df7ca 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -425,7 +425,7 @@ struct tomoyo_request_info { struct tomoyo_obj_info *obj; /* * For holding parameters specific to execve() request. - * NULL if not dealing do_execve(). + * NULL if not dealing execve(). */ struct tomoyo_execve *ee; struct tomoyo_domain_info *domain; diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 7869d6a9980b..53b3e1f5f227 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -767,7 +767,7 @@ retry: /* * Check for domain transition preference if "file execute" matched. 
- * If preference is given, make do_execve() fail if domain transition + * If preference is given, make execve() fail if domain transition * has failed, for domain transition preference should be used with * destination domain defined. */ @@ -810,7 +810,7 @@ force_reset_domain: snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>", candidate->name); /* - * Make do_execve() fail if domain transition across namespaces + * Make execve() fail if domain transition across namespaces * has failed. */ reject_on_transition_failure = true; diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index f9adddc42ac8..1f3cd432d830 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -93,7 +93,7 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) struct tomoyo_task *s = tomoyo_task(current); /* - * Execute permission is checked against pathname passed to do_execve() + * Execute permission is checked against pathname passed to execve() * using current domain. */ if (!s->old_domain_info) { @@ -307,7 +307,7 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, */ static int tomoyo_file_open(struct file *f) { - /* Don't check read permission here if called from do_execve(). */ + /* Don't check read permission here if called from execve(). */ if (current->in_execve) return 0; return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, -- cgit