diff options
Diffstat (limited to 'kernel/exit.c')
| -rw-r--r-- | kernel/exit.c | 419 |
1 files changed, 231 insertions, 188 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 8f14b866f9f6..c5548faa9f37 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -6,6 +6,12 @@ #include <linux/mm.h> #include <linux/slab.h> +#include <linux/sched/autogroup.h> +#include <linux/sched/mm.h> +#include <linux/sched/stat.h> +#include <linux/sched/task.h> +#include <linux/sched/task_stack.h> +#include <linux/sched/cputime.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/capability.h> @@ -14,7 +20,6 @@ #include <linux/tty.h> #include <linux/iocontext.h> #include <linux/key.h> -#include <linux/security.h> #include <linux/cpu.h> #include <linux/acct.h> #include <linux/tsacct_kern.h> @@ -55,6 +60,8 @@ #include <linux/shm.h> #include <linux/kcov.h> #include <linux/random.h> +#include <linux/rcuwait.h> +#include <linux/compat.h> #include <linux/uaccess.h> #include <asm/unistd.h> @@ -86,7 +93,7 @@ static void __exit_signal(struct task_struct *tsk) bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *uninitialized_var(tty); - cputime_t utime, stime; + u64 utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); @@ -282,17 +289,33 @@ retry: return task; } -struct task_struct *try_get_task_struct(struct task_struct **ptask) +void rcuwait_wake_up(struct rcuwait *w) { struct task_struct *task; rcu_read_lock(); - task = task_rcu_dereference(ptask); + + /* + * Order condition vs @task, such that everything prior to the load + * of @task is visible. This is the condition as to why the user called + * rcuwait_trywake() in the first place. Pairs with set_current_state() + * barrier (A) in rcuwait_wait_event(). + * + * WAIT WAKE + * [S] tsk = current [S] cond = true + * MB (A) MB (B) + * [L] cond [L] tsk + */ + smp_rmb(); /* (B) */ + + /* + * Avoid using task_rcu_dereference() magic as long as we are careful, + * see comment in rcuwait_wait_event() regarding ->exit_state. + */ + task = rcu_dereference(w->task); if (task) - get_task_struct(task); + wake_up_process(task); rcu_read_unlock(); - - return task; } /* @@ -468,12 +491,12 @@ assign_new_owner: * Turn us into a lazy TLB process if we * aren't already.. */ -static void exit_mm(struct task_struct *tsk) +static void exit_mm(void) { - struct mm_struct *mm = tsk->mm; + struct mm_struct *mm = current->mm; struct core_state *core_state; - mm_release(tsk, mm); + mm_release(current, mm); if (!mm) return; sync_mm_rss(mm); @@ -491,7 +514,7 @@ static void exit_mm(struct task_struct *tsk) up_read(&mm->mmap_sem); - self.task = tsk; + self.task = current; self.next = xchg(&core_state->dumper.next, &self); /* * Implies mb(), the result of xchg() must be visible @@ -501,22 +524,22 @@ static void exit_mm(struct task_struct *tsk) complete(&core_state->startup); for (;;) { - set_task_state(tsk, TASK_UNINTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); if (!self.task) /* see coredump_finish() */ break; freezable_schedule(); } - __set_task_state(tsk, TASK_RUNNING); + __set_current_state(TASK_RUNNING); down_read(&mm->mmap_sem); } - atomic_inc(&mm->mm_count); - BUG_ON(mm != tsk->active_mm); + mmgrab(mm); + BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ - task_lock(tsk); - tsk->mm = NULL; + task_lock(current); + current->mm = NULL; up_read(&mm->mmap_sem); enter_lazy_tlb(mm, current); - task_unlock(tsk); + task_unlock(current); mm_update_next_owner(mm); mmput(mm); if (test_thread_flag(TIF_MEMDIE)) @@ -578,15 +601,18 @@ static struct task_struct *find_new_reaper(struct task_struct *father, return thread; if (father->signal->has_child_subreaper) { + unsigned int ns_level = task_pid(father)->level; /* * Find the first ->is_child_subreaper ancestor in our pid_ns. - * We start from father to ensure we can not look into another - * namespace, this is safe because all its threads are dead. + * We can't check reaper != child_reaper to ensure we do not + * cross the namespaces, the exiting parent could be injected + * by setns() + fork(). + * We check pid->level, this is slightly more efficient than + * task_active_pid_ns(reaper) != task_active_pid_ns(father). */ - for (reaper = father; - !same_thread_group(reaper, child_reaper); + for (reaper = father->real_parent; + task_pid(reaper)->level == ns_level; reaper = reaper->real_parent) { - /* call_usermodehelper() descendants need this check */ if (reaper == &init_task) break; if (!reaper->signal->is_child_subreaper) @@ -823,7 +849,7 @@ void __noreturn do_exit(long code) tsk->exit_code = code; taskstats_exit(tsk, group_dead); - exit_mm(tsk); + exit_mm(); if (group_dead) acct_process(); @@ -956,16 +982,23 @@ SYSCALL_DEFINE1(exit_group, int, error_code) return 0; } +struct waitid_info { + pid_t pid; + uid_t uid; + int status; + int cause; +}; + struct wait_opts { enum pid_type wo_type; int wo_flags; struct pid *wo_pid; - struct siginfo __user *wo_info; - int __user *wo_stat; - struct rusage __user *wo_rusage; + struct waitid_info *wo_info; + int wo_stat; + struct rusage *wo_rusage; - wait_queue_t child_wait; + wait_queue_entry_t child_wait; int notask_error; }; @@ -1010,34 +1043,6 @@ eligible_child(struct wait_opts *wo, bool ptrace, struct task_struct *p) return 1; } -static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, - pid_t pid, uid_t uid, int why, int status) -{ - struct siginfo __user *infop; - int retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; - - put_task_struct(p); - infop = wo->wo_info; - if (infop) { - if (!retval) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval) - retval = put_user(0, &infop->si_errno); - if (!retval) - retval = put_user((short)why, &infop->si_code); - if (!retval) - retval = put_user(pid, &infop->si_pid); - if (!retval) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = put_user(status, &infop->si_status); - } - if (!retval) - retval = pid; - return retval; -} - /* * Handle sys_wait4 work for one task in state EXIT_ZOMBIE. We hold * read_lock(&tasklist_lock) on entry. If we return zero, we still hold @@ -1046,30 +1051,23 @@ static int wait_noreap_copyout(struct wait_opts *wo, struct task_struct *p, */ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) { - int state, retval, status; + int state, status; pid_t pid = task_pid_vnr(p); uid_t uid = from_kuid_munged(current_user_ns(), task_uid(p)); - struct siginfo __user *infop; + struct waitid_info *infop; if (!likely(wo->wo_flags & WEXITED)) return 0; if (unlikely(wo->wo_flags & WNOWAIT)) { - int exit_code = p->exit_code; - int why; - + status = p->exit_code; get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); - - if ((exit_code & 0x7f) == 0) { - why = CLD_EXITED; - status = exit_code >> 8; - } else { - why = (exit_code & 0x80) ? CLD_DUMPED : CLD_KILLED; - status = exit_code & 0x7f; - } - return wait_noreap_copyout(wo, p, pid, uid, why, status); + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); + put_task_struct(p); + goto out_info; } /* * Move the task's state to DEAD/TRACE, only one thread can do this. @@ -1091,7 +1089,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) struct signal_struct *sig = p->signal; struct signal_struct *psig = current->signal; unsigned long maxrss; - cputime_t tgutime, tgstime; + u64 tgutime, tgstime; /* * The resource counters for the group leader are in its @@ -1142,38 +1140,11 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) spin_unlock_irq(¤t->sighand->siglock); } - retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); status = (p->signal->flags & SIGNAL_GROUP_EXIT) ? p->signal->group_exit_code : p->exit_code; - if (!retval && wo->wo_stat) - retval = put_user(status, wo->wo_stat); - - infop = wo->wo_info; - if (!retval && infop) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval && infop) - retval = put_user(0, &infop->si_errno); - if (!retval && infop) { - int why; - - if ((status & 0x7f) == 0) { - why = CLD_EXITED; - status >>= 8; - } else { - why = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; - status &= 0x7f; - } - retval = put_user((short)why, &infop->si_code); - if (!retval) - retval = put_user(status, &infop->si_status); - } - if (!retval && infop) - retval = put_user(pid, &infop->si_pid); - if (!retval && infop) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = pid; + wo->wo_stat = status; if (state == EXIT_TRACE) { write_lock_irq(&tasklist_lock); @@ -1190,7 +1161,21 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) if (state == EXIT_DEAD) release_task(p); - return retval; +out_info: + infop = wo->wo_info; + if (infop) { + if ((status & 0x7f) == 0) { + infop->cause = CLD_EXITED; + infop->status = status >> 8; + } else { + infop->cause = (status & 0x80) ? CLD_DUMPED : CLD_KILLED; + infop->status = status & 0x7f; + } + infop->pid = pid; + infop->uid = uid; + } + + return pid; } static int *task_stopped_code(struct task_struct *p, bool ptrace) @@ -1226,8 +1211,8 @@ static int *task_stopped_code(struct task_struct *p, bool ptrace) static int wait_task_stopped(struct wait_opts *wo, int ptrace, struct task_struct *p) { - struct siginfo __user *infop; - int retval, exit_code, *p_code, why; + struct waitid_info *infop; + int exit_code, *p_code, why; uid_t uid = 0; /* unneeded, required by compiler */ pid_t pid; @@ -1272,34 +1257,21 @@ unlock_sig: why = ptrace ? CLD_TRAPPED : CLD_STOPPED; read_unlock(&tasklist_lock); sched_annotate_sleep(); + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); + put_task_struct(p); - if (unlikely(wo->wo_flags & WNOWAIT)) - return wait_noreap_copyout(wo, p, pid, uid, why, exit_code); - - retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; - if (!retval && wo->wo_stat) - retval = put_user((exit_code << 8) | 0x7f, wo->wo_stat); + if (likely(!(wo->wo_flags & WNOWAIT))) + wo->wo_stat = (exit_code << 8) | 0x7f; infop = wo->wo_info; - if (!retval && infop) - retval = put_user(SIGCHLD, &infop->si_signo); - if (!retval && infop) - retval = put_user(0, &infop->si_errno); - if (!retval && infop) - retval = put_user((short)why, &infop->si_code); - if (!retval && infop) - retval = put_user(exit_code, &infop->si_status); - if (!retval && infop) - retval = put_user(pid, &infop->si_pid); - if (!retval && infop) - retval = put_user(uid, &infop->si_uid); - if (!retval) - retval = pid; - put_task_struct(p); - - BUG_ON(!retval); - return retval; + if (infop) { + infop->cause = why; + infop->status = exit_code; + infop->pid = pid; + infop->uid = uid; + } + return pid; } /* @@ -1310,7 +1282,7 @@ unlock_sig: */ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) { - int retval; + struct waitid_info *infop; pid_t pid; uid_t uid; @@ -1335,22 +1307,20 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) get_task_struct(p); read_unlock(&tasklist_lock); sched_annotate_sleep(); + if (wo->wo_rusage) + getrusage(p, RUSAGE_BOTH, wo->wo_rusage); + put_task_struct(p); - if (!wo->wo_info) { - retval = wo->wo_rusage - ? getrusage(p, RUSAGE_BOTH, wo->wo_rusage) : 0; - put_task_struct(p); - if (!retval && wo->wo_stat) - retval = put_user(0xffff, wo->wo_stat); - if (!retval) - retval = pid; + infop = wo->wo_info; + if (!infop) { + wo->wo_stat = 0xffff; } else { - retval = wait_noreap_copyout(wo, p, pid, uid, - CLD_CONTINUED, SIGCONT); - BUG_ON(retval == 0); + infop->cause = CLD_CONTINUED; + infop->pid = pid; + infop->uid = uid; + infop->status = SIGCONT; } - - return retval; + return pid; } /* @@ -1360,7 +1330,7 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p) * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; * then ->notask_error is 0 if @p is an eligible child, - * or another error from security_task_wait(), or still -ECHILD. + * or still -ECHILD. */ static int wait_consider_task(struct wait_opts *wo, int ptrace, struct task_struct *p) @@ -1380,20 +1350,6 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, if (!ret) return ret; - ret = security_task_wait(p); - if (unlikely(ret < 0)) { - /* - * If we have not yet seen any eligible child, - * then let this error code replace -ECHILD. - * A permission error will give the user a clue - * to look for security policy problems, rather - * than for mysterious wait bugs. - */ - if (wo->notask_error) - wo->notask_error = ret; - return 0; - } - if (unlikely(exit_state == EXIT_TRACE)) { /* * ptrace == 0 means we are the natural parent. In this case @@ -1486,7 +1442,7 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace, * Returns nonzero for a final return, when we have unlocked tasklist_lock. * Returns zero if the search for a child should continue; then * ->notask_error is 0 if there were any eligible children, - * or another error from security_task_wait(), or still -ECHILD. + * or still -ECHILD. */ static int do_wait_thread(struct wait_opts *wo, struct task_struct *tsk) { @@ -1516,7 +1472,7 @@ static int ptrace_do_wait(struct wait_opts *wo, struct task_struct *tsk) return 0; } -static int child_wait_callback(wait_queue_t *wait, unsigned mode, +static int child_wait_callback(wait_queue_entry_t *wait, unsigned mode, int sync, void *key) { struct wait_opts *wo = container_of(wait, struct wait_opts, @@ -1592,8 +1548,8 @@ end: return retval; } -SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, - infop, int, options, struct rusage __user *, ru) +static long kernel_waitid(int which, pid_t upid, struct waitid_info *infop, + int options, struct rusage *ru) { struct wait_opts wo; struct pid *pid = NULL; @@ -1631,38 +1587,48 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, wo.wo_pid = pid; wo.wo_flags = options; wo.wo_info = infop; - wo.wo_stat = NULL; wo.wo_rusage = ru; ret = do_wait(&wo); - if (ret > 0) { - ret = 0; - } else if (infop) { - /* - * For a WNOHANG return, clear out all the fields - * we would set so the user can easily tell the - * difference. - */ - if (!ret) - ret = put_user(0, &infop->si_signo); - if (!ret) - ret = put_user(0, &infop->si_errno); - if (!ret) - ret = put_user(0, &infop->si_code); - if (!ret) - ret = put_user(0, &infop->si_pid); - if (!ret) - ret = put_user(0, &infop->si_uid); - if (!ret) - ret = put_user(0, &infop->si_status); - } - put_pid(pid); return ret; } -SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, - int, options, struct rusage __user *, ru) +SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *, + infop, int, options, struct rusage __user *, ru) +{ + struct rusage r; + struct waitid_info info = {.status = 0}; + long err = kernel_waitid(which, upid, &info, options, ru ? &r : NULL); + int signo = 0; + if (err > 0) { + signo = SIGCHLD; + err = 0; + } + + if (!err) { + if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) + return -EFAULT; + } + if (!infop) + return err; + + user_access_begin(); + unsafe_put_user(signo, &infop->si_signo, Efault); + unsafe_put_user(0, &infop->si_errno, Efault); + unsafe_put_user((short)info.cause, &infop->si_code, Efault); + unsafe_put_user(info.pid, &infop->si_pid, Efault); + unsafe_put_user(info.uid, &infop->si_uid, Efault); + unsafe_put_user(info.status, &infop->si_status, Efault); + user_access_end(); + return err; +Efault: + user_access_end(); + return -EFAULT; +} + +long kernel_wait4(pid_t upid, int __user *stat_addr, int options, + struct rusage *ru) { struct wait_opts wo; struct pid *pid = NULL; @@ -1673,6 +1639,10 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, __WNOTHREAD|__WCLONE|__WALL)) return -EINVAL; + /* -INT_MIN is not defined */ + if (upid == INT_MIN) + return -ESRCH; + if (upid == -1) type = PIDTYPE_MAX; else if (upid < 0) { @@ -1690,14 +1660,29 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, wo.wo_pid = pid; wo.wo_flags = options | WEXITED; wo.wo_info = NULL; - wo.wo_stat = stat_addr; + wo.wo_stat = 0; wo.wo_rusage = ru; ret = do_wait(&wo); put_pid(pid); + if (ret > 0 && stat_addr && put_user(wo.wo_stat, stat_addr)) + ret = -EFAULT; return ret; } +SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr, + int, options, struct rusage __user *, ru) +{ + struct rusage r; + long err = kernel_wait4(upid, stat_addr, options, ru ? &r : NULL); + + if (err > 0) { + if (ru && copy_to_user(ru, &r, sizeof(struct rusage))) + return -EFAULT; + } + return err; +} + #ifdef __ARCH_WANT_SYS_WAITPID /* @@ -1710,3 +1695,61 @@ SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options) } #endif + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE4(wait4, + compat_pid_t, pid, + compat_uint_t __user *, stat_addr, + int, options, + struct compat_rusage __user *, ru) +{ + struct rusage r; + long err = kernel_wait4(pid, stat_addr, options, ru ? &r : NULL); + if (err > 0) { + if (ru && put_compat_rusage(&r, ru)) + return -EFAULT; + } + return err; +} + +COMPAT_SYSCALL_DEFINE5(waitid, + int, which, compat_pid_t, pid, + struct compat_siginfo __user *, infop, int, options, + struct compat_rusage __user *, uru) +{ + struct rusage ru; + struct waitid_info info = {.status = 0}; + long err = kernel_waitid(which, pid, &info, options, uru ? &ru : NULL); + int signo = 0; + if (err > 0) { + signo = SIGCHLD; + err = 0; + } + + if (!err && uru) { + /* kernel_waitid() overwrites everything in ru */ + if (COMPAT_USE_64BIT_TIME) + err = copy_to_user(uru, &ru, sizeof(ru)); + else + err = put_compat_rusage(&ru, uru); + if (err) + return -EFAULT; + } + + if (!infop) + return err; + + user_access_begin(); + unsafe_put_user(signo, &infop->si_signo, Efault); + unsafe_put_user(0, &infop->si_errno, Efault); + unsafe_put_user((short)info.cause, &infop->si_code, Efault); + unsafe_put_user(info.pid, &infop->si_pid, Efault); + unsafe_put_user(info.uid, &infop->si_uid, Efault); + unsafe_put_user(info.status, &infop->si_status, Efault); + user_access_end(); + return err; +Efault: + user_access_end(); + return -EFAULT; +} +#endif |