diff options
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r-- | include/linux/sched.h | 241 |
1 files changed, 144 insertions, 97 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h index e9c009dc3a4a..451e241f32c5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -29,7 +29,6 @@ struct sched_param { #include <asm/page.h> #include <asm/ptrace.h> -#include <linux/cputime.h> #include <linux/smp.h> #include <linux/sem.h> @@ -227,7 +226,7 @@ extern void proc_sched_set_task(struct task_struct *p); extern char ___assert_task_state[1 - 2*!!( sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)]; -/* Convenience macros for the sake of set_task_state */ +/* Convenience macros for the sake of set_current_state */ #define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) #define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) #define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) @@ -254,28 +253,6 @@ extern char ___assert_task_state[1 - 2*!!( #ifdef CONFIG_DEBUG_ATOMIC_SLEEP -#define __set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - (tsk)->state = (state_value); \ - } while (0) -#define set_task_state(tsk, state_value) \ - do { \ - (tsk)->task_state_change = _THIS_IP_; \ - smp_store_mb((tsk)->state, (state_value)); \ - } while (0) - -/* - * set_current_state() includes a barrier so that the write of current->state - * is correctly serialised wrt the caller's subsequent test of whether to - * actually sleep: - * - * set_current_state(TASK_UNINTERRUPTIBLE); - * if (do_i_need_to_sleep()) - * schedule(); - * - * If the caller does not need such serialisation then use __set_current_state() - */ #define __set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ @@ -284,26 +261,43 @@ extern char ___assert_task_state[1 - 2*!!( #define set_current_state(state_value) \ do { \ current->task_state_change = _THIS_IP_; \ - smp_store_mb(current->state, (state_value)); \ + smp_store_mb(current->state, (state_value)); \ } while (0) #else - -#define __set_task_state(tsk, state_value) \ - do { (tsk)->state = (state_value); } while (0) -#define set_task_state(tsk, state_value) \ - smp_store_mb((tsk)->state, (state_value)) - /* * set_current_state() includes a barrier so that the write of current->state * is correctly serialised wrt the caller's subsequent test of whether to * actually sleep: * + * for (;;) { * set_current_state(TASK_UNINTERRUPTIBLE); - * if (do_i_need_to_sleep()) - * schedule(); + * if (!need_sleep) + * break; + * + * schedule(); + * } + * __set_current_state(TASK_RUNNING); + * + * If the caller does not need such serialisation (because, for instance, the + * condition test and condition change and wakeup are under the same lock) then + * use __set_current_state(). + * + * The above is typically ordered against the wakeup, which does: + * + * need_sleep = false; + * wake_up_state(p, TASK_UNINTERRUPTIBLE); * - * If the caller does not need such serialisation then use __set_current_state() + * Where wake_up_state() (and all other wakeup primitives) imply enough + * barriers to order the store of the variable against wakeup. + * + * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is, + * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a + * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). + * + * This is obviously fine, since they both store the exact same value. + * + * Also see the comments of try_to_wake_up(). */ #define __set_current_state(state_value) \ do { current->state = (state_value); } while (0) @@ -441,12 +435,10 @@ extern signed long schedule_timeout_idle(signed long timeout); asmlinkage void schedule(void); extern void schedule_preempt_disabled(void); +extern int __must_check io_schedule_prepare(void); +extern void io_schedule_finish(int token); extern long io_schedule_timeout(long timeout); - -static inline void io_schedule(void) -{ - io_schedule_timeout(MAX_SCHEDULE_TIMEOUT); -} +extern void io_schedule(void); void __noreturn do_task_dead(void); @@ -520,7 +512,11 @@ static inline int get_dumpable(struct mm_struct *mm) /* leave room for more dump flags */ #define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */ #define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */ -#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ +/* + * This one-shot flag is dropped due to necessity of changing exe once again + * on NFS restore + */ +//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */ #define MMF_HAS_UPROBES 19 /* has uprobes */ #define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */ @@ -541,15 +537,13 @@ struct pacct_struct { int ac_flag; long ac_exitcode; unsigned long ac_mem; - cputime_t ac_utime, ac_stime; + u64 ac_utime, ac_stime; unsigned long ac_minflt, ac_majflt; }; struct cpu_itimer { - cputime_t expires; - cputime_t incr; - u32 error; - u32 incr_error; + u64 expires; + u64 incr; }; /** @@ -563,8 +557,8 @@ struct cpu_itimer { */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; raw_spinlock_t lock; #endif }; @@ -579,8 +573,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev) /** * struct task_cputime - collected CPU time counts - * @utime: time spent in user mode, in &cputime_t units - * @stime: time spent in kernel mode, in &cputime_t units + * @utime: time spent in user mode, in nanoseconds + * @stime: time spent in kernel mode, in nanoseconds * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * * This structure groups together three kinds of CPU time that are tracked for @@ -588,8 +582,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev) * these counts together and treat all three of them in parallel. */ struct task_cputime { - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; unsigned long long sum_exec_runtime; }; @@ -598,13 +592,6 @@ struct task_cputime { #define prof_exp stime #define sched_exp sum_exec_runtime -#define INIT_CPUTIME \ - (struct task_cputime) { \ - .utime = 0, \ - .stime = 0, \ - .sum_exec_runtime = 0, \ - } - /* * This is the atomic variant of task_cputime, which can be used for * storing and updating task_cputime statistics without locking. @@ -710,13 +697,14 @@ struct signal_struct { unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; +#ifdef CONFIG_POSIX_TIMERS + /* POSIX.1b Interval Timers */ int posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; - struct pid *leader_pid; ktime_t it_real_incr; /* @@ -735,12 +723,16 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif - struct list_head cpu_timers[3]; - struct pid *tty_old_pgrp; /* boolean value for session group leader */ @@ -758,9 +750,9 @@ struct signal_struct { * in __exit_signal, except for the group leader. */ seqlock_t stats_lock; - cputime_t utime, stime, cutime, cstime; - cputime_t gtime; - cputime_t cgtime; + u64 utime, stime, cutime, cstime; + u64 gtime; + u64 cgtime; struct prev_cputime prev_cputime; unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw; unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt; @@ -830,6 +822,16 @@ struct signal_struct { #define SIGNAL_UNKILLABLE 0x00000040 /* for init: ignore fatal signals */ +#define SIGNAL_STOP_MASK (SIGNAL_CLD_MASK | SIGNAL_STOP_STOPPED | \ + SIGNAL_STOP_CONTINUED) + +static inline void signal_set_stop_flags(struct signal_struct *sig, + unsigned int flags) +{ + WARN_ON(sig->flags & (SIGNAL_GROUP_EXIT|SIGNAL_GROUP_COREDUMP)); + sig->flags = (sig->flags & ~SIGNAL_STOP_MASK) | flags; +} + /* If true, all threads except ->group_exit_task have pending SIGKILL */ static inline int signal_group_exit(const struct signal_struct *sig) { @@ -844,10 +846,6 @@ struct user_struct { atomic_t __count; /* reference count */ atomic_t processes; /* How many processes does this user have? */ atomic_t sigpending; /* How many pending signals does this user have? */ -#ifdef CONFIG_INOTIFY_USER - atomic_t inotify_watches; /* How many inotify watches does this user have? */ - atomic_t inotify_devs; /* How many inotify devs does this user have opened? */ -#endif #ifdef CONFIG_FANOTIFY atomic_t fanotify_listeners; #endif @@ -989,10 +987,10 @@ enum cpu_idle_type { * already in a wake queue, the wakeup will happen soon and the second * waker can just skip it. * - * The WAKE_Q macro declares and initializes the list head. + * The DEFINE_WAKE_Q macro declares and initializes the list head. * wake_up_q() does NOT reinitialize the list; it's expected to be - * called near the end of a function, where the fact that the queue is - * not used again will be easy to see by inspection. + * called near the end of a function. Otherwise, the list can be + * re-initialized for later re-use by wake_q_init(). * * Note that this can cause spurious wakeups. schedule() callers * must ensure the call is done inside a loop, confirming that the @@ -1009,9 +1007,15 @@ struct wake_q_head { #define WAKE_Q_TAIL ((struct wake_q_node *) 0x01) -#define WAKE_Q(name) \ +#define DEFINE_WAKE_Q(name) \ struct wake_q_head name = { WAKE_Q_TAIL, &name.first } +static inline void wake_q_init(struct wake_q_head *head) +{ + head->first = WAKE_Q_TAIL; + head->lastp = &head->first; +} + extern void wake_q_add(struct wake_q_head *head, struct task_struct *task); extern void wake_up_q(struct wake_q_head *head); @@ -1057,6 +1061,8 @@ static inline int cpu_numa_flags(void) } #endif +extern int arch_asym_cpu_priority(int cpu); + struct sched_domain_attr { int relax_domain_level; }; @@ -1627,8 +1633,11 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - cputime_t utime, stime, utimescaled, stimescaled; - cputime_t gtime; + u64 utime, stime; +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME + u64 utimescaled, stimescaled; +#endif + u64 gtime; struct prev_cputime prev_cputime; #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN seqcount_t vtime_seqcount; @@ -1652,10 +1661,13 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; +#ifdef CONFIG_POSIX_TIMERS struct task_cputime cputime_expires; struct list_head cpu_timers[3]; +#endif /* process credentials */ + const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ const struct cred __rcu *real_cred; /* objective and real subjective task * credentials (COW) */ const struct cred __rcu *cred; /* effective (overridable) subjective task @@ -1777,7 +1789,7 @@ struct task_struct { #if defined(CONFIG_TASK_XACCT) u64 acct_rss_mem1; /* accumulated rss usage */ u64 acct_vm_mem1; /* accumulated virtual memory usage */ - cputime_t acct_timexpd; /* stime + utime since last update */ + u64 acct_timexpd; /* stime + utime since last update */ #endif #ifdef CONFIG_CPUSETS nodemask_t mems_allowed; /* Protected by alloc_lock */ @@ -1791,6 +1803,9 @@ struct task_struct { /* cg_list protected by css_set_lock and tsk->alloc_lock */ struct list_head cg_list; #endif +#ifdef CONFIG_INTEL_RDT_A + int closid; +#endif #ifdef CONFIG_FUTEX struct robust_list_head __user *robust_list; #ifdef CONFIG_COMPAT @@ -2219,41 +2234,46 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime); -extern void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, cputime_t *stimescaled); -extern cputime_t task_gtime(struct task_struct *t); + u64 *utime, u64 *stime); +extern u64 task_gtime(struct task_struct *t); #else static inline void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime) + u64 *utime, u64 *stime) { - if (utime) - *utime = t->utime; - if (stime) - *stime = t->stime; + *utime = t->utime; + *stime = t->stime; } -static inline void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, - cputime_t *stimescaled) +static inline u64 task_gtime(struct task_struct *t) { - if (utimescaled) - *utimescaled = t->utimescaled; - if (stimescaled) - *stimescaled = t->stimescaled; + return t->gtime; } +#endif -static inline cputime_t task_gtime(struct task_struct *t) +#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) { - return t->gtime; + *utimescaled = t->utimescaled; + *stimescaled = t->stimescaled; +} +#else +static inline void task_cputime_scaled(struct task_struct *t, + u64 *utimescaled, + u64 *stimescaled) +{ + task_cputime(t, utimescaled, stimescaled); } #endif -extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); + +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); /* * Per process flags */ +#define PF_IDLE 0x00000002 /* I am an IDLE thread */ #define PF_EXITING 0x00000004 /* getting shut down */ #define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ #define PF_VCPU 0x00000010 /* I'm a virtual CPU */ @@ -2444,6 +2464,10 @@ static inline void calc_load_enter_idle(void) { } static inline void calc_load_exit_idle(void) { } #endif /* CONFIG_NO_HZ_COMMON */ +#ifndef cpu_relax_yield +#define cpu_relax_yield() cpu_relax() +#endif + /* * Do not use outside of architecture code which knows its limitations. * @@ -2463,10 +2487,18 @@ extern u64 sched_clock_cpu(int cpu); extern void sched_clock_init(void); #ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +static inline void sched_clock_init_late(void) +{ +} + static inline void sched_clock_tick(void) { } +static inline void clear_sched_clock_stable(void) +{ +} + static inline void sched_clock_idle_sleep_event(void) { } @@ -2485,6 +2517,7 @@ static inline u64 local_clock(void) return sched_clock(); } #else +extern void sched_clock_init_late(void); /* * Architectures can set this to 1 if they have specified * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, @@ -2492,7 +2525,6 @@ static inline u64 local_clock(void) * is reliable after all: */ extern int sched_clock_stable(void); -extern void set_sched_clock_stable(void); extern void clear_sched_clock_stable(void); extern void sched_clock_tick(void); @@ -2611,7 +2643,7 @@ extern struct task_struct *idle_task(int cpu); */ static inline bool is_idle_task(const struct task_struct *p) { - return p->pid == 0; + return !!(p->flags & PF_IDLE); } extern struct task_struct *curr_task(int cpu); extern void ia64_set_curr_task(int cpu, struct task_struct *p); @@ -3015,6 +3047,9 @@ extern bool current_is_single_threaded(void); #define for_each_process_thread(p, t) \ for_each_process(p) for_each_thread(p, t) +typedef int (*proc_visitor)(struct task_struct *p, void *data); +void walk_process_tree(struct task_struct *top, proc_visitor, void *); + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; @@ -3508,6 +3543,18 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ +/* + * In order to reduce various lock holder preemption latencies provide an + * interface to see if a vCPU is currently running or not. + * + * This allows us to terminate optimistic spin loops and block, analogous to + * the native optimistic spin heuristic of testing if the lock owner task is + * running or not. + */ +#ifndef vcpu_is_preempted +# define vcpu_is_preempted(cpu) false +#endif + extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask); extern long sched_getaffinity(pid_t pid, struct cpumask *mask); |