aboutsummaryrefslogtreecommitdiff
path: root/include/linux/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h244
1 files changed, 231 insertions, 13 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e42ada26345..348f51b0ec92 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -219,9 +219,10 @@ extern void proc_sched_set_task(struct task_struct *p);
#define TASK_WAKING 256
#define TASK_PARKED 512
#define TASK_NOLOAD 1024
-#define TASK_STATE_MAX 2048
+#define TASK_NEW 2048
+#define TASK_STATE_MAX 4096
-#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPN"
+#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"
extern char ___assert_task_state[1 - 2*!!(
sizeof(TASK_STATE_TO_CHAR_STR)-1 != ilog2(TASK_STATE_MAX)+1)];
@@ -447,6 +448,8 @@ static inline void io_schedule(void)
io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
}
+void __noreturn do_task_dead(void);
+
struct nsproxy;
struct user_namespace;
@@ -521,7 +524,9 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_HAS_UPROBES 19 /* has uprobes */
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
-#define MMF_OOM_REAPED 21 /* mm has been already reaped */
+#define MMF_OOM_SKIP 21 /* mm is of no interest for the OOM killer */
+#define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */
+#define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK)
@@ -669,7 +674,6 @@ struct signal_struct {
atomic_t sigcnt;
atomic_t live;
int nr_threads;
- atomic_t oom_victims; /* # of TIF_MEDIE threads in this thread group */
struct list_head thread_head;
wait_queue_head_t wait_chldexit; /* for wait4() */
@@ -802,6 +806,8 @@ struct signal_struct {
short oom_score_adj; /* OOM kill score adjustment */
short oom_score_adj_min; /* OOM kill score adjustment min value.
* Only settable by CAP_SYS_RESOURCE. */
+ struct mm_struct *oom_mm; /* recorded mm when the thread group got
+ * killed by the oom killer */
struct mutex cred_guard_mutex; /* guard against foreign influences on
* credential calculations
@@ -1020,7 +1026,8 @@ extern void wake_up_q(struct wake_q_head *head);
#define SD_BALANCE_FORK 0x0008 /* Balance on fork, clone */
#define SD_BALANCE_WAKE 0x0010 /* Balance on wakeup */
#define SD_WAKE_AFFINE 0x0020 /* Wake task to waking CPU */
-#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu power */
+#define SD_ASYM_CPUCAPACITY 0x0040 /* Groups have different max cpu capacities */
+#define SD_SHARE_CPUCAPACITY 0x0080 /* Domain members share cpu capacity */
#define SD_SHARE_POWERDOMAIN 0x0100 /* Domain members share power domain */
#define SD_SHARE_PKG_RESOURCES 0x0200 /* Domain members share cpu pkg resources */
#define SD_SERIALIZE 0x0400 /* Only a single load balancing instance */
@@ -1062,6 +1069,12 @@ extern int sched_domain_level_max;
struct sched_group;
+struct sched_domain_shared {
+ atomic_t ref;
+ atomic_t nr_busy_cpus;
+ int has_idle_cores;
+};
+
struct sched_domain {
/* These fields must be setup */
struct sched_domain *parent; /* top domain must be null terminated */
@@ -1092,6 +1105,8 @@ struct sched_domain {
u64 max_newidle_lb_cost;
unsigned long next_decay_max_lb_cost;
+ u64 avg_scan_cost; /* select_idle_sibling */
+
#ifdef CONFIG_SCHEDSTATS
/* load_balance() stats */
unsigned int lb_count[CPU_MAX_IDLE_TYPES];
@@ -1130,6 +1145,7 @@ struct sched_domain {
void *private; /* used during construction */
struct rcu_head rcu; /* used during destruction */
};
+ struct sched_domain_shared *shared;
unsigned int span_weight;
/*
@@ -1163,6 +1179,7 @@ typedef int (*sched_domain_flags_f)(void);
struct sd_data {
struct sched_domain **__percpu sd;
+ struct sched_domain_shared **__percpu sds;
struct sched_group **__percpu sg;
struct sched_group_capacity **__percpu sgc;
};
@@ -1456,6 +1473,13 @@ struct tlbflush_unmap_batch {
};
struct task_struct {
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ /*
+ * For reasons of header soup (see current_thread_info()), this
+ * must be the first element of task_struct.
+ */
+ struct thread_info thread_info;
+#endif
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
void *stack;
atomic_t usage;
@@ -1465,6 +1489,9 @@ struct task_struct {
#ifdef CONFIG_SMP
struct llist_node wake_entry;
int on_cpu;
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ unsigned int cpu; /* current CPU */
+#endif
unsigned int wakee_flips;
unsigned long wakee_flip_decay_ts;
struct task_struct *last_wakee;
@@ -1545,6 +1572,9 @@ struct task_struct {
/* unserialized, strictly 'current' */
unsigned in_execve:1; /* bit to tell LSMs we're in execve */
unsigned in_iowait:1;
+#if !defined(TIF_RESTORE_SIGMASK)
+ unsigned restore_sigmask:1;
+#endif
#ifdef CONFIG_MEMCG
unsigned memcg_may_oom:1;
#ifndef CONFIG_SLOB
@@ -1918,6 +1948,13 @@ struct task_struct {
#ifdef CONFIG_MMU
struct task_struct *oom_reaper_list;
#endif
+#ifdef CONFIG_VMAP_STACK
+ struct vm_struct *stack_vm_area;
+#endif
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ /* A live task holds one reference. */
+ atomic_t stack_refcount;
+#endif
/* CPU-specific state of this task */
struct thread_struct thread;
/*
@@ -1934,6 +1971,18 @@ extern int arch_task_struct_size __read_mostly;
# define arch_task_struct_size (sizeof(struct task_struct))
#endif
+#ifdef CONFIG_VMAP_STACK
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return t->stack_vm_area;
+}
+#else
+static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
+{
+ return NULL;
+}
+#endif
+
/* Future-safe accessor for struct task_struct's cpus_allowed. */
#define tsk_cpus_allowed(tsk) (&(tsk)->cpus_allowed)
@@ -1948,6 +1997,32 @@ static inline int tsk_nr_cpus_allowed(struct task_struct *p)
#define TNF_FAULT_LOCAL 0x08
#define TNF_MIGRATE_FAIL 0x10
+static inline bool in_vfork(struct task_struct *tsk)
+{
+ bool ret;
+
+ /*
+ * need RCU to access ->real_parent if CLONE_VM was used along with
+ * CLONE_PARENT.
+ *
+ * We check real_parent->mm == tsk->mm because CLONE_VFORK does not
+ * imply CLONE_VM
+ *
+ * CLONE_VFORK can be used with CLONE_PARENT/CLONE_THREAD and thus
+ * ->real_parent is not necessarily the task doing vfork(), so in
+ * theory we can't rely on task_lock() if we want to dereference it.
+ *
+ * And in this case we can't trust the real_parent->mm == tsk->mm
+ * check, it can be false negative. But we do not care, if init or
+ * another oom-unkillable task does this it should blame itself.
+ */
+ rcu_read_lock();
+ ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+ rcu_read_unlock();
+
+ return ret;
+}
+
#ifdef CONFIG_NUMA_BALANCING
extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
@@ -2139,6 +2214,9 @@ static inline void put_task_struct(struct task_struct *t)
__put_task_struct(t);
}
+struct task_struct *task_rcu_dereference(struct task_struct **ptask);
+struct task_struct *try_get_task_struct(struct task_struct **ptask);
+
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime);
@@ -2534,12 +2612,14 @@ static inline bool is_idle_task(const struct task_struct *p)
return p->pid == 0;
}
extern struct task_struct *curr_task(int cpu);
-extern void set_curr_task(int cpu, struct task_struct *p);
+extern void ia64_set_curr_task(int cpu, struct task_struct *p);
void yield(void);
union thread_union {
+#ifndef CONFIG_THREAD_INFO_IN_TASK
struct thread_info thread_info;
+#endif
unsigned long stack[THREAD_SIZE/sizeof(long)];
};
@@ -2649,6 +2729,66 @@ extern void sigqueue_free(struct sigqueue *);
extern int send_sigqueue(struct sigqueue *, struct task_struct *, int group);
extern int do_sigaction(int, struct k_sigaction *, struct k_sigaction *);
+#ifdef TIF_RESTORE_SIGMASK
+/*
+ * Legacy restore_sigmask accessors. These are inefficient on
+ * SMP architectures because they require atomic operations.
+ */
+
+/**
+ * set_restore_sigmask() - make sure saved_sigmask processing gets done
+ *
+ * This sets TIF_RESTORE_SIGMASK and ensures that the arch signal code
+ * will run before returning to user mode, to process the flag. For
+ * all callers, TIF_SIGPENDING is already set or it's no harm to set
+ * it. TIF_RESTORE_SIGMASK need not be in the set of bits that the
+ * arch code will notice on return to user mode, in case those bits
+ * are scarce. We set TIF_SIGPENDING here to ensure that the arch
+ * signal code always gets run when TIF_RESTORE_SIGMASK is set.
+ */
+static inline void set_restore_sigmask(void)
+{
+ set_thread_flag(TIF_RESTORE_SIGMASK);
+ WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+}
+static inline void clear_restore_sigmask(void)
+{
+ clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_restore_sigmask(void)
+{
+ return test_thread_flag(TIF_RESTORE_SIGMASK);
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ return test_and_clear_thread_flag(TIF_RESTORE_SIGMASK);
+}
+
+#else /* TIF_RESTORE_SIGMASK */
+
+/* Higher-quality implementation, used if TIF_RESTORE_SIGMASK doesn't exist. */
+static inline void set_restore_sigmask(void)
+{
+ current->restore_sigmask = true;
+ WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+}
+static inline void clear_restore_sigmask(void)
+{
+ current->restore_sigmask = false;
+}
+static inline bool test_restore_sigmask(void)
+{
+ return current->restore_sigmask;
+}
+static inline bool test_and_clear_restore_sigmask(void)
+{
+ if (!current->restore_sigmask)
+ return false;
+ current->restore_sigmask = false;
+ return true;
+}
+#endif
+
static inline void restore_saved_sigmask(void)
{
if (test_and_clear_restore_sigmask())
@@ -2738,6 +2878,20 @@ static inline void mmdrop(struct mm_struct *mm)
__mmdrop(mm);
}
+static inline void mmdrop_async_fn(struct work_struct *work)
+{
+ struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
+ __mmdrop(mm);
+}
+
+static inline void mmdrop_async(struct mm_struct *mm)
+{
+ if (unlikely(atomic_dec_and_test(&mm->mm_count))) {
+ INIT_WORK(&mm->async_put_work, mmdrop_async_fn);
+ schedule_work(&mm->async_put_work);
+ }
+}
+
static inline bool mmget_not_zero(struct mm_struct *mm)
{
return atomic_inc_not_zero(&mm->mm_users);
@@ -2967,10 +3121,34 @@ static inline void threadgroup_change_end(struct task_struct *tsk)
cgroup_threadgroup_change_end(tsk);
}
-#ifndef __HAVE_THREAD_FUNCTIONS
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+
+static inline struct thread_info *task_thread_info(struct task_struct *task)
+{
+ return &task->thread_info;
+}
+
+/*
+ * When accessing the stack of a non-current task that might exit, use
+ * try_get_task_stack() instead. task_stack_page will return a pointer
+ * that could get freed out from under you.
+ */
+static inline void *task_stack_page(const struct task_struct *task)
+{
+ return task->stack;
+}
+
+#define setup_thread_stack(new,old) do { } while(0)
+
+static inline unsigned long *end_of_stack(const struct task_struct *task)
+{
+ return task->stack;
+}
+
+#elif !defined(__HAVE_THREAD_FUNCTIONS)
#define task_thread_info(task) ((struct thread_info *)(task)->stack)
-#define task_stack_page(task) ((task)->stack)
+#define task_stack_page(task) ((void *)(task)->stack)
static inline void setup_thread_stack(struct task_struct *p, struct task_struct *org)
{
@@ -2997,6 +3175,24 @@ static inline unsigned long *end_of_stack(struct task_struct *p)
}
#endif
+
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+ return atomic_inc_not_zero(&tsk->stack_refcount) ?
+ task_stack_page(tsk) : NULL;
+}
+
+extern void put_task_stack(struct task_struct *tsk);
+#else
+static inline void *try_get_task_stack(struct task_struct *tsk)
+{
+ return task_stack_page(tsk);
+}
+
+static inline void put_task_stack(struct task_struct *tsk) {}
+#endif
+
#define task_stack_end_corrupted(task) \
(*(end_of_stack(task)) != STACK_END_MAGIC)
@@ -3007,7 +3203,7 @@ static inline int object_is_on_stack(void *obj)
return (obj >= stack) && (obj < (stack + THREAD_SIZE));
}
-extern void thread_info_cache_init(void);
+extern void thread_stack_cache_init(void);
#ifdef CONFIG_DEBUG_STACK_USAGE
static inline unsigned long stack_not_used(struct task_struct *p)
@@ -3112,7 +3308,11 @@ static inline int signal_pending_state(long state, struct task_struct *p)
* cond_resched_lock() will drop the spinlock before scheduling,
* cond_resched_softirq() will enable bhs before scheduling.
*/
+#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
+#else
+static inline int _cond_resched(void) { return 0; }
+#endif
#define cond_resched() ({ \
___might_sleep(__FILE__, __LINE__, 0); \
@@ -3142,6 +3342,15 @@ static inline void cond_resched_rcu(void)
#endif
}
+static inline unsigned long get_preempt_disable_ip(struct task_struct *p)
+{
+#ifdef CONFIG_DEBUG_PREEMPT
+ return p->preempt_disable_ip;
+#else
+ return 0;
+#endif
+}
+
/*
* Does a critical section need to be broken due to another
* task waiting?: (technically does not depend on CONFIG_PREEMPT,
@@ -3270,7 +3479,11 @@ static inline void ptrace_signal_wake_up(struct task_struct *t, bool resume)
static inline unsigned int task_cpu(const struct task_struct *p)
{
+#ifdef CONFIG_THREAD_INFO_IN_TASK
+ return p->cpu;
+#else
return task_thread_info(p)->cpu;
+#endif
}
static inline int task_node(const struct task_struct *p)
@@ -3375,15 +3588,20 @@ static inline unsigned long rlimit_max(unsigned int limit)
return task_rlimit_max(current, limit);
}
+#define SCHED_CPUFREQ_RT (1U << 0)
+#define SCHED_CPUFREQ_DL (1U << 1)
+#define SCHED_CPUFREQ_IOWAIT (1U << 2)
+
+#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
+
#ifdef CONFIG_CPU_FREQ
struct update_util_data {
- void (*func)(struct update_util_data *data,
- u64 time, unsigned long util, unsigned long max);
+ void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
};
void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
- void (*func)(struct update_util_data *data, u64 time,
- unsigned long util, unsigned long max));
+ void (*func)(struct update_util_data *data, u64 time,
+ unsigned int flags));
void cpufreq_remove_update_util_hook(int cpu);
#endif /* CONFIG_CPU_FREQ */