Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--	include/linux/sched.h	214
1 file changed, 198 insertions(+), 16 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b3d697f3b573..9256118bd40c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -27,6 +27,7 @@
 #include <linux/signal_types.h>
 #include <linux/mm_types_task.h>
 #include <linux/task_io_accounting.h>
+#include <linux/rseq.h>
 
 /* task_struct member predeclarations (sorted alphabetically): */
 struct audit_context;
@@ -112,17 +113,36 @@ struct task_group;
 
 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
 
+/*
+ * Special states are those that do not use the normal wait-loop pattern. See
+ * the comment with set_special_state().
+ */
+#define is_special_task_state(state)				\
+	((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD))
+
 #define __set_current_state(state_value)			\
 	do {							\
+		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
 		current->state = (state_value);			\
 	} while (0)
+
 #define set_current_state(state_value)				\
 	do {							\
+		WARN_ON_ONCE(is_special_task_state(state_value));\
 		current->task_state_change = _THIS_IP_;		\
 		smp_store_mb(current->state, (state_value));	\
 	} while (0)
+
+#define set_special_state(state_value)					\
+	do {								\
+		unsigned long flags; /* may shadow */			\
+		WARN_ON_ONCE(!is_special_task_state(state_value));	\
+		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
+		current->task_state_change = _THIS_IP_;			\
+		current->state = (state_value);				\
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
+	} while (0)
+
 #else
 /*
  * set_current_state() includes a barrier so that the write of current->state
@@ -144,8 +164,8 @@ struct task_group;
  *
  * The above is typically ordered against the wakeup, which does:
  *
- *	need_sleep = false;
- *	wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ *   need_sleep = false;
+ *   wake_up_state(p, TASK_UNINTERRUPTIBLE);
  *
  * Where wake_up_state() (and all other wakeup primitives) imply enough
  * barriers to order the store of the variable against wakeup.
@@ -154,12 +174,33 @@ struct task_group;
  * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
  * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
  *
- * This is obviously fine, since they both store the exact same value.
+ * However, with slightly different timing the wakeup TASK_RUNNING store can
+ * also collide with the TASK_UNINTERRUPTIBLE store. Losing that store is not
+ * a problem either because that will result in one extra go around the loop
+ * and our @cond test will save the day.
  *
  * Also see the comments of try_to_wake_up().
  */
-#define __set_current_state(state_value) do { current->state = (state_value); } while (0)
-#define set_current_state(state_value)	 smp_store_mb(current->state, (state_value))
+#define __set_current_state(state_value)				\
+	current->state = (state_value)
+
+#define set_current_state(state_value)					\
+	smp_store_mb(current->state, (state_value))
+
+/*
+ * set_special_state() should be used for those states when the blocking task
+ * can not use the regular condition based wait-loop. In that case we must
+ * serialize against wakeups such that any possible in-flight TASK_RUNNING stores
+ * will not collide with our state change.
+ */
+#define set_special_state(state_value)					\
+	do {								\
+		unsigned long flags; /* may shadow */			\
+		raw_spin_lock_irqsave(&current->pi_lock, flags);	\
+		current->state = (state_value);				\
+		raw_spin_unlock_irqrestore(&current->pi_lock, flags);	\
+	} while (0)
+
 #endif
 
 /* Task command name length: */
@@ -701,7 +742,7 @@ struct task_struct {
 	pid_t				pid;
 	pid_t				tgid;
 
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
 	/* Canary value for the -fstack-protector GCC feature: */
 	unsigned long			stack_canary;
 #endif
@@ -1007,6 +1048,17 @@ struct task_struct {
 	unsigned long			numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
+#ifdef CONFIG_RSEQ
+	struct rseq __user *rseq;
+	u32 rseq_len;
+	u32 rseq_sig;
+	/*
+	 * RmW on rseq_event_mask must be performed atomically
+	 * with respect to preemption.
+	 */
+	unsigned long rseq_event_mask;
+#endif
+
 	struct tlbflush_unmap_batch	tlb_ubc;
 
 	struct rcu_head			rcu;
@@ -1078,7 +1130,7 @@ struct task_struct {
 #ifdef CONFIG_KCOV
 	/* Coverage collection mode enabled for this task (0 if disabled): */
-	enum kcov_mode			kcov_mode;
+	unsigned int			kcov_mode;
 
 	/* Size of the kcov_area: */
 	unsigned int			kcov_size;
@@ -1393,7 +1445,8 @@ static inline bool is_percpu_thread(void)
 #define PFA_NO_NEW_PRIVS		0	/* May not gain new privileges. */
 #define PFA_SPREAD_PAGE			1	/* Spread page cache over cpuset */
 #define PFA_SPREAD_SLAB			2	/* Spread some slab caches over cpuset */
-
+#define PFA_SPEC_SSB_DISABLE		3	/* Speculative Store Bypass disabled */
+#define PFA_SPEC_SSB_FORCE_DISABLE	4	/* Speculative Store Bypass force disabled*/
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1418,6 +1471,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
 TASK_PFA_SET(SPREAD_SLAB, spread_slab)
 TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
 
+TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable)
+TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable)
+
+TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable)
+
 static inline void
 current_restore_flags(unsigned long orig_flags, unsigned long flags)
 {
@@ -1464,6 +1524,7 @@ static inline int task_nice(const struct task_struct *p)
 extern int can_nice(const struct task_struct *p, const int nice);
 extern int task_curr(const struct task_struct *p);
 extern int idle_cpu(int cpu);
+extern int available_idle_cpu(int cpu);
 extern int sched_setscheduler(struct task_struct *, int, const struct sched_param *);
 extern int sched_setscheduler_nocheck(struct task_struct *, int, const struct sched_param *);
 extern int sched_setattr(struct task_struct *, const struct sched_attr *);
@@ -1578,6 +1639,12 @@ static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
 	clear_ti_thread_flag(task_thread_info(tsk), flag);
 }
 
+static inline void update_tsk_thread_flag(struct task_struct *tsk, int flag,
+					  bool value)
+{
+	update_ti_thread_flag(task_thread_info(tsk), flag, value);
+}
+
 static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
 {
 	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
@@ -1613,7 +1680,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
  * explicit rescheduling in places that are safe. The return
  * value indicates whether a reschedule was done in fact.
  * cond_resched_lock() will drop the spinlock before scheduling,
- * cond_resched_softirq() will enable bhs before scheduling.
  */
 #ifndef CONFIG_PREEMPT
 extern int _cond_resched(void);
@@ -1633,13 +1699,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
 	__cond_resched_lock(lock);				\
 })
 
-extern int __cond_resched_softirq(void);
-
-#define cond_resched_softirq() ({					\
-	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
-	__cond_resched_softirq();					\
-})
-
 static inline void cond_resched_rcu(void)
 {
 #if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
@@ -1716,4 +1775,127 @@ extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
 #define TASK_SIZE_OF(tsk)	TASK_SIZE
 #endif
 
+#ifdef CONFIG_RSEQ
+
+/*
+ * Map the event mask on the user-space ABI enum rseq_cs_flags
+ * for direct mask checks.
+ */
+enum rseq_event_mask_bits {
+	RSEQ_EVENT_PREEMPT_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT,
+	RSEQ_EVENT_SIGNAL_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT,
+	RSEQ_EVENT_MIGRATE_BIT	= RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT,
+};
+
+enum rseq_event_mask {
+	RSEQ_EVENT_PREEMPT	= (1U << RSEQ_EVENT_PREEMPT_BIT),
+	RSEQ_EVENT_SIGNAL	= (1U << RSEQ_EVENT_SIGNAL_BIT),
+	RSEQ_EVENT_MIGRATE	= (1U << RSEQ_EVENT_MIGRATE_BIT),
+};
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+	if (t->rseq)
+		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
+}
+
+void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
+
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+					     struct pt_regs *regs)
+{
+	if (current->rseq)
+		__rseq_handle_notify_resume(ksig, regs);
+}
+
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+				       struct pt_regs *regs)
+{
+	preempt_disable();
+	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
+	preempt_enable();
+	rseq_handle_notify_resume(ksig, regs);
+}
+
+/* rseq_preempt() requires preemption to be disabled. */
+static inline void rseq_preempt(struct task_struct *t)
+{
+	__set_bit(RSEQ_EVENT_PREEMPT_BIT, &t->rseq_event_mask);
+	rseq_set_notify_resume(t);
+}
+
+/* rseq_migrate() requires preemption to be disabled. */
+static inline void rseq_migrate(struct task_struct *t)
+{
+	__set_bit(RSEQ_EVENT_MIGRATE_BIT, &t->rseq_event_mask);
+	rseq_set_notify_resume(t);
+}
+
+/*
+ * If parent process has a registered restartable sequences area, the
+ * child inherits. Only applies when forking a process, not a thread.
+ */
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+	if (clone_flags & CLONE_THREAD) {
+		t->rseq = NULL;
+		t->rseq_len = 0;
+		t->rseq_sig = 0;
+		t->rseq_event_mask = 0;
+	} else {
+		t->rseq = current->rseq;
+		t->rseq_len = current->rseq_len;
+		t->rseq_sig = current->rseq_sig;
+		t->rseq_event_mask = current->rseq_event_mask;
+	}
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+	t->rseq = NULL;
+	t->rseq_len = 0;
+	t->rseq_sig = 0;
+	t->rseq_event_mask = 0;
+}
+
+#else
+
+static inline void rseq_set_notify_resume(struct task_struct *t)
+{
+}
+static inline void rseq_handle_notify_resume(struct ksignal *ksig,
+					     struct pt_regs *regs)
+{
+}
+static inline void rseq_signal_deliver(struct ksignal *ksig,
+				       struct pt_regs *regs)
+{
+}
+static inline void rseq_preempt(struct task_struct *t)
+{
+}
+static inline void rseq_migrate(struct task_struct *t)
+{
+}
+static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
+{
+}
+static inline void rseq_execve(struct task_struct *t)
+{
+}
+
+#endif
+
+#ifdef CONFIG_DEBUG_RSEQ
+
+void rseq_syscall(struct pt_regs *regs);
+
+#else
+
+static inline void rseq_syscall(struct pt_regs *regs)
+{
+}
+
+#endif
+
 #endif
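The set_current_state()/set_special_state() split above is easiest to read against the condition-based wait loop that the updated comment describes. Below is a minimal sketch of that pattern; my_cond, my_waiter() and my_waker() are made-up names for illustration, not part of this patch or of the kernel API.

/* Minimal sketch of the wait-loop pattern assumed by set_current_state(). */
#include <linux/sched.h>

static bool my_cond;		/* hypothetical condition, set by the waker */

static void my_waiter(void)
{
	for (;;) {
		/* Publish the sleep state before re-checking the condition. */
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (my_cond)
			break;
		schedule();
	}
	/* A racing wakeup may already have stored TASK_RUNNING; both stores agree. */
	__set_current_state(TASK_RUNNING);
}

static void my_waker(struct task_struct *my_task)
{
	my_cond = true;		/* ordered against the sleep by the wakeup below */
	wake_up_process(my_task);
}

States such as __TASK_STOPPED, __TASK_TRACED and TASK_DEAD are set without such a retry loop, so an in-flight TASK_RUNNING store from an earlier wakeup could silently overwrite them; serializing the store on ->pi_lock in set_special_state() closes that window, and the CONFIG_DEBUG_ATOMIC_SLEEP variants now warn when the wrong helper is used for a given state.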
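The new PFA_SPEC_SSB_* bits rely on the TASK_PFA_TEST/SET/CLEAR generators visible in the hunk, which stamp out per-task accessors over p->atomic_flags. A rough hand-expansion of the TEST accessor for the new bit is shown below for orientation; it is an approximation, not a line from the patch.

/* Approximate expansion of TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable). */
static inline bool task_spec_ssb_disable(struct task_struct *p)
{
	return test_bit(PFA_SPEC_SSB_DISABLE, &p->atomic_flags);
}

Note that SPEC_SSB_FORCE_DISABLE only gets TEST and SET accessors: once a task has been force-disabled, there is intentionally no generated helper to clear the bit again.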
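On the rseq side, the event bits are deliberately mapped onto the user-space ABI's RSEQ_CS_FLAG_NO_RESTART_ON_* numbering so that the recorded events can be compared directly against the flags of an interrupted critical section. The snippet below sketches that kind of check; my_rseq_needs_restart() is an illustrative helper, not the actual code in kernel/rseq.c.

/*
 * Illustrative only: would the events recorded in ->rseq_event_mask force a
 * restart of a critical section whose rseq_cs carries @cs_flags?
 */
static bool my_rseq_needs_restart(struct task_struct *t, u32 cs_flags)
{
	u32 events;

	/* The RmW on rseq_event_mask must stay atomic w.r.t. preemption. */
	preempt_disable();
	events = t->rseq_event_mask;
	t->rseq_event_mask = 0;		/* events are consumed once inspected */
	preempt_enable();

	/* Restart unless every recorded event was explicitly opted out of. */
	return !!(events & ~cs_flags);
}

The same preemption requirement is why rseq_preempt() and rseq_migrate() may use the non-atomic __set_bit(): their callers already run with preemption disabled, and rseq_signal_deliver() wraps its own __set_bit() in preempt_disable()/preempt_enable().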