Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cgroup.c           |   3
-rw-r--r--	kernel/exit.c             |   3
-rw-r--r--	kernel/fork.c             |   5
-rw-r--r--	kernel/itimer.c           |   4
-rw-r--r--	kernel/posix-cpu-timers.c | 117
-rw-r--r--	kernel/profile.c          |   3
-rw-r--r--	kernel/sched.c            |  27
-rw-r--r--	kernel/sched_fair.c       |  11
-rw-r--r--	kernel/sched_stats.h      |  45
-rw-r--r--	kernel/signal.c           |   8
-rw-r--r--	kernel/sysctl.c           |   5
-rw-r--r--	kernel/user.c             |   3

12 files changed, 179 insertions, 55 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 5a54ff42874e..e14db9c089b9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -2351,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		if (ss->root == root)
-			mutex_lock_nested(&ss->hierarchy_mutex, i);
+			mutex_lock(&ss->hierarchy_mutex);
 	}
 }
 
@@ -2637,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	BUG_ON(!list_empty(&init_task.tasks));
 
 	mutex_init(&ss->hierarchy_mutex);
+	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
 }
diff --git a/kernel/exit.c b/kernel/exit.c
index f80dec3f1875..efd30ccf3858 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
+		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
diff --git a/kernel/fork.c b/kernel/fork.c
index 6d5dbb7a13e2..a66fbde20715 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -851,13 +851,14 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 	sig->tty_old_pgrp = NULL;
 	sig->tty = NULL;
 
-	sig->cutime = sig->cstime = cputime_zero;
+	sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
 	sig->gtime = cputime_zero;
 	sig->cgtime = cputime_zero;
 	sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
 	sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
 	sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
 	task_io_accounting_init(&sig->ioac);
+	sig->sum_sched_runtime = 0;
 	taskstats_tgid_init(sig);
 
 	task_lock(current->group_leader);
@@ -1094,7 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 #ifdef CONFIG_DEBUG_MUTEXES
 	p->blocked_on = NULL; /* not blocked yet */
 #endif
-	if (unlikely(ptrace_reparented(current)))
+	if (unlikely(current->ptrace))
 		ptrace_fork(p, clone_flags);
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
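The exit.c and fork.c hunks above implement one pattern: each exiting thread folds its final CPU times into totals kept on the shared signal_struct (which copy_signal() now zeroes for a new thread group), so a later group-wide sample only has to walk the live threads. Below is a minimal userspace sketch of that fold-on-exit idea, using pthreads; the names (group_accounting, fold_on_exit, worker) are invented for illustration and are not the kernel API.

/* Sketch: fold per-thread counters into group totals at exit, so a
 * group sample needs only the live threads plus the saved totals. */
#include <pthread.h>
#include <stdio.h>

struct group_accounting {
	pthread_mutex_t lock;
	unsigned long long dead_runtime_ns;	/* like sig->sum_sched_runtime */
};

static struct group_accounting ga = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
};

/* Called by a thread as it exits, like __exit_signal() accumulating
 * task_utime()/task_stime() into sig->utime/sig->stime. */
static void fold_on_exit(unsigned long long my_runtime_ns)
{
	pthread_mutex_lock(&ga.lock);
	ga.dead_runtime_ns += my_runtime_ns;
	pthread_mutex_unlock(&ga.lock);
}

static void *worker(void *arg)
{
	unsigned long long ns = (unsigned long long)(long)arg;
	/* ... do work that accounts ns of runtime ... */
	fold_on_exit(ns);
	return NULL;
}

int main(void)
{
	pthread_t t[2];

	pthread_create(&t[0], NULL, worker, (void *)1000000L);
	pthread_create(&t[1], NULL, worker, (void *)2000000L);
	pthread_join(t[0], NULL);
	pthread_join(t[1], NULL);

	/* A group sample now sees dead threads' time without walking them. */
	printf("dead runtime: %llu ns\n", ga.dead_runtime_ns);
	return 0;
}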
diff --git a/kernel/itimer.c b/kernel/itimer.c
index 6a5fe93dd8bd..58762f7077ec 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -62,7 +62,7 @@ int do_getitimer(int which, struct itimerval *value)
 			struct task_cputime cputime;
 			cputime_t utime;
 
-			thread_group_cputime(tsk, &cputime);
+			thread_group_cputimer(tsk, &cputime);
 			utime = cputime.utime;
 			if (cputime_le(cval, utime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
@@ -82,7 +82,7 @@ int do_getitimer(int which, struct itimerval *value)
 			struct task_cputime times;
 			cputime_t ptime;
 
-			thread_group_cputime(tsk, &times);
+			thread_group_cputimer(tsk, &times);
 			ptime = cputime_add(times.utime, times.stime);
 			if (cputime_le(cval, ptime)) { /* about to fire */
 				cval = jiffies_to_cputime(1);
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index fa07da94d7be..2313a4cc14ea 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -230,6 +230,71 @@ static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
 	return 0;
 }
 
+void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times)
+{
+	struct sighand_struct *sighand;
+	struct signal_struct *sig;
+	struct task_struct *t;
+
+	*times = INIT_CPUTIME;
+
+	rcu_read_lock();
+	sighand = rcu_dereference(tsk->sighand);
+	if (!sighand)
+		goto out;
+
+	sig = tsk->signal;
+
+	t = tsk;
+	do {
+		times->utime = cputime_add(times->utime, t->utime);
+		times->stime = cputime_add(times->stime, t->stime);
+		times->sum_exec_runtime += t->se.sum_exec_runtime;
+
+		t = next_thread(t);
+	} while (t != tsk);
+
+	times->utime = cputime_add(times->utime, sig->utime);
+	times->stime = cputime_add(times->stime, sig->stime);
+	times->sum_exec_runtime += sig->sum_sched_runtime;
+out:
+	rcu_read_unlock();
+}
+
+static void update_gt_cputime(struct task_cputime *a, struct task_cputime *b)
+{
+	if (cputime_gt(b->utime, a->utime))
+		a->utime = b->utime;
+
+	if (cputime_gt(b->stime, a->stime))
+		a->stime = b->stime;
+
+	if (b->sum_exec_runtime > a->sum_exec_runtime)
+		a->sum_exec_runtime = b->sum_exec_runtime;
+}
+
+void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+{
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+	struct task_cputime sum;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cputimer->lock, flags);
+	if (!cputimer->running) {
+		cputimer->running = 1;
+		/*
+		 * The POSIX timer interface allows for absolute time expiry
+		 * values through the TIMER_ABSTIME flag, therefore we have
+		 * to synchronize the timer to the clock every time we start
+		 * it.
+		 */
+		thread_group_cputime(tsk, &sum);
+		update_gt_cputime(&cputimer->cputime, &sum);
+	}
+	*times = cputimer->cputime;
+	spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
 /*
  * Sample a process (thread group) clock for the given group_leader task.
  * Must be called with tasklist_lock held for reading.
@@ -457,7 +522,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
 {
 	struct task_cputime cputime;
 
-	thread_group_cputime(tsk, &cputime);
+	thread_group_cputimer(tsk, &cputime);
 	cleanup_timers(tsk->signal->cpu_timers,
 		       cputime.utime, cputime.stime, cputime.sum_exec_runtime);
 }
@@ -964,6 +1029,19 @@ static void check_thread_timers(struct task_struct *tsk,
 	}
 }
 
+static void stop_process_timers(struct task_struct *tsk)
+{
+	struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+	unsigned long flags;
+
+	if (!cputimer->running)
+		return;
+
+	spin_lock_irqsave(&cputimer->lock, flags);
+	cputimer->running = 0;
+	spin_unlock_irqrestore(&cputimer->lock, flags);
+}
+
 /*
  * Check for any per-thread CPU timers that have fired and move them
  * off the tsk->*_timers list onto the firing list.  Per-thread timers
@@ -987,13 +1065,15 @@ static void check_process_timers(struct task_struct *tsk,
 	    sig->rlim[RLIMIT_CPU].rlim_cur == RLIM_INFINITY &&
 	    list_empty(&timers[CPUCLOCK_VIRT]) &&
 	    cputime_eq(sig->it_virt_expires, cputime_zero) &&
-	    list_empty(&timers[CPUCLOCK_SCHED]))
+	    list_empty(&timers[CPUCLOCK_SCHED])) {
+		stop_process_timers(tsk);
 		return;
+	}
 
 	/*
 	 * Collect the current process totals.
 	 */
-	thread_group_cputime(tsk, &cputime);
+	thread_group_cputimer(tsk, &cputime);
 	utime = cputime.utime;
 	ptime = cputime_add(utime, cputime.stime);
 	sum_sched_runtime = cputime.sum_exec_runtime;
@@ -1259,7 +1339,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 	if (!task_cputime_zero(&sig->cputime_expires)) {
 		struct task_cputime group_sample;
 
-		thread_group_cputime(tsk, &group_sample);
+		thread_group_cputimer(tsk, &group_sample);
 		if (task_cputime_expired(&group_sample, &sig->cputime_expires))
 			return 1;
 	}
@@ -1329,6 +1409,33 @@ void run_posix_cpu_timers(struct task_struct *tsk)
 }
 
 /*
+ * Sample a process (thread group) timer for the given group_leader task.
+ * Must be called with tasklist_lock held for reading.
+ */
+static int cpu_timer_sample_group(const clockid_t which_clock,
+				  struct task_struct *p,
+				  union cpu_time_count *cpu)
+{
+	struct task_cputime cputime;
+
+	thread_group_cputimer(p, &cputime);
+	switch (CPUCLOCK_WHICH(which_clock)) {
+	default:
+		return -EINVAL;
+	case CPUCLOCK_PROF:
+		cpu->cpu = cputime_add(cputime.utime, cputime.stime);
+		break;
+	case CPUCLOCK_VIRT:
+		cpu->cpu = cputime.utime;
+		break;
+	case CPUCLOCK_SCHED:
+		cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+		break;
+	}
+	return 0;
+}
+
+/*
  * Set one of the process-wide special case CPU timers.
  * The tsk->sighand->siglock must be held by the caller.
 * The *newval argument is relative and we update it to be absolute, *oldval
@@ -1341,7 +1448,7 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
 	struct list_head *head;
 
 	BUG_ON(clock_idx == CPUCLOCK_SCHED);
-	cpu_clock_sample_group(clock_idx, tsk, &now);
+	cpu_timer_sample_group(clock_idx, tsk, &now);
 
 	if (oldval) {
 		if (!cputime_eq(*oldval, cputime_zero)) {
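The posix-cpu-timers.c changes above revolve around a cached group timer: thread_group_cputimer() starts the cache lazily with one expensive full walk (thread_group_cputime()), update_gt_cputime() merges that sample monotonically so the cached value never moves backwards, per-tick accounting only accumulates while the running flag is set, and stop_process_timers() clears the flag once no process-wide timers remain. A rough userspace analogue of that start/sample/tick/stop protocol follows; the names (group_timer, sample_group_timer, and so on) are invented, and this is a sketch of the locking pattern, not the kernel implementation.

/* Sketch of the cached group-timer pattern: a 'running' flag gates
 * accumulation; starting synchronizes the cache with one expensive
 * full sample, merged monotonically so cached time never goes back. */
#include <pthread.h>
#include <stdio.h>

struct group_timer {
	pthread_mutex_t lock;
	int running;
	unsigned long long runtime_ns;	/* cached group total */
};

static struct group_timer gt = {
	.lock = PTHREAD_MUTEX_INITIALIZER,
};

/* Stand-in for thread_group_cputime(): an expensive walk of all threads. */
static unsigned long long full_sample(void)
{
	return 123456789ULL;	/* illustrative value */
}

/* Like thread_group_cputimer(): start lazily, then serve from cache. */
static unsigned long long sample_group_timer(void)
{
	unsigned long long now;

	pthread_mutex_lock(&gt.lock);
	if (!gt.running) {
		unsigned long long sum = full_sample();

		gt.running = 1;
		/* Monotonic merge, like update_gt_cputime(). */
		if (sum > gt.runtime_ns)
			gt.runtime_ns = sum;
	}
	now = gt.runtime_ns;
	pthread_mutex_unlock(&gt.lock);
	return now;
}

/* Like account_group_exec_runtime(): accumulate only while running. */
static void account_tick(unsigned long long delta_ns)
{
	pthread_mutex_lock(&gt.lock);
	if (gt.running)
		gt.runtime_ns += delta_ns;
	pthread_mutex_unlock(&gt.lock);
}

/* Like stop_process_timers(): cheap to stop once no timers remain. */
static void stop_group_timer(void)
{
	pthread_mutex_lock(&gt.lock);
	gt.running = 0;
	pthread_mutex_unlock(&gt.lock);
}

int main(void)
{
	printf("first sample: %llu\n", sample_group_timer());
	account_tick(1000);
	printf("after tick:   %llu\n", sample_group_timer());
	stop_group_timer();
	return 0;
}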
diff --git a/kernel/profile.c b/kernel/profile.c
index 784933acf5b8..7724e0409bae 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -114,12 +114,15 @@ int __ref profile_init(void)
 	if (!slab_is_available()) {
 		prof_buffer = alloc_bootmem(buffer_bytes);
 		alloc_bootmem_cpumask_var(&prof_cpu_mask);
+		cpumask_copy(prof_cpu_mask, cpu_possible_mask);
 		return 0;
 	}
 
 	if (!alloc_cpumask_var(&prof_cpu_mask, GFP_KERNEL))
 		return -ENOMEM;
 
+	cpumask_copy(prof_cpu_mask, cpu_possible_mask);
+
 	prof_buffer = kzalloc(buffer_bytes, GFP_KERNEL);
 	if (prof_buffer)
 		return 0;
diff --git a/kernel/sched.c b/kernel/sched.c
index 8ee437a5ec1d..c1d0ed360088 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2266,16 +2266,6 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state, int sync)
 	if (!sched_feat(SYNC_WAKEUPS))
 		sync = 0;
 
-	if (!sync) {
-		if (current->se.avg_overlap < sysctl_sched_migration_cost &&
-			  p->se.avg_overlap < sysctl_sched_migration_cost)
-			sync = 1;
-	} else {
-		if (current->se.avg_overlap >= sysctl_sched_migration_cost ||
-			  p->se.avg_overlap >= sysctl_sched_migration_cost)
-			sync = 0;
-	}
-
 #ifdef CONFIG_SMP
 	if (sched_feat(LB_WAKEUP_UPDATE)) {
 		struct sched_domain *sd;
@@ -3890,19 +3880,24 @@ int select_nohz_load_balancer(int stop_tick)
 	int cpu = smp_processor_id();
 
 	if (stop_tick) {
-		cpumask_set_cpu(cpu, nohz.cpu_mask);
 		cpu_rq(cpu)->in_nohz_recently = 1;
 
-		/*
-		 * If we are going offline and still the leader, give up!
-		 */
-		if (!cpu_active(cpu) &&
-		    atomic_read(&nohz.load_balancer) == cpu) {
+		if (!cpu_active(cpu)) {
+			if (atomic_read(&nohz.load_balancer) != cpu)
+				return 0;
+
+			/*
+			 * If we are going offline and still the leader,
+			 * give up!
+			 */
 			if (atomic_cmpxchg(&nohz.load_balancer, cpu, -1) != cpu)
 				BUG();
+
 			return 0;
 		}
 
+		cpumask_set_cpu(cpu, nohz.cpu_mask);
+
 		/* time for ilb owner also to sleep */
 		if (cpumask_weight(nohz.cpu_mask) == num_online_cpus()) {
 			if (atomic_read(&nohz.load_balancer) == cpu)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index a7e50ba185ac..0566f2a03c42 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1191,15 +1191,20 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	    int idx, unsigned long load, unsigned long this_load,
 	    unsigned int imbalance)
 {
+	struct task_struct *curr = this_rq->curr;
+	struct task_group *tg;
 	unsigned long tl = this_load;
 	unsigned long tl_per_task;
-	struct task_group *tg;
 	unsigned long weight;
 	int balanced;
 
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
+	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+			p->se.avg_overlap > sysctl_sched_migration_cost))
+		sync = 0;
+
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
 	 * effect of the currently running task from the load
@@ -1426,7 +1431,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
-	if (sched_feat(WAKEUP_OVERLAP) && sync) {
+	if (sched_feat(WAKEUP_OVERLAP) && (sync ||
+			(se->avg_overlap < sysctl_sched_migration_cost &&
+			 pse->avg_overlap < sysctl_sched_migration_cost))) {
 		resched_task(curr);
 		return;
 	}
diff --git a/kernel/sched_stats.h b/kernel/sched_stats.h
index 8ab0cef8ecab..a8f93dd374e1 100644
--- a/kernel/sched_stats.h
+++ b/kernel/sched_stats.h
@@ -296,19 +296,21 @@ sched_info_switch(struct task_struct *prev, struct task_struct *next)
 static inline void account_group_user_time(struct task_struct *tsk,
 					   cputime_t cputime)
 {
-	struct task_cputime *times;
-	struct signal_struct *sig;
+	struct thread_group_cputimer *cputimer;
 
 	/* tsk == current, ensure it is safe to use ->signal */
 	if (unlikely(tsk->exit_state))
 		return;
 
-	sig = tsk->signal;
-	times = &sig->cputime.totals;
+	cputimer = &tsk->signal->cputimer;
 
-	spin_lock(&times->lock);
-	times->utime = cputime_add(times->utime, cputime);
-	spin_unlock(&times->lock);
+	if (!cputimer->running)
+		return;
+
+	spin_lock(&cputimer->lock);
+	cputimer->cputime.utime =
+		cputime_add(cputimer->cputime.utime, cputime);
+	spin_unlock(&cputimer->lock);
 }
 
 /**
@@ -324,19 +326,21 @@ static inline void account_group_user_time(struct task_struct *tsk,
 static inline void account_group_system_time(struct task_struct *tsk,
 					     cputime_t cputime)
 {
-	struct task_cputime *times;
-	struct signal_struct *sig;
+	struct thread_group_cputimer *cputimer;
 
 	/* tsk == current, ensure it is safe to use ->signal */
 	if (unlikely(tsk->exit_state))
 		return;
 
-	sig = tsk->signal;
-	times = &sig->cputime.totals;
+	cputimer = &tsk->signal->cputimer;
+
+	if (!cputimer->running)
+		return;
 
-	spin_lock(&times->lock);
-	times->stime = cputime_add(times->stime, cputime);
-	spin_unlock(&times->lock);
+	spin_lock(&cputimer->lock);
+	cputimer->cputime.stime =
+		cputime_add(cputimer->cputime.stime, cputime);
+	spin_unlock(&cputimer->lock);
 }
 
 /**
@@ -352,7 +356,7 @@ static inline void account_group_system_time(struct task_struct *tsk,
 static inline void account_group_exec_runtime(struct task_struct *tsk,
 					      unsigned long long ns)
 {
-	struct task_cputime *times;
+	struct thread_group_cputimer *cputimer;
 	struct signal_struct *sig;
 
 	sig = tsk->signal;
@@ -361,9 +365,12 @@ static inline void account_group_exec_runtime(struct task_struct *tsk,
 	if (unlikely(!sig))
 		return;
 
-	times = &sig->cputime.totals;
+	cputimer = &sig->cputimer;
+
+	if (!cputimer->running)
+		return;
 
-	spin_lock(&times->lock);
-	times->sum_exec_runtime += ns;
-	spin_unlock(&times->lock);
+	spin_lock(&cputimer->lock);
+	cputimer->cputime.sum_exec_runtime += ns;
+	spin_unlock(&cputimer->lock);
 }
diff --git a/kernel/signal.c b/kernel/signal.c
index b6b36768b758..2a74fe87c0dd 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1367,7 +1367,6 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	struct siginfo info;
 	unsigned long flags;
 	struct sighand_struct *psig;
-	struct task_cputime cputime;
 	int ret = sig;
 
 	BUG_ON(sig == -1);
@@ -1397,9 +1396,10 @@ int do_notify_parent(struct task_struct *tsk, int sig)
 	info.si_uid = __task_cred(tsk)->uid;
 	rcu_read_unlock();
 
-	thread_group_cputime(tsk, &cputime);
-	info.si_utime = cputime_to_jiffies(cputime.utime);
-	info.si_stime = cputime_to_jiffies(cputime.stime);
+	info.si_utime = cputime_to_clock_t(cputime_add(tsk->utime,
+				tsk->signal->utime));
+	info.si_stime = cputime_to_clock_t(cputime_add(tsk->stime,
+				tsk->signal->stime));
 
 	info.si_status = tsk->exit_code & 0x7f;
 	if (tsk->exit_code & 0x80)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 790f9d785663..c5ef44ff850f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -101,6 +101,7 @@ static int two = 2;
 
 static int zero;
 static int one = 1;
+static unsigned long one_ul = 1;
 static int one_hundred = 100;
 
 /* this is needed for the proc_dointvec_minmax for [fs_]overflow UID and GID */
@@ -974,7 +975,7 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &dirty_background_bytes_handler,
 		.strategy	= &sysctl_intvec,
-		.extra1		= &one,
+		.extra1		= &one_ul,
 	},
 	{
 		.ctl_name	= VM_DIRTY_RATIO,
@@ -995,7 +996,7 @@ static struct ctl_table vm_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &dirty_bytes_handler,
 		.strategy	= &sysctl_intvec,
-		.extra1		= &one,
+		.extra1		= &one_ul,
 	},
 	{
 		.procname	= "dirty_writeback_centisecs",
diff --git a/kernel/user.c b/kernel/user.c
index 477b6660f447..3551ac742395 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -72,6 +72,7 @@ static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
 static void uid_hash_remove(struct user_struct *up)
 {
 	hlist_del_init(&up->uidhash_node);
+	put_user_ns(up->user_ns);
 }
 
 static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
@@ -334,7 +335,6 @@ static void free_user(struct user_struct *up, unsigned long flags)
 	atomic_inc(&up->__count);
 	spin_unlock_irqrestore(&uidhash_lock, flags);
 
-	put_user_ns(up->user_ns);
 	INIT_WORK(&up->work, remove_user_sysfs_dir);
 	schedule_work(&up->work);
 }
@@ -357,7 +357,6 @@ static void free_user(struct user_struct *up, unsigned long flags)
 	sched_destroy_user(up);
 	key_put(up->uid_keyring);
 	key_put(up->session_keyring);
-	put_user_ns(up->user_ns);
 	kmem_cache_free(uid_cachep, up);
 }
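One aside on the kernel/sysctl.c hunks: dirty_background_bytes and dirty_bytes are unsigned long sysctls, so clamping them against extra1 = &one (an int) makes the handler read an unsigned long through a pointer to a smaller object. The new one_ul gives the bound the same type the handler dereferences. Below is a small standalone illustration of why the bound's type must match; the broken variant is left as a comment because actually executing it is undefined behavior on LP64 targets.

/* Sketch: a min/max bound object must have the type the handler
 * reads through the pointer -- the reason sysctl.c grew one_ul. */
#include <stdio.h>

static int one = 1;
static unsigned long one_ul = 1;

/* Stand-in for a proc_doulongvec_minmax-style clamp. */
static unsigned long clamp_min(unsigned long val, const unsigned long *min)
{
	return val < *min ? *min : val;
}

int main(void)
{
	printf("sizeof(int)=%zu sizeof(unsigned long)=%zu\n",
	       sizeof(int), sizeof(unsigned long));

	/* Correct: the bound has the type the helper dereferences. */
	printf("clamped: %lu\n", clamp_min(0, &one_ul));

	/* Wrong: clamp_min(0, (unsigned long *)&one) would read 8 bytes
	 * from a 4-byte object on LP64 -- undefined behavior. */
	(void)one;
	return 0;
}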