Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/audit_tree.c | 19 |
| -rw-r--r-- | kernel/fork.c | 4 |
| -rw-r--r-- | kernel/sched/core.c | 35 |
| -rw-r--r-- | kernel/sched/fair.c | 11 |
| -rw-r--r-- | kernel/sched/rt.c | 13 |
| -rw-r--r-- | kernel/sched/sched.h | 8 |
| -rw-r--r-- | kernel/sched/stop_task.c | 22 |
| -rw-r--r-- | kernel/task_work.c | 1 |
| -rw-r--r-- | kernel/time/timekeeping.c | 37 |
| -rw-r--r-- | kernel/timer.c | 9 |
| -rw-r--r-- | kernel/trace/trace_syscalls.c | 4 |
11 files changed, 121 insertions, 42 deletions
diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c
index 3a5ca582ba1e..ed206fd88cca 100644
--- a/kernel/audit_tree.c
+++ b/kernel/audit_tree.c
@@ -250,7 +250,6 @@ static void untag_chunk(struct node *p)
 		spin_unlock(&hash_lock);
 		spin_unlock(&entry->lock);
 		fsnotify_destroy_mark(entry);
-		fsnotify_put_mark(entry);
 		goto out;
 	}
 
@@ -259,7 +258,7 @@ static void untag_chunk(struct node *p)
 
 	fsnotify_duplicate_mark(&new->mark, entry);
 	if (fsnotify_add_mark(&new->mark, new->mark.group, new->mark.i.inode, NULL, 1)) {
-		free_chunk(new);
+		fsnotify_put_mark(&new->mark);
 		goto Fallback;
 	}
 
@@ -293,7 +292,7 @@ static void untag_chunk(struct node *p)
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
 	fsnotify_destroy_mark(entry);
-	fsnotify_put_mark(entry);
+	fsnotify_put_mark(&new->mark);	/* drop initial reference */
 	goto out;
 
 Fallback:
@@ -322,7 +321,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 
 	entry = &chunk->mark;
 	if (fsnotify_add_mark(entry, audit_tree_group, inode, NULL, 0)) {
-		free_chunk(chunk);
+		fsnotify_put_mark(entry);
 		return -ENOSPC;
 	}
 
@@ -347,6 +346,7 @@ static int create_chunk(struct inode *inode, struct audit_tree *tree)
 	insert_hash(chunk);
 	spin_unlock(&hash_lock);
 	spin_unlock(&entry->lock);
+	fsnotify_put_mark(entry);	/* drop initial reference */
 	return 0;
 }
 
@@ -396,7 +396,7 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	fsnotify_duplicate_mark(chunk_entry, old_entry);
 	if (fsnotify_add_mark(chunk_entry, chunk_entry->group, chunk_entry->i.inode, NULL, 1)) {
 		spin_unlock(&old_entry->lock);
-		free_chunk(chunk);
+		fsnotify_put_mark(chunk_entry);
 		fsnotify_put_mark(old_entry);
 		return -ENOSPC;
 	}
@@ -444,8 +444,8 @@ static int tag_chunk(struct inode *inode, struct audit_tree *tree)
 	spin_unlock(&chunk_entry->lock);
 	spin_unlock(&old_entry->lock);
 	fsnotify_destroy_mark(old_entry);
+	fsnotify_put_mark(chunk_entry);	/* drop initial reference */
 	fsnotify_put_mark(old_entry); /* pair to fsnotify_find mark_entry */
-	fsnotify_put_mark(old_entry); /* and kill it */
 	return 0;
 }
 
@@ -916,7 +916,12 @@ static void audit_tree_freeing_mark(struct fsnotify_mark *entry, struct fsnotify
 	struct audit_chunk *chunk = container_of(entry, struct audit_chunk, mark);
 
 	evict_chunk(chunk);
-	fsnotify_put_mark(entry);
+
+	/*
+	 * We are guaranteed to have at least one reference to the mark from
+	 * either the inode or the caller of fsnotify_destroy_mark().
+	 */
+	BUG_ON(atomic_read(&entry->refcnt) < 1);
 }
 
 static bool audit_tree_send_event(struct fsnotify_group *group, struct inode *inode,
diff --git a/kernel/fork.c b/kernel/fork.c
index 3bd2280d79f6..2c8857e12855 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -455,8 +455,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (retval)
 			goto out;
 
-		if (file && uprobe_mmap(tmp))
-			goto out;
+		if (file)
+			uprobe_mmap(tmp);
 	}
 	/* a new mm has just been created */
 	arch_dup_mmap(oldmm, mm);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 82ad284f823b..fbf1fd098dc6 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3142,6 +3142,20 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 # define nsecs_to_cputime(__nsecs)	nsecs_to_jiffies(__nsecs)
 #endif
 
+static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
+{
+	u64 temp = (__force u64) rtime;
+
+	temp *= (__force u64) utime;
+
+	if (sizeof(cputime_t) == 4)
+		temp = div_u64(temp, (__force u32) total);
+	else
+		temp = div64_u64(temp, (__force u64) total);
+
+	return (__force cputime_t) temp;
+}
+
 void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 {
 	cputime_t rtime, utime = p->utime, total = utime + p->stime;
@@ -3151,13 +3165,9 @@ void task_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	 */
 	rtime = nsecs_to_cputime(p->se.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(utime, rtime, total);
+	else
 		utime = rtime;
 
 	/*
@@ -3184,13 +3194,9 @@ void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
 	total = cputime.utime + cputime.stime;
 	rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
 
-	if (total) {
-		u64 temp = (__force u64) rtime;
-
-		temp *= (__force u64) cputime.utime;
-		do_div(temp, (__force u32) total);
-		utime = (__force cputime_t) temp;
-	} else
+	if (total)
+		utime = scale_utime(cputime.utime, rtime, total);
+	else
 		utime = rtime;
 
 	sig->prev_utime = max(sig->prev_utime, utime);
@@ -7246,6 +7252,7 @@ int in_sched_functions(unsigned long addr)
 
 #ifdef CONFIG_CGROUP_SCHED
 struct task_group root_task_group;
+LIST_HEAD(task_groups);
 #endif
 
 DECLARE_PER_CPU(cpumask_var_t, load_balance_tmpmask);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index d0cc03b3e70b..c219bf8d704c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3387,6 +3387,14 @@ static int tg_load_down(struct task_group *tg, void *data)
 
 static void update_h_load(long cpu)
 {
+	struct rq *rq = cpu_rq(cpu);
+	unsigned long now = jiffies;
+
+	if (rq->h_load_throttle == now)
+		return;
+
+	rq->h_load_throttle = now;
+
 	rcu_read_lock();
 	walk_tg_tree(tg_load_down, tg_nop, (void *)cpu);
 	rcu_read_unlock();
@@ -4293,11 +4301,10 @@ redo:
 		env.src_rq    = busiest;
 		env.loop_max  = min(sysctl_sched_nr_migrate, busiest->nr_running);
 
+		update_h_load(env.src_cpu);
 more_balance:
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		if (!env.loop)
-			update_h_load(env.src_cpu);
 
 		/*
 		 * cur_ld_moved - load moved in current iteration
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 573e1ca01102..944cb68420e9 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -788,6 +788,19 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	const struct cpumask *span;
 
 	span = sched_rt_period_mask();
+#ifdef CONFIG_RT_GROUP_SCHED
+	/*
+	 * FIXME: isolated CPUs should really leave the root task group,
+	 * whether they are isolcpus or were isolated via cpusets, lest
+	 * the timer run on a CPU which does not service all runqueues,
+	 * potentially leaving other CPUs indefinitely throttled.  If
+	 * isolation is really required, the user will turn the throttle
+	 * off to kill the perturbations it causes anyway.  Meanwhile,
+	 * this maintains functionality for boot and/or troubleshooting.
+	 */
+	if (rt_b == &root_task_group.rt_bandwidth)
+		span = cpu_online_mask;
+#endif
 	for_each_cpu(i, span) {
 		int enqueue = 0;
 		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c35a1a7dd4d6..f6714d009e77 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -80,7 +80,7 @@ extern struct mutex sched_domains_mutex;
 struct cfs_rq;
 struct rt_rq;
 
-static LIST_HEAD(task_groups);
+extern struct list_head task_groups;
 
 struct cfs_bandwidth {
 #ifdef CONFIG_CFS_BANDWIDTH
@@ -374,7 +374,11 @@ struct rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
 	struct list_head leaf_cfs_rq_list;
-#endif
+#ifdef CONFIG_SMP
+	unsigned long h_load_throttle;
+#endif /* CONFIG_SMP */
+#endif /* CONFIG_FAIR_GROUP_SCHED */
+
 #ifdef CONFIG_RT_GROUP_SCHED
 	struct list_head leaf_rt_rq_list;
 #endif
diff --git a/kernel/sched/stop_task.c b/kernel/sched/stop_task.c
index 7b386e86fd23..da5eb5bed84a 100644
--- a/kernel/sched/stop_task.c
+++ b/kernel/sched/stop_task.c
@@ -27,8 +27,10 @@ static struct task_struct *pick_next_task_stop(struct rq *rq)
 {
 	struct task_struct *stop = rq->stop;
 
-	if (stop && stop->on_rq)
+	if (stop && stop->on_rq) {
+		stop->se.exec_start = rq->clock_task;
 		return stop;
+	}
 
 	return NULL;
 }
@@ -52,6 +54,21 @@ static void yield_task_stop(struct rq *rq)
 
 static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
 {
+	struct task_struct *curr = rq->curr;
+	u64 delta_exec;
+
+	delta_exec = rq->clock_task - curr->se.exec_start;
+	if (unlikely((s64)delta_exec < 0))
+		delta_exec = 0;
+
+	schedstat_set(curr->se.statistics.exec_max,
+			max(curr->se.statistics.exec_max, delta_exec));
+
+	curr->se.sum_exec_runtime += delta_exec;
+	account_group_exec_runtime(curr, delta_exec);
+
+	curr->se.exec_start = rq->clock_task;
+	cpuacct_charge(curr, delta_exec);
 }
 
 static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
@@ -60,6 +77,9 @@ static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
 
 static void set_curr_task_stop(struct rq *rq)
 {
+	struct task_struct *stop = rq->stop;
+
+	stop->se.exec_start = rq->clock_task;
 }
 
 static void switched_to_stop(struct rq *rq, struct task_struct *p)
diff --git a/kernel/task_work.c b/kernel/task_work.c
index 91d4e1742a0c..d320d44903bd 100644
--- a/kernel/task_work.c
+++ b/kernel/task_work.c
@@ -75,6 +75,7 @@ void task_work_run(void)
 			p = q->next;
 			q->func(q);
 			q = p;
+			cond_resched();
 		}
 	}
 }
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index e16af197a2bc..0c1485e42be6 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -115,6 +115,7 @@ static void tk_xtime_add(struct timekeeper *tk, const struct timespec *ts)
 {
 	tk->xtime_sec += ts->tv_sec;
 	tk->xtime_nsec += (u64)ts->tv_nsec << tk->shift;
+	tk_normalize_xtime(tk);
 }
 
 static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec wtm)
@@ -276,7 +277,7 @@ static void timekeeping_forward_now(struct timekeeper *tk)
 	tk->xtime_nsec += cycle_delta * tk->mult;
 
 	/* If arch requires, add in gettimeoffset() */
-	tk->xtime_nsec += arch_gettimeoffset() << tk->shift;
+	tk->xtime_nsec += (u64)arch_gettimeoffset() << tk->shift;
 
 	tk_normalize_xtime(tk);
 
@@ -427,7 +428,7 @@ int do_settimeofday(const struct timespec *tv)
 	struct timespec ts_delta, xt;
 	unsigned long flags;
 
-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
+	if (!timespec_valid(tv))
 		return -EINVAL;
 
 	write_seqlock_irqsave(&tk->lock, flags);
@@ -463,6 +464,8 @@ int timekeeping_inject_offset(struct timespec *ts)
 {
 	struct timekeeper *tk = &timekeeper;
 	unsigned long flags;
+	struct timespec tmp;
+	int ret = 0;
 
 	if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC)
 		return -EINVAL;
@@ -471,10 +474,17 @@ int timekeeping_inject_offset(struct timespec *ts)
 
 	timekeeping_forward_now(tk);
 
+	/* Make sure the proposed value is valid */
+	tmp = timespec_add(tk_xtime(tk),  *ts);
+	if (!timespec_valid(&tmp)) {
+		ret = -EINVAL;
+		goto error;
+	}
 	tk_xtime_add(tk, ts);
 	tk_set_wall_to_mono(tk, timespec_sub(tk->wall_to_monotonic, *ts));
 
+error: /* even if we error out, we forwarded the time, so call update */
 	timekeeping_update(tk, true);
 
 	write_sequnlock_irqrestore(&tk->lock, flags);
 
@@ -482,7 +492,7 @@ int timekeeping_inject_offset(struct timespec *ts)
 
 	/* signal hrtimers about time change */
 	clock_was_set();
 
-	return 0;
+	return ret;
 }
 EXPORT_SYMBOL(timekeeping_inject_offset);
 
@@ -649,7 +659,20 @@ void __init timekeeping_init(void)
 	struct timespec now, boot, tmp;
 
 	read_persistent_clock(&now);
+	if (!timespec_valid(&now)) {
+		pr_warn("WARNING: Persistent clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		now.tv_sec = 0;
+		now.tv_nsec = 0;
+	}
+
 	read_boot_clock(&boot);
+	if (!timespec_valid(&boot)) {
+		pr_warn("WARNING: Boot clock returned invalid value!\n"
+			"         Check your CMOS/BIOS settings.\n");
+		boot.tv_sec = 0;
+		boot.tv_nsec = 0;
+	}
 
 	seqlock_init(&tk->lock);
 
@@ -1129,6 +1152,10 @@ static void update_wall_time(void)
 	offset = (clock->read(clock) - clock->cycle_last) & clock->mask;
 #endif
 
+	/* Check if there's really nothing to do */
+	if (offset < tk->cycle_interval)
+		goto out;
+
 	/*
 	 * With NO_HZ we may have to accumulate many cycle_intervals
 	 * (think "ticks") worth of time at once. To do this efficiently,
@@ -1161,9 +1188,9 @@ static void update_wall_time(void)
 	* the vsyscall implementations are converted to use xtime_nsec
 	* (shifted nanoseconds), this can be killed.
 	*/
-	remainder = tk->xtime_nsec & ((1 << tk->shift) - 1);
+	remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
 	tk->xtime_nsec -= remainder;
-	tk->xtime_nsec += 1 << tk->shift;
+	tk->xtime_nsec += 1ULL << tk->shift;
 	tk->ntp_error += remainder << tk->ntp_error_shift;
 
 	/*
diff --git a/kernel/timer.c b/kernel/timer.c
index a61c09374eba..8c5e7b908c68 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1407,13 +1407,6 @@ SYSCALL_DEFINE1(alarm, unsigned int, seconds)
 
 #endif
 
-#ifndef __alpha__
-
-/*
- * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
- * should be moved into arch/i386 instead?
- */
-
 /**
  * sys_getpid - return the thread group id of the current process
  *
@@ -1469,8 +1462,6 @@ SYSCALL_DEFINE0(getegid)
 	return from_kgid_munged(current_user_ns(), current_egid());
 }
 
-#endif
-
 static void process_timeout(unsigned long __data)
 {
 	wake_up_process((struct task_struct *)__data);
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 60e4d7875672..6b245f64c8dd 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -506,6 +506,8 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
 	if (!test_bit(syscall_nr, enabled_perf_enter_syscalls))
 		return;
 
@@ -580,6 +582,8 @@ static void perf_syscall_exit(void *ignore, struct pt_regs *regs, long ret)
 	int size;
 
 	syscall_nr = syscall_get_nr(current, regs);
+	if (syscall_nr < 0)
+		return;
 	if (!test_bit(syscall_nr, enabled_perf_exit_syscalls))
 		return;
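For reference, the scale_utime() helper added to kernel/sched/core.c above computes the scaled user time as rtime * utime / total, and picks div_u64() or div64_u64() depending on the width of cputime_t; the do_div() call it replaces only accepts a 32-bit divisor. Below is a minimal user-space sketch of the same arithmetic, not kernel code: the standalone cputime_t typedef, main(), and the sample tick values are illustrative stand-ins.

#include <stdint.h>
#include <stdio.h>

/* Stand-in for the kernel's cputime_t; a plain 64-bit tick count here. */
typedef uint64_t cputime_t;

/*
 * Same math as the kernel helper: scale utime so that utime and stime
 * keep their measured ratio while summing to the scheduler's rtime.
 */
static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total)
{
	uint64_t temp = rtime;

	temp *= utime;		/* intermediate product needs 64-bit math */
	return temp / total;	/* callers guarantee total != 0 */
}

int main(void)
{
	/* 300 of 400 ticks were user time, so 3/4 of the 1000-tick rtime: 750. */
	printf("%llu\n", (unsigned long long)scale_utime(300, 1000, 400));
	return 0;
}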