Diffstat (limited to 'kernel')
-rw-r--r--  kernel/cgroup/cgroup-v1.c |   3
-rw-r--r--  kernel/cgroup/cgroup.c    |  39
-rw-r--r--  kernel/exit.c             |   4
-rw-r--r--  kernel/fork.c             |   2
-rw-r--r--  kernel/pid.c              |  10
-rw-r--r--  kernel/power/snapshot.c   |   2
-rw-r--r--  kernel/sched/fair.c       |   2
-rw-r--r--  kernel/trace/blktrace.c   | 117
-rw-r--r--  kernel/trace/ftrace.c     |   2
-rw-r--r--  kernel/workqueue.c        |  14
10 files changed, 140 insertions, 55 deletions
diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
index be1a1c83cdd1..f2d7cea86ffe 100644
--- a/kernel/cgroup/cgroup-v1.c
+++ b/kernel/cgroup/cgroup-v1.c
@@ -471,6 +471,7 @@ static void *cgroup_pidlist_next(struct seq_file *s, void *v, loff_t *pos)
 	 */
 	p++;
 	if (p >= end) {
+		(*pos)++;
 		return NULL;
 	} else {
 		*pos = *p;
@@ -782,7 +783,7 @@ void cgroup1_release_agent(struct work_struct *work)
 
 	pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
 	agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL);
-	if (!pathbuf || !agentbuf)
+	if (!pathbuf || !agentbuf || !strlen(agentbuf))
 		goto out;
 
 	spin_lock_irq(&css_set_lock);
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 6b2fc56b2201..3dead0416b91 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3542,21 +3542,21 @@ static int cpu_stat_show(struct seq_file *seq, void *v)
 static int cgroup_io_pressure_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
-	struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+	struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
 
 	return psi_show(seq, psi, PSI_IO);
 }
 static int cgroup_memory_pressure_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
-	struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+	struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
 
 	return psi_show(seq, psi, PSI_MEM);
 }
 static int cgroup_cpu_pressure_show(struct seq_file *seq, void *v)
 {
 	struct cgroup *cgrp = seq_css(seq)->cgroup;
-	struct psi_group *psi = cgroup_id(cgrp) == 1 ? &psi_system : &cgrp->psi;
+	struct psi_group *psi = cgroup_ino(cgrp) == 1 ? &psi_system : &cgrp->psi;
 
 	return psi_show(seq, psi, PSI_CPU);
 }
@@ -4400,12 +4400,16 @@ static void css_task_iter_advance_css_set(struct css_task_iter *it)
 		}
 	} while (!css_set_populated(cset) && list_empty(&cset->dying_tasks));
 
-	if (!list_empty(&cset->tasks))
+	if (!list_empty(&cset->tasks)) {
 		it->task_pos = cset->tasks.next;
-	else if (!list_empty(&cset->mg_tasks))
+		it->cur_tasks_head = &cset->tasks;
+	} else if (!list_empty(&cset->mg_tasks)) {
 		it->task_pos = cset->mg_tasks.next;
-	else
+		it->cur_tasks_head = &cset->mg_tasks;
+	} else {
 		it->task_pos = cset->dying_tasks.next;
+		it->cur_tasks_head = &cset->dying_tasks;
+	}
 
 	it->tasks_head = &cset->tasks;
 	it->mg_tasks_head = &cset->mg_tasks;
@@ -4463,10 +4467,14 @@ repeat:
 		else
 			it->task_pos = it->task_pos->next;
 
-		if (it->task_pos == it->tasks_head)
+		if (it->task_pos == it->tasks_head) {
 			it->task_pos = it->mg_tasks_head->next;
-		if (it->task_pos == it->mg_tasks_head)
+			it->cur_tasks_head = it->mg_tasks_head;
+		}
+		if (it->task_pos == it->mg_tasks_head) {
 			it->task_pos = it->dying_tasks_head->next;
+			it->cur_tasks_head = it->dying_tasks_head;
+		}
 		if (it->task_pos == it->dying_tasks_head)
 			css_task_iter_advance_css_set(it);
 	} else {
@@ -4485,11 +4493,12 @@ repeat:
 			goto repeat;
 
 		/* and dying leaders w/o live member threads */
-		if (!atomic_read(&task->signal->live))
+		if (it->cur_tasks_head == it->dying_tasks_head &&
+		    !atomic_read(&task->signal->live))
 			goto repeat;
 	} else {
 		/* skip all dying ones */
-		if (task->flags & PF_EXITING)
+		if (it->cur_tasks_head == it->dying_tasks_head)
 			goto repeat;
 	}
 }
@@ -4595,6 +4604,9 @@ static void *cgroup_procs_next(struct seq_file *s, void *v, loff_t *pos)
 	struct kernfs_open_file *of = s->private;
 	struct css_task_iter *it = of->priv;
 
+	if (pos)
+		(*pos)++;
+
 	return css_task_iter_next(it);
 }
@@ -4610,7 +4622,7 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
 	 * from position 0, so we can simply keep iterating on !0 *pos.
 	 */
 	if (!it) {
-		if (WARN_ON_ONCE((*pos)++))
+		if (WARN_ON_ONCE((*pos)))
 			return ERR_PTR(-EINVAL);
 
 		it = kzalloc(sizeof(*it), GFP_KERNEL);
@@ -4618,10 +4630,11 @@ static void *__cgroup_procs_start(struct seq_file *s, loff_t *pos,
 			return ERR_PTR(-ENOMEM);
 		of->priv = it;
 		css_task_iter_start(&cgrp->self, iter_flags, it);
-	} else if (!(*pos)++) {
+	} else if (!(*pos)) {
 		css_task_iter_end(it);
 		css_task_iter_start(&cgrp->self, iter_flags, it);
-	}
+	} else
+		return it->cur_task;
 
 	return cgroup_procs_next(s, NULL, NULL);
 }
diff --git a/kernel/exit.c b/kernel/exit.c
index 2833ffb0c211..0b81b26a872a 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -619,8 +619,8 @@ static void forget_original_parent(struct task_struct *father,
 	reaper = find_new_reaper(father, reaper);
 	list_for_each_entry(p, &father->children, sibling) {
 		for_each_thread(p, t) {
-			t->real_parent = reaper;
-			BUG_ON((!t->ptrace) != (t->parent == father));
+			RCU_INIT_POINTER(t->real_parent, reaper);
+			BUG_ON((!t->ptrace) != (rcu_access_pointer(t->parent) == father));
 			if (likely(!t->ptrace))
 				t->parent = t->real_parent;
 			if (t->pdeath_signal)
diff --git a/kernel/fork.c b/kernel/fork.c
index 60a1295f4384..86425305cd4a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1508,7 +1508,7 @@ static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
 		return 0;
 	}
 	sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
-	rcu_assign_pointer(tsk->sighand, sig);
+	RCU_INIT_POINTER(tsk->sighand, sig);
 	if (!sig)
 		return -ENOMEM;
diff --git a/kernel/pid.c b/kernel/pid.c
index 0f4ecb57214c..647b4bb457b5 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -247,6 +247,16 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
 		tmp = tmp->parent;
 	}
 
+	/*
+	 * ENOMEM is not the most obvious choice especially for the case
+	 * where the child subreaper has already exited and the pid
+	 * namespace denies the creation of any new processes. But ENOMEM
+	 * is what we have exposed to userspace for a long time and it is
+	 * documented behavior for pid namespaces. So we can't easily
+	 * change it even if there were an error code better suited.
+	 */
+	retval = -ENOMEM;
+
 	if (unlikely(is_child_reaper(pid))) {
 		if (pid_ns_prepare_proc(ns))
 			goto out_free;
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index ddade80ad276..d82b7b88d616 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -1681,7 +1681,7 @@ static unsigned long minimum_image_size(unsigned long saveable)
  * hibernation for allocations made while saving the image and for device
  * drivers, in case they need to allocate memory from their hibernation
  * callbacks (these two numbers are given by PAGES_FOR_IO (which is a rough
- * estimate) and reserverd_size divided by PAGE_SIZE (which is tunable through
+ * estimate) and reserved_size divided by PAGE_SIZE (which is tunable through
  * /sys/power/reserved_size, respectively).  To make this happen, we compute the
  * total number of available page frames and allocate at least
  *
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 3c8a379c357e..c1217bfe5e81 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -8337,6 +8337,8 @@ static inline void update_sg_wakeup_stats(struct sched_domain *sd,
 
 	sgs->group_capacity = group->sgc->capacity;
 
+	sgs->group_weight = group->group_weight;
+
 	sgs->group_type = group_classify(sd->imbalance_pct, group, sgs);
 
 	/*
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 0735ae8545d8..ca39dc3230cb 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -335,6 +335,7 @@ static void put_probe_ref(void)
 
 static void blk_trace_cleanup(struct blk_trace *bt)
 {
+	synchronize_rcu();
 	blk_trace_free(bt);
 	put_probe_ref();
 }
@@ -629,8 +630,10 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
 
 static int __blk_trace_startstop(struct request_queue *q, int start)
 {
 	int ret;
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	bt = rcu_dereference_protected(q->blk_trace,
+				       lockdep_is_held(&q->blk_trace_mutex));
 	if (bt == NULL)
 		return -EINVAL;
@@ -740,8 +743,8 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
 void blk_trace_shutdown(struct request_queue *q)
 {
 	mutex_lock(&q->blk_trace_mutex);
-
-	if (q->blk_trace) {
+	if (rcu_dereference_protected(q->blk_trace,
+				      lockdep_is_held(&q->blk_trace_mutex))) {
 		__blk_trace_startstop(q, 0);
 		__blk_trace_remove(q);
 	}
@@ -752,8 +755,10 @@ void blk_trace_shutdown(struct request_queue *q)
 
 #ifdef CONFIG_BLK_CGROUP
 static u64 blk_trace_bio_get_cgid(struct request_queue *q, struct bio *bio)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	/* We don't use the 'bt' value here except as an optimization... */
+	bt = rcu_dereference_protected(q->blk_trace, 1);
 	if (!bt || !(blk_tracer_flags.val & TRACE_BLK_OPT_CGROUP))
 		return 0;
@@ -796,10 +801,14 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq)
 static void blk_add_trace_rq(struct request *rq, int error,
 			     unsigned int nr_bytes, u32 what, u64 cgid)
 {
-	struct blk_trace *bt = rq->q->blk_trace;
+	struct blk_trace *bt;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(rq->q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	if (blk_rq_is_passthrough(rq))
 		what |= BLK_TC_ACT(BLK_TC_PC);
@@ -808,6 +817,7 @@ static void blk_add_trace_rq(struct request *rq, int error,
 
 	__blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq),
 			rq->cmd_flags, what, error, 0, NULL, cgid);
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_rq_insert(void *ignore,
@@ -853,14 +863,19 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
 static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
 			      u32 what, int error)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
 			bio_op(bio), bio->bi_opf, what, error, 0, NULL,
 			blk_trace_bio_get_cgid(q, bio));
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_bio_bounce(void *ignore,
@@ -905,11 +920,14 @@ static void blk_add_trace_getrq(void *ignore,
 	if (bio)
 		blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
 	else {
-		struct blk_trace *bt = q->blk_trace;
+		struct blk_trace *bt;
 
+		rcu_read_lock();
+		bt = rcu_dereference(q->blk_trace);
 		if (bt)
 			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0,
 					NULL, 0);
+		rcu_read_unlock();
 	}
 }
@@ -921,27 +939,35 @@ static void blk_add_trace_sleeprq(void *ignore,
 	if (bio)
 		blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
 	else {
-		struct blk_trace *bt = q->blk_trace;
+		struct blk_trace *bt;
 
+		rcu_read_lock();
+		bt = rcu_dereference(q->blk_trace);
 		if (bt)
 			__blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ,
 					0, 0, NULL, 0);
+		rcu_read_unlock();
 	}
 }
 
 static void blk_add_trace_plug(void *ignore, struct request_queue *q)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
 	if (bt)
 		__blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, 0);
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
 				    unsigned int depth, bool explicit)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
 	if (bt) {
 		__be64 rpdu = cpu_to_be64(depth);
 		u32 what;
@@ -953,14 +979,17 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q,
 
 		__blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, 0);
 	}
+	rcu_read_unlock();
 }
 
 static void blk_add_trace_split(void *ignore,
 				struct request_queue *q, struct bio *bio,
 				unsigned int pdu)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
 	if (bt) {
 		__be64 rpdu = cpu_to_be64(pdu);
@@ -969,6 +998,7 @@ static void blk_add_trace_split(void *ignore,
 				BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
 				&rpdu, blk_trace_bio_get_cgid(q, bio));
 	}
+	rcu_read_unlock();
 }
 
 /**
@@ -988,11 +1018,15 @@ static void blk_add_trace_bio_remap(void *ignore,
 				    struct request_queue *q, struct bio *bio,
 				    dev_t dev, sector_t from)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 	struct blk_io_trace_remap r;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	r.device_from = cpu_to_be32(dev);
 	r.device_to   = cpu_to_be32(bio_dev(bio));
@@ -1001,6 +1035,7 @@ static void blk_add_trace_bio_remap(void *ignore,
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
 			bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
 			sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
+	rcu_read_unlock();
 }
 
 /**
@@ -1021,11 +1056,15 @@ static void blk_add_trace_rq_remap(void *ignore,
 				   struct request *rq, dev_t dev,
 				   sector_t from)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 	struct blk_io_trace_remap r;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	r.device_from = cpu_to_be32(dev);
 	r.device_to   = cpu_to_be32(disk_devt(rq->rq_disk));
@@ -1034,6 +1073,7 @@ static void blk_add_trace_rq_remap(void *ignore,
 	__blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq),
 			rq_data_dir(rq), 0, BLK_TA_REMAP, 0,
 			sizeof(r), &r, blk_trace_request_get_cgid(q, rq));
+	rcu_read_unlock();
 }
 
 /**
@@ -1051,14 +1091,19 @@ void blk_add_driver_data(struct request_queue *q,
 			 struct request *rq,
 			 void *data, size_t len)
 {
-	struct blk_trace *bt = q->blk_trace;
+	struct blk_trace *bt;
 
-	if (likely(!bt))
+	rcu_read_lock();
+	bt = rcu_dereference(q->blk_trace);
+	if (likely(!bt)) {
+		rcu_read_unlock();
 		return;
+	}
 
 	__blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0,
 				BLK_TA_DRV_DATA, 0, len, data,
 				blk_trace_request_get_cgid(q, rq));
+	rcu_read_unlock();
 }
 
 EXPORT_SYMBOL_GPL(blk_add_driver_data);
@@ -1597,6 +1642,7 @@ static int blk_trace_remove_queue(struct request_queue *q)
 		return -EINVAL;
 
 	put_probe_ref();
+	synchronize_rcu();
 	blk_trace_free(bt);
 	return 0;
 }
@@ -1758,6 +1804,7 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 	struct hd_struct *p = dev_to_part(dev);
 	struct request_queue *q;
 	struct block_device *bdev;
+	struct blk_trace *bt;
 	ssize_t ret = -ENXIO;
 
 	bdev = bdget(part_devt(p));
@@ -1770,21 +1817,23 @@ static ssize_t sysfs_blk_trace_attr_show(struct device *dev,
 
 	mutex_lock(&q->blk_trace_mutex);
 
+	bt = rcu_dereference_protected(q->blk_trace,
+				       lockdep_is_held(&q->blk_trace_mutex));
 	if (attr == &dev_attr_enable) {
-		ret = sprintf(buf, "%u\n", !!q->blk_trace);
+		ret = sprintf(buf, "%u\n", !!bt);
 		goto out_unlock_bdev;
 	}
 
-	if (q->blk_trace == NULL)
+	if (bt == NULL)
 		ret = sprintf(buf, "disabled\n");
 	else if (attr == &dev_attr_act_mask)
-		ret = blk_trace_mask2str(buf, q->blk_trace->act_mask);
+		ret = blk_trace_mask2str(buf, bt->act_mask);
 	else if (attr == &dev_attr_pid)
-		ret = sprintf(buf, "%u\n", q->blk_trace->pid);
+		ret = sprintf(buf, "%u\n", bt->pid);
 	else if (attr == &dev_attr_start_lba)
-		ret = sprintf(buf, "%llu\n", q->blk_trace->start_lba);
+		ret = sprintf(buf, "%llu\n", bt->start_lba);
 	else if (attr == &dev_attr_end_lba)
-		ret = sprintf(buf, "%llu\n", q->blk_trace->end_lba);
+		ret = sprintf(buf, "%llu\n", bt->end_lba);
 
 out_unlock_bdev:
 	mutex_unlock(&q->blk_trace_mutex);
@@ -1801,6 +1850,7 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	struct block_device *bdev;
 	struct request_queue *q;
 	struct hd_struct *p;
+	struct blk_trace *bt;
 	u64 value;
 	ssize_t ret = -EINVAL;
@@ -1831,8 +1881,10 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 
 	mutex_lock(&q->blk_trace_mutex);
 
+	bt = rcu_dereference_protected(q->blk_trace,
+				       lockdep_is_held(&q->blk_trace_mutex));
 	if (attr == &dev_attr_enable) {
-		if (!!value == !!q->blk_trace) {
+		if (!!value == !!bt) {
 			ret = 0;
 			goto out_unlock_bdev;
 		}
@@ -1844,18 +1896,21 @@ static ssize_t sysfs_blk_trace_attr_store(struct device *dev,
 	}
 
 	ret = 0;
-	if (q->blk_trace == NULL)
+	if (bt == NULL) {
 		ret = blk_trace_setup_queue(q, bdev);
+		bt = rcu_dereference_protected(q->blk_trace,
+				lockdep_is_held(&q->blk_trace_mutex));
+	}
 
 	if (ret == 0) {
 		if (attr == &dev_attr_act_mask)
-			q->blk_trace->act_mask = value;
+			bt->act_mask = value;
 		else if (attr == &dev_attr_pid)
-			q->blk_trace->pid = value;
+			bt->pid = value;
 		else if (attr == &dev_attr_start_lba)
-			q->blk_trace->start_lba = value;
+			bt->start_lba = value;
 		else if (attr == &dev_attr_end_lba)
-			q->blk_trace->end_lba = value;
+			bt->end_lba = value;
 	}
 
 out_unlock_bdev:
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 3f7ee102868a..fd81c7de77a7 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1547,6 +1547,8 @@ static struct dyn_ftrace *lookup_rec(unsigned long start, unsigned long end)
 		rec = bsearch(&key, pg->records, pg->index,
 			      sizeof(struct dyn_ftrace),
 			      ftrace_cmp_recs);
+		if (rec)
+			break;
 	}
 	return rec;
 }
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 301db4406bc3..4e01c448b4b4 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1411,14 +1411,16 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
 		return;
 	rcu_read_lock();
 retry:
-	if (req_cpu == WORK_CPU_UNBOUND)
-		cpu = wq_select_unbound_cpu(raw_smp_processor_id());
-
 	/* pwq which will be used unless @work is executing elsewhere */
-	if (!(wq->flags & WQ_UNBOUND))
-		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
-	else
+	if (wq->flags & WQ_UNBOUND) {
+		if (req_cpu == WORK_CPU_UNBOUND)
+			cpu = wq_select_unbound_cpu(raw_smp_processor_id());
 		pwq = unbound_pwq_by_node(wq, cpu_to_node(cpu));
+	} else {
+		if (req_cpu == WORK_CPU_UNBOUND)
+			cpu = raw_smp_processor_id();
+		pwq = per_cpu_ptr(wq->cpu_pwqs, cpu);
+	}
 
 	/*
 	 * If @work was previously on a different pool, it might still be
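Most of the blktrace.c hunks above apply one pattern: the bare q->blk_trace load becomes rcu_dereference() inside an rcu_read_lock()/rcu_read_unlock() section on the tracepoint side, rcu_dereference_protected() under blk_trace_mutex on the setup/sysfs side, and a synchronize_rcu() is added before blk_trace_free(). The sketch below illustrates that publish/read/retire pattern in userspace with liburcu, whose primitives mirror the kernel calls used in the diff; the queue_stub/blk_trace_stub types, helper names, and the build line are illustrative assumptions, not kernel or blktrace API.

/* Minimal sketch of the RCU pattern adopted by the blktrace.c changes,
 * written against liburcu so it builds outside the kernel, e.g.:
 *   cc rcu_sketch.c -lurcu -lpthread
 * All names here are simplified stand-ins, not kernel code. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <urcu.h>

struct blk_trace_stub { int act_mask; };

struct queue_stub {
	struct blk_trace_stub *trace;	/* RCU-protected pointer */
	pthread_mutex_t trace_mutex;	/* serializes writers, like blk_trace_mutex */
};

/* Reader side: mirrors the blk_add_trace_*() hooks after the patch. */
static void trace_event(struct queue_stub *q)
{
	struct blk_trace_stub *bt;

	rcu_read_lock();
	bt = rcu_dereference(q->trace);
	if (bt)
		printf("tracing with mask %d\n", bt->act_mask);
	rcu_read_unlock();
}

/* Writer side: mirrors setup, done with the mutex held. */
static void install_trace(struct queue_stub *q, int mask)
{
	struct blk_trace_stub *bt = malloc(sizeof(*bt));

	bt->act_mask = mask;
	pthread_mutex_lock(&q->trace_mutex);
	rcu_assign_pointer(q->trace, bt);	/* publish the new trace */
	pthread_mutex_unlock(&q->trace_mutex);
}

/* Teardown: mirrors blk_trace_cleanup()/blk_trace_remove_queue(). */
static void remove_trace(struct queue_stub *q)
{
	struct blk_trace_stub *bt;

	pthread_mutex_lock(&q->trace_mutex);
	bt = q->trace;			/* protected by the mutex */
	rcu_assign_pointer(q->trace, NULL);
	pthread_mutex_unlock(&q->trace_mutex);
	synchronize_rcu();		/* wait for in-flight readers before freeing */
	free(bt);
}

int main(void)
{
	struct queue_stub q = { NULL, PTHREAD_MUTEX_INITIALIZER };

	rcu_register_thread();
	install_trace(&q, 3);
	trace_event(&q);		/* sees the installed trace */
	remove_trace(&q);
	trace_event(&q);		/* sees NULL and skips tracing */
	rcu_unregister_thread();
	return 0;
}

The synchronize_rcu() before the free is the point of the change: a reader that picked up the old pointer inside its read-side section is guaranteed to have left that section before the memory is released, so tracepoints can no longer race with blk_trace teardown.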