Diffstat (limited to 'kernel')
30 files changed, 342 insertions, 269 deletions
| diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 14750e7c5ee4..027107f4be53 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -476,7 +476,7 @@ static u32 prog_fd_array_sys_lookup_elem(void *ptr)  }  /* decrement refcnt of all bpf_progs that are stored in this map */ -void bpf_fd_array_map_clear(struct bpf_map *map) +static void bpf_fd_array_map_clear(struct bpf_map *map)  {  	struct bpf_array *array = container_of(map, struct bpf_array, map);  	int i; @@ -495,6 +495,7 @@ const struct bpf_map_ops prog_array_map_ops = {  	.map_fd_get_ptr = prog_fd_array_get_ptr,  	.map_fd_put_ptr = prog_fd_array_put_ptr,  	.map_fd_sys_lookup_elem = prog_fd_array_sys_lookup_elem, +	.map_release_uref = bpf_fd_array_map_clear,  };  static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file, diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d315b393abdd..ba03ec39efb3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1572,13 +1572,32 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs)  	return cnt;  } +static bool bpf_prog_array_copy_core(struct bpf_prog **prog, +				     u32 *prog_ids, +				     u32 request_cnt) +{ +	int i = 0; + +	for (; *prog; prog++) { +		if (*prog == &dummy_bpf_prog.prog) +			continue; +		prog_ids[i] = (*prog)->aux->id; +		if (++i == request_cnt) { +			prog++; +			break; +		} +	} + +	return !!(*prog); +} +  int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,  				__u32 __user *prog_ids, u32 cnt)  {  	struct bpf_prog **prog;  	unsigned long err = 0; -	u32 i = 0, *ids;  	bool nospc; +	u32 *ids;  	/* users of this function are doing:  	 * cnt = bpf_prog_array_length(); @@ -1595,16 +1614,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs,  		return -ENOMEM;  	rcu_read_lock();  	prog = rcu_dereference(progs)->progs; -	for (; *prog; prog++) { -		if (*prog == &dummy_bpf_prog.prog) -			continue; -		ids[i] = (*prog)->aux->id; -		if (++i == cnt) { -			prog++; -			break; -		} -	} -	nospc = !!(*prog); +	nospc = bpf_prog_array_copy_core(prog, ids, cnt);  	rcu_read_unlock();  	err = copy_to_user(prog_ids, ids, cnt * sizeof(u32));  	kfree(ids); @@ -1683,22 +1693,25 @@ int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array,  }  int bpf_prog_array_copy_info(struct bpf_prog_array __rcu *array, -			     __u32 __user *prog_ids, u32 request_cnt, -			     __u32 __user *prog_cnt) +			     u32 *prog_ids, u32 request_cnt, +			     u32 *prog_cnt)  { +	struct bpf_prog **prog;  	u32 cnt = 0;  	if (array)  		cnt = bpf_prog_array_length(array); -	if (copy_to_user(prog_cnt, &cnt, sizeof(cnt))) -		return -EFAULT; +	*prog_cnt = cnt;  	/* return early if user requested only program count or nothing to copy */  	if (!request_cnt || !cnt)  		return 0; -	return bpf_prog_array_copy_to_user(array, prog_ids, request_cnt); +	/* this function is called under trace/bpf_trace.c: bpf_event_mutex */ +	prog = rcu_dereference_check(array, 1)->progs; +	return bpf_prog_array_copy_core(prog, prog_ids, request_cnt) ? 
-ENOSPC +								     : 0;  }  static void bpf_prog_free_deferred(struct work_struct *work) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 8dd9210d7db7..098eca568c2b 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -43,6 +43,7 @@  #include <net/tcp.h>  #include <linux/ptr_ring.h>  #include <net/inet_common.h> +#include <linux/sched/signal.h>  #define SOCK_CREATE_FLAG_MASK \  	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) @@ -325,6 +326,9 @@ retry:  			if (ret > 0) {  				if (apply)  					apply_bytes -= ret; + +				sg->offset += ret; +				sg->length -= ret;  				size -= ret;  				offset += ret;  				if (uncharge) @@ -332,8 +336,6 @@ retry:  				goto retry;  			} -			sg->length = size; -			sg->offset = offset;  			return ret;  		} @@ -391,7 +393,8 @@ static void return_mem_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)  	} while (i != md->sg_end);  } -static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md) +static void free_bytes_sg(struct sock *sk, int bytes, +			  struct sk_msg_buff *md, bool charge)  {  	struct scatterlist *sg = md->sg_data;  	int i = md->sg_start, free; @@ -401,11 +404,13 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)  		if (bytes < free) {  			sg[i].length -= bytes;  			sg[i].offset += bytes; -			sk_mem_uncharge(sk, bytes); +			if (charge) +				sk_mem_uncharge(sk, bytes);  			break;  		} -		sk_mem_uncharge(sk, sg[i].length); +		if (charge) +			sk_mem_uncharge(sk, sg[i].length);  		put_page(sg_page(&sg[i]));  		bytes -= sg[i].length;  		sg[i].length = 0; @@ -416,6 +421,7 @@ static void free_bytes_sg(struct sock *sk, int bytes, struct sk_msg_buff *md)  		if (i == MAX_SKB_FRAGS)  			i = 0;  	} +	md->sg_start = i;  }  static int free_sg(struct sock *sk, int start, struct sk_msg_buff *md) @@ -523,8 +529,6 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,  	i = md->sg_start;  	do { -		r->sg_data[i] = md->sg_data[i]; -  		size = (apply && apply_bytes < md->sg_data[i].length) ?  			
apply_bytes : md->sg_data[i].length; @@ -535,6 +539,7 @@ static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,  		}  		sk_mem_charge(sk, size); +		r->sg_data[i] = md->sg_data[i];  		r->sg_data[i].length = size;  		md->sg_data[i].length -= size;  		md->sg_data[i].offset += size; @@ -575,10 +580,10 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,  				       struct sk_msg_buff *md,  				       int flags)  { +	bool ingress = !!(md->flags & BPF_F_INGRESS);  	struct smap_psock *psock;  	struct scatterlist *sg; -	int i, err, free = 0; -	bool ingress = !!(md->flags & BPF_F_INGRESS); +	int err = 0;  	sg = md->sg_data; @@ -606,16 +611,8 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,  out_rcu:  	rcu_read_unlock();  out: -	i = md->sg_start; -	while (sg[i].length) { -		free += sg[i].length; -		put_page(sg_page(&sg[i])); -		sg[i].length = 0; -		i++; -		if (i == MAX_SKB_FRAGS) -			i = 0; -	} -	return free; +	free_bytes_sg(NULL, send, md, false); +	return err;  }  static inline void bpf_md_init(struct smap_psock *psock) @@ -700,19 +697,26 @@ more_data:  		err = bpf_tcp_sendmsg_do_redirect(redir, send, m, flags);  		lock_sock(sk); +		if (unlikely(err < 0)) { +			free_start_sg(sk, m); +			psock->sg_size = 0; +			if (!cork) +				*copied -= send; +		} else { +			psock->sg_size -= send; +		} +  		if (cork) {  			free_start_sg(sk, m); +			psock->sg_size = 0;  			kfree(m);  			m = NULL; +			err = 0;  		} -		if (unlikely(err)) -			*copied -= err; -		else -			psock->sg_size -= send;  		break;  	case __SK_DROP:  	default: -		free_bytes_sg(sk, send, m); +		free_bytes_sg(sk, send, m, true);  		apply_bytes_dec(psock, send);  		*copied -= send;  		psock->sg_size -= send; @@ -732,6 +736,26 @@ out_err:  	return err;  } +static int bpf_wait_data(struct sock *sk, +			 struct smap_psock *psk, int flags, +			 long timeo, int *err) +{ +	int rc; + +	DEFINE_WAIT_FUNC(wait, woken_wake_function); + +	add_wait_queue(sk_sleep(sk), &wait); +	sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); +	rc = sk_wait_event(sk, &timeo, +			   !list_empty(&psk->ingress) || +			   !skb_queue_empty(&sk->sk_receive_queue), +			   &wait); +	sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); +	remove_wait_queue(sk_sleep(sk), &wait); + +	return rc; +} +  static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,  			   int nonblock, int flags, int *addr_len)  { @@ -755,6 +779,7 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,  		return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);  	lock_sock(sk); +bytes_ready:  	while (copied != len) {  		struct scatterlist *sg;  		struct sk_msg_buff *md; @@ -809,6 +834,28 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,  		}  	} +	if (!copied) { +		long timeo; +		int data; +		int err = 0; + +		timeo = sock_rcvtimeo(sk, nonblock); +		data = bpf_wait_data(sk, psock, flags, timeo, &err); + +		if (data) { +			if (!skb_queue_empty(&sk->sk_receive_queue)) { +				release_sock(sk); +				smap_release_sock(psock, sk); +				copied = tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); +				return copied; +			} +			goto bytes_ready; +		} + +		if (err) +			copied = err; +	} +  	release_sock(sk);  	smap_release_sock(psock, sk);  	return copied; @@ -1442,9 +1489,6 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr)  	    attr->value_size != 4 || attr->map_flags & ~SOCK_CREATE_FLAG_MASK)  		return ERR_PTR(-EINVAL); -	if (attr->value_size > KMALLOC_MAX_SIZE) -		return ERR_PTR(-E2BIG); -  	err = 
bpf_tcp_ulp_register();  	if (err && err != -EEXIST)  		return ERR_PTR(err); @@ -1834,7 +1878,7 @@ static int sock_map_update_elem(struct bpf_map *map,  	return err;  } -static void sock_map_release(struct bpf_map *map, struct file *map_file) +static void sock_map_release(struct bpf_map *map)  {  	struct bpf_stab *stab = container_of(map, struct bpf_stab, map);  	struct bpf_prog *orig; @@ -1858,7 +1902,7 @@ const struct bpf_map_ops sock_map_ops = {  	.map_get_next_key = sock_map_get_next_key,  	.map_update_elem = sock_map_update_elem,  	.map_delete_elem = sock_map_delete_elem, -	.map_release = sock_map_release, +	.map_release_uref = sock_map_release,  };  BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4ca46df19c9a..ebfe9f29dae8 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -257,8 +257,8 @@ static void bpf_map_free_deferred(struct work_struct *work)  static void bpf_map_put_uref(struct bpf_map *map)  {  	if (atomic_dec_and_test(&map->usercnt)) { -		if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY) -			bpf_fd_array_map_clear(map); +		if (map->ops->map_release_uref) +			map->ops->map_release_uref(map);  	}  } diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 772a43fea825..c187aa3df3c8 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -119,23 +119,20 @@ int get_callchain_buffers(int event_max_stack)  		goto exit;  	} -	if (count > 1) { -		/* If the allocation failed, give up */ -		if (!callchain_cpus_entries) -			err = -ENOMEM; -		/* -		 * If requesting per event more than the global cap, -		 * return a different error to help userspace figure -		 * this out. -		 * -		 * And also do it here so that we have &callchain_mutex held. -		 */ -		if (event_max_stack > sysctl_perf_event_max_stack) -			err = -EOVERFLOW; +	/* +	 * If requesting per event more than the global cap, +	 * return a different error to help userspace figure +	 * this out. +	 * +	 * And also do it here so that we have &callchain_mutex held. +	 */ +	if (event_max_stack > sysctl_perf_event_max_stack) { +		err = -EOVERFLOW;  		goto exit;  	} -	err = alloc_callchain_buffers(); +	if (count == 1) +		err = alloc_callchain_buffers();  exit:  	if (err)  		atomic_dec(&nr_callchain_events); diff --git a/kernel/events/core.c b/kernel/events/core.c index 2d5fe26551f8..67612ce359ad 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7587,6 +7587,10 @@ static void perf_event_switch(struct task_struct *task,  		},  	}; +	if (!sched_in && task->state == TASK_RUNNING) +		switch_event.event_id.header.misc |= +				PERF_RECORD_MISC_SWITCH_OUT_PREEMPT; +  	perf_iterate_sb(perf_event_switch_output,  		       &switch_event,  		       NULL); @@ -10205,9 +10209,9 @@ static int perf_copy_attr(struct perf_event_attr __user *uattr,  		 * __u16 sample size limit.  		 
*/  		if (attr->sample_stack_user >= USHRT_MAX) -			ret = -EINVAL; +			return -EINVAL;  		else if (!IS_ALIGNED(attr->sample_stack_user, sizeof(u64))) -			ret = -EINVAL; +			return -EINVAL;  	}  	if (!attr->sample_max_stack) diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index ce6848e46e94..1725b902983f 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -491,7 +491,7 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)  	if (!uprobe)  		return NULL; -	uprobe->inode = igrab(inode); +	uprobe->inode = inode;  	uprobe->offset = offset;  	init_rwsem(&uprobe->register_rwsem);  	init_rwsem(&uprobe->consumer_rwsem); @@ -502,7 +502,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)  	if (cur_uprobe) {  		kfree(uprobe);  		uprobe = cur_uprobe; -		iput(inode);  	}  	return uprobe; @@ -701,7 +700,6 @@ static void delete_uprobe(struct uprobe *uprobe)  	rb_erase(&uprobe->rb_node, &uprobes_tree);  	spin_unlock(&uprobes_treelock);  	RB_CLEAR_NODE(&uprobe->rb_node); /* for uprobe_is_active() */ -	iput(uprobe->inode);  	put_uprobe(uprobe);  } @@ -873,7 +871,8 @@ static void __uprobe_unregister(struct uprobe *uprobe, struct uprobe_consumer *u   * tuple).  Creation refcount stops uprobe_unregister from freeing the   * @uprobe even before the register operation is complete. Creation   * refcount is released when the last @uc for the @uprobe - * unregisters. + * unregisters. Caller of uprobe_register() is required to keep @inode + * (and the containing mount) referenced.   *   * Return errno if it cannot successully install probes   * else return 0 (success) diff --git a/kernel/fork.c b/kernel/fork.c index 242c8c93d285..a5d21c42acfc 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -216,10 +216,9 @@ static unsigned long *alloc_thread_stack_node(struct task_struct *tsk, int node)  		if (!s)  			continue; -#ifdef CONFIG_DEBUG_KMEMLEAK  		/* Clear stale pointers from reused stack. */  		memset(s->addr, 0, THREAD_SIZE); -#endif +  		tsk->stack_vm_area = s;  		return s->addr;  	} diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 102160ff5c66..ea619021d901 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -2428,7 +2428,7 @@ static int kprobe_blacklist_seq_show(struct seq_file *m, void *v)  	struct kprobe_blacklist_entry *ent =  		list_entry(v, struct kprobe_blacklist_entry, list); -	seq_printf(m, "0x%p-0x%p\t%ps\n", (void *)ent->start_addr, +	seq_printf(m, "0x%px-0x%px\t%ps\n", (void *)ent->start_addr,  		   (void *)ent->end_addr, (void *)ent->start_addr);  	return 0;  } diff --git a/kernel/livepatch/shadow.c b/kernel/livepatch/shadow.c index fdac27588d60..83958c814439 100644 --- a/kernel/livepatch/shadow.c +++ b/kernel/livepatch/shadow.c @@ -113,8 +113,10 @@ void *klp_shadow_get(void *obj, unsigned long id)  }  EXPORT_SYMBOL_GPL(klp_shadow_get); -static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, -		       size_t size, gfp_t gfp_flags, bool warn_on_exist) +static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, +				       size_t size, gfp_t gfp_flags, +				       klp_shadow_ctor_t ctor, void *ctor_data, +				       bool warn_on_exist)  {  	struct klp_shadow *new_shadow;  	void *shadow_data; @@ -125,18 +127,15 @@ static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data,  	if (shadow_data)  		goto exists; -	/* Allocate a new shadow variable for use inside the lock below */ +	/* +	 * Allocate a new shadow variable.  Fill it with zeroes by default. 
+	 * More complex setting can be done by @ctor function.  But it is +	 * called only when the buffer is really used (under klp_shadow_lock). +	 */  	new_shadow = kzalloc(size + sizeof(*new_shadow), gfp_flags);  	if (!new_shadow)  		return NULL; -	new_shadow->obj = obj; -	new_shadow->id = id; - -	/* Initialize the shadow variable if data provided */ -	if (data) -		memcpy(new_shadow->data, data, size); -  	/* Look for <obj, id> again under the lock */  	spin_lock_irqsave(&klp_shadow_lock, flags);  	shadow_data = klp_shadow_get(obj, id); @@ -150,6 +149,22 @@ static void *__klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data,  		goto exists;  	} +	new_shadow->obj = obj; +	new_shadow->id = id; + +	if (ctor) { +		int err; + +		err = ctor(obj, new_shadow->data, ctor_data); +		if (err) { +			spin_unlock_irqrestore(&klp_shadow_lock, flags); +			kfree(new_shadow); +			pr_err("Failed to construct shadow variable <%p, %lx> (%d)\n", +			       obj, id, err); +			return NULL; +		} +	} +  	/* No <obj, id> found, so attach the newly allocated one */  	hash_add_rcu(klp_shadow_hash, &new_shadow->node,  		     (unsigned long)new_shadow->obj); @@ -170,26 +185,32 @@ exists:   * klp_shadow_alloc() - allocate and add a new shadow variable   * @obj:	pointer to parent object   * @id:		data identifier - * @data:	pointer to data to attach to parent   * @size:	size of attached data   * @gfp_flags:	GFP mask for allocation + * @ctor:	custom constructor to initialize the shadow data (optional) + * @ctor_data:	pointer to any data needed by @ctor (optional) + * + * Allocates @size bytes for new shadow variable data using @gfp_flags. + * The data are zeroed by default.  They are further initialized by @ctor + * function if it is not NULL.  The new shadow variable is then added + * to the global hashtable.   * - * Allocates @size bytes for new shadow variable data using @gfp_flags - * and copies @size bytes from @data into the new shadow variable's own - * data space.  If @data is NULL, @size bytes are still allocated, but - * no copy is performed.  The new shadow variable is then added to the - * global hashtable. + * If an existing <obj, id> shadow variable can be found, this routine will + * issue a WARN, exit early and return NULL.   * - * If an existing <obj, id> shadow variable can be found, this routine - * will issue a WARN, exit early and return NULL. + * This function guarantees that the constructor function is called only when + * the variable did not exist before.  The cost is that @ctor is called + * in atomic context under a spin lock.   *   * Return: the shadow variable data element, NULL on duplicate or   * failure.   
*/ -void *klp_shadow_alloc(void *obj, unsigned long id, void *data, -		       size_t size, gfp_t gfp_flags) +void *klp_shadow_alloc(void *obj, unsigned long id, +		       size_t size, gfp_t gfp_flags, +		       klp_shadow_ctor_t ctor, void *ctor_data)  { -	return __klp_shadow_get_or_alloc(obj, id, data, size, gfp_flags, true); +	return __klp_shadow_get_or_alloc(obj, id, size, gfp_flags, +					 ctor, ctor_data, true);  }  EXPORT_SYMBOL_GPL(klp_shadow_alloc); @@ -197,37 +218,51 @@ EXPORT_SYMBOL_GPL(klp_shadow_alloc);   * klp_shadow_get_or_alloc() - get existing or allocate a new shadow variable   * @obj:	pointer to parent object   * @id:		data identifier - * @data:	pointer to data to attach to parent   * @size:	size of attached data   * @gfp_flags:	GFP mask for allocation + * @ctor:	custom constructor to initialize the shadow data (optional) + * @ctor_data:	pointer to any data needed by @ctor (optional)   *   * Returns a pointer to existing shadow data if an <obj, id> shadow   * variable is already present.  Otherwise, it creates a new shadow   * variable like klp_shadow_alloc().   * - * This function guarantees that only one shadow variable exists with - * the given @id for the given @obj.  It also guarantees that the shadow - * variable will be initialized by the given @data only when it did not - * exist before. + * This function guarantees that only one shadow variable exists with the given + * @id for the given @obj.  It also guarantees that the constructor function + * will be called only when the variable did not exist before.  The cost is + * that @ctor is called in atomic context under a spin lock.   *   * Return: the shadow variable data element, NULL on failure.   */ -void *klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, -			       size_t size, gfp_t gfp_flags) +void *klp_shadow_get_or_alloc(void *obj, unsigned long id, +			      size_t size, gfp_t gfp_flags, +			      klp_shadow_ctor_t ctor, void *ctor_data)  { -	return __klp_shadow_get_or_alloc(obj, id, data, size, gfp_flags, false); +	return __klp_shadow_get_or_alloc(obj, id, size, gfp_flags, +					 ctor, ctor_data, false);  }  EXPORT_SYMBOL_GPL(klp_shadow_get_or_alloc); +static void klp_shadow_free_struct(struct klp_shadow *shadow, +				   klp_shadow_dtor_t dtor) +{ +	hash_del_rcu(&shadow->node); +	if (dtor) +		dtor(shadow->obj, shadow->data); +	kfree_rcu(shadow, rcu_head); +} +  /**   * klp_shadow_free() - detach and free a <obj, id> shadow variable   * @obj:	pointer to parent object   * @id:		data identifier + * @dtor:	custom callback that can be used to unregister the variable + *		and/or free data that the shadow variable points to (optional)   *   * This function releases the memory for this <obj, id> shadow variable   * instance, callers should stop referencing it accordingly.   
*/ -void klp_shadow_free(void *obj, unsigned long id) +void klp_shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor)  {  	struct klp_shadow *shadow;  	unsigned long flags; @@ -239,8 +274,7 @@ void klp_shadow_free(void *obj, unsigned long id)  			       (unsigned long)obj) {  		if (klp_shadow_match(shadow, obj, id)) { -			hash_del_rcu(&shadow->node); -			kfree_rcu(shadow, rcu_head); +			klp_shadow_free_struct(shadow, dtor);  			break;  		}  	} @@ -252,11 +286,13 @@ EXPORT_SYMBOL_GPL(klp_shadow_free);  /**   * klp_shadow_free_all() - detach and free all <*, id> shadow variables   * @id:		data identifier + * @dtor:	custom callback that can be used to unregister the variable + *		and/or free data that the shadow variable points to (optional)   *   * This function releases the memory for all <*, id> shadow variable   * instances, callers should stop referencing them accordingly.   */ -void klp_shadow_free_all(unsigned long id) +void klp_shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor)  {  	struct klp_shadow *shadow;  	unsigned long flags; @@ -266,10 +302,8 @@ void klp_shadow_free_all(unsigned long id)  	/* Delete all <*, id> from hash */  	hash_for_each(klp_shadow_hash, i, shadow, node) { -		if (klp_shadow_match(shadow, shadow->obj, id)) { -			hash_del_rcu(&shadow->node); -			kfree_rcu(shadow, rcu_head); -		} +		if (klp_shadow_match(shadow, shadow->obj, id)) +			klp_shadow_free_struct(shadow, dtor);  	}  	spin_unlock_irqrestore(&klp_shadow_lock, flags); diff --git a/kernel/module.c b/kernel/module.c index a6e43a5806a1..ce8066b88178 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -1472,7 +1472,8 @@ static ssize_t module_sect_show(struct module_attribute *mattr,  {  	struct module_sect_attr *sattr =  		container_of(mattr, struct module_sect_attr, mattr); -	return sprintf(buf, "0x%pK\n", (void *)sattr->address); +	return sprintf(buf, "0x%px\n", kptr_restrict < 2 ? 
+		       (void *)sattr->address : NULL);  }  static void free_sect_attrs(struct module_sect_attrs *sect_attrs) diff --git a/kernel/sysctl_binary.c b/kernel/sysctl_binary.c index e8c0dab4fd65..07148b497451 100644 --- a/kernel/sysctl_binary.c +++ b/kernel/sysctl_binary.c @@ -704,24 +704,6 @@ static const struct bin_table bin_net_netfilter_table[] = {  	{}  }; -static const struct bin_table bin_net_irda_table[] = { -	{ CTL_INT,	NET_IRDA_DISCOVERY,		"discovery" }, -	{ CTL_STR,	NET_IRDA_DEVNAME,		"devname" }, -	{ CTL_INT,	NET_IRDA_DEBUG,			"debug" }, -	{ CTL_INT,	NET_IRDA_FAST_POLL,		"fast_poll_increase" }, -	{ CTL_INT,	NET_IRDA_DISCOVERY_SLOTS,	"discovery_slots" }, -	{ CTL_INT,	NET_IRDA_DISCOVERY_TIMEOUT,	"discovery_timeout" }, -	{ CTL_INT,	NET_IRDA_SLOT_TIMEOUT,		"slot_timeout" }, -	{ CTL_INT,	NET_IRDA_MAX_BAUD_RATE,		"max_baud_rate" }, -	{ CTL_INT,	NET_IRDA_MIN_TX_TURN_TIME,	"min_tx_turn_time" }, -	{ CTL_INT,	NET_IRDA_MAX_TX_DATA_SIZE,	"max_tx_data_size" }, -	{ CTL_INT,	NET_IRDA_MAX_TX_WINDOW,		"max_tx_window" }, -	{ CTL_INT,	NET_IRDA_MAX_NOREPLY_TIME,	"max_noreply_time" }, -	{ CTL_INT,	NET_IRDA_WARN_NOREPLY_TIME,	"warn_noreply_time" }, -	{ CTL_INT,	NET_IRDA_LAP_KEEPALIVE_TIME,	"lap_keepalive_time" }, -	{} -}; -  static const struct bin_table bin_net_table[] = {  	{ CTL_DIR,	NET_CORE,		"core",		bin_net_core_table },  	/* NET_ETHER not used */ @@ -743,7 +725,7 @@ static const struct bin_table bin_net_table[] = {  	{ CTL_DIR,	NET_LLC,		"llc",		bin_net_llc_table },  	{ CTL_DIR,	NET_NETFILTER,		"netfilter",	bin_net_netfilter_table },  	/* NET_DCCP "dccp" no longer used */ -	{ CTL_DIR,	NET_IRDA,		"irda",		bin_net_irda_table }, +	/* NET_IRDA "irda" no longer used */  	{ CTL_INT,	2089,			"nf_conntrack_max" },  	{}  }; diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index eda1210ce50f..14e858753d76 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -91,6 +91,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =  			.get_time = &ktime_get_real,  		},  		{ +			.index = HRTIMER_BASE_BOOTTIME, +			.clockid = CLOCK_BOOTTIME, +			.get_time = &ktime_get_boottime, +		}, +		{  			.index = HRTIMER_BASE_TAI,  			.clockid = CLOCK_TAI,  			.get_time = &ktime_get_clocktai, @@ -106,6 +111,11 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =  			.get_time = &ktime_get_real,  		},  		{ +			.index = HRTIMER_BASE_BOOTTIME_SOFT, +			.clockid = CLOCK_BOOTTIME, +			.get_time = &ktime_get_boottime, +		}, +		{  			.index = HRTIMER_BASE_TAI_SOFT,  			.clockid = CLOCK_TAI,  			.get_time = &ktime_get_clocktai, @@ -119,7 +129,7 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {  	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,  	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC, -	[CLOCK_BOOTTIME]	= HRTIMER_BASE_MONOTONIC, +	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,  	[CLOCK_TAI]		= HRTIMER_BASE_TAI,  }; @@ -571,12 +581,14 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_  static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)  {  	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; +	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;  	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;  	ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, -						   offs_real, offs_tai); +					    offs_real, offs_boot, offs_tai);  	base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; +	base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;  	
base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;  	return now; diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 2541bd89f20e..5a6251ac6f7a 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -1205,10 +1205,12 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,  			   u64 *newval, u64 *oldval)  {  	u64 now; +	int ret;  	WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED); +	ret = cpu_timer_sample_group(clock_idx, tsk, &now); -	if (oldval && cpu_timer_sample_group(clock_idx, tsk, &now) != -EINVAL) { +	if (oldval && ret != -EINVAL) {  		/*  		 * We are setting itimer. The *oldval is absolute and we update  		 * it to be relative, *newval argument is relative and we update diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c index e0dbae98db9d..69a937c3cd81 100644 --- a/kernel/time/posix-stubs.c +++ b/kernel/time/posix-stubs.c @@ -83,8 +83,6 @@ int do_clock_gettime(clockid_t which_clock, struct timespec64 *tp)  	case CLOCK_BOOTTIME:  		get_monotonic_boottime64(tp);  		break; -	case CLOCK_MONOTONIC_ACTIVE: -		ktime_get_active_ts64(tp);  	default:  		return -EINVAL;  	} diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index b6899b5060bd..10b7186d0638 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -252,16 +252,15 @@ static int posix_get_coarse_res(const clockid_t which_clock, struct timespec64 *  	return 0;  } -static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp) +static int posix_get_boottime(const clockid_t which_clock, struct timespec64 *tp)  { -	timekeeping_clocktai64(tp); +	get_monotonic_boottime64(tp);  	return 0;  } -static int posix_get_monotonic_active(clockid_t which_clock, -				      struct timespec64 *tp) +static int posix_get_tai(clockid_t which_clock, struct timespec64 *tp)  { -	ktime_get_active_ts64(tp); +	timekeeping_clocktai64(tp);  	return 0;  } @@ -1317,9 +1316,19 @@ static const struct k_clock clock_tai = {  	.timer_arm		= common_hrtimer_arm,  }; -static const struct k_clock clock_monotonic_active = { +static const struct k_clock clock_boottime = {  	.clock_getres		= posix_get_hrtimer_res, -	.clock_get		= posix_get_monotonic_active, +	.clock_get		= posix_get_boottime, +	.nsleep			= common_nsleep, +	.timer_create		= common_timer_create, +	.timer_set		= common_timer_set, +	.timer_get		= common_timer_get, +	.timer_del		= common_timer_del, +	.timer_rearm		= common_hrtimer_rearm, +	.timer_forward		= common_hrtimer_forward, +	.timer_remaining	= common_hrtimer_remaining, +	.timer_try_to_cancel	= common_hrtimer_try_to_cancel, +	.timer_arm		= common_hrtimer_arm,  };  static const struct k_clock * const posix_clocks[] = { @@ -1330,11 +1339,10 @@ static const struct k_clock * const posix_clocks[] = {  	[CLOCK_MONOTONIC_RAW]		= &clock_monotonic_raw,  	[CLOCK_REALTIME_COARSE]		= &clock_realtime_coarse,  	[CLOCK_MONOTONIC_COARSE]	= &clock_monotonic_coarse, -	[CLOCK_BOOTTIME]		= &clock_monotonic, +	[CLOCK_BOOTTIME]		= &clock_boottime,  	[CLOCK_REALTIME_ALARM]		= &alarm_clock,  	[CLOCK_BOOTTIME_ALARM]		= &alarm_clock,  	[CLOCK_TAI]			= &clock_tai, -	[CLOCK_MONOTONIC_ACTIVE]	= &clock_monotonic_active,  };  static const struct k_clock *clockid_to_kclock(const clockid_t id) diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 099572ca4a8f..49edc1c4f3e6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -419,19 +419,6 @@ void tick_suspend_local(void)  	
clockevents_shutdown(td->evtdev);  } -static void tick_forward_next_period(void) -{ -	ktime_t delta, now = ktime_get(); -	u64 n; - -	delta = ktime_sub(now, tick_next_period); -	n = ktime_divns(delta, tick_period); -	tick_next_period += n * tick_period; -	if (tick_next_period < now) -		tick_next_period += tick_period; -	tick_sched_forward_next_period(); -} -  /**   * tick_resume_local - Resume the local tick device   * @@ -444,8 +431,6 @@ void tick_resume_local(void)  	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);  	bool broadcast = tick_resume_check_broadcast(); -	tick_forward_next_period(); -  	clockevents_tick_resume(td->evtdev);  	if (!broadcast) {  		if (td->mode == TICKDEV_MODE_PERIODIC) diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index 21efab7485ca..e277284c2831 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -141,12 +141,6 @@ static inline void tick_check_oneshot_broadcast_this_cpu(void) { }  static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }  #endif /* !(BROADCAST && ONESHOT) */ -#if defined(CONFIG_NO_HZ_COMMON) || defined(CONFIG_HIGH_RES_TIMERS) -extern void tick_sched_forward_next_period(void); -#else -static inline void tick_sched_forward_next_period(void) { } -#endif -  /* NO_HZ_FULL internal */  #ifdef CONFIG_NO_HZ_FULL  extern void tick_nohz_init(void); diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c index c1f518e7aa80..6fe615d57ebb 100644 --- a/kernel/time/tick-oneshot.c +++ b/kernel/time/tick-oneshot.c @@ -82,16 +82,15 @@ int tick_switch_to_oneshot(void (*handler)(struct clock_event_device *))  	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT) ||  		    !tick_device_is_functional(dev)) { -		printk(KERN_INFO "Clockevents: " -		       "could not switch to one-shot mode:"); +		pr_info("Clockevents: could not switch to one-shot mode:");  		if (!dev) { -			printk(" no tick device\n"); +			pr_cont(" no tick device\n");  		} else {  			if (!tick_device_is_functional(dev)) -				printk(" %s is not functional.\n", dev->name); +				pr_cont(" %s is not functional.\n", dev->name);  			else -				printk(" %s does not support one-shot mode.\n", -				       dev->name); +				pr_cont(" %s does not support one-shot mode.\n", +					dev->name);  		}  		return -EINVAL;  	} diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 646645e981f9..da9455a6b42b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -52,15 +52,6 @@ struct tick_sched *tick_get_tick_sched(int cpu)  static ktime_t last_jiffies_update;  /* - * Called after resume. Make sure that jiffies are not fast forwarded due to - * clock monotonic being forwarded by the suspended time. - */ -void tick_sched_forward_next_period(void) -{ -	last_jiffies_update = tick_next_period; -} - -/*   * Must be called with interrupts disabled !   
*/  static void tick_do_update_jiffies64(ktime_t now) @@ -804,12 +795,12 @@ static void tick_nohz_stop_tick(struct tick_sched *ts, int cpu)  		return;  	} -	hrtimer_set_expires(&ts->sched_timer, tick); - -	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) -		hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED); -	else +	if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { +		hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED); +	} else { +		hrtimer_set_expires(&ts->sched_timer, tick);  		tick_program_event(tick, 1); +	}  }  static void tick_nohz_retain_tick(struct tick_sched *ts) diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index ca90219a1e73..49cbceef5deb 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -138,12 +138,7 @@ static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)  static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)  { -	/* Update both bases so mono and raw stay coupled. */ -	tk->tkr_mono.base += delta; -	tk->tkr_raw.base += delta; - -	/* Accumulate time spent in suspend */ -	tk->time_suspended += delta; +	tk->offs_boot = ktime_add(tk->offs_boot, delta);  }  /* @@ -473,6 +468,36 @@ u64 ktime_get_raw_fast_ns(void)  }  EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns); +/** + * ktime_get_boot_fast_ns - NMI safe and fast access to boot clock. + * + * To keep it NMI safe since we're accessing from tracing, we're not using a + * separate timekeeper with updates to monotonic clock and boot offset + * protected with seqlocks. This has the following minor side effects: + * + * (1) Its possible that a timestamp be taken after the boot offset is updated + * but before the timekeeper is updated. If this happens, the new boot offset + * is added to the old timekeeping making the clock appear to update slightly + * earlier: + *    CPU 0                                        CPU 1 + *    timekeeping_inject_sleeptime64() + *    __timekeeping_inject_sleeptime(tk, delta); + *                                                 timestamp(); + *    timekeeping_update(tk, TK_CLEAR_NTP...); + * + * (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be + * partially updated.  Since the tk->offs_boot update is a rare event, this + * should be a rare occurrence which postprocessing should be able to handle. + */ +u64 notrace ktime_get_boot_fast_ns(void) +{ +	struct timekeeper *tk = &tk_core.timekeeper; + +	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot)); +} +EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns); + +  /*   * See comment for __ktime_get_fast_ns() vs. timestamp ordering   */ @@ -764,6 +789,7 @@ EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);  static ktime_t *offsets[TK_OFFS_MAX] = {  	[TK_OFFS_REAL]	= &tk_core.timekeeper.offs_real, +	[TK_OFFS_BOOT]	= &tk_core.timekeeper.offs_boot,  	[TK_OFFS_TAI]	= &tk_core.timekeeper.offs_tai,  }; @@ -861,39 +887,6 @@ void ktime_get_ts64(struct timespec64 *ts)  EXPORT_SYMBOL_GPL(ktime_get_ts64);  /** - * ktime_get_active_ts64 - Get the active non-suspended monotonic clock - * @ts:		pointer to timespec variable - * - * The function calculates the monotonic clock from the realtime clock and - * the wall_to_monotonic offset, subtracts the accumulated suspend time and - * stores the result in normalized timespec64 format in the variable - * pointed to by @ts. 
- */ -void ktime_get_active_ts64(struct timespec64 *ts) -{ -	struct timekeeper *tk = &tk_core.timekeeper; -	struct timespec64 tomono, tsusp; -	u64 nsec, nssusp; -	unsigned int seq; - -	WARN_ON(timekeeping_suspended); - -	do { -		seq = read_seqcount_begin(&tk_core.seq); -		ts->tv_sec = tk->xtime_sec; -		nsec = timekeeping_get_ns(&tk->tkr_mono); -		tomono = tk->wall_to_monotonic; -		nssusp = tk->time_suspended; -	} while (read_seqcount_retry(&tk_core.seq, seq)); - -	ts->tv_sec += tomono.tv_sec; -	ts->tv_nsec = 0; -	timespec64_add_ns(ts, nsec + tomono.tv_nsec); -	tsusp = ns_to_timespec64(nssusp); -	*ts = timespec64_sub(*ts, tsusp); -} - -/**   * ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC   *   * Returns the seconds portion of CLOCK_MONOTONIC with a single non @@ -1593,6 +1586,7 @@ static void __timekeeping_inject_sleeptime(struct timekeeper *tk,  		return;  	}  	tk_xtime_add(tk, delta); +	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));  	tk_update_sleep_time(tk, timespec64_to_ktime(*delta));  	tk_debug_account_sleep_time(delta);  } @@ -2125,7 +2119,7 @@ out:  void getboottime64(struct timespec64 *ts)  {  	struct timekeeper *tk = &tk_core.timekeeper; -	ktime_t t = ktime_sub(tk->offs_real, tk->time_suspended); +	ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);  	*ts = ktime_to_timespec64(t);  } @@ -2139,13 +2133,6 @@ unsigned long get_seconds(void)  }  EXPORT_SYMBOL(get_seconds); -struct timespec __current_kernel_time(void) -{ -	struct timekeeper *tk = &tk_core.timekeeper; - -	return timespec64_to_timespec(tk_xtime(tk)); -} -  struct timespec64 current_kernel_time64(void)  {  	struct timekeeper *tk = &tk_core.timekeeper; @@ -2195,6 +2182,7 @@ void do_timer(unsigned long ticks)   * ktime_get_update_offsets_now - hrtimer helper   * @cwsseq:	pointer to check and store the clock was set sequence number   * @offs_real:	pointer to storage for monotonic -> realtime offset + * @offs_boot:	pointer to storage for monotonic -> boottime offset   * @offs_tai:	pointer to storage for monotonic -> clock tai offset   *   * Returns current monotonic time and updates the offsets if the @@ -2204,7 +2192,7 @@ void do_timer(unsigned long ticks)   * Called from hrtimer_interrupt() or retrigger_next_event()   */  ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real, -				     ktime_t *offs_tai) +				     ktime_t *offs_boot, ktime_t *offs_tai)  {  	struct timekeeper *tk = &tk_core.timekeeper;  	unsigned int seq; @@ -2221,6 +2209,7 @@ ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, ktime_t *offs_real,  		if (*cwsseq != tk->clock_was_set_seq) {  			*cwsseq = tk->clock_was_set_seq;  			*offs_real = tk->offs_real; +			*offs_boot = tk->offs_boot;  			*offs_tai = tk->offs_tai;  		} diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 79b67f5e0343..7a9b4eb7a1d5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -6,6 +6,7 @@   */  extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq,  					    ktime_t *offs_real, +					    ktime_t *offs_boot,  					    ktime_t *offs_tai);  extern int timekeeping_valid_for_hres(void); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index d88e96d4e12c..56ba0f2a01db 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -977,6 +977,7 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)  {  	struct perf_event_query_bpf __user *uquery = info;  	struct perf_event_query_bpf query = {}; +	u32 *ids, 
prog_cnt, ids_len;  	int ret;  	if (!capable(CAP_SYS_ADMIN)) @@ -985,16 +986,32 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info)  		return -EINVAL;  	if (copy_from_user(&query, uquery, sizeof(query)))  		return -EFAULT; -	if (query.ids_len > BPF_TRACE_MAX_PROGS) + +	ids_len = query.ids_len; +	if (ids_len > BPF_TRACE_MAX_PROGS)  		return -E2BIG; +	ids = kcalloc(ids_len, sizeof(u32), GFP_USER | __GFP_NOWARN); +	if (!ids) +		return -ENOMEM; +	/* +	 * The above kcalloc returns ZERO_SIZE_PTR when ids_len = 0, which +	 * is required when user only wants to check for uquery->prog_cnt. +	 * There is no need to check for it since the case is handled +	 * gracefully in bpf_prog_array_copy_info. +	 */  	mutex_lock(&bpf_event_mutex);  	ret = bpf_prog_array_copy_info(event->tp_event->prog_array, -				       uquery->ids, -				       query.ids_len, -				       &uquery->prog_cnt); +				       ids, +				       ids_len, +				       &prog_cnt);  	mutex_unlock(&bpf_event_mutex); +	if (copy_to_user(&uquery->prog_cnt, &prog_cnt, sizeof(prog_cnt)) || +	    copy_to_user(uquery->ids, ids, ids_len * sizeof(u32))) +		ret = -EFAULT; + +	kfree(ids);  	return ret;  } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index dfbcf9ee1447..414d7210b2ec 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1165,7 +1165,7 @@ static struct {  	{ trace_clock,			"perf",		1 },  	{ ktime_get_mono_fast_ns,	"mono",		1 },  	{ ktime_get_raw_fast_ns,	"mono_raw",	1 }, -	{ ktime_get_mono_fast_ns,	"boot",		1 }, +	{ ktime_get_boot_fast_ns,	"boot",		1 },  	ARCH_TRACE_CLOCKS  }; diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h index e954ae3d82c0..e3a658bac10f 100644 --- a/kernel/trace/trace_entries.h +++ b/kernel/trace/trace_entries.h @@ -356,7 +356,7 @@ FTRACE_ENTRY(hwlat, hwlat_entry,  		__field(	unsigned int,		seqnum		)  	), -	F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llunmi-ts:%llu\tnmi-count:%u\n", +	F_printk("cnt:%u\tts:%010llu.%010lu\tinner:%llu\touter:%llu\tnmi-ts:%llu\tnmi-count:%u\n",  		 __entry->seqnum,  		 __entry->tv_sec,  		 __entry->tv_nsec, diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 9b4716bb8bb0..1f951b3df60c 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1499,14 +1499,14 @@ static int process_preds(struct trace_event_call *call,  		return ret;  	} -	if (!nr_preds) { -		prog = NULL; -	} else { -		prog = predicate_parse(filter_string, nr_parens, nr_preds, +	if (!nr_preds) +		return -EINVAL; + +	prog = predicate_parse(filter_string, nr_parens, nr_preds,  			       parse_pred, call, pe); -		if (IS_ERR(prog)) -			return PTR_ERR(prog); -	} +	if (IS_ERR(prog)) +		return PTR_ERR(prog); +  	rcu_assign_pointer(filter->prog, prog);  	return 0;  } diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index 0d7b3ffbecc2..b9061ed59bbd 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -2466,6 +2466,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,  		else if (strcmp(modifier, "usecs") == 0)  			*flags |= HIST_FIELD_FL_TIMESTAMP_USECS;  		else { +			hist_err("Invalid field modifier: ", modifier);  			field = ERR_PTR(-EINVAL);  			goto out;  		} @@ -2481,6 +2482,7 @@ parse_field(struct hist_trigger_data *hist_data, struct trace_event_file *file,  	else {  		field = trace_find_event_field(file->event_call, field_name);  		if (!field || !field->size) { +			
hist_err("Couldn't find field: ", field_name);  			field = ERR_PTR(-EINVAL);  			goto out;  		} @@ -4913,6 +4915,16 @@ static void hist_field_print(struct seq_file *m, struct hist_field *hist_field)  		seq_printf(m, "%s", field_name);  	} else if (hist_field->flags & HIST_FIELD_FL_TIMESTAMP)  		seq_puts(m, "common_timestamp"); + +	if (hist_field->flags) { +		if (!(hist_field->flags & HIST_FIELD_FL_VAR_REF) && +		    !(hist_field->flags & HIST_FIELD_FL_EXPR)) { +			const char *flags = get_hist_field_flags(hist_field); + +			if (flags) +				seq_printf(m, ".%s", flags); +		} +	}  }  static int event_hist_trigger_print(struct seq_file *m, diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 1cd3fb4d70f8..02aed76e0978 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -512,8 +512,6 @@ static int __register_trace_kprobe(struct trace_kprobe *tk)  	if (ret == 0)  		tk->tp.flags |= TP_FLAG_REGISTERED;  	else { -		pr_warn("Could not insert probe at %s+%lu: %d\n", -			trace_kprobe_symbol(tk), trace_kprobe_offset(tk), ret);  		if (ret == -ENOENT && trace_kprobe_is_on_module(tk)) {  			pr_warn("This probe might be able to register after target module is loaded. Continue.\n");  			ret = 0; diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 34fd0e0ec51d..ac892878dbe6 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -55,6 +55,7 @@ struct trace_uprobe {  	struct list_head		list;  	struct trace_uprobe_filter	filter;  	struct uprobe_consumer		consumer; +	struct path			path;  	struct inode			*inode;  	char				*filename;  	unsigned long			offset; @@ -289,7 +290,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu)  	for (i = 0; i < tu->tp.nr_args; i++)  		traceprobe_free_probe_arg(&tu->tp.args[i]); -	iput(tu->inode); +	path_put(&tu->path);  	kfree(tu->tp.call.class->system);  	kfree(tu->tp.call.name);  	kfree(tu->filename); @@ -363,7 +364,6 @@ end:  static int create_trace_uprobe(int argc, char **argv)  {  	struct trace_uprobe *tu; -	struct inode *inode;  	char *arg, *event, *group, *filename;  	char buf[MAX_EVENT_NAME_LEN];  	struct path path; @@ -371,7 +371,6 @@ static int create_trace_uprobe(int argc, char **argv)  	bool is_delete, is_return;  	int i, ret; -	inode = NULL;  	ret = 0;  	is_delete = false;  	is_return = false; @@ -437,21 +436,16 @@ static int create_trace_uprobe(int argc, char **argv)  	}  	/* Find the last occurrence, in case the path contains ':' too. 
*/  	arg = strrchr(argv[1], ':'); -	if (!arg) { -		ret = -EINVAL; -		goto fail_address_parse; -	} +	if (!arg) +		return -EINVAL;  	*arg++ = '\0';  	filename = argv[1];  	ret = kern_path(filename, LOOKUP_FOLLOW, &path);  	if (ret) -		goto fail_address_parse; - -	inode = igrab(d_real_inode(path.dentry)); -	path_put(&path); +		return ret; -	if (!inode || !S_ISREG(inode->i_mode)) { +	if (!d_is_reg(path.dentry)) {  		ret = -EINVAL;  		goto fail_address_parse;  	} @@ -490,7 +484,7 @@ static int create_trace_uprobe(int argc, char **argv)  		goto fail_address_parse;  	}  	tu->offset = offset; -	tu->inode = inode; +	tu->path = path;  	tu->filename = kstrdup(filename, GFP_KERNEL);  	if (!tu->filename) { @@ -558,7 +552,7 @@ error:  	return ret;  fail_address_parse: -	iput(inode); +	path_put(&path);  	pr_info("Failed to parse address or file.\n"); @@ -922,6 +916,7 @@ probe_event_enable(struct trace_uprobe *tu, struct trace_event_file *file,  		goto err_flags;  	tu->consumer.filter = filter; +	tu->inode = d_real_inode(tu->path.dentry);  	ret = uprobe_register(tu->inode, tu->offset, &tu->consumer);  	if (ret)  		goto err_buffer; @@ -967,6 +962,7 @@ probe_event_disable(struct trace_uprobe *tu, struct trace_event_file *file)  	WARN_ON(!uprobe_filter_is_empty(&tu->filter));  	uprobe_unregister(tu->inode, tu->offset, &tu->consumer); +	tu->inode = NULL;  	tu->tp.flags &= file ? ~TP_FLAG_TRACE : ~TP_FLAG_PROFILE;  	uprobe_buffer_disable(); @@ -1337,7 +1333,6 @@ struct trace_event_call *  create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)  {  	struct trace_uprobe *tu; -	struct inode *inode;  	struct path path;  	int ret; @@ -1345,11 +1340,8 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)  	if (ret)  		return ERR_PTR(ret); -	inode = igrab(d_inode(path.dentry)); -	path_put(&path); - -	if (!inode || !S_ISREG(inode->i_mode)) { -		iput(inode); +	if (!d_is_reg(path.dentry)) { +		path_put(&path);  		return ERR_PTR(-EINVAL);  	} @@ -1364,11 +1356,12 @@ create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)  	if (IS_ERR(tu)) {  		pr_info("Failed to allocate trace_uprobe.(%d)\n",  			(int)PTR_ERR(tu)); +		path_put(&path);  		return ERR_CAST(tu);  	}  	tu->offset = offs; -	tu->inode = inode; +	tu->path = path;  	tu->filename = kstrdup(name, GFP_KERNEL);  	init_trace_event_call(tu, &tu->tp.call); diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 671b13457387..1e37da2e0c25 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -207,7 +207,7 @@ static int tracepoint_add_func(struct tracepoint *tp,  			lockdep_is_held(&tracepoints_mutex));  	old = func_add(&tp_funcs, func, prio);  	if (IS_ERR(old)) { -		WARN_ON_ONCE(1); +		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);  		return PTR_ERR(old);  	} @@ -239,7 +239,7 @@ static int tracepoint_remove_func(struct tracepoint *tp,  			lockdep_is_held(&tracepoints_mutex));  	old = func_remove(&tp_funcs, func);  	if (IS_ERR(old)) { -		WARN_ON_ONCE(1); +		WARN_ON_ONCE(PTR_ERR(old) != -ENOMEM);  		return PTR_ERR(old);  	} |
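
The livepatch/shadow.c hunks above replace the old "copy @data into the shadow variable" interface with constructor/destructor callbacks (klp_shadow_ctor_t / klp_shadow_dtor_t). Below is a minimal sketch of how a caller might use the reworked interface, in the spirit of the in-tree samples/livepatch shadow-variable examples. The shadow id, payload layout and function names here are hypothetical; only the klp_shadow_*() signatures come from the diff, and the callback argument lists are inferred from the ctor(obj, new_shadow->data, ctor_data) and dtor(shadow->obj, shadow->data) call sites shown in shadow.c.

#include <linux/livepatch.h>
#include <linux/slab.h>

#define SHADOW_LEAK_ID	1	/* hypothetical shadow-variable id */

/*
 * Runs under klp_shadow_lock, so it must not sleep; it only records the
 * pointer that the patched code would otherwise leak.
 */
static int shadow_leak_ctor(void *obj, void *shadow_data, void *ctor_data)
{
	void **shadow_leak = shadow_data;

	*shadow_leak = ctor_data;
	return 0;
}

/* Invoked from klp_shadow_free() just before the shadow variable is released. */
static void shadow_leak_dtor(void *obj, void *shadow_data)
{
	void *leak = *(void **)shadow_data;

	kfree(leak);
}

static void livepatch_attach_leak(void *obj, void *leak)
{
	/*
	 * The allocation itself still honours gfp_flags; only the ctor is
	 * called under the spin lock, per the comment added to
	 * __klp_shadow_get_or_alloc() above.  Returns NULL on duplicate or
	 * allocation/ctor failure (error handling elided in this sketch).
	 */
	klp_shadow_alloc(obj, SHADOW_LEAK_ID, sizeof(leak), GFP_KERNEL,
			 shadow_leak_ctor, leak);
}

static void livepatch_detach_leak(void *obj)
{
	klp_shadow_free(obj, SHADOW_LEAK_ID, shadow_leak_dtor);
}

Running the constructor under klp_shadow_lock is what lets the API guarantee that @ctor fires only when the <obj, id> variable did not exist before; the trade-off, spelled out in the updated kerneldoc, is that the callback executes in atomic context and therefore must not sleep.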