diff options
| field | value | date |
|---|---|---|
| author | Linus Torvalds <[email protected]> | 2024-07-16 13:12:16 -0700 |
| committer | Linus Torvalds <[email protected]> | 2024-07-16 13:12:16 -0700 |
| commit | 1ca995edf838a70c7c0aba2de7fc6da57e22cbf3 (patch) | |
| tree | 9c353521b749b26625bd04d69933efcd71939d19 | |
| parent | 72fda6c8e553699f6ba8d3ddc34f0bbe7a5898df (diff) | |
| parent | f0c508faea645da58d6ae6b644a1b68020d5a9d2 (diff) | |
Merge tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux
Pull seccomp updates from Kees Cook:
 - interrupt SECCOMP_IOCTL_NOTIF_RECV when all users exit (Andrei Vagin)
 - Update selftests to check for expected NOTIF_RECV exits (Andrei
   Vagin)
* tag 'seccomp-v6.11-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux:
  selftests/seccomp: check that a zombie leader doesn't affect others
  selftests/seccomp: add test for NOTIF_RECV and unused filters
  seccomp: release task filters when the task exits
  seccomp: interrupt SECCOMP_IOCTL_NOTIF_RECV when all users have exited
| -rw-r--r-- | kernel/exit.c | 3 |
| -rw-r--r-- | kernel/seccomp.c | 30 |
| -rw-r--r-- | tools/testing/selftests/seccomp/seccomp_bpf.c | 131 |
3 files changed, 157 insertions, 7 deletions
| diff --git a/kernel/exit.c b/kernel/exit.c index 81fcee45d630..be81342caf1b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -277,7 +277,6 @@ repeat:  	}  	write_unlock_irq(&tasklist_lock); -	seccomp_filter_release(p);  	proc_flush_pid(thread_pid);  	put_pid(thread_pid);  	release_thread(p); @@ -834,6 +833,8 @@ void __noreturn do_exit(long code)  	io_uring_files_cancel();  	exit_signals(tsk);  /* sets PF_EXITING */ +	seccomp_filter_release(tsk); +  	acct_update_integrals(tsk);  	group_dead = atomic_dec_and_test(&tsk->signal->live);  	if (group_dead) { diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e30b60b57614..dc51e521bc1d 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -502,6 +502,9 @@ static inline pid_t seccomp_can_sync_threads(void)  		/* Skip current, since it is initiating the sync. */  		if (thread == caller)  			continue; +		/* Skip exited threads. */ +		if (thread->flags & PF_EXITING) +			continue;  		if (thread->seccomp.mode == SECCOMP_MODE_DISABLED ||  		    (thread->seccomp.mode == SECCOMP_MODE_FILTER && @@ -563,18 +566,21 @@ static void __seccomp_filter_release(struct seccomp_filter *orig)   * @tsk: task the filter should be released from.   *   * This function should only be called when the task is exiting as - * it detaches it from its filter tree. As such, READ_ONCE() and - * barriers are not needed here, as would normally be needed. + * it detaches it from its filter tree. PF_EXITING has to be set + * for the task.   */  void seccomp_filter_release(struct task_struct *tsk)  { -	struct seccomp_filter *orig = tsk->seccomp.filter; +	struct seccomp_filter *orig; -	/* We are effectively holding the siglock by not having any sighand. */ -	WARN_ON(tsk->sighand != NULL); +	if (WARN_ON((tsk->flags & PF_EXITING) == 0)) +		return; +	spin_lock_irq(&tsk->sighand->siglock); +	orig = tsk->seccomp.filter;  	/* Detach task from its filter tree. 
*/  	tsk->seccomp.filter = NULL; +	spin_unlock_irq(&tsk->sighand->siglock);  	__seccomp_filter_release(orig);  } @@ -602,6 +608,13 @@ static inline void seccomp_sync_threads(unsigned long flags)  		if (thread == caller)  			continue; +		/* +		 * Skip exited threads. seccomp_filter_release could have +		 * been already called for this task. +		 */ +		if (thread->flags & PF_EXITING) +			continue; +  		/* Get a task reference for the new leaf node. */  		get_seccomp_filter(caller); @@ -1466,7 +1479,7 @@ static int recv_wake_function(wait_queue_entry_t *wait, unsigned int mode, int s  				  void *key)  {  	/* Avoid a wakeup if event not interesting for us. */ -	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR))) +	if (key && !(key_to_poll(key) & (EPOLLIN | EPOLLERR | EPOLLHUP)))  		return 0;  	return autoremove_wake_function(wait, mode, sync, key);  } @@ -1476,6 +1489,9 @@ static int recv_wait_event(struct seccomp_filter *filter)  	DEFINE_WAIT_FUNC(wait, recv_wake_function);  	int ret; +	if (refcount_read(&filter->users) == 0) +		return 0; +  	if (atomic_dec_if_positive(&filter->notif->requests) >= 0)  		return 0; @@ -1484,6 +1500,8 @@ static int recv_wait_event(struct seccomp_filter *filter)  		if (atomic_dec_if_positive(&filter->notif->requests) >= 0)  			break; +		if (refcount_read(&filter->users) == 0) +			break;  		if (ret)  			return ret; diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 783ebce8c4de..e3f97f90d8db 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3954,6 +3954,60 @@ TEST(user_notification_filter_empty)  	EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);  } +TEST(user_ioctl_notification_filter_empty) +{ +	pid_t pid; +	long ret; +	int status, p[2]; +	struct __clone_args args = { +		.flags = CLONE_FILES, +		.exit_signal = SIGCHLD, +	}; +	struct seccomp_notif req = {}; + +	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); +	
ASSERT_EQ(0, ret) { +		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); +	} + +	if (__NR_clone3 < 0) +		SKIP(return, "Test not built with clone3 support"); + +	ASSERT_EQ(0, pipe(p)); + +	pid = sys_clone3(&args, sizeof(args)); +	ASSERT_GE(pid, 0); + +	if (pid == 0) { +		int listener; + +		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER); +		if (listener < 0) +			_exit(EXIT_FAILURE); + +		if (dup2(listener, 200) != 200) +			_exit(EXIT_FAILURE); +		close(p[1]); +		close(listener); +		sleep(1); + +		_exit(EXIT_SUCCESS); +	} +	if (read(p[0], &status, 1) != 0) +		_exit(EXIT_SUCCESS); +	close(p[0]); +	/* +	 * The seccomp filter has become unused so we should be notified once +	 * the kernel gets around to cleaning up task struct. +	 */ +	EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1); +	EXPECT_EQ(errno, ENOENT); + +	EXPECT_EQ(waitpid(pid, &status, 0), pid); +	EXPECT_EQ(true, WIFEXITED(status)); +	EXPECT_EQ(0, WEXITSTATUS(status)); +} +  static void *do_thread(void *data)  {  	return NULL; @@ -4755,6 +4809,83 @@ TEST(user_notification_wait_killable_fatal)  	EXPECT_EQ(SIGTERM, WTERMSIG(status));  } +struct tsync_vs_thread_leader_args { +	pthread_t leader; +}; + +static void *tsync_vs_dead_thread_leader_sibling(void *_args) +{ +	struct sock_filter allow_filter[] = { +		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), +	}; +	struct sock_fprog allow_prog = { +		.len = (unsigned short)ARRAY_SIZE(allow_filter), +		.filter = allow_filter, +	}; +	struct tsync_vs_thread_leader_args *args = _args; +	void *retval; +	long ret; + +	ret = pthread_join(args->leader, &retval); +	if (ret) +		exit(1); +	if (retval != _args) +		exit(2); +	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog); +	if (ret) +		exit(3); + +	exit(0); +} + +/* + * Ensure that a dead thread leader doesn't prevent installing new filters with + * SECCOMP_FILTER_FLAG_TSYNC from other threads. 
+ */ +TEST(tsync_vs_dead_thread_leader) +{ +	int status; +	pid_t pid; +	long ret; + +	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); +	ASSERT_EQ(0, ret) { +		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); +	} + +	pid = fork(); +	ASSERT_GE(pid, 0); + +	if (pid == 0) { +		struct sock_filter allow_filter[] = { +			BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), +		}; +		struct sock_fprog allow_prog = { +			.len = (unsigned short)ARRAY_SIZE(allow_filter), +			.filter = allow_filter, +		}; +		struct  tsync_vs_thread_leader_args *args; +		pthread_t sibling; + +		args = malloc(sizeof(*args)); +		ASSERT_NE(NULL, args); +		args->leader = pthread_self(); + +		ret = pthread_create(&sibling, NULL, +				     tsync_vs_dead_thread_leader_sibling, args); +		ASSERT_EQ(0, ret); + +		/* Install a new filter just to the leader thread. */ +		ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog); +		ASSERT_EQ(0, ret); +		pthread_exit(args); +		exit(1); +	} + +	EXPECT_EQ(pid, waitpid(pid, &status, 0)); +	EXPECT_EQ(0, status); +} +  /*   * TODO:   * - expand NNP testing |